From 55fa2e7ccd0f4fe2b01892fb59cd3c34ce3bfa07 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 30 Aug 2023 22:10:27 +0100 Subject: [PATCH] riscv64: Implement SIMD floating point conversion instructions (#6924) * riscv64: Implement SIMD `fvpromote_low`/`fvdemote` * riscv64: Implement SIMD `fcvt_from_{u,s}int` * riscv64: Implement SIMD `fcvt_to_{u,s}int_sat` * riscv64: Use `i8_to_imm5` constructor --- build.rs | 12 +-- .../codegen/src/isa/riscv64/inst/vector.rs | 31 ++++++-- .../codegen/src/isa/riscv64/inst_vector.isle | 14 ++++ cranelift/codegen/src/isa/riscv64/lower.isle | 53 ++++++++++--- .../isa/riscv64/simd-fcvt-from-sint.clif | 43 ++++++++++ .../isa/riscv64/simd-fcvt-from-uint.clif | 43 ++++++++++ .../isa/riscv64/simd-fcvt-to-sint-sat.clif | 47 +++++++++++ .../isa/riscv64/simd-fcvt-to-uint-sat.clif | 47 +++++++++++ .../filetests/isa/riscv64/simd-fvdemote.clif | 49 ++++++++++++ .../isa/riscv64/simd-fvpromote-low.clif | 43 ++++++++++ .../filetests/runtests/simd-conversion.clif | 79 ------------------- .../runtests/simd-fcvt-from-sint.clif | 17 ++++ .../runtests/simd-fcvt-from-uint.clif | 18 +++++ .../runtests/simd-fcvt-to-sint-sat.clif | 18 +++++ .../runtests/simd-fcvt-to-uint-sat.clif | 28 +++++++ .../filetests/runtests/simd-fvdemote.clif | 22 ++++++ .../runtests/simd-fvpromote-low.clif | 21 +++++ 17 files changed, 477 insertions(+), 108 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-conversion.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fvdemote.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif diff --git a/build.rs b/build.rs index 4ffc8894c58b..1e60177e4a13 100644 --- a/build.rs +++ b/build.rs @@ -254,17 +254,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { return true; } - let known_failure = [ - "canonicalize_nan", - "cvt_from_uint", - "issue_3327_bnot_lowering", - "simd_conversions", - "simd_i32x4_trunc_sat_f32x4", - "simd_i32x4_trunc_sat_f64x2", - "simd_load", - "simd_splat", - ] - .contains(&testname); + let known_failure = ["issue_3327_bnot_lowering"].contains(&testname); known_failure } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index ec3e99fd7390..78e7a2f2d3eb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -757,7 +757,9 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtrtzxufV | VecAluOpRR::VfcvtrtzxfV | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV => 0b010010, + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => 0b010010, } } @@ -779,7 +781,9 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtrtzxufV | VecAluOpRR::VfcvtrtzxfV | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV => VecOpCategory::OPFVV, + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV, VecAluOpRR::VmvVV => VecOpCategory::OPIVV, VecAluOpRR::VmvVX => VecOpCategory::OPIVX, } @@ -806,12 +810,17 @@ impl VecAluOpRR { VecAluOpRR::VzextVF2 => 0b00110, VecAluOpRR::VsextVF2 => 0b00111, // VFUNARY0 + // single-width converts VecAluOpRR::VfcvtxufV => 0b00000, VecAluOpRR::VfcvtxfV => 0b00001, VecAluOpRR::VfcvtrtzxufV => 0b00110, VecAluOpRR::VfcvtrtzxfV => 0b00111, VecAluOpRR::VfcvtfxuV => 0b00010, VecAluOpRR::VfcvtfxV => 0b00011, + // widening converts + VecAluOpRR::VfwcvtffV => 0b01100, + // narrowing converts + VecAluOpRR::VfncvtffW => 0b10100, // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, @@ -837,7 +846,9 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtrtzxufV | VecAluOpRR::VfcvtrtzxfV | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV => true, + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => true, VecAluOpRR::VmvSX | VecAluOpRR::VfmvSF | VecAluOpRR::VmvVV @@ -865,7 +876,9 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtrtzxufV | VecAluOpRR::VfcvtrtzxfV | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV => RegClass::Vector, + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => RegClass::Vector, VecAluOpRR::VmvXS => RegClass::Int, VecAluOpRR::VfmvFS => RegClass::Float, } @@ -888,7 +901,9 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtrtzxufV | VecAluOpRR::VfcvtrtzxfV | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV => RegClass::Vector, + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => RegClass::Vector, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, } @@ -902,7 +917,9 @@ impl VecAluOpRR { | VecAluOpRR::VzextVF8 | VecAluOpRR::VsextVF2 | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => true, + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => true, _ => false, } } @@ -931,6 +948,8 @@ impl fmt::Display for VecAluOpRR { VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v", VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v", VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v", + VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v", + VecAluOpRR::VfncvtffW => "vfncvt.f.f.w", }) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 63db6c751146..685761e2a8d0 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -291,6 +291,8 @@ (VfcvtrtzxfV) (VfcvtfxuV) (VfcvtfxV) + (VfwcvtffV) + (VfncvtffW) )) ;; Returns the canonical destination type for a VecAluOpRRImm5. @@ -1060,6 +1062,18 @@ (rule (rv_vfcvt_f_x_v vs mask vstate) (vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate)) + ;; Helper for emitting the `vfwcvt.f.f.v` instruction. +;; Convert single-width float to double-width float. +(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfwcvt_f_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate)) + +;; Helper for emitting the `vfncvt.f.f.w` instruction. +;; Convert double-width float to single-width float. +(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg) +(rule (rv_vfncvt_f_f_w vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate)) + ;; Helper for emitting the `vslidedown.vx` instruction. ;; `vslidedown` moves all elements in the vector down by n elements. ;; The top most elements are up to the tail policy. diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 4e991a75459c..26b375f3cdf3 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1201,10 +1201,26 @@ (rule (lower (fpromote x)) (rv_fcvtds x)) +;;;;; Rules for `fvpromote_low`;;;;;;;;;;;; + +(rule (lower (has_type (ty_vec_fits_in_register ty) (fvpromote_low x))) + (if-let half_ty (ty_half_width ty)) + (rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty))) + ;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; (rule (lower (fdemote x)) (rv_fcvtsd x)) +;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;; + +;; `vfncvt...` leaves the upper bits of the register undefined so +;; we need to zero them out. +(rule (lower (has_type (ty_vec_fits_in_register ty @ $F32X4) (fvdemote x))) + (if-let zero (i8_to_imm5 0)) + (let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty))) + (mask VReg (gen_vec_mask 0xC))) + (rv_vmerge_vim narrow zero mask ty))) + ;;;;; Rules for for float arithmetic @@ -1613,39 +1629,52 @@ (gen_fcvt_int $false v $false from to)) ;;;;; Rules for `fcvt_to_sint`;;;;;;;;; -(rule - (lower (has_type to (fcvt_to_sint v @ (value_type from)))) +(rule 0 (lower (has_type to (fcvt_to_sint v @ (value_type (ty_scalar_float from))))) (gen_fcvt_int $false v $true from to)) ;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; -(rule - (lower (has_type to (fcvt_to_sint_sat v @ (value_type from)))) +(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_scalar_float from))))) (gen_fcvt_int $true v $true from to)) +(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_sint_sat v @ (value_type from_ty)))) + (if-let zero (i8_to_imm5 0)) + (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) + (cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty))) + (rv_vmerge_vim cvt zero is_nan from_ty))) + ;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; -(rule - (lower (has_type to (fcvt_to_uint_sat v @ (value_type from)))) +(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_scalar_float from))))) (gen_fcvt_int $true v $false from to)) +(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_uint_sat v @ (value_type from_ty)))) + (if-let zero (i8_to_imm5 0)) + (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) + (cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty))) + (rv_vmerge_vim cvt zero is_nan from_ty))) + ;;;;; Rules for `fcvt_from_sint`;;;;;;;;; -(rule - (lower (has_type to (fcvt_from_sint v @ (value_type from_ty)))) +(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_sint v @ (value_type from_ty)))) (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to)) (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed)))) (fpu_rr float_op to value))) +(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_sint v @ (value_type from_ty)))) + (rv_vfcvt_f_x_v v (unmasked) from_ty)) + ;;;;; Rules for `fcvt_from_uint`;;;;;;;;; -(rule - (lower (has_type to (fcvt_from_uint v @ (value_type from_ty)))) +(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_uint v @ (value_type from_ty)))) (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to)) (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero)))) (fpu_rr float_op to value))) +(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_uint v @ (value_type from_ty)))) + (rv_vfcvt_f_xu_v v (unmasked) from_ty)) + ;;;;; Rules for `symbol_value`;;;;;;;;; (rule (lower (symbol_value (symbol_value_data name _ offset))) - (load_ext_name name offset) -) + (load_ext_name name offset)) + ;;;;; Rules for `bitcast`;;;;;;;;; (rule (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif new file mode 100644 index 000000000000..19d4b7beeff3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif @@ -0,0 +1,43 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fcvt_from_sint(i32x4) -> f32x4 { +block0(v0: i32x4): + v1 = fcvt_from_sint.f32x4 v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfcvt.f.x.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x11, 0x4a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif new file mode 100644 index 000000000000..a774fd592998 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif @@ -0,0 +1,43 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0: i32x4): + v1 = fcvt_from_uint.f32x4 v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfcvt.f.xu.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x12, 0x11, 0x4a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif new file mode 100644 index 000000000000..c8639dde8e88 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif @@ -0,0 +1,47 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fcvt_to_sint_sat(f32x4) -> i32x4 { +block0(v0:f32x4): + v1 = fcvt_to_sint_sat.i32x4 v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vfcvt.rtz.x.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x10, 0x72 +; .byte 0x57, 0x93, 0x13, 0x4a +; .byte 0x57, 0x34, 0x60, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif new file mode 100644 index 000000000000..78db69d4470d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif @@ -0,0 +1,47 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fcvt_to_uint_sat(f32x4) -> i32x4 { +block0(v0:f32x4): + v1 = fcvt_to_uint_sat.i32x4 v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vfcvt.rtz.xu.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x10, 0x72 +; .byte 0x57, 0x13, 0x13, 0x4a +; .byte 0x57, 0x34, 0x60, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif new file mode 100644 index 000000000000..1422b4d3e632 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif @@ -0,0 +1,49 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fvdemote(f64x2) -> f32x4 { +block0(v0: f64x2): + v1 = fvdemote v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfncvt.f.f.w v4,v1 #avl=4, #vtype=(e32, mf2, ta, ma) +; vmv.v.i v0,12 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v4,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x72, 0xcd +; .byte 0x57, 0x12, 0x1a, 0x4a +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x06, 0x5e +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x34, 0x40, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif new file mode 100644 index 000000000000..355a132edcf0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif @@ -0,0 +1,43 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fvpromote_low(f32x4) -> f64x2 { +block0(v0: f32x4): + v1 = fvpromote_low v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfwcvt.f.f.v v4,v1 #avl=2, #vtype=(e32, mf2, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x71, 0xcd +; .byte 0x57, 0x12, 0x16, 0x4a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif deleted file mode 100644 index 86219ff48db3..000000000000 --- a/cranelift/filetests/filetests/runtests/simd-conversion.clif +++ /dev/null @@ -1,79 +0,0 @@ -test interpret -test run -target aarch64 -target s390x -target x86_64 -target x86_64 sse41 -target x86_64 sse42 -target x86_64 sse42 has_avx - -function %fcvt_from_sint(i32x4) -> f32x4 { -block0(v0: i32x4): - v1 = fcvt_from_sint.f32x4 v0 - return v1 -} -; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0] -; Note that 123456789 rounds to 123456792.0, an error of 3 - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0: i32x4): - v1 = fcvt_from_uint.f32x4 v0 - return v1 -} -; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0] -; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0] -; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4. - -function %fcvt_to_sint_sat(f32x4) -> i32x4 { -block0(v0:f32x4): - v1 = fcvt_to_sint_sat.i32x4 v0 - return v1 -} -; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF] -; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000] -; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0] - -function %fcvt_to_uint_sat(f32x4) -> i32x4 { -block0(v0:f32x4): - v1 = fcvt_to_uint_sat.i32x4 v0 - return v1 -} -; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF] -; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0] -; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0] -; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0] - -function %fcvt_low_from_sint(i32x4) -> f64x2 { -block0(v0: i32x4): - v1 = swiden_low v0 - v2 = fcvt_from_sint.f64x2 v1 - return v2 -} -; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0] -; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26] - -function %fvdemote(f64x2) -> f32x4 { -block0(v0: f64x2): - v1 = fvdemote v0 - return v1 -} - -; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0] -; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0] -; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] -; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] -; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] - - -function %fvpromote_low(f32x4) -> f64x2 { -block0(v0: f32x4): - v1 = fvpromote_low v0 - return v1 -} - -; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0] -; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2] -; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2] -; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] -; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] - diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif new file mode 100644 index 000000000000..a746827d2a13 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif @@ -0,0 +1,17 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fcvt_from_sint(i32x4) -> f32x4 { +block0(v0: i32x4): + v1 = fcvt_from_sint.f32x4 v0 + return v1 +} +; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0] +; Note that 123456789 rounds to 123456792.0, an error of 3 diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif new file mode 100644 index 000000000000..1cf1f1a544d8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif @@ -0,0 +1,18 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0: i32x4): + v1 = fcvt_from_uint.f32x4 v0 + return v1 +} +; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0] +; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0] +; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4. diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif new file mode 100644 index 000000000000..d08955322e11 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif @@ -0,0 +1,18 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fcvt_to_sint_sat(f32x4) -> i32x4 { +block0(v0:f32x4): + v1 = fcvt_to_sint_sat.i32x4 v0 + return v1 +} +; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF] +; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000] +; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif new file mode 100644 index 000000000000..4bb1d105274e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif @@ -0,0 +1,28 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fcvt_to_uint_sat(f32x4) -> i32x4 { +block0(v0:f32x4): + v1 = fcvt_to_uint_sat.i32x4 v0 + return v1 +} +; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF] +; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0] +; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0] +; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0] + +function %fcvt_low_from_sint(i32x4) -> f64x2 { +block0(v0: i32x4): + v1 = swiden_low v0 + v2 = fcvt_from_sint.f64x2 v1 + return v2 +} +; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0] +; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26] diff --git a/cranelift/filetests/filetests/runtests/simd-fvdemote.clif b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif new file mode 100644 index 000000000000..2290785e340e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif @@ -0,0 +1,22 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fvdemote(f64x2) -> f32x4 { +block0(v0: f64x2): + v1 = fvdemote v0 + return v1 +} + +; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0] +; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0] +; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] + diff --git a/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif b/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif new file mode 100644 index 000000000000..5c2d8d5e81d8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif @@ -0,0 +1,21 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fvpromote_low(f32x4) -> f64x2 { +block0(v0: f32x4): + v1 = fvpromote_low v0 + return v1 +} + +; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0] +; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2] +; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2] +; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] +; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]