From 6755f35d1d44c8afdd8144c32eae51961931e019 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 28 Jun 2023 16:22:54 +0100 Subject: [PATCH] riscv64: Implement various SIMD float ops (#6657) * riscv64: Implement SIMD `fabs` * riscv64: Implement SIMD `fcopysign` * riscv64: Implement SIMD `f{min,max}_pseudo` * riscv64: Implement SIMD `f{min,max}` --- build.rs | 4 - cranelift/codegen/src/isa/riscv64/inst.isle | 5 + cranelift/codegen/src/isa/riscv64/inst/mod.rs | 3 + .../codegen/src/isa/riscv64/inst/vector.rs | 9 + .../codegen/src/isa/riscv64/inst_vector.isle | 38 ++++ cranelift/codegen/src/isa/riscv64/lower.isle | 69 +++++-- .../filetests/isa/riscv64/simd-fabs.clif | 83 +++++++++ .../filetests/isa/riscv64/simd-fcopysign.clif | 169 ++++++++++++++++++ .../isa/riscv64/simd-fmax-pseudo.clif | 92 ++++++++++ .../filetests/isa/riscv64/simd-fmax.clif | 116 ++++++++++++ .../isa/riscv64/simd-fmin-pseudo.clif | 92 ++++++++++ .../filetests/isa/riscv64/simd-fmin.clif | 116 ++++++++++++ .../filetests/runtests/simd-fabs.clif | 24 +++ .../runtests/simd-fcopysign-64bit.clif | 1 + .../filetests/runtests/simd-fcopysign.clif | 1 + ...md-fmax-fmin-nondeterministic-aarch64.clif | 4 + ...md-fmax-fmin-nondeterministic-riscv64.clif | 29 +++ ...imd-fmax-fmin-nondeterministic-x86_64.clif | 4 + .../filetests/runtests/simd-fmax-fmin.clif | 5 +- ...-vector.clif => simd-fmin-max-pseudo.clif} | 1 + 20 files changed, 845 insertions(+), 20 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fabs.clif 
create mode 100644 cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif rename cranelift/filetests/filetests/runtests/{fmin-max-pseudo-vector.clif => simd-fmin-max-pseudo.clif} (98%) diff --git a/build.rs b/build.rs index cde74576aa7b..10dda5e9d8de 100644 --- a/build.rs +++ b/build.rs @@ -233,11 +233,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "cvt_from_uint", "issue_3327_bnot_lowering", "simd_conversions", - "simd_f32x4", - "simd_f32x4_pmin_pmax", "simd_f32x4_rounding", - "simd_f64x2", - "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", "simd_i32x4_trunc_sat_f32x4", "simd_i32x4_trunc_sat_f64x2", diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index ec70bbe361d9..e4ab3354e06f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1610,6 +1610,11 @@ ;; Float Helpers +;; Returns the bitpattern of the Canonical NaN for the given type. 
+(decl pure canonical_nan_u64 (Type) u64) +(rule (canonical_nan_u64 $F32) 0x7fc00000) +(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) + (decl gen_default_frm () OptionFloatRoundingMode) (extern constructor gen_default_frm gen_default_frm) diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 239c479efbd4..091d2128db17 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1744,6 +1744,9 @@ impl Inst { (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") } + (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { + format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") + } (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 9fc038bade8d..19e4b2216339 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -345,9 +345,13 @@ impl VecAluOpRRR { | VecAluOpRRR::VsadduVV | VecAluOpRRR::VsadduVX => 0b100000, VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VfminVV => 0b000100, + VecAluOpRRR::VfmaxVV => 0b000110, VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011, + VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, VecAluOpRRR::VfsgnjnVV => 0b001001, + VecAluOpRRR::VfsgnjxVV => 0b001010, VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, @@ -473,7 +477,11 @@ impl VecAluOpRRR { | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfmaxVV + | VecAluOpRRR::VfminVV + | VecAluOpRRR::VfsgnjVV | 
VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VfsgnjxVV | VecAluOpRRR::VmfeqVV | VecAluOpRRR::VmfneVV | VecAluOpRRR::VmfltVV @@ -485,6 +493,7 @@ impl VecAluOpRRR { | VecAluOpRRR::VfdivVF | VecAluOpRRR::VfrdivVF | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VfsgnjVF | VecAluOpRRR::VmfeqVF | VecAluOpRRR::VmfneVF | VecAluOpRRR::VmfltVF diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index a51e96cdf4d7..49133a2ae2fb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -122,7 +122,11 @@ (VfsubVV) (VfmulVV) (VfdivVV) + (VfminVV) + (VfmaxVV) + (VfsgnjVV) (VfsgnjnVV) + (VfsgnjxVV) (VmergeVVM) (VredmaxuVS) (VredminuVS) @@ -180,6 +184,7 @@ (VfrsubVF) (VfmulVF) (VfdivVF) + (VfsgnjVF) (VfrdivVF) (VmergeVXM) (VfmergeVFM) @@ -836,6 +841,27 @@ (rule (rv_vfrdiv_vf vs2 vs1 mask vstate) (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) +;; Helper for emitting the `vfmin.vv` instruction. +(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmax.vv` instruction. +(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `vs2` with the sign bit from `vs1` +(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsgnj_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction. 
+(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsgnj_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate)) + ;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction. ;; The output of this instruction is `vs2` with the negated sign bit from `vs1` (decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg) @@ -847,6 +873,18 @@ (decl rv_vfneg_v (VReg VecOpMasking VState) VReg) (rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate)) +;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`. +;; When `vs2 == vs1` this implements `fabs` +(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfabs.v` instruction. +;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs` +(decl rv_vfabs_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate)) + ;; Helper for emitting the `vfsqrt.v` instruction. ;; This instruction splats the F regsiter into all elements of the destination vector. 
(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index dfd67eb5ec2e..ae65b21a4aca 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -981,9 +981,12 @@ ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type ty (fabs x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (fabs x))) (rv_fabs ty x)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fabs x))) + (rv_vfabs_v x (unmasked) ty)) + ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_scalar_float ty) (fneg x))) (rv_fneg ty x)) @@ -992,9 +995,15 @@ (rv_vfneg_v x (unmasked) ty)) ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type ty (fcopysign x y))) +(rule 0 (lower (has_type (ty_scalar_float ty) (fcopysign x y))) (rv_fsgnj ty x y)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x y))) + (rv_vfsgnj_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fcopysign x (splat y)))) + (rv_vfsgnj_vf x y (unmasked) ty)) + ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (fma x y z))) (rv_fmadd ty x y z)) @@ -1169,24 +1178,60 @@ (rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y))) (rv_vfrdiv_vf y x (unmasked) ty)) -;;;; Rules for `fmin/fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule - (lower (has_type ty (fmin x y))) +(rule 0 (lower (has_type (ty_scalar_float ty) (fmin x y))) (gen_float_select (FloatSelectOP.Min) x y ty)) -(rule - (lower (has_type ty (fmin_pseudo x y))) +;; vfmin does almost the right thing, but it does not handle NaN's correctly. 
+;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the +;; non-NaN input instead. +;; +;; TODO: We can improve this by using a masked `fmin` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vvm` instruction. +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (min VReg (rv_vfmin_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan min is_not_nan ty))) + +;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmax x y))) + (gen_float_select (FloatSelectOP.Max) x y ty)) + +;; vfmax does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the +;; non-NaN input instead. +;; +;; TODO: We can improve this by using a masked `fmax` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vvm` instruction. 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (max VReg (rv_vfmax_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan max is_not_nan ty))) + +;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmin_pseudo x y))) (gen_float_select_pseudo (FloatSelectOP.Min) x y ty)) -(rule - (lower (has_type ty (fmax x y))) - (gen_float_select (FloatSelectOP.Max) x y ty)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin_pseudo x y))) + (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) y x))) + (rv_vmerge_vvm x y mask ty))) -(rule - (lower (has_type ty (fmax_pseudo x y))) +;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fmax_pseudo x y))) (gen_float_select_pseudo (FloatSelectOP.Max) x y ty)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax_pseudo x y))) + (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) x y))) + (rv_vmerge_vvm x y mask ty))) + ;;;;; Rules for `stack_addr`;;;;;;;;; (rule (lower (stack_addr ss offset)) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif new file mode 100644 index 000000000000..f41147eb48d8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fabs.clif @@ -0,0 +1,83 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fabs_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = fabs v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; 
add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fabs_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = fabs v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif new file mode 100644 index 000000000000..ab2fef1091c2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcopysign.clif @@ -0,0 +1,169 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcopysign v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsgnj.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, 
ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x22 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fcopysign_splat_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcopysign v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsgnj.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x22 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fcopysign_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcopysign v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsgnj.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 
0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x22 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fcopysign_splat_f64x4(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcopysign v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsgnj.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x22 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif new file mode 100644 index 000000000000..c1b3f21cffb4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmax-pseudo.clif @@ -0,0 +1,92 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fmax_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmax_pseudo v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v8,v1,v3,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, 
#vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x84, 0x11, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmax_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmax_pseudo v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v8,v1,v3,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x84, 0x11, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif new file mode 100644 index 000000000000..eef98e78f2f3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmax.clif @@ -0,0 +1,116 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fmax_f64x2(f64x2, f64x2) -> 
f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; auipc t4,0; ld t4,12(t4); j 12; .8byte 0x7ff8000000000000 +; vmv.v.x v14,t4 #avl=2, #vtype=(e64, m1, ta, ma) +; vfmax.vv v16,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v18,v14,v16,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; auipc t4, 0 +; ld t4, 0xc(t4) +; j 0xc +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xf8, 0x7f +; .byte 0x57, 0xc7, 0x0e, 0x5e +; .byte 0x57, 0x98, 0x11, 0x1a +; .byte 0x57, 0x09, 0xe8, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x09, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmax_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fmax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; lui t4,523264 +; vmv.v.x v14,t4 #avl=4, 
#vtype=(e32, m1, ta, ma) +; vfmax.vv v16,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v18,v14,v16,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; lui t4, 0x7fc00 +; .byte 0x57, 0xc7, 0x0e, 0x5e +; .byte 0x57, 0x98, 0x11, 0x1a +; .byte 0x57, 0x09, 0xe8, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x09, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif new file mode 100644 index 000000000000..608ad3767dc9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmin-pseudo.clif @@ -0,0 +1,92 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0:f32x4, v1:f32x4): + v2 = fmin_pseudo v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v8,v1,v3,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 
0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x84, 0x11, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmin_pseudo_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0:f64x2, v1:f64x2): + v2 = fmin_pseudo v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v8,v1,v3,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x84, 0x11, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif new file mode 100644 index 000000000000..6a8d643db748 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmin.clif @@ -0,0 +1,116 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %fmin_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 
#avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; auipc t4,0; ld t4,12(t4); j 12; .8byte 0x7ff8000000000000 +; vmv.v.x v14,t4 #avl=2, #vtype=(e64, m1, ta, ma) +; vfmin.vv v16,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v18,v14,v16,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; auipc t4, 0 +; ld t4, 0xc(t4) +; j 0xc +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xf8, 0x7f +; .byte 0x57, 0xc7, 0x0e, 0x5e +; .byte 0x57, 0x98, 0x11, 0x12 +; .byte 0x57, 0x09, 0xe8, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x09, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmin_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fmin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; lui t4,523264 +; vmv.v.x v14,t4 #avl=4, #vtype=(e32, m1, ta, ma) +; vfmin.vv v16,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v18,v14,v16,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v18,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 
0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; lui t4, 0x7fc00 +; .byte 0x57, 0xc7, 0x0e, 0x5e +; .byte 0x57, 0x98, 0x11, 0x12 +; .byte 0x57, 0x09, 0xe8, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x09, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-fabs.clif b/cranelift/filetests/filetests/runtests/simd-fabs.clif new file mode 100644 index 000000000000..45d63d728582 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fabs.clif @@ -0,0 +1,24 @@ +test run +target aarch64 +target s390x +target x86_64 +target riscv64 has_v + +function %fabs_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = fabs v0 + return v1 +} +; run: %fabs_f32x4([0x0.5 -0x1.5 0x1.1p10 -0x1.4cccccp0]) == [0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0] +; run: %fabs_f32x4([0x0.0 -0x0.0 Inf -Inf]) == [0x0.0 0x0.0 Inf Inf] +; run: %fabs_f32x4([NaN -NaN Inf -Inf]) == [NaN NaN Inf Inf] + +function %fabs_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = fabs v0 + return v1 +} +; run: %fabs_f64x2([0x0.5 -0x1.5]) == [0x0.5 0x1.5] +; run: %fabs_f64x2([0x0.0 -0x0.0]) == [0x0.0 0x0.0] +; run: %fabs_f64x2([Inf -Inf]) == [Inf Inf] +; run: %fabs_f64x2([NaN -NaN]) == [NaN NaN] diff --git a/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif b/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif index 253e4e74d6e8..8bdb5f4e5eb0 100644 --- a/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif +++ b/cranelift/filetests/filetests/runtests/simd-fcopysign-64bit.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target riscv64 has_v ; x86_64 and s390x do not support 
64-bit vectors in `fcopysign`. function %fcopysign_f32x2(f32x2, f32x2) -> f32x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-fcopysign.clif b/cranelift/filetests/filetests/runtests/simd-fcopysign.clif index 331301038785..ebabcd0c5286 100644 --- a/cranelift/filetests/filetests/runtests/simd-fcopysign.clif +++ b/cranelift/filetests/filetests/runtests/simd-fcopysign.clif @@ -2,6 +2,7 @@ test interpret test run target s390x target aarch64 +target riscv64 has_v ; x86_64 does not support SIMD fcopysign. function %fcopysign_f32x4(f32x4, f32x4) -> f32x4 { diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif index 92ffddeef20f..28dc45aaea31 100644 --- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif +++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-aarch64.clif @@ -12,6 +12,9 @@ block0(v0: f64x2, v1: f64x2): } ; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0] +; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN] +; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] +; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] function %fmin_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -22,3 +25,4 @@ block0(v0: f64x2, v1: f64x2): ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN NaN] ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0] +; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif new file mode 100644 index 000000000000..b13ff253c62d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-riscv64.clif @@ -0,0 +1,29 @@ +; Test the 
non-deterministic aspects of the SIMD arithmetic operations. +; If you change this file, you should most likely update +; simd-arithmetic-nondeterministic*.clif as well. +test run +target riscv64gc has_v + +;; With the current implementation on RISC-V we always return a positive Canonical NaN +;; if any input is NaN. This is compatible with the spec but different from the +;; other architectures. + +function %fmax_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmax v0, v1 + return v2 +} +; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0] +; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [NaN NaN] +; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] +; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [NaN 0.0] + +function %fmin_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmin v0, v1 + return v2 +} +; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [NaN NaN] +; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] +; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0] +; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [NaN 0.0] diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif index c257bcdf008c..abe6c935abc6 100644 --- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif +++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin-nondeterministic-x86_64.clif @@ -14,6 +14,9 @@ block0(v0: f64x2, v1: f64x2): ; note below how NaNs are quieted but (unlike fmin), retain their sign: this discrepancy is allowed by non-determinism ; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0. 
; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0] +; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN] +; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] +; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] function %fmin_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -26,3 +29,4 @@ block0(v0: f64x2, v1: f64x2): ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN] ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0] ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0] +; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] diff --git a/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif b/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif index 5b29816e7274..06cb0387427b 100644 --- a/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif +++ b/cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif @@ -3,6 +3,7 @@ target aarch64 target s390x target x86_64 target x86_64 skylake +target riscv64gc has_v function %fmax_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -12,9 +13,6 @@ block0(v0: f64x2, v1: f64x2): ; This operation exhibits non-deterministic behaviour for some input NaN values; ; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests. ; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0] -; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN] -; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0] -; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] function %fmin_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -24,4 +22,3 @@ block0(v0: f64x2, v1: f64x2): ; This operation exhibits non-deterministic behaviour for some input NaN values; ; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests. 
; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0] -; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0] diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif b/cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif similarity index 98% rename from cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif rename to cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif index df6d5b0fc3d9..5cb46d1ad38d 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif +++ b/cranelift/filetests/filetests/runtests/simd-fmin-max-pseudo.clif @@ -3,6 +3,7 @@ target aarch64 ; target s390x FIXME: This currently fails under qemu due to a qemu bug target x86_64 target x86_64 skylake +target riscv64gc has_v function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { block0(v0:f32x4, v1:f32x4):