riscv64: Add Float SIMD lowerings for some instructions (bytecodealliance#6403)

* riscv64: Add SIMD `fadd` lowerings
* riscv64: Add SIMD `fsub` lowerings
* riscv64: Add SIMD `fmul` lowerings
* riscv64: Add SIMD `fdiv` lowerings
* riscv64: Add SIMD `fneg` lowerings
* riscv64: Add SIMD `sqrt` lowerings
* wasmtime: Enable float SIMD arithmetic tests for RISC-V
gurry · May 18, 2023 · c8e1e76 · c8e1e76
1 parent 8756fcf
commit c8e1e76
Show file tree

Hide file tree

Showing 18 changed files with 1,500 additions and 17 deletions.
diff --git a/build.rs b/build.rs
@@ -217,12 +217,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 "simd_boolean",
                 "simd_conversions",
                 "simd_f32x4",
-                "simd_f32x4_arith",
                 "simd_f32x4_cmp",
                 "simd_f32x4_pmin_pmax",
                 "simd_f32x4_rounding",
                 "simd_f64x2",
-                "simd_f64x2_arith",
                 "simd_f64x2_cmp",
                 "simd_f64x2_pmin_pmax",
                 "simd_f64x2_rounding",

diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -1580,10 +1580,13 @@ impl Inst {
 
                 // Note: vs2 and vs1 here are opposite to the standard scalar ordering.
                 // This is noted in Section 10.1 of the RISC-V Vector spec.
-                match (op, vs1) {
-                    (VecAluOpRRR::VrsubVX, vs1) if vs1 == zero_reg() => {
+                match (op, vs2, vs1) {
+                    (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {
                         format!("vneg.v {},{} {}", vd_s, vs2_s, vstate)
                     }
+                    (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {
+                        format!("vfneg.v {},{} {}", vd_s, vs2_s, vstate)
+                    }
                     _ => format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate),
                 }
             }

diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs
@@ -251,16 +251,26 @@ impl VecAluOpRRR {
     pub fn funct6(&self) -> u32 {
         // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
         match self {
-            VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000,
-            VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010,
+            VecAluOpRRR::VaddVV
+            | VecAluOpRRR::VaddVX
+            | VecAluOpRRR::VfaddVV
+            | VecAluOpRRR::VfaddVF => 0b000000,
+            VecAluOpRRR::VsubVV
+            | VecAluOpRRR::VsubVX
+            | VecAluOpRRR::VfsubVV
+            | VecAluOpRRR::VfsubVF => 0b000010,
             VecAluOpRRR::VrsubVX => 0b000011,
             VecAluOpRRR::VmulVV => 0b100101,
             VecAluOpRRR::VmulhVV => 0b100111,
-            VecAluOpRRR::VmulhuVV => 0b100100,
+            VecAluOpRRR::VmulhuVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfmulVF => 0b100100,
             VecAluOpRRR::VandVV => 0b001001,
             VecAluOpRRR::VorVV => 0b001010,
             VecAluOpRRR::VxorVV => 0b001011,
             VecAluOpRRR::VslidedownVX => 0b001111,
+            VecAluOpRRR::VfrsubVF => 0b100111,
+            VecAluOpRRR::VfdivVV | VecAluOpRRR::VfdivVF => 0b100000,
+            VecAluOpRRR::VfrdivVF => 0b100001,
+            VecAluOpRRR::VfsgnjnVV => 0b001001,
         }
     }
 
@@ -278,6 +288,17 @@ impl VecAluOpRRR {
             | VecAluOpRRR::VsubVX
             | VecAluOpRRR::VrsubVX
             | VecAluOpRRR::VslidedownVX => VecOpCategory::OPIVX,
+            VecAluOpRRR::VfaddVV
+            | VecAluOpRRR::VfsubVV
+            | VecAluOpRRR::VfmulVV
+            | VecAluOpRRR::VfdivVV
+            | VecAluOpRRR::VfsgnjnVV => VecOpCategory::OPFVV,
+            VecAluOpRRR::VfaddVF
+            | VecAluOpRRR::VfsubVF
+            | VecAluOpRRR::VfrsubVF
+            | VecAluOpRRR::VfmulVF
+            | VecAluOpRRR::VfdivVF
+            | VecAluOpRRR::VfrdivVF => VecOpCategory::OPFVF,
         }
     }
 
@@ -360,6 +381,7 @@ impl VecAluOpRR {
             VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => {
                 0b010000
             }
+            VecAluOpRR::VfsqrtV => 0b010011,
             VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111,
         }
     }
@@ -369,7 +391,7 @@ impl VecAluOpRR {
             VecAluOpRR::VmvSX => VecOpCategory::OPMVX,
             VecAluOpRR::VmvXS => VecOpCategory::OPMVV,
             VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF,
-            VecAluOpRR::VfmvFS => VecOpCategory::OPFVV,
+            VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV,
             VecAluOpRR::VmvVV => VecOpCategory::OPIVV,
             VecAluOpRR::VmvVX => VecOpCategory::OPIVX,
         }
@@ -386,6 +408,8 @@ impl VecAluOpRR {
             VecAluOpRR::VfmvSF => 0b00000,
             // VWFUNARY0
             VecAluOpRR::VfmvFS => 0b00000,
+            // VFUNARY1
+            VecAluOpRR::VfsqrtV => 0b00000,
             // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
             // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
             VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
@@ -397,7 +421,11 @@ impl VecAluOpRR {
     /// other way around. As far as I can tell only vmv.v.* are backwards.
     pub fn vs_is_vs2_encoded(&self) -> bool {
         match self {
-            VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => true,
+            VecAluOpRR::VmvSX
+            | VecAluOpRR::VmvXS
+            | VecAluOpRR::VfmvSF
+            | VecAluOpRR::VfmvFS
+            | VecAluOpRR::VfsqrtV => true,
             VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => false,
         }
     }
@@ -408,15 +436,18 @@ impl VecAluOpRR {
             | VecAluOpRR::VmvSX
             | VecAluOpRR::VmvVV
             | VecAluOpRR::VmvVX
-            | VecAluOpRR::VfmvVF => RegClass::Vector,
+            | VecAluOpRR::VfmvVF
+            | VecAluOpRR::VfsqrtV => RegClass::Vector,
             VecAluOpRR::VmvXS => RegClass::Int,
             VecAluOpRR::VfmvFS => RegClass::Float,
         }
     }
 
     pub fn src_regclass(&self) -> RegClass {
         match self {
-            VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV => RegClass::Vector,
+            VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV | VecAluOpRR::VfsqrtV => {
+                RegClass::Vector
+            }
             VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
             VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
         }
@@ -430,6 +461,7 @@ impl fmt::Display for VecAluOpRR {
             VecAluOpRR::VmvXS => "vmv.x.s",
             VecAluOpRR::VfmvSF => "vfmv.s.f",
             VecAluOpRR::VfmvFS => "vfmv.f.s",
+            VecAluOpRR::VfsqrtV => "vfsqrt.v",
             VecAluOpRR::VmvVV => "vmv.v.v",
             VecAluOpRR::VmvVX => "vmv.v.x",
             VecAluOpRR::VfmvVF => "vfmv.v.f",

diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle
@@ -92,12 +92,23 @@
   (VandVV)
   (VorVV)
   (VxorVV)
+  (VfaddVV)
+  (VfsubVV)
+  (VfmulVV)
+  (VfdivVV)
+  (VfsgnjnVV)
 
   ;; Vector-Scalar Opcodes
   (VaddVX)
   (VsubVX)
   (VrsubVX)
   (VslidedownVX)
+  (VfaddVF)
+  (VfsubVF)
+  (VfrsubVF)
+  (VfmulVF)
+  (VfdivVF)
+  (VfrdivVF)
 ))
 
 ;; Register-Imm ALU Ops
@@ -125,6 +136,7 @@
   (VmvVV)
   (VmvVX)
   (VfmvVF)
+  (VfsqrtV)
 ))
 
 ;; Returns the canonical destination type for a VecAluOpRRImm5.
@@ -307,6 +319,73 @@
 (rule (rv_vxor_vv vs2 vs1 vstate)
   (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 vstate))
 
+;; Helper for emitting the `vfadd.vv` instruction.
+(decl rv_vfadd_vv (Reg Reg VState) Reg)
+(rule (rv_vfadd_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfadd.vf` instruction.
+(decl rv_vfadd_vf (Reg Reg VState) Reg)
+(rule (rv_vfadd_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfsub.vv` instruction.
+(decl rv_vfsub_vv (Reg Reg VState) Reg)
+(rule (rv_vfsub_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfsub.vf` instruction.
+(decl rv_vfsub_vf (Reg Reg VState) Reg)
+(rule (rv_vfsub_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfrsub.vf` instruction.
+(decl rv_vfrsub_vf (Reg Reg VState) Reg)
+(rule (rv_vfrsub_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfmul.vv` instruction.
+(decl rv_vfmul_vv (Reg Reg VState) Reg)
+(rule (rv_vfmul_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfmul.vf` instruction.
+(decl rv_vfmul_vf (Reg Reg VState) Reg)
+(rule (rv_vfmul_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfdiv.vv` instruction.
+(decl rv_vfdiv_vv (Reg Reg VState) Reg)
+(rule (rv_vfdiv_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfdiv.vf` instruction.
+(decl rv_vfdiv_vf (Reg Reg VState) Reg)
+(rule (rv_vfdiv_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfrdiv.vf` instruction.
+(decl rv_vfrdiv_vf (Reg Reg VState) Reg)
+(rule (rv_vfrdiv_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
+;; The output of this instruction is `vs2` with the negated sign bit from `vs1`
+(decl rv_vfsgnjn_vv (Reg Reg VState) Reg)
+(rule (rv_vfsgnjn_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfneg.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs`
+(decl rv_vfneg_v (Reg VState) Reg)
+(rule (rv_vfneg_v vs vstate) (rv_vfsgnjn_vv vs vs vstate))
+
+;; Helper for emitting the `vfsqrt.v` instruction.
+;; This instruction computes the floating-point square root of each element of the source vector.
+(decl rv_vfsqrt_v (Reg VState) Reg)
+(rule (rv_vfsqrt_v vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfsqrtV) vs vstate))
+
 ;; Helper for emitting the `vslidedown.vx` instruction.
 ;; `vslidedown` moves all elements in the vector down by n elements.
 ;; The top most elements are up to the tail policy.

diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -584,9 +584,12 @@
   (rv_fabs ty x))
 
 ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty (fneg x)))
+(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x)))
   (rv_fneg ty x))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x)))
+  (rv_vfneg_v x ty))
+
 ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule (lower (has_type ty (fcopysign x y)))
   (rv_fsgnj ty x y))
@@ -597,9 +600,11 @@
 
 
 ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule (lower (has_type ty (sqrt x)))
+(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x)))
   (rv_fsqrt ty x))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x)))
+  (rv_vfsqrt_v x ty))
 
 ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule -1
@@ -706,18 +711,65 @@
 
 
 ;;;;;  Rules for for float arithmetic
-(rule (lower (has_type ty (fadd x y)))
+
+
+;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y)))
   (rv_fadd ty x y))
 
-(rule (lower (has_type ty (fsub x y)))
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y)))
+  (rv_vfadd_vv x y ty))
+
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y))))
+  (rv_vfadd_vf x y ty))
+
+(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y)))
+  (rv_vfadd_vf y x ty))
+
+
+;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y)))
   (rv_fsub ty x y))
 
-(rule (lower (has_type ty (fmul x y)))
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y)))
+  (rv_vfsub_vv x y ty))
+
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y))))
+  (rv_vfsub_vf x y ty))
+
+(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y)))
+  (rv_vfrsub_vf y x ty))
+
+;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y)))
   (rv_fmul ty x y))
 
-(rule (lower (has_type ty (fdiv x y)))
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y)))
+  (rv_vfmul_vv x y ty))
+
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y))))
+  (rv_vfmul_vf x y ty))
+
+(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y)))
+  (rv_vfmul_vf y x ty))
+
+
+;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y)))
   (rv_fdiv ty x y))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y)))
+  (rv_vfdiv_vv x y ty))
+
+(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y))))
+  (rv_vfdiv_vf x y ty))
+
+(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y)))
+  (rv_vfrdiv_vf y x ty))
+
+;;;; Rules for `fmin/fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 (rule
   (lower (has_type ty (fmin x y)))
   (gen_float_select (FloatSelectOP.Min) x y ty))