From 55fa2e7ccd0f4fe2b01892fb59cd3c34ce3bfa07 Mon Sep 17 00:00:00 2001
From: Afonso Bordado <afonso360@users.noreply.github.com>
Date: Wed, 30 Aug 2023 22:10:27 +0100
Subject: [PATCH] riscv64: Implement SIMD floating point conversion
 instructions (#6924)

* riscv64: Implement SIMD `fvpromote_low`/`fvdemote`

* riscv64: Implement SIMD `fcvt_from_{u,s}int`

* riscv64: Implement SIMD `fcvt_to_{u,s}int_sat`

* riscv64: Use `i8_to_imm5` constructor
---
 build.rs                                      | 12 +--
 .../codegen/src/isa/riscv64/inst/vector.rs    | 31 ++++++--
 .../codegen/src/isa/riscv64/inst_vector.isle  | 14 ++++
 cranelift/codegen/src/isa/riscv64/lower.isle  | 53 ++++++++++---
 .../isa/riscv64/simd-fcvt-from-sint.clif      | 43 ++++++++++
 .../isa/riscv64/simd-fcvt-from-uint.clif      | 43 ++++++++++
 .../isa/riscv64/simd-fcvt-to-sint-sat.clif    | 47 +++++++++++
 .../isa/riscv64/simd-fcvt-to-uint-sat.clif    | 47 +++++++++++
 .../filetests/isa/riscv64/simd-fvdemote.clif  | 49 ++++++++++++
 .../isa/riscv64/simd-fvpromote-low.clif       | 43 ++++++++++
 .../filetests/runtests/simd-conversion.clif   | 79 -------------------
 .../runtests/simd-fcvt-from-sint.clif         | 17 ++++
 .../runtests/simd-fcvt-from-uint.clif         | 18 +++++
 .../runtests/simd-fcvt-to-sint-sat.clif       | 18 +++++
 .../runtests/simd-fcvt-to-uint-sat.clif       | 28 +++++++
 .../filetests/runtests/simd-fvdemote.clif     | 22 ++++++
 .../runtests/simd-fvpromote-low.clif          | 21 +++++
 17 files changed, 477 insertions(+), 108 deletions(-)
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif
 create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif
 delete mode 100644 cranelift/filetests/filetests/runtests/simd-conversion.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fvdemote.clif
 create mode 100644 cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif

diff --git a/build.rs b/build.rs
index 4ffc8894c58b..1e60177e4a13 100644
--- a/build.rs
+++ b/build.rs
@@ -254,17 +254,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 return true;
             }
 
-            let known_failure = [
-                "canonicalize_nan",
-                "cvt_from_uint",
-                "issue_3327_bnot_lowering",
-                "simd_conversions",
-                "simd_i32x4_trunc_sat_f32x4",
-                "simd_i32x4_trunc_sat_f64x2",
-                "simd_load",
-                "simd_splat",
-            ]
-            .contains(&testname);
+            let known_failure = ["issue_3327_bnot_lowering"].contains(&testname);
 
             known_failure
         }
diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs
index ec3e99fd7390..78e7a2f2d3eb 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs
@@ -757,7 +757,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VfcvtrtzxufV
             | VecAluOpRR::VfcvtrtzxfV
             | VecAluOpRR::VfcvtfxuV
-            | VecAluOpRR::VfcvtfxV => 0b010010,
+            | VecAluOpRR::VfcvtfxV
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => 0b010010,
         }
     }
 
@@ -779,7 +781,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VfcvtrtzxufV
             | VecAluOpRR::VfcvtrtzxfV
             | VecAluOpRR::VfcvtfxuV
-            | VecAluOpRR::VfcvtfxV => VecOpCategory::OPFVV,
+            | VecAluOpRR::VfcvtfxV
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV,
             VecAluOpRR::VmvVV => VecOpCategory::OPIVV,
             VecAluOpRR::VmvVX => VecOpCategory::OPIVX,
         }
@@ -806,12 +810,17 @@ impl VecAluOpRR {
             VecAluOpRR::VzextVF2 => 0b00110,
             VecAluOpRR::VsextVF2 => 0b00111,
             // VFUNARY0
+            // single-width converts
             VecAluOpRR::VfcvtxufV => 0b00000,
             VecAluOpRR::VfcvtxfV => 0b00001,
             VecAluOpRR::VfcvtrtzxufV => 0b00110,
             VecAluOpRR::VfcvtrtzxfV => 0b00111,
             VecAluOpRR::VfcvtfxuV => 0b00010,
             VecAluOpRR::VfcvtfxV => 0b00011,
+            // widening converts
+            VecAluOpRR::VfwcvtffV => 0b01100,
+            // narrowing converts
+            VecAluOpRR::VfncvtffW => 0b10100,
             // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
             // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
             VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
@@ -837,7 +846,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VfcvtrtzxufV
             | VecAluOpRR::VfcvtrtzxfV
             | VecAluOpRR::VfcvtfxuV
-            | VecAluOpRR::VfcvtfxV => true,
+            | VecAluOpRR::VfcvtfxV
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => true,
             VecAluOpRR::VmvSX
             | VecAluOpRR::VfmvSF
             | VecAluOpRR::VmvVV
@@ -865,7 +876,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VfcvtrtzxufV
             | VecAluOpRR::VfcvtrtzxfV
             | VecAluOpRR::VfcvtfxuV
-            | VecAluOpRR::VfcvtfxV => RegClass::Vector,
+            | VecAluOpRR::VfcvtfxV
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => RegClass::Vector,
             VecAluOpRR::VmvXS => RegClass::Int,
             VecAluOpRR::VfmvFS => RegClass::Float,
         }
@@ -888,7 +901,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VfcvtrtzxufV
             | VecAluOpRR::VfcvtrtzxfV
             | VecAluOpRR::VfcvtfxuV
-            | VecAluOpRR::VfcvtfxV => RegClass::Vector,
+            | VecAluOpRR::VfcvtfxV
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => RegClass::Vector,
             VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
             VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
         }
@@ -902,7 +917,9 @@ impl VecAluOpRR {
             | VecAluOpRR::VzextVF8
             | VecAluOpRR::VsextVF2
             | VecAluOpRR::VsextVF4
-            | VecAluOpRR::VsextVF8 => true,
+            | VecAluOpRR::VsextVF8
+            | VecAluOpRR::VfwcvtffV
+            | VecAluOpRR::VfncvtffW => true,
             _ => false,
         }
     }
@@ -931,6 +948,8 @@ impl fmt::Display for VecAluOpRR {
             VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v",
             VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v",
             VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v",
+            VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v",
+            VecAluOpRR::VfncvtffW => "vfncvt.f.f.w",
         })
     }
 }
diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle
index 63db6c751146..685761e2a8d0 100644
--- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle
@@ -291,6 +291,8 @@
   (VfcvtrtzxfV)
   (VfcvtfxuV)
   (VfcvtfxV)
+  (VfwcvtffV)
+  (VfncvtffW)
 ))
 
 ;; Returns the canonical destination type for a VecAluOpRRImm5.
@@ -1060,6 +1062,18 @@
 (rule (rv_vfcvt_f_x_v vs mask vstate)
   (vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate))
 
+;; Helper for emitting the `vfwcvt.f.f.v` instruction.
+;; Convert single-width float to double-width float.
+(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg)
+(rule (rv_vfwcvt_f_f_v vs mask vstate)
+  (vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate))
+
+;; Helper for emitting the `vfncvt.f.f.w` instruction.
+;; Convert double-width float to single-width float.
+(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg)
+(rule (rv_vfncvt_f_f_w vs mask vstate)
+  (vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate))
+
 ;; Helper for emitting the `vslidedown.vx` instruction.
 ;; `vslidedown` moves all elements in the vector down by n elements.
 ;; The top most elements are up to the tail policy.
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index 4e991a75459c..26b375f3cdf3 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -1201,10 +1201,26 @@
 (rule (lower (fpromote x))
   (rv_fcvtds x))
 
+;;;;;  Rules for `fvpromote_low`;;;;;;;;;;;;
+
+(rule (lower (has_type (ty_vec_fits_in_register ty) (fvpromote_low x)))
+  (if-let half_ty (ty_half_width ty))
+  (rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty)))
+
 ;;;;;  Rules for `fdemote`;;;;;;;;;;;;;;;;;;
 (rule (lower (fdemote x))
   (rv_fcvtsd x))
 
+;;;;;  Rules for `fvdemote`;;;;;;;;;;;;;;;;;
+
+;; `vfncvt.f.f.w` leaves the upper bits of the register undefined, so we
+;; zero out lanes 2 and 3 (mask `0xC`) with a `vmerge.vim`.
+(rule (lower (has_type (ty_vec_fits_in_register ty @ $F32X4) (fvdemote x)))
+  (if-let zero (i8_to_imm5 0))
+  (let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty)))
+        (mask VReg (gen_vec_mask 0xC)))
+    (rv_vmerge_vim narrow zero mask ty)))
+
 
 ;;;;;  Rules for for float arithmetic
 
@@ -1613,39 +1629,52 @@
   (gen_fcvt_int $false v $false from to))
 
 ;;;;;  Rules for `fcvt_to_sint`;;;;;;;;;
-(rule
-  (lower (has_type to (fcvt_to_sint v @ (value_type from))))
+(rule 0 (lower (has_type to (fcvt_to_sint v @ (value_type (ty_scalar_float from)))))
   (gen_fcvt_int $false v $true from to))
 
 ;;;;;  Rules for `fcvt_to_sint_sat`;;;;;;;;;
-(rule
-  (lower (has_type to (fcvt_to_sint_sat v @ (value_type from))))
+(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_scalar_float from)))))
   (gen_fcvt_int $true v $true from to))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_sint_sat v @ (value_type from_ty))))
+  (if-let zero (i8_to_imm5 0))
+  (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
+        (cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty)))
+    (rv_vmerge_vim cvt zero is_nan from_ty)))
+
 ;;;;;  Rules for `fcvt_to_uint_sat`;;;;;;;;;
-(rule
-  (lower (has_type to (fcvt_to_uint_sat v @ (value_type from))))
+(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_scalar_float from)))))
   (gen_fcvt_int $true v $false from to))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_uint_sat v @ (value_type from_ty))))
+  (if-let zero (i8_to_imm5 0))
+  (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
+        (cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty)))
+    (rv_vmerge_vim cvt zero is_nan from_ty)))
+
 ;;;;;  Rules for `fcvt_from_sint`;;;;;;;;;
-(rule
-  (lower (has_type to (fcvt_from_sint v @ (value_type from_ty))))
+(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_sint v @ (value_type from_ty))))
   (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to))
         (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed))))
     (fpu_rr float_op to value)))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_sint v @ (value_type from_ty))))
+  (rv_vfcvt_f_x_v v (unmasked) from_ty))
+
 ;;;;;  Rules for `fcvt_from_uint`;;;;;;;;;
-(rule
-  (lower (has_type to (fcvt_from_uint v @ (value_type from_ty))))
+(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_uint v @ (value_type from_ty))))
   (let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to))
         (value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero))))
     (fpu_rr float_op to value)))
 
+(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_uint v @ (value_type from_ty))))
+  (rv_vfcvt_f_xu_v v (unmasked) from_ty))
+
 ;;;;;  Rules for `symbol_value`;;;;;;;;;
 (rule
    (lower (symbol_value (symbol_value_data name _ offset)))
-   (load_ext_name name offset)
-)
+   (load_ext_name name offset))
+
 ;;;;;  Rules for `bitcast`;;;;;;;;;
 (rule
    (lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif
new file mode 100644
index 000000000000..19d4b7beeff3
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-sint.clif
@@ -0,0 +1,43 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fcvt_from_sint(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_sint.f32x4 v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vfcvt.f.x.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x02, 0xcd
+;   .byte 0x57, 0x92, 0x11, 0x4a
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x02, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif
new file mode 100644
index 000000000000..a774fd592998
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-from-uint.clif
@@ -0,0 +1,43 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fcvt_from_uint(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_uint.f32x4 v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vfcvt.f.xu.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x02, 0xcd
+;   .byte 0x57, 0x12, 0x11, 0x4a
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x02, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif
new file mode 100644
index 000000000000..c8639dde8e88
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-sint-sat.clif
@@ -0,0 +1,47 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fcvt_to_sint_sat(f32x4) -> i32x4 {
+block0(v0:f32x4):
+    v1 = fcvt_to_sint_sat.i32x4 v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vfcvt.rtz.x.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+;   vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x02, 0xcd
+;   .byte 0x57, 0x90, 0x10, 0x72
+;   .byte 0x57, 0x93, 0x13, 0x4a
+;   .byte 0x57, 0x34, 0x60, 0x5c
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x04, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif
new file mode 100644
index 000000000000..78db69d4470d
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcvt-to-uint-sat.clif
@@ -0,0 +1,47 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fcvt_to_uint_sat(f32x4) -> i32x4 {
+block0(v0:f32x4):
+    v1 = fcvt_to_uint_sat.i32x4 v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vfcvt.rtz.xu.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma)
+;   vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+;   vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x02, 0xcd
+;   .byte 0x57, 0x90, 0x10, 0x72
+;   .byte 0x57, 0x13, 0x13, 0x4a
+;   .byte 0x57, 0x34, 0x60, 0x5c
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x04, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif
new file mode 100644
index 000000000000..1422b4d3e632
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fvdemote.clif
@@ -0,0 +1,49 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fvdemote(f64x2) -> f32x4 {
+block0(v0: f64x2):
+    v1 = fvdemote v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vfncvt.f.f.w v4,v1 #avl=4, #vtype=(e32, mf2, ta, ma)
+;   vmv.v.i v0,12 #avl=2, #vtype=(e64, m1, ta, ma)
+;   vmerge.vim v8,v4,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+;   vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x72, 0xcd
+;   .byte 0x57, 0x12, 0x1a, 0x4a
+;   .byte 0x57, 0x70, 0x81, 0xcd
+;   .byte 0x57, 0x30, 0x06, 0x5e
+;   .byte 0x57, 0x70, 0x02, 0xcd
+;   .byte 0x57, 0x34, 0x40, 0x5c
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x04, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif
new file mode 100644
index 000000000000..355a132edcf0
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fvpromote-low.clif
@@ -0,0 +1,43 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %fvpromote_low(f32x4) -> f64x2 {
+block0(v0: f32x4):
+    v1 = fvpromote_low v0
+    return v1
+}
+
+; VCode:
+;   add sp,-16
+;   sd ra,8(sp)
+;   sd fp,0(sp)
+;   mv fp,sp
+; block0:
+;   vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+;   vfwcvt.f.f.v v4,v1 #avl=2, #vtype=(e32, mf2, ta, ma)
+;   vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+;   ld ra,8(sp)
+;   ld fp,0(sp)
+;   add sp,+16
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   addi sp, sp, -0x10
+;   sd ra, 8(sp)
+;   sd s0, 0(sp)
+;   ori s0, sp, 0
+; block1: ; offset 0x10
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   addi t6, s0, 0x10
+;   .byte 0x87, 0x80, 0x0f, 0x02
+;   .byte 0x57, 0x70, 0x71, 0xcd
+;   .byte 0x57, 0x12, 0x16, 0x4a
+;   .byte 0x57, 0x70, 0x08, 0xcc
+;   .byte 0x27, 0x02, 0x05, 0x02
+;   ld ra, 8(sp)
+;   ld s0, 0(sp)
+;   addi sp, sp, 0x10
+;   ret
+
diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif
deleted file mode 100644
index 86219ff48db3..000000000000
--- a/cranelift/filetests/filetests/runtests/simd-conversion.clif
+++ /dev/null
@@ -1,79 +0,0 @@
-test interpret
-test run
-target aarch64
-target s390x
-target x86_64
-target x86_64 sse41
-target x86_64 sse42
-target x86_64 sse42 has_avx
-
-function %fcvt_from_sint(i32x4) -> f32x4 {
-block0(v0: i32x4):
-    v1 = fcvt_from_sint.f32x4 v0
-    return v1
-}
-; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0]
-; Note that 123456789 rounds to 123456792.0, an error of 3
-
-function %fcvt_from_uint(i32x4) -> f32x4 {
-block0(v0: i32x4):
-    v1 = fcvt_from_uint.f32x4 v0
-    return v1
-}
-; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
-; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0]
-; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up 1 to 4,294,967,296 in f32x4.
-
-function %fcvt_to_sint_sat(f32x4) -> i32x4 {
-block0(v0:f32x4):
-    v1 = fcvt_to_sint_sat.i32x4 v0
-    return v1
-}
-; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
-; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]
-; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
-
-function %fcvt_to_uint_sat(f32x4) -> i32x4 {
-block0(v0:f32x4):
-    v1 = fcvt_to_uint_sat.i32x4 v0
-    return v1
-}
-; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
-; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
-; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
-; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
-
-function %fcvt_low_from_sint(i32x4) -> f64x2 {
-block0(v0: i32x4):
-    v1 = swiden_low v0
-    v2 = fcvt_from_sint.f64x2 v1
-    return v2
-}
-; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
-; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
-
-function %fvdemote(f64x2) -> f32x4 {
-block0(v0: f64x2):
-    v1 = fvdemote v0
-    return v1
-}
-
-; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
-; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
-; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
-
-
-function %fvpromote_low(f32x4) -> f64x2 {
-block0(v0: f32x4):
-    v1 = fvpromote_low v0
-    return v1
-}
-
-; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
-; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
-; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
-; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
-; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
-
diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif
new file mode 100644
index 000000000000..a746827d2a13
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fcvt-from-sint.clif
@@ -0,0 +1,17 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fcvt_from_sint(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_sint.f32x4 v0
+    return v1
+}
+; run: %fcvt_from_sint([-1 0 1 123456789]) == [-0x1.0 0.0 0x1.0 0x75bcd18.0]
+; Note that 123456789 rounds to 123456792.0, an error of 3
diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif
new file mode 100644
index 000000000000..1cf1f1a544d8
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fcvt-from-uint.clif
@@ -0,0 +1,18 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fcvt_from_uint(i32x4) -> f32x4 {
+block0(v0: i32x4):
+    v1 = fcvt_from_uint.f32x4 v0
+    return v1
+}
+; run: %fcvt_from_uint([0 0 0 0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fcvt_from_uint([0xFFFFFFFF 0 1 123456789]) == [0x100000000.0 0.0 0x1.0 0x75bcd18.0]
+; Note that 0xFFFFFFFF is decimal 4,294,967,295 and is rounded up by 1 to 4,294,967,296 in f32x4.
diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif
new file mode 100644
index 000000000000..d08955322e11
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif
@@ -0,0 +1,18 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fcvt_to_sint_sat(f32x4) -> i32x4 {
+block0(v0:f32x4):
+    v1 = fcvt_to_sint_sat.i32x4 v0
+    return v1
+}
+; run: %fcvt_to_sint_sat([0x0.0 -0x1.0 0x1.0 0x1.0p100]) == [0 -1 1 0x7FFFFFFF]
+; run: %fcvt_to_sint_sat([-0x8.1 0x0.0 0x0.0 -0x1.0p100]) == [-8 0 0 0x80000000]
+; run: %fcvt_to_sint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif
new file mode 100644
index 000000000000..4bb1d105274e
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif
@@ -0,0 +1,28 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fcvt_to_uint_sat(f32x4) -> i32x4 {
+block0(v0:f32x4):
+    v1 = fcvt_to_uint_sat.i32x4 v0
+    return v1
+}
+; run: %fcvt_to_uint_sat([0x1.0 0x4.2 0x4.6 0x1.0p100]) == [1 4 4 0xFFFFFFFF]
+; run: %fcvt_to_uint_sat([-0x8.1 -0x0.0 0x0.0 -0x1.0p100]) == [0 0 0 0]
+; run: %fcvt_to_uint_sat([0xB2D05E00.0 0.0 0.0 0.0]) == [3000000000 0 0 0]
+; run: %fcvt_to_uint_sat([+NaN +NaN +NaN +NaN]) == [0 0 0 0]
+
+function %fcvt_low_from_sint(i32x4) -> f64x2 {
+block0(v0: i32x4):
+    v1 = swiden_low v0
+    v2 = fcvt_from_sint.f64x2 v1
+    return v2
+}
+; run: %fcvt_low_from_sint([0 1 -1 65535]) == [0x0.0 0x1.0]
+; run: %fcvt_low_from_sint([-1 123456789 0 1]) == [-0x1.0 0x1.d6f3454p26]
diff --git a/cranelift/filetests/filetests/runtests/simd-fvdemote.clif b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
new file mode 100644
index 000000000000..2290785e340e
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fvdemote.clif
@@ -0,0 +1,22 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fvdemote(f64x2) -> f32x4 {
+block0(v0: f64x2):
+    v1 = fvdemote v0
+    return v1
+}
+
+; run: %fvdemote([0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fvdemote([0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0]
+; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
+; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
+; run: %fvdemote([0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0]
+
diff --git a/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif b/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif
new file mode 100644
index 000000000000..5c2d8d5e81d8
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-fvpromote-low.clif
@@ -0,0 +1,21 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 sse41
+target x86_64 sse42
+target x86_64 sse42 has_avx
+target riscv64 has_v
+
+function %fvpromote_low(f32x4) -> f64x2 {
+block0(v0: f32x4):
+    v1 = fvpromote_low v0
+    return v1
+}
+
+; run: %fvpromote_low([0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0]
+; run: %fvpromote_low([0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2]
+; run: %fvpromote_low([0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2]
+; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]
+; run: %fvpromote_low([0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]