diff --git a/build.rs b/build.rs index 5825af1be0e3..682187eaf17c 100644 --- a/build.rs +++ b/build.rs @@ -217,12 +217,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_boolean", "simd_conversions", "simd_f32x4", - "simd_f32x4_arith", "simd_f32x4_cmp", "simd_f32x4_pmin_pmax", "simd_f32x4_rounding", "simd_f64x2", - "simd_f64x2_arith", "simd_f64x2_cmp", "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 4a13ca4c62cb..6324484eebbb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1580,10 +1580,13 @@ impl Inst { // Note: vs2 and vs1 here are opposite to the standard scalar ordering. // This is noted in Section 10.1 of the RISC-V Vector spec. - match (op, vs1) { - (VecAluOpRRR::VrsubVX, vs1) if vs1 == zero_reg() => { + match (op, vs2, vs1) { + (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { format!("vneg.v {},{} {}", vd_s, vs2_s, vstate) } + (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { + format!("vfneg.v {},{} {}", vd_s, vs2_s, vstate) + } _ => format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate), } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index a45cf39b3369..8d21ef9ebe57 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -251,16 +251,26 @@ impl VecAluOpRRR { pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { - VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000, - VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010, + VecAluOpRRR::VaddVV + | VecAluOpRRR::VaddVX + | VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfaddVF => 0b000000, + VecAluOpRRR::VsubVV + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfsubVF => 0b000010, VecAluOpRRR::VrsubVX => 0b000011, VecAluOpRRR::VmulVV => 0b100101, VecAluOpRRR::VmulhVV => 0b100111, - VecAluOpRRR::VmulhuVV => 0b100100, + VecAluOpRRR::VmulhuVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfmulVF => 0b100100, VecAluOpRRR::VandVV => 0b001001, VecAluOpRRR::VorVV => 0b001010, VecAluOpRRR::VxorVV => 0b001011, VecAluOpRRR::VslidedownVX => 0b001111, + VecAluOpRRR::VfrsubVF => 0b100111, + VecAluOpRRR::VfdivVV | VecAluOpRRR::VfdivVF => 0b100000, + VecAluOpRRR::VfrdivVF => 0b100001, + VecAluOpRRR::VfsgnjnVV => 0b001001, } } @@ -278,6 +288,17 @@ impl VecAluOpRRR { | VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX | VecAluOpRRR::VslidedownVX => VecOpCategory::OPIVX, + VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfsgnjnVV => VecOpCategory::OPFVV, + VecAluOpRRR::VfaddVF + | VecAluOpRRR::VfsubVF + | VecAluOpRRR::VfrsubVF + | VecAluOpRRR::VfmulVF + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VfrdivVF => VecOpCategory::OPFVF, } } @@ -360,6 +381,7 @@ impl VecAluOpRR { VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { 0b010000 } + VecAluOpRR::VfsqrtV => 0b010011, VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, } } @@ -369,7 +391,7 @@ impl VecAluOpRR { VecAluOpRR::VmvSX => VecOpCategory::OPMVX, VecAluOpRR::VmvXS => VecOpCategory::OPMVV, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, - VecAluOpRR::VfmvFS => VecOpCategory::OPFVV, + VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV, 
VecAluOpRR::VmvVV => VecOpCategory::OPIVV, VecAluOpRR::VmvVX => VecOpCategory::OPIVX, } @@ -386,6 +408,8 @@ impl VecAluOpRR { VecAluOpRR::VfmvSF => 0b00000, // VWFUNARY0 VecAluOpRR::VfmvFS => 0b00000, + // VFUNARY1 + VecAluOpRR::VfsqrtV => 0b00000, // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, @@ -397,7 +421,11 @@ impl VecAluOpRR { /// other way around. As far as I can tell only vmv.v.* are backwards. pub fn vs_is_vs2_encoded(&self) -> bool { match self { - VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => true, + VecAluOpRR::VmvSX + | VecAluOpRR::VmvXS + | VecAluOpRR::VfmvSF + | VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV => true, VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => false, } } @@ -408,7 +436,8 @@ impl VecAluOpRR { | VecAluOpRR::VmvSX | VecAluOpRR::VmvVV | VecAluOpRR::VmvVX - | VecAluOpRR::VfmvVF => RegClass::Vector, + | VecAluOpRR::VfmvVF + | VecAluOpRR::VfsqrtV => RegClass::Vector, VecAluOpRR::VmvXS => RegClass::Int, VecAluOpRR::VfmvFS => RegClass::Float, } @@ -416,7 +445,9 @@ impl VecAluOpRR { pub fn src_regclass(&self) -> RegClass { match self { - VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV => RegClass::Vector, + VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV | VecAluOpRR::VfsqrtV => { + RegClass::Vector + } VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, } @@ -430,6 +461,7 @@ impl fmt::Display for VecAluOpRR { VecAluOpRR::VmvXS => "vmv.x.s", VecAluOpRR::VfmvSF => "vfmv.s.f", VecAluOpRR::VfmvFS => "vfmv.f.s", + VecAluOpRR::VfsqrtV => "vfsqrt.v", VecAluOpRR::VmvVV => "vmv.v.v", VecAluOpRR::VmvVX => "vmv.v.x", VecAluOpRR::VfmvVF => "vfmv.v.f", diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index bfbe10e958cd..dcd8c2bdeb86 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -92,12 +92,23 @@ (VandVV) (VorVV) (VxorVV) + (VfaddVV) + (VfsubVV) + (VfmulVV) + (VfdivVV) + (VfsgnjnVV) ;; Vector-Scalar Opcodes (VaddVX) (VsubVX) (VrsubVX) (VslidedownVX) + (VfaddVF) + (VfsubVF) + (VfrsubVF) + (VfmulVF) + (VfdivVF) + (VfrdivVF) )) ;; Register-Imm ALU Ops @@ -125,6 +136,7 @@ (VmvVV) (VmvVX) (VfmvVF) + (VfsqrtV) )) ;; Returns the canonical destination type for a VecAluOpRRImm5. @@ -307,6 +319,73 @@ (rule (rv_vxor_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 vstate)) +;; Helper for emitting the `vfadd.vv` instruction. +(decl rv_vfadd_vv (Reg Reg VState) Reg) +(rule (rv_vfadd_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 vstate)) + +;; Helper for emitting the `vfadd.vf` instruction. +(decl rv_vfadd_vf (Reg Reg VState) Reg) +(rule (rv_vfadd_vf vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 vstate)) + +;; Helper for emitting the `vfsub.vv` instruction. +(decl rv_vfsub_vv (Reg Reg VState) Reg) +(rule (rv_vfsub_vv vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 vstate)) + +;; Helper for emitting the `vfsub.vf` instruction. +(decl rv_vfsub_vf (Reg Reg VState) Reg) +(rule (rv_vfsub_vf vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 vstate)) + +;; Helper for emitting the `vfrsub.vf` instruction. 
+(decl rv_vfrsub_vf (Reg Reg VState) Reg)
+(rule (rv_vfrsub_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfmul.vv` instruction.
+(decl rv_vfmul_vv (Reg Reg VState) Reg)
+(rule (rv_vfmul_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfmul.vf` instruction.
+(decl rv_vfmul_vf (Reg Reg VState) Reg)
+(rule (rv_vfmul_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfdiv.vv` instruction.
+(decl rv_vfdiv_vv (Reg Reg VState) Reg)
+(rule (rv_vfdiv_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfdiv.vf` instruction.
+(decl rv_vfdiv_vf (Reg Reg VState) Reg)
+(rule (rv_vfdiv_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfrdiv.vf` instruction.
+(decl rv_vfrdiv_vf (Reg Reg VState) Reg)
+(rule (rv_vfrdiv_vf vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
+;; The output of this instruction is `vs2` with the negated sign bit from `vs1`.
+(decl rv_vfsgnjn_vv (Reg Reg VState) Reg)
+(rule (rv_vfsgnjn_vv vs2 vs1 vstate)
+  (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 vstate))
+
+;; Helper for emitting the `vfneg.v` instruction.
+;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs`.
+(decl rv_vfneg_v (Reg VState) Reg)
+(rule (rv_vfneg_v vs vstate) (rv_vfsgnjn_vv vs vs vstate))
+
+;; Helper for emitting the `vfsqrt.v` instruction.
+;; This instruction computes the element-wise square root of the source vector.
+(decl rv_vfsqrt_v (Reg VState) Reg)
+(rule (rv_vfsqrt_v vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfsqrtV) vs vstate))
+
 ;; Helper for emitting the `vslidedown.vx` instruction.
 ;; `vslidedown` moves all elements in the vector down by n elements.
 ;; The top most elements are up to the tail policy.
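The `rv_vfsgnjn_vv` / `rv_vfneg_v` helpers above (and the `vfneg.v` pretty-printing case added in `inst/mod.rs`) rely on the sign-injection semantics from the RISC-V Vector spec: each result lane takes its magnitude from `vs2` and the negation of `vs1`'s sign bit. A minimal per-lane sketch in Rust, assuming f32 lanes; `vfsgnjn_lane_f32` is illustrative and not a function in this patch:

fn vfsgnjn_lane_f32(vs2: f32, vs1: f32) -> f32 {
    // Take the magnitude (all bits except the sign) from vs2...
    let magnitude = vs2.to_bits() & 0x7fff_ffff;
    // ...and the *negated* sign bit from vs1.
    let negated_sign = !vs1.to_bits() & 0x8000_0000;
    f32::from_bits(magnitude | negated_sign)
}

fn main() {
    // vs2 == vs1 is exactly `vfneg.v`: every lane's sign is flipped.
    assert_eq!(vfsgnjn_lane_f32(2.0, 2.0), -2.0);
    // Otherwise the magnitude comes from vs2 and the (negated) sign from vs1.
    assert_eq!(vfsgnjn_lane_f32(-2.0, -1.0), 2.0);
}

With `vs2 == vs1` every lane's sign is flipped, which is why the disassembly special case prints `vfneg.v` for that operand pattern.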
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 7dc69850ebd0..d28e30344d60 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -584,9 +584,12 @@ (rv_fabs ty x)) ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type ty (fneg x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x))) (rv_fneg ty x)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x))) + (rv_vfneg_v x ty)) + ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (fcopysign x y))) (rv_fsgnj ty x y)) @@ -597,9 +600,11 @@ ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type ty (sqrt x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x))) (rv_fsqrt ty x)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x))) + (rv_vfsqrt_v x ty)) ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 @@ -706,18 +711,65 @@ ;;;;; Rules for for float arithmetic -(rule (lower (has_type ty (fadd x y))) + + +;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y))) (rv_fadd ty x y)) -(rule (lower (has_type ty (fsub x y))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y))) + (rv_vfadd_vv x y ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y)))) + (rv_vfadd_vf x y ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y))) + (rv_vfadd_vf y x ty)) + + +;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y))) (rv_fsub ty x y)) -(rule (lower (has_type ty (fmul x y))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y))) + (rv_vfsub_vv x y ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y)))) + (rv_vfsub_vf x y ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y))) + (rv_vfrsub_vf y x ty)) + +;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y))) (rv_fmul ty x y)) -(rule (lower (has_type ty (fdiv x y))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y))) + (rv_vfmul_vv x y ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y)))) + (rv_vfmul_vf x y ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y))) + (rv_vfmul_vf y x ty)) + + +;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y))) (rv_fdiv ty x y)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y))) + (rv_vfdiv_vv x y ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y)))) + (rv_vfdiv_vf x y ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y))) + (rv_vfrdiv_vf y x ty)) + +;;;; Rules for `fmin/fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (rule (lower (has_type ty (fmin x y))) (gen_float_select (FloatSelectOP.Min) x y ty)) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fadd.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fadd.clif new file mode 100644 index 000000000000..a4535c6e8715 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/riscv64/simd-fadd.clif @@ -0,0 +1,249 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fadd_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fadd v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fadd_splat_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fadd v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fadd_splat_reverse_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fadd v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fadd_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fadd v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 
0x11, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fadd_splat_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fadd v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fadd_splat_reverse_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fadd v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfadd.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fdiv.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fdiv.clif new file mode 100644 index 000000000000..a7bb956cb390 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fdiv.clif @@ -0,0 +1,249 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fdiv_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fdiv v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfdiv.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fdiv_splat_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fdiv v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfdiv.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; 
block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fdiv_splat_reverse_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fdiv v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfrdiv.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fdiv_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fdiv v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfdiv.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fdiv_splat_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fdiv v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfdiv.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fdiv_splat_reverse_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fdiv v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfrdiv.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 
+; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fmul.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fmul.clif new file mode 100644 index 000000000000..de5aef4d7bc2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fmul.clif @@ -0,0 +1,249 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fmul_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fmul v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmul.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmul_splat_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fmul v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmul.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmul_splat_reverse_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fmul v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmul.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmul_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmul v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, 
m1, ta, ma) +; vfmul.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmul_splat_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fmul v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmul.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fmul_splat_reverse_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fmul v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmul.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x92 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fneg.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fneg.clif new file mode 100644 index 000000000000..21c66e0a6e07 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fneg.clif @@ -0,0 +1,83 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fneg_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = fneg v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfneg.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x10, 0x26 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fneg_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = fneg v0 + 
return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfneg.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x26 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fsub.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fsub.clif new file mode 100644 index 000000000000..a9e57567ae7d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fsub.clif @@ -0,0 +1,249 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %fsub_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fsub v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsub.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x0a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fsub_splat_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fsub v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsub.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x0a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fsub_splat_reverse_f32x4(f32x4, f32) -> f32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fsub v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfrsub.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 
0x9e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fsub_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fsub v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsub.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x0a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fsub_splat_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fsub v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsub.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x0a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %fsub_splat_reverse_f64x2(f64x2, f64) -> f64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fsub v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfrsub.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x9e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-sqrt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-sqrt.clif new file mode 100644 index 000000000000..a0b3b698516b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-sqrt.clif @@ -0,0 +1,83 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %sqrt_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = sqrt v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsqrt.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 
8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x12, 0x10, 0x4e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sqrt_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = sqrt v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfsqrt.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x12, 0x10, 0x4e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif b/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif index 4e74ba91b850..da8c526ca1df 100644 --- a/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif @@ -7,6 +7,7 @@ target x86_64 target x86_64 sse41 target x86_64 sse42 target x86_64 sse42 has_avx +target riscv64 has_v function %splat_f32x4_2(f32x4) -> f32x4 { block0(v0: f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-fadd.clif b/cranelift/filetests/filetests/runtests/simd-fadd.clif new file mode 100644 index 000000000000..60764d4a87c2 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fadd.clif @@ -0,0 +1,28 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + + +function %fadd_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fadd v0, v1 + return v2 +} +; run: %fadd_f32x4([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x1.0 0x2.9 0x1.400000p1 0x1.800000p0]) == [0x1.5 0x1.fp1 0x1.10ap10 0x1.666666p1] +; run: %fadd_f32x4([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0x0.0 0x0.0 +Inf -Inf] + + +function %fadd_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fadd v0, v1 + return v2 +} +; run: %fadd_f64x2([0x0.5 0x1.5], [0x1.0 0x2.9]) == [0x1.5 0x1.fp1] +; run: %fadd_f64x2([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0x0.0 0x0.0] diff --git a/cranelift/filetests/filetests/runtests/simd-fdiv.clif b/cranelift/filetests/filetests/runtests/simd-fdiv.clif new file mode 100644 index 000000000000..3a6381e47275 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fdiv.clif @@ -0,0 +1,26 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + + +function %fdiv_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fdiv v0, v1 + return v2 +} +; run: %fdiv_f32x4([0x1.5 0x1.5 0x1.5 0x1.5], [0x2.9 0x2.9 0x2.9 0x2.9]) == [0x1.063e70p-1 0x1.063e70p-1 0x1.063e70p-1 0x1.063e70p-1] + + +function %fdiv_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fdiv v0, v1 + return v2 +} +; run: 
%fdiv_f64x2([0x1.5 0x1.5], [0x2.9 0x2.9]) == [0x1.063e7063e7064p-1 0x1.063e7063e7064p-1] diff --git a/cranelift/filetests/filetests/runtests/simd-fmul.clif b/cranelift/filetests/filetests/runtests/simd-fmul.clif new file mode 100644 index 000000000000..4f0d5eb68d98 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fmul.clif @@ -0,0 +1,26 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + + +function %fmul_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fmul v0, v1 + return v2 +} +; run: %fmul_f32x4([0x1.5 0x1.5 0x1.5 0x1.5], [0x2.9 0x2.9 0x2.9 0x2.9]) == [0x1.ae8p1 0x1.ae8p1 0x1.ae8p1 0x1.ae8p1] + + +function %fmul_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fmul v0, v1 + return v2 +} +; run: %fmul_f64x2([0x1.5 0x1.5], [0x2.9 0x2.9]) == [0x1.ae8p1 0x1.ae8p1] diff --git a/cranelift/filetests/filetests/runtests/simd-fneg.clif b/cranelift/filetests/filetests/runtests/simd-fneg.clif new file mode 100644 index 000000000000..643c4f9c3ea7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fneg.clif @@ -0,0 +1,24 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + +function %fneg_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = fneg v0 + return v1 +} +; run: %fneg_f32x4([0x9.0 0x9.0 0x9.0 0x9.0]) == [-0x9.0 -0x9.0 -0x9.0 -0x9.0] + +function %fneg_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = fneg v0 + return v1 +} +; run: %fneg_f64x2([0x9.0 0x9.0]) == [-0x9.0 -0x9.0] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/simd-fsub.clif b/cranelift/filetests/filetests/runtests/simd-fsub.clif new file mode 100644 index 000000000000..b04affda5668 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fsub.clif @@ -0,0 +1,26 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + + +function %fsub_f32x4(f32x4, f32x4) -> f32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fsub v0, v1 + return v2 +} +; run: %fsub_f32x4([0x0.5 0x0.5 0x0.5 0x0.5], [0x1.0 0x1.0 0x1.0 0x1.0]) == [-0x1.6p-1 -0x1.6p-1 -0x1.6p-1 -0x1.6p-1] + + +function %fsub_f64x2(f64x2, f64x2) -> f64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fsub v0, v1 + return v2 +} +; run: %fsub_f64x2([0x0.5 0x0.5], [0x1.0 0x1.0]) == [-0x1.6p-1 -0x1.6p-1] diff --git a/cranelift/filetests/filetests/runtests/simd-sqrt.clif b/cranelift/filetests/filetests/runtests/simd-sqrt.clif new file mode 100644 index 000000000000..10152fbb7117 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqrt.clif @@ -0,0 +1,26 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target riscv64 has_v + + +function %sqrt_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = sqrt v0 + return v1 +} +; run: %sqrt_f32x4([0x9.0 0x9.0 0x9.0 0x9.0]) == [0x3.0 0x3.0 0x3.0 0x3.0] + +function %sqrt_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = sqrt v0 + return v1 +} +; run: %sqrt_f64x2([0x9.0 0x9.0]) == [0x3.0 0x3.0] +
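The raw `.byte` sequences in the precise-output tests above are the 32-bit OP-V instruction words assembled from the fields this patch adds in `vector.rs`: `funct6`, the operand category (`funct3`), the `vm` bit, and the register specifiers. A minimal sketch of that composition, assuming the standard RVV OP-V field layout; `encode_opv` is illustrative and not a function in this patch:

fn encode_opv(funct6: u32, vm: u32, vs2: u32, vs1: u32, funct3: u32, vd: u32) -> u32 {
    // Major opcode shared by all OP-V (vector) instructions.
    const OPCODE_OP_V: u32 = 0b101_0111;
    (funct6 << 26)
        | (vm << 25)
        | (vs2 << 20)
        | (vs1 << 15)
        | (funct3 << 12)
        | (vd << 7)
        | OPCODE_OP_V
}

fn main() {
    // vfadd.vv v6,v1,v3: funct6=0b000000, funct3=0b001 (OPFVV), unmasked (vm=1).
    assert_eq!(
        encode_opv(0b000000, 1, 1, 3, 0b001, 6).to_le_bytes(),
        [0x57, 0x93, 0x11, 0x02]
    );
    // vfsqrt.v v4,v1: funct6=0b010011 (VFUNARY1), vs1=0b00000 selects vfsqrt,
    // funct3=0b001 (OPFVV); the source vector register is encoded in vs2.
    assert_eq!(
        encode_opv(0b010011, 1, 1, 0, 0b001, 4).to_le_bytes(),
        [0x57, 0x12, 0x10, 0x4e]
    );
}

The second check also illustrates why `vs_is_vs2_encoded` returns true for `VfsqrtV`: the source vector goes in the `vs2` field while `vs1` carries the VFUNARY1 selector `0b00000`.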