diff --git a/build.rs b/build.rs index b0d64c190a0c..5825af1be0e3 100644 --- a/build.rs +++ b/build.rs @@ -253,14 +253,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_load64_lane", "simd_load8_lane", "simd_load_extend", - "simd_load_splat", "simd_load_zero", "simd_splat", - "simd_store16_lane", - "simd_store32_lane", - "simd_store64_lane", - "simd_store8_lane", - "spillslot_size_fuzzbug", "v128_select", ] .contains(&testname); diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 2a6ef131ea68..9a4f99b8e36c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -337,6 +337,18 @@ (imm Imm5) (vstate VState)) + (VecAluRR + (op VecAluOpRR) + (vd WritableReg) + (vs Reg) + (vstate VState)) + + (VecAluRImm5 + (op VecAluOpRImm5) + (vd WritableReg) + (imm Imm5) + (vstate VState)) + (VecSetState (rd WritableReg) (vstate VState)) @@ -1353,6 +1365,14 @@ (extractor (replicated_imm5 n) (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n)))))) +;; UImm5 Helpers + +;; Extract a `UImm5` from an `u8`. +(decl pure partial uimm5_from_u8 (UImm5) u8) +(extern extractor uimm5_from_u8 uimm5_from_u8) + +(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) +(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) ;; Float Helpers diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index df7567918bf4..d4c03c1ea053 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -467,7 +467,9 @@ impl Inst { // VecSetState does not expect any vstate, rather it updates it. Inst::VecSetState { .. } => None, + Inst::VecAluRR { vstate, .. } | Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRImm5 { vstate, .. } | Inst::VecAluRRImm5 { vstate, .. } | // TODO: Unit-stride loads and stores only need the AVL to be correct, not // the full vtype. A future optimization could be to decouple these two when @@ -2818,6 +2820,17 @@ impl MachInstEmit for Inst { sink.put4(encode_valu_imm(op, vd, imm, vs2, VecOpMasking::Disabled)); } + &Inst::VecAluRR { op, vd, vs, .. } => { + let vs = allocs.next(vs); + let vd = allocs.next_writable(vd); + + sink.put4(encode_valu_rr(op, vd, vs, VecOpMasking::Disabled)); + } + &Inst::VecAluRImm5 { op, vd, imm, .. 
} => { + let vd = allocs.next_writable(vd); + + sink.put4(encode_valu_r_imm(op, vd, imm, VecOpMasking::Disabled)); + } &Inst::VecSetState { rd, ref vstate } => { let rd = allocs.next_writable(rd); diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 2a479867578c..e52d05aa48a1 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -9,7 +9,8 @@ use super::{Imm12, Imm5, UImm5, VType}; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::{ - VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory, VecOpMasking, + VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory, + VecOpMasking, }; use crate::machinst::isle::WritableReg; use crate::Reg; @@ -145,6 +146,48 @@ pub fn encode_valu_imm( ) } +pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + let (vs1, vs2) = if op.vs_is_vs2_encoded() { + (op.aux_encoding(), reg_to_gpr_num(vs)) + } else { + (reg_to_gpr_num(vs), op.aux_encoding()) + }; + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +pub fn encode_valu_r_imm( + op: VecAluOpRImm5, + vd: WritableReg, + imm: Imm5, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + // This is true for this opcode, not sure if there are any other ones. + debug_assert_eq!(op, VecAluOpRImm5::VmvVI); + let vs1 = imm.bits() as u32; + let vs2 = op.aux_encoding(); + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + /// Encodes a Vector CFG Imm instruction. /// /// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs index 7ef4f52451ea..2f9b544b15ed 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -143,6 +143,12 @@ impl Imm5 { } } + pub fn from_bits(value: u8) -> Imm5 { + assert_eq!(value & 0x1f, value); + let signed = ((value << 3) as i8) >> 3; + Imm5 { value: signed } + } + /// Bits for encoding. pub fn bits(&self) -> u8 { self.value as u8 & 0x1f diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index d654e77765a6..4a13ca4c62cb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -642,6 +642,18 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(vs2); collector.reg_def(vd); } + &Inst::VecAluRR { op, vd, vs, .. } => { + debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); + debug_assert_eq!(vs.class(), op.src_regclass()); + + collector.reg_use(vs); + collector.reg_def(vd); + } + &Inst::VecAluRImm5 { vd, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + + collector.reg_def(vd); + } &Inst::VecSetState { rd, .. } => { collector.reg_def(rd); } @@ -1585,7 +1597,36 @@ impl Inst { let vs2_s = format_reg(vs2, allocs); let vd_s = format_reg(vd.to_reg(), allocs); - format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate) + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. 
+ let imm_s = if op.imm_is_unsigned() { + format!("{}", imm.bits()) + } else { + format!("{}", imm) + }; + + format!("{} {},{},{} {}", op, vd_s, vs2_s, imm_s, vstate) + } + &Inst::VecAluRR { + op, + vd, + vs, + ref vstate, + } => { + let vs_s = format_reg(vs, allocs); + let vd_s = format_reg(vd.to_reg(), allocs); + + format!("{} {},{} {}", op, vd_s, vs_s, vstate) + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref vstate, + } => { + let vd_s = format_reg(vd.to_reg(), allocs); + + format!("{} {},{} {}", op, vd_s, imm, vstate) } &Inst::VecSetState { rd, ref vstate } => { let rd_s = format_reg(rd.to_reg(), allocs); diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 603da8690ea4..a45cf39b3369 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,8 +1,8 @@ use crate::isa::riscv64::inst::AllocationConsumer; use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ - VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, - VecOpCategory, VecOpMasking, VecTailMode, + VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, + VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, }; use crate::machinst::RegClass; use crate::Reg; @@ -260,6 +260,7 @@ impl VecAluOpRRR { VecAluOpRRR::VandVV => 0b001001, VecAluOpRRR::VorVV => 0b001010, VecAluOpRRR::VxorVV => 0b001011, + VecAluOpRRR::VslidedownVX => 0b001111, } } @@ -273,9 +274,10 @@ impl VecAluOpRRR { VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { VecOpCategory::OPMVV } - VecAluOpRRR::VaddVX | VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => { - VecOpCategory::OPIVX - } + VecAluOpRRR::VaddVX + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VrsubVX + | VecAluOpRRR::VslidedownVX => VecOpCategory::OPIVX, } } @@ -305,13 +307,30 @@ impl VecAluOpRRImm5 { 0x57 } pub fn funct3(&self) -> u32 { - VecOpCategory::OPIVI.encode() + self.category().encode() } + pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { VecAluOpRRImm5::VaddVI => 0b000000, VecAluOpRRImm5::VrsubVI => 0b000011, + VecAluOpRRImm5::VslidedownVI => 0b001111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRImm5::VaddVI | VecAluOpRRImm5::VrsubVI | VecAluOpRRImm5::VslidedownVI => { + VecOpCategory::OPIVI + } + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRImm5::VslidedownVI => true, + VecAluOpRRImm5::VaddVI | VecAluOpRRImm5::VrsubVI => false, } } } @@ -325,6 +344,139 @@ impl fmt::Display for VecAluOpRRImm5 { } } +impl VecAluOpRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { + 0b010000 + } + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRR::VmvSX => VecOpCategory::OPMVX, + VecAluOpRR::VmvXS => VecOpCategory::OPMVV, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, + VecAluOpRR::VfmvFS => VecOpCategory::OPFVV, + VecAluOpRR::VmvVV => VecOpCategory::OPIVV, + VecAluOpRR::VmvVX => VecOpCategory::OPIVX, + } + } + 
+    /// Returns the auxiliary encoding field for the instruction, if any.
+    pub fn aux_encoding(&self) -> u32 {
+        match self {
+            // VRXUNARY0
+            VecAluOpRR::VmvSX => 0b00000,
+            // VWXUNARY0
+            VecAluOpRR::VmvXS => 0b00000,
+            // VRFUNARY0
+            VecAluOpRR::VfmvSF => 0b00000,
+            // VWFUNARY0
+            VecAluOpRR::VfmvFS => 0b00000,
+            // These don't have an explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
+            // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
+            VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
+        }
+    }
+
+    /// Most of these opcodes have the source register encoded in the VS2 field and
+    /// the `aux_encoding` field in VS1. However, a few opcodes have it the other way
+    /// around; currently only the `vmv.v.*` family is encoded backwards.
+    pub fn vs_is_vs2_encoded(&self) -> bool {
+        match self {
+            VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => true,
+            VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => false,
+        }
+    }
+
+    pub fn dst_regclass(&self) -> RegClass {
+        match self {
+            VecAluOpRR::VfmvSF
+            | VecAluOpRR::VmvSX
+            | VecAluOpRR::VmvVV
+            | VecAluOpRR::VmvVX
+            | VecAluOpRR::VfmvVF => RegClass::Vector,
+            VecAluOpRR::VmvXS => RegClass::Int,
+            VecAluOpRR::VfmvFS => RegClass::Float,
+        }
+    }
+
+    pub fn src_regclass(&self) -> RegClass {
+        match self {
+            VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV => RegClass::Vector,
+            VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
+            VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
+        }
+    }
+}
+
+impl fmt::Display for VecAluOpRR {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(match self {
+            VecAluOpRR::VmvSX => "vmv.s.x",
+            VecAluOpRR::VmvXS => "vmv.x.s",
+            VecAluOpRR::VfmvSF => "vfmv.s.f",
+            VecAluOpRR::VfmvFS => "vfmv.f.s",
+            VecAluOpRR::VmvVV => "vmv.v.v",
+            VecAluOpRR::VmvVX => "vmv.v.x",
+            VecAluOpRR::VfmvVF => "vfmv.v.f",
+        })
+    }
+}
+
+impl VecAluOpRImm5 {
+    pub fn opcode(&self) -> u32 {
+        // Vector Opcode
+        0x57
+    }
+    pub fn funct3(&self) -> u32 {
+        self.category().encode()
+    }
+
+    pub fn funct6(&self) -> u32 {
+        // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
+        match self {
+            VecAluOpRImm5::VmvVI => 0b010111,
+        }
+    }
+
+    pub fn category(&self) -> VecOpCategory {
+        match self {
+            VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI,
+        }
+    }
+
+    /// Returns the auxiliary encoding field for the instruction, if any.
+    pub fn aux_encoding(&self) -> u32 {
+        match self {
+            // These don't have an explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
+            // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
+ VecAluOpRImm5::VmvVI => 0, + } + } +} + +impl fmt::Display for VecAluOpRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRImm5::VmvVI => "vmv.v.i", + }) + } +} + impl VecAMode { pub fn get_base_register(&self) -> Option { match self { diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index ee02f7b7c503..bfbe10e958cd 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -97,14 +97,39 @@ (VaddVX) (VsubVX) (VrsubVX) + (VslidedownVX) )) ;; Register-Imm ALU Ops (type VecAluOpRRImm5 (enum + ;; Regular VI Opcodes (VaddVI) (VrsubVI) + (VslidedownVI) )) +;; Imm only ALU Ops +(type VecAluOpRImm5 (enum + (VmvVI) +)) + +;; These are all of the special cases that have weird encodings. They are all +;; single source, single destination instructions, and usually use one of +;; the two source registers as auxiliary encoding space. +(type VecAluOpRR (enum + (VmvSX) + (VmvXS) + (VfmvSF) + (VfmvFS) + ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. + (VmvVV) + (VmvVX) + (VfmvVF) +)) + +;; Returns the canonical destination type for a VecAluOpRRImm5. +(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) +(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) ;; Vector Addressing Mode @@ -139,9 +164,15 @@ (rule (element_width_from_type ty) (if-let $I32 (lane_type ty)) (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $F32 (lane_type ty)) + (VecElementWidth.E32)) (rule (element_width_from_type ty) (if-let $I64 (lane_type ty)) (VecElementWidth.E64)) +(rule (element_width_from_type ty) + (if-let $F64 (lane_type ty)) + (VecElementWidth.E64)) (decl pure min_vec_reg_size () u64) (extern constructor min_vec_reg_size min_vec_reg_size) @@ -172,6 +203,27 @@ (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm vstate)))) vd)) +;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VState) Reg) +(rule (vec_alu_rr_uimm5 op vs2 imm vstate) + (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) vstate)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions that use the Imm5 as +;; auxiliary encoding space. +(decl vec_alu_rr (VecAluOpRR Reg VState) Reg) +(rule (vec_alu_rr op vs vstate) + (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) + (_ Unit (emit (MInst.VecAluRR op vd vs vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRImm5` instructions. +(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VState) Reg) +(rule (vec_alu_r_imm5 op imm vstate) + (let ((vd WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecAluRImm5 op vd imm vstate)))) + vd)) + ;; Helper for emitting `MInst.VecLoad` instructions. (decl vec_load (VecElementWidth VecAMode MemFlags VState) Reg) (rule (vec_load eew from flags vstate) @@ -254,3 +306,70 @@ (decl rv_vxor_vv (Reg Reg VState) Reg) (rule (rv_vxor_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 vstate)) + +;; Helper for emitting the `vslidedown.vx` instruction. +;; `vslidedown` moves all elements in the vector down by n elements. +;; The top most elements are up to the tail policy. +(decl rv_vslidedown_vx (Reg Reg VState) Reg) +(rule (rv_vslidedown_vx vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 vstate)) + +;; Helper for emitting the `vslidedown.vi` instruction. 
+;; Unlike other `vi` instructions, the immediate is zero-extended.
+(decl rv_vslidedown_vi (Reg UImm5 VState) Reg)
+(rule (rv_vslidedown_vi vs2 imm vstate)
+  (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm vstate))
+
+;; Helper for emitting the `vmv.x.s` instruction.
+;; This instruction copies the first element of the source vector to the destination X register.
+(decl rv_vmv_xs (Reg VState) Reg)
+(rule (rv_vmv_xs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvXS) vs vstate))
+
+;; Helper for emitting the `vfmv.f.s` instruction.
+;; This instruction copies the first element of the source vector to the destination F register.
+(decl rv_vfmv_fs (Reg VState) Reg)
+(rule (rv_vfmv_fs vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvFS) vs vstate))
+
+;; Helper for emitting the `vmv.v.x` instruction.
+;; This instruction splats the X register into all elements of the destination vector.
+(decl rv_vmv_vx (Reg VState) Reg)
+(rule (rv_vmv_vx vs vstate)
+  (vec_alu_rr (VecAluOpRR.VmvVX) vs vstate))
+
+;; Helper for emitting the `vfmv.v.f` instruction.
+;; This instruction splats the F register into all elements of the destination vector.
+(decl rv_vfmv_vf (Reg VState) Reg)
+(rule (rv_vfmv_vf vs vstate)
+  (vec_alu_rr (VecAluOpRR.VfmvVF) vs vstate))
+
+;; Helper for emitting the `vmv.v.i` instruction.
+;; This instruction splats the immediate value into all elements of the destination vector.
+(decl rv_vmv_vi (Imm5 VState) Reg)
+(rule (rv_vmv_vi imm vstate)
+  (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm vstate))
+
+;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl gen_extractlane (Type Reg u8) Reg)
+
+;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly.
+(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0)
+  (if (ty_vector_float ty))
+  (rv_vfmv_fs src ty))
+
+;; When extracting lane 0 for integers, we can use `vmv.x.s` directly.
+(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0)
+  (if (ty_vector_not_float ty))
+  (rv_vmv_xs src ty))
+
+;; In the general case, we must first use a `vslidedown` to place the correct lane
+;; in index 0, and then use the appropriate `vmv` instruction.
+;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`.
+(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx))
+  (gen_extractlane ty (rv_vslidedown_vi src idx ty) 0))
+
+;; Otherwise, lower the index into an X register and use `vslidedown.vx`.
+(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) + (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) ty) 0)) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index cf30e6e89b0f..7dc69850ebd0 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1030,3 +1030,23 @@ (rule (lower (call_indirect sig_ref val inputs)) (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) + (gen_extractlane ty x idx)) + +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type ty (splat n @ (value_type (ty_scalar_float _))))) + (rv_vfmv_vf n ty)) + +(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) + (rv_vmv_vx n ty)) + +(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) + (rv_vmv_vi imm ty)) + +;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for +;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something +;; similar in its splat rules. diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 1094dfce2113..8dc6c6675903 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -6,6 +6,7 @@ pub mod generated_code; use generated_code::{Context, ExtendOp, MInst}; // Types that the generated ISLE code uses via `use super::*`. +use self::generated_code::VecAluOpRR; use super::{writable_zero_reg, zero_reg}; use crate::isa::riscv64::abi::Riscv64ABICaller; use crate::isa::riscv64::Riscv64Backend; @@ -205,6 +206,14 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) } #[inline] + fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { + Imm5::from_bits(arg0.bits() as u8) + } + #[inline] + fn uimm5_from_u8(&mut self, arg0: u8) -> Option { + UImm5::maybe_from_u8(arg0) + } + #[inline] fn writable_zero_reg(&mut self) -> WritableReg { writable_zero_reg() } @@ -455,6 +464,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> None } } + + fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { + MInst::canonical_type_for_rc(op.dst_regclass()) + } } /// The main entry point for lowering with ISLE. 
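The precise-output filetests that follow embed the raw instruction words as `.byte` sequences, which makes it possible to sanity-check the new VV/VX/VI encodings by hand. The sketch below is illustrative only and not part of the patch: `pack_r_type` is a hypothetical stand-in for the patch's `encode_r_type_bits` helper (whose body is not shown in this diff), it assumes the standard RISC-V R-type field layout, and it assumes the RVV convention that the `vm` bit is 1 for an unmasked operation. The expected words are taken directly from the `vmv.x.s` and `vmv.v.i` bytes in the tests below.

```rust
// Illustrative sketch: pack the standard RISC-V R-type fields the way the
// patch's `encode_r_type_bits` helper is expected to (assumption; its body is
// not part of this diff).
fn pack_r_type(opcode: u32, rd: u32, funct3: u32, vs1: u32, vs2: u32, funct7: u32) -> u32 {
    (funct7 << 25) | (vs2 << 20) | (vs1 << 15) | (funct3 << 12) | (rd << 7) | opcode
}

fn main() {
    // `vmv.x.s a0, v0`: OPMVV (funct3 = 0b010), funct6 = 0b010000, vm = 1,
    // vd = x10 (a0), vs2 = v0, vs1 = VWXUNARY0 (0b00000).
    let vmv_x_s = pack_r_type(0x57, 10, 0b010, 0b00000, 0, (0b010000 << 1) | 1);
    // Matches `.byte 0x57, 0x25, 0x00, 0x42` in the extractlane filetest.
    assert_eq!(vmv_x_s, 0x4200_2557);

    // `vmv.v.i v2, 2`: OPIVI (funct3 = 0b011), funct6 = 0b010111, vm = 1,
    // vd = v2, imm = 2 in the vs1/simm5 slot, vs2 must be v0.
    let vmv_v_i = pack_r_type(0x57, 2, 0b011, 2, 0, (0b010111 << 1) | 1);
    // Matches `.byte 0x57, 0x31, 0x01, 0x5e` in the splat filetest.
    assert_eq!(vmv_v_i, 0x5e01_3157);
}
```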
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-extractlane.clif b/cranelift/filetests/filetests/isa/riscv64/simd-extractlane.clif new file mode 100644 index 000000000000..1cd78cd557be --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-extractlane.clif @@ -0,0 +1,446 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %extractlane_i8x16_idx_0(i8x16) -> i8 { +block0(v0: i8x16): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.x.s a0,v0 #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x25, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i16x8_idx_0(i16x8) -> i16 { +block0(v0: i16x8): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.x.s a0,v0 #avl=8, #vtype=(e16, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x25, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i32x4_idx_0(i32x4) -> i32 { +block0(v0: i32x4): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.x.s a0,v0 #avl=4, #vtype=(e32, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x25, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i64x2_idx_0(i64x2) -> i64 { +block0(v0: i64x2): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.x.s a0,v0 #avl=2, #vtype=(e64, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x25, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_f32x4_idx_0(f32x4) -> f32 { +block0(v0: f32x4): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.f.s fa0,v0 #avl=4, #vtype=(e32, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; 
sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x15, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_f64x2_idx_0(f64x2) -> f64 { +block0(v0: f64x2): + v1 = extractlane v0, 0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.f.s fa0,v0 #avl=2, #vtype=(e64, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x15, 0x00, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i8x16_idx_1(i8x16) -> i8 { +block0(v0: i8x16): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.x.s a0,v2 #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x25, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i16x8_idx_1(i16x8) -> i16 { +block0(v0: i16x8): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.x.s a0,v2 #avl=8, #vtype=(e16, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x25, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i32x4_idx_1(i32x4) -> i32 { +block0(v0: i32x4): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.x.s a0,v2 #avl=4, #vtype=(e32, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x25, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_i64x2_idx_1(i64x2) -> i64 { +block0(v0: i64x2): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 
#avl=2, #vtype=(e64, m1, ta, ma) +; vmv.x.s a0,v2 #avl=2, #vtype=(e64, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x25, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_f32x4_idx_1(f32x4) -> f32 { +block0(v0: f32x4): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 #avl=4, #vtype=(e32, m1, ta, ma) +; vfmv.f.s fa0,v2 #avl=4, #vtype=(e32, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x15, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %extractlane_f64x2_idx_1(f64x2) -> f64 { +block0(v0: f64x2): + v1 = extractlane v0, 1 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v0,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vslidedown.vi v2,v0,1 #avl=2, #vtype=(e64, m1, ta, ma) +; vfmv.f.s fa0,v2 #avl=2, #vtype=(e64, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x07, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xb1, 0x00, 0x3e +; .byte 0x57, 0x15, 0x20, 0x42 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-splat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-splat.clif new file mode 100644 index 000000000000..af0099034fac --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-splat.clif @@ -0,0 +1,206 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %splat_i8x16(i8) -> i8x16 { +block0(v0: i8): + v1 = splat.i8x16 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.x v3,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v3,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xd7, 0x41, 0x05, 0x5e +; .byte 0xa7, 0x81, 0x05, 0x02 +; ret + +function %splat_i16x8(i16) -> i16x8 { +block0(v0: i16): + v1 = splat.i16x8 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.x v3,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v3,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x41, 0x05, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x81, 0x05, 0x02 +; ret + +function %splat_i32x4(i32) -> i32x4 { +block0(v0: i32): + v1 = splat.i32x4 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.x v3,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v3,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x41, 
0x05, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x81, 0x05, 0x02 +; ret + +function %splat_i64x2(i64) -> i64x2 { +block0(v0: i64): + v1 = splat.i64x2 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.x v3,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v3,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x41, 0x05, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x81, 0x05, 0x02 +; ret + +function %splat_const_i8x16() -> i8x16 { +block0: + v0 = iconst.i8 2 + v1 = splat.i8x16 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.i v2,2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v2,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x57, 0x31, 0x01, 0x5e +; .byte 0x27, 0x01, 0x05, 0x02 +; ret + +function %splat_const_i16x8() -> i16x8 { +block0: + v0 = iconst.i16 2 + v1 = splat.i16x8 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.i v2,2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v2,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x31, 0x01, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x01, 0x05, 0x02 +; ret + +function %splat_const_i32x4() -> i32x4 { +block0: + v0 = iconst.i32 2 + v1 = splat.i32x4 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.i v2,2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v2,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x31, 0x01, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x01, 0x05, 0x02 +; ret + +function %splat_const_i64x2() -> i64x2 { +block0: + v0 = iconst.i64 2 + v1 = splat.i64x2 v0 + return v1 +} + +; VCode: +; block0: +; vmv.v.i v2,2 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v2,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x31, 0x01, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x01, 0x05, 0x02 +; ret + +function %splat_f32x4(f32) -> f32x4 { +block0(v0: f32): + v1 = splat.f32x4 v0 + return v1 +} + +; VCode: +; block0: +; vfmv.v.f v3,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v3,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x51, 0x05, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x01, 0x05, 0x02 +; ret + +function %splat_f64x2(f64) -> f64x2 { +block0(v0: f64): + v1 = splat.f64x2 v0 + return v1 +} + +; VCode: +; block0: +; vfmv.v.f v3,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v3,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x51, 0x05, 0x5e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x01, 0x05, 0x02 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-extractlane.clif b/cranelift/filetests/filetests/runtests/simd-extractlane.clif index 016f504edb35..032079c0da0c 100644 --- a/cranelift/filetests/filetests/runtests/simd-extractlane.clif +++ b/cranelift/filetests/filetests/runtests/simd-extractlane.clif @@ -8,6 +8,7 @@ target x86_64 target x86_64 sse41 target x86_64 sse42 target x86_64 sse42 has_avx +target riscv64 has_v function %extractlane_4(i8x16) -> i8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif 
b/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif new file mode 100644 index 000000000000..4e74ba91b850 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fadd-splat.clif @@ -0,0 +1,27 @@ +test run +target aarch64 +target s390x +target x86_64 ssse3 has_sse41=false +set enable_simd +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx + +function %splat_f32x4_2(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = f32const 0x1.5 + v2 = splat.f32x4 v1 + v3 = fadd v0, v2 + return v3 +} +; run: %splat_f32x4_2([0x0.0 NaN 0x1.0 0x2.0]) == [0x1.5 NaN 0x2.5 0x3.5] + +function %splat_f64x2_2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = f64const 0x7.5 + v2 = splat.f64x2 v1 + v3 = fadd v0, v2 + return v3 +} +; run: %splat_f64x2_2([0x0.0 0x1.0]) == [0x7.5 0x8.5] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/simd-splat.clif b/cranelift/filetests/filetests/runtests/simd-splat.clif index de2b49fd4adb..c07ef8833c10 100644 --- a/cranelift/filetests/filetests/runtests/simd-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-splat.clif @@ -6,6 +6,7 @@ set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx target x86_64 has_sse3 has_ssse3 has_sse41 has_avx has_avx2 +target riscv64 has_v function %splat_i8x16(i8) -> i8x16 { block0(v0: i8): @@ -127,24 +128,6 @@ block0(v0: i64x2): } ; run: %splat_i64x2_2([-1 0]) == [-2 -1] -function %splat_f32x4_2(f32x4) -> f32x4 { -block0(v0: f32x4): - v1 = f32const 0x1.5 - v2 = splat.f32x4 v1 - v3 = fadd v0, v2 - return v3 -} -; run: %splat_f32x4_2([0x0.0 NaN 0x1.0 0x2.0]) == [0x1.5 NaN 0x2.5 0x3.5] - -function %splat_f64x2_2(f64x2) -> f64x2 { -block0(v0: f64x2): - v1 = f64const 0x7.5 - v2 = splat.f64x2 v1 - v3 = fadd v0, v2 - return v3 -} -; run: %splat_f64x2_2([0x0.0 0x1.0]) == [0x7.5 0x8.5] - function %load_splat_i8x16(i8) -> i8x16 { ss0 = explicit_slot 8
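
A closing note on the immediate handling introduced above: `Imm5::from_bits` narrows a raw 5-bit pattern to a signed value by shifting it to the top of an `i8` and arithmetic-shifting it back down, and `uimm5_bitcast_to_imm5` relies on `bits()` recovering the original pattern, which is why `vslidedown.vi` can treat the same field as unsigned when printing and encoding. A minimal standalone sketch of that behavior (re-implemented here for illustration, not the actual `Imm5` type):

```rust
// Standalone re-implementation of the 5-bit narrowing used by `Imm5::from_bits`
// in the patch (illustrative only).
fn imm5_from_bits(value: u8) -> i8 {
    assert_eq!(value & 0x1f, value, "value must fit in 5 bits");
    // Shift the 5-bit pattern to the top of an i8, then arithmetic-shift back
    // down so bit 4 becomes the sign bit.
    ((value << 3) as i8) >> 3
}

// Mirrors `Imm5::bits`: recover the raw 5-bit pattern for encoding/printing.
fn imm5_bits(signed: i8) -> u8 {
    signed as u8 & 0x1f
}

fn main() {
    // 0b11111 sign-extends to -1, but still encodes/prints as 31 when the
    // opcode treats the field as unsigned (`imm_is_unsigned`, e.g. `vslidedown.vi`).
    assert_eq!(imm5_from_bits(0b11111), -1);
    assert_eq!(imm5_bits(-1), 31);
    // Values with bit 4 clear are unchanged.
    assert_eq!(imm5_from_bits(0b01111), 15);
}
```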