diff --git a/build.rs b/build.rs index 6cdb1f3a80f0..4ffc8894c58b 100644 --- a/build.rs +++ b/build.rs @@ -259,8 +259,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "cvt_from_uint", "issue_3327_bnot_lowering", "simd_conversions", - "simd_f32x4_rounding", - "simd_f64x2_rounding", "simd_i32x4_trunc_sat_f32x4", "simd_i32x4_trunc_sat_f64x2", "simd_load", diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 5a489ee812f0..624579eef987 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -59,6 +59,20 @@ (rs Reg) (imm12 Imm12)) + ;; A CSR Reading or Writing instruction with a register source and a register destination. + (CsrReg + (op CsrRegOP) + (rd WritableReg) + (rs Reg) + (csr CSR)) + + ;; A CSR Writing instruction with an immediate source and a register destination. + (CsrImm + (op CsrImmOP) + (rd WritableReg) + (imm UImm5) + (csr CSR)) + ;; An load (Load (rd WritableReg) @@ -689,6 +703,30 @@ (Bseti) )) +(type CsrRegOP (enum + ;; Atomic Read/Write CSR + (CsrRW) + ;; Atomic Read and Set Bits in CSR + (CsrRS) + ;; Atomic Read and Clear Bits in CSR + (CsrRC) +)) + +(type CsrImmOP (enum + ;; Atomic Read/Write CSR (Immediate Source) + (CsrRWI) + ;; Atomic Read and Set Bits in CSR (Immediate Source) + (CsrRSI) + ;; Atomic Read and Clear Bits in CSR (Immediate Source) + (CsrRCI) +)) + +;; Enum of the known CSR registers +(type CSR (enum + ;; Floating-Point Dynamic Rounding Mode + (Frm) +)) + (type FRM (enum ;; Round to Nearest, ties to Even @@ -706,6 +744,10 @@ (Fcsr) )) +(decl pure frm_bits (FRM) UImm5) +(extern constructor frm_bits frm_bits) +(convert FRM UImm5 frm_bits) + (type FFlagsException (enum ;; Invalid Operation (NV) @@ -1508,6 +1550,30 @@ (alu_rrr (AluOPRRR.Packw) rs1 rs2)) +;; `Zicsr` Extension Instructions + +;; Helper for emitting the `csrrwi` instruction. 
+(decl rv_csrrwi (CSR UImm5) XReg) +(rule (rv_csrrwi csr imm) + (csr_imm (CsrImmOP.CsrRWI) csr imm)) + +;; This is a special case of `csrrwi` when the CSR is the `frm` CSR. +(decl rv_fsrmi (FRM) XReg) +(rule (rv_fsrmi frm) (rv_csrrwi (CSR.Frm) frm)) + + +;; Helper for emitting the `csrw` instruction. This is a special case of +;; `csrrw` where the destination register is always `x0`. +(decl rv_csrw (CSR XReg) Unit) +(rule (rv_csrw csr rs) + (csr_reg_dst_zero (CsrRegOP.CsrRW) csr rs)) + +;; This is a special case of `csrw` when the CSR is the `frm` CSR. +(decl rv_fsrm (XReg) Unit) +(rule (rv_fsrm rs) (rv_csrw (CSR.Frm) rs)) + + + ;; Generate a mask for the bit-width of the given type @@ -1686,7 +1752,6 @@ (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2)))) dst)) - ;; Helper for emitting `MInst.FpuRRRR` instructions. (decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg) (rule (fpu_rrrr op ty src1 src2 src3) @@ -1710,7 +1775,6 @@ (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) dst)) - ;; Helper for emitting the `Lui` instruction. ;; TODO: This should be something like `emit_u_type`. And should share the ;; `MInst` with `auipc` since these instructions share the U-Type format. @@ -1720,6 +1784,18 @@ (_ Unit (emit (MInst.Lui dst imm)))) dst)) +;; Helper for emitting `MInst.CsrImm` instructions. +(decl csr_imm (CsrImmOP CSR UImm5) XReg) +(rule (csr_imm op csr imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.CsrImm op dst imm csr)))) + dst)) + +;; Helper for emitting a `MInst.CsrReg` instruction that writes the result to x0. 
+(decl csr_reg_dst_zero (CsrRegOP CSR XReg) Unit) +(rule (csr_reg_dst_zero op csr rs) + (emit (MInst.CsrReg op (writable_zero_reg) rs csr))) + (decl select_addi (Type) AluOPRRI) diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 46827fab1e3a..40ebbc94fd1f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -1810,3 +1810,79 @@ pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) { _ => unreachable!(), } } + +impl CsrRegOP { + pub(crate) fn funct3(self) -> u32 { + match self { + CsrRegOP::CsrRW => 0b001, + CsrRegOP::CsrRS => 0b010, + CsrRegOP::CsrRC => 0b011, + } + } + + pub(crate) fn opcode(self) -> u32 { + 0b1110011 + } + + pub(crate) fn name(self) -> &'static str { + match self { + CsrRegOP::CsrRW => "csrrw", + CsrRegOP::CsrRS => "csrrs", + CsrRegOP::CsrRC => "csrrc", + } + } +} + +impl Display for CsrRegOP { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} + +impl CsrImmOP { + pub(crate) fn funct3(self) -> u32 { + match self { + CsrImmOP::CsrRWI => 0b101, + CsrImmOP::CsrRSI => 0b110, + CsrImmOP::CsrRCI => 0b111, + } + } + + pub(crate) fn opcode(self) -> u32 { + 0b1110011 + } + + pub(crate) fn name(self) -> &'static str { + match self { + CsrImmOP::CsrRWI => "csrrwi", + CsrImmOP::CsrRSI => "csrrsi", + CsrImmOP::CsrRCI => "csrrci", + } + } +} + +impl Display for CsrImmOP { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} + +impl CSR { + pub(crate) fn bits(self) -> Imm12 { + Imm12::from_bits(match self { + CSR::Frm => 0x0002, + }) + } + + pub(crate) fn name(self) -> &'static str { + match self { + CSR::Frm => "frm", + } + } +} + +impl Display for CSR { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs 
b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index f86569b24323..10ab5649412f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -331,6 +331,8 @@ impl Inst { | Inst::AluRRR { .. } | Inst::FpuRRR { .. } | Inst::AluRRImm12 { .. } + | Inst::CsrReg { .. } + | Inst::CsrImm { .. } | Inst::Load { .. } | Inst::Store { .. } | Inst::Args { .. } @@ -595,6 +597,17 @@ impl MachInstEmit for Inst { | alu_op.imm12(imm12) << 20; sink.put4(x); } + &Inst::CsrReg { op, rd, rs, csr } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + + sink.put4(encode_csr_reg(op, rd, rs, csr)); + } + &Inst::CsrImm { op, rd, csr, imm } => { + let rd = allocs.next_writable(rd); + + sink.put4(encode_csr_imm(op, rd, csr, imm)); + } &Inst::Load { rd, op, diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index cff00cecabbf..b1e17c57a22d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -6,7 +6,7 @@ //! Some instructions especially in extensions have slight variations from //! the base RISC-V specification. -use super::{Imm12, Imm5, UImm5, VType}; +use super::*; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::{ VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAluOpRRRR, @@ -53,21 +53,30 @@ pub fn encode_r_type( ) } -/// Encode an I-type instruction. 
-/// /// Layout: /// 0-------6-7-------11-12------14-15------19-20------------------31 /// | Opcode | rd | width | rs1 | Offset[11:0] | -pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { +fn encode_i_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, offset: u32) -> u32 { let mut bits = 0; bits |= unsigned_field_width(opcode, 7); - bits |= reg_to_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(width, 3) << 12; - bits |= reg_to_gpr_num(rs1) << 15; - bits |= unsigned_field_width(offset.as_u32(), 12) << 20; + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(offset, 12) << 20; bits } +/// Encode an I-type instruction. +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + encode_i_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + width, + reg_to_gpr_num(rs1), + offset.as_u32(), + ) +} + /// Encode an S-type instruction. /// /// Layout: @@ -297,3 +306,21 @@ pub fn encode_vmem_store( // with different names on the fields. encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) } + +// The CSR Reg instruction is really just an I type instruction with the CSR in +// the immediate field. +pub fn encode_csr_reg(op: CsrRegOP, rd: WritableReg, rs: Reg, csr: CSR) -> u32 { + encode_i_type(op.opcode(), rd, op.funct3(), rs, csr.bits()) +} + +// The CSR Imm instruction is an I type instruction with the CSR in +// the immediate field and the value to be set in the `rs1` field. 
+pub fn encode_csr_imm(op: CsrImmOP, rd: WritableReg, csr: CSR, imm: UImm5) -> u32 { + encode_i_type_bits( + op.opcode(), + reg_to_gpr_num(rd.to_reg()), + op.funct3(), + imm.bits(), + csr.bits().as_u32(), + ) +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 6a136d9b6a74..d0378a4aa15b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -55,8 +55,9 @@ pub(crate) type VecWritableReg = Vec>; // Instructions (top level): definition pub use crate::isa::riscv64::lower::isle::generated_code::{ - AluOPRRI, AluOPRRR, AtomicOP, FClassResult, FFlagsException, FloatRoundOP, FloatSelectOP, - FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, FRM, + AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FloatRoundOP, + FloatSelectOP, FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, CSR, + FRM, }; use crate::isa::riscv64::lower::isle::generated_code::{MInst, VecAluOpRRImm5, VecAluOpRRR}; @@ -399,6 +400,13 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rs); collector.reg_def(rd); } + &Inst::CsrReg { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::CsrImm { rd, .. } => { + collector.reg_def(rd); + } &Inst::Load { rd, from, .. 
} => { if let Some(r) = from.get_allocatable_register() { collector.reg_use(r); @@ -1512,6 +1520,31 @@ impl Inst { } } } + &Inst::CsrReg { op, rd, rs, csr } => { + let rs_s = format_reg(rs, allocs); + let rd_s = format_reg(rd.to_reg(), allocs); + + match (op, csr, rd) { + (CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => { + format!("fsrm {rs_s}") + } + _ => { + format!("{op} {rd_s},{csr},{rs_s}") + } + } + } + &Inst::CsrImm { op, rd, csr, imm } => { + let rd_s = format_reg(rd.to_reg(), allocs); + + match (op, csr, rd) { + (CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => { + format!("fsrmi {rd_s},{imm}") + } + _ => { + format!("{op} {rd_s},{csr},{imm}") + } + } + } &Inst::Load { rd, op, diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 8124a37c05af..ec3e99fd7390 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -752,6 +752,12 @@ impl VecAluOpRR { | VecAluOpRR::VsextVF8 => 0b010010, VecAluOpRR::VfsqrtV => 0b010011, VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV => 0b010010, } } @@ -766,7 +772,14 @@ impl VecAluOpRR { | VecAluOpRR::VsextVF4 | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, - VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV, + VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV => VecOpCategory::OPFVV, VecAluOpRR::VmvVV => VecOpCategory::OPIVV, VecAluOpRR::VmvVX => VecOpCategory::OPIVX, } @@ -792,6 +805,13 @@ impl VecAluOpRR { VecAluOpRR::VsextVF4 => 0b00101, VecAluOpRR::VzextVF2 => 
0b00110, VecAluOpRR::VsextVF2 => 0b00111, + // VFUNARY0 + VecAluOpRR::VfcvtxufV => 0b00000, + VecAluOpRR::VfcvtxfV => 0b00001, + VecAluOpRR::VfcvtrtzxufV => 0b00110, + VecAluOpRR::VfcvtrtzxfV => 0b00111, + VecAluOpRR::VfcvtfxuV => 0b00010, + VecAluOpRR::VfcvtfxV => 0b00011, // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, @@ -811,7 +831,13 @@ impl VecAluOpRR { | VecAluOpRR::VzextVF8 | VecAluOpRR::VsextVF2 | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => true, + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV => true, VecAluOpRR::VmvSX | VecAluOpRR::VfmvSF | VecAluOpRR::VmvVV @@ -833,7 +859,13 @@ impl VecAluOpRR { | VecAluOpRR::VzextVF8 | VecAluOpRR::VsextVF2 | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => RegClass::Vector, + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV => RegClass::Vector, VecAluOpRR::VmvXS => RegClass::Int, VecAluOpRR::VfmvFS => RegClass::Float, } @@ -850,7 +882,13 @@ impl VecAluOpRR { | VecAluOpRR::VzextVF8 | VecAluOpRR::VsextVF2 | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => RegClass::Vector, + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV => RegClass::Vector, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, } @@ -887,6 +925,12 @@ impl fmt::Display for VecAluOpRR { VecAluOpRR::VmvVV => "vmv.v.v", VecAluOpRR::VmvVX => "vmv.v.x", 
VecAluOpRR::VfmvVF => "vfmv.v.f", + VecAluOpRR::VfcvtxufV => "vfcvt.xu.f.v", + VecAluOpRR::VfcvtxfV => "vfcvt.x.f.v", + VecAluOpRR::VfcvtrtzxufV => "vfcvt.rtz.xu.f.v", + VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v", + VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v", + VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v", }) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index dc0618aec1ed..63db6c751146 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -285,6 +285,12 @@ (VzextVF2) (VzextVF4) (VzextVF8) + (VfcvtxufV) + (VfcvtxfV) + (VfcvtrtzxufV) + (VfcvtrtzxfV) + (VfcvtfxuV) + (VfcvtfxV) )) ;; Returns the canonical destination type for a VecAluOpRRImm5. @@ -1014,6 +1020,46 @@ (rule (rv_vfsqrt_v vs mask vstate) (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate)) +;; Helper for emitting the `vfcvt.xu.f.v` instruction. +;; This instruction converts a float to an unsigned integer. +(decl rv_vfcvt_xu_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_xu_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtxufV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.x.f.v` instruction. +;; This instruction converts a float to a signed integer. +(decl rv_vfcvt_x_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_x_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtxfV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.rtz.xu.f.v` instruction. +;; This instruction converts a float to an unsigned integer +;; using the Round to Zero (RTZ) rounding mode and ignoring +;; the currently set FRM rounding mode. +(decl rv_vfcvt_rtz_xu_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_rtz_xu_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtrtzxufV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.rtz.x.f.v` instruction. +;; This instruction converts a float to a signed integer. 
+;; using the Round to Zero (RTZ) rounding mode and ignoring +;; the currently set FRM rounding mode. +(decl rv_vfcvt_rtz_x_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_rtz_x_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtrtzxfV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.f.xu.v` instruction. +;; This instruction converts an unsigned integer to a float. +(decl rv_vfcvt_f_xu_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_f_xu_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtfxuV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.f.x.v` instruction. +;; This instruction converts a signed integer to a float. +(decl rv_vfcvt_f_x_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_f_x_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate)) + ;; Helper for emitting the `vslidedown.vx` instruction. ;; `vslidedown` moves all elements in the vector down by n elements. ;; The top most elements are up to the tail policy. @@ -1771,3 +1817,57 @@ (rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y) (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty)) + + +;; Emits a `vfcvt.x.f.v` instruction with the given rounding mode. +(decl gen_vfcvt_x_f (VReg FRM VState) VReg) + +;; We have a special instruction for RTZ +(rule 1 (gen_vfcvt_x_f x (FRM.RTZ) vstate) + (rv_vfcvt_rtz_x_f_v x (unmasked) vstate)) + +;; In the general case we need to first switch into the appropriate rounding mode. 
+(rule 0 (gen_vfcvt_x_f x frm vstate) + (let (;; Set the rounding mode and save the current mode + (saved_frm XReg (rv_fsrmi frm)) + (res VReg (rv_vfcvt_x_f_v x (unmasked) vstate)) + ;; Restore the previous rounding mode + (_ Unit (rv_fsrm saved_frm))) + res)) + + +;; Returns the maximum integer value that can be represented by a float +(decl float_int_max (Type) u64) +(rule (float_int_max $F32) 0x4B000000) +(rule (float_int_max $F64) 0x4330000000000000) + +;; Builds the instruction sequence to round a vector register to FRM +(decl gen_vec_round (VReg FRM Type) VReg) + +;; For floating-point round operations, if the input is NaN, +/-infinity, or +/-0, the +;; same input is returned as the rounded result; this differs from behavior of +;; RISCV fcvt instructions (which round out-of-range values to the nearest +;; max or min value), therefore special handling is needed for these values. +(rule (gen_vec_round x frm (ty_vec_fits_in_register ty)) + (let ((scalar_ty Type (lane_type ty)) + ;; if x is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits + ;; in mantissa, the result is the same as src, build a mask for those cases. + ;; (There is an additional fixup for NaN's at the end) + (abs VReg (rv_vfabs_v x (unmasked) ty)) + (max FReg (imm scalar_ty (float_int_max scalar_ty))) + (exact VReg (rv_vmflt_vf abs max (unmasked) ty)) + + ;; The rounding is performed by converting from float to integer, with the + ;; desired rounding mode. And then converting back with the default rounding + ;; mode. + (int VReg (gen_vfcvt_x_f x frm ty)) + (cvt VReg (rv_vfcvt_f_x_v int (unmasked) ty)) + ;; Copy the sign bit from the original value. + (signed VReg (rv_vfsgnj_vv cvt x (unmasked) ty)) + + ;; We want to return an arithmetic nan if the input is a canonical nan. + ;; Convert them by adding 0.0 to the input. 
+ (float_zero FReg (gen_bitcast (zero_reg) (float_int_of_same_size scalar_ty) scalar_ty)) + (corrected_nan VReg (rv_vfadd_vf x float_zero (unmasked) ty))) + ;; Merge the original value if it does not need rounding, or the rounded value + (rv_vmerge_vvm corrected_nan signed exact ty))) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 66aa3abd5259..4e991a75459c 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1652,25 +1652,33 @@ (gen_bitcast v in_ty out_ty)) ;;;;; Rules for `ceil`;;;;;;;;; -(rule - (lower (has_type ty (ceil x))) - (gen_float_round (FloatRoundOP.Ceil) x ty) -) +(rule 0 (lower (has_type (ty_scalar_float ty) (ceil x))) + (gen_float_round (FloatRoundOP.Ceil) x ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ceil x))) + (gen_vec_round x (FRM.RUP) ty)) ;;;;; Rules for `floor`;;;;;;;;; -(rule - (lower (has_type ty (floor x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (floor x))) (gen_float_round (FloatRoundOP.Floor) x ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (floor x))) + (gen_vec_round x (FRM.RDN) ty)) + ;;;;; Rules for `trunc`;;;;;;;;; -(rule - (lower (has_type ty (trunc x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (trunc x))) (gen_float_round (FloatRoundOP.Trunc) x ty)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (trunc x))) + (gen_vec_round x (FRM.RTZ) ty)) + ;;;;; Rules for `nearest`;;;;;;;;; -(rule - (lower (has_type ty (nearest x))) +(rule 0 (lower (has_type (ty_scalar_float ty) (nearest x))) (gen_float_round (FloatRoundOP.Nearest) x ty)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (nearest x))) + (gen_vec_round x (FRM.RNE) ty)) + ;;;;; Rules for `select_spectre_guard`;;;;;;;;; diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 4ce9a4c49836..81ecba07d7c4 100644 --- 
a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -358,6 +358,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> fn gen_default_frm(&mut self) -> OptionFloatRoundingMode { None } + + fn frm_bits(&mut self, frm: &FRM) -> UImm5 { + UImm5::maybe_from_u8(frm.bits()).unwrap() + } + fn gen_select_reg(&mut self, cc: &IntCC, a: XReg, b: XReg, rs1: Reg, rs2: Reg) -> Reg { let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class())); self.emit(&MInst::SelectReg { diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-ceil.clif b/cranelift/filetests/filetests/isa/riscv64/simd-ceil.clif new file mode 100644 index 000000000000..3cc3b9b21a46 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-ceil.clif @@ -0,0 +1,131 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %ceil_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = ceil v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; lui a3,307200 +; fmv.w.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrmi t4,3 +; vfcvt.x.f.v v14,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=4, #vtype=(e32, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fmv.w.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 
0x10, 0x2a +; lui a3, 0x4b000 +; fmv.w.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 3 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.w.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ceil_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = ceil v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; ld a3,[const(0)] +; fmv.d.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrmi t4,3 +; vfcvt.x.f.v v14,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=2, #vtype=(e64, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fmv.d.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; auipc t6, 0 +; addi t6, t6, 0x4c +; ld a3, 0(t6) +; fmv.d.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 3 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.d.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x30, 
0x43 + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-floor.clif b/cranelift/filetests/filetests/isa/riscv64/simd-floor.clif new file mode 100644 index 000000000000..6ecdcc7c88f0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-floor.clif @@ -0,0 +1,131 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %floor_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = floor v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; lui a3,307200 +; fmv.w.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrmi t4,2 +; vfcvt.x.f.v v14,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=4, #vtype=(e32, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fmv.w.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; lui a3, 0x4b000 +; fmv.w.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 2 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.w.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %floor_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = floor v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd 
fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; ld a3,[const(0)] +; fmv.d.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrmi t4,2 +; vfcvt.x.f.v v14,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=2, #vtype=(e64, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fmv.d.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; auipc t6, 0 +; addi t6, t6, 0x4c +; ld a3, 0(t6) +; fmv.d.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 2 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.d.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x30, 0x43 + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-nearest.clif b/cranelift/filetests/filetests/isa/riscv64/simd-nearest.clif new file mode 100644 index 000000000000..359fdf6a1c90 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-nearest.clif @@ -0,0 +1,131 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %nearest_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = nearest v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; 
vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; lui a3,307200 +; fmv.w.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrmi t4,0 +; vfcvt.x.f.v v14,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=4, #vtype=(e32, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fmv.w.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; lui a3, 0x4b000 +; fmv.w.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 0 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.w.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %nearest_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = nearest v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; ld a3,[const(0)] +; fmv.d.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrmi t4,0 +; vfcvt.x.f.v v14,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fsrm t4 +; vfcvt.f.x.v v17,v14 #avl=2, #vtype=(e64, m1, ta, ma) +; vfsgnj.vv v19,v17,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fmv.d.x ft1,zero +; vfadd.vf v23,v1,ft1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v25,v23,v19,v0.t #avl=2, 
#vtype=(e64, m1, ta, ma) +; vse8.v v25,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; auipc t6, 0 +; addi t6, t6, 0x4c +; ld a3, 0(t6) +; fmv.d.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; fsrmi t4, 0 +; .byte 0x57, 0x97, 0x10, 0x4a +; fsrm t4 +; .byte 0xd7, 0x98, 0xe1, 0x4a +; .byte 0xd7, 0x99, 0x10, 0x23 +; fmv.d.x ft1, zero +; .byte 0xd7, 0xdb, 0x10, 0x02 +; .byte 0xd7, 0x8c, 0x79, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x0c, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x30, 0x43 + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-trunc.clif b/cranelift/filetests/filetests/isa/riscv64/simd-trunc.clif new file mode 100644 index 000000000000..515643924a38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-trunc.clif @@ -0,0 +1,123 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %trunc_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = trunc v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; lui a3,307200 +; fmv.w.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vfcvt.rtz.x.f.v v12,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vfcvt.f.x.v v14,v12 #avl=4, #vtype=(e32, m1, ta, ma) +; vfsgnj.vv v16,v14,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; fmv.w.x ft10,zero +; vfadd.vf v20,v1,ft10 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vvm v22,v20,v16,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v22,0(a0) #avl=16, #vtype=(e8, m1, 
ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; lui a3, 0x4b000 +; fmv.w.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; .byte 0x57, 0x96, 0x13, 0x4a +; .byte 0x57, 0x97, 0xc1, 0x4a +; .byte 0x57, 0x98, 0xe0, 0x22 +; fmv.w.x ft10, zero +; .byte 0x57, 0x5a, 0x1f, 0x02 +; .byte 0x57, 0x0b, 0x48, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x0b, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %trunc_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = trunc v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfabs.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; ld a3,[const(0)] +; fmv.d.x fa0,a3 +; vmflt.vf v0,v4,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vfcvt.rtz.x.f.v v12,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vfcvt.f.x.v v14,v12 #avl=2, #vtype=(e64, m1, ta, ma) +; vfsgnj.vv v16,v14,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; fmv.d.x ft10,zero +; vfadd.vf v20,v1,ft10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vvm v22,v20,v16,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v22,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x92, 0x10, 0x2a +; auipc t6, 0 +; addi t6, t6, 0x44 +; ld a3, 0(t6) +; fmv.d.x fa0, a3 +; .byte 0x57, 0x50, 0x45, 0x6e +; .byte 0x57, 0x96, 0x13, 0x4a +; .byte 0x57, 0x97, 0xc1, 0x4a +; .byte 0x57, 0x98, 0xe0, 0x22 
+; fmv.d.x ft10, zero +; .byte 0x57, 0x5a, 0x1f, 0x02 +; .byte 0x57, 0x0b, 0x48, 0x5d +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x0b, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x30, 0x43 + diff --git a/cranelift/filetests/filetests/runtests/ceil.clif b/cranelift/filetests/filetests/runtests/ceil.clif index 78c0734a1db4..5848ba260449 100644 --- a/cranelift/filetests/filetests/runtests/ceil.clif +++ b/cranelift/filetests/filetests/runtests/ceil.clif @@ -6,8 +6,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x -;; FIXME: needs support for vectors -;;target riscv64 +target riscv64 function %ceil_f32(f32) -> f32 { block0(v0: f32): @@ -150,19 +149,3 @@ block0(v0: f64): ; run: %ceil_is_nan_f64(-sNaN:0x1) == 1 ; run: %ceil_is_nan_f64(+sNaN:0x4000000000001) == 1 ; run: %ceil_is_nan_f64(-sNaN:0x4000000000001) == 1 - -function %ceil_f32x4(f32x4) -> f32x4 { -block0(v0: f32x4): - v1 = ceil v0 - return v1 -} -; run: %ceil_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x1.0 0x1.0 0x1.0p1 0x1.8p1] -; run: %ceil_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1] - -function %ceil_f64x2(f64x2) -> f64x2 { -block0(v0: f64x2): - v1 = ceil v0 - return v1 -} -; run: %ceil_f64x2([0x0.5 0x1.0]) == [0x1.0 0x1.0] -; run: %ceil_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/floor.clif b/cranelift/filetests/filetests/runtests/floor.clif index b283a907c8ca..bde2a77907aa 100644 --- a/cranelift/filetests/filetests/runtests/floor.clif +++ b/cranelift/filetests/filetests/runtests/floor.clif @@ -6,8 +6,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x -;; FIXME: needs support for vectors -;;target riscv64 +target riscv64 function %floor_f32(f32) -> f32 { block0(v0: f32): @@ -150,19 +149,3 @@ block0(v0: f64): ; run: %floor_is_nan_f64(-sNaN:0x1) == 1 ; run: %floor_is_nan_f64(+sNaN:0x4000000000001) 
== 1 ; run: %floor_is_nan_f64(-sNaN:0x4000000000001) == 1 - -function %floor_f32x4(f32x4) -> f32x4 { -block0(v0: f32x4): - v1 = floor v0 - return v1 -} -; run: %floor_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1] -; run: %floor_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x1.0 -0x1.0 -0x1.0p1 -0x1.8p1] - -function %floor_f64x2(f64x2) -> f64x2 { -block0(v0: f64x2): - v1 = floor v0 - return v1 -} -; run: %floor_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] -; run: %floor_f64x2([-0x0.5 -0x1.0]) == [-0x1.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/nearest.clif b/cranelift/filetests/filetests/runtests/nearest.clif index a45dc8361b23..a5ce1aacb2e0 100644 --- a/cranelift/filetests/filetests/runtests/nearest.clif +++ b/cranelift/filetests/filetests/runtests/nearest.clif @@ -6,8 +6,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x -;; FIXME: needs support for vectors -;;target riscv64 +target riscv64 function %nearest_f32(f32) -> f32 { block0(v0: f32): @@ -150,19 +149,3 @@ block0(v0: f64): ; run: %near_is_nan_f64(-sNaN:0x1) == 1 ; run: %near_is_nan_f64(+sNaN:0x4000000000001) == 1 ; run: %near_is_nan_f64(-sNaN:0x4000000000001) == 1 - -function %nearest_f32x4(f32x4) -> f32x4 { -block0(v0: f32x4): - v1 = nearest v0 - return v1 -} -; run: %nearest_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.8p1] -; run: %nearest_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.8p1] - -function %nearest_f64x2(f64x2) -> f64x2 { -block0(v0: f64x2): - v1 = nearest v0 - return v1 -} -; run: %nearest_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] -; run: %nearest_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/simd-ceil.clif b/cranelift/filetests/filetests/runtests/simd-ceil.clif new file mode 100644 index 000000000000..1abebd9f23a2 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-ceil.clif @@ -0,0 +1,25 @@ +test interpret +test run +target x86_64 +target 
x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target aarch64 +target s390x +target riscv64 has_v + +function %ceil_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = ceil v0 + return v1 +} +; run: %ceil_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x1.0 0x1.0 0x1.0p1 0x1.8p1] +; run: %ceil_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1] + +function %ceil_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = ceil v0 + return v1 +} +; run: %ceil_f64x2([0x0.5 0x1.0]) == [0x1.0 0x1.0] +; run: %ceil_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/simd-floor.clif b/cranelift/filetests/filetests/runtests/simd-floor.clif new file mode 100644 index 000000000000..8193f3fa1a06 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-floor.clif @@ -0,0 +1,25 @@ +test interpret +test run +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target aarch64 +target s390x +target riscv64 has_v + +function %floor_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = floor v0 + return v1 +} +; run: %floor_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1] +; run: %floor_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x1.0 -0x1.0 -0x1.0p1 -0x1.8p1] + +function %floor_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = floor v0 + return v1 +} +; run: %floor_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] +; run: %floor_f64x2([-0x0.5 -0x1.0]) == [-0x1.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/simd-nearest.clif b/cranelift/filetests/filetests/runtests/simd-nearest.clif new file mode 100644 index 000000000000..bcd4993ffd42 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-nearest.clif @@ -0,0 +1,25 @@ +test interpret +test run +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target aarch64 +target s390x +target riscv64 has_v + +function %nearest_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = nearest v0 + 
return v1 +} +; run: %nearest_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.8p1] +; run: %nearest_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.8p1] + +function %nearest_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = nearest v0 + return v1 +} +; run: %nearest_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] +; run: %nearest_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/simd-trunc.clif b/cranelift/filetests/filetests/runtests/simd-trunc.clif new file mode 100644 index 000000000000..91fb8e1bde0e --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-trunc.clif @@ -0,0 +1,25 @@ +test interpret +test run +target x86_64 +target x86_64 sse41 +target x86_64 sse42 +target x86_64 sse42 has_avx +target aarch64 +target s390x +target riscv64 has_v + +function %trunc_f32x4(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = trunc v0 + return v1 +} +; run: %trunc_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1] +; run: %trunc_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1] + +function %trunc_f64x2(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = trunc v0 + return v1 +} +; run: %trunc_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] +; run: %trunc_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0] diff --git a/cranelift/filetests/filetests/runtests/trunc.clif b/cranelift/filetests/filetests/runtests/trunc.clif index a40d3326faef..37bcb61d07b5 100644 --- a/cranelift/filetests/filetests/runtests/trunc.clif +++ b/cranelift/filetests/filetests/runtests/trunc.clif @@ -6,8 +6,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x -;; FIXME: needs support for vectors -;;target riscv64 +target riscv64 function %trunc_f32(f32) -> f32 { block0(v0: f32): @@ -150,19 +149,3 @@ block0(v0: f64): ; run: %trunc_is_nan_f64(-sNaN:0x1) == 1 ; run: %trunc_is_nan_f64(+sNaN:0x4000000000001) == 1 ; run: %trunc_is_nan_f64(-sNaN:0x4000000000001) == 1 - -function %trunc_f32x4(f32x4) -> 
f32x4 { -block0(v0: f32x4): - v1 = trunc v0 - return v1 -} -; run: %trunc_f32x4([0x0.5 0x1.0 0x1.5 0x2.9]) == [0x0.0 0x1.0 0x1.0 0x1.0p1] -; run: %trunc_f32x4([-0x0.5 -0x1.0 -0x1.5 -0x2.9]) == [-0x0.0 -0x1.0 -0x1.0 -0x1.0p1] - -function %trunc_f64x2(f64x2) -> f64x2 { -block0(v0: f64x2): - v1 = trunc v0 - return v1 -} -; run: %trunc_f64x2([0x0.5 0x1.0]) == [0x0.0 0x1.0] -; run: %trunc_f64x2([-0x0.5 -0x1.0]) == [-0x0.0 -0x1.0]