From 3cab6443dd3aad5679111f0307381db2a3ed83d2 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 27 Jun 2023 02:15:01 +0100 Subject: [PATCH] riscv64: Implement SIMD `fcmp` (#6643) * riscv64: Add float vector mask instructions * riscv64: Add some vector mask bitwise instructions * riscv64: Implement SIMD `fcmp` * cranelift: Add SIMD `fcmp` testsuite --- build.rs | 2 - cranelift/codegen/src/isa/riscv64/inst/mod.rs | 3 + .../codegen/src/isa/riscv64/inst/vector.rs | 52 ++- .../codegen/src/isa/riscv64/inst_vector.isle | 232 +++++++++++++ cranelift/codegen/src/isa/riscv64/lower.isle | 6 +- .../filetests/isa/riscv64/simd-fcmp-eq.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ge.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-gt.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-le.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-lt.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ne.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-one.clif | 296 +++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ord.clif | 304 ++++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ueq.clif | 296 +++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-uge.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ugt.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ule.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ult.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-uno.clif | 304 ++++++++++++++++++ .../filetests/runtests/simd-fcmp-eq.clif | 88 +++++ .../filetests/runtests/simd-fcmp-ge.clif | 86 +++++ .../filetests/runtests/simd-fcmp-gt.clif | 90 ++++++ .../filetests/runtests/simd-fcmp-le.clif | 76 +++++ .../filetests/runtests/simd-fcmp-lt.clif | 86 +++++ .../filetests/runtests/simd-fcmp-ne.clif | 78 +++++ .../filetests/runtests/simd-fcmp-one.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ord.clif | 78 +++++ .../filetests/runtests/simd-fcmp-ueq.clif | 75 +++++ .../filetests/runtests/simd-fcmp-uge.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ugt.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ule.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ult.clif | 75 +++++ .../filetests/runtests/simd-fcmp-uno.clif | 88 +++++ .../filetests/runtests/simd-fcmp.clif | 60 ---- 34 files changed, 5368 insertions(+), 75 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif create mode 100644 
cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-le.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-one.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp.clif diff --git a/build.rs b/build.rs index 96935c823edb..3c506a4ebecf 100644 --- a/build.rs +++ b/build.rs @@ -234,11 +234,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "issue_3327_bnot_lowering", "simd_conversions", "simd_f32x4", - "simd_f32x4_cmp", "simd_f32x4_pmin_pmax", "simd_f32x4_rounding", "simd_f64x2", - "simd_f64x2_cmp", "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", "simd_i32x4_trunc_sat_f32x4", diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 93f3772a4676..239c479efbd4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1744,6 +1744,9 @@ impl Inst { (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 69b485ac14aa..9fc038bade8d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -357,14 +357,30 @@ impl VecAluOpRRR { VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, - VecAluOpRRR::VmseqVV | VecAluOpRRR::VmseqVX => 0b011000, - VecAluOpRRR::VmsneVV | VecAluOpRRR::VmsneVX => 0b011001, - VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX => 0b011010, - VecAluOpRRR::VmsltVV | VecAluOpRRR::VmsltVX => 0b011011, - VecAluOpRRR::VmsleuVV | VecAluOpRRR::VmsleuVX => 0b011100, - VecAluOpRRR::VmsleVV | VecAluOpRRR::VmsleVX => 0b011101, - VecAluOpRRR::VmsgtuVX => 0b011110, - VecAluOpRRR::VmsgtVX => 0b011111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | 
VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, } } @@ -408,7 +424,11 @@ impl VecAluOpRRR { | VecAluOpRRR::VmulhuVV | VecAluOpRRR::VredmaxuVS | VecAluOpRRR::VredminuVS - | VecAluOpRRR::VcompressVM => VecOpCategory::OPMVV, + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, VecAluOpRRR::VwaddVX | VecAluOpRRR::VwadduVX | VecAluOpRRR::VwadduWX @@ -453,14 +473,24 @@ impl VecAluOpRRR { | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfdivVV - | VecAluOpRRR::VfsgnjnVV => VecOpCategory::OPFVV, + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, VecAluOpRRR::VfaddVF | VecAluOpRRR::VfsubVF | VecAluOpRRR::VfrsubVF | VecAluOpRRR::VfmulVF | VecAluOpRRR::VfdivVF | VecAluOpRRR::VfrdivVF - | VecAluOpRRR::VfmergeVFM => VecOpCategory::OPFVF, + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 2a52f4f95803..a51e96cdf4d7 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -134,6 +134,14 @@ (VmsltVV) (VmsleuVV) (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) ;; Vector-Scalar Opcodes @@ -184,6 +192,12 @@ (VmsleVX) (VmsgtuVX) (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) )) @@ -1111,6 +1125,66 @@ (decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) (rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmflt_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction.
+(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfle_vv vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction.
+(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfle_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction.
+;; This is an alias for `vmflt.vv` with the operands inverted.
+(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate))
+
+;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction.
+(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfgt_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction.
+;; This is an alias for `vmfle.vv` with the operands inverted.
+(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate))
+
+;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction.
+(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfge_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate))
+
;; Helper for emitting the `vzext.vf2` instruction.
;; Zero-extend SEW/2 source to SEW destination
(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg)
@@ -1161,6 +1235,40 @@
(rule (rv_vnclipu_wi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate))
+;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction.
+;;
+;; vd.mask[i] = vs2.mask[i] && vs1.mask[i]
+(decl rv_vmand_mm (VReg VReg VState) VReg)
+(rule (rv_vmand_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction.
+;;
+;; vd.mask[i] = vs2.mask[i] || vs1.mask[i]
+(decl rv_vmor_mm (VReg VReg VState) VReg)
+(rule (rv_vmor_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction.
+;;
+;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i])
+(decl rv_vmnand_mm (VReg VReg VState) VReg)
+(rule (rv_vmnand_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction.
+;; This is an alias for `vmnand.mm vd, vs, vs`
+;;
+;; vd.mask[i] = !vs.mask[i]
+(decl rv_vmnot_m (VReg VState) VReg)
+(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate))
+
+;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction.
+;;
+;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i])
+(decl rv_vmnor_mm (VReg VReg VState) VReg)
+(rule (rv_vmnor_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate))
;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1378,3 +1486,127 @@
(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (replicated_imm5 x) y)
(rv_vmsle_vi y x (unmasked) ty))
+
+
+
+;; Builds a vector mask corresponding to the FloatCC operation.
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg)
+
+;; FloatCC.Equal
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y)
+ (rv_vmfeq_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y))
+ (rv_vmfeq_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y)
+ (rv_vmfeq_vf y x (unmasked) ty))
+
+;; FloatCC.NotEqual
+;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such.
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y)
+ (rv_vmfne_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y))
+ (rv_vmfne_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y)
+ (rv_vmfne_vf y x (unmasked) ty))
+
+;; FloatCC.LessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y)
+ (rv_vmflt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y))
+ (rv_vmflt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y)
+ (rv_vmfgt_vf y x (unmasked) ty))
+
+;; FloatCC.LessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y)
+ (rv_vmfle_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y))
+ (rv_vmfle_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y)
+ (rv_vmfge_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y)
+ (rv_vmfgt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y))
+ (rv_vmfgt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y)
+ (rv_vmflt_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y)
+ (rv_vmfge_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y))
+ (rv_vmfge_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y)
+ (rv_vmfle_vf y x (unmasked) ty))
+
+;; FloatCC.Ordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y)
+ (rv_vmand_mm
+ (gen_fcmp_mask ty (FloatCC.Equal) x x)
+ (gen_fcmp_mask ty (FloatCC.Equal) y y)
+ ty))
+
+;; FloatCC.Unordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y)
+ (rv_vmor_mm
+ (gen_fcmp_mask ty (FloatCC.NotEqual) x x)
+ (gen_fcmp_mask ty (FloatCC.NotEqual) y y)
+ ty))
+
+;; FloatCC.OrderedNotEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y)
+ (rv_vmor_mm
+ (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+ (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+ ty))
+
+;; FloatCC.UnorderedOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y)
+ (rv_vmnor_mm
+ (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+ (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+ ty))
+
+;; FloatCC.UnorderedOrGreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrGreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty))
+
+;; FloatCC.UnorderedOrLessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrLessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty))
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index d8193998064c..bc46b8279323 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -1462,10 +1462,12 @@
;;;;; Rules for `fcmp`;;;;;;;;;
-(rule
- (lower (fcmp cc x @ (value_type ty) y))
+(rule 0 (lower (fcmp cc x @ (value_type (ty_scalar_float ty)) y))
(cmp_value (emit_fcmp cc ty x y)))
+(rule 1 (lower (fcmp cc x @ (value_type (ty_vec_fits_in_register ty)) y))
+ (gen_expand_mask ty (gen_fcmp_mask ty cc x y)))
+
;;;;; Rules for `func_addr`;;;;;;;;;
(rule (lower (func_addr (func_ref_data _ name _)))
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif
new file mode 100644
index 000000000000..f6c6331fc17b
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif
@@ -0,0 +1,272 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 {
+block0(v0: f32x4, v1: f32x4):
+ v2 = fcmp eq v0, v1
+ return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x90, 0x11, 0x62
+; .byte 0x57, 0x34, 0x00, 0x5e
+; .byte 0x57, 0xb5, 0x8f, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x05, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %simd_fcmp_splat_rhs_eq_f32(f32x4, f32) -> i32x4 {
+block0(v0: f32x4, v1: f32):
+ v2 = splat.f32x4 v1
+ v3 = fcmp eq v0, v2
+ return v3
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta,
ma) +; vmfeq.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_eq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; 
block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif new file mode 100644 index 000000000000..6cff3a6b9725 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x30, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x30, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ge_f64(f64x2, f64) -> 
i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif new file mode 100644 index 000000000000..25c967a3028c --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp gt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, 
v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_gt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp gt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) 
#avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif new file mode 100644 index 000000000000..52ff43c7e4e3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp le v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, 
ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_le_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp le v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 
0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif new file mode 100644 index 000000000000..f946cb1bec16 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp lt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 
0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_lt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp lt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif new file mode 100644 index 000000000000..ef8f09e10532 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif @@ 
-0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ne_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, 
ta, ma) +; vmfne.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif new file mode 100644 index 000000000000..def2636720e4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif @@ -0,0 +1,296 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_one_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp one v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv 
v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_one_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp one v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) 
+; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git 
a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif new file mode 100644 index 000000000000..81ac612b3a8b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif @@ -0,0 +1,304 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ord_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ord v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vf v8,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x54, 0xa5, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vf v6,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v 
v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x62 +; .byte 0x57, 0x94, 0x10, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ord_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ord v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vf v8,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x54, 0xa5, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, 
#vtype=(e64, m1, ta, ma) +; vmfeq.vf v6,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x62 +; .byte 0x57, 0x94, 0x10, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif new file mode 100644 index 000000000000..17df6ffa5a2b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif @@ -0,0 +1,296 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ueq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ueq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x7a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; 
.byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ueq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ueq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x7a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; 
block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif new file mode 100644 index 000000000000..26bdfa36e428 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_uge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, 
#vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_uge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, 
ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif new file mode 100644 index 000000000000..ff53c69d8621 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ugt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, 
#vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ugt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; 
vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif new file mode 100644 index 000000000000..e5cd8a93c901 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ule_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x30, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v0, v2 + return v3 +} + +; 
VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ule_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x30, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; 
vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif new file mode 100644 index 000000000000..dd8d9fb411fe --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ult_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x30, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ult_f32(f32x4, f32) -> 
i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ult_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x30, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd 
ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif new file mode 100644 index 000000000000..1c62186204fc --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif @@ -0,0 +1,304 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uno v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x94, 0x31, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 
0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vf v8,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x54, 0xa5, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vf v6,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v8,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x72 +; .byte 0x57, 0x94, 0x10, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_uno_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uno v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x94, 0x31, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vf v8,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x54, 0xa5, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vf v6,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v8,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x72 +; .byte 0x57, 0x94, 0x10, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif new file mode 100644 index 000000000000..fec7a57179d5 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif @@ -0,0 +1,88 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: 
f32x4): + v2 = fcmp eq v0, v1 + return v2 +} +; run: %simd_fcmp_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 0 0] +; run: %simd_fcmp_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 0] +; run: %simd_fcmp_eq_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_eq_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_eq_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 0 0] + +function %simd_fcmp_splat_rhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_eq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_eq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_eq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp eq v0, v1 + return v2 +} +; run: %simd_fcmp_eq_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_eq_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_eq_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_eq_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_eq_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + + +function %simd_fcmp_splat_rhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_eq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_eq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_eq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_eq_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_eq_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_eq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_eq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_eq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_eq_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_eq_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_eq_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v2 = fcmp eq v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_eq_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif new file mode 100644 index 000000000000..02169c1ca6f4 --- /dev/null +++ 
b/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif @@ -0,0 +1,86 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ge v0, v1 + return v2 +} +; run: %simd_fcmp_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 -1 0] +; run: %simd_fcmp_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ge_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ge_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_ge_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 0 -1] + +function %simd_fcmp_splat_rhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_ge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ge v0, v1 + return v2 +} +; run: %simd_fcmp_ge_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_ge_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ge_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_ge_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_ge_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + +function %simd_fcmp_splat_rhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ge_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_ge_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ge_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_ge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ge_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ge_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_ge_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_ge_f64x2() -> i8 { +block0: + v0 = vconst.f64x2 [0x0.0 0x4.2] + v1 = vconst.f64x2 [0.0 0x4.1] + v2 = fcmp ge v0, v1 + v8 = vall_true v2 + 
return v8 +} +; run: %fcmp_ge_f64x2() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif new file mode 100644 index 000000000000..e5bcf192ac01 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif @@ -0,0 +1,90 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp gt v0, v1 + return v2 +} +; run: %simd_fcmp_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 -1 0] +; run: %simd_fcmp_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 -1] +; run: %simd_fcmp_gt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_gt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_gt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 0 -1] + +function %simd_fcmp_splat_rhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_gt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_gt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_gt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_gt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp gt v0, v1 + return v2 +} +; run: %simd_fcmp_gt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_gt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_gt_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_gt_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_gt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + +function %simd_fcmp_splat_rhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_gt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_gt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_gt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_gt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_gt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_gt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([-NaN NaN], NaN) == [0 0] +; 
run: %simd_fcmp_splat_lhs_gt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_gt_nans_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] + v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] + v2 = fcmp gt v0, v1 + ; now check that the result v2 is all zeroes + v3 = vconst.i32x4 0x00 + v4 = bitcast.i32x4 v2 + v5 = icmp eq v3, v4 + v8 = vall_true v5 + return v8 +} +; run: %fcmp_gt_nans_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif new file mode 100644 index 000000000000..36739b1eb63a --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif @@ -0,0 +1,76 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp le v0, v1 + return v2 +} +; run: %simd_fcmp_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 0 -1] +; run: %simd_fcmp_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 0] +; run: %simd_fcmp_le_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_le_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_le_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 -1 0] + +function %simd_fcmp_splat_rhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_le_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_le_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_le_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp le v0, v1 + return v2 +} +; run: %simd_fcmp_le_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_le_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_le_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_le_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_le_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + +function %simd_fcmp_splat_rhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_le_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_le_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_le_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_le_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_le_f64([NaN -0x0.0], -NaN) == [0 0] + +function 
%simd_fcmp_splat_lhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_le_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_le_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif new file mode 100644 index 000000000000..e15cd1f8e1a8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif @@ -0,0 +1,86 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp lt v0, v1 + return v2 +} +; run: %simd_fcmp_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 0 -1] +; run: %simd_fcmp_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 0] +; run: %simd_fcmp_lt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_lt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_lt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 -1 0] + +function %simd_fcmp_splat_rhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_lt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_lt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_lt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_lt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp lt v0, v1 + return v2 +} +; run: %simd_fcmp_lt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_lt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_lt_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_lt_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_lt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + +function %simd_fcmp_splat_rhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_lt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([NaN -0x0.0], -NaN) == 
[0 0] + +function %simd_fcmp_splat_lhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_lt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_lt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_lt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_lt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_lt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_lt_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0] + v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0] + v2 = fcmp lt v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_lt_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif new file mode 100644 index 000000000000..0913bf3e206f --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif @@ -0,0 +1,78 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ne v0, v1 + return v2 +} +; run: %simd_fcmp_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 -1 -1] +; run: %simd_fcmp_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 -1] +; run: %simd_fcmp_ne_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ne_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ne_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 -1 -1] + +function %simd_fcmp_splat_rhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ne_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ne_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ne_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ne v0, v1 + return v2 +} +; run: %simd_fcmp_ne_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_ne_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ne_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_ne_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ne_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + + +function %simd_fcmp_splat_rhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v0, v2 + return v3 +} +; run: 
%simd_fcmp_splat_rhs_ne_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ne_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ne_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ne_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif new file mode 100644 index 000000000000..d22daf95cbc7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif @@ -0,0 +1,75 @@ +test run +target s390x +target riscv64 has_v + +function %simd_fcmp_one_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp one v0, v1 + return v2 +} +; run: %simd_fcmp_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 -1 -1] +; run: %simd_fcmp_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 -1] +; run: %simd_fcmp_one_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_one_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_one_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 -1 -1] + +function %simd_fcmp_splat_rhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_one_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_one_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_one_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_one_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_one_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_one_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_one_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp one v0, v1 + return v2 +} +; run: %simd_fcmp_one_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_one_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_one_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_one_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_one_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + + +function %simd_fcmp_splat_rhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_one_f64([0x0.5 
0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_one_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_one_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_one_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_one_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_one_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_one_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_one_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_one_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_one_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif new file mode 100644 index 000000000000..8b9a1af073de --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif @@ -0,0 +1,78 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ord_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ord v0, v1 + return v2 +} +; run: %simd_fcmp_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_ord_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 -1 -1] + +function %simd_fcmp_splat_rhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ord_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ord_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_ord_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ord v0, v1 + return v2 +} +; run: %simd_fcmp_ord_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_ord_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ord_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_ord_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_ord_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + + +function %simd_fcmp_splat_rhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + 
v2 = splat.f64x2 v1 + v3 = fcmp ord v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ord_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_ord_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ord_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_ord_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif new file mode 100644 index 000000000000..f248b6c268d0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif @@ -0,0 +1,75 @@ +test run +target s390x +target riscv64 has_v + +function %simd_fcmp_ueq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ueq v0, v1 + return v2 +} +; run: %simd_fcmp_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 0 0] +; run: %simd_fcmp_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 0] +; run: %simd_fcmp_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ueq_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ueq_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 0 0] + +function %simd_fcmp_splat_rhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ueq_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ueq_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ueq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ueq v0, v1 + return v2 +} +; run: %simd_fcmp_ueq_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_ueq_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ueq_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_ueq_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ueq_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 -1] + + +function %simd_fcmp_splat_rhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = 
splat.f64x2 v1 + v3 = fcmp ueq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ueq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_ueq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ueq_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ueq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_ueq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ueq_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif new file mode 100644 index 000000000000..c1c39d654945 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_uge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uge v0, v1 + return v2 +} +; run: %simd_fcmp_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 -1 0] +; run: %simd_fcmp_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_uge_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uge_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uge_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 0 -1] + +function %simd_fcmp_splat_rhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uge_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_uge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uge v0, v1 + return v2 +} +; run: %simd_fcmp_uge_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_uge_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_uge_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_uge_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_uge_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 
-1] + +function %simd_fcmp_splat_rhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uge_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uge_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_uge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uge_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_uge_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uge_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif new file mode 100644 index 000000000000..c82d54dc6011 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ugt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ugt v0, v1 + return v2 +} +; run: %simd_fcmp_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 -1 0] +; run: %simd_fcmp_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 -1] +; run: %simd_fcmp_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ugt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ugt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 0 -1] + +function %simd_fcmp_splat_rhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ugt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ugt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ugt v0, v1 + return v2 +} +; run: %simd_fcmp_ugt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_ugt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ugt_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_ugt_f64([-NaN NaN], 
[NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ugt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + +function %simd_fcmp_splat_rhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ugt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ugt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ugt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ugt_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif new file mode 100644 index 000000000000..fc64250fe8a7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ule_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ule v0, v1 + return v2 +} +; run: %simd_fcmp_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 0 -1] +; run: %simd_fcmp_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 0] +; run: %simd_fcmp_ule_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ule_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ule_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 -1 0] + +function %simd_fcmp_splat_rhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ule_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ule_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ule_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ule_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ule v0, v1 + return v2 +} +; run: %simd_fcmp_ule_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_ule_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; 
run: %simd_fcmp_ule_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_ule_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ule_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 -1] + +function %simd_fcmp_splat_rhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ule_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_ule_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ule_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ule_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ule_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ule_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif new file mode 100644 index 000000000000..19ab6a4d9918 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ult_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ult v0, v1 + return v2 +} +; run: %simd_fcmp_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 0 -1] +; run: %simd_fcmp_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ult_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_ult_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ult_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 -1 0] + +function %simd_fcmp_splat_rhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ult_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ult_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ult_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ult v0, v1 + return v2 +} +; run: %simd_fcmp_ult_f64([0x0.5 0x1.5], [0x0.5 
0x2.9]) == [0 -1] +; run: %simd_fcmp_ult_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ult_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_ult_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ult_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + +function %simd_fcmp_splat_rhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ult_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ult_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ult_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ult_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif new file mode 100644 index 000000000000..0181a9aa1c61 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif @@ -0,0 +1,88 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uno v0, v1 + return v2 +} +; run: %simd_fcmp_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uno_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 0 0] + +function %simd_fcmp_splat_rhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uno_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uno_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_uno_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + 
v2 = fcmp uno v0, v1 + return v2 +} +; run: %simd_fcmp_uno_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_uno_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_uno_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_uno_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_uno_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + + +function %simd_fcmp_splat_rhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uno_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uno_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uno_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uno_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %fcmp_uno_f64x2() -> i8 { +block0: + v0 = vconst.f64x2 [0.0 NaN] + v1 = vconst.f64x2 [NaN 0x4.1] + v2 = fcmp uno v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_uno_f64x2() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp.clif b/cranelift/filetests/filetests/runtests/simd-fcmp.clif deleted file mode 100644 index c48a8c975f92..000000000000 --- a/cranelift/filetests/filetests/runtests/simd-fcmp.clif +++ /dev/null @@ -1,60 +0,0 @@ -test run -target aarch64 -target s390x -target x86_64 has_sse3 has_ssse3 has_sse41 -target x86_64 has_sse3 has_ssse3 has_sse41 has_avx - - -function %fcmp_eq_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] - v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] - v2 = fcmp eq v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_eq_f32x4() == 1 - -function %fcmp_lt_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0] - v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0] - v2 = fcmp lt v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_lt_f32x4() == 1 - -function %fcmp_ge_f64x2() -> i8 { -block0: - v0 = vconst.f64x2 [0x0.0 0x4.2] - v1 = vconst.f64x2 [0.0 0x4.1] - v2 = fcmp ge v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_ge_f64x2() == 1 - -function %fcmp_uno_f64x2() -> i8 { -block0: - v0 = vconst.f64x2 [0.0 NaN] - v1 = vconst.f64x2 [NaN 0x4.1] - v2 = fcmp uno v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_uno_f64x2() == 1 - -function %fcmp_gt_nans_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] - v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] - v2 = fcmp gt v0, v1 - ; now check that the result v2 is all zeroes - v3 = vconst.i32x4 0x00 - v4 = bitcast.i32x4 v2 - v5 = icmp eq v3, v4 - v8 = vall_true v5 - return v8 -} -; run: %fcmp_gt_nans_f32x4() == 1
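The `fcmp uno` runtests above exercise the lowering pattern visible in the VCode comments earlier in this patch: each operand is compared against itself with `vmfne.vv` (only NaN lanes compare not-equal to themselves), the two mask registers are combined with `vmor.mm`, and the resulting mask is widened to full all-ones/all-zeros lanes with `vmv.v.i 0` followed by `vmerge.vim -1`. The following is a minimal scalar sketch of that per-lane semantics, not the actual ISLE rules or backend code; the function name and the `[f64; 2]` modelling of an `f64x2` value are illustrative assumptions.

```rust
// Scalar model of the riscv64 `fcmp uno` lowering for an f64x2 vector:
// uno(a, b) per lane is "either operand is NaN", expanded to -1/0 lanes.
fn fcmp_uno_f64x2(a: [f64; 2], b: [f64; 2]) -> [i64; 2] {
    let mut out = [0i64; 2];
    for lane in 0..2 {
        // vmfne.vv va,va / vmfne.vv vb,vb: x != x holds only when x is NaN.
        let a_is_nan = a[lane] != a[lane];
        let b_is_nan = b[lane] != b[lane];
        // vmor.mm: the comparison is unordered if either operand is NaN.
        let unordered = a_is_nan || b_is_nan;
        // vmv.v.i 0 + vmerge.vim -1: expand the mask bit to a full lane value.
        out[lane] = if unordered { -1 } else { 0 };
    }
    out
}

fn main() {
    // Mirrors one of the run lines above:
    // %simd_fcmp_uno_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0]
    assert_eq!(fcmp_uno_f64x2([f64::NAN, -0.0], [-f64::NAN, 0.0]), [-1, 0]);
    println!("ok");
}
```

The `ord` tests are the complement of this pattern (`vmfeq.vv` on each operand and `vmand.mm`), and the same `vmv.v.i`/`vmerge.vim` expansion step is shared by all of the SIMD `fcmp` conditions in this patch.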