diff --git a/build.rs b/build.rs
index 374bb83fa76c..b735f076b095 100644
--- a/build.rs
+++ b/build.rs
@@ -30,6 +30,12 @@ fn main() -> anyhow::Result<()> {
             test_directory_module(out, "tests/misc_testsuite/threads", strategy)?;
             test_directory_module(out, "tests/misc_testsuite/memory64", strategy)?;
             test_directory_module(out, "tests/misc_testsuite/component-model", strategy)?;
+
+            // NB: these are copied from upstream and updated to wasmtime's
+            // current version of `wast`. This local copy should go away when
+            // all of Wasmtime's tooling is updated and the upstream
+            // `testsuite` module is additionally updated.
+            test_directory_module(out, "tests/misc_testsuite/relaxed-simd", strategy)?;
             Ok(())
         })?;
 
@@ -64,6 +70,7 @@ fn main() -> anyhow::Result<()> {
     drop(Command::new("rustfmt").arg(&output).status());
     Ok(())
 }
+
 fn test_directory_module(
     out: &mut String,
     path: impl AsRef<Path>,
@@ -182,7 +189,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         // Currently the simd wasm proposal is not implemented in the riscv64
         // backend so skip all tests which could use simd.
         "riscv64" => {
-            testsuite == "simd" || testname.contains("simd") || testname.contains("memory_multi")
+            testsuite.contains("simd")
+                || testname.contains("simd")
+                || testname.contains("memory_multi")
         }
 
         _ => false,
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 46fcff76f3b5..8692207c5481 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -386,6 +386,27 @@ fn define_simd_lane_access(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_pshufb",
+            r#"
+        A vector swizzle lookalike which has the semantics of `pshufb` on x64.
+
+        This instruction will permute the 8-bit lanes of `x` with the indices
+        specified in `y`. Each lane in the mask, `y`, uses the bottom four
+        bits for selecting the lane from `x` unless the most significant bit
+        is set, in which case the lane is zeroed. The output vector will have
+        the following contents when the element of `y` is in these ranges:
+
+        * `[0, 127]` -> `x[y[i] % 16]`
+        * `[128, 255]` -> 0
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let x = &Operand::new("x", TxN).with_doc("The vector to modify");
     let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
     let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
@@ -1436,7 +1457,7 @@ pub(crate) fn define(
         Conditional select of bits.
 
         For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit
-        in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
+        in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
         `select`, `vselect`.
         "#,
             &formats.ternary,
@@ -1445,6 +1466,24 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_blendv",
+            r#"
+        A bitselect-lookalike instruction except with the semantics of
+        `blendv`-related instructions on x86.
+
+        This instruction will use the top bit of each lane in `c`, the condition
+        mask. If the bit is 1 then the corresponding lane from `x` is chosen.
+        Otherwise the corresponding lane from `y` is chosen.
+
+            "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![c, x, y])
+        .operands_out(vec![a]),
+    );
+
     let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
     let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
     let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
@@ -1698,6 +1737,22 @@ pub(crate) fn define(
         .operands_out(vec![qa]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_pmulhrsw",
+            r#"
+        A similar instruction to `sqmul_round_sat` except with the semantics
+        of x86's `pmulhrsw` instruction.
+
+        This is the same as `sqmul_round_sat` except when both input lanes are
+        `i16::MIN`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![qx, qy])
+        .operands_out(vec![qa]),
+    );
+
     {
         // Integer division and remainder are scalar-only; most
         // hardware does not directly support vector integer division.
@@ -3135,6 +3190,36 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let I8x16 = &TypeVar::new(
+        "I8x16",
+        "A SIMD vector type consisting of 16 lanes of 8-bit integers",
+        TypeSetBuilder::new()
+            .ints(8..8)
+            .simd_lanes(16..16)
+            .includes_scalars(false)
+            .build(),
+    );
+    let x = &Operand::new("x", I8x16);
+    let y = &Operand::new("y", I8x16);
+    let a = &Operand::new("a", I16x8);
+
+    ig.push(
+        Inst::new(
+            "x86_pmaddubsw",
+            r#"
+        An instruction with equivalent semantics to `pmaddubsw` on x86.
+
+        This instruction will take signed bytes from the first argument and
+        multiply them against unsigned bytes in the second argument. Adjacent
+        pairs are then added, with saturation, to a 16-bit value and are packed
+        into the result.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",
@@ -3378,6 +3463,20 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "x86_cvtt2dq",
+            r#"
+        A float-to-integer conversion instruction for vectors-of-floats which
+        has the same semantics as `cvttp{s,d}2dq` on x86. This specifically
+        returns `INT_MIN` for NaN or out-of-bounds lanes.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let Int = &TypeVar::new(
         "Int",
         "A scalar or vector integer type",
diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs
index afd32dd40af6..65c628b430f1 100644
--- a/cranelift/codegen/src/isa/aarch64/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/mod.rs
@@ -214,6 +214,10 @@ impl TargetIsa for AArch64Backend {
         cs.set_skipdata(true)?;
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for AArch64Backend {
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index eef8233b3603..539bdb431723 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -315,6 +315,13 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     fn to_capstone(&self) -> Result<capstone::Capstone, capstone::Error> {
         Err(capstone::Error::UnsupportedArch)
     }
+
+    /// Returns whether this ISA has a native fused-multiply-and-add instruction
+    /// for floats.
+    ///
+    /// Currently this only returns false on x86 when some native features are
+    /// not detected.
+    fn has_native_fma(&self) -> bool;
 }
 
 /// Methods implemented for free for target ISA!
diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs
index 53d90348172d..69711e0c8228 100644
--- a/cranelift/codegen/src/isa/riscv64/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/mod.rs
@@ -186,6 +186,10 @@ impl TargetIsa for Riscv64Backend {
         cs.set_skipdata(true)?;
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for Riscv64Backend {
diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs
index 6a6dad94c3ab..9ba81d14ac39 100644
--- a/cranelift/codegen/src/isa/s390x/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/mod.rs
@@ -186,6 +186,10 @@ impl TargetIsa for S390xBackend {
 
         Ok(cs)
     }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
 }
 
 impl fmt::Display for S390xBackend {
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index ec21968b381b..f6abbec48df2 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1212,6 +1212,20 @@
 (decl pure vconst_all_ones_or_all_zeros () Constant)
 (extern extractor vconst_all_ones_or_all_zeros vconst_all_ones_or_all_zeros)
 
+;;;; Rules for `x86_blendv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I8X16
+                       (x86_blendv condition if_true if_false)))
+      (x64_pblendvb if_false if_true condition))
+
+(rule (lower (has_type $I32X4
+                       (x86_blendv condition if_true if_false)))
+      (x64_blendvps if_false if_true condition))
+
+(rule (lower (has_type $I64X2
+                       (x86_blendv condition if_true if_false)))
+      (x64_blendvpd if_false if_true condition))
+
 ;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty @ (multi_lane _bits _lanes)
@@ -2145,6 +2159,11 @@
 (rule (lower (debugtrap))
       (side_effect (x64_hlt)))
 
+;; Rules for `x86_pmaddubsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (x86_pmaddubsw x y)))
+      (x64_pmaddubsw y x))
+
 ;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $F32 (fadd x y)))
@@ -3169,6 +3188,11 @@
         ;; values greater than max signed int.
         (x64_paddd tmp1 dst)))
 
+;; Rules for `x86_cvtt2dq` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I32X4 (x86_cvtt2dq val @ (value_type $F32X4))))
+      (x64_cvttps2dq val))
+
 ;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (iadd_pairwise x y)))
@@ -3304,6 +3328,12 @@
             (dst Xmm (x64_minpd a tmp1)))
         (x64_cvttpd2dq dst)))
 
+;; This rule is a special case for handling the translation of the wasm op
+;; `i32x4.relaxed_trunc_f64x2_s_zero`.
+(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (x86_cvtt2dq val))
+                                       (vconst (u128_from_constant 0)))))
+        (x64_cvttpd2dq val))
+
 ;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I8X16 (unarrow a @ (value_type $I16X8) b)))
@@ -3559,6 +3589,11 @@
       (let ((mask Xmm (x64_paddusb mask (swizzle_zero_mask))))
         (x64_pshufb src mask)))
 
+;; Rules for `x86_pshufb` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (x86_pshufb src mask))
+      (x64_pshufb src mask))
+
 ;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Remove the extractlane instruction, leaving the float where it is. The upper
@@ -3736,7 +3771,12 @@
             (cmp Xmm (x64_pcmpeqw dst mask)))
         (x64_pxor dst cmp)))
 
-;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `x86_pmulhrsw` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (x86_pmulhrsw qx @ (value_type $I16X8) qy))
+      (x64_pmulhrsw qx qy))
+
+;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; TODO: currently we only lower a special case of `uunarrow` needed to support
 ;; the translation of wasm's i32x4.trunc_sat_f64x2_u_zero operation.
diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs
index 7c085ad583b7..68f218fae44f 100644
--- a/cranelift/codegen/src/isa/x64/mod.rs
+++ b/cranelift/codegen/src/isa/x64/mod.rs
@@ -184,6 +184,10 @@ impl TargetIsa for X64Backend {
             .syntax(arch::x86::ArchSyntax::Att)
             .build()
     }
+
+    fn has_native_fma(&self) -> bool {
+        self.x64_flags.use_fma()
+    }
 }
 
 impl fmt::Display for X64Backend {
diff --git a/cranelift/filetests/filetests/wasm/aarch64-relaxed-simd.wat b/cranelift/filetests/filetests/wasm/aarch64-relaxed-simd.wat
new file mode 100644
index 000000000000..b1a0bcd4cb87
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/aarch64-relaxed-simd.wat
@@ -0,0 +1,87 @@
+;;! target = "aarch64"
+;;! compile = true
+
+(module
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_s
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_u
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_s_zero
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_u_zero
+  )
+
+  (func (param v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    i16x8.relaxed_dot_i8x16_i7x16_s
+  )
+
+  (func (param v128 v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    local.get 2
+    i32x4.relaxed_dot_i8x16_i7x16_add_s
+  )
+)
+
+;; function u0:0:
+;; block0:
+;;   fcvtzs v0.4s, v0.4s
+;;   b label1
+;; block1:
+;;   ret
+;;
+;; function u0:1:
+;; block0:
+;;   fcvtzu v0.4s, v0.4s
+;;   b label1
+;; block1:
+;;   ret
+;;
+;; function u0:2:
+;; block0:
+;;   fcvtzs v4.2d, v0.2d
+;;   sqxtn v0.2s, v4.2d
+;;   b label1
+;; block1:
+;;   ret
+;;
+;; function u0:3:
+;; block0:
+;;   fcvtzu v4.2d, v0.2d
+;;   uqxtn v0.2s, v4.2d
+;;   b label1
+;; block1:
+;;   ret
+;;
+;; function u0:4:
+;; block0:
+;;   smull v6.8h, v0.8b, v1.8b
+;;   smull2 v7.8h, v0.16b, v1.16b
+;;   addp v0.8h, v6.8h, v7.8h
+;;   b label1
+;; block1:
+;;   ret
+;;
+;; function u0:5:
+;; block0:
+;;   smull v17.8h, v0.8b, v1.8b
+;;   smull2 v18.8h, v0.16b, v1.16b
+;;   addp v17.8h, v17.8h, v18.8h
+;;   saddlp v17.4s, v17.8h
+;;   add v0.4s, v17.4s, v2.4s
+;;   b label1
+;; block1:
+;;   ret
diff --git a/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat b/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat
new file mode 100644
index 000000000000..f3ae7c7358d5
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/x64-relaxed-simd-deterministic.wat
@@ -0,0 +1,161 @@
+;;! target = "x86_64"
+;;! compile = true
+;;! relaxed_simd_deterministic = true
+;;! settings = ["has_avx=true"]
+
+(module
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_s
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_u
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_s_zero
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_u_zero
+  )
+
+  (func (param v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    i16x8.relaxed_dot_i8x16_i7x16_s
+  )
+
+  (func (param v128 v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    local.get 2
+    i32x4.relaxed_dot_i8x16_i7x16_add_s
+  )
+)
+
+;; function u0:0:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   vcmpps  $0 %xmm0, %xmm0, %xmm3
+;;   vandps  %xmm0, %xmm3, %xmm5
+;;   vpxor   %xmm3, %xmm5, %xmm7
+;;   vcvttps2dq %xmm5, %xmm9
+;;   vpand   %xmm9, %xmm7, %xmm11
+;;   vpsrad  %xmm11, $31, %xmm13
+;;   vpxor   %xmm13, %xmm9, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:1:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   xorps   %xmm3, %xmm3, %xmm3
+;;   vmaxps  %xmm0, %xmm3, %xmm5
+;;   vpcmpeqd %xmm3, %xmm3, %xmm7
+;;   vpsrld  %xmm7, $1, %xmm9
+;;   vcvtdq2ps %xmm9, %xmm11
+;;   vcvttps2dq %xmm5, %xmm13
+;;   vsubps  %xmm5, %xmm11, %xmm15
+;;   vcmpps  $2 %xmm11, %xmm15, %xmm1
+;;   vcvttps2dq %xmm15, %xmm3
+;;   vpxor   %xmm3, %xmm1, %xmm5
+;;   pxor    %xmm7, %xmm7, %xmm7
+;;   vpmaxsd %xmm5, %xmm7, %xmm9
+;;   vpaddd  %xmm9, %xmm13, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:2:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   vcmppd  $0 %xmm0, %xmm0, %xmm3
+;;   vandps  %xmm3, const(0), %xmm5
+;;   vminpd  %xmm0, %xmm5, %xmm7
+;;   vcvttpd2dq %xmm7, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:3:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   xorpd   %xmm3, %xmm3, %xmm3
+;;   vmaxpd  %xmm0, %xmm3, %xmm5
+;;   vminpd  %xmm5, const(0), %xmm7
+;;   vroundpd $3, %xmm7, %xmm9
+;;   vaddpd  %xmm9, const(1), %xmm11
+;;   vshufps $136 %xmm11, %xmm3, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:4:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   vpmovsxbw %xmm0, %xmm10
+;;   vpmovsxbw %xmm1, %xmm12
+;;   vpmullw %xmm10, %xmm12, %xmm14
+;;   vpalignr $8 %xmm0, %xmm0, %xmm8
+;;   vpmovsxbw %xmm8, %xmm10
+;;   vpalignr $8 %xmm1, %xmm1, %xmm12
+;;   vpmovsxbw %xmm12, %xmm15
+;;   vpmullw %xmm10, %xmm15, %xmm0
+;;   vphaddw %xmm14, %xmm0, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:5:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   vpmovsxbw %xmm0, %xmm13
+;;   vpmovsxbw %xmm1, %xmm15
+;;   vpmullw %xmm13, %xmm15, %xmm3
+;;   vpalignr $8 %xmm0, %xmm0, %xmm11
+;;   vpmovsxbw %xmm11, %xmm13
+;;   vpalignr $8 %xmm1, %xmm1, %xmm15
+;;   vpmovsxbw %xmm15, %xmm1
+;;   vpmullw %xmm13, %xmm1, %xmm4
+;;   vphaddw %xmm3, %xmm4, %xmm15
+;;   vpmaddwd %xmm15, const(0), %xmm15
+;;   vpaddd  %xmm15, %xmm2, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
diff --git a/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat b/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat
new file mode 100644
index 000000000000..43586fcb2c47
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/x64-relaxed-simd.wat
@@ -0,0 +1,140 @@
+;;! target = "x86_64"
+;;! compile = true
+
+(module
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_s
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f32x4_u
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_s_zero
+  )
+
+  (func (param v128) (result v128)
+    local.get 0
+    i32x4.relaxed_trunc_f64x2_u_zero
+  )
+
+  (func (param v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    i16x8.relaxed_dot_i8x16_i7x16_s
+  )
+
+  (func (param v128 v128 v128) (result v128)
+    local.get 0
+    local.get 1
+    local.get 2
+    i32x4.relaxed_dot_i8x16_i7x16_add_s
+  )
+)
+
+;; function u0:0:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   cvttps2dq %xmm0, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:1:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   xorps   %xmm6, %xmm6, %xmm6
+;;   movdqa  %xmm0, %xmm10
+;;   maxps   %xmm10, %xmm6, %xmm10
+;;   pcmpeqd %xmm6, %xmm6, %xmm6
+;;   psrld   %xmm6, $1, %xmm6
+;;   cvtdq2ps %xmm6, %xmm14
+;;   cvttps2dq %xmm10, %xmm13
+;;   subps   %xmm10, %xmm14, %xmm10
+;;   cmpps   $2, %xmm14, %xmm10, %xmm14
+;;   cvttps2dq %xmm10, %xmm0
+;;   pxor    %xmm0, %xmm14, %xmm0
+;;   pxor    %xmm7, %xmm7, %xmm7
+;;   pmaxsd  %xmm0, %xmm7, %xmm0
+;;   paddd   %xmm0, %xmm13, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:2:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   cvttpd2dq %xmm0, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:3:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   xorpd   %xmm3, %xmm3, %xmm3
+;;   movdqa  %xmm0, %xmm6
+;;   maxpd   %xmm6, %xmm3, %xmm6
+;;   minpd   %xmm6, const(0), %xmm6
+;;   roundpd $3, %xmm6, %xmm0
+;;   addpd   %xmm0, const(1), %xmm0
+;;   shufps  $136, %xmm0, %xmm3, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:4:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   movdqa  %xmm1, %xmm4
+;;   pmaddubsw %xmm4, %xmm0, %xmm4
+;;   movdqa  %xmm4, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
+;;
+;; function u0:5:
+;;   pushq   %rbp
+;;   unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
+;;   movq    %rsp, %rbp
+;;   unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 }
+;; block0:
+;;   movdqa  %xmm0, %xmm8
+;;   movdqa  %xmm1, %xmm0
+;;   pmaddubsw %xmm0, %xmm8, %xmm0
+;;   pmaddwd %xmm0, const(0), %xmm0
+;;   paddd   %xmm0, %xmm2, %xmm0
+;;   jmp     label1
+;; block1:
+;;   movq    %rbp, %rsp
+;;   popq    %rbp
+;;   ret
diff --git a/cranelift/filetests/src/test_wasm/config.rs b/cranelift/filetests/src/test_wasm/config.rs
index 4b8ad4ad5236..6ba4f3c66a30 100644
--- a/cranelift/filetests/src/test_wasm/config.rs
+++ b/cranelift/filetests/src/test_wasm/config.rs
@@ -29,6 +29,9 @@ pub struct TestConfig {
 
     #[serde(default)]
     pub heaps: Vec<TestHeap>,
+
+    #[serde(default)]
+    pub relaxed_simd_deterministic: bool,
 }
 
 impl TestConfig {
diff --git a/cranelift/filetests/src/test_wasm/env.rs b/cranelift/filetests/src/test_wasm/env.rs
index 5d363aa412ab..8a0ea656b9a4 100644
--- a/cranelift/filetests/src/test_wasm/env.rs
+++ b/cranelift/filetests/src/test_wasm/env.rs
@@ -82,6 +82,7 @@ impl<'data> ModuleEnvironment<'data> for ModuleEnv {
         wasmparser::WasmFeatures {
             memory64: true,
             multi_memory: true,
+            relaxed_simd: true,
             ..self.inner.wasm_features()
         }
     }
@@ -613,4 +614,12 @@ impl<'a> FuncEnvironment for FuncEnv<'a> {
     {
         self.inner.heaps()
     }
+
+    fn relaxed_simd_deterministic(&self) -> bool {
+        self.config.relaxed_simd_deterministic
+    }
+
+    fn is_x86(&self) -> bool {
+        self.config.target.contains("x86_64")
+    }
 }
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 155e5dd7d1d9..56a894fd6275 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -1358,6 +1358,11 @@ where
         Opcode::GetFramePointer => unimplemented!("GetFramePointer"),
         Opcode::GetStackPointer => unimplemented!("GetStackPointer"),
         Opcode::GetReturnAddress => unimplemented!("GetReturnAddress"),
+        Opcode::X86Pshufb => unimplemented!("X86Pshufb"),
+        Opcode::X86Blendv => unimplemented!("X86Blendv"),
+        Opcode::X86Pmulhrsw => unimplemented!("X86Pmulhrsw"),
+        Opcode::X86Pmaddubsw => unimplemented!("X86Pmaddubsw"),
+        Opcode::X86Cvtt2dq => unimplemented!("X86Cvtt2dq"),
     })
 }
 
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index cb7b29fffb41..3f506374b622 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1778,13 +1778,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             state.push1(builder.ins().sshr(bitcast_a, b))
         }
         Operator::V128Bitselect => {
-            let (a, b, c) = state.pop3();
-            let bitcast_a = optionally_bitcast_vector(a, I8X16, builder);
-            let bitcast_b = optionally_bitcast_vector(b, I8X16, builder);
-            let bitcast_c = optionally_bitcast_vector(c, I8X16, builder);
+            let (a, b, c) = pop3_with_bitcast(state, I8X16, builder);
             // The CLIF operand ordering is slightly different and the types of all three
             // operands must match (hence the bitcast).
-            state.push1(builder.ins().bitselect(bitcast_c, bitcast_a, bitcast_b))
+            state.push1(builder.ins().bitselect(c, a, b))
         }
         Operator::V128AnyTrue => {
             let a = pop1_with_bitcast(state, type_of(op), builder);
@@ -1938,11 +1935,23 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
 
             state.push1(builder.ins().snarrow(converted_a, zero));
         }
-        Operator::I32x4TruncSatF32x4U => {
+
+        // FIXME(#5913): the relaxed instructions here are translated the same
+        // as the saturating instructions, even when the code generator
+        // configuration allows for different semantics across hosts. On x86,
+        // however, it's theoretically possible to have a slightly more optimal
+        // lowering which accounts for NaN differently, although the lowering is
+        // still not trivial (e.g. one instruction). At this time the
+        // more-optimal-but-still-large lowering for x86 is not implemented so
+        // the relaxed instructions are listed here instead of down below with
+        // the other relaxed instructions. An x86-specific implementation (or
+        // perhaps for other backends too) should be added and the codegen for
+        // the relaxed instruction should conditionally be different.
+        Operator::I32x4RelaxedTruncF32x4U | Operator::I32x4TruncSatF32x4U => {
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
         }
-        Operator::I32x4TruncSatF64x2UZero => {
+        Operator::I32x4RelaxedTruncF64x2UZero | Operator::I32x4TruncSatF64x2UZero => {
             let a = pop1_with_bitcast(state, F64X2, builder);
             let converted_a = builder.ins().fcvt_to_uint_sat(I64X2, a);
             let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
@@ -1950,6 +1959,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
 
             state.push1(builder.ins().uunarrow(converted_a, zero));
         }
+
         Operator::I8x16NarrowI16x8S => {
             let (a, b) = pop2_with_bitcast(state, I16X8, builder);
             state.push1(builder.ins().snarrow(a, b))
@@ -2156,27 +2166,175 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
                 op
             ));
         }
-        Operator::I8x16RelaxedSwizzle
-        | Operator::I32x4RelaxedTruncF32x4S
-        | Operator::I32x4RelaxedTruncF32x4U
-        | Operator::I32x4RelaxedTruncF64x2SZero
-        | Operator::I32x4RelaxedTruncF64x2UZero
-        | Operator::F32x4RelaxedMadd
-        | Operator::F32x4RelaxedNmadd
-        | Operator::F64x2RelaxedMadd
-        | Operator::F64x2RelaxedNmadd
-        | Operator::I8x16RelaxedLaneselect
+
+        Operator::F32x4RelaxedMax | Operator::F64x2RelaxedMax => {
+            let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics match the `fmax` instruction, or
+                    // the `fAAxBB.max` wasm instruction.
+                    builder.ins().fmax(a, b)
+                } else {
+                    builder.ins().fmax_pseudo(a, b)
+                },
+            )
+        }
+
+        Operator::F32x4RelaxedMin | Operator::F64x2RelaxedMin => {
+            let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics match the `fmin` instruction, or
+                    // the `fAAxBB.min` wasm instruction.
+                    builder.ins().fmin(a, b)
+                } else {
+                    builder.ins().fmin_pseudo(a, b)
+                },
+            );
+        }
+
+        Operator::I8x16RelaxedSwizzle => {
+            let (a, b) = pop2_with_bitcast(state, I8X16, builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics match the `i8x16.swizzle`
+                    // instruction which is the CLIF `swizzle`.
+                    builder.ins().swizzle(a, b)
+                } else {
+                    builder.ins().x86_pshufb(a, b)
+                },
+            );
+        }
+
+        Operator::F32x4RelaxedMadd | Operator::F64x2RelaxedMadd => {
+            let (a, b, c) = pop3_with_bitcast(state, type_of(op), builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || environ.has_native_fma() {
+                    // Deterministic semantics are "fused multiply and add"
+                    // which the CLIF `fma` guarantees.
+                    builder.ins().fma(a, b, c)
+                } else {
+                    let mul = builder.ins().fmul(a, b);
+                    builder.ins().fadd(mul, c)
+                },
+            );
+        }
+        Operator::F32x4RelaxedNmadd | Operator::F64x2RelaxedNmadd => {
+            let (a, b, c) = pop3_with_bitcast(state, type_of(op), builder);
+            let a = builder.ins().fneg(a);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || environ.has_native_fma() {
+                    // Deterministic semantics are "fused multiply and add"
+                    // which the CLIF `fma` guarantees.
+                    builder.ins().fma(a, b, c)
+                } else {
+                    let mul = builder.ins().fmul(a, b);
+                    builder.ins().fadd(mul, c)
+                },
+            );
+        }
+
+        Operator::I8x16RelaxedLaneselect
         | Operator::I16x8RelaxedLaneselect
         | Operator::I32x4RelaxedLaneselect
-        | Operator::I64x2RelaxedLaneselect
-        | Operator::F32x4RelaxedMin
-        | Operator::F32x4RelaxedMax
-        | Operator::F64x2RelaxedMin
-        | Operator::F64x2RelaxedMax
-        | Operator::I16x8RelaxedQ15mulrS
-        | Operator::I16x8RelaxedDotI8x16I7x16S
-        | Operator::I32x4RelaxedDotI8x16I7x16AddS => {
-            return Err(wasm_unsupported!("proposed relaxed-simd operator {:?}", op));
+        | Operator::I64x2RelaxedLaneselect => {
+            let ty = type_of(op);
+            let (a, b, c) = pop3_with_bitcast(state, ty, builder);
+            // Note that the variable swaps here are intentional due to
+            // the difference of the order of the wasm op and the clif
+            // op.
+            //
+            // Additionally note that even on x86 the I16X8 type uses the
+            // `bitselect` instruction since x86 has no corresponding
+            // `blendv`-style instruction for 16-bit operands.
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() || ty == I16X8 {
+                    // Deterministic semantics are a `bitselect` along the lines
+                    // of the wasm `v128.bitselect` instruction.
+                    builder.ins().bitselect(c, a, b)
+                } else {
+                    builder.ins().x86_blendv(c, a, b)
+                },
+            );
+        }
+
+        Operator::I32x4RelaxedTruncF32x4S => {
+            let a = pop1_with_bitcast(state, F32X4, builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics are to match the
+                    // `i32x4.trunc_sat_f32x4_s` instruction.
+                    builder.ins().fcvt_to_sint_sat(I32X4, a)
+                } else {
+                    builder.ins().x86_cvtt2dq(I32X4, a)
+                },
+            )
+        }
+        Operator::I32x4RelaxedTruncF64x2SZero => {
+            let a = pop1_with_bitcast(state, F64X2, builder);
+            let converted_a = if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                // Deterministic semantics are to match the
+                // `i32x4.trunc_sat_f64x2_s_zero` instruction.
+                builder.ins().fcvt_to_sint_sat(I64X2, a)
+            } else {
+                builder.ins().x86_cvtt2dq(I64X2, a)
+            };
+            let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
+            let zero = builder.ins().vconst(I64X2, handle);
+
+            state.push1(builder.ins().snarrow(converted_a, zero));
+        }
+        Operator::I16x8RelaxedQ15mulrS => {
+            let (a, b) = pop2_with_bitcast(state, I16X8, builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics are to match the
+                    // `i16x8.q15mulr_sat_s` instruction.
+                    builder.ins().sqmul_round_sat(a, b)
+                } else {
+                    builder.ins().x86_pmulhrsw(a, b)
+                },
+            );
+        }
+        Operator::I16x8RelaxedDotI8x16I7x16S => {
+            let (a, b) = pop2_with_bitcast(state, I8X16, builder);
+            state.push1(
+                if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                    // Deterministic semantics are to treat both operands as
+                    // signed integers and perform the dot product.
+                    let alo = builder.ins().swiden_low(a);
+                    let blo = builder.ins().swiden_low(b);
+                    let lo = builder.ins().imul(alo, blo);
+                    let ahi = builder.ins().swiden_high(a);
+                    let bhi = builder.ins().swiden_high(b);
+                    let hi = builder.ins().imul(ahi, bhi);
+                    builder.ins().iadd_pairwise(lo, hi)
+                } else {
+                    builder.ins().x86_pmaddubsw(a, b)
+                },
+            );
+        }
+
+        Operator::I32x4RelaxedDotI8x16I7x16AddS => {
+            let c = pop1_with_bitcast(state, I32X4, builder);
+            let (a, b) = pop2_with_bitcast(state, I8X16, builder);
+            let dot = if environ.relaxed_simd_deterministic() || !environ.is_x86() {
+                // Deterministic semantics are to treat both operands as
+                // signed integers and perform the dot product.
+                let alo = builder.ins().swiden_low(a);
+                let blo = builder.ins().swiden_low(b);
+                let lo = builder.ins().imul(alo, blo);
+                let ahi = builder.ins().swiden_high(a);
+                let bhi = builder.ins().swiden_high(b);
+                let hi = builder.ins().imul(ahi, bhi);
+                builder.ins().iadd_pairwise(lo, hi)
+            } else {
+                builder.ins().x86_pmaddubsw(a, b)
+            };
+            let dotlo = builder.ins().swiden_low(dot);
+            let dothi = builder.ins().swiden_high(dot);
+            let dot32 = builder.ins().iadd_pairwise(dotlo, dothi);
+            state.push1(builder.ins().iadd(dot32, c));
         }
 
         Operator::CallRef { .. }
@@ -2945,7 +3103,8 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::I8x16MaxU
         | Operator::I8x16AvgrU
         | Operator::I8x16Bitmask
-        | Operator::I8x16Popcnt => I8X16,
+        | Operator::I8x16Popcnt
+        | Operator::I8x16RelaxedLaneselect => I8X16,
 
         Operator::I16x8Splat
         | Operator::V128Load16Splat { .. }
@@ -2982,7 +3141,8 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::I16x8MaxU
         | Operator::I16x8AvgrU
         | Operator::I16x8Mul
-        | Operator::I16x8Bitmask => I16X8,
+        | Operator::I16x8Bitmask
+        | Operator::I16x8RelaxedLaneselect => I16X8,
 
         Operator::I32x4Splat
         | Operator::V128Load32Splat { .. }
@@ -3016,6 +3176,7 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::I32x4Bitmask
         | Operator::I32x4TruncSatF32x4S
         | Operator::I32x4TruncSatF32x4U
+        | Operator::I32x4RelaxedLaneselect
         | Operator::V128Load32Zero { .. } => I32X4,
 
         Operator::I64x2Splat
@@ -3040,6 +3201,7 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::I64x2Sub
         | Operator::I64x2Mul
         | Operator::I64x2Bitmask
+        | Operator::I64x2RelaxedLaneselect
         | Operator::V128Load64Zero { .. } => I64X2,
 
         Operator::F32x4Splat
@@ -3067,7 +3229,11 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::F32x4Ceil
         | Operator::F32x4Floor
         | Operator::F32x4Trunc
-        | Operator::F32x4Nearest => F32X4,
+        | Operator::F32x4Nearest
+        | Operator::F32x4RelaxedMax
+        | Operator::F32x4RelaxedMin
+        | Operator::F32x4RelaxedMadd
+        | Operator::F32x4RelaxedNmadd => F32X4,
 
         Operator::F64x2Splat
         | Operator::F64x2ExtractLane { .. }
@@ -3092,7 +3258,11 @@ fn type_of(operator: &Operator) -> Type {
         | Operator::F64x2Ceil
         | Operator::F64x2Floor
         | Operator::F64x2Trunc
-        | Operator::F64x2Nearest => F64X2,
+        | Operator::F64x2Nearest
+        | Operator::F64x2RelaxedMax
+        | Operator::F64x2RelaxedMin
+        | Operator::F64x2RelaxedMadd
+        | Operator::F64x2RelaxedNmadd => F64X2,
 
         _ => unimplemented!(
             "Currently only SIMD instructions are mapped to their return type; the \
@@ -3219,6 +3389,18 @@ fn pop2_with_bitcast(
     (bitcast_a, bitcast_b)
 }
 
+fn pop3_with_bitcast(
+    state: &mut FuncTranslationState,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> (Value, Value, Value) {
+    let (a, b, c) = state.pop3();
+    let bitcast_a = optionally_bitcast_vector(a, needed_type, builder);
+    let bitcast_b = optionally_bitcast_vector(b, needed_type, builder);
+    let bitcast_c = optionally_bitcast_vector(c, needed_type, builder);
+    (bitcast_a, bitcast_b, bitcast_c)
+}
+
 fn bitcast_arguments<'a>(
     builder: &FunctionBuilder,
     arguments: &'a mut [Value],
diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs
index 1b64ec811a62..03121c8c9cfc 100644
--- a/cranelift/wasm/src/environ/spec.rs
+++ b/cranelift/wasm/src/environ/spec.rs
@@ -525,6 +525,27 @@ pub trait FuncEnvironment: TargetEnvironment {
     /// Returns the target ISA's condition to check for unsigned addition
     /// overflowing.
     fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC;
+
+    /// Whether or not to force relaxed simd instructions to have deterministic
+    /// lowerings meaning they will produce the same results across all hosts,
+    /// regardless of the cost to performance.
+    fn relaxed_simd_deterministic(&self) -> bool {
+        true
+    }
+
+    /// Whether or not the target being translated for has a native fma
+    /// instruction. If it does not then when relaxed simd isn't deterministic
+    /// the translation of the `f32x4.relaxed_madd` instruction, for example,
+    /// will do a multiplication and then an add instead of the fused version.
+    fn has_native_fma(&self) -> bool {
+        false
+    }
+
+    /// Returns whether this is an x86 target, which may alter lowerings of
+    /// relaxed simd instructions.
+    fn is_x86(&self) -> bool {
+        false
+    }
 }
 
 /// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the
diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs
index 7e40bff3aa8d..3488096acefd 100644
--- a/crates/cli-flags/src/lib.rs
+++ b/crates/cli-flags/src/lib.rs
@@ -35,6 +35,10 @@ pub const SUPPORTED_WASM_FEATURES: &[(&str, &str)] = &[
     ("multi-value", "enables support for multi-value functions"),
     ("reference-types", "enables support for reference types"),
     ("simd", "enables support for proposed SIMD instructions"),
+    (
+        "relaxed-simd",
+        "enables support for the relaxed simd proposal",
+    ),
     ("threads", "enables support for WebAssembly threads"),
     ("memory64", "enables support for 64-bit memories"),
     #[cfg(feature = "component-model")]
@@ -235,6 +239,17 @@ pub struct CommonOptions {
     /// stack overflow is reported.
     #[clap(long)]
     pub max_wasm_stack: Option<usize>,
+
+    /// Whether or not to force deterministic and host-independent behavior of
+    /// the relaxed-simd instructions.
+    ///
+    /// By default these instructions may have architecture-specific behavior as
+    /// allowed by the specification, but this can be used to force the behavior
+    /// of these instructions to match the deterministic behavior classified in
+    /// the specification. Note that enabling this option may come at a
+    /// performance cost.
+    #[clap(long)]
+    pub relaxed_simd_deterministic: bool,
 }
 
 impl CommonOptions {
@@ -329,12 +344,15 @@ impl CommonOptions {
             config.max_wasm_stack(max);
         }
 
+        config.relaxed_simd_deterministic(self.relaxed_simd_deterministic);
+
         Ok(config)
     }
 
     pub fn enable_wasm_features(&self, config: &mut Config) {
         let WasmFeatures {
             simd,
+            relaxed_simd,
             bulk_memory,
             reference_types,
             multi_value,
@@ -348,6 +366,9 @@ impl CommonOptions {
         if let Some(enable) = simd {
             config.wasm_simd(enable);
         }
+        if let Some(enable) = relaxed_simd {
+            config.wasm_relaxed_simd(enable);
+        }
         if let Some(enable) = bulk_memory {
             config.wasm_bulk_memory(enable);
         }
@@ -400,6 +421,7 @@ pub struct WasmFeatures {
     pub multi_value: Option<bool>,
     pub bulk_memory: Option<bool>,
     pub simd: Option<bool>,
+    pub relaxed_simd: Option<bool>,
     pub threads: Option<bool>,
     pub multi_memory: Option<bool>,
     pub memory64: Option<bool>,
@@ -450,6 +472,7 @@ fn parse_wasm_features(features: &str) -> Result<WasmFeatures> {
         multi_value: all.or(values["multi-value"]),
         bulk_memory: all.or(values["bulk-memory"]),
         simd: all.or(values["simd"]),
+        relaxed_simd: all.or(values["relaxed-simd"]),
         threads: all.or(values["threads"]),
         multi_memory: all.or(values["multi-memory"]),
         memory64: all.or(values["memory64"]),
@@ -560,6 +583,7 @@ mod test {
             multi_value,
             bulk_memory,
             simd,
+            relaxed_simd,
             threads,
             multi_memory,
             memory64,
@@ -572,6 +596,7 @@ mod test {
         assert_eq!(threads, Some(true));
         assert_eq!(multi_memory, Some(true));
         assert_eq!(memory64, Some(true));
+        assert_eq!(relaxed_simd, Some(true));
 
         Ok(())
     }
@@ -585,6 +610,7 @@ mod test {
             multi_value,
             bulk_memory,
             simd,
+            relaxed_simd,
             threads,
             multi_memory,
             memory64,
@@ -597,6 +623,7 @@ mod test {
         assert_eq!(threads, Some(false));
         assert_eq!(multi_memory, Some(false));
         assert_eq!(memory64, Some(false));
+        assert_eq!(relaxed_simd, Some(false));
 
         Ok(())
     }
@@ -613,6 +640,7 @@ mod test {
             multi_value,
             bulk_memory,
             simd,
+            relaxed_simd,
             threads,
             multi_memory,
             memory64,
@@ -625,6 +653,7 @@ mod test {
         assert_eq!(threads, None);
         assert_eq!(multi_memory, Some(true));
         assert_eq!(memory64, Some(true));
+        assert_eq!(relaxed_simd, None);
 
         Ok(())
     }
@@ -662,6 +691,7 @@ mod test {
     feature_test!(test_multi_value_feature, multi_value, "multi-value");
     feature_test!(test_bulk_memory_feature, bulk_memory, "bulk-memory");
     feature_test!(test_simd_feature, simd, "simd");
+    feature_test!(test_relaxed_simd_feature, relaxed_simd, "relaxed-simd");
     feature_test!(test_threads_feature, threads, "threads");
     feature_test!(test_multi_memory_feature, multi_memory, "multi-memory");
     feature_test!(test_memory64_feature, memory64, "memory64");
diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs
index 205c43b2cd05..55272b1b9804 100644
--- a/crates/cranelift/src/func_environ.rs
+++ b/crates/cranelift/src/func_environ.rs
@@ -2153,4 +2153,16 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
     fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
         self.isa.unsigned_add_overflow_condition()
     }
+
+    fn relaxed_simd_deterministic(&self) -> bool {
+        self.tunables.relaxed_simd_deterministic
+    }
+
+    fn has_native_fma(&self) -> bool {
+        self.isa.has_native_fma()
+    }
+
+    fn is_x86(&self) -> bool {
+        self.isa.triple().architecture == target_lexicon::Architecture::X86_64
+    }
 }
diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs
index a596a1a75ceb..d5983598aa56 100644
--- a/crates/cranelift/src/obj.rs
+++ b/crates/cranelift/src/obj.rs
@@ -545,6 +545,8 @@ fn libcall_name(call: LibCall) -> &'static str {
         LibCall::CeilF64 => LC::CeilF64,
         LibCall::TruncF32 => LC::TruncF32,
         LibCall::TruncF64 => LC::TruncF64,
+        LibCall::FmaF32 => LC::FmaF32,
+        LibCall::FmaF64 => LC::FmaF64,
         _ => panic!("unknown libcall to give a name to: {call:?}"),
     };
     other.symbol()
diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs
index efd48f0e2f89..6e39cc319f9f 100644
--- a/crates/environ/src/obj.rs
+++ b/crates/environ/src/obj.rs
@@ -166,4 +166,6 @@ libcalls! {
     CeilF64 = "libcall_ceilf64"
     TruncF32 = "libcall_truncf32"
     TruncF64 = "libcall_truncf64"
+    FmaF32 = "libcall_fmaf32"
+    FmaF64 = "libcall_fmaf64"
 }
diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs
index 59992f60d6ed..4b37cd08b935 100644
--- a/crates/environ/src/tunables.rs
+++ b/crates/environ/src/tunables.rs
@@ -45,6 +45,10 @@ pub struct Tunables {
     /// Flag for the component module whether adapter modules have debug
     /// assertions baked into them.
     pub debug_adapter_modules: bool,
+
+    /// Whether or not lowerings for relaxed simd instructions are forced to
+    /// be deterministic.
+    pub relaxed_simd_deterministic: bool,
 }
 
 impl Default for Tunables {
@@ -91,6 +95,7 @@ impl Default for Tunables {
             guard_before_linear_memory: true,
             generate_address_map: true,
             debug_adapter_modules: false,
+            relaxed_simd_deterministic: false,
         }
     }
 }
diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs
index f4bd18a2d6a7..128c3acec3e5 100644
--- a/crates/jit/src/code_memory.rs
+++ b/crates/jit/src/code_memory.rs
@@ -296,6 +296,8 @@ impl CodeMemory {
                 obj::LibCall::CeilF64 => libcalls::relocs::ceilf64 as usize,
                 obj::LibCall::TruncF32 => libcalls::relocs::truncf32 as usize,
                 obj::LibCall::TruncF64 => libcalls::relocs::truncf64 as usize,
+                obj::LibCall::FmaF32 => libcalls::relocs::fmaf32 as usize,
+                obj::LibCall::FmaF64 => libcalls::relocs::fmaf64 as usize,
             };
             *self.mmap.as_mut_ptr().add(offset).cast::<usize>() = libcall;
         }
diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs
index 2ce3bfc7d3aa..ee04b146b44f 100644
--- a/crates/runtime/src/libcalls.rs
+++ b/crates/runtime/src/libcalls.rs
@@ -584,4 +584,12 @@ pub mod relocs {
             (x.abs() + TOINT_64 - TOINT_64).copysign(x)
         }
     }
+
+    pub extern "C" fn fmaf32(a: f32, b: f32, c: f32) -> f32 {
+        a.mul_add(b, c)
+    }
+
+    pub extern "C" fn fmaf64(a: f64, b: f64, c: f64) -> f64 {
+        a.mul_add(b, c)
+    }
 }
diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs
index eae23a75733b..44c3367fa931 100644
--- a/crates/wasmtime/src/config.rs
+++ b/crates/wasmtime/src/config.rs
@@ -682,6 +682,56 @@ impl Config {
         self
     }
 
+    /// Configures whether the WebAssembly Relaxed SIMD proposal will be
+    /// enabled for compilation.
+    ///
+    /// The [WebAssembly Relaxed SIMD proposal][proposal] is not, at the time of
+    /// this writing, at stage 4. The relaxed SIMD proposal adds new
+    /// instructions to WebAssembly which, for some specific inputs, are allowed
+    /// to produce different results on different hosts. More-or-less this
+    /// proposal enables exposing platform-specific semantics of SIMD
+    /// instructions in a controlled fashion to a WebAssembly program. From an
+    /// embedder's perspective this means that WebAssembly programs may execute
+    /// differently depending on whether the host is x86_64 or AArch64, for
+    /// example.
+    ///
+    /// By default Wasmtime lowers relaxed SIMD instructions to the fastest
+    /// lowering for the platform it's running on. This means that, by default,
+    /// some relaxed SIMD instructions may have different results for the same
+    /// inputs across x86_64 and AArch64. This behavior can be disabled through
+    /// the [`Config::relaxed_simd_deterministic`] option which will force
+    /// deterministic behavior across all platforms, as classified by the
+    /// specification, at the cost of performance.
+    ///
+    /// This is `false` by default.
+    ///
+    /// [proposal]: https://github.com/webassembly/relaxed-simd
+    pub fn wasm_relaxed_simd(&mut self, enable: bool) -> &mut Self {
+        self.features.relaxed_simd = enable;
+        self
+    }
+
+    /// This option can be used to control the behavior of the [relaxed SIMD
+    /// proposal's][proposal] instructions.
+    ///
+    /// The relaxed SIMD proposal introduces instructions that are allowed to
+    /// have different behavior on different architectures, primarily to afford
+    /// an efficient implementation on all architectures. This means, however,
+    /// that the same module may execute differently on one host than another,
+    /// which typically is not otherwise the case. This option is provided to
+    /// force Wasmtime to generate deterministic code for all relaxed simd
+    /// instructions, at the cost of performance, for all architectures. When
+    /// this option is enabled then the deterministic behavior of all
+    /// instructions in the relaxed SIMD proposal is selected.
+    ///
+    /// This is `false` by default.
+    ///
+    /// [proposal]: https://github.com/webassembly/relaxed-simd
+    pub fn relaxed_simd_deterministic(&mut self, enable: bool) -> &mut Self {
+        self.tunables.relaxed_simd_deterministic = enable;
+        self
+    }
+
     /// Configures whether the [WebAssembly bulk memory operations
     /// proposal][proposal] will be enabled for compilation.
     ///
@@ -1560,6 +1610,10 @@ impl Config {
             }
         }
 
+        if self.features.relaxed_simd && !self.features.simd {
+            bail!("cannot disable the simd proposal but enable the relaxed simd proposal");
+        }
+
         // Apply compiler settings and flags
         for (k, v) in self.compiler_config.settings.iter() {
             compiler.set(k, v)?;
@@ -1608,6 +1662,7 @@ impl fmt::Debug for Config {
             .field("wasm_reference_types", &self.features.reference_types)
             .field("wasm_bulk_memory", &self.features.bulk_memory)
             .field("wasm_simd", &self.features.simd)
+            .field("wasm_relaxed_simd", &self.features.relaxed_simd)
             .field("wasm_multi_value", &self.features.multi_value)
             .field(
                 "static_memory_maximum_size",
diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs
index 198273f5e79d..36063b49509f 100644
--- a/crates/wasmtime/src/engine/serialization.rs
+++ b/crates/wasmtime/src/engine/serialization.rs
@@ -309,6 +309,7 @@ impl Metadata {
             epoch_interruption,
             static_memory_bound_is_maximum,
             guard_before_linear_memory,
+            relaxed_simd_deterministic,
 
             // This doesn't affect compilation, it's just a runtime setting.
             dynamic_memory_growth_reserve: _,
@@ -364,6 +365,11 @@ impl Metadata {
             other.guard_before_linear_memory,
             "guard before linear memory",
         )?;
+        Self::check_bool(
+            relaxed_simd_deterministic,
+            other.relaxed_simd_deterministic,
+            "relaxed simd deterministic semantics",
+        )?;
 
         Ok(())
     }
diff --git a/crates/wast/src/core.rs b/crates/wast/src/core.rs
index e87d0b1f7e15..46c1f8c95759 100644
--- a/crates/wast/src/core.rs
+++ b/crates/wast/src/core.rs
@@ -39,6 +39,14 @@ fn extract_lane_as_i64(bytes: u128, lane: usize) -> i64 {
 
 pub fn match_val(actual: &Val, expected: &WastRetCore) -> Result<()> {
     match (actual, expected) {
+        (_, WastRetCore::Either(expected)) => {
+            for expected in expected {
+                if match_val(actual, expected).is_ok() {
+                    return Ok(());
+                }
+            }
+            match_val(actual, &expected[0])
+        }
         (Val::I32(a), WastRetCore::I32(b)) => match_int(a, b),
         (Val::I64(a), WastRetCore::I64(b)) => match_int(a, b),
         // Note that these float comparisons are comparing bits, not float
diff --git a/tests/all/wast.rs b/tests/all/wast.rs
index 5f274b5be689..5da3e4cbdde1 100644
--- a/tests/all/wast.rs
+++ b/tests/all/wast.rs
@@ -30,6 +30,7 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
     let multi_memory = feature_found(wast, "multi-memory");
     let threads = feature_found(wast, "threads");
     let reference_types = !(threads && feature_found(wast, "proposals"));
+    let relaxed_simd = feature_found(wast, "relaxed-simd");
     let use_shared_memory = feature_found_src(&wast_bytes, "shared_memory")
         || feature_found_src(&wast_bytes, "shared)");
 
@@ -43,6 +44,7 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
         .wasm_threads(threads)
         .wasm_memory64(memory64)
         .wasm_reference_types(reference_types)
+        .wasm_relaxed_simd(relaxed_simd)
         .cranelift_debug_verifier(true);
 
     cfg.wasm_component_model(feature_found(wast, "component-model"));
@@ -108,11 +110,26 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
         None
     };
 
-    let store = Store::new(&Engine::new(&cfg)?, ());
-    let mut wast_context = WastContext::new(store);
+    let mut engines = vec![(Engine::new(&cfg)?, "default")];
 
-    wast_context.register_spectest(use_shared_memory)?;
-    wast_context.run_buffer(wast.to_str().unwrap(), &wast_bytes)?;
+    // For tests that use relaxed-simd test both the default engine and the
+    // guaranteed-deterministic engine to ensure that both the 'native'
+    // semantics of the instructions and the canonical semantics work.
+    if relaxed_simd {
+        engines.push((
+            Engine::new(cfg.relaxed_simd_deterministic(true))?,
+            "deterministic",
+        ));
+    }
+
+    for (engine, desc) in engines {
+        let store = Store::new(&engine, ());
+        let mut wast_context = WastContext::new(store);
+        wast_context.register_spectest(use_shared_memory)?;
+        wast_context
+            .run_buffer(wast.to_str().unwrap(), &wast_bytes)
+            .with_context(|| format!("failed to run spec test with {desc} engine"))?;
+    }
 
     Ok(())
 }
diff --git a/tests/misc_testsuite/relaxed-simd/i16x8_relaxed_q15mulr_s.wast b/tests/misc_testsuite/relaxed-simd/i16x8_relaxed_q15mulr_s.wast
new file mode 100644
index 000000000000..265d99160da6
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/i16x8_relaxed_q15mulr_s.wast
@@ -0,0 +1,26 @@
+;; Tests for i16x8.relaxed_q15mulr_s.
+(module
+    (func (export "i16x8.relaxed_q15mulr_s") (param v128 v128) (result v128) (i16x8.relaxed_q15mulr_s (local.get 0) (local.get 1)))
+
+    (func (export "i16x8.relaxed_q15mulr_s_cmp") (param v128 v128) (result v128)
+          (i16x8.eq
+            (i16x8.relaxed_q15mulr_s (local.get 0) (local.get 1))
+            (i16x8.relaxed_q15mulr_s (local.get 0) (local.get 1))))
+)
+
+;; INT16_MIN = -32768
+(assert_return (invoke "i16x8.relaxed_q15mulr_s"
+                       (v128.const i16x8 -32768 -32767 32767 0 0 0 0 0)
+                       (v128.const i16x8 -32768 -32768 32767 0 0 0 0 0))
+               ;; overflows, return either INT16_MIN or INT16_MAX
+               (either (v128.const i16x8 -32768 32767 32766 0 0 0 0 0)
+                       (v128.const i16x8 32767 32767 32766 0 0 0 0 0)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+(assert_return (invoke "i16x8.relaxed_q15mulr_s_cmp"
+                       (v128.const i16x8 -32768 -32767 32767 0 0 0 0 0)
+                       (v128.const i16x8 -32768 -32768 32767 0 0 0 0 0))
+               ;; both invocations must agree, so every lane compares equal (all ones)
+               (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+
diff --git a/tests/misc_testsuite/relaxed-simd/i32x4_relaxed_trunc.wast b/tests/misc_testsuite/relaxed-simd/i32x4_relaxed_trunc.wast
new file mode 100644
index 000000000000..889542c6ad0d
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/i32x4_relaxed_trunc.wast
@@ -0,0 +1,123 @@
+;; Tests for i32x4.relaxed_trunc_f32x4_s, i32x4.relaxed_trunc_f32x4_u, i32x4.relaxed_trunc_f64x2_s_zero, and i32x4.relaxed_trunc_f64x2_u_zero.
+
+(module ;; exports each relaxed truncation plus a matching `_cmp` determinism check
+    (func (export "i32x4.relaxed_trunc_f32x4_s") (param v128) (result v128) (i32x4.relaxed_trunc_f32x4_s (local.get 0)))
+    (func (export "i32x4.relaxed_trunc_f32x4_u") (param v128) (result v128) (i32x4.relaxed_trunc_f32x4_u (local.get 0)))
+    (func (export "i32x4.relaxed_trunc_f64x2_s_zero") (param v128) (result v128) (i32x4.relaxed_trunc_f64x2_s_zero (local.get 0)))
+    (func (export "i32x4.relaxed_trunc_f64x2_u_zero") (param v128) (result v128) (i32x4.relaxed_trunc_f64x2_u_zero (local.get 0)))
+    ;; `_cmp` variants: run the instruction twice on the same operand and compare the two results with i32x4.eq.
+    (func (export "i32x4.relaxed_trunc_f32x4_s_cmp") (param v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_trunc_f32x4_s (local.get 0))
+            (i32x4.relaxed_trunc_f32x4_s (local.get 0))))
+    (func (export "i32x4.relaxed_trunc_f32x4_u_cmp") (param v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_trunc_f32x4_u (local.get 0))
+            (i32x4.relaxed_trunc_f32x4_u (local.get 0))))
+    (func (export "i32x4.relaxed_trunc_f64x2_s_zero_cmp") (param v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_trunc_f64x2_s_zero (local.get 0))
+            (i32x4.relaxed_trunc_f64x2_s_zero (local.get 0))))
+    (func (export "i32x4.relaxed_trunc_f64x2_u_zero_cmp") (param v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_trunc_f64x2_u_zero (local.get 0))
+            (i32x4.relaxed_trunc_f64x2_u_zero (local.get 0))))
+)
+
+;; Test some edge cases around min/max to ensure that the instruction either
+;; saturates correctly or returns INT_MIN.
+;;
+;; Note, though, that INT_MAX itself is not tested. The value for INT_MAX is
+;; 2147483647 but that is not representable in a `f32` since it requires 31 bits
+;; when a f32 has only 24 bits available. This means that the closest integers
+;; to INT_MAX which can be represented are 2147483520 and 2147483648, meaning
+;; that the INT_MAX test case cannot be tested.
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_s"
+                       ;;                INT32_MIN     <INT32_MIN        >INT32_MAX
+                       (v128.const f32x4 -2147483648.0 -2147483904.0 2.0 2147483904.0))
+               ;; out of range -> saturate or INT32_MIN
+               (either (v128.const i32x4 -2147483648 -2147483648 2 2147483647)
+                       (v128.const i32x4 -2147483648 -2147483648 2 -2147483648)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_s"
+                       (v128.const f32x4 nan -nan nan:0x444444 -nan:0x444444))
+               ;; nans -> 0 or INT32_MIN
+               (either (v128.const i32x4 0 0 0 0)
+                       (v128.const i32x4 0x80000000 0x80000000 0x80000000 0x80000000)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_u"
+                       ;; UINT32_MIN UINT32_MIN-1 <UINT32_MAX UINT32_MAX+1
+                       (v128.const f32x4 0 -1.0 4294967040.0 4294967296.0))
+               ;; out of range -> saturate or UINT32_MAX
+               (either (v128.const i32x4 0 0 4294967040 0xffffffff)
+                       (v128.const i32x4 0 0xffffffff 4294967040 0xffffffff)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_u"
+                       (v128.const f32x4 nan -nan nan:0x444444 -nan:0x444444))
+               ;; nans -> 0 or UINT32_MAX
+               (either (v128.const i32x4 0 0 0 0)
+                       (v128.const i32x4 0xffffffff 0xffffffff 0xffffffff 0xffffffff)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_s_zero"
+                       (v128.const f64x2 -2147483904.0 2147483904.0))
+               ;; out of range -> saturate or INT32_MIN
+               (either (v128.const i32x4 -2147483648 2147483647 0 0)
+                       (v128.const i32x4 -2147483648 -2147483648 0 0)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_s_zero"
+                       (v128.const f64x2 nan -nan))
+               (either (v128.const i32x4 0 0 0 0)
+                       (v128.const i32x4 0x80000000 0x80000000 0 0)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_u_zero"
+                       (v128.const f64x2 -1.0 4294967296.0))
+               ;; out of range -> saturate or UINT32_MAX
+               (either (v128.const i32x4 0 0xffffffff 0 0)
+                       (v128.const i32x4 0xffffffff 0xffffffff 0 0)))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_u_zero"
+                       (v128.const f64x2 nan -nan)) ;; nans -> 0 or UINT32_MAX in lanes 0-1; lanes 2-3 are always zero
+               (either (v128.const i32x4 0 0 0 0)
+                       (v128.const i32x4 0xffffffff 0xffffffff 0 0)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_s_cmp"
+                       ;; INT32_MIN <INT32_MIN INT32_MAX >INT32_MAX
+                       (v128.const f32x4 -2147483648.0 -2147483904.0 2147483647.0 2147483904.0))
+               ;; out of range -> saturate or INT32_MIN
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_s_cmp"
+                       (v128.const f32x4 nan -nan nan:0x444444 -nan:0x444444))
+               ;; nans -> 0 or INT32_MIN
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_u_cmp"
+                       ;; UINT32_MIN UINT32_MIN-1 <UINT32_MAX UINT32_MAX+1
+                       (v128.const f32x4 0 -1.0 4294967040.0 4294967296.0))
+               ;; out of range -> saturate or UINT32_MAX
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f32x4_u_cmp"
+                       (v128.const f32x4 nan -nan nan:0x444444 -nan:0x444444))
+               ;; nans -> 0 or UINT32_MAX
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_s_zero_cmp"
+                       (v128.const f64x2 -2147483904.0 2147483904.0))
+               ;; out of range -> saturate or INT32_MIN
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_s_zero_cmp"
+                       (v128.const f64x2 nan -nan))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_u_zero_cmp"
+                       (v128.const f64x2 -1.0 4294967296.0))
+               ;; out of range -> saturate or UINT32_MAX
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_trunc_f64x2_u_zero_cmp"
+                       (v128.const f64x2 nan -nan))
+               (v128.const i32x4 -1 -1 -1 -1))
diff --git a/tests/misc_testsuite/relaxed-simd/i8x16_relaxed_swizzle.wast b/tests/misc_testsuite/relaxed-simd/i8x16_relaxed_swizzle.wast
new file mode 100644
index 000000000000..1b20668d3d93
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/i8x16_relaxed_swizzle.wast
@@ -0,0 +1,44 @@
+;; Tests for relaxed i8x16 swizzle.
+
+(module ;; exports the relaxed swizzle plus a determinism check
+    (func (export "i8x16.relaxed_swizzle") (param v128 v128) (result v128) (i8x16.relaxed_swizzle (local.get 0) (local.get 1)))
+    ;; `_cmp`: run the swizzle twice on the same operands and compare the two results with i8x16.eq.
+    (func (export "i8x16.relaxed_swizzle_cmp") (param v128 v128) (result v128)
+          (i8x16.eq
+            (i8x16.relaxed_swizzle (local.get 0) (local.get 1))
+            (i8x16.relaxed_swizzle (local.get 0) (local.get 1))))
+)
+
+(assert_return (invoke "i8x16.relaxed_swizzle"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
+               (either (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)))
+
+;; out of range, returns 0 or the lane at (index modulo 16) if index < 128
+(assert_return (invoke "i8x16.relaxed_swizzle"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31))
+               (either (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)))
+
+;; out of range, returns 0 if >= 128
+(assert_return (invoke "i8x16.relaxed_swizzle"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 128 129 130 131 132 133 134 135 248 249 250 251 252 253 254 255))
+               (either (v128.const i8x16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+;; out of range, returns 0 or the lane at (index modulo 16) if index < 128
+(assert_return (invoke "i8x16.relaxed_swizzle_cmp"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31))
+               (v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1))
+
+;; out of range, returns 0 if >= 128
+(assert_return (invoke "i8x16.relaxed_swizzle_cmp"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 128 129 130 131 132 133 134 135 248 249 250 251 252 253 254 255))
+               (v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1))
diff --git a/tests/misc_testsuite/relaxed-simd/relaxed_dot_product.wast b/tests/misc_testsuite/relaxed-simd/relaxed_dot_product.wast
new file mode 100644
index 000000000000..41dee0afcfa4
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/relaxed_dot_product.wast
@@ -0,0 +1,106 @@
+;; Tests for relaxed dot products.
+
+(module ;; exports both relaxed dot-product instructions plus `_cmp` determinism checks
+    (func (export "i16x8.relaxed_dot_i8x16_i7x16_s") (param v128 v128) (result v128) (i16x8.relaxed_dot_i8x16_i7x16_s (local.get 0) (local.get 1)))
+    (func (export "i32x4.relaxed_dot_i8x16_i7x16_add_s") (param v128 v128 v128) (result v128) (i32x4.relaxed_dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2)))
+    ;; `_cmp` variants: run the instruction twice on the same operands and compare using the result's lane shape.
+    (func (export "i16x8.relaxed_dot_i8x16_i7x16_s_cmp") (param v128 v128) (result v128)
+          (i16x8.eq
+            (i16x8.relaxed_dot_i8x16_i7x16_s (local.get 0) (local.get 1))
+            (i16x8.relaxed_dot_i8x16_i7x16_s (local.get 0) (local.get 1))))
+    (func (export "i32x4.relaxed_dot_i8x16_i7x16_add_s_cmp") (param v128 v128 v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2))
+            (i32x4.relaxed_dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2))))
+)
+
+;; Simple values to ensure things are functional.
+(assert_return (invoke "i16x8.relaxed_dot_i8x16_i7x16_s"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
+               (v128.const i16x8 1 13 41 85 145 221 313 421))
+
+;; Test max and min i8 values;
+(assert_return (invoke "i16x8.relaxed_dot_i8x16_i7x16_s"
+                       (v128.const i8x16 -128 -128 127 127 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 127 127 127 127 0 0 0 0 0 0 0 0 0 0 0 0))
+               (v128.const i16x8 -32512 32258 0 0 0 0 0 0))
+
+;; signed * unsigned   : -128 *  129 * 2 = -33,024 saturated to -32,768
+;; signed * signed     : -128 * -127 * 2 =  32,512
+;; unsigned * unsigned :  128 *  129 * 2 =  33,024
+(assert_return (invoke "i16x8.relaxed_dot_i8x16_i7x16_s"
+                       (v128.const i8x16 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
+               (either
+                 (v128.const i16x8 -32768 0 0 0 0 0 0 0)
+                 (v128.const i16x8  32512 0 0 0 0 0 0 0)
+                 (v128.const i16x8  33024 0 0 0 0 0 0 0)))
+
+;; Simple values to ensure things are functional.
+(assert_return (invoke "i32x4.relaxed_dot_i8x16_i7x16_add_s"
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i32x4 0 1 2 3))
+               ;; intermediate result is [14, 126, 366, 734]
+               (v128.const i32x4 14 127 368 737))
+
+;; Test max and min i8 values;
+(assert_return (invoke "i32x4.relaxed_dot_i8x16_i7x16_add_s"
+                       (v128.const i8x16 -128 -128 -128 -128 127 127 127 127 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 127 127 127 127 127 127 127 127 0 0 0 0 0 0 0 0)
+                       (v128.const i32x4 1 2 3 4))
+               ;; intermediate result is [-65024, 64516, 0, 0]
+               (v128.const i32x4 -65023 64518 3 4))
+
+;; signed * unsigned   : -128 *  129 * 4 = -66,048 (+ 1) VPDPBUSD AVX2-VNNI or AVX512-VNNI
+;; signed * unsigned with intermediate saturation :
+;;   (-128 * 129) + (-128 * 129) = -33024 saturated to -32768 (PMADDUBSW)
+;;   -32768 + -32768 = -65536 (+ 1)
+;; signed * signed     : -128 * -127 * 4 =  65,024 (+ 1)
+;; unsigned * unsigned :  128 *  129 * 4 =  66,048 (+ 1)
+(assert_return (invoke "i32x4.relaxed_dot_i8x16_i7x16_add_s"
+                       (v128.const i8x16 -128 -128 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 -127 -127 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i32x4 1 2 3 4))
+               (either
+                 (v128.const i32x4 -66047 2 3 4)
+                 (v128.const i32x4 -65535 2 3 4)
+                 (v128.const i32x4  65025 2 3 4)
+                 (v128.const i32x4  66049 2 3 4)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+;; Test max and min i8 values;
+(assert_return (invoke "i16x8.relaxed_dot_i8x16_i7x16_s_cmp"
+                       (v128.const i8x16 -128 -128 127 127 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 127 127 127 127 0 0 0 0 0 0 0 0 0 0 0 0))
+               (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+
+;; Test max and min i8 values;
+(assert_return (invoke "i32x4.relaxed_dot_i8x16_i7x16_add_s_cmp"
+                       (v128.const i8x16 -128 -128 -128 -128 127 127 127 127 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 127 127 127 127 127 127 127 127 0 0 0 0 0 0 0 0)
+                       (v128.const i32x4 1 2 3 4))
+               ;; intermediate result is [-65024, 64516, 0, 0]
+               (v128.const i32x4 -1 -1 -1 -1))
+
+;; signed * unsigned   : -128 *  129 * 2 = -33,024 saturated to -32,768
+;; signed * signed     : -128 * -127 * 2 =  32,512
+;; unsigned * unsigned :  128 *  129 * 2 =  33,024
+(assert_return (invoke "i16x8.relaxed_dot_i8x16_i7x16_s_cmp"
+                       (v128.const i8x16 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
+               (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+
+;; signed * unsigned   : -128 *  129 * 4 = -66,048 (+ 1) VPDPBUSD AVX2-VNNI or AVX512-VNNI
+;; signed * unsigned with intermediate saturation :
+;;   (-128 * 129) + (-128 * 129) = -33024 saturated to -32768 (PMADDUBSW)
+;;   -32768 + -32768 = -65536 (+ 1)
+;; signed * signed     : -128 * -127 * 4 =  65,024 (+ 1)
+;; unsigned * unsigned :  128 *  129 * 4 =  66,048 (+ 1)
+(assert_return (invoke "i32x4.relaxed_dot_i8x16_i7x16_add_s_cmp"
+                       (v128.const i8x16 -128 -128 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i8x16 -127 -127 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0)
+                       (v128.const i32x4 1 2 3 4))
+               (v128.const i32x4 -1 -1 -1 -1))
diff --git a/tests/misc_testsuite/relaxed-simd/relaxed_laneselect.wast b/tests/misc_testsuite/relaxed-simd/relaxed_laneselect.wast
new file mode 100644
index 000000000000..4ea6eb818462
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/relaxed_laneselect.wast
@@ -0,0 +1,92 @@
+;; Tests for i8x16.relaxed_laneselect, i16x8.relaxed_laneselect, i32x4.relaxed_laneselect, and i64x2.relaxed_laneselect.
+
+(module ;; exports each relaxed laneselect shape plus a matching `_cmp` determinism check
+    (func (export "i8x16.relaxed_laneselect") (param v128 v128 v128) (result v128) (i8x16.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "i16x8.relaxed_laneselect") (param v128 v128 v128) (result v128) (i16x8.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "i32x4.relaxed_laneselect") (param v128 v128 v128) (result v128) (i32x4.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "i64x2.relaxed_laneselect") (param v128 v128 v128) (result v128) (i64x2.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2)))
+    ;; `_cmp` variants: run the instruction twice on the same operands and compare with the same-shape `eq`.
+    (func (export "i8x16.relaxed_laneselect_cmp") (param v128 v128 v128) (result v128)
+          (i8x16.eq
+            (i8x16.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))
+            (i8x16.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "i16x8.relaxed_laneselect_cmp") (param v128 v128 v128) (result v128)
+          (i16x8.eq
+            (i16x8.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))
+            (i16x8.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "i32x4.relaxed_laneselect_cmp") (param v128 v128 v128) (result v128)
+          (i32x4.eq
+            (i32x4.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))
+            (i32x4.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "i64x2.relaxed_laneselect_cmp") (param v128 v128 v128) (result v128)
+          (i64x2.eq
+            (i64x2.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))
+            (i64x2.relaxed_laneselect (local.get 0) (local.get 1) (local.get 2))))
+)
+
+(assert_return (invoke "i8x16.relaxed_laneselect"
+                       (v128.const i8x16 0    1  0x12 0x12 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 16   17 0x34 0x34 20 21 22 23 24 25 26 27 28 29 30 31)
+                       (v128.const i8x16 0xff 0  0xf0 0x0f 0 0 0 0 0 0 0 0 0 0 0 0))
+               (either (v128.const i8x16 0    17 0x14 0x32 20 21 22 23 24 25 26 27 28 29 30 31)
+                       (v128.const i8x16 0    17 0x12 0x34 20 21 22 23 24 25 26 27 28 29 30 31)))
+
+(assert_return (invoke "i16x8.relaxed_laneselect"
+                       (v128.const i16x8 0      1 0x1234 0x1234 4 5 6 7)
+                       (v128.const i16x8 8      9 0x5678 0x5678 12 13 14 15)
+                       (v128.const i16x8 0xffff 0 0xff00 0x00ff 0 0 0 0))
+               (either (v128.const i16x8 0      9 0x1278 0x5634 12 13 14 15)
+                       (v128.const i16x8 0      9 0x1234 0x5678 12 13 14 15)))
+
+(assert_return (invoke "i32x4.relaxed_laneselect"
+                       (v128.const i32x4 0          1 0x12341234 0x12341234)
+                       (v128.const i32x4 4          5 0x56785678 0x56785678)
+                       (v128.const i32x4 0xffffffff 0 0xffff0000 0x0000ffff))
+               (either (v128.const i32x4 0          5 0x12345678 0x56781234)
+                       (v128.const i32x4 0          5 0x12341234 0x56785678)))
+
+(assert_return (invoke "i64x2.relaxed_laneselect"
+                       (v128.const i64x2 0                  1)
+                       (v128.const i64x2 2                  3)
+                       (v128.const i64x2 0xffffffffffffffff 0))
+               (either (v128.const i64x2 0                  3)
+                       (v128.const i64x2 0                  3)))
+
+(assert_return (invoke "i64x2.relaxed_laneselect"
+                       (v128.const i64x2 0x1234123412341234 0x1234123412341234)
+                       (v128.const i64x2 0x5678567856785678 0x5678567856785678)
+                       (v128.const i64x2 0xffffffff00000000 0x00000000ffffffff))
+               (either (v128.const i64x2 0x1234123456785678 0x5678567812341234)
+                       (v128.const i64x2 0x1234123412341234 0x5678567856785678)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+(assert_return (invoke "i8x16.relaxed_laneselect_cmp"
+                       (v128.const i8x16 0    1  0x12 0x12 4 5 6 7 8 9 10 11 12 13 14 15)
+                       (v128.const i8x16 16   17 0x34 0x34 20 21 22 23 24 25 26 27 28 29 30 31)
+                       (v128.const i8x16 0xff 0  0xf0 0x0f 0 0 0 0 0 0 0 0 0 0 0 0))
+               (v128.const i8x16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1))
+
+(assert_return (invoke "i16x8.relaxed_laneselect_cmp"
+                       (v128.const i16x8 0      1 0x1234 0x1234 4 5 6 7)
+                       (v128.const i16x8 8      9 0x5678 0x5678 12 13 14 15)
+                       (v128.const i16x8 0xffff 0 0xff00 0x00ff 0 0 0 0))
+               (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+
+(assert_return (invoke "i32x4.relaxed_laneselect_cmp"
+                       (v128.const i32x4 0          1 0x12341234 0x12341234)
+                       (v128.const i32x4 4          5 0x56785678 0x56785678)
+                       (v128.const i32x4 0xffffffff 0 0xffff0000 0x0000ffff))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "i64x2.relaxed_laneselect_cmp"
+                       (v128.const i64x2 0                  1)
+                       (v128.const i64x2 2                  3)
+                       (v128.const i64x2 0xffffffffffffffff 0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "i64x2.relaxed_laneselect_cmp"
+                       (v128.const i64x2 0x1234123412341234 0x1234123412341234)
+                       (v128.const i64x2 0x5678567856785678 0x5678567856785678)
+                       (v128.const i64x2 0xffffffff00000000 0x00000000ffffffff))
+               (v128.const i64x2 -1 -1))
diff --git a/tests/misc_testsuite/relaxed-simd/relaxed_madd_nmadd.wast b/tests/misc_testsuite/relaxed-simd/relaxed_madd_nmadd.wast
new file mode 100644
index 000000000000..0e0e0c2bfc4f
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/relaxed_madd_nmadd.wast
@@ -0,0 +1,190 @@
+;; Tests for f32x4.relaxed_madd, f32x4.relaxed_nmadd, f64x2.relaxed_madd, and f64x2.relaxed_nmadd.
+
+(module ;; exports each relaxed madd/nmadd instruction plus a matching `_cmp` determinism check
+    (func (export "f32x4.relaxed_madd") (param v128 v128 v128) (result v128) (f32x4.relaxed_madd (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "f32x4.relaxed_nmadd") (param v128 v128 v128) (result v128) (f32x4.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "f64x2.relaxed_nmadd") (param v128 v128 v128) (result v128) (f64x2.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2)))
+    (func (export "f64x2.relaxed_madd") (param v128 v128 v128) (result v128) (f64x2.relaxed_madd (local.get 0) (local.get 1) (local.get 2)))
+    ;; `_cmp` variants compare two runs with floating-point `eq`, so a NaN lane would compare as not equal.
+    (func (export "f32x4.relaxed_madd_cmp") (param v128 v128 v128) (result v128)
+          (f32x4.eq
+            (f32x4.relaxed_madd (local.get 0) (local.get 1) (local.get 2))
+            (f32x4.relaxed_madd (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "f32x4.relaxed_nmadd_cmp") (param v128 v128 v128) (result v128)
+          (f32x4.eq
+            (f32x4.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2))
+            (f32x4.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "f64x2.relaxed_nmadd_cmp") (param v128 v128 v128) (result v128)
+          (f64x2.eq
+            (f64x2.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2))
+            (f64x2.relaxed_nmadd (local.get 0) (local.get 1) (local.get 2))))
+    (func (export "f64x2.relaxed_madd_cmp") (param v128 v128 v128) (result v128)
+          (f64x2.eq
+            (f64x2.relaxed_madd (local.get 0) (local.get 1) (local.get 2))
+            (f64x2.relaxed_madd (local.get 0) (local.get 1) (local.get 2))))
+)
+
+
+;; FLT_MAX == 0x1.fffffep+127
+;; FLT_MAX * 2 - FLT_MAX ==
+;;   FLT_MAX (if fma)
+;;   inf     (if no fma, since FLT_MAX * 2 overflows to inf first)
+;; from https://www.vinc17.net/software/fma-tests.c
+(assert_return (invoke "f32x4.relaxed_madd"
+                       (v128.const f32x4 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127 )
+                       (v128.const f32x4 2.0 2.0 2.0 2.0)
+                       (v128.const f32x4 -0x1.fffffep+127 -0x1.fffffep+127 -0x1.fffffep+127 -0x1.fffffep+127))
+               (either (v128.const f32x4 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127)
+                       (v128.const f32x4 inf inf inf inf)))
+
+;; Special values for float:
+;; x            = 0x1.000004p+0 (1 + 2^-22)
+;; y            = 0x1.0002p+0   (1 + 2^-15)
+;; z            = -(1.0 + 0x0.0002p+0 + 0x0.000004p+0)
+;;              = -0x1.000204p+0
+;; x.y          = 1.0 + 0x0.0002p+0 + 0x0.000004p+0 + 0x1p-37 (round bit)
+;; x.y+z        = 0 (2 roundings)
+;; fma(x, y, z) = (0x1p-37) 2^-37
+;; from https://accurate-algorithms.readthedocs.io/en/latest/ch09appendix.html#test-system-information
+(assert_return (invoke "f32x4.relaxed_madd"
+                       (v128.const f32x4 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0)
+                       (v128.const f32x4 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (either (v128.const f32x4 0x1p-37 0x1p-37 0x1p-37 0x1p-37)
+                       (v128.const f32x4 0 0 0 0)))
+;; fnma tests with negated x, same answers are expected.
+(assert_return (invoke "f32x4.relaxed_nmadd"
+                       (v128.const f32x4 -0x1.000004p+0 -0x1.000004p+0 -0x1.000004p+0 -0x1.000004p+0)
+                       (v128.const f32x4 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (either (v128.const f32x4 0x1p-37 0x1p-37 0x1p-37 0x1p-37)
+                       (v128.const f32x4 0 0 0 0)))
+;; fnma tests with negated y, same answers are expected.
+(assert_return (invoke "f32x4.relaxed_nmadd"
+                       (v128.const f32x4 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0)
+                       (v128.const f32x4 -0x1.0002p+0 -0x1.0002p+0 -0x1.0002p+0 -0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (either (v128.const f32x4 0x1p-37 0x1p-37 0x1p-37 0x1p-37)
+                       (v128.const f32x4 0 0 0 0)))
+
+;; DBL_MAX = 0x1.fffffffffffffp+1023
+;; DBL_MAX * 2 - DBL_MAX ==
+;;   DBL_MAX (if fma)
+;;   inf     (if no fma, since DBL_MAX * 2 overflows
+;;            to inf before the subtraction)
+;; from https://www.vinc17.net/software/fma-tests.c
+(assert_return (invoke "f64x2.relaxed_madd"
+                       (v128.const f64x2 0x1.fffffffffffffp+1023 0x1.fffffffffffffp+1023)
+                       (v128.const f64x2 2.0 2.0)
+                       (v128.const f64x2 -0x1.fffffffffffffp+1023 -0x1.fffffffffffffp+1023))
+               (either (v128.const f64x2 0x1.fffffffffffffp+1023 0x1.fffffffffffffp+1023)
+                       (v128.const f64x2 inf inf)))
+
+;; Special values for double:
+;; x            = 0x1.00000004p+0 (1 + 2^-30)
+;; y            = 0x1.000002p+0   (1 + 2^-23)
+;; z            = -(1.0 + 0x0.000002p+0 + 0x0.00000004p+0)
+;;              = -0x1.00000204p+0
+;; x.y          = 1.0 + 0x0.000002p+0 + 0x0.00000004p+0 + 0x1p-53 (round bit)
+;; x.y+z        = 0 (2 roundings)
+;; fma(x, y, z) = 0x1p-53
+;; from https://accurate-algorithms.readthedocs.io/en/latest/ch09appendix.html#test-system-information
+(assert_return (invoke "f64x2.relaxed_madd"
+                       (v128.const f64x2 0x1.00000004p+0 0x1.00000004p+0)
+                       (v128.const f64x2 0x1.000002p+0 0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (either (v128.const f64x2 0x1p-53 0x1p-53)
+                       (v128.const f64x2 0 0)))
+;; fnma tests with negated x, same answers are expected.
+(assert_return (invoke "f64x2.relaxed_nmadd"
+                       (v128.const f64x2 -0x1.00000004p+0 -0x1.00000004p+0)
+                       (v128.const f64x2 0x1.000002p+0 0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (either (v128.const f64x2 0x1p-53 0x1p-53)
+                       (v128.const f64x2 0 0)))
+;; fnma tests with negated y, same answers are expected.
+(assert_return (invoke "f64x2.relaxed_nmadd"
+                       (v128.const f64x2 0x1.00000004p+0 0x1.00000004p+0)
+                       (v128.const f64x2 -0x1.000002p+0 -0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (either (v128.const f64x2 0x1p-53 0x1p-53)
+                       (v128.const f64x2 0 0)))
+
+;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
+
+;; FLT_MAX == 0x1.fffffep+127
+;; FLT_MAX * 2 - FLT_MAX ==
+;;   FLT_MAX (if fma)
+;;   inf     (if no fma, since FLT_MAX * 2 overflows to inf first)
+;; from https://www.vinc17.net/software/fma-tests.c
+(assert_return (invoke "f32x4.relaxed_madd_cmp"
+                       (v128.const f32x4 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127 0x1.fffffep+127 )
+                       (v128.const f32x4 2.0 2.0 2.0 2.0)
+                       (v128.const f32x4 -0x1.fffffep+127 -0x1.fffffep+127 -0x1.fffffep+127 -0x1.fffffep+127))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+;; Special values for float:
+;; x            = 0x1.000004p+0 (1 + 2^-22)
+;; y            = 0x1.0002p+0   (1 + 2^-15)
+;; z            = -(1.0 + 0x0.0002p+0 + 0x0.000004p+0)
+;;              = -0x1.000204p+0
+;; x.y          = 1.0 + 0x0.0002p+0 + 0x0.000004p+0 + 0x1p-37 (round bit)
+;; x.y+z        = 0 (2 roundings)
+;; fma(x, y, z) = (0x1p-37) 2^-37
+;; from https://accurate-algorithms.readthedocs.io/en/latest/ch09appendix.html#test-system-information
+(assert_return (invoke "f32x4.relaxed_madd_cmp"
+                       (v128.const f32x4 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0)
+                       (v128.const f32x4 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (v128.const i32x4 -1 -1 -1 -1))
+;; fnma tests with negated x, same answers are expected.
+(assert_return (invoke "f32x4.relaxed_nmadd_cmp"
+                       (v128.const f32x4 -0x1.000004p+0 -0x1.000004p+0 -0x1.000004p+0 -0x1.000004p+0)
+                       (v128.const f32x4 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0 0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (v128.const i32x4 -1 -1 -1 -1))
+;; fnma tests with negated y, same answers are expected.
+(assert_return (invoke "f32x4.relaxed_nmadd_cmp"
+                       (v128.const f32x4 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0 0x1.000004p+0)
+                       (v128.const f32x4 -0x1.0002p+0 -0x1.0002p+0 -0x1.0002p+0 -0x1.0002p+0)
+                       (v128.const f32x4 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0 -0x1.000204p+0))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+;; DBL_MAX = 0x1.fffffffffffffp+1023
+;; DBL_MAX * 2 - DBL_MAX ==
+;;   DBL_MAX (if fma)
+;;   inf     (if no fma, since DBL_MAX * 2 overflows
+;;            to inf before the subtraction)
+;; from https://www.vinc17.net/software/fma-tests.c
+(assert_return (invoke "f64x2.relaxed_madd_cmp"
+                       (v128.const f64x2 0x1.fffffffffffffp+1023 0x1.fffffffffffffp+1023)
+                       (v128.const f64x2 2.0 2.0)
+                       (v128.const f64x2 -0x1.fffffffffffffp+1023 -0x1.fffffffffffffp+1023))
+               (v128.const i64x2 -1 -1))
+
+;; Special values for double:
+;; x            = 0x1.00000004p+0 (1 + 2^-30)
+;; y            = 0x1.000002p+0   (1 + 2^-23)
+;; z            = -(1.0 + 0x0.000002p+0 + 0x0.00000004p+0)
+;;              = -0x1.00000204p+0
+;; x.y          = 1.0 + 0x0.000002p+0 + 0x0.00000004p+0 + 0x1p-53 (round bit)
+;; x.y+z        = 0 (2 roundings)
+;; fma(x, y, z) = 0x1p-53
+;; from https://accurate-algorithms.readthedocs.io/en/latest/ch09appendix.html#test-system-information
+(assert_return (invoke "f64x2.relaxed_madd_cmp"
+                       (v128.const f64x2 0x1.00000004p+0 0x1.00000004p+0)
+                       (v128.const f64x2 0x1.000002p+0 0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (v128.const i64x2 -1 -1))
+;; fnma tests with negated x, same answers are expected.
+(assert_return (invoke "f64x2.relaxed_nmadd_cmp"
+                       (v128.const f64x2 -0x1.00000004p+0 -0x1.00000004p+0)
+                       (v128.const f64x2 0x1.000002p+0 0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (v128.const i64x2 -1 -1))
+;; fnma tests with negated y, same answers are expected.
+(assert_return (invoke "f64x2.relaxed_nmadd_cmp"
+                       (v128.const f64x2 0x1.00000004p+0 0x1.00000004p+0)
+                       (v128.const f64x2 -0x1.000002p+0 -0x1.000002p+0)
+                       (v128.const f64x2 -0x1.00000204p+0 -0x1.00000204p+0))
+               (v128.const i64x2 -1 -1))
diff --git a/tests/misc_testsuite/relaxed-simd/relaxed_min_max.wast b/tests/misc_testsuite/relaxed-simd/relaxed_min_max.wast
new file mode 100644
index 000000000000..d8a04ba4df93
--- /dev/null
+++ b/tests/misc_testsuite/relaxed-simd/relaxed_min_max.wast
@@ -0,0 +1,183 @@
+;; Tests for f32x4.relaxed_min, f32x4.relaxed_max, f64x2.relaxed_min, and f64x2.relaxed_max.
+
+(module  ;; one exported wrapper per relaxed min/max instruction, plus a "_cmp" variant of each
+    (func (export "f32x4.relaxed_min") (param v128 v128) (result v128) (f32x4.relaxed_min (local.get 0) (local.get 1)))
+    (func (export "f32x4.relaxed_max") (param v128 v128) (result v128) (f32x4.relaxed_max (local.get 0) (local.get 1)))
+    (func (export "f64x2.relaxed_min") (param v128 v128) (result v128) (f64x2.relaxed_min (local.get 0) (local.get 1)))
+    (func (export "f64x2.relaxed_max") (param v128 v128) (result v128) (f64x2.relaxed_max (local.get 0) (local.get 1)))
+
+    (func (export "f32x4.relaxed_min_cmp") (param v128 v128) (result v128)  ;; "_cmp": evaluate the op twice on the same operands
+          (i32x4.eq  ;; integer lane compare of the two results, so equal lanes come back as all ones (-1)
+            (f32x4.relaxed_min (local.get 0) (local.get 1))
+            (f32x4.relaxed_min (local.get 0) (local.get 1))))
+    (func (export "f32x4.relaxed_max_cmp") (param v128 v128) (result v128)  ;; same check for f32x4.relaxed_max
+          (i32x4.eq
+            (f32x4.relaxed_max (local.get 0) (local.get 1))
+            (f32x4.relaxed_max (local.get 0) (local.get 1))))
+    (func (export "f64x2.relaxed_min_cmp") (param v128 v128) (result v128)  ;; f64x2 variants compare as 64-bit integer lanes
+          (i64x2.eq
+            (f64x2.relaxed_min (local.get 0) (local.get 1))
+            (f64x2.relaxed_min (local.get 0) (local.get 1))))
+    (func (export "f64x2.relaxed_max_cmp") (param v128 v128) (result v128)  ;; same check for f64x2.relaxed_max
+          (i64x2.eq
+            (f64x2.relaxed_max (local.get 0) (local.get 1))
+            (f64x2.relaxed_max (local.get 0) (local.get 1))))
+)
+
+(assert_return (invoke "f32x4.relaxed_min"  ;; one NaN operand per lane; `either` lists every accepted result
+                       (v128.const f32x4 -nan nan 0 0)
+                       (v128.const f32x4 0 0 -nan nan))
+               (either (v128.const f32x4 nan:canonical nan:canonical nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f32x4 nan:canonical nan:canonical 0 0)  ;; lanes of the first operand
+                       (v128.const f32x4 0 0 nan:canonical nan:canonical)  ;; lanes of the second operand
+                       (v128.const f32x4 0 0 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f32x4.relaxed_min"  ;; zeros only: signs differ in lanes 0-1, match in lanes 2-3
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0))
+               (either (v128.const f32x4 -0.0 -0.0 +0.0 -0.0)  ;; -0.0 where the signs differ
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)  ;; lanes of the first operand
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0)  ;; lanes of the second operand
+                       (v128.const f32x4 -0.0 -0.0 +0.0 -0.0)))  ;; same lanes as the first alternative
+
+(assert_return (invoke "f32x4.relaxed_max"  ;; one NaN operand per lane
+                       (v128.const f32x4 -nan nan 0 0)
+                       (v128.const f32x4 0 0 -nan nan))
+               (either (v128.const f32x4 nan:canonical nan:canonical nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f32x4 nan:canonical nan:canonical 0 0)  ;; lanes of the first operand
+                       (v128.const f32x4 0 0 nan:canonical nan:canonical)  ;; lanes of the second operand
+                       (v128.const f32x4 0 0 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f32x4.relaxed_max"  ;; zeros only: signs differ in lanes 0-1, match in lanes 2-3
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0))
+               (either (v128.const f32x4 +0.0 +0.0 +0.0 -0.0)  ;; +0.0 where the signs differ
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)  ;; lanes of the first operand
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0)  ;; lanes of the second operand
+                       (v128.const f32x4 -0.0 -0.0 +0.0 -0.0)))  ;; -0.0 where the signs differ
+
+(assert_return (invoke "f64x2.relaxed_min"  ;; NaN in the first operand; `either` lists every accepted result
+                       (v128.const f64x2 -nan nan)
+                       (v128.const f64x2 0 0))
+               (either (v128.const f64x2 nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f64x2 nan:canonical nan:canonical)  ;; lanes of the first operand
+                       (v128.const f64x2 0 0)  ;; lanes of the second operand
+                       (v128.const f64x2 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f64x2.relaxed_min"  ;; NaN in the second operand
+                       (v128.const f64x2 0 0)
+                       (v128.const f64x2 -nan nan))
+               (either (v128.const f64x2 nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f64x2 0 0)  ;; lanes of the first operand
+                       (v128.const f64x2 nan:canonical nan:canonical)  ;; lanes of the second operand
+                       (v128.const f64x2 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f64x2.relaxed_min"  ;; zeros whose signs differ in both lanes
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 -0.0 +0.0))
+               (either (v128.const f64x2 -0.0 -0.0)  ;; -0.0 in every lane
+                       (v128.const f64x2 +0.0 -0.0)  ;; lanes of the first operand
+                       (v128.const f64x2 -0.0 +0.0)  ;; lanes of the second operand
+                       (v128.const f64x2 -0.0 -0.0)))  ;; same lanes as the first alternative
+
+(assert_return (invoke "f64x2.relaxed_min"  ;; identical operands: every alternative equals the operand
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0))
+               (either (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)))
+
+(assert_return (invoke "f64x2.relaxed_max"  ;; NaN in the first operand; `either` lists every accepted result
+                       (v128.const f64x2 -nan nan)
+                       (v128.const f64x2 0 0))
+               (either (v128.const f64x2 nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f64x2 nan:canonical nan:canonical)  ;; lanes of the first operand
+                       (v128.const f64x2 0 0)  ;; lanes of the second operand
+                       (v128.const f64x2 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f64x2.relaxed_max"  ;; NaN in the second operand
+                       (v128.const f64x2 0 0)
+                       (v128.const f64x2 -nan nan))
+               (either (v128.const f64x2 nan:canonical nan:canonical)  ;; NaN in every lane
+                       (v128.const f64x2 0 0)  ;; lanes of the first operand
+                       (v128.const f64x2 nan:canonical nan:canonical)  ;; lanes of the second operand
+                       (v128.const f64x2 0 0)))  ;; the non-NaN operand in every lane
+
+(assert_return (invoke "f64x2.relaxed_max"  ;; zeros whose signs differ in both lanes
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 -0.0 +0.0))
+               (either (v128.const f64x2 +0.0 +0.0)  ;; +0.0 in every lane
+                       (v128.const f64x2 +0.0 -0.0)  ;; lanes of the first operand
+                       (v128.const f64x2 -0.0 +0.0)  ;; lanes of the second operand
+                       (v128.const f64x2 -0.0 -0.0)))  ;; -0.0 in every lane
+
+(assert_return (invoke "f64x2.relaxed_max"  ;; identical operands: every alternative equals the operand
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0))
+               (either (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0)))
+
+;; Check that repeated evaluations of a relaxed instruction with the same inputs return the same result (the "_cmp" exports yield all-ones lanes).
+
+(assert_return (invoke "f32x4.relaxed_min_cmp"  ;; NaN operands
+                       (v128.const f32x4 -nan nan 0 0)
+                       (v128.const f32x4 0 0 -nan nan))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "f32x4.relaxed_min_cmp"  ;; signed zeros
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "f32x4.relaxed_max_cmp"  ;; NaN operands
+                       (v128.const f32x4 -nan nan 0 0)
+                       (v128.const f32x4 0 0 -nan nan))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "f32x4.relaxed_max_cmp"  ;; signed zeros
+                       (v128.const f32x4 +0.0 -0.0 +0.0 -0.0)
+                       (v128.const f32x4 -0.0 +0.0 +0.0 -0.0))
+               (v128.const i32x4 -1 -1 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_min_cmp"  ;; NaN in the first operand
+                       (v128.const f64x2 -nan nan)
+                       (v128.const f64x2 0 0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_min_cmp"  ;; NaN in the second operand
+                       (v128.const f64x2 0 0)
+                       (v128.const f64x2 -nan nan))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_min_cmp"  ;; zeros with differing signs
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 -0.0 +0.0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_min_cmp"  ;; identical signed-zero operands
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_max_cmp"  ;; NaN in the first operand
+                       (v128.const f64x2 -nan nan)
+                       (v128.const f64x2 0 0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_max_cmp"  ;; NaN in the second operand
+                       (v128.const f64x2 0 0)
+                       (v128.const f64x2 -nan nan))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_max_cmp"  ;; zeros with differing signs
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 -0.0 +0.0))
+               (v128.const i64x2 -1 -1))
+
+(assert_return (invoke "f64x2.relaxed_max_cmp"  ;; identical signed-zero operands
+                       (v128.const f64x2 +0.0 -0.0)
+                       (v128.const f64x2 +0.0 -0.0))
+               (v128.const i64x2 -1 -1))