From 3cab6443dd3aad5679111f0307381db2a3ed83d2 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 27 Jun 2023 02:15:01 +0100 Subject: [PATCH] riscv64: Implement SIMD `fcmp` (#6643) * riscv64: Add float vector mask instructions * riscv64: Add some vector mask bitwise instructions * riscv64: Implement SIMD `fcmp` * cranelift: Add SIMD `fcmp` testsuite --- build.rs | 2 - cranelift/codegen/src/isa/riscv64/inst/mod.rs | 3 + .../codegen/src/isa/riscv64/inst/vector.rs | 52 ++- .../codegen/src/isa/riscv64/inst_vector.isle | 232 +++++++++++++ cranelift/codegen/src/isa/riscv64/lower.isle | 6 +- .../filetests/isa/riscv64/simd-fcmp-eq.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ge.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-gt.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-le.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-lt.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ne.clif | 272 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-one.clif | 296 +++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ord.clif | 304 ++++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ueq.clif | 296 +++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-uge.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ugt.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ule.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-ult.clif | 284 ++++++++++++++++ .../filetests/isa/riscv64/simd-fcmp-uno.clif | 304 ++++++++++++++++++ .../filetests/runtests/simd-fcmp-eq.clif | 88 +++++ .../filetests/runtests/simd-fcmp-ge.clif | 86 +++++ .../filetests/runtests/simd-fcmp-gt.clif | 90 ++++++ .../filetests/runtests/simd-fcmp-le.clif | 76 +++++ .../filetests/runtests/simd-fcmp-lt.clif | 86 +++++ .../filetests/runtests/simd-fcmp-ne.clif | 78 +++++ .../filetests/runtests/simd-fcmp-one.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ord.clif | 78 +++++ .../filetests/runtests/simd-fcmp-ueq.clif | 75 +++++ .../filetests/runtests/simd-fcmp-uge.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ugt.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ule.clif | 75 +++++ .../filetests/runtests/simd-fcmp-ult.clif | 75 +++++ .../filetests/runtests/simd-fcmp-uno.clif | 88 +++++ .../filetests/runtests/simd-fcmp.clif | 60 ---- 34 files changed, 5368 insertions(+), 75 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif create mode 100644 
cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-le.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-one.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-fcmp.clif diff --git a/build.rs b/build.rs index 96935c823edb..3c506a4ebecf 100644 --- a/build.rs +++ b/build.rs @@ -234,11 +234,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "issue_3327_bnot_lowering", "simd_conversions", "simd_f32x4", - "simd_f32x4_cmp", "simd_f32x4_pmin_pmax", "simd_f32x4_rounding", "simd_f64x2", - "simd_f64x2_cmp", "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", "simd_i32x4_trunc_sat_f32x4", diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 93f3772a4676..239c479efbd4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1744,6 +1744,9 @@ impl Inst { (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 69b485ac14aa..9fc038bade8d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -357,14 +357,30 @@ impl VecAluOpRRR { VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, - VecAluOpRRR::VmseqVV | VecAluOpRRR::VmseqVX => 0b011000, - VecAluOpRRR::VmsneVV | VecAluOpRRR::VmsneVX => 0b011001, - VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX => 0b011010, - VecAluOpRRR::VmsltVV | VecAluOpRRR::VmsltVX => 0b011011, - VecAluOpRRR::VmsleuVV | VecAluOpRRR::VmsleuVX => 0b011100, - VecAluOpRRR::VmsleVV | VecAluOpRRR::VmsleVX => 0b011101, - VecAluOpRRR::VmsgtuVX => 0b011110, - VecAluOpRRR::VmsgtVX => 0b011111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | 
VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, } } @@ -408,7 +424,11 @@ impl VecAluOpRRR { | VecAluOpRRR::VmulhuVV | VecAluOpRRR::VredmaxuVS | VecAluOpRRR::VredminuVS - | VecAluOpRRR::VcompressVM => VecOpCategory::OPMVV, + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, VecAluOpRRR::VwaddVX | VecAluOpRRR::VwadduVX | VecAluOpRRR::VwadduWX @@ -453,14 +473,24 @@ impl VecAluOpRRR { | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfdivVV - | VecAluOpRRR::VfsgnjnVV => VecOpCategory::OPFVV, + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, VecAluOpRRR::VfaddVF | VecAluOpRRR::VfsubVF | VecAluOpRRR::VfrsubVF | VecAluOpRRR::VfmulVF | VecAluOpRRR::VfdivVF | VecAluOpRRR::VfrdivVF - | VecAluOpRRR::VfmergeVFM => VecOpCategory::OPFVF, + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 2a52f4f95803..a51e96cdf4d7 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -134,6 +134,14 @@ (VmsltVV) (VmsleuVV) (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) ;; Vector-Scalar Opcodes @@ -184,6 +192,12 @@ (VmsleVX) (VmsgtuVX) (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) )) @@ -1111,6 +1125,66 @@ (decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) (rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmflt_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction.
+(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfle_vv vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction.
+(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfle_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction.
+;; This is an alias for `vmflt.vv` with the operands inverted.
+(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate))
+
+;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction.
+(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfgt_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate))
+
+;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction.
+;; This is an alias for `vmfle.vv` with the operands inverted.
+(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg)
+(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate))
+
+;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction.
+(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg)
+(rule (rv_vmfge_vf vs2 vs1 mask vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate))
+
;; Helper for emitting the `vzext.vf2` instruction.
;; Zero-extend SEW/2 source to SEW destination
(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg)
@@ -1161,6 +1235,40 @@
(rule (rv_vnclipu_wi vs2 imm mask vstate)
(vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate))
+;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction.
+;;
+;; vd.mask[i] = vs2.mask[i] && vs1.mask[i]
+(decl rv_vmand_mm (VReg VReg VState) VReg)
+(rule (rv_vmand_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction.
+;;
+;; vd.mask[i] = vs2.mask[i] || vs1.mask[i]
+(decl rv_vmor_mm (VReg VReg VState) VReg)
+(rule (rv_vmor_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction.
+;;
+;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i])
+(decl rv_vmnand_mm (VReg VReg VState) VReg)
+(rule (rv_vmnand_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate))
+
+;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction.
+;; This is an alias for `vmnand.mm vd, vs, vs`
+;;
+;; vd.mask[i] = !vs.mask[i]
+(decl rv_vmnot_m (VReg VState) VReg)
+(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate))
+
+;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction.
+;;
+;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i])
+(decl rv_vmnor_mm (VReg VReg VState) VReg)
+(rule (rv_vmnor_mm vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate))
;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1378,3 +1486,127 @@
(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (replicated_imm5 x) y)
(rv_vmsle_vi y x (unmasked) ty))
+
+
+
+;; Builds a vector mask corresponding to the FloatCC operation.
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg)
+
+;; FloatCC.Equal
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y)
+ (rv_vmfeq_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y))
+ (rv_vmfeq_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y)
+ (rv_vmfeq_vf y x (unmasked) ty))
+
+;; FloatCC.NotEqual
+;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such.
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y)
+ (rv_vmfne_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y))
+ (rv_vmfne_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y)
+ (rv_vmfne_vf y x (unmasked) ty))
+
+;; FloatCC.LessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y)
+ (rv_vmflt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y))
+ (rv_vmflt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y)
+ (rv_vmfgt_vf y x (unmasked) ty))
+
+;; FloatCC.LessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y)
+ (rv_vmfle_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y))
+ (rv_vmfle_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y)
+ (rv_vmfge_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y)
+ (rv_vmfgt_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y))
+ (rv_vmfgt_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y)
+ (rv_vmflt_vf y x (unmasked) ty))
+
+;; FloatCC.GreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y)
+ (rv_vmfge_vv x y (unmasked) ty))
+
+(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y))
+ (rv_vmfge_vf x y (unmasked) ty))
+
+(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y)
+ (rv_vmfle_vf y x (unmasked) ty))
+
+;; FloatCC.Ordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y)
+ (rv_vmand_mm
+ (gen_fcmp_mask ty (FloatCC.Equal) x x)
+ (gen_fcmp_mask ty (FloatCC.Equal) y y)
+ ty))
+
+;; FloatCC.Unordered
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y)
+ (rv_vmor_mm
+ (gen_fcmp_mask ty (FloatCC.NotEqual) x x)
+ (gen_fcmp_mask ty (FloatCC.NotEqual) y y)
+ ty))
+
+;; FloatCC.OrderedNotEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y)
+ (rv_vmor_mm
+ (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+ (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+ ty))
+
+;; FloatCC.UnorderedOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y)
+ (rv_vmnor_mm
+ (gen_fcmp_mask ty (FloatCC.LessThan) x y)
+ (gen_fcmp_mask ty (FloatCC.LessThan) y x)
+ ty))
+
+;; FloatCC.UnorderedOrGreaterThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrGreaterThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty))
+
+;; FloatCC.UnorderedOrLessThan
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty))
+
+;; FloatCC.UnorderedOrLessThanOrEqual
+
+(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y)
+ (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty))
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index d8193998064c..bc46b8279323 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -1462,10 +1462,12 @@
;;;;; Rules for `fcmp`;;;;;;;;;
-(rule
- (lower (fcmp cc x @ (value_type ty) y))
+(rule 0 (lower (fcmp cc x @ (value_type (ty_scalar_float ty)) y))
(cmp_value (emit_fcmp cc ty x y)))
+(rule 1 (lower (fcmp cc x @ (value_type (ty_vec_fits_in_register ty)) y))
+ (gen_expand_mask ty (gen_fcmp_mask ty cc x y)))
+
;;;;; Rules for `func_addr`;;;;;;;;;
(rule (lower (func_addr (func_ref_data _ name _)))
diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif
new file mode 100644
index 000000000000..f6c6331fc17b
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-eq.clif
@@ -0,0 +1,272 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_v
+
+function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 {
+block0(v0: f32x4, v1: f32x4):
+ v2 = fcmp eq v0, v1
+ return v2
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
+; vmfeq.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma)
+; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
+; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
+; ld ra,8(sp)
+; ld fp,0(sp)
+; add sp,+16
+; ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+; addi sp, sp, -0x10
+; sd ra, 8(sp)
+; sd s0, 0(sp)
+; ori s0, sp, 0
+; block1: ; offset 0x10
+; .byte 0x57, 0x70, 0x08, 0xcc
+; addi t6, s0, 0x10
+; .byte 0x87, 0x80, 0x0f, 0x02
+; addi t6, s0, 0x20
+; .byte 0x87, 0x81, 0x0f, 0x02
+; .byte 0x57, 0x70, 0x02, 0xcd
+; .byte 0x57, 0x90, 0x11, 0x62
+; .byte 0x57, 0x34, 0x00, 0x5e
+; .byte 0x57, 0xb5, 0x8f, 0x5c
+; .byte 0x57, 0x70, 0x08, 0xcc
+; .byte 0x27, 0x05, 0x05, 0x02
+; ld ra, 8(sp)
+; ld s0, 0(sp)
+; addi sp, sp, 0x10
+; ret
+
+function %simd_fcmp_splat_rhs_eq_f32(f32x4, f32) -> i32x4 {
+block0(v0: f32x4, v1: f32):
+ v2 = splat.f32x4 v1
+ v3 = fcmp eq v0, v2
+ return v3
+}
+
+; VCode:
+; add sp,-16
+; sd ra,8(sp)
+; sd fp,0(sp)
+; mv fp,sp
+; block0:
+; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta,
ma) +; vmfeq.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_eq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; 
block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif new file mode 100644 index 000000000000..6cff3a6b9725 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ge.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x30, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x30, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ge_f64(f64x2, f64) -> 
i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif new file mode 100644 index 000000000000..25c967a3028c --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-gt.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp gt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, 
v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_gt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp gt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) 
#avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif new file mode 100644 index 000000000000..52ff43c7e4e3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-le.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp le v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, 
ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_le_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp le v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 
0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif new file mode 100644 index 000000000000..f946cb1bec16 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-lt.clif @@ -0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp lt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 
0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_lt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp lt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif new file mode 100644 index 000000000000..ef8f09e10532 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ne.clif @@ 
-0,0 +1,272 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x90, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v7,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ne_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, 
ta, ma) +; vmfne.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x90, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vf v0,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x50, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif new file mode 100644 index 000000000000..def2636720e4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-one.clif @@ -0,0 +1,296 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_one_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp one v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv 
v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_one_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp one v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) 
+; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x6a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git 
a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif new file mode 100644 index 000000000000..81ac612b3a8b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ord.clif @@ -0,0 +1,304 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ord_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ord v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vf v8,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x54, 0xa5, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vf v6,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfeq.vv v8,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v 
v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x62 +; .byte 0x57, 0x94, 0x10, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ord_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ord v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x94, 0x31, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vf v8,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x62 +; .byte 0x57, 0x54, 0xa5, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, 
#vtype=(e64, m1, ta, ma) +; vmfeq.vf v6,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfeq.vv v8,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmand.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x62 +; .byte 0x57, 0x94, 0x10, 0x62 +; .byte 0x57, 0x20, 0x64, 0x66 +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif new file mode 100644 index 000000000000..17df6ffa5a2b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ueq.clif @@ -0,0 +1,296 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ueq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ueq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x7a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; 
.byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v11,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ueq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ueq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vv v8,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x94, 0x30, 0x6e +; .byte 0x57, 0x20, 0x64, 0x7a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfgt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; 
block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0xd7, 0x53, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmflt.vf v7,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnor.mm v0,v5,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v11,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v13,v11,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v13,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0xd7, 0x53, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x53, 0x7a +; .byte 0xd7, 0x35, 0x00, 0x5e +; .byte 0xd7, 0xb6, 0xbf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif new file mode 100644 index 000000000000..26bdfa36e428 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uge.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_uge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, 
#vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_uge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, 
ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif new file mode 100644 index 000000000000..ff53c69d8621 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ugt.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ugt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x11, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, 
#vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ugt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x11, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; 
vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif new file mode 100644 index 000000000000..e5cd8a93c901 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ule.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ule_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x30, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v0, v2 + return v3 +} + +; 
VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfgt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ule_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vv v6,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x30, 0x6e +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; 
vmfgt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x76 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmflt.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x6e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif new file mode 100644 index 000000000000..dd8d9fb411fe --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-ult.clif @@ -0,0 +1,284 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_ult_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v6 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v10,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x30, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ult_f32(f32x4, f32) -> 
i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmnot.m v0,v5 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v9,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_ult_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vv v6,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v6 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v10,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v12,v10,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v12,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x30, 0x66 +; .byte 0x57, 0x20, 0x63, 0x76 +; .byte 0x57, 0x35, 0x00, 0x5e +; .byte 0x57, 0xb6, 0xaf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x06, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd 
ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfge.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x7e +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfle.vf v5,v1,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmnot.m v0,v5 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v9,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v11,v9,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v11,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x52, 0x15, 0x66 +; .byte 0x57, 0xa0, 0x52, 0x76 +; .byte 0xd7, 0x34, 0x00, 0x5e +; .byte 0xd7, 0xb5, 0x9f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif new file mode 100644 index 000000000000..1c62186204fc --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-fcmp-uno.clif @@ -0,0 +1,304 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uno v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v8,v3,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x94, 0x31, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 
0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vf v8,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x54, 0xa5, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vf v6,v10,fa0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmfne.vv v8,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v12,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x72 +; .byte 0x57, 0x94, 0x10, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_uno_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uno v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v8,v3,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x94, 0x31, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_rhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v6,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vf v8,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x93, 0x10, 0x72 +; .byte 0x57, 0x54, 0xa5, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_fcmp_splat_lhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vfmv.v.f v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vf v6,v10,fa0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmfne.vv v8,v1,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmor.mm v0,v6,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v12,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v14,v12,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x55, 0x05, 0x5e +; .byte 0x57, 0x53, 0xa5, 0x72 +; .byte 0x57, 0x94, 0x10, 0x72 +; .byte 0x57, 0x20, 0x64, 0x6a +; .byte 0x57, 0x36, 0x00, 0x5e +; .byte 0x57, 0xb7, 0xcf, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x07, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif new file mode 100644 index 000000000000..fec7a57179d5 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-eq.clif @@ -0,0 +1,88 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_eq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: 
f32x4): + v2 = fcmp eq v0, v1 + return v2 +} +; run: %simd_fcmp_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 0 0] +; run: %simd_fcmp_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 0] +; run: %simd_fcmp_eq_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_eq_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_eq_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 0 0] + +function %simd_fcmp_splat_rhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_eq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_eq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_eq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp eq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_eq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_eq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_eq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_eq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp eq v0, v1 + return v2 +} +; run: %simd_fcmp_eq_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_eq_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_eq_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_eq_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_eq_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + + +function %simd_fcmp_splat_rhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_eq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_eq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_eq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_eq_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_eq_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_eq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp eq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_eq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_eq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_eq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_eq_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_eq_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_eq_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v2 = fcmp eq v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_eq_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif new file mode 100644 index 000000000000..02169c1ca6f4 --- /dev/null +++ 
b/cranelift/filetests/filetests/runtests/simd-fcmp-ge.clif @@ -0,0 +1,86 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ge v0, v1 + return v2 +} +; run: %simd_fcmp_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 -1 0] +; run: %simd_fcmp_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ge_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ge_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_ge_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 0 -1] + +function %simd_fcmp_splat_rhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ge_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_ge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_ge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ge v0, v1 + return v2 +} +; run: %simd_fcmp_ge_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_ge_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ge_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_ge_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_ge_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + +function %simd_fcmp_splat_rhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ge_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ge_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_ge_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_ge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ge_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_ge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ge_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ge_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_ge_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_ge_f64x2() -> i8 { +block0: + v0 = vconst.f64x2 [0x0.0 0x4.2] + v1 = vconst.f64x2 [0.0 0x4.1] + v2 = fcmp ge v0, v1 + v8 = vall_true v2 + 
return v8 +} +; run: %fcmp_ge_f64x2() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif new file mode 100644 index 000000000000..e5bcf192ac01 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-gt.clif @@ -0,0 +1,90 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_gt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp gt v0, v1 + return v2 +} +; run: %simd_fcmp_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 -1 0] +; run: %simd_fcmp_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 -1] +; run: %simd_fcmp_gt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_gt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_gt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 0 -1] + +function %simd_fcmp_splat_rhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_gt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_gt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_gt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_gt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp gt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_gt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_gt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_gt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp gt v0, v1 + return v2 +} +; run: %simd_fcmp_gt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_gt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_gt_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_gt_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_gt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + +function %simd_fcmp_splat_rhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_gt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_gt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_gt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_gt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_gt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_gt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp gt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_gt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_gt_f64([-NaN NaN], NaN) == [0 0] +; 
run: %simd_fcmp_splat_lhs_gt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_gt_nans_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] + v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] + v2 = fcmp gt v0, v1 + ; now check that the result v2 is all zeroes + v3 = vconst.i32x4 0x00 + v4 = bitcast.i32x4 v2 + v5 = icmp eq v3, v4 + v8 = vall_true v5 + return v8 +} +; run: %fcmp_gt_nans_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif new file mode 100644 index 000000000000..36739b1eb63a --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-le.clif @@ -0,0 +1,76 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_le_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp le v0, v1 + return v2 +} +; run: %simd_fcmp_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 0 -1] +; run: %simd_fcmp_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 0] +; run: %simd_fcmp_le_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_le_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_le_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 -1 0] + +function %simd_fcmp_splat_rhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_le_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_le_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_le_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp le v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_le_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_le_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_le_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_le_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp le v0, v1 + return v2 +} +; run: %simd_fcmp_le_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_le_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_le_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_le_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_le_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + +function %simd_fcmp_splat_rhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_le_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_le_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_le_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_le_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_le_f64([NaN -0x0.0], -NaN) == [0 0] + +function 
%simd_fcmp_splat_lhs_le_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp le v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_le_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_le_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_le_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif new file mode 100644 index 000000000000..e15cd1f8e1a8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-lt.clif @@ -0,0 +1,86 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_lt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp lt v0, v1 + return v2 +} +; run: %simd_fcmp_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 0 -1] +; run: %simd_fcmp_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 0] +; run: %simd_fcmp_lt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_lt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_lt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 -1 0] + +function %simd_fcmp_splat_rhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_lt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_splat_lhs_lt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp lt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_lt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_lt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_lt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_lt_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_lt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_lt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp lt v0, v1 + return v2 +} +; run: %simd_fcmp_lt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_lt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_lt_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_lt_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_lt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + +function %simd_fcmp_splat_rhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_lt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_lt_f64([NaN -0x0.0], -NaN) == 
[0 0] + +function %simd_fcmp_splat_lhs_lt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp lt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_lt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_lt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_lt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_lt_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_lt_f64([NaN -0x0.0], -NaN) == [0 0] + +function %fcmp_lt_f32x4() -> i8 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0] + v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0] + v2 = fcmp lt v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_lt_f32x4() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif new file mode 100644 index 000000000000..0913bf3e206f --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ne.clif @@ -0,0 +1,78 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ne_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ne v0, v1 + return v2 +} +; run: %simd_fcmp_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 -1 -1] +; run: %simd_fcmp_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 -1] +; run: %simd_fcmp_ne_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ne_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ne_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 -1 -1] + +function %simd_fcmp_splat_rhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ne_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ne_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_ne_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ne v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ne_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ne_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ne_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ne_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ne v0, v1 + return v2 +} +; run: %simd_fcmp_ne_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_ne_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ne_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_ne_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ne_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + + +function %simd_fcmp_splat_rhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v0, v2 + return v3 +} +; run: 
%simd_fcmp_splat_rhs_ne_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ne_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ne_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ne_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ne v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ne_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ne_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ne_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif new file mode 100644 index 000000000000..d22daf95cbc7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-one.clif @@ -0,0 +1,75 @@ +test run +target s390x +target riscv64 has_v + +function %simd_fcmp_one_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp one v0, v1 + return v2 +} +; run: %simd_fcmp_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 -1 -1] +; run: %simd_fcmp_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 -1] +; run: %simd_fcmp_one_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_one_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_one_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 0 -1 -1] + +function %simd_fcmp_splat_rhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_one_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_one_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_one_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_one_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp one v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_one_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_one_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_one_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_one_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_one_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_one_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp one v0, v1 + return v2 +} +; run: %simd_fcmp_one_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 -1] +; run: %simd_fcmp_one_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_one_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_one_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_one_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 0] + + +function %simd_fcmp_splat_rhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_one_f64([0x0.5 
0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_one_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_one_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_one_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_one_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_one_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp one v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_one_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_one_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_one_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_one_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_one_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif new file mode 100644 index 000000000000..8b9a1af073de --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ord.clif @@ -0,0 +1,78 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ord_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ord v0, v1 + return v2 +} +; run: %simd_fcmp_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ord_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [0 0 0 0] +; run: %simd_fcmp_ord_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [0 -1 -1 -1] + +function %simd_fcmp_splat_rhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ord_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ord_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + + +function %simd_fcmp_splat_lhs_ord_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ord v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ord_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ord_f32([-NaN NaN -NaN NaN], NaN) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ord_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [0 0 0 0] + +function %simd_fcmp_ord_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ord v0, v1 + return v2 +} +; run: %simd_fcmp_ord_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_ord_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ord_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_ord_f64([-NaN NaN], [NaN NaN]) == [0 0] +; run: %simd_fcmp_ord_f64([NaN -0x0.0], [-NaN 0x0.0]) == [0 -1] + + +function %simd_fcmp_splat_rhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + 
v2 = splat.f64x2 v1 + v3 = fcmp ord v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ord_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ord_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_rhs_ord_f64([NaN -0x0.0], -NaN) == [0 0] + +function %simd_fcmp_splat_lhs_ord_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ord v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ord_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ord_f64([-NaN NaN], NaN) == [0 0] +; run: %simd_fcmp_splat_lhs_ord_f64([NaN -0x0.0], -NaN) == [0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif new file mode 100644 index 000000000000..f248b6c268d0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ueq.clif @@ -0,0 +1,75 @@ +test run +target s390x +target riscv64 has_v + +function %simd_fcmp_ueq_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ueq v0, v1 + return v2 +} +; run: %simd_fcmp_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 0 0] +; run: %simd_fcmp_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 0] +; run: %simd_fcmp_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ueq_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ueq_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 0 0] + +function %simd_fcmp_splat_rhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ueq_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_ueq_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ueq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ueq_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_ueq_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ueq_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ueq_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ueq v0, v1 + return v2 +} +; run: %simd_fcmp_ueq_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_ueq_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_ueq_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_ueq_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ueq_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 -1] + + +function %simd_fcmp_splat_rhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = 
splat.f64x2 v1 + v3 = fcmp ueq v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ueq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_ueq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ueq_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ueq_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ueq_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ueq v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ueq_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_ueq_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ueq_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ueq_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif new file mode 100644 index 000000000000..c1c39d654945 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-uge.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_uge_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uge v0, v1 + return v2 +} +; run: %simd_fcmp_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 0 -1 0] +; run: %simd_fcmp_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_uge_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uge_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uge_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 0 -1] + +function %simd_fcmp_splat_rhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_uge_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uge_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_lhs_uge_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uge_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uge_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uge_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_uge_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp uge v0, v1 + return v2 +} +; run: %simd_fcmp_uge_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 0] +; run: %simd_fcmp_uge_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; run: %simd_fcmp_uge_f64([+Inf +Inf], [-Inf +Inf]) == [-1 -1] +; run: %simd_fcmp_uge_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_uge_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 
-1] + +function %simd_fcmp_splat_rhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uge_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uge_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_uge_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uge v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uge_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_lhs_uge_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uge_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_uge_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uge_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif new file mode 100644 index 000000000000..c82d54dc6011 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ugt.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ugt_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ugt v0, v1 + return v2 +} +; run: %simd_fcmp_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 -1 0] +; run: %simd_fcmp_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 -1] +; run: %simd_fcmp_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ugt_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ugt_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 0 -1] + +function %simd_fcmp_splat_rhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ugt_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ugt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ugt_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ugt_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ugt_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ugt_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ugt v0, v1 + return v2 +} +; run: %simd_fcmp_ugt_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_ugt_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ugt_f64([+Inf +Inf], [-Inf +Inf]) == [-1 0] +; run: %simd_fcmp_ugt_f64([-NaN NaN], 
[NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ugt_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + +function %simd_fcmp_splat_rhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ugt_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ugt_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ugt_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ugt_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ugt v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ugt_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_ugt_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ugt_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif new file mode 100644 index 000000000000..fc64250fe8a7 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ule.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ule_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ule v0, v1 + return v2 +} +; run: %simd_fcmp_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [-1 -1 0 -1] +; run: %simd_fcmp_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [-1 -1 -1 0] +; run: %simd_fcmp_ule_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [-1 -1 0 -1] +; run: %simd_fcmp_ule_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ule_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 -1 -1 0] + +function %simd_fcmp_splat_rhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 0 0 0] +; run: %simd_fcmp_splat_rhs_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ule_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ule_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ule_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ule_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ule v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ule_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ule_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ule_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ule v0, v1 + return v2 +} +; run: %simd_fcmp_ule_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [-1 -1] +; run: %simd_fcmp_ule_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [-1 -1] +; 
run: %simd_fcmp_ule_f64([+Inf +Inf], [-Inf +Inf]) == [0 -1] +; run: %simd_fcmp_ule_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ule_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 -1] + +function %simd_fcmp_splat_rhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ule_f64([0x0.5 0x1.5], 0x0.5) == [-1 0] +; run: %simd_fcmp_splat_rhs_ule_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ule_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ule_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ule_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ule_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ule v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ule_f64([0x0.5 0x1.5], 0x0.5) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([0x0.0 -0x0.0], -0x0.0) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ule_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif new file mode 100644 index 000000000000..19ab6a4d9918 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-ult.clif @@ -0,0 +1,75 @@ +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_ult_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp ult v0, v1 + return v2 +} +; run: %simd_fcmp_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 -1 0 -1] +; run: %simd_fcmp_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 -1 0] +; run: %simd_fcmp_ult_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_ult_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_ult_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 -1 0] + +function %simd_fcmp_splat_rhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_ult_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_ult_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_splat_lhs_ult_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp ult v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ult_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_ult_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_ult_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_ult_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + v2 = fcmp ult v0, v1 + return v2 +} +; run: %simd_fcmp_ult_f64([0x0.5 0x1.5], [0x0.5 
0x2.9]) == [0 -1] +; run: %simd_fcmp_ult_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_ult_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_ult_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_ult_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + +function %simd_fcmp_splat_rhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_ult_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_ult_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_ult_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_ult_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp ult v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_ult_f64([0x0.5 0x1.5], 0x0.5) == [0 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_ult_f64([+Inf +Inf], -Inf) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_ult_f64([NaN -0x0.0], -NaN) == [-1 -1] diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif b/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif new file mode 100644 index 000000000000..0181a9aa1c61 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-fcmp-uno.clif @@ -0,0 +1,88 @@ +test run +target aarch64 +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v + +function %simd_fcmp_uno_f32(f32x4, f32x4) -> i32x4 { +block0(v0: f32x4, v1: f32x4): + v2 = fcmp uno v0, v1 + return v2 +} +; run: %simd_fcmp_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], [0x0.5 0x2.9 0x1.400000p1 0x1.800000p0]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], [-0x0.0 0x0.0 +Inf -Inf]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([-0x0.0 0x0.0 +Inf +Inf], [-0x0.0 0x0.0 -Inf +Inf]) == [0 0 0 0] +; run: %simd_fcmp_uno_f32([-NaN NaN -NaN NaN], [NaN NaN -NaN NaN]) == [-1 -1 -1 -1] +; run: %simd_fcmp_uno_f32([NaN -0x0.0 -Inf 0x1.0], [-NaN 0x0.0 +Inf -0x1.0]) == [-1 0 0 0] + +function %simd_fcmp_splat_rhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_rhs_uno_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_rhs_uno_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + + +function %simd_fcmp_splat_lhs_uno_f32(f32x4, f32) -> i32x4 { +block0(v0: f32x4, v1: f32): + v2 = splat.f32x4 v1 + v3 = fcmp uno v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uno_f32([0x0.5 0x1.5 0x1.1p10 0x1.4cccccp0], 0x0.5) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([0x0.0 -0x0.0 -0x0.0 0x0.0], -0x0.0) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([-0x0.0 0x0.0 +Inf +Inf], -Inf) == [0 0 0 0] +; run: %simd_fcmp_splat_lhs_uno_f32([-NaN NaN -NaN NaN], NaN) == [-1 -1 -1 -1] +; run: %simd_fcmp_splat_lhs_uno_f32([NaN -0x0.0 -Inf 0x1.0], -NaN) == [-1 -1 -1 -1] + +function %simd_fcmp_uno_f64(f64x2, f64x2) -> i64x2 { +block0(v0: f64x2, v1: f64x2): + 
v2 = fcmp uno v0, v1 + return v2 +} +; run: %simd_fcmp_uno_f64([0x0.5 0x1.5], [0x0.5 0x2.9]) == [0 0] +; run: %simd_fcmp_uno_f64([0x0.0 -0x0.0], [-0x0.0 0x0.0]) == [0 0] +; run: %simd_fcmp_uno_f64([+Inf +Inf], [-Inf +Inf]) == [0 0] +; run: %simd_fcmp_uno_f64([-NaN NaN], [NaN NaN]) == [-1 -1] +; run: %simd_fcmp_uno_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0] + + +function %simd_fcmp_splat_rhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v0, v2 + return v3 +} +; run: %simd_fcmp_splat_rhs_uno_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_rhs_uno_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_rhs_uno_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %simd_fcmp_splat_lhs_uno_f64(f64x2, f64) -> i64x2 { +block0(v0: f64x2, v1: f64): + v2 = splat.f64x2 v1 + v3 = fcmp uno v2, v0 + return v3 +} +; run: %simd_fcmp_splat_lhs_uno_f64([0x0.5 0x1.5], 0x0.5) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([0x0.0 -0x0.0], -0x0.0) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([+Inf +Inf], -Inf) == [0 0] +; run: %simd_fcmp_splat_lhs_uno_f64([-NaN NaN], NaN) == [-1 -1] +; run: %simd_fcmp_splat_lhs_uno_f64([NaN -0x0.0], -NaN) == [-1 -1] + +function %fcmp_uno_f64x2() -> i8 { +block0: + v0 = vconst.f64x2 [0.0 NaN] + v1 = vconst.f64x2 [NaN 0x4.1] + v2 = fcmp uno v0, v1 + v8 = vall_true v2 + return v8 +} +; run: %fcmp_uno_f64x2() == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-fcmp.clif b/cranelift/filetests/filetests/runtests/simd-fcmp.clif deleted file mode 100644 index c48a8c975f92..000000000000 --- a/cranelift/filetests/filetests/runtests/simd-fcmp.clif +++ /dev/null @@ -1,60 +0,0 @@ -test run -target aarch64 -target s390x -target x86_64 has_sse3 has_ssse3 has_sse41 -target x86_64 has_sse3 has_ssse3 has_sse41 has_avx - - -function %fcmp_eq_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] - v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] - v2 = fcmp eq v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_eq_f32x4() == 1 - -function %fcmp_lt_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0] - v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0] - v2 = fcmp lt v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_lt_f32x4() == 1 - -function %fcmp_ge_f64x2() -> i8 { -block0: - v0 = vconst.f64x2 [0x0.0 0x4.2] - v1 = vconst.f64x2 [0.0 0x4.1] - v2 = fcmp ge v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_ge_f64x2() == 1 - -function %fcmp_uno_f64x2() -> i8 { -block0: - v0 = vconst.f64x2 [0.0 NaN] - v1 = vconst.f64x2 [NaN 0x4.1] - v2 = fcmp uno v0, v1 - v8 = vall_true v2 - return v8 -} -; run: %fcmp_uno_f64x2() == 1 - -function %fcmp_gt_nans_f32x4() -> i8 { -block0: - v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] - v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] - v2 = fcmp gt v0, v1 - ; now check that the result v2 is all zeroes - v3 = vconst.i32x4 0x00 - v4 = bitcast.i32x4 v2 - v5 = icmp eq v3, v4 - v8 = vall_true v5 - return v8 -} -; run: %fcmp_gt_nans_f32x4() == 1
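The `fcmp uno` runtests above exercise the lowering pattern visible in the VCode comments earlier in this patch: each operand is compared against itself with `vmfne.vv` (only NaN lanes compare not-equal to themselves), the two mask registers are combined with `vmor.mm`, and the resulting mask is widened to full all-ones/all-zeros lanes with `vmv.v.i 0` followed by `vmerge.vim -1`. The following is a minimal scalar sketch of that per-lane semantics, not the actual ISLE rules or backend code; the function name and the `[f64; 2]` modelling of an `f64x2` value are illustrative assumptions.

```rust
// Scalar model of the riscv64 `fcmp uno` lowering for an f64x2 vector:
// uno(a, b) per lane is "either operand is NaN", expanded to -1/0 lanes.
fn fcmp_uno_f64x2(a: [f64; 2], b: [f64; 2]) -> [i64; 2] {
    let mut out = [0i64; 2];
    for lane in 0..2 {
        // vmfne.vv va,va / vmfne.vv vb,vb: x != x holds only when x is NaN.
        let a_is_nan = a[lane] != a[lane];
        let b_is_nan = b[lane] != b[lane];
        // vmor.mm: the comparison is unordered if either operand is NaN.
        let unordered = a_is_nan || b_is_nan;
        // vmv.v.i 0 + vmerge.vim -1: expand the mask bit to a full lane value.
        out[lane] = if unordered { -1 } else { 0 };
    }
    out
}

fn main() {
    // Mirrors one of the run lines above:
    // %simd_fcmp_uno_f64([NaN -0x0.0], [-NaN 0x0.0]) == [-1 0]
    assert_eq!(fcmp_uno_f64x2([f64::NAN, -0.0], [-f64::NAN, 0.0]), [-1, 0]);
    println!("ok");
}
```

The `ord` tests are the complement of this pattern (`vmfeq.vv` on each operand and `vmand.mm`), and the same `vmv.v.i`/`vmerge.vim` expansion step is shared by all of the SIMD `fcmp` conditions in this patch.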