From b05a09c06fe336518ca1bf7e5da6c4113374a06d Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 21 Jun 2023 11:36:52 +0100 Subject: [PATCH] riscv64: Implement SIMD `icmp` (#6609) These are implemented as a combination of two steps, mask generation and mask expansion. Our comparison rules only return their results as a mask register, so we need to expand the mask into lane-sized elements. We have 20 (!) comparison instructions, nearly the full table of all IntCC codes in VV, VX and VI formats. However there are some holes in this table. They are: * `vmsltu.vi` * `vmslt.vi` * `vmsgtu.vv` * `vmsgt.vv` * `vmsgeu.*` * `vmsge.*` Most of these can be replaced with the inverted IntCC instruction, however this commit only implements the existing instructions without any inversion and the inverted VV versions of `sgtu`/`sgt`/`sgeu`/`sge` since we need them to get the full icmp functionality. I've split the actual mask expansion into its own separate rule since we are going to need it for the `fcmp` rules as well. The instruction selection for `icmp` is on a separate rule simply because the rules end up less verbose than if they were inlined directly into the `icmp` rule. 
--- build.rs | 4 - .../codegen/src/isa/riscv64/inst/vector.rs | 48 ++- .../codegen/src/isa/riscv64/inst_vector.isle | 254 +++++++++++- cranelift/codegen/src/isa/riscv64/lower.isle | 7 +- .../filetests/isa/riscv64/simd-icmp-eq.clif | 368 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-ne.clif | 364 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-sge.clif | 372 ++++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-sgt.clif | 368 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-sle.clif | 368 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-slt.clif | 370 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-uge.clif | 372 ++++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-ugt.clif | 368 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-ule.clif | 368 +++++++++++++++++ .../filetests/isa/riscv64/simd-icmp-ult.clif | 370 +++++++++++++++++ .../filetests/runtests/simd-icmp-eq.clif | 1 + .../filetests/runtests/simd-icmp-ne.clif | 1 + .../filetests/runtests/simd-icmp-sge.clif | 1 + .../filetests/runtests/simd-icmp-sgt.clif | 1 + .../filetests/runtests/simd-icmp-sle.clif | 1 + .../filetests/runtests/simd-icmp-slt.clif | 1 + .../filetests/runtests/simd-icmp-uge.clif | 1 + .../filetests/runtests/simd-icmp-ugt.clif | 1 + .../filetests/runtests/simd-icmp-ule.clif | 1 + .../filetests/runtests/simd-icmp-ult.clif | 1 + 24 files changed, 3999 insertions(+), 12 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif create mode 100644 
cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif diff --git a/build.rs b/build.rs index 0c858e889390..96935c823edb 100644 --- a/build.rs +++ b/build.rs @@ -241,12 +241,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_f64x2_cmp", "simd_f64x2_pmin_pmax", "simd_f64x2_rounding", - "simd_i16x8_cmp", - "simd_i32x4_cmp", "simd_i32x4_trunc_sat_f32x4", "simd_i32x4_trunc_sat_f64x2", - "simd_i64x2_cmp", - "simd_i8x16_cmp", "simd_load", "simd_splat", ] diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 2d0e83eb68e4..ab77ecbeaeb1 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -357,7 +357,14 @@ impl VecAluOpRRR { VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, - VecAluOpRRR::VmsltVX => 0b011011, + VecAluOpRRR::VmseqVV | VecAluOpRRR::VmseqVX => 0b011000, + VecAluOpRRR::VmsneVV | VecAluOpRRR::VmsneVX => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX => 0b011010, + VecAluOpRRR::VmsltVV | VecAluOpRRR::VmsltVX => 0b011011, + VecAluOpRRR::VmsleuVV | VecAluOpRRR::VmsleuVX => 0b011100, + VecAluOpRRR::VmsleVV | VecAluOpRRR::VmsleVX => 0b011101, + VecAluOpRRR::VmsgtuVX => 0b011110, + VecAluOpRRR::VmsgtVX => 0b011111, } } @@ -381,7 +388,13 @@ impl VecAluOpRRR { | VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxVV | VecAluOpRRR::VmergeVVM - | VecAluOpRRR::VrgatherVV => VecOpCategory::OPIVV, + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, VecAluOpRRR::VwaddVV | 
VecAluOpRRR::VwaddWV | VecAluOpRRR::VwadduVV @@ -427,8 +440,15 @@ impl VecAluOpRRR { | VecAluOpRRR::VmaxVX | VecAluOpRRR::VslidedownVX | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmsltVX - | VecAluOpRRR::VrgatherVX => VecOpCategory::OPIVX, + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, VecAluOpRRR::VfaddVV | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV @@ -522,6 +542,12 @@ impl VecAluOpRRImm5 { VecAluOpRRImm5::VsaddVI => 0b100001, VecAluOpRRImm5::VrgatherVI => 0b001100, VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 0b011111, } } @@ -541,7 +567,13 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VsadduVI | VecAluOpRRImm5::VsaddVI | VecAluOpRRImm5::VrgatherVI - | VecAluOpRRImm5::VmvrV => VecOpCategory::OPIVI, + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, } } @@ -561,7 +593,13 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VxorVI | VecAluOpRRImm5::VmergeVIM | VecAluOpRRImm5::VsadduVI - | VecAluOpRRImm5::VsaddVI => false, + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index fa8d08564b48..256f41ceab32 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -128,6 +128,13 @@ 
(VredminuVS) (VrgatherVV) (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + ;; Vector-Scalar Opcodes (VaddVX) @@ -169,7 +176,14 @@ (VmergeVXM) (VfmergeVFM) (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) )) @@ -199,6 +213,12 @@ ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... ;; The immediate field specifies how many registers should be copied. (VmvrV) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) )) ;; Imm only ALU Ops @@ -969,11 +989,126 @@ (rule (rv_vcompress_vm vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) -;; Helper for emitting the `vmslt.vx` (Vector Mask Set Less Than) instruction. +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction. 
+(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsne_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction. (decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg) (rule (rv_vmslt_vx vs2 vs1 mask vstate) (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate)) +;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. 
+(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsleu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsle_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgtu.vv` (Vector Mask Set If Greater Than, Unsigned) instruction. +;; This is an alias for `vmsltu.vv` with the operands inverted. +(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. 
+(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. +(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + ;; Helper for emitting the `vzext.vf2` instruction. ;; Zero-extend SEW/2 source to SEW destination (decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) @@ -1078,3 +1213,120 @@ (rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) (if-let amt (u64_udiv (ty_lane_count ty) 2)) (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. 
Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (imm5_from_i8 0)) + (if-let neg1 (imm5_from_i8 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (replicated_imm5 y)) + (rv_vmseq_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (replicated_imm5 x) y) + (rv_vmseq_vi y x (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (replicated_imm5 y)) + (rv_vmsne_vi x y (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (replicated_imm5 x) y) + (rv_vmsne_vi y x (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) 
(IntCC.SignedLessThan) x y) + (rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x (splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsleu_vi x y (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (replicated_imm5 y)) + (rv_vmsle_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgtu_vi x y (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (replicated_imm5 y)) + (rv_vmsgt_vi x y (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask 
(ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 6978d0785d8e..3c578e2abdef 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1454,10 +1454,13 @@ result)) ;;;;; Rules for `icmp`;;;;;;;;; -(rule - (lower (icmp cc x @ (value_type ty) y)) +(rule 0 (lower (icmp cc x @ (value_type (ty_int ty)) y)) (lower_icmp cc x y ty)) +(rule 1 (lower (icmp cc x @ (value_type (ty_vec_fits_in_register ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + + ;;;;; Rules for `fcmp`;;;;;;;;; (rule (lower (fcmp cc x @ (value_type ty) y)) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif new file mode 100644 index 000000000000..3f6498ba6d38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-eq.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; 
.byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 
+; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_eq_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x62 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + + + + + +function %simd_icmp_splat_rhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i 
v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x62 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp eq v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, 
ma) +; vmseq.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x62 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp eq v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmseq.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x62 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif new file mode 100644 index 000000000000..e53a2b9a9b34 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ne.clif @@ -0,0 +1,364 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 
+; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ne_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; 
Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x66 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_eq_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) 
+; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x66 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ne v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x66 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_eq_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ne v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsne.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, 
#vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x66 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif new file mode 100644 index 000000000000..2980f22d21f9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sge.clif @@ -0,0 +1,372 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 
0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sge_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sge_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi 
t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sge_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add 
sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv 
v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif 
b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif new file mode 100644 index 000000000000..7719b148c7cb --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sgt.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; 
sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sgt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t 
#avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sgt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sgt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgt.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x7e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sgt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sgt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 
#avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sgt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sgt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgt.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x7e +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sgt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sgt 
v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif new file mode 100644 index 000000000000..fc1a573ce8d8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-sle.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_sle_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 
8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) 
#avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_sle_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_sle_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sle v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, 
ta, ma) +; vmsle.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_sle_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp sle v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x76 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_sle_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = 
icmp sle v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsle.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x76 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_sle_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp sle v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsle.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x76 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 
0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif new file mode 100644 index 000000000000..b5f9ce01942b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-slt.clif @@ -0,0 +1,370 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_slt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld 
fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_slt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_slt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vv v0,v1,v3 
#avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_slt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp slt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmslt.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_slt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp slt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd 
ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x6e +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_slt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp slt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a4,10 +; vmslt.vx v0,v1,a4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a4, zero, 0xa +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x17, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; 
ret + +function %simd_icmp_splat_const_lhs_slt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp slt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmslt.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x6e +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif new file mode 100644 index 000000000000..92d0e41db531 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-uge.clif @@ -0,0 +1,372 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, 
#vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=4, 
#vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_uge_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_uge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 
= icmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_uge_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 
0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_uge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp uge v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_uge_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp uge v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 
0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif new file mode 100644 index 000000000000..c1de9d668a70 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ugt.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ugt_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) 
#avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ugt_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + 
+function %simd_icmp_ugt_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v3,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x30, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ugt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgtu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x7a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 
0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ugt_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v1,v8 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x00, 0x14, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ugt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ugt v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsgtu.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; 
addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x7a +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ugt_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ugt v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v1,v7 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x13, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif new file mode 100644 index 000000000000..d1ed0549676e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ule.clif @@ -0,0 +1,368 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; 
vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i32(i32x4, i32x4) -> 
i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ule_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x72 +; .byte 0x57, 
0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ule_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ule_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 
0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x72 +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ule_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ule v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsleu.vi v0,v1,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v6,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v8,v6,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x30, 0x15, 0x72 +; .byte 0x57, 0x33, 0x00, 0x5e +; .byte 0x57, 0xb4, 0x6f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ule_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ule v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsleu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: 
+; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x72 +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif new file mode 100644 index 000000000000..ceda96cc01ba --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-icmp-ult.clif @@ -0,0 +1,370 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v8,0 #avl=16, #vtype=(e8, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i16(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp 
ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vmv.v.i v8,0 #avl=8, #vtype=(e16, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i32(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vmv.v.i v8,0 #avl=4, #vtype=(e32, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; 
.byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_ult_i64(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vv v0,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x80, 0x11, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x05, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_rhs_ult_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmsltu.vx v0,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 
0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x15, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x84, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_lhs_ult_i64(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = icmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v8,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v8,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v8,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v10,v8,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v10,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x44, 0x05, 0x5e +; .byte 0x57, 0x80, 0x80, 0x6a +; .byte 0x57, 0x34, 0x00, 0x5e +; .byte 0x57, 0xb5, 0x8f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x85, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_rhs_ult_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ult v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a4,10 +; vmsltu.vx v0,v1,a4 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 
+; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a4, zero, 0xa +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x40, 0x17, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %simd_icmp_splat_const_lhs_ult_i64(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 10 + v2 = splat.i64x2 v1 + v3 = icmp ult v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v7,10 #avl=2, #vtype=(e64, m1, ta, ma) +; vmsltu.vv v0,v7,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vmv.v.i v7,0 #avl=2, #vtype=(e64, m1, ta, ma) +; vmerge.vim v9,v7,-1,v0.t #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v9,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x33, 0x05, 0x5e +; .byte 0x57, 0x80, 0x70, 0x6a +; .byte 0xd7, 0x33, 0x00, 0x5e +; .byte 0xd7, 0xb4, 0x7f, 0x5c +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif index f81d29cd30be..203b9bbc0c79 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif @@ -5,6 +5,7 @@ target x86_64 target x86_64 has_avx target aarch64 target s390x +target riscv64 has_v function 
%simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif index 0d1a548d2e71..3a28260e5dde 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif index b6e8cdc1c81d..72a3e22b7e73 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif index b6661c0fe374..56ac1fa63240 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif index d5c3acf56773..cb281e9a0eb5 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_sle_i8(i8x16, 
i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif index 678e755f58c1..9bac309adc2c 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif index b1095c4ebde9..074ef56ecbe3 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif index 708e2bac71fd..04853480ad50 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif index 6b02fae5d035..699600c2c66d 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 { 
block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif index 35ca0b7443fd..c936fe7a8d84 100644 --- a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif +++ b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif @@ -7,6 +7,7 @@ target x86_64 sse42 target x86_64 sse42 has_avx target aarch64 target s390x +target riscv64 has_v function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16):