Skip to content

Commit

Permalink
riscv64: Implement a few misc SIMD instructions (bytecodealliance#6598)
Browse files Browse the repository at this point in the history
* riscv64: Add immediate rule to `gen_vec_mask`

* riscv64: Implement `scalar_to_vector`

* riscv64: Implement vector `select`

* riscv64: Implement SIMD `iabs`

* wasmtime: Enable SIMD memory64 tests for riscv64

* cranelift: Update targets for `simd-select` tests
  • Loading branch information
afonso360 authored Jun 17, 2023
1 parent 728d0f5 commit 4756114
Show file tree
Hide file tree
Showing 13 changed files with 753 additions and 93 deletions.
9 changes: 0 additions & 9 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
return true;
}

// The memory64 testsuite has a single SIMD test that we don't pass yet.
if testname == "simd" && testsuite == "memory64" {
return true;
}

let known_failure = [
"canonicalize_nan",
"cvt_from_uint",
Expand All @@ -249,18 +244,14 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"simd_i16x8_arith2",
"simd_i16x8_cmp",
"simd_i16x8_q15mulr_sat_s",
"simd_i32x4_arith2",
"simd_i32x4_cmp",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_i64x2_arith2",
"simd_i64x2_cmp",
"simd_i8x16_arith2",
"simd_i8x16_cmp",
"simd_load",
"simd_load_zero",
"simd_splat",
"v128_select",
]
.contains(&testname);

Expand Down
12 changes: 0 additions & 12 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2673,18 +2673,6 @@
(gen_select_reg (IntCC.SignedGreaterThan) x y x y))


(decl lower_iabs (Type XReg) XReg)

; I64 and lower
; Generate the following code:
; sext.{b,h,w} a0, a0
; neg a1, a0
; max a0, a0, a1
(rule (lower_iabs (fits_in_64 ty) val)
(let ((extended XReg (sext val ty $I64))
(negated XReg (rv_neg extended)))
(max $I64 extended negated)))

(decl gen_trapif (XReg TrapCode) InstOutput)
(rule
(gen_trapif test trap_code)
Expand Down
11 changes: 7 additions & 4 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1006,14 +1006,17 @@


;; Build a vector mask from a u64
;; TODO: We should merge this with the `vconst` rules, and take advantage of
;; the other existing `vconst` rules. One example is using `vmv.v.i` which
;; can represent some of these masks.
;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of
;; the other existing `vconst` rules.
(decl gen_vec_mask (u64) VReg)

;; When the mask value fits in a 5-bit immediate, we can use `vmv.v.i` directly
;; instead of first materializing the value in an X register.
(rule 1 (gen_vec_mask (imm5_from_u64 imm))
(rv_vmv_vi imm (vstate_from_type $I64X2)))

;; Materialize the mask into an X register, and move it into the bottom of
;; the vector register.
(rule (gen_vec_mask mask)
(rule 0 (gen_vec_mask mask)
(rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2)))


Expand Down
35 changes: 32 additions & 3 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1556,9 +1556,23 @@
(load_ra))

;;; Rules for `iabs` ;;;;;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (iabs x)))
(lower_iabs ty x))

;; Scalar integers, I64 and lower.
;; The absolute value is computed as `max(x, -x)` on the sign-extended input,
;; generating the following code:
;;   sext.{b,h,w} a0, a0
;;   neg a1, a0
;;   max a0, a0, a1
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x)))
(let ((extended XReg (sext x ty $I64))
(negated XReg (rv_neg extended)))
(max $I64 extended negated)))

;; For vectors we generate the same negate-then-max sequence using vector
;; instructions. The sign extension can be skipped, since the vector unit
;; only processes element-sized chunks.
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (iabs x)))
(let ((negated VReg (rv_vneg_v x (unmasked) ty)))
(rv_vmax_vv x negated (unmasked) ty)))

;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1842,3 +1856,18 @@
(xor VReg (rv_vxor_vv x y (unmasked) ty))
(rhs VReg (rv_vssrl_vi xor one (unmasked) ty)))
(rv_vadd_vv lhs rhs (unmasked) ty)))

;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Integer vectors: splat the zero register into a vector register, then merge
;; the scalar `x` into it under a mask built from the constant 1.
;; NOTE(review): presumably mask bit 0 selects lane 0, so only the lowest lane
;; receives `x` and every other lane stays zero -- confirm against the
;; definition of `rv_vmerge_vxm`.
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x)))
(if (ty_vector_not_float ty))
(let ((zero VReg (rv_vmv_vx (zero_reg) ty))
(mask VReg (gen_vec_mask 1)))
(rv_vmerge_vxm zero x mask ty)))

;; Float vectors: the scalar `x` is first moved into a vector register with
;; `rv_vfmv_sf`, and that vector is then merged into a zeroed vector using the
;; vector-vector merge `rv_vmerge_vvm` under a mask built from the constant 1.
;; NOTE(review): presumably only the lowest lane is taken from `elem` and the
;; remaining lanes stay zero -- confirm against `rv_vmerge_vvm`.
(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x)))
(if (ty_vector_float ty))
(let ((zero VReg (rv_vmv_vx (zero_reg) ty))
(elem VReg (rv_vfmv_sf x ty))
(mask VReg (gen_vec_mask 1)))
(rv_vmerge_vvm zero elem mask ty)))
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
} else {
vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)]
}
} else if ty.is_float() {
} else if ty.is_float() || ty.is_vector() {
vec![self.temp_writable_reg(ty)]
} else {
unimplemented!("ty:{:?}", ty)
Expand Down
166 changes: 166 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-iabs.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %iabs_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=16, #vtype=(e8, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=16, #vtype=(e8, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=8, #vtype=(e16, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=8, #vtype=(e16, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=2, #vtype=(e64, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

Loading

0 comments on commit 4756114

Please sign in to comment.