Skip to content

Commit

Permalink
riscv64: Forbid mask/dst overlaps for masked instructions (bytecodeal…
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 authored Sep 6, 2023
1 parent 36ca00c commit 6ca7493
Show file tree
Hide file tree
Showing 4 changed files with 1,012 additions and 13 deletions.
13 changes: 8 additions & 5 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan

// If the operation forbids overlaps (see `forbids_overlaps`) we need to
// ensure that the source and destination registers are different.
if op.forbids_src_dst_overlaps() {
if op.forbids_overlaps(mask) {
collector.reg_late_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
Expand Down Expand Up @@ -745,7 +745,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// If the operation forbids overlaps (source/destination, or mask/destination
// when masked), then we must register it as an early_def. This encodes the
// constraint that these must not overlap.
if op.forbids_src_dst_overlaps() {
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
Expand All @@ -768,7 +768,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// If the operation forbids overlaps (source/destination, or mask/destination
// when masked), then we must register it as an early_def. This encodes the
// constraint that these must not overlap.
if op.forbids_src_dst_overlaps() {
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
Expand All @@ -791,16 +791,19 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// If the operation forbids overlaps (source/destination, or mask/destination
// when masked), then we must register it as an early_def. This encodes the
// constraint that these must not overlap.
if op.forbids_src_dst_overlaps() {
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
}

vec_mask_operands(mask, collector);
}
&Inst::VecAluRImm5 { vd, ref mask, .. } => {
&Inst::VecAluRImm5 {
op, vd, ref mask, ..
} => {
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert!(!op.forbids_overlaps(mask));

collector.reg_def(vd);
vec_mask_operands(mask, collector);
Expand Down
125 changes: 117 additions & 8 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,13 @@ impl VecOpCategory {
}

impl VecOpMasking {
/// Returns `true` for the `Enabled` variant and `false` for `Disabled`.
pub fn is_enabled(&self) -> bool {
    matches!(self, VecOpMasking::Enabled { .. })
}

pub fn encode(&self) -> u32 {
match self {
VecOpMasking::Enabled { .. } => 0,
Expand Down Expand Up @@ -300,6 +307,12 @@ impl VecAluOpRRRR {
}
}

// Overlap constraints for the `VecAluOpRRRR` opcodes.
//
// Only `forbids_src_dst_overlaps` is provided here; the other trait methods
// are not overridden, so the trait's default implementations apply to them.
impl VecInstOverlapInfo for VecAluOpRRRR {
// Every opcode in this group reports that source/destination overlap is permitted.
fn forbids_src_dst_overlaps(&self) -> bool {
false
}
}

impl fmt::Display for VecAluOpRRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut s = format!("{self:?}");
Expand Down Expand Up @@ -336,9 +349,10 @@ impl VecAluOpRRRImm5 {
VecAluOpRRRImm5::VslideupVI => true,
}
}
}

/// Some instructions do not allow the source and destination registers to overlap.
pub fn forbids_src_dst_overlaps(&self) -> bool {
impl VecInstOverlapInfo for VecAluOpRRRImm5 {
fn forbids_src_dst_overlaps(&self) -> bool {
match self {
VecAluOpRRRImm5::VslideupVI => true,
}
Expand Down Expand Up @@ -569,9 +583,10 @@ impl VecAluOpRRR {
_ => unreachable!(),
}
}
}

/// Some instructions do not allow the source and destination registers to overlap.
pub fn forbids_src_dst_overlaps(&self) -> bool {
impl VecInstOverlapInfo for VecAluOpRRR {
fn forbids_src_dst_overlaps(&self) -> bool {
match self {
VecAluOpRRR::VrgatherVV
| VecAluOpRRR::VrgatherVX
Expand All @@ -595,6 +610,37 @@ impl VecAluOpRRR {
_ => false,
}
}

/// Reports whether the destination register must not overlap the mask register.
///
/// Only mask writing operations, and reduction operations (`vred*`), allow
/// mask / dst overlaps; every opcode not listed below forbids them.
fn forbids_mask_dst_overlaps(&self) -> bool {
    // NOTE(review): no integer vector-vector compare appears in this list; if
    // such variants exist in the enum they fall through to the conservative
    // `true` answer -- confirm that is intended.
    let overlap_allowed = matches!(
        self,
        VecAluOpRRR::VredmaxuVS
            | VecAluOpRRR::VredminuVS
            | VecAluOpRRR::VmandMM
            | VecAluOpRRR::VmorMM
            | VecAluOpRRR::VmnandMM
            | VecAluOpRRR::VmnorMM
            | VecAluOpRRR::VmseqVX
            | VecAluOpRRR::VmsneVX
            | VecAluOpRRR::VmsltuVX
            | VecAluOpRRR::VmsltVX
            | VecAluOpRRR::VmsleuVX
            | VecAluOpRRR::VmsleVX
            | VecAluOpRRR::VmsgtuVX
            | VecAluOpRRR::VmsgtVX
            | VecAluOpRRR::VmfeqVV
            | VecAluOpRRR::VmfneVV
            | VecAluOpRRR::VmfltVV
            | VecAluOpRRR::VmfleVV
            | VecAluOpRRR::VmfeqVF
            | VecAluOpRRR::VmfneVF
            | VecAluOpRRR::VmfltVF
            | VecAluOpRRR::VmfleVF
            | VecAluOpRRR::VmfgtVF
            | VecAluOpRRR::VmfgeVF
    );

    !overlap_allowed
}
}

impl fmt::Display for VecAluOpRRR {
Expand Down Expand Up @@ -704,14 +750,28 @@ impl VecAluOpRRImm5 {
| VecAluOpRRImm5::VmsgtVI => false,
}
}
}

/// Some instructions do not allow the source and destination registers to overlap.
pub fn forbids_src_dst_overlaps(&self) -> bool {
impl VecInstOverlapInfo for VecAluOpRRImm5 {
    /// `VrgatherVI` is the only opcode in this group whose source and
    /// destination registers may not overlap.
    fn forbids_src_dst_overlaps(&self) -> bool {
        matches!(self, VecAluOpRRImm5::VrgatherVI)
    }

    /// Only mask writing operations allow mask / dst overlaps; every opcode
    /// not listed below forbids them.
    fn forbids_mask_dst_overlaps(&self) -> bool {
        let overlap_allowed = matches!(
            self,
            VecAluOpRRImm5::VmseqVI
                | VecAluOpRRImm5::VmsneVI
                | VecAluOpRRImm5::VmsleuVI
                | VecAluOpRRImm5::VmsleVI
                | VecAluOpRRImm5::VmsgtuVI
                | VecAluOpRRImm5::VmsgtVI
        );

        !overlap_allowed
    }
}

impl fmt::Display for VecAluOpRRImm5 {
Expand Down Expand Up @@ -908,9 +968,10 @@ impl VecAluOpRR {
VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
}
}
}

/// Some instructions do not allow the source and destination registers to overlap.
pub fn forbids_src_dst_overlaps(&self) -> bool {
impl VecInstOverlapInfo for VecAluOpRR {
fn forbids_src_dst_overlaps(&self) -> bool {
match self {
VecAluOpRR::VzextVF2
| VecAluOpRR::VzextVF4
Expand Down Expand Up @@ -986,6 +1047,14 @@ impl VecAluOpRImm5 {
}
}

// Overlap constraints for the `VecAluOpRImm5` opcodes.
//
// Only `forbids_src_dst_overlaps` is provided here; the other trait methods
// are not overridden, so the trait's default implementations apply to them.
impl VecInstOverlapInfo for VecAluOpRImm5 {
// The match has no `_` arm, so adding a new variant to the enum is a compile
// error until its overlap rule is stated here.
fn forbids_src_dst_overlaps(&self) -> bool {
match self {
VecAluOpRImm5::VmvVI => false,
}
}
}

impl fmt::Display for VecAluOpRImm5 {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match self {
Expand Down Expand Up @@ -1057,3 +1126,43 @@ impl VecAMode {
}
}
}

/// Register-overlap restrictions of a vector opcode.
pub trait VecInstOverlapInfo {
    /// Returns `true` when the source and destination registers of this
    /// operation are not allowed to overlap.
    ///
    /// § 5.2 Vector Operands states:
    ///
    /// > A destination vector register group can overlap a source vector register group
    /// > only if one of the following holds:
    /// >
    /// > * The destination EEW equals the source EEW.
    /// >
    /// > * The destination EEW is smaller than the source EEW and the overlap is
    /// >   in the lowest-numbered part of the source register group (e.g., when LMUL=1,
    /// >   vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not).
    /// >
    /// > * The destination EEW is greater than the source EEW, the source EMUL is at
    /// >   least 1, and the overlap is in the highest-numbered part of the destination register
    /// >   group (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not).
    /// >
    /// > For the purpose of determining register group overlap constraints, mask elements have EEW=1.
    fn forbids_src_dst_overlaps(&self) -> bool;

    /// Returns `true` when the destination register of this operation is not
    /// allowed to overlap the mask register. Defaults to `true`.
    ///
    /// § 5.3 Vector Masking states:
    ///
    /// > The destination vector register group for a masked vector instruction
    /// > cannot overlap the source mask register (v0), unless the destination
    /// > vector register is being written with a mask value (e.g., compares) or
    /// > the scalar result of a reduction. These instruction encodings are reserved.
    ///
    /// In almost all instructions we should not allow the mask to be re-used as
    /// a destination register.
    fn forbids_mask_dst_overlaps(&self) -> bool {
        true
    }

    /// Returns `true` when any overlap restriction applies to this operation
    /// under the given masking mode.
    ///
    /// There are two broad categories of overlaps (see above), but we can't
    /// represent such fine grained overlaps to regalloc. So if either of the
    /// two comes into play we forbid all source and destination overlaps
    /// (including masks).
    fn forbids_overlaps(&self, mask: &VecOpMasking) -> bool {
        if self.forbids_src_dst_overlaps() {
            return true;
        }

        // The mask / destination rule is only consulted for masked instructions.
        mask.is_enabled() && self.forbids_mask_dst_overlaps()
    }
}
Loading

0 comments on commit 6ca7493

Please sign in to comment.