nak: Add a helper for reducing OpShfl lane and c immediates

Every back-end has code to mask these because the hardware only has
limited encoding space.  However, this can be done as a common
legalization operation and doing so means that our post-legalize IR
matches what actually gets encoded.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34678>
This commit is contained in:
Faith Ekstrand 2025-05-12 18:31:52 -04:00 committed by Marge Bot
parent 9890110856
commit 212f99d39d
5 changed files with 32 additions and 10 deletions

View file

@ -4712,6 +4712,24 @@ pub struct OpShfl {
pub op: ShflOp,
}
impl OpShfl {
    /// Truncates immediate `lane` and `c` operands to the bits that matter.
    ///
    /// The hardware looks at only a subset of the bits in `lane` and `c`
    /// and ignores everything else.  When either operand is a 32-bit
    /// immediate, this clears the ignored bits in place so the value fits
    /// in the limited encoding space of the instruction.  Operands that
    /// are not `SrcRef::Imm32` are left untouched.
    ///
    /// In debug builds this asserts that neither operand carries a source
    /// modifier.
    pub fn reduce_lane_c_imm(&mut self) {
        debug_assert!(self.lane.src_mod.is_none());
        // Only the low five bits of an immediate lane are kept.
        if let SrcRef::Imm32(ref mut lane_imm) = self.lane.src_ref {
            *lane_imm &= 0x1f;
        }

        debug_assert!(self.c.src_mod.is_none());
        // For `c`, bits 0..=4 and 8..=12 are kept; all others are cleared.
        if let SrcRef::Imm32(ref mut c_imm) = self.c.src_ref {
            *c_imm &= 0x1f1f;
        }
    }
}
impl DisplayOp for OpShfl {
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)

View file

@ -1594,6 +1594,7 @@ impl SM20Op for OpShfl {
if matches!(self.c.src_ref, SrcRef::CBuf(_)) {
b.copy_alu_src(&mut self.c, GPR, SrcType::ALU);
}
self.reduce_lane_c_imm();
}
fn encode(&self, e: &mut SM20Encoder<'_>) {
@ -1604,7 +1605,7 @@ impl SM20Op for OpShfl {
assert!(self.lane.src_mod.is_none());
if let Some(u) = self.lane.src_ref.as_u32() {
e.set_field(26..32, u & 0x1f);
e.set_field(26..32, u);
e.set_bit(5, true);
} else {
e.set_reg_src(26..32, &self.lane);
@ -1613,7 +1614,7 @@ impl SM20Op for OpShfl {
assert!(self.c.src_mod.is_none());
if let Some(u) = self.c.src_ref.as_u32() {
e.set_field(42..55, u & 0x1fff);
e.set_field(42..55, u);
e.set_bit(6, true);
} else {
e.set_reg_src(49..55, &self.c);

View file

@ -1843,6 +1843,7 @@ impl SM32Op for OpShfl {
b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU);
b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU);
self.reduce_lane_c_imm();
}
fn encode(&self, e: &mut SM32Encoder<'_>) {
@ -1868,7 +1869,7 @@ impl SM32Op for OpShfl {
e.set_bit(31, false);
}
SrcRef::Imm32(imm32) => {
e.set_field(23..28, *imm32 & 0x1f);
e.set_field(23..28, *imm32);
e.set_bit(31, true);
}
src => panic!("Invalid shfl lane: {src}"),
@ -1880,7 +1881,7 @@ impl SM32Op for OpShfl {
}
SrcRef::Imm32(imm32) => {
e.set_bit(32, true);
e.set_field(37..50, *imm32 & 0x1f1f);
e.set_field(37..50, *imm32);
}
src => panic!("Invalid shfl c: {src}"),
}

View file

@ -2037,6 +2037,7 @@ impl SM50Op for OpShfl {
b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU);
b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU);
self.reduce_lane_c_imm();
}
fn encode(&self, e: &mut SM50Encoder<'_>) {
@ -2053,7 +2054,7 @@ impl SM50Op for OpShfl {
}
SrcRef::Imm32(imm32) => {
e.set_bit(28, true);
e.set_field(20..25, *imm32 & 0x1f);
e.set_field(20..25, *imm32);
}
src => panic!("Invalid shfl lane: {src}"),
}
@ -2064,7 +2065,7 @@ impl SM50Op for OpShfl {
}
SrcRef::Imm32(imm32) => {
e.set_bit(29, true);
e.set_field(34..47, *imm32 & 0x1f1f);
e.set_field(34..47, *imm32);
}
src => panic!("Invalid shfl c: {src}"),
}

View file

@ -2123,6 +2123,7 @@ impl SM70Op for OpShfl {
b.copy_alu_src_if_not_reg(&mut self.src, gpr, SrcType::GPR);
b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, gpr, SrcType::ALU);
b.copy_alu_src_if_not_reg_or_imm(&mut self.c, gpr, SrcType::ALU);
self.reduce_lane_c_imm();
}
fn encode(&self, e: &mut SM70Encoder<'_>) {
@ -2139,20 +2140,20 @@ impl SM70Op for OpShfl {
SrcRef::Imm32(imm_c) => {
e.set_opcode(0x589);
e.set_reg_src(32..40, &self.lane);
e.set_field(40..53, *imm_c & 0x1f1f);
e.set_field(40..53, *imm_c);
}
_ => panic!("Invalid instruction form"),
},
SrcRef::Imm32(imm_lane) => match &self.c.src_ref {
SrcRef::Zero | SrcRef::Reg(_) => {
e.set_opcode(0x989);
e.set_field(53..58, *imm_lane & 0x1f);
e.set_field(53..58, *imm_lane);
e.set_reg_src(64..72, &self.c);
}
SrcRef::Imm32(imm_c) => {
e.set_opcode(0xf89);
e.set_field(40..53, *imm_c & 0x1f1f);
e.set_field(53..58, *imm_lane & 0x1f);
e.set_field(40..53, *imm_c);
e.set_field(53..58, *imm_lane);
}
_ => panic!("Invalid instruction form"),
},