From 212f99d39d6fad168673f70b21fd91222d0d04bf Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 12 May 2025 18:31:52 -0400 Subject: [PATCH] nak: Add a helper for reducing OpShfl lane and c immediates Every back-end has code to mask these because the hardware only has limited encoding space. However, this can be done as a common legalization operation and doing so means that our post-legalize IR matches what actually gets encoded. Part-of: --- src/nouveau/compiler/nak/ir.rs | 18 ++++++++++++++++++ src/nouveau/compiler/nak/sm20.rs | 5 +++-- src/nouveau/compiler/nak/sm32.rs | 5 +++-- src/nouveau/compiler/nak/sm50.rs | 5 +++-- src/nouveau/compiler/nak/sm70_encode.rs | 9 +++++---- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index d6093c6b0af..58ef4bdb741 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -4712,6 +4712,24 @@ pub struct OpShfl { pub op: ShflOp, } +impl OpShfl { + /// Reduces the lane and c immediates, if any. The hardware only uses + /// some of the bits of `lane` and `c` and ignores the rest. This method + /// keeps only `lane & 0x1f` and `c & 0x1f1f` so that any immediate values + /// fit in the limited encoding space in the instruction. 
+ pub fn reduce_lane_c_imm(&mut self) { + debug_assert!(self.lane.src_mod.is_none()); + if let SrcRef::Imm32(lane) = &mut self.lane.src_ref { + *lane &= 0x1f; + } + + debug_assert!(self.c.src_mod.is_none()); + if let SrcRef::Imm32(c) = &mut self.c.src_ref { + *c &= 0x1f1f; + } + } +} + impl DisplayOp for OpShfl { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c) diff --git a/src/nouveau/compiler/nak/sm20.rs b/src/nouveau/compiler/nak/sm20.rs index f807c18e34d..723b3aa8094 100644 --- a/src/nouveau/compiler/nak/sm20.rs +++ b/src/nouveau/compiler/nak/sm20.rs @@ -1594,6 +1594,7 @@ impl SM20Op for OpShfl { if matches!(self.c.src_ref, SrcRef::CBuf(_)) { b.copy_alu_src(&mut self.c, GPR, SrcType::ALU); } + self.reduce_lane_c_imm(); } fn encode(&self, e: &mut SM20Encoder<'_>) { @@ -1604,7 +1605,7 @@ impl SM20Op for OpShfl { assert!(self.lane.src_mod.is_none()); if let Some(u) = self.lane.src_ref.as_u32() { - e.set_field(26..32, u & 0x1f); + e.set_field(26..32, u); e.set_bit(5, true); } else { e.set_reg_src(26..32, &self.lane); @@ -1613,7 +1614,7 @@ impl SM20Op for OpShfl { assert!(self.c.src_mod.is_none()); if let Some(u) = self.c.src_ref.as_u32() { - e.set_field(42..55, u & 0x1fff); + e.set_field(42..55, u); e.set_bit(6, true); } else { e.set_reg_src(49..55, &self.c); diff --git a/src/nouveau/compiler/nak/sm32.rs b/src/nouveau/compiler/nak/sm32.rs index c8ee9cd4900..81a408f57d6 100644 --- a/src/nouveau/compiler/nak/sm32.rs +++ b/src/nouveau/compiler/nak/sm32.rs @@ -1843,6 +1843,7 @@ impl SM32Op for OpShfl { b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR); b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU); b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU); + self.reduce_lane_c_imm(); } fn encode(&self, e: &mut SM32Encoder<'_>) { @@ -1868,7 +1869,7 @@ impl SM32Op for OpShfl { e.set_bit(31, false); } SrcRef::Imm32(imm32) => { - e.set_field(23..28, 
*imm32 & 0x1f); + e.set_field(23..28, *imm32); e.set_bit(31, true); } src => panic!("Invalid shfl lane: {src}"), @@ -1880,7 +1881,7 @@ impl SM32Op for OpShfl { } SrcRef::Imm32(imm32) => { e.set_bit(32, true); - e.set_field(37..50, *imm32 & 0x1f1f); + e.set_field(37..50, *imm32); } src => panic!("Invalid shfl c: {src}"), } diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index 61e43bbd4ec..74533e3a347 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -2037,6 +2037,7 @@ impl SM50Op for OpShfl { b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR); b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU); b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU); + self.reduce_lane_c_imm(); } fn encode(&self, e: &mut SM50Encoder<'_>) { @@ -2053,7 +2054,7 @@ impl SM50Op for OpShfl { } SrcRef::Imm32(imm32) => { e.set_bit(28, true); - e.set_field(20..25, *imm32 & 0x1f); + e.set_field(20..25, *imm32); } src => panic!("Invalid shfl lane: {src}"), } @@ -2064,7 +2065,7 @@ impl SM50Op for OpShfl { } SrcRef::Imm32(imm32) => { e.set_bit(29, true); - e.set_field(34..47, *imm32 & 0x1f1f); + e.set_field(34..47, *imm32); } src => panic!("Invalid shfl c: {src}"), } diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs index ac17f762c38..333e2d27228 100644 --- a/src/nouveau/compiler/nak/sm70_encode.rs +++ b/src/nouveau/compiler/nak/sm70_encode.rs @@ -2123,6 +2123,7 @@ impl SM70Op for OpShfl { b.copy_alu_src_if_not_reg(&mut self.src, gpr, SrcType::GPR); b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, gpr, SrcType::ALU); b.copy_alu_src_if_not_reg_or_imm(&mut self.c, gpr, SrcType::ALU); + self.reduce_lane_c_imm(); } fn encode(&self, e: &mut SM70Encoder<'_>) { @@ -2139,20 +2140,20 @@ impl SM70Op for OpShfl { SrcRef::Imm32(imm_c) => { e.set_opcode(0x589); e.set_reg_src(32..40, &self.lane); - e.set_field(40..53, *imm_c & 0x1f1f); + e.set_field(40..53, 
*imm_c); } _ => panic!("Invalid instruction form"), }, SrcRef::Imm32(imm_lane) => match &self.c.src_ref { SrcRef::Zero | SrcRef::Reg(_) => { e.set_opcode(0x989); - e.set_field(53..58, *imm_lane & 0x1f); + e.set_field(53..58, *imm_lane); e.set_reg_src(64..72, &self.c); } SrcRef::Imm32(imm_c) => { e.set_opcode(0xf89); - e.set_field(40..53, *imm_c & 0x1f1f); - e.set_field(53..58, *imm_lane & 0x1f); + e.set_field(40..53, *imm_c); + e.set_field(53..58, *imm_lane); } _ => panic!("Invalid instruction form"), },