From fe2b06395e3f086ce6ab88f7954a73d7787efe50 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 5 Jun 2024 15:31:07 -0500 Subject: [PATCH] nak: Get rid of OpINeg Instead, do the same thing we do for float modifiers and use OpIAdd2 or OpIAdd3. This makes for a little more work in copy-prop but the extra opcode and lowering pass just isn't worth it. Part-of: --- src/nouveau/compiler/nak/api.rs | 1 - src/nouveau/compiler/nak/builder.rs | 18 +++++++--- src/nouveau/compiler/nak/ir.rs | 44 ----------------------- src/nouveau/compiler/nak/legalize.rs | 3 +- src/nouveau/compiler/nak/opt_copy_prop.rs | 29 +++++++++++++-- 5 files changed, 41 insertions(+), 54 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 86afec3214a..7a75590e188 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -298,7 +298,6 @@ pub extern "C" fn nak_compile_shader( eprintln!("NAK IR after assign_regs:\n{}", &s); } - s.lower_ineg(); s.lower_par_copies(); s.lower_copy_swap(); s.opt_jump_thread(); diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs index 1b85a385b4f..d1cc3bf9ed5 100644 --- a/src/nouveau/compiler/nak/builder.rs +++ b/src/nouveau/compiler/nak/builder.rs @@ -407,10 +407,20 @@ pub trait SSABuilder: Builder { fn ineg(&mut self, i: Src) -> SSARef { let dst = self.alloc_ssa(RegFile::GPR, 1); - self.push_op(OpINeg { - dst: dst.into(), - src: i, - }); + if self.sm() >= 70 { + self.push_op(OpIAdd3 { + dst: dst.into(), + overflow: [Dst::None; 2], + srcs: [0.into(), i.ineg(), 0.into()], + }); + } else { + self.push_op(OpIAdd2 { + dst: dst.into(), + srcs: [0.into(), i.ineg()], + carry_in: 0.into(), + carry_out: Dst::None, + }); + } dst } diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 4afb81ce8fa..8be03cb3387 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -2997,22 +2997,6 @@ impl DisplayOp for OpIAbs { } impl_display_for_op!(OpIAbs); -#[repr(C)] -#[derive(SrcsAsSlice, DstsAsSlice)] -pub struct OpINeg { - pub dst: Dst, - - #[src_type(ALU)] - pub src: Src, -} - -impl DisplayOp for OpINeg { - fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ineg {}", self.src) - } -} -impl_display_for_op!(OpINeg); - /// Only used on SM50 #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] @@ -5190,7 +5174,6 @@ pub enum Op { Bfe(OpBfe), Flo(OpFlo), IAbs(OpIAbs), - INeg(OpINeg), IAdd2(OpIAdd2), IAdd3(OpIAdd3), IAdd3X(OpIAdd3X), @@ -5638,7 +5621,6 @@ impl Instr { Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false, Op::BMsk(_) | Op::IAbs(_) - | Op::INeg(_) | Op::IAdd2(_) | Op::IAdd3(_) | Op::IAdd3X(_) @@ -6260,32 +6242,6 @@ impl Shader { }) } - pub fn lower_ineg(&mut self) { - let sm = self.info.sm; - self.map_instrs(|mut instr: Box, _| -> MappedInstrs { - match instr.op { - Op::INeg(neg) => { - if sm >= 70 { - instr.op = Op::IAdd3(OpIAdd3 { - dst: neg.dst, - overflow: [Dst::None; 2], - srcs: [0.into(), neg.src.ineg(), 0.into()], - }); - } else { - instr.op = Op::IAdd2(OpIAdd2 { - dst: neg.dst, - srcs: [0.into(), neg.src.ineg()], - carry_in: 0.into(), - carry_out: Dst::None, - }); - } - MappedInstrs::One(instr) - } - _ => MappedInstrs::One(instr), - } - }) - } - pub fn gather_global_mem_usage(&mut self) { if let ShaderStageInfo::Compute(_) = self.info.stage { return; diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index a9e0e37b702..d4c591ed38e 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -424,7 +424,6 @@ fn legalize_sm50_instr( copy_alu_src_if_not_reg(b, &mut op.offset, SrcType::GPR); } Op::Copy(_) => (), // Nothing to do - Op::INeg(_) => (), // we unconditionally lower this Op::SuLd(op) => { copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR); copy_alu_src_if_not_reg(b, &mut op.coord, SrcType::GPR); @@ -592,7 +591,7 @@ fn legalize_sm70_instr( copy_alu_src_if_not_reg(b, &mut op.pos, SrcType::ALU); } Op::BRev(_) | Op::Flo(_) => (), - Op::IAbs(_) | Op::INeg(_) => (), + Op::IAbs(_) => (), Op::IAdd3(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1); diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs index 1c6f23cc63f..cb5f033c359 100644 --- a/src/nouveau/compiler/nak/opt_copy_prop.rs +++ b/src/nouveau/compiler/nak/opt_copy_prop.rs @@ -481,10 +481,33 @@ impl CopyPropPass { } } } - Op::INeg(neg) => { - let dst = neg.dst.as_ssa().unwrap(); + Op::IAdd2(add) => { + let dst = add.dst.as_ssa().unwrap(); assert!(dst.comps() == 1); - self.add_copy(dst[0], SrcType::I32, neg.src.ineg()); + let dst = dst[0]; + + if add.carry_in.is_zero() { + if add.srcs[0].is_zero() { + self.add_copy(dst, SrcType::I32, add.srcs[1]); + } else if add.srcs[1].is_zero() { + self.add_copy(dst, SrcType::I32, add.srcs[0]); + } + } + } + Op::IAdd3(add) => { + let dst = add.dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); + let dst = dst[0]; + + if add.srcs[0].is_zero() { + if add.srcs[1].is_zero() { + self.add_copy(dst, SrcType::I32, add.srcs[2]); + } else if add.srcs[2].is_zero() { + self.add_copy(dst, SrcType::I32, add.srcs[1]); + } + } else if add.srcs[1].is_zero() && add.srcs[2].is_zero() { + self.add_copy(dst, SrcType::I32, add.srcs[0]); + } } Op::Prmt(prmt) => { let dst = prmt.dst.as_ssa().unwrap();