From 0222107699641d0cbad1d1577187a316355cda3a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 5 Sep 2023 01:53:52 -0500 Subject: [PATCH] nak: Move all the IADD3 insanity to a new OpIAdd3X opcode Because of its crazy behavior around overflow, we don't want the full IADD3 opcode to support any sort of source modifier propagation. This makes us a new OpIAdd3X opcode which contains all the crazy and lets IAdd3 remain the usual 32-bit integer thing everyone knows and loves. Part-of: --- src/nouveau/compiler/nak_builder.rs | 2 - src/nouveau/compiler/nak_encode_sm75.rs | 66 ++++++++++++----------- src/nouveau/compiler/nak_from_nir.rs | 18 +++---- src/nouveau/compiler/nak_ir.rs | 72 ++++++++++++++++++++----- src/nouveau/compiler/nak_legalize.rs | 7 +++ 5 files changed, 108 insertions(+), 57 deletions(-) diff --git a/src/nouveau/compiler/nak_builder.rs b/src/nouveau/compiler/nak_builder.rs index 3aefc3ae7c4..55397df3785 100644 --- a/src/nouveau/compiler/nak_builder.rs +++ b/src/nouveau/compiler/nak_builder.rs @@ -122,9 +122,7 @@ pub trait SSABuilder: Builder { let dst = self.alloc_ssa(RegFile::GPR, 1); self.push_op(OpIAdd3 { dst: dst.into(), - overflow: Dst::None, srcs: [Src::new_zero(), x, y], - carry: Src::new_imm_bool(false), }); dst } diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs index 9111028ff0d..7db2ea98d50 100644 --- a/src/nouveau/compiler/nak_encode_sm75.rs +++ b/src/nouveau/compiler/nak_encode_sm75.rs @@ -551,40 +551,41 @@ impl SM75Instr { } fn encode_iadd3(&mut self, op: &OpIAdd3) { - /* TODO: This should happen as part of a legalization pass */ - assert!(op.srcs[0].is_reg_or_zero()); - if op.srcs[2].is_reg_or_zero() { - self.encode_alu( - 0x010, - Some(op.dst), - ALUSrc::from_src(&op.srcs[0]), - ALUSrc::from_src(&op.srcs[1]), - ALUSrc::from_src(&op.srcs[2]), - ); - } else { - self.encode_alu( - 0x010, - Some(op.dst), - ALUSrc::from_src(&op.srcs[0]), - ALUSrc::from_src(&op.srcs[2]), - ALUSrc::from_src(&op.srcs[1]), - ); - } + // Hardware requires at least one of these be unmodified + assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none()); - self.set_pred_dst(81..84, op.overflow); - - /* Carry for IADD3 is special because the default (register 7) is false - * instead of the usual true and it doesn't have a not modifier. - */ - assert!(op.carry.src_mod.is_none()); - self.set_pred_reg( - 84..87, - match op.carry.src_ref { - SrcRef::False => RegRef::new(RegFile::Pred, 7, 1), - SrcRef::Reg(reg) => reg, - _ => panic!("Invalid carry source"), - }, + self.encode_alu( + 0x010, + Some(op.dst), + ALUSrc::from_src(&op.srcs[0]), + ALUSrc::from_src(&op.srcs[1]), + ALUSrc::from_src(&op.srcs[2]), ); + + self.set_pred_dst(81..84, Dst::None); + self.set_pred_dst(84..87, Dst::None); + } + + fn encode_iadd3x(&mut self, op: &OpIAdd3X) { + // Hardware requires at least one of these be unmodified + assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none()); + + self.encode_alu( + 0x010, + Some(op.dst), + ALUSrc::from_src(&op.srcs[0]), + ALUSrc::from_src(&op.srcs[1]), + ALUSrc::from_src(&op.srcs[2]), + ); + + // .X + self.set_bit(74, op.high); + + self.set_pred_dst(81..84, op.overflow[0]); + self.set_pred_dst(84..87, op.overflow[1]); + + self.set_pred_src(87..90, 90, op.carry[0]); + self.set_pred_src(77..80, 80, op.carry[1]); } fn encode_imad(&mut self, op: &OpIMad) { @@ -1563,6 +1564,7 @@ impl SM75Instr { Op::MuFu(op) => si.encode_mufu(&op), Op::IAbs(op) => si.encode_iabs(&op), Op::IAdd3(op) => si.encode_iadd3(&op), + Op::IAdd3X(op) => si.encode_iadd3x(&op), Op::IMad(op) => si.encode_imad(&op), Op::IMad64(op) => si.encode_imad64(&op), Op::IMnMx(op) => si.encode_imnmx(&op), diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index ca75f02d9d1..a6c8f0f11cb 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -445,17 +445,19 @@ impl<'a> ShaderFromNir<'a> { let y = srcs[1].as_ssa().unwrap(); let sum = b.alloc_ssa(RegFile::GPR, 2); let carry = b.alloc_ssa(RegFile::Pred, 1); - b.push_op(OpIAdd3 { + b.push_op(OpIAdd3X { dst: sum[0].into(), - overflow: carry.into(), + overflow: [carry.into(), Dst::None], + high: false, srcs: [x[0].into(), y[0].into(), Src::new_zero()], - carry: Src::new_imm_bool(false), + carry: [SrcRef::False.into(), SrcRef::False.into()], }); - b.push_op(OpIAdd3 { + b.push_op(OpIAdd3X { dst: sum[1].into(), - overflow: Dst::None, + overflow: [Dst::None, Dst::None], + high: true, srcs: [x[1].into(), y[1].into(), Src::new_zero()], - carry: carry.into(), + carry: [carry.into(), SrcRef::False.into()], }); sum } else { @@ -634,9 +636,7 @@ impl<'a> ShaderFromNir<'a> { let gt_neg = b.ineg(gt.into()); b.push_op(OpIAdd3 { dst: dst.into(), - overflow: Dst::None, srcs: [lt.into(), gt_neg.into(), Src::new_zero()], - carry: Src::new_imm_bool(false), }); } IntType::I64 => { @@ -644,9 +644,7 @@ impl<'a> ShaderFromNir<'a> { let gt_neg = b.ineg(gt.into()); b.push_op(OpIAdd3 { dst: high.into(), - overflow: Dst::None, srcs: [lt.into(), gt_neg.into(), Src::new_zero()], - carry: Src::new_imm_bool(false), }); b.push_op(OpShf { dst: dst.into(), diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 3d7d145c953..c003fb472ed 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -591,6 +591,13 @@ pub enum Dst { } impl Dst { + pub fn is_none(&self) -> bool { + match self { + Dst::None => true, + _ => false, + } + } + pub fn as_reg(&self) -> Option<&RegRef> { match self { Dst::Reg(r) => Some(r), @@ -1004,6 +1011,14 @@ impl Src { } } + pub fn is_false(&self) -> bool { + match self.src_ref { + SrcRef::True => self.src_mod.is_bnot(), + SrcRef::False => !self.src_mod.is_bnot(), + _ => false, + } + } + pub fn is_reg_or_zero(&self) -> bool { match self.src_ref { SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_) => true, @@ -2080,30 +2095,61 @@ impl fmt::Display for OpINeg { #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpIAdd3 { pub dst: Dst, - pub overflow: Dst, #[src_type(I32)] pub srcs: [Src; 3], - - #[src_type(Pred)] - pub carry: Src, } impl fmt::Display for OpIAdd3 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "IADD3 {{ {} {} }} {{ {}, {}, {}, {} }}", - self.dst, - self.overflow, - self.srcs[0], - self.srcs[1], - self.srcs[2], - self.carry, + "IADD3 {} {{ {}, {}, {} }}", + self.dst, self.srcs[0], self.srcs[1], self.srcs[2], ) } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpIAdd3X { + pub dst: Dst, + pub overflow: [Dst; 2], + + pub high: bool, + + #[src_type(ALU)] + pub srcs: [Src; 3], + + #[src_type(Pred)] + pub carry: [Src; 2], +} + +impl fmt::Display for OpIAdd3X { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "IADD3")?; + if self.high { + write!(f, ".HI ")?; + } else { + write!(f, ".LO ")?; + } + if self.overflow[0].is_none() && self.overflow[1].is_none() { + write!(f, "{} ", self.dst)?; + } else { + write!( + f, + "{{ {}, {}, {} }} ", + self.dst, self.overflow[0], self.overflow[1], + )?; + } + write!(f, "{{ {}, {}, {}", self.srcs[0], self.srcs[1], self.srcs[2])?; + if self.high { + write!(f, ", {}, {}", self.carry[0], self.carry[1])?; + } + write!(f, " }}") + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpIMad { @@ -3574,6 +3620,7 @@ pub enum Op { IAbs(OpIAbs), INeg(OpINeg), IAdd3(OpIAdd3), + IAdd3X(OpIAdd3X), IMad(OpIMad), IMad64(OpIMad64), IMnMx(OpIMnMx), @@ -3950,6 +3997,7 @@ impl Instr { | Op::IAbs(_) | Op::INeg(_) | Op::IAdd3(_) + | Op::IAdd3X(_) | Op::IMad(_) | Op::IMad64(_) | Op::IMnMx(_) @@ -4238,9 +4286,7 @@ impl Shader { match instr.op { Op::INeg(neg) => MappedInstrs::One(Instr::new_boxed(OpIAdd3 { dst: neg.dst, - overflow: Dst::None, srcs: [Src::new_zero(), neg.src.ineg(), Src::new_zero()], - carry: Src::new_imm_bool(false), })), Op::FSOut(out) => { let mut pcopy = OpParCopy::new(); diff --git a/src/nouveau/compiler/nak_legalize.rs b/src/nouveau/compiler/nak_legalize.rs index 23a7bd35909..e539d9ee170 100644 --- a/src/nouveau/compiler/nak_legalize.rs +++ b/src/nouveau/compiler/nak_legalize.rs @@ -111,6 +111,13 @@ fn legalize_instr(b: &mut impl SSABuilder, instr: &mut Instr) { copy_src_if_not_reg(b, src0, RegFile::GPR); copy_src_if_not_reg(b, src2, RegFile::GPR); } + Op::IAdd3X(op) => { + let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; + swap_srcs_if_not_reg(src0, src1); + swap_srcs_if_not_reg(src2, src1); + copy_src_if_not_reg(b, src0, RegFile::GPR); + copy_src_if_not_reg(b, src2, RegFile::GPR); + } Op::IMad(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1);