diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs index 9943dc64bad..793a9ae5ce5 100644 --- a/src/nouveau/compiler/nak_encode_sm75.rs +++ b/src/nouveau/compiler/nak_encode_sm75.rs @@ -90,7 +90,7 @@ impl SM75Instr { fn set_pred_reg(&mut self, range: Range, reg: RegRef) { assert!(range.len() == 3); assert!(reg.file() == RegFile::Pred); - assert!(reg.base_idx() <= 6); + assert!(reg.base_idx() <= 7); self.set_field(range, reg.base_idx()); } @@ -306,6 +306,26 @@ impl SM75Instr { self.set_bit(90, true); } + fn encode_plop3(&mut self, instr: &Instr, op: &LogicOp) { + assert!(instr.num_dsts() == 1); + assert!(instr.num_srcs() == 3); + + self.set_opcode(0x81c); + self.set_field(64..67, op.lut & 0x7); + self.set_field(72..77, op.lut >> 3); + + self.set_pred_reg(68..71, *instr.src(2).as_reg().unwrap()); + self.set_bit(71, false); /* NOT(src2) */ + + self.set_pred_reg(77..80, *instr.src(1).as_reg().unwrap()); + self.set_bit(80, false); /* NOT(src1) */ + self.set_pred_reg(81..84, *instr.dst(0).as_reg().unwrap()); + self.set_field(84..87, 7_u8); /* Def1 */ + + self.set_pred_reg(87..90, *instr.src(0).as_reg().unwrap()); + self.set_bit(90, false); /* NOT(src0) */ + } + fn set_cmp_op(&mut self, range: Range, op: &CmpOp) { assert!(range.len() == 3); self.set_field( @@ -488,6 +508,7 @@ impl SM75Instr { Opcode::SEL => si.encode_sel(instr), Opcode::IADD3 => si.encode_iadd3(instr), Opcode::LOP3(op) => si.encode_lop3(instr, &op), + Opcode::PLOP3(op) => si.encode_plop3(instr, &op), Opcode::ISETP(op) => si.encode_isetp(instr, &op), Opcode::SHL => si.encode_shl(instr), Opcode::ALD(a) => si.encode_ald(instr, &a), diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 056e19d1d7b..2b4411369bd 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -97,13 +97,23 @@ impl<'a> ShaderFromNir<'a> { self.instrs.push(Instr::new_iadd(dst, srcs[0], srcs[1])); } nir_op_iand => { - self.instrs.push(Instr::new_lop3( - dst, - LogicOp::new_lut(&|x, y, _| x & y), - srcs[0], - srcs[1], - Src::Zero, - )); + if alu.def.bit_size() == 1 { + self.instrs.push(Instr::new_plop3( + dst, + LogicOp::new_lut(&|x, y, _| x & y), + srcs[0], + srcs[1], + Src::Zero, + )); + } else { + self.instrs.push(Instr::new_lop3( + dst, + LogicOp::new_lut(&|x, y, _| x & y), + srcs[0], + srcs[1], + Src::Zero, + )); + } } nir_op_ieq => { self.instrs.push(Instr::new_isetp( @@ -142,22 +152,42 @@ impl<'a> ShaderFromNir<'a> { )); } nir_op_inot => { - self.instrs.push(Instr::new_lop3( - dst, - LogicOp::new_lut(&|x, _, _| !x), - srcs[0], - Src::Zero, - Src::Zero, - )); + if alu.def.bit_size() == 1 { + self.instrs.push(Instr::new_plop3( + dst, + LogicOp::new_lut(&|x, _, _| !x), + srcs[0], + Src::Zero, + Src::Zero, + )); + } else { + self.instrs.push(Instr::new_lop3( + dst, + LogicOp::new_lut(&|x, _, _| !x), + srcs[0], + Src::Zero, + Src::Zero, + )); + } } nir_op_ior => { - self.instrs.push(Instr::new_lop3( - dst, - LogicOp::new_lut(&|x, y, _| x | y), - srcs[0], - srcs[1], - Src::Zero, - )); + if alu.def.bit_size() == 1 { + self.instrs.push(Instr::new_plop3( + dst, + LogicOp::new_lut(&|x, y, _| x | y), + srcs[0], + srcs[1], + Src::Zero, + )); + } else { + self.instrs.push(Instr::new_lop3( + dst, + LogicOp::new_lut(&|x, y, _| x | y), + srcs[0], + srcs[1], + Src::Zero, + )); + } } nir_op_ishl => { self.instrs.push(Instr::new_shl(dst, srcs[0], srcs[1])); diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 47daaab476d..8dc28d6a124 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -823,6 +823,10 @@ impl Instr { Instr::new(Opcode::LOP3(op), slice::from_ref(&dst), &[x, y, z]) } + pub fn new_plop3(dst: Dst, op: LogicOp, x: Src, y: Src, z: Src) -> Instr { + Instr::new(Opcode::PLOP3(op), slice::from_ref(&dst), &[x, y, z]) + } + pub fn new_shl(dst: Dst, x: Src, shift: Src) -> Instr { Instr::new(Opcode::SHL, slice::from_ref(&dst), &[x, shift]) } @@ -950,6 +954,7 @@ impl Instr { | Opcode::FMUL | Opcode::IADD3 | Opcode::LOP3(_) + | Opcode::PLOP3(_) | Opcode::ISETP(_) | Opcode::SHL => Some(6), Opcode::MOV => Some(15), @@ -999,6 +1004,7 @@ pub enum Opcode { IADD3, LOP3(LogicOp), + PLOP3(LogicOp), ISETP(IntCmpOp), SHL, @@ -1029,6 +1035,7 @@ impl fmt::Display for Opcode { Opcode::FMUL => write!(f, "FMUL"), Opcode::IADD3 => write!(f, "IADD3"), Opcode::LOP3(op) => write!(f, "LOP3.{}", op), + Opcode::PLOP3(op) => write!(f, "PLOP3.{}", op), Opcode::ISETP(op) => write!(f, "ISETP.{}", op), Opcode::SHL => write!(f, "SHL"), Opcode::S2R(i) => write!(f, "S2R({})", i), @@ -1234,14 +1241,18 @@ impl Shader { for f in &mut self.functions { for b in &mut f.blocks { for instr in &mut b.instrs { + let zero_file = match instr.op { + Opcode::PLOP3(_) => RegFile::Pred, + _ => RegFile::GPR, + }; for dst in instr.dsts_mut() { if dst.is_zero() { - *dst = Dst::Reg(RegRef::zero(RegFile::GPR, 1)) + *dst = Dst::Reg(RegRef::zero(zero_file, 1)) } } for src in instr.srcs_mut() { if src.is_zero() { - *src = Src::Reg(RegRef::zero(RegFile::GPR, 1)) + *src = Src::Reg(RegRef::zero(zero_file, 1)) } } }