diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 0978b8b53ef..4226ff95430 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -129,8 +129,10 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_usub_sat = true; // TODO
     op.lower_iadd_sat = true; // TODO
     op.use_interpolated_input_intrinsics = true;
-    op.lower_int64_options =
-        !(nir_lower_iadd64 | nir_lower_ineg64 | nir_lower_shift64);
+    op.lower_int64_options = !(nir_lower_icmp64
+        | nir_lower_iadd64
+        | nir_lower_ineg64
+        | nir_lower_shift64);
     op.lower_ldexp = true;
     op.lower_fmod = true;
     op.lower_ffract = true;
diff --git a/src/nouveau/compiler/nak_builder.rs b/src/nouveau/compiler/nak_builder.rs
index b492ce8a3d3..7280f96bf76 100644
--- a/src/nouveau/compiler/nak_builder.rs
+++ b/src/nouveau/compiler/nak_builder.rs
@@ -201,12 +201,61 @@ pub trait SSABuilder: Builder {
             set_op: PredSetOp::And,
             cmp_op: cmp_op,
             cmp_type: cmp_type,
+            ex: false,
             srcs: [x, y],
-            accum: SrcRef::True.into(),
+            accum: true.into(),
+            low_cmp: true.into(),
         });
         dst
     }
 
+    fn isetp64(
+        &mut self,
+        cmp_type: IntCmpType,
+        cmp_op: IntCmpOp,
+        x: Src,
+        y: Src,
+    ) -> SSARef {
+        let x = x.as_ssa().unwrap();
+        let y = y.as_ssa().unwrap();
+
+        // Low bits are always an unsigned comparison
+        let low = self.isetp(IntCmpType::U32, cmp_op, x[0].into(), y[0].into());
+
+        let dst = self.alloc_ssa(RegFile::Pred, 1);
+        match cmp_op {
+            IntCmpOp::Eq | IntCmpOp::Ne => {
+                self.push_op(OpISetP {
+                    dst: dst.into(),
+                    set_op: match cmp_op {
+                        IntCmpOp::Eq => PredSetOp::And,
+                        IntCmpOp::Ne => PredSetOp::Or,
+                        _ => panic!("Not an integer equality"),
+                    },
+                    cmp_op: cmp_op,
+                    cmp_type: IntCmpType::U32,
+                    ex: false,
+                    srcs: [x[1].into(), y[1].into()],
+                    accum: low.into(),
+                    low_cmp: true.into(),
+                });
+            }
+            IntCmpOp::Ge | IntCmpOp::Gt | IntCmpOp::Le | IntCmpOp::Lt => {
+                self.push_op(OpISetP {
+                    dst: dst.into(),
+                    set_op: PredSetOp::And,
+                    cmp_op: cmp_op,
+                    cmp_type: cmp_type,
+                    ex: true,
+                    srcs: [x[1].into(), y[1].into()],
+                    accum: true.into(),
+                    low_cmp: low.into(),
+                });
+            }
+        }
+        dst
+    }
+
     fn lop2(&mut self, op: LogicOp, x: Src, y: Src) -> SSARef {
         let dst = if x.is_predicate() {
             self.alloc_ssa(RegFile::Pred, 1)
diff --git a/src/nouveau/compiler/nak_encode_sm70.rs b/src/nouveau/compiler/nak_encode_sm70.rs
index e47a9bb46ae..8f7b3c48490 100644
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@@ -704,7 +704,8 @@ impl SM70Instr {
             ALUSrc::None,
         );
 
-        self.set_pred_src(68..71, 71, SrcRef::True.into());
+        self.set_pred_src(68..71, 71, op.low_cmp);
+        self.set_bit(72, op.ex);
 
         self.set_field(
             73..74,
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 47bd1493a95..e62759d408a 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -820,7 +820,10 @@ impl<'a> ShaderFromNir<'a> {
                 if alu.get_src(0).bit_size() == 1 {
                     let lop = LogicOp::new_lut(&|x, y, _| !(x ^ y));
                     b.lop2(lop, srcs[0], srcs[1])
+                } else if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
                 } else {
+                    assert!(alu.get_src(0).bit_size() == 32);
                     b.isetp(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
                 }
             }
@@ -834,18 +837,21 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
-            nir_op_ige => {
-                b.isetp(IntCmpType::I32, IntCmpOp::Ge, srcs[0], srcs[1])
-            }
-            nir_op_ilt => {
-                b.isetp(IntCmpType::I32, IntCmpOp::Lt, srcs[0], srcs[1])
-            }
-            nir_op_ine => {
-                if alu.get_src(0).bit_size() == 1 {
-                    let lop = LogicOp::new_lut(&|x, y, _| (x ^ y));
-                    b.lop2(lop, srcs[0], srcs[1])
+            nir_op_ige | nir_op_ilt | nir_op_uge | nir_op_ult => {
+                let x = *srcs[0].as_ssa().unwrap();
+                let y = *srcs[1].as_ssa().unwrap();
+                let (cmp_type, cmp_op) = match alu.op {
+                    nir_op_ige => (IntCmpType::I32, IntCmpOp::Ge),
+                    nir_op_ilt => (IntCmpType::I32, IntCmpOp::Lt),
+                    nir_op_uge => (IntCmpType::U32, IntCmpOp::Ge),
+                    nir_op_ult => (IntCmpType::U32, IntCmpOp::Lt),
+                    _ => panic!("Not an integer comparison"),
+                };
+                if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(cmp_type, cmp_op, x.into(), y.into())
                 } else {
-                    b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                    assert!(alu.get_src(0).bit_size() == 32);
+                    b.isetp(cmp_type, cmp_op, x.into(), y.into())
                 }
             }
             nir_op_imax | nir_op_imin | nir_op_umax | nir_op_umin => {
@@ -881,6 +887,17 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst[1].into()
             }
+            nir_op_ine => {
+                if alu.get_src(0).bit_size() == 1 {
+                    let lop = LogicOp::new_lut(&|x, y, _| x ^ y);
+                    b.lop2(lop, srcs[0], srcs[1])
+                } else if alu.get_src(0).bit_size() == 64 {
+                    b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                } else {
+                    assert!(alu.get_src(0).bit_size() == 32);
+                    b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
+                }
+            }
             nir_op_ineg => {
                 if alu.def.bit_size == 64 {
                     let x = srcs[0].as_ssa().unwrap();
@@ -1062,12 +1079,6 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
-            nir_op_uge => {
-                b.isetp(IntCmpType::U32, IntCmpOp::Ge, srcs[0], srcs[1])
-            }
-            nir_op_ult => {
-                b.isetp(IntCmpType::U32, IntCmpOp::Lt, srcs[0], srcs[1])
-            }
             nir_op_unpack_32_2x16_split_x => {
                 b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
             }
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 66b26b574e5..14e271bcb67 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -2514,12 +2514,16 @@ pub struct OpISetP {
     pub set_op: PredSetOp,
     pub cmp_op: IntCmpOp,
     pub cmp_type: IntCmpType,
+    pub ex: bool,
 
     #[src_type(ALU)]
     pub srcs: [Src; 2],
 
     #[src_type(Pred)]
     pub accum: Src,
+
+    #[src_type(Pred)]
+    pub low_cmp: Src,
 }
 
 impl DisplayOp for OpISetP {
@@ -2528,10 +2532,16 @@ impl DisplayOp for OpISetP {
         if !self.set_op.is_trivial(&self.accum) {
             write!(f, "{}", self.set_op)?;
         }
+        if self.ex {
+            write!(f, ".ex")?;
+        }
         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
         if !self.set_op.is_trivial(&self.accum) {
             write!(f, " {}", self.accum)?;
         }
+        if self.ex {
+            write!(f, " {}", self.low_cmp)?;
+        }
         Ok(())
     }
 }
diff --git a/src/nouveau/compiler/nak_spill_values.rs b/src/nouveau/compiler/nak_spill_values.rs
index c30a4579f8b..ea8240da355 100644
--- a/src/nouveau/compiler/nak_spill_values.rs
+++ b/src/nouveau/compiler/nak_spill_values.rs
@@ -130,8 +130,10 @@ impl Spill for SpillPred {
             set_op: PredSetOp::And,
             cmp_op: IntCmpOp::Ne,
             cmp_type: IntCmpType::U32,
+            ex: false,
             srcs: [src.into(), Src::new_zero()],
-            accum: SrcRef::True.into(),
+            accum: true.into(),
+            low_cmp: true.into(),
         })
     }
 }