diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs index 8cd109412c5..4df90b4cb44 100644 --- a/src/nouveau/compiler/nak/builder.rs +++ b/src/nouveau/compiler/nak/builder.rs @@ -584,16 +584,64 @@ pub trait SSABuilder: Builder { }); } IntCmpOp::Ge | IntCmpOp::Gt | IntCmpOp::Le | IntCmpOp::Lt => { - self.push_op(OpISetP { - dst: dst.into(), - set_op: PredSetOp::And, - cmp_op: cmp_op, - cmp_type: cmp_type, - ex: true, - srcs: [x[1].into(), y[1].into()], - accum: true.into(), - low_cmp: low.into(), - }); + if self.sm() >= 70 { + self.push_op(OpISetP { + dst: dst.into(), + set_op: PredSetOp::And, + cmp_op, + cmp_type, + ex: true, + srcs: [x[1].into(), y[1].into()], + accum: true.into(), + low_cmp: low.into(), + }); + } else { + // On Maxwell, iset.ex only has one source for both accum + // and low_cmp and it does a weird truth table dance. (See + // Foldable for OpISetP for details.) + let low_or_accum = self.alloc_ssa(RegFile::Pred, 1); + let set_op = match cmp_op { + IntCmpOp::Ge | IntCmpOp::Gt => { + // When x != y, we want low_or_accum == false + self.push_op(OpISetP { + dst: low_or_accum.into(), + set_op: PredSetOp::And, + cmp_op: IntCmpOp::Eq, + cmp_type: IntCmpType::U32, + ex: false, + srcs: [x[1].into(), y[1].into()], + accum: low.into(), + low_cmp: true.into(), + }); + PredSetOp::Or + } + IntCmpOp::Le | IntCmpOp::Lt => { + // When x != y, we want low_or_accum == true + self.push_op(OpISetP { + dst: low_or_accum.into(), + set_op: PredSetOp::Or, + cmp_op: IntCmpOp::Ne, + cmp_type: IntCmpType::U32, + ex: false, + srcs: [x[1].into(), y[1].into()], + accum: low.into(), + low_cmp: true.into(), + }); + PredSetOp::And + } + _ => panic!("Not an integer inequality"), + }; + self.push_op(OpISetP { + dst: dst.into(), + set_op, + cmp_op, + cmp_type, + ex: true, + srcs: [x[1].into(), y[1].into()], + accum: low_or_accum.into(), + low_cmp: low_or_accum.into(), + }); + } } } dst