diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs index 8dde6135082..095b7e47bd7 100644 --- a/src/nouveau/compiler/nak/builder.rs +++ b/src/nouveau/compiler/nak/builder.rs @@ -200,6 +200,7 @@ pub trait SSABuilder: Builder { dst: dst.into(), cmp_op: cmp_op, srcs: [x, y], + ftz: false, }); dst } @@ -212,6 +213,7 @@ pub trait SSABuilder: Builder { cmp_op: cmp_op, srcs: [x, y], accum: SrcRef::True.into(), + ftz: false, }); dst } diff --git a/src/nouveau/compiler/nak/encode_sm70.rs b/src/nouveau/compiler/nak/encode_sm70.rs index fe9fd08f5fd..6b335a3c0fa 100644 --- a/src/nouveau/compiler/nak/encode_sm70.rs +++ b/src/nouveau/compiler/nak/encode_sm70.rs @@ -502,7 +502,7 @@ impl SM70Instr { ALUSrc::None, ); self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, false); /* TODO: Denorm mode */ + self.set_bit(80, op.ftz); self.set_field(87..90, 0x7_u8); /* TODO: src predicate */ } @@ -529,7 +529,7 @@ impl SM70Instr { self.set_pred_set_op(74..76, op.set_op); self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, false); /* TODO: Denorm mode */ + self.set_bit(80, op.ftz); self.set_pred_dst(81..84, op.dst); self.set_pred_dst(84..87, Dst::None); /* dst1 */ diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 2d3d9cda7e1..8b2e6816e12 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -679,7 +679,28 @@ impl<'a> ShaderFromNir<'a> { let tmp = b.fmul(srcs[0], frac_1_2pi.into()); b.mufu(MuFuOp::Cos, tmp.into()) } - nir_op_feq => b.fsetp(FloatCmpOp::OrdEq, srcs[0], srcs[1]), + nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => { + let src_type = + FloatType::from_bits(alu.get_src(0).bit_size().into()); + let cmp_op = match alu.op { + nir_op_feq => FloatCmpOp::OrdEq, + nir_op_fge => FloatCmpOp::OrdGe, + nir_op_flt => FloatCmpOp::OrdLt, + nir_op_fneu => FloatCmpOp::UnordNe, + _ => panic!("Usupported float comparison"), + }; + + let dst = b.alloc_ssa(RegFile::Pred, 1); + b.push_op(OpFSetP { + dst: dst.into(), + set_op: PredSetOp::And, + cmp_op: cmp_op, + srcs: [srcs[0], srcs[1]], + accum: SrcRef::True.into(), + ftz: self.float_ctl[src_type].ftz, + }); + dst + } nir_op_fexp2 => b.mufu(MuFuOp::Exp2, srcs[0]), nir_op_ffma => { let ftype = FloatType::from_bits(alu.def.bit_size().into()); @@ -695,18 +716,10 @@ impl<'a> ShaderFromNir<'a> { b.push_op(ffma); dst } - nir_op_fge => { - assert!(alu.get_src(0).bit_size() == 32); - b.fsetp(FloatCmpOp::OrdGe, srcs[0], srcs[1]) - } nir_op_flog2 => { assert!(alu.def.bit_size() == 32); b.mufu(MuFuOp::Log2, srcs[0]) } - nir_op_flt => { - assert!(alu.get_src(0).bit_size() == 32); - b.fsetp(FloatCmpOp::OrdLt, srcs[0], srcs[1]) - } nir_op_fmax | nir_op_fmin => { assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); @@ -732,7 +745,6 @@ impl<'a> ShaderFromNir<'a> { b.push_op(fmul); dst } - nir_op_fneu => b.fsetp(FloatCmpOp::UnordNe, srcs[0], srcs[1]), nir_op_fquantize2f16 => { let tmp = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpF2F { diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index e3db222ddb8..1abf4de9b57 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -2296,11 +2296,18 @@ pub struct OpFSet { #[src_type(F32)] pub srcs: [Src; 2], + + pub ftz: bool, } impl DisplayOp for OpFSet { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "fset{} {} {}", self.cmp_op, self.srcs[0], self.srcs[1]) + let ftz = if self.ftz { ".ftz" } else { "" }; + write!( + f, + "fset{}{ftz} {} {}", + self.cmp_op, self.srcs[0], self.srcs[1] + ) } } impl_display_for_op!(OpFSet); @@ -2318,11 +2325,14 @@ pub struct OpFSetP { #[src_type(Pred)] pub accum: Src, + + pub ftz: bool, } impl DisplayOp for OpFSetP { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "fsetp{}", self.cmp_op)?; + let ftz = if self.ftz { ".ftz" } else { "" }; + write!(f, "fsetp{}{ftz}", self.cmp_op)?; if !self.set_op.is_trivial(&self.accum) { write!(f, "{}", self.set_op)?; }