From 8c501a121fef46f825ba9e7c79fd7f91093e6792 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 24 Jul 2024 11:36:35 -0400 Subject: [PATCH] nak: switch to derivative intrinsics Signed-off-by: Alyssa Rosenzweig Part-of: --- src/nouveau/compiler/nak/api.rs | 2 + src/nouveau/compiler/nak/from_nir.rs | 136 ++++++++++++++------------- 2 files changed, 72 insertions(+), 66 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 732741ff2bb..b819801c291 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -163,6 +163,8 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { op.discard_is_demote = true; op.max_unroll_iterations = 32; + op.has_ddx_intrinsics = true; + op.scalarize_ddx = true; op } diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 0adbd61772a..5316fd5da48 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -1660,72 +1660,6 @@ impl<'a> ShaderFromNir<'a> { b.shr(srcs[0], srcs[1], false) } } - nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => { - // TODO: Real coarse derivatives - - assert!(alu.def.bit_size() == 32); - let ftype = FloatType::F32; - let scratch = b.alloc_ssa(RegFile::GPR, 1); - - b.push_op(OpShfl { - dst: scratch[0].into(), - in_bounds: Dst::None, - src: srcs[0], - lane: 1_u32.into(), - c: (0x3_u32 | 0x1c_u32 << 8).into(), - op: ShflOp::Bfly, - }); - - let dst = b.alloc_ssa(RegFile::GPR, 1); - - b.push_op(OpFSwzAdd { - dst: dst[0].into(), - srcs: [scratch[0].into(), srcs[0]], - ops: [ - FSwzAddOp::SubLeft, - FSwzAddOp::SubRight, - FSwzAddOp::SubLeft, - FSwzAddOp::SubRight, - ], - rnd_mode: self.float_ctl[ftype].rnd_mode, - ftz: self.float_ctl[ftype].ftz, - }); - - dst - } - nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => { - // TODO: Real coarse derivatives - - assert!(alu.def.bit_size() == 32); - let ftype = FloatType::F32; - let scratch = b.alloc_ssa(RegFile::GPR, 1); - - b.push_op(OpShfl { - dst: scratch[0].into(), - in_bounds: Dst::None, - src: srcs[0], - lane: 2_u32.into(), - c: (0x3_u32 | 0x1c_u32 << 8).into(), - op: ShflOp::Bfly, - }); - - let dst = b.alloc_ssa(RegFile::GPR, 1); - - b.push_op(OpFSwzAdd { - dst: dst[0].into(), - srcs: [scratch[0].into(), srcs[0]], - ops: [ - FSwzAddOp::SubLeft, - FSwzAddOp::SubLeft, - FSwzAddOp::SubRight, - FSwzAddOp::SubRight, - ], - rnd_mode: self.float_ctl[ftype].rnd_mode, - ftz: self.float_ctl[ftype].ftz, - }); - - dst - } _ => panic!("Unsupported ALU instruction: {}", alu.info().name()), }; self.set_dst(&alu.def, dst); @@ -2132,6 +2066,76 @@ impl<'a> ShaderFromNir<'a> { } self.set_ssa(&intrin.def, dst); } + nir_intrinsic_ddx + | nir_intrinsic_ddx_coarse + | nir_intrinsic_ddx_fine => { + // TODO: Real coarse derivatives + + assert!(intrin.def.bit_size() == 32); + let ftype = FloatType::F32; + let scratch = b.alloc_ssa(RegFile::GPR, 1); + + b.push_op(OpShfl { + dst: scratch[0].into(), + in_bounds: Dst::None, + src: self.get_src(&srcs[0]), + lane: 1_u32.into(), + c: (0x3_u32 | 0x1c_u32 << 8).into(), + op: ShflOp::Bfly, + }); + + let dst = b.alloc_ssa(RegFile::GPR, 1); + + b.push_op(OpFSwzAdd { + dst: dst[0].into(), + srcs: [scratch[0].into(), self.get_src(&srcs[0])], + ops: [ + FSwzAddOp::SubLeft, + FSwzAddOp::SubRight, + FSwzAddOp::SubLeft, + FSwzAddOp::SubRight, + ], + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, + }); + + self.set_dst(&intrin.def, dst); + } + nir_intrinsic_ddy + | nir_intrinsic_ddy_coarse + | nir_intrinsic_ddy_fine => { + // TODO: Real coarse derivatives + + assert!(intrin.def.bit_size() == 32); + let ftype = FloatType::F32; + let scratch = b.alloc_ssa(RegFile::GPR, 1); + + b.push_op(OpShfl { + dst: scratch[0].into(), + in_bounds: Dst::None, + src: self.get_src(&srcs[0]), + lane: 2_u32.into(), + c: (0x3_u32 | 0x1c_u32 << 8).into(), + op: ShflOp::Bfly, + }); + + let dst = b.alloc_ssa(RegFile::GPR, 1); + + b.push_op(OpFSwzAdd { + dst: dst[0].into(), + srcs: [scratch[0].into(), self.get_src(&srcs[0])], + ops: [ + FSwzAddOp::SubLeft, + FSwzAddOp::SubLeft, + FSwzAddOp::SubRight, + FSwzAddOp::SubRight, + ], + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, + }); + + self.set_dst(&intrin.def, dst); + } nir_intrinsic_ballot => { assert!(srcs[0].bit_size() == 1); let src = self.get_src(&srcs[0]);