nak: switch to derivative intrinsics

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30573>
This commit is contained in:
Alyssa Rosenzweig 2024-07-24 11:36:35 -04:00 committed by Marge Bot
parent 185379ba81
commit 8c501a121f
2 changed files with 72 additions and 66 deletions

View file

@ -163,6 +163,8 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
op.discard_is_demote = true;
op.max_unroll_iterations = 32;
op.has_ddx_intrinsics = true;
op.scalarize_ddx = true;
op
}

View file

@ -1660,72 +1660,6 @@ impl<'a> ShaderFromNir<'a> {
b.shr(srcs[0], srcs[1], false)
}
}
nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
// TODO: Real coarse derivatives
assert!(alu.def.bit_size() == 32);
let ftype = FloatType::F32;
let scratch = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpShfl {
dst: scratch[0].into(),
in_bounds: Dst::None,
src: srcs[0],
lane: 1_u32.into(),
c: (0x3_u32 | 0x1c_u32 << 8).into(),
op: ShflOp::Bfly,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpFSwzAdd {
dst: dst[0].into(),
srcs: [scratch[0].into(), srcs[0]],
ops: [
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
],
rnd_mode: self.float_ctl[ftype].rnd_mode,
ftz: self.float_ctl[ftype].ftz,
});
dst
}
nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => {
// TODO: Real coarse derivatives
assert!(alu.def.bit_size() == 32);
let ftype = FloatType::F32;
let scratch = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpShfl {
dst: scratch[0].into(),
in_bounds: Dst::None,
src: srcs[0],
lane: 2_u32.into(),
c: (0x3_u32 | 0x1c_u32 << 8).into(),
op: ShflOp::Bfly,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpFSwzAdd {
dst: dst[0].into(),
srcs: [scratch[0].into(), srcs[0]],
ops: [
FSwzAddOp::SubLeft,
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
FSwzAddOp::SubRight,
],
rnd_mode: self.float_ctl[ftype].rnd_mode,
ftz: self.float_ctl[ftype].ftz,
});
dst
}
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
};
self.set_dst(&alu.def, dst);
@ -2132,6 +2066,76 @@ impl<'a> ShaderFromNir<'a> {
}
self.set_ssa(&intrin.def, dst);
}
// Lowering for the ddx, ddx_coarse and ddx_fine intrinsics. All three
// variants take this same path; no separate coarse lowering exists yet
// (see the TODO below).
nir_intrinsic_ddx
| nir_intrinsic_ddx_coarse
| nir_intrinsic_ddx_fine => {
// TODO: Real coarse derivatives
// Only 32-bit results are handled; any other bit size fails this assert.
assert!(intrin.def.bit_size() == 32);
let ftype = FloatType::F32;
// One GPR to hold the shuffled copy of the source.
let scratch = b.alloc_ssa(RegFile::GPR, 1);
// Butterfly shuffle of the source with lane operand 1.
// NOTE(review): presumably this fetches the value held by the
// horizontally adjacent invocation of the quad -- confirm against the
// hardware SHFL semantics.
b.push_op(OpShfl {
dst: scratch[0].into(),
// The in-bounds output of the shuffle is not used.
in_bounds: Dst::None,
src: self.get_src(&srcs[0]),
lane: 1_u32.into(),
// `<<` binds tighter than `|`, so this evaluates to 0x1c03.
// NOTE(review): looks like a packed clamp (low bits) / segment mask
// (from bit 8) pair as in PTX `shfl` -- confirm.
c: (0x3_u32 | 0x1c_u32 << 8).into(),
op: ShflOp::Bfly,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
// Combine the shuffled value (first source) with the original value
// (second source) using a four-entry table of subtract variants.
// NOTE(review): presumably one entry per quad lane, chosen so each lane
// computes the same signed difference -- confirm against FSwzAddOp.
b.push_op(OpFSwzAdd {
dst: dst[0].into(),
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
ops: [
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
],
// Rounding mode and flush-to-zero come from the shader's F32 float
// controls.
rnd_mode: self.float_ctl[ftype].rnd_mode,
ftz: self.float_ctl[ftype].ftz,
});
// Hand the result to set_dst for the intrinsic's def.
self.set_dst(&intrin.def, dst);
}
// Lowering for the ddy, ddy_coarse and ddy_fine intrinsics. All three
// variants take this same path; no separate coarse lowering exists yet
// (see the TODO below).
nir_intrinsic_ddy
| nir_intrinsic_ddy_coarse
| nir_intrinsic_ddy_fine => {
// TODO: Real coarse derivatives
// Only 32-bit results are handled; any other bit size fails this assert.
assert!(intrin.def.bit_size() == 32);
let ftype = FloatType::F32;
// One GPR to hold the shuffled copy of the source.
let scratch = b.alloc_ssa(RegFile::GPR, 1);
// Butterfly shuffle of the source with lane operand 2.
// NOTE(review): presumably this fetches the value held by the
// vertically adjacent invocation of the quad -- confirm against the
// hardware SHFL semantics.
b.push_op(OpShfl {
dst: scratch[0].into(),
// The in-bounds output of the shuffle is not used.
in_bounds: Dst::None,
src: self.get_src(&srcs[0]),
lane: 2_u32.into(),
// `<<` binds tighter than `|`, so this evaluates to 0x1c03.
// NOTE(review): looks like a packed clamp (low bits) / segment mask
// (from bit 8) pair as in PTX `shfl` -- confirm.
c: (0x3_u32 | 0x1c_u32 << 8).into(),
op: ShflOp::Bfly,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
// Combine the shuffled value (first source) with the original value
// (second source) using a four-entry table of subtract variants.
// NOTE(review): presumably one entry per quad lane, with the first two
// and last two lanes forming the two rows -- confirm against FSwzAddOp.
b.push_op(OpFSwzAdd {
dst: dst[0].into(),
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
ops: [
FSwzAddOp::SubLeft,
FSwzAddOp::SubLeft,
FSwzAddOp::SubRight,
FSwzAddOp::SubRight,
],
// Rounding mode and flush-to-zero come from the shader's F32 float
// controls.
rnd_mode: self.float_ctl[ftype].rnd_mode,
ftz: self.float_ctl[ftype].ftz,
});
// Hand the result to set_dst for the intrinsic's def.
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_ballot => {
assert!(srcs[0].bit_size() == 1);
let src = self.get_src(&srcs[0]);