mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
nak: switch to derivative intrinsics
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30573>
This commit is contained in:
parent
185379ba81
commit
8c501a121f
2 changed files with 72 additions and 66 deletions
|
|
@@ -163,6 +163,8 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
|
|||
op.discard_is_demote = true;
|
||||
|
||||
op.max_unroll_iterations = 32;
|
||||
op.has_ddx_intrinsics = true;
|
||||
op.scalarize_ddx = true;
|
||||
|
||||
op
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -1660,72 +1660,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
b.shr(srcs[0], srcs[1], false)
|
||||
}
|
||||
}
|
||||
nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: srcs[0],
|
||||
lane: 1_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), srcs[0]],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
|
||||
dst
|
||||
}
|
||||
nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(alu.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: srcs[0],
|
||||
lane: 2_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), srcs[0]],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
|
||||
dst
|
||||
}
|
||||
_ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
|
||||
};
|
||||
self.set_dst(&alu.def, dst);
|
||||
|
|
@@ -2132,6 +2066,76 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
self.set_ssa(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_ddx
|
||||
| nir_intrinsic_ddx_coarse
|
||||
| nir_intrinsic_ddx_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 1_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_ddy
|
||||
| nir_intrinsic_ddy_coarse
|
||||
| nir_intrinsic_ddy_fine => {
|
||||
// TODO: Real coarse derivatives
|
||||
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
let ftype = FloatType::F32;
|
||||
let scratch = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpShfl {
|
||||
dst: scratch[0].into(),
|
||||
in_bounds: Dst::None,
|
||||
src: self.get_src(&srcs[0]),
|
||||
lane: 2_u32.into(),
|
||||
c: (0x3_u32 | 0x1c_u32 << 8).into(),
|
||||
op: ShflOp::Bfly,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
|
||||
b.push_op(OpFSwzAdd {
|
||||
dst: dst[0].into(),
|
||||
srcs: [scratch[0].into(), self.get_src(&srcs[0])],
|
||||
ops: [
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubLeft,
|
||||
FSwzAddOp::SubRight,
|
||||
FSwzAddOp::SubRight,
|
||||
],
|
||||
rnd_mode: self.float_ctl[ftype].rnd_mode,
|
||||
ftz: self.float_ctl[ftype].ftz,
|
||||
});
|
||||
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_ballot => {
|
||||
assert!(srcs[0].bit_size() == 1);
|
||||
let src = self.get_src(&srcs[0]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue