nak: Add a copy_fs_outputs_nv intrinsic

This is just a little handle to tell the back-end where to do the copy.
Ideally, we'd have a NIR intrinsic that does the copy but we need to be
able to copy any number of registers up to 34 and NIR intrinsics just
aren't that flexible.

Reviewed-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28300>
This commit is contained in:
Faith Ekstrand 2024-03-19 10:15:09 -05:00 committed by Marge Bot
parent 75861c64b8
commit 4fcbf558dd
3 changed files with 66 additions and 56 deletions

View file

@ -2151,6 +2151,8 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], indices=[STREAM_ID])
# Contains the final primitive handle and indicates the end of emission.
intrinsic("final_primitive_nv", src_comp=[1])
barrier("copy_fs_outputs_nv")
intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
# src[] = { bar, bar_set }

View file

@ -2097,6 +2097,62 @@ impl<'a> ShaderFromNir<'a> {
data: data,
});
}
// Lowers copy_fs_outputs_nv: records which fragment outputs are written in
// the shader's fragment IO info and emits a single OpFSOut whose sources
// are built from self.fs_out_regs.
nir_intrinsic_copy_fs_outputs_nv => {
// The intrinsic is only meaningful when the shader IO info is the
// Fragment variant; anything else is treated as a bug.
let ShaderIoInfo::Fragment(info) = &mut self.info.io else {
panic!(
"copy_fs_outputs_nv is only allowed in fragment shaders"
);
};
// Pass 1: build the writes_color bit mask from the first 32 output
// registers.
for i in 0..32 {
// Assume that colors have to come a vec4 at a time
//
// (i & !3) rounds i down to a multiple of four, so one written
// component sets all four bits of its aligned group.
if !self.fs_out_regs[i].is_none() {
info.writes_color |= 0xf << (i & !3)
}
}
// The sample mask and depth registers are looked up at indices derived
// from the NAK_FS_OUT_* constants divided by 4.
// NOTE(review): presumably those constants are byte offsets and the
// division converts them to 32-bit register indices -- confirm.
let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none();
let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize;
info.writes_depth = !self.fs_out_regs[depth_idx].is_none();
// Pass 2: push one source per bit set in writes_color, in ascending
// index order. Components that belong to a written group but were never
// stored themselves are filled with a zero source.
let mut srcs = Vec::new();
for i in 0..32 {
if info.writes_color & (1 << i) != 0 {
if self.fs_out_regs[i].is_none() {
srcs.push(0.into());
} else {
srcs.push(self.fs_out_regs[i].into());
}
}
}
// These always come together for some reason
//
// If either one is written, a sample-mask source is always pushed (zero
// when the mask itself is not written); the depth source follows only
// when depth is written.
if info.writes_sample_mask || info.writes_depth {
if info.writes_sample_mask {
srcs.push(self.fs_out_regs[mask_idx].into());
} else {
srcs.push(0.into());
}
if info.writes_depth {
// Saturate depth writes.
//
// TODO: This seems wrong in light of unrestricted depth
// but it's needed to pass CTS tests for now.
//
// The saturation is done by an OpFAdd of the depth value and
// zero with the saturate flag set, written to a fresh GPR SSA
// value.
let depth = self.fs_out_regs[depth_idx];
let sat_depth = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpFAdd {
dst: sat_depth.into(),
srcs: [depth.into(), 0.into()],
saturate: true,
rnd_mode: FRndMode::NearestEven,
ftz: false,
});
srcs.push(sat_depth.into());
}
}
// Emit the output op with the collected sources.
b.push_op(OpFSOut { srcs: srcs });
}
nir_intrinsic_demote
| nir_intrinsic_discard
| nir_intrinsic_terminate => {
@ -2842,61 +2898,6 @@ impl<'a> ShaderFromNir<'a> {
self.set_ssa(&undef.def, dst);
}
/// Emits the `OpFSOut` that carries the fragment shader outputs.
///
/// Does nothing when the shader IO info is not the `Fragment` variant.
/// Otherwise it updates `writes_color`, `writes_sample_mask` and
/// `writes_depth` in the fragment info from `self.fs_out_regs`, gathers the
/// matching sources (colors first, then sample mask, then depth) and pushes
/// a single `OpFSOut` through `b`.
fn store_fs_outputs(&mut self, b: &mut impl SSABuilder) {
    let ShaderIoInfo::Fragment(info) = &mut self.info.io else {
        return;
    };

    // Colors are assumed to be written a whole vec4 at a time: one written
    // component marks all four bits of its aligned group.
    for slot in 0..32 {
        let reg = self.fs_out_regs[slot];
        if reg.is_none() {
            continue;
        }
        info.writes_color |= 0xf << (slot & !3);
    }

    let sample_mask_slot = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
    info.writes_sample_mask = !self.fs_out_regs[sample_mask_slot].is_none();

    let depth_slot = (NAK_FS_OUT_DEPTH / 4) as usize;
    info.writes_depth = !self.fs_out_regs[depth_slot].is_none();

    // One source per bit set in writes_color, in ascending order; components
    // that were never stored are filled with zero.
    let mut out_srcs = Vec::new();
    for slot in 0..32 {
        if info.writes_color & (1 << slot) == 0 {
            continue;
        }
        let reg = self.fs_out_regs[slot];
        out_srcs.push(if reg.is_none() { 0.into() } else { reg.into() });
    }

    // Sample mask and depth always travel together: when either is written
    // the sample-mask source is emitted (zero if absent), then depth if
    // it is written.
    if info.writes_sample_mask || info.writes_depth {
        out_srcs.push(if info.writes_sample_mask {
            self.fs_out_regs[sample_mask_slot].into()
        } else {
            0.into()
        });

        if info.writes_depth {
            // Saturate depth writes.
            //
            // TODO: This seems wrong in light of unrestricted depth but
            // it's needed to pass CTS tests for now.
            let raw_depth = self.fs_out_regs[depth_slot];
            let clamped_depth = b.alloc_ssa(RegFile::GPR, 1);
            b.push_op(OpFAdd {
                dst: clamped_depth.into(),
                srcs: [raw_depth.into(), 0.into()],
                saturate: true,
                rnd_mode: FRndMode::NearestEven,
                ftz: false,
            });
            out_srcs.push(clamped_depth.into());
        }
    }

    b.push_op(OpFSOut { srcs: out_srcs });
}
fn parse_block(
&mut self,
ssa_alloc: &mut SSAValueAllocator,
@ -3040,7 +3041,6 @@ impl<'a> ShaderFromNir<'a> {
assert!(succ[1].is_none());
let s0 = succ[0].unwrap();
if s0.index == self.end_block_id {
self.store_fs_outputs(&mut b);
b.push_op(OpExit {});
} else {
self.cfg.add_edge(nb.index, s0.index);

View file

@ -1073,6 +1073,14 @@ nak_nir_lower_fs_outputs(nir_shader *nir)
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, fs_out_size, 0);
/* We need a copy_fs_outputs_nv intrinsic so NAK knows where to place the
* final copy. This needs to be in the last block, after all store_output
* intrinsics.
*/
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
nir_builder b = nir_builder_at(nir_after_impl(impl));
nir_copy_fs_outputs_nv(&b);
return true;
}