mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 17:50:11 +01:00
nak: Add a copy_fs_outputs_nv intrinsic
This is just a little handle to tell the back-end where to do the copy. Ideally, we'd have a NIR intrinsic that does the copy, but we need to be able to copy any number of registers up to 34 and NIR intrinsics just aren't that flexible.

Reviewed-by: M Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28300>
This commit is contained in:
parent
75861c64b8
commit
4fcbf558dd
3 changed files with 66 additions and 56 deletions
|
|
@ -2151,6 +2151,8 @@ intrinsic("end_primitive_nv", dest_comp=1, src_comp=[1], indices=[STREAM_ID])
|
|||
# Contains the final primitive handle and indicate the end of emission.
|
||||
intrinsic("final_primitive_nv", src_comp=[1])
|
||||
|
||||
barrier("copy_fs_outputs_nv")
|
||||
|
||||
intrinsic("bar_set_nv", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("bar_break_nv", dest_comp=1, bit_sizes=[32], src_comp=[1])
|
||||
# src[] = { bar, bar_set }
|
||||
|
|
|
|||
|
|
@ -2097,6 +2097,62 @@ impl<'a> ShaderFromNir<'a> {
|
|||
data: data,
|
||||
});
|
||||
}
|
||||
nir_intrinsic_copy_fs_outputs_nv => {
|
||||
let ShaderIoInfo::Fragment(info) = &mut self.info.io else {
|
||||
panic!(
|
||||
"copy_fs_outputs_nv is only allowed in fragment shaders"
|
||||
);
|
||||
};
|
||||
|
||||
for i in 0..32 {
|
||||
// Assume that colors have to come a vec4 at a time
|
||||
if !self.fs_out_regs[i].is_none() {
|
||||
info.writes_color |= 0xf << (i & !3)
|
||||
}
|
||||
}
|
||||
let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
|
||||
info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none();
|
||||
let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize;
|
||||
info.writes_depth = !self.fs_out_regs[depth_idx].is_none();
|
||||
|
||||
let mut srcs = Vec::new();
|
||||
for i in 0..32 {
|
||||
if info.writes_color & (1 << i) != 0 {
|
||||
if self.fs_out_regs[i].is_none() {
|
||||
srcs.push(0.into());
|
||||
} else {
|
||||
srcs.push(self.fs_out_regs[i].into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// These always come together for some reason
|
||||
if info.writes_sample_mask || info.writes_depth {
|
||||
if info.writes_sample_mask {
|
||||
srcs.push(self.fs_out_regs[mask_idx].into());
|
||||
} else {
|
||||
srcs.push(0.into());
|
||||
}
|
||||
if info.writes_depth {
|
||||
// Saturate depth writes.
|
||||
//
|
||||
// TODO: This seems wrong in light of unrestricted depth
|
||||
// but it's needed to pass CTS tests for now.
|
||||
let depth = self.fs_out_regs[depth_idx];
|
||||
let sat_depth = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpFAdd {
|
||||
dst: sat_depth.into(),
|
||||
srcs: [depth.into(), 0.into()],
|
||||
saturate: true,
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
ftz: false,
|
||||
});
|
||||
srcs.push(sat_depth.into());
|
||||
}
|
||||
}
|
||||
|
||||
b.push_op(OpFSOut { srcs: srcs });
|
||||
}
|
||||
nir_intrinsic_demote
|
||||
| nir_intrinsic_discard
|
||||
| nir_intrinsic_terminate => {
|
||||
|
|
@ -2842,61 +2898,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
self.set_ssa(&undef.def, dst);
|
||||
}
|
||||
|
||||
fn store_fs_outputs(&mut self, b: &mut impl SSABuilder) {
|
||||
let ShaderIoInfo::Fragment(info) = &mut self.info.io else {
|
||||
return;
|
||||
};
|
||||
|
||||
for i in 0..32 {
|
||||
// Assume that colors have to come a vec4 at a time
|
||||
if !self.fs_out_regs[i].is_none() {
|
||||
info.writes_color |= 0xf << (i & !3)
|
||||
}
|
||||
}
|
||||
let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
|
||||
info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none();
|
||||
let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize;
|
||||
info.writes_depth = !self.fs_out_regs[depth_idx].is_none();
|
||||
|
||||
let mut srcs = Vec::new();
|
||||
for i in 0..32 {
|
||||
if info.writes_color & (1 << i) != 0 {
|
||||
if self.fs_out_regs[i].is_none() {
|
||||
srcs.push(0.into());
|
||||
} else {
|
||||
srcs.push(self.fs_out_regs[i].into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// These always come together for some reason
|
||||
if info.writes_sample_mask || info.writes_depth {
|
||||
if info.writes_sample_mask {
|
||||
srcs.push(self.fs_out_regs[mask_idx].into());
|
||||
} else {
|
||||
srcs.push(0.into());
|
||||
}
|
||||
if info.writes_depth {
|
||||
// Saturate depth writes.
|
||||
//
|
||||
// TODO: This seems wrong in light of unrestricted depth but
|
||||
// it's needed to pass CTS tests for now.
|
||||
let depth = self.fs_out_regs[depth_idx];
|
||||
let sat_depth = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpFAdd {
|
||||
dst: sat_depth.into(),
|
||||
srcs: [depth.into(), 0.into()],
|
||||
saturate: true,
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
ftz: false,
|
||||
});
|
||||
srcs.push(sat_depth.into());
|
||||
}
|
||||
}
|
||||
|
||||
b.push_op(OpFSOut { srcs: srcs });
|
||||
}
|
||||
|
||||
fn parse_block(
|
||||
&mut self,
|
||||
ssa_alloc: &mut SSAValueAllocator,
|
||||
|
|
@ -3040,7 +3041,6 @@ impl<'a> ShaderFromNir<'a> {
|
|||
assert!(succ[1].is_none());
|
||||
let s0 = succ[0].unwrap();
|
||||
if s0.index == self.end_block_id {
|
||||
self.store_fs_outputs(&mut b);
|
||||
b.push_op(OpExit {});
|
||||
} else {
|
||||
self.cfg.add_edge(nb.index, s0.index);
|
||||
|
|
|
|||
|
|
@ -1073,6 +1073,14 @@ nak_nir_lower_fs_outputs(nir_shader *nir)
|
|||
|
||||
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, fs_out_size, 0);
|
||||
|
||||
/* We need a copy_fs_outputs_nv intrinsic so NAK knows where to place the
|
||||
* final copy. This needs to be in the last block, after all store_output
|
||||
* intrinsics.
|
||||
*/
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_builder b = nir_builder_at(nir_after_impl(impl));
|
||||
nir_copy_fs_outputs_nv(&b);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue