ac/nir: clear unused components before storing XFB outputs to LDS

Shader variables don't always match the intrinsics exactly, so they might
contain unused slots.

Fixes a bunch of regressions with RADV_PERFTEST=ngg_streamout on RDNA2,
and also fixes RDNA3 NGG streamout.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8099
Fixes: cd22bf90e7 ("ac/nir/ngg: refine nogs outputs handling")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20735>
This commit is contained in:
Samuel Pitoiset 2023-01-16 10:13:12 +01:00 committed by Marge Bot
parent e39bf3e6aa
commit 84241b1f75

View file

@@ -1684,6 +1684,13 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
util_bitcount64(b->shader->info.outputs_written & BITFIELD64_MASK(slot));
unsigned mask = xfb_mask[slot];
/* Clear unused components. */
for (unsigned i = 0; i < 4; i++) {
if (!s->outputs[slot][i])
mask &= ~BITFIELD_BIT(i);
}
while (mask) {
int start, count;
u_bit_scan_consecutive_range(&mask, &start, &count);
@@ -1706,6 +1713,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
unsigned mask_lo = xfb_mask_16bit_lo[slot];
unsigned mask_hi = xfb_mask_16bit_hi[slot];
/* Clear unused components. */
for (unsigned i = 0; i < 4; i++) {
if (!s->outputs_16bit_lo[slot][i])
mask_lo &= ~BITFIELD_BIT(i);
if (!s->outputs_16bit_hi[slot][i])
mask_hi &= ~BITFIELD_BIT(i);
}
nir_ssa_def **outputs_lo = s->outputs_16bit_lo[slot];
nir_ssa_def **outputs_hi = s->outputs_16bit_hi[slot];
nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);