From 84241b1f75aa34466d0939f656bcc5aa0658c0a3 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Mon, 16 Jan 2023 10:13:12 +0100
Subject: [PATCH] ac/nir: clear unused components before storing XFB outputs to LDS

Shader variables don't always exactly match intrinsics and they might
contain unused slots.

Fixes a bunch of regressions with RADV_PERFTEST=ngg_streamout on RDNA2,
and also fixes RDNA3 NGG streamout.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8099
Fixes: cd22bf90e79 ("ac/nir/ngg: refine nogs outputs handling")
Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/common/ac_nir_lower_ngg.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index 5fe8c429484..4a0f3bc5df1 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -1684,6 +1684,13 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
          util_bitcount64(b->shader->info.outputs_written & BITFIELD64_MASK(slot));
 
       unsigned mask = xfb_mask[slot];
+
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs[slot][i])
+            mask &= ~BITFIELD_BIT(i);
+      }
+
       while (mask) {
          int start, count;
          u_bit_scan_consecutive_range(&mask, &start, &count);
@@ -1706,6 +1713,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
       unsigned mask_lo = xfb_mask_16bit_lo[slot];
       unsigned mask_hi = xfb_mask_16bit_hi[slot];
 
+      /* Clear unused components. */
+      for (unsigned i = 0; i < 4; i++) {
+         if (!s->outputs_16bit_lo[slot][i])
+            mask_lo &= ~BITFIELD_BIT(i);
+         if (!s->outputs_16bit_hi[slot][i])
+            mask_hi &= ~BITFIELD_BIT(i);
+      }
+
       nir_ssa_def **outputs_lo = s->outputs_16bit_lo[slot];
       nir_ssa_def **outputs_hi = s->outputs_16bit_hi[slot];
       nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);