From 38e691fc0a693fe338c9de4ab13ab3f48bbcaf92 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Thu, 30 Apr 2026 15:22:36 +0200
Subject: [PATCH] nir/opt_varyings: do no_signed_zero linking even for non removable stores
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

E.g. position in VS.

Foz-DB Navi48:
Totals from 948 (0.79% of 120695) affected shaders:
MaxWaves: 26816 -> 26828 (+0.04%)
Instrs: 799692 -> 796993 (-0.34%); split: -0.34%, +0.01%
CodeSize: 3855744 -> 3846816 (-0.23%); split: -0.24%, +0.01%
VGPRs: 50256 -> 50220 (-0.07%)
Latency: 2209359 -> 2207667 (-0.08%); split: -0.09%, +0.01%
InvThroughput: 305260 -> 303519 (-0.57%); split: -0.57%, +0.00%
VClause: 11640 -> 11643 (+0.03%); split: -0.01%, +0.03%
SClause: 21152 -> 21149 (-0.01%)
Copies: 51658 -> 51675 (+0.03%); split: -0.11%, +0.14%
Branches: 18656 -> 18655 (-0.01%)
PreVGPRs: 37999 -> 37984 (-0.04%)
VALU: 469752 -> 467406 (-0.50%); split: -0.50%, +0.00%
SALU: 105433 -> 105323 (-0.10%); split: -0.11%, +0.00%

Reviewed-by: Marek Olšák
Part-of:
---
 src/compiler/nir/nir_opt_varyings.c | 46 +++++++++++++----------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c
index 9fb943802f5..db6c3400c0e 100644
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -1301,9 +1301,6 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
 
    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
 
-   if (!can_remove_varying(linkage, sem.location))
-      return false;
-
    /* Insert the load into the list of loads for this scalar slot. */
    unsigned slot = intr_get_scalar_16bit_slot(intr);
    struct scalar_slot *in = &linkage->slot[slot];
@@ -1319,6 +1316,9 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
          BITSET_SET(linkage->signed_zero_mask, slot + i * 8);
    }
 
+   if (!can_remove_varying(linkage, sem.location))
+      return false;
+
    BITSET_SET(linkage->removable_mask, slot);
 
    enum fs_vec4_type fs_vec4_type = FS_VEC4_TYPE_NONE;
@@ -1549,9 +1549,6 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
 
    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
 
-   if (!can_remove_varying(linkage, sem.location))
-      return false;
-
    /* For "xx -> FS", treat BFCn stores as COLn to make dead varying
     * elimination do the right thing automatically. The rules are:
     * - COLn inputs can be removed only if both COLn and BFCn are not
@@ -1578,25 +1575,7 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
    node->instr = intr;
    out->num_slots = MAX2(out->num_slots, sem.num_slots);
 
-   if (is_store) {
-      list_addtail(&node->head, &out->producer.stores);
-
-      if (has_xfb(intr)) {
-         BITSET_SET(linkage->xfb_mask, slot);
-
-         if (sem.no_varying &&
-             !is_active_sysval_output(linkage, slot, intr)) {
-            if (intr->src[0].ssa->bit_size == 32)
-               BITSET_SET(linkage->xfb32_only_mask, slot);
-            else if (intr->src[0].ssa->bit_size == 16)
-               BITSET_SET(linkage->xfb16_only_mask, slot);
-            else
-               UNREACHABLE("invalid load_input type");
-         }
-      }
-   } else {
-      list_addtail(&node->head, &out->producer.loads);
-   }
+   list_addtail(&node->head, is_store ? &out->producer.stores : &out->producer.loads);
 
    if (is_store ? (is_sz_sysval(linkage, slot, intr) || has_xfb(intr)) : !sem.no_signed_zero) {
       unsigned nsz_count = nir_src_is_const(offset) ? 1 : sem.num_slots;
@@ -1604,8 +1583,25 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
          BITSET_SET(linkage->signed_zero_mask, slot + i * 8);
    }
 
+   if (!can_remove_varying(linkage, sem.location))
+      return false;
+
    BITSET_SET(linkage->removable_mask, slot);
 
+   if (is_store && has_xfb(intr)) {
+      BITSET_SET(linkage->xfb_mask, slot);
+
+      if (sem.no_varying &&
+          !is_active_sysval_output(linkage, slot, intr)) {
+         if (intr->src[0].ssa->bit_size == 32)
+            BITSET_SET(linkage->xfb32_only_mask, slot);
+         else if (intr->src[0].ssa->bit_size == 16)
+            BITSET_SET(linkage->xfb16_only_mask, slot);
+         else
+            UNREACHABLE("invalid load_input type");
+      }
+   }
+
    /* Indirect indexing. */
    if (!nir_src_is_const(offset)) {
       /* Only the indirectly-indexed component is marked as indirect. */