nir/opt_varyings: do no_signed_zero linking even for non-removable stores
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

E.g. the position output in a vertex shader (VS).

Foz-DB Navi48:
Totals from 948 (0.79% of 120695) affected shaders:
MaxWaves: 26816 -> 26828 (+0.04%)
Instrs: 799692 -> 796993 (-0.34%); split: -0.34%, +0.01%
CodeSize: 3855744 -> 3846816 (-0.23%); split: -0.24%, +0.01%
VGPRs: 50256 -> 50220 (-0.07%)
Latency: 2209359 -> 2207667 (-0.08%); split: -0.09%, +0.01%
InvThroughput: 305260 -> 303519 (-0.57%); split: -0.57%, +0.00%
VClause: 11640 -> 11643 (+0.03%); split: -0.01%, +0.03%
SClause: 21152 -> 21149 (-0.01%)
Copies: 51658 -> 51675 (+0.03%); split: -0.11%, +0.14%
Branches: 18656 -> 18655 (-0.01%)
PreVGPRs: 37999 -> 37984 (-0.04%)
VALU: 469752 -> 467406 (-0.50%); split: -0.50%, +0.00%
SALU: 105433 -> 105323 (-0.10%); split: -0.11%, +0.00%

Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41292>
This commit is contained in:
Georg Lehmann 2026-04-30 15:22:36 +02:00 committed by Marge Bot
parent fac4edbcba
commit 38e691fc0a

View file

@@ -1301,9 +1301,6 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (!can_remove_varying(linkage, sem.location))
return false;
/* Insert the load into the list of loads for this scalar slot. */
unsigned slot = intr_get_scalar_16bit_slot(intr);
struct scalar_slot *in = &linkage->slot[slot];
@@ -1319,6 +1316,9 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d
BITSET_SET(linkage->signed_zero_mask, slot + i * 8);
}
if (!can_remove_varying(linkage, sem.location))
return false;
BITSET_SET(linkage->removable_mask, slot);
enum fs_vec4_type fs_vec4_type = FS_VEC4_TYPE_NONE;
@@ -1549,9 +1549,6 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (!can_remove_varying(linkage, sem.location))
return false;
/* For "xx -> FS", treat BFCn stores as COLn to make dead varying
* elimination do the right thing automatically. The rules are:
* - COLn inputs can be removed only if both COLn and BFCn are not
@@ -1578,25 +1575,7 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
node->instr = intr;
out->num_slots = MAX2(out->num_slots, sem.num_slots);
if (is_store) {
list_addtail(&node->head, &out->producer.stores);
if (has_xfb(intr)) {
BITSET_SET(linkage->xfb_mask, slot);
if (sem.no_varying &&
!is_active_sysval_output(linkage, slot, intr)) {
if (intr->src[0].ssa->bit_size == 32)
BITSET_SET(linkage->xfb32_only_mask, slot);
else if (intr->src[0].ssa->bit_size == 16)
BITSET_SET(linkage->xfb16_only_mask, slot);
else
UNREACHABLE("invalid load_input type");
}
}
} else {
list_addtail(&node->head, &out->producer.loads);
}
list_addtail(&node->head, is_store ? &out->producer.stores : &out->producer.loads);
if (is_store ? (is_sz_sysval(linkage, slot, intr) || has_xfb(intr)) : !sem.no_signed_zero) {
unsigned nsz_count = nir_src_is_const(offset) ? 1 : sem.num_slots;
@@ -1604,8 +1583,25 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_
BITSET_SET(linkage->signed_zero_mask, slot + i * 8);
}
if (!can_remove_varying(linkage, sem.location))
return false;
BITSET_SET(linkage->removable_mask, slot);
if (is_store && has_xfb(intr)) {
BITSET_SET(linkage->xfb_mask, slot);
if (sem.no_varying &&
!is_active_sysval_output(linkage, slot, intr)) {
if (intr->src[0].ssa->bit_size == 32)
BITSET_SET(linkage->xfb32_only_mask, slot);
else if (intr->src[0].ssa->bit_size == 16)
BITSET_SET(linkage->xfb16_only_mask, slot);
else
UNREACHABLE("invalid load_input type");
}
}
/* Indirect indexing. */
if (!nir_src_is_const(offset)) {
/* Only the indirectly-indexed component is marked as indirect. */