nir/load_store_vectorize: fix sorting of vectors in add_to_entry_key

fossil-db (navi31):
Totals from 13 (0.02% of 79377) affected shaders:
Instrs: 2997 -> 2990 (-0.23%); split: -0.77%, +0.53%
CodeSize: 16552 -> 16504 (-0.29%); split: -0.85%, +0.56%
Latency: 75923 -> 75744 (-0.24%); split: -0.30%, +0.06%
InvThroughput: 12741 -> 12754 (+0.10%); split: -0.14%, +0.24%
PreVGPRs: 225 -> 230 (+2.22%)
VALU: 1565 -> 1569 (+0.26%); split: -0.77%, +1.02%

fossil-db (navi21):
Totals from 13 (0.02% of 79377) affected shaders:
Instrs: 2522 -> 2518 (-0.16%); split: -0.75%, +0.59%
CodeSize: 14660 -> 14620 (-0.27%); split: -0.85%, +0.57%
Latency: 77878 -> 77634 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 15512 -> 15518 (+0.04%); split: -0.15%, +0.19%
Copies: 230 -> 231 (+0.43%); split: -0.87%, +1.30%
PreVGPRs: 225 -> 230 (+2.22%)
VALU: 1536 -> 1541 (+0.33%); split: -0.91%, +1.24%

fossil-db (vega10):
Totals from 13 (0.02% of 62962) affected shaders:
Instrs: 2684 -> 2674 (-0.37%); split: -0.75%, +0.37%
CodeSize: 14784 -> 14752 (-0.22%); split: -0.65%, +0.43%
Latency: 118228 -> 118215 (-0.01%); split: -0.06%, +0.05%
InvThroughput: 42893 -> 42892 (-0.00%); split: -0.11%, +0.11%
SClause: 63 -> 62 (-1.59%)
PreVGPRs: 236 -> 241 (+2.12%)
VALU: 1665 -> 1666 (+0.06%); split: -0.72%, +0.78%

fossil-db (polaris10):
Totals from 9 (0.01% of 61794) affected shaders:
Instrs: 1872 -> 1885 (+0.69%); split: -0.16%, +0.85%
CodeSize: 9980 -> 10012 (+0.32%); split: -0.20%, +0.52%
Latency: 82331 -> 82382 (+0.06%); split: -0.01%, +0.07%
InvThroughput: 30603 -> 30686 (+0.27%)
SClause: 44 -> 45 (+2.27%)
Copies: 252 -> 256 (+1.59%)
PreVGPRs: 169 -> 173 (+2.37%)
VALU: 1100 -> 1117 (+1.55%); split: -0.27%, +1.82%
SALU: 430 -> 434 (+0.93%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29242>
This commit is contained in:
Rhys Perry 2025-02-04 15:14:47 +00:00 committed by Marge Bot
parent 5fe0012670
commit 150305bbb8

View file

@@ -347,6 +347,12 @@ type_scalar_size_bytes(const struct glsl_type *type)
return glsl_type_is_boolean(type) ? 4u : glsl_get_bit_size(type) / 8u;
}
/* Ordering predicate for offset scalars, used by add_to_entry_key() to pick
 * the insertion point: returns true when `a` should be placed before `b`.
 *
 * Scalars that refer to different defs are ordered by descending def index.
 * Scalars that refer to the same def are ordered by descending component, so
 * different components of one vector get a well-defined relative order
 * rather than comparing as equal.
 */
static bool
cmp_scalar(nir_scalar a, nir_scalar b)
{
   /* Different defs: the def index alone decides the order. */
   if (a.def != b.def)
      return a.def->index > b.def->index;

   /* Same def: fall back to the component number. */
   return a.comp > b.comp;
}
static unsigned
add_to_entry_key(nir_scalar *offset_defs, uint64_t *offset_defs_mul,
unsigned offset_def_count, nir_scalar def, uint64_t mul)
@@ -354,7 +360,7 @@ add_to_entry_key(nir_scalar *offset_defs, uint64_t *offset_defs_mul,
mul = util_mask_sign_extend(mul, def.def->bit_size);
for (unsigned i = 0; i <= offset_def_count; i++) {
if (i == offset_def_count || def.def->index > offset_defs[i].def->index) {
if (i == offset_def_count || cmp_scalar(def, offset_defs[i])) {
/* insert before i */
memmove(offset_defs + i + 1, offset_defs + i,
(offset_def_count - i) * sizeof(nir_scalar));