From 150305bbb8bd538f0b61369460bec2bd0bd0784d Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 4 Feb 2025 15:14:47 +0000
Subject: [PATCH] nir/load_store_vectorize: fix sorting of vectors in add_to_entry_key

fossil-db (navi31):
Totals from 13 (0.02% of 79377) affected shaders:
Instrs: 2997 -> 2990 (-0.23%); split: -0.77%, +0.53%
CodeSize: 16552 -> 16504 (-0.29%); split: -0.85%, +0.56%
Latency: 75923 -> 75744 (-0.24%); split: -0.30%, +0.06%
InvThroughput: 12741 -> 12754 (+0.10%); split: -0.14%, +0.24%
PreVGPRs: 225 -> 230 (+2.22%)
VALU: 1565 -> 1569 (+0.26%); split: -0.77%, +1.02%

fossil-db (navi21):
Totals from 13 (0.02% of 79377) affected shaders:
Instrs: 2522 -> 2518 (-0.16%); split: -0.75%, +0.59%
CodeSize: 14660 -> 14620 (-0.27%); split: -0.85%, +0.57%
Latency: 77878 -> 77634 (-0.31%); split: -0.36%, +0.05%
InvThroughput: 15512 -> 15518 (+0.04%); split: -0.15%, +0.19%
Copies: 230 -> 231 (+0.43%); split: -0.87%, +1.30%
PreVGPRs: 225 -> 230 (+2.22%)
VALU: 1536 -> 1541 (+0.33%); split: -0.91%, +1.24%

fossil-db (vega10):
Totals from 13 (0.02% of 62962) affected shaders:
Instrs: 2684 -> 2674 (-0.37%); split: -0.75%, +0.37%
CodeSize: 14784 -> 14752 (-0.22%); split: -0.65%, +0.43%
Latency: 118228 -> 118215 (-0.01%); split: -0.06%, +0.05%
InvThroughput: 42893 -> 42892 (-0.00%); split: -0.11%, +0.11%
SClause: 63 -> 62 (-1.59%)
PreVGPRs: 236 -> 241 (+2.12%)
VALU: 1665 -> 1666 (+0.06%); split: -0.72%, +0.78%

fossil-db (polaris10):
Totals from 9 (0.01% of 61794) affected shaders:
Instrs: 1872 -> 1885 (+0.69%); split: -0.16%, +0.85%
CodeSize: 9980 -> 10012 (+0.32%); split: -0.20%, +0.52%
Latency: 82331 -> 82382 (+0.06%); split: -0.01%, +0.07%
InvThroughput: 30603 -> 30686 (+0.27%)
SClause: 44 -> 45 (+2.27%)
Copies: 252 -> 256 (+1.59%)
PreVGPRs: 169 -> 173 (+2.37%)
VALU: 1100 -> 1117 (+1.55%); split: -0.27%, +1.82%
SALU: 430 -> 434 (+0.93%)

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/compiler/nir/nir_opt_load_store_vectorize.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index f1a6cd2aea5..b357fbe994b 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -347,6 +347,12 @@ type_scalar_size_bytes(const struct glsl_type *type)
    return glsl_type_is_boolean(type) ? 4u : glsl_get_bit_size(type) / 8u;
 }
 
+static bool
+cmp_scalar(nir_scalar a, nir_scalar b)
+{
+   return a.def == b.def ? a.comp > b.comp : a.def->index > b.def->index;
+}
+
 static unsigned
 add_to_entry_key(nir_scalar *offset_defs, uint64_t *offset_defs_mul,
                  unsigned offset_def_count, nir_scalar def, uint64_t mul)
@@ -354,7 +360,7 @@ add_to_entry_key(nir_scalar *offset_defs, uint64_t *offset_defs_mul,
    mul = util_mask_sign_extend(mul, def.def->bit_size);
 
    for (unsigned i = 0; i <= offset_def_count; i++) {
-      if (i == offset_def_count || def.def->index > offset_defs[i].def->index) {
+      if (i == offset_def_count || cmp_scalar(def, offset_defs[i])) {
          /* insert before i */
          memmove(offset_defs + i + 1, offset_defs + i,
                  (offset_def_count - i) * sizeof(nir_scalar));