nir/load_store_vectorize: use nir_def_num_lsb_zero in check_for_robustness

fossil-db (gfx1201):
Totals from 499 (0.63% of 79839) affected shaders:
MaxWaves: 14276 -> 14234 (-0.29%)
Instrs: 520883 -> 508159 (-2.44%); split: -2.45%, +0.01%
CodeSize: 2831220 -> 2731080 (-3.54%); split: -3.54%, +0.00%
VGPRs: 27156 -> 27348 (+0.71%)
SpillSGPRs: 360 -> 390 (+8.33%)
Latency: 4473898 -> 4414552 (-1.33%); split: -1.54%, +0.21%
InvThroughput: 494468 -> 493508 (-0.19%); split: -0.62%, +0.43%
VClause: 14211 -> 14060 (-1.06%); split: -1.16%, +0.10%
SClause: 14653 -> 14354 (-2.04%); split: -2.39%, +0.35%
Copies: 36772 -> 37056 (+0.77%); split: -0.65%, +1.42%
Branches: 11502 -> 11486 (-0.14%)
PreSGPRs: 22605 -> 22848 (+1.07%); split: -0.39%, +1.47%
PreVGPRs: 20571 -> 20833 (+1.27%)
VALU: 242982 -> 243151 (+0.07%); split: -0.08%, +0.14%
SALU: 91332 -> 88069 (-3.57%); split: -3.71%, +0.14%
VMEM: 32275 -> 29137 (-9.72%)
SMEM: 26239 -> 22400 (-14.63%)
VOPD: 345 -> 330 (-4.35%)
SClause: 14646 -> 14347 (-2.04%); split: -2.39%, +0.35%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36760>
This commit is contained in:
Rhys Perry 2025-08-12 15:30:21 +01:00 committed by Marge Bot
parent 46da666205
commit b03eeb12a9

View file

@@ -46,6 +46,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_deref.h"
+#include "nir_range_analysis.h"
 #include "nir_worklist.h"
 #include <stdlib.h>
@@ -172,6 +173,7 @@ struct entry {
 struct vectorize_ctx {
 nir_shader *shader;
 const nir_load_store_vectorize_options *options;
+struct hash_table *numlsb_ht;
 struct list_head entries[nir_num_variable_modes];
 struct hash_table *loads[nir_num_variable_modes];
 struct hash_table *stores[nir_num_variable_modes];
@@ -1250,8 +1252,11 @@ check_for_robustness(struct vectorize_ctx *ctx, struct entry *low, uint64_t high
 * are not guaranteed to be power-of-2.
 */
 uint64_t stride = 0;
-for (unsigned i = 0; i < low->key->offset_def_count; i++)
-stride = calc_gcd(low->key->offset_defs_mul[i], stride);
+for (unsigned i = 0; i < low->key->offset_def_count; i++) {
+unsigned lsb_zero = nir_def_num_lsb_zero(ctx->numlsb_ht, low->key->offset_defs[i]);
+if (lsb_zero != 64)
+stride = calc_gcd(low->key->offset_defs_mul[i] << lsb_zero, stride);
+}
 unsigned addition_bits = low->intrin->src[low->info->base_src].ssa->bit_size;
 /* low's offset must be a multiple of "stride" plus "low->offset". */
@@ -1702,6 +1707,7 @@ nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_
 struct vectorize_ctx *ctx = rzalloc(NULL, struct vectorize_ctx);
 ctx->shader = shader;
+ctx->numlsb_ht = _mesa_pointer_hash_table_create(ctx);
 ctx->options = options;
 nir_shader_index_vars(shader, options->modes);