nir/load_store_vectorize: use nir_def_num_lsb_zero in calc_alignment
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

fossil-db (gfx1201):
Totals from 20 (0.03% of 79839) affected shaders:
Instrs: 15370 -> 15251 (-0.77%)
CodeSize: 89764 -> 88952 (-0.90%)
Latency: 150295 -> 149963 (-0.22%)
InvThroughput: 210291 -> 210105 (-0.09%)
Copies: 1337 -> 1320 (-1.27%)
PreVGPRs: 589 -> 590 (+0.17%)
VALU: 7519 -> 7466 (-0.70%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36760>
This commit is contained in:
Rhys Perry 2025-08-13 14:13:20 +01:00 committed by Marge Bot
parent b03eeb12a9
commit 2d597b6919

View file

@ -581,7 +581,7 @@ aliasing_modes(nir_variable_mode modes)
}
static void
calc_alignment(struct entry *entry)
calc_alignment(struct vectorize_ctx *ctx, struct entry *entry)
{
if (entry->intrin->intrinsic == nir_intrinsic_load_buffer_amd ||
entry->intrin->intrinsic == nir_intrinsic_store_buffer_amd) {
@ -595,8 +595,12 @@ calc_alignment(struct entry *entry)
uint32_t align_mul = 31;
for (unsigned i = 0; i < entry->key->offset_def_count; i++) {
if (entry->key->offset_defs_mul[i])
align_mul = MIN2(align_mul, ffsll(entry->key->offset_defs_mul[i]));
unsigned lsb_zero = nir_def_num_lsb_zero(ctx->numlsb_ht, entry->key->offset_defs[i]);
if (lsb_zero == 64)
continue;
uint64_t stride = entry->key->offset_defs_mul[i] << lsb_zero;
if (stride)
align_mul = MIN2(align_mul, ffsll(stride));
}
entry->align_mul = 1u << (align_mul - 1);
@ -610,7 +614,7 @@ calc_alignment(struct entry *entry)
}
static struct entry *
create_entry(void *mem_ctx,
create_entry(void *mem_ctx, struct vectorize_ctx *ctx,
const struct intrinsic_info *info,
nir_intrinsic_instr *intrin)
{
@ -672,7 +676,7 @@ create_entry(void *mem_ctx,
if (get_variable_mode(entry) & restrict_modes)
entry->access |= ACCESS_RESTRICT;
calc_alignment(entry);
calc_alignment(ctx, entry);
return entry;
}
@ -1660,7 +1664,7 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
unsigned mode_index = mode_to_index(mode);
/* create entry */
struct entry *entry = create_entry(ctx, info, intrin);
struct entry *entry = create_entry(ctx, ctx, info, intrin);
entry->index = next_index++;
list_addtail(&entry->head, &ctx->entries[mode_index]);
@ -1730,7 +1734,7 @@ nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_
static bool
opt_load_store_update_alignments_callback(struct nir_builder *b,
nir_intrinsic_instr *intrin,
UNUSED void *s)
void *s)
{
if (!nir_intrinsic_has_align_mul(intrin))
return false;
@ -1739,7 +1743,7 @@ opt_load_store_update_alignments_callback(struct nir_builder *b,
if (!info)
return false;
struct entry *entry = create_entry(NULL, info, intrin);
struct entry *entry = create_entry(NULL, s, info, intrin);
const bool progress = update_align(entry);
ralloc_free(entry);
@ -1749,10 +1753,14 @@ opt_load_store_update_alignments_callback(struct nir_builder *b,
bool
nir_opt_load_store_update_alignments(nir_shader *shader)
{
return nir_shader_intrinsics_pass(shader,
opt_load_store_update_alignments_callback,
nir_metadata_control_flow |
nir_metadata_live_defs |
nir_metadata_instr_index,
NULL);
struct vectorize_ctx ctx;
ctx.numlsb_ht = _mesa_pointer_hash_table_create(NULL);
bool progress = nir_shader_intrinsics_pass(shader,
opt_load_store_update_alignments_callback,
nir_metadata_control_flow |
nir_metadata_live_defs |
nir_metadata_instr_index,
&ctx);
ralloc_free(ctx.numlsb_ht);
return progress;
}