nir/opt_load_store_vectorize: don't add negative offsets to load/store_shared2_amd

By hoisting the low address instead, we can make use of these instructions on GFX6.

Totals from 3 (0.00% of 79839) affected shaders: (Navi48)

Instrs: 3768 -> 3776 (+0.21%); split: -0.03%, +0.24%
CodeSize: 20024 -> 20048 (+0.12%); split: -0.04%, +0.16%
Latency: 16093 -> 16198 (+0.65%)
InvThroughput: 3868 -> 3864 (-0.10%)
VClause: 97 -> 93 (-4.12%)
VALU: 2333 -> 2331 (-0.09%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37682>
This commit is contained in:
Daniel Schürmann 2025-10-02 17:32:35 +02:00 committed by Marge Bot
parent 688718be8b
commit 9abbcbc00e

View file

@@ -1536,14 +1536,14 @@ try_vectorize_shared2(struct vectorize_ctx *ctx,
return false;
}
/* vectorize the accesses */
nir_builder b = nir_builder_at(nir_after_instr(first->is_store ? second->instr : first->instr));
nir_def *offset = first->intrin->src[first->is_store].ssa;
offset = nir_iadd_imm(&b, offset, nir_intrinsic_base(first->intrin));
/* Take low as base address. */
nir_def *offset = low->intrin->src[first->is_store].ssa;
if (first != low)
offset = nir_iadd_imm(&b, offset, -(int)diff);
hoist_base_addr(&first->intrin->instr, offset->parent_instr);
nir_builder b = nir_builder_at(nir_after_instr(first->is_store ? second->instr : first->instr));
offset = nir_iadd_imm(&b, offset, nir_intrinsic_base(low->intrin));
/* vectorize the accesses */
uint32_t access = nir_intrinsic_access(first->intrin);
if (first->is_store) {
nir_def *low_val = low->intrin->src[low->info->value_src].ssa;