aco/gfx6: allow both constant and gpr offset for global with sgpr address

fossil-db (pitcairn):
Totals from 81 (0.13% of 62069) affected shaders:
MaxWaves: 332 -> 335 (+0.90%)
Instrs: 150087 -> 149737 (-0.23%); split: -0.30%, +0.06%
CodeSize: 754636 -> 752712 (-0.25%); split: -0.31%, +0.06%
SGPRs: 6128 -> 6184 (+0.91%)
VGPRs: 7220 -> 7208 (-0.17%); split: -0.28%, +0.11%
SpillSGPRs: 288 -> 287 (-0.35%)
Latency: 2199197 -> 2198338 (-0.04%); split: -0.20%, +0.17%
InvThroughput: 1613474 -> 1614303 (+0.05%); split: -0.07%, +0.12%
VClause: 2905 -> 2862 (-1.48%); split: -2.34%, +0.86%
SClause: 2366 -> 2378 (+0.51%); split: -0.17%, +0.68%
Copies: 17312 -> 17264 (-0.28%); split: -1.03%, +0.76%
PreSGPRs: 5080 -> 5004 (-1.50%)
PreVGPRs: 5656 -> 5640 (-0.28%)
VALU: 114097 -> 113831 (-0.23%); split: -0.31%, +0.07%
SALU: 16004 -> 15944 (-0.37%); split: -0.41%, +0.04%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35465>
This commit is contained in:
Rhys Perry 2025-05-30 17:52:59 +01:00 committed by Marge Bot
parent 684943bd1f
commit d7dcd81c77

View file

@@ -261,6 +261,7 @@ struct LoadEmitInfo {
unsigned align_offset = 0;
pipe_format format;
nir_src* offset_src = NULL; /* should be equal to offset or NULL */
isel_context* ctx;
ac_hw_cache_flags cache = {{0, 0, 0, 0, 0}};
bool split_by_component_stride = true;
@@ -315,6 +316,7 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
/* reduce constant offset */
LoadEmitInfo new_info = info;
new_info.ctx = ctx;
Operand offset = info.offset;
unsigned reduced_const_offset = const_offset;
if (const_offset > params.max_const_offset) {
@@ -817,8 +819,8 @@ get_gfx6_global_rsrc(Builder& bld, Temp addr)
}
void
lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
uint32_t* const_offset_inout, Temp* offset_inout)
lower_global_address(isel_context* ctx, Builder& bld, uint32_t offset_in, Temp* address_inout,
uint32_t* const_offset_inout, Temp* offset_inout, nir_src* offset_src)
{
Temp address = *address_inout;
uint64_t const_offset = *const_offset_inout + offset_in;
@@ -857,9 +859,10 @@ lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
if (bld.program->gfx_level == GFX6) {
/* GFX6 (MUBUF): (SGPR address, SGPR offset) or (SGPR address, VGPR offset) */
/* GFX6 (MUBUF-addr64): (VGPR address, SGPR offset) */
/* Disallow SGPR address with both a const_offset and offset because of possible overflow. */
/* Disallow SGPR address with both a const_offset and offset in case of possible overflow. */
if (offset.id() &&
(address.type() == RegType::vgpr ? offset.type() != RegType::sgpr : const_offset > 0)) {
(address.type() == RegType::vgpr ? offset.type() != RegType::sgpr
: add_might_overflow(ctx, offset_src, const_offset))) {
address = add64_32(bld, address, Operand(offset));
offset = Temp();
}
@@ -898,7 +901,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_need
offset = Temp();
}
uint32_t const_offset = info.const_offset;
lower_global_address(bld, 0, &addr, &const_offset, &offset);
lower_global_address(info.ctx, bld, 0, &addr, &const_offset, &offset, info.offset_src);
unsigned bytes_size = 0;
bool use_mubuf = bld.program->gfx_level == GFX6;
@@ -2518,7 +2521,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
Temp write_address = addr;
uint32_t write_const_offset = const_offset;
Temp write_offset = offset;
lower_global_address(bld, offsets[i], &write_address, &write_const_offset, &write_offset);
lower_global_address(ctx, bld, offsets[i], &write_address, &write_const_offset, &write_offset,
&instr->src[2]);
unsigned access = nir_intrinsic_access(instr) | ACCESS_TYPE_STORE;
if (write_datas[i].bytes() < 4)
@@ -2613,7 +2617,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp addr, offset;
uint32_t const_offset;
parse_global(ctx, instr, &addr, &const_offset, &offset);
lower_global_address(bld, 0, &addr, &const_offset, &offset);
lower_global_address(ctx, bld, 0, &addr, &const_offset, &offset, &instr->src[2]);
if (ctx->options->gfx_level >= GFX7) {
bool global = ctx->options->gfx_level >= GFX9;