aco: increase max_const_offset_plus_one for SMEM load_global

fossil-db (gfx1201):
Totals from 1115 (1.40% of 79377) affected shaders:
Instrs: 1473805 -> 1467571 (-0.42%); split: -0.43%, +0.01%
CodeSize: 7852972 -> 7819656 (-0.42%); split: -0.44%, +0.02%
SpillSGPRs: 1632 -> 1460 (-10.54%); split: -11.27%, +0.74%
Latency: 11975762 -> 11971915 (-0.03%); split: -0.05%, +0.02%
InvThroughput: 2496961 -> 2496448 (-0.02%); split: -0.03%, +0.01%
VClause: 25213 -> 25218 (+0.02%); split: -0.00%, +0.02%
SClause: 28822 -> 28565 (-0.89%); split: -1.41%, +0.52%
Copies: 106377 -> 105715 (-0.62%); split: -1.23%, +0.61%
Branches: 27497 -> 27473 (-0.09%)
PreSGPRs: 52071 -> 51310 (-1.46%)
VALU: 871051 -> 870694 (-0.04%); split: -0.04%, +0.00%
SALU: 186090 -> 181811 (-2.30%); split: -2.32%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34730>
This commit is contained in:
Rhys Perry 2025-04-23 16:41:53 +01:00 committed by Marge Bot
parent f390893a64
commit c26851b80b
4 changed files with 16 additions and 7 deletions

View file

@ -6673,7 +6673,9 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
info.resource = bld.as_uniform(info.resource); info.resource = bld.as_uniform(info.resource);
info.offset = Operand(bld.as_uniform(info.offset)); info.offset = Operand(bld.as_uniform(info.offset));
info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_SMEM); info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_SMEM);
emit_load(ctx, bld, info, smem_load_params); EmitLoadParameters params = smem_load_params;
params.max_const_offset_plus_one = ctx->program->dev.smem_offset_max + 1;
emit_load(ctx, bld, info, params);
} else { } else {
EmitLoadParameters params = global_load_params; EmitLoadParameters params = global_load_params;
info.cache = get_cache_flags(ctx, access); info.cache = get_cache_flags(ctx, access);

View file

@ -180,6 +180,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->dev.scratch_global_offset_max = 4095; program->dev.scratch_global_offset_max = 4095;
} }
if (program->gfx_level >= GFX8)
program->dev.smem_offset_max = 0xfffff;
else if (program->gfx_level >= GFX7)
program->dev.smem_offset_max = 0xffffffff;
else if (program->gfx_level >= GFX6)
program->dev.smem_offset_max = 0x3ff;
if (program->gfx_level >= GFX12) { if (program->gfx_level >= GFX12) {
/* Same as GFX11, except one less for VSAMPLE. */ /* Same as GFX11, except one less for VSAMPLE. */
program->dev.max_nsa_vgprs = 3; program->dev.max_nsa_vgprs = 3;

View file

@ -2105,6 +2105,9 @@ struct DeviceInfo {
int16_t scratch_global_offset_min; int16_t scratch_global_offset_min;
int16_t scratch_global_offset_max; int16_t scratch_global_offset_max;
unsigned max_nsa_vgprs; unsigned max_nsa_vgprs;
/* Maximum constant offset for SMEM instructions; this is only for positive offsets. Note that GFX6/7 ignore the low 2 bits of the offset. */
uint32_t smem_offset_max;
}; };
enum class CompilationProgress { enum class CompilationProgress {

View file

@ -830,14 +830,11 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
Temp base; Temp base;
uint32_t offset; uint32_t offset;
if (info.is_constant_or_literal(32) && if (info.is_constant_or_literal(32) && info.val <= ctx.program->dev.smem_offset_max) {
((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
(ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
(ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
instr->operands[1] = Operand::c32(info.val); instr->operands[1] = Operand::c32(info.val);
} else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) && } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) &&
base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 && base.regClass() == s1 && offset <= ctx.program->dev.smem_offset_max &&
offset % 4u == 0) { ctx.program->gfx_level >= GFX9 && offset % 4u == 0) {
bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4); bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
if (soe) { if (soe) {
if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) && if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&