mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
aco: increase max_const_offset_plus_one for SMEM load_global
fossil-db (gfx1201):
Totals from 1115 (1.40% of 79377) affected shaders:
Instrs: 1473805 -> 1467571 (-0.42%); split: -0.43%, +0.01%
CodeSize: 7852972 -> 7819656 (-0.42%); split: -0.44%, +0.02%
SpillSGPRs: 1632 -> 1460 (-10.54%); split: -11.27%, +0.74%
Latency: 11975762 -> 11971915 (-0.03%); split: -0.05%, +0.02%
InvThroughput: 2496961 -> 2496448 (-0.02%); split: -0.03%, +0.01%
VClause: 25213 -> 25218 (+0.02%); split: -0.00%, +0.02%
SClause: 28822 -> 28565 (-0.89%); split: -1.41%, +0.52%
Copies: 106377 -> 105715 (-0.62%); split: -1.23%, +0.61%
Branches: 27497 -> 27473 (-0.09%)
PreSGPRs: 52071 -> 51310 (-1.46%)
VALU: 871051 -> 870694 (-0.04%); split: -0.04%, +0.00%
SALU: 186090 -> 181811 (-2.30%); split: -2.32%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34730>
This commit is contained in:
parent
f390893a64
commit
c26851b80b
4 changed files with 16 additions and 7 deletions
|
|
@@ -6673,7 +6673,9 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
info.resource = bld.as_uniform(info.resource);
|
||||
info.offset = Operand(bld.as_uniform(info.offset));
|
||||
info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_SMEM);
|
||||
emit_load(ctx, bld, info, smem_load_params);
|
||||
EmitLoadParameters params = smem_load_params;
|
||||
params.max_const_offset_plus_one = ctx->program->dev.smem_offset_max + 1;
|
||||
emit_load(ctx, bld, info, params);
|
||||
} else {
|
||||
EmitLoadParameters params = global_load_params;
|
||||
info.cache = get_cache_flags(ctx, access);
|
||||
|
|
|
|||
|
|
@@ -180,6 +180,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.scratch_global_offset_max = 4095;
|
||||
}
|
||||
|
||||
if (program->gfx_level >= GFX8)
|
||||
program->dev.smem_offset_max = 0xfffff;
|
||||
else if (program->gfx_level >= GFX7)
|
||||
program->dev.smem_offset_max = 0xffffffff;
|
||||
else if (program->gfx_level >= GFX6)
|
||||
program->dev.smem_offset_max = 0x3ff;
|
||||
|
||||
if (program->gfx_level >= GFX12) {
|
||||
/* Same as GFX11, except one less for VSAMPLE. */
|
||||
program->dev.max_nsa_vgprs = 3;
|
||||
|
|
|
|||
|
|
@@ -2105,6 +2105,9 @@ struct DeviceInfo {
|
|||
int16_t scratch_global_offset_min;
|
||||
int16_t scratch_global_offset_max;
|
||||
unsigned max_nsa_vgprs;
|
||||
|
||||
/* Note that GFX6/7 ignore the low 2 bits and this is only for positive offsets. */
|
||||
uint32_t smem_offset_max;
|
||||
};
|
||||
|
||||
enum class CompilationProgress {
|
||||
|
|
|
|||
|
|
@@ -830,14 +830,11 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
Temp base;
|
||||
uint32_t offset;
|
||||
if (info.is_constant_or_literal(32) &&
|
||||
((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
|
||||
(ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
|
||||
(ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
|
||||
if (info.is_constant_or_literal(32) && info.val <= ctx.program->dev.smem_offset_max) {
|
||||
instr->operands[1] = Operand::c32(info.val);
|
||||
} else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) &&
|
||||
base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 &&
|
||||
offset % 4u == 0) {
|
||||
base.regClass() == s1 && offset <= ctx.program->dev.smem_offset_max &&
|
||||
ctx.program->gfx_level >= GFX9 && offset % 4u == 0) {
|
||||
bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
|
||||
if (soe) {
|
||||
if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue