diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bcd3895b3cc..bf09786e9ce 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6673,7 +6673,9 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr) info.resource = bld.as_uniform(info.resource); info.offset = Operand(bld.as_uniform(info.offset)); info.cache = get_cache_flags(ctx, access | ACCESS_TYPE_SMEM); - emit_load(ctx, bld, info, smem_load_params); + EmitLoadParameters params = smem_load_params; + params.max_const_offset_plus_one = ctx->program->dev.smem_offset_max + 1; + emit_load(ctx, bld, info, params); } else { EmitLoadParameters params = global_load_params; info.cache = get_cache_flags(ctx, access); diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index d0775439838..cda3599ff46 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -180,6 +180,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, program->dev.scratch_global_offset_max = 4095; } + if (program->gfx_level >= GFX8) + program->dev.smem_offset_max = 0xfffff; + else if (program->gfx_level >= GFX7) + program->dev.smem_offset_max = 0xffffffff; + else if (program->gfx_level >= GFX6) + program->dev.smem_offset_max = 0x3ff; + if (program->gfx_level >= GFX12) { /* Same as GFX11, except one less for VSAMPLE. */ program->dev.max_nsa_vgprs = 3; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index e8cea9c4ffa..b2579db1480 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2105,6 +2105,9 @@ struct DeviceInfo { int16_t scratch_global_offset_min; int16_t scratch_global_offset_max; unsigned max_nsa_vgprs; + + /* Note that GFX6/7 ignore the low 2 bits and this is only for positive offsets. */ + uint32_t smem_offset_max; }; enum class CompilationProgress { diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 486b8e11bd1..e5b7b5040e7 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -830,14 +830,11 @@ smem_combine(opt_ctx& ctx, aco_ptr& instr) Temp base; uint32_t offset; - if (info.is_constant_or_literal(32) && - ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) || - (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) || - (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) { + if (info.is_constant_or_literal(32) && info.val <= ctx.program->dev.smem_offset_max) { instr->operands[1] = Operand::c32(info.val); } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) && - base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 && - offset % 4u == 0) { + base.regClass() == s1 && offset <= ctx.program->dev.smem_offset_max && + ctx.program->gfx_level >= GFX9 && offset % 4u == 0) { bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4); if (soe) { if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&