From f001515c875d8a0c4db4feb4c1b9bcabc61e6fab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 3 Dec 2025 11:52:12 +0100 Subject: [PATCH] aco: Use only VGPR offset on buffer atomics on GFX6-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SGPR offset is not included in the bounds check according to the ISA documentation of GFX6-7 and indeed it can trigger VM faults on OOB access. Note that ACO already doesn't use the SGPR offset on GFX6-7 for buffer loads and stores. This commit just does the same for buffer atomics. This commit mitigates a ton of VM faults that are exposed by: https://github.com/KhronosGroup/VK-GL-CTS/commit/24e75fea4b0c6031445a31528f41a65062e37265 Fossil DB stats on Hawaii (GFX7): Totals from 148 (0.24% of 61818) affected shaders: Instrs: 324004 -> 327352 (+1.03%) CodeSize: 1556468 -> 1514100 (-2.72%); split: -2.74%, +0.02% Latency: 1271480 -> 1276894 (+0.43%) InvThroughput: 396850 -> 397740 (+0.22%) VClause: 6861 -> 6858 (-0.04%) Copies: 34083 -> 37430 (+9.82%) PreVGPRs: 5705 -> 5706 (+0.02%) VALU: 147529 -> 150898 (+2.28%) SALU: 98194 -> 98172 (-0.02%) Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- .../instruction_selection/aco_select_nir_intrinsics.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index c54bde959b2..f62ff5010c3 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -2359,6 +2359,12 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); Temp dst = get_ssa_temp(ctx, &instr->def); + /* GFX6-7 are affected by a hw bug that prevents address clamping from + * working correctly when the SGPR offset is used. 
+ */ + if (offset.type() == RegType::sgpr && ctx->options->gfx_level < GFX8) + offset = as_vgpr(ctx, offset); + aco_opcode op = instr->def.bit_size == 32 ? op32 : op64; aco_ptr mubuf{create_instruction(op, Format::MUBUF, 6, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(rsrc);