diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index cb9d8da0298..c692fdf7e16 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -160,6 +160,12 @@ finish and then write to vcc (for example, `s_mov_b64 vcc, vcc`) to correct vccz Currently, we don't do this. +## SGPR offset on MUBUF prevents addr clamping on SI/CI + +[See this LLVM source.](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp#L1917-L1922) + +This leads to wrong bounds checking; using a VGPR offset fixes it. + ## GCN / GFX6 hazards ### VINTRP followed by a read with `v_readfirstlane` or `v_readlane` diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index a97608a3792..bc9c11f96f1 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5066,6 +5066,13 @@ void load_buffer(isel_context *ctx, unsigned num_components, unsigned component_ bool use_smem = dst.type() != RegType::vgpr && (!glc || ctx->options->chip_class >= GFX8) && allow_smem; if (use_smem) offset = bld.as_uniform(offset); + else { + /* GFX6-7 are affected by a hw bug that prevents address clamping from + * working correctly when the SGPR offset is used. + */ + if (offset.type() == RegType::sgpr && ctx->options->chip_class < GFX8) + offset = as_vgpr(ctx, offset); + } LoadEmitInfo info = {Operand(offset), dst, num_components, component_size, rsrc}; info.glc = glc; @@ -6316,6 +6323,12 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr) split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, 16, &write_count, write_datas, offsets); + /* GFX6-7 are affected by a hw bug that prevents address clamping from + * working correctly when the SGPR offset is used. 
+ */ + if (offset.type() == RegType::sgpr && ctx->options->chip_class < GFX8) + offset = as_vgpr(ctx, offset); + for (unsigned i = 0; i < write_count; i++) { aco_opcode op = get_buffer_store_op(write_datas[i].bytes());