aco/spill: fix mixed lds+scratch spill/reload

We shouldn't increment the scratch offset while accessing LDS.

Fixes: 133ef9f94b ("aco: spill VGPRs to LDS if it doesn't further limit occupancy")
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40855>
This commit is contained in:
Georg Lehmann 2026-04-08 17:22:19 +02:00 committed by Marge Bot
parent 3ab9145393
commit 44a061a034

View file

@ -1396,18 +1396,21 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
for (unsigned i = 0; i < temp.size(); i++)
split->definitions[i] = bld.def(v1);
bld.insert(split);
for (unsigned i = 0; i < temp.size(); i++, offset += 4, lds_offset += workgroup_size * 4) {
for (unsigned i = 0; i < temp.size(); i++) {
Temp elem = split->definitions[i].getTemp();
if (i < lds_slots) {
bld.ds(aco_opcode::ds_write_addtid_b32, bld.m0(ctx.lds_m0_zero), elem, lds_offset);
lds_offset += workgroup_size * 4;
} else if (ctx.program->gfx_level >= GFX9) {
bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem,
offset, memory_sync_info(storage_vgpr_spill, semantic_private));
offset += 4;
} else {
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
Operand(v1), scratch_offset, elem, offset, false);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().cache.value = ac_swizzled;
offset += 4;
}
}
} else if (lds_slots) {
@ -1455,22 +1458,25 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
Instruction* vec{
create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
vec->definitions[0] = def;
for (unsigned i = 0; i < def.size(); i++, offset += 4, lds_offset += workgroup_size * 4) {
for (unsigned i = 0; i < def.size(); i++) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
if (i < lds_slots) {
bld.ds(aco_opcode::ds_read_addtid_b32, Definition(tmp), bld.m0(ctx.lds_m0_zero),
lds_offset);
lds_offset += workgroup_size * 4;
} else if (ctx.program->gfx_level >= GFX9) {
bld.scratch(aco_opcode::scratch_load_dword, Definition(tmp), Operand(v1),
ctx.scratch_rsrc, offset,
memory_sync_info(storage_vgpr_spill, semantic_private));
offset += 4;
} else {
Instruction* instr =
bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
Operand(v1), scratch_offset, offset, false);
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
instr->mubuf().cache.value = ac_swizzled;
offset += 4;
}
}
bld.insert(vec);