mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
aco/spill: Make sure that offset stays in bounds
If a shader spills a lot, the offset can be above the HW limit.
cc: mesa-stable
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24652>
(cherry picked from commit 24555cdbbd)
This commit is contained in:
parent
15da4bc4de
commit
02e77eeb2e
2 changed files with 83 additions and 54 deletions
|
|
@ -2722,7 +2722,7 @@
|
|||
"description": "aco/spill: Make sure that offset stays in bounds",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1404,39 +1404,28 @@ spill_block(spill_ctx& ctx, unsigned block_idx)
|
|||
}
|
||||
|
||||
Temp
|
||||
load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
|
||||
std::vector<aco_ptr<Instruction>>& instructions, unsigned offset)
|
||||
load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
|
||||
{
|
||||
Builder bld(ctx.program);
|
||||
if (block.kind & block_kind_top_level) {
|
||||
bld.reset(&instructions);
|
||||
} else {
|
||||
for (int block_idx = block.index; block_idx >= 0; block_idx--) {
|
||||
if (!(ctx.program->blocks[block_idx].kind & block_kind_top_level))
|
||||
continue;
|
||||
|
||||
/* find p_logical_end */
|
||||
std::vector<aco_ptr<Instruction>>& prev_instructions = ctx.program->blocks[block_idx].instructions;
|
||||
unsigned idx = prev_instructions.size() - 1;
|
||||
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
|
||||
idx--;
|
||||
bld.reset(&prev_instructions, std::next(prev_instructions.begin(), idx));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* GFX9+ uses scratch_* instructions, which don't use a resource. Return a SADDR instead. */
|
||||
if (ctx.program->gfx_level >= GFX9)
|
||||
return bld.copy(bld.def(s1), Operand::c32(offset));
|
||||
|
||||
Temp private_segment_buffer = ctx.program->private_segment_buffer;
|
||||
if (ctx.program->stage.hw != HWStage::CS)
|
||||
private_segment_buffer =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
|
||||
|
||||
if (offset)
|
||||
scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
|
||||
scratch_offset, Operand::c32(offset));
|
||||
if (apply_scratch_offset) {
|
||||
Temp addr_lo = bld.tmp(s1);
|
||||
Temp addr_hi = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
|
||||
private_segment_buffer);
|
||||
|
||||
Temp carry = bld.tmp(s1);
|
||||
addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
|
||||
ctx.program->scratch_offset);
|
||||
addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
|
||||
Operand::c32(0), bld.scc(carry));
|
||||
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
}
|
||||
|
||||
uint32_t rsrc_conf =
|
||||
S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2);
|
||||
|
|
@ -1461,33 +1450,72 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
|
|||
void
|
||||
setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
|
||||
std::vector<aco_ptr<Instruction>>& instructions, uint32_t spill_slot,
|
||||
unsigned* offset)
|
||||
Temp& scratch_offset, unsigned* offset)
|
||||
{
|
||||
Temp scratch_offset = ctx.program->scratch_offset;
|
||||
uint32_t scratch_size = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
|
||||
uint32_t offset_range;
|
||||
if (ctx.program->gfx_level >= GFX9) {
|
||||
offset_range =
|
||||
ctx.program->dev.scratch_global_offset_max - ctx.program->dev.scratch_global_offset_min;
|
||||
} else {
|
||||
if (scratch_size < 4095)
|
||||
offset_range = 4095 - scratch_size;
|
||||
else
|
||||
offset_range = 0;
|
||||
}
|
||||
|
||||
bool overflow = (ctx.vgpr_spill_slots - 1) * 4 > offset_range;
|
||||
|
||||
Builder rsrc_bld(ctx.program);
|
||||
if (block.kind & block_kind_top_level) {
|
||||
rsrc_bld.reset(&instructions);
|
||||
} else if (ctx.scratch_rsrc == Temp() && (!overflow || ctx.program->gfx_level < GFX9)) {
|
||||
Block* tl_block = &block;
|
||||
while (!(tl_block->kind & block_kind_top_level))
|
||||
tl_block = &ctx.program->blocks[tl_block->linear_idom];
|
||||
|
||||
/* find p_logical_end */
|
||||
std::vector<aco_ptr<Instruction>>& prev_instructions = tl_block->instructions;
|
||||
unsigned idx = prev_instructions.size() - 1;
|
||||
while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
|
||||
idx--;
|
||||
rsrc_bld.reset(&prev_instructions, std::next(prev_instructions.begin(), idx));
|
||||
}
|
||||
|
||||
/* If spilling overflows the constant offset range at any point, we need to emit the soffset
|
||||
* before every spill/reload to avoid increasing register demand.
|
||||
*/
|
||||
Builder offset_bld = rsrc_bld;
|
||||
if (overflow)
|
||||
offset_bld.reset(&instructions);
|
||||
|
||||
*offset = spill_slot * 4;
|
||||
if (ctx.program->gfx_level >= GFX9) {
|
||||
*offset += ctx.program->dev.scratch_global_offset_min;
|
||||
|
||||
if (ctx.scratch_rsrc == Temp()) {
|
||||
int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size -
|
||||
ctx.program->dev.scratch_global_offset_min;
|
||||
ctx.scratch_rsrc =
|
||||
load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
|
||||
if (ctx.scratch_rsrc == Temp() || overflow) {
|
||||
int32_t saddr = scratch_size - ctx.program->dev.scratch_global_offset_min;
|
||||
if ((int32_t)*offset > (int32_t)ctx.program->dev.scratch_global_offset_max) {
|
||||
saddr += (int32_t)*offset;
|
||||
*offset = 0;
|
||||
}
|
||||
|
||||
/* GFX9+ uses scratch_* instructions, which don't use a resource. */
|
||||
ctx.scratch_rsrc = offset_bld.copy(offset_bld.def(s1), Operand::c32(saddr));
|
||||
}
|
||||
} else {
|
||||
bool add_offset_to_sgpr =
|
||||
ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size +
|
||||
ctx.vgpr_spill_slots * 4 >
|
||||
4096;
|
||||
if (!add_offset_to_sgpr)
|
||||
*offset += ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
if (ctx.scratch_rsrc == Temp())
|
||||
ctx.scratch_rsrc = load_scratch_resource(ctx, rsrc_bld, overflow);
|
||||
|
||||
if (ctx.scratch_rsrc == Temp()) {
|
||||
unsigned rsrc_offset =
|
||||
add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0;
|
||||
ctx.scratch_rsrc =
|
||||
load_scratch_resource(ctx, scratch_offset, block, instructions, rsrc_offset);
|
||||
if (overflow) {
|
||||
uint32_t soffset =
|
||||
ctx.program->config->scratch_bytes_per_wave + *offset * ctx.program->wave_size;
|
||||
*offset = 0;
|
||||
|
||||
scratch_offset = offset_bld.copy(offset_bld.def(s1), Operand::c32(soffset));
|
||||
} else {
|
||||
*offset += scratch_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1501,8 +1529,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
uint32_t spill_id = spill->operands[1].constantValue();
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
||||
Temp scratch_offset = ctx.program->scratch_offset;
|
||||
unsigned offset;
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, &offset);
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
||||
|
||||
assert(spill->operands[0].isTemp());
|
||||
Temp temp = spill->operands[0].getTemp();
|
||||
|
|
@ -1522,9 +1551,8 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem,
|
||||
offset, memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr =
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
|
||||
ctx.program->scratch_offset, elem, offset, false, true);
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
|
||||
Operand(v1), scratch_offset, elem, offset, false, true);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
}
|
||||
|
|
@ -1533,7 +1561,7 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
|
||||
ctx.program->scratch_offset, temp, offset, false, true);
|
||||
scratch_offset, temp, offset, false, true);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
}
|
||||
|
|
@ -1545,8 +1573,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
uint32_t spill_id = reload->operands[0].constantValue();
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
||||
Temp scratch_offset = ctx.program->scratch_offset;
|
||||
unsigned offset;
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, &offset);
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
||||
|
||||
Definition def = reload->definitions[0];
|
||||
|
||||
|
|
@ -1565,7 +1594,7 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
} else {
|
||||
Instruction* instr =
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
|
||||
Operand(v1), ctx.program->scratch_offset, offset, false, true);
|
||||
Operand(v1), scratch_offset, offset, false, true);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
}
|
||||
|
|
@ -1575,7 +1604,7 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
memory_sync_info(storage_vgpr_spill, semantic_private));
|
||||
} else {
|
||||
Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
|
||||
Operand(v1), ctx.program->scratch_offset, offset, false, true);
|
||||
Operand(v1), scratch_offset, offset, false, true);
|
||||
instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue