mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 02:40:11 +01:00
aco: Make private_segment_buffer/scratch_offset per-resume
We need different Temps for each resume shader, because registers aren't preserved across resume boundaries. Sharing a single Temp was likely fine in practice, because the argument registers are the same for each shader, but it resulted in invalid IR and assertion failures.

Fixes crashes in Indiana Jones RT with assertions enabled on GFX8.

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34114>
This commit is contained in:
parent
76096d04bb
commit
3d8db3cbbb
4 changed files with 35 additions and 16 deletions
|
|
@ -7416,7 +7416,9 @@ Temp
|
|||
get_scratch_resource(isel_context* ctx)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp scratch_addr = ctx->program->private_segment_buffer;
|
||||
Temp scratch_addr;
|
||||
if (!ctx->program->private_segment_buffers.empty())
|
||||
scratch_addr = ctx->program->private_segment_buffers.back();
|
||||
if (!scratch_addr.bytes()) {
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
|
|
@ -7474,7 +7476,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
} else {
|
||||
info.resource = get_scratch_resource(ctx);
|
||||
info.offset = Operand(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)));
|
||||
info.soffset = ctx->program->scratch_offset;
|
||||
info.soffset = ctx->program->scratch_offsets.back();
|
||||
emit_load(ctx, bld, info, scratch_mubuf_load_params);
|
||||
}
|
||||
}
|
||||
|
|
@ -7530,7 +7532,7 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
offset = as_vgpr(ctx, offset);
|
||||
for (unsigned i = 0; i < write_count; i++) {
|
||||
aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
|
||||
Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset,
|
||||
Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offsets.back(),
|
||||
write_datas[i], offsets[i], true);
|
||||
mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
|
||||
unsigned access = ACCESS_TYPE_STORE | ACCESS_IS_SWIZZLED_AMD |
|
||||
|
|
@ -10910,9 +10912,9 @@ add_startpgm(struct isel_context* ctx)
|
|||
* handling spilling.
|
||||
*/
|
||||
if (ctx->args->ring_offsets.used)
|
||||
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
|
||||
ctx->program->private_segment_buffers.push_back(get_arg(ctx, ctx->args->ring_offsets));
|
||||
|
||||
ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
|
||||
ctx->program->scratch_offsets.push_back(get_arg(ctx, ctx->args->scratch_offset));
|
||||
} else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
|
||||
/* Manually initialize scratch. For RT stages scratch initialization is done in the prolog.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -2137,8 +2137,9 @@ public:
|
|||
std::vector<ac_shader_debug_info> debug_info;
|
||||
|
||||
std::vector<uint8_t> constant_data;
|
||||
Temp private_segment_buffer;
|
||||
Temp scratch_offset;
|
||||
/* Private segment buffers and scratch offsets. One entry per start/resume block */
|
||||
aco::small_vec<Temp, 2> private_segment_buffers;
|
||||
aco::small_vec<Temp, 2> scratch_offsets;
|
||||
|
||||
uint16_t num_waves = 0;
|
||||
uint16_t min_waves = 0;
|
||||
|
|
|
|||
|
|
@ -69,10 +69,14 @@ reindex_program(idx_ctx& ctx, Program* program)
|
|||
}
|
||||
|
||||
/* update program members */
|
||||
program->private_segment_buffer = Temp(ctx.renames[program->private_segment_buffer.id()],
|
||||
program->private_segment_buffer.regClass());
|
||||
program->scratch_offset =
|
||||
Temp(ctx.renames[program->scratch_offset.id()], program->scratch_offset.regClass());
|
||||
for (auto& private_segment_buffer : program->private_segment_buffers) {
|
||||
private_segment_buffer =
|
||||
Temp(ctx.renames[private_segment_buffer.id()], private_segment_buffer.regClass());
|
||||
}
|
||||
for (auto& scratch_offset : program->scratch_offsets) {
|
||||
scratch_offset =
|
||||
Temp(ctx.renames[scratch_offset.id()], scratch_offset.regClass());
|
||||
}
|
||||
program->temp_rc = ctx.temp_rc;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,13 +89,16 @@ struct spill_ctx {
|
|||
unsigned vgpr_spill_slots;
|
||||
Temp scratch_rsrc;
|
||||
|
||||
unsigned resume_idx;
|
||||
|
||||
spill_ctx(const RegisterDemand target_pressure_, Program* program_)
|
||||
: target_pressure(target_pressure_), program(program_), memory(),
|
||||
renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
|
||||
spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
||||
spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
|
||||
processed(program->blocks.size(), false), ssa_infos(program->peekAllocationId()),
|
||||
remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0)
|
||||
remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0),
|
||||
resume_idx(0)
|
||||
{}
|
||||
|
||||
void add_affinity(uint32_t first, uint32_t second)
|
||||
|
|
@ -1131,7 +1134,10 @@ spill_block(spill_ctx& ctx, unsigned block_idx)
|
|||
Temp
|
||||
load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
|
||||
{
|
||||
Temp private_segment_buffer = ctx.program->private_segment_buffer;
|
||||
Temp private_segment_buffer;
|
||||
if (!ctx.program->private_segment_buffers.empty())
|
||||
private_segment_buffer = ctx.program->private_segment_buffers[ctx.resume_idx];
|
||||
|
||||
if (!private_segment_buffer.bytes()) {
|
||||
Temp addr_lo =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
|
||||
|
|
@ -1152,7 +1158,7 @@ load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
|
|||
|
||||
Temp carry = bld.tmp(s1);
|
||||
addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
|
||||
ctx.program->scratch_offset);
|
||||
ctx.program->scratch_offsets[ctx.resume_idx]);
|
||||
addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
|
||||
Operand::c32(0), bld.scc(carry));
|
||||
|
||||
|
|
@ -1261,7 +1267,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
|
|||
uint32_t spill_id = spill->operands[1].constantValue();
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
||||
Temp scratch_offset = ctx.program->scratch_offset;
|
||||
Temp scratch_offset;
|
||||
if (!ctx.program->scratch_offsets.empty())
|
||||
scratch_offset = ctx.program->scratch_offsets[ctx.resume_idx];
|
||||
unsigned offset;
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
||||
|
||||
|
|
@ -1307,7 +1315,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
|
|||
uint32_t spill_id = reload->operands[0].constantValue();
|
||||
uint32_t spill_slot = slots[spill_id];
|
||||
|
||||
Temp scratch_offset = ctx.program->scratch_offset;
|
||||
Temp scratch_offset;
|
||||
if (!ctx.program->scratch_offsets.empty())
|
||||
scratch_offset = ctx.program->scratch_offsets[ctx.resume_idx];
|
||||
unsigned offset;
|
||||
setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
|
||||
|
||||
|
|
@ -1531,6 +1541,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
* we cannot reuse the current scratch_rsrc temp because its definition is unreachable */
|
||||
if (block.linear_preds.empty())
|
||||
ctx.scratch_rsrc = Temp();
|
||||
if (block.kind & block_kind_resume)
|
||||
++ctx.resume_idx;
|
||||
}
|
||||
|
||||
std::vector<aco_ptr<Instruction>>::iterator it;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue