radv,aco: save/restore overwritten VGPRs in the trap handler shader

The trap handler currently doesn't return to the shader, but returning will be
needed, for example, for the debug mode.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32056>
This commit is contained in:
Samuel Pitoiset 2024-11-08 16:21:30 +01:00 committed by Marge Bot
parent ccde8ecd64
commit 5f79b8ea2d
3 changed files with 74 additions and 1 deletions

View file

@ -12450,6 +12450,53 @@ dump_sgpr_to_mem(isel_context* ctx, Operand rsrc, Operand data, uint32_t offset)
}
}
/* Emit the buffer accesses that spill (save == true) or reload (save == false)
 * the first NUM_SAVED_VGPRS VGPRs to/from the saved_vgprs area of
 * aco_trap_handler_layout, addressed through the buffer descriptor in rsrc.
 *
 * ADD_TID_ENABLE is set in the descriptor's fourth dword for the duration of
 * the accesses and cleared again afterwards; both updates also write SCC.
 */
void
save_or_restore_vgprs(isel_context* ctx, Operand rsrc, bool save)
{
   Builder bld(ctx->program, ctx->block);

   /* Fourth dword of the descriptor and the ADD_TID_ENABLE bit inside it. */
   PhysReg desc_word3(rsrc.physReg() + 3);
   const uint32_t add_tid_enable = 1 << 23;

   /* Every access below is issued with the GLC cache flag. */
   ac_hw_cache_flags glc_flags;
   glc_flags.value = ac_glc;

   /* Set ADD_TID_ENABLE to enable thread indexing. */
   bld.sop2(aco_opcode::s_or_b32, Definition(desc_word3, s1), bld.def(s1, scc),
            Operand(desc_word3, s1), Operand::c32(add_tid_enable));

   /* Consecutive VGPRs are 256 bytes apart, matching the 64 dwords that
    * saved_vgprs reserves per register.
    */
   const uint32_t vgpr_stride = 256;
   const uint32_t base_offset = offsetof(struct aco_trap_handler_layout, saved_vgprs[0]);

   for (uint32_t idx = 0; idx < NUM_SAVED_VGPRS; ++idx) {
      const uint32_t offset = base_offset + idx * vgpr_stride;
      PhysReg vgpr{256 + idx}; /* v[idx] */

      if (!save) {
         /* Reload v[idx] from its slot. */
         bld.mubuf(aco_opcode::buffer_load_dword, Definition(vgpr, v1), Operand(rsrc),
                   Operand(v1), Operand::c32(0u), offset, false /* offen */, false /* idxen */,
                   /* addr64 */ false, /* disable_wqm */ false, glc_flags);
         continue;
      }

      /* Spill v[idx] to its slot. */
      bld.mubuf(aco_opcode::buffer_store_dword, Operand(rsrc), Operand(v1), Operand::c32(0u),
                Operand(vgpr, v1), offset, false /* offen */, false /* idxen */,
                /* addr64 */ false, /* disable_wqm */ false, glc_flags);
   }

   /* Clear ADD_TID_ENABLE. */
   bld.sop2(aco_opcode::s_andn2_b32, Definition(desc_word3, s1), bld.def(s1, scc),
            Operand(desc_word3, s1), Operand::c32(add_tid_enable));
}
/* Store the first NUM_SAVED_VGPRS VGPRs into the buffer described by rsrc.
 * Convenience wrapper around save_or_restore_vgprs() in "save" mode.
 */
void
save_vgprs_to_mem(isel_context* ctx, Operand rsrc)
{
   const bool save = true;
   save_or_restore_vgprs(ctx, rsrc, save);
}
/* Reload the first NUM_SAVED_VGPRS VGPRs from the buffer described by rsrc.
 * Convenience wrapper around save_or_restore_vgprs() in "restore" mode.
 */
void
restore_vgprs_from_mem(isel_context* ctx, Operand rsrc)
{
   const bool save = false;
   save_or_restore_vgprs(ctx, rsrc, save);
}
void
select_trap_handler_shader(Program* program, ac_shader_config* config,
const struct aco_compiler_options* options,
@ -12526,6 +12573,9 @@ select_trap_handler_shader(Program* program, ac_shader_config* config,
bld.smem(aco_opcode::s_load_dwordx4, Definition(tma_rsrc, s4), Operand(ttmp2_reg, s2),
Operand::c32(0u));
/* Save VGPRs that need to be restored. */
save_vgprs_to_mem(&ctx, Operand(tma_rsrc, s4));
/* Store TTMP0-TTMP1. */
bld.copy(Definition(PhysReg{256}, v2) /* v[0-1] */, Operand(ttmp0_reg, s2));
@ -12586,6 +12636,11 @@ select_trap_handler_shader(Program* program, ac_shader_config* config,
offset += 4;
}
if (ctx.program->gfx_level >= GFX9) {
/* Restore VGPRS. */
restore_vgprs_from_mem(&ctx, Operand(tma_rsrc, s4));
}
/* Restore SCC which is the first bit of SQ_WAVE_STATUS. */
bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), Operand(save_wave_status, s1),
Operand::c32(0u));

View file

@ -197,8 +197,11 @@ struct aco_symbol {
};
#define MAX_SGPRS 108
/* Number of VGPRs, counted from v0, that the trap handler shader saves to and
 * restores from aco_trap_handler_layout::saved_vgprs. That array reserves 64
 * dwords per saved VGPR.
 */
#define NUM_SAVED_VGPRS 2
struct aco_trap_handler_layout {
uint32_t saved_vgprs[NUM_SAVED_VGPRS * 64];
uint32_t ttmp0;
uint32_t ttmp1;

View file

@ -927,7 +927,22 @@ radv_trap_handler_init(struct radv_device *device)
/* Upload a buffer descriptor to store various info from the trap. */
uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + sizeof(desc);
ac_build_raw_buffer_descriptor(pdev->info.gfx_level, tma_va, size - sizeof(desc), desc);
const struct ac_buffer_state ac_state = {
.va = tma_va,
.size = size - sizeof(desc),
.format = PIPE_FORMAT_R32_FLOAT,
.swizzle =
{
PIPE_SWIZZLE_X,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W,
},
.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
.stride = 4, /* Used for VGPRs dump. */
};
ac_build_buffer_descriptor(pdev->info.gfx_level, &ac_state, desc);
memcpy(device->tma_ptr, desc, sizeof(desc));