radv,aco: dump more SQ_WAVE registers from the trap handler on GFX12

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34840>
This commit is contained in:
Samuel Pitoiset 2025-05-06 17:07:45 +02:00 committed by Marge Bot
parent 0e73c85424
commit ae6d3df139
3 changed files with 112 additions and 46 deletions

View file

@ -12326,28 +12326,60 @@ select_trap_handler_shader(Program* program, ac_shader_config* config,
}
/* Store some hardware registers. */
const uint32_t hw_regs_idx[] = {
1, /* HW_REG_MODE */
3, /* HW_REG_TRAP_STS */
4, /* HW_REG_HW_ID */
5, /* HW_REG_GPR_ALLOC */
6, /* HW_REG_LDS_ALLOC */
7, /* HW_REG_IB_STS */
};
if (options->gfx_level >= GFX12) {
const uint32_t hw_regs_idx[] = {
1, /* HW_REG_MODE */
2, /* HW_REG_STATUS */
5, /* HW_REG_GPR_ALLOC */
6, /* HW_REG_LDS_ALLOC */
7, /* HW_REG_IB_STS */
17, /* HW_REG_EXCP_FLAG_PRIV */
18, /* HW_REG_EXCP_FLAG_USER */
19, /* HW_REG_TRAP_CTRL */
23, /* HW_REG_HW_ID */
};
offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.status);
offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.gfx12.state_priv);
/* Store saved SQ_WAVE_STATUS which can change inside the trap. */
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_status, s1), offset);
offset += 4;
for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
/* "((size - 1) << 11) | register" */
bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1),
((32 - 1) << 11) | hw_regs_idx[i]);
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset);
/* Store saved SQ_WAVE_STATE_PRIV which can change inside the trap. */
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_state_priv, s1), offset);
offset += 4;
for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
/* "((size - 1) << 11) | register" */
bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1),
((32 - 1) << 11) | hw_regs_idx[i]);
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset);
offset += 4;
}
} else {
const uint32_t hw_regs_idx[] = {
1, /* HW_REG_MODE */
3, /* HW_REG_TRAP_STS */
4, /* HW_REG_HW_ID */
5, /* HW_REG_GPR_ALLOC */
6, /* HW_REG_LDS_ALLOC */
7, /* HW_REG_IB_STS */
};
offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.gfx8.status);
/* Store saved SQ_WAVE_STATUS which can change inside the trap. */
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_status, s1), offset);
offset += 4;
for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
/* "((size - 1) << 11) | register" */
bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1),
((32 - 1) << 11) | hw_regs_idx[i]);
dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset);
offset += 4;
}
/* Skip the three reserved dwords (reserved0..reserved2) at the end of the gfx8 layout. */
offset += 12;
}
assert(offset == offsetof(struct aco_trap_handler_layout, m0));

View file

@ -209,14 +209,32 @@ struct aco_trap_handler_layout {
uint32_t ttmp0;
uint32_t ttmp1;
struct {
uint32_t status;
uint32_t mode;
uint32_t trap_sts;
uint32_t hw_id1;
uint32_t gpr_alloc;
uint32_t lds_alloc;
uint32_t ib_sts;
union {
struct {
uint32_t state_priv;
uint32_t mode;
uint32_t status;
uint32_t gpr_alloc;
uint32_t lds_alloc;
uint32_t ib_sts;
uint32_t excp_flag_priv;
uint32_t excp_flag_user;
uint32_t trap_ctrl;
uint32_t hw_id1;
} gfx12;
struct {
uint32_t status;
uint32_t mode;
uint32_t trap_sts;
uint32_t hw_id1;
uint32_t gpr_alloc;
uint32_t lds_alloc;
uint32_t ib_sts;
uint32_t reserved0;
uint32_t reserved1;
uint32_t reserved2;
} gfx8;
} sq_wave_regs;
uint32_t m0;

View file

@ -1174,22 +1174,33 @@ radv_dump_sq_hw_regs(struct radv_device *device, const struct aco_trap_handler_l
enum radeon_family family = pdev->info.family;
fprintf(f, "\nHardware registers:\n");
if (pdev->info.gfx_level >= GFX10) {
ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.hw_id1, ~0);
ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
if (pdev->info.gfx_level >= GFX12) {
ac_dump_reg(f, gfx_level, family, R_000410_SQ_WAVE_STATE_PRIV, layout->sq_wave_regs.gfx12.state_priv, ~0);
ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.gfx12.mode, ~0);
ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx12.status, ~0);
ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx12.gpr_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx12.lds_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx12.ib_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_000444_SQ_WAVE_EXCP_FLAG_PRIV, layout->sq_wave_regs.gfx12.excp_flag_priv, ~0);
ac_dump_reg(f, gfx_level, family, R_000448_SQ_WAVE_EXCP_FLAG_USER, layout->sq_wave_regs.gfx12.excp_flag_user, ~0);
ac_dump_reg(f, gfx_level, family, R_00044C_SQ_WAVE_TRAP_CTRL, layout->sq_wave_regs.gfx12.trap_ctrl, ~0);
ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.gfx12.hw_id1, ~0);
} else if (pdev->info.gfx_level >= GFX10) {
ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.gfx8.mode, ~0);
ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx8.status, ~0);
ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.gfx8.trap_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.gfx8.hw_id1, ~0);
ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx8.gpr_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx8.lds_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx8.ib_sts, ~0);
} else {
ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.hw_id1, ~0);
ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.gfx8.mode, ~0);
ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx8.status, ~0);
ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.gfx8.trap_sts, ~0);
ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.gfx8.hw_id1, ~0);
ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx8.gpr_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx8.lds_alloc, ~0);
ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx8.ib_sts, ~0);
}
fprintf(f, "\n\n");
}
@ -1198,14 +1209,16 @@ static uint32_t
radv_get_vgpr_size(const struct radv_device *device, const struct aco_trap_handler_layout *layout)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t gpr_alloc =
pdev->info.gfx_level >= GFX12 ? layout->sq_wave_regs.gfx12.gpr_alloc : layout->sq_wave_regs.gfx8.gpr_alloc;
uint32_t vgpr_size;
if (pdev->info.gfx_level >= GFX11) {
vgpr_size = G_000414_VGPR_SIZE_GFX11(layout->sq_wave_regs.gpr_alloc);
vgpr_size = G_000414_VGPR_SIZE_GFX11(gpr_alloc);
} else if (pdev->info.gfx_level >= GFX10) {
vgpr_size = G_000414_VGPR_SIZE_GFX10(layout->sq_wave_regs.gpr_alloc);
vgpr_size = G_000414_VGPR_SIZE_GFX10(gpr_alloc);
} else {
vgpr_size = G_000054_VGPR_SIZE_GFX6(layout->sq_wave_regs.gpr_alloc);
vgpr_size = G_000054_VGPR_SIZE_GFX6(gpr_alloc);
}
return vgpr_size;
@ -1256,7 +1269,10 @@ radv_dump_shader_regs(const struct radv_device *device, const struct aco_trap_ha
static void
radv_dump_lds(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
{
uint32_t lds_size = G_000058_LDS_SIZE(layout->sq_wave_regs.lds_alloc);
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t lds_alloc =
pdev->info.gfx_level >= GFX12 ? layout->sq_wave_regs.gfx12.lds_alloc : layout->sq_wave_regs.gfx8.lds_alloc;
uint32_t lds_size = G_000058_LDS_SIZE(lds_alloc);
if (!lds_size)
return;