From ae6d3df1396e774d77de736ce208bdfedc59c13d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 6 May 2025 17:07:45 +0200 Subject: [PATCH] radv,aco: dump more SQ_WAVE registers from the trap handler on GFX12 Signed-off-by: Samuel Pitoiset Part-of: --- .../compiler/aco_instruction_selection.cpp | 70 ++++++++++++++----- src/amd/compiler/aco_shader_info.h | 34 ++++++--- src/amd/vulkan/radv_debug.c | 54 +++++++++----- 3 files changed, 112 insertions(+), 46 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5ccd7c7acf2..b94fd2fe7e1 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12326,28 +12326,60 @@ select_trap_handler_shader(Program* program, ac_shader_config* config, } /* Store some hardware registers. */ - const uint32_t hw_regs_idx[] = { - 1, /* HW_REG_MODE */ - 3, /* HW_REG_TRAP_STS */ - 4, /* HW_REG_HW_ID */ - 5, /* WH_REG_GPR_ALLOC */ - 6, /* WH_REG_LDS_ALLOC */ - 7, /* HW_REG_IB_STS */ - }; + if (options->gfx_level >= GFX12) { + const uint32_t hw_regs_idx[] = { + 1, /* HW_REG_MODE */ + 2, /* HW_REG_STATUS */ + 5, /* WH_REG_GPR_ALLOC */ + 6, /* WH_REG_LDS_ALLOC */ + 7, /* HW_REG_IB_STS */ + 17, /* HW_REG_EXCP_FLAG_PRIV */ + 18, /* HW_REG_EXCP_FLAG_USER */ + 19, /* HW_REG_TRAP_CTRL */ + 23, /* HW_REG_HW_ID */ + }; - offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.status); + offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.gfx12.state_priv); - /* Store saved SQ_WAVE_STATUS which can change inside the trap. */ - dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_status, s1), offset); - offset += 4; - - for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) { - /* "((size - 1) << 11) | register" */ - bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1), - ((32 - 1) << 11) | hw_regs_idx[i]); - - dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset); + /* Store saved SQ_WAVE_STATE_PRIV which can change inside the trap. */ + dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_state_priv, s1), offset); offset += 4; + + for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) { + /* "((size - 1) << 11) | register" */ + bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1), + ((32 - 1) << 11) | hw_regs_idx[i]); + + dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset); + offset += 4; + } + } else { + const uint32_t hw_regs_idx[] = { + 1, /* HW_REG_MODE */ + 3, /* HW_REG_TRAP_STS */ + 4, /* HW_REG_HW_ID */ + 5, /* WH_REG_GPR_ALLOC */ + 6, /* WH_REG_LDS_ALLOC */ + 7, /* HW_REG_IB_STS */ + }; + + offset = offsetof(struct aco_trap_handler_layout, sq_wave_regs.gfx8.status); + + /* Store saved SQ_WAVE_STATUS which can change inside the trap. */ + dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(save_wave_status, s1), offset); + offset += 4; + + for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) { + /* "((size - 1) << 11) | register" */ + bld.sopk(aco_opcode::s_getreg_b32, Definition(ttmp0_reg, s1), + ((32 - 1) << 11) | hw_regs_idx[i]); + + dump_sgpr_to_mem(&ctx, Operand(tma_rsrc, s4), Operand(ttmp0_reg, s1), offset); + offset += 4; + } + + /* Skip space "reserved regs". */ + offset += 12; } assert(offset == offsetof(struct aco_trap_handler_layout, m0)); diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index f827624a58c..eb97834738e 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -209,14 +209,32 @@ struct aco_trap_handler_layout { uint32_t ttmp0; uint32_t ttmp1; - struct { - uint32_t status; - uint32_t mode; - uint32_t trap_sts; - uint32_t hw_id1; - uint32_t gpr_alloc; - uint32_t lds_alloc; - uint32_t ib_sts; + union { + struct { + uint32_t state_priv; + uint32_t mode; + uint32_t status; + uint32_t gpr_alloc; + uint32_t lds_alloc; + uint32_t ib_sts; + uint32_t excp_flag_priv; + uint32_t excp_flag_user; + uint32_t trap_ctrl; + uint32_t hw_id1; + } gfx12; + + struct { + uint32_t status; + uint32_t mode; + uint32_t trap_sts; + uint32_t hw_id1; + uint32_t gpr_alloc; + uint32_t lds_alloc; + uint32_t ib_sts; + uint32_t reserved0; + uint32_t reserved1; + uint32_t reserved2; + } gfx8; } sq_wave_regs; uint32_t m0; diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index a097207a36a..e8c77bcc4c2 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -1174,22 +1174,33 @@ radv_dump_sq_hw_regs(struct radv_device *device, const struct aco_trap_handler_l enum radeon_family family = pdev->info.family; fprintf(f, "\nHardware registers:\n"); - if (pdev->info.gfx_level >= GFX10) { - ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0); - ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0); - ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0); - ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.hw_id1, ~0); - ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0); - ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0); - ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0); + if (pdev->info.gfx_level >= GFX12) { + ac_dump_reg(f, gfx_level, family, R_000410_SQ_WAVE_STATE_PRIV, layout->sq_wave_regs.gfx12.state_priv, ~0); + ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.gfx12.mode, ~0); + ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx12.status, ~0); + ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx12.gpr_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx12.lds_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx12.ib_sts, ~0); + ac_dump_reg(f, gfx_level, family, R_000444_SQ_WAVE_EXCP_FLAG_PRIV, layout->sq_wave_regs.gfx12.excp_flag_priv, ~0); + ac_dump_reg(f, gfx_level, family, R_000448_SQ_WAVE_EXCP_FLAG_USER, layout->sq_wave_regs.gfx12.excp_flag_user, ~0); + ac_dump_reg(f, gfx_level, family, R_00044C_SQ_WAVE_TRAP_CTRL, layout->sq_wave_regs.gfx12.trap_ctrl, ~0); + ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.gfx12.hw_id1, ~0); + } else if (pdev->info.gfx_level >= GFX10) { + ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.gfx8.mode, ~0); + ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx8.status, ~0); + ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.gfx8.trap_sts, ~0); + ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.gfx8.hw_id1, ~0); + ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx8.gpr_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx8.lds_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx8.ib_sts, ~0); } else { - ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0); - ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0); - ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0); - ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.hw_id1, ~0); - ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0); - ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0); - ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0); + ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.gfx8.mode, ~0); + ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.gfx8.status, ~0); + ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.gfx8.trap_sts, ~0); + ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.gfx8.hw_id1, ~0); + ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gfx8.gpr_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.gfx8.lds_alloc, ~0); + ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.gfx8.ib_sts, ~0); } fprintf(f, "\n\n"); } @@ -1198,14 +1209,16 @@ static uint32_t radv_get_vgpr_size(const struct radv_device *device, const struct aco_trap_handler_layout *layout) { const struct radv_physical_device *pdev = radv_device_physical(device); + const uint32_t gpr_alloc = + pdev->info.gfx_level >= GFX12 ? layout->sq_wave_regs.gfx12.gpr_alloc : layout->sq_wave_regs.gfx8.gpr_alloc; uint32_t vgpr_size; if (pdev->info.gfx_level >= GFX11) { - vgpr_size = G_000414_VGPR_SIZE_GFX11(layout->sq_wave_regs.gpr_alloc); + vgpr_size = G_000414_VGPR_SIZE_GFX11(gpr_alloc); } else if (pdev->info.gfx_level >= GFX10) { - vgpr_size = G_000414_VGPR_SIZE_GFX10(layout->sq_wave_regs.gpr_alloc); + vgpr_size = G_000414_VGPR_SIZE_GFX10(gpr_alloc); } else { - vgpr_size = G_000054_VGPR_SIZE_GFX6(layout->sq_wave_regs.gpr_alloc); + vgpr_size = G_000054_VGPR_SIZE_GFX6(gpr_alloc); } return vgpr_size; @@ -1256,7 +1269,10 @@ radv_dump_shader_regs(const struct radv_device *device, const struct aco_trap_ha static void radv_dump_lds(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f) { - uint32_t lds_size = G_000058_LDS_SIZE(layout->sq_wave_regs.lds_alloc); + const struct radv_physical_device *pdev = radv_device_physical(device); + const uint32_t lds_alloc = + pdev->info.gfx_level >= GFX12 ? layout->sq_wave_regs.gfx12.lds_alloc : layout->sq_wave_regs.gfx8.lds_alloc; + uint32_t lds_size = G_000058_LDS_SIZE(lds_alloc); if (!lds_size) return;