mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-22 06:20:22 +01:00
radv: fix capturing RGP on RDNA3 with more than one Shader Engine
PKT3_RESET_FILTER_CAM_S seems required on GFX11. Otherwise, capturing with more than one SE can hang. Cc: mesa-stable Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25156>
This commit is contained in:
parent
6caae898dd
commit
2cc981a0cd
5 changed files with 30 additions and 18 deletions
|
|
@ -229,10 +229,6 @@ ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *data, enum amd_ip_type ip_type)
|
||||||
bool
|
bool
|
||||||
ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
|
ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
|
||||||
{
|
{
|
||||||
/* FIXME: SQTT only works on SE0 for some unknown reasons. */
|
|
||||||
if (info->gfx_level == GFX11)
|
|
||||||
return se != 0;
|
|
||||||
|
|
||||||
/* No active CU on the SE means it is disabled. */
|
/* No active CU on the SE means it is disabled. */
|
||||||
return info->cu_mask[se][0] == 0;
|
return info->cu_mask[se][0] == 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -168,9 +168,9 @@ radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct ra
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigned value)
|
radeon_set_perfctr_reg(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs, unsigned reg,
|
||||||
|
unsigned value)
|
||||||
{
|
{
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
|
||||||
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
|
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
|
||||||
assert(cs->cdw + 3 <= cs->reserved_dw);
|
assert(cs->cdw + 3 <= cs->reserved_dw);
|
||||||
|
|
||||||
|
|
@ -179,8 +179,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne
|
||||||
* that means that it can skip register writes due to not taking correctly into account the
|
* that means that it can skip register writes due to not taking correctly into account the
|
||||||
* fields from the GRBM_GFX_INDEX. With this bit we can force the write.
|
* fields from the GRBM_GFX_INDEX. With this bit we can force the write.
|
||||||
*/
|
*/
|
||||||
bool filter_cam_workaround =
|
bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;
|
||||||
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL;
|
|
||||||
|
|
||||||
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
|
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
|
||||||
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
|
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
|
||||||
|
|
|
||||||
|
|
@ -462,6 +462,8 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
|
||||||
static void
|
static void
|
||||||
radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
|
radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
|
||||||
{
|
{
|
||||||
|
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||||
|
const enum radv_queue_family qf = cmd_buffer->qf;
|
||||||
struct ac_pc_block_base *regs = block->b->b;
|
struct ac_pc_block_base *regs = block->b->b;
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
unsigned idx;
|
unsigned idx;
|
||||||
|
|
@ -473,7 +475,7 @@ radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (idx = 0; idx < count; ++idx) {
|
for (idx = 0; idx < count; ++idx) {
|
||||||
radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or);
|
radeon_set_perfctr_reg(gfx_level, qf, cs, regs->select0[idx], G_REG_SEL(selectors[idx]) | regs->select_or);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (idx = 0; idx < regs->num_spm_counters; idx++) {
|
for (idx = 0; idx < regs->num_spm_counters; idx++) {
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,7 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
|
||||||
static void
|
static void
|
||||||
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
|
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
|
||||||
{
|
{
|
||||||
|
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||||
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||||
unsigned max_se = rad_info->max_se;
|
unsigned max_se = rad_info->max_se;
|
||||||
|
|
@ -94,12 +95,12 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||||
|
|
||||||
if (device->physical_device->rad_info.gfx_level >= GFX11) {
|
if (device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||||
/* Order seems important for the following 2 registers. */
|
/* Order seems important for the following 2 registers. */
|
||||||
radeon_set_uconfig_reg(cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||||
S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
|
S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
|
||||||
|
|
||||||
radeon_set_uconfig_reg(cs, R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
|
||||||
|
|
||||||
radeon_set_uconfig_reg(cs, R_0367B4_SQ_THREAD_TRACE_MASK,
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B4_SQ_THREAD_TRACE_MASK,
|
||||||
S_0367B4_WTYPE_INCLUDE(0x7f) | /* all shader stages */
|
S_0367B4_WTYPE_INCLUDE(0x7f) | /* all shader stages */
|
||||||
S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) | S_0367B4_SIMD_SEL(0));
|
S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) | S_0367B4_SIMD_SEL(0));
|
||||||
|
|
||||||
|
|
@ -118,10 +119,11 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||||
}
|
}
|
||||||
sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE(token_exclude);
|
sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE(token_exclude);
|
||||||
|
|
||||||
radeon_set_uconfig_reg(cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
|
||||||
|
|
||||||
/* Should be emitted last (it enables thread traces). */
|
/* Should be emitted last (it enables thread traces). */
|
||||||
radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));
|
||||||
|
|
||||||
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||||
/* Order seems important for the following 2 registers. */
|
/* Order seems important for the following 2 registers. */
|
||||||
radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
|
radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||||
|
|
@ -301,6 +303,7 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf
|
||||||
static void
|
static void
|
||||||
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
|
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
|
||||||
{
|
{
|
||||||
|
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||||
unsigned max_se = device->physical_device->rad_info.max_se;
|
unsigned max_se = device->physical_device->rad_info.max_se;
|
||||||
|
|
||||||
/* Stop the thread trace with a different event based on the queue. */
|
/* Stop the thread trace with a different event based on the queue. */
|
||||||
|
|
@ -338,7 +341,7 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
|
||||||
radeon_emit(cs, 4); /* poll interval */
|
radeon_emit(cs, 4); /* poll interval */
|
||||||
|
|
||||||
/* Disable the thread trace mode. */
|
/* Disable the thread trace mode. */
|
||||||
radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, false));
|
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, false));
|
||||||
|
|
||||||
/* Wait for thread trace completion. */
|
/* Wait for thread trace completion. */
|
||||||
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
|
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,18 @@ static bool si_sqtt_init_bo(struct si_context *sctx) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
si_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
|
||||||
|
{
|
||||||
|
/* FIXME: SQTT only works on SE0 for some unknown reasons. See RADV for the
|
||||||
|
* solution */
|
||||||
|
if (info->gfx_level == GFX11)
|
||||||
|
return se != 0;
|
||||||
|
|
||||||
|
/* No active CU on the SE means it is disabled. */
|
||||||
|
return info->cu_mask[se][0] == 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void si_emit_sqtt_start(struct si_context *sctx,
|
static void si_emit_sqtt_start(struct si_context *sctx,
|
||||||
struct radeon_cmdbuf *cs,
|
struct radeon_cmdbuf *cs,
|
||||||
uint32_t queue_family_index) {
|
uint32_t queue_family_index) {
|
||||||
|
|
@ -62,7 +74,7 @@ static void si_emit_sqtt_start(struct si_context *sctx,
|
||||||
ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se);
|
ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se);
|
||||||
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
|
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||||
|
|
||||||
if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
|
if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Target SEx and SH0. */
|
/* Target SEx and SH0. */
|
||||||
|
|
@ -333,7 +345,7 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned se = 0; se < max_se; se++) {
|
for (unsigned se = 0; se < max_se; se++) {
|
||||||
if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
|
if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
radeon_begin(cs);
|
radeon_begin(cs);
|
||||||
|
|
@ -565,7 +577,7 @@ static bool si_get_sqtt_trace(struct si_context *sctx,
|
||||||
void *info_ptr = sqtt_ptr + info_offset;
|
void *info_ptr = sqtt_ptr + info_offset;
|
||||||
struct ac_sqtt_data_info *info = (struct ac_sqtt_data_info *)info_ptr;
|
struct ac_sqtt_data_info *info = (struct ac_sqtt_data_info *)info_ptr;
|
||||||
|
|
||||||
if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
|
if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!ac_is_sqtt_complete(&sctx->screen->info, sctx->sqtt, info)) {
|
if (!ac_is_sqtt_complete(&sctx->screen->info, sctx->sqtt, info)) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue