ac,radv,radeonsi: add more SPM helpers to common code

This also fixes a small bug on RADV for RDNA3 where counters might be
stuck.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38577>
This commit is contained in:
Samuel Pitoiset 2025-11-21 15:34:48 +01:00 committed by Marge Bot
parent e2644a1389
commit 108d2d29a9
8 changed files with 81 additions and 128 deletions

View file

@ -887,3 +887,53 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
/* Select SPM counters. */
ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
}
/**
 * Emit the commands that switch SPM counters to the counting state.
 *
 * CP_PERFMON_CNTL is written with PERFMON_STATE = DISABLE_AND_RESET and
 * SPM_PERFMON_STATE = START_COUNTING, then the windowed counters are started.
 *
 * \param cs       Command buffer the packets are appended to.
 * \param ip_type  IP type of the command buffer; the PERFCOUNTER_START event
 *                 is only emitted when this is AMD_IP_GFX.
 */
void
ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type)
{
   const uint32_t perfmon_cntl =
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
      S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING);

   ac_cmdbuf_begin(cs);

   /* Start SPM counters. */
   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, perfmon_cntl);

   /* Start windowed performance counters: the event is GFX-only, the
    * compute enable bit is written unconditionally.
    */
   if (ip_type == AMD_IP_GFX) {
      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_START);
   }
   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));

   ac_cmdbuf_end();
}
/**
 * Emit the commands that stop SPM counting.
 *
 * The windowed counters are stopped first, then CP_PERFMON_CNTL is written
 * with PERFMON_STATE = DISABLE_AND_RESET and the selected SPM state.
 *
 * \param cs       Command buffer the packets are appended to.
 * \param ip_type  IP type of the command buffer; the PERFCOUNTER_STOP event
 *                 is only considered when this is AMD_IP_GFX.
 * \param info     Device info supplying two flags:
 *                 - never_send_perfcounter_stop: skip the PERFCOUNTER_STOP event.
 *                 - never_stop_sq_perf_counters: program START_COUNTING instead
 *                   of STOP_COUNTING in SPM_PERFMON_STATE.
 */
void
ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
                 const struct radeon_info *info)
{
   const bool send_stop_event = ip_type == AMD_IP_GFX && !info->never_send_perfcounter_stop;

   /* When the counters must never be stopped, they are left counting. */
   uint32_t spm_state = V_036020_STRM_PERFMON_STATE_STOP_COUNTING;
   if (info->never_stop_sq_perf_counters)
      spm_state = V_036020_STRM_PERFMON_STATE_START_COUNTING;

   ac_cmdbuf_begin(cs);

   /* Stop windowed performance counters. */
   if (send_stop_event) {
      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_STOP);
   }
   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));

   /* Stop SPM counters. */
   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
                             S_036020_SPM_PERFMON_STATE(spm_state));

   ac_cmdbuf_end();
}
/**
 * Emit a CP_PERFMON_CNTL write that puts both PERFMON_STATE and
 * SPM_PERFMON_STATE into DISABLE_AND_RESET.
 *
 * \param cs  Command buffer the packet is appended to.
 */
void
ac_emit_spm_reset(struct ac_cmdbuf *cs)
{
   const uint32_t perfmon_cntl =
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
      S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET);

   ac_cmdbuf_begin(cs);
   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, perfmon_cntl);
   ac_cmdbuf_end();
}

View file

@ -206,4 +206,14 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type, const struct ac_spm *spm,
uint64_t va);
/* Emit the commands that start SPM counting on cs. ip_type selects whether
 * the GFX-only PERFCOUNTER_START event is emitted.
 */
void
ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type);
/* Emit the commands that stop SPM counting on cs. The never_send_perfcounter_stop
 * and never_stop_sq_perf_counters fields of info alter the emitted sequence.
 */
void
ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
const struct radeon_info *info);
/* Emit a CP_PERFMON_CNTL write that disables and resets the SPM counters. */
void
ac_emit_spm_reset(struct ac_cmdbuf *cs);
#endif

View file

@ -48,82 +48,32 @@ radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *
radeon_end();
}
void
radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm)
static void
radv_perfcounter_emit_reset(struct radv_cmd_stream *cs)
{
uint32_t cp_perfmon_cntl;
if (is_spm) {
cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET);
} else {
cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET);
}
radeon_begin(cs);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
radeon_end();
}
static void
radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm)
radv_perfcounter_emit_start(struct radv_cmd_stream *cs)
{
uint32_t cp_perfmon_cntl;
if (is_spm) {
cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING);
} else {
cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING);
}
radeon_begin(cs);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
radeon_end();
}
static void
radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
radv_perfcounter_emit_stop(struct radv_cmd_stream *cs)
{
uint32_t cp_perfmon_cntl;
if (is_spm) {
cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_STOP_COUNTING);
} else {
cp_perfmon_cntl =
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1);
}
radeon_begin(cs);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
S_036020_PERFMON_SAMPLE_ENABLE(1));
radeon_end();
}
/* Start SPM counting on cs: emit the SPM variant of the perfcounter start
 * (is_spm = true), then call radv_emit_windowed_counters() with true.
 */
void
radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs)
{
/* Start SPM counters. */
radv_perfcounter_emit_start(cs, true);
/* Windowed counters follow the SPM start; third argument true presumably
 * means "enable" -- confirm against radv_emit_windowed_counters().
 */
radv_emit_windowed_counters(device, cs, true);
}
/* Stop SPM counting on cs.
 *
 * radv_emit_windowed_counters() is called with false first. Then the SPM
 * state is programmed: when the device reports never_stop_sq_perf_counters,
 * the SPM "start" sequence is emitted again instead of the "stop" one.
 */
void
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   radv_emit_windowed_counters(device, cs, false);

   /* Stop SPM counters. */
   if (!pdev->info.never_stop_sq_perf_counters) {
      radv_perfcounter_emit_stop(cs, true);
      return;
   }

   /* Counters must never be stopped on this device: keep them counting. */
   radv_perfcounter_emit_start(cs, true);
}
static void
radv_perfcounter_emit_sample(struct radv_cmd_stream *cs)
{
@ -619,7 +569,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
radv_pc_wait_idle(cmd_buffer);
radv_emit_instance(cmd_buffer, -1, -1);
radv_emit_windowed_counters(device, cs, false);
radv_perfcounter_emit_stop(cs, false);
radv_perfcounter_emit_stop(cs);
for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
@ -683,7 +633,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
radv_pc_wait_idle(cmd_buffer);
radv_perfcounter_emit_reset(cs, false);
radv_perfcounter_emit_reset(cs);
ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, true);
ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, true);
radv_perfcounter_emit_shaders(device, cs, 0x7f);
@ -720,7 +670,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
/* The following sequence actually starts the perfcounters. */
radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
radv_perfcounter_emit_start(cs, false);
radv_perfcounter_emit_start(cs);
radv_emit_windowed_counters(device, cs, true);
assert(cs->b->cdw <= cdw_max);
@ -750,7 +700,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
radv_pc_wait_idle(cmd_buffer);
radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
radv_perfcounter_emit_reset(cs, false);
radv_perfcounter_emit_reset(cs);
ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, false);
ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, false);

View file

@ -32,12 +32,6 @@ struct radv_pc_query_pool {
void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders);
void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs);
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
VkResult radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCreateInfo *pCreateInfo,

View file

@ -499,7 +499,7 @@ radv_begin_sqtt(struct radv_queue *queue)
ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, true);
if (device->spm.bo) {
radv_perfcounter_emit_reset(&cs, true);
ac_emit_spm_reset(cs.b);
/* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));
@ -512,7 +512,7 @@ radv_begin_sqtt(struct radv_queue *queue)
if (device->spm.bo) {
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_start(device, &cs);
ac_emit_spm_start(cs.b, cs.hw_ip);
}
result = ws->cs_finalize(cs.b);
@ -574,14 +574,14 @@ radv_end_sqtt(struct radv_queue *queue)
if (device->spm.bo) {
radeon_check_space(ws, cs.b, 8);
radv_perfcounter_emit_spm_stop(device, &cs);
ac_emit_spm_stop(cs.b, cs.hw_ip, &pdev->info);
}
/* Stop SQTT. */
radv_emit_sqtt_stop(device, &cs);
if (device->spm.bo)
radv_perfcounter_emit_reset(&cs, true);
ac_emit_spm_reset(cs.b);
/* Restore previous state by disabling SQG events. */
ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, false);

View file

@ -174,52 +174,6 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
radeon_end();
}
/**
 * Emit the commands that switch SPM counters to the counting state.
 *
 * CP_PERFMON_CNTL is written with PERFMON_STATE = DISABLE_AND_RESET and
 * SPM_PERFMON_STATE = START_COUNTING, followed by the PERFCOUNTER_START event
 * and COMPUTE_PERFCOUNT_ENABLE = 1.
 *
 * \param cs  Command buffer the packets are appended to.
 */
void si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
{
   const uint32_t perfmon_cntl =
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
      S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING);

   radeon_begin(cs);

   /* Start SPM counters. */
   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, perfmon_cntl);

   /* Start windowed performance counters. */
   radeon_event_write(V_028A90_PERFCOUNTER_START);
   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));

   radeon_end();
}
/**
 * Emit the commands that stop SPM counting.
 *
 * \param cs                           Command buffer the packets are appended to.
 * \param never_stop_sq_perf_counters  When true, SPM_PERFMON_STATE is programmed
 *                                     to START_COUNTING instead of STOP_COUNTING.
 * \param never_send_perfcounter_stop  When true, the PERFCOUNTER_STOP event is
 *                                     not emitted.
 */
void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
                         bool never_send_perfcounter_stop)
{
   uint32_t spm_state = V_036020_STRM_PERFMON_STATE_STOP_COUNTING;
   if (never_stop_sq_perf_counters)
      spm_state = V_036020_STRM_PERFMON_STATE_START_COUNTING;

   radeon_begin(cs);

   /* Stop windowed performance counters. */
   if (!never_send_perfcounter_stop) {
      radeon_event_write(V_028A90_PERFCOUNTER_STOP);
   }
   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));

   /* Stop SPM counters. */
   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
                          S_036020_SPM_PERFMON_STATE(spm_state));

   radeon_end();
}
/**
 * Emit a CP_PERFMON_CNTL write that puts both PERFMON_STATE and
 * SPM_PERFMON_STATE into DISABLE_AND_RESET.
 *
 * \param cs  Command buffer the packet is appended to.
 */
void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs)
{
   const uint32_t perfmon_cntl =
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
      S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET);

   radeon_begin(cs);
   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, perfmon_cntl);
   radeon_end();
}
static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
uint64_t va)
{

View file

@ -1655,10 +1655,6 @@ void si_init_perfcounters(struct si_screen *screen);
void si_destroy_perfcounters(struct si_screen *screen);
void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit);
void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void si_pc_emit_spm_start(struct radeon_cmdbuf *cs);
void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
bool never_send_perfcounter_stop);
void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs);
void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs);
bool si_spm_init(struct si_context *sctx);
void si_spm_finish(struct si_context *sctx);

View file

@ -141,7 +141,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_emit_spi_config_cntl(sctx, cs, true);
if (sctx->spm.bo) {
si_pc_emit_spm_reset(cs);
ac_emit_spm_reset(&cs->current);
si_pc_emit_shaders(cs, ac_sqtt_get_shader_mask(&sctx->screen->info));
si_emit_spm_setup(sctx, cs);
}
@ -149,7 +149,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_emit_sqtt_start(sctx, cs, ip_type);
if (sctx->spm.bo)
si_pc_emit_spm_start(cs);
ac_emit_spm_start(&cs->current, AMD_IP_GFX);
}
static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
@ -185,8 +185,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_cp_dma_wait_for_idle(sctx, cs);
if (sctx->spm.bo)
si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
sctx->screen->info.never_send_perfcounter_stop);
ac_emit_spm_stop(&cs->current, AMD_IP_GFX, &sctx->screen->info);
/* Make sure to wait-for-idle before stopping SQTT. */
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
@ -198,7 +197,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_emit_sqtt_stop(sctx, cs, ip_type);
if (sctx->spm.bo)
si_pc_emit_spm_reset(cs);
ac_emit_spm_reset(&cs->current);
/* Restore previous state by disabling SQG events. */
si_emit_spi_config_cntl(sctx, cs, false);