From 108d2d29a910d59de8c87cd6244a075b45f9ce0e Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Fri, 21 Nov 2025 15:34:48 +0100
Subject: [PATCH] ac,radv,radeonsi: add more SPM helpers to common code

This also fixes a small bug on RADV for RDNA3 where counters might be stuck.

Signed-off-by: Samuel Pitoiset
Part-of:
---
NOTE(review): the line structure of this patch was reconstructed from a copy
whose newlines had been collapsed. Hunk line counts match the @@ headers, but
the position of blank context lines and the exact indentation are best-effort;
the author e-mail address and the Part-of URL are missing. Run
`git apply --check` against the target tree before applying.

 src/amd/common/ac_spm.c                       | 50 ++++++++++++
 src/amd/common/ac_spm.h                       | 10 +++
 src/amd/vulkan/radv_perfcounter.c             | 76 ++++---------------
 src/amd/vulkan/radv_perfcounter.h             |  6 --
 src/amd/vulkan/radv_sqtt.c                    |  8 +-
 src/gallium/drivers/radeonsi/si_perfcounter.c | 46 -----------
 src/gallium/drivers/radeonsi/si_pipe.h        |  4 -
 src/gallium/drivers/radeonsi/si_sqtt.c        |  9 +--
 8 files changed, 81 insertions(+), 128 deletions(-)

diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c
index 77710dbf4c2..54866ccb857 100644
--- a/src/amd/common/ac_spm.c
+++ b/src/amd/common/ac_spm.c
@@ -887,3 +887,53 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
    /* Select SPM counters. */
    ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
 }
+
+void
+ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type)
+{
+   ac_cmdbuf_begin(cs);
+
+   /* Start SPM counters. */
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
+
+   /* Start windowed performance counters. */
+   if (ip_type == AMD_IP_GFX)
+      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_START);
+   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
+
+   ac_cmdbuf_end();
+}
+
+void
+ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
+                 const struct radeon_info *info)
+{
+   ac_cmdbuf_begin(cs);
+
+   /* Stop windowed performance counters. */
+   if (ip_type == AMD_IP_GFX && !info->never_send_perfcounter_stop)
+      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_STOP);
+
+   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
+
+   /* Stop SPM counters. */
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(info->never_stop_sq_perf_counters ?
+                                                        V_036020_STRM_PERFMON_STATE_START_COUNTING :
+                                                        V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
+
+   ac_cmdbuf_end();
+}
+
+void
+ac_emit_spm_reset(struct ac_cmdbuf *cs)
+{
+   ac_cmdbuf_begin(cs);
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
+   ac_cmdbuf_end();
+}
diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h
index 648848b4180..27b76736b23 100644
--- a/src/amd/common/ac_spm.h
+++ b/src/amd/common/ac_spm.h
@@ -206,4 +206,14 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                   enum amd_ip_type ip_type, const struct ac_spm *spm,
                   uint64_t va);
 
+void
+ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type);
+
+void
+ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
+                 const struct radeon_info *info);
+
+void
+ac_emit_spm_reset(struct ac_cmdbuf *cs);
+
 #endif
diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c
index 4d2d4818c5b..a8c5b996daa 100644
--- a/src/amd/vulkan/radv_perfcounter.c
+++ b/src/amd/vulkan/radv_perfcounter.c
@@ -48,82 +48,32 @@ radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *
    radeon_end();
 }
 
-void
-radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm)
+static void
+radv_perfcounter_emit_reset(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET);
-   } else {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
    radeon_end();
 }
 
 static void
-radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm)
+radv_perfcounter_emit_start(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING);
-   } else {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
    radeon_end();
 }
 
 static void
-radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
+radv_perfcounter_emit_stop(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_STOP_COUNTING);
-   } else {
-      cp_perfmon_cntl =
-         S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
+                                                    S_036020_PERFMON_SAMPLE_ENABLE(1));
    radeon_end();
 }
 
-void
-radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   /* Start SPM counters. */
-   radv_perfcounter_emit_start(cs, true);
-
-   radv_emit_windowed_counters(device, cs, true);
-}
-
-void
-radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
-   radv_emit_windowed_counters(device, cs, false);
-
-   /* Stop SPM counters. */
-   if (pdev->info.never_stop_sq_perf_counters) {
-      radv_perfcounter_emit_start(cs, true);
-   } else {
-      radv_perfcounter_emit_stop(cs, true);
-   }
-}
-
 static void
 radv_perfcounter_emit_sample(struct radv_cmd_stream *cs)
 {
@@ -619,7 +569,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
    radv_pc_wait_idle(cmd_buffer);
    radv_emit_instance(cmd_buffer, -1, -1);
    radv_emit_windowed_counters(device, cs, false);
-   radv_perfcounter_emit_stop(cs, false);
+   radv_perfcounter_emit_stop(cs);
 
    for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
       uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
@@ -683,7 +633,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
 
    radv_pc_wait_idle(cmd_buffer);
-   radv_perfcounter_emit_reset(cs, false);
+   radv_perfcounter_emit_reset(cs);
    ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, true);
    ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, true);
    radv_perfcounter_emit_shaders(device, cs, 0x7f);
@@ -720,7 +670,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    /* The following sequence actually starts the perfcounters. */
 
    radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
-   radv_perfcounter_emit_start(cs, false);
+   radv_perfcounter_emit_start(cs);
    radv_emit_windowed_counters(device, cs, true);
 
    assert(cs->b->cdw <= cdw_max);
@@ -750,7 +700,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
 
    radv_pc_wait_idle(cmd_buffer);
    radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
-   radv_perfcounter_emit_reset(cs, false);
+   radv_perfcounter_emit_reset(cs);
    ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, false);
    ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, false);
 
diff --git a/src/amd/vulkan/radv_perfcounter.h b/src/amd/vulkan/radv_perfcounter.h
index 0da3d575819..04f12628487 100644
--- a/src/amd/vulkan/radv_perfcounter.h
+++ b/src/amd/vulkan/radv_perfcounter.h
@@ -32,12 +32,6 @@ struct radv_pc_query_pool {
 
 void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders);
 
-void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);
-
-void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs);
-
-void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs);
-
 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
 
 VkResult radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCreateInfo *pCreateInfo,
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 7f14cf827f0..4a8508f2b8c 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -499,7 +499,7 @@ radv_begin_sqtt(struct radv_queue *queue)
    ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, true);
 
    if (device->spm.bo) {
-      radv_perfcounter_emit_reset(&cs, true);
+      ac_emit_spm_reset(cs.b);
 
       /* Enable all shader stages by default. */
       radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));
@@ -512,7 +512,7 @@ radv_begin_sqtt(struct radv_queue *queue)
 
    if (device->spm.bo) {
       radeon_check_space(ws, cs.b, 8);
-      radv_perfcounter_emit_spm_start(device, &cs);
+      ac_emit_spm_start(cs.b, cs.hw_ip);
    }
 
    result = ws->cs_finalize(cs.b);
@@ -574,14 +574,14 @@ radv_end_sqtt(struct radv_queue *queue)
 
    if (device->spm.bo) {
       radeon_check_space(ws, cs.b, 8);
-      radv_perfcounter_emit_spm_stop(device, &cs);
+      ac_emit_spm_stop(cs.b, cs.hw_ip, &pdev->info);
    }
 
    /* Stop SQTT. */
    radv_emit_sqtt_stop(device, &cs);
 
    if (device->spm.bo)
-      radv_perfcounter_emit_reset(&cs, true);
+      ac_emit_spm_reset(cs.b);
 
    /* Restore previous state by disabling SQG events. */
    ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, false);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 3ff5c04220c..74df42a600f 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -174,52 +174,6 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
    radeon_end();
 }
 
-void si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
-{
-   radeon_begin(cs);
-
-   /* Start SPM counters. */
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
-   /* Start windowed performance counters. */
-   radeon_event_write(V_028A90_PERFCOUNTER_START);
-   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
-
-   radeon_end();
-}
-
-void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
-                         bool never_send_perfcounter_stop)
-{
-   radeon_begin(cs);
-
-   /* Stop windowed performance counters. */
-   if (!never_send_perfcounter_stop)
-      radeon_event_write(V_028A90_PERFCOUNTER_STOP);
-
-   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
-
-   /* Stop SPM counters. */
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(never_stop_sq_perf_counters ?
-                                                     V_036020_STRM_PERFMON_STATE_START_COUNTING :
-                                                     V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
-
-   radeon_end();
-}
-
-void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs)
-{
-   radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
-   radeon_end();
-}
-
-
 static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                             uint64_t va)
 {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8f1d3ea5738..d66bc858469 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1655,10 +1655,6 @@ void si_init_perfcounters(struct si_screen *screen);
 void si_destroy_perfcounters(struct si_screen *screen);
 void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit);
 void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
-void si_pc_emit_spm_start(struct radeon_cmdbuf *cs);
-void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
-                         bool never_send_perfcounter_stop);
-void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs);
 void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs);
 bool si_spm_init(struct si_context *sctx);
 void si_spm_finish(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 7f57ce1069b..095551d1483 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -141,7 +141,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_spi_config_cntl(sctx, cs, true);
 
    if (sctx->spm.bo) {
-      si_pc_emit_spm_reset(cs);
+      ac_emit_spm_reset(&cs->current);
       si_pc_emit_shaders(cs, ac_sqtt_get_shader_mask(&sctx->screen->info));
       si_emit_spm_setup(sctx, cs);
    }
@@ -149,7 +149,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_sqtt_start(sctx, cs, ip_type);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_start(cs);
+      ac_emit_spm_start(&cs->current, AMD_IP_GFX);
 }
 
 static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
@@ -185,8 +185,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_cp_dma_wait_for_idle(sctx, cs);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
-                          sctx->screen->info.never_send_perfcounter_stop);
+      ac_emit_spm_stop(&cs->current, AMD_IP_GFX, &sctx->screen->info);
 
    /* Make sure to wait-for-idle before stopping SQTT. */
    sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
@@ -198,7 +197,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_sqtt_stop(sctx, cs, ip_type);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_reset(cs);
+      ac_emit_spm_reset(&cs->current);
 
    /* Restore previous state by disabling SQG events. */
    si_emit_spi_config_cntl(sctx, cs, false);