ac,radv,radeonsi: add more SPM helpers to common code
This also fixes a small bug on RADV for RDNA3 where counters might be stuck.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38577>
parent e2644a1389
commit 108d2d29a9
8 changed files with 81 additions and 128 deletions
@@ -887,3 +887,53 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
    /* Select SPM counters. */
    ac_emit_spm_counters(cs, gfx_level, ip_type, spm);
 }
+
+void
+ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type)
+{
+   ac_cmdbuf_begin(cs);
+
+   /* Start SPM counters. */
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
+
+   /* Start windowed performance counters. */
+   if (ip_type == AMD_IP_GFX)
+      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_START);
+   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
+
+   ac_cmdbuf_end();
+}
+
+void
+ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
+                 const struct radeon_info *info)
+{
+   ac_cmdbuf_begin(cs);
+
+   /* Stop windowed performance counters. */
+   if (ip_type == AMD_IP_GFX && !info->never_send_perfcounter_stop)
+      ac_cmdbuf_event_write(V_028A90_PERFCOUNTER_STOP);
+
+   ac_cmdbuf_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
+
+   /* Stop SPM counters. */
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(info->never_stop_sq_perf_counters ?
+                                                        V_036020_STRM_PERFMON_STATE_START_COUNTING :
+                                                        V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
+
+   ac_cmdbuf_end();
+}
+
+void
+ac_emit_spm_reset(struct ac_cmdbuf *cs)
+{
+   ac_cmdbuf_begin(cs);
+   ac_cmdbuf_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                             S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
+                             S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
+   ac_cmdbuf_end();
+}
@@ -206,4 +206,14 @@ ac_emit_spm_setup(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
                   enum amd_ip_type ip_type, const struct ac_spm *spm,
                   uint64_t va);
 
+void
+ac_emit_spm_start(struct ac_cmdbuf *cs, enum amd_ip_type ip_type);
+
+void
+ac_emit_spm_stop(struct ac_cmdbuf *cs, enum amd_ip_type ip_type,
+                 const struct radeon_info *info);
+
+void
+ac_emit_spm_reset(struct ac_cmdbuf *cs);
+
 #endif
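The header hunk above is the whole of the new common SPM API. As a minimal sketch (not part of this commit) of how a driver strings these helpers together, mirroring the order used by the radv_begin_sqtt()/radv_end_sqtt() and si_sqtt_start()/si_sqtt_stop() hunks below; the cmdbuf, info, spm and spm_va names are placeholders for the driver's own state, not identifiers from the change:

   /* Sketch only: a full SPM capture using the new common helpers.
    * "cmdbuf", "info", "spm" and "spm_va" stand in for the driver's own
    * ac_cmdbuf wrapper, radeon_info, ac_spm state and SPM buffer address. */
   static void
   emit_spm_capture(struct ac_cmdbuf *cmdbuf, enum amd_ip_type ip_type,
                    const struct radeon_info *info, const struct ac_spm *spm,
                    uint64_t spm_va)
   {
      ac_emit_spm_reset(cmdbuf);                                        /* reset the counters */
      ac_emit_spm_setup(cmdbuf, info->gfx_level, ip_type, spm, spm_va); /* program them */
      ac_emit_spm_start(cmdbuf, ip_type);                               /* start counting */

      /* ... record the workload to profile ... */

      /* Passing radeon_info lets the helper apply the
       * never_send_perfcounter_stop / never_stop_sq_perf_counters workarounds. */
      ac_emit_spm_stop(cmdbuf, ip_type, info);
      ac_emit_spm_reset(cmdbuf);
   }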
@@ -48,82 +48,32 @@ radv_emit_windowed_counters(struct radv_device *device, struct radv_cmd_stream *
    radeon_end();
 }
 
-void
-radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm)
+static void
+radv_perfcounter_emit_reset(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET);
-   } else {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
+                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
    radeon_end();
 }
 
 static void
-radv_perfcounter_emit_start(struct radv_cmd_stream *cs, bool is_spm)
+radv_perfcounter_emit_start(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING);
-   } else {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
    radeon_end();
 }
 
 static void
-radv_perfcounter_emit_stop(struct radv_cmd_stream *cs, bool is_spm)
+radv_perfcounter_emit_stop(struct radv_cmd_stream *cs)
 {
-   uint32_t cp_perfmon_cntl;
-
-   if (is_spm) {
-      cp_perfmon_cntl = S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                        S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_STOP_COUNTING);
-   } else {
-      cp_perfmon_cntl =
-         S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1);
-   }
-
    radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, cp_perfmon_cntl);
+   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
+                                                    S_036020_PERFMON_SAMPLE_ENABLE(1));
    radeon_end();
 }
 
-void
-radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   /* Start SPM counters. */
-   radv_perfcounter_emit_start(cs, true);
-
-   radv_emit_windowed_counters(device, cs, true);
-}
-
-void
-radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
-   radv_emit_windowed_counters(device, cs, false);
-
-   /* Stop SPM counters. */
-   if (pdev->info.never_stop_sq_perf_counters) {
-      radv_perfcounter_emit_start(cs, true);
-   } else {
-      radv_perfcounter_emit_stop(cs, true);
-   }
-}
-
 static void
 radv_perfcounter_emit_sample(struct radv_cmd_stream *cs)
 {
@@ -619,7 +569,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
    radv_pc_wait_idle(cmd_buffer);
    radv_emit_instance(cmd_buffer, -1, -1);
    radv_emit_windowed_counters(device, cs, false);
-   radv_perfcounter_emit_stop(cs, false);
+   radv_perfcounter_emit_stop(cs);
 
    for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
       uint64_t pred_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
@@ -683,7 +633,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
 
    radv_pc_wait_idle(cmd_buffer);
-   radv_perfcounter_emit_reset(cs, false);
+   radv_perfcounter_emit_reset(cs);
    ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, true);
    ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, true);
    radv_perfcounter_emit_shaders(device, cs, 0x7f);
@@ -720,7 +670,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
    /* The following sequence actually starts the perfcounters. */
 
    radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
-   radv_perfcounter_emit_start(cs, false);
+   radv_perfcounter_emit_start(cs);
    radv_emit_windowed_counters(device, cs, true);
 
    assert(cs->b->cdw <= cdw_max);
@@ -750,7 +700,7 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
 
    radv_pc_wait_idle(cmd_buffer);
    radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
-   radv_perfcounter_emit_reset(cs, false);
+   radv_perfcounter_emit_reset(cs);
    ac_emit_cp_spi_config_cntl(cs->b, pdev->info.gfx_level, false);
    ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, false);
 
@@ -32,12 +32,6 @@ struct radv_pc_query_pool {
 
 void radv_perfcounter_emit_shaders(struct radv_device *device, struct radv_cmd_stream *cs, unsigned shaders);
 
-void radv_perfcounter_emit_reset(struct radv_cmd_stream *cs, bool is_spm);
-
-void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radv_cmd_stream *cs);
-
-void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radv_cmd_stream *cs);
-
 void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
 
 VkResult radv_pc_init_query_pool(struct radv_physical_device *pdev, const VkQueryPoolCreateInfo *pCreateInfo,
@@ -499,7 +499,7 @@ radv_begin_sqtt(struct radv_queue *queue)
    ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, true);
 
    if (device->spm.bo) {
-      radv_perfcounter_emit_reset(&cs, true);
+      ac_emit_spm_reset(cs.b);
 
       /* Enable all shader stages by default. */
       radv_perfcounter_emit_shaders(device, &cs, ac_sqtt_get_shader_mask(&pdev->info));
@@ -512,7 +512,7 @@ radv_begin_sqtt(struct radv_queue *queue)
 
    if (device->spm.bo) {
       radeon_check_space(ws, cs.b, 8);
-      radv_perfcounter_emit_spm_start(device, &cs);
+      ac_emit_spm_start(cs.b, cs.hw_ip);
    }
 
    result = ws->cs_finalize(cs.b);
@@ -574,14 +574,14 @@ radv_end_sqtt(struct radv_queue *queue)
 
    if (device->spm.bo) {
       radeon_check_space(ws, cs.b, 8);
-      radv_perfcounter_emit_spm_stop(device, &cs);
+      ac_emit_spm_stop(cs.b, cs.hw_ip, &pdev->info);
    }
 
    /* Stop SQTT. */
    radv_emit_sqtt_stop(device, &cs);
 
    if (device->spm.bo)
-      radv_perfcounter_emit_reset(&cs, true);
+      ac_emit_spm_reset(cs.b);
 
    /* Restore previous state by disabling SQG events. */
    ac_emit_cp_spi_config_cntl(cs.b, pdev->info.gfx_level, false);
@@ -174,52 +174,6 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
    radeon_end();
 }
 
-void si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
-{
-   radeon_begin(cs);
-
-   /* Start SPM counters. */
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
-   /* Start windowed performance counters. */
-   radeon_event_write(V_028A90_PERFCOUNTER_START);
-   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
-
-   radeon_end();
-}
-
-void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
-                         bool never_send_perfcounter_stop)
-{
-   radeon_begin(cs);
-
-   /* Stop windowed performance counters. */
-   if (!never_send_perfcounter_stop)
-      radeon_event_write(V_028A90_PERFCOUNTER_STOP);
-
-   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
-
-   /* Stop SPM counters. */
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(never_stop_sq_perf_counters ?
-                                                     V_036020_STRM_PERFMON_STATE_START_COUNTING :
-                                                     V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
-
-   radeon_end();
-}
-
-void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs)
-{
-   radeon_begin(cs);
-   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
-                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                          S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
-   radeon_end();
-}
-
-
 static void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
                             uint64_t va)
 {
@@ -1655,10 +1655,6 @@ void si_init_perfcounters(struct si_screen *screen);
 void si_destroy_perfcounters(struct si_screen *screen);
 void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit);
 void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
-void si_pc_emit_spm_start(struct radeon_cmdbuf *cs);
-void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
-                         bool never_send_perfcounter_stop);
-void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs);
 void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs);
 bool si_spm_init(struct si_context *sctx);
 void si_spm_finish(struct si_context *sctx);
@@ -141,7 +141,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_spi_config_cntl(sctx, cs, true);
 
    if (sctx->spm.bo) {
-      si_pc_emit_spm_reset(cs);
+      ac_emit_spm_reset(&cs->current);
       si_pc_emit_shaders(cs, ac_sqtt_get_shader_mask(&sctx->screen->info));
      si_emit_spm_setup(sctx, cs);
    }
@@ -149,7 +149,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_sqtt_start(sctx, cs, ip_type);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_start(cs);
+      ac_emit_spm_start(&cs->current, AMD_IP_GFX);
 }
 
 static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
@@ -185,8 +185,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_cp_dma_wait_for_idle(sctx, cs);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
-                          sctx->screen->info.never_send_perfcounter_stop);
+      ac_emit_spm_stop(&cs->current, AMD_IP_GFX, &sctx->screen->info);
 
    /* Make sure to wait-for-idle before stopping SQTT. */
    sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
@@ -198,7 +197,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
    si_emit_sqtt_stop(sctx, cs, ip_type);
 
    if (sctx->spm.bo)
-      si_pc_emit_spm_reset(cs);
+      ac_emit_spm_reset(&cs->current);
 
    /* Restore previous state by disabling SQG events. */
    si_emit_spi_config_cntl(sctx, cs, false);