radeonsi/sqtt: enable spm counters on gfx10+

Tested on:
  - navi10: L0 cache counter doesn't work (always 0)
  - sienna_cichlid: L0 doesn't work (always 0) and L1 isn't visible in RGP

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15646>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2022-03-29 14:37:01 +02:00
parent e5e8b56236
commit c96ee36289

View file

@ -408,7 +408,15 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
/* Enable SQG events that collects thread trace data. */
si_emit_spi_config_cntl(sctx, cs, true);
si_pc_emit_spm_reset(cs);
si_pc_emit_shaders(cs, 0x7f);
si_emit_spm_setup(sctx, cs);
si_emit_thread_trace_start(sctx, cs, family);
si_pc_emit_spm_start(cs);
}
static void
@ -443,6 +451,8 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
si_cp_dma_wait_for_idle(sctx, cs);
si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters);
/* Make sure to wait-for-idle before stopping SQTT. */
sctx->flags |=
SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
@ -452,6 +462,8 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
si_emit_thread_trace_stop(sctx, cs, family);
si_pc_emit_spm_reset(cs);
/* Restore previous state by disabling SQG events. */
si_emit_spi_config_cntl(sctx, cs, false);
@ -616,6 +628,12 @@ si_init_thread_trace(struct si_context *sctx)
list_inithead(&sctx->thread_trace->rgp_code_object.record);
simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
if (sctx->chip_class >= GFX10) {
/* Limit SPM counters to GFX10+ for now */
ASSERTED bool r = si_spm_init(sctx);
assert(r);
}
si_thread_trace_init_cs(sctx);
sctx->sqtt_next_event = EventInvalid;
@ -670,6 +688,9 @@ si_destroy_thread_trace(struct si_context *sctx)
free(sctx->thread_trace);
sctx->thread_trace = NULL;
if (sctx->chip_class >= GFX10)
si_spm_finish(sctx);
}
static uint64_t num_frames = 0;
@ -720,7 +741,15 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
/* Wait for SQTT to finish and read back the bo */
if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
si_get_thread_trace(sctx, &thread_trace)) {
/* Map the SPM counter buffer */
if (sctx->chip_class >= GFX10)
sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo,
NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, &sctx->spm_trace);
if (sctx->spm_trace.ptr)
sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo);
} else {
fprintf(stderr, "Failed to read the trace\n");
}