radv: fix capturing performance counters with SPM

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14333
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39462>
This commit is contained in:
Samuel Pitoiset 2025-12-22 18:15:07 +01:00 committed by Dylan Baker
parent dae41919b9
commit 25b6338eff
4 changed files with 32 additions and 9 deletions

View file

@ -7673,6 +7673,25 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
if (radv_device_fault_detection_enabled(device))
radv_cmd_buffer_trace_emit(cmd_buffer);
if (radv_spm_trace_enabled(pdev) && (cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE)) {
/* Force-enable windowed performance counters because the SQTT preamble is based on the queue
* family. That means that if it's presenting on compute, it won't enable windowed performance
* counters on graphics.
*
* On GFX12, this is required because this state seems cleared between command buffers and SPM
* counter values might be lost otherwise.
*/
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_check_space(device->ws, cmd_buffer->cs->b, 5);
radeon_begin(cs);
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
radeon_event_write(V_028A90_PERFCOUNTER_START);
radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
radeon_end();
}
radv_describe_begin_cmd_buffer(cmd_buffer);
return result;

View file

@ -69,13 +69,6 @@ typedef void *drmDevicePtr;
#include "ac_descriptors.h"
#include "ac_formats.h"
/* Report whether SPM tracing is enabled for this instance.
 *
 * True only when the instance trace mode has the RGP bit set and the
 * RADV_THREAD_TRACE_CACHE_COUNTERS debug option evaluates to true
 * (true is the default value passed to the option lookup).
 */
static bool
radv_spm_trace_enabled(const struct radv_instance *instance)
{
   /* Without RGP tracing, the debug option is never consulted. */
   if (!(instance->vk.trace_mode & RADV_TRACE_MODE_RGP))
      return false;

   return debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", true);
}
static bool
radv_trap_handler_enabled()
{
@ -604,10 +597,10 @@ radv_device_init_rgp(struct radv_device *device)
"radv: Thread trace support is enabled (initial buffer size: %u MiB, "
"instruction timing: %s, cache counters: %s, queue events: %s).\n",
device->sqtt.buffer_size / (1024 * 1024), radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
radv_spm_trace_enabled(instance) ? "enabled" : "disabled",
radv_spm_trace_enabled(pdev) ? "enabled" : "disabled",
radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");
if (radv_spm_trace_enabled(instance)) {
if (radv_spm_trace_enabled(pdev)) {
if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level < GFX11_5) {
if (!radv_spm_init(device))
return VK_ERROR_INITIALIZATION_FAILED;

View file

@ -72,6 +72,15 @@ radv_taskmesh_enabled(const struct radv_physical_device *pdev)
pdev->info.has_gang_submit;
}
/* Report whether SPM tracing is enabled for the given physical device.
 *
 * The decision is taken from the owning instance: the RGP bit must be set in
 * its trace mode, and the RADV_THREAD_TRACE_CACHE_COUNTERS debug option
 * (looked up with a default of true) must evaluate to true.
 */
bool
radv_spm_trace_enabled(const struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const bool rgp_tracing = (instance->vk.trace_mode & RADV_TRACE_MODE_RGP) != 0;

   /* Keep the short-circuit: the option is only queried when RGP is on. */
   if (!rgp_tracing)
      return false;

   return debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", true);
}
static bool
radv_transfer_queue_enabled(const struct radv_physical_device *pdev)
{

View file

@ -298,6 +298,8 @@ void radv_physical_device_destroy(struct vk_physical_device *vk_pdev);
bool radv_compute_queue_enabled(const struct radv_physical_device *pdev);
bool radv_spm_trace_enabled(const struct radv_physical_device *pdev);
static inline uint32_t
radv_get_sampled_image_desc_size(const struct radv_physical_device *pdev)
{