From b82e5c8da8874b7eafde60ad33794aa6db34b2b8 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 31 May 2024 08:17:31 +0200 Subject: [PATCH] ac,radv,radeonsi: add more parameters to ac_sqtt Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_sqtt.c | 4 ++-- src/amd/common/ac_sqtt.h | 4 +++- src/amd/vulkan/radv_sqtt.c | 15 ++++++++------- src/gallium/drivers/radeonsi/si_sqtt.c | 11 ++++++----- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c index 5d36e5f8b38..11210ff0990 100644 --- a/src/amd/common/ac_sqtt.c +++ b/src/amd/common/ac_sqtt.c @@ -38,10 +38,10 @@ ac_sqtt_get_info_va(uint64_t va, unsigned se) } uint64_t -ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *data, uint64_t va, +ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *data, unsigned se) { - return va + ac_sqtt_get_data_offset(rad_info, data, se); + return data->buffer_va + ac_sqtt_get_data_offset(rad_info, data, se); } void diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h index 1f0414a65cd..4403793ad22 100644 --- a/src/amd/common/ac_sqtt.h +++ b/src/amd/common/ac_sqtt.h @@ -35,10 +35,12 @@ struct ac_sqtt { struct radeon_cmdbuf *stop_cs[2]; /* struct radeon_winsys_bo or struct pb_buffer */ void *bo; + uint64_t buffer_va; void *ptr; uint32_t buffer_size; int start_frame; char *trigger_file; + bool instruction_timing_enabled; uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; @@ -93,7 +95,7 @@ uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struc uint64_t ac_sqtt_get_info_va(uint64_t va, unsigned se); uint64_t ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, - uint64_t va, unsigned se); + unsigned se); void ac_sqtt_init(struct ac_sqtt *data); diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index baf971292a0..8e9c541fd39 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -74,8 +74,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_check_space(device->ws, cs, 6 + max_se * 33); for (unsigned se = 0; se < max_se; se++) { - uint64_t va = radv_buffer_get_va(device->sqtt.bo); - uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se); + uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, se); uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; int active_cu = ac_sqtt_get_active_cu(&pdev->info, se); @@ -104,7 +103,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, /* Performance counters with SQTT are considered deprecated. */ uint32_t token_exclude = V_0367B8_TOKEN_EXCLUDE_PERF; - if (!radv_is_instruction_timing_enabled()) { + if (!device->sqtt.instruction_timing_enabled) { /* Reduce SQTT traffic when instruction timing isn't enabled. */ token_exclude |= V_0367B8_TOKEN_EXCLUDE_VMEMEXEC | V_0367B8_TOKEN_EXCLUDE_ALUEXEC | V_0367B8_TOKEN_EXCLUDE_VALUINST | V_0367B8_TOKEN_EXCLUDE_IMMEDIATE | @@ -136,7 +135,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, /* Performance counters with SQTT are considered deprecated. */ uint32_t token_exclude = V_008D18_TOKEN_EXCLUDE_PERF; - if (!radv_is_instruction_timing_enabled()) { + if (!device->sqtt.instruction_timing_enabled) { /* Reduce SQTT traffic when instruction timing isn't enabled. */ token_exclude |= V_008D18_TOKEN_EXCLUDE_VMEMEXEC | V_008D18_TOKEN_EXCLUDE_ALUEXEC | V_008D18_TOKEN_EXCLUDE_VALUINST | V_008D18_TOKEN_EXCLUDE_IMMEDIATE | @@ -256,8 +255,7 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf } /* Get the VA where the info struct is stored for this SE. */ - uint64_t va = radv_buffer_get_va(device->sqtt.bo); - uint64_t info_va = ac_sqtt_get_info_va(va, se_index); + uint64_t info_va = ac_sqtt_get_info_va(device->sqtt.buffer_va, se_index); /* Copy back the info struct one DWORD at a time. */ for (unsigned i = 0; i < 3; i++) { @@ -278,7 +276,7 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf * 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned * 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits */ - uint64_t data_va = ac_sqtt_get_data_va(&pdev->info, &device->sqtt, va, se_index); + uint64_t data_va = ac_sqtt_get_data_va(&pdev->info, &device->sqtt, se_index); uint64_t shifted_data_va = (data_va >> 5); uint32_t init_wptr_value = shifted_data_va & 0x1fffffff; @@ -627,6 +625,8 @@ radv_sqtt_init_bo(struct radv_device *device) if (!device->sqtt.ptr) return false; + device->sqtt.buffer_va = radv_buffer_get_va(device->sqtt.bo); + return true; } @@ -718,6 +718,7 @@ radv_sqtt_init(struct radv_device *device) /* Default buffer size set to 32MB per SE. */ device->sqtt.buffer_size = (uint32_t)debug_get_num_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024); + device->sqtt.instruction_timing_enabled = radv_is_instruction_timing_enabled(); if (!radv_sqtt_init_bo(device)) return false; diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 69844824c49..5aef674c71e 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -43,6 +43,8 @@ static bool si_sqtt_init_bo(struct si_context *sctx) if (!sctx->sqtt->bo) return false; + sctx->sqtt->buffer_va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo); + return true; } @@ -57,9 +59,8 @@ static void si_emit_sqtt_start(struct si_context *sctx, radeon_begin(cs); for (unsigned se = 0; se < max_se; se++) { - uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo); uint64_t data_va = - ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se); + ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, se); uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; if (ac_sqtt_se_is_disabled(&sctx->screen->info, se)) @@ -265,8 +266,7 @@ static void si_copy_sqtt_info_regs(struct si_context *sctx, } /* Get the VA where the info struct is stored for this SE. */ - uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo); - uint64_t info_va = ac_sqtt_get_info_va(va, se_index); + uint64_t info_va = ac_sqtt_get_info_va(sctx->sqtt->buffer_va, se_index); radeon_begin(cs); @@ -290,7 +290,7 @@ static void si_copy_sqtt_info_regs(struct si_context *sctx, * 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits */ uint64_t data_va = - ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se_index); + ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, se_index); uint64_t shifted_data_va = (data_va >> 5); uint64_t init_wptr_value = shifted_data_va & 0x1fffffff; @@ -633,6 +633,7 @@ bool si_init_sqtt(struct si_context *sctx) /* Default buffer size set to 32MB per SE. */ sctx->sqtt->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024; + sctx->sqtt->instruction_timing_enabled = false; sctx->sqtt->start_frame = 10; const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");