From 4da2e971e649ebb018088a094aeb52726610522a Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 6 Jan 2026 16:14:27 +0100 Subject: [PATCH] radv/sqtt: rework radv_emit_sqtt_userdata() to support gang CS For task shaders. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/layers/radv_sqtt_layer.c | 24 +++++++-------- src/amd/vulkan/radv_sqtt.c | 41 ++++++++++++++++--------- src/amd/vulkan/radv_sqtt.h | 8 ++++- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 449771f5090..e1568d88796 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -169,7 +169,7 @@ radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; marker.api_type = api_type; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } static void @@ -181,7 +181,7 @@ radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_s marker.api_type = api_type; marker.is_end = 1; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } static void @@ -208,7 +208,7 @@ radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker marker.instance_offset_reg_idx = instance_offset_user_data; marker.draw_index_reg_idx = draw_index_user_data; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } static void @@ -227,7 +227,7 @@ radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_s marker.thread_y = y; marker.thread_z = z; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } void @@ -245,7 +245,7 @@ radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_m marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; marker.data_type = type; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } else { assert(str != NULL); unsigned len = strlen(str); @@ -259,7 +259,7 @@ radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_m memcpy(buffer, &marker, sizeof(marker)); memcpy(buffer + sizeof(marker), str, len); - radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4); + radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4, RADV_SQTT_USERDATA_MAIN_CS); } } @@ -292,7 +292,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) if (!radv_dedicated_sparse_queue_enabled(pdev)) marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } void @@ -310,7 +310,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) marker.device_id_low = device_id; marker.device_id_high = device_id >> 32; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } static void @@ -442,7 +442,7 @@ radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1) marker.inval_gl1 = true; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); cmd_buffer->state.num_layout_transitions = 0; } @@ -469,7 +469,7 @@ radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier marker.cb_id = cmd_buffer->sqtt_cb_id; marker.dword02 = reason; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } void @@ -503,7 +503,7 @@ radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand; marker.init_mask_ram = barrier->layout_transitions.init_mask_ram; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); cmd_buffer->state.num_layout_transitions++; } @@ -543,7 +543,7 @@ radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPo marker.api_pso_hash[0] = pipeline->pipeline_hash; marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32; - radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); + radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS); } /* Queue events */ diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index b79a400df2e..ca4d1230527 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -98,13 +98,32 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs ac_pm4_free_state(pm4); } +static void +radv_emit_sqtt_userdata_cs(const struct radv_device *device, struct radv_cmd_stream *cs, uint32_t count, + const uint32_t *dwords) +{ + const struct radv_physical_device *pdev = radv_device_physical(device); + + radeon_check_space(device->ws, cs->b, 2 + count); + radeon_begin(cs); + + /* Without the perfctr bit the CP might not always pass the + * write on correctly. */ + if (pdev->info.gfx_level >= GFX10) + radeon_set_uconfig_perfctr_reg_seq(pdev->info.gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); + else + radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); + radeon_emit_array(dwords, count); + + radeon_end(); +} + void -radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords) +radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords, + enum radv_sqtt_userdata_flags flags) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - const struct radv_physical_device *pdev = radv_device_physical(device); const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE; - const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct radv_cmd_stream *cs = cmd_buffer->cs; const uint32_t *dwords = (uint32_t *)data; @@ -115,18 +134,10 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da while (num_dwords > 0) { uint32_t count = MIN2(num_dwords, 2); - radeon_check_space(device->ws, cs->b, 2 + count); - radeon_begin(cs); - - /* Without the perfctr bit the CP might not always pass the - * write on correctly. */ - if (pdev->info.gfx_level >= GFX10) - radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); - else - radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); - radeon_emit_array(dwords, count); - - radeon_end(); + if (flags & RADV_SQTT_USERDATA_MAIN_CS) + radv_emit_sqtt_userdata_cs(device, cs, count, dwords); + if (flags & RADV_SQTT_USERDATA_GANG_CS) + radv_emit_sqtt_userdata_cs(device, cmd_buffer->gang.cs, count, dwords); dwords += count; num_dwords -= count; diff --git a/src/amd/vulkan/radv_sqtt.h b/src/amd/vulkan/radv_sqtt.h index 38a7fccf39f..2be2a03580f 100644 --- a/src/amd/vulkan/radv_sqtt.h +++ b/src/amd/vulkan/radv_sqtt.h @@ -59,11 +59,17 @@ enum rgp_barrier_reason { RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3 }; +enum radv_sqtt_userdata_flags { + RADV_SQTT_USERDATA_MAIN_CS = 1u << 0, + RADV_SQTT_USERDATA_GANG_CS = 1u << 1, +}; + bool radv_is_instruction_timing_enabled(void); bool radv_sqtt_queue_events_enabled(void); -void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords); +void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords, + enum radv_sqtt_userdata_flags flags); VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo, uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);