radv/sqtt: rework radv_emit_sqtt_userdata() to support gang CS

Gang CS support is needed for task shaders.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39178>
This commit is contained in:
Samuel Pitoiset 2026-01-06 16:14:27 +01:00
parent 8ed2447554
commit 4da2e971e6
3 changed files with 45 additions and 28 deletions

View file

@ -169,7 +169,7 @@ radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
marker.api_type = api_type;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
static void
@ -181,7 +181,7 @@ radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_s
marker.api_type = api_type;
marker.is_end = 1;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
static void
@ -208,7 +208,7 @@ radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker
marker.instance_offset_reg_idx = instance_offset_user_data;
marker.draw_index_reg_idx = draw_index_user_data;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
static void
@ -227,7 +227,7 @@ radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_s
marker.thread_y = y;
marker.thread_z = z;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
void
@ -245,7 +245,7 @@ radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_m
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
marker.data_type = type;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
} else {
assert(str != NULL);
unsigned len = strlen(str);
@ -259,7 +259,7 @@ radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_m
memcpy(buffer, &marker, sizeof(marker));
memcpy(buffer + sizeof(marker), str, len);
radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
}
@ -292,7 +292,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
if (!radv_dedicated_sparse_queue_enabled(pdev))
marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
void
@ -310,7 +310,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
marker.device_id_low = device_id;
marker.device_id_high = device_id >> 32;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
static void
@ -442,7 +442,7 @@ radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
marker.inval_gl1 = true;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
cmd_buffer->state.num_layout_transitions = 0;
}
@ -469,7 +469,7 @@ radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier
marker.cb_id = cmd_buffer->sqtt_cb_id;
marker.dword02 = reason;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
void
@ -503,7 +503,7 @@ radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct
marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
cmd_buffer->state.num_layout_transitions++;
}
@ -543,7 +543,7 @@ radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPo
marker.api_pso_hash[0] = pipeline->pipeline_hash;
marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4, RADV_SQTT_USERDATA_MAIN_CS);
}
/* Queue events */

View file

@ -98,13 +98,32 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radv_cmd_stream *cs
ac_pm4_free_state(pm4);
}
/**
 * Write one batch of SQTT userdata dwords into a single command stream.
 *
 * Emits a sequential register write starting at SQ_THREAD_TRACE_USERDATA_2
 * followed by the payload.
 *
 * \param device  Device that owns the winsys used to reserve command space.
 * \param cs      Command stream that receives the packet.
 * \param count   Number of payload dwords to emit.
 * \param dwords  Payload; must point to at least \p count dwords.
 */
static void
radv_emit_sqtt_userdata_cs(const struct radv_device *device, struct radv_cmd_stream *cs, uint32_t count,
                           const uint32_t *dwords)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* Reserve room for the payload plus two extra dwords (presumably the
    * register-write packet header). */
   radeon_check_space(device->ws, cs->b, 2 + count);

   radeon_begin(cs);

   if (pdev->info.gfx_level < GFX10) {
      radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
   } else {
      /* Without the perfctr bit the CP might not always pass the
       * write on correctly. */
      radeon_set_uconfig_perfctr_reg_seq(pdev->info.gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
   }

   radeon_emit_array(dwords, count);

   radeon_end();
}
void
radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords)
radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords,
enum radv_sqtt_userdata_flags flags)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE;
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct radv_cmd_stream *cs = cmd_buffer->cs;
const uint32_t *dwords = (uint32_t *)data;
@ -115,18 +134,10 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
while (num_dwords > 0) {
uint32_t count = MIN2(num_dwords, 2);
radeon_check_space(device->ws, cs->b, 2 + count);
radeon_begin(cs);
/* Without the perfctr bit the CP might not always pass the
* write on correctly. */
if (pdev->info.gfx_level >= GFX10)
radeon_set_uconfig_perfctr_reg_seq(gfx_level, cs->hw_ip, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
else
radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
radeon_emit_array(dwords, count);
radeon_end();
if (flags & RADV_SQTT_USERDATA_MAIN_CS)
radv_emit_sqtt_userdata_cs(device, cs, count, dwords);
if (flags & RADV_SQTT_USERDATA_GANG_CS)
radv_emit_sqtt_userdata_cs(device, cmd_buffer->gang.cs, count, dwords);
dwords += count;
num_dwords -= count;

View file

@ -59,11 +59,17 @@ enum rgp_barrier_reason {
RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
/* Bitmask telling radv_emit_sqtt_userdata() which command stream(s) of the
 * command buffer should receive the userdata. The bits may be OR'ed together. */
enum radv_sqtt_userdata_flags {
   /* Emit into the command buffer's main command stream. */
   RADV_SQTT_USERDATA_MAIN_CS = 0x1,
   /* Emit into the command buffer's gang command stream. */
   RADV_SQTT_USERDATA_GANG_CS = 0x2,
};
bool radv_is_instruction_timing_enabled(void);
bool radv_sqtt_queue_events_enabled(void);
void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords);
void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords,
enum radv_sqtt_userdata_flags flags);
VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);