From 1b4963e3820a568ac321122095c2fdf610d248ec Mon Sep 17 00:00:00 2001 From: Michael Cheng Date: Wed, 6 May 2026 16:59:54 -0700 Subject: [PATCH 1/4] intel/ds: Allow CREATE_DUAL_EVENT_CALLBACK to take optional name args Refactor CREATE_DUAL_EVENT_CALLBACK to accept optional variadic arguments for dynamic event-name formatting in follow-up patches. Add a small helper that formats event names only when a non-NULL format string is provided, and keep existing call sites unchanged so behavior is identical in this commit. Also copy app_event into a std::string in end_event() before the Trace() lambda captures it, because the caller's buffer may be stack-local. Signed-off-by: Michael Cheng --- src/intel/ds/intel_driver_ds.cc | 38 +++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 75438d4d2f4..9bfa637baf0 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -23,6 +23,7 @@ #include #include +#include #include "common/intel_gem.h" #include "perf/intel_perf.h" @@ -264,6 +265,13 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns, struct intel_ds_stage *stage = &queue->stages[stage_id]; uint64_t start_ns = stage->start_ns[level]; + /* Deep copy app_event before the [=] lambda capture. The raw pointer + * points to a stack-local char buffer in the caller, which is invalid + * by the time the Trace() continuation executes. + */ + bool has_app_event = app_event != NULL; + std::string app_event_copy = has_app_event ? std::string(app_event) : std::string(); + if (!start_ns) return; @@ -278,8 +286,8 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns, * stage_iid if not already seen. Otherwise, it's a driver event and we * have use the internal stage_iid. */ - uint64_t stage_iid = app_event ? - tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) : + uint64_t stage_iid = has_app_event ? 
/* Format an optional, printf-style event name into a caller-provided buffer.
 *
 * buf      - destination for the NUL-terminated name.
 * buf_size - size of buf in bytes, including the terminating NUL.
 * fmt      - printf-style format string, or NULL when the event has no
 *            dynamic name.
 * ...      - arguments consumed by fmt. Arguments beyond what fmt consumes
 *            are ignored, so a caller may always pass its full argument
 *            list even when fmt uses only a prefix of it.
 *
 * Returns buf on success; the name is truncated to fit buf_size. Returns
 * NULL when fmt is NULL, when no usable buffer was supplied, or when
 * vsnprintf() reports an error, so the caller never receives a pointer to
 * text that was not written.
 */
static const char *
intel_ds_format_event_name(char *buf, size_t buf_size, const char *fmt, ...)
{
   /* Without a format there is no name to build; without a buffer of at
    * least one byte vsnprintf() could not even store the terminating NUL.
    */
   if (fmt == NULL || buf == NULL || buf_size == 0)
      return NULL;

   va_list ap;
   va_start(ap, fmt);
   const int len = vsnprintf(buf, buf_size, fmt, ap);
   va_end(ap);

   /* A negative result means an output error; the buffer contents are not
    * usable in that case, so report "no name" instead.
    */
   if (len < 0)
      return NULL;

   return buf;
}
The u_trace framework copies them into a trace-local buffer at trace flush time via the is_indirect mechanism: indirect args are passed as u_trace_address (GPU address + bo), and u_trace copies the pointed-to struct into indirect_data alongside the payload. The end callback receives this as a const uint32_t* indirect pointer, which contains the x/y/z dispatch sizes read back from the GPU. Include these dimensions when indirect tracing is enabled (MESA_GPU_TRACES=indirects), otherwise fall back to the static name. Signed-off-by: Michael Cheng --- src/intel/ds/intel_driver_ds.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 9bfa637baf0..4052f5b3d2d 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -484,8 +484,11 @@ intel_ds_format_event_name(char *buf, size_t buf_size, const char *fmt, ...) char event_name[64]; \ UNUSED const uint32_t *indirect = \ (const uint32_t *) indirect_data; \ - const char *name = intel_ds_format_event_name( \ - event_name, sizeof(event_name), (name_fmt), ##__VA_ARGS__); \ + const char *name = NULL; \ + if ((name_fmt) != NULL) { \ + name = intel_ds_format_event_name(event_name, sizeof(event_name), \ + (name_fmt), ##__VA_ARGS__); \ + } \ end_event(flush->queue, ts_ns, stage, flush->submission_id, \ tp_idx, name, payload, indirect_data, \ (trace_payload_as_extra_func) \ @@ -512,8 +515,16 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) -CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE) -CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE) +CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE, + 
"compute(%u,%u,%u)", + payload->group_x, payload->group_y, payload->group_z) +CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE, + ((p_atomic_read_relaxed(&device->trace_context.enabled_traces) & + U_TRACE_TYPE_INDIRECTS) && indirect) ? + "compute_indirect(%u,%u,%u)" : "compute_indirect", + indirect ? indirect[0] : 0, + indirect ? indirect[1] : 0, + indirect ? indirect[2] : 0) CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) CREATE_DUAL_EVENT_CALLBACK(generate_cmds_pre, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) CREATE_DUAL_EVENT_CALLBACK(generate_cmds_post, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) From 4510b8eb38163aff3e74133455d3156050c1cd06 Mon Sep 17 00:00:00 2001 From: Michael Cheng Date: Wed, 6 May 2026 17:15:55 -0700 Subject: [PATCH 3/4] intel/ds: Label draw events with vertex/draw counts Format draw and draw_indexed Perfetto events with their vertex count. For draw_indirect and draw_indexed_indirect, include the draw count when indirect tracing is enabled (MESA_GPU_TRACES=indirects), otherwise fall back to the static name. 
Signed-off-by: Michael Cheng --- src/intel/ds/intel_driver_ds.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 4052f5b3d2d..40cf53dad2e 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -502,12 +502,22 @@ CREATE_DUAL_EVENT_CALLBACK(sba, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) CREATE_DUAL_EVENT_CALLBACK(btp, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS) CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP) -CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW) -CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW, + "draw(%u)", payload->count) +CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW, + "draw_indexed(%u)", payload->count) CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW) -CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW, + (p_atomic_read_relaxed(&device->trace_context.enabled_traces) & + U_TRACE_TYPE_INDIRECTS) ? + "draw_indexed_indirect(%u)" : "draw_indexed_indirect", + payload->draw_count) CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW) -CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW) +CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW, + (p_atomic_read_relaxed(&device->trace_context.enabled_traces) & + U_TRACE_TYPE_INDIRECTS) ? 
+ "draw_indirect(%u)" : "draw_indirect", + payload->draw_count) CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW) CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW) CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW) From cb368f43e830d9cd119cb95d483d7289177b8764 Mon Sep 17 00:00:00 2001 From: Michael Cheng Date: Thu, 7 May 2026 15:04:43 -0700 Subject: [PATCH 4/4] intel/ds: Drop trailing 1s from compute dispatch event names Shorten compute* event names by omitting trailing dimensions that are 1. For example, compute(128,1,1) becomes compute(128) and compute_indirect(64,1,1) becomes compute_indirect(64). This makes traces easier to read and reduces trace storage slightly. Signed-off-by: Michael Cheng --- src/intel/ds/intel_driver_ds.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 40cf53dad2e..d4b18facc34 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -526,12 +526,17 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE, - "compute(%u,%u,%u)", + payload->group_z != 1 ? "compute(%u,%u,%u)" : + payload->group_y != 1 ? "compute(%u,%u)" : + "compute(%u)", payload->group_x, payload->group_y, payload->group_z) CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE, - ((p_atomic_read_relaxed(&device->trace_context.enabled_traces) & - U_TRACE_TYPE_INDIRECTS) && indirect) ? - "compute_indirect(%u,%u,%u)" : "compute_indirect", + ((p_atomic_read_relaxed(&device->trace_context.enabled_traces) & + U_TRACE_TYPE_INDIRECTS) && indirect) ? + (indirect[2] != 1 ? 
"compute_indirect(%u,%u,%u)" : + indirect[1] != 1 ? "compute_indirect(%u,%u)" : + "compute_indirect(%u)") : + "compute_indirect", indirect ? indirect[0] : 0, indirect ? indirect[1] : 0, indirect ? indirect[2] : 0)