tu: Add tracepoints around draws, with shader sha1s.

Like for CSes, this lets you correlate draw call times with the shaders
they use, to pinpoint which pipelines are the most expensive.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35759>
This commit is contained in:
Emma Anholt 2025-05-30 10:08:57 -07:00 committed by Marge Bot
parent cf0828debb
commit 707c97f634
7 changed files with 59 additions and 0 deletions

View file

@ -6595,6 +6595,14 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
const struct tu_program_state *program = &cmd->state.program;
struct tu_render_pass_state *rp = &cmd->state.rp;
trace_start_draw(
&cmd->trace, &cmd->draw_cs, cmd, draw_count,
cmd->state.program.stage_sha1[MESA_SHADER_VERTEX],
cmd->state.program.stage_sha1[MESA_SHADER_TESS_CTRL],
cmd->state.program.stage_sha1[MESA_SHADER_TESS_EVAL],
cmd->state.program.stage_sha1[MESA_SHADER_GEOMETRY],
cmd->state.program.stage_sha1[MESA_SHADER_FRAGMENT]);
/* Emit state first, because it's needed for bandwidth calculations */
uint32_t dynamic_draw_state_dirty = 0;
if (!BITSET_IS_EMPTY(cmd->vk.dynamic_graphics_state.dirty) ||
@ -7035,6 +7043,8 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
tu_cs_emit(cs, instanceCount);
tu_cs_emit(cs, vertexCount);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDraw);
@ -7081,6 +7091,9 @@ tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, instanceCount);
tu_cs_emit(cs, draw->vertexCount);
}
if (i != 0)
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawMultiEXT);
@ -7107,6 +7120,8 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, firstIndex);
tu_cs_emit_qw(cs, cmd->state.index_va);
tu_cs_emit(cs, cmd->state.max_index_count);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndexed);
@ -7158,6 +7173,9 @@ tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
tu_cs_emit_qw(cs, cmd->state.index_va);
tu_cs_emit(cs, cmd->state.max_index_count);
}
if (i != 0)
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawMultiIndexedEXT);
@ -7201,6 +7219,8 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, drawCount);
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
tu_cs_emit(cs, stride);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndirect);
@ -7232,6 +7252,8 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, cmd->state.max_index_count);
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
tu_cs_emit(cs, stride);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndexedIndirect);
@ -7269,6 +7291,8 @@ tu_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
tu_cs_emit(cs, stride);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndirectCount);
@ -7303,6 +7327,8 @@ tu_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
tu_cs_emit(cs, stride);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndexedIndirectCount);
@ -7345,6 +7371,8 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, counterBufferOffset));
tu_cs_emit(cs, counterOffset);
tu_cs_emit(cs, vertexStride);
trace_end_draw(&cmd->trace, cs);
}
TU_GENX(tu_CmdDrawIndirectByteCountEXT);

View file

@ -417,6 +417,8 @@ enum tu_suspend_resume_state
SR_IN_CHAIN_AFTER_PRE_CHAIN,
};
/* Fixed-size char buffer of SHA1_DIGEST_STRING_LENGTH bytes, used as the
 * argument type for the per-stage shader sha1 strings of the 'draw'
 * tracepoint. The tracepoint copies these with strcpy, so the contents are
 * expected to be NUL-terminated (NOTE(review): confirm every producer
 * guarantees termination).
 */
typedef char tu_sha1_str[SHA1_DIGEST_STRING_LENGTH];
struct tu_cmd_state
{
uint32_t dirty;
@ -547,6 +549,9 @@ struct tu_cmd_state
bool occlusion_query_may_be_running;
bool trace_draws_enabled;
enum tu_pipeline_type trace_draws_pipeline_type;
enum tu_suspend_resume_state suspend_resume;
bool suspending, resuming;

View file

@ -15,6 +15,7 @@
#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"
#include "tu_cmd_buffer.h"
#include "tu_tracepoints.h"
#include "tu_tracepoints_perfetto.h"
#include "vk_object.h"
@ -54,6 +55,7 @@ enum tu_stage_id {
GMEM_STAGE_ID,
BYPASS_STAGE_ID,
BLIT_STAGE_ID,
DRAW_STAGE_ID,
COMPUTE_STAGE_ID,
CLEAR_SYSMEM_STAGE_ID,
CLEAR_GMEM_STAGE_ID,
@ -84,6 +86,7 @@ static const struct {
[GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" },
[BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" },
[BLIT_STAGE_ID] = { "Blit", "Performing a Blit operation" },
[DRAW_STAGE_ID] = { "Draw", "Performing a graphics-pipeline draw" },
[COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
[CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
[CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
@ -525,6 +528,7 @@ CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(draw, DRAW_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)

View file

@ -2230,6 +2230,11 @@ tu_emit_program_state(struct tu_cs *sub_cs,
shaders[stage]->dynamic_descriptor_sizes[i];
}
}
if (variants[stage]) {
memcpy(prog->stage_sha1[stage], variants[stage]->sha1_str,
sizeof(variants[stage]->sha1_str));
}
}
}

View file

@ -105,6 +105,8 @@ struct tu_program_state
struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
char stage_sha1[MESA_SHADER_STAGES][SHA1_DIGEST_STRING_LENGTH];
unsigned dynamic_descriptor_offsets[MAX_SETS];
bool per_view_viewport;

View file

@ -120,6 +120,19 @@ begin_end_tp('render_pass',
Arg(type='int32_t', var='lrzWriteDisabledAtDraw', c_format='%d'),
Arg(type='uint32_t', var='lrzStatus', c_format='%s', to_prim_type='(fd_lrz_gpu_dir_to_str((enum fd_lrz_gpu_dir)({} & 0xff)))', is_indirect=True),])
# 'draw' begin/end tracepoint pair: records the draw count plus one sha1
# string per graphics shader stage (vertex, tess ctrl, tess eval, geometry,
# fragment), in the same order the driver passes them to trace_start_draw().
# The sha1 arguments use copy_func='strcpy' with no length argument, so the
# generated code copies each one as strcpy(__entry->field, value).
# Registered with tp_default_enabled=False, so presumably this tracepoint is
# off unless explicitly enabled (verify against begin_end_tp's handling).
begin_end_tp('draw',
[Arg(type='uint32_t', var='count', c_format='%u'),
Arg(type='tu_sha1_str', var='vs_sha1', c_format='%s',
copy_func='strcpy'),
Arg(type='tu_sha1_str', var='tcs_sha1', c_format='%s',
copy_func='strcpy'),
Arg(type='tu_sha1_str', var='tes_sha1', c_format='%s',
copy_func='strcpy'),
Arg(type='tu_sha1_str', var='gs_sha1', c_format='%s',
copy_func='strcpy'),
Arg(type='tu_sha1_str', var='fs_sha1', c_format='%s',
copy_func='strcpy'),
], tp_default_enabled=False)
begin_end_tp('binning_ib')
begin_end_tp('draw_ib_sysmem')

View file

@ -588,6 +588,8 @@ void __trace_${trace_name}(
% for arg in trace.tp_struct:
% if arg.copy_func is None:
__entry->${arg.name} = ${arg.var};
% elif arg.length_arg is None:
${arg.copy_func}(__entry->${arg.name}, ${arg.var});
% else:
${arg.copy_func}(__entry->${arg.name}, ${arg.var}, ${arg.length_arg});
% endif