mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
tu: Add tracepoints around draws, with shader sha1s.
As with compute shaders (CSes), this lets you correlate draw-call times with the SHA-1s of the shaders in use, so you can pinpoint which pipelines are the most expensive. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35759>
This commit is contained in:
parent
cf0828debb
commit
707c97f634
7 changed files with 59 additions and 0 deletions
|
|
@ -6595,6 +6595,14 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||||
const struct tu_program_state *program = &cmd->state.program;
|
const struct tu_program_state *program = &cmd->state.program;
|
||||||
struct tu_render_pass_state *rp = &cmd->state.rp;
|
struct tu_render_pass_state *rp = &cmd->state.rp;
|
||||||
|
|
||||||
|
trace_start_draw(
|
||||||
|
&cmd->trace, &cmd->draw_cs, cmd, draw_count,
|
||||||
|
cmd->state.program.stage_sha1[MESA_SHADER_VERTEX],
|
||||||
|
cmd->state.program.stage_sha1[MESA_SHADER_TESS_CTRL],
|
||||||
|
cmd->state.program.stage_sha1[MESA_SHADER_TESS_EVAL],
|
||||||
|
cmd->state.program.stage_sha1[MESA_SHADER_GEOMETRY],
|
||||||
|
cmd->state.program.stage_sha1[MESA_SHADER_FRAGMENT]);
|
||||||
|
|
||||||
/* Emit state first, because it's needed for bandwidth calculations */
|
/* Emit state first, because it's needed for bandwidth calculations */
|
||||||
uint32_t dynamic_draw_state_dirty = 0;
|
uint32_t dynamic_draw_state_dirty = 0;
|
||||||
if (!BITSET_IS_EMPTY(cmd->vk.dynamic_graphics_state.dirty) ||
|
if (!BITSET_IS_EMPTY(cmd->vk.dynamic_graphics_state.dirty) ||
|
||||||
|
|
@ -7035,6 +7043,8 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
||||||
tu_cs_emit(cs, instanceCount);
|
tu_cs_emit(cs, instanceCount);
|
||||||
tu_cs_emit(cs, vertexCount);
|
tu_cs_emit(cs, vertexCount);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDraw);
|
TU_GENX(tu_CmdDraw);
|
||||||
|
|
||||||
|
|
@ -7081,6 +7091,9 @@ tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit(cs, instanceCount);
|
tu_cs_emit(cs, instanceCount);
|
||||||
tu_cs_emit(cs, draw->vertexCount);
|
tu_cs_emit(cs, draw->vertexCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i != 0)
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawMultiEXT);
|
TU_GENX(tu_CmdDrawMultiEXT);
|
||||||
|
|
||||||
|
|
@ -7107,6 +7120,8 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit(cs, firstIndex);
|
tu_cs_emit(cs, firstIndex);
|
||||||
tu_cs_emit_qw(cs, cmd->state.index_va);
|
tu_cs_emit_qw(cs, cmd->state.index_va);
|
||||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndexed);
|
TU_GENX(tu_CmdDrawIndexed);
|
||||||
|
|
||||||
|
|
@ -7158,6 +7173,9 @@ tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit_qw(cs, cmd->state.index_va);
|
tu_cs_emit_qw(cs, cmd->state.index_va);
|
||||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i != 0)
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawMultiIndexedEXT);
|
TU_GENX(tu_CmdDrawMultiIndexedEXT);
|
||||||
|
|
||||||
|
|
@ -7201,6 +7219,8 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit(cs, drawCount);
|
tu_cs_emit(cs, drawCount);
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
||||||
tu_cs_emit(cs, stride);
|
tu_cs_emit(cs, stride);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndirect);
|
TU_GENX(tu_CmdDrawIndirect);
|
||||||
|
|
||||||
|
|
@ -7232,6 +7252,8 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
||||||
tu_cs_emit(cs, stride);
|
tu_cs_emit(cs, stride);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndexedIndirect);
|
TU_GENX(tu_CmdDrawIndexedIndirect);
|
||||||
|
|
||||||
|
|
@ -7269,6 +7291,8 @@ tu_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
|
||||||
tu_cs_emit(cs, stride);
|
tu_cs_emit(cs, stride);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndirectCount);
|
TU_GENX(tu_CmdDrawIndirectCount);
|
||||||
|
|
||||||
|
|
@ -7303,6 +7327,8 @@ tu_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
|
||||||
tu_cs_emit(cs, stride);
|
tu_cs_emit(cs, stride);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndexedIndirectCount);
|
TU_GENX(tu_CmdDrawIndexedIndirectCount);
|
||||||
|
|
||||||
|
|
@ -7345,6 +7371,8 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
|
||||||
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, counterBufferOffset));
|
tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, counterBufferOffset));
|
||||||
tu_cs_emit(cs, counterOffset);
|
tu_cs_emit(cs, counterOffset);
|
||||||
tu_cs_emit(cs, vertexStride);
|
tu_cs_emit(cs, vertexStride);
|
||||||
|
|
||||||
|
trace_end_draw(&cmd->trace, cs);
|
||||||
}
|
}
|
||||||
TU_GENX(tu_CmdDrawIndirectByteCountEXT);
|
TU_GENX(tu_CmdDrawIndirectByteCountEXT);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -417,6 +417,8 @@ enum tu_suspend_resume_state
|
||||||
SR_IN_CHAIN_AFTER_PRE_CHAIN,
|
SR_IN_CHAIN_AFTER_PRE_CHAIN,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef char tu_sha1_str[SHA1_DIGEST_STRING_LENGTH];
|
||||||
|
|
||||||
struct tu_cmd_state
|
struct tu_cmd_state
|
||||||
{
|
{
|
||||||
uint32_t dirty;
|
uint32_t dirty;
|
||||||
|
|
@ -547,6 +549,9 @@ struct tu_cmd_state
|
||||||
|
|
||||||
bool occlusion_query_may_be_running;
|
bool occlusion_query_may_be_running;
|
||||||
|
|
||||||
|
bool trace_draws_enabled;
|
||||||
|
enum tu_pipeline_type trace_draws_pipeline_type;
|
||||||
|
|
||||||
enum tu_suspend_resume_state suspend_resume;
|
enum tu_suspend_resume_state suspend_resume;
|
||||||
|
|
||||||
bool suspending, resuming;
|
bool suspending, resuming;
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
#include "util/perf/u_perfetto.h"
|
#include "util/perf/u_perfetto.h"
|
||||||
#include "util/perf/u_perfetto_renderpass.h"
|
#include "util/perf/u_perfetto_renderpass.h"
|
||||||
|
|
||||||
|
#include "tu_cmd_buffer.h"
|
||||||
#include "tu_tracepoints.h"
|
#include "tu_tracepoints.h"
|
||||||
#include "tu_tracepoints_perfetto.h"
|
#include "tu_tracepoints_perfetto.h"
|
||||||
#include "vk_object.h"
|
#include "vk_object.h"
|
||||||
|
|
@ -54,6 +55,7 @@ enum tu_stage_id {
|
||||||
GMEM_STAGE_ID,
|
GMEM_STAGE_ID,
|
||||||
BYPASS_STAGE_ID,
|
BYPASS_STAGE_ID,
|
||||||
BLIT_STAGE_ID,
|
BLIT_STAGE_ID,
|
||||||
|
DRAW_STAGE_ID,
|
||||||
COMPUTE_STAGE_ID,
|
COMPUTE_STAGE_ID,
|
||||||
CLEAR_SYSMEM_STAGE_ID,
|
CLEAR_SYSMEM_STAGE_ID,
|
||||||
CLEAR_GMEM_STAGE_ID,
|
CLEAR_GMEM_STAGE_ID,
|
||||||
|
|
@ -84,6 +86,7 @@ static const struct {
|
||||||
[GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" },
|
[GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" },
|
||||||
[BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" },
|
[BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" },
|
||||||
[BLIT_STAGE_ID] = { "Blit", "Performing a Blit operation" },
|
[BLIT_STAGE_ID] = { "Blit", "Performing a Blit operation" },
|
||||||
|
[DRAW_STAGE_ID] = { "Draw", "Performing a graphics-pipeline draw" },
|
||||||
[COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
|
[COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
|
||||||
[CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
|
[CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
|
||||||
[CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
|
[CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
|
||||||
|
|
@ -525,6 +528,7 @@ CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
|
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
|
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
|
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
|
||||||
|
CREATE_EVENT_CALLBACK(draw, DRAW_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
|
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
|
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
|
||||||
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
|
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
|
||||||
|
|
|
||||||
|
|
@ -2230,6 +2230,11 @@ tu_emit_program_state(struct tu_cs *sub_cs,
|
||||||
shaders[stage]->dynamic_descriptor_sizes[i];
|
shaders[stage]->dynamic_descriptor_sizes[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (variants[stage]) {
|
||||||
|
memcpy(prog->stage_sha1[stage], variants[stage]->sha1_str,
|
||||||
|
sizeof(variants[stage]->sha1_str));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,6 +105,8 @@ struct tu_program_state
|
||||||
|
|
||||||
struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
|
struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
|
||||||
|
|
||||||
|
char stage_sha1[MESA_SHADER_STAGES][SHA1_DIGEST_STRING_LENGTH];
|
||||||
|
|
||||||
unsigned dynamic_descriptor_offsets[MAX_SETS];
|
unsigned dynamic_descriptor_offsets[MAX_SETS];
|
||||||
|
|
||||||
bool per_view_viewport;
|
bool per_view_viewport;
|
||||||
|
|
|
||||||
|
|
@ -120,6 +120,19 @@ begin_end_tp('render_pass',
|
||||||
Arg(type='int32_t', var='lrzWriteDisabledAtDraw', c_format='%d'),
|
Arg(type='int32_t', var='lrzWriteDisabledAtDraw', c_format='%d'),
|
||||||
Arg(type='uint32_t', var='lrzStatus', c_format='%s', to_prim_type='(fd_lrz_gpu_dir_to_str((enum fd_lrz_gpu_dir)({} & 0xff)))', is_indirect=True),])
|
Arg(type='uint32_t', var='lrzStatus', c_format='%s', to_prim_type='(fd_lrz_gpu_dir_to_str((enum fd_lrz_gpu_dir)({} & 0xff)))', is_indirect=True),])
|
||||||
|
|
||||||
|
begin_end_tp('draw',
|
||||||
|
[Arg(type='uint32_t', var='count', c_format='%u'),
|
||||||
|
Arg(type='tu_sha1_str', var='vs_sha1', c_format='%s',
|
||||||
|
copy_func='strcpy'),
|
||||||
|
Arg(type='tu_sha1_str', var='tcs_sha1', c_format='%s',
|
||||||
|
copy_func='strcpy'),
|
||||||
|
Arg(type='tu_sha1_str', var='tes_sha1', c_format='%s',
|
||||||
|
copy_func='strcpy'),
|
||||||
|
Arg(type='tu_sha1_str', var='gs_sha1', c_format='%s',
|
||||||
|
copy_func='strcpy'),
|
||||||
|
Arg(type='tu_sha1_str', var='fs_sha1', c_format='%s',
|
||||||
|
copy_func='strcpy'),
|
||||||
|
], tp_default_enabled=False)
|
||||||
|
|
||||||
begin_end_tp('binning_ib')
|
begin_end_tp('binning_ib')
|
||||||
begin_end_tp('draw_ib_sysmem')
|
begin_end_tp('draw_ib_sysmem')
|
||||||
|
|
|
||||||
|
|
@ -588,6 +588,8 @@ void __trace_${trace_name}(
|
||||||
% for arg in trace.tp_struct:
|
% for arg in trace.tp_struct:
|
||||||
% if arg.copy_func is None:
|
% if arg.copy_func is None:
|
||||||
__entry->${arg.name} = ${arg.var};
|
__entry->${arg.name} = ${arg.var};
|
||||||
|
% elif arg.length_arg is None:
|
||||||
|
${arg.copy_func}(__entry->${arg.name}, ${arg.var});
|
||||||
% else:
|
% else:
|
||||||
${arg.copy_func}(__entry->${arg.name}, ${arg.var}, ${arg.length_arg});
|
${arg.copy_func}(__entry->${arg.name}, ${arg.var}, ${arg.length_arg});
|
||||||
% endif
|
% endif
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue