intel/ds: track number of tracepoint timestamp copies

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24744>
This commit is contained in:
Lionel Landwerlin 2023-08-29 23:00:17 +03:00 committed by Marge Bot
parent 64769bc5b2
commit 7d30b1e28f
5 changed files with 34 additions and 11 deletions

View file

@ -68,8 +68,8 @@ static const struct {
INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
},
{
"generate-draws",
INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS,
"internal-ops",
INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
},
{
"stall",
@ -391,12 +391,13 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy_cb, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
void
intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device,

View file

@ -68,7 +68,7 @@ enum intel_ds_queue_stage {
INTEL_DS_QUEUE_STAGE_QUEUE,
INTEL_DS_QUEUE_STAGE_FRAME,
INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS,
INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
INTEL_DS_QUEUE_STAGE_STALL,
INTEL_DS_QUEUE_STAGE_COMPUTE,
INTEL_DS_QUEUE_STAGE_RENDER_PASS,

View file

@ -172,7 +172,11 @@ def define_tracepoints(args):
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
# Used to identify copies generated by utrace
begin_end_tp('trace_copy', end_pipelined=True)
begin_end_tp('trace_copy',
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),])
begin_end_tp('trace_copy_cb',
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),],
need_cs_param=True)
def flag_bits(args):
bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')]

View file

@ -227,14 +227,19 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
assert(queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER ||
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE);
if (queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER) {
trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state,
device,
&submit->batch);
uint32_t num_traces = 0;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
&submit->ds, false);
} else {
num_traces += cmd_buffers[i]->trace.num_traces;
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&submit->ds.trace,
@ -244,8 +249,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
num_traces);
anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state);
} else {
trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch);
submit->simple_state = (struct anv_simple_shader) {
.device = device,
.dynamic_state_stream = &submit->dynamic_state_stream,
@ -257,11 +267,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
};
anv_genX(device->info, emit_simple_shader_init)(&submit->simple_state);
uint32_t num_traces = 0;
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
num_traces += cmd_buffers[i]->trace.num_traces;
if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
&submit->ds, false);
} else {
num_traces += cmd_buffers[i]->trace.num_traces;
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&submit->ds.trace,
@ -270,6 +283,9 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
}
trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch,
num_traces);
anv_genX(device->info, emit_simple_shader_end)(&submit->simple_state);
}

View file

@ -3794,9 +3794,11 @@ genX(CmdExecuteCommands)(
struct anv_memcpy_state memcpy_state;
genX(emit_so_memcpy_init)(&memcpy_state, device, &primary->batch);
uint32_t num_traces = 0;
for (uint32_t i = 0; i < commandBufferCount; i++) {
ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
num_traces += secondary->trace.num_traces;
u_trace_clone_append(u_trace_begin_iterator(&secondary->trace),
u_trace_end_iterator(&secondary->trace),
&primary->trace,
@ -3805,7 +3807,7 @@ genX(CmdExecuteCommands)(
}
genX(emit_so_memcpy_fini)(&memcpy_state);
trace_intel_end_trace_copy(&primary->trace);
trace_intel_end_trace_copy(&primary->trace, num_traces);
/* Memcpy is done using the 3D pipeline. */
primary->state.current_pipeline = _3D;