diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 395df676723..25f63b98d5a 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -68,8 +68,8 @@ static const struct { INTEL_DS_QUEUE_STAGE_CMD_BUFFER, }, { - "generate-draws", - INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS, + "internal-ops", + INTEL_DS_QUEUE_STAGE_INTERNAL_OPS, }, { "stall", @@ -391,12 +391,13 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH) CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE) -CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS) -CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_BLORP) -CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_BLORP) -CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) -CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_CMD_BUFFER) -CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_BLORP) +CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(trace_copy_cb, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) +CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS) void intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device, diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h index bc5f4a82e9d..456b1545060 100644 --- a/src/intel/ds/intel_driver_ds.h +++ b/src/intel/ds/intel_driver_ds.h @@ -68,7 +68,7 
@@ enum intel_ds_queue_stage { INTEL_DS_QUEUE_STAGE_QUEUE, INTEL_DS_QUEUE_STAGE_FRAME, INTEL_DS_QUEUE_STAGE_CMD_BUFFER, - INTEL_DS_QUEUE_STAGE_GENERATE_DRAWS, + INTEL_DS_QUEUE_STAGE_INTERNAL_OPS, INTEL_DS_QUEUE_STAGE_STALL, INTEL_DS_QUEUE_STAGE_COMPUTE, INTEL_DS_QUEUE_STAGE_RENDER_PASS, diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index b11c789c46a..2048a6bb4c3 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -172,7 +172,11 @@ def define_tracepoints(args): tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) # Used to identify copies generated by utrace - begin_end_tp('trace_copy', end_pipelined=True) + begin_end_tp('trace_copy', + tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),]) + begin_end_tp('trace_copy_cb', + tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),], + need_cs_param=True) def flag_bits(args): bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')] diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index 042d36c700b..166595acebb 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -227,14 +227,19 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, assert(queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER || queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE); if (queue->family->engine_class == INTEL_ENGINE_CLASS_RENDER) { + + trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch); + anv_genX(device->info, emit_so_memcpy_init)(&submit->memcpy_state, device, &submit->batch); + uint32_t num_traces = 0; for (uint32_t i = 0; i < cmd_buffer_count; i++) { if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace, &submit->ds, false); } else { + num_traces += cmd_buffers[i]->trace.num_traces; 
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), u_trace_end_iterator(&cmd_buffers[i]->trace), &submit->ds.trace, @@ -244,8 +249,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, } anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state); + trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch, + num_traces); + anv_genX(device->info, emit_so_memcpy_end)(&submit->memcpy_state); } else { + trace_intel_begin_trace_copy_cb(&submit->ds.trace, &submit->batch); + submit->simple_state = (struct anv_simple_shader) { .device = device, .dynamic_state_stream = &submit->dynamic_state_stream, @@ -257,11 +267,14 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, }; anv_genX(device->info, emit_simple_shader_init)(&submit->simple_state); + uint32_t num_traces = 0; for (uint32_t i = 0; i < cmd_buffer_count; i++) { if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace, &submit->ds, false); } else { + /* Count only the traces cloned below, as the render path does. */ + num_traces += cmd_buffers[i]->trace.num_traces; u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace), u_trace_end_iterator(&cmd_buffers[i]->trace), &submit->ds.trace, @@ -270,6 +283,9 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, } } + trace_intel_end_trace_copy_cb(&submit->ds.trace, &submit->batch, + num_traces); + anv_genX(device->info, emit_simple_shader_end)(&submit->simple_state); } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 71a5d02c322..92f59bac005 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3794,9 +3794,11 @@ genX(CmdExecuteCommands)( struct anv_memcpy_state memcpy_state; genX(emit_so_memcpy_init)(&memcpy_state, device, &primary->batch); + uint32_t num_traces = 0; for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, 
secondary, pCmdBuffers[i]); + num_traces += secondary->trace.num_traces; u_trace_clone_append(u_trace_begin_iterator(&secondary->trace), u_trace_end_iterator(&secondary->trace), &primary->trace, @@ -3805,7 +3807,7 @@ genX(CmdExecuteCommands)( } genX(emit_so_memcpy_fini)(&memcpy_state); - trace_intel_end_trace_copy(&primary->trace); + trace_intel_end_trace_copy(&primary->trace, num_traces); /* Memcpy is done using the 3D pipeline. */ primary->state.current_pipeline = _3D;