diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h index bc2f9756003..3ee2a6c5d9f 100644 --- a/src/intel/ds/intel_driver_ds.h +++ b/src/intel/ds/intel_driver_ds.h @@ -73,12 +73,10 @@ enum intel_ds_tracepoint_flags { */ INTEL_DS_TRACEPOINT_FLAG_END_CS = BITFIELD_BIT(1), /** - * Whether this tracepoint's timestamp is recorded on the compute pipeline - * or from top of pipe if there was no dispatch (useful for acceleration - * structure builds where the runtime might choose to not emit anything for - * a number of reasons). + * Whether this tracepoint doesn't generate a timestamp but instead repeats + * the last one. */ - INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP = BITFIELD_BIT(2), + INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST = BITFIELD_BIT(2), }; /* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */ diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index ed1800d21d2..63de4d86f1c 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -47,7 +47,7 @@ def define_tracepoints(args): def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, tp_default_enabled=True, end_pipelined=True, - compute=False, maybe_compute=False, + compute=False, repeat_last=False, need_cs_param=False): global intel_default_tps if tp_default_enabled: @@ -69,8 +69,8 @@ def define_tracepoints(args): if end_pipelined: if compute: tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS') - elif maybe_compute: - tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP') + elif repeat_last: + tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST') else: tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE') Tracepoint('intel_end_{0}'.format(name), @@ -229,15 +229,14 @@ def define_tracepoints(args): tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),], need_cs_param=True) - rt_args = [Arg(type='uint32_t', var='cs_hash', c_format='%u')] - begin_end_tp('as_build', tp_args=rt_args) - 
begin_end_tp('as_build_leaves', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_morton_generate', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_morton_sort', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_lbvh_build_internal', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_ploc_build_internal', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_encode', tp_args=rt_args, maybe_compute=True) - begin_end_tp('as_copy', tp_args=rt_args, maybe_compute=True) + begin_end_tp('as_build') + begin_end_tp('as_build_leaves', repeat_last=True) + begin_end_tp('as_morton_generate', repeat_last=True) + begin_end_tp('as_morton_sort', repeat_last=True) + begin_end_tp('as_lbvh_build_internal', repeat_last=True) + begin_end_tp('as_ploc_build_internal', repeat_last=True) + begin_end_tp('as_encode', repeat_last=True) + begin_end_tp('as_copy', repeat_last=True) begin_end_tp('rays', tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'), diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 07995cd080b..26283b250ad 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1010,6 +1010,7 @@ enum anv_timestamp_capture_type { ANV_TIMESTAMP_CAPTURE_AT_CS_STALL, ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER, ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH, + ANV_TIMESTAMP_REPEAT_LAST, }; struct anv_physical_device { @@ -6532,10 +6533,13 @@ struct anv_utrace_submit { struct anv_state_stream general_state_stream; /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit - * timestamps) + * timestamps), the timestamp is not scaled to the CPU time domain. 
*/ uint64_t last_full_timestamp; + /* Last timestamp, not scaled to the CPU time domain */ + uint64_t last_timestamp; + /* Memcpy state tracking (only used for timestamp copies on render engine) */ struct anv_memcpy_state memcpy_state; diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index e369a8846be..54c1ec6f10f 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -351,11 +351,10 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, const bool is_end_compute = cs == NULL && (flags & INTEL_DS_TRACEPOINT_FLAG_END_CS); - const bool is_end_compute_or_noop = - cs == NULL && - (flags & INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP); enum anv_timestamp_capture_type capture_type; - if (is_end_compute) { + if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) { + capture_type = ANV_TIMESTAMP_REPEAT_LAST; + } else if (is_end_compute) { assert(device->info->verx10 < 125 || !is_end_compute || cmd_buffer->state.last_indirect_dispatch != NULL || @@ -366,15 +365,6 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) : ANV_TIMESTAMP_CAPTURE_END_OF_PIPE; - } else if (is_end_compute_or_noop) { - capture_type = - device->info->verx10 >= 125 ? - (cmd_buffer->state.last_indirect_dispatch != NULL ? - ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : - (cmd_buffer->state.last_compute_walker != NULL ? - ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER : - ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE)) : - ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; } else { capture_type = (flags & INTEL_DS_TRACEPOINT_FLAG_END_CS) ? 
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : @@ -418,6 +408,11 @@ anv_utrace_read_ts(struct u_trace_context *utctx, assert(result == VK_SUCCESS); } + if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) { + return intel_device_info_timebase_scale(device->info, + submit->last_timestamp); + } + assert(offset_B % sizeof(union anv_utrace_timestamp) == 0); union anv_utrace_timestamp *ts = (union anv_utrace_timestamp *)(bo->map + offset_B); @@ -426,31 +421,32 @@ anv_utrace_read_ts(struct u_trace_context *utctx, if (ts->timestamp == U_TRACE_NO_TIMESTAMP) return U_TRACE_NO_TIMESTAMP; + uint64_t timestamp; + /* Detect a 16/32 bytes timestamp write */ if (ts->gfx20_postsync_data[1] != 0 || ts->gfx20_postsync_data[2] != 0 || ts->gfx20_postsync_data[3] != 0) { if (device->info->ver >= 20) { - return intel_device_info_timebase_scale(device->info, - ts->gfx20_postsync_data[3]); + timestamp = ts->gfx20_postsync_data[3]; + } else { + /* The timestamp written by COMPUTE_WALKER::PostSync only has 32bits. + * We need to rebuild the full 64bits using the previous timestamp. + * We assume that utrace is reading the timestamps in order. Anyway + * timestamps roll over on 32bits in a few minutes so in most cases + * that should be correct. + */ + timestamp = + (submit->last_full_timestamp & 0xffffffff00000000) | + (uint64_t) ts->gfx125_postsync_data[3]; } - - /* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We - * need to rebuild the full 64bits using the previous timestamp. We - * assume that utrace is reading the timestamp in order. Anyway - * timestamp rollover on 32bits in a few minutes so in most cases that - * should be correct.
- */ - uint64_t timestamp = - (submit->last_full_timestamp & 0xffffffff00000000) | - (uint64_t) ts->gfx125_postsync_data[3]; - - return intel_device_info_timebase_scale(device->info, timestamp); + } else { + submit->last_full_timestamp = timestamp = ts->timestamp; } - submit->last_full_timestamp = ts->timestamp; + submit->last_timestamp = timestamp; - return intel_device_info_timebase_scale(device->info, ts->timestamp); + return intel_device_info_timebase_scale(device->info, timestamp); } static void diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index 96308bd54f2..92dcb02d6b1 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -72,41 +72,29 @@ static void end_debug_marker(VkCommandBuffer commandBuffer) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; - struct anv_compute_pipeline *pipeline = - anv_pipeline_to_compute(comp_state->base.pipeline); - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data_const(pipeline->cs->prog_data); cmd_buffer->state.rt.debug_marker_count--; switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) { case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP: - trace_intel_end_as_build(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_build(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES: - trace_intel_end_as_build_leaves(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_build_leaves(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE: - trace_intel_end_as_morton_generate(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_morton_generate(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT: - 
trace_intel_end_as_morton_sort(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_morton_sort(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL: - trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL: - trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: - trace_intel_end_as_encode(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_encode(&cmd_buffer->trace); break; default: unreachable("Invalid build step"); @@ -725,12 +713,6 @@ genX(CmdCopyAccelerationStructureKHR)( return; } - ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline); - struct anv_compute_pipeline *compute_pipeline = - anv_pipeline_to_compute(anv_pipeline); - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data_const(compute_pipeline->cs->prog_data); - struct anv_cmd_saved_state saved; anv_cmd_buffer_save_state(cmd_buffer, ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | @@ -773,8 +755,7 @@ genX(CmdCopyAccelerationStructureKHR)( anv_cmd_buffer_restore_state(cmd_buffer, &saved); - trace_intel_end_as_copy(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_copy(&cmd_buffer->trace); } void @@ -799,12 +780,6 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)( return; } - ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline); - struct anv_compute_pipeline *compute_pipeline = - anv_pipeline_to_compute(anv_pipeline); - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data_const(compute_pipeline->cs->prog_data); - struct anv_cmd_saved_state saved; anv_cmd_buffer_save_state(cmd_buffer, ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | @@ -851,8 +826,7 @@ 
genX(CmdCopyAccelerationStructureToMemoryKHR)( anv_cmd_buffer_restore_state(cmd_buffer, &saved); - trace_intel_end_as_copy(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_copy(&cmd_buffer->trace); } void @@ -876,12 +850,6 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)( return; } - ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline); - struct anv_compute_pipeline *compute_pipeline = - anv_pipeline_to_compute(anv_pipeline); - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data_const(compute_pipeline->cs->prog_data); - struct anv_cmd_saved_state saved; anv_cmd_buffer_save_state(cmd_buffer, ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | @@ -911,8 +879,7 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)( vk_common_CmdDispatch(commandBuffer, 512, 1, 1); anv_cmd_buffer_restore_state(cmd_buffer, &saved); - trace_intel_end_as_copy(&cmd_buffer->trace, - cs_prog_data->base.source_hash); + trace_intel_end_as_copy(&cmd_buffer->trace); } void diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 68f3ef54256..3858a4ffa7e 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -6269,6 +6269,10 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, } #endif + case ANV_TIMESTAMP_REPEAT_LAST: + /* Noop */ + break; + default: unreachable("invalid"); } diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 83b19c78ce3..f6255a59ea5 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -626,8 +626,7 @@ void genX(CmdDispatchBase)( prog_data->local_size[0] * prog_data->local_size[1] * prog_data->local_size[2]); - if (cmd_buffer->state.rt.debug_marker_count == 0) - trace_intel_begin_compute(&cmd_buffer->trace); + trace_intel_begin_compute(&cmd_buffer->trace); cmd_buffer_flush_compute_state(cmd_buffer); @@ -639,11 +638,9 @@ void genX(CmdDispatchBase)( groupCountX, groupCountY, groupCountZ, false); - if 
(cmd_buffer->state.rt.debug_marker_count == 0) { - trace_intel_end_compute(&cmd_buffer->trace, - groupCountX, groupCountY, groupCountZ, - prog_data->base.source_hash); - } + trace_intel_end_compute(&cmd_buffer->trace, + groupCountX, groupCountY, groupCountZ, + prog_data->base.source_hash); } static void @@ -686,8 +683,7 @@ emit_unaligned_cs_walker( prog_data->local_size[0] * prog_data->local_size[1] * prog_data->local_size[2]); - if (cmd_buffer->state.rt.debug_marker_count == 0) - trace_intel_begin_compute(&cmd_buffer->trace); + trace_intel_begin_compute(&cmd_buffer->trace); assert(!prog_data->uses_num_work_groups); genX(cmd_buffer_flush_compute_state)(cmd_buffer); @@ -700,11 +696,9 @@ emit_unaligned_cs_walker( dispatch, groupCountX, groupCountY, groupCountZ); #endif - if (cmd_buffer->state.rt.debug_marker_count == 0) { - trace_intel_end_compute(&cmd_buffer->trace, - groupCountX, groupCountY, groupCountZ, - prog_data->base.source_hash); - } + trace_intel_end_compute(&cmd_buffer->trace, + groupCountX, groupCountY, groupCountZ, + prog_data->base.source_hash); } /* @@ -795,8 +789,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer, "compute indirect", 0); - if (cmd_buffer->state.rt.debug_marker_count == 0) - trace_intel_begin_compute_indirect(&cmd_buffer->trace); + trace_intel_begin_compute_indirect(&cmd_buffer->trace); cmd_buffer_flush_compute_state(cmd_buffer); @@ -806,11 +799,9 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer, emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr, 0, 0, 0, is_unaligned_size_x); - if (cmd_buffer->state.rt.debug_marker_count == 0) { - trace_intel_end_compute_indirect(&cmd_buffer->trace, - anv_address_utrace(indirect_addr), - prog_data->base.source_hash); - } + trace_intel_end_compute_indirect(&cmd_buffer->trace, + anv_address_utrace(indirect_addr), + prog_data->base.source_hash); } void genX(CmdDispatchIndirect)(