diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 83a46ad0e65..eb815baacb7 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1696,7 +1696,7 @@ tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) template static void tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) + unsigned idx, uint32_t) { struct tu_bo *bo = (struct tu_bo *) timestamps; struct tu_cs *ts_cs = (struct tu_cs *) cs; diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index e7f0981514b..500cff9178e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -500,7 +500,7 @@ fd_get_device_reset_status(struct pipe_context *pctx) static void fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) + unsigned idx, uint32_t flags) { struct fd_batch *batch = container_of(ut, struct fd_batch, trace); struct fd_ringbuffer *ring = cs; diff --git a/src/gallium/drivers/iris/iris_utrace.c b/src/gallium/drivers/iris/iris_utrace.c index f320b6a5df0..477e620b68d 100644 --- a/src/gallium/drivers/iris/iris_utrace.c +++ b/src/gallium/drivers/iris/iris_utrace.c @@ -92,7 +92,7 @@ iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void iris_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct iris_batch *batch = container_of(trace, struct iris_batch, trace); struct iris_context *ice = batch->ice; @@ -102,12 +102,14 @@ iris_utrace_record_ts(struct u_trace *trace, void *cs, iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE); const bool is_end_compute = - (cs == NULL && ice->utrace.last_compute_walker != NULL && end_of_pipe); + cs == NULL && + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS); if (is_end_compute) { + assert(ice->utrace.last_compute_walker != NULL); batch->screen->vtbl.rewrite_compute_walker_pc( batch, ice->utrace.last_compute_walker, bo, ts_offset); ice->utrace.last_compute_walker = NULL; - } else if (end_of_pipe) { + } else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) { iris_emit_pipe_control_write(batch, "query: pipelined snapshot write", PIPE_CONTROL_WRITE_TIMESTAMP, bo, ts_offset, 0ull); diff --git a/src/gallium/drivers/radeonsi/si_tracepoints.py b/src/gallium/drivers/radeonsi/si_tracepoints.py index 00320b9c963..bb981889e3c 100644 --- a/src/gallium/drivers/radeonsi/si_tracepoints.py +++ b/src/gallium/drivers/radeonsi/si_tracepoints.py @@ -26,8 +26,7 @@ def define_tracepoints(args): def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, - tp_default_enabled=True, end_pipelined=True, - need_cs_param=False): + tp_default_enabled=True, need_cs_param=False): global si_default_tps if tp_default_enabled: si_default_tps.append(name) @@ -41,7 +40,6 @@ def define_tracepoints(args): tp_struct=tp_struct, tp_perfetto='si_ds_end_{0}'.format(name), tp_print=tp_print, - end_of_pipe=end_pipelined, need_cs_param=need_cs_param) # Various draws/dispatch, radeonsi diff --git a/src/gallium/drivers/radeonsi/si_utrace.c b/src/gallium/drivers/radeonsi/si_utrace.c index 847863c37a9..eb45468588e 100644 --- a/src/gallium/drivers/radeonsi/si_utrace.c +++ b/src/gallium/drivers/radeonsi/si_utrace.c @@ -12,8 +12,8 @@ #include "util/hash_table.h" -static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) +static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, + unsigned idx, uint32_t flags) { struct si_context *ctx = container_of(trace, struct si_context, trace); struct pipe_resource *buffer = timestamps; diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h index 50b0c5af0cb..505abb8eb53 100644 --- a/src/intel/ds/intel_driver_ds.h +++ b/src/intel/ds/intel_driver_ds.h @@ -61,6 +61,19 @@ enum intel_ds_stall_flag { INTEL_DS_CCS_CACHE_FLUSH_BIT = BITFIELD_BIT(16), }; +enum intel_ds_tracepoint_flags { + /** + * Whether the tracepoint's timestamp must be recorded with as an + * end-of-pipe timestamp. + */ + INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE = BITFIELD_BIT(0), + /** + * Whether this tracepoint's timestamp is recorded on the compute pipeline. + */ + INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS = BITFIELD_BIT(1), + +}; + /* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */ typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags); diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index 9ce5f503c0a..ac49f0e3459 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -46,6 +46,7 @@ def define_tracepoints(args): def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, tp_default_enabled=True, end_pipelined=True, + compute=False, need_cs_param=False): global intel_default_tps if tp_default_enabled: @@ -54,13 +55,19 @@ def define_tracepoints(args): toggle_name=name, tp_perfetto='intel_ds_begin_{0}'.format(name), need_cs_param=need_cs_param) + tp_flags = [] + if end_pipelined: + if compute: + tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS') + else: + tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE') Tracepoint('intel_end_{0}'.format(name), toggle_name=name, args=tp_args, tp_struct=tp_struct, tp_perfetto='intel_ds_end_{0}'.format(name), tp_print=tp_print, - end_of_pipe=end_pipelined, + tp_flags=tp_flags, need_cs_param=need_cs_param) # Frame tracepoints @@ -175,7 +182,8 @@ def define_tracepoints(args): tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'), Arg(type='uint32_t', var='group_y', c_format='%u'), Arg(type='uint32_t', var='group_z', c_format='%u'),], - tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) + tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'], + compute=True) # Used to identify copies generated by utrace begin_end_tp('trace_copy', @@ -190,7 +198,8 @@ def define_tracepoints(args): tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'), Arg(type='uint32_t', var='group_y', c_format='%u'), Arg(type='uint32_t', var='group_z', c_format='%u'),], - tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) + tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'], + compute=True) def flag_bits(args): bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')] diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index bd55f3e79b6..ae812eec097 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -396,7 +396,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct anv_device *device = container_of(ut->utctx, struct anv_device, ds.trace_context); @@ -414,15 +414,19 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, /* Is this a end of compute trace point? */ const bool is_end_compute = cs == NULL && - (cmd_buffer->last_compute_walker != NULL || - cmd_buffer->last_indirect_dispatch != NULL) && - end_of_pipe; + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS); + + assert(!is_end_compute || + cmd_buffer->state.last_indirect_dispatch != NULL || + cmd_buffer->state.last_compute_walker != NULL); + + enum anv_timestamp_capture_type capture_type = + is_end_compute ? + (cmd_buffer->state.last_indirect_dispatch != NULL ? + ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) : + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ? + ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; - enum anv_timestamp_capture_type capture_type = end_of_pipe ? - (is_end_compute ? - (cmd_buffer->last_indirect_dispatch != NULL ? - ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) : - ANV_TIMESTAMP_CAPTURE_END_OF_PIPE) : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; void *addr = capture_type == ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH ? cmd_buffer->state.last_indirect_dispatch : diff --git a/src/intel/vulkan_hasvk/anv_utrace.c b/src/intel/vulkan_hasvk/anv_utrace.c index a986491f264..c7e849605e5 100644 --- a/src/intel/vulkan_hasvk/anv_utrace.c +++ b/src/intel/vulkan_hasvk/anv_utrace.c @@ -224,7 +224,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct anv_cmd_buffer *cmd_buffer = container_of(ut, struct anv_cmd_buffer, trace); @@ -232,8 +232,9 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, struct anv_bo *bo = timestamps; enum anv_timestamp_capture_type capture_type = - (end_of_pipe) ? ANV_TIMESTAMP_CAPTURE_END_OF_PIPE - : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ? + ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : + ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device, (struct anv_address) { .bo = bo, diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index d90db9ec805..da5241557f7 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -853,8 +853,7 @@ u_trace_appendv(struct u_trace *ut, } /* record a timestamp for the trace: */ - ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, - tp->end_of_pipe); + ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, tp->flags); chunk->traces[tp_idx] = (struct u_trace_event) { .tp = tp, diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index 56479e95392..bd14fb7fe06 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -105,7 +105,7 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe); + uint32_t flags); /** * Driver provided callback to read back a previously recorded timestamp. diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py index b474f05dabe..4f5722f6648 100644 --- a/src/util/perf/u_trace.py +++ b/src/util/perf/u_trace.py @@ -34,7 +34,7 @@ class Tracepoint(object): """ def __init__(self, name, args=[], toggle_name=None, tp_struct=None, tp_print=None, tp_perfetto=None, - tp_markers=None, end_of_pipe=False, need_cs_param=True): + tp_markers=None, tp_flags=[], need_cs_param=True): """Parameters: - name: the tracepoint name, a tracepoint function with the given @@ -72,7 +72,7 @@ class Tracepoint(object): self.tp_print = tp_print self.tp_perfetto = tp_perfetto self.tp_markers = tp_markers - self.end_of_pipe = end_of_pipe + self.tp_flags = tp_flags self.toggle_name = toggle_name self.need_cs_param = need_cs_param @@ -449,7 +449,7 @@ static void __emit_label_${trace_name}(struct u_trace_context *utctx, void *cs, static const struct u_tracepoint __tp_${trace_name} = { ALIGN_POT(sizeof(struct trace_${trace_name}), 8), /* keep size 64b aligned */ "${trace_name}", - ${"true" if trace.end_of_pipe else "false"}, + ${0 if len(trace.tp_flags) == 0 else " | ".join(trace.tp_flags)}, ${index}, __print_${trace_name}, __print_json_${trace_name}, diff --git a/src/util/perf/u_trace_priv.h b/src/util/perf/u_trace_priv.h index a25811a48e8..8f76c18075d 100644 --- a/src/util/perf/u_trace_priv.h +++ b/src/util/perf/u_trace_priv.h @@ -46,11 +46,9 @@ struct u_tracepoint { unsigned payload_sz; const char *name; /** - * Whether this tracepoint's timestamp must be recorded with as an - * end-of-pipe timestamp (for some GPUs the recording timestamp instruction - * might be different for top/end of pipe). + * A bitfield of driver agnostic flags */ - bool end_of_pipe:1; + uint16_t flags; /** * Index of this tracepoint in _tracepoint_names in the generated * u_trace perfetto header. By associating these names with iids in setup,