diff --git a/docs/u_trace.rst b/docs/u_trace.rst index 7ddab17736c..a6e6fa951f9 100644 --- a/docs/u_trace.rst +++ b/docs/u_trace.rst @@ -21,27 +21,26 @@ Usage u_trace is controlled by environment variables: -:envvar:`GPU_TRACE` - if set to ``1`` enables tracing and outputs the data into ``stdout`` +:envvar:`GPU_TRACES` + controls whether u_trace is enabled and which kind of trace output is produced + + ``print`` + prints in a human readable text format. Note that this is + mutually exclusive with ``print_json``; both cannot be enabled + at the same time. + ``print_json`` + prints in JSON format, suitable for parsing. The application must + finish its rendering cleanly in order for the trace's JSON to be + valid: for the Vulkan API it is expected to destroy the device, + for GL it is expected to destroy the context. + ``perfetto`` + enables perfetto instrumentation prior to connecting. Perfetto + traces can be collected without setting this, but events emitted + before the tracing session starts may be missed. :envvar:`GPU_TRACEFILE` specifies a file where to write the output instead of ``stdout`` -:envvar:`GPU_TRACE_FORMAT` - controls a format of the output - - ``txt`` - human readable text format - ``json`` - json format, suitable for parsing. Application should appropriately - finish its rendering in order for trace's json to be valid. - For Vulkan API it is expected to destroy the device, for GL it is - expected to destroy the context. - -:envvar:`GPU_TRACE_INSTRUMENT` - Meaningful only for Perfetto tracing. If set to ``1`` enables - instrumentation of GPU commands before the tracing is enabled. - :envvar:`*_GPU_TRACEPOINT` tracepoints can be enabled or disabled using driver specific environment variable. Most tracepoints are enabled by default.
For instance diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index bcce1f06fcc..6d5c8aa5da7 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -1670,6 +1670,12 @@ tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts) return ts * (1000000000 / 19200000); } +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device) +{ + return &device->trace_context; +} + static void* tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size) { diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 4f6b2a3c494..f7dab0d29e4 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -445,6 +445,9 @@ tu_device_lookup_bo(struct tu_device *device, uint32_t handle) return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle); } +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device); + /* Get a scratch bo for use inside a command buffer. This will always return * the same bo given the same size or similar sizes, so only one scratch bo * can be used at the same time. 
It's meant for short-lived things where we diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index 312fcf298ea..941dc23eee7 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -24,6 +24,8 @@ tu_device_get_suspend_count(struct tu_device *dev, uint64_t tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts); +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device); } /** @@ -410,7 +412,7 @@ void tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) { /* sync_timestamp isn't free */ - if (!ut_perfetto_enabled) + if (!u_trace_perfetto_active(tu_device_get_u_trace(dev))) return; sync_timestamp(dev); diff --git a/src/gallium/auxiliary/util/u_trace_gallium.c b/src/gallium/auxiliary/util/u_trace_gallium.c index ce034703a6f..18f707f4605 100644 --- a/src/gallium/auxiliary/util/u_trace_gallium.c +++ b/src/gallium/auxiliary/util/u_trace_gallium.c @@ -76,7 +76,7 @@ u_trace_pipe_context_init(struct u_trace_context *utctx, inline void trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb) { - if (likely(!u_trace_instrument())) + if (likely(!u_trace_enabled(ut->utctx))) return; trace_framebuffer(ut, cs, pfb); diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index 276b64a01ef..88ba45b0a16 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -38,20 +38,16 @@ #define TIMESTAMP_BUF_SIZE 0x1000 #define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t)) -int _u_trace_instrument; - struct u_trace_state { util_once_flag once; FILE *trace_file; - bool trace_format_json; + enum u_trace_type enabled_traces; }; static struct u_trace_state u_trace_state = { .once = UTIL_ONCE_FLAG_INIT }; #ifdef HAVE_PERFETTO -int ut_perfetto_enabled; - /** * Global list of contexts, so we can defer starting the queue until * perfetto tracing is started. 
@@ -59,6 +55,8 @@ int ut_perfetto_enabled; static struct list_head ctx_list = { &ctx_list, &ctx_list }; static simple_mtx_t ctx_list_mutex = SIMPLE_MTX_INITIALIZER; +/* The number of Perfetto tracers connected */ +int _u_trace_perfetto_count; #endif struct u_trace_payload_buf { @@ -361,10 +359,16 @@ get_chunk(struct u_trace *ut, size_t payload_size) return chunk; } -DEBUG_GET_ONCE_BOOL_OPTION(trace_instrument, "GPU_TRACE_INSTRUMENT", false) -DEBUG_GET_ONCE_BOOL_OPTION(trace, "GPU_TRACE", false) +static const struct debug_named_value config_control[] = { + { "print", U_TRACE_TYPE_PRINT, "Enable print"}, + { "print_json", U_TRACE_TYPE_PRINT_JSON, "Enable print in JSON"}, +#ifdef HAVE_PERFETTO + { "perfetto", U_TRACE_TYPE_PERFETTO_ENV, "Enable perfetto" }, +#endif + DEBUG_NAMED_VALUE_END +}; + DEBUG_GET_ONCE_OPTION(trace_file, "GPU_TRACEFILE", NULL) -DEBUG_GET_ONCE_OPTION(trace_format, "GPU_TRACE_FORMAT", "txt") static void trace_file_fini(void) @@ -376,6 +380,8 @@ trace_file_fini(void) static void u_trace_state_init_once(void) { + u_trace_state.enabled_traces = + debug_get_flags_option("GPU_TRACES", config_control, 0); const char *tracefile_name = debug_get_option_trace_file(); if (tracefile_name && !__check_suid()) { u_trace_state.trace_file = fopen(tracefile_name, "w"); @@ -383,15 +389,9 @@ u_trace_state_init_once(void) atexit(trace_file_fini); } } - if (!u_trace_state.trace_file && debug_get_option_trace()) { + if (!u_trace_state.trace_file) { u_trace_state.trace_file = stdout; } - - if (u_trace_state.trace_file || debug_get_option_trace_instrument()) - p_atomic_inc(&_u_trace_instrument); - - const char *trace_format = debug_get_option_trace_format(); - u_trace_state.trace_format_json = !strcmp(trace_format, "json"); } static void @@ -426,6 +426,7 @@ u_trace_context_init(struct u_trace_context *utctx, { u_trace_state_init(); + utctx->enabled_traces = u_trace_state.enabled_traces; utctx->pctx = pctx; utctx->create_timestamp_buffer = create_timestamp_buffer;
utctx->delete_timestamp_buffer = delete_timestamp_buffer; @@ -442,31 +443,35 @@ u_trace_context_init(struct u_trace_context *utctx, list_inithead(&utctx->flushed_trace_chunks); - utctx->out = u_trace_state.trace_file; + if (utctx->enabled_traces & U_TRACE_TYPE_PRINT) { + utctx->out = u_trace_state.trace_file; - if (u_trace_state.trace_format_json) { - utctx->out_printer = &json_printer; + if (utctx->enabled_traces & U_TRACE_TYPE_JSON) { + utctx->out_printer = &json_printer; + } else { + utctx->out_printer = &txt_printer; + } } else { - utctx->out_printer = &txt_printer; + utctx->out = NULL; + utctx->out_printer = NULL; } #ifdef HAVE_PERFETTO simple_mtx_lock(&ctx_list_mutex); list_add(&utctx->node, &ctx_list); - simple_mtx_unlock(&ctx_list_mutex); -#endif + if (_u_trace_perfetto_count > 0) + utctx->enabled_traces |= U_TRACE_TYPE_PERFETTO_ACTIVE; - if (!u_trace_context_actively_tracing(utctx)) - return; - -#ifdef HAVE_PERFETTO - simple_mtx_lock(&ctx_list_mutex); -#endif queue_init(utctx); -#ifdef HAVE_PERFETTO + simple_mtx_unlock(&ctx_list_mutex); +#else + queue_init(utctx); #endif + if (!(p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_REQUIRE_QUEUING)) + return; + if (utctx->out) { utctx->out_printer->start(utctx); } @@ -498,20 +503,33 @@ void u_trace_perfetto_start(void) { simple_mtx_lock(&ctx_list_mutex); - list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) - queue_init(utctx); - simple_mtx_unlock(&ctx_list_mutex); - if (p_atomic_inc_return(&ut_perfetto_enabled) == 1) - p_atomic_inc(&_u_trace_instrument); + list_for_each_entry(struct u_trace_context, utctx, &ctx_list, node) { + queue_init(utctx); + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces | U_TRACE_TYPE_PERFETTO_ACTIVE); + } + + _u_trace_perfetto_count++; + + simple_mtx_unlock(&ctx_list_mutex); } void u_trace_perfetto_stop(void) { - assert(ut_perfetto_enabled > 0); - if (p_atomic_dec_return(&ut_perfetto_enabled) == 0) - p_atomic_dec(&_u_trace_instrument); + 
simple_mtx_lock(&ctx_list_mutex); + + assert(_u_trace_perfetto_count > 0); + _u_trace_perfetto_count--; + if (_u_trace_perfetto_count == 0) { + list_for_each_entry(struct u_trace_context, utctx, &ctx_list, node) { + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces & ~U_TRACE_TYPE_PERFETTO_ACTIVE); + } + } + + simple_mtx_unlock(&ctx_list_mutex); } #endif @@ -564,7 +582,8 @@ process_chunk(void *job, void *gdata, int thread_index) utctx->out_printer->event(utctx, chunk, evt, ns, delta); } #ifdef HAVE_PERFETTO - if (evt->tp->perfetto) { + if (evt->tp->perfetto && + (p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_PERFETTO_ACTIVE)) { evt->tp->perfetto(utctx->pctx, ns, chunk->flush_data, evt->payload); } #endif diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index a045f4b2355..1f9e99ee392 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -132,11 +132,33 @@ typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx, typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx, void *flush_data); +enum u_trace_type { + U_TRACE_TYPE_PRINT = 1u << 0, + U_TRACE_TYPE_JSON = 1u << 1, + U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2, + U_TRACE_TYPE_PERFETTO_ENV = 1u << 3, + + U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON, + U_TRACE_TYPE_PERFETTO = U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV, + + /* + * A mask of traces that require appending to the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_QUEUING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO, + /* + * A mask of traces that require processing the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_PROCESSING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE, +}; + /** * The trace context provides tracking for "in-flight" traces, once the * cmdstream that records timestamps has been flushed. 
*/ struct u_trace_context { + /* All traces enabled in this context */ + enum u_trace_type enabled_traces; + void *pctx; u_trace_create_ts_buffer create_timestamp_buffer; @@ -273,29 +295,32 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it, void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); #ifdef HAVE_PERFETTO -extern int ut_perfetto_enabled; +static ALWAYS_INLINE bool +u_trace_perfetto_active(struct u_trace_context* utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_PERFETTO_ACTIVE; +} void u_trace_perfetto_start(void); void u_trace_perfetto_stop(void); #else -# define ut_perfetto_enabled 0 +static ALWAYS_INLINE bool +u_trace_perfetto_active(UNUSED struct u_trace_context* utctx) { + return false; +} #endif /** - * Return whether instrumentations should be enabled or not. This is called - * from tracepoints. + * Return whether u_trace is enabled at all; this can be used to + * gate any expensive traces. */ static ALWAYS_INLINE bool -u_trace_instrument(void) -{ - extern int _u_trace_instrument; - return p_atomic_read_relaxed(&_u_trace_instrument); +u_trace_enabled(struct u_trace_context *utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) != 0; } -static inline bool -u_trace_context_actively_tracing(struct u_trace_context *utctx) -{ - return !!utctx->out || (ut_perfetto_enabled > 0); +static ALWAYS_INLINE bool +u_trace_context_actively_tracing(struct u_trace_context *utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_REQUIRE_PROCESSING; } #ifdef __cplusplus diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py index bcdfcc7654c..bbcd942cc7b 100644 --- a/src/util/perf/u_trace.py +++ b/src/util/perf/u_trace.py @@ -236,6 +236,7 @@ void ${trace.tp_perfetto}( % endif void __trace_${trace_name}( struct u_trace *ut + , enum u_trace_type enabled_traces % if need_cs_param: , void *cs % endif @@ -252,11 +253,13 @@ static ALWAYS_INLINE void
trace_${trace_name}( , ${arg.type} ${arg.var} % endfor ) { - if (!unlikely(u_trace_instrument() && + enum u_trace_type enabled_traces = p_atomic_read_relaxed(&ut->utctx->enabled_traces); + if (!unlikely(enabled_traces != 0 && ${trace.enabled_expr(trace_toggle_name)})) return; __trace_${trace_name}( ut + , enabled_traces % if need_cs_param: , cs % endif @@ -416,6 +419,7 @@ static const struct u_tracepoint __tp_${trace_name} = { }; void __trace_${trace_name}( struct u_trace *ut + , enum u_trace_type enabled_traces % if need_cs_param: , void *cs % endif @@ -423,11 +427,11 @@ void __trace_${trace_name}( , ${arg.type} ${arg.var} % endfor ) { - struct trace_${trace_name} *__entry = - (struct trace_${trace_name} *)u_trace_append(ut, ${cs_param_value + ","} &__tp_${trace_name}); - % if len(trace.tp_struct) == 0: - (void)__entry; - % endif + struct trace_${trace_name} entry; + UNUSED struct trace_${trace_name} *__entry = + enabled_traces & U_TRACE_TYPE_REQUIRE_QUEUING ? + (struct trace_${trace_name} *)u_trace_append(ut, ${cs_param_value + ","} &__tp_${trace_name}) : + &entry; % for arg in trace.tp_struct: __entry->${arg.name} = ${arg.var}; % endfor diff --git a/src/util/tests/perf/u_trace_test.cpp b/src/util/tests/perf/u_trace_test.cpp index decfe2cb7cb..6714983cdf7 100644 --- a/src/util/tests/perf/u_trace_test.cpp +++ b/src/util/tests/perf/u_trace_test.cpp @@ -10,7 +10,7 @@ static int test_thread(void *_state) { - struct u_trace_context ctx = { 0 }; + struct u_trace_context ctx = {}; u_trace_context_init(&ctx, NULL, NULL, NULL, NULL, NULL, NULL); u_trace_context_fini(&ctx);