From 18e820009d6258e91bc443b9fe7df4681e90324d Mon Sep 17 00:00:00 2001 From: Mark Collins Date: Wed, 19 Oct 2022 19:29:36 +0800 Subject: [PATCH] common/utrace: Refactor and combine all envvars into `GPU_TRACES` The environment variables controlling utrace were fragmented, and their meaning was convoluted to decode. This commit simplifies them into easier-to-understand flags that directly indicate the resulting behavior (such as `perfetto` enabling queued logs rather than needing to set a `queued` flag), combines them into a single envvar `GPU_TRACES`, and updates the existing terminology in utrace to match the new options. Signed-off-by: Mark Collins Reviewed-by: Danylo Piliaiev Reviewed-by: Yonggang Luo Acked-by: Chia-I Wu Part-of: --- docs/u_trace.rst | 33 ++++--- src/freedreno/vulkan/tu_device.c | 6 ++ src/freedreno/vulkan/tu_device.h | 3 + src/freedreno/vulkan/tu_perfetto.cc | 4 +- src/gallium/auxiliary/util/u_trace_gallium.c | 2 +- src/util/perf/u_trace.c | 93 ++++++++++++-------- src/util/perf/u_trace.h | 49 ++++++++--- src/util/perf/u_trace.py | 16 ++-- src/util/tests/perf/u_trace_test.cpp | 2 +- 9 files changed, 133 insertions(+), 75 deletions(-) diff --git a/docs/u_trace.rst b/docs/u_trace.rst index 7ddab17736c..a6e6fa951f9 100644 --- a/docs/u_trace.rst +++ b/docs/u_trace.rst @@ -21,27 +21,26 @@ Usage u_trace is controlled by environment variables: -:envvar:`GPU_TRACE` - if set to ``1`` enables tracing and outputs the data into ``stdout`` +:envvar:`GPU_TRACES` + controls whether u_trace is enabled and trace output + + ``print`` + prints in a human readable text format. It should be noted that this + is mutually exclusive with ``print_json`` and both cannot be enabled + at the same time. + ``print_json`` + prints in JSON format, suitable for parsing. Application should + appropriately finish its rendering in order for trace's json to be + valid. 
For the Vulkan API, it is expected to destroy the device, + for GL it's expected to destroy the context. + ``perfetto`` + enables perfetto instrumentation prior to connecting, perfetto + traces can be collected without setting this but it may miss some + events prior to the tracing session being started. :envvar:`GPU_TRACEFILE` specifies a file where to write the output instead of ``stdout`` -:envvar:`GPU_TRACE_FORMAT` - controls a format of the output - - ``txt`` - human readable text format - ``json`` - json format, suitable for parsing. Application should appropriately - finish its rendering in order for trace's json to be valid. - For Vulkan API it is expected to destroy the device, for GL it is - expected to destroy the context. - -:envvar:`GPU_TRACE_INSTRUMENT` - Meaningful only for Perfetto tracing. If set to ``1`` enables - instrumentation of GPU commands before the tracing is enabled. - :envvar:`*_GPU_TRACEPOINT` tracepoints can be enabled or disabled using driver specific environment variable. Most tracepoints are enabled by default. 
For instance diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index bcce1f06fcc..6d5c8aa5da7 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -1670,6 +1670,12 @@ tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts) return ts * (1000000000 / 19200000); } +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device) +{ + return &device->trace_context; +} + static void* tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size) { diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 4f6b2a3c494..f7dab0d29e4 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -445,6 +445,9 @@ tu_device_lookup_bo(struct tu_device *device, uint32_t handle) return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle); } +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device); + /* Get a scratch bo for use inside a command buffer. This will always return * the same bo given the same size or similar sizes, so only one scratch bo * can be used at the same time. 
It's meant for short-lived things where we diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index 312fcf298ea..941dc23eee7 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -24,6 +24,8 @@ tu_device_get_suspend_count(struct tu_device *dev, uint64_t tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts); +struct u_trace_context * +tu_device_get_u_trace(struct tu_device *device); } /** @@ -410,7 +412,7 @@ void tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) { /* sync_timestamp isn't free */ - if (!ut_perfetto_enabled) + if (!u_trace_perfetto_active(tu_device_get_u_trace(dev))) return; sync_timestamp(dev); diff --git a/src/gallium/auxiliary/util/u_trace_gallium.c b/src/gallium/auxiliary/util/u_trace_gallium.c index ce034703a6f..18f707f4605 100644 --- a/src/gallium/auxiliary/util/u_trace_gallium.c +++ b/src/gallium/auxiliary/util/u_trace_gallium.c @@ -76,7 +76,7 @@ u_trace_pipe_context_init(struct u_trace_context *utctx, inline void trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb) { - if (likely(!u_trace_instrument())) + if (likely(!u_trace_enabled(ut->utctx))) return; trace_framebuffer(ut, cs, pfb); diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index 276b64a01ef..88ba45b0a16 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -38,20 +38,16 @@ #define TIMESTAMP_BUF_SIZE 0x1000 #define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t)) -int _u_trace_instrument; - struct u_trace_state { util_once_flag once; FILE *trace_file; - bool trace_format_json; + enum u_trace_type enabled_traces; }; static struct u_trace_state u_trace_state = { .once = UTIL_ONCE_FLAG_INIT }; #ifdef HAVE_PERFETTO -int ut_perfetto_enabled; - /** * Global list of contexts, so we can defer starting the queue until * perfetto tracing is started. 
@@ -59,6 +55,8 @@ int ut_perfetto_enabled; static struct list_head ctx_list = { &ctx_list, &ctx_list }; static simple_mtx_t ctx_list_mutex = SIMPLE_MTX_INITIALIZER; +/* The amount of Perfetto tracers connected */ +int _u_trace_perfetto_count; #endif struct u_trace_payload_buf { @@ -361,10 +359,16 @@ get_chunk(struct u_trace *ut, size_t payload_size) return chunk; } -DEBUG_GET_ONCE_BOOL_OPTION(trace_instrument, "GPU_TRACE_INSTRUMENT", false) -DEBUG_GET_ONCE_BOOL_OPTION(trace, "GPU_TRACE", false) +static const struct debug_named_value config_control[] = { + { "print", U_TRACE_TYPE_PRINT, "Enable print"}, + { "print_json", U_TRACE_TYPE_PRINT_JSON, "Enable print in JSON"}, +#ifdef HAVE_PERFETTO + { "perfetto", U_TRACE_TYPE_PERFETTO_ENV, "Enable perfetto" }, +#endif + DEBUG_NAMED_VALUE_END +}; + DEBUG_GET_ONCE_OPTION(trace_file, "GPU_TRACEFILE", NULL) -DEBUG_GET_ONCE_OPTION(trace_format, "GPU_TRACE_FORMAT", "txt") static void trace_file_fini(void) @@ -376,6 +380,8 @@ trace_file_fini(void) static void u_trace_state_init_once(void) { + u_trace_state.enabled_traces = + debug_get_flags_option("GPU_TRACES", config_control, 0); const char *tracefile_name = debug_get_option_trace_file(); if (tracefile_name && !__check_suid()) { u_trace_state.trace_file = fopen(tracefile_name, "w"); @@ -383,15 +389,9 @@ u_trace_state_init_once(void) atexit(trace_file_fini); } } - if (!u_trace_state.trace_file && debug_get_option_trace()) { + if (!u_trace_state.trace_file) { u_trace_state.trace_file = stdout; } - - if (u_trace_state.trace_file || debug_get_option_trace_instrument()) - p_atomic_inc(&_u_trace_instrument); - - const char *trace_format = debug_get_option_trace_format(); - u_trace_state.trace_format_json = !strcmp(trace_format, "json"); } static void @@ -426,6 +426,7 @@ u_trace_context_init(struct u_trace_context *utctx, { u_trace_state_init(); + utctx->enabled_traces = u_trace_state.enabled_traces; utctx->pctx = pctx; utctx->create_timestamp_buffer = create_timestamp_buffer; 
utctx->delete_timestamp_buffer = delete_timestamp_buffer; @@ -442,31 +443,35 @@ u_trace_context_init(struct u_trace_context *utctx, list_inithead(&utctx->flushed_trace_chunks); - utctx->out = u_trace_state.trace_file; + if (utctx->enabled_traces & U_TRACE_TYPE_PRINT) { + utctx->out = u_trace_state.trace_file; - if (u_trace_state.trace_format_json) { - utctx->out_printer = &json_printer; + if (utctx->enabled_traces & U_TRACE_TYPE_JSON) { + utctx->out_printer = &json_printer; + } else { + utctx->out_printer = &txt_printer; + } } else { - utctx->out_printer = &txt_printer; + utctx->out = NULL; + utctx->out_printer = NULL; } #ifdef HAVE_PERFETTO simple_mtx_lock(&ctx_list_mutex); list_add(&utctx->node, &ctx_list); - simple_mtx_unlock(&ctx_list_mutex); -#endif + if (_u_trace_perfetto_count > 0) + utctx->enabled_traces |= U_TRACE_TYPE_PERFETTO_ACTIVE; - if (!u_trace_context_actively_tracing(utctx)) - return; - -#ifdef HAVE_PERFETTO - simple_mtx_lock(&ctx_list_mutex); -#endif queue_init(utctx); -#ifdef HAVE_PERFETTO + simple_mtx_unlock(&ctx_list_mutex); +#else + queue_init(utctx); #endif + if (!(p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_REQUIRE_QUEUING)) + return; + if (utctx->out) { utctx->out_printer->start(utctx); } @@ -498,20 +503,33 @@ void u_trace_perfetto_start(void) { simple_mtx_lock(&ctx_list_mutex); - list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) - queue_init(utctx); - simple_mtx_unlock(&ctx_list_mutex); - if (p_atomic_inc_return(&ut_perfetto_enabled) == 1) - p_atomic_inc(&_u_trace_instrument); + list_for_each_entry(struct u_trace_context, utctx, &ctx_list, node) { + queue_init(utctx); + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces | U_TRACE_TYPE_PERFETTO_ACTIVE); + } + + _u_trace_perfetto_count++; + + simple_mtx_unlock(&ctx_list_mutex); } void u_trace_perfetto_stop(void) { - assert(ut_perfetto_enabled > 0); - if (p_atomic_dec_return(&ut_perfetto_enabled) == 0) - p_atomic_dec(&_u_trace_instrument); + 
simple_mtx_lock(&ctx_list_mutex); + + assert(_u_trace_perfetto_count > 0); + _u_trace_perfetto_count--; + if (_u_trace_perfetto_count == 0) { + list_for_each_entry(struct u_trace_context, utctx, &ctx_list, node) { + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces & ~U_TRACE_TYPE_PERFETTO_ACTIVE); + } + } + + simple_mtx_unlock(&ctx_list_mutex); } #endif @@ -564,7 +582,8 @@ process_chunk(void *job, void *gdata, int thread_index) utctx->out_printer->event(utctx, chunk, evt, ns, delta); } #ifdef HAVE_PERFETTO - if (evt->tp->perfetto) { + if (evt->tp->perfetto && + (p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_PERFETTO_ACTIVE)) { evt->tp->perfetto(utctx->pctx, ns, chunk->flush_data, evt->payload); } #endif diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index a045f4b2355..1f9e99ee392 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -132,11 +132,33 @@ typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx, typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx, void *flush_data); +enum u_trace_type { + U_TRACE_TYPE_PRINT = 1u << 0, + U_TRACE_TYPE_JSON = 1u << 1, + U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2, + U_TRACE_TYPE_PERFETTO_ENV = 1u << 3, + + U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON, + U_TRACE_TYPE_PERFETTO = U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV, + + /* + * A mask of traces that require appending to the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_QUEUING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO, + /* + * A mask of traces that require processing the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_PROCESSING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE, +}; + /** * The trace context provides tracking for "in-flight" traces, once the * cmdstream that records timestamps has been flushed. 
*/ struct u_trace_context { + /* All traces enabled in this context */ + enum u_trace_type enabled_traces; + void *pctx; u_trace_create_ts_buffer create_timestamp_buffer; @@ -273,29 +295,32 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it, void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); #ifdef HAVE_PERFETTO -extern int ut_perfetto_enabled; +static ALWAYS_INLINE bool +u_trace_perfetto_active(struct u_trace_context* utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_PERFETTO_ACTIVE; +} void u_trace_perfetto_start(void); void u_trace_perfetto_stop(void); #else -# define ut_perfetto_enabled 0 +static ALWAYS_INLINE bool +u_trace_perfetto_active(UNUSED struct u_trace_context* utctx) { + return false; +} #endif /** - * Return whether instrumentations should be enabled or not. This is called - * from tracepoints. + * Return whether utrace is enabled at all or not, this can be used to + * gate any expensive traces. */ static ALWAYS_INLINE bool -u_trace_instrument(void) -{ - extern int _u_trace_instrument; - return p_atomic_read_relaxed(&_u_trace_instrument); +u_trace_enabled(struct u_trace_context *utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) != 0; } -static inline bool -u_trace_context_actively_tracing(struct u_trace_context *utctx) -{ - return !!utctx->out || (ut_perfetto_enabled > 0); +static ALWAYS_INLINE bool +u_trace_context_actively_tracing(struct u_trace_context *utctx) { + return p_atomic_read_relaxed(&utctx->enabled_traces) & U_TRACE_TYPE_REQUIRE_PROCESSING; } #ifdef __cplusplus diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py index bcdfcc7654c..bbcd942cc7b 100644 --- a/src/util/perf/u_trace.py +++ b/src/util/perf/u_trace.py @@ -236,6 +236,7 @@ void ${trace.tp_perfetto}( % endif void __trace_${trace_name}( struct u_trace *ut + , enum u_trace_type enabled_traces % if need_cs_param: , void *cs % endif @@ -252,11 +253,13 @@ static ALWAYS_INLINE void 
trace_${trace_name}( , ${arg.type} ${arg.var} % endfor ) { - if (!unlikely(u_trace_instrument() && + enum u_trace_type enabled_traces = p_atomic_read_relaxed(&ut->utctx->enabled_traces); + if (!unlikely(enabled_traces != 0 && ${trace.enabled_expr(trace_toggle_name)})) return; __trace_${trace_name}( ut + , enabled_traces % if need_cs_param: , cs % endif @@ -416,6 +419,7 @@ static const struct u_tracepoint __tp_${trace_name} = { }; void __trace_${trace_name}( struct u_trace *ut + , enum u_trace_type enabled_traces % if need_cs_param: , void *cs % endif @@ -423,11 +427,11 @@ void __trace_${trace_name}( , ${arg.type} ${arg.var} % endfor ) { - struct trace_${trace_name} *__entry = - (struct trace_${trace_name} *)u_trace_append(ut, ${cs_param_value + ","} &__tp_${trace_name}); - % if len(trace.tp_struct) == 0: - (void)__entry; - % endif + struct trace_${trace_name} entry; + UNUSED struct trace_${trace_name} *__entry = + enabled_traces & U_TRACE_TYPE_REQUIRE_QUEUING ? + (struct trace_${trace_name} *)u_trace_append(ut, ${cs_param_value + ","} &__tp_${trace_name}) : + &entry; % for arg in trace.tp_struct: __entry->${arg.name} = ${arg.var}; % endfor diff --git a/src/util/tests/perf/u_trace_test.cpp b/src/util/tests/perf/u_trace_test.cpp index decfe2cb7cb..6714983cdf7 100644 --- a/src/util/tests/perf/u_trace_test.cpp +++ b/src/util/tests/perf/u_trace_test.cpp @@ -10,7 +10,7 @@ static int test_thread(void *_state) { - struct u_trace_context ctx = { 0 }; + struct u_trace_context ctx = {}; u_trace_context_init(&ctx, NULL, NULL, NULL, NULL, NULL, NULL); u_trace_context_fini(&ctx);