From 78d3b9cd0aca13779e8a166f155a671f4ebbc13e Mon Sep 17 00:00:00 2001 From: Olivia Lee Date: Fri, 4 Apr 2025 20:56:40 -0700 Subject: [PATCH] perfetto: allow specifying clock domain for cpu timestamps Everything is currently using CLOCK_BOOTTIME, which is perfetto's default, and matches the previous behavior. On some hardware, different clocks may be better synchronized with the gpu clock. Signed-off-by: Olivia Lee Reviewed-by: Eric R. Smith Reviewed-by: Mary Guillemard Part-of: --- src/freedreno/vulkan/tu_perfetto.cc | 4 +- .../drivers/freedreno/freedreno_perfetto.cc | 4 +- src/gallium/drivers/radeonsi/si_perfetto.cpp | 3 +- src/intel/ds/intel_driver_ds.cc | 4 +- src/panfrost/vulkan/panvk_utrace_perfetto.cc | 4 +- src/util/perf/u_perfetto.cc | 48 +++++++++++++++++-- src/util/perf/u_perfetto.h | 7 +++ src/util/perf/u_perfetto_renderpass.h | 7 ++- 8 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index 6dbf2ae323d..454e7b1b50f 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -366,10 +366,12 @@ tu_perfetto_init(void) static void emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts) { + uint32_t cpu_clock_id = perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; TuRenderpassDataSource::Trace([=](auto tctx) { MesaRenderpassDataSource::EmitClockSync(tctx, cpu_ts, - gpu_ts, gpu_clock_id); + gpu_ts, cpu_clock_id, + gpu_clock_id); }); } diff --git a/src/gallium/drivers/freedreno/freedreno_perfetto.cc b/src/gallium/drivers/freedreno/freedreno_perfetto.cc index 6d77f8b15bf..55bd7d858bb 100644 --- a/src/gallium/drivers/freedreno/freedreno_perfetto.cc +++ b/src/gallium/drivers/freedreno/freedreno_perfetto.cc @@ -289,6 +289,7 @@ sync_timestamp(struct fd_context *ctx) } /* get cpu timestamp again because FD_TIMESTAMP can take >100us */ + uint32_t cpu_clock_id = perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; cpu_ts = perfetto::base::GetBootTimeNs().count(); /* convert GPU ts into ns: */ @@ -297,7 +298,8 @@ sync_timestamp(struct fd_context *ctx) FdRenderpassDataSource::Trace([=](auto tctx) { MesaRenderpassDataSource::EmitClockSync(tctx, cpu_ts, - gpu_ts, gpu_clock_id); + gpu_ts, cpu_clock_id, + gpu_clock_id); }); sync_gpu_ts = gpu_ts; diff --git a/src/gallium/drivers/radeonsi/si_perfetto.cpp b/src/gallium/drivers/radeonsi/si_perfetto.cpp index 1f24b4ae9fa..4f4792e0cfc 100644 --- a/src/gallium/drivers/radeonsi/si_perfetto.cpp +++ b/src/gallium/drivers/radeonsi/si_perfetto.cpp @@ -69,6 +69,7 @@ static void sync_timestamp(SIRenderpassDataSource::TraceContext &ctx, struct si_ struct si_context *sctx = container_of(device, struct si_context, ds); gpu_ts = sctx->screen->b.get_timestamp(&sctx->screen->b); + uint32_t cpu_clock_id = perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; cpu_ts = perfetto::base::GetBootTimeNs().count(); if (cpu_ts < device->next_clock_sync_ns) @@ -79,7 +80,7 @@ static void sync_timestamp(SIRenderpassDataSource::TraceContext &ctx, struct si_ device->sync_gpu_ts = gpu_ts; device->next_clock_sync_ns = cpu_ts + 1000000000ull; MesaRenderpassDataSource:: - EmitClockSync(ctx, cpu_ts, gpu_ts, device->gpu_clock_id); + EmitClockSync(ctx, cpu_ts, gpu_ts, cpu_clock_id, device->gpu_clock_id); } static void send_descriptors(SIRenderpassDataSource::TraceContext &ctx, diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 152563257c3..c11160652fe 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -137,6 +137,8 @@ sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx, intel_gem_read_render_timestamp(device->fd, device->info.kmd_type, &gpu_ts); } + + uint32_t cpu_clock_id = perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; gpu_ts = intel_device_info_timebase_scale(&device->info, gpu_ts); if (cpu_ts < device->next_clock_sync_ns) @@ -148,7 +150,7 @@ sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx, device->next_clock_sync_ns = cpu_ts + 1000000000ull; MesaRenderpassDataSource::EmitClockSync(ctx, - cpu_ts, gpu_ts, device->gpu_clock_id); + cpu_ts, gpu_ts, cpu_clock_id, device->gpu_clock_id); } static void diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.cc b/src/panfrost/vulkan/panvk_utrace_perfetto.cc index ffbdfe24faa..1f899c1c07d 100644 --- a/src/panfrost/vulkan/panvk_utrace_perfetto.cc +++ b/src/panfrost/vulkan/panvk_utrace_perfetto.cc @@ -99,10 +99,12 @@ emit_clock_snapshot_packet(struct panvk_device *dev, { const struct panvk_utrace_perfetto *utp = &dev->utrace.utp; const uint64_t gpu_ns = get_gpu_time_ns(dev); + const uint32_t cpu_clock_id = + perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count(); MesaRenderpassDataSource:: - EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id); + EmitClockSync(ctx, cpu_ns, gpu_ns, cpu_clock_id, utp->gpu_clock_id); } static void diff --git a/src/util/perf/u_perfetto.cc b/src/util/perf/u_perfetto.cc index e612b333991..d34d470780f 100644 --- a/src/util/perf/u_perfetto.cc +++ b/src/util/perf/u_perfetto.cc @@ -30,7 +30,9 @@ #endif #include "c11/threads.h" +#include "util/u_call_once.h" #include "util/macros.h" +#include "util/timespec.h" /* perfetto requires string literals */ #define UTIL_PERFETTO_CATEGORY_DEFAULT_STR "mesa.default" @@ -63,6 +65,10 @@ clockid_to_perfetto_clock(UNUSED perfetto_clock_id clock) #endif } +/* Default clock domain used for timestamps when not using the 'full' + * functions (which take an explicit timestamp and clock id). */ +static perfetto_clock_id util_perfetto_default_clock = CLOCK_BOOTTIME; + static void util_perfetto_update_tracing_state(void) { @@ -70,18 +76,47 @@ util_perfetto_update_tracing_state(void) TRACE_EVENT_CATEGORY_ENABLED(UTIL_PERFETTO_CATEGORY_DEFAULT_STR)); } +void +util_perfetto_set_default_clock(perfetto_clock_id clock) +{ + p_atomic_set(&util_perfetto_default_clock, clock); +} + +static perfetto_clock_id +util_perfetto_get_default_clock() +{ + return p_atomic_read_relaxed(&util_perfetto_default_clock); +} + +static perfetto::TraceTimestamp +util_perfetto_now(perfetto_clock_id clock) +{ + uint32_t perfetto_clock = clockid_to_perfetto_clock(clock); +#if DETECT_OS_POSIX + struct timespec time; + clock_gettime(clock, &time); + uint64_t timestamp = timespec_to_nsec(&time); +#else + uint64_t timestamp = perfetto::base::GetWallTimeRawNs().count(); +#endif + return perfetto::TraceTimestamp{perfetto_clock, timestamp}; +} + void util_perfetto_trace_begin(const char *name) { TRACE_EVENT_BEGIN( UTIL_PERFETTO_CATEGORY_DEFAULT_STR, nullptr, + util_perfetto_now(util_perfetto_get_default_clock()), [&](perfetto::EventContext ctx) { ctx.event()->set_name(name); }); } void util_perfetto_trace_end(void) { - TRACE_EVENT_END(UTIL_PERFETTO_CATEGORY_DEFAULT_STR); + TRACE_EVENT_END( + UTIL_PERFETTO_CATEGORY_DEFAULT_STR, + util_perfetto_now(util_perfetto_get_default_clock()) ); util_perfetto_update_tracing_state(); } @@ -90,7 +125,9 @@ void util_perfetto_trace_begin_flow(const char *fname, uint64_t id) { TRACE_EVENT_BEGIN( - UTIL_PERFETTO_CATEGORY_DEFAULT_STR, nullptr, perfetto::Flow::ProcessScoped(id), + UTIL_PERFETTO_CATEGORY_DEFAULT_STR, nullptr, + util_perfetto_now(util_perfetto_get_default_clock()), + perfetto::Flow::ProcessScoped(id), [&](perfetto::EventContext ctx) { ctx.event()->set_name(fname); }); } @@ -129,8 +166,11 @@ util_perfetto_trace_full_end(const char *name, uint64_t track_id, perfetto_clock void util_perfetto_counter_set(const char *name, double value) { - TRACE_COUNTER(UTIL_PERFETTO_CATEGORY_DEFAULT_STR, - perfetto::DynamicString(name), value); + TRACE_COUNTER( + UTIL_PERFETTO_CATEGORY_DEFAULT_STR, + perfetto::DynamicString(name), + util_perfetto_now(util_perfetto_get_default_clock()), + value); } uint64_t diff --git a/src/util/perf/u_perfetto.h b/src/util/perf/u_perfetto.h index eadf7062f2e..8029441623f 100644 --- a/src/util/perf/u_perfetto.h +++ b/src/util/perf/u_perfetto.h @@ -52,6 +52,8 @@ util_perfetto_is_tracing_enabled(void) return p_atomic_read_relaxed(&util_perfetto_tracing_state); } +void util_perfetto_set_default_clock(perfetto_clock_id default_clock); + void util_perfetto_trace_begin(const char *name); void util_perfetto_trace_end(void); @@ -81,6 +83,11 @@ util_perfetto_is_tracing_enabled(void) return false; } +static inline void +util_perfetto_set_default_clock(perfetto_clock_id clock) +{ +} + static inline void util_perfetto_trace_begin(const char *name) { diff --git a/src/util/perf/u_perfetto_renderpass.h b/src/util/perf/u_perfetto_renderpass.h index 1ad73d51892..2d4b5433fab 100644 --- a/src/util/perf/u_perfetto_renderpass.h +++ b/src/util/perf/u_perfetto_renderpass.h @@ -122,12 +122,12 @@ class MesaRenderpassDataSource static void EmitClockSync(TraceContext &ctx, uint64_t cpu_ts, uint64_t gpu_ts, + uint32_t cpu_clock_id, uint32_t gpu_clock_id) { auto packet = ctx.NewTracePacket(); - packet->set_timestamp_clock_id( - perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + packet->set_timestamp_clock_id(cpu_clock_id); packet->set_timestamp(cpu_ts); auto event = packet->set_clock_snapshot(); @@ -135,8 +135,7 @@ class MesaRenderpassDataSource { auto clock = event->add_clocks(); - clock->set_clock_id( - perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + clock->set_clock_id(cpu_clock_id); clock->set_timestamp(cpu_ts); }