From b2bf141b6af24575e90fb9953114c4bd80f24a4d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 1 Sep 2023 10:12:39 +0300 Subject: [PATCH] perfetto/pps-producer: add optimized cpu/gpu timestamp correlation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Intel Xe driver added the ability to do cpu/gpu timestamp correlation giving a much better alignment of timestamps (we used to have ~20us delta between the 2 samples, just because of the ioctl barrier potentially sneaking in some work). Signed-off-by: Lionel Landwerlin Reviewed-by: José Roberto de Souza Part-of: --- src/freedreno/ds/fd_pps_driver.cc | 7 +++++++ src/freedreno/ds/fd_pps_driver.h | 2 ++ src/intel/ds/intel_pps_driver.cc | 17 +++++++++++++++++ src/intel/ds/intel_pps_driver.h | 2 ++ src/panfrost/ds/pan_pps_driver.cpp | 7 +++++++ src/panfrost/ds/pan_pps_driver.h | 2 ++ src/tool/pps/pps_datasource.cc | 10 ++++++++-- src/tool/pps/pps_driver.h | 6 ++++++ 8 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/freedreno/ds/fd_pps_driver.cc b/src/freedreno/ds/fd_pps_driver.cc index 75654bd6fcd..2ec20b9592c 100644 --- a/src/freedreno/ds/fd_pps_driver.cc +++ b/src/freedreno/ds/fd_pps_driver.cc @@ -710,4 +710,11 @@ FreedrenoDriver::gpu_timestamp() const return perfetto::base::GetBootTimeNs().count(); } +bool +FreedrenoDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +{ + /* Not supported */ + return false; +} + } // namespace pps diff --git a/src/freedreno/ds/fd_pps_driver.h b/src/freedreno/ds/fd_pps_driver.h index 51749d0ce5b..2c1c686dde0 100644 --- a/src/freedreno/ds/fd_pps_driver.h +++ b/src/freedreno/ds/fd_pps_driver.h @@ -31,6 +31,8 @@ public: uint64_t next() override; uint32_t gpu_clock_id() const override; uint64_t gpu_timestamp() const override; + bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const override; private: struct fd_device *dev; diff --git a/src/intel/ds/intel_pps_driver.cc 
b/src/intel/ds/intel_pps_driver.cc index b1851b06d16..d434b05f64a 100644 --- a/src/intel/ds/intel_pps_driver.cc +++ b/src/intel/ds/intel_pps_driver.cc @@ -342,4 +342,21 @@ uint64_t IntelDriver::gpu_timestamp() const return intel_device_info_timebase_scale(&perf->devinfo, timestamp); } +bool IntelDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const +{ + if (!intel_gem_read_correlate_cpu_gpu_timestamp(drm_device.fd, + perf->devinfo.kmd_type, + INTEL_ENGINE_CLASS_RENDER, 0, + CLOCK_BOOTTIME, + &cpu_timestamp, + &gpu_timestamp, + NULL)) + return false; + + gpu_timestamp = + intel_device_info_timebase_scale(&perf->devinfo, gpu_timestamp); + return true; +} + } // namespace pps diff --git a/src/intel/ds/intel_pps_driver.h b/src/intel/ds/intel_pps_driver.h index fb02327c39f..15651c65d9b 100644 --- a/src/intel/ds/intel_pps_driver.h +++ b/src/intel/ds/intel_pps_driver.h @@ -49,6 +49,8 @@ class IntelDriver : public Driver uint64_t next() override; uint32_t gpu_clock_id() const override; uint64_t gpu_timestamp() const override; + bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const override; private: /// @brief Requests the next perf sample diff --git a/src/panfrost/ds/pan_pps_driver.cpp b/src/panfrost/ds/pan_pps_driver.cpp index 4ca44f4b2aa..7ac64049b0e 100644 --- a/src/panfrost/ds/pan_pps_driver.cpp +++ b/src/panfrost/ds/pan_pps_driver.cpp @@ -180,4 +180,11 @@ PanfrostDriver::gpu_timestamp() const return perfetto::base::GetBootTimeNs().count(); } +bool +PanfrostDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +{ + /* Not supported */ + return false; +} + } // namespace pps diff --git a/src/panfrost/ds/pan_pps_driver.h b/src/panfrost/ds/pan_pps_driver.h index 9392b9a5673..7b32d3b6af2 100644 --- a/src/panfrost/ds/pan_pps_driver.h +++ b/src/panfrost/ds/pan_pps_driver.h @@ -42,6 +42,8 @@ class PanfrostDriver : public Driver { uint64_t next() override; uint32_t gpu_clock_id() const override; uint64_t 
gpu_timestamp() const override; + bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const override; uint64_t last_dump_ts = 0; diff --git a/src/tool/pps/pps_datasource.cc b/src/tool/pps/pps_datasource.cc index 9242a403fdd..3adef8cf29c 100644 --- a/src/tool/pps/pps_datasource.cc +++ b/src/tool/pps/pps_datasource.cc @@ -245,8 +245,14 @@ void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver return; // Send a correlation event between GPU & CPU timestamps - uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); - uint64_t gpu_ts = driver->gpu_timestamp(); + uint64_t cpu_ts, gpu_ts; + + // Try to use the optimized driver correlation if available, otherwise do a + // separate CPU & GPU sample + if (!driver->cpu_gpu_timestamp(cpu_ts, gpu_ts)) { + cpu_ts = perfetto::base::GetBootTimeNs().count(); + gpu_ts = driver->gpu_timestamp(); + } { auto clock = event->add_clocks(); diff --git a/src/tool/pps/pps_driver.h b/src/tool/pps/pps_driver.h index 05791cafcb1..b46c0f58bf6 100644 --- a/src/tool/pps/pps_driver.h +++ b/src/tool/pps/pps_driver.h @@ -84,6 +84,12 @@ class Driver /// Sample a timestamp from the GPU virtual uint64_t gpu_timestamp() const = 0; + /// Sample a timestamp from both the CPU & the GPU + /// + /// This is useful when the driver can do a better timestamp correlation + /// than sampling separately CPU & GPU timestamps. + virtual bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const = 0; + DrmDevice drm_device; /// List of counter groups