tu/perfetto: Always emit submission event and time it

Previously, no submission event was emitted for submissions that did not sync timestamps.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31550>
This commit is contained in:
Danylo Piliaiev 2024-12-24 13:27:47 +01:00 committed by Marge Bot
parent e55de285cc
commit 5ae03b4aea
5 changed files with 57 additions and 27 deletions

View file

@ -805,6 +805,7 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd); util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
#if HAVE_PERFETTO #if HAVE_PERFETTO
struct tu_perfetto_clocks clocks; struct tu_perfetto_clocks clocks;
uint64_t start_ts = tu_perfetto_begin_submit();
#endif #endif
uint32_t flags = MSM_PIPE_3D0; uint32_t flags = MSM_PIPE_3D0;
@ -904,7 +905,8 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
p_atomic_set(&queue->fence, req.fence); p_atomic_set(&queue->fence, req.fence);
#if HAVE_PERFETTO #if HAVE_PERFETTO
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, NULL); clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, NULL);
gpu_offset = clocks.gpu_ts_offset; gpu_offset = clocks.gpu_ts_offset;
#endif #endif

View file

@ -881,6 +881,7 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
struct vdrm_execbuf_params params; struct vdrm_execbuf_params params;
#if HAVE_PERFETTO #if HAVE_PERFETTO
struct tu_perfetto_clocks clocks; struct tu_perfetto_clocks clocks;
uint64_t start_ts = tu_perfetto_begin_submit();
#endif #endif
/* It would be nice to not need to defer this, but virtio_device_init() /* It would be nice to not need to defer this, but virtio_device_init()
@ -1024,7 +1025,8 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
} }
#if HAVE_PERFETTO #if HAVE_PERFETTO
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, NULL); clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, NULL);
gpu_offset = clocks.gpu_ts_offset; gpu_offset = clocks.gpu_ts_offset;
#endif #endif

View file

@ -1084,6 +1084,10 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
struct tu_kgsl_queue_submit *submit = struct tu_kgsl_queue_submit *submit =
(struct tu_kgsl_queue_submit *)_submit; (struct tu_kgsl_queue_submit *)_submit;
#if HAVE_PERFETTO
uint64_t start_ts = tu_perfetto_begin_submit();
#endif
if (submit->commands.size == 0) { if (submit->commands.size == 0) {
const struct kgsl_syncobj *wait_semaphores[wait_count + 1]; const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
for (uint32_t i = 0; i < wait_count; i++) { for (uint32_t i = 0; i < wait_count; i++) {
@ -1257,7 +1261,8 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
.gpu_ts_offset = gpu_offset, .gpu_ts_offset = gpu_offset,
}; };
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, &clocks); clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, &clocks);
gpu_offset = clocks.gpu_ts_offset; gpu_offset = clocks.gpu_ts_offset;
} }
#endif #endif

View file

@ -8,6 +8,7 @@
#include "tu_perfetto.h" #include "tu_perfetto.h"
#include "tu_buffer.h" #include "tu_buffer.h"
#include "tu_device.h" #include "tu_device.h"
#include "tu_queue.h"
#include "tu_image.h" #include "tu_image.h"
#include "util/hash_table.h" #include "util/hash_table.h"
@ -363,34 +364,21 @@ emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
}); });
} }
static void uint64_t
emit_submit_id(uint32_t submission_id) tu_perfetto_begin_submit()
{ {
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { return perfetto::base::GetBootTimeNs().count();
auto packet = tctx.NewTracePacket();
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
auto event = packet->set_vulkan_api_event();
auto submit = event->set_vk_queue_submit();
submit->set_submission_id(submission_id);
});
} }
struct tu_perfetto_clocks static struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev, sync_clocks(struct tu_device *dev,
uint32_t submission_id, const struct tu_perfetto_clocks *gpu_clocks)
struct tu_perfetto_clocks *gpu_clocks)
{ {
struct tu_perfetto_clocks clocks {}; struct tu_perfetto_clocks clocks {};
if (gpu_clocks) { if (gpu_clocks) {
clocks = *gpu_clocks; clocks = *gpu_clocks;
} }
if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
return {};
clocks.cpu = perfetto::base::GetBootTimeNs().count(); clocks.cpu = perfetto::base::GetBootTimeNs().count();
if (gpu_clocks) { if (gpu_clocks) {
@ -456,8 +444,36 @@ tu_perfetto_submit(struct tu_device *dev,
next_clock_sync_ns = clocks.cpu + 30000000; next_clock_sync_ns = clocks.cpu + 30000000;
} }
emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset); return clocks;
emit_submit_id(submission_id); }
/* Finish tracing one queue submission: refresh the clock data through
 * sync_clocks() and emit a vk_queue_submit event covering the submission.
 *
 * queue:         queue being submitted to; its device is used for the
 *                "is perfetto active" check, and the pointer value is
 *                recorded as the event's vk_queue.
 * submission_id: id recorded in the emitted event.
 * start_ts:      timestamp of the start of the submission; callers pass the
 *                value obtained from tu_perfetto_begin_submit().
 * gpu_clocks:    optional clock data forwarded to sync_clocks(); callers
 *                may pass NULL.
 *
 * Returns the clocks produced by sync_clocks(), or a value-initialized
 * struct when perfetto tracing is not active for the device.
 */
struct tu_perfetto_clocks
tu_perfetto_end_submit(struct tu_queue *queue,
uint32_t submission_id,
uint64_t start_ts,
struct tu_perfetto_clocks *gpu_clocks)
{
struct tu_device *dev = queue->device;
/* Nothing is synced or emitted when tracing is inactive. */
if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
return {};
struct tu_perfetto_clocks clocks = sync_clocks(dev, gpu_clocks);
/* Only emit a CPU/GPU sync point when a GPU timestamp is present.
 * NOTE(review): presumably gpu_ts stays 0 when sync_clocks() did not
 * sample the GPU clock for this submission -- confirm in sync_clocks().
 */
if (clocks.gpu_ts > 0)
emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
/* The submit event is emitted regardless of whether a sync point was
 * written above.
 */
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
auto packet = tctx.NewTracePacket();
/* The packet is stamped with the start of the submission. */
packet->set_timestamp(start_ts);
auto event = packet->set_vulkan_api_event();
auto submit = event->set_vk_queue_submit();
/* Duration is measured from start_ts up to clocks.cpu. */
submit->set_duration_ns(clocks.cpu - start_ts);
submit->set_vk_queue((uintptr_t) queue);
submit->set_submission_id(submission_id);
});
return clocks; return clocks;
} }

View file

@ -18,6 +18,7 @@ extern "C" {
#define TU_PERFETTO_MAX_STACK_DEPTH 8 #define TU_PERFETTO_MAX_STACK_DEPTH 8
struct tu_device; struct tu_device;
struct tu_queue;
struct tu_u_trace_submission_data; struct tu_u_trace_submission_data;
struct tu_perfetto_stage { struct tu_perfetto_stage {
@ -46,10 +47,14 @@ struct tu_perfetto_clocks
uint64_t gpu_ts_offset; uint64_t gpu_ts_offset;
}; };
uint64_t
tu_perfetto_begin_submit();
struct tu_perfetto_clocks struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev, tu_perfetto_end_submit(struct tu_queue *queue,
uint32_t submission_id, uint32_t submission_id,
struct tu_perfetto_clocks *clocks); uint64_t start_ts,
struct tu_perfetto_clocks *clocks);
void tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer); void tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer);
void tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer); void tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer);