tu/perfetto: Always emit submission event and time it

We previously missed submissions that don't sync timestamps.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31550>
This commit is contained in:
Danylo Piliaiev 2024-12-24 13:27:47 +01:00 committed by Marge Bot
parent e55de285cc
commit 5ae03b4aea
5 changed files with 57 additions and 27 deletions

View file

@ -805,6 +805,7 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
#if HAVE_PERFETTO
struct tu_perfetto_clocks clocks;
uint64_t start_ts = tu_perfetto_begin_submit();
#endif
uint32_t flags = MSM_PIPE_3D0;
@ -904,7 +905,8 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
p_atomic_set(&queue->fence, req.fence);
#if HAVE_PERFETTO
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, NULL);
clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, NULL);
gpu_offset = clocks.gpu_ts_offset;
#endif

View file

@ -881,6 +881,7 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
struct vdrm_execbuf_params params;
#if HAVE_PERFETTO
struct tu_perfetto_clocks clocks;
uint64_t start_ts = tu_perfetto_begin_submit();
#endif
/* It would be nice to not need to defer this, but virtio_device_init()
@ -1024,7 +1025,8 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
}
#if HAVE_PERFETTO
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, NULL);
clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, NULL);
gpu_offset = clocks.gpu_ts_offset;
#endif

View file

@ -1084,6 +1084,10 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
struct tu_kgsl_queue_submit *submit =
(struct tu_kgsl_queue_submit *)_submit;
#if HAVE_PERFETTO
uint64_t start_ts = tu_perfetto_begin_submit();
#endif
if (submit->commands.size == 0) {
const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
for (uint32_t i = 0; i < wait_count; i++) {
@ -1257,7 +1261,8 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
.gpu_ts_offset = gpu_offset,
};
clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, &clocks);
clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
start_ts, &clocks);
gpu_offset = clocks.gpu_ts_offset;
}
#endif

View file

@ -8,6 +8,7 @@
#include "tu_perfetto.h"
#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_queue.h"
#include "tu_image.h"
#include "util/hash_table.h"
@ -363,34 +364,21 @@ emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
});
}
static void
emit_submit_id(uint32_t submission_id)
uint64_t
tu_perfetto_begin_submit()
{
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
auto packet = tctx.NewTracePacket();
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
auto event = packet->set_vulkan_api_event();
auto submit = event->set_vk_queue_submit();
submit->set_submission_id(submission_id);
});
return perfetto::base::GetBootTimeNs().count();
}
struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev,
uint32_t submission_id,
struct tu_perfetto_clocks *gpu_clocks)
static struct tu_perfetto_clocks
sync_clocks(struct tu_device *dev,
const struct tu_perfetto_clocks *gpu_clocks)
{
struct tu_perfetto_clocks clocks {};
if (gpu_clocks) {
clocks = *gpu_clocks;
}
if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
return {};
clocks.cpu = perfetto::base::GetBootTimeNs().count();
if (gpu_clocks) {
@ -456,8 +444,36 @@ tu_perfetto_submit(struct tu_device *dev,
next_clock_sync_ns = clocks.cpu + 30000000;
}
emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
emit_submit_id(submission_id);
return clocks;
}
struct tu_perfetto_clocks
tu_perfetto_end_submit(struct tu_queue *queue,
uint32_t submission_id,
uint64_t start_ts,
struct tu_perfetto_clocks *gpu_clocks)
{
struct tu_device *dev = queue->device;
if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
return {};
struct tu_perfetto_clocks clocks = sync_clocks(dev, gpu_clocks);
if (clocks.gpu_ts > 0)
emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
auto packet = tctx.NewTracePacket();
packet->set_timestamp(start_ts);
auto event = packet->set_vulkan_api_event();
auto submit = event->set_vk_queue_submit();
submit->set_duration_ns(clocks.cpu - start_ts);
submit->set_vk_queue((uintptr_t) queue);
submit->set_submission_id(submission_id);
});
return clocks;
}

View file

@ -18,6 +18,7 @@ extern "C" {
#define TU_PERFETTO_MAX_STACK_DEPTH 8
struct tu_device;
struct tu_queue;
struct tu_u_trace_submission_data;
struct tu_perfetto_stage {
@ -46,10 +47,14 @@ struct tu_perfetto_clocks
uint64_t gpu_ts_offset;
};
uint64_t
tu_perfetto_begin_submit();
struct tu_perfetto_clocks
tu_perfetto_submit(struct tu_device *dev,
uint32_t submission_id,
struct tu_perfetto_clocks *clocks);
tu_perfetto_end_submit(struct tu_queue *queue,
uint32_t submission_id,
uint64_t start_ts,
struct tu_perfetto_clocks *clocks);
void tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer);
void tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer);