mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
panvk: add u_trace perfetto support
When perfetto is enabled, u_trace_context calls driver-defined callbacks when processing trace events. Those callbacks are expected to emit perfetto trace packets. Signed-off-by: Chia-I Wu <olvaffe@gmail.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32360>
This commit is contained in:
parent
7c61e2346d
commit
576a87a590
8 changed files with 400 additions and 5 deletions
|
|
@ -88,7 +88,7 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
|
|||
void
|
||||
panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
|
||||
{
|
||||
u_trace_context_init(&dev->utrace.utctx, NULL, sizeof(uint64_t), 0,
|
||||
u_trace_context_init(&dev->utrace.utctx, dev, sizeof(uint64_t), 0,
|
||||
panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
|
||||
panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
|
||||
NULL, panvk_utrace_delete_flush_data);
|
||||
|
|
|
|||
|
|
@ -21,12 +21,15 @@ panvk_entrypoints = custom_target(
|
|||
panvk_tracepoints = custom_target(
|
||||
'panvk_tracepoints.[ch]',
|
||||
input: 'panvk_tracepoints.py',
|
||||
output: ['panvk_tracepoints.h', 'panvk_tracepoints.c'],
|
||||
output: ['panvk_tracepoints.h',
|
||||
'panvk_tracepoints_perfetto.h',
|
||||
'panvk_tracepoints.c'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'--import-path', join_paths(dir_source_root, 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--utrace-src', '@OUTPUT1@',
|
||||
'--perfetto-hdr', '@OUTPUT1@',
|
||||
'--utrace-src', '@OUTPUT2@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
|
@ -149,6 +152,11 @@ foreach arch : [6, 7, 10]
|
|||
)
|
||||
endforeach
|
||||
|
||||
if with_perfetto
|
||||
panvk_deps += dep_perfetto
|
||||
libpanvk_files += ['panvk_utrace_perfetto.cc']
|
||||
endif
|
||||
|
||||
if with_platform_wayland
|
||||
panvk_deps += dep_wayland_client
|
||||
libpanvk_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c]
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#include "panvk_mempool.h"
|
||||
#include "panvk_meta.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_utrace_perfetto.h"
|
||||
|
||||
#include "kmod/pan_kmod.h"
|
||||
#include "util/pan_ir.h"
|
||||
|
|
@ -64,6 +65,9 @@ struct panvk_device {
|
|||
|
||||
struct {
|
||||
struct u_trace_context utctx;
|
||||
#ifdef HAVE_PERFETTO
|
||||
struct panvk_utrace_perfetto utp;
|
||||
#endif
|
||||
} utrace;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ def parse_args():
|
|||
parser.add_argument('-p', '--import-path', required=True)
|
||||
parser.add_argument('--utrace-src', required=True)
|
||||
parser.add_argument('--utrace-hdr', required=True)
|
||||
parser.add_argument('--perfetto-hdr', required=True)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
|
@ -20,20 +21,23 @@ from u_trace import ForwardDecl, Header, HeaderScope # noqa: E402
|
|||
from u_trace import Tracepoint # noqa: E402
|
||||
from u_trace import TracepointArg as Arg # noqa: E402
|
||||
from u_trace import TracepointArgStruct as ArgStruct # noqa: E402
|
||||
from u_trace import utrace_generate # noqa: E402
|
||||
from u_trace import utrace_generate, utrace_generate_perfetto_utils # noqa: E402
|
||||
|
||||
Header('vulkan/vulkan_core.h', scope=HeaderScope.HEADER)
|
||||
ForwardDecl('struct panvk_device')
|
||||
|
||||
|
||||
def begin_end_tp(name, args=[], tp_struct=None):
|
||||
Tracepoint(
|
||||
f'begin_{name}',
|
||||
tp_perfetto=f'panvk_utrace_perfetto_begin_{name}',
|
||||
)
|
||||
|
||||
Tracepoint(
|
||||
f'end_{name}',
|
||||
args=args,
|
||||
tp_struct=tp_struct,
|
||||
tp_perfetto=f'panvk_utrace_perfetto_end_{name}',
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -51,7 +55,13 @@ def define_tracepoints():
|
|||
|
||||
|
||||
def generate_code():
|
||||
utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param=None)
|
||||
utrace_generate(
|
||||
cpath=args.utrace_src,
|
||||
hpath=args.utrace_hdr,
|
||||
ctx_param='struct panvk_device *dev',
|
||||
)
|
||||
|
||||
utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
|||
295
src/panfrost/vulkan/panvk_utrace_perfetto.cc
Normal file
295
src/panfrost/vulkan/panvk_utrace_perfetto.cc
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
/*
|
||||
* Copyright 2024 Google LLC
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "panvk_utrace_perfetto.h"
|
||||
|
||||
#include <functional>
|
||||
#include <perfetto.h>
|
||||
|
||||
#include "c11/threads.h"
|
||||
#include "util/log.h"
|
||||
#include "util/perf/u_perfetto.h"
|
||||
#include "util/perf/u_perfetto_renderpass.h"
|
||||
#include "util/timespec.h"
|
||||
#include "util/u_process.h"
|
||||
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_tracepoints.h"
|
||||
#include "panvk_tracepoints_perfetto.h"
|
||||
#include "panvk_utrace.h"
|
||||
|
||||
/* Incremental state type of the PanVK render-stage data source (selected via
 * PanVKRenderpassTraits).  was_cleared starts out true; emit_setup_packets()
 * emits the interned data and resets it to false when it finds it set.
 */
struct PanVKRenderpassIncrementalState {
|
||||
bool was_cleared = true;
|
||||
};
|
||||
|
||||
/* Data source traits: the perfetto defaults, with IncrementalStateType
 * set to PanVKRenderpassIncrementalState.
 */
struct PanVKRenderpassTraits : public perfetto::DefaultDataSourceTraits {
|
||||
using IncrementalStateType = PanVKRenderpassIncrementalState;
|
||||
};
|
||||
|
||||
/* PanVK instantiation of the shared MesaRenderpassDataSource template, using
 * PanVKRenderpassTraits.  It adds no members of its own.
 */
class PanVKRenderpassDataSource
|
||||
: public MesaRenderpassDataSource<PanVKRenderpassDataSource,
|
||||
PanVKRenderpassTraits> {};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
|
||||
|
||||
static const char *
|
||||
get_stage_name(enum panvk_utrace_perfetto_stage stage)
|
||||
{
|
||||
switch (stage) {
|
||||
#define CASE(x) \
|
||||
case PANVK_UTRACE_PERFETTO_STAGE_##x: \
|
||||
return #x
|
||||
CASE(CMDBUF);
|
||||
#undef CASE
|
||||
default:
|
||||
unreachable("bad stage");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_interned_data_packet(struct panvk_device *dev,
|
||||
PanVKRenderpassDataSource::TraceContext &ctx,
|
||||
uint64_t now)
|
||||
{
|
||||
const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(now);
|
||||
packet->set_sequence_flags(
|
||||
perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
|
||||
|
||||
auto interned_data = packet->set_interned_data();
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++) {
|
||||
char name[64];
|
||||
snprintf(name, sizeof(name), "%s-queue-%d", util_get_process_name(), i);
|
||||
|
||||
auto specs = interned_data->add_gpu_specifications();
|
||||
specs->set_iid(utp->queue_iids[i]);
|
||||
specs->set_name(name);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++) {
|
||||
auto specs = interned_data->add_gpu_specifications();
|
||||
specs->set_iid(utp->stage_iids[i]);
|
||||
specs->set_name(get_stage_name((enum panvk_utrace_perfetto_stage)i));
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_gpu_time_ns(struct panvk_device *dev)
|
||||
{
|
||||
const struct panvk_physical_device *pdev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
|
||||
|
||||
const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
|
||||
return ts * NSEC_PER_SEC / props->timestamp_frequency;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_clock_snapshot_packet(struct panvk_device *dev,
|
||||
PanVKRenderpassDataSource::TraceContext &ctx)
|
||||
{
|
||||
const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
const uint64_t gpu_ns = get_gpu_time_ns(dev);
|
||||
const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count();
|
||||
|
||||
MesaRenderpassDataSource<PanVKRenderpassDataSource, PanVKRenderpassTraits>::
|
||||
EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_setup_packets(struct panvk_device *dev,
|
||||
PanVKRenderpassDataSource::TraceContext &ctx)
|
||||
{
|
||||
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
|
||||
const uint64_t now = perfetto::base::GetBootTimeNs().count();
|
||||
|
||||
/* emit interned data if cleared */
|
||||
auto state = ctx.GetIncrementalState();
|
||||
if (state->was_cleared) {
|
||||
emit_interned_data_packet(dev, ctx, now);
|
||||
|
||||
state->was_cleared = false;
|
||||
utp->next_clock_snapshot = 0;
|
||||
}
|
||||
|
||||
/* emit clock snapshots periodically */
|
||||
if (now >= utp->next_clock_snapshot) {
|
||||
emit_clock_snapshot_packet(dev, ctx);
|
||||
|
||||
utp->next_clock_snapshot = now + NSEC_PER_SEC;
|
||||
}
|
||||
}
|
||||
|
||||
static struct panvk_utrace_perfetto_event *
|
||||
begin_event(struct panvk_device *dev,
|
||||
const struct panvk_utrace_flush_data *data,
|
||||
enum panvk_utrace_perfetto_stage stage)
|
||||
{
|
||||
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
|
||||
struct panvk_utrace_perfetto_event *ev = &queue->stack[queue->stack_depth++];
|
||||
|
||||
assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
|
||||
|
||||
if (queue->stack_depth > PANVK_UTRACE_PERFETTO_STACK_DEPTH) {
|
||||
PERFETTO_ELOG("queue %d stage %d too deep", data->subqueue, stage);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ev->stage = stage;
|
||||
return ev;
|
||||
}
|
||||
|
||||
static struct panvk_utrace_perfetto_event *
|
||||
end_event(struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
|
||||
enum panvk_utrace_perfetto_stage stage)
|
||||
{
|
||||
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
|
||||
|
||||
assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
|
||||
|
||||
if (!queue->stack_depth)
|
||||
return NULL;
|
||||
|
||||
struct panvk_utrace_perfetto_event *ev = &queue->stack[--queue->stack_depth];
|
||||
if (queue->stack_depth >= PANVK_UTRACE_PERFETTO_STACK_DEPTH)
|
||||
return NULL;
|
||||
|
||||
assert(ev->stage == stage);
|
||||
return ev;
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_utrace_perfetto_begin_event(struct panvk_device *dev,
|
||||
const struct panvk_utrace_flush_data *data,
|
||||
enum panvk_utrace_perfetto_stage stage,
|
||||
uint64_t ts_ns)
|
||||
{
|
||||
struct panvk_utrace_perfetto_event *ev = begin_event(dev, data, stage);
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
ev->begin_ns = ts_ns;
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_utrace_perfetto_end_event(
|
||||
struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
|
||||
enum panvk_utrace_perfetto_stage stage, uint64_t ts_ns,
|
||||
std::function<void(perfetto::protos::pbzero::GpuRenderStageEvent *)>
|
||||
emit_event_extra)
|
||||
{
|
||||
const struct panvk_utrace_perfetto_event *ev = end_event(dev, data, stage);
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
PanVKRenderpassDataSource::Trace(
|
||||
[=](PanVKRenderpassDataSource::TraceContext ctx) {
|
||||
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
|
||||
emit_setup_packets(dev, ctx);
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(ev->begin_ns);
|
||||
packet->set_timestamp_clock_id(utp->gpu_clock_id);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_event_id(utp->event_id++);
|
||||
event->set_duration(ts_ns - ev->begin_ns);
|
||||
event->set_hw_queue_iid(utp->queue_iids[data->subqueue]);
|
||||
event->set_stage_iid(utp->stage_iids[stage]);
|
||||
event->set_context(utp->device_id);
|
||||
|
||||
emit_event_extra(event);
|
||||
});
|
||||
}
|
||||
|
||||
/* Define the pair of perfetto callbacks for tracepoint `tp`:
 *
 *  - panvk_utrace_perfetto_begin_<tp>() records the begin timestamp for
 *    PANVK_UTRACE_PERFETTO_STAGE_<stage>; it asserts that it receives no
 *    payload and no indirect data.
 *  - panvk_utrace_perfetto_end_<tp>() emits the stage event and appends
 *    the tracepoint payload through trace_payload_as_extra_end_<tp>().
 */
#define PANVK_UTRACE_PERFETTO_PROCESS_EVENT(tp, stage)                       \
   void panvk_utrace_perfetto_begin_##tp(                                    \
      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,             \
      const void *flush_data, const struct trace_begin_##tp *payload,        \
      const void *indirect_data)                                             \
   {                                                                         \
      /* we can ignore them or save them if we choose to */                  \
      assert(!payload && !indirect_data);                                    \
      panvk_utrace_perfetto_begin_event(                                     \
         dev, static_cast<const struct panvk_utrace_flush_data *>(           \
                 flush_data),                                                \
         PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns);                        \
   }                                                                         \
                                                                             \
   void panvk_utrace_perfetto_end_##tp(                                      \
      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,             \
      const void *flush_data, const struct trace_end_##tp *payload,          \
      const void *indirect_data)                                             \
   {                                                                         \
      auto append_payload =                                                  \
         [=](perfetto::protos::pbzero::GpuRenderStageEvent *event) {         \
            trace_payload_as_extra_end_##tp(event, payload, indirect_data);  \
         };                                                                  \
      panvk_utrace_perfetto_end_event(                                       \
         dev, static_cast<const struct panvk_utrace_flush_data *>(           \
                 flush_data),                                                \
         PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns, append_payload);        \
   }
|
||||
|
||||
/* u_trace_context_process dispatches trace events to a background thread
|
||||
* (traceq) for processing. These callbacks are called from traceq.
|
||||
*/
|
||||
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf, CMDBUF)
|
||||
|
||||
static uint32_t
|
||||
get_gpu_clock_id(void)
|
||||
{
|
||||
/* see https://perfetto.dev/docs/concepts/clock-sync */
|
||||
return _mesa_hash_string("org.freedesktop.mesa.panfrost") | 0x80000000;
|
||||
}
|
||||
|
||||
static void
|
||||
register_data_source(void)
|
||||
{
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("gpu.renderstages.panfrost");
|
||||
PanVKRenderpassDataSource::Register(dsd);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
|
||||
{
|
||||
const struct panvk_physical_device *pdev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
|
||||
struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
|
||||
|
||||
if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
|
||||
assert(!"PANVK_UTRACE_PERFETTO_QUEUE_COUNT too small");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check for timestamp support */
|
||||
if (!props->gpu_can_query_timestamp || !props->timestamp_frequency ||
|
||||
!get_gpu_time_ns(dev)) {
|
||||
mesa_logw("disable perfetto because gpu timestamp is missing");
|
||||
return;
|
||||
}
|
||||
|
||||
utp->gpu_clock_id = get_gpu_clock_id();
|
||||
utp->device_id = (uintptr_t)dev;
|
||||
|
||||
uint64_t next_iid = 1;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++)
|
||||
utp->queue_iids[i] = next_iid++;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++)
|
||||
utp->stage_iids[i] = next_iid++;
|
||||
|
||||
util_perfetto_init();
|
||||
|
||||
static once_flag register_ds_once = ONCE_FLAG_INIT;
|
||||
call_once(®ister_ds_once, register_data_source);
|
||||
}
|
||||
66
src/panfrost/vulkan/panvk_utrace_perfetto.h
Normal file
66
src/panfrost/vulkan/panvk_utrace_perfetto.h
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright 2024 Google LLC
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_UTRACE_PERFETTO_H
|
||||
#define PANVK_UTRACE_PERFETTO_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* must be at least PANVK_SUBQUEUE_COUNT */
|
||||
#define PANVK_UTRACE_PERFETTO_QUEUE_COUNT 3
|
||||
#define PANVK_UTRACE_PERFETTO_STACK_DEPTH 8
|
||||
|
||||
struct panvk_device;
|
||||
|
||||
/* Stages that events are attributed to.  PANVK_UTRACE_PERFETTO_STAGE_COUNT
 * is not a stage; it is the number of stages and sizes the stage_iids array
 * in struct panvk_utrace_perfetto.
 */
enum panvk_utrace_perfetto_stage {
|
||||
PANVK_UTRACE_PERFETTO_STAGE_CMDBUF,
|
||||
PANVK_UTRACE_PERFETTO_STAGE_COUNT,
|
||||
};
|
||||
|
||||
/* One entry of a per-queue event stack: the stage the event belongs to and
 * its begin timestamp in nanoseconds.
 */
struct panvk_utrace_perfetto_event {
|
||||
enum panvk_utrace_perfetto_stage stage;
|
||||
uint64_t begin_ns;
|
||||
};
|
||||
|
||||
/* Per-queue stack of events that have begun but not yet ended.  The stack
 * holds at most PANVK_UTRACE_PERFETTO_STACK_DEPTH entries; stack_depth
 * counts the events pushed so far.
 */
struct panvk_utrace_perfetto_queue {
|
||||
struct panvk_utrace_perfetto_event stack[PANVK_UTRACE_PERFETTO_STACK_DEPTH];
|
||||
uint32_t stack_depth;
|
||||
};
|
||||
|
||||
/* Per-device perfetto state.
 *
 *  gpu_clock_id         perfetto clock id used for GPU timestamps
 *  device_id            id reported as the context of each event
 *  queue_iids           interned id of each queue name
 *  stage_iids           interned id of each stage name
 *  next_clock_snapshot  time (ns) at which the next clock snapshot is due
 *  event_id             id to assign to the next emitted event
 *  queues               per-queue stacks of in-flight events
 */
struct panvk_utrace_perfetto {
|
||||
uint32_t gpu_clock_id;
|
||||
uint64_t device_id;
|
||||
|
||||
uint64_t queue_iids[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
|
||||
uint64_t stage_iids[PANVK_UTRACE_PERFETTO_STAGE_COUNT];
|
||||
|
||||
uint64_t next_clock_snapshot;
|
||||
uint64_t event_id;
|
||||
|
||||
struct panvk_utrace_perfetto_queue queues[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
|
||||
};
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
void panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count);
|
||||
|
||||
#else /* HAVE_PERFETTO */
|
||||
|
||||
/* Stub used when HAVE_PERFETTO is not defined: does nothing. */
static inline void
|
||||
panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* HAVE_PERFETTO */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* PANVK_UTRACE_PERFETTO_H */
|
||||
|
|
@ -24,6 +24,7 @@
|
|||
#include "panvk_priv_bo.h"
|
||||
#include "panvk_queue.h"
|
||||
#include "panvk_utrace.h"
|
||||
#include "panvk_utrace_perfetto.h"
|
||||
|
||||
#include "genxml/decode.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
|
|
@ -357,6 +358,11 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
|
|||
}
|
||||
|
||||
panvk_per_arch(utrace_context_init)(device);
|
||||
#if PAN_ARCH >= 10
|
||||
panvk_utrace_perfetto_init(device, PANVK_SUBQUEUE_COUNT);
|
||||
#else
|
||||
panvk_utrace_perfetto_init(device, 2);
|
||||
#endif
|
||||
|
||||
*pDevice = panvk_device_to_handle(device);
|
||||
return VK_SUCCESS;
|
||||
|
|
|
|||
|
|
@ -58,6 +58,12 @@ data_sources {
|
|||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.renderstages.panfrost"
|
||||
}
|
||||
}
|
||||
|
||||
data_sources {
|
||||
config {
|
||||
name: "gpu.counters.panfrost"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue