panvk: add u_trace perfetto support

When perfetto is enabled, u_trace_context calls driver-defined callbacks
when processing trace events.  Those callbacks are expected to emit
perfetto trace packets.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32360>
This commit is contained in:
Chia-I Wu 2024-11-25 21:15:51 -08:00 committed by Marge Bot
parent 7c61e2346d
commit 576a87a590
8 changed files with 400 additions and 5 deletions

View file

@ -88,7 +88,7 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
void
panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
{
u_trace_context_init(&dev->utrace.utctx, NULL, sizeof(uint64_t), 0,
u_trace_context_init(&dev->utrace.utctx, dev, sizeof(uint64_t), 0,
panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
NULL, panvk_utrace_delete_flush_data);

View file

@ -21,12 +21,15 @@ panvk_entrypoints = custom_target(
panvk_tracepoints = custom_target(
'panvk_tracepoints.[ch]',
input: 'panvk_tracepoints.py',
output: ['panvk_tracepoints.h', 'panvk_tracepoints.c'],
output: ['panvk_tracepoints.h',
'panvk_tracepoints_perfetto.h',
'panvk_tracepoints.c'],
command: [
prog_python, '@INPUT@',
'--import-path', join_paths(dir_source_root, 'src/util/perf/'),
'--utrace-hdr', '@OUTPUT0@',
'--utrace-src', '@OUTPUT1@',
'--perfetto-hdr', '@OUTPUT1@',
'--utrace-src', '@OUTPUT2@',
],
depend_files: u_trace_py,
)
@ -149,6 +152,11 @@ foreach arch : [6, 7, 10]
)
endforeach
if with_perfetto
panvk_deps += dep_perfetto
libpanvk_files += ['panvk_utrace_perfetto.cc']
endif
if with_platform_wayland
panvk_deps += dep_wayland_client
libpanvk_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c]

View file

@ -17,6 +17,7 @@
#include "panvk_mempool.h"
#include "panvk_meta.h"
#include "panvk_physical_device.h"
#include "panvk_utrace_perfetto.h"
#include "kmod/pan_kmod.h"
#include "util/pan_ir.h"
@ -64,6 +65,9 @@ struct panvk_device {
struct {
struct u_trace_context utctx;
#ifdef HAVE_PERFETTO
struct panvk_utrace_perfetto utp;
#endif
} utrace;
struct {

View file

@ -10,6 +10,7 @@ def parse_args():
parser.add_argument('-p', '--import-path', required=True)
parser.add_argument('--utrace-src', required=True)
parser.add_argument('--utrace-hdr', required=True)
parser.add_argument('--perfetto-hdr', required=True)
return parser.parse_args()
@ -20,20 +21,23 @@ from u_trace import ForwardDecl, Header, HeaderScope # noqa: E402
from u_trace import Tracepoint # noqa: E402
from u_trace import TracepointArg as Arg # noqa: E402
from u_trace import TracepointArgStruct as ArgStruct # noqa: E402
from u_trace import utrace_generate # noqa: E402
from u_trace import utrace_generate, utrace_generate_perfetto_utils # noqa: E402
Header('vulkan/vulkan_core.h', scope=HeaderScope.HEADER)
ForwardDecl('struct panvk_device')
def begin_end_tp(name, args=[], tp_struct=None):
    """Declare the begin_<name>/end_<name> tracepoint pair.

    The begin tracepoint carries no payload.  The end tracepoint receives
    ``args`` and ``tp_struct``.  Each tracepoint is bound to the perfetto
    callback named panvk_utrace_perfetto_{begin,end}_<name>.
    """
    begin_hook = f'panvk_utrace_perfetto_begin_{name}'
    end_hook = f'panvk_utrace_perfetto_end_{name}'

    Tracepoint(f'begin_{name}', tp_perfetto=begin_hook)
    Tracepoint(f'end_{name}', args=args, tp_struct=tp_struct,
               tp_perfetto=end_hook)
@ -51,7 +55,13 @@ def define_tracepoints():
def generate_code():
utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param=None)
utrace_generate(
cpath=args.utrace_src,
hpath=args.utrace_hdr,
ctx_param='struct panvk_device *dev',
)
utrace_generate_perfetto_utils(hpath=args.perfetto_hdr)
def main():

View file

@ -0,0 +1,295 @@
/*
* Copyright 2024 Google LLC
* SPDX-License-Identifier: MIT
*/
#include "panvk_utrace_perfetto.h"
#include <functional>
#include <perfetto.h>
#include "c11/threads.h"
#include "util/log.h"
#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"
#include "util/timespec.h"
#include "util/u_process.h"
#include "panvk_device.h"
#include "panvk_tracepoints.h"
#include "panvk_tracepoints_perfetto.h"
#include "panvk_utrace.h"
/* Incremental state type used by PanVKRenderpassTraits.
 *
 * was_cleared defaults to true.  emit_setup_packets() emits the interned
 * data packet whenever it finds the flag set and then resets it to false.
 */
struct PanVKRenderpassIncrementalState {
bool was_cleared = true;
};
/* Data source traits: perfetto's default traits with the incremental state
 * type replaced by PanVKRenderpassIncrementalState.
 */
struct PanVKRenderpassTraits : public perfetto::DefaultDataSourceTraits {
using IncrementalStateType = PanVKRenderpassIncrementalState;
};
/* The panvk render-stage data source.  It adds no members of its own; all
 * behavior comes from the MesaRenderpassDataSource template instantiated
 * with PanVKRenderpassTraits.
 */
class PanVKRenderpassDataSource
: public MesaRenderpassDataSource<PanVKRenderpassDataSource,
PanVKRenderpassTraits> {};
/* declare and define the perfetto static members for the data source class */
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
/* Return the display name of a stage: the enumerator name without its
 * PANVK_UTRACE_PERFETTO_STAGE_ prefix.  Any value that is not a known stage
 * hits unreachable().
 */
static const char *
get_stage_name(enum panvk_utrace_perfetto_stage stage)
{
   switch (stage) {
   case PANVK_UTRACE_PERFETTO_STAGE_CMDBUF:
      return "CMDBUF";
   default:
      unreachable("bad stage");
   }
}
/* Emit one trace packet, timestamped `now` and flagged
 * SEQ_INCREMENTAL_STATE_CLEARED, that carries the interned specifications
 * for every queue iid and every stage iid of the device.
 */
static void
emit_interned_data_packet(struct panvk_device *dev,
                          PanVKRenderpassDataSource::TraceContext &ctx,
                          uint64_t now)
{
   const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;

   auto packet = ctx.NewTracePacket();
   packet->set_timestamp(now);
   packet->set_sequence_flags(
      perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

   auto interned = packet->set_interned_data();

   /* queues are named "<process name>-queue-<index>" */
   uint32_t queue_idx = 0;
   for (const uint64_t queue_iid : utp->queue_iids) {
      char label[64];
      snprintf(label, sizeof(label), "%s-queue-%d", util_get_process_name(),
               queue_idx);

      auto queue_spec = interned->add_gpu_specifications();
      queue_spec->set_iid(queue_iid);
      queue_spec->set_name(label);

      queue_idx++;
   }

   /* stages are named after their enumerator */
   uint32_t stage_idx = 0;
   for (const uint64_t stage_iid : utp->stage_iids) {
      auto stage_spec = interned->add_gpu_specifications();
      stage_spec->set_iid(stage_iid);
      stage_spec->set_name(
         get_stage_name((enum panvk_utrace_perfetto_stage)stage_idx));

      stage_idx++;
   }
}
/* Query the current GPU timestamp and convert it from ticks to nanoseconds.
 *
 * props->timestamp_frequency must be non-zero; panvk_utrace_perfetto_init()
 * checks it before making the first call.
 *
 * The conversion is split into whole seconds and the sub-second remainder.
 * A direct `ts * NSEC_PER_SEC` wraps around 64 bits as soon as ts exceeds
 * UINT64_MAX / NSEC_PER_SEC (about 1.8e10 ticks), which a free-running
 * counter reaches quickly.  The remainder term is below
 * frequency * NSEC_PER_SEC and does not overflow for realistic frequencies.
 *
 * NOTE(review): GPU timestamps converted outside this file should use the
 * same overflow-safe form so both stay on one timeline -- confirm.
 */
static uint64_t
get_gpu_time_ns(struct panvk_device *dev)
{
   const struct panvk_physical_device *pdev =
      to_panvk_physical_device(dev->vk.physical);
   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
   const uint64_t freq = props->timestamp_frequency;
   const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);

   const uint64_t whole_secs = ts / freq;
   const uint64_t rem_ticks = ts % freq;

   return whole_secs * NSEC_PER_SEC + rem_ticks * NSEC_PER_SEC / freq;
}
static void
emit_clock_snapshot_packet(struct panvk_device *dev,
PanVKRenderpassDataSource::TraceContext &ctx)
{
const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
const uint64_t gpu_ns = get_gpu_time_ns(dev);
const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count();
MesaRenderpassDataSource<PanVKRenderpassDataSource, PanVKRenderpassTraits>::
EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id);
}
/* Emit the bookkeeping packets that must precede an event packet:
 *
 *  - the interned data, whenever the incremental state was cleared; this
 *    also forces a fresh clock snapshot
 *  - a clock snapshot, once the next_clock_snapshot deadline has passed;
 *    the deadline is then moved NSEC_PER_SEC into the future
 */
static void
emit_setup_packets(struct panvk_device *dev,
                   PanVKRenderpassDataSource::TraceContext &ctx)
{
   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
   const uint64_t cpu_now_ns = perfetto::base::GetBootTimeNs().count();

   auto incr_state = ctx.GetIncrementalState();
   if (incr_state->was_cleared) {
      emit_interned_data_packet(dev, ctx, cpu_now_ns);
      incr_state->was_cleared = false;

      /* a zero deadline makes the check below fire immediately */
      utp->next_clock_snapshot = 0;
   }

   if (utp->next_clock_snapshot <= cpu_now_ns) {
      emit_clock_snapshot_packet(dev, ctx);
      utp->next_clock_snapshot = cpu_now_ns + NSEC_PER_SEC;
   }
}
/* Push a new open event onto the stack of the subqueue named by `data`.
 *
 * Returns the stack slot, with its stage already recorded, or NULL when the
 * stack is full.  stack_depth is incremented even when the stack is full so
 * that the matching end_event() call can recognise and drop the overflowed
 * event.
 *
 * The subqueue index is validated before it is used, and the slot pointer is
 * only formed once the index is known to be in range.
 */
static struct panvk_utrace_perfetto_event *
begin_event(struct panvk_device *dev,
            const struct panvk_utrace_flush_data *data,
            enum panvk_utrace_perfetto_stage stage)
{
   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;

   assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
   struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];

   const uint32_t slot = queue->stack_depth++;
   if (slot >= PANVK_UTRACE_PERFETTO_STACK_DEPTH) {
      PERFETTO_ELOG("queue %d stage %d too deep", data->subqueue, stage);
      return NULL;
   }

   struct panvk_utrace_perfetto_event *ev = &queue->stack[slot];
   ev->stage = stage;

   return ev;
}
/* Pop the innermost open event from the stack of the subqueue named by
 * `data`.
 *
 * Returns NULL when the stack is empty, or when the popped entry is one that
 * begin_event() counted but could not store because the stack was full.
 * Otherwise returns the stored event, whose stage is asserted to match
 * `stage`.
 *
 * The subqueue index is validated before it is used, and the slot pointer is
 * only formed once the index is known to be in range.
 */
static struct panvk_utrace_perfetto_event *
end_event(struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
          enum panvk_utrace_perfetto_stage stage)
{
   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;

   assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
   struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];

   if (!queue->stack_depth)
      return NULL;

   const uint32_t slot = --queue->stack_depth;
   if (slot >= PANVK_UTRACE_PERFETTO_STACK_DEPTH)
      return NULL;

   struct panvk_utrace_perfetto_event *ev = &queue->stack[slot];
   assert(ev->stage == stage);

   return ev;
}
/* Open an event for `stage` on the subqueue named by `data` and remember its
 * begin timestamp.  Nothing is recorded when begin_event() has no slot left.
 */
static void
panvk_utrace_perfetto_begin_event(struct panvk_device *dev,
                                  const struct panvk_utrace_flush_data *data,
                                  enum panvk_utrace_perfetto_stage stage,
                                  uint64_t ts_ns)
{
   struct panvk_utrace_perfetto_event *slot = begin_event(dev, data, stage);

   if (slot)
      slot->begin_ns = ts_ns;
}
/* Close the innermost open event for `stage` on the subqueue named by `data`
 * and emit it as a GpuRenderStageEvent spanning [begin_ns, ts_ns].
 *
 * emit_event_extra is called on the event after the common fields are set so
 * that the caller can attach tracepoint-specific data.  Nothing is emitted
 * when end_event() returns no event.
 */
static void
panvk_utrace_perfetto_end_event(
   struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
   enum panvk_utrace_perfetto_stage stage, uint64_t ts_ns,
   std::function<void(perfetto::protos::pbzero::GpuRenderStageEvent *)>
      emit_event_extra)
{
   const struct panvk_utrace_perfetto_event *closed =
      end_event(dev, data, stage);
   if (closed == NULL)
      return;

   const uint64_t begin_ns = closed->begin_ns;
   const uint64_t duration_ns = ts_ns - begin_ns;

   PanVKRenderpassDataSource::Trace(
      [=](PanVKRenderpassDataSource::TraceContext ctx) {
         struct panvk_utrace_perfetto *utp = &dev->utrace.utp;

         /* interned data and clock snapshots go out before the event */
         emit_setup_packets(dev, ctx);

         auto packet = ctx.NewTracePacket();
         packet->set_timestamp(begin_ns);
         packet->set_timestamp_clock_id(utp->gpu_clock_id);

         auto render_stage = packet->set_gpu_render_stage_event();
         render_stage->set_event_id(utp->event_id++);
         render_stage->set_duration(duration_ns);
         render_stage->set_hw_queue_iid(utp->queue_iids[data->subqueue]);
         render_stage->set_stage_iid(utp->stage_iids[stage]);
         render_stage->set_context(utp->device_id);

         emit_event_extra(render_stage);
      });
}
/* Define the pair of perfetto callbacks for tracepoint `tp`, mapped to
 * PANVK_UTRACE_PERFETTO_STAGE_<stage>:
 *
 *  - panvk_utrace_perfetto_begin_<tp>() asserts that it received no payload
 *    and no indirect data, then opens an event at ts_ns
 *  - panvk_utrace_perfetto_end_<tp>() closes the event at ts_ns and attaches
 *    the end payload through trace_payload_as_extra_end_<tp>()
 *
 * flush_data is cast to struct panvk_utrace_flush_data in both callbacks.
 * tp_idx is unused.
 */
#define PANVK_UTRACE_PERFETTO_PROCESS_EVENT(tp, stage) \
void panvk_utrace_perfetto_begin_##tp( \
struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx, \
const void *flush_data, const struct trace_begin_##tp *payload, \
const void *indirect_data) \
{ \
/* we can ignore them or save them if we choose to */ \
assert(!payload && !indirect_data); \
panvk_utrace_perfetto_begin_event( \
dev, (const struct panvk_utrace_flush_data *)flush_data, \
PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns); \
} \
\
void panvk_utrace_perfetto_end_##tp( \
struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx, \
const void *flush_data, const struct trace_end_##tp *payload, \
const void *indirect_data) \
{ \
auto emit_event_extra = \
[=](perfetto::protos::pbzero::GpuRenderStageEvent *event) { \
trace_payload_as_extra_end_##tp(event, payload, indirect_data); \
}; \
panvk_utrace_perfetto_end_event( \
dev, (const struct panvk_utrace_flush_data *)flush_data, \
PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns, emit_event_extra); \
}
/* u_trace_context_process dispatches trace events to a background thread
 * (traceq) for processing. These callbacks are called from traceq.
 *
 * The expansion below defines panvk_utrace_perfetto_begin_cmdbuf() and
 * panvk_utrace_perfetto_end_cmdbuf(), both mapped to the CMDBUF stage.
 */
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf, CMDBUF)
/* Derive the id of the GPU clock from a hash of the driver's clock name, with
 * the top bit of the 32-bit value forced on.
 *
 * see https://perfetto.dev/docs/concepts/clock-sync
 */
static uint32_t
get_gpu_clock_id(void)
{
   const uint32_t name_hash =
      _mesa_hash_string("org.freedesktop.mesa.panfrost");

   return name_hash | 0x80000000;
}
static void
register_data_source(void)
{
perfetto::DataSourceDescriptor dsd;
dsd.set_name("gpu.renderstages.panfrost");
PanVKRenderpassDataSource::Register(dsd);
}
/* Set up the perfetto state of a device and register the data source.
 *
 * The function returns without touching the state when queue_count exceeds
 * PANVK_UTRACE_PERFETTO_QUEUE_COUNT (this also asserts) or when the GPU
 * timestamp is unavailable (logged as a warning).
 *
 * Interned ids are handed out sequentially starting at 1: first one per
 * queue, then one per stage.
 */
void
panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
{
   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
   const struct panvk_physical_device *pdev =
      to_panvk_physical_device(dev->vk.physical);
   const struct pan_kmod_dev_props *props = &pdev->kmod.props;

   if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
      assert(!"PANVK_UTRACE_PERFETTO_QUEUE_COUNT too small");
      return;
   }

   /* The order matters: the frequency must be known to be non-zero before
    * get_gpu_time_ns() divides by it.
    */
   const bool has_gpu_timestamp = props->gpu_can_query_timestamp &&
                                  props->timestamp_frequency &&
                                  get_gpu_time_ns(dev);
   if (!has_gpu_timestamp) {
      mesa_logw("disable perfetto because gpu timestamp is missing");
      return;
   }

   utp->gpu_clock_id = get_gpu_clock_id();
   utp->device_id = (uintptr_t)dev;

   const uint64_t first_queue_iid = 1;
   const uint64_t first_stage_iid =
      first_queue_iid + ARRAY_SIZE(utp->queue_iids);

   for (uint32_t q = 0; q < ARRAY_SIZE(utp->queue_iids); q++)
      utp->queue_iids[q] = first_queue_iid + q;

   for (uint32_t s = 0; s < ARRAY_SIZE(utp->stage_iids); s++)
      utp->stage_iids[s] = first_stage_iid + s;

   util_perfetto_init();

   /* the data source is process-wide and must be registered only once */
   static once_flag register_ds_once = ONCE_FLAG_INIT;
   call_once(&register_ds_once, register_data_source);
}

View file

@ -0,0 +1,66 @@
/*
* Copyright 2024 Google LLC
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_UTRACE_PERFETTO_H
#define PANVK_UTRACE_PERFETTO_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Number of per-queue event stacks and queue iids.
 * must be at least PANVK_SUBQUEUE_COUNT
 */
#define PANVK_UTRACE_PERFETTO_QUEUE_COUNT 3
/* Capacity of each per-queue stack of open events. */
#define PANVK_UTRACE_PERFETTO_STACK_DEPTH 8
struct panvk_device;
/* Stages that events are attributed to.  The values index stage_iids[], so
 * PANVK_UTRACE_PERFETTO_STAGE_COUNT must remain the last enumerator.
 */
enum panvk_utrace_perfetto_stage {
PANVK_UTRACE_PERFETTO_STAGE_CMDBUF,
PANVK_UTRACE_PERFETTO_STAGE_COUNT,
};
/* An open event: the stage it belongs to and its begin timestamp in
 * nanoseconds.
 */
struct panvk_utrace_perfetto_event {
enum panvk_utrace_perfetto_stage stage;
uint64_t begin_ns;
};
/* Stack of open events for one queue.  stack_depth counts the begin events
 * that have not been ended yet; it may exceed
 * PANVK_UTRACE_PERFETTO_STACK_DEPTH, in which case the excess events are not
 * stored.
 */
struct panvk_utrace_perfetto_queue {
struct panvk_utrace_perfetto_event stack[PANVK_UTRACE_PERFETTO_STACK_DEPTH];
uint32_t stack_depth;
};
/* Per-device perfetto state, filled in by panvk_utrace_perfetto_init(). */
struct panvk_utrace_perfetto {
/* clock id attached to event timestamps and to clock snapshots */
uint32_t gpu_clock_id;
/* the device pointer value, reported as the context of each event */
uint64_t device_id;
/* interned ids, one per queue and one per stage */
uint64_t queue_iids[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
uint64_t stage_iids[PANVK_UTRACE_PERFETTO_STAGE_COUNT];
/* boot-time timestamp (ns) from which the next clock snapshot is due */
uint64_t next_clock_snapshot;
/* id given to the next emitted event; incremented on each use */
uint64_t event_id;
/* stacks of open events, indexed by subqueue */
struct panvk_utrace_perfetto_queue queues[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
};
/* Initialize the perfetto state of `dev` for `queue_count` queues.  Without
 * HAVE_PERFETTO this is an empty inline stub, so callers need no #ifdef.
 */
#ifdef HAVE_PERFETTO
void panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count);
#else /* HAVE_PERFETTO */
static inline void
panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
{
}
#endif /* HAVE_PERFETTO */
#ifdef __cplusplus
}
#endif
#endif /* PANVK_UTRACE_PERFETTO_H */

View file

@ -24,6 +24,7 @@
#include "panvk_priv_bo.h"
#include "panvk_queue.h"
#include "panvk_utrace.h"
#include "panvk_utrace_perfetto.h"
#include "genxml/decode.h"
#include "genxml/gen_macros.h"
@ -357,6 +358,11 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
}
panvk_per_arch(utrace_context_init)(device);
#if PAN_ARCH >= 10
panvk_utrace_perfetto_init(device, PANVK_SUBQUEUE_COUNT);
#else
panvk_utrace_perfetto_init(device, 2);
#endif
*pDevice = panvk_device_to_handle(device);
return VK_SUCCESS;

View file

@ -58,6 +58,12 @@ data_sources {
}
}
data_sources {
config {
name: "gpu.renderstages.panfrost"
}
}
data_sources {
config {
name: "gpu.counters.panfrost"