diff --git a/src/panfrost/vulkan/csf/panvk_vX_utrace.c b/src/panfrost/vulkan/csf/panvk_vX_utrace.c index fa1b5e7223f..d84870942e0 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_utrace.c +++ b/src/panfrost/vulkan/csf/panvk_vX_utrace.c @@ -88,7 +88,7 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, void panvk_per_arch(utrace_context_init)(struct panvk_device *dev) { - u_trace_context_init(&dev->utrace.utctx, NULL, sizeof(uint64_t), 0, + u_trace_context_init(&dev->utrace.utctx, dev, sizeof(uint64_t), 0, panvk_utrace_create_buffer, panvk_utrace_delete_buffer, panvk_utrace_record_ts, panvk_utrace_read_ts, NULL, NULL, panvk_utrace_delete_flush_data); diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index dde8a194622..98797b706e1 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -21,12 +21,15 @@ panvk_entrypoints = custom_target( panvk_tracepoints = custom_target( 'panvk_tracepoints.[ch]', input: 'panvk_tracepoints.py', - output: ['panvk_tracepoints.h', 'panvk_tracepoints.c'], + output: ['panvk_tracepoints.h', + 'panvk_tracepoints_perfetto.h', + 'panvk_tracepoints.c'], command: [ prog_python, '@INPUT@', '--import-path', join_paths(dir_source_root, 'src/util/perf/'), '--utrace-hdr', '@OUTPUT0@', - '--utrace-src', '@OUTPUT1@', + '--perfetto-hdr', '@OUTPUT1@', + '--utrace-src', '@OUTPUT2@', ], depend_files: u_trace_py, ) @@ -149,6 +152,11 @@ foreach arch : [6, 7, 10] ) endforeach +if with_perfetto + panvk_deps += dep_perfetto + libpanvk_files += ['panvk_utrace_perfetto.cc'] +endif + if with_platform_wayland panvk_deps += dep_wayland_client libpanvk_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c] diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 2b127be9fdc..ddca52b1638 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -17,6 +17,7 @@ #include "panvk_mempool.h" #include "panvk_meta.h" 
#include "panvk_physical_device.h" +#include "panvk_utrace_perfetto.h" #include "kmod/pan_kmod.h" #include "util/pan_ir.h" @@ -64,6 +65,9 @@ struct panvk_device { struct { struct u_trace_context utctx; +#ifdef HAVE_PERFETTO + struct panvk_utrace_perfetto utp; +#endif } utrace; struct { diff --git a/src/panfrost/vulkan/panvk_tracepoints.py b/src/panfrost/vulkan/panvk_tracepoints.py index 7a55adc1d22..1225beee5ce 100644 --- a/src/panfrost/vulkan/panvk_tracepoints.py +++ b/src/panfrost/vulkan/panvk_tracepoints.py @@ -10,6 +10,7 @@ def parse_args(): parser.add_argument('-p', '--import-path', required=True) parser.add_argument('--utrace-src', required=True) parser.add_argument('--utrace-hdr', required=True) + parser.add_argument('--perfetto-hdr', required=True) return parser.parse_args() @@ -20,20 +21,23 @@ from u_trace import ForwardDecl, Header, HeaderScope # noqa: E402 from u_trace import Tracepoint # noqa: E402 from u_trace import TracepointArg as Arg # noqa: E402 from u_trace import TracepointArgStruct as ArgStruct # noqa: E402 -from u_trace import utrace_generate # noqa: E402 +from u_trace import utrace_generate, utrace_generate_perfetto_utils # noqa: E402 Header('vulkan/vulkan_core.h', scope=HeaderScope.HEADER) +ForwardDecl('struct panvk_device') def begin_end_tp(name, args=[], tp_struct=None): Tracepoint( f'begin_{name}', + tp_perfetto=f'panvk_utrace_perfetto_begin_{name}', ) Tracepoint( f'end_{name}', args=args, tp_struct=tp_struct, + tp_perfetto=f'panvk_utrace_perfetto_end_{name}', ) @@ -51,7 +55,13 @@ def define_tracepoints(): def generate_code(): - utrace_generate(cpath=args.utrace_src, hpath=args.utrace_hdr, ctx_param=None) + utrace_generate( + cpath=args.utrace_src, + hpath=args.utrace_hdr, + ctx_param='struct panvk_device *dev', + ) + + utrace_generate_perfetto_utils(hpath=args.perfetto_hdr) def main(): diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.cc b/src/panfrost/vulkan/panvk_utrace_perfetto.cc new file mode 100644 index 
00000000000..dd219afecfd
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_utrace_perfetto.cc
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2024 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "panvk_utrace_perfetto.h"
+
+#include <functional>
+#include <perfetto.h>
+
+#include "c11/threads.h"
+#include "util/log.h"
+#include "util/perf/u_perfetto.h"
+#include "util/perf/u_perfetto_renderpass.h"
+#include "util/timespec.h"
+#include "util/u_process.h"
+
+#include "panvk_device.h"
+#include "panvk_tracepoints.h"
+#include "panvk_tracepoints_perfetto.h"
+#include "panvk_utrace.h"
+
+/* A freshly constructed incremental state has was_cleared set, which makes
+ * emit_setup_packets emit the interned data.
+ */
+struct PanVKRenderpassIncrementalState {
+   bool was_cleared = true;
+};
+
+struct PanVKRenderpassTraits : public perfetto::DefaultDataSourceTraits {
+   using IncrementalStateType = PanVKRenderpassIncrementalState;
+};
+
+class PanVKRenderpassDataSource
+    : public MesaRenderpassDataSource<PanVKRenderpassDataSource,
+                                      PanVKRenderpassTraits> {};
+
+PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
+PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
+
+/* Returns the name under which a stage is interned. */
+static const char *
+get_stage_name(enum panvk_utrace_perfetto_stage stage)
+{
+   switch (stage) {
+#define CASE(x)                                                                \
+   case PANVK_UTRACE_PERFETTO_STAGE_##x:                                       \
+      return #x
+      CASE(CMDBUF);
+#undef CASE
+   default:
+      unreachable("bad stage");
+   }
+}
+
+/* Emits the packet that names every queue and stage iid. The packet is
+ * flagged SEQ_INCREMENTAL_STATE_CLEARED.
+ */
+static void
+emit_interned_data_packet(struct panvk_device *dev,
+                          PanVKRenderpassDataSource::TraceContext &ctx,
+                          uint64_t now)
+{
+   const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+   const char *process_name = util_get_process_name();
+
+   auto packet = ctx.NewTracePacket();
+   packet->set_timestamp(now);
+   packet->set_sequence_flags(
+      perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
+
+   auto interned_data = packet->set_interned_data();
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++) {
+      char name[64];
+      /* i is unsigned, so it is printed with %u */
+      snprintf(name, sizeof(name), "%s-queue-%u", process_name, i);
+
+      auto specs = interned_data->add_gpu_specifications();
+      specs->set_iid(utp->queue_iids[i]);
+      specs->set_name(name);
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++) {
+      auto specs = interned_data->add_gpu_specifications();
+      specs->set_iid(utp->stage_iids[i]);
+      specs->set_name(get_stage_name((enum panvk_utrace_perfetto_stage)i));
+   }
+}
+
+/* Reads the GPU timestamp and converts it from ticks to nanoseconds.
+ * props->timestamp_frequency must be non-zero.
+ */
+static uint64_t
+get_gpu_time_ns(struct panvk_device *dev)
+{
+   const struct panvk_physical_device *pdev =
+      to_panvk_physical_device(dev->vk.physical);
+   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
+   const uint64_t freq = props->timestamp_frequency;
+
+   const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
+
+   /* Convert the whole seconds and the remainder separately: a plain
+    * ts * NSEC_PER_SEC wraps around 64 bits once ts exceeds ~1.8e10 ticks.
+    */
+   return (ts / freq) * NSEC_PER_SEC + (ts % freq) * NSEC_PER_SEC / freq;
+}
+
+/* Emits a clock sync packet pairing the CPU boot time with the GPU time. */
+static void
+emit_clock_snapshot_packet(struct panvk_device *dev,
+                           PanVKRenderpassDataSource::TraceContext &ctx)
+{
+   const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+   const uint64_t gpu_ns = get_gpu_time_ns(dev);
+   const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count();
+
+   MesaRenderpassDataSource<PanVKRenderpassDataSource, PanVKRenderpassTraits>::
+      EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id);
+}
+
+/* Emits the interned data if the incremental state was cleared, and a clock
+ * snapshot once utp->next_clock_snapshot has been reached.
+ */
+static void
+emit_setup_packets(struct panvk_device *dev,
+                   PanVKRenderpassDataSource::TraceContext &ctx)
+{
+   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+
+   const uint64_t now = perfetto::base::GetBootTimeNs().count();
+
+   /* emit interned data if cleared */
+   auto state = ctx.GetIncrementalState();
+   if (state->was_cleared) {
+      emit_interned_data_packet(dev, ctx, now);
+
+      state->was_cleared = false;
+      utp->next_clock_snapshot = 0;
+   }
+
+   /* emit clock snapshots periodically */
+   if (now >= utp->next_clock_snapshot) {
+      emit_clock_snapshot_packet(dev, ctx);
+
+      utp->next_clock_snapshot = now + NSEC_PER_SEC;
+   }
+}
+
+/* Pushes an event on the stack of data->subqueue. Returns NULL, with the
+ * depth still counted, when the stack is full.
+ */
+static struct panvk_utrace_perfetto_event *
+begin_event(struct panvk_device *dev,
+            const struct panvk_utrace_flush_data *data,
+            enum panvk_utrace_perfetto_stage stage)
+{
+   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+
+   /* check the index before it is used to address utp->queues */
+   assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
+   struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
+
+   /* Count the event even when it does not fit so that the matching
+    * end_event keeps the depth balanced.
+    */
+   const uint32_t depth = queue->stack_depth++;
+   if (depth >= PANVK_UTRACE_PERFETTO_STACK_DEPTH) {
+      PERFETTO_ELOG("queue %d stage %d too deep", data->subqueue, stage);
+      return NULL;
+   }
+
+   struct panvk_utrace_perfetto_event *ev = &queue->stack[depth];
+   ev->stage = stage;
+   return ev;
+}
+
+/* Pops the event pushed by the matching begin_event. Returns NULL when the
+ * stack is empty or when the matching begin_event did not fit.
+ */
+static struct panvk_utrace_perfetto_event *
+end_event(struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
+          enum panvk_utrace_perfetto_stage stage)
+{
+   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+
+   /* check the index before it is used to address utp->queues */
+   assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
+   struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
+
+   if (!queue->stack_depth)
+      return NULL;
+
+   const uint32_t depth = --queue->stack_depth;
+   if (depth >= PANVK_UTRACE_PERFETTO_STACK_DEPTH)
+      return NULL;
+
+   struct panvk_utrace_perfetto_event *ev = &queue->stack[depth];
+   assert(ev->stage == stage);
+   return ev;
+}
+
+/* Records ts_ns as the begin timestamp of a newly pushed stage. */
+static void
+panvk_utrace_perfetto_begin_event(struct panvk_device *dev,
+                                  const struct panvk_utrace_flush_data *data,
+                                  enum panvk_utrace_perfetto_stage stage,
+                                  uint64_t ts_ns)
+{
+   struct panvk_utrace_perfetto_event *ev = begin_event(dev, data, stage);
+   if (!ev)
+      return;
+
+   ev->begin_ns = ts_ns;
+}
+
+/* Pops the stage and emits a GpuRenderStageEvent spanning from its begin
+ * timestamp to ts_ns. emit_event_extra is called on the event last.
+ */
+static void
+panvk_utrace_perfetto_end_event(
+   struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
+   enum panvk_utrace_perfetto_stage stage, uint64_t ts_ns,
+   std::function<void(perfetto::protos::pbzero::GpuRenderStageEvent *)>
+      emit_event_extra)
+{
+   const struct panvk_utrace_perfetto_event *ev = end_event(dev, data, stage);
+   if (!ev)
+      return;
+
+   PanVKRenderpassDataSource::Trace(
+      [=](PanVKRenderpassDataSource::TraceContext ctx) {
+         struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+
+         emit_setup_packets(dev, ctx);
+
+         auto packet = ctx.NewTracePacket();
+         packet->set_timestamp(ev->begin_ns);
+         packet->set_timestamp_clock_id(utp->gpu_clock_id);
+
+         auto event = packet->set_gpu_render_stage_event();
+         event->set_event_id(utp->event_id++);
+         event->set_duration(ts_ns - ev->begin_ns);
+         event->set_hw_queue_iid(utp->queue_iids[data->subqueue]);
+         event->set_stage_iid(utp->stage_iids[stage]);
+         event->set_context(utp->device_id);
+
+         emit_event_extra(event);
+      });
+}
+
+/* Defines the begin and end perfetto callbacks of tracepoint tp and maps
+ * them to the given PANVK_UTRACE_PERFETTO_STAGE_ value.
+ */
+#define PANVK_UTRACE_PERFETTO_PROCESS_EVENT(tp, stage)                         \
+   void panvk_utrace_perfetto_begin_##tp(                                      \
+      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,               \
+      const void *flush_data, const struct trace_begin_##tp *payload,          \
+      const void *indirect_data)                                               \
+   {                                                                           \
+      /* we can ignore them or save them if we choose to */                    \
+      assert(!payload && !indirect_data);                                      \
+      panvk_utrace_perfetto_begin_event(                                       \
+         dev, (const struct panvk_utrace_flush_data *)flush_data,              \
+         PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns);                          \
+   }                                                                           \
+                                                                               \
+   void panvk_utrace_perfetto_end_##tp(                                        \
+      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,               \
+      const void *flush_data, const struct trace_end_##tp *payload,            \
+      const void *indirect_data)                                               \
+   {                                                                           \
+      auto emit_event_extra =                                                  \
+         [=](perfetto::protos::pbzero::GpuRenderStageEvent *event) {           \
+            trace_payload_as_extra_end_##tp(event, payload, indirect_data);    \
+         };                                                                    \
+      panvk_utrace_perfetto_end_event(                                         \
+         dev, (const struct panvk_utrace_flush_data *)flush_data,              \
+         PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns, emit_event_extra);        \
+   }
+
+/* u_trace_context_process dispatches trace events to a background thread
+ * (traceq) for processing.  These callbacks are called from traceq.
+ */
+PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf, CMDBUF)
+
+/* Returns the clock id that GPU timestamps are reported against. */
+static uint32_t
+get_gpu_clock_id(void)
+{
+   /* see https://perfetto.dev/docs/concepts/clock-sync */
+   return _mesa_hash_string("org.freedesktop.mesa.panfrost") | 0x80000000;
+}
+
+/* Registers the data source as "gpu.renderstages.panfrost". */
+static void
+register_data_source(void)
+{
+   perfetto::DataSourceDescriptor dsd;
+   dsd.set_name("gpu.renderstages.panfrost");
+   PanVKRenderpassDataSource::Register(dsd);
+}
+
+/* Sets up dev->utrace.utp and registers the data source through a call_once.
+ * Nothing is set up when queue_count is too large or when the GPU timestamp
+ * cannot be queried.
+ */
+void
+panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
+{
+   const struct panvk_physical_device *pdev =
+      to_panvk_physical_device(dev->vk.physical);
+   const struct pan_kmod_dev_props *props = &pdev->kmod.props;
+   struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
+
+   if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
+      assert(!"PANVK_UTRACE_PERFETTO_QUEUE_COUNT too small");
+      return;
+   }
+
+   /* check for timestamp support */
+   if (!props->gpu_can_query_timestamp || !props->timestamp_frequency ||
+       !get_gpu_time_ns(dev)) {
+      mesa_logw("disable perfetto because gpu timestamp is missing");
+      return;
+   }
+
+   utp->gpu_clock_id = get_gpu_clock_id();
+   utp->device_id = (uintptr_t)dev;
+
+   uint64_t next_iid = 1;
+   for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++)
+      utp->queue_iids[i] = next_iid++;
+   for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++)
+      utp->stage_iids[i] = next_iid++;
+
+   util_perfetto_init();
+
+   static once_flag register_ds_once = ONCE_FLAG_INIT;
+   call_once(&register_ds_once, register_data_source);
+}
diff --git a/src/panfrost/vulkan/panvk_utrace_perfetto.h b/src/panfrost/vulkan/panvk_utrace_perfetto.h
new file mode 100644
index 00000000000..f9a6222efad
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_utrace_perfetto.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2024 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef PANVK_UTRACE_PERFETTO_H
+#define PANVK_UTRACE_PERFETTO_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* must be at least PANVK_SUBQUEUE_COUNT */
+#define PANVK_UTRACE_PERFETTO_QUEUE_COUNT 3
+/* number of nested stages that can be recorded per queue */
+#define PANVK_UTRACE_PERFETTO_STACK_DEPTH 8
+
+struct panvk_device;
+
+enum panvk_utrace_perfetto_stage {
+   PANVK_UTRACE_PERFETTO_STAGE_CMDBUF,
+   PANVK_UTRACE_PERFETTO_STAGE_COUNT,
+};
+
+/* a stage that has begun but not ended yet */
+struct panvk_utrace_perfetto_event {
+   enum panvk_utrace_perfetto_stage stage;
+   uint64_t begin_ns;
+};
+
+/* Stack of the stages in flight on a queue. stack_depth also counts the
+ * stages that did not fit, so it can exceed the array size.
+ */
+struct panvk_utrace_perfetto_queue {
+   struct panvk_utrace_perfetto_event stack[PANVK_UTRACE_PERFETTO_STACK_DEPTH];
+   uint32_t stack_depth;
+};
+
+struct panvk_utrace_perfetto {
+   uint32_t gpu_clock_id;
+   uint64_t device_id;
+
+   uint64_t queue_iids[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
+   uint64_t stage_iids[PANVK_UTRACE_PERFETTO_STAGE_COUNT];
+
+   uint64_t next_clock_snapshot;
+   uint64_t event_id;
+
+   struct panvk_utrace_perfetto_queue queues[PANVK_UTRACE_PERFETTO_QUEUE_COUNT];
+};
+
+#ifdef HAVE_PERFETTO
+
+void panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count);
+
+#else /* HAVE_PERFETTO */
+
+/* no-op when built without perfetto */
+static inline void
+panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
+{
+}
+
+#endif /* HAVE_PERFETTO */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PANVK_UTRACE_PERFETTO_H */
diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
index bc9d3f7a5f7..e3c99e25564 100644
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -24,6 +24,7 @@
 #include "panvk_priv_bo.h"
 #include "panvk_queue.h"
 #include "panvk_utrace.h"
+#include "panvk_utrace_perfetto.h"
 
 #include "genxml/decode.h"
 #include "genxml/gen_macros.h"
@@ -357,6 +358,11 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
    }
 
    panvk_per_arch(utrace_context_init)(device);
+#if PAN_ARCH >= 10
+   panvk_utrace_perfetto_init(device, PANVK_SUBQUEUE_COUNT);
+#else
+   panvk_utrace_perfetto_init(device, 2);
+#endif
 
    *pDevice = panvk_device_to_handle(device);
    return VK_SUCCESS;
diff --git a/src/tool/pps/cfg/system.cfg b/src/tool/pps/cfg/system.cfg
index
ccc51423f8d..2e6ce480bb2 100644 --- a/src/tool/pps/cfg/system.cfg +++ b/src/tool/pps/cfg/system.cfg @@ -58,6 +58,12 @@ data_sources { } } +data_sources { + config { + name: "gpu.renderstages.panfrost" + } +} + data_sources { config { name: "gpu.counters.panfrost"