From dcc0dc10d70ebf2751a950ccb371c8436da5a8f4 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Wed, 18 Mar 2026 10:38:38 +0100 Subject: [PATCH 01/38] pan/kmod: Add perf counter api The kernel module is responsible for starting/stopping the counter collection. It decides the layout of the counters in memory. The commit adds an API to reflect this. The counter collection can be started and stopped through the kmod. Counters are dumped into a buffer also provided by the kmod. This is so that later for panthor the buffer can be an mmapped bo. It also allows for having a larger buffer where multiple samples are located internally but pointing data at the most recent one. The memory layout of whatever the data pointer points to can be queried so that the counters can be extracted from it without going through the kmod vtable. --- src/panfrost/lib/kmod/pan_kmod.h | 127 +++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index e7356330e7d..8076e78a5bb 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -1,5 +1,6 @@ /* * Copyright © 2023 Collabora, Ltd. + * Copyright © 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -37,6 +38,7 @@ #include "util/u_dynarray.h" #include "kmod/panthor_kmod.h" +#include "pan_props.h" #include "pan_trace.h" #if defined(__cplusplus) @@ -384,6 +386,54 @@ struct pan_kmod_va_range { uint64_t size; }; +struct pan_kmod_perf_session { + /* Device this perf session was created from. */ + struct pan_kmod_dev *dev; + + /* Sample data pointer. */ + void* data; + + /* If pan_kmod_perf_session::data_ts is supported. */ + bool data_ts_supported; + + /* The timestamp of the sample data. */ + uint64_t data_ts; +}; + +enum pan_kmod_perf_category { + PAN_KMOD_PERF_CAT_FRONTEND, + PAN_KMOD_PERF_CAT_TILER, + PAN_KMOD_PERF_CAT_MEMSYS, + PAN_KMOD_PERF_CAT_SHADER, + /* Must be last. 
*/ + PAN_KMOD_PERF_CAT_COUNT, +}; + +/* Describes the memory layout of a buffer containing performance counters. + * The buffer is structured like this: + * sample { + * header + * categories [ category { + * blocks [ block { + * header + * samples + * }] + * }] + * } + */ +struct pan_kmod_perf_buffer_layout { + struct { + /* Offset from the start of the buffer in bytes. */ + uint32_t offset; + /* Number of blocks for this category. */ + uint8_t n_blocks; + } category[PAN_KMOD_PERF_CAT_COUNT]; + + uint32_t block_stride; + uint32_t counter_stride; + uint32_t counters_per_category; +}; + /* KMD backend vtable. * * All methods described there are mandatory, unless explicitly flagged as @@ -474,6 +524,25 @@ struct pan_kmod_ops { /* Label the BO */ void (*bo_set_label)(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const char *label); + + /* Initialize a perf session. */ + struct pan_kmod_perf_session *(*perf_create)(struct pan_kmod_dev *dev); + + /* Enable perf counters. */ + int (*perf_enable)(struct pan_kmod_perf_session *session); + + /* Disable perf counters. */ + int (*perf_disable)(struct pan_kmod_perf_session *session); + + /* Dump collected perf counters. */ + int (*perf_dump)(struct pan_kmod_perf_session *session); + + /* Destroy a perf session. */ + void (*perf_destroy)(struct pan_kmod_perf_session *session); + + /* Query the memory layout for a counter buffer. */ + void (*perf_query_layout)(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout); }; /* KMD information. 
*/ @@ -783,6 +852,64 @@ pan_kmod_query_timestamp(const struct pan_kmod_dev *dev) return dev->ops->query_timestamp(dev); } +static inline struct pan_kmod_perf_session * +pan_kmod_perf_create(struct pan_kmod_dev *dev) +{ + return dev->ops->perf_create(dev); +} + +static inline int +pan_kmod_perf_enable(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_enable(session); +} + +static inline int +pan_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_disable(session); +} + +static inline int +pan_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + return session->dev->ops->perf_dump(session); +} + +static inline void +pan_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + session->dev->ops->perf_destroy(session); +} + +static inline void +pan_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + session->dev->ops->perf_query_layout(session, layout); +} + +/* Load a counter value from the given address. */ +static inline int64_t +pan_kmod_perf_load_counter(const struct pan_kmod_perf_session *session, + const void *ptr) +{ + if (pan_arch(session->dev->props.gpu_id) < 10) + return *((const uint32_t*)ptr); + else { + const uint64_t val = *((const uint64_t*)ptr); +#ifndef NDEBUG + /* + * Even though the uAPI permits 64-bit unsigned counters, the counter + * values realistically never exceed INT64_MAX. + */ + return (val > INT64_MAX) ? -EINVAL : val; +#else + return val; +#endif + } +} + #if defined(__cplusplus) } // extern "C" #endif From 901dd8dc6ca00acd80911d872bd8808d09bc5e25 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Wed, 18 Mar 2026 10:39:08 +0100 Subject: [PATCH 02/38] pan/kmod: Implement panfrost kmod perf counter methods This is mostly a copy of the logic from pan_perf.c and hooking it up to the kmod api. 
--- src/panfrost/lib/kmod/panfrost_kmod.c | 116 ++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c index a3d8900c251..e7a0c1c2cdd 100644 --- a/src/panfrost/lib/kmod/panfrost_kmod.c +++ b/src/panfrost/lib/kmod/panfrost_kmod.c @@ -42,6 +42,10 @@ struct panfrost_kmod_bo { uint64_t offset; }; +struct panfrost_kmod_perf_session { + struct pan_kmod_perf_session base; +}; + /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching * information about devices. */ @@ -607,6 +611,112 @@ panfrost_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const c mesa_loge("DRM_IOCTL_PANFROST_SET_LABEL_BO failed (err=%d)", errno); } +static inline struct pan_kmod_perf_session * +panfrost_kmod_perf_init(struct pan_kmod_dev *dev) +{ + UNUSED struct panfrost_kmod_dev *panfrost_dev = + container_of(dev, struct panfrost_kmod_dev, base); + + struct panfrost_kmod_perf_session *sess = + pan_kmod_dev_alloc(dev, sizeof(*sess)); + if (!sess) { + mesa_loge("failed to allocate a panfrost_kmod_perf_session object"); + return NULL; + } + + sess->base.dev = dev; + + struct pan_kmod_perf_buffer_layout layout; + pan_kmod_perf_query_layout(&sess->base, &layout); + + uint32_t n_counters = 0; + for (uint32_t cat = 0; cat < PAN_KMOD_PERF_CAT_COUNT; ++cat) + n_counters += layout.category[cat].n_blocks * layout.counters_per_category; + + /* One uint32_t slot per counter of every block; perf_dump hands this buffer to the kernel. */ + uint32_t* counter_values = pan_kmod_dev_alloc(dev, sizeof(uint32_t) * n_counters); + if (!counter_values) { + mesa_loge("failed to allocate the perf counter buffer"); + pan_kmod_dev_free(dev, sess); + return NULL; + } + sess->base.data = counter_values; + sess->base.data_ts_supported = false; + + mesa_logd("perf session created"); + + return &(sess->base); +} + +static int +panfrost_kmod_perf_query(struct pan_kmod_perf_session *session, uint32_t enable) +{ + struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; + return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, + &perfcnt_enable); +} + +static int +panfrost_kmod_perf_enable(struct
pan_kmod_perf_session *session) +{ + return panfrost_kmod_perf_query(session, 1 /* enable */); +} + +static int +panfrost_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + return panfrost_kmod_perf_query(session, 0 /* disable */); +} + +static int +panfrost_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + struct drm_panfrost_perfcnt_dump perfcnt_dump = { + (uint64_t)(uintptr_t)session->data}; + return pan_kmod_ioctl(session->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, + &perfcnt_dump); +} + +static void +panfrost_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + /* Generally counter blocks are laid out in the following order: + * Job manager, tiler, one or more L2 caches, and one or more shader cores. + */ + unsigned l2_slices = pan_query_l2_slices(&session->dev->props); + unsigned core_id_range; + pan_query_core_count(&session->dev->props, &core_id_range); + + /* On all Bifrost architectures this is 64. 
*/ + const unsigned counters_per_cat = 64; + layout->counters_per_category = counters_per_cat; + layout->counter_stride = sizeof(uint32_t); + layout->block_stride = counters_per_cat * sizeof(uint32_t); + + /* Setup the layout; category offsets are in bytes, like the strides. */ + layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = 1; + layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = 1; + layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = l2_slices; + layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = core_id_range; + + layout->category[0].offset = 0; + for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) { + layout->category[cat_idx].offset = + layout->category[cat_idx - 1].offset + + layout->category[cat_idx - 1].n_blocks * layout->block_stride; + } +} + +static void +panfrost_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + if (session->data) + pan_kmod_dev_free(session->dev, session->data); + pan_kmod_dev_free(session->dev, session); + mesa_logd("perf session destroyed"); +} + const struct pan_kmod_ops panfrost_kmod_ops = { .dev_create = panfrost_kmod_dev_create, .dev_destroy = panfrost_kmod_dev_destroy, @@ -624,4 +728,10 @@ const struct pan_kmod_ops panfrost_kmod_ops = { .vm_bind = panfrost_kmod_vm_bind, .query_timestamp = panfrost_kmod_query_timestamp, .bo_set_label = panfrost_kmod_bo_label, + .perf_create = panfrost_kmod_perf_init, + .perf_enable = panfrost_kmod_perf_enable, + .perf_disable = panfrost_kmod_perf_disable, + .perf_dump = panfrost_kmod_perf_dump, + .perf_query_layout = panfrost_kmod_perf_query_layout, + .perf_destroy = panfrost_kmod_perf_destroy, }; From ab67e972dd0551d072e4b1ba0499fe6150682649 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Wed, 18 Mar 2026 10:41:03 +0100 Subject: [PATCH 03/38] pan/perf: Use new kmod api Use the new kmod api for everything except getting counter definition configs.
--- src/panfrost/ds/pan_pps_perf.cpp | 1 + src/panfrost/perf/pan_perf.c | 80 ++++++++++++++++---------------- src/panfrost/perf/pan_perf.h | 28 +++++------ src/panfrost/perf/quick.c | 4 +- 4 files changed, 54 insertions(+), 59 deletions(-) diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 41e89bd2e27..fce1792f55f 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -46,6 +46,7 @@ PanfrostPerf::~PanfrostPerf() { if (perf) { pan_perf_disable(perf); + pan_perf_finish(perf); ralloc_free(perf); } } diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 604e0f75a26..bd579c39137 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -1,5 +1,6 @@ /* * Copyright © 2021 Collabora, Ltd. + * Copyright © 2026 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -8,7 +9,6 @@ #include #include "util/macros.h" -#include "util/ralloc.h" #include "pan_perf.h" @@ -17,23 +17,38 @@ #include #include -#define PAN_COUNTERS_PER_CATEGORY 64 #define PAN_SHADER_CORE_INDEX 3 -uint32_t +int64_t +pan_perf_counter_read_raw(const struct pan_perf *perf, uint8_t category_index, + uint8_t block_index, uint32_t counter_index) +{ + assert(perf->session->data != NULL); + + const uint32_t val_offset = + perf->mem_layout.category[category_index].offset + + perf->mem_layout.block_stride * block_index + + perf->mem_layout.counter_stride * counter_index; + + uint8_t *val_ptr = ((uint8_t *)perf->session->data) + val_offset; + return pan_kmod_perf_load_counter(perf->session, val_ptr); +} + +int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, const struct pan_perf *perf) { - unsigned offset = perf->category_offset[counter->category_index]; - offset += counter->offset; - assert(offset < perf->n_counter_values); + int64_t ret = pan_perf_counter_read_raw(perf, counter->category_index, 0, + counter->offset); - uint32_t ret = perf->counter_values[offset]; - - // If counter belongs to 
shader core, accumulate values for all other cores + /* If counter belongs to shader core, sum values for all cores. */ if (counter->category_index == PAN_SHADER_CORE_INDEX) { - for (uint32_t core = 1; core < perf->core_id_range; ++core) { - ret += perf->counter_values[offset + PAN_COUNTERS_PER_CATEGORY * core]; + uint32_t n_cores = + perf->mem_layout.category[PAN_SHADER_CORE_INDEX].n_blocks; + for (uint32_t core = 1; core < n_cores; ++core) { + ret += pan_perf_counter_read_raw(perf, PAN_SHADER_CORE_INDEX, core, + counter->offset); + assert(ret >= 0 && "counter sum should not overflow"); } } @@ -64,6 +79,9 @@ pan_perf_init(struct pan_perf *perf, int fd) perf->dev = pan_kmod_dev_create(fd, 0, NULL); assert(perf->dev); + perf->session = pan_kmod_perf_create(perf->dev); + assert(perf->session); + struct pan_kmod_dev_props props = perf->dev->props; const struct pan_model *model = @@ -76,49 +94,29 @@ pan_perf_init(struct pan_perf *perf, int fd) if (perf->cfg == NULL) UNREACHABLE("Performance counters missing!"); - // Generally counter blocks are laid out in the following order: - // Job manager, tiler, one or more L2 caches, and one or more shader cores. 
- unsigned l2_slices = pan_query_l2_slices(&props); - pan_query_core_count(&props, &perf->core_id_range); - - uint32_t n_blocks = 2 + l2_slices + perf->core_id_range; - perf->n_counter_values = PAN_COUNTERS_PER_CATEGORY * n_blocks; - perf->counter_values = ralloc_array(perf, uint32_t, perf->n_counter_values); - - /* Setup the layout */ - perf->category_offset[0] = PAN_COUNTERS_PER_CATEGORY * 0; - perf->category_offset[1] = PAN_COUNTERS_PER_CATEGORY * 1; - perf->category_offset[2] = PAN_COUNTERS_PER_CATEGORY * 2; - perf->category_offset[3] = PAN_COUNTERS_PER_CATEGORY * (2 + l2_slices); -} - -static int -pan_perf_query(struct pan_perf *perf, uint32_t enable) -{ - struct drm_panfrost_perfcnt_enable perfcnt_enable = {enable, 0}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_ENABLE, - &perfcnt_enable); + pan_kmod_perf_query_layout(perf->session, &perf->mem_layout); } int pan_perf_enable(struct pan_perf *perf) { - return pan_perf_query(perf, 1 /* enable */); + return pan_kmod_perf_enable(perf->session); } int pan_perf_disable(struct pan_perf *perf) { - return pan_perf_query(perf, 0 /* disable */); + return pan_kmod_perf_disable(perf->session); +} + +void +pan_perf_finish(struct pan_perf *perf) +{ + pan_kmod_perf_destroy(perf->session); } int pan_perf_dump(struct pan_perf *perf) { - // Dump performance counter values to the memory buffer pointed to by - // counter_values - struct drm_panfrost_perfcnt_dump perfcnt_dump = { - (uint64_t)(uintptr_t)perf->counter_values}; - return pan_kmod_ioctl(perf->dev->fd, DRM_IOCTL_PANFROST_PERFCNT_DUMP, - &perfcnt_dump); + return pan_kmod_perf_dump(perf->session); } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index f60012b7d28..c3018af64f1 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -8,6 +8,8 @@ #include +#include + #if defined(__cplusplus) extern "C" { #endif @@ -15,12 +17,6 @@ extern "C" { #define PAN_PERF_MAX_CATEGORIES 4 #define PAN_PERF_MAX_COUNTERS 
64 -struct pan_kmod_dev; -struct pan_kmod_dev_props; -struct pan_model; -struct pan_perf_category; -struct pan_perf; - enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_CYCLES, PAN_PERF_COUNTER_UNITS_JOBS, @@ -68,19 +64,17 @@ struct pan_perf_config { struct pan_perf { struct pan_kmod_dev *dev; - unsigned core_id_range; + struct pan_kmod_perf_session *session; const struct pan_perf_config *cfg; - - // Memory where to dump counter values - uint32_t *counter_values; - uint32_t n_counter_values; - - /* Offsets of categories */ - unsigned category_offset[PAN_PERF_MAX_CATEGORIES]; + struct pan_kmod_perf_buffer_layout mem_layout; }; -uint32_t pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf); +int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, + uint8_t category_index, uint8_t block_index, + uint32_t counter_index); + +int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, + const struct pan_perf *perf); void pan_perf_init(struct pan_perf *perf, int fd); @@ -88,6 +82,8 @@ int pan_perf_enable(struct pan_perf *perf); int pan_perf_disable(struct pan_perf *perf); +void pan_perf_finish(struct pan_perf *perf); + int pan_perf_dump(struct pan_perf *perf); #if defined(__cplusplus) diff --git a/src/panfrost/perf/quick.c b/src/panfrost/perf/quick.c index 1d72ca4928f..afe114fe198 100644 --- a/src/panfrost/perf/quick.c +++ b/src/panfrost/perf/quick.c @@ -45,8 +45,8 @@ main(void) for (unsigned j = 0; j < cat->n_counters; ++j) { const struct pan_perf_counter *ctr = &cat->counters[j]; - uint32_t val = pan_perf_counter_read(ctr, perf); - printf("%s (%s): %u\n", ctr->name, ctr->symbol_name, val); + int64_t val = pan_perf_counter_read(ctr, perf); + printf("%s (%s): %lld\n", ctr->name, ctr->symbol_name, (long long)val); } printf("\n"); From d1022176658ed8f72d1867c9d0cb19a582fe25c8 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 14:52:36 +0000 Subject: [PATCH 04/38] panfrost: Add a new interrupt unit Starting
from the Mali Gx10 series, some hardware counters may indicate the number of interrupts occurring during the sampling period. Signed-off-by: Lukas Zapolskas --- src/panfrost/perf/pan_perf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index c3018af64f1..99f277e3a0e 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -33,6 +33,7 @@ enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_BYTES, PAN_PERF_COUNTER_UNITS_PIXELS, PAN_PERF_COUNTER_UNITS_ISSUES, + PAN_PERF_COUNTER_UNITS_INTERRUPTS, }; struct pan_perf_counter { From f34b53396a98980ff08cb1d44272bef3be560af0 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 14:50:13 +0000 Subject: [PATCH 05/38] panfrost: Update copyright year for new counter definitions The source files generated from counter XML files should now contain a copyright corresponding to the year of generation. Signed-off-by: Lukas Zapolskas --- src/panfrost/perf/pan_gen_perf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index f21a2589284..e65e20601ed 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -4,6 +4,7 @@ import argparse import textwrap import os +import datetime import xml.etree.ElementTree as et @@ -87,12 +88,12 @@ def main(): copyright = textwrap.dedent("""\ /* Autogenerated file, DO NOT EDIT manually! generated by {} * - * Copyright © 2021 Arm Limited - * Copyright © 2021 Collabora Ltd. + * Copyright © {year} Arm Limited + * Copyright © {year} Collabora Ltd. 
* SPDX-License-Identifier: MIT */ - """).format(os.path.basename(__file__)) + """).format(os.path.basename(__file__), year=datetime.datetime.now().year) h.write(copyright) h.write(textwrap.dedent("""\ From 0c0b2a4d38a2fb09f468bc8bfec67257237ec0f8 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 16:09:30 +0000 Subject: [PATCH 06/38] panfrost: Fix typo in pan_gen_perf.py Signed-off-by: Lukas Zapolskas --- src/panfrost/perf/pan_gen_perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index e65e20601ed..31d431d5cf7 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -41,7 +41,7 @@ class Counter: class Category: - # product Product owning the gategory + # product Product owning the category # xml XML representation of itself def __init__(self, product, xml): self.product = product From 174999920003d4d28124dd0b437044b4e4b375c1 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 15:04:24 +0000 Subject: [PATCH 07/38] panfrost: Create an enumeration for the counter categories The Mali-Gx10 series (G710, G610 and G510) introduce one new category of counters which needs to be accounted for in the setup code. Adding this into an enum ensures relevant structs are updated automatically. 
v2: - Modified generator script to use the enum Signed-off-by: Lukas Zapolskas --- src/panfrost/perf/pan_gen_perf.py | 2 +- src/panfrost/perf/pan_perf.c | 20 +++++++++----------- src/panfrost/perf/pan_perf.h | 16 ++++++++++++---- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index 31d431d5cf7..91bdd0bf8be 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -120,7 +120,7 @@ def main(): c.indent(tab_size) n_categories = len(prod.categories) - c.write("STATIC_ASSERT(%u <= PAN_PERF_MAX_CATEGORIES);" % n_categories) + c.write("STATIC_ASSERT(%u <= PAN_PERF_COUNTER_CAT_MAX);" % n_categories) n_counters = 0 for category in prod.categories: category_counters_count = len(category.counters) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index bd579c39137..1dc940b90b0 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -17,18 +17,16 @@ #include #include -#define PAN_SHADER_CORE_INDEX 3 - int64_t -pan_perf_counter_read_raw(const struct pan_perf *perf, uint8_t category_index, +pan_perf_counter_read_raw(const struct pan_perf *perf, + enum pan_perf_counter_categories category, uint8_t block_index, uint32_t counter_index) { assert(perf->session->data != NULL); - const uint32_t val_offset = - perf->mem_layout.category[category_index].offset + - perf->mem_layout.block_stride * block_index + - perf->mem_layout.counter_stride * counter_index; + const uint32_t val_offset = perf->mem_layout.category[category].offset + + perf->mem_layout.block_stride * block_index + + perf->mem_layout.counter_stride * counter_index; uint8_t *val_ptr = ((uint8_t *)perf->session->data) + val_offset; return pan_kmod_perf_load_counter(perf->session, val_ptr); @@ -42,12 +40,12 @@ pan_perf_counter_read(const struct pan_perf_counter *counter, counter->offset); /* If counter belongs to shader core, sum values for all cores. 
*/ - if (counter->category_index == PAN_SHADER_CORE_INDEX) { + if (counter->category_index == PAN_PERF_COUNTER_CAT_SHADER) { uint32_t n_cores = - perf->mem_layout.category[PAN_SHADER_CORE_INDEX].n_blocks; + perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks; for (uint32_t core = 1; core < n_cores; ++core) { - ret += pan_perf_counter_read_raw(perf, PAN_SHADER_CORE_INDEX, core, - counter->offset); + ret += pan_perf_counter_read_raw(perf, PAN_PERF_COUNTER_CAT_SHADER, + core, counter->offset); assert(ret >= 0 && "counter sum should not overflow"); } } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 99f277e3a0e..bf12478f433 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -14,9 +14,17 @@ extern "C" { #endif -#define PAN_PERF_MAX_CATEGORIES 4 #define PAN_PERF_MAX_COUNTERS 64 +enum pan_perf_counter_categories { + PAN_PERF_COUNTER_CAT_FRONTEND, + PAN_PERF_COUNTER_CAT_TILER, + PAN_PERF_COUNTER_CAT_MEMSYS, + PAN_PERF_COUNTER_CAT_SHADER, + /* Must be last. 
*/ + PAN_PERF_COUNTER_CAT_MAX, +}; + enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_CYCLES, PAN_PERF_COUNTER_UNITS_JOBS, @@ -59,7 +67,7 @@ struct pan_perf_category { struct pan_perf_config { const char *name; - struct pan_perf_category categories[PAN_PERF_MAX_CATEGORIES]; + struct pan_perf_category categories[PAN_PERF_COUNTER_CAT_MAX]; uint32_t n_categories; }; @@ -71,8 +79,8 @@ struct pan_perf { }; int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, - uint8_t category_index, uint8_t block_index, - uint32_t counter_index); + enum pan_perf_counter_categories category, + uint8_t block_index, uint32_t counter_index); int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, const struct pan_perf *perf); From 23d265928ecb2b8390f34321f3742d00924261e2 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 15:21:07 +0000 Subject: [PATCH 08/38] panfrost: Use the enum values for counter description generation Using the enum definitions prevents the category indices from getting out of sync with the block types specified in the XML.
Signed-off-by: Lukas Zapolskas --- src/panfrost/perf/pan_gen_perf.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index 91bdd0bf8be..c81d0f03182 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -26,6 +26,14 @@ class SourceFile: def outdent(self, n): self._indent -= n +CATEGORY_IDX_REMAP = { + "Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND", + "CSF": "PAN_PERF_COUNTER_CAT_FRONTEND", + "Tiler": "PAN_PERF_COUNTER_CAT_TILER", + "Memory System" : "PAN_PERF_COUNTER_CAT_MEMSYS", + "L2 Cache": "PAN_PERF_COUNTER_CAT_MEMSYS", + "Shader Core": "PAN_PERF_COUNTER_CAT_SHADER", +} class Counter: # category Category owning the counter @@ -165,7 +173,7 @@ def main(): c.write(".symbol_name = \"%s\"," % (counter.underscore_name)) c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper())) c.write(".offset = %u," % (counter.offset)) - c.write(".category_index = %u," % i) + c.write(".category_index = %s," % CATEGORY_IDX_REMAP[category.name]) c.outdent(tab_size) c.write("}, // counter") From 7618984cf5b683d25614ae12d03f71e899abf3b3 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Fri, 13 Dec 2024 17:09:41 +0000 Subject: [PATCH 09/38] panfrost/perf: Add Gx10 perfcounters Add manually created Mali-Gx10 counter definitions. v2: - Added the architecture major field. v3: - Swap the order of the shader core and memsys blocks. 
v4: - G710 -> Gx10, to indicate that all GPUs in this generation are supported Signed-off-by: Lukas Zapolskas Co-developed-by: Lars-Ivar Hesselberg Simonsen --- src/panfrost/perf/Gx10.xml | 175 ++++++++++++++++++++++++++++++++++ src/panfrost/perf/meson.build | 2 +- 2 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 src/panfrost/perf/Gx10.xml diff --git a/src/panfrost/perf/Gx10.xml b/src/panfrost/perf/Gx10.xml new file mode 100644 index 00000000000..ddf009b8f81 --- /dev/null +++ b/src/panfrost/perf/Gx10.xml @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/meson.build b/src/panfrost/perf/meson.build index 2f8257f1875..acfa0040135 100644 --- a/src/panfrost/perf/meson.build +++ b/src/panfrost/perf/meson.build @@ -4,7 +4,7 @@ pan_hw_metrics = [ 'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77', - 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'Gx10', 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', ] pan_hw_metrics_xml_files = [] From 825ac92605ab5d918284230fa06978a125502834 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 17 Apr 2026 10:30:47 +0200 Subject: [PATCH 10/38] pan/model: Update G610 perf counters --- src/panfrost/model/pan_model.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panfrost/model/pan_model.c b/src/panfrost/model/pan_model.c index f9861ace8dc..890e023c6e1 100644 --- a/src/panfrost/model/pan_model.c +++ b/src/panfrost/model/pan_model.c @@ -78,7 +78,7 @@ const struct pan_model pan_model_list[] = { MODEL_RATES(2, 4, 32)), VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), 
MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "TVIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 64)), VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 2, 16)), From 7e7d9cddc762ae9ffecaa582d495a80b9f2029f8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Dec 2025 13:54:47 +0100 Subject: [PATCH 11/38] panfrost/perf: Add Gx25 perfcounters --- src/panfrost/perf/Gx25.xml | 316 ++++++++++++++++++++++++++++++++++ src/panfrost/perf/meson.build | 2 +- src/panfrost/perf/pan_perf.h | 2 +- 3 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 src/panfrost/perf/Gx25.xml diff --git a/src/panfrost/perf/Gx25.xml b/src/panfrost/perf/Gx25.xml new file mode 100644 index 00000000000..0120d16838f --- /dev/null +++ b/src/panfrost/perf/Gx25.xml @@ -0,0 +1,316 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/meson.build b/src/panfrost/perf/meson.build index acfa0040135..f8418af6e90 100644 --- a/src/panfrost/perf/meson.build +++ b/src/panfrost/perf/meson.build @@ -4,7 +4,7 @@ pan_hw_metrics = [ 'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77', - 'Gx10', 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'Gx10', 
'Gx25', 'G78', 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', ] pan_hw_metrics_xml_files = [] diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index bf12478f433..7cbc4a9a20c 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -14,7 +14,7 @@ extern "C" { #endif -#define PAN_PERF_MAX_COUNTERS 64 +#define PAN_PERF_MAX_COUNTERS 128 enum pan_perf_counter_categories { PAN_PERF_COUNTER_CAT_FRONTEND, From 226669227ebf4fdd6276d880de36f306a1ea3a24 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Mon, 24 Mar 2025 19:57:27 +0000 Subject: [PATCH 12/38] panfrost: Move PanfrostDevice to a separate file Put PanfrostDevice into its own file to keep pan_pps_perf.cpp focused on the panfrost-specific producer implementation. Signed-off-by: Lukas Zapolskas --- src/panfrost/ds/meson.build | 1 + src/panfrost/ds/pan_pps_dev.cpp | 36 ++++++++++++++++++++++++++++++++ src/panfrost/ds/pan_pps_perf.cpp | 20 ------------------ 3 files changed, 37 insertions(+), 20 deletions(-) create mode 100644 src/panfrost/ds/pan_pps_dev.cpp diff --git a/src/panfrost/ds/meson.build b/src/panfrost/ds/meson.build index b61b02ee248..8f8f5ccae5d 100644 --- a/src/panfrost/ds/meson.build +++ b/src/panfrost/ds/meson.build @@ -5,6 +5,7 @@ pps_panfrost_sources = [ 'pan_pps_perf.cpp', + 'pan_pps_dev.cpp', 'pan_pps_driver.cpp' ] diff --git a/src/panfrost/ds/pan_pps_dev.cpp b/src/panfrost/ds/pan_pps_dev.cpp new file mode 100644 index 00000000000..065a9c46b7d --- /dev/null +++ b/src/panfrost/ds/pan_pps_dev.cpp @@ -0,0 +1,36 @@ +/* + * Copyright © 2021 Collabora, Ltd.
+ * SPDX-License-Identifier: MIT + */ + +#include "pan_pps_perf.h" + +#include +#include + +#include +#include + +namespace pps { +PanfrostDevice::PanfrostDevice(int fd): fd(fd) +{ + assert(fd >= 0); +} + +PanfrostDevice::~PanfrostDevice() +{ +} + +PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd} +{ + o.fd = -1; +} + +PanfrostDevice & +PanfrostDevice::operator=(PanfrostDevice &&o) +{ + std::swap(fd, o.fd); + return *this; +} + +} // namespace pps diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index fce1792f55f..f801779c18e 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -12,26 +12,6 @@ #include namespace pps { -PanfrostDevice::PanfrostDevice(int fd): fd(fd) -{ - assert(fd >= 0); -} - -PanfrostDevice::~PanfrostDevice() -{ -} - -PanfrostDevice::PanfrostDevice(PanfrostDevice &&o): fd{o.fd} -{ - o.fd = -1; -} - -PanfrostDevice & -PanfrostDevice::operator=(PanfrostDevice &&o) -{ - std::swap(fd, o.fd); - return *this; -} PanfrostPerf::PanfrostPerf(const PanfrostDevice &dev) : perf{reinterpret_cast( From 5c32d45ede2efab8c9a24e5a4aeb57a86373e9c5 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Wed, 19 Mar 2025 16:28:11 +0000 Subject: [PATCH 13/38] pps: Add the Primitive, Instruction, Pixel and Fragment unit types The Perfetto spec supports several units that are supported directly by Mali performance counters, which are not being expressed in the data source. 
Signed-off-by: Lukas Zapolskas --- src/tool/pps/pps_counter.h | 4 ++++ src/tool/pps/pps_datasource.cc | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/tool/pps/pps_counter.h b/src/tool/pps/pps_counter.h index 7b032be81cb..c8f57b9d4b9 100644 --- a/src/tool/pps/pps_counter.h +++ b/src/tool/pps/pps_counter.h @@ -40,6 +40,10 @@ class Counter Byte, Hertz, None, + Primitive, + Instruction, + Pixel, + Fragment }; using Value = std::variant; diff --git a/src/tool/pps/pps_datasource.cc b/src/tool/pps/pps_datasource.cc index 32223ad03df..3cc494b3409 100644 --- a/src/tool/pps/pps_datasource.cc +++ b/src/tool/pps/pps_datasource.cc @@ -183,6 +183,18 @@ template void add_descriptors(GpuCounterDescript case Counter::Units::None: units = GpuCounterDescriptor::NONE; break; + case Counter::Units::Primitive: + units = GpuCounterDescriptor::PRIMITIVE; + break; + case Counter::Units::Instruction: + units = GpuCounterDescriptor::INSTRUCTION; + break; + case Counter::Units::Pixel: + units = GpuCounterDescriptor::PIXEL; + break; + case Counter::Units::Fragment: + units = GpuCounterDescriptor::FRAGMENT; + break; default: assert(false && "Missing counter units type!"); break; From 34131f71de5cbf462195f15176ef08fc681f4610 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 14 Jan 2026 11:18:56 +0100 Subject: [PATCH 14/38] pan/perf: Don't count on category definition order in the XML --- src/panfrost/perf/pan_gen_perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index c81d0f03182..43f058aa862 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -154,7 +154,7 @@ def main(): for i in range(0, len(prod.categories)): category = prod.categories[i] - c.write("{") + c.write("[%s] = {" % CATEGORY_IDX_REMAP[category.name]) c.indent(tab_size) c.write(".name = \"%s\"," % (category.name)) c.write(".n_counters = %u," % (len(category.counters))) From 
e05a94a1de403b2cedb81793033fab1be775be86 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Thu, 12 Dec 2024 15:38:08 +0000 Subject: [PATCH 15/38] drm-uapi: Add panthor performance counter uAPI Add the panthor performance counter uAPI, added in v5 of the patch series "Add performance counters with manual sampling mode", based on the drm-misc-next kernel, base commit 96c85e428ebaeacd2c640eba075479ab92072ccd v2: - the series is now based on the v5 of the kernel patch --- include/drm-uapi/panthor_drm.h | 568 +++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+) diff --git a/include/drm-uapi/panthor_drm.h b/include/drm-uapi/panthor_drm.h index e238c6264fa..174f39dc2da 100644 --- a/include/drm-uapi/panthor_drm.h +++ b/include/drm-uapi/panthor_drm.h @@ -154,6 +154,9 @@ enum drm_panthor_ioctl_id { * This is useful for imported BOs. */ DRM_PANTHOR_BO_QUERY_INFO, + + /** @DRM_PANTHOR_PERF_CONTROL: Control a performance counter session. */ + DRM_PANTHOR_PERF_CONTROL, }; /** @@ -253,6 +256,9 @@ enum drm_panthor_dev_query_type { * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. */ DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_PERF_INFO: Query performance counter interface information. */ + DRM_PANTHOR_DEV_QUERY_PERF_INFO, }; /** @@ -445,6 +451,138 @@ struct drm_panthor_group_priorities_info { __u8 pad[3]; }; +/** + * enum drm_panthor_perf_feat_flags - Performance counter configuration feature flags. + */ +enum drm_panthor_perf_feat_flags { + /** @DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT: Coarse-grained block states are supported. */ + DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT = 1 << 0, +}; + +/** + * enum drm_panthor_perf_block_type - Performance counter supported block types. + */ +enum drm_panthor_perf_block_type { + /** @DRM_PANTHOR_PERF_BLOCK_METADATA: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_METADATA = 0, + + /** @DRM_PANTHOR_PERF_BLOCK_FW: The FW counter block. 
*/ + DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_CSHW: The CSHW counter block. */ + DRM_PANTHOR_PERF_BLOCK_CSHW, + + /** @DRM_PANTHOR_PERF_BLOCK_TILER: The tiler counter block. */ + DRM_PANTHOR_PERF_BLOCK_TILER, + + /** @DRM_PANTHOR_PERF_BLOCK_MEMSYS: A memsys counter block. */ + DRM_PANTHOR_PERF_BLOCK_MEMSYS, + + /** @DRM_PANTHOR_PERF_BLOCK_SHADER: A shader core counter block. */ + DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_FIRST: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_FIRST = DRM_PANTHOR_PERF_BLOCK_FW, + + /** @DRM_PANTHOR_PERF_BLOCK_LAST: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_LAST = DRM_PANTHOR_PERF_BLOCK_SHADER, + + /** @DRM_PANTHOR_PERF_BLOCK_MAX: Internal use only. */ + DRM_PANTHOR_PERF_BLOCK_MAX = DRM_PANTHOR_PERF_BLOCK_LAST + 1, +}; + +/** + * enum drm_panthor_perf_clock - Identifier of the clock used to produce the cycle count values + * in a given block. + * + * Since the integrator has the choice of using one or more clocks, there may be some confusion + * as to which blocks are counted by which clock values unless this information is explicitly + * provided as part of every block sample. Not every single clock here can be used: in the simplest + * case, all cycle counts will be associated with the top-level clock. + */ +enum drm_panthor_perf_clock { + /** @DRM_PANTHOR_PERF_CLOCK_TOPLEVEL: Top-level CSF clock. */ + DRM_PANTHOR_PERF_CLOCK_TOPLEVEL, + + /** + * @DRM_PANTHOR_PERF_CLOCK_COREGROUP: Core group clock, responsible for the MMU, L2 + * caches and the tiler. + */ + DRM_PANTHOR_PERF_CLOCK_COREGROUP, + + /** @DRM_PANTHOR_PERF_CLOCK_SHADER: Clock for the shader cores. */ + DRM_PANTHOR_PERF_CLOCK_SHADER, +}; + +/** + * struct drm_panthor_perf_info - Performance counter interface information + * + * Structure grouping all queryable information relating to the performance counter + * interfaces. 
+ */ +struct drm_panthor_perf_info { + /** + * @counters_per_block: The number of 8-byte counters available in a block. + */ + __u32 counters_per_block; + + /** + * @sample_header_size: The size of the header struct available at the beginning + * of every sample. + */ + __u32 sample_header_size; + + /** + * @block_header_size: The size of the header struct inline with the counters for a + * single block. + */ + __u32 block_header_size; + + /** + * @sample_size: The size of a fully annotated sample, starting with a sample header + * of size @sample_header_size bytes, and all available blocks for the current + * configuration, each comprised of @counters_per_block 64-bit counters and + * a block header of @block_header_size bytes. + * + * The user must use this field to allocate size for the ring buffer. In + * the case of new blocks being added, an old userspace can always use + * this field and ignore any blocks it does not know about. + */ + __u32 sample_size; + + /** @flags: Combination of drm_panthor_perf_feat_flags flags. */ + __u32 flags; + + /** + * @supported_clocks: Bitmask of the clocks supported by the GPU. + * + * Each bit represents a variant of the enum drm_panthor_perf_clock. + * + * For the same GPU, different implementers may have different clocks for the same hardware + * block. At the moment, up to three clocks are supported, and any clocks that are present + * will be reported here. + */ + __u32 supported_clocks; + + /** @fw_blocks: Number of FW blocks available. */ + __u32 fw_blocks; + + /** @cshw_blocks: Number of CSHW blocks available. */ + __u32 cshw_blocks; + + /** @tiler_blocks: Number of tiler blocks available. */ + __u32 tiler_blocks; + + /** @memsys_blocks: Number of memsys blocks available. */ + __u32 memsys_blocks; + + /** @shader_blocks: Number of shader core blocks available. */ + __u32 shader_blocks; + + /** @pad: MBZ. 
*/ + __u32 pad; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ @@ -1187,6 +1325,434 @@ struct drm_panthor_bo_query_info { __u32 pad; }; +/** + * DOC: Performance counter decoding in userspace. + * + * Each sample will be exposed to userspace in the following manner: + * + * +--------+--------+------------------------+--------+-------------------------+-----+ + * | Sample | Block | Block | Block | Block | ... | + * | header | header | counters | header | counters | | + * +--------+--------+------------------------+--------+-------------------------+-----+ + * + * Each sample will start with a sample header of type @struct drm_panthor_perf_sample header, + * providing sample-wide information like the start and end timestamps, the counter set currently + * configured, and any errors that may have occurred during sampling. + * + * After the fixed size header, the sample will consist of blocks of + * 64-bit @drm_panthor_dev_query_perf_info::counters_per_block counters, each prefaced with a + * header of its own, indicating source block type, as well as the cycle count needed to normalize + * cycle values within that block, and a clock source identifier. + */ + +/** + * enum drm_panthor_perf_block_state - Bitmask of the power and execution states that an individual + * hardware block went through in a sampling period. + * + * Because the sampling period is controlled from userspace, the block may undergo multiple + * state transitions, so this must be interpreted as one or more such transitions occurring. + */ +enum drm_panthor_perf_block_state { + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN: The state of this block was unknown during + * the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN = 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_ON: This block was powered on for some or all of + * the sampling period. 
+ */ + DRM_PANTHOR_PERF_BLOCK_STATE_ON = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_OFF: This block was powered off for some or all of the + * sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_OFF = 1 << 1, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE: This block was available for execution for + * some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE = 1 << 2, + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE: This block was unavailable for execution for + * some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE = 1 << 3, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL: This block was executing in normal mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL = 1 << 4, + + /** + * @DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED: This block was executing in protected mode + * for some or all of the sampling period. + */ + DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED = 1 << 5, +}; + +/** + * struct drm_panthor_perf_block_header - Header present before every block in the + * sample ringbuffer. + */ +struct drm_panthor_perf_block_header { + /** @block_type: Type of the block. */ + __u8 block_type; + + /** @block_idx: Block index. */ + __u8 block_idx; + + /** + * @block_states: Coarse-grained block transitions, bitmask of enum + * drm_panthor_perf_block_states. + */ + __u8 block_states; + + /** + * @clock: Clock used to produce the cycle count for this block, taken from + * enum drm_panthor_perf_clock. The cycle counts are stored in the sample header. + */ + __u8 clock; + + /** @pad: MBZ. */ + __u8 pad[4]; + + /** @enable_mask: Bitmask of counters requested during the session setup. */ + __u64 enable_mask[2]; +}; + +/** + * enum drm_panthor_perf_sample_flags - Sample-wide events that occurred over the sampling + * period. 
+ */ +enum drm_panthor_perf_sample_flags { + /** + * @DRM_PANTHOR_PERF_SAMPLE_OVERFLOW: This sample contains overflows due to the duration + * of the sampling period. + */ + DRM_PANTHOR_PERF_SAMPLE_OVERFLOW = 1 << 0, + + /** + * @DRM_PANTHOR_PERF_SAMPLE_ERROR: This sample encountered an error condition during + * the sample duration. + */ + DRM_PANTHOR_PERF_SAMPLE_ERROR = 1 << 1, +}; + +/** + * struct drm_panthor_perf_sample_header - Header present before every sample. + */ +struct drm_panthor_perf_sample_header { + /** + * @timestamp_start_ns: Earliest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. + */ + __u64 timestamp_start_ns; + + /** + * @timestamp_end_ns: Latest timestamp that values in this sample represent, in + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. + */ + __u64 timestamp_end_ns; + + /** @block_set: Set of performance counter blocks. */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[3]; + + /** @flags: Current sample flags, combination of drm_panthor_perf_sample_flags. */ + __u32 flags; + + /** + * @user_data: User data provided as part of the command that triggered this sample. + * + * - Automatic samples (periodic ones or those around non-counting periods or power state + * transitions) will be tagged with the user_data provided as part of the + * DRM_PANTHOR_PERF_COMMAND_START call. + * - Manual samples will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_SAMPLE call. + * - A session's final automatic sample will be tagged with the user_data provided with the + * DRM_PANTHOR_PERF_COMMAND_STOP call. + */ + __u64 user_data; + + /** + * @toplevel_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the top-level clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. 
+ */ + __u64 toplevel_clock_cycles; + + /** + * @coregroup_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the coregroup clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 coregroup_clock_cycles; + + /** + * @shader_clock_cycles: The number of cycles elapsed between + * drm_panthor_perf_sample_header::timestamp_start_ns and + * drm_panthor_perf_sample_header::timestamp_end_ns on the shader core clock if the + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. + */ + __u64 shader_clock_cycles; +}; + +/** + * enum drm_panthor_perf_command - Command type passed to the DRM_PANTHOR_PERF_CONTROL + * IOCTL. + */ +enum drm_panthor_perf_command { + /** @DRM_PANTHOR_PERF_COMMAND_SETUP: Create a new performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_SETUP, + + /** @DRM_PANTHOR_PERF_COMMAND_TEARDOWN: Teardown a performance counter sampling context. */ + DRM_PANTHOR_PERF_COMMAND_TEARDOWN, + + /** @DRM_PANTHOR_PERF_COMMAND_START: Start a sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_START, + + /** @DRM_PANTHOR_PERF_COMMAND_STOP: Stop the sampling session on the indicated context. */ + DRM_PANTHOR_PERF_COMMAND_STOP, + + /** + * @DRM_PANTHOR_PERF_COMMAND_SAMPLE: Request a manual sample on the indicated context. + * + * When the sampling session is configured with a non-zero sampling frequency, any + * DRM_PANTHOR_PERF_CONTROL calls with this command will be ignored and return an + * -EINVAL. + */ + DRM_PANTHOR_PERF_COMMAND_SAMPLE, +}; + +/** + * struct drm_panthor_perf_control - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL. + */ +struct drm_panthor_perf_control { + /** @cmd: Command from enum drm_panthor_perf_command. */ + __u32 cmd; + + /** + * @handle: session handle. + * + * Returned by the DRM_PANTHOR_PERF_COMMAND_SETUP call. 
+ * It must be used in subsequent commands for the same context. + */ + __u32 handle; + + /** + * @size: size of the command structure. + * + * If the pointer is NULL, the size is updated by the driver to provide the size of the + * output structure. If the pointer is not NULL, the driver will only copy min(size, + * struct_size) to the pointer and update the size accordingly. + */ + __u64 size; + + /** + * @pointer: user pointer to a command type struct, such as + * @struct drm_panthor_perf_cmd_start. + */ + __u64 pointer; +}; + +/** + * enum drm_panthor_perf_counter_set - The counter set to be requested from the hardware. + * + * The hardware supports a single performance counter set at a time, so requesting any set other + * than the primary may fail if another process is sampling at the same time. + * + * If in doubt, the primary counter set has the most commonly used counters and requires no + * additional permissions to open. + */ +enum drm_panthor_perf_counter_set { + /** + * @DRM_PANTHOR_PERF_SET_PRIMARY: The default set configured on the hardware. + * + * This is the only set for which all counters in all blocks are defined. + */ + DRM_PANTHOR_PERF_SET_PRIMARY, + + /** + * @DRM_PANTHOR_PERF_SET_SECONDARY: The secondary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will + * have the UNAVAILABLE block state permanently set in the block header. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. + */ + DRM_PANTHOR_PERF_SET_SECONDARY, + + /** + * @DRM_PANTHOR_PERF_SET_TERTIARY: The tertiary performance counter set. + * + * Some blocks may not have any defined counters for this set, and the block will have + * the UNAVAILABLE block state permanently set in the block header. Note that the + * tertiary set has the fewest defined counter blocks. + * + * Accessing this set requires the calling process to have the CAP_PERFMON capability. 
+ */ + DRM_PANTHOR_PERF_SET_TERTIARY, +}; + +/** + * struct drm_panthor_perf_ringbuf_control - Struct used to map in the ring buffer control indices + * into memory shared between user and kernel. + * + */ +struct drm_panthor_perf_ringbuf_control { + /** + * @extract_idx: The index of the latest sample that was processed by userspace. Only + * modifiable by userspace. + */ + __u64 extract_idx; + + /** + * @insert_idx: The index of the latest sample emitted by the kernel. Only modifiable by + * the kernel. + */ + __u64 insert_idx; +}; + +/** + * struct drm_panthor_perf_cmd_setup - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SETUP command is specified. + */ +struct drm_panthor_perf_cmd_setup { + /** + * @block_set: Set of performance counter blocks, member of + * enum drm_panthor_perf_block_set. + * + * This is a global configuration and only one set can be active at a time. If + * another client has already requested a counter set, any further requests + * for a different counter set will fail and return an -EBUSY. + * + * If the requested set does not exist, the request will fail and return an -EINVAL. + * + * Some sets have additional requirements to be enabled, and the setup request will + * fail with an -EACCES if these requirements are not satisfied. + */ + __u8 block_set; + + /** @pad: MBZ. */ + __u8 pad[7]; + + /** @fd: eventfd for signalling the availability of a new sample. */ + __u32 fd; + + /** @ringbuf_handle: Handle to the BO to write perf counter sample to. */ + __u32 ringbuf_handle; + + /** + * @control_handle: Handle to the BO containing a contiguous 16 byte range, used for the + * insert and extract indices for the ringbuffer. + */ + __u32 control_handle; + + /** + * @sample_slots: The number of slots available in the userspace-provided BO. Must be + * a power of 2. + * + * If sample_slots * sample_size does not match the BO size, the setup request will fail. 
+ */ + __u32 sample_slots; + + /** + * @control_offset: Offset into the control BO where the insert and extract indices are + * located. + */ + __u64 control_offset; + + /** + * @sample_freq_ns: Period between automatic counter sample collection in nanoseconds. Zero + * disables automatic collection and all collection must be done through explicit calls + * to DRM_PANTHOR_PERF_CONTROL.SAMPLE. Non-zero values will disable manual counter sampling + * via the DRM_PANTHOR_PERF_COMMAND_SAMPLE command. + * + * This disables software-triggered periodic sampling, but hardware will still trigger + * automatic samples on certain events, including shader core power transitions, and + * entries to and exits from non-counting periods. The final stop command will also + * trigger a sample to ensure no data is lost. + */ + __u64 sample_freq_ns; + + /** + * @fw_enable_mask: Bitmask of counters to request from the FW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 fw_enable_mask[2]; + + /** + * @cshw_enable_mask: Bitmask of counters to request from the CSHW counter block. Any bits + * past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 cshw_enable_mask[2]; + + /** + * @tiler_enable_mask: Bitmask of counters to request from the tiler counter block. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit + * 0 corresponds to counter 0. + */ + __u64 tiler_enable_mask[2]; + + /** + * @memsys_enable_mask: Bitmask of counters to request from the memsys counter blocks. Any + * bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. Bit 0 + * corresponds to counter 0. + */ + __u64 memsys_enable_mask[2]; + + /** + * @shader_enable_mask: Bitmask of counters to request from the shader core counter blocks. 
+ * Any bits past the first drm_panthor_perf_info.counters_per_block bits will be ignored. + * Bit 0 corresponds to counter 0. + */ + __u64 shader_enable_mask[2]; +}; + +/** + * struct drm_panthor_perf_cmd_start - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_START command is specified. + */ +struct drm_panthor_perf_cmd_start { + /** + * @user_data: User provided data that will be attached to automatic samples collected + * until the next DRM_PANTHOR_PERF_COMMAND_STOP. + */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_stop - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_STOP command is specified. + */ +struct drm_panthor_perf_cmd_stop { + /** + * @user_data: User provided data that will be attached to the automatic sample collected + * at the end of this sampling session. + */ + __u64 user_data; +}; + +/** + * struct drm_panthor_perf_cmd_sample - Arguments passed to DRM_PANTHOR_IOCTL_PERF_CONTROL + * when the DRM_PANTHOR_PERF_COMMAND_SAMPLE command is specified. + */ +struct drm_panthor_perf_cmd_sample { + /** @user_data: User provided data that will be attached to the sample.*/ + __u64 user_data; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. 
@@ -1237,6 +1803,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), DRM_IOCTL_PANTHOR_BO_QUERY_INFO = DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), + DRM_IOCTL_PANTHOR_PERF_CONTROL = + DRM_IOCTL_PANTHOR(WR, PERF_CONTROL, perf_control) }; #if defined(__cplusplus) From 8d7387deea8cb95f878f1909484ba4576e100531 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 17:21:00 +0100 Subject: [PATCH 16/38] pan/kmod: Implement panthor kmod perf counter methods Co-Authored-by: Lukas Zapolskas --- src/panfrost/lib/kmod/panthor_kmod.c | 513 +++++++++++++++++++++++++++ 1 file changed, 513 insertions(+) diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c index c25f2315e54..2fb06e6bfd2 100644 --- a/src/panfrost/lib/kmod/panthor_kmod.c +++ b/src/panfrost/lib/kmod/panthor_kmod.c @@ -1,5 +1,6 @@ /* * Copyright © 2023 Collabora, Ltd. + * Copyright © 2026 Arm, Ltd. * SPDX-License-Identifier: MIT */ @@ -7,6 +8,7 @@ #include #include #include +#include #include "util/hash_table.h" #include "util/libsync.h" @@ -20,6 +22,8 @@ #include "drm-uapi/dma-buf.h" #include "drm-uapi/panthor_drm.h" +#include "util/timespec.h" + #include "pan_kmod_backend.h" #include "pan_props.h" @@ -103,6 +107,43 @@ struct panthor_kmod_bo { } sync; }; +struct panthor_kmod_perf_session { + struct pan_kmod_perf_session base; + + struct { + int event; + } fds; + int session_handle; + + struct { + int ringbuf; + int control; + } bos; + + struct { + size_t sample; + size_t block; + size_t ringbuf; + size_t control; + size_t sample_header; + size_t block_header; + } sizes; + + struct { + size_t cshw_blocks; + size_t tiler_blocks; + size_t memsys_blocks; + size_t shader_blocks; + } config; + + bool session_initialized; + bool active; + uint8_t set; + uint64_t sample_idx; + uint8_t *ringbuffer; + struct drm_panthor_perf_ringbuf_control *ctrl; +}; + static uint32_t to_kmod_group_allow_priority_flags(uint32_t panthor_flags) { @@ -1302,6 +1343,472 @@ 
panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
       mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno);
 }

+/* ================ PERF COUNTERS ================= */
+
+#define PANTHOR_SAMPLE_SLOTS (32)
+#define PANTHOR_POLL_TIMEOUT_SEC (10)
+#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
+
+struct panthor_perf_sample {
+   struct drm_panthor_perf_sample_header sample_header;
+   uint8_t bytes[];
+};
+
+static int
+perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set)
+{
+   struct drm_panthor_perf_cmd_setup setup = {
+      .fd = eventfd,
+      .block_set = set,
+      .ringbuf_handle = ringbuf_handle,
+      .control_handle = control_handle,
+      .sample_slots = PANTHOR_SAMPLE_SLOTS,
+      .cshw_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .tiler_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .memsys_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .shader_enable_mask = { UINT64_MAX, UINT64_MAX },
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_SETUP,
+      .size = sizeof(setup),
+      .pointer = PTR_TO_U64(&setup),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+static int
+perf_cmd_start(int fd, int sid, uint64_t user_data)
+{
+   struct drm_panthor_perf_cmd_start start = {
+      .user_data = user_data,
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_START,
+      .handle = sid,
+      .size = sizeof(start),
+      .pointer = PTR_TO_U64(&start),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+/* Issue DRM_PANTHOR_PERF_COMMAND_STOP on session `sid`.
+ *
+ * `user_data` is forwarded in the command payload, like perf_cmd_start()
+ * and perf_cmd_sample() do, so the sample emitted at the end of the
+ * session carries the caller's tag instead of always being tagged 0.
+ *
+ * Returns the drmIoctl() result.
+ */
+static int
+perf_cmd_stop(int fd, int sid, uint64_t user_data)
+{
+   struct drm_panthor_perf_cmd_stop stop = {
+      .user_data = user_data,
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_STOP,
+      .handle = sid,
+      .size = sizeof(stop),
+      .pointer = PTR_TO_U64(&stop),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+static int
+perf_cmd_sample(int fd, int sid, uint64_t user_data)
+{
+   struct
drm_panthor_perf_cmd_sample sample = {
+      .user_data = user_data,
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE,
+      .handle = sid,
+      .size = sizeof(sample),
+      .pointer = PTR_TO_U64(&sample),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+#define DUMMY_PTR ((uint8_t *)1)
+
+static int
+perf_cmd_teardown(int fd, int sid)
+{
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
+      .handle = sid,
+   };
+
+   int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+
+   return ret;
+}
+
+static int
+unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size)
+{
+   if (addr)
+      munmap(addr, size);
+
+   struct drm_gem_close ringbuf_close = {
+      .handle = handle,
+   };
+   return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &ringbuf_close);
+}
+
+/* Create a BO of `size` bytes and map it read/write into the process.
+ *
+ * On success, stores the GEM handle in *handle and the CPU mapping in
+ * *mapping, and returns 0. On failure, returns a non-zero value and leaves
+ * both outputs untouched; a BO that was already created is closed again.
+ */
+static int
+create_and_map_bo(int fd, size_t size, int *handle, void **mapping)
+{
+   struct drm_panthor_bo_create bo = {
+      .size = size,
+   };
+   int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo);
+   if (ret)
+      return -EINVAL;
+
+   struct drm_panthor_bo_mmap_offset offset = {
+      .handle = bo.handle,
+   };
+   ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset);
+   if (ret)
+      goto term_bo;
+
+   void *map = mmap(0, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)offset.offset);
+   if (!map || map == MAP_FAILED) {
+      ret = -EINVAL;
+      goto term_bo;
+   }
+
+   *handle = bo.handle;
+   *mapping = map;
+
+   return 0;
+term_bo:
+   /* Close the BO, but report the failure that brought us here: returning
+    * the GEM_CLOSE result would turn a failed map into a "success" with
+    * *handle and *mapping never written.
+    */
+   unmap_and_teardown_bo(fd, bo.handle, NULL, 0);
+   return ret;
+}
+
+static int
+poll_for_sample(int poll_fd)
+{
+   int ret;
+   eventfd_t tmp;
+   struct pollfd pfd[1] = {
+      {
+         .fd = poll_fd,
+         .events = POLLIN
+      }
+   };
+   struct timespec timeout = {
+      .tv_sec = PANTHOR_POLL_TIMEOUT_SEC,
+   };
+   struct timespec now, result, deadline;
+
+   clock_gettime(CLOCK_MONOTONIC, &now);
+   timespec_add(&deadline, &now, &timeout);
+
+   do {
+      clock_gettime(CLOCK_MONOTONIC, &now);
+      timespec_sub_saturate(&result, &deadline, &now);
+      ret =
ppoll(pfd, 1, &result, NULL);
+   } while (ret == -1 && errno == EINTR);
+
+   if (ret < 0)
+      return ret;
+
+   /* ppoll() returns 0 when the deadline expired without the eventfd
+    * becoming readable. The eventfd is blocking, so reading it now would
+    * hang forever and defeat the timeout; report the timeout instead.
+    */
+   if (ret == 0) {
+      errno = ETIMEDOUT;
+      return -1;
+   }
+
+   return eventfd_read(poll_fd, &tmp);
+}
+
+static uint64_t
+read_extract_idx(struct panthor_kmod_perf_session *perf)
+{
+   return p_atomic_read(&perf->ctrl->extract_idx);
+}
+
+static void
+write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
+{
+   p_atomic_set(&perf->ctrl->extract_idx, idx);
+}
+
+static uint64_t
+read_insert_idx(struct panthor_kmod_perf_session *perf)
+{
+   return p_atomic_read(&perf->ctrl->insert_idx);
+}
+
+/* Allocate a perf session for `dev`: query the perf counter layout from the
+ * kernel, create the notification eventfd, and create and map the ring
+ * buffer and control BOs. Returns the base session object, or NULL on
+ * failure.
+ */
+static inline struct pan_kmod_perf_session *
+panthor_kmod_perf_init(struct pan_kmod_dev *dev)
+{
+   UNUSED struct panthor_kmod_dev *panthor_dev =
+      container_of(dev, struct panthor_kmod_dev, base);
+
+   struct panthor_kmod_perf_session *sess =
+      pan_kmod_dev_alloc(dev, sizeof(*sess));
+   if (!sess) {
+      mesa_loge("failed to allocate a panthor_kmod_perf_session object");
+      return NULL;
+   }
+
+   sess->base.dev = dev;
+
+   struct drm_panthor_gpu_info gpu_info = {};
+   struct drm_panthor_dev_query query = {
+      .type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
+      .size = sizeof(gpu_info),
+      .pointer = (uint64_t)(uintptr_t)&gpu_info,
+   };
+
+   int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
+   if (ret)
+      goto free_perf;
+
+   struct drm_panthor_perf_info perf_info = {};
+
+   query = (struct drm_panthor_dev_query) {
+      .type = DRM_PANTHOR_DEV_QUERY_PERF_INFO,
+      .size = sizeof(perf_info),
+      .pointer = (uint64_t)(uintptr_t)&perf_info,
+   };
+
+   ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
+   if (ret)
+      goto free_perf;
+
+   /* eventfd() signals failure with -1; 0 is a valid file descriptor. */
+   sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
+   if (sess->fds.event < 0)
+      goto free_perf;
+
+   const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) +
+                             perf_info.block_header_size;
+   const size_t sample_size = perf_info.sample_size;
+   const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS;
+
+   sess->sizes.block = block_size;
+   sess->sizes.sample = sample_size;
+
sess->sizes.ringbuf = buffer_size;
+   sess->sizes.control = sizeof(*sess->ctrl);
+   sess->sizes.sample_header = perf_info.sample_header_size;
+   sess->sizes.block_header = perf_info.block_header_size;
+
+   if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header))
+      fprintf(stderr, "panfrost perf sample header size mismatch!");
+
+   if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header))
+      fprintf(stderr, "panfrost perf block header size mismatch!");
+
+   sess->config.cshw_blocks = perf_info.cshw_blocks;
+   sess->config.tiler_blocks = perf_info.tiler_blocks;
+   sess->config.memsys_blocks = perf_info.memsys_blocks;
+   sess->config.shader_blocks = perf_info.shader_blocks;
+
+   void *buf_map;
+   ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map);
+   if (ret)
+      goto free_eventfd;
+
+   sess->ringbuffer = buf_map;
+   sess->base.data = buf_map;
+   sess->base.data_ts_supported = true;
+
+   void *control_map;
+   ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map);
+   if (ret)
+      goto free_ringbuf;
+
+   sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map;
+
+   sess->set = 0; /* TODO should we make it configurable? */
+   sess->active = false;
+   sess->session_initialized = false;
+
+   return &(sess->base);
+
+free_ringbuf:
+   unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf);
+free_eventfd:
+   close(sess->fds.event);
+free_perf:
+   /* The session came from pan_kmod_dev_alloc(), so hand it back to the
+    * device allocator rather than to ralloc, which never owned it.
+    */
+   pan_kmod_dev_free(dev, sess);
+   return NULL;
+}
+
+static int
+panthor_kmod_perf_enable(struct pan_kmod_perf_session *session)
+{
+   UNUSED struct panthor_kmod_perf_session *psess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   /* The session cannot be created outside of the sampling thread. */
+   if (!psess->session_initialized) {
*/ + if (!psess->session_initialized) { + int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event, psess->bos.ringbuf, + psess->bos.control, psess->set); + + if (session_handle < 0) + return -EINVAL; + + psess->session_handle = session_handle; + psess->session_initialized = true; + } + + int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++); + if (ret) + return ret; + + psess->active = true; + + return 0; +} + +static int +panthor_kmod_perf_disable(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *sess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret = perf_cmd_stop(sess->base.dev->fd, sess->session_handle, sess->sample_idx++); + if (ret) + return ret; + + sess->active = false; + + ret = poll_for_sample(sess->fds.event); + if (ret) + return ret; + + return 0; +} + +static int +panthor_perf_sample(struct panthor_kmod_perf_session *perf) +{ + const uint64_t insert_idx = read_insert_idx(perf); + const uint64_t extract_idx = read_extract_idx(perf); + + // If there's an outstanding sample, discard it + if (insert_idx != extract_idx) + write_extract_idx(perf, insert_idx); + + // Request a new sample, which will increment the insert idx + int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle, perf->sample_idx++); + if (ret) + return ret; + + ret = poll_for_sample(perf->fds.event); + if (ret) + return ret; + + return 0; +} + +static uint8_t *get_base_addr(uint8_t *buf, size_t idx, size_t stride) +{ + return buf + idx * stride; +} + +static inline struct panthor_perf_sample *perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx) +{ + return (struct panthor_perf_sample *)get_base_addr(perf->ringbuffer, idx % PANTHOR_SAMPLE_SLOTS, perf->sizes.sample); /* The ring buffer holds PANTHOR_SAMPLE_SLOTS samples, so wrap the index into range. */ +} + +static uint64_t +panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf) +{ + const uint64_t extract_idx = read_extract_idx(perf); + const struct panthor_perf_sample
*sample = perf_sample_idx(perf, extract_idx); + + return sample->sample_header.timestamp_end_ns; +} + +static int +panthor_kmod_perf_dump(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret = panthor_perf_sample(psess); + if (ret) + return ret; + + /* Update data pointer to the correct spot in the ringbuffer. */ + session->data = perf_sample_idx(psess, read_extract_idx(psess)); + session->data_ts = panthor_perf_get_sample_timestamp(psess); + + return 0; +} + +static void +panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session, + struct pan_kmod_perf_buffer_layout *layout) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + /* On all Valhall architectures this is 128. */ + const unsigned counters_per_cat = 128; + layout->counters_per_category = counters_per_cat; + + layout->block_stride = psess->sizes.block; + layout->counter_stride = sizeof(uint64_t); + + /* Setup the layout */ + layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks; + layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks; + layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks; + layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks; + + layout->category[0].offset = + psess->sizes.sample_header + psess->sizes.block_header; + for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) { + layout->category[cat_idx].offset = + layout->category[cat_idx - 1].offset + + layout->category[cat_idx - 1].n_blocks * layout->block_stride; + } +} + +static int +panthor_perf_stop(struct panthor_kmod_perf_session *perf) +{ + int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle, perf->sample_idx++); + if (ret) + return ret; + + perf->active = false; + + ret = 
poll_for_sample(perf->fds.event); + if (ret) + return ret; + + return 0; +} + +static void +panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session) +{ + UNUSED struct panthor_kmod_perf_session *psess = + container_of(session, struct panthor_kmod_perf_session, base); + + int ret; + + if (psess->active) { + ret = panthor_perf_stop(psess); + assert(ret == 0); + } + + ret = psess->session_initialized ? perf_cmd_teardown(psess->base.dev->fd, psess->session_handle) : 0; /* No kernel session exists until the first perf_enable. */ + assert(ret == 0); + + ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf, psess->ringbuffer, psess->sizes.ringbuf); + assert(ret == 0); + + ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control, psess->ctrl, psess->sizes.control); + assert(ret == 0); + + close(psess->fds.event); + /* psess came from pan_kmod_dev_alloc(): free it exactly once, via the matching helper. */ + + pan_kmod_dev_free(psess->base.dev, psess); + + mesa_logd("perf session destroyed"); +} + const struct pan_kmod_ops panthor_kmod_ops = { .dev_create = panthor_kmod_dev_create, .dev_destroy = panthor_kmod_dev_destroy, @@ -1319,4 +1826,10 @@ const struct pan_kmod_ops panthor_kmod_ops = { .vm_query_state = panthor_kmod_vm_query_state, .query_timestamp = panthor_kmod_query_timestamp, .bo_set_label = panthor_kmod_bo_label, + .perf_create = panthor_kmod_perf_init, + .perf_enable = panthor_kmod_perf_enable, + .perf_disable = panthor_kmod_perf_disable, + .perf_dump = panthor_kmod_perf_dump, + .perf_query_layout = panthor_kmod_perf_query_layout, + .perf_destroy = panthor_kmod_perf_destroy, }; From 8141fd7342e85598bc65c49f7ab48e24d12b9f4d Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Tue, 24 Mar 2026 13:09:44 +0100 Subject: [PATCH 17/38] pan/perf: Remove kmod version restriction Both kmod backends implement the perf interface now.
--- src/panfrost/perf/pan_perf.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 1dc940b90b0..4e3db68db54 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -67,13 +67,6 @@ pan_lookup_counters(const char *name) void pan_perf_init(struct pan_perf *perf, int fd) { - ASSERTED drmVersionPtr version = drmGetVersion(fd); - - /* We only support panfrost at the moment. */ - assert(version && !strcmp(version->name, "panfrost")); - - drmFreeVersion(version); - perf->dev = pan_kmod_dev_create(fd, 0, NULL); assert(perf->dev); From 51992c8556f5fbfd3b9fc676f5ac654f4823fb33 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 12:47:19 +0100 Subject: [PATCH 18/38] pan/pps: Delegate more tasks to PanfrostPerf This makes it so that everything that uses the pan_perf C lib is hidden inside PanfrostPerf instead of being used directly from the pps driver. Co-Authored-by: Lukas Zapolskas --- src/panfrost/ds/pan_pps_driver.cpp | 63 ++++------------- src/panfrost/ds/pan_pps_perf.cpp | 110 ++++++++++++++++++++++++++--- src/panfrost/ds/pan_pps_perf.h | 30 ++++++-- 3 files changed, 137 insertions(+), 66 deletions(-) diff --git a/src/panfrost/ds/pan_pps_driver.cpp b/src/panfrost/ds/pan_pps_driver.cpp index f5982a0cba6..aa752777611 100644 --- a/src/panfrost/ds/pan_pps_driver.cpp +++ b/src/panfrost/ds/pan_pps_driver.cpp @@ -33,46 +33,13 @@ PanfrostDriver::~PanfrostDriver() uint64_t PanfrostDriver::get_min_sampling_period_ns() { - return 1000000; + return perf->get_min_sampling_period_ns(); } std::pair, std::vector> PanfrostDriver::create_available_counters(const PanfrostPerf &perf) { - std::pair, std::vector> ret; - auto &[groups, counters] = ret; - - size_t cid = 0; - - for (uint32_t gid = 0; gid < perf.perf->cfg->n_categories; ++gid) { - const auto &category = perf.perf->cfg->categories[gid]; - CounterGroup group = {}; - group.id = gid; - group.name = category.name; - - 
for (size_t id = 0; id < category.n_counters; ++id) { - Counter counter = {}; - counter.id = cid; - counter.group = gid; - - counter.name = category.counters[id].name; - - counter.set_getter([=](const Counter &c, const Driver &d) { - auto &pan_driver = PanfrostDriver::into(d); - struct pan_perf *perf = pan_driver.perf->perf; - const auto counter = &perf->cfg->categories[gid].counters[id]; - return int64_t(pan_perf_counter_read(counter, perf)); - }); - - group.counters.push_back(cid++); - - counters.emplace_back(counter); - } - - groups.push_back(group); - } - - return ret; + return perf.create_available_counters(); } bool @@ -81,9 +48,12 @@ PanfrostDriver::init_perfcnt() if (!dev) { dev = std::make_unique(drm_device.fd); } + if (!perf) { perf = std::make_unique(*dev); } + + perf->init_perfcnt(drm_device.fd); if (groups.empty() && counters.empty()) { std::tie(groups, counters) = create_available_counters(*perf); } @@ -106,9 +76,9 @@ PanfrostDriver::enable_all_counters() } void -PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) +PanfrostDriver::enable_perfcnt(const uint64_t sampling_period_ns) { - auto res = perf->enable(); + auto res = perf->enable_perfcnt(sampling_period_ns); if (!check(res, "Failed to enable performance counters")) { if (res == -ENOSYS) { PERFETTO_FATAL( @@ -121,10 +91,8 @@ PanfrostDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) bool PanfrostDriver::dump_perfcnt() { - last_dump_ts = perfetto::base::GetBootTimeNs().count(); - // Dump performance counters to buffer - if (!check(perf->dump(), "Failed to dump performance counters")) { + if (!check(perf->dump_perfcnt(), "Failed to dump performance counters")) { PERFETTO_ELOG("Skipping sample"); return false; } @@ -135,15 +103,13 @@ PanfrostDriver::dump_perfcnt() uint64_t PanfrostDriver::next() { - auto ret = last_dump_ts; - last_dump_ts = 0; - return ret; + return perf->next(); } void PanfrostDriver::disable_perfcnt() { - perf->disable(); + perf->disable_perfcnt(); 
perf.reset(); dev.reset(); groups.clear(); @@ -154,20 +120,19 @@ PanfrostDriver::disable_perfcnt() uint32_t PanfrostDriver::gpu_clock_id() const { - return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; + return perf->gpu_clock_id(); } uint64_t PanfrostDriver::gpu_timestamp() const { - return perfetto::base::GetBootTimeNs().count(); + return perf->gpu_timestamp(); } bool -PanfrostDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +PanfrostDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const { - /* Not supported */ - return false; + return perf->cpu_gpu_timestamp(cpu_timestamp, gpu_timestamp); } } // namespace pps diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index f801779c18e..2f6ccbf0217 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -4,6 +4,7 @@ */ #include "pan_pps_perf.h" +#include "pan_pps_driver.h" #include #include @@ -12,14 +13,12 @@ #include namespace pps { - PanfrostPerf::PanfrostPerf(const PanfrostDevice &dev) - : perf{reinterpret_cast( - rzalloc(nullptr, struct pan_perf))} { + perf = reinterpret_cast( + rzalloc(nullptr, struct pan_perf)); assert(perf); assert(dev.fd >= 0); - pan_perf_init(perf, dev.fd); } PanfrostPerf::~PanfrostPerf() @@ -31,7 +30,7 @@ PanfrostPerf::~PanfrostPerf() } } -PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf{o.perf} +PanfrostPerf::PanfrostPerf(PanfrostPerf &&o): perf(o.perf) { o.perf = nullptr; } @@ -43,25 +42,116 @@ PanfrostPerf::operator=(PanfrostPerf &&o) return *this; } +bool +PanfrostPerf::init_perfcnt(int fd) +{ + pan_perf_init(perf, fd); + + return perf != NULL; +} + int -PanfrostPerf::enable() const +PanfrostPerf::enable_perfcnt(uint64_t /* sampling_period_ns */) { assert(perf); return pan_perf_enable(perf); } void -PanfrostPerf::disable() const +PanfrostPerf::disable_perfcnt() { assert(perf); pan_perf_disable(perf); } -int -PanfrostPerf::dump() const +bool +PanfrostPerf::dump_perfcnt() { assert(perf); - 
return pan_perf_dump(perf); + last_dump_ts = perfetto::base::GetBootTimeNs().count(); + + int ret = pan_perf_dump(perf); + + return !!(ret >= 0); +} + +uint64_t +PanfrostPerf::get_min_sampling_period_ns() +{ + return 1000000; +} + +void * +PanfrostPerf::get_subinstance() { + return perf; +} + +std::pair, std::vector> +PanfrostPerf::create_available_counters() const +{ + std::pair, std::vector> ret; + auto &[groups, counters] = ret; + + size_t cid = 0; + + for (uint32_t gid = 0; gid < perf->cfg->n_categories; ++gid) { + const auto &category = perf->cfg->categories[gid]; + CounterGroup group = {}; + group.id = gid; + group.name = category.name; + + for (size_t id = 0; cid < category.n_counters; ++cid) { + Counter counter = {}; + counter.id = cid; + counter.group = gid; + + counter.name = category.counters[id].name; + + counter.set_getter([=](const Counter &c, const Driver &d) { + auto &pan_driver = PanfrostDriver::into(d); + struct pan_perf *perf = static_cast( + pan_driver.perf->get_subinstance()); + const auto counter = + &perf->cfg->categories[gid].counters[id]; + return int64_t(pan_perf_counter_read(counter, perf)); + }); + + group.counters.push_back(cid++); + + counters.emplace_back(counter); + } + + groups.push_back(group); + } + + return ret; +} + +uint64_t +PanfrostPerf::next() +{ + auto ret = last_dump_ts; + last_dump_ts = 0; + return ret; +} + +uint32_t +PanfrostPerf::gpu_clock_id() const +{ + return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; +} + +uint64_t +PanfrostPerf::gpu_timestamp() const +{ + return perfetto::base::GetBootTimeNs().count(); +} + +bool +PanfrostPerf::cpu_gpu_timestamp(uint64_t &, uint64_t &) const +{ + /* Not supported */ + return false; } } // namespace pps diff --git a/src/panfrost/ds/pan_pps_perf.h b/src/panfrost/ds/pan_pps_perf.h index 742f1aa8f44..b07df750ccb 100644 --- a/src/panfrost/ds/pan_pps_perf.h +++ b/src/panfrost/ds/pan_pps_perf.h @@ -5,6 +5,12 @@ #pragma once +#include +#include +#include +#include +#include + 
struct pan_perf; namespace pps { @@ -27,17 +33,27 @@ class PanfrostPerf { PanfrostPerf(const PanfrostDevice &dev); ~PanfrostPerf(); - PanfrostPerf(const PanfrostPerf &) = delete; - PanfrostPerf &operator=(const PanfrostPerf &) = delete; + PanfrostPerf(PanfrostPerf &&o); + PanfrostPerf &operator=(PanfrostPerf &&o); - PanfrostPerf(PanfrostPerf &&); - PanfrostPerf &operator=(PanfrostPerf &&); + std::pair, std::vector> + create_available_counters() const; - int enable() const; - void disable() const; - int dump() const; + uint64_t get_min_sampling_period_ns(); + bool init_perfcnt(int fd); + int enable_perfcnt(uint64_t sampling_period_ns); + void disable_perfcnt(); + bool dump_perfcnt(); + uint64_t next(); + uint32_t gpu_clock_id() const; + uint64_t gpu_timestamp() const; + bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, + uint64_t &gpu_timestamp) const; + void *get_subinstance(); + private: struct pan_perf *perf = nullptr; + uint64_t last_dump_ts = 0; }; } // namespace pps From 2e6a34bf0e8560a0e4b64b90e730e405d00c13aa Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 11:03:17 +0100 Subject: [PATCH 19/38] pan/pps: Output counters per block Instead of summing counters from shader cores, and outputting only the counters from the first l2 slice, use the memory layout provided from the kmod to output individual counters for each (category, block, counter) combination. Co-Authored-by: Lukas Zapolskas --- src/panfrost/ds/pan_pps_perf.cpp | 98 +++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 2f6ccbf0217..2a13a3a0088 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -1,5 +1,6 @@ /* * Copyright © 2021 Collabora, Ltd. + * Copyright © 2026 Arm, Ltd. 
* SPDX-License-Identifier: MIT */ @@ -86,39 +87,96 @@ PanfrostPerf::get_subinstance() { return perf; } +std::string +format_suffix(const char *fmt, uint8_t idx) +{ + assert(strlen(fmt) < 200 && "fmt unreasonably long"); + char buf[256]; + std::snprintf(buf, sizeof(buf), fmt, idx); + + return std::string(buf); +} + +const char * +get_block_suffix(uint8_t category) +{ + assert(category <= PAN_PERF_COUNTER_CAT_MAX); + + switch (category) { + case PAN_PERF_COUNTER_CAT_MEMSYS: + return " (slice %u)"; + case PAN_PERF_COUNTER_CAT_SHADER: + return " (core %u)"; + default: + return nullptr; + } + + return nullptr; +} + +Counter::Units +convert_pan_units(enum pan_perf_counter_units unit) +{ + switch (unit) { + case PAN_PERF_COUNTER_UNITS_PRIMITIVES: + return Counter::Units::Primitive; + case PAN_PERF_COUNTER_UNITS_INSTRUCTIONS: + return Counter::Units::Instruction; + case PAN_PERF_COUNTER_UNITS_BYTES: + return Counter::Units::Byte; + case PAN_PERF_COUNTER_UNITS_PIXELS: + return Counter::Units::Pixel; + default: + return Counter::Units::None; + } +} + std::pair, std::vector> PanfrostPerf::create_available_counters() const { std::pair, std::vector> ret; auto &[groups, counters] = ret; - size_t cid = 0; + uint32_t global_counter_id = 0; + + const struct pan_perf_category *category = NULL; + for (uint32_t cat_idx = 0; cat_idx < perf->cfg->n_categories; ++cat_idx) { + assert(cat_idx < PAN_PERF_COUNTER_CAT_MAX); + category = &perf->cfg->categories[cat_idx]; - for (uint32_t gid = 0; gid < perf->cfg->n_categories; ++gid) { - const auto &category = perf->cfg->categories[gid]; CounterGroup group = {}; - group.id = gid; - group.name = category.name; + group.id = cat_idx; + group.name = category->name; - for (size_t id = 0; cid < category.n_counters; ++cid) { - Counter counter = {}; - counter.id = cid; - counter.group = gid; + uint32_t n_blocks = perf->mem_layout.category[cat_idx].n_blocks; + for (uint32_t counter_idx = 0; counter_idx < category->n_counters; + ++counter_idx) { + const 
struct pan_perf_counter *cinfo = + &category->counters[counter_idx]; - counter.name = category.counters[id].name; + for (uint32_t block_idx = 0; block_idx < n_blocks; ++block_idx) { + const char *suffix = get_block_suffix(cat_idx); + const std::string name = + cinfo->name + (suffix ? format_suffix(suffix, block_idx) : ""); - counter.set_getter([=](const Counter &c, const Driver &d) { - auto &pan_driver = PanfrostDriver::into(d); - struct pan_perf *perf = static_cast( + Counter counter = {}; + counter.id = global_counter_id++; + counter.name = name; + counter.group = group.id; + counter.units = convert_pan_units(cinfo->units); + + counter.set_getter([=](const Counter &c, const Driver &d) { + auto &pan_driver = PanfrostDriver::into(d); + struct pan_perf *perf = static_cast( pan_driver.perf->get_subinstance()); - const auto counter = - &perf->cfg->categories[gid].counters[id]; - return int64_t(pan_perf_counter_read(counter, perf)); - }); + return pan_perf_counter_read_raw( + perf, (enum pan_perf_counter_categories)cat_idx, block_idx, + cinfo->offset); + }); - group.counters.push_back(cid++); - - counters.emplace_back(counter); + group.counters.push_back(counter.id); + counters.emplace_back(counter); + } } groups.push_back(group); From 1afc465345ce80534a00e33048c7fa1a1a6a6fb1 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 11:40:39 +0100 Subject: [PATCH 20/38] pan/perf: Add timing related getters These will be used by the pps-producer. 
--- src/panfrost/perf/pan_perf.c | 21 +++++++++++++++++++++ src/panfrost/perf/pan_perf.h | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 4e3db68db54..a6010c88cb8 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -111,3 +111,24 @@ pan_perf_dump(struct pan_perf *perf) { return pan_kmod_perf_dump(perf->session); } + +uint64_t +pan_perf_get_timestamp(const struct pan_perf *perf) +{ + return perf->session->data_ts; +} + +bool +pan_perf_timestamp_supported(const struct pan_perf *perf) +{ + return perf->session->data_ts_supported; +} + +uint64_t +pan_perf_get_min_sampling_period(const struct pan_perf *perf) +{ + if (pan_arch(perf->dev->props.gpu_id) < 10) + return 1000000; + else + return 500000; +} diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 7cbc4a9a20c..752a0db1a1d 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -95,6 +95,12 @@ void pan_perf_finish(struct pan_perf *perf); int pan_perf_dump(struct pan_perf *perf); +uint64_t pan_perf_get_timestamp(const struct pan_perf *perf); + +bool pan_perf_timestamp_supported(const struct pan_perf *perf); + +uint64_t pan_perf_get_min_sampling_period(const struct pan_perf *perf); + #if defined(__cplusplus) } // extern "C" #endif From fd5b441d35972c4cb6e92bcc9610632ca4419b2a Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 11:42:29 +0100 Subject: [PATCH 21/38] pan/pps: Generalize timing related settings --- src/panfrost/ds/pan_pps_perf.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 2a13a3a0088..cb49e0b5f03 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -69,17 +69,22 @@ bool PanfrostPerf::dump_perfcnt() { assert(perf); - last_dump_ts = perfetto::base::GetBootTimeNs().count(); int ret = 
pan_perf_dump(perf); + if (pan_perf_timestamp_supported(perf)) + last_dump_ts = pan_perf_get_timestamp(perf); + else + last_dump_ts = perfetto::base::GetBootTimeNs().count(); + return !!(ret >= 0); } uint64_t PanfrostPerf::get_min_sampling_period_ns() { - return 1000000; + assert(perf); + return pan_perf_get_min_sampling_period(perf); } void * @@ -196,19 +201,24 @@ PanfrostPerf::next() uint32_t PanfrostPerf::gpu_clock_id() const { - return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; + assert(perf); + if (pan_perf_timestamp_supported(perf)) + return perfetto::protos::pbzero::BUILTIN_CLOCK_MONOTONIC_RAW; + else + return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; } uint64_t PanfrostPerf::gpu_timestamp() const { + // TODO (panthor) This information is present in the dump return perfetto::base::GetBootTimeNs().count(); } bool PanfrostPerf::cpu_gpu_timestamp(uint64_t &, uint64_t &) const { - /* Not supported */ + // TODO (panthor) Start using the appropriate IOCTL to get these values return false; } From 36cd1972035d9acf1c9ddb5ba144f04f3f7bfb6a Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 20 Mar 2026 14:44:14 +0100 Subject: [PATCH 22/38] pan/perf: Make sure category indices match --- src/panfrost/perf/pan_perf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index a6010c88cb8..ddf0f627623 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -22,6 +22,11 @@ pan_perf_counter_read_raw(const struct pan_perf *perf, enum pan_perf_counter_categories category, uint8_t block_index, uint32_t counter_index) { + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_MEMSYS == (int)PAN_PERF_COUNTER_CAT_MEMSYS); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_SHADER == (int)PAN_PERF_COUNTER_CAT_SHADER); + 
assert(perf->session->data != NULL); const uint32_t val_offset = perf->mem_layout.category[category].offset + From c7c0875500ee09e2d4f559f41ecab9180889551e Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Fri, 5 Sep 2025 11:24:53 +0100 Subject: [PATCH 23/38] pps: Add panthor to available driver list Signed-off-by: Lukas Zapolskas Suggested-by: Lars-Ivar Hesselberg Simonsen --- src/tool/pps/pps_driver.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tool/pps/pps_driver.cc b/src/tool/pps/pps_driver.cc index 4eb3a17aa7c..5ef155633c2 100644 --- a/src/tool/pps/pps_driver.cc +++ b/src/tool/pps/pps_driver.cc @@ -49,6 +49,7 @@ std::unordered_map> create_supported_driver #ifdef PPS_PANFROST map.emplace("panfrost", std::make_unique()); + map.emplace("panthor", std::make_unique()); #endif // PPS_PANFROST #ifdef PPS_V3D From 903a366173ae3a739e4b6b3a2441255e0e3f60c0 Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Fri, 5 Sep 2025 11:25:21 +0100 Subject: [PATCH 24/38] pps: Add panthor counters to Perfetto config files v2: - Lower the counter_period_ns to match the minimum value in code Signed-off-by: Lukas Zapolskas Suggested-by: Lars-Ivar Hesselberg Simonsen --- src/tool/pps/cfg/gpu.cfg | 9 +++++++++ src/tool/pps/cfg/system.cfg | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/tool/pps/cfg/gpu.cfg b/src/tool/pps/cfg/gpu.cfg index 2a6578375d8..e33007cd030 100644 --- a/src/tool/pps/cfg/gpu.cfg +++ b/src/tool/pps/cfg/gpu.cfg @@ -21,6 +21,15 @@ data_sources { } } +data_sources { + config { + name: "gpu.counters.panthor" + gpu_counter_config { + counter_period_ns: 500000 + } + } +} + data_sources { config { name: "gpu.counters.v3d" diff --git a/src/tool/pps/cfg/system.cfg b/src/tool/pps/cfg/system.cfg index 2e6ce480bb2..eebc5b3b9e7 100644 --- a/src/tool/pps/cfg/system.cfg +++ b/src/tool/pps/cfg/system.cfg @@ -73,6 +73,15 @@ data_sources { } } +data_sources { + config { + name: "gpu.counters.panthor" + gpu_counter_config { + counter_period_ns: 500000 + 
} + } +} + data_sources { config { name: "gpu.counters.v3d" From 6cde1e340325e18403dfbd38b5543c10874372ea Mon Sep 17 00:00:00 2001 From: Lukas Zapolskas Date: Fri, 5 Sep 2025 09:47:21 +0100 Subject: [PATCH 25/38] docs: Add Panthor to the Perfetto documentation as a data source Signed-off-by: Lukas Zapolskas --- docs/perfetto.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/perfetto.rst b/docs/perfetto.rst index b65ffb99951..04410e69afe 100644 --- a/docs/perfetto.rst +++ b/docs/perfetto.rst @@ -220,6 +220,16 @@ To run the producer, follow these two simple steps: ./build/pps-producer +Panthor +^^^^^^^ + +The Panthor PPS driver uses stable IOCTLs and doesn't need any special privileges to enable. The +data source can be configured by running the producer: + +.. code-block:: sh + + ./build/pps-producer + V3D / V3DV ^^^^^^^^^^ From 77f46dfb5ab931b304e4519c047754280ce40cbe Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 9 Apr 2026 11:29:04 +0200 Subject: [PATCH 26/38] pan/perf: Pass counter info to raw read function When a counter is derived, it can't be read based on just the offsets into the buffer. To prepare for that, pass the info struct instead.
--- src/panfrost/ds/pan_pps_perf.cpp | 4 +--- src/panfrost/perf/pan_perf.c | 18 ++++++++---------- src/panfrost/perf/pan_perf.h | 4 ++-- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index cb49e0b5f03..4211ff2beae 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -174,9 +174,7 @@ PanfrostPerf::create_available_counters() const auto &pan_driver = PanfrostDriver::into(d); struct pan_perf *perf = static_cast( pan_driver.perf->get_subinstance()); - return pan_perf_counter_read_raw( - perf, (enum pan_perf_counter_categories)cat_idx, block_idx, - cinfo->offset); + return pan_perf_counter_read_raw(perf, cinfo, block_idx); }); group.counters.push_back(counter.id); diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index ddf0f627623..aa53c0993cd 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -19,8 +19,7 @@ int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, - enum pan_perf_counter_categories category, - uint8_t block_index, uint32_t counter_index) + const struct pan_perf_counter *counter, uint8_t block) { STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); @@ -29,11 +28,12 @@ pan_perf_counter_read_raw(const struct pan_perf *perf, assert(perf->session->data != NULL); - const uint32_t val_offset = perf->mem_layout.category[category].offset + - perf->mem_layout.block_stride * block_index + - perf->mem_layout.counter_stride * counter_index; + const uint32_t category = counter->category_index; + const uint32_t offset = perf->mem_layout.category[category].offset + + perf->mem_layout.block_stride * block + + perf->mem_layout.counter_stride * counter->offset; - uint8_t *val_ptr = ((uint8_t *)perf->session->data) + val_offset; + uint8_t *val_ptr = ((uint8_t *)perf->session->data) + 
offset; return pan_kmod_perf_load_counter(perf->session, val_ptr); } @@ -41,16 +41,14 @@ int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, const struct pan_perf *perf) { - int64_t ret = pan_perf_counter_read_raw(perf, counter->category_index, 0, - counter->offset); + int64_t ret = pan_perf_counter_read_raw(perf, counter, 0); /* If counter belongs to shader core, sum values for all cores. */ if (counter->category_index == PAN_PERF_COUNTER_CAT_SHADER) { uint32_t n_cores = perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks; for (uint32_t core = 1; core < n_cores; ++core) { - ret += pan_perf_counter_read_raw(perf, PAN_PERF_COUNTER_CAT_SHADER, - core, counter->offset); + ret += pan_perf_counter_read_raw(perf, counter, core); assert(ret >= 0 && "counter sum should not overflow"); } } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 752a0db1a1d..1e8446b62a4 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -79,8 +79,8 @@ struct pan_perf { }; int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, - enum pan_perf_counter_categories category, - uint8_t block_index, uint32_t counter_index); + const struct pan_perf_counter *counter, + uint8_t block); int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, const struct pan_perf *perf); From 3a1dadf03838b7a0a5adadf2e2121e32d43e3b4c Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Mon, 13 Apr 2026 11:13:59 +0200 Subject: [PATCH 27/38] pan/perf: Rename pan_perf_counter_read[_raw] functions _raw no longer takes the "raw" offsets as arguments so the name doesn't make sense anymore. The old non "_raw" function actually sums over blocks instead of just reading the passed counter. This commit fixes the naming of those two functions so they match what the functions do.
--- src/panfrost/ds/pan_pps_perf.cpp | 2 +- src/panfrost/perf/pan_perf.c | 12 ++++++------ src/panfrost/perf/pan_perf.h | 10 +++++----- src/panfrost/perf/quick.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/panfrost/ds/pan_pps_perf.cpp b/src/panfrost/ds/pan_pps_perf.cpp index 4211ff2beae..bbf4e64033d 100644 --- a/src/panfrost/ds/pan_pps_perf.cpp +++ b/src/panfrost/ds/pan_pps_perf.cpp @@ -174,7 +174,7 @@ PanfrostPerf::create_available_counters() const auto &pan_driver = PanfrostDriver::into(d); struct pan_perf *perf = static_cast( pan_driver.perf->get_subinstance()); - return pan_perf_counter_read_raw(perf, cinfo, block_idx); + return pan_perf_counter_read(perf, cinfo, block_idx); }); group.counters.push_back(counter.id); diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index aa53c0993cd..f1905aab02d 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -18,8 +18,8 @@ #include int64_t -pan_perf_counter_read_raw(const struct pan_perf *perf, - const struct pan_perf_counter *counter, uint8_t block) +pan_perf_counter_read(const struct pan_perf *perf, + const struct pan_perf_counter *counter, uint8_t block) { STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); @@ -38,17 +38,17 @@ pan_perf_counter_read_raw(const struct pan_perf *perf, } int64_t -pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf) +pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter, + const struct pan_perf *perf) { - int64_t ret = pan_perf_counter_read_raw(perf, counter, 0); + int64_t ret = pan_perf_counter_read(perf, counter, 0); /* If counter belongs to shader core, sum values for all cores. 
*/ if (counter->category_index == PAN_PERF_COUNTER_CAT_SHADER) { uint32_t n_cores = perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks; for (uint32_t core = 1; core < n_cores; ++core) { - ret += pan_perf_counter_read_raw(perf, counter, core); + ret += pan_perf_counter_read(perf, counter, core); assert(ret >= 0 && "counter sum should not overflow"); } } diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 1e8446b62a4..5be5dfd1265 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -78,12 +78,12 @@ struct pan_perf { struct pan_kmod_perf_buffer_layout mem_layout; }; -int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, - const struct pan_perf_counter *counter, - uint8_t block); +int64_t pan_perf_counter_read(const struct pan_perf *perf, + const struct pan_perf_counter *counter, + uint8_t block); -int64_t pan_perf_counter_read(const struct pan_perf_counter *counter, - const struct pan_perf *perf); +int64_t pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter, + const struct pan_perf *perf); void pan_perf_init(struct pan_perf *perf, int fd); diff --git a/src/panfrost/perf/quick.c b/src/panfrost/perf/quick.c index afe114fe198..f221ec909d0 100644 --- a/src/panfrost/perf/quick.c +++ b/src/panfrost/perf/quick.c @@ -45,7 +45,7 @@ main(void) for (unsigned j = 0; j < cat->n_counters; ++j) { const struct pan_perf_counter *ctr = &cat->counters[j]; - int64_t val = pan_perf_counter_read(ctr, perf); + int64_t val = pan_perf_counter_read_block_sum(ctr, perf); printf("%s (%s): %ld\n", ctr->name, ctr->symbol_name, val); } From dd7671fedd1d47781f7fd2b3d2c01548348517f4 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 9 Apr 2026 11:42:00 +0200 Subject: [PATCH 28/38] pan/perf: Use proper counter category type in config pan_gen_perf.py already writes the enum value instead of an index. Use the enum type instead of a plain int in pan_perf_counter.
--- src/panfrost/perf/pan_gen_perf.py | 2 +- src/panfrost/perf/pan_perf.c | 5 ++--- src/panfrost/perf/pan_perf.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index 43f058aa862..fad0a1c390c 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -173,7 +173,7 @@ def main(): c.write(".symbol_name = \"%s\"," % (counter.underscore_name)) c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper())) c.write(".offset = %u," % (counter.offset)) - c.write(".category_index = %s," % CATEGORY_IDX_REMAP[category.name]) + c.write(".category = %s," % CATEGORY_IDX_REMAP[category.name]) c.outdent(tab_size) c.write("}, // counter") diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index f1905aab02d..2a6c004443b 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -28,8 +28,7 @@ pan_perf_counter_read(const struct pan_perf *perf, assert(perf->session->data != NULL); - const uint32_t category = counter->category_index; - const uint32_t offset = perf->mem_layout.category[category].offset + + const uint32_t offset = perf->mem_layout.category[counter->category].offset + perf->mem_layout.block_stride * block + perf->mem_layout.counter_stride * counter->offset; @@ -44,7 +43,7 @@ pan_perf_counter_read_block_sum(const struct pan_perf_counter *counter, int64_t ret = pan_perf_counter_read(perf, counter, 0); /* If counter belongs to shader core, sum values for all cores. 
*/ - if (counter->category_index == PAN_PERF_COUNTER_CAT_SHADER) { + if (counter->category == PAN_PERF_COUNTER_CAT_SHADER) { uint32_t n_cores = perf->mem_layout.category[PAN_PERF_COUNTER_CAT_SHADER].n_blocks; for (uint32_t core = 1; core < n_cores; ++core) { diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 5be5dfd1265..89d665ec407 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -51,7 +51,7 @@ struct pan_perf_counter { enum pan_perf_counter_units units; // Offset of this counter's value within the category uint32_t offset; - unsigned category_index; + enum pan_perf_counter_categories category; }; struct pan_perf_category { From b6a8d664f74065dc43c1e88ae4fb2ce4d30458a4 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 12:46:53 +0200 Subject: [PATCH 29/38] pan/kmod: Add l2 features to pan_kmod_dev_props --- src/panfrost/lib/kmod/pan_kmod.h | 3 +++ src/panfrost/lib/kmod/panfrost_kmod.c | 2 ++ src/panfrost/lib/kmod/panthor_kmod.c | 1 + 3 files changed, 6 insertions(+) diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index 8076e78a5bb..922bfbbfb57 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -199,6 +199,9 @@ struct pan_kmod_dev_props { /* Texture feature bits. */ uint32_t texture_features[4]; + /* L2 feature bits. */ + uint32_t l2_features; + /* Maximum number of threads per core. 
*/ uint32_t max_threads_per_core; diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c index e7a0c1c2cdd..67235ad15cf 100644 --- a/src/panfrost/lib/kmod/panfrost_kmod.c +++ b/src/panfrost/lib/kmod/panfrost_kmod.c @@ -167,6 +167,8 @@ panfrost_dev_query_props(struct panfrost_kmod_dev *panfrost_dev) panfrost_query_raw(fd, DRM_PANFROST_PARAM_MEM_FEATURES, true, 0); props->mmu_features = panfrost_query_raw(fd, DRM_PANFROST_PARAM_MMU_FEATURES, true, 0); + props->l2_features = + panfrost_query_raw(fd, DRM_PANFROST_PARAM_L2_FEATURES, true, 0); for (unsigned i = 0; i < ARRAY_SIZE(props->texture_features); i++) { props->texture_features[i] = panfrost_query_raw( diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c index 2fb06e6bfd2..22845cc0433 100644 --- a/src/panfrost/lib/kmod/panthor_kmod.c +++ b/src/panfrost/lib/kmod/panthor_kmod.c @@ -201,6 +201,7 @@ panthor_dev_query_props(struct panthor_kmod_dev *panthor_dev) .tiler_features = panthor_dev->props.gpu.tiler_features, .mem_features = panthor_dev->props.gpu.mem_features, .mmu_features = panthor_dev->props.gpu.mmu_features, + .l2_features = panthor_dev->props.gpu.l2_features, /* This register does not exist because AFBC is no longer optional. 
*/ .afbc_features = 0, From 91e0f4c2a3f324ff235219f3780c0c87af2766c2 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 12:47:50 +0200 Subject: [PATCH 30/38] pan: Add BUS_WIDTH query to pan_props --- src/panfrost/lib/pan_props.c | 7 +++++++ src/panfrost/lib/pan_props.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index 056bd48d4a2..26babd143a1 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -17,6 +17,13 @@ pan_query_l2_slices(const struct pan_kmod_dev_props *props) return ((props->mem_features >> 8) & 0xF) + 1; } +unsigned +pan_query_bus_width(const struct pan_kmod_dev_props *props) +{ + /* BUS_WIDTH is the 8-bit field L2_FEATURES[31:24] and holds log2 of the width */ + return 1u << ((props->l2_features >> 24) & 0xFF); +} + struct pan_tiler_features pan_query_tiler_features(const struct pan_kmod_dev_props *props) { diff --git a/src/panfrost/lib/pan_props.h b/src/panfrost/lib/pan_props.h index 19d3b749735..cebf5616028 100644 --- a/src/panfrost/lib/pan_props.h +++ b/src/panfrost/lib/pan_props.h @@ -21,6 +21,8 @@ struct pan_kmod_vm; unsigned pan_query_l2_slices(const struct pan_kmod_dev_props *props); +unsigned pan_query_bus_width(const struct pan_kmod_dev_props *props); + struct pan_tiler_features pan_query_tiler_features(const struct pan_kmod_dev_props *props); From bf8cfffef5ae652df295ddbea9b918336ac2c6cd Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 17:22:27 +0200 Subject: [PATCH 31/38] pan/perf: Add new counter units These are needed for the generated counter definitions --- src/panfrost/perf/pan_perf.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 89d665ec407..fe73ec15cd5 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -42,6 +42,12 @@ enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_PIXELS, PAN_PERF_COUNTER_UNITS_ISSUES,
PAN_PERF_COUNTER_UNITS_INTERRUPTS, + PAN_PERF_COUNTER_UNITS_PERCENT, + PAN_PERF_COUNTER_UNITS_TESTS, + PAN_PERF_COUNTER_UNITS_RAYS, + PAN_PERF_COUNTER_UNITS_NODES, + PAN_PERF_COUNTER_UNITS_BOXES, + PAN_PERF_COUNTER_UNITS_BYTES_PER_SECOND, }; struct pan_perf_counter { From d8bdacfce85eff5a482c3dad066913ea1f92dece Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 16:58:09 +0200 Subject: [PATCH 32/38] pan/perf: Add libGPUCounters to xml translator libGPUCounters (1) contains all required information to generate the counter definitions used in mesa for Bifrost+ architectures. This script gathers the required information from the xml definitions in libGPUCounters and outputs pan/perf xmls. It also already includes support for derived counters, meaning counters which are computed from other counters actually created by HW. For those, we recursively resolve the variables in the equation until only HW counters and configuration values are left. It makes sense to do it here already since the datastructures make it a simple addition and the codegen doesn't need to handle it at compile time later that way. Derived counters that require MALI_CONFIG_TIME_SPAN are skipped for now. libGPUCounters also does not generate the equations for those and it makes hooking up the derived counters in pan simpler when we don't have to estimate the duration of a sample in some way. 1) https://github.com/ARM-software/libGPUCounters --- src/panfrost/perf/pan_gen_perf_defs.py | 286 +++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 src/panfrost/perf/pan_gen_perf_defs.py diff --git a/src/panfrost/perf/pan_gen_perf_defs.py b/src/panfrost/perf/pan_gen_perf_defs.py new file mode 100644 index 00000000000..fadb46e1c66 --- /dev/null +++ b/src/panfrost/perf/pan_gen_perf_defs.py @@ -0,0 +1,286 @@ +# Copyright (c) 2026 Arm Ltd. 
+# SPDX-License-Identifier: MIT + +from argparse import ArgumentParser +from pathlib import Path +from dataclasses import dataclass +import datetime +import subprocess +import xml.etree.ElementTree as et +import re + +COUNTERINFO_PATH = "./specification/database/counterinfo" +HARDWARE_LAYOUT_PATH = "./specification/database/hardwarelayout" + +HW_LAYOUT_LUT: dict[str, "HardwareLayout"] = {} + +OUTPUT_COPYRIGHT = """ + +""" + + +def get_revision(path): + cmd = ["git", "rev-parse", "HEAD"] + res = subprocess.run(cmd, capture_output=True, cwd=path.as_posix()) + if res.returncode != 0: + return None + else: + return res.stdout.decode().strip() + + +def map_nn(v, f): + return None if v is None else f(v) + + +def get_elem_text(xml, name): + e = xml.find(name) + if e is not None: + return e.text + else: + return None + + +@dataclass(frozen=True) +class CounterHwLocation: + block: str + counter_index: int + + +@dataclass +class HardwareLayout: + gpu_name: str + # map source name to its CounterHwLocation (CounterBlock type, counter index) + locations: dict[str, CounterHwLocation] + + @staticmethod + def from_xml(xml: et.Element) -> "HardwareLayout": + gpu_name = xml.get("gpu") + assert gpu_name is not None + locations = {} + for cbe in xml.findall("CounterBlock"): + cb_name = cbe.get("type") + assert cb_name is not None + for counter in cbe.findall("Counter"): + source_name = counter.get("name") + counter_index = counter.get("index") + assert counter_index is not None + locations[source_name] = CounterHwLocation( + cb_name, int(counter_index)) + + return HardwareLayout(gpu_name=gpu_name, locations=locations) + + +def parse_hw_layout(path: Path): + xml = et.parse(path) + return HardwareLayout.from_xml(xml.getroot()) + + +def parse_supported_gpus(xml): + supported_list = xml.find("SupportedGPUs") + return [e.text for e in supported_list.findall("GPU")] + + +def group_from_filename(fname): + # This maps to the values of the "type" field in the CounterBlock xml blocks.
+ fname_to_dbkey = { + "GPUFrontEnd": "GPU Front-end", + "L2Cache": "Memory System", + "Tiler": "Tiler", + "ShaderCore": "Shader Core", + "Constants": "Constants", + "Content": "Content", + } + for name, key in fname_to_dbkey.items(): + if name in fname: + return key + assert False, "could not find group from filename" + + +@dataclass +class CounterInfo: + machine_name: str + supported_gpus: list[str] + group: str + equation: str = "" + source_name: str = "" + # Can be used as a fallback to find hw offsets if source_name isn't available. + source_alias_name: str = "" + human_name: str = "" + short_desc: str = "" + units: str = "" + + @staticmethod + def from_xml(xml, group): + machine_name = get_elem_text(xml, "MachineName") + assert machine_name is not None + supported = parse_supported_gpus(xml) + + desc_raw = get_elem_text(xml, "ShortDescription") or "" + desc_san = " ".join(map(str.strip, desc_raw.splitlines())).strip() + + return CounterInfo( + machine_name, + supported, + group, + equation=map_nn(get_elem_text(xml, "Equation"), str.strip) or "", + source_name=get_elem_text(xml, "SourceName") or "", + source_alias_name=get_elem_text(xml, "SourceAlias") or "", + human_name=get_elem_text(xml, "HumanName") or "", + short_desc=desc_san, + units=(get_elem_text(xml, "Units") or "").strip(), + ) + + def is_derived(self): + return not self.source_name + + def get_hw_offsets(self, gpu: str) -> CounterHwLocation: + assert self.source_name != "" + assert gpu in self.supported_gpus + locs = HW_LAYOUT_LUT[gpu].locations + if self.source_name in locs: + return locs[self.source_name] + else: + # If the normal source name doesn't work try the alias + # Needed for example for RT_RAY_BOX_ISSUED on G1 which is using the + # alias RT_BOX_ISSUE_CYCLES there.
+ assert self.source_alias_name != "" + return locs[self.source_alias_name] + + def is_supported(self): + return "MALI_CONFIG_TIME_SPAN" not in self.equation + + +@dataclass +class ProductInfo: + product_id: str + database_key: str + + +def parse_counters(path: Path): + group = group_from_filename(path.name) + xml = et.parse(path) + return [CounterInfo.from_xml(e, group) for e in xml.findall("CounterInfo")] + + +def resolve_equation(eq: str, counters_gpu: list[CounterInfo]): + sorted_c = sorted(counters_gpu, key=lambda c: len(c.machine_name)) + max_len = max([len(c.machine_name) for c in sorted_c]) + + # This loop replaces machine names by their equation (derived counters) or + # by their hardware source name until no machine name is left in the equation. + # Iterate from the longest to the shortest name so that prefixes work, e.g.: + # eq = MaliMainQueueTask * MaliMainQueueTaskSize * MaliMainQueueTaskSize + + progress = True + while progress: + progress = False + for l in range(max_len, 0, -1): + for c in filter(lambda c: len(c.machine_name) == l, sorted_c): + if c.machine_name in eq: + if c.is_derived(): + repl = f"({c.equation})" + else: + assert c.source_name is not None + repl = f"({c.source_name})" + + eq = eq.replace(c.machine_name, repl) + progress = True + break + + # There was a change, need to restart because we might have added + # a variable with len(name) > l.
+ if progress: + break + return eq + + +def counter_list_to_xml(counters: list[CounterInfo], gpu: str): + gpu_xml = gpu.replace("Mali-", "").replace("Mali", "").strip() + root = et.Element("metrics", attrib={"id": gpu_xml}) + + IGNORE_CATS = {"Constants", "Content"} + + cat_names = set([c.group for c in counters]) + categories = dict() + for c in sorted(cat_names): + if c in IGNORE_CATS: + continue + categories[c] = et.SubElement(root, "category", attrib={"name": c}) + + for counter in sorted(counters, key=lambda c: c.machine_name): + if not counter.is_supported(): + continue + + if counter.group in IGNORE_CATS: + continue + p = categories[counter.group] + + attrib = { + "name": counter.machine_name, + "title": counter.human_name, + "description": counter.short_desc, + "units": counter.units, + } + + if counter.is_derived(): + attrib["equation"] = resolve_equation(counter.equation, counters) + else: + attrib["counter"] = counter.source_name + attrib["offset"] = str(counter.get_hw_offsets(gpu).counter_index) + + et.SubElement(p, "event", attrib) + + return root + + +def main(): + p = ArgumentParser() + p.add_argument("lib_gpu_counters", type=Path, + help="Path to libGPUCounter source") + p.add_argument( + "--output-path", type=Path, default=Path(__file__).parent / "generated" + ) + args = p.parse_args() + + for f in (args.lib_gpu_counters / HARDWARE_LAYOUT_PATH).glob("*.xml"): + l = parse_hw_layout(f) + HW_LAYOUT_LUT[l.gpu_name] = l + + counters: list[CounterInfo] = [] + for f in (args.lib_gpu_counters / COUNTERINFO_PATH).glob("*.xml"): + counters += parse_counters(f) + + args.output_path.mkdir(parents=True, exist_ok=True) + + # Generate one file for each GPU.
+ all_gpus = set().union(*(c.supported_gpus for c in counters)) + year = datetime.datetime.now().year + rev = get_revision(args.lib_gpu_counters) + assert rev is not None, "could not determine libGPUCounters git revision" + for gpu in all_gpus: + gpu_counters = [c for c in counters if gpu in c.supported_gpus] + xml = counter_list_to_xml(gpu_counters, gpu) + et.indent(xml) + + fname = gpu.replace("Mali-", "").replace("Mali", "").strip() + ".xml" + + with open(args.output_path / fname, "wb") as f: + f.write( + OUTPUT_COPYRIGHT.format( + year=year, rev=rev).encode(encoding="utf-8") + ) + f.write(et.tostring(xml, encoding="utf-8")) + f.write("\n".encode(encoding="utf-8")) + + +if __name__ == "__main__": + main() From a996aab17e5c85de019e8fd6604e48e037c153d0 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 17:08:27 +0200 Subject: [PATCH 33/38] pan/perf: Replace bifrost+ xmls with generated ones --- src/panfrost/perf/G31.xml | 158 ------------ src/panfrost/perf/G51.xml | 158 ------------ src/panfrost/perf/G52.xml | 160 ------------- src/panfrost/perf/G57.xml | 160 ------------- src/panfrost/perf/G68.xml | 160 ------------- src/panfrost/perf/G71.xml | 158 ------------ src/panfrost/perf/G72.xml | 158 ------------ src/panfrost/perf/G76.xml | 160 ------------- src/panfrost/perf/G77.xml | 160 ------------- src/panfrost/perf/G78.xml | 160 ------------- src/panfrost/perf/Gx10.xml | 175 -------------- src/panfrost/perf/Gx25.xml | 316 ------------------------ src/panfrost/perf/generated/G1.xml | 345 +++++++++++++++++++++++++++ src/panfrost/perf/generated/G31.xml | 247 +++++++++++++++++++ src/panfrost/perf/generated/G51.xml | 247 +++++++++++++++++++ src/panfrost/perf/generated/G52.xml | 251 +++++++++++++++++++ src/panfrost/perf/generated/G71.xml | 247 +++++++++++++++++++ src/panfrost/perf/generated/G710.xml | 272 +++++++++++++++++++++ src/panfrost/perf/generated/G715.xml | 294 +++++++++++++++++++++++ src/panfrost/perf/generated/G72.xml | 247 +++++++++++++++++++ src/panfrost/perf/generated/G720.xml | 303
+++++++++++++++++++++++ src/panfrost/perf/generated/G725.xml | 335 ++++++++++++++++++++++++++ src/panfrost/perf/generated/G76.xml | 251 +++++++++++++++++++ src/panfrost/perf/generated/G77.xml | 259 ++++++++++++++++++++ src/panfrost/perf/generated/G78.xml | 261 ++++++++++++++++++++ src/panfrost/perf/meson.build | 16 +- 26 files changed, 3573 insertions(+), 2085 deletions(-) delete mode 100644 src/panfrost/perf/G31.xml delete mode 100644 src/panfrost/perf/G51.xml delete mode 100644 src/panfrost/perf/G52.xml delete mode 100644 src/panfrost/perf/G57.xml delete mode 100644 src/panfrost/perf/G68.xml delete mode 100644 src/panfrost/perf/G71.xml delete mode 100644 src/panfrost/perf/G72.xml delete mode 100644 src/panfrost/perf/G76.xml delete mode 100644 src/panfrost/perf/G77.xml delete mode 100644 src/panfrost/perf/G78.xml delete mode 100644 src/panfrost/perf/Gx10.xml delete mode 100644 src/panfrost/perf/Gx25.xml create mode 100644 src/panfrost/perf/generated/G1.xml create mode 100644 src/panfrost/perf/generated/G31.xml create mode 100644 src/panfrost/perf/generated/G51.xml create mode 100644 src/panfrost/perf/generated/G52.xml create mode 100644 src/panfrost/perf/generated/G71.xml create mode 100644 src/panfrost/perf/generated/G710.xml create mode 100644 src/panfrost/perf/generated/G715.xml create mode 100644 src/panfrost/perf/generated/G72.xml create mode 100644 src/panfrost/perf/generated/G720.xml create mode 100644 src/panfrost/perf/generated/G725.xml create mode 100644 src/panfrost/perf/generated/G76.xml create mode 100644 src/panfrost/perf/generated/G77.xml create mode 100644 src/panfrost/perf/generated/G78.xml diff --git a/src/panfrost/perf/G31.xml b/src/panfrost/perf/G31.xml deleted file mode 100644 index 0cb8d3fabd8..00000000000 --- a/src/panfrost/perf/G31.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G51.xml b/src/panfrost/perf/G51.xml deleted file mode 100644 index 2ee1958145b..00000000000 --- a/src/panfrost/perf/G51.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G52.xml b/src/panfrost/perf/G52.xml deleted file mode 100644 index e42dfdb2d8d..00000000000 --- a/src/panfrost/perf/G52.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G57.xml b/src/panfrost/perf/G57.xml deleted file mode 100644 index 2c5e843a6f2..00000000000 --- a/src/panfrost/perf/G57.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G68.xml b/src/panfrost/perf/G68.xml deleted file mode 100644 index ef14f91462e..00000000000 --- a/src/panfrost/perf/G68.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G71.xml b/src/panfrost/perf/G71.xml deleted file mode 100644 index 6080c5eeb59..00000000000 --- a/src/panfrost/perf/G71.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G72.xml b/src/panfrost/perf/G72.xml deleted file mode 100644 index 89c3a118dce..00000000000 --- a/src/panfrost/perf/G72.xml +++ /dev/null @@ -1,158 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G76.xml b/src/panfrost/perf/G76.xml deleted file mode 100644 index 5777a64c7ea..00000000000 --- a/src/panfrost/perf/G76.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G77.xml b/src/panfrost/perf/G77.xml deleted file mode 100644 index b107ae96ba0..00000000000 --- a/src/panfrost/perf/G77.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/G78.xml b/src/panfrost/perf/G78.xml deleted file mode 100644 index b0093ef0cc1..00000000000 --- a/src/panfrost/perf/G78.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/Gx10.xml b/src/panfrost/perf/Gx10.xml deleted file mode 100644 index ddf009b8f81..00000000000 --- a/src/panfrost/perf/Gx10.xml +++ /dev/null @@ -1,175 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/panfrost/perf/Gx25.xml b/src/panfrost/perf/Gx25.xml deleted file mode 100644 index 0120d16838f..00000000000 --- a/src/panfrost/perf/Gx25.xml +++ /dev/null @@ -1,316 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git 
a/src/panfrost/perf/generated/G1.xml b/src/panfrost/perf/generated/G1.xml new file mode 100644 index 00000000000..3a59e3d8998 --- /dev/null +++ b/src/panfrost/perf/generated/G1.xml @@ -0,0 +1,345 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G31.xml b/src/panfrost/perf/generated/G31.xml new file mode 100644 index 00000000000..326c06bc175 --- /dev/null +++ b/src/panfrost/perf/generated/G31.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G51.xml b/src/panfrost/perf/generated/G51.xml new file mode 100644 index 00000000000..661ce6a6b1a --- /dev/null +++ b/src/panfrost/perf/generated/G51.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G52.xml b/src/panfrost/perf/generated/G52.xml new file mode 100644 index 00000000000..a2ab269e4d6 --- /dev/null +++ b/src/panfrost/perf/generated/G52.xml @@ -0,0 +1,251 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G71.xml b/src/panfrost/perf/generated/G71.xml new file mode 100644 index 00000000000..36817506f30 --- /dev/null +++ b/src/panfrost/perf/generated/G71.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G710.xml b/src/panfrost/perf/generated/G710.xml new file mode 100644 index 00000000000..a67dad5b183 --- /dev/null +++ b/src/panfrost/perf/generated/G710.xml @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G715.xml b/src/panfrost/perf/generated/G715.xml new file mode 100644 index 00000000000..e8fa0bb074f --- /dev/null +++ b/src/panfrost/perf/generated/G715.xml @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G72.xml b/src/panfrost/perf/generated/G72.xml new file mode 100644 index 00000000000..0853083d49d --- /dev/null +++ b/src/panfrost/perf/generated/G72.xml @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G720.xml b/src/panfrost/perf/generated/G720.xml new file mode 100644 index 00000000000..93255e21e93 --- 
/dev/null +++ b/src/panfrost/perf/generated/G720.xml @@ -0,0 +1,303 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G725.xml b/src/panfrost/perf/generated/G725.xml new file mode 100644 index 00000000000..650061ac1c1 --- /dev/null +++ b/src/panfrost/perf/generated/G725.xml @@ -0,0 +1,335 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G76.xml b/src/panfrost/perf/generated/G76.xml new file mode 100644 index 00000000000..6387a90613c --- /dev/null +++ b/src/panfrost/perf/generated/G76.xml @@ -0,0 +1,251 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G77.xml b/src/panfrost/perf/generated/G77.xml new file mode 100644 index 00000000000..e0115f7b4e1 --- /dev/null +++ b/src/panfrost/perf/generated/G77.xml @@ -0,0 +1,259 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/generated/G78.xml b/src/panfrost/perf/generated/G78.xml new file mode 100644 index 00000000000..87e05167548 --- /dev/null +++ b/src/panfrost/perf/generated/G78.xml @@ -0,0 +1,261 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/panfrost/perf/meson.build b/src/panfrost/perf/meson.build index f8418af6e90..f16927dadf5 100644 --- a/src/panfrost/perf/meson.build +++ b/src/panfrost/perf/meson.build @@ -3,8 +3,20 @@ # SPDX-License-Identifier: MIT pan_hw_metrics = [ - 'G31', 'G51', 'G52', 'G57', 'G68', 'G71', 'G72', 'G76', 'G77', - 'Gx10', 'Gx25', 'G78', 
'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'T72x', 'T76x', 'T82x', 'T83x', 'T86x', 'T88x', + 'generated/G31', + 'generated/G51', + 'generated/G52', + 'generated/G71', + 'generated/G72', + 'generated/G76', + 'generated/G77', + 'generated/G78', + 'generated/G710', + 'generated/G715', + 'generated/G720', + 'generated/G725', + 'generated/G1', ] pan_hw_metrics_xml_files = [] From 2951770119268ff85315f225d212b7f904b5178b Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 17:13:22 +0200 Subject: [PATCH 34/38] pan/model: Update perf counters Update the model table entries to match the ids in the generated definitions. --- src/panfrost/model/pan_model.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/panfrost/model/pan_model.c b/src/panfrost/model/pan_model.c index 890e023c6e1..db010f45681 100644 --- a/src/panfrost/model/pan_model.c +++ b/src/panfrost/model/pan_model.c @@ -66,34 +66,34 @@ const struct pan_model pan_model_list[] = { MIDGARD_MODEL(0x860, "T860", "T86x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)), MIDGARD_MODEL(0x880, "T880", "T88x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "TMIx", MODEL_ANISO(NONE), MODEL_TB_SIZES( 4096, 4096)), - BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "THEx", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)), - BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "TSIx", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), "G31", "TDVx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "TNOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), - BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(6, 0, 0), "G71", "G71", MODEL_ANISO(NONE), MODEL_TB_SIZES( 
4096, 4096)), + BIFROST_MODEL(PAN_PROD_ID(6, 2, 1), "G72", "G72", MODEL_ANISO(R0P3), MODEL_TB_SIZES( 8192, 4096)), + BIFROST_MODEL(PAN_PROD_ID(7, 0, 0), "G51", "G51", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 0, 3), "G31", "G31", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 2, 1), "G76", "G76", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 2, 2), "G52", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)), + BIFROST_MODEL(PAN_PROD_ID(7, 4, 2), "G52 r1", "G52", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)), - VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(9, 0, 1), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(9, 0, 3), 0, "G57", "G77", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 8, 7), 0, "G610", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 64)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 0, "G310v1", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 2, 16)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 1, "G310v2", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), MODEL_RATES(2, 4, 32)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 2, "G310v3", "G710", MODEL_ANISO(ALL), 
MODEL_TB_SIZES(16384, 8192), MODEL_RATES(4, 4, 48)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 3, "G310v4", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 48)), - VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), + VALHALL_MODEL(PAN_PROD_ID(10, 12, 4), 4, "G310v5", "G710", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384), MODEL_RATES(4, 8, 64)), - FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "TTIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768), + FIFTHGEN_MODEL(PAN_PROD_ID(12, 8, 0), 4, "G720", "G720", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768), MODEL_RATES(4, 8, 128)), - FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536), + FIFTHGEN_MODEL(PAN_PROD_ID(13, 8, 0), 4, "G725", "G725", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536), MODEL_RATES(4, 8, 128)), }; /* clang-format on */ From 7887f4e7b56ffa88de99cfd989cc8d8836bff88b Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 17:19:41 +0200 Subject: [PATCH 35/38] pan/perf: Add derived counter prerequisites --- src/panfrost/perf/pan_perf.c | 9 +++++++++ src/panfrost/perf/pan_perf.h | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 2a6c004443b..28961ea7eb8 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -88,6 +88,15 @@ pan_perf_init(struct pan_perf *perf, int fd) UNREACHABLE("Performance counters missing!"); pan_kmod_perf_query_layout(perf->session, &perf->mem_layout); + + unsigned unused; + + perf->derived_configs[PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT] = + pan_query_core_count(&props, &unused); + perf->derived_configs[PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT] = + pan_query_l2_slices(&props); + 
perf->derived_configs[PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE] = + pan_query_bus_width(&props); } int diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index fe73ec15cd5..48c2a8ef77f 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -14,7 +14,8 @@ extern "C" { #endif -#define PAN_PERF_MAX_COUNTERS 128 +/* 128 hardware counters, but there can be more derived ones. */ +#define PAN_PERF_MAX_COUNTERS 190 enum pan_perf_counter_categories { PAN_PERF_COUNTER_CAT_FRONTEND, @@ -50,6 +51,15 @@ enum pan_perf_counter_units { PAN_PERF_COUNTER_UNITS_BYTES_PER_SECOND, }; +enum pan_perf_derived_config { + PAN_PERF_DERIVED_CONFIG_SHADER_CORE_COUNT, + PAN_PERF_DERIVED_CONFIG_EXT_BUS_BYTE_SIZE, + PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT, + PAN_PERF_DERIVED_CONFIG_LAST = PAN_PERF_DERIVED_CONFIG_L2_CACHE_COUNT, +}; + +struct pan_perf; + struct pan_perf_counter { const char *name; const char *desc; @@ -58,6 +68,9 @@ struct pan_perf_counter { // Offset of this counter's value within the category uint32_t offset; enum pan_perf_counter_categories category; + + /* Optional, function to compute the derived counters value. */ + double (*derived)(const struct pan_perf*, const double*, uint8_t); }; struct pan_perf_category { @@ -82,6 +95,7 @@ struct pan_perf { struct pan_kmod_perf_session *session; const struct pan_perf_config *cfg; struct pan_kmod_perf_buffer_layout mem_layout; + double derived_configs[PAN_PERF_DERIVED_CONFIG_LAST + 1]; }; int64_t pan_perf_counter_read(const struct pan_perf *perf, From 5edbcca82b44a3c6e4938e98e267933ecfd394c0 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Thu, 16 Apr 2026 17:21:10 +0200 Subject: [PATCH 36/38] pan/perf: Support reading derived counters Also pull out pan_perf_counter_read_raw into the header to help inlining into the generated equation functions added afterwards. 
--- src/panfrost/perf/pan_perf.c | 17 ++++------------- src/panfrost/perf/pan_perf.h | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c index 28961ea7eb8..66cd514bed4 100644 --- a/src/panfrost/perf/pan_perf.c +++ b/src/panfrost/perf/pan_perf.c @@ -21,19 +21,10 @@ int64_t pan_perf_counter_read(const struct pan_perf *perf, const struct pan_perf_counter *counter, uint8_t block) { - STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); - STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); - STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_MEMSYS == (int)PAN_PERF_COUNTER_CAT_MEMSYS); - STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_SHADER == (int)PAN_PERF_COUNTER_CAT_SHADER); - - assert(perf->session->data != NULL); - - const uint32_t offset = perf->mem_layout.category[counter->category].offset + - perf->mem_layout.block_stride * block + - perf->mem_layout.counter_stride * counter->offset; - - uint8_t *val_ptr = ((uint8_t *)perf->session->data) + offset; - return pan_kmod_perf_load_counter(perf->session, val_ptr); + if (counter->derived != NULL) + return counter->derived(perf, perf->derived_configs, block); + else + return pan_perf_counter_read_raw(perf, counter->category, counter->offset, block); } int64_t diff --git a/src/panfrost/perf/pan_perf.h b/src/panfrost/perf/pan_perf.h index 48c2a8ef77f..3c8640789cc 100644 --- a/src/panfrost/perf/pan_perf.h +++ b/src/panfrost/perf/pan_perf.h @@ -98,6 +98,27 @@ struct pan_perf { double derived_configs[PAN_PERF_DERIVED_CONFIG_LAST + 1]; }; +static inline +int64_t pan_perf_counter_read_raw(const struct pan_perf *perf, + enum pan_perf_counter_categories cat, + uint8_t counter_index, + uint8_t block) +{ + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_FRONTEND == (int)PAN_PERF_COUNTER_CAT_FRONTEND); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_TILER == (int)PAN_PERF_COUNTER_CAT_TILER); + 
STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_MEMSYS == (int)PAN_PERF_COUNTER_CAT_MEMSYS); + STATIC_ASSERT((int)PAN_KMOD_PERF_CAT_SHADER == (int)PAN_PERF_COUNTER_CAT_SHADER); + + assert(perf->session->data != NULL); + + const uint32_t offset = perf->mem_layout.category[cat].offset + + perf->mem_layout.block_stride * block + + perf->mem_layout.counter_stride * counter_index; + + uint8_t *val_ptr = ((uint8_t *)perf->session->data) + offset; + return pan_kmod_perf_load_counter(perf->session, val_ptr); +} + int64_t pan_perf_counter_read(const struct pan_perf *perf, const struct pan_perf_counter *counter, uint8_t block); From 54d1a512a86453e8834823548e3180c5da7a4e35 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 17 Apr 2026 11:58:13 +0200 Subject: [PATCH 37/38] pan/perf: Fix pan_gen_perf.py format --- src/panfrost/perf/pan_gen_perf.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index fad0a1c390c..c798afcf814 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -15,7 +15,7 @@ class SourceFile: self._indent = 0 def write(self, *args): - code = ' '.join(map(str,args)) + code = ' '.join(map(str, args)) for line in code.splitlines(): text = ''.rjust(self._indent) + line self.file.write(text.rstrip() + "\n") @@ -26,15 +26,17 @@ class SourceFile: def outdent(self, n): self._indent -= n + CATEGORY_IDX_REMAP = { - "Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND", - "CSF": "PAN_PERF_COUNTER_CAT_FRONTEND", - "Tiler": "PAN_PERF_COUNTER_CAT_TILER", - "Memory System" : "PAN_PERF_COUNTER_CAT_MEMSYS", - "L2 Cache": "PAN_PERF_COUNTER_CAT_MEMSYS", - "Shader Core": "PAN_PERF_COUNTER_CAT_SHADER", + "Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND", + "CSF": "PAN_PERF_COUNTER_CAT_FRONTEND", + "Tiler": "PAN_PERF_COUNTER_CAT_TILER", + "Memory System" : "PAN_PERF_COUNTER_CAT_MEMSYS", + "L2 Cache": "PAN_PERF_COUNTER_CAT_MEMSYS", + "Shader Core": 
"PAN_PERF_COUNTER_CAT_SHADER", } + class Counter: # category Category owning the counter # xml XML representation of itself @@ -197,7 +199,7 @@ def main(): c.write("\nconst struct pan_perf_config * pan_perf_configs[] = {") c.indent(tab_size) for prod in prods: - c.write("&pan_perf_config_%s," % prod.id) + c.write("&pan_perf_config_%s," % prod.id) c.outdent(tab_size) c.write("};") From 3b6b25c7d6d0685fbebbacde9a6949df5544b895 Mon Sep 17 00:00:00 2001 From: Christoph Pillmayer Date: Fri, 17 Apr 2026 11:48:33 +0200 Subject: [PATCH 38/38] pan/perf: Generate derived counter code For the derived counters generate functions that read the required hardware counters, then compute and return the result. The computations use doubles like in libGPUCounters. It also performs all computations using floating point, we want to match the output of other tools using that library. The equation implementations are deduplicated as there are counters which have changed their equation over time but not every generation. 
--- src/panfrost/perf/pan_gen_perf.py | 204 +++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 3 deletions(-) diff --git a/src/panfrost/perf/pan_gen_perf.py b/src/panfrost/perf/pan_gen_perf.py index c798afcf814..4f240e8c408 100644 --- a/src/panfrost/perf/pan_gen_perf.py +++ b/src/panfrost/perf/pan_gen_perf.py @@ -5,9 +5,14 @@ import argparse import textwrap import os import datetime +import re +from dataclasses import dataclass +from typing import ClassVar import xml.etree.ElementTree as et +TAB_SIZE = 3 + class SourceFile: def __init__(self, filename): @@ -28,6 +33,7 @@ class SourceFile: CATEGORY_IDX_REMAP = { + "GPU Front-end": "PAN_PERF_COUNTER_CAT_FRONTEND", "Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND", "CSF": "PAN_PERF_COUNTER_CAT_FRONTEND", "Tiler": "PAN_PERF_COUNTER_CAT_TILER", @@ -46,8 +52,14 @@ class Counter: self.name = self.xml.get("name") self.desc = self.xml.get("description") self.units = self.xml.get("units") - self.offset = int(self.xml.get("offset")) - self.underscore_name = self.xml.get("counter").lower() + self.equation = self.xml.get("equation") + self.offset = int(self.xml.get("offset") or 0) + self.underscore_name = (self.xml.get("counter") or "").lower() + self.source_name = self.xml.get("counter") or "" + self.equation_impl = None + + if self.units.endswith("/second"): + self.units = self.units.replace("/second", "_per_second") class Category: @@ -71,6 +83,7 @@ class Product: self.filename = filename self.xml = et.parse(self.filename) self.name = self.xml.getroot().get('id') + assert(self.name is not None) self.id = self.name.lower() self.categories = [] @@ -78,6 +91,114 @@ class Product: self.categories.append(Category(self, xml_cat)) +@dataclass +class EquationImpl: + fname: str + body: str + counter: Counter + version: int = -1 + + impls: ClassVar[dict[str, dict[str, 'EquationImpl']]] = {} + + """We don't want duplicate methods wasting space, this makes sure there is + only one implementation for each variant of counter 
hardware locations. + """ + @classmethod + def get(cls, counter, all_counters): + + body = cls.generate_body(counter, all_counters) + + if counter.name not in cls.impls: + cls.impls[counter.name] = {} + + bucket = cls.impls[counter.name] + + if body not in bucket: + fname = f"compute_{counter.name.lower()}" + eq = EquationImpl(fname, body, counter) + eq.version = len(bucket.keys()) + bucket[body] = eq + + return bucket[body] + + @staticmethod + def generate_body(counter, counters): + eq = counter.equation + + vals = dict() + + for c in sorted(counters, key=lambda x: len(x.source_name), reverse=True): + if c.source_name == "" or c.source_name not in eq: + continue + + idx = len(vals) + cat_enum = CATEGORY_IDX_REMAP[c.category.name] + # MaliAnyUtil for example is from "Shader Core" but it reads GPU_ACTIVE + # which is from "Front-end". We can not use the block index from the + # shader core when reading a front-end counter. + # If reading from another block for the equation the only block index that + # makes sense is 0 because if the category had more than one block we + # could not know which one to choose. 
+ from_block = 'block' if c.category.name == counter.category.name else '0' + r = f"const double v{idx} = pan_perf_counter_read_raw(perf, {cat_enum}, {c.offset}, {from_block});" + vals[c.source_name] = (idx, r) + + eq = eq.replace(c.source_name, f"v{idx}") + + for match in re.finditer(r"(MALI_CONFIG[a-zA-Z0-9_]+)($|[^a-zA-Z0-9_])", eq): + config = match.group(1) + + idx = len(vals) + pan_config = config.replace("MALI", "PAN_PERF_DERIVED") + r = f"const double v{idx} = configs[{pan_config}];" + vals[config] = (idx, r) + + eq = eq.replace(config, f"v{idx}") + + defs = [r for _, r in vals.values()] + body = "\n".join(defs) + "\n" + body += f"return {eq};" + + return body + + @property + def versioned_name(self): + assert (self.version != -1 and "should not emit non versioned") + return self.fname + f"_v{self.version}" + + @property + def decl(self): + decl = "double " + self.versioned_name + \ + "(const struct pan_perf *perf, const double *configs, uint8_t block)" + return decl + + +def generate_equations(prods, c): + for prod in prods: + + all_raw_counters = [] + for cat in prod.categories: + for counter in cat.counters: + if counter.source_name: + all_raw_counters.append(counter) + + for cat in prod.categories: + for counter in cat.counters: + if not counter.equation: + continue + + eq = EquationImpl.get(counter, all_raw_counters) + counter.equation_impl = eq + + for impls in EquationImpl.impls.values(): + for impl in impls.values(): + c.write("static " + impl.decl + "{") + c.indent(TAB_SIZE) + c.write(impl.body) + c.outdent(TAB_SIZE) + c.write("}\n") + + def main(): parser = argparse.ArgumentParser() parser.add_argument("--header", help="Header file to write", required=True) @@ -93,7 +214,7 @@ def main(): for xml_file in args.xml_files: prods.append(Product(xml_file)) - tab_size = 3 + tab_size = TAB_SIZE copyright = textwrap.dedent("""\ /* Autogenerated file, DO NOT EDIT manually! 
generated by {} @@ -121,6 +242,79 @@ def main(): #include """)) + c.write(textwrap.dedent(""" + static inline double max2(double a, double b) { /* double, not int: derived equations are evaluated in floating point */ + return MAX2(a, b); + } + + static inline double max3(double a, double b, double c) { + return max2(max2(a, b), c); + } + + static inline double max4(double a, double b, double c, double d) { + return max2(max3(a, b, c), d); + } + + static inline double max5(double a, double b, double c, double d, double e) { + return max2(max4(a, b, c, d), e); + } + + static inline double max6(double a, double b, double c, double d, double e, double f) { + return max2(max5(a, b, c, d, e), f); + } + + static inline double max7(double a, double b, double c, double d, double e, double f, double g) { + return max2(max6(a, b, c, d, e, f), g); + } + + static inline double max8(double a, double b, double c, double d, double e, double f, double g, double h) { + return max2(max7(a, b, c, d, e, f, g), h); + } + + static inline double max9(double a, double b, double c, double d, double e, double f, double g, double h, double i) { + return max2(max8(a, b, c, d, e, f, g, h), i); + } + + static inline double min2(double a, double b) { + return MIN2(a, b); + } + + static inline double min3(double a, double b, double c) { + return min2(min2(a, b), c); + } + + static inline double min4(double a, double b, double c, double d) { + return min2(min3(a, b, c), d); + } + + static inline double min5(double a, double b, double c, double d, double e) { + return min2(min4(a, b, c, d), e); + } + + static inline double min6(double a, double b, double c, double d, double e, double f) { + return min2(min5(a, b, c, d, e), f); + } + + static inline double min7(double a, double b, double c, double d, double e, double f, double g) { + return min2(min6(a, b, c, d, e, f), g); + } + + static inline double min8(double a, double b, double c, double d, double e, double f, double g, double h) { + return min2(min7(a, b, c, d, e, f, g), h); + } + + static inline double min9(double a, double b, double c, double d, double e, double f, double g, double h, double i) { + return min2(min8(a, b, c, d, e, f, g, h), i); + } + + #define GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,name,...) name + #define min(...)
GET_MACRO(__VA_ARGS__, min9, min8, min7, min6, min5, min4, min3, min2)(__VA_ARGS__) + #define max(...) GET_MACRO(__VA_ARGS__, max9, max8, max7, max6, max5, max4, max3, max2)(__VA_ARGS__) + + """)) + + generate_equations(prods, c) + for prod in prods: c.write(textwrap.dedent(""" static void UNUSED @@ -176,6 +370,10 @@ def main(): c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper())) c.write(".offset = %u," % (counter.offset)) c.write(".category = %s," % CATEGORY_IDX_REMAP[category.name]) + if counter.equation: + c.write(f".derived = {counter.equation_impl.versioned_name},") + else: + c.write(".derived = NULL,") c.outdent(tab_size) c.write("}, // counter")