From 8d7387deea8cb95f878f1909484ba4576e100531 Mon Sep 17 00:00:00 2001
From: Christoph Pillmayer
Date: Fri, 20 Mar 2026 17:21:00 +0100
Subject: [PATCH] pan/kmod: Implement panthor kmod perf counter methods

Co-Authored-by: Lukas Zapolskas
---
 src/panfrost/lib/kmod/panthor_kmod.c | 513 +++++++++++++++++++++++++++
 1 file changed, 513 insertions(+)

diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c
index c25f2315e54..2fb06e6bfd2 100644
--- a/src/panfrost/lib/kmod/panthor_kmod.c
+++ b/src/panfrost/lib/kmod/panthor_kmod.c
@@ -1,5 +1,6 @@
 /*
  * Copyright © 2023 Collabora, Ltd.
+ * Copyright © 2026 Arm, Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -7,6 +8,7 @@
 #include 
 #include 
 #include 
+#include <sys/eventfd.h>
 
 #include "util/hash_table.h"
 #include "util/libsync.h"
@@ -20,6 +22,8 @@
 #include "drm-uapi/dma-buf.h"
 #include "drm-uapi/panthor_drm.h"
 
+#include "util/timespec.h"
+
 #include "pan_kmod_backend.h"
 #include "pan_props.h"
 
@@ -103,6 +107,43 @@ struct panthor_kmod_bo {
    } sync;
 };
 
+/* Backend state for one panthor performance-counter sampling session. */
+struct panthor_kmod_perf_session {
+   struct pan_kmod_perf_session base;
+
+   struct {
+      /* eventfd signalled by the kernel when a sample lands in the ringbuf. */
+      int event;
+   } fds;
+
+   /* Kernel-side session id, valid only while session_initialized is set. */
+   int session_handle;
+
+   struct {
+      int ringbuf;
+      int control;
+   } bos;
+
+   struct {
+      size_t sample;
+      size_t block;
+      size_t ringbuf;
+      size_t control;
+      size_t sample_header;
+      size_t block_header;
+   } sizes;
+
+   /* Block counts reported by DRM_PANTHOR_DEV_QUERY_PERF_INFO. */
+   struct {
+      size_t cshw_blocks;
+      size_t tiler_blocks;
+      size_t memsys_blocks;
+      size_t shader_blocks;
+   } config;
+
+   bool session_initialized;
+   bool active;
+   uint8_t set;
+   /* Monotonic user_data tag attached to every perf command we issue. */
+   uint64_t sample_idx;
+   uint8_t *ringbuffer;
+   struct drm_panthor_perf_ringbuf_control *ctrl;
+};
+
 static uint32_t
 to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
 {
@@ -1302,6 +1343,472 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
       mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno);
 }
 
+/* ================ PERF COUNTERS ================= */
+
+#define PANTHOR_SAMPLE_SLOTS (32)
+#define PANTHOR_POLL_TIMEOUT_SEC (10)
+
+#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
+
+/* One slot in the sample ringbuffer: kernel header followed by raw
+ * counter block data. */
+struct panthor_perf_sample {
+   struct drm_panthor_perf_sample_header sample_header;
+   uint8_t bytes[];
+};
+
+/* Create a kernel perf session bound to @eventfd for sample notification,
+ * with all counters in all blocks enabled.
+ *
+ * Returns the new session handle (>= 0) on success, a negative value on
+ * failure.
+ *
+ * NOTE(review): this assumes the kernel writes the new session id back
+ * into ctrl.handle on DRM_PANTHOR_PERF_COMMAND_SETUP; returning the raw
+ * drmIoctl() status (as the previous revision did) always yielded handle
+ * 0 on success. Confirm against the panthor UAPI.
+ */
+static int
+perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set)
+{
+   struct drm_panthor_perf_cmd_setup setup = {
+      .fd = eventfd,
+      .block_set = set,
+      .ringbuf_handle = ringbuf_handle,
+      .control_handle = control_handle,
+      .sample_slots = PANTHOR_SAMPLE_SLOTS,
+      /* Enable every counter in every block type. */
+      .cshw_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .tiler_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .memsys_enable_mask = { UINT64_MAX, UINT64_MAX },
+      .shader_enable_mask = { UINT64_MAX, UINT64_MAX },
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_SETUP,
+      .size = sizeof(setup),
+      .pointer = PTR_TO_U64(&setup),
+   };
+
+   int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+   if (ret)
+      return ret;
+
+   return ctrl.handle;
+}
+
+/* Start sampling on session @sid; @user_data tags the command. */
+static int
+perf_cmd_start(int fd, int sid, uint64_t user_data)
+{
+   struct drm_panthor_perf_cmd_start start = {
+      .user_data = user_data,
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_START,
+      .handle = sid,
+      .size = sizeof(start),
+      .pointer = PTR_TO_U64(&start),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+/* Stop sampling on session @sid.
+ *
+ * NOTE(review): @user_data is accepted for symmetry with start/sample but
+ * drm_panthor_perf_cmd_stop is zero-initialized here without it — confirm
+ * whether the UAPI stop command carries a user_data field.
+ */
+static int
+perf_cmd_stop(int fd, int sid, uint64_t user_data)
+{
+   struct drm_panthor_perf_cmd_stop stop = {};
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_STOP,
+      .handle = sid,
+      .size = sizeof(stop),
+      .pointer = PTR_TO_U64(&stop),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+/* Request one manual sample on session @sid; completion is signalled on
+ * the session eventfd and the insert index in the ringbuf control page
+ * advances. */
+static int
+perf_cmd_sample(int fd, int sid, uint64_t user_data)
+{
+   struct drm_panthor_perf_cmd_sample sample = {
+      .user_data = user_data,
+   };
+
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE,
+      .handle = sid,
+      .size = sizeof(sample),
+      .pointer = PTR_TO_U64(&sample),
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+/* Destroy the kernel-side session object. */
+static int
+perf_cmd_teardown(int fd, int sid)
+{
+   struct drm_panthor_perf_control ctrl = {
+      .cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
+      .handle = sid,
+   };
+
+   return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
+}
+
+/* Unmap @addr (if non-NULL) and close the GEM handle. */
+static int
+unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size)
+{
+   if (addr)
+      munmap(addr, size);
+
+   struct drm_gem_close gem_close = {
+      .handle = handle,
+   };
+   return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+}
+
+/* Allocate a BO of @size bytes and map it read/write.
+ * On success *handle and *mapping are filled in and 0 is returned;
+ * on failure a negative error code is returned and nothing is leaked. */
+static int
+create_and_map_bo(int fd, size_t size, int *handle, void **mapping)
+{
+   struct drm_panthor_bo_create bo = {
+      .size = size,
+   };
+   int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo);
+   if (ret)
+      return -errno;
+
+   struct drm_panthor_bo_mmap_offset offset = {
+      .handle = bo.handle,
+   };
+   ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset);
+   if (ret) {
+      ret = -errno;
+      goto term_bo;
+   }
+
+   void *map = mmap(0, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+                    (off_t)offset.offset);
+   if (map == MAP_FAILED) {
+      ret = -errno;
+      goto term_bo;
+   }
+
+   *handle = bo.handle;
+   *mapping = map;
+
+   return 0;
+
+term_bo:
+   /* Best-effort cleanup; report the original failure, not the cleanup
+    * status (returning the GEM_CLOSE result here would turn failures
+    * into apparent success). */
+   unmap_and_teardown_bo(fd, bo.handle, NULL, 0);
+   return ret;
+}
+
+/* Block until the kernel signals a completed sample on @poll_fd, or until
+ * PANTHOR_POLL_TIMEOUT_SEC elapses. Returns 0 on success, -ETIMEDOUT on
+ * timeout, a negative errno otherwise. */
+static int
+poll_for_sample(int poll_fd)
+{
+   int ret;
+   eventfd_t tmp;
+   struct pollfd pfd[1] = {
+      {
+         .fd = poll_fd,
+         .events = POLLIN,
+      },
+   };
+   struct timespec timeout = {
+      .tv_sec = PANTHOR_POLL_TIMEOUT_SEC,
+   };
+   struct timespec now, remaining, deadline;
+
+   clock_gettime(CLOCK_MONOTONIC, &now);
+   timespec_add(&deadline, &now, &timeout);
+
+   /* Recompute the remaining budget on every EINTR so interrupts cannot
+    * extend the overall deadline. */
+   do {
+      clock_gettime(CLOCK_MONOTONIC, &now);
+      timespec_sub_saturate(&remaining, &deadline, &now);
+      ret = ppoll(pfd, 1, &remaining, NULL);
+   } while (ret == -1 && errno == EINTR);
+
+   if (ret < 0)
+      return -errno;
+
+   /* ppoll() returning 0 means the deadline expired with no sample;
+    * calling eventfd_read() here would block indefinitely. */
+   if (ret == 0)
+      return -ETIMEDOUT;
+
+   /* Consume one event (EFD_SEMAPHORE decrements by one). */
+   return eventfd_read(poll_fd, &tmp);
+}
+
+static uint64_t
+read_extract_idx(struct panthor_kmod_perf_session *perf)
+{
+   return p_atomic_read(&perf->ctrl->extract_idx);
+}
+
+static void
+write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
+{
+   p_atomic_set(&perf->ctrl->extract_idx, idx);
+}
+
+static uint64_t
+read_insert_idx(struct panthor_kmod_perf_session *perf)
+{
+   return p_atomic_read(&perf->ctrl->insert_idx);
+}
+
+/* Create the userspace side of a perf session: query the device layout,
+ * allocate and map the sample ringbuffer and the control page. The kernel
+ * session itself is created lazily in panthor_kmod_perf_enable().
+ * Returns NULL on failure. */
+static inline struct pan_kmod_perf_session *
+panthor_kmod_perf_init(struct pan_kmod_dev *dev)
+{
+   UNUSED struct panthor_kmod_dev *panthor_dev =
+      container_of(dev, struct panthor_kmod_dev, base);
+
+   struct panthor_kmod_perf_session *sess =
+      pan_kmod_dev_alloc(dev, sizeof(*sess));
+   if (!sess) {
+      mesa_loge("failed to allocate a panthor_kmod_perf_session object");
+      return NULL;
+   }
+
+   sess->base.dev = dev;
+
+   /* Sanity-check that the device answers GPU_INFO queries before we
+    * commit any resources; the result itself is unused. */
+   struct drm_panthor_gpu_info gpu_info = {};
+   struct drm_panthor_dev_query query = {
+      .type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
+      .size = sizeof(gpu_info),
+      .pointer = PTR_TO_U64(&gpu_info),
+   };
+
+   int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
+   if (ret)
+      goto free_perf;
+
+   struct drm_panthor_perf_info perf_info = {};
+
+   query = (struct drm_panthor_dev_query) {
+      .type = DRM_PANTHOR_DEV_QUERY_PERF_INFO,
+      .size = sizeof(perf_info),
+      .pointer = PTR_TO_U64(&perf_info),
+   };
+
+   ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
+   if (ret)
+      goto free_perf;
+
+   /* eventfd() returns -1 on failure; 0 is a valid descriptor. */
+   sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
+   if (sess->fds.event < 0)
+      goto free_perf;
+
+   const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) +
+      perf_info.block_header_size;
+   const size_t sample_size = perf_info.sample_size;
+   const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS;
+
+   sess->sizes.block = block_size;
+   sess->sizes.sample = sample_size;
+   sess->sizes.ringbuf = buffer_size;
+   sess->sizes.control = sizeof(*sess->ctrl);
+   sess->sizes.sample_header = perf_info.sample_header_size;
+   sess->sizes.block_header = perf_info.block_header_size;
+
+   /* A mismatch means our UAPI headers are out of sync with the kernel;
+    * sampling may still work but offsets become suspect. */
+   if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header))
+      mesa_loge("panfrost perf sample header size mismatch!");
+
+   if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header))
+      mesa_loge("panfrost perf block header size mismatch!");
+
+   sess->config.cshw_blocks = perf_info.cshw_blocks;
+   sess->config.tiler_blocks = perf_info.tiler_blocks;
+   sess->config.memsys_blocks = perf_info.memsys_blocks;
+   sess->config.shader_blocks = perf_info.shader_blocks;
+
+   void *buf_map;
+   ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map);
+   if (ret)
+      goto free_eventfd;
+
+   sess->ringbuffer = buf_map;
+   sess->base.data = buf_map;
+   sess->base.data_ts_supported = true;
+
+   void *control_map;
+   ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map);
+   if (ret)
+      goto free_ringbuf;
+
+   sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map;
+
+   sess->set = 0; /* TODO should we make it configurable? */
+   sess->active = false;
+   sess->session_initialized = false;
+   sess->session_handle = -1;
+   sess->sample_idx = 0;
+
+   return &sess->base;
+
+free_ringbuf:
+   unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf);
+free_eventfd:
+   close(sess->fds.event);
+free_perf:
+   /* Allocated with pan_kmod_dev_alloc(), so release it the same way. */
+   pan_kmod_dev_free(dev, sess);
+   return NULL;
+}
+
+/* Stop sampling and wait for the final sample the kernel emits on stop. */
+static int
+panthor_perf_stop(struct panthor_kmod_perf_session *perf)
+{
+   int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
+   if (ret)
+      return ret;
+
+   perf->active = false;
+
+   return poll_for_sample(perf->fds.event);
+}
+
+/* Create the kernel session on first use, then start sampling. */
+static int
+panthor_kmod_perf_enable(struct pan_kmod_perf_session *session)
+{
+   struct panthor_kmod_perf_session *psess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   /* The session cannot be created outside of the sampling thread. */
+   if (!psess->session_initialized) {
+      int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event,
+                                          psess->bos.ringbuf, psess->bos.control,
+                                          psess->set);
+
+      if (session_handle < 0)
+         return -EINVAL;
+
+      psess->session_handle = session_handle;
+      psess->session_initialized = true;
+   }
+
+   int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++);
+   if (ret)
+      return ret;
+
+   psess->active = true;
+
+   return 0;
+}
+
+/* Stop sampling; the session object stays alive for re-enable. */
+static int
+panthor_kmod_perf_disable(struct pan_kmod_perf_session *session)
+{
+   struct panthor_kmod_perf_session *sess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   return panthor_perf_stop(sess);
+}
+
+/* Discard any stale sample, request a fresh one, and wait for it. */
+static int
+panthor_perf_sample(struct panthor_kmod_perf_session *perf)
+{
+   const uint64_t insert_idx = read_insert_idx(perf);
+   const uint64_t extract_idx = read_extract_idx(perf);
+
+   /* If there's an outstanding sample, discard it by catching the
+    * extract index up to the insert index. */
+   if (insert_idx != extract_idx)
+      write_extract_idx(perf, insert_idx);
+
+   /* Request a new sample, which will increment the insert idx. */
+   int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
+   if (ret)
+      return ret;
+
+   return poll_for_sample(perf->fds.event);
+}
+
+static uint8_t *
+get_base_addr(uint8_t *buf, size_t idx, size_t stride)
+{
+   return buf + idx * stride;
+}
+
+/* Address of ringbuffer slot @idx. */
+static inline struct panthor_perf_sample *
+perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
+{
+   return (struct panthor_perf_sample *)get_base_addr(perf->ringbuffer, idx,
+                                                      perf->sizes.sample);
+}
+
+/* Timestamp (end of sampling window, ns) of the current extract slot. */
+static uint64_t
+panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf)
+{
+   const uint64_t extract_idx = read_extract_idx(perf);
+   const struct panthor_perf_sample *sample = perf_sample_idx(perf, extract_idx);
+
+   return sample->sample_header.timestamp_end_ns;
+}
+
+/* Take one sample and point session->data/data_ts at it. */
+static int
+panthor_kmod_perf_dump(struct pan_kmod_perf_session *session)
+{
+   struct panthor_kmod_perf_session *psess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   int ret = panthor_perf_sample(psess);
+   if (ret)
+      return ret;
+
+   /* Update data pointer to the correct spot in the ringbuffer. */
+   session->data = perf_sample_idx(psess, read_extract_idx(psess));
+   session->data_ts = panthor_perf_get_sample_timestamp(psess);
+
+   return 0;
+}
+
+/* Describe where each counter category lives inside a sample: categories
+ * are laid out back-to-back in FRONTEND/TILER/MEMSYS/SHADER order after
+ * the sample header. */
+static void
+panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
+                               struct pan_kmod_perf_buffer_layout *layout)
+{
+   struct panthor_kmod_perf_session *psess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   /* On all Valhall architectures this is 128. */
+   const unsigned counters_per_cat = 128;
+   layout->counters_per_category = counters_per_cat;
+
+   layout->block_stride = psess->sizes.block;
+   layout->counter_stride = sizeof(uint64_t);
+
+   /* Setup the layout */
+   layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks;
+   layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks;
+   layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks;
+   layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks;
+
+   layout->category[0].offset =
+      psess->sizes.sample_header + psess->sizes.block_header;
+   for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) {
+      layout->category[cat_idx].offset =
+         layout->category[cat_idx - 1].offset +
+         layout->category[cat_idx - 1].n_blocks * layout->block_stride;
+   }
+}
+
+/* Stop (if running) and tear down the session, releasing all BOs, the
+ * eventfd, and the session object itself. */
+static void
+panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session)
+{
+   struct panthor_kmod_perf_session *psess =
+      container_of(session, struct panthor_kmod_perf_session, base);
+
+   if (psess->active && panthor_perf_stop(psess))
+      mesa_loge("failed to stop panthor perf session");
+
+   if (psess->session_initialized &&
+       perf_cmd_teardown(psess->base.dev->fd, psess->session_handle))
+      mesa_loge("failed to tear down panthor perf session");
+
+   if (unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf,
+                             psess->ringbuffer, psess->sizes.ringbuf))
+      mesa_loge("failed to release panthor perf ringbuf BO");
+
+   if (unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control,
+                             psess->ctrl, psess->sizes.control))
+      mesa_loge("failed to release panthor perf control BO");
+
+   close(psess->fds.event);
+
+   /* psess and session alias the same allocation (pan_kmod_dev_alloc);
+    * free it exactly once — the previous revision both ralloc_free'd and
+    * pan_kmod_dev_free'd it, a double free. */
+   pan_kmod_dev_free(session->dev, session);
+
+   mesa_logd("perf session destroyed");
+}
+
 const struct pan_kmod_ops panthor_kmod_ops = {
    .dev_create = panthor_kmod_dev_create,
    .dev_destroy = panthor_kmod_dev_destroy,
@@ -1319,4 +1826,10 @@ const struct pan_kmod_ops panthor_kmod_ops = {
    .vm_query_state = panthor_kmod_vm_query_state,
    .query_timestamp = panthor_kmod_query_timestamp,
    .bo_set_label = panthor_kmod_bo_label,
+   .perf_create = panthor_kmod_perf_init,
+   .perf_enable = panthor_kmod_perf_enable,
+   .perf_disable = panthor_kmod_perf_disable,
+   .perf_dump = panthor_kmod_perf_dump,
+   .perf_query_layout = panthor_kmod_perf_query_layout,
+   .perf_destroy = panthor_kmod_perf_destroy,
 };