pan/kmod: Implement panthor kmod perf counter methods

Co-Authored-by: Lukas Zapolskas <lukas.zapolskas@arm.com>
This commit is contained in:
Christoph Pillmayer 2026-03-20 17:21:00 +01:00
parent e05a94a1de
commit 8d7387deea

View file

@ -1,5 +1,6 @@
/*
* Copyright © 2023 Collabora, Ltd.
* Copyright © 2026 Arm, Ltd.
* SPDX-License-Identifier: MIT
*/
@ -7,6 +8,7 @@
#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <sys/eventfd.h>
#include "util/hash_table.h"
#include "util/libsync.h"
@ -20,6 +22,8 @@
#include "drm-uapi/dma-buf.h"
#include "drm-uapi/panthor_drm.h"
#include "util/timespec.h"
#include "pan_kmod_backend.h"
#include "pan_props.h"
@ -103,6 +107,43 @@ struct panthor_kmod_bo {
} sync;
};
/* Panthor-specific state backing a pan_kmod perf counter session.
 *
 * Owns the eventfd used for sample-ready notification, the GEM BOs for the
 * sample ringbuffer and its control page, and geometry cached from the
 * DRM_PANTHOR_DEV_QUERY_PERF_INFO query. */
struct panthor_kmod_perf_session {
struct pan_kmod_perf_session base;
struct {
/* eventfd signalled when a sample lands in the ringbuffer */
int event;
} fds;
/* kernel session id; only valid once session_initialized is set */
int session_handle;
struct {
/* GEM handle of the sample ringbuffer BO */
int ringbuf;
/* GEM handle of the ringbuffer control BO */
int control;
} bos;
struct {
/* size of one full sample slot, in bytes */
size_t sample;
/* size of one counter block (header + counters), in bytes */
size_t block;
/* total ringbuffer size: sample * PANTHOR_SAMPLE_SLOTS */
size_t ringbuf;
/* size of the CPU-mapped ringbuffer control structure */
size_t control;
/* per-sample header size as reported by the kernel */
size_t sample_header;
/* per-block header size as reported by the kernel */
size_t block_header;
} sizes;
struct {
/* block counts per type, from DRM_PANTHOR_DEV_QUERY_PERF_INFO */
size_t cshw_blocks;
size_t tiler_blocks;
size_t memsys_blocks;
size_t shader_blocks;
} config;
/* true once the kernel-side session has been set up */
bool session_initialized;
/* true while sampling is started (between enable and disable/stop) */
bool active;
/* counter block set passed to SETUP (currently always 0) */
uint8_t set;
/* monotonically increasing value passed as user_data to perf commands */
uint64_t sample_idx;
/* CPU mapping of the sample ringbuffer BO */
uint8_t *ringbuffer;
/* CPU mapping of the ringbuffer control BO (insert/extract indices) */
struct drm_panthor_perf_ringbuf_control *ctrl;
};
static uint32_t
to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
{
@ -1302,6 +1343,472 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno);
}
/* ================ PERF COUNTERS ================= */
#define PANTHOR_SAMPLE_SLOTS (32)
#define PANTHOR_POLL_TIMEOUT_SEC (10)
#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
/* In-memory layout of one sample slot in the ringbuffer: the kernel-defined
 * sample header followed by the raw counter payload. */
struct panthor_perf_sample {
struct drm_panthor_perf_sample_header sample_header;
/* counter blocks, laid out per panthor_kmod_perf_query_layout() */
uint8_t bytes[];
};
static int
perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set)
{
struct drm_panthor_perf_cmd_setup setup = {
.fd = eventfd,
.block_set = set,
.ringbuf_handle = ringbuf_handle,
.control_handle = control_handle,
.sample_slots = PANTHOR_SAMPLE_SLOTS,
.cshw_enable_mask = { UINT64_MAX, UINT64_MAX },
.tiler_enable_mask = { UINT64_MAX, UINT64_MAX },
.memsys_enable_mask = { UINT64_MAX, UINT64_MAX },
.shader_enable_mask = { UINT64_MAX, UINT64_MAX },
};
struct drm_panthor_perf_control ctrl = {
.cmd = DRM_PANTHOR_PERF_COMMAND_SETUP,
.size = sizeof(setup),
.pointer = PTR_TO_U64(&setup),
};
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
}
static int
perf_cmd_start(int fd, int sid, uint64_t user_data)
{
struct drm_panthor_perf_cmd_start start = {
.user_data = user_data,
};
struct drm_panthor_perf_control ctrl = {
.cmd = DRM_PANTHOR_PERF_COMMAND_START,
.handle = sid,
.size = sizeof(start),
.pointer = PTR_TO_U64(&start),
};
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
}
static int
perf_cmd_stop(int fd, int sid, uint64_t user_data)
{
struct drm_panthor_perf_cmd_stop stop = {};
struct drm_panthor_perf_control ctrl = {
.cmd = DRM_PANTHOR_PERF_COMMAND_STOP,
.handle = sid,
.size = sizeof(stop),
.pointer = PTR_TO_U64(&stop),
};
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
}
static int
perf_cmd_sample(int fd, int sid, uint64_t user_data)
{
struct drm_panthor_perf_cmd_sample sample = {
.user_data = user_data,
};
struct drm_panthor_perf_control ctrl = {
.cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE,
.handle = sid,
.size = sizeof(sample),
.pointer = PTR_TO_U64(&sample),
};
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
}
#define DUMMY_PTR ((uint8_t *)1)
static int
perf_cmd_teardown(int fd, int sid)
{
struct drm_panthor_perf_control ctrl = {
.cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
.handle = sid,
};
int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
return ret;
}
static int
unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size)
{
if (addr)
munmap(addr, size);
struct drm_gem_close ringbuf_close = {
.handle = handle,
};
return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &ringbuf_close);
}
/* Create a panthor BO of `size` bytes and map it CPU-visible.
 *
 * On success returns 0 and stores the GEM handle in *handle and the mapping
 * in *mapping. On failure returns a negative error and leaves the output
 * parameters untouched. */
static int
create_and_map_bo(int fd, size_t size, int *handle, void **mapping)
{
   struct drm_panthor_bo_create bo = {
      .size = size,
   };

   int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo);
   if (ret)
      return -EINVAL;

   struct drm_panthor_bo_mmap_offset offset = {
      .handle = bo.handle,
   };

   ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset);
   if (ret)
      goto term_bo;

   /* mmap() returns MAP_FAILED on error, never NULL. */
   void *map = mmap(NULL, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                    (off_t)offset.offset);
   if (map == MAP_FAILED) {
      ret = -EINVAL;
      goto term_bo;
   }

   *handle = bo.handle;
   *mapping = map;
   return 0;

term_bo:
   /* Best-effort cleanup. Report the original failure rather than the
    * GEM_CLOSE status: the old code returned the close result here, which
    * turned every post-create failure into a bogus 0/"success" return. */
   unmap_and_teardown_bo(fd, bo.handle, NULL, 0);
   return ret;
}
/* Block until the kernel signals a sample on `poll_fd`, bounded by a
 * PANTHOR_POLL_TIMEOUT_SEC deadline.
 *
 * Returns 0 once one event has been consumed from the eventfd, -ETIMEDOUT
 * if the deadline expires, or a negative value on poll/read failure. */
static int
poll_for_sample(int poll_fd)
{
   int ret;
   eventfd_t tmp;
   struct pollfd pfd[1] = {
      {
         .fd = poll_fd,
         .events = POLLIN
      }
   };
   struct timespec timeout = {
      .tv_sec = PANTHOR_POLL_TIMEOUT_SEC,
   };
   struct timespec now, result, deadline;

   clock_gettime(CLOCK_MONOTONIC, &now);
   timespec_add(&deadline, &now, &timeout);

   /* Restart after EINTR with a shrinking timeout so the overall deadline
    * still holds. */
   do {
      clock_gettime(CLOCK_MONOTONIC, &now);
      timespec_sub_saturate(&result, &deadline, &now);
      ret = ppoll(pfd, 1, &result, NULL);
   } while (ret == -1 && errno == EINTR);

   if (ret < 0)
      return ret;

   /* ppoll() returning 0 means the deadline expired with no event. The
    * eventfd is not opened non-blocking, so falling through to
    * eventfd_read() here would hang forever. */
   if (ret == 0)
      return -ETIMEDOUT;

   /* Consume exactly one event (EFD_SEMAPHORE decrements by one). */
   return eventfd_read(poll_fd, &tmp);
}
/* Atomically load the consumer (extract) index from the shared control page. */
static uint64_t
read_extract_idx(struct panthor_kmod_perf_session *perf)
{
   const uint64_t idx = p_atomic_read(&perf->ctrl->extract_idx);
   return idx;
}
/* Atomically publish a new consumer (extract) index to the shared control
 * page, releasing the slots before it back to the kernel. */
static void
write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t new_idx)
{
   p_atomic_set(&perf->ctrl->extract_idx, new_idx);
}
/* Atomically load the producer (insert) index from the shared control page. */
static uint64_t
read_insert_idx(struct panthor_kmod_perf_session *perf)
{
   const uint64_t idx = p_atomic_read(&perf->ctrl->insert_idx);
   return idx;
}
static inline struct pan_kmod_perf_session *
panthor_kmod_perf_init(struct pan_kmod_dev *dev)
{
UNUSED struct panthor_kmod_dev *panthor_dev =
container_of(dev, struct panthor_kmod_dev, base);
struct panthor_kmod_perf_session *sess =
pan_kmod_dev_alloc(dev, sizeof(*sess));
if (!sess) {
mesa_loge("failed to allocate a panthor_kmod_perf_session object");
return NULL;
}
sess->base.dev = dev;
struct drm_panthor_gpu_info gpu_info = {};
struct drm_panthor_dev_query query = {
.type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
.size = sizeof(gpu_info),
.pointer = (uint64_t)(uintptr_t)&gpu_info,
};
int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
if (ret)
goto free_perf;
struct drm_panthor_perf_info perf_info = {};
query = (struct drm_panthor_dev_query) {
.type = DRM_PANTHOR_DEV_QUERY_PERF_INFO,
.size = sizeof(perf_info),
.pointer = (uint64_t)(uintptr_t)&perf_info,
};
ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
if (ret)
goto free_perf;
sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
if (!sess->fds.event)
goto free_perf;
const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) +
perf_info.block_header_size;
const size_t sample_size = perf_info.sample_size;
const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS;
sess->sizes.block = block_size;
sess->sizes.sample = sample_size;
sess->sizes.ringbuf = buffer_size;
sess->sizes.control = sizeof(*sess->ctrl);
sess->sizes.sample_header = perf_info.sample_header_size;
sess->sizes.block_header = perf_info.block_header_size;
if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header))
fprintf(stderr, "panfrost perf sample header size mismatch!");
if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header))
fprintf(stderr, "panfrost perf block header size mismatch!");
sess->config.cshw_blocks = perf_info.cshw_blocks;
sess->config.tiler_blocks = perf_info.tiler_blocks;
sess->config.memsys_blocks = perf_info.memsys_blocks;
sess->config.shader_blocks = perf_info.shader_blocks;
void *buf_map;
ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map);
if (ret)
goto free_eventfd;
sess->ringbuffer = buf_map;
sess->base.data = buf_map;
sess->base.data_ts_supported = true;
void *control_map;
ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map);
if (ret)
goto free_ringbuf;
sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map;
sess->set = 0; /* TODO should we make it configurable? */
sess->active = false;
sess->session_initialized = false;
return &(sess->base);
free_ringbuf:
unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf);
free_eventfd:
close(sess->fds.event);
free_perf:
ralloc_free(sess);
return NULL;
}
static int
panthor_kmod_perf_enable(struct pan_kmod_perf_session *session)
{
UNUSED struct panthor_kmod_perf_session *psess =
container_of(session, struct panthor_kmod_perf_session, base);
/* The session cannot be created outside of the sampling thread. */
if (!psess->session_initialized) {
int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event, psess->bos.ringbuf,
psess->bos.control, psess->set);
if (session_handle < 0)
return -EINVAL;
psess->session_handle = session_handle;
psess->session_initialized = true;
}
int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++);
if (ret)
return ret;
psess->active = true;
return 0;
}
static int
panthor_kmod_perf_disable(struct pan_kmod_perf_session *session)
{
UNUSED struct panthor_kmod_perf_session *sess =
container_of(session, struct panthor_kmod_perf_session, base);
int ret = perf_cmd_stop(sess->base.dev->fd, sess->session_handle, sess->sample_idx++);
if (ret)
return ret;
sess->active = false;
ret = poll_for_sample(sess->fds.event);
if (ret)
return ret;
return 0;
}
/* Request one fresh sample from the kernel and block until it lands.
 * Any sample still pending in the ringbuffer is discarded first so the new
 * one ends up in a well-known slot. */
static int
panthor_perf_sample(struct panthor_kmod_perf_session *perf)
{
   const uint64_t insert_idx = read_insert_idx(perf);
   const uint64_t extract_idx = read_extract_idx(perf);

   /* Discard an outstanding sample by catching the extract index up. */
   if (insert_idx != extract_idx)
      write_extract_idx(perf, insert_idx);

   /* Request a new sample, which increments the insert index when it
    * lands, then wait for the eventfd to fire. */
   int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle,
                             perf->sample_idx++);
   if (ret)
      return ret;

   return poll_for_sample(perf->fds.event);
}
/* Address of record `idx` in a buffer of `stride`-byte records. */
static uint8_t *get_base_addr(uint8_t *buf, size_t idx, size_t stride)
{
   return &buf[idx * stride];
}
/* Pointer to sample slot `idx` within the session's ringbuffer. */
static inline struct panthor_perf_sample *perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
{
   uint8_t *slot = get_base_addr(perf->ringbuffer, idx, perf->sizes.sample);
   return (struct panthor_perf_sample *)slot;
}
/* End-of-capture timestamp (ns) of the sample at the current extract index
 * (where panthor_perf_sample() leaves the freshest sample). */
static uint64_t
panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf)
{
   const struct panthor_perf_sample *sample =
      perf_sample_idx(perf, read_extract_idx(perf));

   return sample->sample_header.timestamp_end_ns;
}
static int
panthor_kmod_perf_dump(struct pan_kmod_perf_session *session)
{
UNUSED struct panthor_kmod_perf_session *psess =
container_of(session, struct panthor_kmod_perf_session, base);
int ret = panthor_perf_sample(psess);
if (ret)
return ret;
/* Update data pointer to the correct spot in the ringbuffer. */
session->data = perf_sample_idx(psess, read_extract_idx(psess));
session->data_ts = panthor_perf_get_sample_timestamp(psess);
return 0;
}
static void
panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
struct pan_kmod_perf_buffer_layout *layout)
{
UNUSED struct panthor_kmod_perf_session *psess =
container_of(session, struct panthor_kmod_perf_session, base);
/* On all Valhall architectures this is 128. */
const unsigned counters_per_cat = 128;
layout->counters_per_category = counters_per_cat;
layout->block_stride = psess->sizes.block;
layout->counter_stride = sizeof(uint64_t);
/* Setup the layout */
layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks;
layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks;
layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks;
layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks;
layout->category[0].offset =
psess->sizes.sample_header + psess->sizes.block_header;
for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) {
layout->category[cat_idx].offset =
layout->category[cat_idx - 1].offset +
layout->category[cat_idx - 1].n_blocks * layout->block_stride;
}
}
/* Internal mirror of panthor_kmod_perf_disable that takes the derived
 * session type directly: stop sampling and wait on the eventfd. */
static int
panthor_perf_stop(struct panthor_kmod_perf_session *perf)
{
   int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle,
                           perf->sample_idx++);
   if (ret)
      return ret;

   perf->active = false;

   return poll_for_sample(perf->fds.event);
}
static void
panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session)
{
UNUSED struct panthor_kmod_perf_session *psess =
container_of(session, struct panthor_kmod_perf_session, base);
int ret;
if (psess->active) {
ret = panthor_perf_stop(psess);
assert(ret == 0);
}
ret = perf_cmd_teardown(psess->base.dev->fd, psess->session_handle);
assert(ret == 0);
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf, psess->ringbuffer, psess->sizes.ringbuf);
assert(ret == 0);
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control, psess->ctrl, psess->sizes.control);
assert(ret == 0);
close(psess->fds.event);
ralloc_free(psess);
pan_kmod_dev_free(session->dev, session);
mesa_logd("perf session destroyed");
}
const struct pan_kmod_ops panthor_kmod_ops = {
.dev_create = panthor_kmod_dev_create,
.dev_destroy = panthor_kmod_dev_destroy,
@ -1319,4 +1826,10 @@ const struct pan_kmod_ops panthor_kmod_ops = {
.vm_query_state = panthor_kmod_vm_query_state,
.query_timestamp = panthor_kmod_query_timestamp,
.bo_set_label = panthor_kmod_bo_label,
.perf_create = panthor_kmod_perf_init,
.perf_enable = panthor_kmod_perf_enable,
.perf_disable = panthor_kmod_perf_disable,
.perf_dump = panthor_kmod_perf_dump,
.perf_query_layout = panthor_kmod_perf_query_layout,
.perf_destroy = panthor_kmod_perf_destroy,
};