mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
pan/kmod: Implement panthor kmod perf counter methods
Co-Authored-by: Lukas Zapolskas <lukas.zapolskas@arm.com>
This commit is contained in:
parent
e05a94a1de
commit
8d7387deea
1 changed files with 513 additions and 0 deletions
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2023 Collabora, Ltd.
|
||||
* Copyright © 2026 Arm, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -7,6 +8,7 @@
|
|||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <xf86drm.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "util/libsync.h"
|
||||
|
|
@ -20,6 +22,8 @@
|
|||
#include "drm-uapi/dma-buf.h"
|
||||
#include "drm-uapi/panthor_drm.h"
|
||||
|
||||
#include "util/timespec.h"
|
||||
|
||||
#include "pan_kmod_backend.h"
|
||||
#include "pan_props.h"
|
||||
|
||||
|
|
@ -103,6 +107,43 @@ struct panthor_kmod_bo {
|
|||
} sync;
|
||||
};
|
||||
|
||||
struct panthor_kmod_perf_session {
|
||||
struct pan_kmod_perf_session base;
|
||||
|
||||
struct {
|
||||
int event;
|
||||
} fds;
|
||||
int session_handle;
|
||||
|
||||
struct {
|
||||
int ringbuf;
|
||||
int control;
|
||||
} bos;
|
||||
|
||||
struct {
|
||||
size_t sample;
|
||||
size_t block;
|
||||
size_t ringbuf;
|
||||
size_t control;
|
||||
size_t sample_header;
|
||||
size_t block_header;
|
||||
} sizes;
|
||||
|
||||
struct {
|
||||
size_t cshw_blocks;
|
||||
size_t tiler_blocks;
|
||||
size_t memsys_blocks;
|
||||
size_t shader_blocks;
|
||||
} config;
|
||||
|
||||
bool session_initialized;
|
||||
bool active;
|
||||
uint8_t set;
|
||||
uint64_t sample_idx;
|
||||
uint8_t *ringbuffer;
|
||||
struct drm_panthor_perf_ringbuf_control *ctrl;
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
to_kmod_group_allow_priority_flags(uint32_t panthor_flags)
|
||||
{
|
||||
|
|
@ -1302,6 +1343,472 @@ panthor_kmod_bo_label(struct pan_kmod_dev *dev, struct pan_kmod_bo *bo, const ch
|
|||
mesa_loge("DRM_IOCTL_PANTHOR_BO_SET_LABEL failed (err=%d)", errno);
|
||||
}
|
||||
|
||||
/* ================ PERF COUNTERS ================= */
|
||||
|
||||
/* Number of sample slots requested for the perf ring buffer. */
#define PANTHOR_SAMPLE_SLOTS (32)

/* Upper bound, in seconds, on how long we wait for a sample notification. */
#define PANTHOR_POLL_TIMEOUT_SEC (10)

/* Widen a pointer to the 64-bit integer form used by ioctl argument structs. */
#define PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
|
||||
|
||||
struct panthor_perf_sample {
|
||||
struct drm_panthor_perf_sample_header sample_header;
|
||||
uint8_t bytes[];
|
||||
};
|
||||
|
||||
static int
|
||||
perf_cmd_setup(int fd, int eventfd, int ringbuf_handle, int control_handle, uint8_t set)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_setup setup = {
|
||||
.fd = eventfd,
|
||||
.block_set = set,
|
||||
.ringbuf_handle = ringbuf_handle,
|
||||
.control_handle = control_handle,
|
||||
.sample_slots = PANTHOR_SAMPLE_SLOTS,
|
||||
.cshw_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.tiler_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.memsys_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
.shader_enable_mask = { UINT64_MAX, UINT64_MAX },
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_SETUP,
|
||||
.size = sizeof(setup),
|
||||
.pointer = PTR_TO_U64(&setup),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_start(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_start start = {
|
||||
.user_data = user_data,
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_START,
|
||||
.handle = sid,
|
||||
.size = sizeof(start),
|
||||
.pointer = PTR_TO_U64(&start),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_stop(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_stop stop = {};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_STOP,
|
||||
.handle = sid,
|
||||
.size = sizeof(stop),
|
||||
.pointer = PTR_TO_U64(&stop),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
static int
|
||||
perf_cmd_sample(int fd, int sid, uint64_t user_data)
|
||||
{
|
||||
struct drm_panthor_perf_cmd_sample sample = {
|
||||
.user_data = user_data,
|
||||
};
|
||||
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_SAMPLE,
|
||||
.handle = sid,
|
||||
.size = sizeof(sample),
|
||||
.pointer = PTR_TO_U64(&sample),
|
||||
};
|
||||
|
||||
return drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
}
|
||||
|
||||
#define DUMMY_PTR ((uint8_t *)1)
|
||||
|
||||
static int
|
||||
perf_cmd_teardown(int fd, int sid)
|
||||
{
|
||||
struct drm_panthor_perf_control ctrl = {
|
||||
.cmd = DRM_PANTHOR_PERF_COMMAND_TEARDOWN,
|
||||
.handle = sid,
|
||||
};
|
||||
|
||||
int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_PERF_CONTROL, &ctrl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
unmap_and_teardown_bo(int fd, int handle, void *addr, size_t size)
|
||||
{
|
||||
if (addr)
|
||||
munmap(addr, size);
|
||||
|
||||
struct drm_gem_close ringbuf_close = {
|
||||
.handle = handle,
|
||||
};
|
||||
return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &ringbuf_close);
|
||||
}
|
||||
|
||||
static int
|
||||
create_and_map_bo(int fd, size_t size, int *handle, void **mapping)
|
||||
{
|
||||
struct drm_panthor_bo_create bo = {
|
||||
.size = size,
|
||||
};
|
||||
int ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &bo);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
struct drm_panthor_bo_mmap_offset offset = {
|
||||
.handle = bo.handle,
|
||||
};
|
||||
ret = drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, &offset);
|
||||
if (ret)
|
||||
goto term_bo;
|
||||
|
||||
void *map = mmap(0, bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)offset.offset);
|
||||
if (!map || map == MAP_FAILED) {
|
||||
ret = -EINVAL;
|
||||
goto term_bo;
|
||||
}
|
||||
|
||||
*handle = bo.handle;
|
||||
*mapping = map;
|
||||
|
||||
return 0;
|
||||
term_bo:
|
||||
return unmap_and_teardown_bo(fd, bo.handle, NULL, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
poll_for_sample(int poll_fd)
|
||||
{
|
||||
int ret;
|
||||
eventfd_t tmp;
|
||||
struct pollfd pfd[1] = {
|
||||
{
|
||||
.fd = poll_fd,
|
||||
.events = POLLIN
|
||||
}
|
||||
};
|
||||
struct timespec timeout = {
|
||||
.tv_sec = PANTHOR_POLL_TIMEOUT_SEC,
|
||||
};
|
||||
struct timespec now, result, deadline;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
timespec_add(&deadline, &now, &timeout);
|
||||
|
||||
do {
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
timespec_sub_saturate(&result, &deadline, &now);
|
||||
ret = ppoll(pfd, 1, &result, NULL);
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return eventfd_read(poll_fd, &tmp);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_extract_idx(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
return p_atomic_read(&perf->ctrl->extract_idx);
|
||||
}
|
||||
|
||||
static void
|
||||
write_extract_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
|
||||
{
|
||||
p_atomic_set(&perf->ctrl->extract_idx, idx);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_insert_idx(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
return p_atomic_read(&perf->ctrl->insert_idx);
|
||||
}
|
||||
|
||||
static inline struct pan_kmod_perf_session *
|
||||
panthor_kmod_perf_init(struct pan_kmod_dev *dev)
|
||||
{
|
||||
UNUSED struct panthor_kmod_dev *panthor_dev =
|
||||
container_of(dev, struct panthor_kmod_dev, base);
|
||||
|
||||
struct panthor_kmod_perf_session *sess =
|
||||
pan_kmod_dev_alloc(dev, sizeof(*sess));
|
||||
if (!sess) {
|
||||
mesa_loge("failed to allocate a panthor_kmod_perf_session object");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sess->base.dev = dev;
|
||||
|
||||
struct drm_panthor_gpu_info gpu_info = {};
|
||||
struct drm_panthor_dev_query query = {
|
||||
.type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
|
||||
.size = sizeof(gpu_info),
|
||||
.pointer = (uint64_t)(uintptr_t)&gpu_info,
|
||||
};
|
||||
|
||||
int ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
|
||||
if (ret)
|
||||
goto free_perf;
|
||||
|
||||
struct drm_panthor_perf_info perf_info = {};
|
||||
|
||||
query = (struct drm_panthor_dev_query) {
|
||||
.type = DRM_PANTHOR_DEV_QUERY_PERF_INFO,
|
||||
.size = sizeof(perf_info),
|
||||
.pointer = (uint64_t)(uintptr_t)&perf_info,
|
||||
};
|
||||
|
||||
ret = drmIoctl(dev->fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
|
||||
if (ret)
|
||||
goto free_perf;
|
||||
|
||||
sess->fds.event = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
|
||||
if (!sess->fds.event)
|
||||
goto free_perf;
|
||||
|
||||
const size_t block_size = perf_info.counters_per_block * sizeof(uint64_t) +
|
||||
perf_info.block_header_size;
|
||||
const size_t sample_size = perf_info.sample_size;
|
||||
const size_t buffer_size = sample_size * PANTHOR_SAMPLE_SLOTS;
|
||||
|
||||
sess->sizes.block = block_size;
|
||||
sess->sizes.sample = sample_size;
|
||||
sess->sizes.ringbuf = buffer_size;
|
||||
sess->sizes.control = sizeof(*sess->ctrl);
|
||||
sess->sizes.sample_header = perf_info.sample_header_size;
|
||||
sess->sizes.block_header = perf_info.block_header_size;
|
||||
|
||||
if (sess->sizes.sample_header != sizeof(struct drm_panthor_perf_sample_header))
|
||||
fprintf(stderr, "panfrost perf sample header size mismatch!");
|
||||
|
||||
if (sess->sizes.block_header != sizeof(struct drm_panthor_perf_block_header))
|
||||
fprintf(stderr, "panfrost perf block header size mismatch!");
|
||||
|
||||
sess->config.cshw_blocks = perf_info.cshw_blocks;
|
||||
sess->config.tiler_blocks = perf_info.tiler_blocks;
|
||||
sess->config.memsys_blocks = perf_info.memsys_blocks;
|
||||
sess->config.shader_blocks = perf_info.shader_blocks;
|
||||
|
||||
void *buf_map;
|
||||
ret = create_and_map_bo(dev->fd, sess->sizes.ringbuf, &sess->bos.ringbuf, &buf_map);
|
||||
if (ret)
|
||||
goto free_eventfd;
|
||||
|
||||
sess->ringbuffer = buf_map;
|
||||
sess->base.data = buf_map;
|
||||
sess->base.data_ts_supported = true;
|
||||
|
||||
void *control_map;
|
||||
ret = create_and_map_bo(dev->fd, sess->sizes.control, &sess->bos.control, &control_map);
|
||||
if (ret)
|
||||
goto free_ringbuf;
|
||||
|
||||
sess->ctrl = (struct drm_panthor_perf_ringbuf_control *)control_map;
|
||||
|
||||
sess->set = 0; /* TODO should we make it configurable? */
|
||||
sess->active = false;
|
||||
sess->session_initialized = false;
|
||||
|
||||
return &(sess->base);
|
||||
|
||||
free_ringbuf:
|
||||
unmap_and_teardown_bo(dev->fd, sess->bos.ringbuf, buf_map, sess->sizes.ringbuf);
|
||||
free_eventfd:
|
||||
close(sess->fds.event);
|
||||
free_perf:
|
||||
ralloc_free(sess);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_enable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
/* The session cannot be created outside of the sampling thread. */
|
||||
if (!psess->session_initialized) {
|
||||
int session_handle = perf_cmd_setup(psess->base.dev->fd, psess->fds.event, psess->bos.ringbuf,
|
||||
psess->bos.control, psess->set);
|
||||
|
||||
if (session_handle < 0)
|
||||
return -EINVAL;
|
||||
|
||||
psess->session_handle = session_handle;
|
||||
psess->session_initialized = true;
|
||||
}
|
||||
|
||||
int ret = perf_cmd_start(psess->base.dev->fd, psess->session_handle, psess->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
psess->active = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_disable(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *sess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret = perf_cmd_stop(sess->base.dev->fd, sess->session_handle, sess->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
sess->active = false;
|
||||
|
||||
ret = poll_for_sample(sess->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_perf_sample(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
const uint64_t insert_idx = read_insert_idx(perf);
|
||||
const uint64_t extract_idx = read_extract_idx(perf);
|
||||
|
||||
// If there's an outstanding sample, discard it
|
||||
if (insert_idx != extract_idx)
|
||||
write_extract_idx(perf, insert_idx);
|
||||
|
||||
// Otherwise, request a new sample which will increment the insert idx
|
||||
int ret = perf_cmd_sample(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = poll_for_sample(perf->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return the address of element `idx` in a byte buffer of `stride`-sized
 * elements. */
static uint8_t *
get_base_addr(uint8_t *buf, size_t idx, size_t stride)
{
   const size_t byte_offset = idx * stride;

   return &buf[byte_offset];
}
|
||||
|
||||
static inline struct panthor_perf_sample *perf_sample_idx(struct panthor_kmod_perf_session *perf, uint64_t idx)
|
||||
{
|
||||
return (struct panthor_perf_sample *)get_base_addr(perf->ringbuffer, idx, perf->sizes.sample);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
panthor_perf_get_sample_timestamp(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
const uint64_t extract_idx = read_extract_idx(perf);
|
||||
const struct panthor_perf_sample *sample = perf_sample_idx(perf, extract_idx);
|
||||
|
||||
return sample->sample_header.timestamp_end_ns;
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_kmod_perf_dump(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret = panthor_perf_sample(psess);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Update data pointer to the correct spot in the ringbuffer. */
|
||||
session->data = perf_sample_idx(psess, read_extract_idx(psess));
|
||||
session->data_ts = panthor_perf_get_sample_timestamp(psess);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
panthor_kmod_perf_query_layout(const struct pan_kmod_perf_session *session,
|
||||
struct pan_kmod_perf_buffer_layout *layout)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
/* On all Valhall architectures this is 128. */
|
||||
const unsigned counters_per_cat = 128;
|
||||
layout->counters_per_category = counters_per_cat;
|
||||
|
||||
layout->block_stride = psess->sizes.block;
|
||||
layout->counter_stride = sizeof(uint64_t);
|
||||
|
||||
/* Setup the layout */
|
||||
layout->category[PAN_KMOD_PERF_CAT_FRONTEND].n_blocks = psess->config.cshw_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_TILER].n_blocks = psess->config.tiler_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_MEMSYS].n_blocks = psess->config.memsys_blocks;
|
||||
layout->category[PAN_KMOD_PERF_CAT_SHADER].n_blocks = psess->config.shader_blocks;
|
||||
|
||||
layout->category[0].offset =
|
||||
psess->sizes.sample_header + psess->sizes.block_header;
|
||||
for (unsigned cat_idx = 1; cat_idx < PAN_KMOD_PERF_CAT_COUNT; ++cat_idx) {
|
||||
layout->category[cat_idx].offset =
|
||||
layout->category[cat_idx - 1].offset +
|
||||
layout->category[cat_idx - 1].n_blocks * layout->block_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
panthor_perf_stop(struct panthor_kmod_perf_session *perf)
|
||||
{
|
||||
int ret = perf_cmd_stop(perf->base.dev->fd, perf->session_handle, perf->sample_idx++);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
perf->active = false;
|
||||
|
||||
ret = poll_for_sample(perf->fds.event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
panthor_kmod_perf_destroy(struct pan_kmod_perf_session *session)
|
||||
{
|
||||
UNUSED struct panthor_kmod_perf_session *psess =
|
||||
container_of(session, struct panthor_kmod_perf_session, base);
|
||||
|
||||
int ret;
|
||||
|
||||
if (psess->active) {
|
||||
ret = panthor_perf_stop(psess);
|
||||
assert(ret == 0);
|
||||
}
|
||||
|
||||
ret = perf_cmd_teardown(psess->base.dev->fd, psess->session_handle);
|
||||
assert(ret == 0);
|
||||
|
||||
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.ringbuf, psess->ringbuffer, psess->sizes.ringbuf);
|
||||
assert(ret == 0);
|
||||
|
||||
ret = unmap_and_teardown_bo(psess->base.dev->fd, psess->bos.control, psess->ctrl, psess->sizes.control);
|
||||
assert(ret == 0);
|
||||
|
||||
close(psess->fds.event);
|
||||
ralloc_free(psess);
|
||||
|
||||
pan_kmod_dev_free(session->dev, session);
|
||||
|
||||
mesa_logd("perf session destroyed");
|
||||
}
|
||||
|
||||
const struct pan_kmod_ops panthor_kmod_ops = {
|
||||
.dev_create = panthor_kmod_dev_create,
|
||||
.dev_destroy = panthor_kmod_dev_destroy,
|
||||
|
|
@ -1319,4 +1826,10 @@ const struct pan_kmod_ops panthor_kmod_ops = {
|
|||
.vm_query_state = panthor_kmod_vm_query_state,
|
||||
.query_timestamp = panthor_kmod_query_timestamp,
|
||||
.bo_set_label = panthor_kmod_bo_label,
|
||||
.perf_create = panthor_kmod_perf_init,
|
||||
.perf_enable = panthor_kmod_perf_enable,
|
||||
.perf_disable = panthor_kmod_perf_disable,
|
||||
.perf_dump = panthor_kmod_perf_dump,
|
||||
.perf_query_layout = panthor_kmod_perf_query_layout,
|
||||
.perf_destroy = panthor_kmod_perf_destroy,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue