Merge branch 'ethos-pmu' into 'main'

ethosu: Add performance counter debug output

See merge request mesa/mesa!40269
This commit is contained in:
Rob Herring 2026-03-10 23:55:22 -05:00
commit 6f0d3eb6ea
5 changed files with 99 additions and 4 deletions

View file

@ -43,6 +43,11 @@ enum drm_ethosu_ioctl_id {
/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
DRM_ETHOSU_SUBMIT,
DRM_ETHOSU_PERFMON_CREATE,
DRM_ETHOSU_PERFMON_DESTROY,
DRM_ETHOSU_PERFMON_GET_VALUES,
DRM_ETHOSU_PERFMON_SET_GLOBAL,
};
/**
@ -79,7 +84,9 @@ struct drm_ethosu_npu_info {
__u32 config;
__u32 sram_size;
__u32 pmu_counters;
};
/**
* struct drm_ethosu_dev_query - Arguments passed to DRM_ETHOSU_IOCTL_DEV_QUERY
*/
@ -171,7 +178,6 @@ struct drm_ethosu_bo_wait {
__s64 timeout_ns; /* absolute */
};
struct drm_ethosu_cmdstream_bo_create {
/* Size of the data argument. */
__u32 size;
@ -220,10 +226,52 @@ struct drm_ethosu_submit {
/** Input: Number of jobs passed in. */
__u32 job_count;
/** Reserved, must be zero. */
__u32 pad;
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
__u32 perfmon_id;
};
/*
 * Maximum number of event counters a single perfmon can be created with;
 * this is the capacity of drm_ethosu_perfmon_create.counters.
 */
#define DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS 8
/*
 * Upper bound on the number of values a perfmon can report: one per event
 * counter plus one extra value (DRM_ETHOSU_PERFMON_GET_VALUES returns
 * ncounters + 1 values).
 * NOTE(review): the extra value is presumably the cycle counter - confirm
 * against the kernel driver.
 */
#define DRM_ETHOSU_MAX_PERF_COUNTERS \
(DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS + 1)
/**
 * struct drm_ethosu_perfmon_create - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_CREATE
 */
struct drm_ethosu_perfmon_create {
/**
 * Output: Id of the created perfmon. This is the value to pass in
 * drm_ethosu_submit.perfmon_id and to the other PERFMON ioctls.
 */
__u32 id;
/**
 * Input: Number of entries of counters that are in use.
 * NOTE(review): presumably limited to DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS
 * by the kernel - confirm.
 */
__u32 ncounters;
/**
 * Input: Event to track on each counter.
 * NOTE(review): presumably hardware PMU event numbers - confirm against
 * the kernel driver.
 */
__u16 counters[DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS];
};
/**
 * struct drm_ethosu_perfmon_destroy - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_DESTROY
 */
struct drm_ethosu_perfmon_destroy {
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE. */
__u32 id;
};
/**
 * struct drm_ethosu_perfmon_get_values - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES
 *
 * Returns the values of the performance counters tracked by this
 * perfmon (as an array of (ncounters + 1) u64 values).
 *
 * No implicit synchronization is performed, so the user has to
 * guarantee that any jobs using this perfmon have already been
 * completed.
 */
struct drm_ethosu_perfmon_get_values {
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE. */
__u32 id;
/**
 * Padding so that values_ptr is 64-bit aligned.
 * NOTE(review): presumably must be zero - confirm.
 */
__u32 pad;
/**
 * Input: User pointer to an array of u64 that receives the counter
 * values. It must have room for (ncounters + 1) entries.
 */
__u64 values_ptr;
};
/*
 * Flag for drm_ethosu_perfmon_set_global.flags.
 * NOTE(review): presumably removes the current global perfmon instead of
 * installing one - confirm against the kernel driver.
 */
#define DRM_ETHOSU_PERFMON_CLEAR_GLOBAL 0x0001
/**
 * struct drm_ethosu_perfmon_set_global - ioctl to define a global performance
 * monitor
 *
 * The global performance monitor will be used for all jobs. If a global
 * performance monitor is defined, jobs with a self-defined performance
 * monitor won't be allowed.
 */
struct drm_ethosu_perfmon_set_global {
/** Input: Bitmask of DRM_ETHOSU_PERFMON_*_GLOBAL flags. */
__u32 flags;
/**
 * Input: Id of the perfmon to use as the global one.
 * NOTE(review): presumably an id returned by DRM_ETHOSU_PERFMON_CREATE -
 * confirm.
 */
__u32 id;
};
/**
* DRM_IOCTL_ETHOSU() - Build a ethosu IOCTL number
@ -253,6 +301,14 @@ enum {
DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
DRM_IOCTL_ETHOSU_SUBMIT =
DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
DRM_IOCTL_ETHOSU_PERFMON_CREATE =
DRM_IOCTL_ETHOSU(WR, PERFMON_CREATE, perfmon_create),
DRM_IOCTL_ETHOSU_PERFMON_DESTROY =
DRM_IOCTL_ETHOSU(WR, PERFMON_DESTROY, perfmon_destroy),
DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES =
DRM_IOCTL_ETHOSU(WR, PERFMON_GET_VALUES, perfmon_get_values),
DRM_IOCTL_ETHOSU_PERFMON_SET_GLOBAL =
DRM_IOCTL_ETHOSU(WR, PERFMON_SET_GLOBAL, perfmon_set_global),
};
#if defined(__cplusplus)

View file

@ -20,6 +20,7 @@ static const struct debug_named_value ethosu_debug_options[] = {
{"zero_bos", ETHOSU_DBG_ZERO, "Zero buffers for debugging"},
{"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
{"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
{"dump_perf", ETHOSU_DBG_DUMP_PERF, "Dump performance counters for each submit"},
DEBUG_NAMED_VALUE_END};
DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
@ -240,4 +241,4 @@ ethosu_screen_create(int fd,
screen->resource_destroy = ethosu_resource_destroy;
return screen;
}
}

View file

@ -20,6 +20,7 @@ enum ethosu_dbg {
ETHOSU_DBG_ZERO = BITFIELD_BIT(2),
ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
ETHOSU_DBG_DUMP_PERF = BITFIELD_BIT(6),
};
extern int ethosu_debug;

View file

@ -242,6 +242,20 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext,
}
}
/* Start out without a perfmon; id 0 is what the destroy path treats as
 * "nothing to release". A perfmon is only requested when counter dumping
 * was asked for through the debug flags.
 */
subgraph->perfmon_id = 0;
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
   struct drm_ethosu_perfmon_create req = {
      .ncounters = 2,
      .counters = { 32, 35 }, /* npu-idle, npu-active */
   };

   ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_CREATE, &req);
   if (ret != 0) {
      /* Not fatal: the subgraph simply runs without a perfmon attached. */
      DBG("Could not create perfmon - %d\n", ret);
   } else {
      subgraph->perfmon_id = req.id;
   }
}
subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used);
return &subgraph->base;
@ -292,6 +306,7 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
submit.jobs = (uintptr_t)&job;
submit.job_count = 1;
submit.perfmon_id = subgraph->perfmon_id;
if (DBG_ENABLED(ETHOSU_DBG_MSGS))
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
@ -338,6 +353,20 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
}
/* Dump the PMU counters gathered for this subgraph.
 *
 * Nothing is queried when perfmon_id is 0: subgraph creation leaves the id
 * at 0 when DRM_IOCTL_ETHOSU_PERFMON_CREATE fails, and that failure is
 * tolerated there, so it must not turn into an error here.
 */
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF) && subgraph->perfmon_id) {
   struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
   /* Sized for the largest result the uAPI can return and zeroed so that
    * no stack garbage is ever printed.
    */
   uint64_t values[DRM_ETHOSU_MAX_PERF_COUNTERS] = {0};
   struct drm_ethosu_perfmon_get_values get_values = {
      .id = subgraph->perfmon_id,
      .values_ptr = (uintptr_t)values,
   };
   int ret;

   ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES, &get_values);
   if (ret == 0) {
      /* Index mapping matches the perfmon set up at subgraph creation:
       * [0] = npu-idle, [1] = npu-active; the extra value after the
       * event counters is reported as the cycle count.
       * uint64_t is not "unsigned long" on every ABI, so cast explicitly
       * to a type with a portable conversion specifier.
       */
      mesa_logi("PMU: cycles=%llu, npu-active=%llu, npu-idle=%llu\n",
                (unsigned long long)values[2],
                (unsigned long long)values[1],
                (unsigned long long)values[0]);
   } else {
      /* Checked at runtime rather than with assert() so that release
       * builds do not silently print meaningless numbers.
       */
      mesa_logw("PMU: could not read performance counters - %d\n", ret);
   }
}
}
void
@ -356,6 +385,13 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
/* Release the perfmon, if one was created (id 0 means none). The ioctl
 * result is deliberately not checked.
 */
if (subgraph->perfmon_id != 0) {
   struct drm_ethosu_perfmon_destroy req = { .id = subgraph->perfmon_id };

   drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_DESTROY, &req);
}
util_dynarray_fini(&subgraph->operations);
util_dynarray_fini(&subgraph->tensors);

View file

@ -180,6 +180,7 @@ struct ethosu_subgraph {
uint32_t *cmdstream;
uint32_t *cursor;
uint32_t cmdstream_bo;
uint32_t perfmon_id;
struct pipe_resource *io_rsrc;
unsigned io_used;