mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-12 04:40:42 +01:00
Merge branch 'ethos-pmu' into 'main'
ethosu: Add performance counter debug output. See merge request mesa/mesa!40269
This commit is contained in:
commit
6f0d3eb6ea
5 changed files with 99 additions and 4 deletions
|
|
@ -43,6 +43,11 @@ enum drm_ethosu_ioctl_id {
|
|||
|
||||
/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
|
||||
DRM_ETHOSU_SUBMIT,
|
||||
|
||||
DRM_ETHOSU_PERFMON_CREATE,
|
||||
DRM_ETHOSU_PERFMON_DESTROY,
|
||||
DRM_ETHOSU_PERFMON_GET_VALUES,
|
||||
DRM_ETHOSU_PERFMON_SET_GLOBAL,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -79,7 +84,9 @@ struct drm_ethosu_npu_info {
|
|||
__u32 config;
|
||||
|
||||
__u32 sram_size;
|
||||
__u32 pmu_counters;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_ethosu_dev_query - Arguments passed to DRM_ETHOSU_IOCTL_DEV_QUERY
|
||||
*/
|
||||
|
|
@ -171,7 +178,6 @@ struct drm_ethosu_bo_wait {
|
|||
__s64 timeout_ns; /* absolute */
|
||||
};
|
||||
|
||||
|
||||
struct drm_ethosu_cmdstream_bo_create {
|
||||
/* Size of the data argument. */
|
||||
__u32 size;
|
||||
|
|
@ -220,10 +226,52 @@ struct drm_ethosu_submit {
|
|||
/** Input: Number of jobs passed in. */
|
||||
__u32 job_count;
|
||||
|
||||
/** Reserved, must be zero. */
|
||||
__u32 pad;
|
||||
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
|
||||
__u32 perfmon_id;
|
||||
};
|
||||
|
||||
/*
 * Upper bounds for a single performance monitor: at most
 * DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS selectable event counters, plus one
 * additional value slot. NOTE(review): the extra slot is presumably the
 * cycle counter; confirm against the kernel driver.
 */
#define DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS 8
|
||||
#define DRM_ETHOSU_MAX_PERF_COUNTERS \
|
||||
(DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS + 1)
|
||||
|
||||
/**
 * struct drm_ethosu_perfmon_create - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_CREATE
 */
struct drm_ethosu_perfmon_create {
|
||||
/** Output: Id of the created perfmon, later passed to submit/get_values/destroy. */
__u32 id;
|
||||
/** Input: Number of entries of the counters array that are in use. */
__u32 ncounters;
|
||||
/** Input: Event numbers to count. NOTE(review): numbering is hardware-defined; confirm valid range. */
__u16 counters[DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS];
|
||||
};
|
||||
|
||||
/**
 * struct drm_ethosu_perfmon_destroy - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_DESTROY
 */
struct drm_ethosu_perfmon_destroy {
|
||||
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
__u32 id;
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns the values of the performance counters tracked by this
|
||||
* perfmon (as an array of (ncounters + 1) u64 values).
|
||||
*
|
||||
* No implicit synchronization is performed, so the user has to
|
||||
* guarantee that any jobs using this perfmon have already been
|
||||
* completed.
|
||||
*/
|
||||
struct drm_ethosu_perfmon_get_values {
|
||||
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
__u32 id;
|
||||
/** Padding. NOTE(review): presumably reserved and must be zero; confirm. */
__u32 pad;
|
||||
/** Input: User pointer to an array of u64 that receives the counter values. */
__u64 values_ptr;
|
||||
};
|
||||
|
||||
/*
 * Flag bit for struct drm_ethosu_perfmon_set_global.
 * NOTE(review): presumably removes the currently configured global perfmon
 * instead of setting one; confirm against the kernel driver.
 */
#define DRM_ETHOSU_PERFMON_CLEAR_GLOBAL 0x0001
|
||||
|
||||
/**
|
||||
* struct drm_ethosu_perfmon_set_global - ioctl to define a global performance
|
||||
* monitor
|
||||
*
|
||||
* The global performance monitor will be used for all jobs. If a global
|
||||
* performance monitor is defined, jobs with a self-defined performance
|
||||
* monitor won't be allowed.
|
||||
*/
|
||||
struct drm_ethosu_perfmon_set_global {
|
||||
/** Input: Bitmask of flags; DRM_ETHOSU_PERFMON_CLEAR_GLOBAL is the only one defined here. */
__u32 flags;
|
||||
/** Input: Perfmon id. NOTE(review): presumably the perfmon to make global; confirm. */
__u32 id;
|
||||
};
|
||||
|
||||
/**
|
||||
* DRM_IOCTL_ETHOSU() - Build an ethosu IOCTL number
|
||||
|
|
@ -253,6 +301,14 @@ enum {
|
|||
DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
|
||||
DRM_IOCTL_ETHOSU_SUBMIT =
|
||||
DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
|
||||
DRM_IOCTL_ETHOSU_PERFMON_CREATE =
|
||||
DRM_IOCTL_ETHOSU(WR, PERFMON_CREATE, perfmon_create),
|
||||
DRM_IOCTL_ETHOSU_PERFMON_DESTROY =
|
||||
DRM_IOCTL_ETHOSU(WR, PERFMON_DESTROY, perfmon_destroy),
|
||||
DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES =
|
||||
DRM_IOCTL_ETHOSU(WR, PERFMON_GET_VALUES, perfmon_get_values),
|
||||
DRM_IOCTL_ETHOSU_PERFMON_SET_GLOBAL =
|
||||
DRM_IOCTL_ETHOSU(WR, PERFMON_SET_GLOBAL, perfmon_set_global),
|
||||
};
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ static const struct debug_named_value ethosu_debug_options[] = {
|
|||
{"zero_bos", ETHOSU_DBG_ZERO, "Zero buffers for debugging"},
|
||||
{"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
|
||||
{"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
|
||||
{"dump_perf", ETHOSU_DBG_DUMP_PERF, "Dump performance counters for each submit"},
|
||||
DEBUG_NAMED_VALUE_END};
|
||||
|
||||
DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
|
||||
|
|
@ -240,4 +241,4 @@ ethosu_screen_create(int fd,
|
|||
screen->resource_destroy = ethosu_resource_destroy;
|
||||
|
||||
return screen;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ enum ethosu_dbg {
|
|||
ETHOSU_DBG_ZERO = BITFIELD_BIT(2),
|
||||
ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
|
||||
ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
|
||||
ETHOSU_DBG_DUMP_PERF = BITFIELD_BIT(6),
|
||||
};
|
||||
|
||||
extern int ethosu_debug;
|
||||
|
|
|
|||
|
|
@ -242,6 +242,20 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext,
|
|||
}
|
||||
}
|
||||
|
||||
subgraph->perfmon_id = 0;
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
|
||||
|
||||
struct drm_ethosu_perfmon_create perfmon_create = {
|
||||
.counters = { 32, 35 }, /* npu-idle, npu-active */
|
||||
.ncounters = 2,
|
||||
};
|
||||
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_CREATE, &perfmon_create);
|
||||
if (ret == 0)
|
||||
subgraph->perfmon_id = perfmon_create.id;
|
||||
else
|
||||
DBG("Could not create perfmon - %d\n", ret);
|
||||
}
|
||||
|
||||
subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used);
|
||||
|
||||
return &subgraph->base;
|
||||
|
|
@ -292,6 +306,7 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
|||
|
||||
submit.jobs = (uintptr_t)&job;
|
||||
submit.job_count = 1;
|
||||
submit.perfmon_id = subgraph->perfmon_id;
|
||||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_MSGS))
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
|
||||
|
|
@ -338,6 +353,20 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
|||
|
||||
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
|
||||
}
|
||||
|
||||
/* Dump the PMU counters of this subgraph when ETHOSU_DEBUG=dump_perf is set.
 *
 * Skipped when no perfmon exists for the subgraph (perfmon_id is left at 0
 * if creation failed), so a kernel without perfmon support does not turn a
 * debug option into a failed ioctl. A failing query is reported instead of
 * asserted, so release builds never print unread values.
 */
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF) && subgraph->perfmon_id) {
   struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
   /* Sized by the UAPI maximum and zeroed so no indeterminate data is logged. */
   uint64_t values[DRM_ETHOSU_MAX_PERF_COUNTERS] = {0};
   struct drm_ethosu_perfmon_get_values get_values = {
      .id = subgraph->perfmon_id,
      .values_ptr = (uintptr_t)values,
   };
   int ret;

   ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES, &get_values);
   if (ret == 0) {
      /* Slots 0 and 1 follow the order the events were requested at perfmon
       * creation (npu-idle, npu-active); the slot after them is printed as
       * the cycle count. The casts keep the format portable where uint64_t
       * is not unsigned long.
       */
      mesa_logi("PMU: cycles=%llu, npu-active=%llu, npu-idle=%llu\n",
                (unsigned long long)values[2],
                (unsigned long long)values[1],
                (unsigned long long)values[0]);
   } else {
      DBG("Could not read perfmon values - %d\n", ret);
   }
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -356,6 +385,13 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
|
|||
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||
assert(ret >= 0);
|
||||
|
||||
if (subgraph->perfmon_id) {
|
||||
struct drm_ethosu_perfmon_destroy destroy = {
|
||||
.id = subgraph->perfmon_id,
|
||||
};
|
||||
drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_DESTROY, &destroy);
|
||||
}
|
||||
|
||||
util_dynarray_fini(&subgraph->operations);
|
||||
util_dynarray_fini(&subgraph->tensors);
|
||||
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@ struct ethosu_subgraph {
|
|||
uint32_t *cmdstream;
|
||||
uint32_t *cursor;
|
||||
uint32_t cmdstream_bo;
|
||||
uint32_t perfmon_id;
|
||||
|
||||
struct pipe_resource *io_rsrc;
|
||||
unsigned io_used;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue