mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-21 23:18:24 +02:00
ethosu: Add performance counter debug output
Add simple performance counter support as debug output. This is enough to measure NPU cycles for networks.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40269>
This commit is contained in:
parent
83d0646d79
commit
0972ef7d33
5 changed files with 108 additions and 5 deletions
|
|
@ -43,6 +43,11 @@ enum drm_ethosu_ioctl_id {
|
||||||
|
|
||||||
/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
|
/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
|
||||||
DRM_ETHOSU_SUBMIT,
|
DRM_ETHOSU_SUBMIT,
|
||||||
|
|
||||||
|
DRM_ETHOSU_PERFMON_CREATE,
|
||||||
|
DRM_ETHOSU_PERFMON_DESTROY,
|
||||||
|
DRM_ETHOSU_PERFMON_GET_VALUES,
|
||||||
|
DRM_ETHOSU_PERFMON_SET_GLOBAL,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -79,7 +84,9 @@ struct drm_ethosu_npu_info {
|
||||||
__u32 config;
|
__u32 config;
|
||||||
|
|
||||||
__u32 sram_size;
|
__u32 sram_size;
|
||||||
|
__u32 pmu_counters;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct drm_ethosu_dev_query - Arguments passed to DRM_ETHOSU_IOCTL_DEV_QUERY
|
* struct drm_ethosu_dev_query - Arguments passed to DRM_ETHOSU_IOCTL_DEV_QUERY
|
||||||
*/
|
*/
|
||||||
|
|
@ -171,7 +178,6 @@ struct drm_ethosu_bo_wait {
|
||||||
__s64 timeout_ns; /* absolute */
|
__s64 timeout_ns; /* absolute */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct drm_ethosu_cmdstream_bo_create {
|
struct drm_ethosu_cmdstream_bo_create {
|
||||||
/* Size of the data argument. */
|
/* Size of the data argument. */
|
||||||
__u32 size;
|
__u32 size;
|
||||||
|
|
@ -220,10 +226,52 @@ struct drm_ethosu_submit {
|
||||||
/** Input: Number of jobs passed in. */
|
/** Input: Number of jobs passed in. */
|
||||||
__u32 job_count;
|
__u32 job_count;
|
||||||
|
|
||||||
/** Reserved, must be zero. */
|
/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
|
||||||
__u32 pad;
|
__u32 perfmon_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS 8
|
||||||
|
#define DRM_ETHOSU_MAX_PERF_COUNTERS \
|
||||||
|
(DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS + 1)
|
||||||
|
|
||||||
|
/**
 * struct drm_ethosu_perfmon_create - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_CREATE
 *
 * Describes the set of event counters a new performance monitor tracks.
 */
struct drm_ethosu_perfmon_create {
|
||||||
|
/**
 * Output: Id of the created perfmon. The Mesa driver reads it back after
 * a successful ioctl and passes it as perfmon_id when submitting jobs.
 */
__u32 id;
|
||||||
|
/**
 * Input: Number of entries of counters[] that are in use.
 * NOTE(review): presumably limited to DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS;
 * confirm against the kernel driver.
 */
__u32 ncounters;
|
||||||
|
/**
 * Input: Event ids to count, one per counter (the Mesa driver passes
 * 32 and 35, which it labels npu-idle and npu-active).
 */
__u16 counters[DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS];
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * struct drm_ethosu_perfmon_destroy - Arguments passed to
 * DRM_IOCTL_ETHOSU_PERFMON_DESTROY
 */
struct drm_ethosu_perfmon_destroy {
|
||||||
|
/** Input: Id of the perfmon to destroy, as returned in drm_ethosu_perfmon_create.id. */
__u32 id;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the values of the performance counters tracked by this
|
||||||
|
* perfmon (as an array of (ncounters + 1) u64 values).
|
||||||
|
*
|
||||||
|
* No implicit synchronization is performed, so the user has to
|
||||||
|
* guarantee that any jobs using this perfmon have already been
|
||||||
|
* completed.
|
||||||
|
*/
|
||||||
|
/* Arguments passed to DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES. */
struct drm_ethosu_perfmon_get_values {
|
||||||
|
/** Input: Id of the perfmon to read, as returned in drm_ethosu_perfmon_create.id. */
__u32 id;
|
||||||
|
/** Padding. NOTE(review): presumably must be zero; confirm against the kernel driver. */
__u32 pad;
|
||||||
|
/**
 * Input: User pointer (cast to a 64-bit integer) to an array of u64 that
 * receives the counter values. The Mesa driver supplies a 9-entry array,
 * i.e. DRM_ETHOSU_MAX_PERF_COUNTERS entries.
 */
__u64 values_ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define DRM_ETHOSU_PERFMON_CLEAR_GLOBAL 0x0001
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_perfmon_set_global - ioctl to define a global performance
|
||||||
|
* monitor
|
||||||
|
*
|
||||||
|
* The global performance monitor will be used for all jobs. If a global
|
||||||
|
* performance monitor is defined, jobs with a self-defined performance
|
||||||
|
* monitor won't be allowed.
|
||||||
|
*/
|
||||||
|
/* Arguments passed to DRM_IOCTL_ETHOSU_PERFMON_SET_GLOBAL. */
struct drm_ethosu_perfmon_set_global {
|
||||||
|
/**
 * Input: Flags. DRM_ETHOSU_PERFMON_CLEAR_GLOBAL is the only flag defined
 * next to this struct.
 * NOTE(review): presumably it removes the current global perfmon; confirm
 * against the kernel driver.
 */
__u32 flags;
|
||||||
|
/**
 * Input: Perfmon id.
 * NOTE(review): presumably the perfmon to install as the global one, as
 * returned in drm_ethosu_perfmon_create.id; confirm.
 */
__u32 id;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DRM_IOCTL_ETHOSU() - Build a ethosu IOCTL number
|
* DRM_IOCTL_ETHOSU() - Build a ethosu IOCTL number
|
||||||
|
|
@ -253,6 +301,14 @@ enum {
|
||||||
DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
|
DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
|
||||||
DRM_IOCTL_ETHOSU_SUBMIT =
|
DRM_IOCTL_ETHOSU_SUBMIT =
|
||||||
DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
|
DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
|
||||||
|
DRM_IOCTL_ETHOSU_PERFMON_CREATE =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, PERFMON_CREATE, perfmon_create),
|
||||||
|
DRM_IOCTL_ETHOSU_PERFMON_DESTROY =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, PERFMON_DESTROY, perfmon_destroy),
|
||||||
|
DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, PERFMON_GET_VALUES, perfmon_get_values),
|
||||||
|
DRM_IOCTL_ETHOSU_PERFMON_SET_GLOBAL =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, PERFMON_SET_GLOBAL, perfmon_set_global),
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ static const struct debug_named_value ethosu_debug_options[] = {
|
||||||
{"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
|
{"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
|
||||||
{"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
|
{"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
|
||||||
{"force_u85", ETHOSU_DBG_FORCE_U85, "Force U85 behavior even on U65 hardware"},
|
{"force_u85", ETHOSU_DBG_FORCE_U85, "Force U85 behavior even on U65 hardware"},
|
||||||
|
{"dump_perf", ETHOSU_DBG_DUMP_PERF, "Dump performance counters for each submit"},
|
||||||
DEBUG_NAMED_VALUE_END};
|
DEBUG_NAMED_VALUE_END};
|
||||||
|
|
||||||
DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
|
DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
|
||||||
|
|
@ -336,4 +337,4 @@ ethosu_ml_device_create(const char *spec)
|
||||||
set_device_callbacks(device);
|
set_device_callbacks(device);
|
||||||
|
|
||||||
return &device->base;
|
return &device->base;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ enum ethosu_dbg {
|
||||||
ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
|
ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
|
||||||
ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
|
ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
|
||||||
ETHOSU_DBG_FORCE_U85 = BITFIELD_BIT(5),
|
ETHOSU_DBG_FORCE_U85 = BITFIELD_BIT(5),
|
||||||
|
ETHOSU_DBG_DUMP_PERF = BITFIELD_BIT(6),
|
||||||
};
|
};
|
||||||
|
|
||||||
extern int ethosu_debug;
|
extern int ethosu_debug;
|
||||||
|
|
|
||||||
|
|
@ -320,6 +320,23 @@ prepare_for_submission(struct ethosu_subgraph *subgraph,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
subgraph->perfmon_id = 0;
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
|
||||||
|
|
||||||
|
struct drm_ethosu_perfmon_create perfmon_create = {
|
||||||
|
.counters = { 32, 35 }, /* npu-idle, npu-active */
|
||||||
|
.ncounters = 2,
|
||||||
|
};
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_CREATE, &perfmon_create);
|
||||||
|
DBG("Perfmon create returned %d\n", ret);
|
||||||
|
if (ret == 0) {
|
||||||
|
subgraph->perfmon_id = perfmon_create.id;
|
||||||
|
} else {
|
||||||
|
DBG("Could not create perfmon: ret=%d errno=%d (%s)\n",
|
||||||
|
ret, errno, strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DBG("subgraph->io_used %d\n", subgraph->io_used);
|
DBG("subgraph->io_used %d\n", subgraph->io_used);
|
||||||
subgraph->io_rsrc = pipe_buffer_create(pcontext->screen, 0,
|
subgraph->io_rsrc = pipe_buffer_create(pcontext->screen, 0,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
|
|
@ -447,6 +464,7 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
||||||
|
|
||||||
submit.jobs = (uintptr_t)&job;
|
submit.jobs = (uintptr_t)&job;
|
||||||
submit.job_count = 1;
|
submit.job_count = 1;
|
||||||
|
submit.perfmon_id = subgraph->perfmon_id;
|
||||||
|
|
||||||
if (DBG_ENABLED(ETHOSU_DBG_MSGS))
|
if (DBG_ENABLED(ETHOSU_DBG_MSGS))
|
||||||
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
|
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
|
||||||
|
|
@ -493,6 +511,25 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
||||||
|
|
||||||
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
|
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||||
|
uint64_t values[9];
|
||||||
|
struct drm_ethosu_perfmon_get_values get_values = {
|
||||||
|
.id = subgraph->perfmon_id,
|
||||||
|
.values_ptr = (uintptr_t)values,
|
||||||
|
};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES, &get_values);
|
||||||
|
if (ret == 0) {
|
||||||
|
mesa_logi("PMU: cycles=%lu, npu-active=%lu, npu-idle=%lu\n",
|
||||||
|
values[2], values[1], values[0]);
|
||||||
|
} else {
|
||||||
|
DBG("Could not read perfmon values: ret=%d errno=%d (%s)\n",
|
||||||
|
ret, errno, strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -500,10 +537,10 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||||
struct pipe_ml_subgraph *psubgraph)
|
struct pipe_ml_subgraph *psubgraph)
|
||||||
{
|
{
|
||||||
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||||
|
struct ethosu_screen *screen = subgraph->screen;
|
||||||
|
|
||||||
if (subgraph->io_rsrc) {
|
if (subgraph->io_rsrc) {
|
||||||
/* Post-submission state: cleanup DRM resources */
|
/* Post-submission state: cleanup DRM resources */
|
||||||
struct ethosu_screen *screen = subgraph->screen;
|
|
||||||
struct drm_gem_close arg = {0};
|
struct drm_gem_close arg = {0};
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
|
@ -521,6 +558,13 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||||
free(subgraph->coefs);
|
free(subgraph->coefs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
|
||||||
|
struct drm_ethosu_perfmon_destroy destroy = {
|
||||||
|
.id = subgraph->perfmon_id,
|
||||||
|
};
|
||||||
|
drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_DESTROY, &destroy);
|
||||||
|
}
|
||||||
|
|
||||||
util_dynarray_fini(&subgraph->tensors);
|
util_dynarray_fini(&subgraph->tensors);
|
||||||
|
|
||||||
free(subgraph);
|
free(subgraph);
|
||||||
|
|
|
||||||
|
|
@ -257,6 +257,7 @@ struct ethosu_subgraph {
|
||||||
uint32_t *cmdstream;
|
uint32_t *cmdstream;
|
||||||
uint32_t *cursor;
|
uint32_t *cursor;
|
||||||
uint32_t cmdstream_bo;
|
uint32_t cmdstream_bo;
|
||||||
|
uint32_t perfmon_id;
|
||||||
|
|
||||||
struct pipe_resource *io_rsrc;
|
struct pipe_resource *io_rsrc;
|
||||||
unsigned io_used;
|
unsigned io_used;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue