diff --git a/include/drm-uapi/ethosu_accel.h b/include/drm-uapi/ethosu_accel.h
index 135d6480e3a..dde6756642e 100644
--- a/include/drm-uapi/ethosu_accel.h
+++ b/include/drm-uapi/ethosu_accel.h
@@ -43,6 +43,11 @@ enum drm_ethosu_ioctl_id {
 
 	/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
 	DRM_ETHOSU_SUBMIT,
+
+	DRM_ETHOSU_PERFMON_CREATE,
+	DRM_ETHOSU_PERFMON_DESTROY,
+	DRM_ETHOSU_PERFMON_GET_VALUES,
+	DRM_ETHOSU_PERFMON_SET_GLOBAL,
 };
 
 /**
@@ -79,7 +84,9 @@ struct drm_ethosu_npu_info {
 	__u32 config;
 
 	__u32 sram_size;
+	__u32 pmu_counters;
 };
+
 /**
  * struct drm_ethosu_dev_query - Arguments passed to DRM_ETHOSU_IOCTL_DEV_QUERY
  */
@@ -171,7 +178,6 @@ struct drm_ethosu_bo_wait {
 	__s64 timeout_ns; /* absolute */
 };
 
-
 struct drm_ethosu_cmdstream_bo_create {
 	/* Size of the data argument. */
 	__u32 size;
@@ -220,10 +226,52 @@ struct drm_ethosu_submit {
 	/** Input: Number of jobs passed in. */
 	__u32 job_count;
 
-	/** Reserved, must be zero. */
-	__u32 pad;
+	/** Input: Id returned by DRM_ETHOSU_PERFMON_CREATE */
+	__u32 perfmon_id;
 };
 
+#define DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS 8
+#define DRM_ETHOSU_MAX_PERF_COUNTERS \
+	(DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS + 1)
+
+struct drm_ethosu_perfmon_create {
+	__u32 id;
+	__u32 ncounters;
+	__u16 counters[DRM_ETHOSU_MAX_PERF_EVENT_COUNTERS];
+};
+
+struct drm_ethosu_perfmon_destroy {
+	__u32 id;
+};
+
+/*
+ * Returns the values of the performance counters tracked by this
+ * perfmon (as an array of (ncounters + 1) u64 values).
+ *
+ * No implicit synchronization is performed, so the user has to
+ * guarantee that any jobs using this perfmon have already been
+ * completed.
+ */
+struct drm_ethosu_perfmon_get_values {
+	__u32 id;
+	__u32 pad;
+	__u64 values_ptr;
+};
+
+#define DRM_ETHOSU_PERFMON_CLEAR_GLOBAL 0x0001
+
+/**
+ * struct drm_ethosu_perfmon_set_global - ioctl to define a global performance
+ * monitor
+ *
+ * The global performance monitor will be used for all jobs. If a global
+ * performance monitor is defined, jobs with a self-defined performance
+ * monitor won't be allowed.
+ */
+struct drm_ethosu_perfmon_set_global {
+	__u32 flags;
+	__u32 id;
+};
 
 /**
  * DRM_IOCTL_ETHOSU() - Build a ethosu IOCTL number
@@ -253,6 +301,14 @@ enum {
 		DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
 	DRM_IOCTL_ETHOSU_SUBMIT =
 		DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
+	DRM_IOCTL_ETHOSU_PERFMON_CREATE =
+		DRM_IOCTL_ETHOSU(WR, PERFMON_CREATE, perfmon_create),
+	DRM_IOCTL_ETHOSU_PERFMON_DESTROY =
+		DRM_IOCTL_ETHOSU(WR, PERFMON_DESTROY, perfmon_destroy),
+	DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES =
+		DRM_IOCTL_ETHOSU(WR, PERFMON_GET_VALUES, perfmon_get_values),
+	DRM_IOCTL_ETHOSU_PERFMON_SET_GLOBAL =
+		DRM_IOCTL_ETHOSU(WR, PERFMON_SET_GLOBAL, perfmon_set_global),
 };
 
 #if defined(__cplusplus)
diff --git a/src/gallium/drivers/ethosu/ethosu_device.c b/src/gallium/drivers/ethosu/ethosu_device.c
index 392717f85a3..5d012e11691 100644
--- a/src/gallium/drivers/ethosu/ethosu_device.c
+++ b/src/gallium/drivers/ethosu/ethosu_device.c
@@ -20,6 +20,7 @@ static const struct debug_named_value ethosu_debug_options[] = {
    {"zero_bos", ETHOSU_DBG_ZERO, "Zero buffers for debugging"},
    {"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
    {"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
+   {"dump_perf", ETHOSU_DBG_DUMP_PERF, "Dump performance counters for each submit"},
    DEBUG_NAMED_VALUE_END};
 
 DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
@@ -240,4 +241,4 @@ ethosu_screen_create(int fd,
    screen->resource_destroy = ethosu_resource_destroy;
 
    return screen;
-}
\ No newline at end of file
+}
diff --git a/src/gallium/drivers/ethosu/ethosu_device.h b/src/gallium/drivers/ethosu/ethosu_device.h
index b121661baad..957e12c559c 100644
--- a/src/gallium/drivers/ethosu/ethosu_device.h
+++ b/src/gallium/drivers/ethosu/ethosu_device.h
@@ -20,6 +20,7 @@ enum ethosu_dbg {
    ETHOSU_DBG_ZERO = BITFIELD_BIT(2),
    ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
    ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
+   ETHOSU_DBG_DUMP_PERF = BITFIELD_BIT(6),
 };
 
 extern int ethosu_debug;
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c
index 8230bdc7661..d6f1584b146 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.c
+++ b/src/gallium/drivers/ethosu/ethosu_ml.c
@@ -242,6 +242,20 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext,
       }
    }
 
+   /* 0 means "no perfmon"; it is only replaced when creation succeeds. */
+   subgraph->perfmon_id = 0;
+   if (DBG_ENABLED(ETHOSU_DBG_DUMP_PERF)) {
+      struct drm_ethosu_perfmon_create perfmon_create = {
+         .counters = { 32, 35 }, /* npu-idle, npu-active */
+         .ncounters = 2,
+      };
+      ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_CREATE, &perfmon_create);
+      if (ret == 0)
+         subgraph->perfmon_id = perfmon_create.id;
+      else
+         DBG("Could not create perfmon - %d\n", ret);
+   }
+
    subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used);
 
    return &subgraph->base;
@@ -292,6 +306,7 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
 
    submit.jobs = (uintptr_t)&job;
    submit.job_count = 1;
+   submit.perfmon_id = subgraph->perfmon_id;
 
    if (DBG_ENABLED(ETHOSU_DBG_MSGS))
       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
@@ -338,6 +353,32 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
       pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset,
                        output->size, outputs[i]);
    }
+
+   /* perfmon_id is only non-zero when ETHOSU_DBG_DUMP_PERF is set and the
+    * perfmon was created successfully, so there is nothing to read otherwise.
+    */
+   if (subgraph->perfmon_id) {
+      struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
+      uint64_t values[DRM_ETHOSU_MAX_PERF_COUNTERS] = {0};
+      struct drm_ethosu_perfmon_get_values get_values = {
+         .id = subgraph->perfmon_id,
+         .values_ptr = (uintptr_t)values,
+      };
+      int ret;
+
+      ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_GET_VALUES, &get_values);
+      if (ret == 0) {
+         /* Indices follow the counters requested at creation (npu-idle,
+          * npu-active); the entry after them is reported as the cycle count.
+          */
+         mesa_logi("PMU: cycles=%llu, npu-active=%llu, npu-idle=%llu\n",
+                   (unsigned long long)values[2],
+                   (unsigned long long)values[1],
+                   (unsigned long long)values[0]);
+      } else {
+         DBG("Could not read perfmon values - %d\n", ret);
+      }
+   }
 }
 
 void
@@ -356,6 +397,14 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
    ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
    assert(ret >= 0);
 
+   if (subgraph->perfmon_id) {
+      struct drm_ethosu_perfmon_destroy destroy = {
+         .id = subgraph->perfmon_id,
+      };
+      /* Best effort: the result is deliberately ignored during teardown. */
+      drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_PERFMON_DESTROY, &destroy);
+   }
+
    util_dynarray_fini(&subgraph->operations);
    util_dynarray_fini(&subgraph->tensors);
 
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h
index 9dc9bbe9869..9e94e8a5680 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@@ -180,6 +180,7 @@ struct ethosu_subgraph {
    uint32_t *cmdstream;
    uint32_t *cursor;
    uint32_t cmdstream_bo;
+   uint32_t perfmon_id;
 
    struct pipe_resource *io_rsrc;
    unsigned io_used;