From 3b68c5b4bcbb27cfb4acd14b021e4fd8406980b5 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 25 Mar 2026 18:52:09 +0100 Subject: [PATCH] ethosu: move hardware description from ethosu_screen to ethosu_ml_device Move target-specific fields (is_u65, ifm_ublock, ofm_ublock, max_concurrent_blocks, sram_size) from ethosu_screen into ethosu_ml_device. This decouples the compilation phase from the DRM file descriptor and pipe_screen, allowing ahead-of-time compilation where the target NPU is not present on the compilation host. The ethosu_device_screen() helper is removed. Runtime paths that need the DRM fd (buffer allocation, job submission, destroy) no longer derive the screen from the pipe_ml_device: prepare_for_submission() takes it from the pipe_context and caches it in the new subgraph->screen field, which ethosu_ml_subgraph_destroy() then uses. Compilation code now accesses hardware parameters through the ethosu_ml_device() cast of pipe_ml_device; the device can be created either from a DRM-backed screen or standalone via ethosu_ml_device_create() with a target string like "65-256". Note that this patch fills in fixed U65 defaults (and no SRAM) for standalone devices rather than deriving them from the target string. Part-of: --- src/gallium/drivers/ethosu/ethosu_cmd.c | 42 ++++++++-------- src/gallium/drivers/ethosu/ethosu_coefs.c | 4 +- src/gallium/drivers/ethosu/ethosu_device.c | 50 +++++++++++++------- src/gallium/drivers/ethosu/ethosu_device.h | 26 ++++------ src/gallium/drivers/ethosu/ethosu_encode.cpp | 12 ++--- src/gallium/drivers/ethosu/ethosu_lower.c | 4 +- src/gallium/drivers/ethosu/ethosu_ml.c | 7 +-- src/gallium/drivers/ethosu/ethosu_ml.h | 2 + src/gallium/drivers/ethosu/ethosu_sched.c | 30 ++++++------ 9 files changed, 96 insertions(+), 81 deletions(-) diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index 41bef2f5dd5..ea7d955d101 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -192,7 +192,7 @@ emit_ifm_precision(struct ethosu_subgraph *subgraph, if (feature_map->is_signed) prec |= NPU_SET_IFM_PRECISION_ACTIVATION(1); // signed activation - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) prec |= 
NPU_SET_IFM_PRECISION_SCALE_MODE(op_to_scale); EMIT0(precision_cmd, prec); @@ -222,13 +222,13 @@ emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_ma EMIT0(NPU_SET_OFM_HEIGHT_M1, feature_map->shape.height - 1); EMIT0(NPU_SET_OFM_WIDTH_M1, feature_map->shape.width - 1); - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (!ethosu_ml_device(subgraph->base.device)->is_u65) EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1); emit_tiles( subgraph, feature_map, NPU_SET_OFM_HEIGHT0_M1, NPU_SET_OFM_HEIGHT1_M1, NPU_SET_OFM_WIDTH0_M1); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1); emit_strides(subgraph, feature_map, NPU_SET_OFM_STRIDE_C, NPU_SET_OFM_STRIDE_Y, NPU_SET_OFM_STRIDE_X); @@ -277,7 +277,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation static void emit_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (!ethosu_ml_device(subgraph->base.device)->is_u65) EMIT0(NPU_SET_WEIGHT_FORMAT, 0x0); EMIT0(NPU_SET_WEIGHT_REGION, operation->conv.weights.region); @@ -378,22 +378,22 @@ emit_acc_format(struct ethosu_subgraph *subgraph, struct ethosu_operation *opera static void emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, enum ethosu_op_to_scale op_to_scale) { - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (!ethosu_ml_device(subgraph->base.device)->is_u65) emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION); emit_ifm(subgraph, &operation->ifm); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION); EMIT0(NPU_SET_IFM_UPSCALE, 
operation->upscale); if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) emit_padding(subgraph, operation); - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (!ethosu_ml_device(subgraph->base.device)->is_u65) emit_ofm_precision(subgraph, operation); emit_ofm(subgraph, &operation->ofm); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) emit_ofm_precision(subgraph, operation); if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) @@ -410,7 +410,7 @@ emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation static void emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (!ethosu_ml_device(subgraph->base.device)->is_u65) EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(operation->conv.shift), operation->conv.scale); operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor_idx); @@ -426,7 +426,7 @@ emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *oper emit_common(subgraph, operation, false); emit_block_config(subgraph, operation); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -500,7 +500,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio switch (operation->pooling.type) { case ETHOSU_POOLING_TYPE_MAX: { - if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { + if (!ethosu_ml_device(subgraph->base.device)->is_u65) { EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1); break; } else @@ -533,7 +533,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio } emit_block_config(subgraph, operation); - if 
(ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -564,7 +564,7 @@ static void emit_ifm2(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar) { if (has_scalar) { - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) EMIT0(NPU_SET_IFM2_SCALAR, operation->ifm2.scalar); else { emit_ifm2_precision(subgraph, operation, true); @@ -612,7 +612,7 @@ emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *o { unsigned ifm2_broadcast = 0; - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { + if (ethosu_ml_device(subgraph->base.device)->is_u65) { ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed); if (has_scalar) { @@ -787,7 +787,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio bool has_scalar = operation->ifm2.scalar != 0; enum ethosu_op_to_scale op_to_scale; - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { + if (ethosu_ml_device(subgraph->base.device)->is_u65) { op_to_scale = eltwise_emit_ofm_scaling( subgraph, operation->ifm.scale, @@ -812,7 +812,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio emit_ifm2(subgraph, operation, has_scalar); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION); else emit_ifm2_precision(subgraph, operation, has_scalar); @@ -820,7 +820,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio emit_ifm2_broadcast(subgraph, operation, has_scalar); emit_block_config(subgraph, operation); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if 
(ethosu_ml_device(subgraph->base.device)->is_u65) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -1090,7 +1090,7 @@ get_jobs(const struct ethosu_block *area, static unsigned calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); + struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device); if (!prev_op) return 0; @@ -1102,7 +1102,7 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op ifm_index = 1; } else if (operation->ifm.tensor_idx != prev_op->ofm.tensor_idx) { /* Previous operation doesn't produce current operation's IFM */ - return screen->max_concurrent_blocks; + return device->max_concurrent_blocks; } const struct ethosu_feature_map *ifm = (ifm_index == 0) ? &operation->ifm : &operation->ifm2; @@ -1135,7 +1135,7 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op &curr_ifm_job); /* Get last jobs from previous operation */ - int max_jobs = screen->max_concurrent_blocks; + int max_jobs = device->max_concurrent_blocks; assert(max_jobs <= 8); struct box last_prev_jobs[8]; int prev_count = get_jobs(&prev_ofm->shape, &prev_block, max_jobs, false, last_prev_jobs); @@ -1187,7 +1187,7 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph) /* Compile */ - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) EMIT0(NPU_SET_PARALLEL_MODE, 0x0); util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index db94314fab3..3505ee147ab 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -63,7 +63,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct 
ethosu_operation /* U65 packs 10-byte bias/scale entries contiguously then aligns to 16. * U85 scales are read in groups of 16 channels, so pad depth to a * 16-channel boundary first, then multiply by 10 bytes per entry. */ - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) *scales_size = align(operation->ofm.shape.depth * 10, 16); else *scales_size = align(operation->ofm.shape.depth, 16) * 10; @@ -87,7 +87,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation uint32_t shift; int scale = ethosu_quantize_scale(conv_scale, &shift); - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) + if (ethosu_ml_device(subgraph->base.device)->is_u65) encode_bias_scale_u65( biases[i], scale, shift, &(*scales)[idx]); else diff --git a/src/gallium/drivers/ethosu/ethosu_device.c b/src/gallium/drivers/ethosu/ethosu_device.c index 09a3c8ca880..7f08463124e 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.c +++ b/src/gallium/drivers/ethosu/ethosu_device.c @@ -252,29 +252,36 @@ ethosu_screen_create(int fd, ethosu_screen->fd = fd; dev_query(ethosu_screen); + bool is_u65 = DRM_ETHOSU_ARCH_MAJOR(ethosu_screen->info.id) == 1; + if (DBG_ENABLED(ETHOSU_DBG_FORCE_U85)) + is_u65 = false; + + ethosu_screen->ml_device.is_u65 = is_u65; + ethosu_screen->ml_device.sram_size = ethosu_screen->info.sram_size; + if (DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM)) - ethosu_screen->info.sram_size = 0; + ethosu_screen->ml_device.sram_size = 0; - if (ethosu_is_u65(ethosu_screen)) { - ethosu_screen->ifm_ublock.width = 2; - ethosu_screen->ifm_ublock.height = 2; - ethosu_screen->ifm_ublock.depth = 8; + if (is_u65) { + ethosu_screen->ml_device.ifm_ublock.width = 2; + ethosu_screen->ml_device.ifm_ublock.height = 2; + ethosu_screen->ml_device.ifm_ublock.depth = 8; - ethosu_screen->ofm_ublock.width = 2; - ethosu_screen->ofm_ublock.height = 2; - ethosu_screen->ofm_ublock.depth = 8; + 
ethosu_screen->ml_device.ofm_ublock.width = 2; + ethosu_screen->ml_device.ofm_ublock.height = 2; + ethosu_screen->ml_device.ofm_ublock.depth = 8; - ethosu_screen->max_concurrent_blocks = 3; + ethosu_screen->ml_device.max_concurrent_blocks = 3; } else { - ethosu_screen->ifm_ublock.width = 4; - ethosu_screen->ifm_ublock.height = 4; - ethosu_screen->ifm_ublock.depth = 16; + ethosu_screen->ml_device.ifm_ublock.width = 4; + ethosu_screen->ml_device.ifm_ublock.height = 4; + ethosu_screen->ml_device.ifm_ublock.depth = 16; - ethosu_screen->ofm_ublock.width = 4; - ethosu_screen->ofm_ublock.height = 1; - ethosu_screen->ofm_ublock.depth = 8; + ethosu_screen->ml_device.ofm_ublock.width = 4; + ethosu_screen->ml_device.ofm_ublock.height = 1; + ethosu_screen->ml_device.ofm_ublock.depth = 8; - ethosu_screen->max_concurrent_blocks = 7; + ethosu_screen->ml_device.max_concurrent_blocks = 7; } screen->get_screen_fd = ethosu_screen_get_fd; @@ -301,6 +308,17 @@ ethosu_ml_device_create(const char *spec) ethosu_debug = debug_get_option_ethosu_debug(); device = rzalloc(NULL, struct ethosu_ml_device); + + device->is_u65 = true; + device->ifm_ublock.width = 2; + device->ifm_ublock.height = 2; + device->ifm_ublock.depth = 8; + device->ofm_ublock.width = 2; + device->ofm_ublock.height = 2; + device->ofm_ublock.depth = 8; + device->max_concurrent_blocks = 3; + device->sram_size = 0; + set_device_callbacks(device); return &device->base; diff --git a/src/gallium/drivers/ethosu/ethosu_device.h b/src/gallium/drivers/ethosu/ethosu_device.h index e33490984a5..835c390b4fd 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.h +++ b/src/gallium/drivers/ethosu/ethosu_device.h @@ -42,6 +42,13 @@ struct ethosu_block { struct ethosu_ml_device { struct pipe_ml_device base; + + /* Target hardware description — set from DRM query or from spec string */ + bool is_u65; + struct ethosu_block ifm_ublock; + struct ethosu_block ofm_ublock; + unsigned max_concurrent_blocks; + uint32_t sram_size; }; struct 
ethosu_screen { @@ -50,9 +57,6 @@ struct ethosu_screen { int fd; struct drm_ethosu_npu_info info; - struct ethosu_block ifm_ublock; - struct ethosu_block ofm_ublock; - unsigned max_concurrent_blocks; }; static inline struct ethosu_screen * @@ -61,20 +65,10 @@ ethosu_screen(struct pipe_screen *p) return (struct ethosu_screen *)p; } -static inline bool -ethosu_is_u65(struct ethosu_screen *e) +static inline struct ethosu_ml_device * +ethosu_ml_device(struct pipe_ml_device *p) { - if (DBG_ENABLED(ETHOSU_DBG_FORCE_U85)) - return false; - else - return DRM_ETHOSU_ARCH_MAJOR(e->info.id) == 1; -} - -static inline struct ethosu_screen * -ethosu_device_screen(struct pipe_ml_device *pdevice) -{ - struct ethosu_ml_device *dev = (struct ethosu_ml_device *)pdevice; - return container_of(dev, struct ethosu_screen, ml_device); + return (struct ethosu_ml_device *)p; } struct ethosu_context { diff --git a/src/gallium/drivers/ethosu/ethosu_encode.cpp b/src/gallium/drivers/ethosu/ethosu_encode.cpp index 6b9cfa083a6..0906b8196b5 100644 --- a/src/gallium/drivers/ethosu/ethosu_encode.cpp +++ b/src/gallium/drivers/ethosu/ethosu_encode.cpp @@ -58,7 +58,7 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph, uint8_t **weights, long *weights_size) { - struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); + struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device); int bit_depth = 8; bool is_sparse = false; EthosUTraversal traversal; @@ -88,15 +88,15 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph, WeightSourceCommon *source; - if (ethosu_is_u65(screen)) { + if (device->is_u65) { if (operation->kernel.is_signed) { source = new EthosUWeightOrdering(1, dilation, - operation->block_config.ofm_block.depth, bit_depth, screen->ofm_ublock.depth, - screen->ifm_ublock.depth, transform_func, &param, traversal); + operation->block_config.ofm_block.depth, bit_depth, device->ofm_ublock.depth, + device->ifm_ublock.depth, transform_func, &param, 
traversal); } else { source = new EthosUWeightOrdering(1, dilation, - operation->block_config.ofm_block.depth, bit_depth, screen->ofm_ublock.depth, - screen->ifm_ublock.depth, transform_func, &param, traversal); + operation->block_config.ofm_block.depth, bit_depth, device->ofm_ublock.depth, + device->ifm_ublock.depth, transform_func, &param, traversal); } } else { if (operation->kernel.is_signed) { diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index 209dc6c6a09..e296127cbf2 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -201,7 +201,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph, { operation->type = ETHOSU_OPERATION_TYPE_POOLING; - if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { + if (ethosu_ml_device(subgraph->base.device)->is_u65) { operation->pooling.type = ETHOSU_POOLING_TYPE_AVG; operation->round_mode = ETHOSU_ROUNDING_NATURAL; } else @@ -432,7 +432,7 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph, } if (operation.conv.scales.size + operation.conv.weights.size <= - ethosu_device_screen(subgraph->base.device)->info.sram_size) { + ethosu_ml_device(subgraph->base.device)->sram_size) { struct ethosu_operation dma_operation = {0}; ethosu_lower_dma(subgraph, &poperations[i], &operation, &dma_operation); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index 37167982a01..c16ec09b587 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -281,7 +281,8 @@ static void prepare_for_submission(struct ethosu_subgraph *subgraph, struct pipe_context *pcontext) { - struct ethosu_screen *screen = ethosu_screen(pcontext->screen); + subgraph->screen = ethosu_screen(pcontext->screen); + struct ethosu_screen *screen = subgraph->screen; uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor); @@ -370,7 +371,7 @@ 
ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, job.region_bo_handles[COEFS_REGION] = ethosu_resource(subgraph->coefs_rsrc)->handle; if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM)) { job.region_bo_handles[SCRATCH_REGION] = 0; - job.sram_size = screen->info.sram_size; + job.sram_size = ethosu_ml_device(subgraph->base.device)->sram_size; } } @@ -434,7 +435,7 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice, if (subgraph->io_rsrc) { /* Post-submission state: cleanup DRM resources */ - struct ethosu_screen *screen = ethosu_device_screen(pdevice); + struct ethosu_screen *screen = subgraph->screen; struct drm_gem_close arg = {0}; int ret; diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index 9343566c0e0..f3bf7c4b3ef 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -201,6 +201,8 @@ struct ethosu_tensor { struct ethosu_subgraph { struct pipe_ml_subgraph base; + struct ethosu_screen *screen; /* Set during prepare_for_submission */ + struct util_dynarray operations; /* ethosu_operation */ struct util_dynarray tensors; /* ethosu_tensor */ diff --git a/src/gallium/drivers/ethosu/ethosu_sched.c b/src/gallium/drivers/ethosu/ethosu_sched.c index 6f98d695b7c..328fa2eef06 100644 --- a/src/gallium/drivers/ethosu/ethosu_sched.c +++ b/src/gallium/drivers/ethosu/ethosu_sched.c @@ -15,16 +15,16 @@ required_input_size(int value, int stride, int border) static struct ethosu_block _get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block) { - struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); + struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device); struct ethosu_block ifm_block = {0}; // IFM block height int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height)); - h = align(h, screen->ofm_ublock.height); + 
h = align(h, device->ofm_ublock.height); // IFM block width int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width)); - w = align(w, screen->ofm_ublock.width); + w = align(w, device->ofm_ublock.width); ifm_block.height = h; ifm_block.width = w; @@ -73,7 +73,7 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo static struct ethosu_block_config find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); + struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device); struct ethosu_block_config config = {}; struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX; float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth; @@ -89,7 +89,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope search_space.height = MIN2(search_space.height, operation->ofm.shape.height); search_space.depth = MIN2(search_space.depth, operation->ofm.shape.depth); - unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH)); + unsigned depth = MAX2(device->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH)); bool is_part_kernel = false; if (is_convolution) { @@ -106,16 +106,16 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope depth = align(depth, ARCH_SPLIT_DEPTH); } - search_space.width = align(search_space.width, screen->ofm_ublock.width); - search_space.height = align(search_space.height, screen->ofm_ublock.height); - search_space.depth = align(search_space.depth, screen->ofm_ublock.depth); + search_space.width = align(search_space.width, device->ofm_ublock.width); + search_space.height = align(search_space.height, device->ofm_ublock.height); + search_space.depth = align(search_space.depth, device->ofm_ublock.depth); 
while (depth <= search_space.depth) { bool wont_fit[search_space.height + 1][search_space.width + 1]; memset(wont_fit, 0, sizeof(wont_fit)); - for (unsigned height = screen->ofm_ublock.height; height <= search_space.height; height += screen->ofm_ublock.height) { - for (unsigned width = screen->ofm_ublock.width; width <= search_space.width; width += screen->ofm_ublock.width) { + for (unsigned height = device->ofm_ublock.height; height <= search_space.height; height += device->ofm_ublock.height) { + for (unsigned width = device->ofm_ublock.width; width <= search_space.width; width += device->ofm_ublock.width) { if (wont_fit[height][width]) continue; @@ -124,7 +124,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope struct ethosu_block ifm_block = _get_ifm_blocksize(subgraph, operation, ofm_block); if (!is_equal_depth) - ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 16 : 32), screen->ifm_ublock.depth); + ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 
16 : 32), device->ifm_ublock.depth); // Try to fit the blocks in SHRAM struct ethosu_shram_layout layout = {0}; @@ -181,7 +181,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope config.ofm_block.height = height; config.ofm_block.width = width; config.ofm_block.depth = depth; - config.ofm_ublock = screen->ofm_ublock; + config.ofm_ublock = device->ofm_ublock; config.is_partkernel = is_part_kernel; best_cost = relative_cost; @@ -193,7 +193,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope } } - depth += screen->ofm_ublock.depth; + depth += device->ofm_ublock.depth; if (depth < operation->ofm.shape.depth) { depth = align(depth, ARCH_SPLIT_DEPTH); } @@ -205,9 +205,9 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope void ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); + struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device); - if (ethosu_is_u65(screen)) + if (device->is_u65) operation->block_config = find_block_config(subgraph, operation); else operation->block_config = find_block_config_u85(subgraph, operation);