diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index e8a895fd244..25aeeed569f 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -6,6 +6,7 @@ #include "util/u_inlines.h" #include "mlw_codec/mlw_encode.h" +#include "ethosu_ml.h" #include "ethosu_coefs.h" static void @@ -64,6 +65,7 @@ calculate_weights_strides(struct ethosu_operation *operation, int out_strides[4] static void fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, uint8_t **weights, long *weights_size, struct pipe_resource *weight_rsrc) { + struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); int brick_strides[4] = {0}; unsigned input_channels = operation->ifm.shape.depth; @@ -98,8 +100,8 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio int64_t padded_size = 0; *weights_size = mlw_reorder_encode( - IFM_UBLOCK.depth, - OFM_UBLOCK.depth, + screen->ifm_ublock.depth, + screen->ofm_ublock.depth, operation->ofm.shape.depth, operation->kernel.height, operation->kernel.width, diff --git a/src/gallium/drivers/ethosu/ethosu_device.c b/src/gallium/drivers/ethosu/ethosu_device.c index c9442991fb8..22d8b02ae90 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.c +++ b/src/gallium/drivers/ethosu/ethosu_device.c @@ -234,8 +234,24 @@ ethosu_screen_create(int fd, ethosu_screen->info.sram_size = 0; if (ethosu_is_u65(ethosu_screen)) { + ethosu_screen->ifm_ublock.width = 2; + ethosu_screen->ifm_ublock.height = 2; + ethosu_screen->ifm_ublock.depth = 8; + + ethosu_screen->ofm_ublock.width = 2; + ethosu_screen->ofm_ublock.height = 2; + ethosu_screen->ofm_ublock.depth = 8; + ethosu_screen->max_concurrent_blocks = 3; } else { + ethosu_screen->ifm_ublock.width = 4; + ethosu_screen->ifm_ublock.height = 4; + ethosu_screen->ifm_ublock.depth = 16; + + ethosu_screen->ofm_ublock.width = 4; + ethosu_screen->ofm_ublock.height = 1; + ethosu_screen->ofm_ublock.depth = 8; + ethosu_screen->max_concurrent_blocks = 7; } diff --git a/src/gallium/drivers/ethosu/ethosu_device.h b/src/gallium/drivers/ethosu/ethosu_device.h index 3d5ebcb8e76..fa08a8fe6a8 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.h +++ b/src/gallium/drivers/ethosu/ethosu_device.h @@ -33,11 +33,19 @@ extern int ethosu_debug; ##__VA_ARGS__); \ } while (0) +struct ethosu_block { + unsigned width; + unsigned height; + unsigned depth; +}; + struct ethosu_screen { struct pipe_screen pscreen; int fd; struct drm_ethosu_npu_info info; + struct ethosu_block ifm_ublock; + struct ethosu_block ofm_ublock; unsigned max_concurrent_blocks; }; diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index 53bc9610c7f..41155d37a76 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -23,8 +23,6 @@ #include "ethosu_lower.h" #include "ethosu_ml.h" -struct ethosu_block IFM_UBLOCK = {2, 2, 8}; -struct ethosu_block OFM_UBLOCK = {2, 2, 8}; struct ethosu_block ARCH_OFM_BLOCK_MAX = {64, 32, 128}; struct ethosu_block SUB_KERNEL_MAX = {8, 8, 65536}; diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index f3bf3c76bbb..9ce81c3290d 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -25,19 +25,11 @@ extern struct ethosu_block ARCH_OFM_BLOCK_MAX; extern struct ethosu_block SUB_KERNEL_MAX; -extern struct ethosu_block IFM_UBLOCK; -extern struct ethosu_block OFM_UBLOCK; #define COEFS_REGION 0 #define IO_REGION 1 #define SCRATCH_REGION 2 -struct ethosu_block { - unsigned width; - unsigned height; - unsigned depth; -}; - enum ethosu_operation_type { ETHOSU_OPERATION_TYPE_CONVOLUTION, ETHOSU_OPERATION_TYPE_POOLING, @@ -126,6 +118,7 @@ enum ethosu_acc_type { struct ethosu_block_config { struct ethosu_block ifm_block; struct ethosu_block ofm_block; + struct ethosu_block ofm_ublock; struct ethosu_shram_layout shram_layout; unsigned bank_size; enum ethosu_acc_type acc_type; diff --git a/src/gallium/drivers/ethosu/ethosu_sched.c b/src/gallium/drivers/ethosu/ethosu_sched.c index 0e5cd122c25..5a93d023331 100644 --- a/src/gallium/drivers/ethosu/ethosu_sched.c +++ b/src/gallium/drivers/ethosu/ethosu_sched.c @@ -12,17 +12,18 @@ required_input_size(int value, int stride, int border) } static struct ethosu_block -_get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_block) +_get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block) { + struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); struct ethosu_block ifm_block = {0}; // IFM block height int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height)); - h = align(h, OFM_UBLOCK.height); + h = align(h, screen->ofm_ublock.height); // IFM block width int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width)); - w = align(w, OFM_UBLOCK.width); + w = align(w, screen->ofm_ublock.width); ifm_block.height = h; ifm_block.width = w; @@ -69,8 +70,9 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo } static struct ethosu_block_config -find_block_config(struct ethosu_operation *operation) +find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { + struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); struct ethosu_block_config config = {}; struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX; float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth; @@ -86,31 +88,31 @@ find_block_config(struct ethosu_operation *operation) search_space.height = MIN2(search_space.height, operation->ofm.shape.height); search_space.depth = MIN2(search_space.depth, operation->ofm.shape.depth); - unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH)); + unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH)); if (depth < operation->ofm.shape.depth) { depth = align(depth, ARCH_SPLIT_DEPTH); } - search_space.width = align(search_space.width, OFM_UBLOCK.width); - search_space.height = align(search_space.height, OFM_UBLOCK.height); - search_space.depth = align(search_space.depth, OFM_UBLOCK.depth); + search_space.width = align(search_space.width, screen->ofm_ublock.width); + search_space.height = align(search_space.height, screen->ofm_ublock.height); + search_space.depth = align(search_space.depth, screen->ofm_ublock.depth); while (depth <= search_space.depth) { bool wont_fit[search_space.height + 1][search_space.width + 1]; memset(wont_fit, 0, sizeof(wont_fit)); - for (unsigned height = OFM_UBLOCK.height; height <= search_space.height; height += OFM_UBLOCK.height) { - for (unsigned width = OFM_UBLOCK.width; width <= search_space.width; width += OFM_UBLOCK.width) { + for (unsigned height = screen->ofm_ublock.height; height <= search_space.height; height += screen->ofm_ublock.height) { + for (unsigned width = screen->ofm_ublock.width; width <= search_space.width; width += screen->ofm_ublock.width) { if (wont_fit[height][width]) continue; - struct ethosu_block ofm_block = {height, width, depth}; - struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block); + struct ethosu_block ofm_block = {width, height, depth}; + struct ethosu_block ifm_block = _get_ifm_blocksize(subgraph, operation, ofm_block); if (!is_equal_depth) - ifm_block.depth = align(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth); + ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 16 : 32), screen->ifm_ublock.depth); // Try to fit the blocks in SHRAM struct ethosu_shram_layout layout = {0}; @@ -167,6 +169,7 @@ find_block_config(struct ethosu_operation *operation) config.ofm_block.height = height; config.ofm_block.width = width; config.ofm_block.depth = depth; + config.ofm_ublock = screen->ofm_ublock; best_cost = relative_cost; } @@ -177,7 +180,7 @@ find_block_config(struct ethosu_operation *operation) } } - depth += OFM_UBLOCK.depth; + depth += screen->ofm_ublock.depth; if (depth < operation->ofm.shape.depth) { depth = align(depth, ARCH_SPLIT_DEPTH); } @@ -189,5 +192,5 @@ find_block_config(struct ethosu_operation *operation) void ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - operation->block_config = find_block_config(operation); + operation->block_config = find_block_config(subgraph, operation); }