ethosu: Make the UBlock sizes arch-specific

The U85 has a different UBlock configuration than the U65, so the sizes can no longer be global constants.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39611>
This commit is contained in:
Tomeu Vizoso 2026-02-17 09:16:00 +01:00 committed by Marge Bot
parent 91137a9327
commit 3ade0a4dd6
6 changed files with 47 additions and 27 deletions

View file

@ -6,6 +6,7 @@
#include "util/u_inlines.h"
#include "mlw_codec/mlw_encode.h"
#include "ethosu_ml.h"
#include "ethosu_coefs.h"
static void
@ -64,6 +65,7 @@ calculate_weights_strides(struct ethosu_operation *operation, int out_strides[4]
static void
fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, uint8_t **weights, long *weights_size, struct pipe_resource *weight_rsrc)
{
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
int brick_strides[4] = {0};
unsigned input_channels = operation->ifm.shape.depth;
@ -98,8 +100,8 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
int64_t padded_size = 0;
*weights_size = mlw_reorder_encode(
IFM_UBLOCK.depth,
OFM_UBLOCK.depth,
screen->ifm_ublock.depth,
screen->ofm_ublock.depth,
operation->ofm.shape.depth,
operation->kernel.height,
operation->kernel.width,

View file

@ -234,8 +234,24 @@ ethosu_screen_create(int fd,
ethosu_screen->info.sram_size = 0;
if (ethosu_is_u65(ethosu_screen)) {
ethosu_screen->ifm_ublock.width = 2;
ethosu_screen->ifm_ublock.height = 2;
ethosu_screen->ifm_ublock.depth = 8;
ethosu_screen->ofm_ublock.width = 2;
ethosu_screen->ofm_ublock.height = 2;
ethosu_screen->ofm_ublock.depth = 8;
ethosu_screen->max_concurrent_blocks = 3;
} else {
ethosu_screen->ifm_ublock.width = 4;
ethosu_screen->ifm_ublock.height = 4;
ethosu_screen->ifm_ublock.depth = 16;
ethosu_screen->ofm_ublock.width = 4;
ethosu_screen->ofm_ublock.height = 1;
ethosu_screen->ofm_ublock.depth = 8;
ethosu_screen->max_concurrent_blocks = 7;
}

View file

@ -33,11 +33,19 @@ extern int ethosu_debug;
##__VA_ARGS__); \
} while (0)
/* A three-dimensional block size.
 *
 * Positional initializers of this struct follow the declaration order:
 * width, height, depth.
 */
struct ethosu_block {
unsigned width;
unsigned height;
unsigned depth;
};
/* Per-screen driver state, including the architecture-specific block
 * parameters that ethosu_screen_create() fills in.
 */
struct ethosu_screen {
struct pipe_screen pscreen;
int fd; /* NOTE(review): presumably the fd passed to ethosu_screen_create() -- confirm */
struct drm_ethosu_npu_info info;
/* IFM and OFM UBlock sizes; ethosu_screen_create() selects them based on
 * whether the device is a U65.
 */
struct ethosu_block ifm_ublock;
struct ethosu_block ofm_ublock;
/* Set by ethosu_screen_create(): 3 on the U65, 7 otherwise. */
unsigned max_concurrent_blocks;
};

View file

@ -23,8 +23,6 @@
#include "ethosu_lower.h"
#include "ethosu_ml.h"
struct ethosu_block IFM_UBLOCK = {2, 2, 8};
struct ethosu_block OFM_UBLOCK = {2, 2, 8};
/* Initial search space for the OFM block in find_block_config(), before it is
 * clamped to the OFM shape. Fields are {width, height, depth}.
 */
struct ethosu_block ARCH_OFM_BLOCK_MAX = {64, 32, 128};
/* Upper bound applied (via MIN2) to the kernel height and width when the
 * required IFM block size is computed. Fields are {width, height, depth}.
 */
struct ethosu_block SUB_KERNEL_MAX = {8, 8, 65536};

View file

@ -25,19 +25,11 @@
extern struct ethosu_block ARCH_OFM_BLOCK_MAX;
extern struct ethosu_block SUB_KERNEL_MAX;
extern struct ethosu_block IFM_UBLOCK;
extern struct ethosu_block OFM_UBLOCK;
#define COEFS_REGION 0
#define IO_REGION 1
#define SCRATCH_REGION 2
/* A three-dimensional block size.
 *
 * Positional initializers of this struct follow the declaration order:
 * width, height, depth.
 */
struct ethosu_block {
unsigned width;
unsigned height;
unsigned depth;
};
enum ethosu_operation_type {
ETHOSU_OPERATION_TYPE_CONVOLUTION,
ETHOSU_OPERATION_TYPE_POOLING,
@ -126,6 +118,7 @@ enum ethosu_acc_type {
struct ethosu_block_config {
struct ethosu_block ifm_block;
struct ethosu_block ofm_block;
struct ethosu_block ofm_ublock;
struct ethosu_shram_layout shram_layout;
unsigned bank_size;
enum ethosu_acc_type acc_type;

View file

@ -12,17 +12,18 @@ required_input_size(int value, int stride, int border)
}
static struct ethosu_block
_get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_block)
_get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block)
{
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
struct ethosu_block ifm_block = {0};
// IFM block height
int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
h = align(h, OFM_UBLOCK.height);
h = align(h, screen->ofm_ublock.height);
// IFM block width
int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
w = align(w, OFM_UBLOCK.width);
w = align(w, screen->ofm_ublock.width);
ifm_block.height = h;
ifm_block.width = w;
@ -69,8 +70,9 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
}
static struct ethosu_block_config
find_block_config(struct ethosu_operation *operation)
find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
{
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
struct ethosu_block_config config = {};
struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX;
float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth;
@ -86,31 +88,31 @@ find_block_config(struct ethosu_operation *operation)
search_space.height = MIN2(search_space.height, operation->ofm.shape.height);
search_space.depth = MIN2(search_space.depth, operation->ofm.shape.depth);
unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
if (depth < operation->ofm.shape.depth) {
depth = align(depth, ARCH_SPLIT_DEPTH);
}
search_space.width = align(search_space.width, OFM_UBLOCK.width);
search_space.height = align(search_space.height, OFM_UBLOCK.height);
search_space.depth = align(search_space.depth, OFM_UBLOCK.depth);
search_space.width = align(search_space.width, screen->ofm_ublock.width);
search_space.height = align(search_space.height, screen->ofm_ublock.height);
search_space.depth = align(search_space.depth, screen->ofm_ublock.depth);
while (depth <= search_space.depth) {
bool wont_fit[search_space.height + 1][search_space.width + 1];
memset(wont_fit, 0, sizeof(wont_fit));
for (unsigned height = OFM_UBLOCK.height; height <= search_space.height; height += OFM_UBLOCK.height) {
for (unsigned width = OFM_UBLOCK.width; width <= search_space.width; width += OFM_UBLOCK.width) {
for (unsigned height = screen->ofm_ublock.height; height <= search_space.height; height += screen->ofm_ublock.height) {
for (unsigned width = screen->ofm_ublock.width; width <= search_space.width; width += screen->ofm_ublock.width) {
if (wont_fit[height][width])
continue;
struct ethosu_block ofm_block = {height, width, depth};
struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block);
struct ethosu_block ofm_block = {width, height, depth};
struct ethosu_block ifm_block = _get_ifm_blocksize(subgraph, operation, ofm_block);
if (!is_equal_depth)
ifm_block.depth = align(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 16 : 32), screen->ifm_ublock.depth);
// Try to fit the blocks in SHRAM
struct ethosu_shram_layout layout = {0};
@ -167,6 +169,7 @@ find_block_config(struct ethosu_operation *operation)
config.ofm_block.height = height;
config.ofm_block.width = width;
config.ofm_block.depth = depth;
config.ofm_ublock = screen->ofm_ublock;
best_cost = relative_cost;
}
@ -177,7 +180,7 @@ find_block_config(struct ethosu_operation *operation)
}
}
depth += OFM_UBLOCK.depth;
depth += screen->ofm_ublock.depth;
if (depth < operation->ofm.shape.depth) {
depth = align(depth, ARCH_SPLIT_DEPTH);
}
@ -189,5 +192,5 @@ find_block_config(struct ethosu_operation *operation)
/* Computes the block configuration for an operation with find_block_config()
 * and stores the result in operation->block_config.
 *
 * subgraph is forwarded so find_block_config() can reach the screen and its
 * architecture-specific UBlock sizes.
 */
void
ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
{
operation->block_config = find_block_config(operation);
operation->block_config = find_block_config(subgraph, operation);
}