mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
ethosu: move hardware description from ethosu_screen to ethosu_ml_device
Move target-specific fields (is_u65, ifm_ublock, ofm_ublock, max_concurrent_blocks, sram_size) from ethosu_screen into ethosu_ml_device. This decouples the compilation phase from the DRM file descriptor and pipe_screen, allowing ahead-of-time compilation where the target NPU is not present on the compilation host. The ethosu_device_screen() helper is retained only for runtime paths that need the DRM fd (buffer allocation, job submission, destroy). Compilation code now accesses hardware parameters through the ethosu_ml_device() cast of a pipe_ml_device, which can be created either from a DRM-backed screen or standalone via ethosu_ml_device_create() with a target string like "65-256". Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40647>
This commit is contained in:
parent
06e5026e28
commit
3b68c5b4bc
9 changed files with 96 additions and 81 deletions
|
|
@ -192,7 +192,7 @@ emit_ifm_precision(struct ethosu_subgraph *subgraph,
|
|||
if (feature_map->is_signed)
|
||||
prec |= NPU_SET_IFM_PRECISION_ACTIVATION(1); // signed activation
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
prec |= NPU_SET_IFM_PRECISION_SCALE_MODE(op_to_scale);
|
||||
|
||||
EMIT0(precision_cmd, prec);
|
||||
|
|
@ -222,13 +222,13 @@ emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_ma
|
|||
EMIT0(NPU_SET_OFM_HEIGHT_M1, feature_map->shape.height - 1);
|
||||
EMIT0(NPU_SET_OFM_WIDTH_M1, feature_map->shape.width - 1);
|
||||
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||
|
||||
emit_tiles(
|
||||
subgraph, feature_map, NPU_SET_OFM_HEIGHT0_M1, NPU_SET_OFM_HEIGHT1_M1, NPU_SET_OFM_WIDTH0_M1);
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||
|
||||
emit_strides(subgraph, feature_map, NPU_SET_OFM_STRIDE_C, NPU_SET_OFM_STRIDE_Y, NPU_SET_OFM_STRIDE_X);
|
||||
|
|
@ -277,7 +277,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
|
|||
static void
|
||||
emit_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT0(NPU_SET_WEIGHT_FORMAT, 0x0);
|
||||
|
||||
EMIT0(NPU_SET_WEIGHT_REGION, operation->conv.weights.region);
|
||||
|
|
@ -378,22 +378,22 @@ emit_acc_format(struct ethosu_subgraph *subgraph, struct ethosu_operation *opera
|
|||
static void
|
||||
emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, enum ethosu_op_to_scale op_to_scale)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION);
|
||||
emit_ifm(subgraph, &operation->ifm);
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION);
|
||||
EMIT0(NPU_SET_IFM_UPSCALE, operation->upscale);
|
||||
|
||||
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||
emit_padding(subgraph, operation);
|
||||
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_ofm_precision(subgraph, operation);
|
||||
|
||||
emit_ofm(subgraph, &operation->ofm);
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_ofm_precision(subgraph, operation);
|
||||
|
||||
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||
|
|
@ -410,7 +410,7 @@ emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
|
|||
static void
|
||||
emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(operation->conv.shift), operation->conv.scale);
|
||||
|
||||
operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor_idx);
|
||||
|
|
@ -426,7 +426,7 @@ emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *oper
|
|||
emit_common(subgraph, operation, false);
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -500,7 +500,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
|
||||
switch (operation->pooling.type) {
|
||||
case ETHOSU_POOLING_TYPE_MAX: {
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
if (!ethosu_ml_device(subgraph->base.device)->is_u65) {
|
||||
EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1);
|
||||
break;
|
||||
} else
|
||||
|
|
@ -533,7 +533,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
}
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -564,7 +564,7 @@ static void
|
|||
emit_ifm2(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar)
|
||||
{
|
||||
if (has_scalar) {
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT0(NPU_SET_IFM2_SCALAR, operation->ifm2.scalar);
|
||||
else {
|
||||
emit_ifm2_precision(subgraph, operation, true);
|
||||
|
|
@ -612,7 +612,7 @@ emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *o
|
|||
{
|
||||
unsigned ifm2_broadcast = 0;
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65) {
|
||||
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed);
|
||||
|
||||
if (has_scalar) {
|
||||
|
|
@ -787,7 +787,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
bool has_scalar = operation->ifm2.scalar != 0;
|
||||
enum ethosu_op_to_scale op_to_scale;
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65) {
|
||||
op_to_scale = eltwise_emit_ofm_scaling(
|
||||
subgraph,
|
||||
operation->ifm.scale,
|
||||
|
|
@ -812,7 +812,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
|
||||
emit_ifm2(subgraph, operation, has_scalar);
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION);
|
||||
else
|
||||
emit_ifm2_precision(subgraph, operation, has_scalar);
|
||||
|
|
@ -820,7 +820,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
emit_ifm2_broadcast(subgraph, operation, has_scalar);
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -1090,7 +1090,7 @@ get_jobs(const struct ethosu_block *area,
|
|||
static unsigned
|
||||
calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device);
|
||||
|
||||
if (!prev_op)
|
||||
return 0;
|
||||
|
|
@ -1102,7 +1102,7 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op
|
|||
ifm_index = 1;
|
||||
} else if (operation->ifm.tensor_idx != prev_op->ofm.tensor_idx) {
|
||||
/* Previous operation doesn't produce current operation's IFM */
|
||||
return screen->max_concurrent_blocks;
|
||||
return device->max_concurrent_blocks;
|
||||
}
|
||||
|
||||
const struct ethosu_feature_map *ifm = (ifm_index == 0) ? &operation->ifm : &operation->ifm2;
|
||||
|
|
@ -1135,7 +1135,7 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op
|
|||
&curr_ifm_job);
|
||||
|
||||
/* Get last jobs from previous operation */
|
||||
int max_jobs = screen->max_concurrent_blocks;
|
||||
int max_jobs = device->max_concurrent_blocks;
|
||||
assert(max_jobs <= 8);
|
||||
struct box last_prev_jobs[8];
|
||||
int prev_count = get_jobs(&prev_ofm->shape, &prev_block, max_jobs, false, last_prev_jobs);
|
||||
|
|
@ -1187,7 +1187,7 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
|
|||
|
||||
/* Compile */
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
EMIT0(NPU_SET_PARALLEL_MODE, 0x0);
|
||||
|
||||
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
|
|||
/* U65 packs 10-byte bias/scale entries contiguously then aligns to 16.
|
||||
* U85 scales are read in groups of 16 channels, so pad depth to a
|
||||
* 16-channel boundary first, then multiply by 10 bytes per entry. */
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
*scales_size = align(operation->ofm.shape.depth * 10, 16);
|
||||
else
|
||||
*scales_size = align(operation->ofm.shape.depth, 16) * 10;
|
||||
|
|
@ -87,7 +87,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
|
|||
uint32_t shift;
|
||||
int scale = ethosu_quantize_scale(conv_scale, &shift);
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65)
|
||||
encode_bias_scale_u65(
|
||||
biases[i], scale, shift, &(*scales)[idx]);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -252,29 +252,36 @@ ethosu_screen_create(int fd,
|
|||
ethosu_screen->fd = fd;
|
||||
dev_query(ethosu_screen);
|
||||
|
||||
bool is_u65 = DRM_ETHOSU_ARCH_MAJOR(ethosu_screen->info.id) == 1;
|
||||
if (DBG_ENABLED(ETHOSU_DBG_FORCE_U85))
|
||||
is_u65 = false;
|
||||
|
||||
ethosu_screen->ml_device.is_u65 = is_u65;
|
||||
ethosu_screen->ml_device.sram_size = ethosu_screen->info.sram_size;
|
||||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM))
|
||||
ethosu_screen->info.sram_size = 0;
|
||||
ethosu_screen->ml_device.sram_size = 0;
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen)) {
|
||||
ethosu_screen->ifm_ublock.width = 2;
|
||||
ethosu_screen->ifm_ublock.height = 2;
|
||||
ethosu_screen->ifm_ublock.depth = 8;
|
||||
if (is_u65) {
|
||||
ethosu_screen->ml_device.ifm_ublock.width = 2;
|
||||
ethosu_screen->ml_device.ifm_ublock.height = 2;
|
||||
ethosu_screen->ml_device.ifm_ublock.depth = 8;
|
||||
|
||||
ethosu_screen->ofm_ublock.width = 2;
|
||||
ethosu_screen->ofm_ublock.height = 2;
|
||||
ethosu_screen->ofm_ublock.depth = 8;
|
||||
ethosu_screen->ml_device.ofm_ublock.width = 2;
|
||||
ethosu_screen->ml_device.ofm_ublock.height = 2;
|
||||
ethosu_screen->ml_device.ofm_ublock.depth = 8;
|
||||
|
||||
ethosu_screen->max_concurrent_blocks = 3;
|
||||
ethosu_screen->ml_device.max_concurrent_blocks = 3;
|
||||
} else {
|
||||
ethosu_screen->ifm_ublock.width = 4;
|
||||
ethosu_screen->ifm_ublock.height = 4;
|
||||
ethosu_screen->ifm_ublock.depth = 16;
|
||||
ethosu_screen->ml_device.ifm_ublock.width = 4;
|
||||
ethosu_screen->ml_device.ifm_ublock.height = 4;
|
||||
ethosu_screen->ml_device.ifm_ublock.depth = 16;
|
||||
|
||||
ethosu_screen->ofm_ublock.width = 4;
|
||||
ethosu_screen->ofm_ublock.height = 1;
|
||||
ethosu_screen->ofm_ublock.depth = 8;
|
||||
ethosu_screen->ml_device.ofm_ublock.width = 4;
|
||||
ethosu_screen->ml_device.ofm_ublock.height = 1;
|
||||
ethosu_screen->ml_device.ofm_ublock.depth = 8;
|
||||
|
||||
ethosu_screen->max_concurrent_blocks = 7;
|
||||
ethosu_screen->ml_device.max_concurrent_blocks = 7;
|
||||
}
|
||||
|
||||
screen->get_screen_fd = ethosu_screen_get_fd;
|
||||
|
|
@ -301,6 +308,17 @@ ethosu_ml_device_create(const char *spec)
|
|||
ethosu_debug = debug_get_option_ethosu_debug();
|
||||
|
||||
device = rzalloc(NULL, struct ethosu_ml_device);
|
||||
|
||||
device->is_u65 = true;
|
||||
device->ifm_ublock.width = 2;
|
||||
device->ifm_ublock.height = 2;
|
||||
device->ifm_ublock.depth = 8;
|
||||
device->ofm_ublock.width = 2;
|
||||
device->ofm_ublock.height = 2;
|
||||
device->ofm_ublock.depth = 8;
|
||||
device->max_concurrent_blocks = 3;
|
||||
device->sram_size = 0;
|
||||
|
||||
set_device_callbacks(device);
|
||||
|
||||
return &device->base;
|
||||
|
|
|
|||
|
|
@ -42,6 +42,13 @@ struct ethosu_block {
|
|||
|
||||
struct ethosu_ml_device {
|
||||
struct pipe_ml_device base;
|
||||
|
||||
/* Target hardware description — set from DRM query or from spec string */
|
||||
bool is_u65;
|
||||
struct ethosu_block ifm_ublock;
|
||||
struct ethosu_block ofm_ublock;
|
||||
unsigned max_concurrent_blocks;
|
||||
uint32_t sram_size;
|
||||
};
|
||||
|
||||
struct ethosu_screen {
|
||||
|
|
@ -50,9 +57,6 @@ struct ethosu_screen {
|
|||
|
||||
int fd;
|
||||
struct drm_ethosu_npu_info info;
|
||||
struct ethosu_block ifm_ublock;
|
||||
struct ethosu_block ofm_ublock;
|
||||
unsigned max_concurrent_blocks;
|
||||
};
|
||||
|
||||
static inline struct ethosu_screen *
|
||||
|
|
@ -61,20 +65,10 @@ ethosu_screen(struct pipe_screen *p)
|
|||
return (struct ethosu_screen *)p;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
ethosu_is_u65(struct ethosu_screen *e)
|
||||
static inline struct ethosu_ml_device *
|
||||
ethosu_ml_device(struct pipe_ml_device *p)
|
||||
{
|
||||
if (DBG_ENABLED(ETHOSU_DBG_FORCE_U85))
|
||||
return false;
|
||||
else
|
||||
return DRM_ETHOSU_ARCH_MAJOR(e->info.id) == 1;
|
||||
}
|
||||
|
||||
static inline struct ethosu_screen *
|
||||
ethosu_device_screen(struct pipe_ml_device *pdevice)
|
||||
{
|
||||
struct ethosu_ml_device *dev = (struct ethosu_ml_device *)pdevice;
|
||||
return container_of(dev, struct ethosu_screen, ml_device);
|
||||
return (struct ethosu_ml_device *)p;
|
||||
}
|
||||
|
||||
struct ethosu_context {
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph,
|
|||
uint8_t **weights,
|
||||
long *weights_size)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device);
|
||||
int bit_depth = 8;
|
||||
bool is_sparse = false;
|
||||
EthosUTraversal traversal;
|
||||
|
|
@ -88,15 +88,15 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph,
|
|||
|
||||
WeightSourceCommon *source;
|
||||
|
||||
if (ethosu_is_u65(screen)) {
|
||||
if (device->is_u65) {
|
||||
if (operation->kernel.is_signed) {
|
||||
source = new EthosUWeightOrdering<int8_t>(1, dilation,
|
||||
operation->block_config.ofm_block.depth, bit_depth, screen->ofm_ublock.depth,
|
||||
screen->ifm_ublock.depth, transform_func, &param, traversal);
|
||||
operation->block_config.ofm_block.depth, bit_depth, device->ofm_ublock.depth,
|
||||
device->ifm_ublock.depth, transform_func, &param, traversal);
|
||||
} else {
|
||||
source = new EthosUWeightOrdering<uint8_t>(1, dilation,
|
||||
operation->block_config.ofm_block.depth, bit_depth, screen->ofm_ublock.depth,
|
||||
screen->ifm_ublock.depth, transform_func, &param, traversal);
|
||||
operation->block_config.ofm_block.depth, bit_depth, device->ofm_ublock.depth,
|
||||
device->ifm_ublock.depth, transform_func, &param, traversal);
|
||||
}
|
||||
} else {
|
||||
if (operation->kernel.is_signed) {
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
|
|||
{
|
||||
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
if (ethosu_ml_device(subgraph->base.device)->is_u65) {
|
||||
operation->pooling.type = ETHOSU_POOLING_TYPE_AVG;
|
||||
operation->round_mode = ETHOSU_ROUNDING_NATURAL;
|
||||
} else
|
||||
|
|
@ -432,7 +432,7 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph,
|
|||
}
|
||||
|
||||
if (operation.conv.scales.size + operation.conv.weights.size <=
|
||||
ethosu_device_screen(subgraph->base.device)->info.sram_size) {
|
||||
ethosu_ml_device(subgraph->base.device)->sram_size) {
|
||||
struct ethosu_operation dma_operation = {0};
|
||||
ethosu_lower_dma(subgraph, &poperations[i], &operation, &dma_operation);
|
||||
|
||||
|
|
|
|||
|
|
@ -281,7 +281,8 @@ static void
|
|||
prepare_for_submission(struct ethosu_subgraph *subgraph,
|
||||
struct pipe_context *pcontext)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||
subgraph->screen = ethosu_screen(pcontext->screen);
|
||||
struct ethosu_screen *screen = subgraph->screen;
|
||||
uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) *
|
||||
sizeof(*subgraph->cursor);
|
||||
|
||||
|
|
@ -370,7 +371,7 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
|||
job.region_bo_handles[COEFS_REGION] = ethosu_resource(subgraph->coefs_rsrc)->handle;
|
||||
if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM)) {
|
||||
job.region_bo_handles[SCRATCH_REGION] = 0;
|
||||
job.sram_size = screen->info.sram_size;
|
||||
job.sram_size = ethosu_ml_device(subgraph->base.device)->sram_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -434,7 +435,7 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
|||
|
||||
if (subgraph->io_rsrc) {
|
||||
/* Post-submission state: cleanup DRM resources */
|
||||
struct ethosu_screen *screen = ethosu_device_screen(pdevice);
|
||||
struct ethosu_screen *screen = subgraph->screen;
|
||||
struct drm_gem_close arg = {0};
|
||||
int ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -201,6 +201,8 @@ struct ethosu_tensor {
|
|||
struct ethosu_subgraph {
|
||||
struct pipe_ml_subgraph base;
|
||||
|
||||
struct ethosu_screen *screen; /* Set during prepare_for_submission */
|
||||
|
||||
struct util_dynarray operations; /* ethosu_operation */
|
||||
struct util_dynarray tensors; /* ethosu_tensor */
|
||||
|
||||
|
|
|
|||
|
|
@ -15,16 +15,16 @@ required_input_size(int value, int stride, int border)
|
|||
static struct ethosu_block
|
||||
_get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device);
|
||||
struct ethosu_block ifm_block = {0};
|
||||
|
||||
// IFM block height
|
||||
int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
|
||||
h = align(h, screen->ofm_ublock.height);
|
||||
h = align(h, device->ofm_ublock.height);
|
||||
|
||||
// IFM block width
|
||||
int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
|
||||
w = align(w, screen->ofm_ublock.width);
|
||||
w = align(w, device->ofm_ublock.width);
|
||||
|
||||
ifm_block.height = h;
|
||||
ifm_block.width = w;
|
||||
|
|
@ -73,7 +73,7 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
|
|||
static struct ethosu_block_config
|
||||
find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device);
|
||||
struct ethosu_block_config config = {};
|
||||
struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX;
|
||||
float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth;
|
||||
|
|
@ -89,7 +89,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
search_space.height = MIN2(search_space.height, operation->ofm.shape.height);
|
||||
search_space.depth = MIN2(search_space.depth, operation->ofm.shape.depth);
|
||||
|
||||
unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
|
||||
unsigned depth = MAX2(device->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
|
||||
|
||||
bool is_part_kernel = false;
|
||||
if (is_convolution) {
|
||||
|
|
@ -106,16 +106,16 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
depth = align(depth, ARCH_SPLIT_DEPTH);
|
||||
}
|
||||
|
||||
search_space.width = align(search_space.width, screen->ofm_ublock.width);
|
||||
search_space.height = align(search_space.height, screen->ofm_ublock.height);
|
||||
search_space.depth = align(search_space.depth, screen->ofm_ublock.depth);
|
||||
search_space.width = align(search_space.width, device->ofm_ublock.width);
|
||||
search_space.height = align(search_space.height, device->ofm_ublock.height);
|
||||
search_space.depth = align(search_space.depth, device->ofm_ublock.depth);
|
||||
|
||||
while (depth <= search_space.depth) {
|
||||
bool wont_fit[search_space.height + 1][search_space.width + 1];
|
||||
memset(wont_fit, 0, sizeof(wont_fit));
|
||||
|
||||
for (unsigned height = screen->ofm_ublock.height; height <= search_space.height; height += screen->ofm_ublock.height) {
|
||||
for (unsigned width = screen->ofm_ublock.width; width <= search_space.width; width += screen->ofm_ublock.width) {
|
||||
for (unsigned height = device->ofm_ublock.height; height <= search_space.height; height += device->ofm_ublock.height) {
|
||||
for (unsigned width = device->ofm_ublock.width; width <= search_space.width; width += device->ofm_ublock.width) {
|
||||
|
||||
if (wont_fit[height][width])
|
||||
continue;
|
||||
|
|
@ -124,7 +124,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
struct ethosu_block ifm_block = _get_ifm_blocksize(subgraph, operation, ofm_block);
|
||||
|
||||
if (!is_equal_depth)
|
||||
ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 16 : 32), screen->ifm_ublock.depth);
|
||||
ifm_block.depth = align(MIN2(operation->ifm.shape.depth, is_part_kernel ? 16 : 32), device->ifm_ublock.depth);
|
||||
|
||||
// Try to fit the blocks in SHRAM
|
||||
struct ethosu_shram_layout layout = {0};
|
||||
|
|
@ -181,7 +181,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
config.ofm_block.height = height;
|
||||
config.ofm_block.width = width;
|
||||
config.ofm_block.depth = depth;
|
||||
config.ofm_ublock = screen->ofm_ublock;
|
||||
config.ofm_ublock = device->ofm_ublock;
|
||||
config.is_partkernel = is_part_kernel;
|
||||
|
||||
best_cost = relative_cost;
|
||||
|
|
@ -193,7 +193,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
}
|
||||
}
|
||||
|
||||
depth += screen->ofm_ublock.depth;
|
||||
depth += device->ofm_ublock.depth;
|
||||
if (depth < operation->ofm.shape.depth) {
|
||||
depth = align(depth, ARCH_SPLIT_DEPTH);
|
||||
}
|
||||
|
|
@ -205,9 +205,9 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
void
|
||||
ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_ml_device *device = ethosu_ml_device(subgraph->base.device);
|
||||
|
||||
if (ethosu_is_u65(screen))
|
||||
if (device->is_u65)
|
||||
operation->block_config = find_block_config(subgraph, operation);
|
||||
else
|
||||
operation->block_config = find_block_config_u85(subgraph, operation);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue