ethosu: Add reshape operation

A reshape operation just changes the dimensions of a tensor, but doesn't
change the data at all. So we just point the OFM to the IFM data and
we're done.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39975>
This commit is contained in:
Rob Herring (Arm) 2026-03-11 09:57:49 -05:00 committed by Marge Bot
parent 08d93a60f5
commit dce4b0313a
4 changed files with 87 additions and 26 deletions

View file

@ -869,6 +869,9 @@ emit_operation_code(struct ethosu_subgraph *subgraph, struct ethosu_operation *o
case ETHOSU_OPERATION_TYPE_DMA:
EMIT0(NPU_OP_DMA_START, 0x0);
break;
default:
assert(0);
break;
}
}
@ -976,6 +979,8 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph)
{
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
switch (operation->type) {
case ETHOSU_OPERATION_TYPE_NONE:
break;
case ETHOSU_OPERATION_TYPE_DMA:
operation->read_accesses[0].region = COEFS_REGION;
operation->read_accesses[0].address = operation->dma.address;
@ -1000,6 +1005,15 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph)
operation->write_accesses[0].address = operation->ofm.tiles.addresses[0];
operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth;
break;
case ETHOSU_OPERATION_TYPE_CONVOLUTION:
operation->read_accesses[2].region = operation->conv.scales.region;
operation->read_accesses[2].address = operation->conv.scales.address;
operation->read_accesses[2].size = operation->conv.scales.size;
operation->read_accesses[3].region = operation->conv.weights.region;
operation->read_accesses[3].address = operation->conv.weights.address;
operation->read_accesses[3].size = operation->conv.weights.size;
/* fall-through */
default:
operation->read_accesses[0].region = IO_REGION;
operation->read_accesses[0].address = operation->ifm.tiles.addresses[0];
@ -1009,14 +1023,6 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph)
operation->read_accesses[1].address = operation->ifm2.tiles.addresses[0];
operation->read_accesses[1].size = operation->ifm2.shape.height * operation->ifm2.shape.width * operation->ifm2.shape.depth;
operation->read_accesses[2].region = operation->conv.scales.region;
operation->read_accesses[2].address = operation->conv.scales.address;
operation->read_accesses[2].size = operation->conv.scales.size;
operation->read_accesses[3].region = operation->conv.weights.region;
operation->read_accesses[3].address = operation->conv.weights.address;
operation->read_accesses[3].size = operation->conv.weights.size;
operation->write_accesses[0].region = IO_REGION;
operation->write_accesses[0].address = operation->ofm.tiles.addresses[0];
operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth;
@ -1126,8 +1132,11 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op
if (operation->ifm2.tensor == prev_op->ofm.tensor) {
ifm_index = 1;
} else if (operation->ifm.tensor != prev_op->ofm.tensor) {
/* Previous operation doesn't produce current operation's IFM */
return device->max_concurrent_blocks;
if (prev_op->type == ETHOSU_OPERATION_TYPE_NONE)
return 0;
else
/* Previous operation doesn't produce current operation's IFM */
return device->max_concurrent_blocks;
}
const struct ethosu_feature_map *ifm = (ifm_index == 0) ? &operation->ifm : &operation->ifm2;
@ -1200,6 +1209,16 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
struct ethosu_operation *prev_op = NULL;
struct util_dynarray outstanding_dma_ops;
struct util_dynarray outstanding_npu_ops;
bool has_op = false;
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
if (operation->type != ETHOSU_OPERATION_TYPE_NONE) {
has_op = true;
break;
}
}
if (!has_op)
return;
outstanding_dma_ops = UTIL_DYNARRAY_INIT;
outstanding_npu_ops = UTIL_DYNARRAY_INIT;
@ -1219,6 +1238,11 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
int npu_waits, dma_waits;
if (operation->type == ETHOSU_OPERATION_TYPE_NONE) {
prev_op = operation;
continue;
}
get_wait_dependency(subgraph, operation, &outstanding_dma_ops, &outstanding_npu_ops,
&npu_waits, &dma_waits);
@ -1235,6 +1259,9 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
case ETHOSU_OPERATION_TYPE_DMA:
emit_dma(subgraph, operation);
break;
default:
UNREACHABLE("Unknown operation");
break;
}
if (operation->type != ETHOSU_OPERATION_TYPE_DMA) {

View file

@ -602,6 +602,22 @@ ethosu_lower_quantize(struct ethosu_subgraph *subgraph,
ethosu_sched_operation(subgraph, operation);
}
static void
ethosu_lower_reshape(struct ethosu_subgraph *subgraph,
const struct pipe_ml_operation *poperation,
struct ethosu_operation *operation)
{
operation->type = ETHOSU_OPERATION_TYPE_NONE;
set_feature_maps(subgraph, poperation->input_tensors[0], poperation->output_tensors[0], operation);
operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor);
operation->ofm.tiles.addresses[0] = operation->ifm.tiles.addresses[0];
operation->ofm.tensor->offset = operation->ifm.tensor->offset;
operation->ofm.tensor->size = operation->ifm.tensor->size;
operation->ofm.tensor->layout = operation->ifm.tensor->layout;
}
static void
ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
const struct pipe_ml_operation *poperation,
@ -786,9 +802,11 @@ register_tensors(struct ethosu_subgraph *subgraph,
if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_NHCWB16)) {
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, ptensor->index);
if (tensor->shape.depth % 16 == 0 &&
ethosu_find_first_consumer(poperations, count, ptensor->index)) {
tensor->layout = ETHOSU_LAYOUT_NHCWB16;
if (tensor->shape.depth % 16 == 0) {
const struct pipe_ml_operation *consumer =
ethosu_find_first_consumer(poperations, count, ptensor->index);
if (consumer && consumer->type != PIPE_ML_OPERATION_TYPE_RESHAPE)
tensor->layout = ETHOSU_LAYOUT_NHCWB16;
}
}
}
@ -940,6 +958,12 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph,
break;
}
case PIPE_ML_OPERATION_TYPE_RESHAPE: {
ethosu_lower_reshape(subgraph, &poperations[i], &operation);
util_dynarray_append(&subgraph->operations, operation);
break;
}
default:
DBG("poperation->type %d\n", poperations[i].type);
UNREACHABLE("Unsupported ML operation type");

View file

@ -150,6 +150,7 @@ ethosu_ml_operation_supported(struct pipe_ml_device *pdevice,
case PIPE_ML_OPERATION_TYPE_HSWISH:
case PIPE_ML_OPERATION_TYPE_LEAKY_RELU:
case PIPE_ML_OPERATION_TYPE_QUANTIZE:
case PIPE_ML_OPERATION_TYPE_RESHAPE:
supported = true;
break;
case PIPE_ML_OPERATION_TYPE_RESIZE: {
@ -270,6 +271,7 @@ static void
prepare_for_submission(struct ethosu_subgraph *subgraph,
struct pipe_context *pcontext)
{
int ret;
subgraph->screen = ethosu_screen(pcontext->screen);
struct ethosu_screen *screen = subgraph->screen;
uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) *
@ -279,19 +281,21 @@ prepare_for_submission(struct ethosu_subgraph *subgraph,
ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0,
cmdstream_size);
struct drm_ethosu_cmdstream_bo_create cmd_bo_create = {
.size = cmdstream_size,
.data = (uintptr_t)subgraph->cmdstream,
};
if (cmdstream_size) {
struct drm_ethosu_cmdstream_bo_create cmd_bo_create = {
.size = cmdstream_size,
.data = (uintptr_t)subgraph->cmdstream,
};
int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE,
&cmd_bo_create);
assert(ret == 0);
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE,
&cmd_bo_create);
assert(ret == 0);
free(subgraph->cmdstream);
subgraph->cmdstream = NULL;
free(subgraph->cmdstream);
subgraph->cmdstream = NULL;
subgraph->cmdstream_bo = cmd_bo_create.handle;
subgraph->cmdstream_bo = cmd_bo_create.handle;
}
DBG("subgraph->coefs_used %d\n", subgraph->coefs_used);
if (subgraph->coefs_used > 0) {
@ -424,6 +428,9 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
pipe_buffer_unmap(pcontext, transfer_in);
}
if (!subgraph->cmdstream_bo)
return;
job.cmd_bo = subgraph->cmdstream_bo;
if (subgraph->coefs_rsrc) {
@ -501,9 +508,11 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
pipe_resource_reference(&subgraph->io_rsrc, NULL);
pipe_resource_reference(&subgraph->coefs_rsrc, NULL);
arg.handle = subgraph->cmdstream_bo;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
if (subgraph->cmdstream_bo) {
arg.handle = subgraph->cmdstream_bo;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
}
} else {
/* Pre-submission state: cleanup raw buffers */
free(subgraph->cmdstream);

View file

@ -39,6 +39,7 @@ extern struct ethosu_block SUB_KERNEL_MAX;
#define LUT_REGION 0x103 // Internal SHRAM
enum ethosu_operation_type {
ETHOSU_OPERATION_TYPE_NONE,
ETHOSU_OPERATION_TYPE_CONVOLUTION,
ETHOSU_OPERATION_TYPE_POOLING,
ETHOSU_OPERATION_TYPE_ELTWISE,