diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index a74df5debe1..ba63a2b9ee3 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -869,6 +869,9 @@ emit_operation_code(struct ethosu_subgraph *subgraph, struct ethosu_operation *o case ETHOSU_OPERATION_TYPE_DMA: EMIT0(NPU_OP_DMA_START, 0x0); break; + default: + assert(0); + break; } } @@ -976,6 +979,8 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph) { util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { switch (operation->type) { + case ETHOSU_OPERATION_TYPE_NONE: + break; case ETHOSU_OPERATION_TYPE_DMA: operation->read_accesses[0].region = COEFS_REGION; operation->read_accesses[0].address = operation->dma.address; @@ -1000,6 +1005,15 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph) operation->write_accesses[0].address = operation->ofm.tiles.addresses[0]; operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth; break; + case ETHOSU_OPERATION_TYPE_CONVOLUTION: + operation->read_accesses[2].region = operation->conv.scales.region; + operation->read_accesses[2].address = operation->conv.scales.address; + operation->read_accesses[2].size = operation->conv.scales.size; + + operation->read_accesses[3].region = operation->conv.weights.region; + operation->read_accesses[3].address = operation->conv.weights.address; + operation->read_accesses[3].size = operation->conv.weights.size; + /* fall-through */ default: operation->read_accesses[0].region = IO_REGION; operation->read_accesses[0].address = operation->ifm.tiles.addresses[0]; @@ -1009,14 +1023,6 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph) operation->read_accesses[1].address = operation->ifm2.tiles.addresses[0]; operation->read_accesses[1].size = operation->ifm2.shape.height * operation->ifm2.shape.width * operation->ifm2.shape.depth; - operation->read_accesses[2].region = operation->conv.scales.region; - operation->read_accesses[2].address = operation->conv.scales.address; - operation->read_accesses[2].size = operation->conv.scales.size; - - operation->read_accesses[3].region = operation->conv.weights.region; - operation->read_accesses[3].address = operation->conv.weights.address; - operation->read_accesses[3].size = operation->conv.weights.size; - operation->write_accesses[0].region = IO_REGION; operation->write_accesses[0].address = operation->ofm.tiles.addresses[0]; operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth; @@ -1126,8 +1132,11 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op if (operation->ifm2.tensor == prev_op->ofm.tensor) { ifm_index = 1; } else if (operation->ifm.tensor != prev_op->ofm.tensor) { - /* Previous operation doesn't produce current operation's IFM */ - return device->max_concurrent_blocks; + if (prev_op->type == ETHOSU_OPERATION_TYPE_NONE) + return 0; + else + /* Previous operation doesn't produce current operation's IFM */ + return device->max_concurrent_blocks; } const struct ethosu_feature_map *ifm = (ifm_index == 0) ? &operation->ifm : &operation->ifm2; @@ -1200,6 +1209,16 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph) struct ethosu_operation *prev_op = NULL; struct util_dynarray outstanding_dma_ops; struct util_dynarray outstanding_npu_ops; + bool has_op = false; + + util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { + if (operation->type != ETHOSU_OPERATION_TYPE_NONE) { + has_op = true; + break; + } + } + if (!has_op) + return; outstanding_dma_ops = UTIL_DYNARRAY_INIT; outstanding_npu_ops = UTIL_DYNARRAY_INIT; @@ -1219,6 +1238,11 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph) int npu_waits, dma_waits; + if (operation->type == ETHOSU_OPERATION_TYPE_NONE) { + prev_op = operation; + continue; + } + get_wait_dependency(subgraph, operation, &outstanding_dma_ops, &outstanding_npu_ops, &npu_waits, &dma_waits); @@ -1235,6 +1259,9 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph) case ETHOSU_OPERATION_TYPE_DMA: emit_dma(subgraph, operation); break; + default: + UNREACHABLE("Unknown operation"); + break; } if (operation->type != ETHOSU_OPERATION_TYPE_DMA) { diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index ff20bf0f4b7..d7e84e8767e 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -602,6 +602,22 @@ ethosu_lower_quantize(struct ethosu_subgraph *subgraph, ethosu_sched_operation(subgraph, operation); } +static void +ethosu_lower_reshape(struct ethosu_subgraph *subgraph, + const struct pipe_ml_operation *poperation, + struct ethosu_operation *operation) +{ + operation->type = ETHOSU_OPERATION_TYPE_NONE; + + set_feature_maps(subgraph, poperation->input_tensors[0], poperation->output_tensors[0], operation); + operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor); + operation->ofm.tiles.addresses[0] = operation->ifm.tiles.addresses[0]; + + operation->ofm.tensor->offset = operation->ifm.tensor->offset; + operation->ofm.tensor->size = operation->ifm.tensor->size; + operation->ofm.tensor->layout = operation->ifm.tensor->layout; +} + static void ethosu_lower_concatenation(struct ethosu_subgraph *subgraph, const struct pipe_ml_operation *poperation, @@ -786,9 +802,11 @@ register_tensors(struct ethosu_subgraph *subgraph, if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_NHCWB16)) { struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, ptensor->index); - if (tensor->shape.depth % 16 == 0 && - ethosu_find_first_consumer(poperations, count, ptensor->index)) { - tensor->layout = ETHOSU_LAYOUT_NHCWB16; + if (tensor->shape.depth % 16 == 0) { + const struct pipe_ml_operation *consumer = + ethosu_find_first_consumer(poperations, count, ptensor->index); + if (consumer && consumer->type != PIPE_ML_OPERATION_TYPE_RESHAPE) + tensor->layout = ETHOSU_LAYOUT_NHCWB16; } } } @@ -940,6 +958,12 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph, break; } + case PIPE_ML_OPERATION_TYPE_RESHAPE: { + ethosu_lower_reshape(subgraph, &poperations[i], &operation); + util_dynarray_append(&subgraph->operations, operation); + break; + } + default: DBG("poperation->type %d\n", poperations[i].type); UNREACHABLE("Unsupported ML operation type"); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index 551ab7b0a49..a227e35505e 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -150,6 +150,7 @@ ethosu_ml_operation_supported(struct pipe_ml_device *pdevice, case PIPE_ML_OPERATION_TYPE_HSWISH: case PIPE_ML_OPERATION_TYPE_LEAKY_RELU: case PIPE_ML_OPERATION_TYPE_QUANTIZE: + case PIPE_ML_OPERATION_TYPE_RESHAPE: supported = true; break; case PIPE_ML_OPERATION_TYPE_RESIZE: { @@ -270,6 +271,7 @@ static void prepare_for_submission(struct ethosu_subgraph *subgraph, struct pipe_context *pcontext) { + int ret; subgraph->screen = ethosu_screen(pcontext->screen); struct ethosu_screen *screen = subgraph->screen; uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) * @@ -279,19 +281,21 @@ prepare_for_submission(struct ethosu_subgraph *subgraph, ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0, cmdstream_size); - struct drm_ethosu_cmdstream_bo_create cmd_bo_create = { - .size = cmdstream_size, - .data = (uintptr_t)subgraph->cmdstream, - }; + if (cmdstream_size) { + struct drm_ethosu_cmdstream_bo_create cmd_bo_create = { + .size = cmdstream_size, + .data = (uintptr_t)subgraph->cmdstream, + }; - int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, - &cmd_bo_create); - assert(ret == 0); + ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, + &cmd_bo_create); + assert(ret == 0); - free(subgraph->cmdstream); - subgraph->cmdstream = NULL; + free(subgraph->cmdstream); + subgraph->cmdstream = NULL; - subgraph->cmdstream_bo = cmd_bo_create.handle; + subgraph->cmdstream_bo = cmd_bo_create.handle; + } DBG("subgraph->coefs_used %d\n", subgraph->coefs_used); if (subgraph->coefs_used > 0) { @@ -424,6 +428,9 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, pipe_buffer_unmap(pcontext, transfer_in); } + if (!subgraph->cmdstream_bo) + return; + job.cmd_bo = subgraph->cmdstream_bo; if (subgraph->coefs_rsrc) { @@ -501,9 +508,11 @@ ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice, pipe_resource_reference(&subgraph->io_rsrc, NULL); pipe_resource_reference(&subgraph->coefs_rsrc, NULL); - arg.handle = subgraph->cmdstream_bo; - ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); - assert(ret >= 0); + if (subgraph->cmdstream_bo) { + arg.handle = subgraph->cmdstream_bo; + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + } } else { /* Pre-submission state: cleanup raw buffers */ free(subgraph->cmdstream); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index fcaf45003dc..36b26aebb8e 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -39,6 +39,7 @@ extern struct ethosu_block SUB_KERNEL_MAX; #define LUT_REGION 0x103 // Internal SHRAM enum ethosu_operation_type { + ETHOSU_OPERATION_TYPE_NONE, ETHOSU_OPERATION_TYPE_CONVOLUTION, ETHOSU_OPERATION_TYPE_POOLING, ETHOSU_OPERATION_TYPE_ELTWISE,