diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index 988567787e0..41bef2f5dd5 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -192,7 +192,7 @@ emit_ifm_precision(struct ethosu_subgraph *subgraph, if (feature_map->is_signed) prec |= NPU_SET_IFM_PRECISION_ACTIVATION(1); // signed activation - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) prec |= NPU_SET_IFM_PRECISION_SCALE_MODE(op_to_scale); EMIT0(precision_cmd, prec); @@ -222,13 +222,13 @@ emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_ma EMIT0(NPU_SET_OFM_HEIGHT_M1, feature_map->shape.height - 1); EMIT0(NPU_SET_OFM_WIDTH_M1, feature_map->shape.width - 1); - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1); emit_tiles( subgraph, feature_map, NPU_SET_OFM_HEIGHT0_M1, NPU_SET_OFM_HEIGHT1_M1, NPU_SET_OFM_WIDTH0_M1); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1); emit_strides(subgraph, feature_map, NPU_SET_OFM_STRIDE_C, NPU_SET_OFM_STRIDE_Y, NPU_SET_OFM_STRIDE_X); @@ -277,7 +277,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation static void emit_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT0(NPU_SET_WEIGHT_FORMAT, 0x0); EMIT0(NPU_SET_WEIGHT_REGION, operation->conv.weights.region); @@ -378,22 +378,22 @@ emit_acc_format(struct ethosu_subgraph *subgraph, struct ethosu_operation *opera static void emit_common(struct 
ethosu_subgraph *subgraph, struct ethosu_operation *operation, enum ethosu_op_to_scale op_to_scale) { - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION); emit_ifm(subgraph, &operation->ifm); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION); EMIT0(NPU_SET_IFM_UPSCALE, operation->upscale); if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) emit_padding(subgraph, operation); - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_ofm_precision(subgraph, operation); emit_ofm(subgraph, &operation->ofm); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_ofm_precision(subgraph, operation); if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) @@ -410,7 +410,7 @@ emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation static void emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(operation->conv.shift), operation->conv.scale); operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor_idx); @@ -426,7 +426,7 @@ emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *oper emit_common(subgraph, operation, false); emit_block_config(subgraph, operation); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if 
(ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -500,7 +500,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio switch (operation->pooling.type) { case ETHOSU_POOLING_TYPE_MAX: { - if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) { + if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1); break; } else @@ -533,7 +533,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio } emit_block_config(subgraph, operation); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -564,7 +564,7 @@ static void emit_ifm2(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar) { if (has_scalar) { - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT0(NPU_SET_IFM2_SCALAR, operation->ifm2.scalar); else { emit_ifm2_precision(subgraph, operation, true); @@ -612,7 +612,7 @@ emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *o { unsigned ifm2_broadcast = 0; - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) { + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed); if (has_scalar) { @@ -787,7 +787,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio bool has_scalar = operation->ifm2.scalar != 0; enum ethosu_op_to_scale op_to_scale; - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) { + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { op_to_scale = 
eltwise_emit_ofm_scaling( subgraph, operation->ifm.scale, @@ -812,7 +812,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio emit_ifm2(subgraph, operation, has_scalar); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION); else emit_ifm2_precision(subgraph, operation, has_scalar); @@ -820,7 +820,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio emit_ifm2_broadcast(subgraph, operation, has_scalar); emit_block_config(subgraph, operation); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) emit_shram_registers(subgraph, operation); else emit_acc_format(subgraph, operation); @@ -1090,7 +1090,7 @@ get_jobs(const struct ethosu_block *area, static unsigned calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); + struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); if (!prev_op) return 0; @@ -1187,7 +1187,7 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph) /* Compile */ - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) EMIT0(NPU_SET_PARALLEL_MODE, 0x0); util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index c52782fb41a..db94314fab3 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -63,7 +63,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation /* U65 packs 10-byte bias/scale entries 
contiguously then aligns to 16. * U85 scales are read in groups of 16 channels, so pad depth to a * 16-channel boundary first, then multiply by 10 bytes per entry. */ - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) *scales_size = align(operation->ofm.shape.depth * 10, 16); else *scales_size = align(operation->ofm.shape.depth, 16) * 10; @@ -87,7 +87,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation uint32_t shift; int scale = ethosu_quantize_scale(conv_scale, &shift); - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) encode_bias_scale_u65( biases[i], scale, shift, &(*scales)[idx]); else diff --git a/src/gallium/drivers/ethosu/ethosu_device.c b/src/gallium/drivers/ethosu/ethosu_device.c index 5bbd6eeb512..09a3c8ca880 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.c +++ b/src/gallium/drivers/ethosu/ethosu_device.c @@ -5,10 +5,12 @@ #include "ethosu_device.h" #include "ethosu_ml.h" +#include "ethosu_public.h" #include "drm-uapi/ethosu_accel.h" #include +#include #include "util/os_mman.h" #include "util/u_inlines.h" #include "util/u_surface.h" @@ -113,11 +115,8 @@ ethosu_create_context(struct pipe_screen *screen, pctx->buffer_subdata = u_default_buffer_subdata; pctx->clear_buffer = u_default_clear_buffer; - pctx->ml_operation_supported = ethosu_ml_operation_supported; - pctx->ml_subgraph_create = ethosu_ml_subgraph_create; pctx->ml_subgraph_invoke = ethosu_ml_subgraph_invoke; pctx->ml_subgraph_read_output = ethosu_ml_subgraph_read_outputs; - pctx->ml_subgraph_destroy = ethosu_ml_subgraph_destroy; return pctx; } @@ -217,6 +216,23 @@ dev_query(struct ethosu_screen *screen) assert(ret != -1); } +static struct pipe_ml_device * +ethosu_ml_device_create_accel(struct pipe_screen *pscreen) +{ + struct ethosu_screen *screen = ethosu_screen(pscreen); + + return 
&screen->ml_device.base; +} + +static void +set_device_callbacks(struct ethosu_ml_device *device) +{ + device->base.ml_operation_supported = ethosu_ml_operation_supported; + device->base.ml_subgraph_create = ethosu_ml_subgraph_create; + device->base.ml_subgraph_serialize = ethosu_ml_subgraph_serialize; + device->base.ml_subgraph_destroy = ethosu_ml_subgraph_destroy; +} + struct pipe_screen * ethosu_screen_create(int fd, const struct pipe_screen_config *config, @@ -266,6 +282,26 @@ ethosu_screen_create(int fd, screen->context_create = ethosu_create_context; screen->resource_create = ethosu_resource_create; screen->resource_destroy = ethosu_resource_destroy; + screen->get_ml_device = ethosu_ml_device_create_accel; + + ethosu_screen->ml_device.base.id = "ethosu-65-256"; + set_device_callbacks(&ethosu_screen->ml_device); return screen; +} + +struct pipe_ml_device * +ethosu_ml_device_create(const char *spec) +{ + struct ethosu_ml_device *device = NULL; + + if (strcmp(spec, "65-256") != 0) + return NULL; + + ethosu_debug = debug_get_option_ethosu_debug(); + + device = rzalloc(NULL, struct ethosu_ml_device); + set_device_callbacks(device); + + return &device->base; } \ No newline at end of file diff --git a/src/gallium/drivers/ethosu/ethosu_device.h b/src/gallium/drivers/ethosu/ethosu_device.h index 4f13aaba917..e33490984a5 100644 --- a/src/gallium/drivers/ethosu/ethosu_device.h +++ b/src/gallium/drivers/ethosu/ethosu_device.h @@ -40,8 +40,13 @@ struct ethosu_block { unsigned depth; }; +struct ethosu_ml_device { + struct pipe_ml_device base; +}; + struct ethosu_screen { struct pipe_screen pscreen; + struct ethosu_ml_device ml_device; int fd; struct drm_ethosu_npu_info info; @@ -65,6 +70,13 @@ ethosu_is_u65(struct ethosu_screen *e) return DRM_ETHOSU_ARCH_MAJOR(e->info.id) == 1; } +static inline struct ethosu_screen * +ethosu_device_screen(struct pipe_ml_device *pdevice) +{ + struct ethosu_ml_device *dev = (struct ethosu_ml_device *)pdevice; + return container_of(dev, struct 
ethosu_screen, ml_device); +} + struct ethosu_context { struct pipe_context base; }; diff --git a/src/gallium/drivers/ethosu/ethosu_encode.cpp b/src/gallium/drivers/ethosu/ethosu_encode.cpp index f6d1d963fdd..6b9cfa083a6 100644 --- a/src/gallium/drivers/ethosu/ethosu_encode.cpp +++ b/src/gallium/drivers/ethosu/ethosu_encode.cpp @@ -58,7 +58,7 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph, uint8_t **weights, long *weights_size) { - struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); + struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); int bit_depth = 8; bool is_sparse = false; EthosUTraversal traversal; diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index f1f5485fd00..209dc6c6a09 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -201,7 +201,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph, { operation->type = ETHOSU_OPERATION_TYPE_POOLING; - if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) { + if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) { operation->pooling.type = ETHOSU_POOLING_TYPE_AVG; operation->round_mode = ETHOSU_ROUNDING_NATURAL; } else @@ -432,7 +432,7 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph, } if (operation.conv.scales.size + operation.conv.weights.size <= - ethosu_screen(subgraph->base.context->screen)->info.sram_size) { + ethosu_device_screen(subgraph->base.device)->info.sram_size) { struct ethosu_operation dma_operation = {0}; ethosu_lower_dma(subgraph, &poperations[i], &operation, &dma_operation); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index 47b59efefd8..37167982a01 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -136,7 +136,7 @@ ethosu_quantize_scale(double scale, uint32_t *shift) } bool 
-ethosu_ml_operation_supported(struct pipe_context *pcontext, +ethosu_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operation) { bool supported = false; @@ -183,16 +183,14 @@ ethosu_ml_operation_supported(struct pipe_context *pcontext, } struct pipe_ml_subgraph * -ethosu_ml_subgraph_create(struct pipe_context *pcontext, +ethosu_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *poperations, unsigned count) { - struct pipe_screen *pscreen = pcontext->screen; - struct ethosu_screen *screen = ethosu_screen(pscreen); struct ethosu_subgraph *subgraph; subgraph = calloc(1, sizeof(*subgraph)); - subgraph->base.context = pcontext; + subgraph->base.device = pdevice; subgraph->tensors = UTIL_DYNARRAY_INIT; subgraph->operations = UTIL_DYNARRAY_INIT; @@ -216,42 +214,120 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext, ethosu_emit_cmdstream(subgraph); + util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { + free(operation->kernel.scales); + free(operation->kernel.zero_points); + } + util_dynarray_fini(&subgraph->operations); + + free(subgraph->cmd0_state); + free(subgraph->cmd1_state); + free(subgraph->cmd0_valid); + free(subgraph->cmd1_valid); + + return &subgraph->base; +} + +uint8_t * +ethosu_ml_subgraph_serialize(struct pipe_ml_device *pdevice, + struct pipe_ml_subgraph *psubgraph, + size_t *size) +{ + struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph); + uint64_t header_size = NUM_HEADER_FIELDS * sizeof(uint64_t); + uint64_t tensors_size = util_dynarray_num_elements(&subgraph->tensors, + struct ethosu_tensor) * NUM_TENSOR_FIELDS * sizeof(uint32_t); + uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) * + sizeof(*subgraph->cursor); + uint64_t coefs_size = subgraph->coefs_used * sizeof(*subgraph->coefs); + uint64_t io_size = subgraph->io_used; + uint64_t total_size = header_size + cmdstream_size + coefs_size + + 
tensors_size; + uint8_t *buffer, *cursor; + + buffer = malloc(total_size); + if (!buffer) + return NULL; + + cursor = buffer; + + uint64_t *header = (uint64_t *)cursor; + header[0] = cmdstream_size; + header[1] = coefs_size; + header[2] = io_size; + header[3] = tensors_size; + cursor += header_size; + + uint32_t *tensors = (uint32_t *)cursor; + util_dynarray_foreach(&subgraph->tensors, struct ethosu_tensor, tensor) { + tensors[0] = tensor->index; + tensors[1] = tensor->offset; + tensors[2] = tensor->size; + tensors += NUM_TENSOR_FIELDS; + } + cursor += tensors_size; + + memcpy(cursor, subgraph->cmdstream, cmdstream_size); + cursor += cmdstream_size; + + if (coefs_size > 0) + memcpy(cursor, subgraph->coefs, coefs_size); + + *size = total_size; + return buffer; +} + +static void +prepare_for_submission(struct ethosu_subgraph *subgraph, + struct pipe_context *pcontext) +{ + struct ethosu_screen *screen = ethosu_screen(pcontext->screen); + uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) * + sizeof(*subgraph->cursor); + + if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) + ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0, + cmdstream_size); + struct drm_ethosu_cmdstream_bo_create cmd_bo_create = { - .size = (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor), + .size = cmdstream_size, .data = (uintptr_t)subgraph->cmdstream, }; - if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) - ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0, (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor)); - - int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, &cmd_bo_create); + int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, + &cmd_bo_create); assert(ret == 0); free(subgraph->cmdstream); + subgraph->cmdstream = NULL; subgraph->cmdstream_bo = cmd_bo_create.handle; + DBG("subgraph->coefs_used %d\n", subgraph->coefs_used); if (subgraph->coefs_used > 0) { - subgraph->coefs_rsrc = 
pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->coefs_used); - assert(subgraph->coefs_rsrc != NULL); - pipe_buffer_write(subgraph->base.context, subgraph->coefs_rsrc, 0, subgraph->coefs_used, subgraph->coefs); + subgraph->coefs_rsrc = pipe_buffer_create(pcontext->screen, 0, + PIPE_USAGE_DEFAULT, + subgraph->coefs_used); + pipe_buffer_write(pcontext, subgraph->coefs_rsrc, 0, + subgraph->coefs_used, subgraph->coefs); free(subgraph->coefs); subgraph->coefs = NULL; if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) { struct pipe_transfer *transfer_in; - uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->coefs_rsrc, + uint8_t *buf = pipe_buffer_map(pcontext, subgraph->coefs_rsrc, PIPE_MAP_READ, &transfer_in); - ethosu_dump_buffer(buf, "coefs", 0, 0, 0, pipe_buffer_size(subgraph->coefs_rsrc)); - pipe_buffer_unmap(subgraph->base.context, transfer_in); + ethosu_dump_buffer(buf, "coefs", 0, 0, 0, + pipe_buffer_size(subgraph->coefs_rsrc)); + pipe_buffer_unmap(pcontext, transfer_in); } } - subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used); - assert(subgraph->io_rsrc != NULL); - - return &subgraph->base; + DBG("subgraph->io_used %d\n", subgraph->io_used); + subgraph->io_rsrc = pipe_buffer_create(pcontext->screen, 0, + PIPE_USAGE_DEFAULT, + subgraph->io_used); } void @@ -267,6 +343,9 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, struct timespec start, end; int ret; + if (subgraph->io_rsrc == NULL) + prepare_for_submission(subgraph, pcontext); + for (unsigned i = 0; i < inputs_count; i++) { struct ethosu_tensor *input = ethosu_find_tensor(subgraph, input_idxs[i]); assert(input); @@ -279,10 +358,10 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) { struct pipe_transfer *transfer_in; - uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc, + uint8_t *buf = pipe_buffer_map(pcontext, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in); 
ethosu_dump_buffer(buf, "io-before", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc)); - pipe_buffer_unmap(subgraph->base.context, transfer_in); + pipe_buffer_unmap(pcontext, transfer_in); } job.cmd_bo = subgraph->cmdstream_bo; @@ -313,8 +392,8 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, /* Force a sync */ struct pipe_transfer *transfer_in; - pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in); - pipe_buffer_unmap(subgraph->base.context, transfer_in); + pipe_buffer_map(pcontext, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in); + pipe_buffer_unmap(pcontext, transfer_in); clock_gettime(CLOCK_MONOTONIC_RAW, &end); duration_ns = (long long)(end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec); @@ -337,10 +416,10 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext, if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) { struct pipe_transfer *transfer_in; - uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc, + uint8_t *buf = pipe_buffer_map(pcontext, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in); ethosu_dump_buffer(buf, "io-after", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc)); - pipe_buffer_unmap(subgraph->base.context, transfer_in); + pipe_buffer_unmap(pcontext, transfer_in); } pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]); @@ -348,33 +427,30 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext, } void -ethosu_ml_subgraph_destroy(struct pipe_context *pcontext, +ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph) { - int ret; - struct drm_gem_close arg = {0}; - struct ethosu_screen *screen = ethosu_screen(pcontext->screen); struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph); - pipe_resource_reference(&subgraph->io_rsrc, NULL); - pipe_resource_reference(&subgraph->coefs_rsrc, NULL); + if (subgraph->io_rsrc) { + /* Post-submission state: cleanup DRM resources */ + 
struct ethosu_screen *screen = ethosu_device_screen(pdevice); + struct drm_gem_close arg = {0}; + int ret; - arg.handle = subgraph->cmdstream_bo; - ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); - assert(ret >= 0); + pipe_resource_reference(&subgraph->io_rsrc, NULL); + pipe_resource_reference(&subgraph->coefs_rsrc, NULL); - util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { - free(operation->kernel.scales); - free(operation->kernel.zero_points); + arg.handle = subgraph->cmdstream_bo; + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + } else { + /* Pre-submission state: cleanup raw buffers */ + free(subgraph->cmdstream); + free(subgraph->coefs); } - util_dynarray_fini(&subgraph->operations); util_dynarray_fini(&subgraph->tensors); - free(subgraph->cmd0_state); - free(subgraph->cmd1_state); - free(subgraph->cmd0_valid); - free(subgraph->cmd1_valid); - free(subgraph); } diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index e1609ccb6a2..9343566c0e0 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -195,11 +195,14 @@ struct ethosu_tensor { enum ethosu_layout layout; }; +#define NUM_HEADER_FIELDS 4 +#define NUM_TENSOR_FIELDS 3 + struct ethosu_subgraph { struct pipe_ml_subgraph base; struct util_dynarray operations; /* ethosu_operation */ - struct util_dynarray tensors; /* ethosu_tensor* */ + struct util_dynarray tensors; /* ethosu_tensor */ unsigned cmdstream_used; uint32_t *cmdstream; @@ -221,13 +224,19 @@ struct ethosu_subgraph { }; bool -ethosu_ml_operation_supported(struct pipe_context *pcontext, const struct pipe_ml_operation *operation); +ethosu_ml_operation_supported(struct pipe_ml_device *pdevice, + const struct pipe_ml_operation *operation); struct pipe_ml_subgraph * -ethosu_ml_subgraph_create(struct pipe_context *pcontext, +ethosu_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct 
pipe_ml_operation *poperations, unsigned count); +uint8_t * +ethosu_ml_subgraph_serialize(struct pipe_ml_device *pdevice, + struct pipe_ml_subgraph *psubgraph, + size_t *size); + void ethosu_ml_subgraph_invoke(struct pipe_context *pcontext, struct pipe_ml_subgraph *psubgraph, unsigned inputs_count, unsigned input_idxs[], @@ -239,7 +248,7 @@ void ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext, unsigned output_idxs[], void *outputs[], bool is_signed[]); -void ethosu_ml_subgraph_destroy(struct pipe_context *context, +void ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph); unsigned ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, unsigned tensor_idx); diff --git a/src/gallium/drivers/ethosu/ethosu_public.h b/src/gallium/drivers/ethosu/ethosu_public.h new file mode 100644 index 00000000000..f8a2fe2b978 --- /dev/null +++ b/src/gallium/drivers/ethosu/ethosu_public.h @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2024 Tomeu Vizoso + * SPDX-License-Identifier: MIT + */ + +#ifndef ETHOSU_PUBLIC_H +#define ETHOSU_PUBLIC_H + +struct pipe_ml_device; + +struct pipe_ml_device *ethosu_ml_device_create(const char *spec); + +#endif /* ETHOSU_PUBLIC_H */ diff --git a/src/gallium/drivers/ethosu/ethosu_sched.c b/src/gallium/drivers/ethosu/ethosu_sched.c index 18b8c18629c..6f98d695b7c 100644 --- a/src/gallium/drivers/ethosu/ethosu_sched.c +++ b/src/gallium/drivers/ethosu/ethosu_sched.c @@ -15,7 +15,7 @@ required_input_size(int value, int stride, int border) static struct ethosu_block _get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block) { - struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); + struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); struct ethosu_block ifm_block = {0}; // IFM block height @@ -73,7 +73,7 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo static 
struct ethosu_block_config find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); + struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); struct ethosu_block_config config = {}; struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX; float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth; @@ -205,7 +205,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope void ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation) { - struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen); + struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device); if (ethosu_is_u65(screen)) operation->block_config = find_block_config(subgraph, operation); diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.c b/src/gallium/drivers/etnaviv/etnaviv_context.c index c24ededbdc9..c0e56ebecb1 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_context.c +++ b/src/gallium/drivers/etnaviv/etnaviv_context.c @@ -723,11 +723,8 @@ etna_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) pctx->destroy = etna_context_destroy; pctx->draw_vbo = etna_draw_vbo; - pctx->ml_operation_supported = etna_ml_operation_supported; - pctx->ml_subgraph_create = etna_ml_subgraph_create; pctx->ml_subgraph_invoke = etna_ml_subgraph_invoke; pctx->ml_subgraph_read_output = etna_ml_subgraph_read_outputs; - pctx->ml_subgraph_destroy = etna_ml_subgraph_destroy; pctx->flush = etna_context_flush; pctx->set_debug_callback = etna_set_debug_callback; pctx->create_fence_fd = etna_create_fence_fd; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c index 32a63a06a50..4060a1c93e1 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c @@ 
-16,6 +16,7 @@ #include "etnaviv_ml_nn.h" #include "etnaviv_ml_tp.h" #include "etnaviv_ml.h" +#include "etnaviv_screen.h" struct etna_ml_tensor * etna_ml_get_tensor(struct etna_ml_subgraph *subgraph, unsigned idx) @@ -68,7 +69,6 @@ etna_ml_allocate_tensor(struct etna_ml_subgraph *subgraph) void etna_ml_create_tensor(struct etna_ml_subgraph *subgraph, unsigned idx, unsigned size) { - struct pipe_context *context = subgraph->base.context; struct etna_ml_tensor *tensor = etna_ml_get_tensor(subgraph, idx); assert(idx < util_dynarray_num_elements(&subgraph->tensors, struct etna_ml_tensor *)); @@ -81,7 +81,7 @@ etna_ml_create_tensor(struct etna_ml_subgraph *subgraph, unsigned idx, unsigned return; } - res = etna_ml_create_resource(context, size); + res = etna_ml_create_resource(&subgraph->screen->base, size); tensor->resource = res; tensor->size = size; @@ -99,10 +99,9 @@ etna_ml_destroy_tensor(struct etna_ml_subgraph *subgraph, unsigned idx) } struct etna_bo * -etna_ml_create_bo(struct pipe_context *pctx, size_t size) +etna_ml_create_bo(struct etna_screen *screen, size_t size) { - struct etna_context *ctx = etna_context(pctx); - struct etna_bo *bo = etna_bo_new(ctx->screen->dev, + struct etna_bo *bo = etna_bo_new(screen->dev, size, DRM_ETNA_GEM_CACHE_WC); @@ -115,9 +114,9 @@ etna_ml_create_bo(struct pipe_context *pctx, size_t size) } struct pipe_resource * -etna_ml_create_resource(struct pipe_context *pctx, size_t size) +etna_ml_create_resource(struct pipe_screen *pscreen, size_t size) { - struct pipe_resource *res = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_DEFAULT, size); + struct pipe_resource *res = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, size); void *ptr = etna_bo_map(etna_buffer_resource(res)->bo); memset(ptr, 0, pipe_buffer_size(res)); @@ -125,9 +124,8 @@ etna_ml_create_resource(struct pipe_context *pctx, size_t size) } struct etna_core_npu_info * -etna_ml_get_core_info(struct etna_context *context) +etna_ml_get_core_info(struct etna_screen 
*screen) { - struct etna_screen *screen = context->screen; struct etna_core_info *info = etna_gpu_get_core_info(screen->npu); return &info->npu; } @@ -135,9 +133,7 @@ etna_ml_get_core_info(struct etna_context *context) static bool needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_version = ctx->screen->specs.nn_core_version; + unsigned nn_core_version = subgraph->screen->specs.nn_core_version; bool has_stride = poperation->conv.stride_x > 1 || poperation->conv.stride_y > 1; bool pointwise = poperation->conv.pointwise; bool has_padding = poperation->conv.padding_top > 0 || @@ -651,7 +647,7 @@ tensor_quantization_supported(struct pipe_tensor *tensor) } bool -etna_ml_operation_supported(struct pipe_context *pcontext, +etna_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operation) { bool supported = false; @@ -743,12 +739,12 @@ etna_ml_operation_supported(struct pipe_context *pcontext, } struct pipe_ml_subgraph * -etna_ml_subgraph_create(struct pipe_context *pcontext, +etna_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *poperations, unsigned count) { - struct etna_context *ctx = etna_context(pcontext); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + struct etna_screen *screen = etna_ml_device_screen(pdevice); + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; struct etna_ml_subgraph *subgraph; struct list_head operations; unsigned tensor_count; @@ -763,7 +759,8 @@ etna_ml_subgraph_create(struct pipe_context *pcontext, list_inithead(&operations); - subgraph->base.context = pcontext; + subgraph->base.device = pdevice; + subgraph->screen = screen; subgraph->operations = UTIL_DYNARRAY_INIT; subgraph->tensors = UTIL_DYNARRAY_INIT; @@ -880,7 +877,7 @@ etna_ml_subgraph_invoke(struct 
pipe_context *pctx, struct pipe_ml_subgraph *psub bool is_signed[]) { struct etna_context *ctx = etna_context(pctx); - unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_core_count = etna_ml_get_core_info(ctx->screen)->tp_core_count; struct etna_ml_subgraph *subgraph = (struct etna_ml_subgraph *)(psubgraph); struct etna_cmd_stream *stream = ctx->stream; static bool is_initialized = false; @@ -968,10 +965,10 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub switch (operation->type) { case ETNA_JOB_TYPE_TP: - etna_ml_emit_operation_tp(subgraph, operation, i); + etna_ml_emit_operation_tp(pctx, subgraph, operation, i); break; case ETNA_JOB_TYPE_NN: - etna_ml_emit_operation_nn(subgraph, operation, i); + etna_ml_emit_operation_nn(pctx, subgraph, operation, i); break; default: UNREACHABLE("Unsupported ML operation type"); @@ -1063,7 +1060,7 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr } void -etna_ml_subgraph_destroy(struct pipe_context *context, struct pipe_ml_subgraph *psubgraph) +etna_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph) { struct etna_ml_subgraph *subgraph = (struct etna_ml_subgraph *)(psubgraph); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h index c0646456ea5..718765ba694 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h @@ -13,6 +13,16 @@ #define MAX_CONFIG_BOS 4 +struct etna_ml_device { + struct pipe_ml_device base; +}; + +static inline struct etna_ml_device * +etna_ml_device(struct pipe_ml_device *dev) +{ + return (struct etna_ml_device *)dev; +} + enum etna_job_type { ETNA_JOB_TYPE_NN, ETNA_JOB_TYPE_TP, @@ -47,6 +57,7 @@ struct etna_ml_tensor { struct etna_ml_subgraph { struct pipe_ml_subgraph base; + struct etna_screen *screen; struct util_dynarray operations; @@ -142,18 +153,18 @@ struct pipe_resource 
*etna_ml_get_resource(struct etna_ml_subgraph *subgraph, un unsigned etna_ml_get_offset(struct etna_ml_subgraph *subgraph, unsigned idx); unsigned etna_ml_get_size(struct etna_ml_subgraph *subgraph, unsigned idx); -struct etna_bo *etna_ml_create_bo(struct pipe_context *pctx, size_t size); +struct etna_bo *etna_ml_create_bo(struct etna_screen *screen, size_t size); -struct pipe_resource *etna_ml_create_resource(struct pipe_context *pctx, size_t size); +struct pipe_resource *etna_ml_create_resource(struct pipe_screen *pscreen, size_t size); -struct etna_core_npu_info *etna_ml_get_core_info(struct etna_context *context); +struct etna_core_npu_info *etna_ml_get_core_info(struct etna_screen *screen); bool -etna_ml_operation_supported(struct pipe_context *pcontext, +etna_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operation); struct pipe_ml_subgraph * -etna_ml_subgraph_create(struct pipe_context *context, +etna_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operations, unsigned count); @@ -167,6 +178,6 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr bool is_signed[]); void -etna_ml_subgraph_destroy(struct pipe_context *context, struct pipe_ml_subgraph *subgraph); +etna_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *subgraph); #endif diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c index eb7a83df402..ab60c19a10f 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c @@ -376,8 +376,7 @@ reorder_for_hw_depthwise(struct etna_ml_subgraph *subgraph, struct etna_operatio static void transpose(struct etna_ml_subgraph *subgraph, struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - unsigned nn_core_version = etna_context(context)->screen->specs.nn_core_version; + unsigned nn_core_version = 
subgraph->screen->specs.nn_core_version; void *map = operation->weight_tensor; unsigned new_size; uint8_t *output; @@ -503,8 +502,7 @@ static bool calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation) { - struct pipe_context *context = subgraph->base.context; - unsigned nn_core_version = etna_context(context)->screen->specs.nn_core_version; + unsigned nn_core_version = subgraph->screen->specs.nn_core_version; unsigned input_width = poperation->input_tensors[0]->dims[1]; unsigned input_channels = poperation->input_tensors[0]->dims[3]; @@ -554,9 +552,7 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation, struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_version = ctx->screen->specs.nn_core_version; + unsigned nn_core_version = subgraph->screen->specs.nn_core_version; /* TODO: Support stride_x != stride_y */ assert(poperation->conv.stride_x == poperation->conv.stride_y); @@ -886,9 +882,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation, struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_version = ctx->screen->specs.nn_core_version; + unsigned nn_core_version = subgraph->screen->specs.nn_core_version; if (nn_core_version < 8) etna_ml_lower_add_v7(subgraph, poperation, operation); @@ -988,24 +982,23 @@ etna_ml_calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsig } static unsigned -etna_ml_calculate_tiling(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) +etna_ml_calculate_tiling(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) { - 
unsigned nn_core_version = ctx->screen->specs.nn_core_version; + unsigned nn_core_version = screen->specs.nn_core_version; if (nn_core_version == 7) - return etna_ml_calculate_tiling_v7(ctx, operation, tile_width_out, tile_height_out); + return etna_ml_calculate_tiling_v7(screen, operation, tile_width_out, tile_height_out); else - return etna_ml_calculate_tiling_v8(ctx, operation, tile_width_out, tile_height_out); + return etna_ml_calculate_tiling_v8(screen, operation, tile_width_out, tile_height_out); } static struct etna_bo * create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct etna_bo *coefficients, unsigned coef_cache_size) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; - unsigned nn_core_version = ctx->screen->specs.nn_core_version; - unsigned oc_sram_size = etna_ml_get_core_info(ctx)->on_chip_sram_size; - struct etna_bo *bo = etna_ml_create_bo(context, sizeof(struct etna_nn_params)); + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; + unsigned nn_core_version = screen->specs.nn_core_version; + unsigned oc_sram_size = etna_ml_get_core_info(screen)->on_chip_sram_size; + struct etna_bo *bo = etna_ml_create_bo(screen, sizeof(struct etna_nn_params)); unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; @@ -1162,7 +1155,7 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation } unsigned tile_x, tile_y; - unsigned superblocks = etna_ml_calculate_tiling(ctx, operation, &tile_x, &tile_y); + unsigned superblocks = etna_ml_calculate_tiling(screen, operation, &tile_x, &tile_y); map->out_image_tile_x_size = tile_x; map->out_image_tile_y_size = tile_y; @@ -1294,9 +1287,7 @@ void 
etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct etna_vip_instruction *instruction) { - struct pipe_context *pctx = subgraph->base.context; - struct etna_context *ctx = etna_context(pctx); - unsigned nn_core_version = ctx->screen->specs.nn_core_version; + unsigned nn_core_version = subgraph->screen->specs.nn_core_version; unsigned coef_cache_size; instruction->type = ETNA_JOB_TYPE_NN; @@ -1320,11 +1311,11 @@ etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etn } void -etna_ml_emit_operation_nn(struct etna_ml_subgraph *subgraph, +etna_ml_emit_operation_nn(struct pipe_context *pctx, + struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *operation, unsigned idx) { - struct pipe_context *pctx = subgraph->base.context; struct etna_context *ctx = etna_context(pctx); struct etna_cmd_stream *stream = ctx->stream; unsigned offset = idx + 1; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h index 329acbe6431..42cdbb2009e 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h @@ -11,13 +11,13 @@ etna_ml_calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsig unsigned *output_width, unsigned *output_height, unsigned *output_channels); unsigned -etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out); +etna_ml_calculate_tiling_v7(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out); struct etna_bo * etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size); unsigned -etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out); 
+etna_ml_calculate_tiling_v8(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out); struct etna_bo * etna_ml_create_coeffs_v8(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size); @@ -43,6 +43,7 @@ etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *instruction); void -etna_ml_emit_operation_nn(struct etna_ml_subgraph *subgraph, +etna_ml_emit_operation_nn(struct pipe_context *pctx, + struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *operation, unsigned idx); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c index 47184799190..47ecfac6178 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v7.c @@ -17,10 +17,10 @@ map_resource(struct pipe_resource *resource) #define MAX_TILE_WIDTH 64 static unsigned -calc_superblocks(struct etna_context *ctx, const struct etna_operation *operation, unsigned tile_y, unsigned interleave_mode) +calc_superblocks(struct etna_screen *screen, const struct etna_operation *operation, unsigned tile_y, unsigned interleave_mode) { - unsigned nn_core_count = ctx->screen->info->npu.nn_core_count; - unsigned nn_accum_buffer_depth = ctx->screen->info->npu.nn_accum_buffer_depth; + unsigned nn_core_count = screen->info->npu.nn_core_count; + unsigned nn_accum_buffer_depth = screen->info->npu.nn_accum_buffer_depth; unsigned output_channels = operation->addition ? 
1 : operation->output_channels; unsigned kernels_per_core = DIV_ROUND_UP(output_channels, nn_core_count); unsigned foo = (nn_accum_buffer_depth * interleave_mode) / tile_y; @@ -60,10 +60,10 @@ calc_interleave_mode(unsigned tile_width, unsigned weight_height) } unsigned -etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) +etna_ml_calculate_tiling_v7(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) { - unsigned nn_input_buffer_depth = ctx->screen->info->npu.nn_input_buffer_depth; - unsigned nn_accum_buffer_depth = ctx->screen->info->npu.nn_accum_buffer_depth; + unsigned nn_input_buffer_depth = screen->info->npu.nn_input_buffer_depth; + unsigned nn_accum_buffer_depth = screen->info->npu.nn_accum_buffer_depth; unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; @@ -95,7 +95,7 @@ etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operatio tile_height -= 1; tile_height = MAX2(tile_height, 1); - superblocks = calc_superblocks(ctx, operation, tile_height, interleave_mode); + superblocks = calc_superblocks(screen, operation, tile_height, interleave_mode); if (tile_width_out) *tile_width_out = tile_width; @@ -185,8 +185,8 @@ wb_stream_write(struct wb_stream *wb_stream, unsigned value) static unsigned write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits) { - struct pipe_context *pctx = subgraph->base.context; - unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned input_channels = operation->addition ? 
1 : operation->input_channels; unsigned output_channels = operation->addition ? 1 : operation->output_channels; unsigned cores_used = MIN2(output_channels, nn_core_count); @@ -195,7 +195,7 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co uint32_t *biases = (uint32_t *)operation->bias_tensor; unsigned out_values_per_channel = operation->output_width * operation->output_height; unsigned stride = MIN2(input_channels, 6); - unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL); + unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL); uint8_t *weights_maps[DIV_ROUND_UP(kernels_per_core, superblocks)]; uint32_t *initial_ptr = map; bool do_write = initial_ptr != NULL; @@ -265,8 +265,8 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co static unsigned write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits) { - struct pipe_context *pctx = subgraph->base.context; - unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned input_channels = operation->addition ? 1 : operation->input_channels; unsigned output_channels = operation->addition ? 
1 : operation->output_channels; unsigned cores_used = MIN2(output_channels, nn_core_count); @@ -274,7 +274,7 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne uint8_t *input = operation->weight_tensor; uint32_t *biases = (uint32_t *)operation->bias_tensor; unsigned out_values_per_channel = operation->output_width * operation->output_height; - unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL); + unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL); uint8_t (*weights_map)[input_channels][operation->weight_width][operation->weight_height] = (void *)input; uint32_t *initial_ptr = map; bool do_write = initial_ptr != NULL; @@ -352,15 +352,15 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne static unsigned write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits) { - struct pipe_context *pctx = subgraph->base.context; - unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned output_channels = operation->addition ? 
1 : operation->output_channels; unsigned cores_used = MIN2(output_channels, nn_core_count); unsigned kernels_per_core = DIV_ROUND_UP(output_channels, cores_used); uint8_t *input = operation->weight_tensor; uint32_t *biases = (uint32_t *)operation->bias_tensor; unsigned out_values_per_channel = operation->output_width * operation->output_height; - unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL); + unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL); uint32_t *initial_ptr = map; bool do_write = initial_ptr != NULL; uint64_t buffer = 0; @@ -438,9 +438,8 @@ write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned static unsigned calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned header_size = align(nn_core_count * 4, 64); unsigned input_channels = operation->addition ? 1 : operation->input_channels; unsigned output_channels = operation->addition ? 
1 : operation->output_channels; @@ -462,10 +461,9 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op static unsigned calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; - unsigned max_zrl_bits = etna_ml_get_core_info(ctx)->nn_zrl_bits; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; + unsigned max_zrl_bits = etna_ml_get_core_info(screen)->nn_zrl_bits; unsigned header_size = align(nn_core_count * 4, 64); unsigned input_channels = operation->addition ? 1 : operation->input_channels; unsigned output_channels = operation->addition ? 1 : operation->output_channels; @@ -515,9 +513,8 @@ calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operatio struct etna_bo * etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned header_size = align(nn_core_count * 4, 64); unsigned input_channels = operation->addition ? 1 : operation->input_channels; unsigned output_channels = operation->addition ? 
1 : operation->output_channels; @@ -529,7 +526,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op bo_size = calculate_weight_bo_size(subgraph, operation); zrl_bits = calculate_zrl_bits(subgraph, operation); - struct etna_bo *compressed = etna_ml_create_bo(context, bo_size); + struct etna_bo *compressed = etna_ml_create_bo(screen, bo_size); etna_bo_cpu_prep(compressed, DRM_ETNA_PREP_WRITE); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c index 0c0874cf7e0..8321b867bf2 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c @@ -39,10 +39,10 @@ struct etna_nn_header_v8 { #define MAX_TILE_WIDTH 64 static unsigned -calc_superblocks(struct etna_context *ctx, const struct etna_operation *operation, unsigned tile_x, unsigned tile_y, unsigned interleave_mode) +calc_superblocks(struct etna_screen *screen, const struct etna_operation *operation, unsigned tile_x, unsigned tile_y, unsigned interleave_mode) { - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; - struct etna_core_info *info = etna_gpu_get_core_info(ctx->screen->npu); + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; + struct etna_core_info *info = etna_gpu_get_core_info(screen->npu); unsigned nn_accum_buffer_depth = info->npu.nn_accum_buffer_depth; unsigned output_channels = operation->output_channels; unsigned kernels_per_core = DIV_ROUND_UP(output_channels, nn_core_count); @@ -67,7 +67,7 @@ calc_superblocks(struct etna_context *ctx, const struct etna_operation *operatio } static unsigned -calc_interleave_mode(struct etna_context *ctx, unsigned tile_width, unsigned weight_height) +calc_interleave_mode(unsigned tile_width, unsigned weight_height) { unsigned mode; @@ -103,10 +103,10 @@ calc_interleave_mode(struct etna_context *ctx, unsigned tile_width, unsigned wei } unsigned -etna_ml_calculate_tiling_v8(struct etna_context 
*ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) +etna_ml_calculate_tiling_v8(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out) { - unsigned nn_input_buffer_depth = etna_ml_get_core_info(ctx)->nn_input_buffer_depth; - unsigned nn_accum_buffer_depth = etna_ml_get_core_info(ctx)->nn_accum_buffer_depth; + unsigned nn_input_buffer_depth = etna_ml_get_core_info(screen)->nn_input_buffer_depth; + unsigned nn_accum_buffer_depth = etna_ml_get_core_info(screen)->nn_accum_buffer_depth; unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; @@ -128,7 +128,7 @@ etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operatio } tile_width = MIN2(output_width, 64); - interleave_mode = calc_interleave_mode(ctx, tile_width, operation->weight_height); + interleave_mode = calc_interleave_mode(tile_width, operation->weight_height); tile_height = nn_input_buffer_depth * interleave_mode - operation->weight_height + 1; tile_height = MIN2(tile_height, interleave_mode * nn_accum_buffer_depth); @@ -143,7 +143,7 @@ etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operatio tile_height = MAX2(tile_height, 1); - superblocks = calc_superblocks(ctx, operation, tile_width, tile_height, interleave_mode); + superblocks = calc_superblocks(screen, operation, tile_width, tile_height, interleave_mode); if (tile_width_out) *tile_width_out = tile_width; @@ -437,9 +437,7 @@ static void encode_byte(struct encoder *encoder, uint8_t byte) static void encode_value(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct encoder *encoder, uint8_t value) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned customer_id = ctx->screen->info->customer_id; + unsigned 
customer_id = subgraph->screen->info->customer_id; uint8_t zero_point = operation->weight_zero_point; value -= encoder->avg_bias; @@ -600,9 +598,8 @@ static void encoder_reset(struct etna_ml_subgraph *subgraph, const struct etna_o static void encode_superblock(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct encoder *encoder, unsigned kernels_in_superblock, unsigned first_channel) { - struct pipe_context *pctx = subgraph->base.context; - struct etna_context *ctx = etna_context(pctx); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned input_channels = operation->input_channels; unsigned output_channels = operation->output_channels; unsigned kernel_size; @@ -671,9 +668,8 @@ static uint32_t pack_symbol_map(uint8_t map[8]) static struct etna_bo * create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + struct etna_screen *screen = subgraph->screen; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned input_channels = operation->input_channels; unsigned output_channels = operation->output_channels; unsigned cores_used = MIN2(output_channels, nn_core_count); @@ -689,7 +685,7 @@ create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operat unsigned tail_size = 64; max_size = header_size + cores_used * body_size + tail_size; - return etna_ml_create_bo(context, max_size); + return etna_ml_create_bo(screen, max_size); } static void @@ -718,12 +714,11 @@ calculate_symbol_map(struct etna_ml_subgraph *subgraph, const struct etna_operat static void fill_weights(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct 
encoder *encoder, struct etna_nn_header_v8 *header) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); + struct etna_screen *screen = subgraph->screen; unsigned output_channels = operation->output_channels; - unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count; + unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count; unsigned cores_used = MIN2(output_channels, nn_core_count); - unsigned superblocks = etna_ml_calculate_tiling_v8(ctx, operation, NULL, NULL); + unsigned superblocks = etna_ml_calculate_tiling_v8(screen, operation, NULL, NULL); unsigned full_superblock = DIV_ROUND_UP(output_channels, nn_core_count * superblocks); unsigned channel_per_superblock[superblocks]; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c index c9e78e91f9c..bd0ff8968bf 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c @@ -247,7 +247,7 @@ set_default_tp_config(struct etna_tp_params *map) static struct etna_bo * create_transpose_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params)); + struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params)); etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE); @@ -297,7 +297,7 @@ create_detranspose_config(struct etna_ml_subgraph *subgraph, const struct etna_o unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; - struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params)); + struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params)); etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE); @@ -418,7 +418,7 @@ static struct etna_bo * create_reshuffle_config(struct 
etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned tp_core, unsigned tp_cores_used) { - struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params)); + struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params)); unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned output_width = operation->output_width; @@ -572,8 +572,7 @@ static struct etna_bo * create_pad_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned tp_core, unsigned tp_cores_used) { - struct pipe_context *pctx = subgraph->base.context; - struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params)); + struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params)); unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; @@ -747,8 +746,7 @@ static struct etna_bo * create_pwl_lut_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned tp_core, unsigned tp_cores_used, struct etna_bo *pwl_lut) { - struct pipe_context *pctx = subgraph->base.context; - struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params)); + struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params)); unsigned input_width = operation->input_width; unsigned input_height = operation->input_height; unsigned input_channels = operation->input_channels; @@ -1124,10 +1122,8 @@ static struct etna_bo * create_relu_lut_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); const unsigned lut_length = 1024; - struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev, + struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev, lut_length 
* sizeof(uint32_t), DRM_ETNA_GEM_CACHE_WC); @@ -1155,10 +1151,8 @@ static struct etna_bo * create_abs_lut_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); unsigned lut_length = 1024; - struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev, + struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev, lut_length * sizeof(uint32_t), DRM_ETNA_GEM_CACHE_WC); @@ -1274,11 +1268,9 @@ static struct etna_bo * create_log_lut_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation) { - struct pipe_context *context = subgraph->base.context; - struct etna_context *ctx = etna_context(context); unsigned lut_table_len = 1024; - struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev, + struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev, lut_table_len * sizeof(uint32_t), DRM_ETNA_GEM_CACHE_WC); @@ -1331,7 +1323,6 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct etna_vip_instruction *instruction) { - struct etna_context *ctx = etna_context(subgraph->base.context); struct pipe_resource *input = etna_ml_get_resource(subgraph, operation->input_tensors[0]); assert(input); pipe_resource_reference(&instruction->input, input); @@ -1351,7 +1342,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, instruction->configs[0] = create_detranspose_config(subgraph, operation); break; case ETNA_ML_TP_RESHUFFLE: { - unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_core_count = etna_ml_get_core_info(subgraph->screen)->tp_core_count; unsigned tp_cores_used; tp_cores_used = (operation->input_width > 8 || operation->input_channels > 1) ? 
tp_core_count : 1; @@ -1368,7 +1359,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, break; } case ETNA_ML_TP_PAD: { - unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count; if (operation->input_width == 1) tp_cores_used = 1; @@ -1380,7 +1371,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, break; } case ETNA_ML_TP_RELU: { - unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count; if (operation->input_width < 6) tp_cores_used = 1; @@ -1393,7 +1384,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, break; } case ETNA_ML_TP_ABSOLUTE: { - unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count; ML_DBG("absolute: input_width %d tp_cores_used %d\n", operation->input_width, tp_cores_used); instruction->pwl_lut = create_abs_lut_bo(subgraph, operation); @@ -1403,7 +1394,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, break; } case ETNA_ML_TP_LOGISTIC: { - unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count; + unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count; if (operation->input_width < 6) tp_cores_used = 1; @@ -1421,12 +1412,13 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, } void -etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph, +etna_ml_emit_operation_tp(struct pipe_context *pctx, + struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *operation, unsigned idx) { - struct etna_context *ctx = etna_context(subgraph->base.context); - unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count; + struct etna_context *ctx = etna_context(pctx); + unsigned tp_core_count = etna_ml_get_core_info(subgraph->screen)->tp_core_count; 
struct etna_cmd_stream *stream = ctx->stream; bool more_than_one_tp_job = operation->configs[1] != NULL; bool parallel = DBG_ENABLED(ETNA_DBG_NPU_PARALLEL); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h index 32667c2caf1..30756e7cfc5 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h @@ -47,6 +47,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *instruction); void -etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph, +etna_ml_emit_operation_tp(struct pipe_context *pctx, + struct etna_ml_subgraph *subgraph, struct etna_vip_instruction *operation, unsigned idx); \ No newline at end of file diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index f091ff2d59c..4ac60ccd454 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -34,6 +34,7 @@ #include "etnaviv_debug.h" #include "etnaviv_fence.h" #include "etnaviv_format.h" +#include "etnaviv_ml.h" #include "etnaviv_query.h" #include "etnaviv_resource.h" #include "etnaviv_translate.h" @@ -1014,6 +1015,14 @@ etna_screen_get_fd(struct pipe_screen *pscreen) return etna_device_fd(screen->dev); } +static struct pipe_ml_device * +etna_get_ml_device(struct pipe_screen *pscreen) +{ + struct etna_screen *screen = etna_screen(pscreen); + + return &screen->ml_device.base; +} + struct pipe_screen * etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu, struct etna_gpu *npu, struct renderonly *ro) @@ -1089,6 +1098,13 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu, pscreen->is_dmabuf_modifier_supported = etna_screen_is_dmabuf_modifier_supported; pscreen->get_dmabuf_modifier_planes = etna_screen_get_dmabuf_modifier_planes; + if (npu) { + screen->ml_device.base.ml_operation_supported = etna_ml_operation_supported; + 
screen->ml_device.base.ml_subgraph_create = etna_ml_subgraph_create; + screen->ml_device.base.ml_subgraph_destroy = etna_ml_subgraph_destroy; + pscreen->get_ml_device = etna_get_ml_device; + } + if (!etna_shader_screen_init(pscreen)) goto fail; diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.h b/src/gallium/drivers/etnaviv/etnaviv_screen.h index 18c6233ab9a..d00dd2385a8 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.h +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.h @@ -41,6 +41,8 @@ #include "util/u_helpers.h" #include "util/u_queue.h" #include "compiler/nir/nir.h" +#include "etnaviv_ml.h" +#include "pipe/p_state.h" struct etna_bo; @@ -54,6 +56,7 @@ struct etna_screen { struct etna_pipe *pipe_nn; struct etna_perfmon *perfmon; struct renderonly *ro; + struct etna_ml_device ml_device; struct util_dynarray supported_pm_queries; struct slab_parent_pool transfer_pool; @@ -89,6 +92,13 @@ etna_screen(struct pipe_screen *pscreen) return (struct etna_screen *)pscreen; } +static inline struct etna_screen * +etna_ml_device_screen(struct pipe_ml_device *pdevice) +{ + struct etna_ml_device *dev = etna_ml_device(pdevice); + return container_of(dev, struct etna_screen, ml_device); +} + struct etna_bo * etna_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle); diff --git a/src/gallium/drivers/rocket/rkt_coefs.c b/src/gallium/drivers/rocket/rkt_coefs.c index f14e5d4216d..1e9a10623bf 100644 --- a/src/gallium/drivers/rocket/rkt_coefs.c +++ b/src/gallium/drivers/rocket/rkt_coefs.c @@ -12,7 +12,7 @@ struct pipe_resource * rkt_fill_weights(struct rkt_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation) { - struct pipe_context *pcontext = subgraph->base.context; + struct pipe_context *pcontext = subgraph->context; unsigned weights_width = poperation->conv.weight_tensor->dims[1]; unsigned weights_height = poperation->conv.weight_tensor->dims[2]; unsigned input_channels = poperation->input_tensors[0]->dims[3]; @@ -130,7 +130,7 
@@ rkt_fill_biases(struct rkt_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation, unsigned *truncate_bits) { - struct pipe_context *pcontext = subgraph->base.context; + struct pipe_context *pcontext = subgraph->context; unsigned output_channels = poperation->output_tensors[0]->dims[3]; unsigned weights_size = poperation->conv.weight_tensor->dims[1]; struct pipe_transfer *transfer_out; diff --git a/src/gallium/drivers/rocket/rkt_device.c b/src/gallium/drivers/rocket/rkt_device.c index 9c2da6a2cd2..8da0157ada8 100644 --- a/src/gallium/drivers/rocket/rkt_device.c +++ b/src/gallium/drivers/rocket/rkt_device.c @@ -124,11 +124,8 @@ rkt_create_context(struct pipe_screen *screen, pctx->buffer_subdata = u_default_buffer_subdata; pctx->clear_buffer = u_default_clear_buffer; - pctx->ml_operation_supported = rkt_ml_operation_supported; - pctx->ml_subgraph_create = rkt_ml_subgraph_create; pctx->ml_subgraph_invoke = rkt_ml_subgraph_invoke; pctx->ml_subgraph_read_output = rkt_ml_subgraph_read_outputs; - pctx->ml_subgraph_destroy = rkt_ml_subgraph_destroy; return pctx; } @@ -204,6 +201,12 @@ rkt_screen_get_fd(struct pipe_screen *pscreen) return rkt_screen(pscreen)->fd; } +static struct pipe_ml_device * +rkt_get_ml_device(struct pipe_screen *pscreen) +{ + return &rkt_screen(pscreen)->ml_device.base; +} + struct pipe_screen * rkt_screen_create(int fd, const struct pipe_screen_config *config, @@ -228,5 +231,10 @@ rkt_screen_create(int fd, screen->resource_create = rkt_resource_create; screen->resource_destroy = rkt_resource_destroy; + rkt_screen->ml_device.base.ml_operation_supported = rkt_ml_operation_supported; + rkt_screen->ml_device.base.ml_subgraph_create = rkt_ml_subgraph_create; + rkt_screen->ml_device.base.ml_subgraph_destroy = rkt_ml_subgraph_destroy; + screen->get_ml_device = rkt_get_ml_device; + return screen; } \ No newline at end of file diff --git a/src/gallium/drivers/rocket/rkt_device.h b/src/gallium/drivers/rocket/rkt_device.h index 
0425a4260d9..428750e9da7 100644 --- a/src/gallium/drivers/rocket/rkt_device.h +++ b/src/gallium/drivers/rocket/rkt_device.h @@ -8,6 +8,7 @@ #include "pipe/p_state.h" #include "renderonly/renderonly.h" #include "util/log.h" +#include "util/macros.h" #ifndef RKT_SCREEN_H #define RKT_SCREEN_H @@ -29,11 +30,23 @@ extern int rocket_debug; ##__VA_ARGS__); \ } while (0) +struct rkt_ml_device { + struct pipe_ml_device base; + struct pipe_context *context; +}; + +static inline struct rkt_ml_device * +rkt_ml_device(struct pipe_ml_device *dev) +{ + return (struct rkt_ml_device *)dev; +} + struct rkt_screen { struct pipe_screen pscreen; int fd; struct renderonly *ro; + struct rkt_ml_device ml_device; }; static inline struct rkt_screen * @@ -42,6 +55,13 @@ rkt_screen(struct pipe_screen *p) return (struct rkt_screen *)p; } +static inline struct rkt_screen * +rkt_ml_device_screen(struct pipe_ml_device *pdevice) +{ + struct rkt_ml_device *dev = rkt_ml_device(pdevice); + return container_of(dev, struct rkt_screen, ml_device); +} + struct rkt_context { struct pipe_context base; }; diff --git a/src/gallium/drivers/rocket/rkt_ml.c b/src/gallium/drivers/rocket/rkt_ml.c index dbee31e669d..ae090abbd91 100644 --- a/src/gallium/drivers/rocket/rkt_ml.c +++ b/src/gallium/drivers/rocket/rkt_ml.c @@ -40,7 +40,7 @@ static void create_tensor(struct rkt_ml_subgraph *subgraph, unsigned idx, unsigned size) { - struct pipe_context *context = subgraph->base.context; + struct pipe_context *context = subgraph->context; struct pipe_resource **tensors = util_dynarray_begin(&subgraph->tensors); assert(idx < util_dynarray_num_elements(&subgraph->tensors, @@ -90,7 +90,7 @@ static void compile_operation(struct rkt_ml_subgraph *subgraph, struct rkt_operation *operation) { - struct pipe_context *pcontext = subgraph->base.context; + struct pipe_context *pcontext = subgraph->context; unsigned regcfg_total_size = 0; struct util_dynarray *regcfgs; struct pipe_transfer *transfer = NULL; @@ -264,7 +264,7 @@ 
tensor_quantization_supported(struct pipe_tensor *tensor) } bool -rkt_ml_operation_supported(struct pipe_context *pcontext, +rkt_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operation) { bool supported = false; @@ -299,15 +299,21 @@ rkt_ml_operation_supported(struct pipe_context *pcontext, } struct pipe_ml_subgraph * -rkt_ml_subgraph_create(struct pipe_context *pcontext, +rkt_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *poperations, unsigned count) { + struct rkt_screen *screen = rkt_ml_device_screen(pdevice); + struct rkt_ml_device *dev = rkt_ml_device(pdevice); struct rkt_ml_subgraph *subgraph; unsigned tensor_count; + if (!dev->context) + dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0); + subgraph = calloc(1, sizeof(*subgraph)); - subgraph->base.context = pcontext; + subgraph->base.device = pdevice; + subgraph->context = dev->context; tensor_count = count_tensors(poperations, count); subgraph->tensors = UTIL_DYNARRAY_INIT; @@ -614,7 +620,7 @@ free_operation(struct rkt_operation *operation) } void -rkt_ml_subgraph_destroy(struct pipe_context *context, +rkt_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph) { struct rkt_ml_subgraph *subgraph = (struct rkt_ml_subgraph *)(psubgraph); diff --git a/src/gallium/drivers/rocket/rkt_ml.h b/src/gallium/drivers/rocket/rkt_ml.h index 6bde9b4846e..7775989d3d5 100644 --- a/src/gallium/drivers/rocket/rkt_ml.h +++ b/src/gallium/drivers/rocket/rkt_ml.h @@ -117,15 +117,16 @@ struct rkt_operation { struct rkt_ml_subgraph { struct pipe_ml_subgraph base; + struct pipe_context *context; struct util_dynarray operations; /* rkt_operation */ struct util_dynarray tensors; /* pipe_resource* */ }; bool -rkt_ml_operation_supported(struct pipe_context *pcontext, const struct pipe_ml_operation *operation); +rkt_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation 
*operation); struct pipe_ml_subgraph * -rkt_ml_subgraph_create(struct pipe_context *pcontext, +rkt_ml_subgraph_create(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *poperations, unsigned count); @@ -140,7 +141,7 @@ void rkt_ml_subgraph_read_outputs(struct pipe_context *pcontext, unsigned output_idxs[], void *outputs[], bool is_signed[]); -void rkt_ml_subgraph_destroy(struct pipe_context *context, +void rkt_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph); struct rkt_resource *rkt_get_tensor(struct rkt_ml_subgraph *subgraph, diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c index c92acca0d36..07bafa71df7 100644 --- a/src/gallium/frontends/teflon/tfl_device.c +++ b/src/gallium/frontends/teflon/tfl_device.c @@ -43,7 +43,8 @@ teflon_debug(const char *format, ...) struct teflon_delegate { TfLiteDelegate base; struct pipe_loader_device *dev; - struct pipe_context *context; + struct pipe_screen *screen; + struct pipe_ml_device *ml_dev; struct pipe_tensor *tensors; unsigned tensor_count; }; @@ -526,7 +527,6 @@ partition_init(TfLiteContext *tf_context, const char *buffer, size_t length) { const TfLiteDelegateParams *params = (const TfLiteDelegateParams *)buffer; struct teflon_delegate *delegate = (struct teflon_delegate *)params->delegate; - struct pipe_context *context = delegate->context; struct pipe_ml_operation operations[params->nodes_to_replace->size]; long start = 0, end = 0; @@ -553,9 +553,9 @@ partition_init(TfLiteContext *tf_context, const char *buffer, size_t length) dump_graph(delegate->tensors, tf_context->tensors_size, operations, params->nodes_to_replace->size); struct pipe_ml_subgraph *subgraph; - subgraph = context->ml_subgraph_create(context, - operations, - params->nodes_to_replace->size); + subgraph = delegate->ml_dev->ml_subgraph_create(delegate->ml_dev, + operations, + params->nodes_to_replace->size); struct teflon_subgraph *tsubgraph = calloc(1, 
sizeof(*tsubgraph)); tsubgraph->base = subgraph; @@ -603,9 +603,8 @@ partition_free(TfLiteContext *tf_context, void *buffer) { struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)buffer; struct pipe_ml_subgraph *subgraph = tsubgraph->base; - struct pipe_context *context = subgraph->context; - context->ml_subgraph_destroy(context, subgraph); + subgraph->device->ml_subgraph_destroy(subgraph->device, subgraph); free(tsubgraph->input_tensors); free(tsubgraph->output_tensors); free(tsubgraph); @@ -617,7 +616,7 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node) struct teflon_delegate *delegate = (struct teflon_delegate *)node->delegate; struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)node->user_data; struct pipe_ml_subgraph *subgraph = tsubgraph->base; - struct pipe_context *context = delegate->context; + struct pipe_context *context = delegate->screen->context_create(delegate->screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY); long start = 0, end = 0; if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) { @@ -663,6 +662,9 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node) teflon_debug("teflon: invoked graph, took %ld ms\n", (end - start)); } + context->destroy(context); + context = NULL; + return kTfLiteOk; } @@ -794,14 +796,13 @@ static bool check_op_support(TfLiteDelegate *tf_delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *registration) { struct teflon_delegate *delegate = (struct teflon_delegate *)tf_delegate; - struct pipe_context *context = delegate->context; struct pipe_ml_operation operation = {0}; bool supported = false; if (!fill_operation(delegate, tf_context, node, registration, &operation)) return false; - supported = context->ml_operation_supported(context, &operation); + supported = delegate->ml_dev->ml_operation_supported(delegate->ml_dev, &operation); free_operation(&operation); @@ -976,7 +977,6 @@ tflite_plugin_create_delegate(char **options_keys, void 
(*report_error)(const char *)) { struct teflon_delegate *delegate = (struct teflon_delegate *)calloc(1, sizeof(*delegate)); - struct pipe_screen *screen; struct pipe_loader_device **devs; delegate->base.flags = kTfLiteDelegateFlagsAllowDynamicTensors | kTfLiteDelegateFlagsRequirePropagatedShapes; @@ -999,8 +999,8 @@ tflite_plugin_create_delegate(char **options_keys, teflon_debug("Teflon delegate: loaded %s driver\n", delegate->dev->driver_name); - screen = pipe_loader_create_screen(delegate->dev, false); - delegate->context = screen->context_create(screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY); + delegate->screen = pipe_loader_create_screen(delegate->dev, false); + delegate->ml_dev = delegate->screen->get_ml_device(delegate->screen); return &delegate->base; } @@ -1009,7 +1009,6 @@ __attribute__((visibility("default"))) void tflite_plugin_destroy_delegate(TfLiteDelegate *tf_delegate) { struct teflon_delegate *delegate = (struct teflon_delegate *)tf_delegate; - struct pipe_screen *screen; if (tf_delegate == NULL) { fprintf(stderr, "tflite_plugin_destroy_delegate: NULL delegate!\n"); @@ -1028,9 +1027,7 @@ tflite_plugin_destroy_delegate(TfLiteDelegate *tf_delegate) } free(delegate->tensors); - screen = delegate->context->screen; - delegate->context->destroy(delegate->context); - screen->destroy(screen); + delegate->screen->destroy(delegate->screen); pipe_loader_release(&delegate->dev, 1); free(delegate); } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index e0927b5dddc..f706ce49467 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -88,6 +88,7 @@ struct u_upload_mgr; struct util_debug_callback; struct u_vbuf; struct pipe_context; +struct pipe_ml_subgraph; typedef void (*pipe_draw_func)(struct pipe_context *pipe, const struct pipe_draw_info *info, @@ -1244,29 +1245,6 @@ struct pipe_context { struct winsys_handle *handle, unsigned usage ); - /** - * Checks whether an operation can be 
accelerated by this context. - * - * \param ctx pipe context - * \param operation pipe_ml_operation to be checked - * \return whether the context can accelerate this operation - */ - bool (*ml_operation_supported)(struct pipe_context *context, const struct pipe_ml_operation *operation); - - /** - * Compiles a ML subgraph, to be executed later. The returned pipe_ml_subgraph - * should contain all information needed to execute the subgraph with as - * little effort as strictly needed. - * - * \param ctx pipe context - * \param operations array containing the definitions of the operations in the graph - * \param count number of operations - * \return a newly allocated pipe_ml_subgraph - */ - struct pipe_ml_subgraph *(*ml_subgraph_create)(struct pipe_context *context, - const struct pipe_ml_operation *operations, - unsigned count); - /** * Invokes a ML subgraph for a given input tensor. * @@ -1298,15 +1276,6 @@ struct pipe_context { struct pipe_ml_subgraph *subgraph, unsigned outputs_count, unsigned output_idxs[], void *outputs[], bool is_signed[]); - - /** - * Release all resources allocated by the implementation of ml_subgraph_create - * - * \param ctx pipe context - * \param subgraph subgraph to release - */ - void (*ml_subgraph_destroy)(struct pipe_context *context, - struct pipe_ml_subgraph *subgraph); }; diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index fd458f51e1b..228c721fb0b 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -122,6 +122,11 @@ struct pipe_screen { const char *(*get_vendor)(struct pipe_screen *); + /** + * Returns the ML device for this screen, or NULL if ML is not supported. + */ + struct pipe_ml_device *(*get_ml_device)(struct pipe_screen *); + /** * Returns the device vendor. 
* diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 8d73c1a0bc4..0f0aa548340 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -1275,9 +1275,62 @@ struct pipe_ml_operation struct pipe_ml_subgraph { /** - * pipe_context that owns this subgraph. + * pipe_ml_device that owns this subgraph. */ - struct pipe_context *context; + struct pipe_ml_device *device; +}; + +/** + * ML device providing ahead-of-time operations: operation support queries, + * subgraph compilation/serialization, and subgraph destruction. + */ +struct pipe_ml_device { + const char *id; + + /** + * Checks whether an operation can be accelerated by this device. + * + * \param device pipe_ml_device to be used + * \param operation pipe_ml_operation to be checked + * \return whether the device can accelerate this operation + */ + bool (*ml_operation_supported)(struct pipe_ml_device *device, + const struct pipe_ml_operation *operation); + + /** + * Compiles a ML subgraph, to be executed later. The returned pipe_ml_subgraph + * should contain all information needed to execute the subgraph with as + * little effort as strictly needed. + * + * \param device pipe_ml_device to be used + * \param operations array containing the definitions of the operations in the graph + * \param count number of operations + * \return a newly allocated pipe_ml_subgraph + */ + struct pipe_ml_subgraph *(*ml_subgraph_create)(struct pipe_ml_device *device, + const struct pipe_ml_operation *operations, + unsigned count); + + /** + * Serialize a compiled subgraph into a byte buffer. 
+ * + * \param device pipe_ml_device to be used + * \param subgraph previously-compiled subgraph + * \param size output: size of the returned buffer + * \return malloc'd buffer (caller frees), or NULL on failure + */ + uint8_t *(*ml_subgraph_serialize)(struct pipe_ml_device *device, + struct pipe_ml_subgraph *subgraph, + size_t *size); + + /** + * Release all resources allocated by the implementation of ml_subgraph_create + * + * \param device pipe_ml_device to be used + * \param subgraph subgraph to release + */ + void (*ml_subgraph_destroy)(struct pipe_ml_device *device, + struct pipe_ml_subgraph *subgraph); }; struct pipe_compute_state