From 986f8c7ff2112bca17d9d044baab64f999ca276f Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso
Date: Thu, 24 Oct 2024 07:59:18 +0200
Subject: [PATCH] teflon: Support multiple graph inputs and outputs

Operations other than tensor addition will also need to handle
multiple inputs, and a variable number of them. For testing individual
operations, we also need to support models with multiple inputs.

Reviewed-by: Philipp Zabel
Part-of: 
---
 src/gallium/frontends/teflon/tfl_device.c | 88 +++++++++++------------
 src/gallium/include/pipe/p_context.h      |  8 ++-
 src/gallium/include/pipe/p_state.h        | 12 ++--
 3 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c
index dc620cfcea4..aa08f09b861 100644
--- a/src/gallium/frontends/teflon/tfl_device.c
+++ b/src/gallium/frontends/teflon/tfl_device.c
@@ -94,16 +94,22 @@ create_resource(struct pipe_context *context, TfLiteTensor tensor)
 }
 
 static void
-fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation, struct pipe_tensor *tensors)
+fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation, struct pipe_tensor *tensors)
 {
-   TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;
+   operation->input_count = node->inputs->size;
+   operation->input_tensors = calloc(operation->input_count, sizeof(void*));
+   for (unsigned i = 0; i < node->inputs->size; i++)
+      operation->input_tensors[i] = &tensors[node->inputs->data[i]];
 
-   operation->input_tensor = &tensors[node->inputs->data[0]];
-   operation->output_tensor = &tensors[node->outputs->data[0]];
+   operation->output_count = node->outputs->size;
+   operation->output_tensors = calloc(operation->output_count, sizeof(void*));
+   for (unsigned i = 0; i < node->outputs->size; i++)
+      operation->output_tensors[i] = &tensors[node->outputs->data[i]];
 
    switch(node_registration->builtin_code) {
    case kTfLiteBuiltinConv2d:
-   case kTfLiteBuiltinDepthwiseConv2d:
+   case kTfLiteBuiltinDepthwiseConv2d: {
+      TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;
       operation->type = PIPE_ML_OPERATION_TYPE_CONVOLUTION;
       operation->conv.weight_tensor = &tensors[node->inputs->data[1]];
       operation->conv.bias_tensor = &tensors[node->inputs->data[2]];
@@ -114,12 +120,12 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
       operation->conv.pointwise = operation->conv.weight_tensor->dims[1] == 1 && \
                                   operation->conv.weight_tensor->dims[2] == 1;
       break;
+   }
    case kTfLiteBuiltinAveragePool2d:
       operation->type = PIPE_ML_OPERATION_TYPE_POOLING;
       break;
    case kTfLiteBuiltinAdd:
       operation->type = PIPE_ML_OPERATION_TYPE_ADD;
-      operation->add.input_tensor = &tensors[node->inputs->data[1]];
       break;
    default:
       unreachable("Unsupported ML operation type");
@@ -175,40 +181,35 @@ dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_op
    }
 
    teflon_debug("\n");
-   teflon_debug("%3s %-6s %3s %3s %s\n", "idx", "type", "in", "out", "operation type-specific");
+   teflon_debug("%3s %-6s %25s %25s %s\n", "idx", "type", "inputs", "outputs", "operation type-specific");
    teflon_debug("================================================================================================\n");
    for (int i = 0; i < operation_count; i++) {
+      teflon_debug("%3d ", i);
+
       switch(operations[i].type) {
-      case PIPE_ML_OPERATION_TYPE_ADD:
-         teflon_debug("%3d %-6s %3d %3d in: %d",
-                      i,
-                      "ADD",
-                      operations[i].input_tensor->index,
-                      operations[i].output_tensor->index,
-                      operations[i].add.input_tensor->index);
-         break;
-      case PIPE_ML_OPERATION_TYPE_CONVOLUTION:
-         teflon_debug("%3d %-6s %3d %3d w: %d b: %d stride: %d pad: %s",
-                      i,
-                      operations[i].conv.depthwise ? "DWCONV" : "CONV",
-                      operations[i].input_tensor->index,
-                      operations[i].output_tensor->index,
-                      operations[i].conv.weight_tensor->index,
-                      operations[i].conv.bias_tensor->index,
-                      operations[i].conv.stride_x,
-                      operations[i].conv.padding_same ? "SAME" : "VALID");
-         break;
-      case PIPE_ML_OPERATION_TYPE_POOLING:
-         teflon_debug("%3d %-6s %3d %3d filter: %dx%d stride: %d pad: %s",
-                      i,
-                      "POOL",
-                      operations[i].input_tensor->index,
-                      operations[i].output_tensor->index,
-                      operations[i].pooling.filter_height,
-                      operations[i].pooling.filter_width,
-                      operations[i].pooling.stride_x,
-                      operations[i].pooling.padding_same ? "SAME" : "VALID");
-         break;
+      case PIPE_ML_OPERATION_TYPE_ADD:
+         teflon_debug("%-6s ", "ADD");
+         break;
+      case PIPE_ML_OPERATION_TYPE_CONVOLUTION:
+         teflon_debug("%-6s ", operations[i].conv.depthwise ? "DWCONV" : "CONV");
+         break;
+      case PIPE_ML_OPERATION_TYPE_POOLING:
+         teflon_debug("%-6s ", "POOL");
+         break;
+      }
+
+      for (unsigned j = 0; j < operations[i].input_count; j++) {
+         teflon_debug("%d", operations[i].input_tensors[j]->index);
+         if (j < operations[i].input_count - 1)
+            teflon_debug(",");
+      }
+
+      teflon_debug(" ");
+
+      for (unsigned j = 0; j < operations[i].output_count; j++) {
+         teflon_debug("%d", operations[i].output_tensors[j]->index);
+         if (j < operations[i].output_count - 1)
+            teflon_debug(",");
       }
 
       teflon_debug("\n");
@@ -325,19 +326,18 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
       start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
    }
 
-   struct pipe_tensor input = {0};
-   /* FIXME: Support mutiple inputs */
-   fill_tensor(delegate, tf_context, &input, tsubgraph->input_tensors[0]);
-   context->ml_subgraph_invoke(context, subgraph, &input);
+   void **buffers = malloc(tsubgraph->input_count * sizeof(*buffers));
+   for (unsigned i = 0; i < tsubgraph->input_count; i++)
+      buffers[i] = tf_context->tensors[tsubgraph->input_tensors[i]].data.data;
+   context->ml_subgraph_invoke(context, subgraph, tsubgraph->input_count, tsubgraph->input_tensors, buffers);
+   free(buffers);
 
-   void **buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
+   buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
    for (unsigned i = 0; i < tsubgraph->output_count; i++)
       buffers[i] = tf_context->tensors[tsubgraph->output_tensors[i]].data.data;
    context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers);
    free(buffers);
 
-   pipe_resource_reference(&input.resource, NULL);
-
    if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
       struct timespec time;
       clock_gettime(CLOCK_MONOTONIC, &time);
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 7d1be91813c..f1159873571 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -1258,11 +1258,15 @@ struct pipe_context {
     *
    * \param ctx pipe context
    * \param subgraph previously-compiled subgraph
-   * \param input tensor to use as the input
+   * \param inputs_count number of input tensors to copy in
+   * \param input_idxs array with the indices of input tensors
+   * \param inputs array of buffers to copy the tensor data from
    */
   void (*ml_subgraph_invoke)(struct pipe_context *context,
                              struct pipe_ml_subgraph *subgraph,
-                             struct pipe_tensor *input);
+                             unsigned inputs_count,
+                             unsigned input_idxs[],
+                             void *inputs[]);
 
   /**
    * After a ML subgraph has been invoked, copy the contents of the output
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 25d36f01ced..01182a59246 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -1066,12 +1066,14 @@ struct pipe_ml_operation
    /**
     * Tensor used as input.
     */
-   struct pipe_tensor *input_tensor;
+   struct pipe_tensor **input_tensors;
+   unsigned input_count;
 
    /**
     * Tensor used as output.
     */
-   struct pipe_tensor *output_tensor;
+   struct pipe_tensor **output_tensors;
+   unsigned output_count;
 
    union {
       struct {
@@ -1135,12 +1137,6 @@ struct pipe_ml_operation
           */
          bool padding_same;
       } pooling;
-      struct {
-         /**
-          * Additional input tensor, to be added to the other one.
-          */
-         struct pipe_tensor *input_tensor;
-      } add;
    };
 };
 
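
For driver authors wiring up the new hook, here is a minimal,
hypothetical sketch of an ml_subgraph_invoke implementation against
the updated interface. It is illustrative only, not part of the
change: every "foo_*" name is a stand-in for driver-private code, and
only the hook's signature comes from p_context.h as modified above.

   #include "pipe/p_context.h"

   /* Hypothetical driver-private subgraph state. */
   struct foo_ml_subgraph {
      struct pipe_ml_subgraph base;
      /* ... compiled command stream, per-tensor buffers ... */
   };

   /* Assumed driver helpers, declared here only to keep the
    * sketch self-contained. */
   static void foo_upload_tensor(struct pipe_context *context,
                                 struct foo_ml_subgraph *subgraph,
                                 unsigned idx, const void *data);
   static void foo_run(struct pipe_context *context,
                       struct foo_ml_subgraph *subgraph);

   static void
   foo_ml_subgraph_invoke(struct pipe_context *context,
                          struct pipe_ml_subgraph *subgraph,
                          unsigned inputs_count,
                          unsigned input_idxs[],
                          void *inputs[])
   {
      struct foo_ml_subgraph *sub = (struct foo_ml_subgraph *)subgraph;

      /* inputs[i] holds the data for the tensor whose index is
       * input_idxs[i], exactly as partition_invoke() passes it above;
       * copy every input in before submitting the job. */
      for (unsigned i = 0; i < inputs_count; i++)
         foo_upload_tensor(context, sub, input_idxs[i], inputs[i]);

      foo_run(context, sub);
   }

The frontend side mirrors this in partition_invoke() above: one flat
array of buffer pointers plus the matching tensor indices, so the hook
keeps a stable signature as operations gain more inputs and outputs.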