From f9c34a3eb03a3bb8f470c1927601b5bf1a324bba Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 24 Sep 2024 11:59:45 +0200
Subject: [PATCH] teflon: Add is_signed parameter to ml_subgraph_invoke and
 ml_subgraph_read_output

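Pass a per-tensor is_signed flag, derived from the TfLite tensor type, to
the gallium driver alongside the input and output buffers. Every tensor
whose type is not one of the unsigned integer types is flagged as signed.

There probably is a better way to provide this information to the
gallium driver, but this allows the driver to apply conversions as
needed when writing input tensors and reading back output tensors.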

Reviewed-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31979>
---
 src/gallium/drivers/etnaviv/etnaviv_ml.c  |  6 +++--
 src/gallium/drivers/etnaviv/etnaviv_ml.h  |  5 ++--
 src/gallium/frontends/teflon/tfl_device.c | 30 ++++++++++++++++++-----
 src/gallium/include/pipe/p_context.h      |  7 ++++--
 4 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c
index 757f7f02c6d..4934d1afa47 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c
@@ -512,7 +512,8 @@ close_batch(struct pipe_context *pctx)
 
 void
 etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psubgraph,
-                        unsigned inputs_count, unsigned input_idxs[], void *inputs[])
+                        unsigned inputs_count, unsigned input_idxs[], void *inputs[],
+                        bool is_signed[])
 {
    struct etna_context *ctx = etna_context(pctx);
    unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count;
@@ -629,7 +630,8 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub
 
 void
 etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgraph *psubgraph,
-                              unsigned outputs_count, unsigned output_idxs[], void *outputs[])
+                              unsigned outputs_count, unsigned output_idxs[], void *outputs[],
+                              bool is_signed[])
 {
    struct etna_ml_subgraph *subgraph = (struct etna_ml_subgraph *)(psubgraph);
    unsigned operation_count = util_dynarray_num_elements(&subgraph->operations, struct etna_vip_instruction);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h
index 5f333cb299d..e5d2a18dfc0 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h
@@ -123,11 +123,12 @@ etna_ml_subgraph_create(struct pipe_context *context,
 
 void
 etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psubgraph,
-                        unsigned inputs_count, unsigned input_idxs[], void *inputs[]);
+                        unsigned inputs_count, unsigned input_idxs[], void *inputs[], bool is_signed[]);
 
 void
 etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgraph *subgraph,
-                              unsigned outputs_count, unsigned output_idxs[], void *outputs[]);
+                              unsigned outputs_count, unsigned output_idxs[], void *outputs[],
+                              bool is_signed[]);
 
 void
 etna_ml_subgraph_destroy(struct pipe_context *context, struct pipe_ml_subgraph *subgraph);
diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c
index 80bd7ee5fcb..7ecf15ba694 100644
--- a/src/gallium/frontends/teflon/tfl_device.c
+++ b/src/gallium/frontends/teflon/tfl_device.c
@@ -351,16 +351,34 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
    }
 
    void **buffers = malloc(tsubgraph->input_count * sizeof(*buffers));
-   for (unsigned i = 0; i < tsubgraph->input_count; i++)
-      buffers[i] = tf_context->tensors[tsubgraph->input_tensors[i]].data.data;
-   context->ml_subgraph_invoke(context, subgraph, tsubgraph->input_count, tsubgraph->input_tensors, buffers);
+   bool *is_signed = malloc(tsubgraph->input_count * sizeof(*is_signed));
+   for (unsigned i = 0; i < tsubgraph->input_count; i++) {
+      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->input_tensors[i]];
+
+      buffers[i] = tf_tensor.data.data;
+      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
+                       tf_tensor.type == kTfLiteUInt16 ||
+                       tf_tensor.type == kTfLiteUInt32 ||
+                       tf_tensor.type == kTfLiteUInt64);
+   }
+   context->ml_subgraph_invoke(context, subgraph, tsubgraph->input_count, tsubgraph->input_tensors, buffers, is_signed);
    free(buffers);
+   free(is_signed);
 
    buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
-   for (unsigned i = 0; i < tsubgraph->output_count; i++)
-      buffers[i] = tf_context->tensors[tsubgraph->output_tensors[i]].data.data;
-   context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers);
+   is_signed = malloc(tsubgraph->output_count * sizeof(*is_signed));
+   for (unsigned i = 0; i < tsubgraph->output_count; i++) {
+      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->output_tensors[i]];
+
+      buffers[i] = tf_tensor.data.data;
+      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
+                       tf_tensor.type == kTfLiteUInt16 ||
+                       tf_tensor.type == kTfLiteUInt32 ||
+                       tf_tensor.type == kTfLiteUInt64);
+   }
+   context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers, is_signed);
    free(buffers);
+   free(is_signed);
 
    if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
       struct timespec time;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f1159873571..b04fb80f48c 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -1261,12 +1261,13 @@ struct pipe_context {
     * \param inputs_count number of input tensors to copy in
     * \param input_idxs   array with the indices of input tensors
     * \param inputs       array of buffers to copy the tensor data from
+    * \param is_signed    array of flags, true where the buffer holds signed data
     */
    void (*ml_subgraph_invoke)(struct pipe_context *context,
                               struct pipe_ml_subgraph *subgraph,
                               unsigned inputs_count,
                               unsigned input_idxs[],
-                              void *inputs[]);
+                              void *inputs[], bool is_signed[]);
 
    /**
     * After a ML subgraph has been invoked, copy the contents of the output
@@ -1277,10 +1278,12 @@ struct pipe_context {
     * \param outputs_count number of output tensors to copy out
     * \param output_idxs   array with the indices of output tensors
     * \param outputs       array of buffers to copy the tensor data to
+    * \param is_signed     array of flags, true where the buffer holds signed data
     */
    void (*ml_subgraph_read_output)(struct pipe_context *context,
                                    struct pipe_ml_subgraph *subgraph,
-                                   unsigned outputs_count, unsigned output_idxs[], void *outputs[]);
+                                   unsigned outputs_count, unsigned output_idxs[],
+                                   void *outputs[], bool is_signed[]);
 
    /**
     * Release all resources allocated by the implementation of ml_subgraph_create