From ed2c19a411fa8668212b3a7658352aeaa773fc94 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 22 Apr 2026 15:26:57 -0500
Subject: [PATCH] ethosu: Store ethosu_tensor struct ptr in feature map

Some of the tensor info is needed at various points during lowering.
Instead of storing the tensor index and looking it up every time, store
a pointer to the tensor struct.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39975>
---
 src/gallium/drivers/ethosu/ethosu_cmd.c   | 16 ++---
 src/gallium/drivers/ethosu/ethosu_lower.c | 74 ++++++++++-------------
 src/gallium/drivers/ethosu/ethosu_ml.h    |  4 +-
 3 files changed, 42 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c
index 8f155a817f2..d092603cd96 100644
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@@ -139,7 +139,7 @@ emit_strides(
 {
    unsigned elem_size = 1;
    unsigned tensor_x, tensor_y, tensor_c;
-   struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, feature_map->tensor_idx);
+   struct ethosu_tensor *tensor = feature_map->tensor;
 
    if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
       tensor_x = 16 * elem_size;
@@ -181,10 +181,9 @@ emit_ifm_precision(struct ethosu_subgraph *subgraph,
                    struct ethosu_feature_map *feature_map,
                    enum ethosu_op_to_scale op_to_scale, uint32_t precision_cmd)
 {
-   struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, feature_map->tensor_idx);
    unsigned prec = 0;
 
-   if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
+   if (feature_map->tensor->layout == ETHOSU_LAYOUT_NHCWB16)
       prec |= NPU_SET_IFM_PRECISION_FORMAT(1);
 
    prec |= NPU_SET_IFM_PRECISION_PRECISION(feature_map->precision);
@@ -238,10 +237,9 @@ emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_ma
 static void
 emit_ofm_precision(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
 {
-   struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, operation->ofm.tensor_idx);
    unsigned prec = 0;
 
-   if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
+   if (operation->ofm.tensor->layout == ETHOSU_LAYOUT_NHCWB16)
       prec |= NPU_SET_OFM_PRECISION_FORMAT(1);
 
    prec |= NPU_SET_OFM_PRECISION_PRECISION(operation->ofm.precision);
@@ -534,13 +532,12 @@ emit_ifm2_precision(struct ethosu_subgraph *subgraph,
                     struct ethosu_operation *operation,
                     bool has_scalar)
 {
-   struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, operation->ifm2.tensor_idx);
    unsigned prec = 0;
 
    prec |= NPU_SET_IFM2_PRECISION_ACTIVATION_TYPE(operation->ifm2.is_signed);
    prec |= NPU_SET_IFM2_PRECISION_ACTIVATION_PRECISION(operation->ifm2.precision);
 
-   if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
+   if (operation->ifm2.tensor->layout == ETHOSU_LAYOUT_NHCWB16)
       prec |= NPU_SET_IFM2_PRECISION_ACTIVATION_FORMAT(1);
 
    /* Vela: scalar → NONE(3), non-scalar → TILE2X2(0) */
@@ -1087,10 +1084,9 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op
 
    /* Check if previous OFM matches current IFM (same tensor) */
    int ifm_index = 0;
-   if (operation->ifm2.tensor_idx != 0 &&
-       operation->ifm2.tensor_idx == prev_op->ofm.tensor_idx) {
+   if (operation->ifm2.tensor == prev_op->ofm.tensor) {
       ifm_index = 1;
-   } else if (operation->ifm.tensor_idx != prev_op->ofm.tensor_idx) {
+   } else if (operation->ifm.tensor != prev_op->ofm.tensor) {
       /* Previous operation doesn't produce current operation's IFM */
       return device->max_concurrent_blocks;
    }
diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c
index e9441cbc915..7ab313aadab 100644
--- a/src/gallium/drivers/ethosu/ethosu_lower.c
+++ b/src/gallium/drivers/ethosu/ethosu_lower.c
@@ -31,27 +31,28 @@ needed_total_padding(int input_size, int stride, int filter_size)
 }
 
 static void
-set_feature_maps(struct pipe_tensor *input_tensor,
+set_feature_map(struct ethosu_subgraph *subgraph,
+                struct pipe_tensor *tensor,
+                struct ethosu_feature_map *fm)
+{
+   fm->tensor = ethosu_find_tensor(subgraph, tensor->index);
+   fm->shape.height = tensor->dims[1];
+   fm->shape.width = tensor->dims[2];
+   fm->shape.depth = tensor->dims[3];
+   fm->zero_point = tensor->zero_point;
+   fm->scale = tensor->scale;
+   fm->is_signed = tensor->is_signed;
+   fm->precision = log2(tensor->type_size);
+}
+
+static void
+set_feature_maps(struct ethosu_subgraph *subgraph,
+                 struct pipe_tensor *input_tensor,
                  struct pipe_tensor *output_tensor,
                  struct ethosu_operation *operation)
 {
-   operation->ifm.tensor_idx = input_tensor->index;
-   operation->ifm.shape.height = input_tensor->dims[1];
-   operation->ifm.shape.width = input_tensor->dims[2];
-   operation->ifm.shape.depth = input_tensor->dims[3];
-   operation->ifm.zero_point = input_tensor->zero_point;
-   operation->ifm.scale = input_tensor->scale;
-   operation->ifm.is_signed = input_tensor->is_signed;
-   operation->ifm.precision = log2(input_tensor->type_size);
-
-   operation->ofm.tensor_idx = output_tensor->index;
-   operation->ofm.shape.height = output_tensor->dims[1];
-   operation->ofm.shape.width = output_tensor->dims[2];
-   operation->ofm.shape.depth = output_tensor->dims[3];
-   operation->ofm.zero_point = output_tensor->zero_point;
-   operation->ofm.scale = output_tensor->scale;
-   operation->ofm.is_signed = output_tensor->is_signed;
-   operation->ofm.precision = log2(output_tensor->type_size);
+   set_feature_map(subgraph, input_tensor, &operation->ifm);
+   set_feature_map(subgraph, output_tensor, &operation->ofm);
 }
 
 static const struct pipe_ml_operation *
@@ -70,9 +71,8 @@ ethosu_find_first_consumer(const struct pipe_ml_operation *poperations,
 }
 
 static unsigned
-ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, unsigned tensor_idx)
+ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, struct ethosu_tensor *tensor)
 {
-   struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, tensor_idx);
    unsigned size;
 
    if (tensor->layout == ETHOSU_LAYOUT_NHWC) {
@@ -100,12 +100,12 @@ ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, unsigned tensor_id
 static void
 allocate_feature_maps(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
 {
-   operation->ofm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ofm.tensor_idx);
+   operation->ofm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ofm.tensor);
    operation->ofm.tiles.height_0 = operation->ofm.shape.height;
    operation->ofm.tiles.height_1 = operation->ofm.shape.height;
    operation->ofm.tiles.width_0 = operation->ofm.shape.width;
 
-   operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor_idx);
+   operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor);
    operation->ifm.tiles.height_0 = operation->ifm.shape.height;
    operation->ifm.tiles.height_1 = operation->ifm.shape.height;
    operation->ifm.tiles.width_0 = operation->ifm.shape.width;
@@ -152,7 +152,7 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
 
    operation->conv.depthwise = is_depthwise(poperation);
 
-   set_feature_maps(input_tensor, poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, input_tensor, poperation->output_tensors[0], operation);
 
    operation->kernel.height = poperation->conv.weight_tensor->dims[1];
    operation->kernel.width = poperation->conv.weight_tensor->dims[2];
@@ -217,7 +217,7 @@ ethosu_lower_pooling(struct ethosu_subgraph *subgraph,
       assert(0 && "Unsupported pooling type");
    }
 
-   set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, poperation->input_tensors[0], poperation->output_tensors[0], operation);
 
    operation->kernel.height = poperation->pooling.filter_height;
    operation->kernel.width = poperation->pooling.filter_width;
@@ -247,16 +247,14 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
    } else
       operation->pooling.type = ETHOSU_POOLING_TYPE_SUM;
 
-   set_feature_maps(poperation->input_tensors[input_idx], poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, poperation->input_tensors[input_idx], poperation->output_tensors[0], operation);
    operation->ofm.shape.depth = operation->ifm.shape.depth;
 
    allocate_feature_maps(subgraph, operation);
    for (unsigned i = 0; i < input_idx; i++) {
-      struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, operation->ofm.tensor_idx);
-
-      if (tensor->layout == ETHOSU_LAYOUT_NHWC)
+      if (operation->ofm.tensor->layout == ETHOSU_LAYOUT_NHWC)
          operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[3];
-      else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
+      else if (operation->ofm.tensor->layout == ETHOSU_LAYOUT_NHCWB16)
          operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * align(poperation->input_tensors[i]->dims[3], 16);
       else
          assert(0 && "Unsupported layout");
@@ -273,7 +271,7 @@ ethosu_lower_resize(struct ethosu_subgraph *subgraph,
    operation->type = ETHOSU_OPERATION_TYPE_POOLING;
    operation->pooling.type = ETHOSU_POOLING_TYPE_AVG;
 
-   set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, poperation->input_tensors[0], poperation->output_tensors[0], operation);
 
    operation->upscale = ETHOSU_UPSCALE_NEAREST;
 
@@ -289,7 +287,7 @@ ethosu_lower_strided_slice(struct ethosu_subgraph *subgraph,
    operation->type = ETHOSU_OPERATION_TYPE_POOLING;
    operation->pooling.type = ETHOSU_POOLING_TYPE_AVG;
 
-   set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, poperation->input_tensors[0], poperation->output_tensors[0], operation);
    operation->ifm.shape = operation->ofm.shape;
 
    allocate_feature_maps(subgraph, operation);
@@ -340,16 +338,10 @@ ethosu_lower_add(struct ethosu_subgraph *subgraph,
       operation->eltwise.ifm_reversed = true;
    }
 
-   set_feature_maps(poperation->input_tensors[ifm_idx], poperation->output_tensors[0], operation);
+   set_feature_maps(subgraph, poperation->input_tensors[ifm_idx], poperation->output_tensors[0], operation);
+
+   set_feature_map(subgraph, poperation->input_tensors[ifm2_idx], &operation->ifm2);
 
-   operation->ifm2.tensor_idx = poperation->input_tensors[ifm2_idx]->index;
-   operation->ifm2.shape.height = poperation->input_tensors[ifm2_idx]->dims[1];
-   operation->ifm2.shape.width = poperation->input_tensors[ifm2_idx]->dims[2];
-   operation->ifm2.shape.depth = poperation->input_tensors[ifm2_idx]->dims[3];
-   operation->ifm2.zero_point = poperation->input_tensors[ifm2_idx]->zero_point;
-   operation->ifm2.scale = poperation->input_tensors[ifm2_idx]->scale;
-   operation->ifm2.is_signed = poperation->input_tensors[ifm2_idx]->is_signed;
-   operation->ifm2.precision = log2(poperation->input_tensors[ifm2_idx]->type_size);
    if (poperation->input_tensors[ifm2_idx]->data &&
        operation->ifm2.shape.width == 1 &&
        operation->ifm2.shape.height == 1 &&
@@ -361,7 +353,7 @@ ethosu_lower_add(struct ethosu_subgraph *subgraph,
 
    allocate_feature_maps(subgraph, operation);
 
-   operation->ifm2.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm2.tensor_idx);
+   operation->ifm2.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm2.tensor);
    operation->ifm2.tiles.height_0 = operation->ifm2.shape.height;
    operation->ifm2.tiles.height_1 = operation->ifm2.shape.height;
    operation->ifm2.tiles.width_0 = operation->ifm2.shape.width;
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h
index ecd0cd64296..e4dca781211 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@@ -66,8 +66,10 @@ enum ethosu_upscale_mode {
    ETHOSU_UPSCALE_ZEROS = 2,
 };
 
+struct ethosu_tensor;
+
 struct ethosu_feature_map {
-   unsigned tensor_idx;
+   struct ethosu_tensor *tensor;
    struct ethosu_block shape;
    bool is_signed;
    uint8_t precision;