ethosu: Fix concatenation OFM scaling

Some pooling operations like concatenation are NOPs requiring different scaling calculations.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39975>
2026-05-07 11:28:05 +02:00 · 2026-04-14 09:17:20 -05:00 · 2026-04-14 09:17:20 -05:00 · aaaca26fd2
commit aaaca26fd2
parent d772f36741
5 changed files with 55 additions and 38 deletions
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@ -472,38 +472,45 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
   emit_common(subgraph, operation, false);
-   switch (operation->pooling.type) {
+   if (operation->pooling.nop) {
-   case ETHOSU_POOLING_TYPE_MAX: {
+      scale = ethosu_quantize_scale(
-      if (!ethosu_ml_device(subgraph->base.device)->is_u65) {
+         operation->ifm.scale / operation->ofm.scale,
-         EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1);
+         &scale_shift, true);
         break;
      } else
         FALLTHROUGH;
   }
   case ETHOSU_POOLING_TYPE_AVG: {
      scale = pooling_emit_ofm_scaling(
         operation->ifm.scale,
         operation->ofm.scale,
         operation->kernel.height,
         operation->kernel.width,
         &scale_shift);
      EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift), scale);
-      break;
+   } else {
-   }
+      switch (operation->pooling.type) {
-   case ETHOSU_POOLING_TYPE_SUM: {
+      case ETHOSU_POOLING_TYPE_MAX: {
-      scale = sum_emit_ofm_scaling(
+         if (!ethosu_ml_device(subgraph->base.device)->is_u65) {
-         operation->ifm.scale,
+            EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1);
-         operation->ofm.scale,
+            break;
-         operation->kernel.height,
+         } else
-         operation->kernel.width,
+            FALLTHROUGH;
-         &scale_shift);
+      }
      case ETHOSU_POOLING_TYPE_AVG: {
         scale = pooling_emit_ofm_scaling(
            operation->ifm.scale,
            operation->ofm.scale,
            operation->kernel.height,
            operation->kernel.width,
            &scale_shift);
-      EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift) | NPU_SET_OFM_SCALE_ROUND_MODE(1), scale);
+         EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift), scale);
-      break;
+         break;
-   }
+      }
-   default:
+      case ETHOSU_POOLING_TYPE_SUM: {
-      UNREACHABLE("Invalid pooling type");
+         scale = sum_emit_ofm_scaling(
            operation->ifm.scale,
            operation->ofm.scale,
            operation->kernel.height,
            operation->kernel.width,
            &scale_shift);
         EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift) | NPU_SET_OFM_SCALE_ROUND_MODE(1), scale);
         break;
      }
      default:
         UNREACHABLE("Invalid pooling type");
      }
   }
   emit_block_config(subgraph, operation);
@ -658,8 +665,8 @@ eltwise_emit_ofm_scaling(
   input_rescale = min_input_scale * input_shift_val / (2.0 * max_input_scale);
   output_rescale = (2.0 * max_input_scale) / (output_scale * input_shift_val);
-   opa_scale = ethosu_quantize_scale(input_rescale, &opa_shift);
+   opa_scale = ethosu_quantize_scale(input_rescale, &opa_shift, false);
-   ofm_scale = ethosu_quantize_scale(output_rescale, &ofm_shift);
+   ofm_scale = ethosu_quantize_scale(output_rescale, &ofm_shift, false);
   if (DBG_ENABLED(ETHOSU_DBG_MSGS)) {
      fprintf(stderr, "ADD advanced scaling: ifm1_scale=%f ifm2_scale=%f ofm_scale=%f\n",
@ -711,7 +718,7 @@ simplified_elementwise_add_sub_scale(
      output_rescale_val = (2.0 * max_input_scale) / (output_scale * input_shift_val);
   }
-   *out_out_scale = ethosu_quantize_scale(output_rescale_val, out_out_shift);
+   *out_out_scale = ethosu_quantize_scale(output_rescale_val, out_out_shift, false);
 }
 /*
@ -739,8 +746,8 @@ eltwise_emit_ofm_scaling_u85(
      &input1_rescale, &input2_rescale,
      &ofm_scale, &ofm_shift);
-   opa_scale = ethosu_quantize_scale(input1_rescale, &opa_shift);
+   opa_scale = ethosu_quantize_scale(input1_rescale, &opa_shift, false);
-   opb_scale = ethosu_quantize_scale(input2_rescale, &opb_shift);
+   opb_scale = ethosu_quantize_scale(input2_rescale, &opb_shift, false);
   EMIT1(NPU_SET_OPA_SCALE,
         NPU_SET_OPA_SCALE_SHIFT(opa_shift) | NPU_SET_OPA_SCALE_DBL_RND(input_shift),
--- a/src/gallium/drivers/ethosu/ethosu_coefs.c
+++ b/src/gallium/drivers/ethosu/ethosu_coefs.c
@ -85,7 +85,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
      }
      int32_t shift;
-      int scale = ethosu_quantize_scale(conv_scale, &shift);
+      int scale = ethosu_quantize_scale(conv_scale, &shift, false);
      uint64_t bias = biases ? biases[i] : 0;
--- a/src/gallium/drivers/ethosu/ethosu_lower.c
+++ b/src/gallium/drivers/ethosu/ethosu_lower.c
@ -264,6 +264,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
      operation->round_mode = ETHOSU_ROUNDING_NATURAL;
   } else
      operation->pooling.type = ETHOSU_POOLING_TYPE_SUM;
   operation->pooling.nop = true;
   set_feature_maps(subgraph, poperation->input_tensors[input_idx], poperation->output_tensors[0], operation);
   operation->ofm.shape.depth = operation->ifm.shape.depth;
--- a/src/gallium/drivers/ethosu/ethosu_ml.c
+++ b/src/gallium/drivers/ethosu/ethosu_ml.c
@ -83,12 +83,20 @@ ethosu_round_up_divide(int a, int b)
 }
 int
-ethosu_quantize_scale(double scale, int32_t *shift)
+ethosu_quantize_scale(double scale, int32_t *shift, bool reduced)
 {
   int exponent = 0;
   double significand = frexp(scale, &exponent);
   int32_t quantized_scale = round(significand * (double)(1LL << 31));
   *shift = 31 - exponent;
   if (reduced) {
      quantized_scale = (quantized_scale >> 16) + (quantized_scale >> 15 & 1);
      // make sure reduced scale does not overflow
      quantized_scale = MIN2(quantized_scale, 0x7FFF);
      *shift -= 16;
   }
   if (*shift > 63) {
      if (quantized_scale > exp2(*shift - 63)) {
         quantized_scale = quantized_scale >> (*shift - 63);
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@ -168,6 +168,7 @@ struct ethosu_operation {
      struct {
         enum ethosu_pooling_type type;
         bool nop;
      } pooling;
      struct {
@ -278,6 +279,6 @@ int ethosu_round_up_to_multiple(int a, int b);
 int ethosu_round_up_divide(int a, int b);
-int ethosu_quantize_scale(double scale, int32_t *shift);
+int ethosu_quantize_scale(double scale, int32_t *shift, bool reduced);
 #endif /* ETHOSU_ML_H */