teflon: Reject per-axis quantization

Until a workaround for missing hardware support is implemented, stop
pretending to support convolution operations on tensors with per-axis
quantization.

Reviewed-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32387>
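
For reference, the distinction the new check relies on: with TFLite affine quantization, a per-tensor quantized tensor carries exactly one scale/zero-point pair, while a per-axis (per-channel) tensor carries one pair per slice along quantized_dimension, so the size of the scale array tells the two apart. Below is a minimal standalone sketch of that check; the FloatArray/IntArray/AffineQuantization structs are simplified stand-ins for the TfLite* types from TensorFlow Lite's common.h (so it builds without the TFLite headers), and the 32-channel weight example is an arbitrary illustration, not taken from the patch.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for TfLiteFloatArray, TfLiteIntArray and
 * TfLiteAffineQuantization; the real definitions live in
 * tensorflow/lite/c/common.h. */
typedef struct { int size; float data[]; } FloatArray;
typedef struct { int size; int data[]; } IntArray;
typedef struct {
   FloatArray *scale;
   IntArray *zero_point;
   int quantized_dimension; /* axis the per-axis pairs apply along */
} AffineQuantization;

static FloatArray *float_array_create(int size)
{
   FloatArray *a = malloc(sizeof(*a) + size * sizeof(float));
   a->size = size;
   return a;
}

static IntArray *int_array_create(int size)
{
   IntArray *a = malloc(sizeof(*a) + size * sizeof(int));
   a->size = size;
   return a;
}

/* Same test as tensor_quantization_supported() in the patch below:
 * per-tensor quantization means a single scale and a single zero point. */
static int quantization_supported(const AffineQuantization *q)
{
   return q->scale->size == 1 && q->zero_point->size == 1;
}

int main(void)
{
   /* Per-tensor: one scale/zero-point pair for the whole tensor. */
   AffineQuantization per_tensor = {
      float_array_create(1), int_array_create(1), 0
   };

   /* Per-axis: one pair per output channel, e.g. a conv weight tensor
    * with 32 filters quantized along dimension 0. Only the array sizes
    * matter for the check, so the values are left unset. */
   AffineQuantization per_axis = {
      float_array_create(32), int_array_create(32), 0
   };

   printf("per-tensor supported: %d\n", quantization_supported(&per_tensor));
   printf("per-axis supported:   %d\n", quantization_supported(&per_axis));
   return 0;
}

When the check fails, the node is simply not claimed for the NPU, and the runtime falls back to its built-in CPU kernel for that operation.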
@@ -390,6 +390,21 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
    return kTfLiteOk;
 }
+
+static bool
+tensor_quantization_supported(TfLiteTensor *tensor)
+{
+   if (tensor->quantization.type == kTfLiteAffineQuantization) {
+      TfLiteAffineQuantization *affine = (TfLiteAffineQuantization *)tensor->quantization.params;
+      /*
+       * Per-axis quantization not supported, for details see:
+       * https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
+       */
+      return affine->scale->size == 1 && affine->zero_point->size == 1;
+   }
+   return false;
+}
 
 static TfLiteStatus
 PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
 {
@@ -410,10 +425,18 @@ PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
       switch(registration->builtin_code) {
          case kTfLiteBuiltinConv2d: {
             TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
+            TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
+            TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
+            TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
             TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;
-            // Dilation not yet implemented
-            if ((params->activation == kTfLiteActNone ||
+            // Dilation and per-axis quantization not yet implemented
+            if (tensor_quantization_supported(input_tensor) &&
+                tensor_quantization_supported(weight_tensor) &&
+                tensor_quantization_supported(bias_tensor) &&
+                tensor_quantization_supported(output_tensor) &&
+                (params->activation == kTfLiteActNone ||
                  params->activation == kTfLiteActRelu) &&
                 (registration->version < 2 ||
                  (params->dilation_width_factor == 1 &&
@@ -423,10 +446,18 @@ PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
             break;
          }
          case kTfLiteBuiltinDepthwiseConv2d: {
             TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
+            TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
+            TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
+            TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
             TfLiteDepthwiseConvParams* params = (TfLiteDepthwiseConvParams*)node->builtin_data;
-            // Dilation not yet implemented
-            if ((params->activation == kTfLiteActNone ||
+            // Dilation and per-axis quantization not yet implemented
+            if (tensor_quantization_supported(input_tensor) &&
+                tensor_quantization_supported(weight_tensor) &&
+                tensor_quantization_supported(bias_tensor) &&
+                tensor_quantization_supported(output_tensor) &&
+                (params->activation == kTfLiteActNone ||
                  params->activation == kTfLiteActRelu) &&
                 (registration->version < 2 ||
                  (params->dilation_width_factor == 1 &&
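
For orientation, the guards above run inside the delegate's Prepare step, which walks the model's execution plan and claims only the nodes that pass every check; anything unclaimed stays on the CPU path. The sketch below shows the generic shape of that loop using the stock TFLite delegate API, not teflon's actual code: node_supported() and delegate_registration are hypothetical placeholders standing in for the real per-op checks and kernel registration.

#include <stdbool.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"

/* Hypothetical stand-in for the real per-op checks (builtin code,
 * activation, dilation, per-tensor quantization, ...). */
static bool node_supported(TfLiteContext *context, TfLiteNode *node,
                           TfLiteRegistration *registration)
{
   (void)context; (void)node;
   return registration->builtin_code == kTfLiteBuiltinConv2d ||
          registration->builtin_code == kTfLiteBuiltinDepthwiseConv2d;
}

/* Hypothetical kernel registration the delegate substitutes in;
 * a real delegate fills in init/prepare/invoke callbacks. */
static TfLiteRegistration delegate_registration = {
   .custom_name = "sketch-delegate",
};

static TfLiteStatus prepare_sketch(TfLiteContext *context, TfLiteDelegate *delegate)
{
   TfLiteIntArray *plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

   /* Collect the indices of the nodes that pass every check. */
   TfLiteIntArray *supported = TfLiteIntArrayCreate(plan->size);
   supported->size = 0;

   for (int i = 0; i < plan->size; i++) {
      TfLiteNode *node;
      TfLiteRegistration *registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, plan->data[i], &node, &registration));

      if (node_supported(context, node, registration))
         supported->data[supported->size++] = plan->data[i];
   }

   /* Everything not in `supported` keeps its built-in CPU kernel. */
   TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context, delegate_registration, supported, delegate);

   TfLiteIntArrayFree(supported);
   return status;
}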