ethosu: Add support for per-channel quantization

Add support for models whose coefficient tensors carry different
quantization parameters for each output channel.

The NPU can handle per-channel scales as can be seen in
fill_scale_and_biases(), which already iterates per output channel.

Activation tensors (input/output) don't have per-channel quantization.

- Add scales/zero_points arrays to ethosu_kernel struct
- Copy per-channel scales from weight tensor in lower pass
- Use per-channel scale when computing conv_scale in coefs
- Allow per-channel quantization in operation_supported check

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39594>
This commit is contained in:
Anders Roxell 2026-01-15 15:50:18 +01:00 committed by Marge Bot
parent 0887e6d89f
commit 63c028b5e0
4 changed files with 33 additions and 24 deletions

View file

@@ -22,7 +22,9 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
for (unsigned i = 0; i < operation->ofm.shape.depth; i++) {
uint64_t bias = biases[i];
double conv_scale = ((double)operation->ifm.scale * (double)operation->kernel.scale) / (double)operation->ofm.scale;
double kernel_scale = (operation->kernel.scales != NULL) ?
operation->kernel.scales[i] : operation->kernel.scale;
double conv_scale = ((double)operation->ifm.scale * kernel_scale) / (double)operation->ofm.scale;
uint32_t shift;
int scale = ethosu_quantize_scale(conv_scale, &shift);

View file

@@ -125,8 +125,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
operation->type = ETHOSU_OPERATION_TYPE_CONVOLUTION;
operation->conv.depthwise = is_depthwise(poperation);
// operation->padding_same = poperation->conv.padding_same;
// operation->stride = poperation->conv.stride_x;
set_feature_maps(input_tensor, poperation->output_tensors[0], operation);
@@ -141,6 +139,24 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
operation->kernel.zero_point = poperation->conv.weight_tensor->zero_point;
operation->kernel.is_signed = poperation->conv.weight_tensor->is_signed;
/* Per-channel quantization support */
struct pipe_tensor *weight = poperation->conv.weight_tensor;
if (weight->scales != NULL) {
unsigned num_channels = poperation->output_tensors[0]->dims[3];
operation->kernel.scales = malloc(num_channels * sizeof(float));
memcpy(operation->kernel.scales, weight->scales, num_channels * sizeof(float));
if (weight->zero_points != NULL) {
operation->kernel.zero_points = malloc(num_channels * sizeof(int));
memcpy(operation->kernel.zero_points, weight->zero_points, num_channels * sizeof(int));
} else {
operation->kernel.zero_points = NULL;
}
} else {
operation->kernel.scales = NULL;
operation->kernel.zero_points = NULL;
}
operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation);
if (poperation->conv.padding_same) {

View file

@@ -137,16 +137,6 @@ ethosu_quantize_scale(double scale, uint32_t *shift)
return quantized_scale;
}
static bool
tensor_quantization_supported(struct pipe_tensor *tensor)
{
/*
* Per-axis quantization not supported, for details see:
* https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
*/
return tensor->scales == NULL && tensor->zero_points == NULL;
}
bool
ethosu_ml_operation_supported(struct pipe_context *pcontext,
const struct pipe_ml_operation *operation)
@@ -155,17 +145,10 @@ ethosu_ml_operation_supported(struct pipe_context *pcontext,
switch (operation->type) {
case PIPE_ML_OPERATION_TYPE_CONVOLUTION: {
struct pipe_tensor *input_tensor = operation->input_tensors[0];
struct pipe_tensor *weight_tensor = operation->conv.weight_tensor;
struct pipe_tensor *bias_tensor = operation->conv.bias_tensor;
struct pipe_tensor *output_tensor = operation->output_tensors[0];
// Dilation and per-axis quantization not yet implemented
if (tensor_quantization_supported(input_tensor) &&
tensor_quantization_supported(weight_tensor) &&
tensor_quantization_supported(bias_tensor) &&
tensor_quantization_supported(output_tensor) &&
operation->conv.dilation_width_factor == 1 &&
/*
* Dilation is not yet implemented.
*/
if (operation->conv.dilation_width_factor == 1 &&
operation->conv.dilation_height_factor == 1)
supported = true;
@@ -356,7 +339,12 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
free(operation->kernel.scales);
free(operation->kernel.zero_points);
}
util_dynarray_fini(&subgraph->operations);
util_dynarray_fini(&subgraph->tensors);
free(subgraph);

View file

@@ -82,6 +82,9 @@ struct ethosu_kernel {
bool is_signed;
unsigned zero_point;
float scale;
/* Per-channel quantization (NULL for per-tensor) */
float *scales;
int *zero_points;
};
struct ethosu_padding {