mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
ethosu: Add support for per-channel quantization
Support models whose coefficient tensors have different quantization parameters for each channel. The NPU can handle per-channel scales, as can be seen in fill_scale_and_biases(), which already iterates per output channel. Activation tensors (input/output) do not have per-channel quantization. - Add scales/zero_points arrays to the ethosu_kernel struct - Copy per-channel scales from the weight tensor in the lower pass - Use the per-channel scale when computing conv_scale in coefs - Allow per-channel quantization in the operation_supported check Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39594>
This commit is contained in:
parent
0887e6d89f
commit
63c028b5e0
4 changed files with 33 additions and 24 deletions
|
|
@ -22,7 +22,9 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
|
|||
|
||||
for (unsigned i = 0; i < operation->ofm.shape.depth; i++) {
|
||||
uint64_t bias = biases[i];
|
||||
double conv_scale = ((double)operation->ifm.scale * (double)operation->kernel.scale) / (double)operation->ofm.scale;
|
||||
double kernel_scale = (operation->kernel.scales != NULL) ?
|
||||
operation->kernel.scales[i] : operation->kernel.scale;
|
||||
double conv_scale = ((double)operation->ifm.scale * kernel_scale) / (double)operation->ofm.scale;
|
||||
uint32_t shift;
|
||||
int scale = ethosu_quantize_scale(conv_scale, &shift);
|
||||
|
||||
|
|
|
|||
|
|
@ -125,8 +125,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
|
|||
operation->type = ETHOSU_OPERATION_TYPE_CONVOLUTION;
|
||||
|
||||
operation->conv.depthwise = is_depthwise(poperation);
|
||||
// operation->padding_same = poperation->conv.padding_same;
|
||||
// operation->stride = poperation->conv.stride_x;
|
||||
|
||||
set_feature_maps(input_tensor, poperation->output_tensors[0], operation);
|
||||
|
||||
|
|
@ -141,6 +139,24 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
|
|||
operation->kernel.zero_point = poperation->conv.weight_tensor->zero_point;
|
||||
operation->kernel.is_signed = poperation->conv.weight_tensor->is_signed;
|
||||
|
||||
/* Per-channel quantization support */
|
||||
struct pipe_tensor *weight = poperation->conv.weight_tensor;
|
||||
if (weight->scales != NULL) {
|
||||
unsigned num_channels = poperation->output_tensors[0]->dims[3];
|
||||
operation->kernel.scales = malloc(num_channels * sizeof(float));
|
||||
memcpy(operation->kernel.scales, weight->scales, num_channels * sizeof(float));
|
||||
|
||||
if (weight->zero_points != NULL) {
|
||||
operation->kernel.zero_points = malloc(num_channels * sizeof(int));
|
||||
memcpy(operation->kernel.zero_points, weight->zero_points, num_channels * sizeof(int));
|
||||
} else {
|
||||
operation->kernel.zero_points = NULL;
|
||||
}
|
||||
} else {
|
||||
operation->kernel.scales = NULL;
|
||||
operation->kernel.zero_points = NULL;
|
||||
}
|
||||
|
||||
operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation);
|
||||
|
||||
if (poperation->conv.padding_same) {
|
||||
|
|
|
|||
|
|
@ -137,16 +137,6 @@ ethosu_quantize_scale(double scale, uint32_t *shift)
|
|||
return quantized_scale;
|
||||
}
|
||||
|
||||
static bool
|
||||
tensor_quantization_supported(struct pipe_tensor *tensor)
|
||||
{
|
||||
/*
|
||||
* Per-axis quantization not supported, for details see:
|
||||
* https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
|
||||
*/
|
||||
return tensor->scales == NULL && tensor->zero_points == NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
ethosu_ml_operation_supported(struct pipe_context *pcontext,
|
||||
const struct pipe_ml_operation *operation)
|
||||
|
|
@ -155,17 +145,10 @@ ethosu_ml_operation_supported(struct pipe_context *pcontext,
|
|||
|
||||
switch (operation->type) {
|
||||
case PIPE_ML_OPERATION_TYPE_CONVOLUTION: {
|
||||
struct pipe_tensor *input_tensor = operation->input_tensors[0];
|
||||
struct pipe_tensor *weight_tensor = operation->conv.weight_tensor;
|
||||
struct pipe_tensor *bias_tensor = operation->conv.bias_tensor;
|
||||
struct pipe_tensor *output_tensor = operation->output_tensors[0];
|
||||
|
||||
// Dilation and per-axis quantization not yet implemented
|
||||
if (tensor_quantization_supported(input_tensor) &&
|
||||
tensor_quantization_supported(weight_tensor) &&
|
||||
tensor_quantization_supported(bias_tensor) &&
|
||||
tensor_quantization_supported(output_tensor) &&
|
||||
operation->conv.dilation_width_factor == 1 &&
|
||||
/*
|
||||
* Dilation is not yet implemented.
|
||||
*/
|
||||
if (operation->conv.dilation_width_factor == 1 &&
|
||||
operation->conv.dilation_height_factor == 1)
|
||||
supported = true;
|
||||
|
||||
|
|
@ -356,7 +339,12 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
|
|||
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||
assert(ret >= 0);
|
||||
|
||||
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||
free(operation->kernel.scales);
|
||||
free(operation->kernel.zero_points);
|
||||
}
|
||||
util_dynarray_fini(&subgraph->operations);
|
||||
|
||||
util_dynarray_fini(&subgraph->tensors);
|
||||
|
||||
free(subgraph);
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ struct ethosu_kernel {
|
|||
bool is_signed;
|
||||
unsigned zero_point;
|
||||
float scale;
|
||||
/* Per-channel quantization (NULL for per-tensor) */
|
||||
float *scales;
|
||||
int *zero_points;
|
||||
};
|
||||
|
||||
struct ethosu_padding {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue