diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index 0fed9e4a929..a00e3939cef 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -22,7 +22,9 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation for (unsigned i = 0; i < operation->ofm.shape.depth; i++) { uint64_t bias = biases[i]; - double conv_scale = ((double)operation->ifm.scale * (double)operation->kernel.scale) / (double)operation->ofm.scale; + double kernel_scale = (operation->kernel.scales != NULL) ? + operation->kernel.scales[i] : operation->kernel.scale; + double conv_scale = ((double)operation->ifm.scale * kernel_scale) / (double)operation->ofm.scale; uint32_t shift; int scale = ethosu_quantize_scale(conv_scale, &shift); diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index 86111a191bc..5b9469a5541 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -125,8 +125,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph, operation->type = ETHOSU_OPERATION_TYPE_CONVOLUTION; operation->conv.depthwise = is_depthwise(poperation); - // operation->padding_same = poperation->conv.padding_same; - // operation->stride = poperation->conv.stride_x; set_feature_maps(input_tensor, poperation->output_tensors[0], operation); @@ -141,6 +139,24 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph, operation->kernel.zero_point = poperation->conv.weight_tensor->zero_point; operation->kernel.is_signed = poperation->conv.weight_tensor->is_signed; + /* Per-channel quantization support */ + struct pipe_tensor *weight = poperation->conv.weight_tensor; + if (weight->scales != NULL) { + unsigned num_channels = poperation->output_tensors[0]->dims[3]; + operation->kernel.scales = malloc(num_channels * sizeof(float)); + memcpy(operation->kernel.scales, weight->scales, num_channels * sizeof(float)); + + if (weight->zero_points != NULL) { + operation->kernel.zero_points = malloc(num_channels * sizeof(int)); + memcpy(operation->kernel.zero_points, weight->zero_points, num_channels * sizeof(int)); + } else { + operation->kernel.zero_points = NULL; + } + } else { + operation->kernel.scales = NULL; + operation->kernel.zero_points = NULL; + } + operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation); if (poperation->conv.padding_same) { diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index 8230bdc7661..32d0ff08250 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -137,16 +137,6 @@ ethosu_quantize_scale(double scale, uint32_t *shift) return quantized_scale; } -static bool -tensor_quantization_supported(struct pipe_tensor *tensor) -{ - /* - * Per-axis quantization not supported, for details see: - * https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor - */ - return tensor->scales == NULL && tensor->zero_points == NULL; -} - bool ethosu_ml_operation_supported(struct pipe_context *pcontext, const struct pipe_ml_operation *operation) @@ -155,17 +145,10 @@ ethosu_ml_operation_supported(struct pipe_context *pcontext, switch (operation->type) { case PIPE_ML_OPERATION_TYPE_CONVOLUTION: { - struct pipe_tensor *input_tensor = operation->input_tensors[0]; - struct pipe_tensor *weight_tensor = operation->conv.weight_tensor; - struct pipe_tensor *bias_tensor = operation->conv.bias_tensor; - struct pipe_tensor *output_tensor = operation->output_tensors[0]; - - // Dilation and per-axis quantization not yet implemented - if (tensor_quantization_supported(input_tensor) && - tensor_quantization_supported(weight_tensor) && - tensor_quantization_supported(bias_tensor) && - tensor_quantization_supported(output_tensor) && - operation->conv.dilation_width_factor == 1 && + /* + * Dilation is not yet implemented. + */ + if (operation->conv.dilation_width_factor == 1 && operation->conv.dilation_height_factor == 1) supported = true; @@ -356,7 +339,12 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext, ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); assert(ret >= 0); + util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) { + free(operation->kernel.scales); + free(operation->kernel.zero_points); + } util_dynarray_fini(&subgraph->operations); + util_dynarray_fini(&subgraph->tensors); free(subgraph); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index 9dc9bbe9869..1ce7ce7ac5c 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -82,6 +82,9 @@ struct ethosu_kernel { bool is_signed; unsigned zero_point; float scale; + /* Per-channel quantization (NULL for per-tensor) */ + float *scales; + int *zero_points; }; struct ethosu_padding {