ethosu: Handle per-channel zero_points

fill_weights subtracted a single zero_point from all weights, which
is incorrect for models with per-channel zero_points. Use the
per-channel zero_point for each output channel when available.

Also decouple the zero_points copy from the scales copy in the lowering
pass (ethosu_lower_convolution) so they are handled independently.

Suggested-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39594>
This commit is contained in:
Anders Roxell 2026-02-18 13:26:45 +01:00 committed by Marge Bot
parent 63c028b5e0
commit e27ba5b437
2 changed files with 23 additions and 11 deletions

View file

@ -76,11 +76,23 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
uint8_t *input_weights_8 = pipe_buffer_map(subgraph->base.context, weight_rsrc,
PIPE_MAP_READ, &transfer_in);
int16_t *input_weights = malloc(pipe_buffer_size(weight_rsrc) * sizeof(*input_weights));
for (int i = 0; i < pipe_buffer_size(weight_rsrc); i++) {
unsigned num_weights = pipe_buffer_size(weight_rsrc);
unsigned output_channels = operation->ofm.shape.depth;
unsigned oc_stride = output_channels > 0 ? num_weights / output_channels : num_weights;
for (unsigned i = 0; i < num_weights; i++) {
int zp;
if (operation->kernel.zero_points) {
unsigned ch = operation->kernel.depthwise ? i % output_channels : i / oc_stride;
zp = operation->kernel.zero_points[ch];
} else {
zp = operation->kernel.zero_point;
}
if (operation->kernel.is_signed)
input_weights[i] = (int8_t)input_weights_8[i] - operation->kernel.zero_point;
input_weights[i] = (int8_t)input_weights_8[i] - zp;
else
input_weights[i] = input_weights_8[i] - operation->kernel.zero_point;
input_weights[i] = input_weights_8[i] - zp;
}
pipe_buffer_unmap(subgraph->base.context, transfer_in);

View file

@ -141,19 +141,19 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
/* Per-channel quantization support */
struct pipe_tensor *weight = poperation->conv.weight_tensor;
unsigned num_channels = poperation->output_tensors[0]->dims[3];
if (weight->scales != NULL) {
unsigned num_channels = poperation->output_tensors[0]->dims[3];
operation->kernel.scales = malloc(num_channels * sizeof(float));
memcpy(operation->kernel.scales, weight->scales, num_channels * sizeof(float));
if (weight->zero_points != NULL) {
operation->kernel.zero_points = malloc(num_channels * sizeof(int));
memcpy(operation->kernel.zero_points, weight->zero_points, num_channels * sizeof(int));
} else {
operation->kernel.zero_points = NULL;
}
} else {
operation->kernel.scales = NULL;
}
if (weight->zero_points != NULL) {
operation->kernel.zero_points = malloc(num_channels * sizeof(int));
memcpy(operation->kernel.zero_points, weight->zero_points, num_channels * sizeof(int));
} else {
operation->kernel.zero_points = NULL;
}