etnaviv/ml: handle signed 8-bit tensors by shifting them into the unsigned range

Signed tensors are uploaded with +128 added to every byte and downloaded
with 128 subtracted again. Tensor zero points go through
etna_tensor_zero_point(), and signed weights are shifted by +128 when they
are encoded; see the comment on etna_tensor_zero_point() in
etnaviv_ml_nn.c below.

Review notes:
* The signed upload path in etna_ml_subgraph_invoke() maps the buffer at
  offsets[input_idxs[i]], the same offset the pipe_buffer_write() path
  uses, instead of offset 0.
* etna_tensor_zero_point() takes a const pointer, since it only reads
  the tensor.
* NOTE(review): indentation and blank context lines were reconstructed
  from the hunk line counts, and the index hashes are those of the patch
  as received. Run `git apply --check` against the tree before applying.

diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c
index 4934d1afa47..2b29ef28ba3 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c
@@ -542,7 +542,19 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub
 
    for (int i = 0; i < inputs_count; i++) {
       struct pipe_resource *res = etna_ml_get_tensor(subgraph, input_idxs[i]);
-      pipe_buffer_write(pctx, res, offsets[input_idxs[i]], sizes[input_idxs[i]], inputs[i]);
+      if (is_signed[i]) {
+         struct pipe_transfer *dst_transfer;
+         const uint8_t *src = inputs[i];
+         uint8_t *dst_map;
+         dst_map = pipe_buffer_map_range(pctx, res, offsets[input_idxs[i]], sizes[input_idxs[i]], PIPE_MAP_WRITE, &dst_transfer);
+         assert(dst_map);
+         for (unsigned k = 0; k < sizes[input_idxs[i]]; k++) {
+            dst_map[k] = src[k] + 128;
+         }
+         pipe_buffer_unmap(pctx, dst_transfer);
+      } else {
+         pipe_buffer_write(pctx, res, offsets[input_idxs[i]], sizes[input_idxs[i]], inputs[i]);
+      }
    }
 
    unsigned i = 0;
@@ -662,7 +674,22 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr
 
    for (int i = 0; i < outputs_count; i++) {
       struct pipe_resource *res = etna_ml_get_tensor(subgraph, output_idxs[i]);
-      pipe_buffer_read(context, res, 0, pipe_buffer_size(res), outputs[i]);
+      if (is_signed[i]) {
+         struct pipe_transfer *src_transfer;
+         uint8_t *src_map;
+         src_map = (uint8_t *) pipe_buffer_map_range(context,
+                                                     res,
+                                                     0, pipe_buffer_size(res),
+                                                     PIPE_MAP_READ,
+                                                     &src_transfer);
+         assert(src_map);
+         for (unsigned k = 0; k < pipe_buffer_size(res); k++) {
+            ((uint8_t *)(outputs[i]))[k] = src_map[k] - 128;
+         }
+         pipe_buffer_unmap(context, src_transfer);
+      } else {
+         pipe_buffer_read(context, res, 0, pipe_buffer_size(res), outputs[i]);
+      }
    }
 }
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h
index e5d2a18dfc0..3b05618f13d 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h
@@ -94,6 +94,7 @@ struct etna_operation {
    unsigned weight_height;
    uint8_t weight_zero_point;
    float weight_scale;
+   bool weight_signed;
 
    uint8_t addition_offset;
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
index 66149229ca4..858409f990d 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
@@ -199,9 +199,15 @@ pointwise_to_2x2(struct etna_ml_subgraph *subgraph, struct etna_operation *opera
       uint8_t *map_out = output + channel * 2 * 2 * operation->input_channels;
 
       map_out[0] = map_in[0];
-      map_out[1] = operation->weight_zero_point;
-      map_out[2] = operation->weight_zero_point;
-      map_out[3] = operation->weight_zero_point;
+      if (operation->weight_signed) {
+         map_out[1] = operation->weight_zero_point - 128;
+         map_out[2] = operation->weight_zero_point - 128;
+         map_out[3] = operation->weight_zero_point - 128;
+      } else {
+         map_out[1] = operation->weight_zero_point;
+         map_out[2] = operation->weight_zero_point;
+         map_out[3] = operation->weight_zero_point;
+      }
    }
 
    pipe_resource_reference(&operation->weight_tensor, NULL);
@@ -231,6 +237,8 @@ expand_depthwise(struct etna_ml_subgraph *subgraph, struct etna_operation *opera
       for (unsigned i = 0; i < operation->weight_width * operation->weight_height * operation->input_channels; i++) {
          if (i % operation->input_channels == in_depth)
             map_out[i] = map_in[i];
+         else if (operation->weight_signed)
+            map_out[i] = operation->weight_zero_point - 128;
          else
             map_out[i] = operation->weight_zero_point;
       }
@@ -380,7 +388,8 @@ strided_to_normal(struct etna_ml_subgraph *subgraph, struct etna_operation *oper
    output = map_resource(output_res);
 
    unsigned wdims_out[4] = {operation->output_channels, operation->weight_width, operation->weight_height, operation->input_channels};
-   reshape(input, output, operation->stride, operation->weight_zero_point, wdims_in, wdims_out);
+   int weight_zero_point = operation->weight_signed ? (operation->weight_zero_point - 128) : operation->weight_zero_point;
+   reshape(input, output, operation->stride, weight_zero_point, wdims_in, wdims_out);
 
    pipe_resource_reference(&operation->weight_tensor, NULL);
    operation->weight_tensor = output_res;
@@ -415,6 +424,25 @@ calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph,
    return false;
 }
 
+static inline uint8_t
+etna_tensor_zero_point(const struct pipe_tensor *tensor)
+{
+   if (tensor->is_signed) {
+      /*
+       * Since the hardware only supports unsigned 8-bit integers, signed
+       * tensors are shifted from the -128..127 range to 0..255 by adding 128
+       * when uploading and subtracting 128 when downloading the tensor.
+       * Tensor zero point and weight coefficients have to be adapted to
+       * account for this.
+       */
+      assert(tensor->zero_point >= -128 && tensor->zero_point <= 127);
+      return tensor->zero_point + 128;
+   } else {
+      assert(tensor->zero_point >= 0 && tensor->zero_point <= 255);
+      return tensor->zero_point;
+   }
+}
+
 void
 etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
                           const struct pipe_ml_operation *poperation,
@@ -442,21 +470,22 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
    operation->input_width = poperation->input_tensors[0]->dims[1];
    operation->input_height = poperation->input_tensors[0]->dims[2];
    operation->input_channels = poperation->input_tensors[0]->dims[3];
-   operation->input_zero_point = poperation->input_tensors[0]->zero_point;
+   operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]);
    operation->input_scale = poperation->input_tensors[0]->scale;
 
    operation->output_tensor = poperation->output_tensors[0]->index;
    operation->output_width = poperation->output_tensors[0]->dims[1];
    operation->output_height = poperation->output_tensors[0]->dims[2];
    operation->output_channels = poperation->output_tensors[0]->dims[3];
-   operation->output_zero_point = poperation->output_tensors[0]->zero_point;
+   operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]);
    operation->output_scale = poperation->output_tensors[0]->scale;
 
    pipe_resource_reference(&operation->weight_tensor, poperation->conv.weight_tensor->resource);
    operation->weight_width = poperation->conv.weight_tensor->dims[1];
    operation->weight_height = poperation->conv.weight_tensor->dims[2];
-   operation->weight_zero_point = poperation->conv.weight_tensor->zero_point;
+   operation->weight_zero_point = etna_tensor_zero_point(poperation->conv.weight_tensor);
    operation->weight_scale = poperation->conv.weight_tensor->scale;
+   operation->weight_signed = poperation->conv.weight_tensor->is_signed;
 
    pipe_resource_reference(&operation->bias_tensor, poperation->conv.bias_tensor->resource);
 
@@ -544,7 +573,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
    operation->input_width = poperation->input_tensors[0]->dims[1];
    operation->input_height = poperation->input_tensors[0]->dims[2];
    operation->input_channels = poperation->input_tensors[0]->dims[3];
-   operation->input_zero_point = poperation->input_tensors[0]->zero_point;
+   operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]);
    operation->input_scale = poperation->input_tensors[0]->scale;
    operation->input_tensor_size = operation->input_width *
                                   operation->input_height *
@@ -555,7 +584,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
    operation->output_width = poperation->output_tensors[0]->dims[1];
    operation->output_height = poperation->output_tensors[0]->dims[2];
    operation->output_channels = poperation->output_tensors[0]->dims[3];
-   operation->output_zero_point = poperation->output_tensors[0]->zero_point;
+   operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]);
    operation->output_scale = poperation->output_tensors[0]->scale;
 
    if (nn_core_version < 8) {
@@ -564,6 +593,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
       operation->weight_height = 2;
       operation->weight_zero_point = 0x0;
       operation->weight_scale = compute_weight_scale_add(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale);
+      operation->weight_signed = false;
       operation->addition_offset = compute_addition_offset(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale, operation->weight_scale);
 
       uint8_t *weight_map = map_resource(operation->weight_tensor);
@@ -582,6 +612,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
       operation->weight_height = 1;
       operation->weight_zero_point = 0x0;
       operation->weight_scale = compute_weight_scale_add(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale);
+      operation->weight_signed = false;
       operation->addition_offset = compute_addition_offset(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale, operation->weight_scale);
 
       uint8_t (*weight_map)[operation->input_channels] = map_resource(operation->weight_tensor);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
index 6840301285c..f486f7994c7 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn_v8.c
@@ -192,8 +192,17 @@ static uint32_t calculate_bias_correction(struct etna_ml_subgraph *subgraph, con
    else
       input_channels = operation->input_channels;
 
-   for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
-      correction += (weights[i] - operation->weight_zero_point) * input_zero_point;
+   if (operation->weight_signed) {
+      /* See etna_tensor_zero_point() */
+      int8_t weight_zero_point = operation->weight_zero_point - 128;
+
+      for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
+         correction += (((int8_t *)weights)[i] - weight_zero_point) * input_zero_point;
+      }
+   } else {
+      for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
+         correction += (weights[i] - operation->weight_zero_point) * input_zero_point;
+      }
    }
 
    return correction;
@@ -652,6 +661,8 @@ static void encode_superblock(struct etna_ml_subgraph *subgraph, const struct et
 
                if (kernel_idx + block * block_size >= kernel_size)
                   weight = operation->weight_zero_point;
+               else if (operation->weight_signed)
+                  weight = ((int8_t *)(weights_map[oc]))[kernel_idx + block * block_size] + 128;
                else
                   weight = weights_map[oc][kernel_idx + block * block_size];
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
index c1a1d63c7d6..3e83105c91d 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
@@ -535,6 +535,18 @@ create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_ope
    return bo;
 }
 
+static inline uint8_t
+etna_tensor_zero_point(const struct pipe_tensor *tensor)
+{
+   if (tensor->is_signed) {
+      assert(tensor->zero_point >= -128 && tensor->zero_point <= 127);
+      return tensor->zero_point + 128;
+   } else {
+      assert(tensor->zero_point >= 0 && tensor->zero_point <= 255);
+      return tensor->zero_point;
+   }
+}
+
 void
 etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
                         const struct pipe_ml_operation *first_operation,
@@ -548,7 +560,7 @@ etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
    operation->input_width = first_operation->input_tensors[0]->dims[1];
    operation->input_height = first_operation->input_tensors[0]->dims[2];
    operation->input_channels = first_operation->input_tensors[0]->dims[3];
-   operation->input_zero_point = first_operation->input_tensors[0]->zero_point;
+   operation->input_zero_point = etna_tensor_zero_point(first_operation->input_tensors[0]);
    operation->input_scale = first_operation->input_tensors[0]->scale;
    operation->input_tensor_size = operation->input_width *
                                   operation->input_height *
@@ -559,7 +571,7 @@ etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
    operation->output_width = first_operation->input_tensors[0]->dims[1];
    operation->output_height = first_operation->input_tensors[0]->dims[2];
    operation->output_channels = first_operation->input_tensors[0]->dims[3];
-   operation->output_zero_point = first_operation->input_tensors[0]->zero_point;
+   operation->output_zero_point = etna_tensor_zero_point(first_operation->input_tensors[0]);
    operation->output_scale = first_operation->input_tensors[0]->scale;
 }
 
@@ -606,7 +618,7 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
    operation->input_width = convolution->input_tensors[0]->dims[1];
    operation->input_height = convolution->input_tensors[0]->dims[2];
    operation->input_channels = convolution->input_tensors[0]->dims[3];
-   operation->input_zero_point = convolution->input_tensors[0]->zero_point;
+   operation->input_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]);
    operation->input_scale = convolution->input_tensors[0]->scale;
    operation->input_tensor_size = operation->input_width *
                                   operation->input_height *
@@ -617,7 +629,7 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
    operation->output_width = DIV_ROUND_UP(operation->input_width, operation->stride);
    operation->output_height = DIV_ROUND_UP(operation->input_height, operation->stride);
    operation->output_channels = operation->input_channels * operation->stride * operation->stride;
-   operation->output_zero_point = convolution->input_tensors[0]->zero_point;
+   operation->output_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]);
    operation->output_scale = convolution->input_tensors[0]->scale;
 
    /* When destriding a convolution, the transformation to be made to the input