mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
etnaviv/nn: Add support for signed 8-bit tensors
The hardware only supports unsigned 8-bit tensors, but with the configurable zero point we can map signed 8-bit integers to unsigned 8-bit integers by adding a constant offset of 128 to all values and to the zero point setting. This requires adding 128 to all input tensors and subtracting 128 from all output tensors during inference.

Reviewed-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31979>
This commit is contained in:
parent
f9c34a3eb0
commit
4153154423
5 changed files with 99 additions and 17 deletions
|
|
@ -542,7 +542,19 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub
|
|||
|
||||
for (int i = 0; i < inputs_count; i++) {
|
||||
struct pipe_resource *res = etna_ml_get_tensor(subgraph, input_idxs[i]);
|
||||
pipe_buffer_write(pctx, res, offsets[input_idxs[i]], sizes[input_idxs[i]], inputs[i]);
|
||||
if (is_signed[i]) {
|
||||
struct pipe_transfer *dst_transfer;
|
||||
const uint8_t *src = inputs[i];
|
||||
uint8_t *dst_map;
|
||||
dst_map = pipe_buffer_map_range(pctx, res, 0, sizes[input_idxs[i]], PIPE_MAP_WRITE, &dst_transfer);
|
||||
assert(dst_map);
|
||||
for (unsigned k = 0; k < sizes[input_idxs[i]]; k++) {
|
||||
dst_map[k] = src[k] + 128;
|
||||
}
|
||||
pipe_buffer_unmap(pctx, dst_transfer);
|
||||
} else {
|
||||
pipe_buffer_write(pctx, res, offsets[input_idxs[i]], sizes[input_idxs[i]], inputs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned i = 0;
|
||||
|
|
@ -662,7 +674,22 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr
|
|||
|
||||
for (int i = 0; i < outputs_count; i++) {
|
||||
struct pipe_resource *res = etna_ml_get_tensor(subgraph, output_idxs[i]);
|
||||
pipe_buffer_read(context, res, 0, pipe_buffer_size(res), outputs[i]);
|
||||
if (is_signed[i]) {
|
||||
struct pipe_transfer *src_transfer;
|
||||
uint8_t *src_map;
|
||||
src_map = (uint8_t *) pipe_buffer_map_range(context,
|
||||
res,
|
||||
0, pipe_buffer_size(res),
|
||||
PIPE_MAP_READ,
|
||||
&src_transfer);
|
||||
assert(src_map);
|
||||
for (unsigned k = 0; k < pipe_buffer_size(res); k++) {
|
||||
((uint8_t *)(outputs[i]))[k] = src_map[k] - 128;
|
||||
}
|
||||
pipe_buffer_unmap(context, src_transfer);
|
||||
} else {
|
||||
pipe_buffer_read(context, res, 0, pipe_buffer_size(res), outputs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ struct etna_operation {
|
|||
unsigned weight_height;
|
||||
uint8_t weight_zero_point;
|
||||
float weight_scale;
|
||||
bool weight_signed;
|
||||
|
||||
uint8_t addition_offset;
|
||||
|
||||
|
|
|
|||
|
|
@ -199,9 +199,15 @@ pointwise_to_2x2(struct etna_ml_subgraph *subgraph, struct etna_operation *opera
|
|||
uint8_t *map_out = output + channel * 2 * 2 * operation->input_channels;
|
||||
|
||||
map_out[0] = map_in[0];
|
||||
map_out[1] = operation->weight_zero_point;
|
||||
map_out[2] = operation->weight_zero_point;
|
||||
map_out[3] = operation->weight_zero_point;
|
||||
if (operation->weight_signed) {
|
||||
map_out[1] = operation->weight_zero_point - 128;
|
||||
map_out[2] = operation->weight_zero_point - 128;
|
||||
map_out[3] = operation->weight_zero_point - 128;
|
||||
} else {
|
||||
map_out[1] = operation->weight_zero_point;
|
||||
map_out[2] = operation->weight_zero_point;
|
||||
map_out[3] = operation->weight_zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
pipe_resource_reference(&operation->weight_tensor, NULL);
|
||||
|
|
@ -231,6 +237,8 @@ expand_depthwise(struct etna_ml_subgraph *subgraph, struct etna_operation *opera
|
|||
for (unsigned i = 0; i < operation->weight_width * operation->weight_height * operation->input_channels; i++) {
|
||||
if (i % operation->input_channels == in_depth)
|
||||
map_out[i] = map_in[i];
|
||||
else if (operation->weight_signed)
|
||||
map_out[i] = operation->weight_zero_point - 128;
|
||||
else
|
||||
map_out[i] = operation->weight_zero_point;
|
||||
}
|
||||
|
|
@ -380,7 +388,8 @@ strided_to_normal(struct etna_ml_subgraph *subgraph, struct etna_operation *oper
|
|||
output = map_resource(output_res);
|
||||
|
||||
unsigned wdims_out[4] = {operation->output_channels, operation->weight_width, operation->weight_height, operation->input_channels};
|
||||
reshape(input, output, operation->stride, operation->weight_zero_point, wdims_in, wdims_out);
|
||||
int weight_zero_point = operation->weight_signed ? (operation->weight_zero_point - 128) : operation->weight_zero_point;
|
||||
reshape(input, output, operation->stride, weight_zero_point, wdims_in, wdims_out);
|
||||
|
||||
pipe_resource_reference(&operation->weight_tensor, NULL);
|
||||
operation->weight_tensor = output_res;
|
||||
|
|
@ -415,6 +424,25 @@ calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph,
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
etna_tensor_zero_point(struct pipe_tensor *tensor)
|
||||
{
|
||||
if (tensor->is_signed) {
|
||||
/*
|
||||
* Since the hardware only supports unsigned 8-bit integers, signed
|
||||
* tensors are shifted from the -128..127 range to 0..255 by adding 128
|
||||
* when uploading and subtracting 128 when downloading the tensor.
|
||||
* Tensor zero point and weight coefficients have to be adapted to
|
||||
* account for this.
|
||||
*/
|
||||
assert(tensor->zero_point >= -128 && tensor->zero_point <= 127);
|
||||
return tensor->zero_point + 128;
|
||||
} else {
|
||||
assert(tensor->zero_point >= 0 && tensor->zero_point <= 255);
|
||||
return tensor->zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *poperation,
|
||||
|
|
@ -442,21 +470,22 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
|
|||
operation->input_width = poperation->input_tensors[0]->dims[1];
|
||||
operation->input_height = poperation->input_tensors[0]->dims[2];
|
||||
operation->input_channels = poperation->input_tensors[0]->dims[3];
|
||||
operation->input_zero_point = poperation->input_tensors[0]->zero_point;
|
||||
operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]);
|
||||
operation->input_scale = poperation->input_tensors[0]->scale;
|
||||
|
||||
operation->output_tensor = poperation->output_tensors[0]->index;
|
||||
operation->output_width = poperation->output_tensors[0]->dims[1];
|
||||
operation->output_height = poperation->output_tensors[0]->dims[2];
|
||||
operation->output_channels = poperation->output_tensors[0]->dims[3];
|
||||
operation->output_zero_point = poperation->output_tensors[0]->zero_point;
|
||||
operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]);
|
||||
operation->output_scale = poperation->output_tensors[0]->scale;
|
||||
|
||||
pipe_resource_reference(&operation->weight_tensor, poperation->conv.weight_tensor->resource);
|
||||
operation->weight_width = poperation->conv.weight_tensor->dims[1];
|
||||
operation->weight_height = poperation->conv.weight_tensor->dims[2];
|
||||
operation->weight_zero_point = poperation->conv.weight_tensor->zero_point;
|
||||
operation->weight_zero_point = etna_tensor_zero_point(poperation->conv.weight_tensor);
|
||||
operation->weight_scale = poperation->conv.weight_tensor->scale;
|
||||
operation->weight_signed = poperation->conv.weight_tensor->is_signed;
|
||||
|
||||
pipe_resource_reference(&operation->bias_tensor, poperation->conv.bias_tensor->resource);
|
||||
|
||||
|
|
@ -544,7 +573,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
|
|||
operation->input_width = poperation->input_tensors[0]->dims[1];
|
||||
operation->input_height = poperation->input_tensors[0]->dims[2];
|
||||
operation->input_channels = poperation->input_tensors[0]->dims[3];
|
||||
operation->input_zero_point = poperation->input_tensors[0]->zero_point;
|
||||
operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]);
|
||||
operation->input_scale = poperation->input_tensors[0]->scale;
|
||||
operation->input_tensor_size = operation->input_width *
|
||||
operation->input_height *
|
||||
|
|
@ -555,7 +584,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
|
|||
operation->output_width = poperation->output_tensors[0]->dims[1];
|
||||
operation->output_height = poperation->output_tensors[0]->dims[2];
|
||||
operation->output_channels = poperation->output_tensors[0]->dims[3];
|
||||
operation->output_zero_point = poperation->output_tensors[0]->zero_point;
|
||||
operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]);
|
||||
operation->output_scale = poperation->output_tensors[0]->scale;
|
||||
|
||||
if (nn_core_version < 8) {
|
||||
|
|
@ -564,6 +593,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
|
|||
operation->weight_height = 2;
|
||||
operation->weight_zero_point = 0x0;
|
||||
operation->weight_scale = compute_weight_scale_add(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale);
|
||||
operation->weight_signed = false;
|
||||
operation->addition_offset = compute_addition_offset(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale, operation->weight_scale);
|
||||
|
||||
uint8_t *weight_map = map_resource(operation->weight_tensor);
|
||||
|
|
@ -582,6 +612,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
|
|||
operation->weight_height = 1;
|
||||
operation->weight_zero_point = 0x0;
|
||||
operation->weight_scale = compute_weight_scale_add(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale);
|
||||
operation->weight_signed = false;
|
||||
operation->addition_offset = compute_addition_offset(poperation->input_tensors[1]->scale, poperation->input_tensors[0]->scale, operation->weight_scale);
|
||||
|
||||
uint8_t (*weight_map)[operation->input_channels] = map_resource(operation->weight_tensor);
|
||||
|
|
|
|||
|
|
@ -192,8 +192,17 @@ static uint32_t calculate_bias_correction(struct etna_ml_subgraph *subgraph, con
|
|||
else
|
||||
input_channels = operation->input_channels;
|
||||
|
||||
for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
|
||||
correction += (weights[i] - operation->weight_zero_point) * input_zero_point;
|
||||
if (operation->weight_signed) {
|
||||
/* See etna_tensor_zero_point() */
|
||||
int8_t weight_zero_point = operation->weight_zero_point - 128;
|
||||
|
||||
for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
|
||||
correction += (((int8_t *)weights)[i] - weight_zero_point) * input_zero_point;
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < operation->weight_width * operation->weight_height * input_channels; i++) {
|
||||
correction += (weights[i] - operation->weight_zero_point) * input_zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
return correction;
|
||||
|
|
@ -652,6 +661,8 @@ static void encode_superblock(struct etna_ml_subgraph *subgraph, const struct et
|
|||
|
||||
if (kernel_idx + block * block_size >= kernel_size)
|
||||
weight = operation->weight_zero_point;
|
||||
else if (operation->weight_signed)
|
||||
weight = ((int8_t *)(weights_map[oc]))[kernel_idx + block * block_size] + 128;
|
||||
else
|
||||
weight = weights_map[oc][kernel_idx + block * block_size];
|
||||
|
||||
|
|
|
|||
|
|
@ -535,6 +535,18 @@ create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_ope
|
|||
return bo;
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
etna_tensor_zero_point(struct pipe_tensor *tensor)
|
||||
{
|
||||
if (tensor->is_signed) {
|
||||
assert(tensor->zero_point >= -128 && tensor->zero_point <= 127);
|
||||
return tensor->zero_point + 128;
|
||||
} else {
|
||||
assert(tensor->zero_point >= 0 && tensor->zero_point <= 255);
|
||||
return tensor->zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *first_operation,
|
||||
|
|
@ -548,7 +560,7 @@ etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
|
|||
operation->input_width = first_operation->input_tensors[0]->dims[1];
|
||||
operation->input_height = first_operation->input_tensors[0]->dims[2];
|
||||
operation->input_channels = first_operation->input_tensors[0]->dims[3];
|
||||
operation->input_zero_point = first_operation->input_tensors[0]->zero_point;
|
||||
operation->input_zero_point = etna_tensor_zero_point(first_operation->input_tensors[0]);
|
||||
operation->input_scale = first_operation->input_tensors[0]->scale;
|
||||
operation->input_tensor_size = operation->input_width *
|
||||
operation->input_height *
|
||||
|
|
@ -559,7 +571,7 @@ etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph,
|
|||
operation->output_width = first_operation->input_tensors[0]->dims[1];
|
||||
operation->output_height = first_operation->input_tensors[0]->dims[2];
|
||||
operation->output_channels = first_operation->input_tensors[0]->dims[3];
|
||||
operation->output_zero_point = first_operation->input_tensors[0]->zero_point;
|
||||
operation->output_zero_point = etna_tensor_zero_point(first_operation->input_tensors[0]);
|
||||
operation->output_scale = first_operation->input_tensors[0]->scale;
|
||||
}
|
||||
|
||||
|
|
@ -606,7 +618,7 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
|
|||
operation->input_width = convolution->input_tensors[0]->dims[1];
|
||||
operation->input_height = convolution->input_tensors[0]->dims[2];
|
||||
operation->input_channels = convolution->input_tensors[0]->dims[3];
|
||||
operation->input_zero_point = convolution->input_tensors[0]->zero_point;
|
||||
operation->input_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]);
|
||||
operation->input_scale = convolution->input_tensors[0]->scale;
|
||||
operation->input_tensor_size = operation->input_width *
|
||||
operation->input_height *
|
||||
|
|
@ -617,7 +629,7 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
|
|||
operation->output_width = DIV_ROUND_UP(operation->input_width, operation->stride);
|
||||
operation->output_height = DIV_ROUND_UP(operation->input_height, operation->stride);
|
||||
operation->output_channels = operation->input_channels * operation->stride * operation->stride;
|
||||
operation->output_zero_point = convolution->input_tensors[0]->zero_point;
|
||||
operation->output_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]);
|
||||
operation->output_scale = convolution->input_tensors[0]->scale;
|
||||
|
||||
/* When destriding a convolution, the transformation to be made to the input
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue