diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index 32719c05cb0..c185e14b0bc 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -472,38 +472,45 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio emit_common(subgraph, operation, false); - switch (operation->pooling.type) { - case ETHOSU_POOLING_TYPE_MAX: { - if (!ethosu_ml_device(subgraph->base.device)->is_u65) { - EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1); - break; - } else - FALLTHROUGH; - } - case ETHOSU_POOLING_TYPE_AVG: { - scale = pooling_emit_ofm_scaling( - operation->ifm.scale, - operation->ofm.scale, - operation->kernel.height, - operation->kernel.width, - &scale_shift); - + if (operation->pooling.nop) { + scale = ethosu_quantize_scale( + operation->ifm.scale / operation->ofm.scale, + &scale_shift, true); EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift), scale); - break; - } - case ETHOSU_POOLING_TYPE_SUM: { - scale = sum_emit_ofm_scaling( - operation->ifm.scale, - operation->ofm.scale, - operation->kernel.height, - operation->kernel.width, - &scale_shift); + } else { + switch (operation->pooling.type) { + case ETHOSU_POOLING_TYPE_MAX: { + if (!ethosu_ml_device(subgraph->base.device)->is_u65) { + EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1); + break; + } else + FALLTHROUGH; + } + case ETHOSU_POOLING_TYPE_AVG: { + scale = pooling_emit_ofm_scaling( + operation->ifm.scale, + operation->ofm.scale, + operation->kernel.height, + operation->kernel.width, + &scale_shift); - EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift) | NPU_SET_OFM_SCALE_ROUND_MODE(1), scale); - break; - } - default: - UNREACHABLE("Invalid pooling type"); + EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift), scale); + break; + } + case ETHOSU_POOLING_TYPE_SUM: { + scale = sum_emit_ofm_scaling( + operation->ifm.scale, + operation->ofm.scale, + operation->kernel.height, + operation->kernel.width, + &scale_shift); + + EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(scale_shift) | NPU_SET_OFM_SCALE_ROUND_MODE(1), scale); + break; + } + default: + UNREACHABLE("Invalid pooling type"); + } } emit_block_config(subgraph, operation); @@ -658,8 +665,8 @@ eltwise_emit_ofm_scaling( input_rescale = min_input_scale * input_shift_val / (2.0 * max_input_scale); output_rescale = (2.0 * max_input_scale) / (output_scale * input_shift_val); - opa_scale = ethosu_quantize_scale(input_rescale, &opa_shift); - ofm_scale = ethosu_quantize_scale(output_rescale, &ofm_shift); + opa_scale = ethosu_quantize_scale(input_rescale, &opa_shift, false); + ofm_scale = ethosu_quantize_scale(output_rescale, &ofm_shift, false); if (DBG_ENABLED(ETHOSU_DBG_MSGS)) { fprintf(stderr, "ADD advanced scaling: ifm1_scale=%f ifm2_scale=%f ofm_scale=%f\n", @@ -711,7 +718,7 @@ simplified_elementwise_add_sub_scale( output_rescale_val = (2.0 * max_input_scale) / (output_scale * input_shift_val); } - *out_out_scale = ethosu_quantize_scale(output_rescale_val, out_out_shift); + *out_out_scale = ethosu_quantize_scale(output_rescale_val, out_out_shift, false); } /* @@ -739,8 +746,8 @@ eltwise_emit_ofm_scaling_u85( &input1_rescale, &input2_rescale, &ofm_scale, &ofm_shift); - opa_scale = ethosu_quantize_scale(input1_rescale, &opa_shift); - opb_scale = ethosu_quantize_scale(input2_rescale, &opb_shift); + opa_scale = ethosu_quantize_scale(input1_rescale, &opa_shift, false); + opb_scale = ethosu_quantize_scale(input2_rescale, &opb_shift, false); EMIT1(NPU_SET_OPA_SCALE, NPU_SET_OPA_SCALE_SHIFT(opa_shift) | NPU_SET_OPA_SCALE_DBL_RND(input_shift), diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index 37b0f1f2185..328a711e6da 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -85,7 +85,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation } int32_t shift; - int scale = ethosu_quantize_scale(conv_scale, &shift); + int scale = ethosu_quantize_scale(conv_scale, &shift, false); uint64_t bias = biases ? biases[i] : 0; diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index 3342a53aa39..1f1254aac70 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -264,6 +264,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph, operation->round_mode = ETHOSU_ROUNDING_NATURAL; } else operation->pooling.type = ETHOSU_POOLING_TYPE_SUM; + operation->pooling.nop = true; set_feature_maps(subgraph, poperation->input_tensors[input_idx], poperation->output_tensors[0], operation); operation->ofm.shape.depth = operation->ifm.shape.depth; diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c index f5f045fc4ba..1ff5f9a3a66 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.c +++ b/src/gallium/drivers/ethosu/ethosu_ml.c @@ -83,12 +83,20 @@ ethosu_round_up_divide(int a, int b) } int -ethosu_quantize_scale(double scale, int32_t *shift) +ethosu_quantize_scale(double scale, int32_t *shift, bool reduced) { int exponent = 0; double significand = frexp(scale, &exponent); int32_t quantized_scale = round(significand * (double)(1LL << 31)); *shift = 31 - exponent; + + if (reduced) { + quantized_scale = (quantized_scale >> 16) + (quantized_scale >> 15 & 1); + // make sure reduced scale does not overflow + quantized_scale = MIN2(quantized_scale, 0x7FFF); + *shift -= 16; + } + if (*shift > 63) { if (quantized_scale > exp2(*shift - 63)) { quantized_scale = quantized_scale >> (*shift - 63); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index bb4b7be8ab4..4f7d31ec6fb 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -168,6 +168,7 @@ struct ethosu_operation { struct { enum ethosu_pooling_type type; + bool nop; } pooling; struct { @@ -278,6 +279,6 @@ int ethosu_round_up_to_multiple(int a, int b); int ethosu_round_up_divide(int a, int b); -int ethosu_quantize_scale(double scale, int32_t *shift); +int ethosu_quantize_scale(double scale, int32_t *shift, bool reduced); #endif /* ETHOSU_ML_H */