diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c
index 6cf1eaddde2..4eeefc557f7 100644
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@@ -721,6 +721,29 @@ simplified_elementwise_add_sub_scale(
    *out_out_scale = ethosu_quantize_scale(output_rescale_val, out_out_shift, false);
 }
 
+/*
+ * Emit the output scaling for an elementwise multiply. The rescale factor
+ * (input1_scale * input2_scale) / output_scale is quantized with
+ * ethosu_quantize_scale() and the resulting scale/shift pair is written with
+ * NPU_SET_OFM_SCALE. No other register is emitted by this helper.
+ */
+static void
+elementwise_mul_scale(
+   struct ethosu_subgraph *subgraph,
+   double input1_scale,
+   double input2_scale,
+   double output_scale)
+{
+   double output_rescale;
+   int32_t ofm_scale, ofm_shift;
+
+   output_rescale = (input1_scale * input2_scale) / output_scale;
+   ofm_scale = ethosu_quantize_scale(output_rescale, &ofm_shift, false);
+
+   /* OFM_SCALE: output scale with shift */
+   EMIT1(NPU_SET_OFM_SCALE, ofm_shift, ofm_scale);
+}
+
 /*
  * U85 uses "simplified" mode (from Vela simplified_elementwise_add_sub_scale):
  * Both operands are independently rescaled. OPA_SCALE and OPB_SCALE each
@@ -765,27 +788,42 @@ static void
 emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
 {
    bool has_scalar = operation->ifm2.scalar != 0;
-   enum ethosu_op_to_scale op_to_scale;
+   enum ethosu_op_to_scale op_to_scale = OP_NONE;
 
-   if (ethosu_ml_device(subgraph->base.device)->is_u65) {
-      op_to_scale = eltwise_emit_ofm_scaling(
-         subgraph,
-         operation->ifm.scale,
-         operation->ifm2.scale,
-         operation->ofm.scale);
-   } else {
-      op_to_scale = eltwise_emit_ofm_scaling_u85(
-         subgraph,
-         operation->ifm.scale,
-         operation->ifm2.scale,
-         operation->ofm.scale);
-   }
+   switch (operation->eltwise.type) {
+   case ETHOSU_ELTWISE_TYPE_MUL:
+      /* Only the OFM scale is emitted; op_to_scale stays OP_NONE. */
+      elementwise_mul_scale(subgraph, operation->ifm.scale,
+                            operation->ifm2.scale,
+                            operation->ofm.scale);
+      break;
+   case ETHOSU_ELTWISE_TYPE_ADD:
+      if (ethosu_ml_device(subgraph->base.device)->is_u65) {
+         op_to_scale = eltwise_emit_ofm_scaling(
+            subgraph,
+            operation->ifm.scale,
+            operation->ifm2.scale,
+            operation->ofm.scale);
+      } else {
+         op_to_scale = eltwise_emit_ofm_scaling_u85(
+            subgraph,
+            operation->ifm.scale,
+            operation->ifm2.scale,
+            operation->ofm.scale);
+      }
 
-   if (operation->eltwise.ifm_reversed) {
-      if (op_to_scale == OP_A)
-         op_to_scale = OP_B;
-      else
-         op_to_scale = OP_A;
+      /* NOTE(review): when op_to_scale is OP_NONE this swap yields OP_A;
+       * confirm that is intended for reversed operands. */
+      if (operation->eltwise.ifm_reversed) {
+         if (op_to_scale == OP_A)
+            op_to_scale = OP_B;
+         else
+            op_to_scale = OP_A;
+      }
+      break;
+   default:
+      assert(0);
+      break;
    }
 
    emit_common(subgraph, operation, op_to_scale);
@@ -832,7 +870,8 @@ emit_operation_code(struct ethosu_subgraph *subgraph, struct ethosu_operation *o
       EMIT0(NPU_OP_POOL, operation->pooling.type);
       break;
    case ETHOSU_OPERATION_TYPE_ELTWISE:
-      EMIT0(NPU_OP_ELEMENTWISE, 0x1);
+      /* enum ethosu_eltwise_type values are passed through unchanged. */
+      EMIT0(NPU_OP_ELEMENTWISE, operation->eltwise.type);
       break;
    case ETHOSU_OPERATION_TYPE_DMA:
       EMIT0(NPU_OP_DMA_START, 0x0);
diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c
index 16c793a1564..49818a5f0a6 100644
--- a/src/gallium/drivers/ethosu/ethosu_lower.c
+++ b/src/gallium/drivers/ethosu/ethosu_lower.c
@@ -519,6 +519,14 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph,
 
       case PIPE_ML_OPERATION_TYPE_ADD: {
          ethosu_lower_eltwise(subgraph, &poperations[i], &operation);
+         operation.eltwise.type = ETHOSU_ELTWISE_TYPE_ADD;
+         util_dynarray_append(&subgraph->operations, operation);
+         break;
+      }
+
+      case PIPE_ML_OPERATION_TYPE_MUL: {
+         ethosu_lower_eltwise(subgraph, &poperations[i], &operation);
+         operation.eltwise.type = ETHOSU_ELTWISE_TYPE_MUL;
          util_dynarray_append(&subgraph->operations, operation);
          break;
       }
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c
index d8f9f7e23c2..aa810c12842 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.c
+++ b/src/gallium/drivers/ethosu/ethosu_ml.c
@@ -140,6 +140,7 @@ ethosu_ml_operation_supported(struct pipe_ml_device *pdevice,
       break;
    }
 
+   case PIPE_ML_OPERATION_TYPE_MUL:
    case PIPE_ML_OPERATION_TYPE_ADD:
    case PIPE_ML_OPERATION_TYPE_POOLING:
    case PIPE_ML_OPERATION_TYPE_STRIDED_SLICE:
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h
index 4d57d2c924a..4530997fa74 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@@ -42,6 +42,38 @@ enum ethosu_operation_type {
    ETHOSU_OPERATION_TYPE_DMA,
 };
 
+/*
+ * Elementwise operation selector. emit_operation_code() passes the value
+ * unchanged as the NPU_OP_ELEMENTWISE parameter, so the numeric values of
+ * the enumerators must be preserved.
+ */
+enum ethosu_eltwise_type {
+   ETHOSU_ELTWISE_TYPE_MUL,
+   ETHOSU_ELTWISE_TYPE_ADD,
+   ETHOSU_ELTWISE_TYPE_SUB,
+   ETHOSU_ELTWISE_TYPE_MIN,
+   ETHOSU_ELTWISE_TYPE_MAX,
+   ETHOSU_ELTWISE_TYPE_LRELU,
+   ETHOSU_ELTWISE_TYPE_ABS,
+   ETHOSU_ELTWISE_TYPE_CLZ,
+   ETHOSU_ELTWISE_TYPE_SHR,
+   ETHOSU_ELTWISE_TYPE_SHL,
+   /* U85 only */
+   ETHOSU_ELTWISE_TYPE_LSR,
+   ETHOSU_ELTWISE_TYPE_DIV,
+   ETHOSU_ELTWISE_TYPE_CMP_EQ = 0x10,
+   ETHOSU_ELTWISE_TYPE_CMP_NE,
+   ETHOSU_ELTWISE_TYPE_CMP_GE,
+   ETHOSU_ELTWISE_TYPE_CMP_GT,
+   /* NOTE(review): the names below suggest comparisons, but AND/OR/XOR/NOT
+    * look like logical modes -- confirm naming against the register spec. */
+   ETHOSU_ELTWISE_TYPE_CMP_AND = 0x21,
+   ETHOSU_ELTWISE_TYPE_CMP_OR,
+   ETHOSU_ELTWISE_TYPE_CMP_XOR,
+   ETHOSU_ELTWISE_TYPE_CMP_NOT,
+   ETHOSU_ELTWISE_TYPE_CMP_AND_NOT = 0x2A,
+};
+
 struct ethosu_tile_box {
    unsigned height_0; /* The height of tile 0 */
    unsigned height_1; /* The height of tile 1, 0 if unused */
@@ -173,6 +205,7 @@ struct ethosu_operation {
       } pooling;
 
       struct {
+         enum ethosu_eltwise_type type; /* elementwise op passed to NPU_OP_ELEMENTWISE */
          uint16_t activation_min;
          unsigned lut_bytes;
          bool ifm_reversed;