ethosu: Properly emit IFM_BROADCAST and IFM2_BROADCAST on U85

On U85, both NPU_SET_IFM_BROADCAST and NPU_SET_IFM2_BROADCAST must be
emitted for elementwise operations, matching Vela's GenerateInputBroadcast.

Add calc_broadcast_mode() matching Vela's CalculateBroadcast(): broadcasts
a dimension of shape1 when it is 1 and shape2 is larger, producing a
broadcast_mode bitmask (H=1, W=2, C=4, SCALAR=8).

Split emit_ifm2_broadcast into U65 (legacy bitfields) and U85 paths.
The U85 path emits both IFM_BROADCAST and IFM2_BROADCAST using
calc_broadcast_mode in each direction.

Also fix emit_eltwise to call emit_ifm2_precision instead of
emit_ifm_broadcast for U85, which was emitting 0 instead of the
required IFM2_PRECISION register.

Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39611>
This commit is contained in:
Tomeu Vizoso 2026-03-22 18:11:21 +01:00 committed by Marge Bot
parent 2a6d181bc6
commit ac0d6e7b7c

View file

@ -587,22 +587,57 @@ emit_ifm_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *op
EMIT0(NPU_SET_IFM_BROADCAST, ifm_broadcast);
}
/*
* U85 broadcast_mode calculation matching Vela's CalculateBroadcast().
* Broadcasts shape1 dimensions that are 1 when shape2 is larger.
* Returns a 4-bit broadcast_mode: H=1, W=2, C=4, or'ed together.
*/
static unsigned
calc_broadcast_mode(struct ethosu_block *shape1, struct ethosu_block *shape2)
{
unsigned mode = 0;
if (shape1->height < shape2->height && shape1->height == 1)
mode |= 1; /* H */
if (shape1->width < shape2->width && shape1->width == 1)
mode |= 2; /* W */
if (shape1->depth < shape2->depth && shape1->depth == 1)
mode |= 4; /* C */
return mode;
}
static void
emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar)
{
unsigned ifm2_broadcast = 0;
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed);
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) {
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed);
if (has_scalar) {
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_SCALAR(1);
if (has_scalar) {
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_SCALAR(1);
} else {
if (operation->ifm.shape.height != operation->ifm2.shape.height)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_HEIGHT__MASK;
if (operation->ifm.shape.width != operation->ifm2.shape.width)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_WIDTH__MASK;
if (operation->ifm.shape.depth != operation->ifm2.shape.depth)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_DEPTH__MASK;
}
} else {
if (operation->ifm.shape.height != operation->ifm2.shape.height)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_HEIGHT__MASK;
if (operation->ifm.shape.width != operation->ifm2.shape.width)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_WIDTH__MASK;
if (operation->ifm.shape.depth != operation->ifm2.shape.depth)
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_BROADCAST_DEPTH__MASK;
unsigned ifm_mode, ifm2_mode;
if (has_scalar) {
ifm_mode = 0;
ifm2_mode = 8; /* SCALAR */
} else {
ifm_mode = calc_broadcast_mode(&operation->ifm.shape, &operation->ifm2.shape);
ifm2_mode = calc_broadcast_mode(&operation->ifm2.shape, &operation->ifm.shape);
}
EMIT0(NPU_SET_IFM_BROADCAST, ifm_mode);
ifm2_broadcast = ifm2_mode;
}
EMIT0(NPU_SET_IFM2_BROADCAST, ifm2_broadcast);
@ -780,7 +815,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION);
else
emit_ifm_broadcast(subgraph, operation, false);
emit_ifm2_precision(subgraph, operation, has_scalar);
emit_ifm2_broadcast(subgraph, operation, has_scalar);