ethosu: Add logistic and TANH operations

Logistic and TANH operations are similar: both lower to a pooling
operation with a LUT.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39975>
This commit is contained in:
Rob Herring (Arm) 2026-03-10 20:13:24 -05:00 committed by Marge Bot
parent 6933207435
commit a305dfd54b
6 changed files with 143 additions and 4 deletions

View file

@ -285,7 +285,10 @@ emit_activation(struct ethosu_subgraph *subgraph, struct ethosu_operation *opera
if (operation->type == ETHOSU_OPERATION_TYPE_ELTWISE)
min = operation->eltwise.activation_min;
EMIT0(NPU_SET_ACTIVATION, 0x0);
if (operation->type == ETHOSU_OPERATION_TYPE_POOLING)
EMIT0(NPU_SET_ACTIVATION, operation->pooling.activation);
else
EMIT0(NPU_SET_ACTIVATION, 0x0);
if (operation->ofm.is_signed) {
EMIT0(NPU_SET_ACTIVATION_MIN, 0xff80);
@ -840,8 +843,8 @@ emit_dma(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
{
EMIT0(NPU_SET_DMA0_SRC_REGION, COEFS_REGION);
EMIT1(NPU_SET_DMA0_SRC, 0x0, operation->dma.address);
EMIT0(NPU_SET_DMA0_DST_REGION, SCRATCH_REGION);
EMIT1(NPU_SET_DMA0_DST, 0x0, 0x0);
EMIT0(NPU_SET_DMA0_DST_REGION, operation->dma.dst_region);
EMIT1(NPU_SET_DMA0_DST, 0x0, operation->dma.dst_address);
EMIT1(NPU_SET_DMA0_LEN, 0x0, operation->dma.size);
}
@ -978,10 +981,24 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph)
operation->read_accesses[0].address = operation->dma.address;
operation->read_accesses[0].size = operation->dma.size;
operation->write_accesses[0].region = SCRATCH_REGION;
operation->write_accesses[0].region = operation->dma.dst_region;
operation->write_accesses[0].address = 0x0;
operation->write_accesses[0].size = operation->dma.size;
break;
case ETHOSU_OPERATION_TYPE_POOLING:
if (operation->pooling.activation >= ETHOSU_POOLING_ACTIVATION_LUT(0)) {
operation->read_accesses[1].region = LUT_REGION;
operation->read_accesses[1].address = SHRAM_LUT_BASE(operation->pooling.activation & 0xf);
operation->read_accesses[1].size = LUT8_SIZE;
}
operation->read_accesses[0].region = operation->ifm.region;
operation->read_accesses[0].address = operation->ifm.tiles.addresses[0];
operation->read_accesses[0].size = operation->ifm.shape.height * operation->ifm.shape.width * operation->ifm.shape.depth;
operation->write_accesses[0].region = operation->ofm.region;
operation->write_accesses[0].address = operation->ofm.tiles.addresses[0];
operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth;
break;
default:
operation->read_accesses[0].region = IO_REGION;

View file

@ -160,3 +160,17 @@ fill_coefs(struct ethosu_subgraph *subgraph,
memcpy(subgraph->coefs + operation->conv.weights.address, weights, operation->conv.weights.size);
free(weights);
}
#define LUT_SIZE 256

/*
 * Append a LUT_SIZE-byte lookup table to the end of the subgraph's
 * coefficient buffer and record its location in operation->pooling.lut.
 *
 * subgraph:  owner of the coefficient buffer (coefs / coefs_used); the buffer
 *            is grown by LUT_SIZE bytes.
 * operation: pooling operation whose lut.region / lut.address are filled in.
 * lut:       source table; exactly LUT_SIZE bytes are read from it.
 */
void
fill_lut(struct ethosu_subgraph *subgraph,
         struct ethosu_operation *operation,
         void *lut)
{
   /* The table lands at the current end of the coefficient buffer. */
   operation->pooling.lut.address = subgraph->coefs_used;
   operation->pooling.lut.region = COEFS_REGION;

   /* Grow the buffer to make room for the new table.
    * NOTE(review): the realloc() result is not checked; on failure the old
    * buffer is lost and the copy below writes through a NULL-based pointer. */
   subgraph->coefs_used += LUT_SIZE;
   subgraph->coefs = realloc(subgraph->coefs, subgraph->coefs_used);

   memcpy(subgraph->coefs + operation->pooling.lut.address, lut, LUT_SIZE);
}

View file

@ -15,4 +15,9 @@ fill_coefs(struct ethosu_subgraph *subgraph,
uint8_t *weight_data,
unsigned weight_size);
/*
 * Copy the 256-byte lookup table at `lut` to the end of the subgraph's
 * coefficient buffer (growing it) and store the table's region and address
 * in operation->pooling.lut.
 */
void
fill_lut(struct ethosu_subgraph *subgraph,
struct ethosu_operation *operation,
void *lut);
#endif /* ETHOSU_COEFS_H */

View file

@ -275,6 +275,75 @@ ethosu_lower_pooling(struct ethosu_subgraph *subgraph,
ethosu_sched_operation(subgraph, operation);
}
static double
clamp_sigmoid8(double x)
{
if (x <= -8.0)
return 0.0;
else if (x >= 8.0)
return 1.0;
else
return (1.0 / (1.0 + exp(-x)));
}
/*
 * Build the 8-bit lookup table that maps every quantized input value to the
 * quantized result of `func`.
 *
 * operation: supplies the IFM/OFM scale and zero point, and the IFM
 *            signedness that selects the quantized range
 *            ([-128, 127] when signed, [0, 255] otherwise).
 * lut:       output buffer; one byte is written per quantized input value,
 *            in ascending input order, i.e. 256 bytes in total.
 * func:      real-valued function to tabulate (e.g. tanh).
 */
static void
ethos_create_lut(struct ethosu_operation *operation, uint8_t *lut, double (*func)(double))
{
   const double ifm_scale = operation->ifm.scale;
   const double ofm_scale = operation->ofm.scale;
   const int zp_in = operation->ifm.zero_point;
   const int zp_out = operation->ofm.zero_point;
   const int q_min = operation->ifm.is_signed ? -128 : 0;
   const int q_max = operation->ifm.is_signed ? 127 : 255;

   for (int q = q_min; q <= q_max; q++) {
      /* Dequantize the input, apply the function, requantize the result. */
      const double real_in = ifm_scale * (double)(q - zp_in);
      const double real_out = func(real_in);
      double quantized = round((double)zp_out + real_out / ofm_scale);

      /* Clamp while still in floating point: converting an out-of-range or
       * non-finite double to int is undefined behaviour, which a zero or
       * very small OFM scale would trigger. fmax() returns its non-NaN
       * argument, so a NaN result saturates to q_min. */
      quantized = fmin((double)q_max, fmax((double)q_min, quantized));

      /* Negative entries of a signed table are stored as their 8-bit
       * two's-complement pattern. */
      *lut++ = (uint8_t)(int)quantized;
   }
}
/*
 * Describe the DMA transfer that loads a pooling operation's lookup table
 * into LUT slot 0.
 *
 * subgraph, poperation: currently unused.
 * pool_operation:       pooling operation whose pooling.lut.address is the
 *                       transfer source.
 * operation:            operation to fill in as the DMA transfer.
 */
static void
ethosu_lower_lut_dma(struct ethosu_subgraph *subgraph,
                     const struct pipe_ml_operation *poperation,
                     struct ethosu_operation *pool_operation,
                     struct ethosu_operation *operation)
{
   operation->type = ETHOSU_OPERATION_TYPE_DMA;

   /* Destination: first LUT slot of the LUT region. */
   operation->dma.dst_region = LUT_REGION;
   operation->dma.dst_address = SHRAM_LUT_BASE(0);

   /* Source: the table previously recorded on the pooling operation. */
   operation->dma.size = LUT8_SIZE;
   operation->dma.address = pool_operation->pooling.lut.address;
}
/*
 * Lower an element-wise function (e.g. logistic or tanh) to an average
 * pooling operation whose activation is LUT slot 0.
 *
 * subgraph:   subgraph being built; receives the table via fill_lut().
 * poperation: source operation; its first input and output tensors become
 *             the feature maps.
 * operation:  operation to fill in.
 * func:       real-valued function tabulated into the lookup table.
 */
static void
ethosu_lower_lut(struct ethosu_subgraph *subgraph,
                 const struct pipe_ml_operation *poperation,
                 struct ethosu_operation *operation, double (*func)(double))
{
   uint8_t table[LUT8_SIZE];

   operation->type = ETHOSU_OPERATION_TYPE_POOLING;
   operation->pooling.type = ETHOSU_POOLING_TYPE_AVG;
   operation->pooling.activation = ETHOSU_POOLING_ACTIVATION_LUT(0);
   operation->round_mode = ETHOSU_ROUNDING_NATURAL;

   set_feature_maps(subgraph, poperation->input_tensors[0],
                    poperation->output_tensors[0], operation);

   /* The table must be built before the OFM quantization is overwritten
    * below, since it is computed from the real IFM/OFM scale and zero point. */
   ethos_create_lut(operation, table, func);
   fill_lut(subgraph, operation, table);

   /* Zero point and scale are already baked into the table, so give the OFM
    * the same quantization parameters as the IFM. */
   operation->ofm.scale = operation->ifm.scale;
   operation->ofm.zero_point = operation->ifm.zero_point;

   allocate_feature_maps(subgraph, operation);
   ethosu_sched_operation(subgraph, operation);
}
static void
ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
const struct pipe_ml_operation *poperation,
@ -431,6 +500,7 @@ ethosu_lower_dma(struct ethosu_subgraph *subgraph,
operation->dma.address = conv_operation->conv.scales.address;
operation->dma.size = conv_operation->conv.scales.size + conv_operation->conv.weights.size;
operation->dma.dst_region = SCRATCH_REGION;
conv_operation->conv.scales.region = SCRATCH_REGION;
conv_operation->conv.scales.address = 0;
@ -537,6 +607,28 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph,
break;
}
case PIPE_ML_OPERATION_TYPE_LOGISTIC: {
ethosu_lower_lut(subgraph, &poperations[i], &operation, clamp_sigmoid8);
struct ethosu_operation dma_operation = {0};
ethosu_lower_lut_dma(subgraph, &poperations[i], &operation, &dma_operation);
util_dynarray_append(&subgraph->operations, dma_operation);
util_dynarray_append(&subgraph->operations, operation);
break;
}
case PIPE_ML_OPERATION_TYPE_TANH: {
ethosu_lower_lut(subgraph, &poperations[i], &operation, tanh);
struct ethosu_operation dma_operation = {0};
ethosu_lower_lut_dma(subgraph, &poperations[i], &operation, &dma_operation);
util_dynarray_append(&subgraph->operations, dma_operation);
util_dynarray_append(&subgraph->operations, operation);
break;
}
case PIPE_ML_OPERATION_TYPE_STRIDED_SLICE: {
ethosu_lower_strided_slice(subgraph, &poperations[i], &operation);
util_dynarray_append(&subgraph->operations, operation);

View file

@ -145,6 +145,8 @@ ethosu_ml_operation_supported(struct pipe_ml_device *pdevice,
case PIPE_ML_OPERATION_TYPE_POOLING:
case PIPE_ML_OPERATION_TYPE_STRIDED_SLICE:
case PIPE_ML_OPERATION_TYPE_PAD:
case PIPE_ML_OPERATION_TYPE_LOGISTIC:
case PIPE_ML_OPERATION_TYPE_TANH:
supported = true;
break;
case PIPE_ML_OPERATION_TYPE_RESIZE: {

View file

@ -16,6 +16,8 @@
#define SHRAM_RESERVED_END_BANKS 2
#define SHRAM_TOTAL_BANKS SHRAM_BANKS
#define SHRAM_BANK_SIZE_BYTES 1024
/* Size in bytes of one 8-bit lookup table (256 one-byte entries). */
#define LUT8_SIZE 256
/* Byte offset of LUT slot `lut`; slots are LUT8_SIZE bytes apart and start
 * 46 SHRAM banks in.
 * NOTE(review): presumably bank 46 is where the hardware places LUTs — confirm. */
#define SHRAM_LUT_BASE(lut) (46 * SHRAM_BANK_SIZE_BYTES + (lut) * LUT8_SIZE)
#define ACC_BITS 32 /* Use for now always 32-bit accumulators */
#define IFM_GRANULE 8
#define ACC_GRANULE 16
@ -34,6 +36,7 @@ extern struct ethosu_block SUB_KERNEL_MAX;
#define COEFS_REGION 0
#define IO_REGION 1
#define SCRATCH_REGION 2
#define LUT_REGION 0x103 // Internal SHRAM
enum ethosu_operation_type {
ETHOSU_OPERATION_TYPE_CONVOLUTION,
@ -176,6 +179,8 @@ enum ethosu_pooling_type {
ETHOSU_POOLING_TYPE_ARGMAX_Y,
};
/* Pooling activation value that selects LUT slot `n`: bit 4 set, slot index
 * in the low bits. */
#define ETHOSU_POOLING_ACTIVATION_LUT(n) (0x10 | (n))
#define MAX_MEMORY_ACCESSES 5 /* IFM, IFM2, Scales, Weights, LUT*/
struct ethosu_operation {
@ -195,6 +200,8 @@ struct ethosu_operation {
struct {
enum ethosu_pooling_type type;
bool nop;
uint8_t activation;
struct ethosu_address_range lut;
} pooling;
struct {
@ -206,7 +213,9 @@ struct ethosu_operation {
struct {
unsigned address;
unsigned dst_address;
long size;
unsigned dst_region;
} dma;
};