diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c
index f6a88f7ffd7..4c7fb0c2dd7 100644
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@@ -36,21 +36,92 @@ ethosu_ensure_cmdstream(struct ethosu_subgraph *subgraph)
    subgraph->cmdstream_used += 32;
 }
 
-#define EMIT0(cmd, param) \
-   do { \
-      ethosu_ensure_cmdstream(subgraph); \
-      *(subgraph->cursor++) = cmd | (((param) & 0xFFFF) << 16); \
-      if (DBG_ENABLED(ETHOSU_DBG_MSGS)) \
-         fprintf(stderr, "emit0(%s, 0x%x);\n", ethosu_get_cmd_name(0, cmd), (param) & 0xFFFF); \
+/* Record `value` as the most recent write to CMD0 register `reg`.
+ *
+ * Returns true when the command has to be emitted: `reg` has no cached value
+ * yet, or `value` differs from the cached one.  Returns false when emitting
+ * would only repeat the cached value.
+ *
+ * An out-of-range `reg` trips the assert in debug builds and is reported as
+ * changed otherwise, so NDEBUG builds never index past the tracking arrays. */
+static bool
+ethosu_cmd0_changed(struct ethosu_subgraph *subgraph, uint16_t reg, uint16_t value)
+{
+   assert(reg < ETHOSU_MAX_REG_INDEX);
+   if (reg >= ETHOSU_MAX_REG_INDEX)
+      return true;
+
+   if (subgraph->cmd0_valid[reg] && subgraph->cmd0_state[reg] == value)
+      return false;
+
+   subgraph->cmd0_state[reg] = value;
+   subgraph->cmd0_valid[reg] = true;
+   return true;
+}
+
+/* CMD1 counterpart of ethosu_cmd0_changed(): EMIT1 packs the 16-bit parameter
+ * into bits 47:32 of `value` and the 32-bit payload word into bits 31:0, so a
+ * change to either half forces the command to be emitted again. */
+static bool
+ethosu_cmd1_changed(struct ethosu_subgraph *subgraph, uint16_t reg, uint64_t value)
+{
+   assert(reg < ETHOSU_MAX_REG_INDEX);
+   if (reg >= ETHOSU_MAX_REG_INDEX)
+      return true;
+
+   if (subgraph->cmd1_valid[reg] && subgraph->cmd1_state[reg] == value)
+      return false;
+
+   subgraph->cmd1_state[reg] = value;
+   subgraph->cmd1_valid[reg] = true;
+   return true;
+}
+
+/* Tell whether `cmd` is an operation command, which EMIT0 always emits and
+ * never deduplicates.
+ * NPU_OP_* commands occupy offsets 0x00-0x13: STOP=0, IRQ=1, CONV=2,
+ * DEPTHWISE=3, POOL=5, ELEMENTWISE=6, RESIZE=7, DMA_START=16,
+ * DMA_WAIT=17, KERNEL_WAIT=18, PMU_MASK=19.
+ * Configuration registers (NPU_SET_*) start at 0x100. */
+static bool
+ethosu_is_op_cmd(uint16_t cmd)
+{
+   return (cmd <= 0x13);
+}
+
+/* Emit a one-word CMD0 command unless it would rewrite a register with the
+ * value it already holds; operation commands are always emitted.  Each macro
+ * argument is evaluated exactly once.  Expects `subgraph` in scope. */
+#define EMIT0(cmd, param) \
+   do { \
+      const uint32_t _emit_cmd = (cmd); \
+      const uint16_t _emit_param = (param) & 0xFFFF; \
+      if (ethosu_is_op_cmd(_emit_cmd) || ethosu_cmd0_changed(subgraph, _emit_cmd, _emit_param)) { \
+         ethosu_ensure_cmdstream(subgraph); \
+         *(subgraph->cursor++) = _emit_cmd | ((uint32_t)_emit_param << 16); \
+         if (DBG_ENABLED(ETHOSU_DBG_MSGS)) \
+            fprintf(stderr, "emit0(%s, 0x%x);\n", ethosu_get_cmd_name(0, _emit_cmd), _emit_param); \
+      } \
    } while (0)
 
-#define EMIT1(cmd, param, offset) \
-   do { \
-      ethosu_ensure_cmdstream(subgraph); \
-      *(subgraph->cursor++) = cmd | 0x4000 | (((param) & 0xFFFF) << 16); \
-      *(subgraph->cursor++) = (offset) & 0xFFFFFFFF; \
-      if (DBG_ENABLED(ETHOSU_DBG_MSGS)) \
-         fprintf(stderr, "emit1(%s, 0x%x, 0x%x);\n", ethosu_get_cmd_name(1, cmd), (param) & 0xFFFF, (int)(offset)); \
+/* Emit a two-word CMD1 command (header word, then the 32-bit payload) unless
+ * both the parameter and the payload match the values cached for the register.
+ * Parameter and payload are narrowed to unsigned locals first: when `param`
+ * has type int, shifting `(param) & 0xFFFF` left by 16 is undefined once bit
+ * 15 is set.  Each macro argument is evaluated exactly once. */
+#define EMIT1(cmd, param, offset) \
+   do { \
+      const uint32_t _emit_cmd = (cmd); \
+      const uint16_t _emit_param = (param) & 0xFFFF; \
+      const uint32_t _emit_offset = (offset) & 0xFFFFFFFF; \
+      const uint64_t _emit_value = ((uint64_t)_emit_param << 32) | _emit_offset; \
+      if (ethosu_cmd1_changed(subgraph, _emit_cmd, _emit_value)) { \
+         ethosu_ensure_cmdstream(subgraph); \
+         *(subgraph->cursor++) = _emit_cmd | 0x4000 | ((uint32_t)_emit_param << 16); \
+         *(subgraph->cursor++) = _emit_offset; \
+         if (DBG_ENABLED(ETHOSU_DBG_MSGS)) \
+            fprintf(stderr, "emit1(%s, 0x%x, 0x%x);\n", ethosu_get_cmd_name(1, _emit_cmd), _emit_param, (unsigned)_emit_offset); \
+      } \
    } while (0)
 
 static void
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.c b/src/gallium/drivers/ethosu/ethosu_ml.c
index 41155d37a76..e3b24238e8e 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.c
+++ b/src/gallium/drivers/ethosu/ethosu_ml.c
@@ -199,6 +199,21 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext,
    subgraph->tensors = UTIL_DYNARRAY_INIT;
    subgraph->operations = UTIL_DYNARRAY_INIT;
 
+   /* Register state tracking arrays; calloc leaves every *_valid flag false */
+   subgraph->cmd0_state = calloc(ETHOSU_MAX_REG_INDEX, sizeof(*subgraph->cmd0_state));
+   subgraph->cmd1_state = calloc(ETHOSU_MAX_REG_INDEX, sizeof(*subgraph->cmd1_state));
+   subgraph->cmd0_valid = calloc(ETHOSU_MAX_REG_INDEX, sizeof(*subgraph->cmd0_valid));
+   subgraph->cmd1_valid = calloc(ETHOSU_MAX_REG_INDEX, sizeof(*subgraph->cmd1_valid));
+   if (!subgraph->cmd0_state || !subgraph->cmd1_state ||
+       !subgraph->cmd0_valid || !subgraph->cmd1_valid) {
+      free(subgraph->cmd0_state);
+      free(subgraph->cmd1_state);
+      free(subgraph->cmd0_valid);
+      free(subgraph->cmd1_valid);
+      free(subgraph);
+      return NULL;
+   }
+
    ethosu_lower_graph(subgraph, poperations, count);
    ethosu_emit_cmdstream(subgraph);
 
@@ -356,5 +371,10 @@ ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
 
    util_dynarray_fini(&subgraph->tensors);
 
+   free(subgraph->cmd0_state);
+   free(subgraph->cmd1_state);
+   free(subgraph->cmd0_valid);
+   free(subgraph->cmd1_valid);
+
    free(subgraph);
 }
diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h
index d93567972d4..0aa1d1c08c4 100644
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@@ -26,6 +26,11 @@
 extern struct ethosu_block ARCH_OFM_BLOCK_MAX;
 extern struct ethosu_block SUB_KERNEL_MAX;
 
+/* Number of entries in each register state tracking array.
+ * All CMD0 offsets are <= 0x18F and CMD1 offsets are <= 0x1A0,
+ * so 512 entries cover the full range. */
+#define ETHOSU_MAX_REG_INDEX 512
+
 #define COEFS_REGION 0
 #define IO_REGION 1
 #define SCRATCH_REGION 2
@@ -196,6 +201,12 @@ struct ethosu_subgraph {
    uint8_t *coefs;
    struct pipe_resource *coefs_rsrc;
    unsigned coefs_used;
+
+   /* Register state tracking (ETHOSU_MAX_REG_INDEX entries each, indexed by command offset) */
+   uint16_t *cmd0_state; /* Last 16-bit parameter emitted for each CMD0 register */
+   uint64_t *cmd1_state; /* Last packed parameter + payload emitted for each CMD1 register */
+   bool *cmd0_valid;     /* True once the matching cmd0_state entry has been written */
+   bool *cmd1_valid;     /* True once the matching cmd1_state entry has been written */
 };
 
 bool