From ea731cda1273e2a333ae08163b3d91c7626ac744 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 27 Feb 2026 14:12:53 +0100 Subject: [PATCH] ethosu: fix blockdep to check for data dependencies calc_blockdep always returned MAX_BLOCKDEP without checking if the previous op writes to a buffer the current op reads from. This let the NPU start reading before the previous write was done. Add overlap check between previous OFM and current IFM so we set blockdep to 0 when they share the same buffer. Update ethos-imx93-fails.txt to remove the tests that now pass. Signed-off-by: Anders Roxell Part-of: --- .../drivers/ethosu/ci/ethos-imx93-fails.txt | 3 --- src/gallium/drivers/ethosu/ethosu_cmd.c | 26 ++++++++++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt b/src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt index 8b5606b0818..e9fae523529 100644 --- a/src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt +++ b/src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt @@ -1,5 +1,4 @@ Models.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail -Models.Op/micronetlarge_ad_large_int8,Fail Models.Op/movenetlightning_089,Fail Models.Op/movenetlightning_090,Fail Models.Op/movenetlightning_093,Fail @@ -42,5 +41,3 @@ Models.Op/movenetthunder_141,Fail Models.Op/movenetthunder_151,Fail Models.Op/movenetthunder_152,Fail Models.Op/movenetthunder_movenet_single_pose_thunder_ptq,Fail -Models.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail -Models.Op/mobilenetv2_mobilenet_v2_tflite_1_0_224_quantized_v1,Fail \ No newline at end of file diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index caab9994921..429162a36e4 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -618,6 +618,20 @@ fill_memory_accesses(struct ethosu_subgraph *subgraph) } } +static bool +fm_ranges_overlap(struct ethosu_subgraph *subgraph, + struct ethosu_feature_map *a, struct ethosu_feature_map *b) +{ + struct ethosu_tensor *ta = ethosu_find_tensor(subgraph, a->tensor_idx); + struct ethosu_tensor *tb = ethosu_find_tensor(subgraph, b->tensor_idx); + + if (!ta || !tb || ta->size == 0 || tb->size == 0) + return false; + + return ta->offset < tb->offset + tb->size && + tb->offset < ta->offset + ta->size; +} + static unsigned calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation) { @@ -630,7 +644,17 @@ calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op if (prev_uses_lut && SHRAM_RESERVED_UNUSED_BANKS == 0 && !curr_uses_lut) return 0; - return MAX_BLOCKDEP; /* TODO: Check if there is actually overlap between the FMs */ + /* If the previous op writes to the same buffer that the current op + * reads from, we need to wait for it to finish first. + */ + bool ifm_overlaps = fm_ranges_overlap(subgraph, &prev_op->ofm, &operation->ifm); + bool ifm2_overlaps = operation->type == ETHOSU_OPERATION_TYPE_ELTWISE && + fm_ranges_overlap(subgraph, &prev_op->ofm, &operation->ifm2); + + if (ifm_overlaps || ifm2_overlaps) + return 0; + + return MAX_BLOCKDEP; } void