diff --git a/src/amd/compiler/aco_insert_delay_alu.cpp b/src/amd/compiler/aco_insert_delay_alu.cpp
index 1b5042839ba..27c0dee998c 100644
--- a/src/amd/compiler/aco_insert_delay_alu.cpp
+++ b/src/amd/compiler/aco_insert_delay_alu.cpp
@@ -307,6 +307,43 @@ handle_block(Program* program, Block& block, delay_ctx& ctx)
    block.instructions.swap(new_instructions);
 }
 
+/* Run handle_block() on the latch of the loop headed by "block".
+ *
+ * insert_delay_alu() calls this when it reaches a loop header, before it handles
+ * the header itself, and skips latch blocks in its own walk, so the latch is
+ * handled here rather than in that walk. Nothing is done when the header has a
+ * single linear predecessor or is itself the latch.
+ */
+void
+handle_loop_latch(Program* program, Block& block, delay_ctx& ctx)
+{
+   /* No actual loop. */
+   if (block.linear_preds.size() == 1)
+      return;
+
+   /* The loop header is also the loop latch. */
+   if (block.kind & block_kind_loop_latch)
+      return;
+
+   /* The header was ruled out above, so the search starts at the block after it. It ends at the
+    * first block with a smaller nest depth, or at the end of the block list so that the index
+    * can never run past it.
+    */
+   auto& blocks = ctx.program->blocks;
+   for (unsigned i = block.index + 1; i < blocks.size(); i++) {
+      Block& candidate = blocks[i];
+      if (candidate.loop_nest_depth < block.loop_nest_depth)
+         break;
+
+      /* Blocks at a deeper nest level are passed over; only a latch at the header's own depth
+       * matches.
+       */
+      if (candidate.loop_nest_depth == block.loop_nest_depth &&
+          (candidate.kind & block_kind_loop_latch)) {
+         handle_block(program, candidate, ctx);
+         break;
+      }
+   }
+}
+
 } /* end namespace */
 
 void
@@ -321,6 +358,12 @@ insert_delay_alu(Program* program)
       if (current.instructions.empty())
          continue;
 
+      /* Handle the loop latch block before the loop body. */
+      if (current.kind & block_kind_loop_header)
+         handle_loop_latch(program, current, ctx);
+      else if (current.kind & block_kind_loop_latch)
+         continue;
+
       handle_block(program, current, ctx);
 
       /* Reset ctx if there is a jump, assuming ALU will be done