diff --git a/src/amd/compiler/aco_dead_code_analysis.cpp b/src/amd/compiler/aco_dead_code_analysis.cpp
index e1dbcd53621..18313a2f350 100644
--- a/src/amd/compiler/aco_dead_code_analysis.cpp
+++ b/src/amd/compiler/aco_dead_code_analysis.cpp
@@ -30,51 +30,40 @@
 /*
  * Implements an analysis pass to determine the number of uses
  * for each SSA-definition.
+ *
+ * This pass assumes that no loop header phis are dead code.
  */
 
 namespace aco {
 namespace {
 
-struct dce_ctx {
-   int current_block;
-   std::vector uses;
-   std::vector> live;
-
-   dce_ctx(Program* program)
-       : current_block(program->blocks.size() - 1), uses(program->peekAllocationId())
-   {
-      live.reserve(program->blocks.size());
-      for (Block& block : program->blocks)
-         live.emplace_back(block.instructions.size());
-   }
-};
-
 void
-process_block(dce_ctx& ctx, Block& block)
+process_loop_header_phis(std::vector& uses, Block& block)
 {
-   std::vector& live = ctx.live[block.index];
-   assert(live.size() == block.instructions.size());
-   bool process_predecessors = false;
-   for (int idx = block.instructions.size() - 1; idx >= 0; idx--) {
-      if (live[idx])
-         continue;
-
-      aco_ptr& instr = block.instructions[idx];
-      if (!is_dead(ctx.uses, instr.get())) {
-         for (const Operand& op : instr->operands) {
-            if (op.isTemp()) {
-               if (ctx.uses[op.tempId()] == 0)
-                  process_predecessors = true;
-               ctx.uses[op.tempId()]++;
-            }
-         }
-         live[idx] = true;
+   for (aco_ptr& instr : block.instructions) {
+      if (!is_phi(instr))
+         return;
+      for (const Operand& op : instr->operands) {
+         if (op.isTemp())
+            uses[op.tempId()]++;
       }
    }
+}
 
-   if (process_predecessors) {
-      for (unsigned pred_idx : block.linear_preds)
-         ctx.current_block = std::max(ctx.current_block, (int)pred_idx);
+void
+process_block(std::vector& uses, Block& block)
+{
+   for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); it++) {
+      aco_ptr& instr = *it;
+      if ((block.kind & block_kind_loop_header) && is_phi(instr))
+         break;
+
+      if (!is_dead(uses, instr.get())) {
+         for (const Operand& op : instr->operands) {
+            if (op.isTemp())
+               uses[op.tempId()]++;
+         }
+      }
    }
 }
 
@@ -83,15 +72,17 @@ process_block(dce_ctx& ctx, Block& block)
 std::vector
 dead_code_analysis(Program* program)
 {
+   std::vector uses(program->peekAllocationId());
 
-   dce_ctx ctx(program);
-
-   while (ctx.current_block >= 0) {
-      unsigned next_block = ctx.current_block--;
-      process_block(ctx, program->blocks[next_block]);
+   for (Block& block : program->blocks) {
+      if (block.kind & block_kind_loop_header)
+         process_loop_header_phis(uses, block);
    }
 
-   return ctx.uses;
+   for (auto it = program->blocks.rbegin(); it != program->blocks.rend(); it++)
+      process_block(uses, *it);
+
+   return uses;
 }
 
 } // namespace aco
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index d20bc29b120..4f4fb7eba5e 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -251,6 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector>
 
       /* create ssa name for restore mask */
       if (info.has_divergent_break) {
+         // TODO: this phi is unnecessary if we end WQM immediately after the loop
          /* this phi might be trivial but ensures a parallelcopy on the loop header */
          aco_ptr phi{create_instruction(
             aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};