diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 40fb9021e78..539b35a8181 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1987,10 +1987,6 @@ struct Block { uint16_t divergent_if_logical_depth = 0; uint16_t uniform_if_depth = 0; - /* this information is needed for predecessors to blocks with phis when - * moving out of ssa */ - bool scc_live_out = false; - Block() : index(0) {} }; diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index 58f68cc00a8..26242b2079f 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -866,12 +866,6 @@ optimize_postRA(Program* program) for (aco_ptr& instr : block.instructions) process_instruction(ctx, instr); - - /* SCC might get overwritten by copies or swaps from parallelcopies - * inserted by SSA-elimination for linear phis. - */ - if (!block.scc_live_out) - ctx.instr_idx_by_regs[block.index][scc] = overwritten_unknown_instr; } /* Cleanup pass diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 3bef9354e60..dbc963f115f 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -104,7 +104,7 @@ struct ra_ctx { aco::monotonic_buffer_resource memory; std::vector assignments; std::vector> renames; - std::vector loop_header; + std::vector> loop_header; aco::unordered_map orig_names; aco::unordered_map vectors; aco::unordered_map split_vectors; @@ -2226,11 +2226,14 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file, std::vector>& instructions, IDSet& live_in) { /* move all phis to instructions */ + bool has_linear_phis = false; for (aco_ptr& phi : block.instructions) { if (!is_phi(phi)) break; - if (!phi->definitions[0].isKill()) + if (!phi->definitions[0].isKill()) { + has_linear_phis |= phi->opcode == aco_opcode::p_linear_phi; instructions.emplace_back(std::move(phi)); + } } /* assign phis with all-matching registers to that register */ @@ -2307,6 +2310,23 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file, register_file.fill(definition); ctx.assignments[definition.tempId()].set(definition); } + + /* Provide a scratch register in case we need to preserve SCC */ + if (has_linear_phis || block.kind & block_kind_loop_header) { + PhysReg scratch_reg = scc; + if (register_file[scc]) { + scratch_reg = get_reg_phi(ctx, live_in, register_file, instructions, block, ctx.phi_dummy, + Temp(0, s1)); + if (block.kind & block_kind_loop_header) + ctx.loop_header.back().second = scratch_reg; + } + + for (aco_ptr& phi : instructions) { + phi->pseudo().tmp_in_scc = register_file[scc]; + phi->pseudo().scratch_sgpr = scratch_reg; + phi->pseudo().needs_scratch_reg = true; + } + } } inline Temp @@ -2378,7 +2398,7 @@ handle_live_in(ra_ctx& ctx, Temp val, Block* block) void handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx, - uint32_t loop_exit_idx) + uint32_t loop_exit_idx, PhysReg scratch_reg) { Block& loop_header = ctx.program->blocks[loop_header_idx]; aco::unordered_map renames(ctx.memory); @@ -2415,6 +2435,10 @@ handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx, assignment& var = ctx.assignments[prev.id()]; ctx.assignments[renamed.id()] = var; loop_header.instructions[0]->definitions[0].setFixed(var.reg); + + /* Set scratch register */ + loop_header.instructions[0]->pseudo().scratch_sgpr = scratch_reg; + loop_header.instructions[0]->pseudo().needs_scratch_reg = true; } /* rename loop carried phi operands */ @@ -2478,9 +2502,10 @@ RegisterFile init_reg_file(ra_ctx& ctx, const std::vector& live_out_per_block, Block& block) { if (block.kind & block_kind_loop_exit) { - uint32_t header = ctx.loop_header.back(); + uint32_t header = ctx.loop_header.back().first; + PhysReg scratch_reg = ctx.loop_header.back().second; ctx.loop_header.pop_back(); - handle_loop_phis(ctx, live_out_per_block[header], header, block.index); + handle_loop_phis(ctx, live_out_per_block[header], header, block.index, scratch_reg); } RegisterFile register_file; @@ -2488,7 +2513,7 @@ init_reg_file(ra_ctx& ctx, const std::vector& live_out_per_block, Block& assert(block.index != 0 || live_in.empty()); if (block.kind & block_kind_loop_header) { - ctx.loop_header.emplace_back(block.index); + ctx.loop_header.emplace_back(block.index, PhysReg{scc}); /* already rename phis incoming value */ for (aco_ptr& instr : block.instructions) { if (!is_phi(instr)) @@ -3051,7 +3076,6 @@ register_allocation(Program* program, ra_test_policy policy) PhysReg br_reg = get_reg_phi(ctx, program->live.live_in[block.index], register_file, instructions, block, ctx.phi_dummy, Temp(0, s2)); for (unsigned pred : block.linear_preds) { - program->blocks[pred].scc_live_out = register_file[scc]; aco_ptr& br = program->blocks[pred].instructions.back(); assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 && diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 731d07c04a7..5c518d766af 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -93,10 +93,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) continue; Block& block = ctx.program->blocks[block_idx]; - std::vector>::iterator it = block.instructions.end(); - --it; - assert((*it)->isBranch()); - PhysReg scratch_sgpr = (*it)->definitions[0].physReg(); + Block& succ = ctx.program->blocks[block.linear_succs[0]]; aco_ptr pc{create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, linear_phi_info.size(), linear_phi_info.size())}; unsigned i = 0; @@ -105,9 +102,10 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) pc->operands[i] = phi_info.op; i++; } - pc->pseudo().tmp_in_scc = block.scc_live_out; - pc->pseudo().scratch_sgpr = scratch_sgpr; - pc->pseudo().needs_scratch_reg = true; + pc->pseudo().tmp_in_scc = succ.instructions[0]->pseudo().tmp_in_scc; + pc->pseudo().scratch_sgpr = succ.instructions[0]->pseudo().scratch_sgpr; + pc->pseudo().needs_scratch_reg = succ.instructions[0]->pseudo().needs_scratch_reg; + auto it = std::prev(block.instructions.end()); block.instructions.insert(it, std::move(pc)); } }