From 007cb02db972223944cbd211a45a656dd93a8292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 18 Mar 2022 20:03:27 +0100 Subject: [PATCH] aco: use branch definition as scratch register for SSA lowering Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_ir.h | 1 - src/amd/compiler/aco_register_allocation.cpp | 35 +------------------- src/amd/compiler/aco_ssa_elimination.cpp | 3 +- 3 files changed, 3 insertions(+), 36 deletions(-) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index fa37b962820..6108794dd3b 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1914,7 +1914,6 @@ struct Block { /* this information is needed for predecessors to blocks with phis when * moving out of ssa */ bool scc_live_out = false; - PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */ Block() : index(0) {} }; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 5b4733354ba..00c273fc5fc 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2524,9 +2524,6 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra ra_ctx ctx(program, policy); get_affinities(ctx, live_out_per_block); - /* state of register file after phis */ - std::vector<std::bitset<128>> sgpr_live_in(program->blocks.size()); - for (Block& block : program->blocks) { ctx.block = &block; @@ -2550,9 +2547,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra PhysReg br_reg = get_reg_phi(ctx, live_out_per_block[block.index], register_file, instructions, block, ctx.phi_dummy, Temp(0, s2)); for (unsigned pred : block.linear_preds) { + program->blocks[pred].scc_live_out = register_file[scc]; aco_ptr<Instruction>& br = program->blocks[pred].instructions.back(); - if (br->definitions.empty()) - continue; assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 &&
br->definitions[0].isKill()); @@ -2561,11 +2557,6 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra } } - /* fill in sgpr_live_in */ - for (unsigned i = 0; i <= ctx.max_used_sgpr; i++) - sgpr_live_in[block.index][i] = register_file[PhysReg{i}]; - sgpr_live_in[block.index][127] = register_file[scc]; - /* Handle all other instructions of the block */ auto NonPhi = [](aco_ptr<Instruction>& instr) -> bool { return instr && !is_phi(instr); }; std::vector<aco_ptr<Instruction>>::iterator instr_it = @@ -2977,30 +2968,6 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra block.instructions = std::move(instructions); } /* end for BB */ - /* find scc spill registers which may be needed for parallelcopies created by phis */ - for (Block& block : program->blocks) { - if (block.linear_preds.size() <= 1) - continue; - - std::bitset<128> regs = sgpr_live_in[block.index]; - if (!regs[127]) - continue; - - /* choose a register */ - int16_t reg = 0; - for (; reg < ctx.program->max_reg_demand.sgpr && regs[reg]; reg++) - ; - assert(reg < ctx.program->max_reg_demand.sgpr); - adjust_max_used_regs(ctx, s1, reg); - - /* update predecessors */ - for (unsigned& pred_index : block.linear_preds) { - Block& pred = program->blocks[pred_index]; - pred.scc_live_out = true; - pred.scratch_sgpr = PhysReg{(uint16_t)reg}; - } - } - /* num_gpr = rnd_up(max_used_gpr + 1) */ program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1); program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1); diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 266af1e4893..5bdffa8f903 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -121,6 +121,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end(); --it; assert((*it)->isBranch()); + PhysReg scratch_sgpr = (*it)->definitions[0].physReg(); aco_ptr<Pseudo_instruction> pc{
create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, linear_phi_info.size(), linear_phi_info.size())}; @@ -131,7 +132,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx) i++; } pc->tmp_in_scc = block.scc_live_out; - pc->scratch_sgpr = block.scratch_sgpr; + pc->scratch_sgpr = scratch_sgpr; block.instructions.insert(it, std::move(pc)); } }