aco/ra: explicitly assign scratch SGPR for linear phis

We are about to remove the branch definitions, which previously
served this purpose. Also remove Block::scc_live_out.
The statistics below change slightly due to round-robin RA.

Totals from 939 (1.18% of 79395) affected shaders: (Navi31)

Instrs: 5038786 -> 5038611 (-0.00%); split: -0.01%, +0.00%
CodeSize: 26153412 -> 26152904 (-0.00%); split: -0.00%, +0.00%
Latency: 41649989 -> 41650120 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 6447508 -> 6447536 (+0.00%); split: -0.00%, +0.00%
SClause: 131319 -> 131276 (-0.03%); split: -0.03%, +0.00%
Copies: 359362 -> 359256 (-0.03%); split: -0.05%, +0.02%
SALU: 639275 -> 639169 (-0.02%); split: -0.03%, +0.01%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32217>
This commit is contained in:
Daniel Schürmann 2024-11-07 10:39:49 +01:00 committed by Marge Bot
parent 17da551133
commit a1a4a6061c
4 changed files with 36 additions and 24 deletions

View file

@ -1987,10 +1987,6 @@ struct Block {
uint16_t divergent_if_logical_depth = 0;
uint16_t uniform_if_depth = 0;
/* this information is needed for predecessors to blocks with phis when
* moving out of ssa */
bool scc_live_out = false;
Block() : index(0) {}
};

View file

@ -866,12 +866,6 @@ optimize_postRA(Program* program)
for (aco_ptr<Instruction>& instr : block.instructions)
process_instruction(ctx, instr);
/* SCC might get overwritten by copies or swaps from parallelcopies
* inserted by SSA-elimination for linear phis.
*/
if (!block.scc_live_out)
ctx.instr_idx_by_regs[block.index][scc] = overwritten_unknown_instr;
}
/* Cleanup pass

View file

@ -104,7 +104,7 @@ struct ra_ctx {
aco::monotonic_buffer_resource memory;
std::vector<assignment> assignments;
std::vector<aco::unordered_map<uint32_t, Temp>> renames;
std::vector<uint32_t> loop_header;
std::vector<std::pair<uint32_t, PhysReg>> loop_header;
aco::unordered_map<uint32_t, Temp> orig_names;
aco::unordered_map<uint32_t, vector_info> vectors;
aco::unordered_map<uint32_t, Instruction*> split_vectors;
@ -2226,11 +2226,14 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file,
std::vector<aco_ptr<Instruction>>& instructions, IDSet& live_in)
{
/* move all phis to instructions */
bool has_linear_phis = false;
for (aco_ptr<Instruction>& phi : block.instructions) {
if (!is_phi(phi))
break;
if (!phi->definitions[0].isKill())
if (!phi->definitions[0].isKill()) {
has_linear_phis |= phi->opcode == aco_opcode::p_linear_phi;
instructions.emplace_back(std::move(phi));
}
}
/* assign phis with all-matching registers to that register */
@ -2307,6 +2310,23 @@ get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file,
register_file.fill(definition);
ctx.assignments[definition.tempId()].set(definition);
}
/* Provide a scratch register in case we need to preserve SCC */
if (has_linear_phis || block.kind & block_kind_loop_header) {
PhysReg scratch_reg = scc;
if (register_file[scc]) {
scratch_reg = get_reg_phi(ctx, live_in, register_file, instructions, block, ctx.phi_dummy,
Temp(0, s1));
if (block.kind & block_kind_loop_header)
ctx.loop_header.back().second = scratch_reg;
}
for (aco_ptr<Instruction>& phi : instructions) {
phi->pseudo().tmp_in_scc = register_file[scc];
phi->pseudo().scratch_sgpr = scratch_reg;
phi->pseudo().needs_scratch_reg = true;
}
}
}
inline Temp
@ -2378,7 +2398,7 @@ handle_live_in(ra_ctx& ctx, Temp val, Block* block)
void
handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx,
uint32_t loop_exit_idx)
uint32_t loop_exit_idx, PhysReg scratch_reg)
{
Block& loop_header = ctx.program->blocks[loop_header_idx];
aco::unordered_map<uint32_t, Temp> renames(ctx.memory);
@ -2415,6 +2435,10 @@ handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx,
assignment& var = ctx.assignments[prev.id()];
ctx.assignments[renamed.id()] = var;
loop_header.instructions[0]->definitions[0].setFixed(var.reg);
/* Set scratch register */
loop_header.instructions[0]->pseudo().scratch_sgpr = scratch_reg;
loop_header.instructions[0]->pseudo().needs_scratch_reg = true;
}
/* rename loop carried phi operands */
@ -2478,9 +2502,10 @@ RegisterFile
init_reg_file(ra_ctx& ctx, const std::vector<IDSet>& live_out_per_block, Block& block)
{
if (block.kind & block_kind_loop_exit) {
uint32_t header = ctx.loop_header.back();
uint32_t header = ctx.loop_header.back().first;
PhysReg scratch_reg = ctx.loop_header.back().second;
ctx.loop_header.pop_back();
handle_loop_phis(ctx, live_out_per_block[header], header, block.index);
handle_loop_phis(ctx, live_out_per_block[header], header, block.index, scratch_reg);
}
RegisterFile register_file;
@ -2488,7 +2513,7 @@ init_reg_file(ra_ctx& ctx, const std::vector<IDSet>& live_out_per_block, Block&
assert(block.index != 0 || live_in.empty());
if (block.kind & block_kind_loop_header) {
ctx.loop_header.emplace_back(block.index);
ctx.loop_header.emplace_back(block.index, PhysReg{scc});
/* already rename phis incoming value */
for (aco_ptr<Instruction>& instr : block.instructions) {
if (!is_phi(instr))
@ -3051,7 +3076,6 @@ register_allocation(Program* program, ra_test_policy policy)
PhysReg br_reg = get_reg_phi(ctx, program->live.live_in[block.index], register_file,
instructions, block, ctx.phi_dummy, Temp(0, s2));
for (unsigned pred : block.linear_preds) {
program->blocks[pred].scc_live_out = register_file[scc];
aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 &&

View file

@ -93,10 +93,7 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
continue;
Block& block = ctx.program->blocks[block_idx];
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
--it;
assert((*it)->isBranch());
PhysReg scratch_sgpr = (*it)->definitions[0].physReg();
Block& succ = ctx.program->blocks[block.linear_succs[0]];
aco_ptr<Instruction> pc{create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO,
linear_phi_info.size(), linear_phi_info.size())};
unsigned i = 0;
@ -105,9 +102,10 @@ insert_parallelcopies(ssa_elimination_ctx& ctx)
pc->operands[i] = phi_info.op;
i++;
}
pc->pseudo().tmp_in_scc = block.scc_live_out;
pc->pseudo().scratch_sgpr = scratch_sgpr;
pc->pseudo().needs_scratch_reg = true;
pc->pseudo().tmp_in_scc = succ.instructions[0]->pseudo().tmp_in_scc;
pc->pseudo().scratch_sgpr = succ.instructions[0]->pseudo().scratch_sgpr;
pc->pseudo().needs_scratch_reg = succ.instructions[0]->pseudo().needs_scratch_reg;
auto it = std::prev(block.instructions.end());
block.instructions.insert(it, std::move(pc));
}
}