mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
aco/lower_branches: do eliminate_useless_exec_writes_in_block() during branch lowering.
Totals from 728 (0.92% of 79395) affected shaders: (Navi31)

Instrs: 452926 -> 452161 (-0.17%)
CodeSize: 2255536 -> 2252504 (-0.13%)
Latency: 1683404 -> 1683470 (+0.00%); split: -0.01%, +0.01%
InvThroughput: 210887 -> 210888 (+0.00%); split: -0.00%, +0.00%
SALU: 77865 -> 77106 (-0.97%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32477>
This commit is contained in:
parent
eecdb45d61
commit
2b5a893e29
1 changed file with 64 additions and 1 deletion
|
|
@ -12,8 +12,11 @@ namespace {
|
|||
|
||||
struct branch_ctx {
|
||||
Program* program;
|
||||
std::vector<bool> blocks_incoming_exec_used;
|
||||
|
||||
branch_ctx(Program* program_) : program(program_) {}
|
||||
branch_ctx(Program* program_)
|
||||
: program(program_), blocks_incoming_exec_used(program_->blocks.size(), true)
|
||||
{}
|
||||
};
|
||||
|
||||
void
|
||||
|
|
@ -35,6 +38,65 @@ remove_linear_successor(branch_ctx& ctx, Block& block, uint32_t succ_index)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
eliminate_useless_exec_writes_in_block(branch_ctx& ctx, Block& block)
|
||||
{
|
||||
/* Check if any successor needs the outgoing exec mask from the current block. */
|
||||
bool exec_write_used;
|
||||
if (block.kind & block_kind_end_with_regs) {
|
||||
/* Last block of a program with succeed shader part should respect final exec write. */
|
||||
exec_write_used = true;
|
||||
} else if (block.linear_succs.empty() && !block.instructions.empty() &&
|
||||
block.instructions.back()->opcode == aco_opcode::s_setpc_b64) {
|
||||
/* This block ends in a long jump and exec might be needed for the next shader part. */
|
||||
exec_write_used = true;
|
||||
} else {
|
||||
/* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */
|
||||
exec_write_used =
|
||||
std::any_of(block.linear_succs.begin(), block.linear_succs.end(),
|
||||
[&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; });
|
||||
}
|
||||
|
||||
/* Go through all instructions and eliminate useless exec writes. */
|
||||
for (int i = block.instructions.size() - 1; i >= 0; --i) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[i];
|
||||
|
||||
/* See if the current instruction needs or writes exec. */
|
||||
bool needs_exec = needs_exec_mask(instr.get());
|
||||
bool writes_exec =
|
||||
instr->writes_exec() && instr->definitions[0].regClass() == ctx.program->lane_mask;
|
||||
|
||||
/* See if we found an unused exec write. */
|
||||
if (writes_exec && !exec_write_used) {
|
||||
/* Don't eliminate an instruction that writes registers other than exec and scc.
|
||||
* It is possible that this is eg. an s_and_saveexec and the saved value is
|
||||
* used by a later branch.
|
||||
*/
|
||||
bool writes_other = std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[](const Definition& def) -> bool
|
||||
{ return def.physReg() != exec && def.physReg() != scc; });
|
||||
if (!writes_other) {
|
||||
instr.reset();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* For a newly encountered exec write, clear the used flag. */
|
||||
if (writes_exec)
|
||||
exec_write_used = false;
|
||||
|
||||
/* If the current instruction needs exec, mark it as used. */
|
||||
exec_write_used |= needs_exec;
|
||||
}
|
||||
|
||||
/* Remember if the current block needs an incoming exec mask from its predecessors. */
|
||||
ctx.blocks_incoming_exec_used[block.index] = exec_write_used;
|
||||
|
||||
/* Cleanup: remove deleted instructions from the vector. */
|
||||
auto new_end = std::remove(block.instructions.begin(), block.instructions.end(), nullptr);
|
||||
block.instructions.resize(new_end - block.instructions.begin());
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the branch instruction can be removed:
|
||||
* This is beneficial when executing the next block with an empty exec mask
|
||||
|
|
@ -196,6 +258,7 @@ lower_branches(Program* program)
|
|||
for (int i = program->blocks.size() - 1; i >= 0; i--) {
|
||||
Block& block = program->blocks[i];
|
||||
lower_branch_instruction(ctx, block);
|
||||
eliminate_useless_exec_writes_in_block(ctx, block);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue