mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
aco: refactor and speed-up dead code analysis
Assuming that no loop header phis are dead code, we can perform the dead code analysis in a single iteration.

Totals from 25 (0.03% of 79330) affected shaders: (GFX11)
MaxWaves: 664 -> 662 (-0.30%)
Instrs: 487618 -> 488822 (+0.25%)
CodeSize: 2451548 -> 2459756 (+0.33%)
VGPRs: 1296 -> 1332 (+2.78%)
Latency: 2337256 -> 2338098 (+0.04%); split: -0.00%, +0.04%
InvThroughput: 560682 -> 576158 (+2.76%)
VClause: 15782 -> 15790 (+0.05%)
Copies: 37905 -> 38731 (+2.18%)
PreVGPRs: 1124 -> 1156 (+2.85%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26901>
This commit is contained in:
parent
a37f43e422
commit
dce695b24f
2 changed files with 33 additions and 41 deletions
|
|
@ -30,51 +30,40 @@
|
|||
/*
|
||||
* Implements an analysis pass to determine the number of uses
|
||||
* for each SSA-definition.
|
||||
*
|
||||
* This pass assumes that no loop header phis are dead code.
|
||||
*/
|
||||
|
||||
namespace aco {
|
||||
namespace {
|
||||
|
||||
struct dce_ctx {
|
||||
int current_block;
|
||||
std::vector<uint16_t> uses;
|
||||
std::vector<std::vector<bool>> live;
|
||||
|
||||
dce_ctx(Program* program)
|
||||
: current_block(program->blocks.size() - 1), uses(program->peekAllocationId())
|
||||
{
|
||||
live.reserve(program->blocks.size());
|
||||
for (Block& block : program->blocks)
|
||||
live.emplace_back(block.instructions.size());
|
||||
}
|
||||
};
|
||||
|
||||
void
|
||||
process_block(dce_ctx& ctx, Block& block)
|
||||
process_loop_header_phis(std::vector<uint16_t>& uses, Block& block)
|
||||
{
|
||||
std::vector<bool>& live = ctx.live[block.index];
|
||||
assert(live.size() == block.instructions.size());
|
||||
bool process_predecessors = false;
|
||||
for (int idx = block.instructions.size() - 1; idx >= 0; idx--) {
|
||||
if (live[idx])
|
||||
continue;
|
||||
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
if (!is_dead(ctx.uses, instr.get())) {
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp()) {
|
||||
if (ctx.uses[op.tempId()] == 0)
|
||||
process_predecessors = true;
|
||||
ctx.uses[op.tempId()]++;
|
||||
}
|
||||
}
|
||||
live[idx] = true;
|
||||
for (aco_ptr<Instruction>& instr : block.instructions) {
|
||||
if (!is_phi(instr))
|
||||
return;
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp())
|
||||
uses[op.tempId()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (process_predecessors) {
|
||||
for (unsigned pred_idx : block.linear_preds)
|
||||
ctx.current_block = std::max(ctx.current_block, (int)pred_idx);
|
||||
void
|
||||
process_block(std::vector<uint16_t>& uses, Block& block)
|
||||
{
|
||||
for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); it++) {
|
||||
aco_ptr<Instruction>& instr = *it;
|
||||
if ((block.kind & block_kind_loop_header) && is_phi(instr))
|
||||
break;
|
||||
|
||||
if (!is_dead(uses, instr.get())) {
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp())
|
||||
uses[op.tempId()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -83,15 +72,17 @@ process_block(dce_ctx& ctx, Block& block)
|
|||
std::vector<uint16_t>
|
||||
dead_code_analysis(Program* program)
|
||||
{
|
||||
std::vector<uint16_t> uses(program->peekAllocationId());
|
||||
|
||||
dce_ctx ctx(program);
|
||||
|
||||
while (ctx.current_block >= 0) {
|
||||
unsigned next_block = ctx.current_block--;
|
||||
process_block(ctx, program->blocks[next_block]);
|
||||
for (Block& block : program->blocks) {
|
||||
if (block.kind & block_kind_loop_header)
|
||||
process_loop_header_phis(uses, block);
|
||||
}
|
||||
|
||||
return ctx.uses;
|
||||
for (auto it = program->blocks.rbegin(); it != program->blocks.rend(); it++)
|
||||
process_block(uses, *it);
|
||||
|
||||
return uses;
|
||||
}
|
||||
|
||||
} // namespace aco
|
||||
|
|
|
|||
|
|
@ -251,6 +251,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
|||
|
||||
/* create ssa name for restore mask */
|
||||
if (info.has_divergent_break) {
|
||||
// TODO: this phi is unnecessary if we end WQM immediately after the loop
|
||||
/* this phi might be trivial but ensures a parallelcopy on the loop header */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue