aco/insert_exec: don't always reset top exec

This allows previous temporaries to be re-used when exec was restored
from a Temp, instead of having to create a new copy of exec.

Foz-DB Navi31:
Totals from 545 (0.69% of 79395) affected shaders:
Instrs: 216563 -> 215698 (-0.40%)
CodeSize: 1183536 -> 1180076 (-0.29%)
Latency: 1135269 -> 1135294 (+0.00%); split: -0.00%, +0.00%
Copies: 11933 -> 11072 (-7.22%)
SALU: 18990 -> 18129 (-4.53%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31567>
This commit is contained in:
Georg Lehmann 2024-10-08 20:30:15 +02:00 committed by Marge Bot
parent 4f04e6f0c4
commit 31f62a6123

View file

@ -110,7 +110,6 @@ transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
assert(ctx.info[idx].exec.back().op.size() == bld.lm.size()); assert(ctx.info[idx].exec.back().op.size() == bld.lm.size());
assert(ctx.info[idx].exec.back().op.isTemp()); assert(ctx.info[idx].exec.back().op.isTemp());
bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op);
ctx.info[idx].exec.back().op = Operand(exec, bld.lm);
} }
void void
@ -128,7 +127,6 @@ transition_to_Exact(exec_ctx& ctx, Builder bld, unsigned idx)
assert(ctx.info[idx].exec.back().op.size() == bld.lm.size()); assert(ctx.info[idx].exec.back().op.size() == bld.lm.size());
assert(ctx.info[idx].exec.back().op.isTemp()); assert(ctx.info[idx].exec.back().op.isTemp());
bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op);
ctx.info[idx].exec.back().op = Operand(exec, bld.lm);
return; return;
} }
/* otherwise, we create an exact mask and push to the stack */ /* otherwise, we create an exact mask and push to the stack */
@ -366,8 +364,6 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
Operand restore = ctx.info[idx].exec.back().op; Operand restore = ctx.info[idx].exec.back().op;
assert(restore.size() == bld.lm.size()); assert(restore.size() == bld.lm.size());
bld.copy(Definition(exec, bld.lm), restore); bld.copy(Definition(exec, bld.lm), restore);
if (!restore.isConstant())
ctx.info[idx].exec.back().op = Operand(exec, bld.lm);
} }
return i; return i;
@ -507,6 +503,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
} else if (nested_cf) { } else if (nested_cf) {
/* Save curent exec temporarily. */ /* Save curent exec temporarily. */
info.exec.back().op = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm)); info.exec.back().op = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
} else {
info.exec.back().op = Operand(exec, bld.lm);
} }
/* Remove invocations from global exact mask. */ /* Remove invocations from global exact mask. */
@ -606,16 +604,14 @@ add_branch_code(exec_ctx& ctx, Block* block)
if (has_divergent_break) { if (has_divergent_break) {
/* save restore exec mask */ /* save restore exec mask */
uint8_t mask = ctx.info[idx].exec.back().type; if (!ctx.info[idx].exec.back().op.constantEquals(-1u)) {
if (ctx.info[idx].exec.back().op.constantEquals(-1u)) {
ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask);
} else {
bld.reset(bld.instructions, std::prev(bld.instructions->end())); bld.reset(bld.instructions, std::prev(bld.instructions->end()));
Operand restore = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm)); Operand restore = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
ctx.info[idx].exec.emplace(std::prev(ctx.info[idx].exec.end()), restore, mask); ctx.info[idx].exec.back().op = restore;
bld.reset(bld.instructions); bld.reset(bld.instructions);
} }
ctx.info[idx].exec.back().type &= (mask_type_wqm | mask_type_exact); uint8_t mask = ctx.info[idx].exec.back().type & (mask_type_wqm | mask_type_exact);
ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask);
} }
unsigned num_exec_masks = ctx.info[idx].exec.size(); unsigned num_exec_masks = ctx.info[idx].exec.size();
@ -638,10 +634,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
need_parallelcopy = true; need_parallelcopy = true;
} }
if (need_parallelcopy) { if (need_parallelcopy)
bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op);
ctx.info[idx].exec.back().op = Operand(exec, bld.lm);
}
bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), Operand(exec, bld.lm), bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), Operand(exec, bld.lm),
block->linear_succs[1], block->linear_succs[0]); block->linear_succs[1], block->linear_succs[0]);
} else if (block->kind & block_kind_uniform) { } else if (block->kind & block_kind_uniform) {