aco/insert_exec_mask: don't disable dead quads on demote in divergent CF

Also force-enable helpers in case of demote in divergent CF.

Totals from 1305 (1.64% of 79377) affected shaders: (Navi31)

Instrs: 926923 -> 922516 (-0.48%); split: -0.48%, +0.00%
CodeSize: 5045292 -> 5027408 (-0.35%); split: -0.36%, +0.00%
Latency: 6176577 -> 6174708 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 931603 -> 931583 (-0.00%); split: -0.00%, +0.00%
SClause: 22816 -> 22855 (+0.17%); split: -0.17%, +0.34%
Copies: 57347 -> 55170 (-3.80%); split: -3.81%, +0.01%
Branches: 18990 -> 18974 (-0.08%)
PreSGPRs: 42734 -> 43248 (+1.20%)
SALU: 90511 -> 86153 (-4.81%); split: -4.85%, +0.04%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33619>
This commit is contained in:
Daniel Schürmann 2025-02-18 11:24:00 +01:00 committed by Marge Bot
parent b872ff6ef2
commit 90faadae72
2 changed files with 27 additions and 33 deletions

View file

@ -510,55 +510,49 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
instr->definitions[1] = bld.def(s1, scc);
}
} else if (instr->opcode == aco_opcode::p_demote_to_helper) {
assert(!ctx.handle_wqm || state == WQM);
assert((info.exec[0].type & mask_type_exact) && (info.exec[0].type & mask_type_global));
const bool nested_cf = !(info.exec.back().type & mask_type_global);
if (ctx.handle_wqm && state == Exact && nested_cf) {
/* Transition back to WQM without extra instruction. */
info.exec.pop_back();
state = WQM;
} else if (block->instructions[idx + 1]->opcode == aco_opcode::p_end_wqm) {
if (block->instructions[idx + 1]->opcode == aco_opcode::p_end_wqm) {
/* Transition to Exact without extra instruction. */
info.exec.resize(1);
state = Exact;
} else if (nested_cf) {
/* Save curent exec temporarily. */
info.exec.back().op = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
} else {
/* Make sure to not use some previously stored temporary. */
info.exec.back().op = Operand(exec, bld.lm);
}
/* Remove invocations from global exact mask. */
Definition def = state == Exact ? Definition(exec, bld.lm) : bld.def(bld.lm);
Operand src = instr->operands[0].isConstant() ? Operand(exec, bld.lm) : instr->operands[0];
Operand exit_cond = Operand(exec, bld.lm);
bld.sop2(Builder::s_andn2, def, bld.def(s1, scc), info.exec[0].op, src);
info.exec[0].op = def.isTemp() ? Operand(def.getTemp()) : Operand(exec, bld.lm);
if (state == Exact) {
assert(info.exec.size() == 1);
bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), info.exec[0].op,
src);
} else {
Temp cond = bld.tmp(s1);
info.exec[0].op = bld.sop2(Builder::s_andn2, bld.def(bld.lm), Definition(cond, scc),
info.exec[0].op, src);
/* Update global WQM mask and store in exec. */
if (state == WQM) {
assert(info.exec.size() > 1);
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), def.getTemp());
/* Update global WQM mask and store in exec. */
if (info.exec.back().type & mask_type_global) {
assert(info.exec.size() == 2);
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc),
info.exec[0].op);
} else {
/* Conditionally set exec=0. Note, that exec might already be zero, so don't use s_branch_execz. */
bld.sop2(Builder::s_cselect, Definition(exec, bld.lm), Operand(exec, bld.lm),
Operand::zero(bld.lm.bytes()), bld.scc(cond));
exit_cond = Operand(cond, scc);
}
}
/* End shader if global mask is zero. */
instr->opcode = aco_opcode::p_exit_early_if_not;
instr->operands[0] = Operand(exec, bld.lm);
instr->operands[0] = exit_cond;
bld.insert(std::move(instr));
/* Update all other exec masks. */
if (nested_cf) {
const unsigned global_idx = state == WQM ? 1 : 0;
for (unsigned i = global_idx + 1; i < info.exec.size() - 1; i++) {
info.exec[i].op = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc),
info.exec[i].op, Operand(exec, bld.lm));
}
/* Update current exec and save WQM mask. */
info.exec[global_idx].op =
bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
Definition(exec, bld.lm), info.exec.back().op, Operand(exec, bld.lm));
info.exec.back().op = Operand(exec, bld.lm);
}
continue;
} else if (instr->opcode == aco_opcode::p_elect) {

View file

@ -8631,11 +8631,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
bld.pseudo(aco_opcode::p_demote_to_helper, cond);
/* Perform the demote in WQM so that it doesn't make exec empty. WQM should last until at
* least the next top-level block.
/* Perform the demote in WQM so that it doesn't make exec empty.
* WQM should last until at least the next top-level block.
*/
if (ctx->cf_info.in_divergent_cf)
set_wqm(ctx);
set_wqm(ctx, true);
ctx->block->kind |= block_kind_uses_discard;
ctx->program->needs_exact = true;