mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 04:40:09 +01:00
aco/insert_exec_mask: don't disable dead quads on demote in divergent CF
Also force-enalbe helpers in case of demote in divergent CF. Totals from 1305 (1.64% of 79377) affected shaders: (Navi31) Instrs: 926923 -> 922516 (-0.48%); split: -0.48%, +0.00% CodeSize: 5045292 -> 5027408 (-0.35%); split: -0.36%, +0.00% Latency: 6176577 -> 6174708 (-0.03%); split: -0.03%, +0.00% InvThroughput: 931603 -> 931583 (-0.00%); split: -0.00%, +0.00% SClause: 22816 -> 22855 (+0.17%); split: -0.17%, +0.34% Copies: 57347 -> 55170 (-3.80%); split: -3.81%, +0.01% Branches: 18990 -> 18974 (-0.08%) PreSGPRs: 42734 -> 43248 (+1.20%) SALU: 90511 -> 86153 (-4.81%); split: -4.85%, +0.04% Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33619>
This commit is contained in:
parent
b872ff6ef2
commit
90faadae72
2 changed files with 27 additions and 33 deletions
|
|
@ -510,55 +510,49 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
|
|||
instr->definitions[1] = bld.def(s1, scc);
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_demote_to_helper) {
|
||||
assert(!ctx.handle_wqm || state == WQM);
|
||||
assert((info.exec[0].type & mask_type_exact) && (info.exec[0].type & mask_type_global));
|
||||
|
||||
const bool nested_cf = !(info.exec.back().type & mask_type_global);
|
||||
if (ctx.handle_wqm && state == Exact && nested_cf) {
|
||||
/* Transition back to WQM without extra instruction. */
|
||||
info.exec.pop_back();
|
||||
state = WQM;
|
||||
} else if (block->instructions[idx + 1]->opcode == aco_opcode::p_end_wqm) {
|
||||
if (block->instructions[idx + 1]->opcode == aco_opcode::p_end_wqm) {
|
||||
/* Transition to Exact without extra instruction. */
|
||||
info.exec.resize(1);
|
||||
state = Exact;
|
||||
} else if (nested_cf) {
|
||||
/* Save curent exec temporarily. */
|
||||
info.exec.back().op = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
|
||||
} else {
|
||||
/* Make sure to not use some previously stored temporary. */
|
||||
info.exec.back().op = Operand(exec, bld.lm);
|
||||
}
|
||||
|
||||
/* Remove invocations from global exact mask. */
|
||||
Definition def = state == Exact ? Definition(exec, bld.lm) : bld.def(bld.lm);
|
||||
Operand src = instr->operands[0].isConstant() ? Operand(exec, bld.lm) : instr->operands[0];
|
||||
Operand exit_cond = Operand(exec, bld.lm);
|
||||
|
||||
bld.sop2(Builder::s_andn2, def, bld.def(s1, scc), info.exec[0].op, src);
|
||||
info.exec[0].op = def.isTemp() ? Operand(def.getTemp()) : Operand(exec, bld.lm);
|
||||
if (state == Exact) {
|
||||
assert(info.exec.size() == 1);
|
||||
bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), info.exec[0].op,
|
||||
src);
|
||||
} else {
|
||||
Temp cond = bld.tmp(s1);
|
||||
info.exec[0].op = bld.sop2(Builder::s_andn2, bld.def(bld.lm), Definition(cond, scc),
|
||||
info.exec[0].op, src);
|
||||
|
||||
/* Update global WQM mask and store in exec. */
|
||||
if (state == WQM) {
|
||||
assert(info.exec.size() > 1);
|
||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), def.getTemp());
|
||||
/* Update global WQM mask and store in exec. */
|
||||
if (info.exec.back().type & mask_type_global) {
|
||||
assert(info.exec.size() == 2);
|
||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc),
|
||||
info.exec[0].op);
|
||||
} else {
|
||||
/* Conditionally set exec=0. Note, that exec might already be zero, so don't use s_branch_execz. */
|
||||
bld.sop2(Builder::s_cselect, Definition(exec, bld.lm), Operand(exec, bld.lm),
|
||||
Operand::zero(bld.lm.bytes()), bld.scc(cond));
|
||||
exit_cond = Operand(cond, scc);
|
||||
}
|
||||
}
|
||||
|
||||
/* End shader if global mask is zero. */
|
||||
instr->opcode = aco_opcode::p_exit_early_if_not;
|
||||
instr->operands[0] = Operand(exec, bld.lm);
|
||||
instr->operands[0] = exit_cond;
|
||||
bld.insert(std::move(instr));
|
||||
|
||||
/* Update all other exec masks. */
|
||||
if (nested_cf) {
|
||||
const unsigned global_idx = state == WQM ? 1 : 0;
|
||||
for (unsigned i = global_idx + 1; i < info.exec.size() - 1; i++) {
|
||||
info.exec[i].op = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc),
|
||||
info.exec[i].op, Operand(exec, bld.lm));
|
||||
}
|
||||
/* Update current exec and save WQM mask. */
|
||||
info.exec[global_idx].op =
|
||||
bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
Definition(exec, bld.lm), info.exec.back().op, Operand(exec, bld.lm));
|
||||
info.exec.back().op = Operand(exec, bld.lm);
|
||||
}
|
||||
continue;
|
||||
|
||||
} else if (instr->opcode == aco_opcode::p_elect) {
|
||||
|
|
|
|||
|
|
@ -8631,11 +8631,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
|
||||
bld.pseudo(aco_opcode::p_demote_to_helper, cond);
|
||||
|
||||
/* Perform the demote in WQM so that it doesn't make exec empty. WQM should last until at
|
||||
* least the next top-level block.
|
||||
/* Perform the demote in WQM so that it doesn't make exec empty.
|
||||
* WQM should last until at least the next top-level block.
|
||||
*/
|
||||
if (ctx->cf_info.in_divergent_cf)
|
||||
set_wqm(ctx);
|
||||
set_wqm(ctx, true);
|
||||
|
||||
ctx->block->kind |= block_kind_uses_discard;
|
||||
ctx->program->needs_exact = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue