aco: apply DPP with scalar src1 on gfx11.5+

Foz-DB Navi48:
Totals from 6261 (7.62% of 82179) affected shaders:
MaxWaves: 176284 -> 176236 (-0.03%); split: +0.01%, -0.03%
Instrs: 5850185 -> 5828451 (-0.37%); split: -0.41%, +0.04%
CodeSize: 31363324 -> 31419904 (+0.18%); split: -0.08%, +0.26%
VGPRs: 328284 -> 328200 (-0.03%); split: -0.07%, +0.05%
SpillSGPRs: 2268 -> 2256 (-0.53%)
Latency: 50235516 -> 50218816 (-0.03%); split: -0.06%, +0.03%
InvThroughput: 8256243 -> 8242036 (-0.17%); split: -0.22%, +0.05%
VClause: 81000 -> 80975 (-0.03%); split: -0.11%, +0.08%
SClause: 136376 -> 136387 (+0.01%); split: -0.11%, +0.11%
Copies: 414021 -> 417894 (+0.94%); split: -0.13%, +1.07%
Branches: 105301 -> 105298 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 291360 -> 291432 (+0.02%)
PreVGPRs: 238593 -> 238729 (+0.06%); split: -0.02%, +0.08%
VALU: 3425446 -> 3403463 (-0.64%); split: -0.65%, +0.01%
SALU: 815505 -> 819372 (+0.47%); split: -0.02%, +0.50%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
This commit is contained in:
Georg Lehmann 2026-01-25 16:24:06 +01:00 committed by Marge Bot
parent 3fe329b3d0
commit d4c0318f48
3 changed files with 19 additions and 3 deletions

View file

@ -484,7 +484,8 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (instr->operands[i].isLiteral())
return false;
if (!instr->operands[i].isOfType(RegType::vgpr) && i < 2)
if (!instr->operands[i].isOfType(RegType::vgpr) &&
(i == 0 || (i == 1 && gfx_level < GFX11_5)))
return false;
}
@ -555,6 +556,9 @@ convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, bool dpp8)
remove_vop3 &= instr->operands.size() < 3 || !instr->operands[2].isFixed() ||
instr->operands[2].isOfType(RegType::vgpr) || instr->operands[2].physReg() == vcc;
/* scalar src1 needs VOP3. */
remove_vop3 &= instr->operands.size() < 2 || instr->operands[1].isOfType(RegType::vgpr);
if (remove_vop3)
instr->format = withoutVOP3(instr->format);

View file

@ -4982,9 +4982,14 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
continue;
Format old_format = instr->format;
if (i != 0) {
if (!can_swap_operands(instr, &instr->opcode, 0, i))
if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P())
instr->format = asVOP3(instr->format);
if (!can_swap_operands(instr, &instr->opcode, 0, i)) {
instr->format = old_format;
continue;
}
instr->valu().swapOperands(0, i);
}
@ -4993,6 +4998,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i);
assert(success);
instr->valu().swapOperands(0, i);
instr->format = old_format;
}
continue;
}

View file

@ -643,9 +643,14 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
continue;
Format old_format = instr->format;
if (i != 0) {
if (!can_swap_operands(instr, &instr->opcode, 0, i))
if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P())
instr->format = asVOP3(instr->format);
if (!can_swap_operands(instr, &instr->opcode, 0, i)) {
instr->format = old_format;
continue;
}
instr->valu().swapOperands(0, i);
}
@ -654,6 +659,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i);
assert(success);
instr->valu().swapOperands(0, i);
instr->format = old_format;
}
continue;
}