diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index b198378b374..3885f1bb315 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -484,7 +484,8 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp for (unsigned i = 0; i < instr->operands.size(); i++) { if (instr->operands[i].isLiteral()) return false; - if (!instr->operands[i].isOfType(RegType::vgpr) && i < 2) + if (!instr->operands[i].isOfType(RegType::vgpr) && + (i == 0 || (i == 1 && gfx_level < GFX11_5))) return false; } @@ -555,6 +556,9 @@ convert_to_DPP(amd_gfx_level gfx_level, aco_ptr& instr, bool dpp8) remove_vop3 &= instr->operands.size() < 3 || !instr->operands[2].isFixed() || instr->operands[2].isOfType(RegType::vgpr) || instr->operands[2].physReg() == vcc; + /* scalar src1 needs VOP3. */ + remove_vop3 &= instr->operands.size() < 2 || instr->operands[1].isOfType(RegType::vgpr); + if (remove_vop3) instr->format = withoutVOP3(instr->format); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 388f28bf039..fb844a3db98 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4982,9 +4982,14 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr) if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods) continue; + Format old_format = instr->format; if (i != 0) { - if (!can_swap_operands(instr, &instr->opcode, 0, i)) + if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P()) + instr->format = asVOP3(instr->format); + if (!can_swap_operands(instr, &instr->opcode, 0, i)) { + instr->format = old_format; continue; + } instr->valu().swapOperands(0, i); } @@ -4993,6 +4998,7 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr) ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i); assert(success); instr->valu().swapOperands(0, i); + instr->format = old_format; } continue; } diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index be6df392aeb..c5e45e0ea0e 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -643,9 +643,14 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr) if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods) continue; + Format old_format = instr->format; if (i != 0) { - if (!can_swap_operands(instr, &instr->opcode, 0, i)) + if (!instr->operands[0].isOfType(RegType::vgpr) && !instr->isVOP3P()) + instr->format = asVOP3(instr->format); + if (!can_swap_operands(instr, &instr->opcode, 0, i)) { + instr->format = old_format; continue; + } instr->valu().swapOperands(0, i); } @@ -654,6 +659,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr) ASSERTED bool success = can_swap_operands(instr, &instr->opcode, 0, i); assert(success); instr->valu().swapOperands(0, i); + instr->format = old_format; } continue; }