diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 8a99e53ddaf..ee1d0785aec 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -160,8 +160,7 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst
          instr->opcode = aco_opcode::v_fma_f16;
          instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);
       } else if (instr->opcode == aco_opcode::v_fmamk_f16) {
-         std::swap(instr->operands[1], instr->operands[2]);
-         instr->valu().opsel[1].swap(instr->valu().opsel[2]);
+         instr->valu().swapOperands(1, 2); /* also swaps the matching neg/abs/opsel* bits */
          instr->opcode = aco_opcode::v_fma_f16;
          instr->format = (Format)((uint32_t)instr->format & ~(uint32_t)Format::VOP2);
       }
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 04b93420f16..a2313641a76 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1402,6 +1402,8 @@ struct VALU_instruction : public Instruction {
       bitfield_array8 opsel_hi; /* VOP3P */
       bitfield_bool clamp; /* VOP3, VOP3P, SDWA, VINTERP_inreg */
    };
+
+   void swapOperands(unsigned idx0, unsigned idx1); /* swaps operands and their per-operand bits */
 };
 static_assert(sizeof(VALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
 
@@ -1718,6 +1720,22 @@ struct Pseudo_reduction_instruction : public Instruction {
 static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
               "Unexpected padding");
 
+inline void
+VALU_instruction::swapOperands(unsigned idx0, unsigned idx1)
+{
+   if (this->isSDWA() && idx0 != idx1) { /* SDWA: a real swap is limited to operands 0 and 1 */
+      assert(idx0 < 2 && idx1 < 2);
+      std::swap(this->sdwa().sel[0], this->sdwa().sel[1]); /* sel entries swap as well */
+   }
+   assert(idx0 < 3 && idx1 < 3); /* only indices 0..2 are accepted */
+   std::swap(this->operands[idx0], this->operands[idx1]);
+   this->neg[idx0].swap(this->neg[idx1]); /* each bitfield entry follows its operand */
+   this->abs[idx0].swap(this->abs[idx1]);
+   this->opsel[idx0].swap(this->opsel[idx1]);
+   this->opsel_lo[idx0].swap(this->opsel_lo[idx1]);
+   this->opsel_hi[idx0].swap(this->opsel_hi[idx1]);
+}
+
 extern thread_local aco::monotonic_buffer_resource* instruction_buffer;
 
 struct instr_deleter_functor {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 885ffc54bb4..c928b70f2b4 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1429,9 +1429,8 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr)
                instr->operands[i] = op;
                continue;
             } else if (!instr->isVOP3() && can_swap_operands(instr, &instr->opcode)) {
-               instr->operands[i] = instr->operands[0];
-               instr->operands[0] = op;
-               instr->valu().opsel[0].swap(instr->valu().opsel[i]);
+               instr->operands[i] = op; /* op lands in slot 0 after the swap below */
+               instr->valu().swapOperands(0, i);
                continue;
             } else if (can_use_VOP3(ctx, instr)) {
                instr->format = asVOP3(instr->format);
@@ -4829,12 +4828,7 @@ select_instruction(opt_ctx& ctx, aco_ptr& instr)
          if (i != 0) {
             if (!can_swap_operands(instr, &instr->opcode, 0, i))
                continue;
-            std::swap(instr->operands[0], instr->operands[i]);
-            instr->valu().neg[0].swap(instr->valu().neg[i]);
-            instr->valu().abs[0].swap(instr->valu().abs[i]);
-            instr->valu().opsel[0].swap(instr->valu().opsel[i]);
-            instr->valu().opsel_lo[0].swap(instr->valu().opsel_lo[i]);
-            instr->valu().opsel_hi[0].swap(instr->valu().opsel_hi[i]);
+            instr->valu().swapOperands(0, i); /* operand i moves to slot 0 with its bits */
          }
 
          if (!can_use_DPP(ctx.program->gfx_level, instr, info.is_dpp8()))
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index d04b3c6c0f6..7aa554d0766 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -522,12 +522,7 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr)
       if (i != 0) {
          if (!can_swap_operands(instr, &instr->opcode, 0, i))
             continue;
-         std::swap(instr->operands[0], instr->operands[i]);
-         instr->valu().neg[0].swap(instr->valu().neg[i]);
-         instr->valu().abs[0].swap(instr->valu().abs[i]);
-         instr->valu().opsel[0].swap(instr->valu().opsel[i]);
-         instr->valu().opsel_lo[0].swap(instr->valu().opsel_lo[i]);
-         instr->valu().opsel_hi[0].swap(instr->valu().opsel_hi[i]);
+         instr->valu().swapOperands(0, i); /* operand i moves to slot 0 with its bits */
       }
 
       if (!can_use_DPP(ctx.program->gfx_level, instr, dpp8))