aco/sched_vopd: convert fma with inline constants to fmamk/fmaak

This optimization was previously done in the post-RA optimizer,
but it is more fitting for the vopd scheduler.

Doing it here also has the benefit that we don't unnecessarily use
the constant bus when VOPD can't be used.

No Foz-DB changes on GFX12 until the next commit.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40225>
This commit is contained in:
Georg Lehmann 2026-03-04 15:26:18 +01:00 committed by Marge Bot
parent 1ae9931145
commit 6cef434478
2 changed files with 84 additions and 2 deletions

View file

@ -134,7 +134,8 @@ can_reorder(const Instruction* const instr)
VOPDInfo
get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
{
if (instr->format != Format::VOP1 && instr->format != Format::VOP2)
if (instr->format != Format::VOP1 && instr->format != Format::VOP2 &&
instr->format != Format::VOP3)
return VOPDInfo();
VOPDInfo info;
@ -181,6 +182,35 @@ get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
info.op = aco_opcode::v_dual_and_b32;
info.can_be_opx = false;
break;
case aco_opcode::v_fma_f32: {
/* Convert v_fma_f32 with inline constant to fmamk/fmaak.
 *
 * The inline constant is recorded as the VOPD literal (info.literal), so the
 * instruction is described as v_dual_fmaak_f32 (constant is the addend,
 * operand 2) or v_dual_fmamk_f32 (constant is one of the multiplicands,
 * operand 0 or 1).
 */
int constant_idx = -1;
int vgpr_idx = -1;
/* Classify all three sources. Every operand must be either an inline
 * (non-literal) constant or a VGPR; anything else rejects the instruction.
 * Each index is overwritten on every match, so after the loop both hold the
 * highest matching operand index.
 */
for (int i = 0; i < 3; i++) {
const Operand& op = instr->operands[i];
if (op.isConstant() && !op.isLiteral())
constant_idx = i;
else if (op.isOfType(RegType::vgpr))
vgpr_idx = i;
else
return VOPDInfo();
}
/* Need at least one inline constant and one VGPR, and no modifiers at all. */
if (constant_idx < 0 || vgpr_idx < 0 || instr->usesModifiers())
return VOPDInfo();
info.literal = instr->operands[constant_idx].constantValue();
info.has_literal = true;
/* NOTE(review): operand_swizzle appears to hold one 2-bit index into
 * instr->operands per VOPD source slot (lowest bits = first slot), with 0b11
 * used for the slot occupied by the literal. The decoding is outside this
 * hunk -- confirm before relying on this description.
 */
if (constant_idx == 2) {
info.op = aco_opcode::v_dual_fmaak_f32;
/* vgpr_idx == 0 here means operand 1 is not a VGPR (it is another inline
 * constant), so operands 0 and 1 are swapped; otherwise they keep their order.
 */
info.operand_swizzle = vgpr_idx == 0 ? 0b11'00'01 : 0b11'01'00;
} else {
info.op = aco_opcode::v_dual_fmamk_f32;
info.is_commutative = false;
/* The multiplicand that is not the chosen constant comes first, followed by
 * the addend (operand 2).
 */
info.operand_swizzle = constant_idx == 0 ? 0b11'10'01 : 0b11'10'00;
}
break;
}
default: return VOPDInfo();
}
@ -200,7 +230,7 @@ get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
}
Operand op = instr->operands[swizzle];
unsigned port = (instr->opcode == aco_opcode::v_fmamk_f32 && i == 1) ? 2 : i;
unsigned port = (info.op == aco_opcode::v_dual_fmamk_f32 && i == 1) ? 2 : i;
if (op.isOfType(RegType::vgpr)) {
info.src_banks |= 1 << (port * 4 + (op.physReg().reg() & bank_mask[port]));
if (port < 2)

View file

@ -289,3 +289,55 @@ BEGIN_TEST(vopd_sched.same_vgpr)
finish_schedule_vopd_test();
}
END_TEST
/* Checks how the VOPD scheduler treats v_fma_f32 instructions that have an
 * inline constant operand: sub-tests 0 and 1 expect a pair of such FMAs to be
 * emitted as one v_dual_fmamk_f32 :: v_dual_fmaak_f32 instruction, while
 * sub-tests 2 and 3 expect the FMA to be left untouched.
 *
 * NOTE(review): the `//>>` and `//!` lines look like expected-output patterns
 * consumed by the test harness rather than ordinary comments -- confirm before
 * editing them.
 */
BEGIN_TEST(vopd_sched.fma_with_constant)
/* Nothing to test if the compute-shader setup for GFX11 fails. */
if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32))
return;
/* Fixed physical registers; 256..259 show up as v[0]..v[3] and 0 as s[0] in
 * the expected output below.
 */
PhysReg reg_v0{256};
PhysReg reg_v1{257};
PhysReg reg_v2{258};
PhysReg reg_v3{259};
PhysReg reg_s0{0};
/* Sub-test 0: the constant is operand 1. The first FMA has 2.0 as addend
 * (expected as fmaak), the second has 2.0 as multiplicand (expected as fmamk);
 * both halves carry 0x40000000 (2.0f) as the literal.
 */
//>> p_unit_test 0
//! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[3], %0:v[2], 0x40000000 :: v1: %0:v[0] = v_dual_fmaak_f32 4.0, %0:v[2], 0x40000000
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
Operand::c32(fui(4.0f)), Operand::c32(fui(2.0f)));
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v1, v1), Operand(reg_v3, v1),
Operand::c32(fui(2.0f)), Operand(reg_v2, v1));
/* Sub-test 1: same as sub-test 0 but with the constant in operand 0 and the
 * roles of 2.0 and 4.0 exchanged; both halves carry 0x40800000 (4.0f).
 */
bld.reset(program->create_and_insert_block());
//>> p_unit_test 1
//! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[3], %0:v[2], 0x40800000 :: v1: %0:v[0] = v_dual_fmaak_f32 2.0, %0:v[2], 0x40800000
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand::c32(fui(2.0f)),
Operand(reg_v2, v1), Operand::c32(fui(4.0f)));
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v1, v1), Operand::c32(fui(4.0f)),
Operand(reg_v3, v1), Operand(reg_v2, v1));
/* Sub-test 2: SGPR operands are not allowed. An FMA reading s[0] is expected
 * to stay a plain v_fma_f32 and not be paired with the v_mov_b32.
 */
bld.reset(program->create_and_insert_block());
//>> p_unit_test 2
//! v1: %0:v[1] = v_mov_b32 0
//! v1: %0:v[0] = v_fma_f32 %0:s[0], %0:v[2], 2.0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_s0, s1), Operand(reg_v2, v1),
Operand::c32(fui(2.0f)));
bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand::c32(0));
/* Sub-test 3: modifiers are not allowed. An FMA with abs set on operand 0 is
 * expected to stay a plain v_fma_f32 and not be paired with the v_mov_b32.
 */
bld.reset(program->create_and_insert_block());
//>> p_unit_test 3
//! v1: %0:v[1] = v_mov_b32 0
//! v1: %0:v[0] = v_fma_f32 |%0:v[2]|, %0:v[2], 2.0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), Operand(reg_v2, v1),
Operand::c32(fui(2.0f)))
->valu()
.abs[0] = true;
bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand::c32(0));
finish_schedule_vopd_test();
END_TEST