mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 03:28:09 +02:00
aco/sched_vopd: convert fma with inline constants to fmamk/fmaak
This optimization was previously done in the post-RA optimizer, but it is more fitting for the vopd scheduler. Doing it here also has the benefit that we don't unnecessarily use the constant bus when VOPD can't be used.

No Foz-DB changes on GFX12 until the next commit.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40225>
This commit is contained in:
parent
1ae9931145
commit
6cef434478
2 changed files with 84 additions and 2 deletions
|
|
@ -134,7 +134,8 @@ can_reorder(const Instruction* const instr)
|
|||
VOPDInfo
|
||||
get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
|
||||
{
|
||||
if (instr->format != Format::VOP1 && instr->format != Format::VOP2)
|
||||
if (instr->format != Format::VOP1 && instr->format != Format::VOP2 &&
|
||||
instr->format != Format::VOP3)
|
||||
return VOPDInfo();
|
||||
|
||||
VOPDInfo info;
|
||||
|
|
@ -181,6 +182,35 @@ get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
|
|||
info.op = aco_opcode::v_dual_and_b32;
|
||||
info.can_be_opx = false;
|
||||
break;
|
||||
case aco_opcode::v_fma_f32: {
|
||||
/* Convert v_fma_f32 with inline constant to fmamk/fmaak. */
|
||||
int constant_idx = -1;
|
||||
int vgpr_idx = -1;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
const Operand& op = instr->operands[i];
|
||||
if (op.isConstant() && !op.isLiteral())
|
||||
constant_idx = i;
|
||||
else if (op.isOfType(RegType::vgpr))
|
||||
vgpr_idx = i;
|
||||
else
|
||||
return VOPDInfo();
|
||||
}
|
||||
|
||||
if (constant_idx < 0 || vgpr_idx < 0 || instr->usesModifiers())
|
||||
return VOPDInfo();
|
||||
|
||||
info.literal = instr->operands[constant_idx].constantValue();
|
||||
info.has_literal = true;
|
||||
if (constant_idx == 2) {
|
||||
info.op = aco_opcode::v_dual_fmaak_f32;
|
||||
info.operand_swizzle = vgpr_idx == 0 ? 0b11'00'01 : 0b11'01'00;
|
||||
} else {
|
||||
info.op = aco_opcode::v_dual_fmamk_f32;
|
||||
info.is_commutative = false;
|
||||
info.operand_swizzle = constant_idx == 0 ? 0b11'10'01 : 0b11'10'00;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: return VOPDInfo();
|
||||
}
|
||||
|
||||
|
|
@ -200,7 +230,7 @@ get_vopd_info(const SchedILPContext& ctx, const Instruction* instr)
|
|||
}
|
||||
Operand op = instr->operands[swizzle];
|
||||
|
||||
unsigned port = (instr->opcode == aco_opcode::v_fmamk_f32 && i == 1) ? 2 : i;
|
||||
unsigned port = (info.op == aco_opcode::v_dual_fmamk_f32 && i == 1) ? 2 : i;
|
||||
if (op.isOfType(RegType::vgpr)) {
|
||||
info.src_banks |= 1 << (port * 4 + (op.physReg().reg() & bank_mask[port]));
|
||||
if (port < 2)
|
||||
|
|
|
|||
|
|
@ -289,3 +289,55 @@ BEGIN_TEST(vopd_sched.same_vgpr)
|
|||
finish_schedule_vopd_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Checks how the VOPD scheduler handles v_fma_f32 with inline constant operands:
 * blocks 0 and 1 expect the two fma instructions to be merged into one VOPD
 * instruction made of v_dual_fmamk_f32 and v_dual_fmaak_f32, blocks 2 and 3
 * expect the v_fma_f32 to be left untouched. The expected output of each block
 * is given by the check comments that precede the instructions being built. */
BEGIN_TEST(vopd_sched.fma_with_constant)
|
||||
if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32))
|
||||
return;
|
||||
|
||||
/* Fixed physical registers used by the test; they show up as v[0]..v[3] and
 * s[0] in the expected output below. */
PhysReg reg_v0{256};
|
||||
PhysReg reg_v1{257};
|
||||
PhysReg reg_v2{258};
|
||||
PhysReg reg_v3{259};
|
||||
PhysReg reg_s0{0};
|
||||
|
||||
/* Block 0: the first fma has constants in operands 1 and 2; its operand-2
 * constant 2.0f (0x40000000) is expected as the fmaak literal. The second fma
 * has a constant only in operand 1; that 2.0f is expected as the fmamk literal. */
//>> p_unit_test 0
|
||||
//! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[3], %0:v[2], 0x40000000 :: v1: %0:v[0] = v_dual_fmaak_f32 4.0, %0:v[2], 0x40000000
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
|
||||
Operand::c32(fui(4.0f)), Operand::c32(fui(2.0f)));
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v1, v1), Operand(reg_v3, v1),
|
||||
Operand::c32(fui(2.0f)), Operand(reg_v2, v1));
|
||||
|
||||
bld.reset(program->create_and_insert_block());
|
||||
/* Block 1: same idea with a constant in operand 0. The first fma's operand-2
 * constant 4.0f (0x40800000) is expected as the fmaak literal, and the second
 * fma's operand-0 constant 4.0f is expected as the fmamk literal. */
//>> p_unit_test 1
|
||||
//! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[3], %0:v[2], 0x40800000 :: v1: %0:v[0] = v_dual_fmaak_f32 2.0, %0:v[2], 0x40800000
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand::c32(fui(2.0f)),
|
||||
Operand(reg_v2, v1), Operand::c32(fui(4.0f)));
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v1, v1), Operand::c32(fui(4.0f)),
|
||||
Operand(reg_v3, v1), Operand(reg_v2, v1));
|
||||
|
||||
/* Block 2: SGPR operands are not allowed. The fma reads s[0], so it is expected
 * to stay a plain v_fma_f32 and not be combined with the v_mov_b32. */
bld.reset(program->create_and_insert_block());
|
||||
//>> p_unit_test 2
|
||||
//! v1: %0:v[1] = v_mov_b32 0
|
||||
//! v1: %0:v[0] = v_fma_f32 %0:s[0], %0:v[2], 2.0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_s0, s1), Operand(reg_v2, v1),
|
||||
Operand::c32(fui(2.0f)));
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand::c32(0));
|
||||
|
||||
/* Block 3: input modifiers are not allowed. abs is set on operand 0, so the fma
 * is expected to stay a plain v_fma_f32 and not be combined with the v_mov_b32. */
bld.reset(program->create_and_insert_block());
|
||||
//>> p_unit_test 3
|
||||
//! v1: %0:v[1] = v_mov_b32 0
|
||||
//! v1: %0:v[0] = v_fma_f32 |%0:v[2]|, %0:v[2], 2.0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.vop3(aco_opcode::v_fma_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), Operand(reg_v2, v1),
|
||||
Operand::c32(fui(2.0f)))
|
||||
->valu()
|
||||
.abs[0] = true;
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand::c32(0));
|
||||
|
||||
finish_schedule_vopd_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue