aco: swap operands to create VOPD instructions

fossil-db (navi31, wave32):
Totals from 61565 (77.69% of 79242) affected shaders:
Instrs: 34874995 -> 34439344 (-1.25%); split: -1.27%, +0.02%
CodeSize: 200611536 -> 200564028 (-0.02%); split: -0.12%, +0.10%
Latency: 242520024 -> 242120510 (-0.16%); split: -0.28%, +0.11%
InvThroughput: 50236383 -> 49588742 (-1.29%); split: -1.31%, +0.02%
VClause: 713308 -> 712902 (-0.06%); split: -0.07%, +0.01%
SClause: 1184865 -> 1184620 (-0.02%); split: -0.03%, +0.01%
VALU: 18235068 -> 17803847 (-2.36%); split: -2.37%, +0.00%
VOPD: 3930904 -> 4362125 (+10.97%); split: +10.99%, -0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27485>
This commit is contained in:
Rhys Perry 2024-02-06 11:17:02 +00:00 committed by Marge Bot
parent 96d8b7c59c
commit 65dfb27f8f

View file

@ -28,11 +28,12 @@ using mask_t = uint16_t;
static_assert(std::numeric_limits<mask_t>::digits >= num_nodes);
struct VOPDInfo {
VOPDInfo() : is_opy_only(0), is_dst_odd(0), src_banks(0), has_literal(0) {}
VOPDInfo() : is_opy_only(0), is_dst_odd(0), src_banks(0), has_literal(0), is_commutative(0) {}
uint16_t is_opy_only : 1;
uint16_t is_dst_odd : 1;
uint16_t src_banks : 10; /* 0-3: src0, 4-7: src1, 8-9: src2 */
uint16_t has_literal : 1;
uint16_t is_commutative : 1;
aco_opcode op = aco_opcode::num_opcodes;
uint32_t literal = 0;
};
@ -124,17 +125,27 @@ get_vopd_info(const Instruction* instr)
return VOPDInfo();
VOPDInfo info;
info.is_commutative = true;
switch (instr->opcode) {
case aco_opcode::v_fmac_f32: info.op = aco_opcode::v_dual_fmac_f32; break;
case aco_opcode::v_fmaak_f32: info.op = aco_opcode::v_dual_fmaak_f32; break;
case aco_opcode::v_fmamk_f32: info.op = aco_opcode::v_dual_fmamk_f32; break;
case aco_opcode::v_fmamk_f32:
info.op = aco_opcode::v_dual_fmamk_f32;
info.is_commutative = false;
break;
case aco_opcode::v_mul_f32: info.op = aco_opcode::v_dual_mul_f32; break;
case aco_opcode::v_add_f32: info.op = aco_opcode::v_dual_add_f32; break;
case aco_opcode::v_sub_f32: info.op = aco_opcode::v_dual_sub_f32; break;
case aco_opcode::v_subrev_f32: info.op = aco_opcode::v_dual_subrev_f32; break;
case aco_opcode::v_mul_legacy_f32: info.op = aco_opcode::v_dual_mul_dx9_zero_f32; break;
case aco_opcode::v_mov_b32: info.op = aco_opcode::v_dual_mov_b32; break;
case aco_opcode::v_cndmask_b32: info.op = aco_opcode::v_dual_cndmask_b32; break;
case aco_opcode::v_mov_b32:
info.op = aco_opcode::v_dual_mov_b32;
info.is_commutative = false;
break;
case aco_opcode::v_cndmask_b32:
info.op = aco_opcode::v_dual_cndmask_b32;
info.is_commutative = false;
break;
case aco_opcode::v_max_f32: info.op = aco_opcode::v_dual_max_f32; break;
case aco_opcode::v_min_f32: info.op = aco_opcode::v_dual_min_f32; break;
case aco_opcode::v_dot2c_f32_f16: info.op = aco_opcode::v_dual_dot2acc_f32_f16; break;
@ -145,6 +156,7 @@ get_vopd_info(const Instruction* instr)
case aco_opcode::v_lshlrev_b32:
info.op = aco_opcode::v_dual_lshlrev_b32;
info.is_opy_only = true;
info.is_commutative = false;
break;
case aco_opcode::v_and_b32:
info.op = aco_opcode::v_dual_and_b32;
@ -181,9 +193,38 @@ get_vopd_info(const Instruction* instr)
if (has_sgpr && info.has_literal)
return VOPDInfo();
info.is_commutative &= instr->operands[0].isOfType(RegType::vgpr);
return info;
}
bool
is_vopd_compatible(const VOPDInfo& a, const VOPDInfo& b)
{
if ((a.is_opy_only && b.is_opy_only) || (a.is_dst_odd == b.is_dst_odd))
return false;
/* Both can use a literal, but it must be the same literal. */
if (a.has_literal && b.has_literal && a.literal != b.literal)
return false;
/* The rest is checking src VGPR bank compatibility. */
if ((a.src_banks & b.src_banks) == 0)
return true;
if (!a.is_commutative && !b.is_commutative)
return false;
uint16_t src0 = a.src_banks & 0xf;
uint16_t src1 = a.src_banks & 0xf0;
uint16_t src2 = a.src_banks & 0x300;
uint16_t a_src_banks = (src0 << 4) | (src1 >> 4) | src2;
if ((a_src_banks & b.src_banks) != 0)
return false;
return true;
}
bool
can_use_vopd(const SchedILPContext& ctx, unsigned idx)
{
@ -197,15 +238,7 @@ can_use_vopd(const SchedILPContext& ctx, unsigned idx)
if (ctx.prev_vopd_info.op == aco_opcode::num_opcodes || cur_vopd.op == aco_opcode::num_opcodes)
return false;
if ((ctx.prev_vopd_info.src_banks & cur_vopd.src_banks) ||
(ctx.prev_vopd_info.is_opy_only & cur_vopd.is_opy_only) ||
(ctx.prev_vopd_info.is_dst_odd == cur_vopd.is_dst_odd)) {
return false;
}
/* Both can use a literal, but it must be the same literal. */
if (ctx.prev_vopd_info.has_literal && cur_vopd.has_literal &&
ctx.prev_vopd_info.literal != cur_vopd.literal)
if (!is_vopd_compatible(ctx.prev_vopd_info, cur_vopd))
return false;
assert(first->definitions.size() == 1);
@ -594,12 +627,20 @@ select_instruction_vopd(const SchedILPContext& ctx, bool* use_vopd)
}
void
get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, aco_opcode* op,
get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, bool swap, aco_opcode* op,
unsigned* num_operands, Operand* operands)
{
*op = info.op;
*num_operands += instr->operands.size();
std::copy(instr->operands.begin(), instr->operands.end(), operands);
if (swap) {
if (info.op == aco_opcode::v_dual_sub_f32)
*op = aco_opcode::v_dual_subrev_f32;
else if (info.op == aco_opcode::v_dual_subrev_f32)
*op = aco_opcode::v_dual_sub_f32;
std::swap(operands[0], operands[1]);
}
}
Instruction*
@ -610,16 +651,24 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
VOPDInfo x_info = ctx.prev_vopd_info;
VOPDInfo y_info = ctx.vopd[idx];
bool swap_x = false, swap_y = false;
if (x_info.src_banks & y_info.src_banks) {
assert(x_info.is_commutative || y_info.is_commutative);
swap_x = x_info.is_commutative;
swap_y = y_info.is_commutative && !swap_x;
}
if (x_info.is_opy_only) {
std::swap(x, y);
std::swap(x_info, y_info);
std::swap(swap_x, swap_y);
}
aco_opcode x_op, y_op;
unsigned num_operands = 0;
Operand operands[6];
get_vopd_opcode_operands(x, x_info, &x_op, &num_operands, operands);
get_vopd_opcode_operands(y, y_info, &y_op, &num_operands, operands + num_operands);
get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands);
get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands);
VOPD_instruction* instr =
create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);