mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
aco: swap operands to create VOPD instructions
fossil-db (navi31, wave32): Totals from 61565 (77.69% of 79242) affected shaders: Instrs: 34874995 -> 34439344 (-1.25%); split: -1.27%, +0.02% CodeSize: 200611536 -> 200564028 (-0.02%); split: -0.12%, +0.10% Latency: 242520024 -> 242120510 (-0.16%); split: -0.28%, +0.11% InvThroughput: 50236383 -> 49588742 (-1.29%); split: -1.31%, +0.02% VClause: 713308 -> 712902 (-0.06%); split: -0.07%, +0.01% SClause: 1184865 -> 1184620 (-0.02%); split: -0.03%, +0.01% VALU: 18235068 -> 17803847 (-2.36%); split: -2.37%, +0.00% VOPD: 3930904 -> 4362125 (+10.97%); split: +10.99%, -0.02% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27485>
This commit is contained in:
parent
96d8b7c59c
commit
65dfb27f8f
1 changed files with 65 additions and 16 deletions
|
|
@ -28,11 +28,12 @@ using mask_t = uint16_t;
|
|||
static_assert(std::numeric_limits<mask_t>::digits >= num_nodes);
|
||||
|
||||
struct VOPDInfo {
|
||||
VOPDInfo() : is_opy_only(0), is_dst_odd(0), src_banks(0), has_literal(0) {}
|
||||
VOPDInfo() : is_opy_only(0), is_dst_odd(0), src_banks(0), has_literal(0), is_commutative(0) {}
|
||||
uint16_t is_opy_only : 1;
|
||||
uint16_t is_dst_odd : 1;
|
||||
uint16_t src_banks : 10; /* 0-3: src0, 4-7: src1, 8-9: src2 */
|
||||
uint16_t has_literal : 1;
|
||||
uint16_t is_commutative : 1;
|
||||
aco_opcode op = aco_opcode::num_opcodes;
|
||||
uint32_t literal = 0;
|
||||
};
|
||||
|
|
@ -124,17 +125,27 @@ get_vopd_info(const Instruction* instr)
|
|||
return VOPDInfo();
|
||||
|
||||
VOPDInfo info;
|
||||
info.is_commutative = true;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::v_fmac_f32: info.op = aco_opcode::v_dual_fmac_f32; break;
|
||||
case aco_opcode::v_fmaak_f32: info.op = aco_opcode::v_dual_fmaak_f32; break;
|
||||
case aco_opcode::v_fmamk_f32: info.op = aco_opcode::v_dual_fmamk_f32; break;
|
||||
case aco_opcode::v_fmamk_f32:
|
||||
info.op = aco_opcode::v_dual_fmamk_f32;
|
||||
info.is_commutative = false;
|
||||
break;
|
||||
case aco_opcode::v_mul_f32: info.op = aco_opcode::v_dual_mul_f32; break;
|
||||
case aco_opcode::v_add_f32: info.op = aco_opcode::v_dual_add_f32; break;
|
||||
case aco_opcode::v_sub_f32: info.op = aco_opcode::v_dual_sub_f32; break;
|
||||
case aco_opcode::v_subrev_f32: info.op = aco_opcode::v_dual_subrev_f32; break;
|
||||
case aco_opcode::v_mul_legacy_f32: info.op = aco_opcode::v_dual_mul_dx9_zero_f32; break;
|
||||
case aco_opcode::v_mov_b32: info.op = aco_opcode::v_dual_mov_b32; break;
|
||||
case aco_opcode::v_cndmask_b32: info.op = aco_opcode::v_dual_cndmask_b32; break;
|
||||
case aco_opcode::v_mov_b32:
|
||||
info.op = aco_opcode::v_dual_mov_b32;
|
||||
info.is_commutative = false;
|
||||
break;
|
||||
case aco_opcode::v_cndmask_b32:
|
||||
info.op = aco_opcode::v_dual_cndmask_b32;
|
||||
info.is_commutative = false;
|
||||
break;
|
||||
case aco_opcode::v_max_f32: info.op = aco_opcode::v_dual_max_f32; break;
|
||||
case aco_opcode::v_min_f32: info.op = aco_opcode::v_dual_min_f32; break;
|
||||
case aco_opcode::v_dot2c_f32_f16: info.op = aco_opcode::v_dual_dot2acc_f32_f16; break;
|
||||
|
|
@ -145,6 +156,7 @@ get_vopd_info(const Instruction* instr)
|
|||
case aco_opcode::v_lshlrev_b32:
|
||||
info.op = aco_opcode::v_dual_lshlrev_b32;
|
||||
info.is_opy_only = true;
|
||||
info.is_commutative = false;
|
||||
break;
|
||||
case aco_opcode::v_and_b32:
|
||||
info.op = aco_opcode::v_dual_and_b32;
|
||||
|
|
@ -181,9 +193,38 @@ get_vopd_info(const Instruction* instr)
|
|||
if (has_sgpr && info.has_literal)
|
||||
return VOPDInfo();
|
||||
|
||||
info.is_commutative &= instr->operands[0].isOfType(RegType::vgpr);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
bool
|
||||
is_vopd_compatible(const VOPDInfo& a, const VOPDInfo& b)
|
||||
{
|
||||
if ((a.is_opy_only && b.is_opy_only) || (a.is_dst_odd == b.is_dst_odd))
|
||||
return false;
|
||||
|
||||
/* Both can use a literal, but it must be the same literal. */
|
||||
if (a.has_literal && b.has_literal && a.literal != b.literal)
|
||||
return false;
|
||||
|
||||
/* The rest is checking src VGPR bank compatibility. */
|
||||
if ((a.src_banks & b.src_banks) == 0)
|
||||
return true;
|
||||
|
||||
if (!a.is_commutative && !b.is_commutative)
|
||||
return false;
|
||||
|
||||
uint16_t src0 = a.src_banks & 0xf;
|
||||
uint16_t src1 = a.src_banks & 0xf0;
|
||||
uint16_t src2 = a.src_banks & 0x300;
|
||||
uint16_t a_src_banks = (src0 << 4) | (src1 >> 4) | src2;
|
||||
if ((a_src_banks & b.src_banks) != 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
can_use_vopd(const SchedILPContext& ctx, unsigned idx)
|
||||
{
|
||||
|
|
@ -197,15 +238,7 @@ can_use_vopd(const SchedILPContext& ctx, unsigned idx)
|
|||
if (ctx.prev_vopd_info.op == aco_opcode::num_opcodes || cur_vopd.op == aco_opcode::num_opcodes)
|
||||
return false;
|
||||
|
||||
if ((ctx.prev_vopd_info.src_banks & cur_vopd.src_banks) ||
|
||||
(ctx.prev_vopd_info.is_opy_only & cur_vopd.is_opy_only) ||
|
||||
(ctx.prev_vopd_info.is_dst_odd == cur_vopd.is_dst_odd)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Both can use a literal, but it must be the same literal. */
|
||||
if (ctx.prev_vopd_info.has_literal && cur_vopd.has_literal &&
|
||||
ctx.prev_vopd_info.literal != cur_vopd.literal)
|
||||
if (!is_vopd_compatible(ctx.prev_vopd_info, cur_vopd))
|
||||
return false;
|
||||
|
||||
assert(first->definitions.size() == 1);
|
||||
|
|
@ -594,12 +627,20 @@ select_instruction_vopd(const SchedILPContext& ctx, bool* use_vopd)
|
|||
}
|
||||
|
||||
void
|
||||
get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, aco_opcode* op,
|
||||
get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, bool swap, aco_opcode* op,
|
||||
unsigned* num_operands, Operand* operands)
|
||||
{
|
||||
*op = info.op;
|
||||
*num_operands += instr->operands.size();
|
||||
std::copy(instr->operands.begin(), instr->operands.end(), operands);
|
||||
|
||||
if (swap) {
|
||||
if (info.op == aco_opcode::v_dual_sub_f32)
|
||||
*op = aco_opcode::v_dual_subrev_f32;
|
||||
else if (info.op == aco_opcode::v_dual_subrev_f32)
|
||||
*op = aco_opcode::v_dual_sub_f32;
|
||||
std::swap(operands[0], operands[1]);
|
||||
}
|
||||
}
|
||||
|
||||
Instruction*
|
||||
|
|
@ -610,16 +651,24 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
|
|||
VOPDInfo x_info = ctx.prev_vopd_info;
|
||||
VOPDInfo y_info = ctx.vopd[idx];
|
||||
|
||||
bool swap_x = false, swap_y = false;
|
||||
if (x_info.src_banks & y_info.src_banks) {
|
||||
assert(x_info.is_commutative || y_info.is_commutative);
|
||||
swap_x = x_info.is_commutative;
|
||||
swap_y = y_info.is_commutative && !swap_x;
|
||||
}
|
||||
|
||||
if (x_info.is_opy_only) {
|
||||
std::swap(x, y);
|
||||
std::swap(x_info, y_info);
|
||||
std::swap(swap_x, swap_y);
|
||||
}
|
||||
|
||||
aco_opcode x_op, y_op;
|
||||
unsigned num_operands = 0;
|
||||
Operand operands[6];
|
||||
get_vopd_opcode_operands(x, x_info, &x_op, &num_operands, operands);
|
||||
get_vopd_opcode_operands(y, y_info, &y_op, &num_operands, operands + num_operands);
|
||||
get_vopd_opcode_operands(x, x_info, swap_x, &x_op, &num_operands, operands);
|
||||
get_vopd_opcode_operands(y, y_info, swap_y, &y_op, &num_operands, operands + num_operands);
|
||||
|
||||
VOPD_instruction* instr =
|
||||
create_instruction<VOPD_instruction>(x_op, Format::VOPD, num_operands, 2);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue