aco: don't apply modifiers through DPP to unsupported instructions

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21201>
This commit is contained in:
Rhys Perry 2023-02-08 16:37:44 +00:00 committed by Marge Bot
parent 3bd5b583f9
commit ab3184c0a2
4 changed files with 77 additions and 29 deletions

View file

@ -4880,38 +4880,49 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
continue;
ssa_info info = ctx.info[instr->operands[i].tempId()];
if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
continue;
aco_opcode swapped_op;
if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
(i == 0 || can_swap_operands(instr, &swapped_op)) &&
can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
bool dpp8 = info.is_dpp8();
convert_to_DPP(instr, dpp8);
if (dpp8) {
DPP8_instruction* dpp = &instr->dpp8();
for (unsigned j = 0; j < 8; ++j)
dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
if (i) {
instr->opcode = swapped_op;
std::swap(instr->operands[0], instr->operands[1]);
}
} else {
DPP16_instruction* dpp = &instr->dpp16();
if (i) {
instr->opcode = swapped_op;
std::swap(instr->operands[0], instr->operands[1]);
std::swap(dpp->neg[0], dpp->neg[1]);
std::swap(dpp->abs[0], dpp->abs[1]);
}
dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
dpp->abs[0] |= info.instr->dpp16().abs[0];
if (i != 0 && !can_swap_operands(instr, &swapped_op))
continue;
if (instr->isDPP() || !can_use_DPP(instr, true, info.is_dpp8()))
continue;
bool dpp8 = info.is_dpp8();
bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
instr_info.operand_size[(int)instr->opcode] == 32;
if (!dpp8 && (info.instr->dpp16().neg[0] || info.instr->dpp16().abs[0]) && !input_mods)
continue;
convert_to_DPP(instr, dpp8);
if (dpp8) {
DPP8_instruction* dpp = &instr->dpp8();
for (unsigned j = 0; j < 8; ++j)
dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
if (i) {
instr->opcode = swapped_op;
std::swap(instr->operands[0], instr->operands[1]);
}
if (--ctx.uses[info.instr->definitions[0].tempId()])
ctx.uses[info.instr->operands[0].tempId()]++;
instr->operands[0].setTemp(info.instr->operands[0].getTemp());
break;
} else {
DPP16_instruction* dpp = &instr->dpp16();
if (i) {
instr->opcode = swapped_op;
std::swap(instr->operands[0], instr->operands[1]);
std::swap(dpp->neg[0], dpp->neg[1]);
std::swap(dpp->abs[0], dpp->abs[1]);
}
dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
dpp->abs[0] |= info.instr->dpp16().abs[0];
}
if (--ctx.uses[info.instr->definitions[0].tempId()])
ctx.uses[info.instr->operands[0].tempId()]++;
instr->operands[0].setTemp(info.instr->operands[0].getTemp());
break;
}
}

View file

@ -511,6 +511,11 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (i && !can_swap_operands(instr, &instr->opcode))
continue;
bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] &&
instr_info.operand_size[(int)instr->opcode] == 32;
if (!dpp8 && (mov->dpp16().neg[0] || mov->dpp16().abs[0]) && !input_mods)
continue;
if (!dpp8) /* anything else doesn't make sense in SSA */
assert(mov->dpp16().row_mask == 0xf && mov->dpp16().bank_mask == 0xf);

View file

@ -1064,6 +1064,22 @@ BEGIN_TEST(optimizer.dpp)
res7->vop3().abs[0] = true;
writeout(7, res7);
//! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
//! v1: %res11 = v_add_u32 %tmp11, %b
//! p_unit_test 11, %res11
auto tmp11 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
tmp11->dpp16().neg[0] = true;
Temp res11 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), tmp11, b);
writeout(11, res11);
//! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1
//! v1: %res12 = v_add_f16 %tmp12, %b
//! p_unit_test 12, %res12
auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
tmp12->dpp16().neg[0] = true;
Temp res12 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1), tmp12, b);
writeout(12, res12);
/* vcc */
//! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8

View file

@ -409,6 +409,22 @@ BEGIN_TEST(optimizer_postRA.dpp)
res7->vop3().abs[0] = true;
writeout(7, Operand(res7, reg_v2));
//! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
//! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1]
//! p_unit_test 12, %res12:v[2]
auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
tmp12->dpp16().neg[0] = true;
Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), Operand(tmp12, reg_v2), b);
writeout(12, Operand(res12, reg_v2));
//! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
//! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1]
//! p_unit_test 13, %res13:v[2]
auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
tmp13->dpp16().neg[0] = true;
Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), Operand(tmp13, reg_v2), b);
writeout(13, Operand(res13, reg_v2));
/* vcc */
//! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
//! p_unit_test 8, %res8:v[2]