aco: don't combine DPP into v_cmpx

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25471>
This commit is contained in:
Rhys Perry 2023-09-29 11:36:43 +01:00 committed by Marge Bot
parent ea633c128c
commit 6518d09601
4 changed files with 37 additions and 10 deletions

View file

@ -793,14 +793,6 @@ VALU_writes_sgpr(aco_ptr<Instruction>& instr)
return false;
}
bool
instr_writes_exec(const aco_ptr<Instruction>& instr)
{
return std::any_of(instr->definitions.begin(), instr->definitions.end(),
[](const Definition& def) -> bool
{ return def.physReg() == exec_lo || def.physReg() == exec_hi; });
}
bool
instr_writes_sgpr(const aco_ptr<Instruction>& instr)
{
@ -915,7 +907,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
if (!instr->isVALU() && instr->reads_exec()) {
ctx.has_nonVALU_exec_read = true;
} else if (instr->isVALU()) {
if (instr_writes_exec(instr)) {
if (instr->writes_exec()) {
ctx.has_nonVALU_exec_read = false;
/* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
@ -1151,7 +1143,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
aco_ptr<Instruction>& instr)
{
if (instr->isSALU() && !instr->definitions.empty()) {
if (block_state.state == written_after_exec_write && instr_writes_exec(instr))
if (block_state.state == written_after_exec_write && instr->writes_exec())
block_state.state = exec_written;
} else if (instr->isVALU()) {
bool vgpr_write = false;

View file

@ -399,6 +399,10 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
return false;
}
/* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
if (instr->writes_exec())
return false;
/* simpler than listing all VOP3P opcodes which do not support DPP */
if (instr->isVOP3P()) {
return instr->opcode == aco_opcode::v_fma_mix_f32 ||

View file

@ -1058,6 +1058,15 @@ struct Instruction {
return false;
}
/* Returns true if any definition of this instruction is fixed to exec_lo or
 * exec_hi. Definitions that are not fixed are never counted. */
constexpr bool writes_exec() const noexcept
{
   for (const Definition& def : definitions) {
      /* Only fixed definitions are considered. */
      if (!def.isFixed())
         continue;
      if (def.physReg() == exec_lo || def.physReg() == exec_hi)
         return true;
   }
   return false;
}
Pseudo_instruction& pseudo() noexcept
{
assert(isPseudo());

View file

@ -513,6 +513,28 @@ BEGIN_TEST(optimizer_postRA.dpp_across_exec)
}
END_TEST
/* Post-RA optimizer test: a v_mov_b32 with DPP (row_mirror) feeds a v_cmpx_lt_f32
 * that defines exec. The expected output keeps the v_mov_b32 as a separate DPP
 * instruction, i.e. the DPP is not combined into the v_cmpx. */
BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
//>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
if (!setup_cs("v1 v1", GFX11))
return;
/* Fix the two p_startpgm definitions to registers 256 and 257, which the expected
 * output above prints as v[0] and v[1]. */
bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
/* Register used for the temporary; printed as v[2] in the expected output. */
PhysReg reg_v2(258);
Operand a(inputs[0], PhysReg(256));
Operand b(inputs[1], PhysReg(257));
//! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
//! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
//! p_unit_test 0, %res0:exec
/* DPP move whose result is the first source of the exec-writing compare. */
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);
writeout(0, Operand(res0, exec));
finish_optimizer_postRA_test();
END_TEST
BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1] = p_startpgm
if (!setup_cs("v1 v1 v1 v1 s2", GFX10_3))