mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco: don't combine DPP into v_cmpx
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25471>
This commit is contained in:
parent
ea633c128c
commit
6518d09601
4 changed files with 37 additions and 10 deletions
|
|
@ -793,14 +793,6 @@ VALU_writes_sgpr(aco_ptr<Instruction>& instr)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
instr_writes_exec(const aco_ptr<Instruction>& instr)
|
||||
{
|
||||
return std::any_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[](const Definition& def) -> bool
|
||||
{ return def.physReg() == exec_lo || def.physReg() == exec_hi; });
|
||||
}
|
||||
|
||||
bool
|
||||
instr_writes_sgpr(const aco_ptr<Instruction>& instr)
|
||||
{
|
||||
|
|
@ -915,7 +907,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
if (!instr->isVALU() && instr->reads_exec()) {
|
||||
ctx.has_nonVALU_exec_read = true;
|
||||
} else if (instr->isVALU()) {
|
||||
if (instr_writes_exec(instr)) {
|
||||
if (instr->writes_exec()) {
|
||||
ctx.has_nonVALU_exec_read = false;
|
||||
|
||||
/* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
|
||||
|
|
@ -1151,7 +1143,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
|
|||
aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isSALU() && !instr->definitions.empty()) {
|
||||
if (block_state.state == written_after_exec_write && instr_writes_exec(instr))
|
||||
if (block_state.state == written_after_exec_write && instr->writes_exec())
|
||||
block_state.state = exec_written;
|
||||
} else if (instr->isVALU()) {
|
||||
bool vgpr_write = false;
|
||||
|
|
|
|||
|
|
@ -399,6 +399,10 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
|
|||
return false;
|
||||
}
|
||||
|
||||
/* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
|
||||
if (instr->writes_exec())
|
||||
return false;
|
||||
|
||||
/* simpler than listing all VOP3P opcodes which do not support DPP */
|
||||
if (instr->isVOP3P()) {
|
||||
return instr->opcode == aco_opcode::v_fma_mix_f32 ||
|
||||
|
|
|
|||
|
|
@ -1058,6 +1058,15 @@ struct Instruction {
|
|||
return false;
|
||||
}
|
||||
|
||||
constexpr bool writes_exec() const noexcept
|
||||
{
|
||||
for (const Definition& def : definitions) {
|
||||
if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Pseudo_instruction& pseudo() noexcept
|
||||
{
|
||||
assert(isPseudo());
|
||||
|
|
|
|||
|
|
@ -513,6 +513,28 @@ BEGIN_TEST(optimizer_postRA.dpp_across_exec)
|
|||
}
|
||||
END_TEST
|
||||
|
||||
/* Post-RA optimizer test: a v_mov_b32 using DPP (row_mirror) feeds the first
 * operand of a v_cmpx_lt_f32 whose definition is exec. The expected output
 * below still lists the v_mov_b32 with its DPP controls and a separate
 * v_cmpx_lt_f32 reading the moved value, i.e. the two are not combined.
 */
BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
|
||||
//>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
|
||||
/* Nothing to check if setup_cs() reports failure for this configuration. */
if (!setup_cs("v1 v1", GFX11))
|
||||
return;
|
||||
|
||||
/* Fix the two p_startpgm definitions to PhysReg 256 and 257; the expected
 * output above shows them as v[0] and v[1].
 */
bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
|
||||
bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
|
||||
|
||||
/* PhysReg 258 is the destination of the DPP move (v[2] in the expected output). */
PhysReg reg_v2(258);
|
||||
Operand a(inputs[0], PhysReg(256));
|
||||
Operand b(inputs[1], PhysReg(257));
|
||||
|
||||
//! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
|
||||
//! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
|
||||
//! p_unit_test 0, %res0:exec
|
||||
/* Build the DPP move followed by the v_cmpx that consumes it and defines exec. */
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);
|
||||
writeout(0, Operand(res0, exec));
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
//>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1] = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v1 s2", GFX10_3))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue