diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 3885f1bb315..88e5a37188e 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -489,10 +489,6 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr& instr, bool dpp return false; } - /* According to LLVM, it's unsafe to combine DPP into v_cmpx. */ - if (instr->writes_exec()) - return false; - return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P()); } diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index 77d04c6ccd4..690f3a19a2f 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -1078,10 +1078,6 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr& exec_copy : aco_opcode::num_opcodes; const bool vopc = v_cmpx_op != aco_opcode::num_opcodes; - /* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */ - if (vopc && exec_val->isDPP()) - return false; - /* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */ bool save_original_exec = exec_copy->opcode == and_saveexec && !exec_copy->definitions[0].isKill(); @@ -1157,7 +1153,7 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr& exec_copy if (vopc) { /* Add one extra definition for exec and copy the VOP3-specific fields if present. */ if (!vcmpx_exec_only) { - if (exec_val->isSDWA()) { + if (exec_val->isSDWA() || exec_val->isDPP()) { /* This might work but it needs testing and more code to copy the instruction. */ return false; } else { diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index f605a72a769..f924a7e2421 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -492,8 +492,7 @@ BEGIN_TEST(optimizer_postRA.dpp_vcmpx) Operand a(inputs[0], PhysReg(256)); Operand b(inputs[1], PhysReg(257)); - //! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi - //! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1] + //! s2: %res0:exec = v_cmpx_lt_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi //! p_unit_test 0, %res0:exec Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);