diff --git a/.pick_status.json b/.pick_status.json index 4fc172f3d13..bea8332b615 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -688,7 +688,7 @@ "description": "aco: Don't use vcmpx with DPP.", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "baab6f18c91166b275c339027dcd87ce57795cd5" }, diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 98a00b050c0..9a1a3dcf590 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -363,6 +363,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in exec_val->isVOPC() ? get_vcmpx(exec_val->opcode) : aco_opcode::num_opcodes; const bool vopc = v_cmpx_op != aco_opcode::num_opcodes; + /* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */ + if (vopc && exec_val->isDPP()) + return; + /* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */ bool save_original_exec = exec_copy->opcode == and_saveexec; @@ -452,11 +456,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in if (vopc) { /* Add one extra definition for exec and copy the VOP3-specific fields if present. */ if (!vcmpx_exec_only) { - if (exec_val->isSDWA() || exec_val->isDPP()) { + if (exec_val->isSDWA()) { /* This might work but it needs testing and more code to copy the instruction. */ return; - } - else if (!exec_val->isVOP3()) { + } else if (!exec_val->isVOP3()) { aco_ptr tmp = std::move(exec_val); exec_val.reset(create_instruction( tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));