aco: allow v_cmpx with DPP

The wording in the RDNA3 ISA doc has since been clarified: v_cmpx with DPP
behaves exactly as one would expect:
FI controls whether the source value can be read from inactive lanes,
but inactive lanes always write a 0 bit. The same applies to v_cmp with DPP.

Foz-DB Navi48:
Totals from 987 (1.20% of 82405) affected shaders:
Instrs: 517003 -> 516445 (-0.11%); split: -0.11%, +0.00%
CodeSize: 2782688 -> 2780508 (-0.08%); split: -0.08%, +0.00%
Latency: 2059169 -> 2056327 (-0.14%); split: -0.14%, +0.00%
InvThroughput: 365374 -> 365328 (-0.01%); split: -0.03%, +0.01%
Copies: 64669 -> 65616 (+1.46%)
SALU: 70693 -> 70652 (-0.06%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
This commit is contained in:
Georg Lehmann 2026-01-25 15:06:28 +01:00 committed by Marge Bot
parent 1c1bd9d090
commit 2d38da94d4
3 changed files with 2 additions and 11 deletions

View file

@ -489,10 +489,6 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
return false;
}
/* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
if (instr->writes_exec())
return false;
return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P());
}

View file

@ -1078,10 +1078,6 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
: aco_opcode::num_opcodes;
const bool vopc = v_cmpx_op != aco_opcode::num_opcodes;
/* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */
if (vopc && exec_val->isDPP())
return false;
/* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */
bool save_original_exec =
exec_copy->opcode == and_saveexec && !exec_copy->definitions[0].isKill();
@ -1157,7 +1153,7 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
if (vopc) {
/* Add one extra definition for exec and copy the VOP3-specific fields if present. */
if (!vcmpx_exec_only) {
if (exec_val->isSDWA()) {
if (exec_val->isSDWA() || exec_val->isDPP()) {
/* This might work but it needs testing and more code to copy the instruction. */
return false;
} else {

View file

@ -492,8 +492,7 @@ BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
Operand a(inputs[0], PhysReg(256));
Operand b(inputs[1], PhysReg(257));
//! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
//! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
//! s2: %res0:exec = v_cmpx_lt_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
//! p_unit_test 0, %res0:exec
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);