mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-18 10:48:21 +02:00
aco: allow v_cmpx with DPP
The wording in the RDNA3 ISA doc has since been clarified; v_cmpx with DPP behaves exactly like one would expect: FI controls whether the source value can be read from inactive lanes, but inactive lanes always write a 0 bit. The same applies to v_cmp with DPP.

Foz-DB Navi48:
Totals from 987 (1.20% of 82405) affected shaders:
Instrs: 517003 -> 516445 (-0.11%); split: -0.11%, +0.00%
CodeSize: 2782688 -> 2780508 (-0.08%); split: -0.08%, +0.00%
Latency: 2059169 -> 2056327 (-0.14%); split: -0.14%, +0.00%
InvThroughput: 365374 -> 365328 (-0.01%); split: -0.03%, +0.01%
Copies: 64669 -> 65616 (+1.46%)
SALU: 70693 -> 70652 (-0.06%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39516>
This commit is contained in:
parent
1c1bd9d090
commit
2d38da94d4
3 changed files with 2 additions and 11 deletions
|
|
@ -489,10 +489,6 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
|
|||
return false;
|
||||
}
|
||||
|
||||
/* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
|
||||
if (instr->writes_exec())
|
||||
return false;
|
||||
|
||||
return opcode_supports_dpp(gfx_level, instr->opcode, instr->isVOP3P());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1078,10 +1078,6 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
|
|||
: aco_opcode::num_opcodes;
|
||||
const bool vopc = v_cmpx_op != aco_opcode::num_opcodes;
|
||||
|
||||
/* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */
|
||||
if (vopc && exec_val->isDPP())
|
||||
return false;
|
||||
|
||||
/* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */
|
||||
bool save_original_exec =
|
||||
exec_copy->opcode == and_saveexec && !exec_copy->definitions[0].isKill();
|
||||
|
|
@ -1157,7 +1153,7 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
|
|||
if (vopc) {
|
||||
/* Add one extra definition for exec and copy the VOP3-specific fields if present. */
|
||||
if (!vcmpx_exec_only) {
|
||||
if (exec_val->isSDWA()) {
|
||||
if (exec_val->isSDWA() || exec_val->isDPP()) {
|
||||
/* This might work but it needs testing and more code to copy the instruction. */
|
||||
return false;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -492,8 +492,7 @@ BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
|
|||
Operand a(inputs[0], PhysReg(256));
|
||||
Operand b(inputs[1], PhysReg(257));
|
||||
|
||||
//! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 fi
|
||||
//! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
|
||||
//! s2: %res0:exec = v_cmpx_lt_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 0, %res0:exec
|
||||
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue