aco/optimizer: use cndmask for neg(b2i)

Foz-DB Navi48:
Totals from 1310 (1.59% of 82419) affected shaders:
Instrs: 1337622 -> 1338677 (+0.08%); split: -0.00%, +0.08%
CodeSize: 7039828 -> 7043996 (+0.06%); split: -0.00%, +0.06%
Latency: 7783135 -> 7782526 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 1587987 -> 1586644 (-0.08%)
Branches: 24320 -> 24318 (-0.01%)

Foz-DB Navi21:
Totals from 334 (0.41% of 82387) affected shaders:
Instrs: 666102 -> 666094 (-0.00%)
CodeSize: 3599748 -> 3599724 (-0.00%)
Latency: 6873870 -> 6873868 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 2151773 -> 2151780 (+0.00%); split: -0.00%, +0.00%
Branches: 17419 -> 17411 (-0.05%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38530>
This commit is contained in:
Georg Lehmann 2025-03-01 11:26:25 +01:00 committed by Marge Bot
parent 0e4d4aeef7
commit 92dbf42379

View file

@@ -4469,6 +4469,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
} else if (info.opcode == aco_opcode::v_sub_u32 && !info.clamp) {
assert(ctx.program->gfx_level >= GFX9);
/* v_sub_u32(0, v_cndmask_b32(0, 1, cond)) -> v_cndmask_b32(0, -1, cond) */
add_opt(v_cndmask_b32, v_cndmask_b32, 0x2, "0312",
and_cb<and_cb<and_cb<check_const_cb<0, 0>, remove_const_cb<1>>, remove_const_cb<0>>,
insert_const_cb<1, UINT32_MAX>>);
/* v_sub_u32(a, v_cndmask_b32(0, 1, cond)) -> v_subb_co_u32(a, 0, cond) */
add_opt(v_cndmask_b32, v_subb_co_u32, 0x2, "0132",
and_cb<and_cb<check_const_cb<1, 0>, remove_const_cb<1>>, add_lm_def_cb>);
@@ -4479,6 +4483,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if ((info.opcode == aco_opcode::v_sub_co_u32 ||
info.opcode == aco_opcode::v_sub_co_u32_e64) &&
!info.clamp) {
/* v_sub_co_u32(0, v_cndmask_b32(0, 1, cond)) -> v_cndmask_b32(0, -1, cond) */
if (ctx.uses[info.defs[1].tempId()] == 0) {
add_opt(
v_cndmask_b32, v_cndmask_b32, 0x2, "0312",
and_cb<and_cb<and_cb<check_const_cb<0, 0>, remove_const_cb<1>>, remove_const_cb<0>>,
and_cb<insert_const_cb<1, UINT32_MAX>, pop_def_cb>>);
}
/* v_sub_co_u32(a, v_cndmask_b32(0, 1, cond)) -> v_subb_co_u32(a, 0, cond) */
add_opt(v_cndmask_b32, v_subb_co_u32, 0x2, "0132",
and_cb<check_const_cb<1, 0>, remove_const_cb<1>>);