From 92dbf42379aa654735387012ddbee2af0ab118ea Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 1 Mar 2025 11:26:25 +0100 Subject: [PATCH] aco/optimizer: use cndmask for neg(b2i) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 1310 (1.59% of 82419) affected shaders: Instrs: 1337622 -> 1338677 (+0.08%); split: -0.00%, +0.08% CodeSize: 7039828 -> 7043996 (+0.06%); split: -0.00%, +0.06% Latency: 7783135 -> 7782526 (-0.01%); split: -0.01%, +0.00% InvThroughput: 1587987 -> 1586644 (-0.08%) Branches: 24320 -> 24318 (-0.01%) Foz-DB Navi21: Totals from 334 (0.41% of 82387) affected shaders: Instrs: 666102 -> 666094 (-0.00%) CodeSize: 3599748 -> 3599724 (-0.00%) Latency: 6873870 -> 6873868 (-0.00%); split: -0.00%, +0.00% InvThroughput: 2151773 -> 2151780 (+0.00%); split: -0.00%, +0.00% Branches: 17419 -> 17411 (-0.05%) Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f1991b6e7fe..a908db8510c 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4469,6 +4469,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) } } else if (info.opcode == aco_opcode::v_sub_u32 && !info.clamp) { assert(ctx.program->gfx_level >= GFX9); + /* v_sub_u32(0, v_cndmask_b32(0, 1, cond)) -> v_cndmask_b32(0, -1, cond) */ + add_opt(v_cndmask_b32, v_cndmask_b32, 0x2, "0312", + and_cb, remove_const_cb<1>>, remove_const_cb<0>>, + insert_const_cb<1, UINT32_MAX>>); /* v_sub_u32(a, v_cndmask_b32(0, 1, cond)) -> v_subb_co_u32(a, 0, cond) */ add_opt(v_cndmask_b32, v_subb_co_u32, 0x2, "0132", and_cb, remove_const_cb<1>>, add_lm_def_cb>); @@ -4479,6 +4483,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) } else if ((info.opcode == aco_opcode::v_sub_co_u32 || info.opcode == aco_opcode::v_sub_co_u32_e64) && !info.clamp) { + /* 
v_sub_co_u32(0, v_cndmask_b32(0, 1, cond)) -> v_cndmask_b32(0, -1, cond) */ + if (ctx.uses[info.defs[1].tempId()] == 0) { + add_opt( + v_cndmask_b32, v_cndmask_b32, 0x2, "0312", + and_cb, remove_const_cb<1>>, remove_const_cb<0>>, + and_cb, pop_def_cb>>); + } /* v_sub_co_u32(a, v_cndmask_b32(0, 1, cond)) -> v_subb_co_u32(a, 0, cond) */ add_opt(v_cndmask_b32, v_subb_co_u32, 0x2, "0132", and_cb, remove_const_cb<1>>);