From d9919c3e101545ebd85b0307f190f5d854e4fcfc Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 14 Dec 2024 19:25:22 +0100
Subject: [PATCH] aco/optimizer: optimize add(mad_u32_u16(a, b, 0), c)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi48:
Totals from 104 (0.13% of 82419) affected shaders:
Instrs: 3554243 -> 3553555 (-0.02%); split: -0.02%, +0.00%
CodeSize: 18836004 -> 18830572 (-0.03%); split: -0.03%, +0.00%
Latency: 19288034 -> 19287208 (-0.00%); split: -0.01%, +0.00%
InvThroughput: 3527510 -> 3526925 (-0.02%); split: -0.02%, +0.00%
VClause: 89526 -> 89522 (-0.00%); split: -0.02%, +0.01%
SClause: 62484 -> 62492 (+0.01%); split: -0.00%, +0.01%
Copies: 266415 -> 266404 (-0.00%); split: -0.04%, +0.03%
Branches: 102123 -> 102125 (+0.00%)
VALU: 1987067 -> 1986531 (-0.03%); split: -0.03%, +0.00%
SALU: 471348 -> 471346 (-0.00%); split: -0.00%, +0.00%

Foz-DB Navi21:
Totals from 228 (0.28% of 82387) affected shaders:
Instrs: 3069693 -> 3068317 (-0.04%); split: -0.05%, +0.00%
CodeSize: 16582476 -> 16574920 (-0.05%); split: -0.05%, +0.00%
Latency: 20038755 -> 20030986 (-0.04%); split: -0.04%, +0.00%
InvThroughput: 4742546 -> 4738245 (-0.09%); split: -0.10%, +0.00%
VClause: 93157 -> 93135 (-0.02%); split: -0.03%, +0.01%
Copies: 265019 -> 264959 (-0.02%); split: -0.04%, +0.02%
VALU: 2025352 -> 2023897 (-0.07%); split: -0.07%, +0.00%
SALU: 447385 -> 447375 (-0.00%); split: -0.00%, +0.00%

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 32e6b16058c..947a39c1d12 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4849,6 +4849,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr)
    } else if (info.opcode == aco_opcode::v_add_u32 && !info.clamp) {
       assert(ctx.program->gfx_level >= GFX9);
       add_opt(v_bcnt_u32_b32, v_bcnt_u32_b32, 0x3, "102", remove_const_cb<0>, true);
+      add_opt(v_mad_u32_u16, v_mad_u32_u16, 0x3, "1203", remove_const_cb<0>, true);
       add_opt(v_mul_u32_u24, v_mad_u32_u24, 0x3, "120", nullptr, true);
       add_opt(v_mul_i32_i24, v_mad_i32_i24, 0x3, "120", nullptr, true);
       add_opt(v_xor_b32, v_xad_u32, 0x3, "120", nullptr, true);