From 6a1caabd64dbf71b4ce4c30e089505fa6256c5ac Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 14 Dec 2024 19:52:25 +0100
Subject: [PATCH] aco/optimizer: use new helpers for v_add_lshl_u32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi48:
Totals from 357 (0.43% of 82419) affected shaders:
Instrs: 244419 -> 243608 (-0.33%); split: -0.34%, +0.01%
CodeSize: 1302584 -> 1304188 (+0.12%); split: -0.00%, +0.13%
VGPRs: 21240 -> 21216 (-0.11%)
Latency: 1226165 -> 1225651 (-0.04%); split: -0.06%, +0.02%
InvThroughput: 162432 -> 161940 (-0.30%); split: -0.30%, +0.00%
Copies: 16607 -> 16610 (+0.02%)
PreSGPRs: 14082 -> 14135 (+0.38%)
PreVGPRs: 15917 -> 15914 (-0.02%)
VALU: 136308 -> 135699 (-0.45%)
SALU: 24415 -> 24418 (+0.01%)
VOPD: 333 -> 334 (+0.30%)

Foz-DB Navi21:
Totals from 319 (0.39% of 82387) affected shaders:
Instrs: 255434 -> 254831 (-0.24%)
CodeSize: 1375792 -> 1378164 (+0.17%)
VGPRs: 15360 -> 15344 (-0.10%)
Latency: 1405956 -> 1405181 (-0.06%)
InvThroughput: 174402 -> 173816 (-0.34%)
Copies: 25892 -> 25891 (-0.00%)
PreSGPRs: 14129 -> 14132 (+0.02%)
PreVGPRs: 12457 -> 12454 (-0.02%)
VALU: 139630 -> 139032 (-0.43%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 2713cf5a611..e2faeee42af 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4594,9 +4594,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
               instr->opcode == aco_opcode::v_subrev_co_u32 ||
               instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
       combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
-   } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
-      combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
-                            2);
    } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
       if (!combine_salu_not_bitwise(ctx, instr))
          combine_inverse_comparison(ctx, instr);
@@ -4818,6 +4815,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       add_opt(s_lshl_b32, s_lshl2_add_u32, 0x3, "102", remove_const_cb<2>);
       add_opt(s_lshl_b32, s_lshl3_add_u32, 0x3, "102", remove_const_cb<3>);
       add_opt(s_lshl_b32, s_lshl4_add_u32, 0x3, "102", remove_const_cb<4>);
+   } else if (info.opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
+      add_opt(v_add_u32, v_add_lshl_u32, 0x2, "120", nullptr, true);
    }
 
    if (match_and_apply_patterns(ctx, info, patterns)) {