From 0478021fdcc6dbd552817e6036a47660e34a9615 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 30 Nov 2025 14:57:14 +0100 Subject: [PATCH] aco/optimizer: reassociate rcp(mul(a, const)) into rcp_omod(a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 2484 (2.54% of 97637) affected shaders: Instrs: 10368279 -> 10361892 (-0.06%); split: -0.06%, +0.00% CodeSize: 55161104 -> 55150752 (-0.02%); split: -0.02%, +0.00% SpillSGPRs: 14665 -> 14666 (+0.01%) Latency: 87694014 -> 87689324 (-0.01%); split: -0.01%, +0.00% InvThroughput: 16595764 -> 16594448 (-0.01%); split: -0.01%, +0.00% VClause: 209922 -> 209918 (-0.00%); split: -0.01%, +0.00% SClause: 205195 -> 205251 (+0.03%); split: -0.01%, +0.04% Copies: 843771 -> 843765 (-0.00%); split: -0.01%, +0.01% Branches: 275985 -> 275962 (-0.01%); split: -0.01%, +0.00% PreVGPRs: 170608 -> 170494 (-0.07%) VALU: 5840893 -> 5838038 (-0.05%); split: -0.05%, +0.00% SALU: 1481388 -> 1479037 (-0.16%); split: -0.16%, +0.00% VOPD: 7496 -> 7485 (-0.15%) Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0488eff8bef..224f8bcce39 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4372,6 +4372,22 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) add_opt(v_mul_f64, v_mul_f64, 0x3, "120", reassoc_omod_cb, true); } else if (info.opcode == aco_opcode::v_mul_f64_e64 && can_reassoc_omod(ctx, info, 64)) { add_opt(v_mul_f64_e64, v_mul_f64_e64, 0x3, "120", reassoc_omod_cb, true); + } else if (info.opcode == aco_opcode::v_rcp_f32 && can_reassoc_omod(ctx, info, 32)) { + add_opt(v_mul_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + add_opt(v_mul_legacy_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + add_opt(s_mul_f32, v_rcp_f32, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_s_rcp_f32 && can_reassoc_omod(ctx, info, 32)) { + add_opt(s_mul_f32, v_s_rcp_f32, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_rcp_f16 && can_reassoc_omod(ctx, info, 16)) { + add_opt(v_mul_f16, v_rcp_f16, 0x1, "01", reassoc_omod_cb); + add_opt(s_mul_f16, v_rcp_f16, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_s_rcp_f16 && can_reassoc_omod(ctx, info, 16)) { + add_opt(s_mul_f16, v_s_rcp_f16, 0x1, "01", reassoc_omod_cb); + } else if (info.opcode == aco_opcode::v_rcp_f64 && can_reassoc_omod(ctx, info, 64)) { + if (ctx.program->gfx_level < GFX12) + add_opt(v_mul_f64_e64, v_rcp_f64, 0x1, "01", reassoc_omod_cb); + else + add_opt(v_mul_f64, v_rcp_f64, 0x1, "01", reassoc_omod_cb); } else if (info.opcode == aco_opcode::v_add_u16 && !info.clamp) { if (ctx.program->gfx_level < GFX9) { add_opt(v_mul_lo_u16, v_mad_legacy_u16, 0x3, "120");