From 004f8aa2f43068223406a0edff23698ff70ee0aa Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 12 Jul 2025 00:00:37 +0200
Subject: [PATCH] aco: optimize get_alu_src with constant source and size > 1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Emulated FSR4, Navi31:

Totals from 14 (100.00% of 14) affected shaders:
MaxWaves: 130 -> 131 (+0.77%)
Instrs: 67887 -> 67470 (-0.61%); split: -0.70%, +0.09%
CodeSize: 464428 -> 461668 (-0.59%); split: -0.67%, +0.07%
VGPRs: 2544 -> 2520 (-0.94%)
SpillVGPRs: 92 -> 89 (-3.26%)
Latency: 256823 -> 257574 (+0.29%); split: -0.37%, +0.66%
InvThroughput: 253895 -> 252929 (-0.38%); split: -0.40%, +0.02%
VClause: 997 -> 984 (-1.30%); split: -2.11%, +0.80%
Copies: 4501 -> 3788 (-15.84%); split: -17.35%, +1.51%
PreSGPRs: 504 -> 519 (+2.98%)
PreVGPRs: 2460 -> 2448 (-0.49%)
VALU: 57202 -> 56726 (-0.83%); split: -0.88%, +0.05%
SALU: 1231 -> 1384 (+12.43%)
VMEM: 3807 -> 3801 (-0.16%)
VOPD: 2693 -> 2303 (-14.48%); split: +1.19%, -15.67%

Reviewed-by: Daniel Schürmann
Part-of:
---
 .../instruction_selection/aco_select_nir_alu.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
index 5050d05a703..27b50518f83 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
@@ -64,6 +64,16 @@ get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1)
    if (src.src.ssa->num_components == 1 && size == 1)
       return get_ssa_temp(ctx, src.src.ssa);
 
+   if (nir_src_is_const(src.src) && src.src.ssa->num_components == 1 &&
+       (size * src.src.ssa->bit_size) <= 32) {
+      uint32_t val = 0;
+      for (unsigned i = 0; i < size; i++) {
+         val |= nir_src_as_uint(src.src) << (i * src.src.ssa->bit_size);
+      }
+      Builder bld(ctx->program, ctx->block);
+      return bld.copy(bld.def(s1), Operand::c32(val));
+   }
+
    Temp vec = get_ssa_temp(ctx, src.src.ssa);
    unsigned elem_size = src.src.ssa->bit_size / 8u;
    bool identity_swizzle = true;