From 911e1ce168b81c5fd23cbedeb84a80d1e2f00cff Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 29 Nov 2025 11:12:13 +0100
Subject: [PATCH] aco/isel: emit exec copy for ballot(true)

Once copy propagated in the optimizer, this will allow using
nir_opt_uniform_subgroup without too many regressions.

Foz-DB Navi48:
Totals from 405 (0.41% of 97637) affected shaders:
Instrs: 3796716 -> 3796894 (+0.00%); split: -0.00%, +0.00%
CodeSize: 20116136 -> 20116652 (+0.00%); split: -0.00%, +0.00%
Latency: 18326661 -> 18327114 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 3353206 -> 3353268 (+0.00%); split: -0.00%, +0.00%
Copies: 292307 -> 293830 (+0.52%)
SALU: 507523 -> 507738 (+0.04%)

Reviewed-by: Rhys Perry
Part-of:
---
 .../instruction_selection/aco_select_nir_intrinsics.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index 6b54f0a656d..aaa17c360e9 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -4210,6 +4210,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       Definition def = dst.size() == bld.lm.size() ? Definition(dst) : bld.def(bld.lm);
       if (instr->intrinsic == nir_intrinsic_ballot_relaxed)
          src = bld.copy(def, src);
+      else if (nir_src_is_const(instr->src[0]) && nir_src_as_uint(instr->src[0]))
+         src = bld.copy(def, Operand(exec, bld.lm));
       else
          src = bld.sop2(Builder::s_and, def, bld.def(s1, scc), src, Operand(exec, bld.lm));
       if (dst.size() != bld.lm.size()) {