From 809fb0fba3ad6ad01ecd0c3df5f5cc99c3e1b224 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 13 Dec 2025 14:31:23 +0100 Subject: [PATCH] ac/nir/lower_ps_late: emit scalar f2f16_rtz for when one half of a packed export is undef MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 7200 (8.74% of 82405) affected shaders: Instrs: 9056391 -> 9048177 (-0.09%); split: -0.09%, +0.00% CodeSize: 48681288 -> 48640684 (-0.08%); split: -0.09%, +0.00% VGPRs: 413088 -> 413784 (+0.17%) Latency: 76340711 -> 76320080 (-0.03%); split: -0.03%, +0.00% InvThroughput: 12692959 -> 12684618 (-0.07%); split: -0.07%, +0.00% VClause: 148823 -> 148821 (-0.00%) Copies: 601739 -> 601874 (+0.02%); split: -0.01%, +0.03% VALU: 5213356 -> 5207253 (-0.12%); split: -0.12%, +0.00% SALU: 1160815 -> 1160817 (+0.00%); split: -0.00%, +0.00% VOPD: 79520 -> 79444 (-0.10%); split: +0.09%, -0.18% Reviewed-by: Marek Olšák Acked-by: Daniel Schürmann Part-of: --- src/amd/common/nir/ac_nir_lower_ps_late.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/amd/common/nir/ac_nir_lower_ps_late.c b/src/amd/common/nir/ac_nir_lower_ps_late.c index 9f543e6c4e3..fa6f4f83b14 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_late.c +++ b/src/amd/common/nir/ac_nir_lower_ps_late.c @@ -481,14 +481,20 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u if (!lo && !hi) continue; - lo = lo ? lo : nir_undef(b, 1, type_size); - hi = hi ? hi : nir_undef(b, 1, type_size); - - if (nir_op_infos[pack_op].num_inputs == 2) { - outputs[i] = nir_build_alu2(b, pack_op, lo, hi); + if (pack_op == nir_op_pack_half_2x16_rtz_split && (!lo || !hi)) { + lo = lo ? nir_f2f16_rtz(b, lo) : nir_undef(b, 1, 16); + hi = hi ? nir_f2f16_rtz(b, hi) : nir_undef(b, 1, 16); + outputs[i] = nir_pack_32_2x16_split(b, lo, hi); } else { - nir_def *vec = nir_vec2(b, lo, hi); - outputs[i] = nir_build_alu1(b, pack_op, vec); + lo = lo ? lo : nir_undef(b, 1, type_size); + hi = hi ? hi : nir_undef(b, 1, type_size); + + if (nir_op_infos[pack_op].num_inputs == 2) { + outputs[i] = nir_build_alu2(b, pack_op, lo, hi); + } else { + nir_def *vec = nir_vec2(b, lo, hi); + outputs[i] = nir_build_alu1(b, pack_op, vec); + } } if (s->options->gfx_level >= GFX11)