ac/nir/lower_ps_late: emit scalar f2f16_rtz when one half of a packed export is undef

Foz-DB Navi48:
Totals from 7200 (8.74% of 82405) affected shaders:
Instrs: 9056391 -> 9048177 (-0.09%); split: -0.09%, +0.00%
CodeSize: 48681288 -> 48640684 (-0.08%); split: -0.09%, +0.00%
VGPRs: 413088 -> 413784 (+0.17%)
Latency: 76340711 -> 76320080 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 12692959 -> 12684618 (-0.07%); split: -0.07%, +0.00%
VClause: 148823 -> 148821 (-0.00%)
Copies: 601739 -> 601874 (+0.02%); split: -0.01%, +0.03%
VALU: 5213356 -> 5207253 (-0.12%); split: -0.12%, +0.00%
SALU: 1160815 -> 1160817 (+0.00%); split: -0.00%, +0.00%
VOPD: 79520 -> 79444 (-0.10%); split: +0.09%, -0.18%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39412>
This commit is contained in:
Georg Lehmann 2025-12-13 14:31:23 +01:00 committed by Marge Bot
parent 8c895c5c61
commit 809fb0fba3

View file

@@ -481,14 +481,20 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
if (!lo && !hi)
continue;
lo = lo ? lo : nir_undef(b, 1, type_size);
hi = hi ? hi : nir_undef(b, 1, type_size);
if (nir_op_infos[pack_op].num_inputs == 2) {
outputs[i] = nir_build_alu2(b, pack_op, lo, hi);
if (pack_op == nir_op_pack_half_2x16_rtz_split && (!lo || !hi)) {
lo = lo ? nir_f2f16_rtz(b, lo) : nir_undef(b, 1, 16);
hi = hi ? nir_f2f16_rtz(b, hi) : nir_undef(b, 1, 16);
outputs[i] = nir_pack_32_2x16_split(b, lo, hi);
} else {
nir_def *vec = nir_vec2(b, lo, hi);
outputs[i] = nir_build_alu1(b, pack_op, vec);
lo = lo ? lo : nir_undef(b, 1, type_size);
hi = hi ? hi : nir_undef(b, 1, type_size);
if (nir_op_infos[pack_op].num_inputs == 2) {
outputs[i] = nir_build_alu2(b, pack_op, lo, hi);
} else {
nir_def *vec = nir_vec2(b, lo, hi);
outputs[i] = nir_build_alu1(b, pack_op, vec);
}
}
if (s->options->gfx_level >= GFX11)