From 4aad8726819b3134c3a6e7b5557aec073da3dc2e Mon Sep 17 00:00:00 2001
From: Aitor Camacho <aitor@lunarg.com>
Date: Wed, 17 Dec 2025 21:32:36 +0900
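Subject: [PATCH] nir/opt_varyings: If IO supports 16bit floats, don't pack
 them into 32bit

When both the producer and the consumer set
nir_io_16bit_input_output_support in their io_options, compaction now
assigns 16-bit varyings the same slot size as 32-bit ones (2 instead
of 1), so two 16-bit values are no longer packed into the halves of a
single 32-bit component. Behavior is unchanged when either shader lacks
the flag.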

Signed-off-by: Aitor Camacho <aitor@lunarg.com>
---
 src/compiler/nir/nir_opt_varyings.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c
index 55797507488..3d0906826c5 100644
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -659,6 +659,7 @@ struct linkage_info {
    bool has_flexible_interp;
    bool always_interpolate_convergent_fs_inputs;
    bool group_tes_inputs_into_pos_var_groups;
+   bool io_supports_16bit_input_output;
 
    mesa_shader_stage producer_stage;
    mesa_shader_stage consumer_stage;
@@ -4786,8 +4787,11 @@ vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage,
     */
    vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index,
                               patch_slot_index, 2, progress);
+   unsigned slot_size_16bit = linkage->io_supports_16bit_input_output
+                                 ? 2
+                                 : 1;
    vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index,
-                              patch_slot_index, 1, progress);
+                              patch_slot_index, slot_size_16bit, progress);
 
    assert(*slot_index <= VARYING_SLOT_MAX * 8);
    assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
@@ -4808,6 +4812,9 @@ static void
 compact_varyings(struct linkage_info *linkage,
                  nir_opt_varyings_progress *progress)
 {
+   unsigned slot_size_16bit = linkage->io_supports_16bit_input_output
+                                 ? 2
+                                 : 1;
    if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
       /* These arrays are used to track which scalar slots we've already
        * assigned. We can fill unused components of indirectly-indexed slots,
@@ -4864,7 +4871,7 @@ compact_varyings(struct linkage_info *linkage,
          fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
                                linkage->interp_fp16_mask, linkage->flat16_mask,
                                linkage->convergent16_mask, NULL,
-                               FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
+                               FS_VEC4_TYPE_INTERP_FP16, slot_size_16bit, false, 0, progress);
       } else {
          /* Basically the same as above. */
          fs_assign_slot_groups_separate_qual(
@@ -4877,7 +4884,7 @@ compact_varyings(struct linkage_info *linkage,
             linkage, assigned_mask, assigned_fs_vec4_type,
             &linkage->interp_fp16_qual_masks, linkage->flat16_mask,
             linkage->convergent16_mask, NULL,
-            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress);
+            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, slot_size_16bit, false, 0, progress);
       }
 
       /* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
@@ -5245,7 +5252,10 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv,
       .group_tes_inputs_into_pos_var_groups =
          consumer->info.stage == MESA_SHADER_TESS_EVAL &&
          consumer->options->io_options &
-         nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
+            nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
+      .io_supports_16bit_input_output = producer->options->io_options &
+                                        consumer->options->io_options &
+                                        nir_io_16bit_input_output_support,
       .producer_stage = producer->info.stage,
       .consumer_stage = consumer->info.stage,
       .producer_builder =