Merge branch 'nir-opt-varyings-16bit' into 'main'

Draft: nir/opt_varyings: If IO supports 16bit floats, don't pack them into 32bit

See merge request mesa/mesa!38994
2025-12-20 05:10:11 +01:00 · 2025-12-20 09:51:06 +09:00 · 2025-12-20 09:51:06 +09:00 · 698879321d
commit 698879321d
parent c430f394c5 4aad872681
1 changed files with 14 additions and 4 deletions
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -659,6 +659,7 @@ struct linkage_info {
   bool has_flexible_interp;
   bool always_interpolate_convergent_fs_inputs;
   bool group_tes_inputs_into_pos_var_groups;
+   bool io_supports_16bit_input_output;

   mesa_shader_stage producer_stage;
   mesa_shader_stage consumer_stage;
@@ -4786,8 +4787,11 @@ vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage,
    */
   vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index,
                              patch_slot_index, 2, progress);
+   unsigned slot_size_16bit = linkage->io_supports_16bit_input_output
+                                 ? 2
+                                 : 1;
   vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index,
-                              patch_slot_index, 1, progress);
+                              patch_slot_index, slot_size_16bit, progress);

   assert(*slot_index <= VARYING_SLOT_MAX * 8);
   assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
@@ -4808,6 +4812,9 @@ static void
 compact_varyings(struct linkage_info *linkage,
                 nir_opt_varyings_progress *progress)
 {
+   unsigned slot_size_16bit = linkage->io_supports_16bit_input_output
+                                 ? 2
+                                 : 1;
   if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
      /* These arrays are used to track which scalar slots we've already
       * assigned. We can fill unused components of indirectly-indexed slots,
@@ -4864,7 +4871,7 @@ compact_varyings(struct linkage_info *linkage,
         fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
                               linkage->interp_fp16_mask, linkage->flat16_mask,
                               linkage->convergent16_mask, NULL,
-                               FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
+                               FS_VEC4_TYPE_INTERP_FP16, slot_size_16bit, false, 0, progress);
      } else {
         /* Basically the same as above. */
         fs_assign_slot_groups_separate_qual(
@@ -4877,7 +4884,7 @@ compact_varyings(struct linkage_info *linkage,
            linkage, assigned_mask, assigned_fs_vec4_type,
            &linkage->interp_fp16_qual_masks, linkage->flat16_mask,
            linkage->convergent16_mask, NULL,
-            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress);
+            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, slot_size_16bit, false, 0, progress);
      }

      /* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
@@ -5246,6 +5253,9 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv,
         consumer->info.stage == MESA_SHADER_TESS_EVAL &&
         consumer->options->io_options &
            nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
+      .io_supports_16bit_input_output = producer->options->io_options &
+                                        consumer->options->io_options &
+                                        nir_io_16bit_input_output_support,
      .producer_stage = producer->info.stage,
      .consumer_stage = consumer->info.stage,
      .producer_builder =