brw: add ability to compute VUE map for separate tcs/tes

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
This commit is contained in:
Lionel Landwerlin 2025-04-23 11:51:17 +03:00 committed by Marge Bot
parent afea98593e
commit 8dee4813b0
4 changed files with 46 additions and 16 deletions

View file

@@ -2323,7 +2323,7 @@ iris_compile_tes(struct iris_screen *screen,
struct intel_vue_map input_vue_map;
brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
key->patch_inputs_read);
key->patch_inputs_read, false /* separate */);
struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);

View file

@@ -203,7 +203,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
key->base.vue_layout, 1);
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written);
nir->info.patch_outputs_written,
nir->info.separate_shader);
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
brw_nir_lower_vue_inputs(nir, &input_vue_map);

View file

@@ -1139,7 +1139,8 @@ void brw_compute_vue_map(const struct intel_device_info *devinfo,
void brw_compute_tess_vue_map(struct intel_vue_map *const vue_map,
uint64_t slots_valid,
uint32_t is_patch);
uint32_t is_patch,
bool separate_shader);
struct brw_vue_prog_data {
struct brw_stage_prog_data base;

View file

@@ -304,19 +304,19 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
void
brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
uint64_t vertex_slots,
uint32_t patch_slots)
uint32_t patch_slots,
bool separate)
{
/* I don't think anything actually uses this... */
vue_map->slots_valid = vertex_slots;
/* separate isn't really meaningful, we always compiled tessellation
* shaders together, so use a fixed layout.
*/
vue_map->layout = INTEL_VUE_LAYOUT_FIXED;
vue_map->layout = separate ? INTEL_VUE_LAYOUT_SEPARATE : INTEL_VUE_LAYOUT_FIXED;
vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
VARYING_BIT_TESS_LEVEL_INNER);
if (separate)
vertex_slots |= VARYING_BIT_POS;
/* Make sure that the values we store in vue_map->varying_to_slot and
* vue_map->slot_to_varying won't overflow the signed chars that are used
* to store them. Note that since vue_map->slot_to_varying sometimes holds
@@ -354,16 +354,44 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
/* apparently, including the patch header... */
vue_map->num_per_patch_slots = slot;
/* then assign per-vertex varyings for each vertex in our patch */
while (vertex_slots != 0) {
const int varying = ffsll(vertex_slots) - 1;
if (vue_map->varying_to_slot[varying] == -1) {
/* We have 2 blocks of varyings, generics for the app & builtins. Builtins
* have to match according to the Vulkan spec, but generics don't have to.
*
* In fixed layout we can put all the builtins together at the beginning
* and pack the generics after that.
*
* In separate layout we lay out the generics in a position independent way
* at the beginning first then followed by the builtins. An offset will be
* pushed into the tessellation evaluation shader to indicate where to find
* the builtins.
*/
const int first_vertex_slot = slot;
const uint64_t generics =
(separate ? BITFIELD64_MASK(util_last_bit64(vertex_slots)) : vertex_slots) &
~BITFIELD64_MASK(VARYING_SLOT_VAR0);
const uint64_t builtins = vertex_slots & BITFIELD64_MASK(VARYING_SLOT_VAR0);
/* TODO: consider only using a single layout (just allowing position
* independent generics), since there should be no difference in VUE size.
*/
if (separate) {
const int first_generics_slot = slot;
u_foreach_bit64(varying, generics) {
slot = first_generics_slot + varying - VARYING_SLOT_VAR0;
assign_vue_slot(vue_map, varying, slot++);
}
vertex_slots &= ~BITFIELD64_BIT(varying);
u_foreach_bit64(varying, builtins)
assign_vue_slot(vue_map, varying, slot++);
} else {
u_foreach_bit64(varying, builtins) {
assign_vue_slot(vue_map, varying, slot++);
}
u_foreach_bit64(varying, generics)
assign_vue_slot(vue_map, varying, slot++);
}
vue_map->num_per_vertex_slots = slot - vue_map->num_per_patch_slots;
vue_map->num_per_vertex_slots = slot - first_vertex_slot;
vue_map->num_pos_slots = 0;
vue_map->num_slots = slot;
}