diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c
index 9400380bdd2..7360734da29 100644
--- a/src/gallium/drivers/crocus/crocus_program.c
+++ b/src/gallium/drivers/crocus/crocus_program.c
@@ -1204,7 +1204,9 @@ crocus_compile_vs(struct crocus_context *ice,
       crocus_vs_outputs_written(ice, key, nir->info.outputs_written);
    elk_compute_vue_map(devinfo,
                        &vue_prog_data->vue_map, outputs_written,
-                       nir->info.separate_shader, /* pos slots */ 1);
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
 
    /* Don't tell the backend about our clip plane constants, we've already
     * lowered them in NIR and we don't want it doing it again.
@@ -1694,7 +1696,9 @@ crocus_compile_gs(struct crocus_context *ice,
 
    elk_compute_vue_map(devinfo,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
-                       nir->info.separate_shader, /* pos slots */ 1);
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
 
    if (devinfo->ver == 6)
       gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data);
@@ -1969,7 +1973,7 @@ update_last_vue_map(struct crocus_context *ice,
          ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP];
    }
 
-   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
+   if (changed_slots || (old_map && old_map->layout != vue_map->layout)) {
       ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
       if (devinfo->ver < 6)
          ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
@@ -2872,7 +2876,7 @@ crocus_create_fs_state(struct pipe_context *ctx,
       if (devinfo->ver < 6) {
          elk_compute_vue_map(devinfo, &vue_map,
                              info->inputs_read | VARYING_BIT_POS,
-                             false, /* pos slots */ 1);
+                             INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
       }
       if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
          crocus_compile_fs(ice, ish, &key, &vue_map);
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 0c99a33973c..a8d0e7bb63c 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -231,7 +231,8 @@ struct iris_vue_prog_key {
    struct iris_base_prog_key base;
 
    unsigned nr_userclip_plane_consts:4;
-   unsigned padding:28;
+   enum intel_vue_layout layout:2;
+   unsigned padding:26;
 };
 
 struct iris_vs_prog_key {
@@ -284,7 +285,8 @@ struct iris_fs_prog_key {
    bool multisample_fbo:1;
    bool force_dual_color_blend:1;
    bool coherent_fb_fetch:1;
-   uint64_t padding:43;
+   enum intel_vue_layout vue_layout:2;
+   uint64_t padding:41;
 };
 
 struct iris_cs_prog_key {
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 4af7b8e11e7..7f627f9ff61 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -55,12 +55,20 @@
 #include "iris_pipe.h"
 #include "nir/tgsi_to_nir.h"
 
-#define KEY_INIT(prefix)                                                   \
-   .prefix.program_string_id = ish->program_id,                            \
-   .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
-#define BRW_KEY_INIT(gen, prog_id, limit_trig_input)       \
+static inline enum intel_vue_layout
+vue_layout(bool separate_shader) /* true: SEPARATE layout, false: FIXED */
+{
+   return separate_shader ? INTEL_VUE_LAYOUT_SEPARATE : INTEL_VUE_LAYOUT_FIXED;
+}
+
+#define KEY_INIT(prefix)                                   \
+   .prefix.program_string_id = ish->program_id,            \
+   .prefix.limit_trig_input_range =                        \
+      screen->driconf.limit_trig_input_range
+#define BRW_KEY_INIT(gen, prog_id, limit_trig_input, _vue_layout) \
    .base.program_string_id = prog_id,                      \
-   .base.limit_trig_input_range = limit_trig_input
+   .base.limit_trig_input_range = limit_trig_input,        \
+   .base.vue_layout = _vue_layout
 
 #ifdef INTEL_USE_ELK
 #define ELK_KEY_INIT(gen, prog_id, limit_trig_input)       \
@@ -525,7 +533,8 @@ iris_to_brw_vs_key(const struct iris_screen *screen,
 {
    return (struct brw_vs_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
-                   key->vue.base.limit_trig_input_range),
+                   key->vue.base.limit_trig_input_range,
+                   key->vue.layout),
    };
 }
 
@@ -535,7 +544,8 @@ iris_to_brw_tcs_key(const struct iris_screen *screen,
 {
    return (struct brw_tcs_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
-                   key->vue.base.limit_trig_input_range),
+                   key->vue.base.limit_trig_input_range,
+                   key->vue.layout),
       ._tes_primitive_mode = key->_tes_primitive_mode,
       .input_vertices = key->input_vertices,
       .patch_outputs_written = key->patch_outputs_written,
@@ -549,7 +559,8 @@ iris_to_brw_tes_key(const struct iris_screen *screen,
 {
    return (struct brw_tes_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
-                   key->vue.base.limit_trig_input_range),
+                   key->vue.base.limit_trig_input_range,
+                   key->vue.layout),
       .patch_inputs_read = key->patch_inputs_read,
       .inputs_read = key->inputs_read,
    };
@@ -561,7 +572,8 @@ iris_to_brw_gs_key(const struct iris_screen *screen,
 {
    return (struct brw_gs_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
-                   key->vue.base.limit_trig_input_range),
+                   key->vue.base.limit_trig_input_range,
+                   key->vue.layout),
    };
 }
 
@@ -571,7 +583,8 @@ iris_to_brw_fs_key(const struct iris_screen *screen,
 {
    return (struct brw_wm_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
-                   key->base.limit_trig_input_range),
+                   key->base.limit_trig_input_range,
+                   key->vue_layout),
       .nr_color_regions = key->nr_color_regions,
       .flat_shade = key->flat_shade,
       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
@@ -595,7 +608,8 @@ iris_to_brw_cs_key(const struct iris_screen *screen,
 {
    return (struct brw_cs_prog_key) {
       BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
-                   key->base.limit_trig_input_range),
+                   key->base.limit_trig_input_range,
+                   INTEL_VUE_LAYOUT_SEPARATE),
    };
 }
 
@@ -1884,7 +1898,7 @@ iris_compile_vs(struct iris_screen *screen,
 
       brw_compute_vue_map(devinfo,
                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
-                          nir->info.separate_shader, /* pos_slots */ 1);
+                          key->vue.layout, /* pos_slots */ 1);
 
       struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
 
@@ -1916,7 +1930,9 @@ iris_compile_vs(struct iris_screen *screen,
 
       elk_compute_vue_map(devinfo,
                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
-                          nir->info.separate_shader, /* pos_slots */ 1);
+                          nir->info.separate_shader ?
+                          INTEL_VUE_LAYOUT_SEPARATE :
+                          INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1);
 
       struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
 
@@ -1983,7 +1999,10 @@ iris_update_compiled_vs(struct iris_context *ice)
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
 
-   struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
+   struct iris_vs_prog_key key = {
+      KEY_INIT(vue.base),
+      .vue.layout = vue_layout(ish->nir->info.separate_shader),
+   };
    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
@@ -2208,6 +2227,7 @@ iris_update_compiled_tcs(struct iris_context *ice)
       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
    struct iris_tcs_prog_key key = {
       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
+      .vue.layout = vue_layout(tcs ? tcs->nir->info.separate_shader : false),
       ._tes_primitive_mode = tes_info->tess._primitive_mode,
       .input_vertices =
          !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
@@ -2416,7 +2436,10 @@ iris_update_compiled_tes(struct iris_context *ice)
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
 
-   struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
+   struct iris_tes_prog_key key = {
+      KEY_INIT(vue.base),
+      .vue.layout = vue_layout(ish->nir->info.separate_shader),
+   };
    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
@@ -2500,7 +2523,7 @@ iris_compile_gs(struct iris_screen *screen,
 
       brw_compute_vue_map(devinfo,
                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
-                          nir->info.separate_shader, /* pos_slots */ 1);
+                          key->vue.layout, /* pos_slots */ 1);
 
       struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
 
@@ -2530,7 +2553,9 @@ iris_compile_gs(struct iris_screen *screen,
 
       elk_compute_vue_map(devinfo,
                           &elk_prog_data->base.vue_map, nir->info.outputs_written,
-                          nir->info.separate_shader, /* pos_slots */ 1);
+                          nir->info.separate_shader ?
+                          INTEL_VUE_LAYOUT_SEPARATE :
+                          INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1);
 
       struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
 
@@ -2600,7 +2625,10 @@ iris_update_compiled_gs(struct iris_context *ice)
    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
 
    if (ish) {
-      struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
+      struct iris_gs_prog_key key = {
+         KEY_INIT(vue.base),
+         .vue.layout = vue_layout(ish->nir->info.separate_shader),
+      };
       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
       bool added;
@@ -2777,7 +2805,10 @@ iris_update_compiled_fs(struct iris_context *ice)
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
-   struct iris_fs_prog_key key = { KEY_INIT(base) };
+   struct iris_fs_prog_key key = {
+      KEY_INIT(base),
+      .vue_layout = vue_layout(ish->nir->info.separate_shader),
+   };
    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
 
    struct intel_vue_map *last_vue_map =
@@ -2847,7 +2878,7 @@ update_last_vue_map(struct iris_context *ice,
       ice->state.dirty |= IRIS_DIRTY_CLIP;
    }
 
-   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
+   if (changed_slots || (old_map && old_map->layout != vue_map->layout)) {
       ice->state.dirty |= IRIS_DIRTY_SBE;
    }
 
@@ -3429,13 +3460,17 @@ iris_create_shader_state(struct pipe_context *ctx,
       if (info->clip_distance_array_size == 0)
          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
 
-      key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
+      key.vs = (struct iris_vs_prog_key) {
+         KEY_INIT(vue.base),
+         .vue.layout = vue_layout(ish->nir->info.separate_shader),
+      };
       key_size = sizeof(key.vs);
       break;
 
    case MESA_SHADER_TESS_CTRL: {
       key.tcs = (struct iris_tcs_prog_key) {
          KEY_INIT(vue.base),
+         .vue.layout = vue_layout(ish->nir->info.separate_shader),
          // XXX: make sure the linker fills this out from the TES...
          ._tes_primitive_mode =
          info->tess._primitive_mode ? info->tess._primitive_mode
@@ -3463,6 +3498,7 @@ iris_create_shader_state(struct pipe_context *ctx,
 
       key.tes = (struct iris_tes_prog_key) {
          KEY_INIT(vue.base),
+         .vue.layout = vue_layout(ish->nir->info.separate_shader),
          // XXX: not ideal, need TCS output/TES input unification
          .inputs_read = info->inputs_read,
          .patch_inputs_read = info->patch_inputs_read,
@@ -3474,7 +3510,10 @@ iris_create_shader_state(struct pipe_context *ctx,
    case MESA_SHADER_GEOMETRY:
       ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
 
-      key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
+      key.gs = (struct iris_gs_prog_key) {
+         KEY_INIT(vue.base),
+         .vue.layout = vue_layout(ish->nir->info.separate_shader),
+      };
       key_size = sizeof(key.gs);
       break;
 
@@ -3505,6 +3544,7 @@ iris_create_shader_state(struct pipe_context *ctx,
 
       key.fs = (struct iris_fs_prog_key) {
          KEY_INIT(base),
+         .vue_layout = vue_layout(ish->nir->info.separate_shader),
          .nr_color_regions = util_bitcount(color_outputs),
          .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
          .input_slots_valid =
diff --git a/src/intel/blorp/blorp_elk.c b/src/intel/blorp/blorp_elk.c
index cac5315278b..d9b6c390144 100644
--- a/src/intel/blorp/blorp_elk.c
+++ b/src/intel/blorp/blorp_elk.c
@@ -91,7 +91,9 @@ blorp_compile_vs_elk(struct blorp_context *blorp, void *mem_ctx,
    elk_compute_vue_map(compiler->devinfo,
                        &vs_prog_data->base.vue_map,
                        nir->info.outputs_written,
-                       nir->info.separate_shader,
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED,
                        1);
 
    struct elk_vs_prog_key vs_key = { 0, };
@@ -231,7 +233,8 @@ blorp_ensure_sf_program_elk(struct blorp_batch *batch,
    unsigned program_size;
 
    struct intel_vue_map vue_map;
-   elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid, false, 1);
+   elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid,
+                       INTEL_VUE_LAYOUT_FIXED, 1);
 
    struct elk_sf_prog_data prog_data_tmp;
    program = elk_compile_sf(compiler, mem_ctx, &key.key,
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index f7518d370b7..0ad3c9cfcab 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -825,7 +825,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
          struct intel_vue_map prev_stage_vue_map;
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                              key->input_slots_valid,
-                             nir->info.separate_shader, 1);
+                             key->base.vue_layout, 1);
 
          int first_slot =
             brw_compute_first_fs_urb_slot_required(unique_fs_attrs,
diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp
index 3161a5845fa..b0c0b11a2a3 100644
--- a/src/intel/compiler/brw_compile_gs.cpp
+++ b/src/intel/compiler/brw_compile_gs.cpp
@@ -161,7 +161,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
    GLbitfield64 inputs_read = nir->info.inputs_read;
    brw_compute_vue_map(compiler->devinfo,
                        &input_vue_map, inputs_read,
-                       nir->info.separate_shader, 1);
+                       key->base.vue_layout, 1);
 
    brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
    brw_nir_lower_vue_inputs(nir, &input_vue_map);
diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp
index 87337412b5d..82562e358a6 100644
--- a/src/intel/compiler/brw_compile_mesh.cpp
+++ b/src/intel/compiler/brw_compile_mesh.cpp
@@ -1662,12 +1662,9 @@ brw_compile_mesh(const struct brw_compiler *compiler,
 
    brw_nir_lower_tue_inputs(nir, params->tue_map);
 
-   /* Incorrectly set separate to false until we fix the anv/brw in the next
-    * commit.
-    */
    brw_compute_mue_map(compiler, nir, &prog_data->map,
                        prog_data->index_format,
-                       false /* TODO: use nir->info.separate_shader */);
+                       key->base.vue_layout);
    brw_nir_lower_mue_outputs(nir, &prog_data->map);
 
    prog_data->autostrip_enable = brw_mesh_autostrip_enable(compiler, nir, &prog_data->map);
diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp
index c32f6f96a5f..96a51bb97e1 100644
--- a/src/intel/compiler/brw_compile_tcs.cpp
+++ b/src/intel/compiler/brw_compile_tcs.cpp
@@ -200,7 +200,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 
    struct intel_vue_map input_vue_map;
    brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
-                       nir->info.separate_shader, 1);
+                       key->base.vue_layout, 1);
    brw_compute_tess_vue_map(&vue_prog_data->vue_map,
                             nir->info.outputs_written,
                             nir->info.patch_outputs_written);
diff --git a/src/intel/compiler/brw_compile_tes.cpp b/src/intel/compiler/brw_compile_tes.cpp
index 484f7bcf5c2..d6f13baae0d 100644
--- a/src/intel/compiler/brw_compile_tes.cpp
+++ b/src/intel/compiler/brw_compile_tes.cpp
@@ -84,7 +84,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
 
    brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
                        nir->info.outputs_written,
-                       nir->info.separate_shader, 1);
+                       key->base.vue_layout, 1);
 
    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
 
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index b56bef64337..b45f0eb0393 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -210,14 +210,16 @@ struct brw_base_prog_key {
 
    bool uses_inline_push_addr:1;
 
-   unsigned padding:21;
+   enum intel_vue_layout vue_layout:2;
 
    /**
     * Apply workarounds for SIN and COS input range problems.
     * This limits input range for SIN and COS to [-2p : 2p] to
     * avoid precision issues.
     */
-   bool limit_trig_input_range;
+   bool limit_trig_input_range:1;
+
+   unsigned padding:26;
 };
 
 /**
@@ -1070,7 +1072,7 @@ brw_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying)
 void brw_compute_vue_map(const struct intel_device_info *devinfo,
                          struct intel_vue_map *vue_map,
                          uint64_t slots_valid,
-                         bool separate_shader,
+                         enum intel_vue_layout layout,
                          uint32_t pos_slots);
 
 void brw_compute_tess_vue_map(struct intel_vue_map *const vue_map,
diff --git a/src/intel/compiler/brw_vue_map.c b/src/intel/compiler/brw_vue_map.c
index 1cb5774c350..922c208df8e 100644
--- a/src/intel/compiler/brw_vue_map.c
+++ b/src/intel/compiler/brw_vue_map.c
@@ -60,10 +60,10 @@ void
 brw_compute_vue_map(const struct intel_device_info *devinfo,
                     struct intel_vue_map *vue_map,
                     uint64_t slots_valid,
-                    bool separate,
+                    enum intel_vue_layout layout,
                     uint32_t pos_slots)
 {
-   if (separate) {
+   if (layout != INTEL_VUE_LAYOUT_FIXED) {
       /* In SSO mode, we don't know whether the adjacent stage will
        * read/write gl_ClipDistance, which has a fixed slot location.
        * We have to assume the worst and reserve a slot for it, or else
@@ -77,7 +77,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
    }
 
    vue_map->slots_valid = slots_valid;
-   vue_map->separate = separate;
+   vue_map->layout = layout;
 
    /* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their
     * own varying slots -- they are stored in the first VUE slot
@@ -177,7 +177,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
    uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
    while (generics != 0) {
       const int varying = ffsll(generics) - 1;
-      if (separate) {
+      if (layout != INTEL_VUE_LAYOUT_FIXED) {
          slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
       }
       assign_vue_slot(vue_map, varying, slot++);
@@ -202,8 +202,10 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
    /* I don't think anything actually uses this... */
    vue_map->slots_valid = vertex_slots;
 
-   /* separate isn't really meaningful, but make sure it's initialized */
-   vue_map->separate = false;
+   /* The layout isn't really meaningful here: tessellation shaders are
+    * always compiled together, so use a fixed layout.
+    */
+   vue_map->layout = INTEL_VUE_LAYOUT_FIXED;
 
    vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
                      VARYING_BIT_TESS_LEVEL_INNER);
@@ -278,12 +280,17 @@ void
 brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
                   gl_shader_stage stage)
 {
+   const char *layout_name =
+      vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "fixed" :
+      vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE ? "separate" :
+      "separate-mesh";
+
    if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
       fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
               vue_map->num_slots,
               vue_map->num_per_patch_slots,
               vue_map->num_per_vertex_slots,
-              vue_map->separate ? "SSO" : "non-SSO");
+              layout_name);
       for (int i = 0; i < vue_map->num_slots; i++) {
          if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
             fprintf(fp, "  [%02d] VARYING_SLOT_PATCH%d\n", i,
@@ -295,8 +302,7 @@ brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
       }
    } else {
       fprintf(fp, "%s VUE map (%d slots, %s)\n",
-              gl_shader_stage_name(stage),
-              vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
+              gl_shader_stage_name(stage), vue_map->num_slots, layout_name);
       for (int i = 0; i < vue_map->num_slots; i++) {
          fprintf(fp, "  [%02d] %s\n", i,
                  varying_name(vue_map->slot_to_varying[i], stage));
diff --git a/src/intel/compiler/elk/elk_compiler.h b/src/intel/compiler/elk/elk_compiler.h
index 5162723e674..0f8f658b1f7 100644
--- a/src/intel/compiler/elk/elk_compiler.h
+++ b/src/intel/compiler/elk/elk_compiler.h
@@ -1215,7 +1215,7 @@ elk_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying)
 void elk_compute_vue_map(const struct intel_device_info *devinfo,
                          struct intel_vue_map *vue_map,
                          uint64_t slots_valid,
-                         bool separate_shader,
+                         enum intel_vue_layout layout,
                          uint32_t pos_slots);
 
 void elk_compute_tess_vue_map(struct intel_vue_map *const vue_map,
diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp
index 530f04568f5..0a6a6627459 100644
--- a/src/intel/compiler/elk/elk_fs.cpp
+++ b/src/intel/compiler/elk/elk_fs.cpp
@@ -1424,7 +1424,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
          struct intel_vue_map prev_stage_vue_map;
          elk_compute_vue_map(devinfo, &prev_stage_vue_map,
                              key->input_slots_valid,
-                             nir->info.separate_shader, 1);
+                             nir->info.separate_shader ?
+                             INTEL_VUE_LAYOUT_SEPARATE :
+                             INTEL_VUE_LAYOUT_FIXED, 1);
 
          int first_slot =
             elk_compute_first_urb_slot_required(inputs_read,
diff --git a/src/intel/compiler/elk/elk_shader.cpp b/src/intel/compiler/elk/elk_shader.cpp
index affaf16eeed..05c0a3b3032 100644
--- a/src/intel/compiler/elk/elk_shader.cpp
+++ b/src/intel/compiler/elk/elk_shader.cpp
@@ -1276,7 +1276,9 @@ elk_compile_tes(const struct elk_compiler *compiler,
 
    elk_compute_vue_map(devinfo, &prog_data->base.vue_map,
                        nir->info.outputs_written,
-                       nir->info.separate_shader, 1);
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED, 1);
 
    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
 
diff --git a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
index 21867747b4e..06cc7b8ab74 100644
--- a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp
@@ -610,7 +610,9 @@ elk_compile_gs(const struct elk_compiler *compiler,
    GLbitfield64 inputs_read = nir->info.inputs_read;
    elk_compute_vue_map(compiler->devinfo,
                        &c.input_vue_map, inputs_read,
-                       nir->info.separate_shader, 1);
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED, 1);
 
    elk_nir_apply_key(nir, compiler, &key->base, 8);
    elk_nir_lower_vue_inputs(nir, &c.input_vue_map);
diff --git a/src/intel/compiler/elk/elk_vec4_tcs.cpp b/src/intel/compiler/elk/elk_vec4_tcs.cpp
index 673af595c58..1772670f255 100644
--- a/src/intel/compiler/elk/elk_vec4_tcs.cpp
+++ b/src/intel/compiler/elk/elk_vec4_tcs.cpp
@@ -373,7 +373,9 @@ elk_compile_tcs(const struct elk_compiler *compiler,
 
    struct intel_vue_map input_vue_map;
    elk_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
-                       nir->info.separate_shader, 1);
+                       nir->info.separate_shader ?
+                       INTEL_VUE_LAYOUT_SEPARATE :
+                       INTEL_VUE_LAYOUT_FIXED, 1);
    elk_compute_tess_vue_map(&vue_prog_data->vue_map,
                             nir->info.outputs_written,
                             nir->info.patch_outputs_written);
diff --git a/src/intel/compiler/elk/elk_vue_map.c b/src/intel/compiler/elk/elk_vue_map.c
index bb8f751ce83..e119b2e31db 100644
--- a/src/intel/compiler/elk/elk_vue_map.c
+++ b/src/intel/compiler/elk/elk_vue_map.c
@@ -60,17 +60,20 @@ void
 elk_compute_vue_map(const struct intel_device_info *devinfo,
                     struct intel_vue_map *vue_map,
                     uint64_t slots_valid,
-                    bool separate,
+                    enum intel_vue_layout layout,
                     uint32_t pos_slots)
 {
+   assert(layout == INTEL_VUE_LAYOUT_FIXED ||
+          layout == INTEL_VUE_LAYOUT_SEPARATE);
+
    /* Keep using the packed/contiguous layout on old hardware - we only need
     * the SSO layout when using geometry/tessellation shaders or 32 FS input
     * varyings, which only exist on Gen >= 6.  It's also a bit more efficient.
     */
    if (devinfo->ver < 6)
-      separate = false;
+      layout = INTEL_VUE_LAYOUT_FIXED;
 
-   if (separate) {
+   if (layout == INTEL_VUE_LAYOUT_SEPARATE) {
       /* In SSO mode, we don't know whether the adjacent stage will
        * read/write gl_ClipDistance, which has a fixed slot location.
        * We have to assume the worst and reserve a slot for it, or else
@@ -84,7 +87,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo,
    }
 
    vue_map->slots_valid = slots_valid;
-   vue_map->separate = separate;
+   vue_map->layout = layout;
 
    /* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their
     * own varying slots -- they are stored in the first VUE slot
@@ -198,7 +201,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo,
    uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
    while (generics != 0) {
       const int varying = ffsll(generics) - 1;
-      if (separate) {
+      if (layout == INTEL_VUE_LAYOUT_SEPARATE) {
          slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
       }
       assign_vue_slot(vue_map, varying, slot++);
@@ -224,7 +227,7 @@ elk_compute_tess_vue_map(struct intel_vue_map *vue_map,
    vue_map->slots_valid = vertex_slots;
 
    /* separate isn't really meaningful, but make sure it's initialized */
-   vue_map->separate = false;
+   vue_map->layout = INTEL_VUE_LAYOUT_FIXED;
 
    vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
                      VARYING_BIT_TESS_LEVEL_INNER);
@@ -301,12 +304,15 @@ void
 elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
                   gl_shader_stage stage)
 {
+   const char *layout_name =
+      vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "non-SSO" : "SSO";
+
    if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
       fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
               vue_map->num_slots,
               vue_map->num_per_patch_slots,
               vue_map->num_per_vertex_slots,
-              vue_map->separate ? "SSO" : "non-SSO");
+              layout_name);
       for (int i = 0; i < vue_map->num_slots; i++) {
          if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
             fprintf(fp, "  [%d] VARYING_SLOT_PATCH%d\n", i,
@@ -317,8 +323,7 @@ elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
          }
       }
    } else {
-      fprintf(fp, "VUE map (%d slots, %s)\n",
-              vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
+      fprintf(fp, "VUE map (%d slots, %s)\n", vue_map->num_slots, layout_name);
       for (int i = 0; i < vue_map->num_slots; i++) {
          fprintf(fp, "  [%d] %s\n", i,
                  varying_name(vue_map->slot_to_varying[i], stage));
diff --git a/src/intel/compiler/intel_shader_enums.h b/src/intel/compiler/intel_shader_enums.h
index 15a03ba781d..91255da816c 100644
--- a/src/intel/compiler/intel_shader_enums.h
+++ b/src/intel/compiler/intel_shader_enums.h
@@ -122,6 +122,19 @@ enum intel_barycentric_mode {
     (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
     (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
 
+enum intel_vue_layout {
+   /**
+    * Layout is fixed and shared by producer/consumer, allowing for
+    * tight packing of the varying slots.
+    */
+   INTEL_VUE_LAYOUT_FIXED = 0,
+   /**
+    * Layout is separate: works for ARB_separate_shader_objects, but
+    * without Mesh support.
+    */
+   INTEL_VUE_LAYOUT_SEPARATE,
+};
+
 /**
  * Data structure recording the relationship between the gl_varying_slot enum
  * and "slots" within the vertex URB entry (VUE).  A "slot" is defined as a
@@ -142,7 +155,7 @@ struct intel_vue_map {
    uint64_t slots_valid;
 
    /**
-    * Is this VUE map for a separate shader pipeline?
+    * The layout of the VUE
     *
     * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
     * without the linker having a chance to dead code eliminate unused varyings.
@@ -150,7 +163,7 @@ struct intel_vue_map {
     * This means that we have to use a fixed slot layout, based on the output's
     * location field, rather than assigning slots in a compact contiguous block.
     */
-   bool separate;
+   enum intel_vue_layout layout;
 
    /**
     * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 3ff44b435f8..44629946981 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -316,20 +316,23 @@ anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
 
 static void
 populate_base_prog_key(struct anv_pipeline_stage *stage,
-                       const struct anv_device *device)
+                       const struct anv_device *device,
+                       const enum intel_vue_layout vue_layout)
 {
    stage->key.base.robust_flags = anv_get_robust_flags(&stage->rstate);
+   stage->key.base.vue_layout = vue_layout;
    stage->key.base.limit_trig_input_range =
       device->physical->instance->limit_trig_input_range;
 }
 
 static void
 populate_vs_prog_key(struct anv_pipeline_stage *stage,
-                     const struct anv_device *device)
+                     const struct anv_device *device,
+                     const enum intel_vue_layout vue_layout)
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 
    stage->key.vs.vf_component_packing =
       device->physical->instance->vf_component_packing;
@@ -338,31 +341,34 @@ populate_vs_prog_key(struct anv_pipeline_stage *stage,
 static void
 populate_tcs_prog_key(struct anv_pipeline_stage *stage,
                       const struct anv_device *device,
-                      unsigned input_vertices)
+                      unsigned input_vertices,
+                      const enum intel_vue_layout vue_layout)
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 
    stage->key.tcs.input_vertices = input_vertices;
 }
 
 static void
 populate_tes_prog_key(struct anv_pipeline_stage *stage,
-                      const struct anv_device *device)
+                      const struct anv_device *device,
+                      const enum intel_vue_layout vue_layout)
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 }
 
 static void
 populate_gs_prog_key(struct anv_pipeline_stage *stage,
-                     const struct anv_device *device)
+                     const struct anv_device *device,
+                     const enum intel_vue_layout vue_layout)
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 }
 
 static bool
@@ -424,18 +430,19 @@ populate_task_prog_key(struct anv_pipeline_stage *stage,
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
 
    stage->key.base.uses_inline_push_addr = true;
 }
 
 static void
 populate_mesh_prog_key(struct anv_pipeline_stage *stage,
-                       const struct anv_device *device)
+                       const struct anv_device *device,
+                       const enum intel_vue_layout vue_layout)
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 
    stage->key.base.uses_inline_push_addr = true;
 }
@@ -462,13 +469,14 @@ populate_wm_prog_key(struct anv_pipeline_stage *stage,
                      const struct vk_multisample_state *ms,
                      const struct vk_fragment_shading_rate_state *fsr,
                      const struct vk_render_pass_state *rp,
-                     const enum intel_sometimes is_mesh)
+                     const enum intel_sometimes is_mesh,
+                     const enum intel_vue_layout vue_layout)
 {
    const struct anv_device *device = pipeline->base.device;
 
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, vue_layout);
 
    struct brw_wm_prog_key *key = &stage->key.wm;
 
@@ -553,7 +561,7 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage,
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
 
    stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125;
 }
@@ -565,7 +573,7 @@ populate_bs_prog_key(struct anv_pipeline_stage *stage,
 {
    memset(&stage->key, 0, sizeof(stage->key));
 
-   populate_base_prog_key(stage, device);
+   populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
 
    stage->key.bs.pipeline_ray_flags = ray_flags;
    stage->key.bs.pipeline_ray_flags = ray_flags;
@@ -1159,7 +1167,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler,
    brw_compute_vue_map(compiler->devinfo,
                        &vs_stage->prog_data.vs.base.vue_map,
                        vs_stage->nir->info.outputs_written,
-                       vs_stage->nir->info.separate_shader,
+                       vs_stage->key.base.vue_layout,
                        pos_slots);
 
    vs_stage->num_stats = 1;
@@ -1335,7 +1343,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
    brw_compute_vue_map(compiler->devinfo,
                        &gs_stage->prog_data.gs.base.vue_map,
                        gs_stage->nir->info.outputs_written,
-                       gs_stage->nir->info.separate_shader, 1);
+                       gs_stage->key.base.vue_layout, 1);
 
    gs_stage->num_stats = 1;
 
@@ -1522,7 +1530,7 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
       brw_compute_vue_map(compiler->devinfo,
                           &prev_vue_map,
                           fs_stage->nir->info.inputs_read,
-                          fs_stage->nir->info.separate_shader,
+                          fs_stage->key.base.vue_layout,
                           pos_slots);
 
       fs_stage->key.wm.input_slots_valid = prev_vue_map.slots_valid;
@@ -1742,6 +1750,16 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
                                 const struct vk_graphics_pipeline_state *state,
                                 struct anv_pipeline_stage *stages)
 {
+   struct anv_device *device = pipeline->base.device;
+   enum intel_vue_layout vue_layout;
+
+   if ((pipeline->base.flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) ||
+       !device->vk.enabled_extensions.EXT_graphics_pipeline_library) {
+      vue_layout = INTEL_VUE_LAYOUT_FIXED;
+   } else {
+      vue_layout = INTEL_VUE_LAYOUT_SEPARATE;
+   }
+
    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
       if (!anv_pipeline_base_has_stage(pipeline, s))
          continue;
@@ -1751,20 +1769,21 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
       const struct anv_device *device = pipeline->base.device;
       switch (stages[s].stage) {
       case MESA_SHADER_VERTEX:
-         populate_vs_prog_key(&stages[s], device);
+         populate_vs_prog_key(&stages[s], device, vue_layout);
          break;
       case MESA_SHADER_TESS_CTRL:
          populate_tcs_prog_key(&stages[s],
                                device,
                                BITSET_TEST(state->dynamic,
                                            MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) ?
-                               0 : state->ts->patch_control_points);
+                               0 : state->ts->patch_control_points,
+                               vue_layout);
          break;
       case MESA_SHADER_TESS_EVAL:
-         populate_tes_prog_key(&stages[s], device);
+         populate_tes_prog_key(&stages[s], device, vue_layout);
          break;
       case MESA_SHADER_GEOMETRY:
-         populate_gs_prog_key(&stages[s], device);
+         populate_gs_prog_key(&stages[s], device, vue_layout);
          break;
       case MESA_SHADER_FRAGMENT: {
          /* Assume rasterization enabled in any of the following case :
@@ -1794,7 +1813,8 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
                               pipeline,
                               state->dynamic,
                               raster_enabled ? state->ms : NULL,
-                              state->fsr, state->rp, is_mesh);
+                              state->fsr, state->rp, is_mesh,
+                              vue_layout);
          break;
       }
 
@@ -1803,7 +1823,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
          break;
 
       case MESA_SHADER_MESH: {
-         populate_mesh_prog_key(&stages[s], device);
+         populate_mesh_prog_key(&stages[s], device, vue_layout);
          break;
       }
 
@@ -2019,8 +2039,11 @@ anv_pipeline_nir_preprocess(struct anv_pipeline *pipeline,
    };
    NIR_PASS(_, stage->nir, nir_opt_access, &opt_access_options);
 
-   /* Vulkan uses the separate-shader linking model */
-   stage->nir->info.separate_shader = true;
+   /* Use a separate-shader linking model for pipeline libraries; otherwise we
+    * do cross-stage linking.
+    */
+   stage->nir->info.separate_shader =
+      stage->key.base.vue_layout != INTEL_VUE_LAYOUT_FIXED;
 
    struct brw_nir_compiler_opts opts = {
       .softfp64 = device->fp64_nir,