From becb014d2747ecf455a6a3ac0c38be236e2da53a Mon Sep 17 00:00:00 2001 From: Benjamin Lee Date: Sat, 19 Oct 2024 21:03:13 -0700 Subject: [PATCH] nir: treat per-view outputs as arrayed IO This is needed for implementing multiview in panvk, where the address calculation for multiview outputs is not well-represented by lowering to nir_intrinsic_store_output with a single offset. The case where a variable is both per-view and per-{vertex,primitive} is now unsupported. This would come up with drivers implementing NV_mesh_shader or using nir_lower_multiview on geometry, tessellation, or mesh shaders. No drivers currently do either of these. There was some code that attempted to handle the nested per-view case by unwrapping per-view/arrayed types twice, but it's unclear to what extent this actually worked. ANV and Turnip both rely on per-view outputs being assigned a unique driver location for each view, so I've added on option to configure that behavior rather than removing it. Signed-off-by: Benjamin Lee Reviewed-by: Boris Brezillon Reviewed-by: Lionel Landwerlin Reviewed-by: Connor Abbott Part-of: --- src/compiler/glsl/gl_nir_link_varyings.c | 10 ++-- .../glsl/gl_nir_lower_packed_varyings.c | 2 +- src/compiler/nir/nir.h | 9 +++ src/compiler/nir/nir_divergence_analysis.c | 6 ++ src/compiler/nir/nir_gather_info.c | 7 +-- src/compiler/nir/nir_intrinsics.py | 7 +++ src/compiler/nir/nir_linking_helpers.c | 32 +++++----- .../nir/nir_lower_clamp_color_outputs.c | 1 + src/compiler/nir/nir_lower_clip.c | 1 + .../nir/nir_lower_clip_cull_distance_arrays.c | 5 -- src/compiler/nir/nir_lower_clip_disable.c | 1 + src/compiler/nir/nir_lower_io.c | 37 ++++++++++-- src/compiler/nir/nir_lower_io_to_scalar.c | 2 + src/compiler/nir/nir_lower_mediump.c | 2 + src/compiler/nir/nir_lower_point_size.c | 3 +- src/compiler/nir/nir_lower_point_size_mov.c | 1 + src/compiler/nir/nir_lower_wrmasks.c | 2 + .../nir/nir_move_vec_src_uses_to_dest.c | 3 +- src/compiler/nir/nir_opt_dead_cf.c | 1 + 
src/compiler/nir/nir_opt_shrink_stores.c | 1 + src/compiler/nir/nir_opt_undef.c | 1 + src/compiler/nir/nir_opt_varyings.c | 3 + src/compiler/nir/nir_opt_vectorize_io.c | 2 + src/compiler/nir/nir_print.c | 5 +- src/compiler/nir/nir_validate.c | 6 +- src/freedreno/ir3/ir3_compiler.c | 2 + src/freedreno/ir3/ir3_compiler_nir.c | 30 +++++----- src/freedreno/ir3/ir3_nir.c | 3 +- src/freedreno/ir3/ir3_nir.h | 1 + src/freedreno/ir3/ir3_nir_lower_64b.c | 2 + src/intel/compiler/brw_compiler.c | 1 + src/intel/compiler/brw_nir.c | 59 ++++++++++++++++++- src/intel/compiler/elk/elk_nir.c | 2 +- src/intel/vulkan/anv_mesh_perprim_wa.c | 2 +- .../compiler/bi_lower_divergent_indirects.c | 6 ++ 35 files changed, 191 insertions(+), 67 deletions(-) diff --git a/src/compiler/glsl/gl_nir_link_varyings.c b/src/compiler/glsl/gl_nir_link_varyings.c index 36e370a72ab..42d70e96d43 100644 --- a/src/compiler/glsl/gl_nir_link_varyings.c +++ b/src/compiler/glsl/gl_nir_link_varyings.c @@ -77,7 +77,7 @@ static const struct glsl_type * get_varying_type(const nir_variable *var, gl_shader_stage stage) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3387,7 +3387,7 @@ set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage assert(var->data.location >= VARYING_SLOT_VAR0); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3536,7 +3536,7 @@ remove_unused_io_vars(nir_shader *producer, nir_shader *consumer, unsigned location = var->data.location - VARYING_SLOT_VAR0; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, shader->info.stage)) { 
assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3606,7 +3606,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, producer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -3622,7 +3622,7 @@ remove_unused_varyings(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, consumer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/compiler/glsl/gl_nir_lower_packed_varyings.c b/src/compiler/glsl/gl_nir_lower_packed_varyings.c index 1bec7ee59ca..928e92e10d1 100644 --- a/src/compiler/glsl/gl_nir_lower_packed_varyings.c +++ b/src/compiler/glsl/gl_nir_lower_packed_varyings.c @@ -222,7 +222,7 @@ lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var, return false; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, shader->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 89a96122230..5e9e5ef069d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4377,6 +4377,15 @@ typedef struct nir_shader_compiler_options { /** Whether derivative intrinsics must be scalarized. */ bool scalarize_ddx; + /** + * Assign a range of driver locations to per-view outputs, with unique + * slots for each view. If unset, per-view outputs will be treated + * similarly to other arrayed IO, and only slots for one view will be + * assigned. 
Regardless of this setting, per-view outputs are only assigned + * slots for one value in var->data.location. + */ + bool per_view_unique_driver_locations; + /** Options determining lowering and behavior of inputs and outputs. */ nir_io_options io_options; diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 2e556007174..fe8f266ad58 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -418,6 +418,12 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) unreachable("Invalid stage for load_output"); } break; + case nir_intrinsic_load_per_view_output: + is_divergent = instr->src[0].ssa->divergent || + instr->src[1].ssa->divergent || + (stage == MESA_SHADER_TESS_CTRL && + !(options & nir_divergence_single_patch_per_tcs_subgroup)); + break; case nir_intrinsic_load_per_vertex_output: /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). 
*/ assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH); diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index a8621ab4a3f..b6ecba36ecb 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -221,11 +221,6 @@ mark_whole_variable(nir_shader *shader, nir_variable *var, type = glsl_get_array_element(type); } - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - const unsigned slots = nir_variable_count_slots(var, type); set_io_mask(shader, var, 0, slots, deref, is_output_read); } @@ -578,6 +573,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: if (shader->info.stage == MESA_SHADER_TESS_CTRL && instr->intrinsic == nir_intrinsic_load_output && @@ -613,6 +609,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: if (shader->info.stage == MESA_SHADER_TESS_CTRL && instr->intrinsic == nir_intrinsic_store_output && diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d664753095c..0253c953d09 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1182,6 +1182,11 @@ load("ssbo_address", [1, 1], [], [CAN_ELIMINATE, CAN_REORDER]) load("output", [1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE]) # src[] = { vertex, offset }. load("per_vertex_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) +# src[] = { view_index, offset }. 
+# view_index is "compacted", meaning it is the index of the Nth *enabled* view, +# not the Nth absolute view. See the nir_lower_multiview docs for a more +# detailed explanation. +load("per_view_output", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) # src[] = { primitive, offset }. load("per_primitive_output", [1, 1], [BASE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE]) # src[] = { offset }. @@ -1223,6 +1228,8 @@ def store(name, srcs, indices=[], flags=[]): store("output", [1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS, IO_XFB, IO_XFB2]) # src[] = { value, vertex, offset }. store("per_vertex_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) +# src[] = { value, view_index, offset }. +store("per_view_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) # src[] = { value, primitive, offset }. store("per_primitive_output", [1, 1], [BASE, RANGE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS]) # src[] = { value, block_index, offset } diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 4b84369b1a0..b9fdea68eca 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -49,7 +49,7 @@ get_variable_io_mask(nir_variable *var, gl_shader_stage stage) assert(location < 64); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -337,7 +337,7 @@ get_unmoveable_components_masks(nir_shader *shader, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -438,7 +438,7 @@ 
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -578,7 +578,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, producer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -641,8 +641,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer, if (!vc_info->initialised) { const struct glsl_type *type = in_var->type; - if (nir_is_arrayed_io(in_var, consumer->info.stage) || - in_var->data.per_view) { + if (nir_is_arrayed_io(in_var, consumer->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -1539,18 +1538,17 @@ nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode, last_partial = false; } - /* per-view variables have an extra array dimension, which is ignored - * when counting user-facing slots (var->data.location), but *not* - * with driver slots (var->data.driver_location). That is, each user - * slot maps to multiple driver slots. - */ - driver_size = glsl_count_attribute_slots(type, false); - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - var_size = - glsl_count_attribute_slots(glsl_get_array_element(type), false); + var_size = glsl_count_attribute_slots(type, false); + if (var->data.per_view && + shader->options->per_view_unique_driver_locations) { + /* per-view variables have an extra array dimension, which is + * ignored when counting user-facing slots (var->data.location), + * but *not* with driver slots (var->data.driver_location). 
That + * is, each user slot maps to multiple driver slots. */ + const struct glsl_type *array_type = var->type; + driver_size = glsl_count_attribute_slots(array_type, false); } else { - var_size = driver_size; + driver_size = var_size; } } diff --git a/src/compiler/nir/nir_lower_clamp_color_outputs.c b/src/compiler/nir/nir_lower_clamp_color_outputs.c index 17afa82716d..a0a7db9d57b 100644 --- a/src/compiler/nir/nir_lower_clamp_color_outputs.c +++ b/src/compiler/nir/nir_lower_clamp_color_outputs.c @@ -64,6 +64,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, nir_shader *shader) loc = out->data.location; break; case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: loc = nir_intrinsic_io_semantics(intr).location; break; default: diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index f7823a69bb7..e105929953d 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -165,6 +165,7 @@ find_output(nir_builder *b, unsigned location) if ((intr->intrinsic == nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output) && nir_intrinsic_io_semantics(intr).location == location) { assert(nir_src_is_const(*nir_get_io_offset_src(intr))); diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c index fd13145e934..1a12c2903c0 100644 --- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c +++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c @@ -105,11 +105,6 @@ get_unwrapped_array_length(nir_shader *nir, nir_variable *var) if (nir_is_arrayed_io(var, nir->info.stage)) type = glsl_get_array_element(type); - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - assert(glsl_type_is_array(type)); return 
glsl_get_length(type); diff --git a/src/compiler/nir/nir_lower_clip_disable.c b/src/compiler/nir/nir_lower_clip_disable.c index beeb00400a4..bf705cc9f04 100644 --- a/src/compiler/nir/nir_lower_clip_disable.c +++ b/src/compiler/nir/nir_lower_clip_disable.c @@ -128,6 +128,7 @@ lower_clip_plane_store_io(nir_builder *b, nir_intrinsic_instr *intr, case nir_intrinsic_store_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: break; default: return false; diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 1313a15855d..0c5732d5f69 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -148,6 +148,14 @@ nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage) if (var->data.patch || !glsl_type_is_array(var->type)) return false; + if (var->data.per_view) { + /* Nested arrayed outputs (both per-view and per-{vertex,primitive}) are + * unsupported. */ + assert(stage == MESA_SHADER_VERTEX); + assert(var->data.mode == nir_var_shader_out); + return true; + } + if (stage == MESA_SHADER_MESH) { /* NV_mesh_shader: this is flat array for the whole workgroup. */ if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) @@ -352,8 +360,14 @@ emit_load(struct lower_io_state *state, } break; case nir_var_shader_out: - op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? 
nir_intrinsic_load_per_primitive_output - : nir_intrinsic_load_per_vertex_output; + if (!array_index) + op = nir_intrinsic_load_output; + else if (var->data.per_primitive) + op = nir_intrinsic_load_per_primitive_output; + else if (var->data.per_view) + op = nir_intrinsic_load_per_view_output; + else + op = nir_intrinsic_load_per_vertex_output; break; case nir_var_uniform: op = nir_intrinsic_load_uniform; @@ -495,9 +509,15 @@ emit_store(struct lower_io_state *state, nir_def *data, nir_builder *b = &state->builder; assert(var->data.mode == nir_var_shader_out); - nir_intrinsic_op op = - !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output - : nir_intrinsic_store_per_vertex_output; + nir_intrinsic_op op; + if (!array_index) + op = nir_intrinsic_store_output; + else if (var->data.per_view) + op = nir_intrinsic_store_per_view_output; + else if (var->data.per_primitive) + op = nir_intrinsic_store_per_primitive_output; + else + op = nir_intrinsic_store_per_vertex_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->builder.shader, op); @@ -2806,6 +2826,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_load_input_vertex: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_smem_amd: @@ -2823,6 +2844,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) return 1; case nir_intrinsic_store_ssbo: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: return 2; default: @@ -2849,9 +2871,11 @@ nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: + case 
nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: return 0; case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: return 1; default: @@ -2992,9 +3016,11 @@ is_output(nir_intrinsic_instr *intrin) { return intrin->intrinsic == nir_intrinsic_load_output || intrin->intrinsic == nir_intrinsic_load_per_vertex_output || + intrin->intrinsic == nir_intrinsic_load_per_view_output || intrin->intrinsic == nir_intrinsic_load_per_primitive_output || intrin->intrinsic == nir_intrinsic_store_output || intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output || intrin->intrinsic == nir_intrinsic_store_per_primitive_output; } @@ -3003,6 +3029,7 @@ is_dual_slot(nir_intrinsic_instr *intrin) { if (intrin->intrinsic == nir_intrinsic_store_output || intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output || intrin->intrinsic == nir_intrinsic_store_per_primitive_output) { return nir_src_bit_size(intrin->src[0]) == 64 && nir_src_num_components(intrin->src[0]) >= 3; diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c b/src/compiler/nir/nir_lower_io_to_scalar.c index fe28722bd50..d28d4667372 100644 --- a/src/compiler/nir/nir_lower_io_to_scalar.c +++ b/src/compiler/nir/nir_lower_io_to_scalar.c @@ -290,6 +290,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data) if ((intr->intrinsic == nir_intrinsic_load_output || intr->intrinsic == nir_intrinsic_load_per_vertex_output || + intr->intrinsic == nir_intrinsic_load_per_view_output || intr->intrinsic == nir_intrinsic_load_per_primitive_output) && (state->mask & nir_var_shader_out) && (!state->filter || state->filter(instr, state->filter_data))) { @@ -308,6 +309,7 @@ nir_lower_io_to_scalar_instr(nir_builder *b, nir_instr *instr, void *data) if ((intr->intrinsic == 
nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output) && state->mask & nir_var_shader_out && (!state->filter || state->filter(instr, state->filter_data))) { diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c index 176e2d1e1b0..2114007e719 100644 --- a/src/compiler/nir/nir_lower_mediump.c +++ b/src/compiler/nir/nir_lower_mediump.c @@ -47,8 +47,10 @@ get_io_intrinsic(nir_instr *instr, nir_variable_mode modes, return modes & nir_var_shader_in ? intr : NULL; case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: *out_mode = nir_var_shader_out; return modes & nir_var_shader_out ? intr : NULL; default: diff --git a/src/compiler/nir/nir_lower_point_size.c b/src/compiler/nir/nir_lower_point_size.c index f91df81d130..b6fdc17f1e4 100644 --- a/src/compiler/nir/nir_lower_point_size.c +++ b/src/compiler/nir/nir_lower_point_size.c @@ -45,7 +45,8 @@ lower_point_size_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_variable *var = nir_deref_instr_get_variable(deref); location = var->data.location; psiz_src = &intr->src[1]; - } else if (intr->intrinsic == nir_intrinsic_store_output) { + } else if (intr->intrinsic == nir_intrinsic_store_output || + intr->intrinsic == nir_intrinsic_store_per_view_output) { location = nir_intrinsic_io_semantics(intr).location; psiz_src = &intr->src[0]; } diff --git a/src/compiler/nir/nir_lower_point_size_mov.c b/src/compiler/nir/nir_lower_point_size_mov.c index a951996396b..f199329c413 100644 --- a/src/compiler/nir/nir_lower_point_size_mov.c +++ b/src/compiler/nir/nir_lower_point_size_mov.c @@ -69,6 +69,7 @@ lower_point_size_mov(nir_builder *b, 
nir_intrinsic_instr *intr, void *data) switch (intr->intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: { nir_io_semantics sem = nir_intrinsic_io_semantics(intr); if (sem.location != VARYING_SLOT_PSIZ) diff --git a/src/compiler/nir/nir_lower_wrmasks.c b/src/compiler/nir/nir_lower_wrmasks.c index a0ff9df1026..605611c6e33 100644 --- a/src/compiler/nir/nir_lower_wrmasks.c +++ b/src/compiler/nir/nir_lower_wrmasks.c @@ -64,6 +64,7 @@ value_src(nir_intrinsic_op intrinsic) switch (intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: case nir_intrinsic_store_global: @@ -84,6 +85,7 @@ offset_src(nir_intrinsic_op intrinsic) case nir_intrinsic_store_scratch: return 1; case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: return 2; default: diff --git a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c index fc62361cade..c85aa19fc2a 100644 --- a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c +++ b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c @@ -85,7 +85,8 @@ move_vec_src_uses_to_dest_block(nir_block *block, bool skip_const_srcs) nir_instr *use_instr = nir_src_parent_instr(src); if (use_instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr); - if (intr->intrinsic == nir_intrinsic_store_output) + if (intr->intrinsic == nir_intrinsic_store_output || + intr->intrinsic == nir_intrinsic_store_per_view_output) return false; } } diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index cb260c7ce56..592491fec05 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -241,6 +241,7 
@@ node_is_dead(nir_cf_node *node) case nir_intrinsic_load_shared2_amd: case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: /* Same as above loads. */ return false; diff --git a/src/compiler/nir/nir_opt_shrink_stores.c b/src/compiler/nir/nir_opt_shrink_stores.c index a39e60a2015..027834e1f92 100644 --- a/src/compiler/nir/nir_opt_shrink_stores.c +++ b/src/compiler/nir/nir_opt_shrink_stores.c @@ -63,6 +63,7 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i switch (instr->intrinsic) { case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: case nir_intrinsic_store_global: diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c index 613a80ec685..4114def55cc 100644 --- a/src/compiler/nir/nir_opt_undef.c +++ b/src/compiler/nir/nir_opt_undef.c @@ -149,6 +149,7 @@ opt_undef_store(nir_intrinsic_instr *intrin) break; case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 243178b7de9..74f69b8073d 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -1495,14 +1495,17 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_ if (intr->intrinsic != nir_intrinsic_store_output && intr->intrinsic != nir_intrinsic_load_output && intr->intrinsic != nir_intrinsic_store_per_vertex_output && + intr->intrinsic != nir_intrinsic_store_per_view_output && intr->intrinsic != nir_intrinsic_store_per_primitive_output && intr->intrinsic != nir_intrinsic_load_per_vertex_output && + intr->intrinsic != 
nir_intrinsic_load_per_view_output && intr->intrinsic != nir_intrinsic_load_per_primitive_output) return false; bool is_store = intr->intrinsic == nir_intrinsic_store_output || intr->intrinsic == nir_intrinsic_store_per_vertex_output || + intr->intrinsic == nir_intrinsic_store_per_view_output || intr->intrinsic == nir_intrinsic_store_per_primitive_output; if (is_store) { diff --git a/src/compiler/nir/nir_opt_vectorize_io.c b/src/compiler/nir/nir_opt_vectorize_io.c index 888ca409155..04cd236f840 100644 --- a/src/compiler/nir/nir_opt_vectorize_io.c +++ b/src/compiler/nir/nir_opt_vectorize_io.c @@ -520,9 +520,11 @@ nir_opt_vectorize_io(nir_shader *shader, nir_variable_mode modes) case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_per_primitive_output: if (!(modes & nir_var_shader_out)) continue; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 8b4c2c25fef..6eb04eff4f0 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1371,6 +1371,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) case nir_intrinsic_store_output: case nir_intrinsic_store_per_primitive_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: mode = nir_var_shader_out; break; @@ -1423,7 +1424,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) state->shader->info.stage == MESA_SHADER_GEOMETRY && (instr->intrinsic == nir_intrinsic_store_output || instr->intrinsic == nir_intrinsic_store_per_primitive_output || - instr->intrinsic == nir_intrinsic_store_per_vertex_output)) { + instr->intrinsic == nir_intrinsic_store_per_vertex_output || + instr->intrinsic == nir_intrinsic_store_per_view_output)) { unsigned 
gs_streams = io.gs_streams; fprintf(fp, " gs_streams("); for (unsigned i = 0; i < 4; i++) { @@ -1651,6 +1653,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) case nir_intrinsic_load_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: var_mode = nir_var_shader_out; break; default: diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 815dd634df0..99befd17e47 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -618,6 +618,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_view_output: case nir_intrinsic_load_per_primitive_output: case nir_intrinsic_load_push_constant: /* All memory load operations must load at least a byte */ @@ -652,6 +653,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_per_view_output: if (state->shader->info.stage == MESA_SHADER_FRAGMENT) validate_assert(state, nir_src_bit_size(instr->src[0]) >= 8); else @@ -1527,10 +1529,6 @@ validate_var_decl(nir_variable *var, nir_variable_mode valid_modes, const struct glsl_type *type = glsl_get_array_element(var->type); if (nir_is_arrayed_io(var, state->shader->info.stage)) { - if (var->data.per_view) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } assert(glsl_type_is_array(type)); assert(glsl_type_is_scalar(glsl_get_array_element(type))); } else { diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 7566b9a057b..72b8e18a79f 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -118,6 +118,8 @@ static const nir_shader_compiler_options 
ir3_base_options = { .divergence_analysis_options = nir_divergence_uniform_load_tears, .scalarize_ddx = true, + + .per_view_unique_driver_locations = true, }; struct ir3_compiler * diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 6498ae48af0..aede968fefe 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2842,6 +2842,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) b = NULL; break; case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: setup_output(ctx, intr); break; case nir_intrinsic_load_base_vertex: @@ -5028,25 +5029,21 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_shader_variant *so = ctx->so; nir_io_semantics io = nir_intrinsic_io_semantics(intr); - compile_assert(ctx, nir_src_is_const(intr->src[1])); + nir_src offset_src = *nir_get_io_offset_src(intr); + compile_assert(ctx, nir_src_is_const(offset_src)); - unsigned offset = nir_src_as_uint(intr->src[1]); - unsigned n = nir_intrinsic_base(intr) + offset; + unsigned offset = nir_src_as_uint(offset_src); unsigned frac = nir_intrinsic_component(intr); unsigned ncomp = nir_intrinsic_src_components(intr, 0); + unsigned slot = io.location + offset; /* For per-view variables, each user-facing slot corresponds to multiple - * views, each with a corresponding driver_location, and the offset is for - * the driver_location. To properly figure out of the slot, we'd need to - * plumb through the number of views. However, for now we only use - * per-view with gl_Position, so we assume that the variable is not an - * array or matrix (so there are no indirect accesses to the variable - * itself) and the indirect offset corresponds to the view. - */ - unsigned slot = io.location + (io.per_view ? 
0 : offset); - - if (io.per_view && offset > 0) - so->multi_pos_output = true; + * views, each with a corresponding driver_location, and the view index + * offsets the driver_location. */ + unsigned view_index = intr->intrinsic == nir_intrinsic_store_per_view_output + ? nir_src_as_uint(intr->src[1]) + : 0; + unsigned n = nir_intrinsic_base(intr) + offset + view_index; if (ctx->so->type == MESA_SHADER_FRAGMENT) { switch (slot) { @@ -5124,8 +5121,9 @@ setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr) compile_assert(ctx, so->outputs_count <= ARRAY_SIZE(so->outputs)); so->outputs[n].slot = slot; - if (io.per_view) - so->outputs[n].view = offset; + if (view_index > 0) + so->multi_pos_output = true; + so->outputs[n].view = view_index; for (int i = 0; i < ncomp; i++) { unsigned idx = (n * 4) + i + frac; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 82ca86d6a47..000455491fa 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -926,7 +926,8 @@ output_slot_used_for_binning(gl_varying_slot slot) static bool remove_nonbinning_output(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - if (intr->intrinsic != nir_intrinsic_store_output) + if (intr->intrinsic != nir_intrinsic_store_output && + intr->intrinsic != nir_intrinsic_store_per_view_output) return false; nir_io_semantics io = nir_intrinsic_io_semantics(intr); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 7aabc5de438..417d3039ddc 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -145,6 +145,7 @@ is_intrinsic_store(nir_intrinsic_op op) { switch (op) { case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: case nir_intrinsic_store_scratch: case nir_intrinsic_store_ssbo: case nir_intrinsic_store_shared: diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index c244c609f9f..a9171c1d4e0 100644 --- 
a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -62,6 +62,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused) switch (intr->intrinsic) { case nir_intrinsic_store_ssbo: case nir_intrinsic_store_global_ir3: + case nir_intrinsic_store_per_view_output: offset_src_idx = 2; break; default: @@ -123,6 +124,7 @@ lower_64b_intrinsics(nir_builder *b, nir_instr *instr, void *unused) case nir_intrinsic_load_ssbo: case nir_intrinsic_load_ubo: case nir_intrinsic_load_global_ir3: + case nir_intrinsic_load_per_view_output: offset_src_idx = 1; break; default: diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index dd090a24a85..591782c7806 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -81,6 +81,7 @@ const struct nir_shader_compiler_options brw_scalar_nir_options = { .scalarize_ddx = true, .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES), .support_indirect_outputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES), + .per_view_unique_driver_locations = true, }; struct brw_compiler * diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index e9eec017c33..8fc5300c3f8 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -285,8 +285,10 @@ is_output(nir_intrinsic_instr *intrin) { return intrin->intrinsic == nir_intrinsic_load_output || intrin->intrinsic == nir_intrinsic_load_per_vertex_output || + intrin->intrinsic == nir_intrinsic_load_per_view_output || intrin->intrinsic == nir_intrinsic_store_output || - intrin->intrinsic == nir_intrinsic_store_per_vertex_output; + intrin->intrinsic == nir_intrinsic_store_per_vertex_output || + intrin->intrinsic == nir_intrinsic_store_per_view_output; } @@ -342,6 +344,56 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b, return true; } +/* Replace store_per_view_output with plain store_output, mapping the view index + * to IO offset.
Because we only use per-view outputs for position, the offset + * pitch is always 1. */ +static bool +lower_per_view_outputs(nir_builder *b, + nir_intrinsic_instr *intrin, + UNUSED void *cb_data) +{ + if (intrin->intrinsic != nir_intrinsic_store_per_view_output && + intrin->intrinsic != nir_intrinsic_load_per_view_output) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + + nir_src *view_index = nir_get_io_arrayed_index_src(intrin); + nir_src *offset = nir_get_io_offset_src(intrin); + + nir_def *new_offset = nir_iadd(b, view_index->ssa, offset->ssa); + + nir_intrinsic_instr *new; + if (intrin->intrinsic == nir_intrinsic_store_per_view_output) + new = nir_store_output(b, intrin->src[0].ssa, new_offset); + else { + nir_def *new_def = nir_load_output(b, intrin->def.num_components, + intrin->def.bit_size, new_offset); + new = nir_instr_as_intrinsic(new_def->parent_instr); + } + + nir_intrinsic_set_base(new, nir_intrinsic_base(intrin)); + nir_intrinsic_set_range(new, nir_intrinsic_range(intrin)); + nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin)); + nir_intrinsic_set_component(new, nir_intrinsic_component(intrin)); + nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin)); + nir_intrinsic_set_io_semantics(new, nir_intrinsic_io_semantics(intrin)); + + if (intrin->intrinsic == nir_intrinsic_load_per_view_output) + nir_def_rewrite_uses(&intrin->def, &new->def); + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +brw_nir_lower_per_view_outputs(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_per_view_outputs, + nir_metadata_control_flow, + NULL); +} + void brw_nir_lower_vs_inputs(nir_shader *nir) { @@ -640,6 +692,7 @@ brw_nir_lower_vue_outputs(nir_shader *nir) nir_lower_io(nir, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); + brw_nir_lower_per_view_outputs(nir); } void @@ -1220,7 +1273,7 @@ brw_mesh_compact_io(nir_shader *mesh, nir_shader *frag) assert(location < 
ARRAY_SIZE(mapping)); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -2253,7 +2306,7 @@ brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) const struct glsl_type *type = var->interface_type; if (!type) { type = var->type; - if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c index 61a5f3240e2..f8a8c151d30 100644 --- a/src/intel/compiler/elk/elk_nir.c +++ b/src/intel/compiler/elk/elk_nir.c @@ -1899,7 +1899,7 @@ elk_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) const struct glsl_type *type = var->interface_type; if (!type) { type = var->type; - if (nir_is_arrayed_io(var, nir->info.stage) || var->data.per_view) { + if (nir_is_arrayed_io(var, nir->info.stage)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/intel/vulkan/anv_mesh_perprim_wa.c b/src/intel/vulkan/anv_mesh_perprim_wa.c index f46d6a1082b..c35b9f5c365 100644 --- a/src/intel/vulkan/anv_mesh_perprim_wa.c +++ b/src/intel/vulkan/anv_mesh_perprim_wa.c @@ -114,7 +114,7 @@ anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir, location >= VARYING_SLOT_VAR0); const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } diff --git a/src/panfrost/compiler/bi_lower_divergent_indirects.c b/src/panfrost/compiler/bi_lower_divergent_indirects.c index b4f3a88c582..54fb34d07c5 100644 --- a/src/panfrost/compiler/bi_lower_divergent_indirects.c +++ 
b/src/panfrost/compiler/bi_lower_divergent_indirects.c @@ -60,6 +60,12 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_intrinsic_instr *intr, offset = nir_get_io_offset_src(intr); break; + case nir_intrinsic_store_per_view_output: + assert(stage == MESA_SHADER_VERTEX); + assert(!nir_src_is_divergent(&intr->src[1])); + offset = nir_get_io_offset_src(intr); + break; + case nir_intrinsic_image_texel_address: case nir_intrinsic_image_load: case nir_intrinsic_image_store: