ac/nir/ngg: Use new pre-rasterization output info helper.

For NGG VS/TES and GS. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28936>
2026-05-09 06:48:06 +02:00 · 2024-04-25 17:15:39 +02:00 · 2024-04-25 17:15:39 +02:00 · 4ac0727f87
commit 4ac0727f87
parent b1819d60ea
1 changed files with 69 additions and 230 deletions
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@ -47,13 +47,6 @@ typedef struct
   nir_def *chan[4];
 } vs_output;

-typedef struct
-{
-   nir_alu_type types[VARYING_SLOT_MAX][4];
-   nir_alu_type types_16bit_lo[16][4];
-   nir_alu_type types_16bit_hi[16][4];
-} shader_output_types;
-
 typedef struct
 {
   const ac_nir_lower_ngg_options *options;
@ -91,20 +84,9 @@ typedef struct
   bool has_clipdist;

   /* outputs */
-   nir_def *outputs[VARYING_SLOT_MAX][4];
-   nir_def *outputs_16bit_lo[16][4];
-   nir_def *outputs_16bit_hi[16][4];
-   shader_output_types output_types;
+   ac_nir_prerast_out out;
 } lower_ngg_nogs_state;

-typedef struct
-{
-   /* output stream index, 2 bit per component */
-   uint8_t stream;
-   /* Bitmask of components used: 4 bits per slot, 1 bit per component. */
-   uint8_t components_mask : 4;
-} gs_output_info;
-
 typedef struct
 {
   const ac_nir_lower_ngg_options *options;
@ -120,16 +102,8 @@ typedef struct
   unsigned lds_offs_primflags;
   bool output_compile_time_known;
   bool streamout_enabled;
-   /* 32 bit outputs */
-   nir_def *outputs[VARYING_SLOT_MAX][4];
-   gs_output_info output_info[VARYING_SLOT_MAX];
-   /* 16 bit outputs */
-   nir_def *outputs_16bit_hi[16][4];
-   nir_def *outputs_16bit_lo[16][4];
-   gs_output_info output_info_16bit_hi[16];
-   gs_output_info output_info_16bit_lo[16];
-   /* output types for both 32bit and 16bit */
-   shader_output_types output_types;
+   /* Outputs */
+   ac_nir_prerast_out out;
   /* Count per stream. */
   nir_def *vertex_count[4];
   nir_def *primitive_count[4];
@ -661,7 +635,7 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b, lower_ngg_nogs_state *s)
      prim_id = nir_load_primitive_id(b);
   }

-   s->outputs[VARYING_SLOT_PRIMITIVE_ID][0] = prim_id;
+   s->out.outputs[VARYING_SLOT_PRIMITIVE_ID][0] = prim_id;

   /* Update outputs_written to reflect that the pass added a new output. */
   b->shader->info.outputs_written |= VARYING_BIT_PRIMITIVE_ID;
@ -1710,11 +1684,11 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
 static void
 ngg_nogs_store_edgeflag_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
 {
-   if (!s->outputs[VARYING_SLOT_EDGE][0])
+   if (!s->out.outputs[VARYING_SLOT_EDGE][0])
      return;

   /* clamp user edge flag to 1 for latter bit operations */
-   nir_def *edgeflag = s->outputs[VARYING_SLOT_EDGE][0];
+   nir_def *edgeflag = s->out.outputs[VARYING_SLOT_EDGE][0];
   edgeflag = nir_umin(b, edgeflag, nir_imm_int(b, 1));

   /* user edge flag is stored at the beginning of a vertex if streamout is not enabled */
@ -1774,7 +1748,7 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)

      /* Clear unused components. */
      for (unsigned i = 0; i < 4; i++) {
-         if (!s->outputs[slot][i])
+         if (!s->out.outputs[slot][i])
            mask &= ~BITFIELD_BIT(i);
      }

@ -1787,7 +1761,7 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
          *   Vulkan does not allow streamout outputs less than 32bit.
          *   OpenGL puts 16bit outputs in VARYING_SLOT_VAR0_16BIT.
          */
-         nir_def *store_val = nir_vec(b, &s->outputs[slot][start], (unsigned)count);
+         nir_def *store_val = nir_vec(b, &s->out.outputs[slot][start], (unsigned)count);
         nir_store_shared(b, store_val, addr, .base = packed_location * 16 + start * 4);
      }
   }
@ -1802,14 +1776,14 @@ ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)

      /* Clear unused components. */
      for (unsigned i = 0; i < 4; i++) {
-         if (!s->outputs_16bit_lo[slot][i])
+         if (!s->out.outputs_16bit_lo[slot][i])
            mask_lo &= ~BITFIELD_BIT(i);
-         if (!s->outputs_16bit_hi[slot][i])
+         if (!s->out.outputs_16bit_hi[slot][i])
            mask_hi &= ~BITFIELD_BIT(i);
      }

-      nir_def **outputs_lo = s->outputs_16bit_lo[slot];
-      nir_def **outputs_hi = s->outputs_16bit_hi[slot];
+      nir_def **outputs_lo = s->out.outputs_16bit_lo[slot];
+      nir_def **outputs_hi = s->out.outputs_16bit_hi[slot];
      nir_def *undef = nir_undef(b, 1, 16);

      unsigned mask = mask_lo | mask_hi;
@ -1994,7 +1968,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
                           unsigned stream, nir_def *so_buffer[4],
                           nir_def *buffer_offsets[4],
                           nir_def *vtx_buffer_idx, nir_def *vtx_lds_addr,
-                           shader_output_types *output_types,
+                           ac_nir_prerast_out *pr_out,
                           bool skip_primitive_id)
 {
   nir_def *vtx_buffer_offsets[4];
@ -2053,10 +2027,10 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,

            if (out->high_16bits) {
               v = nir_unpack_32_2x16_split_y(b, v);
-               t = output_types->types_16bit_hi[index][c];
+               t = pr_out->types_16bit_hi[index][c];
            } else {
               v = nir_unpack_32_2x16_split_x(b, v);
-               t = output_types->types_16bit_lo[index][c];
+               t = pr_out->types_16bit_lo[index][c];
            }

            t = nir_alu_type_get_base_type(t);
@ -2112,7 +2086,7 @@ ngg_nogs_build_streamout(nir_builder *b, lower_ngg_nogs_state *s)
            nir_def *vtx_lds_addr = pervertex_lds_addr(b, vtx_lds_idx, vtx_lds_stride);
            ngg_build_streamout_vertex(b, info, 0, so_buffer, buffer_offsets,
                                       nir_iadd_imm(b, vtx_buffer_idx, i),
-                                       vtx_lds_addr, &s->output_types, s->skip_primitive_id);
+                                       vtx_lds_addr, &s->out, s->skip_primitive_id);
         }
         nir_pop_if(b, if_valid_vertex);
      }
@ -2188,56 +2162,7 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
         if (intrin->intrinsic != nir_intrinsic_store_output)
            continue;

-         assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
-
-         nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
-         unsigned slot = sem.location;
-
-         nir_def **output;
-         nir_alu_type *type;
-         if (slot >= VARYING_SLOT_VAR0_16BIT) {
-            unsigned index = slot - VARYING_SLOT_VAR0_16BIT;
-            if (sem.high_16bits) {
-               output = s->outputs_16bit_hi[index];
-               type = s->output_types.types_16bit_hi[index];
-            } else {
-               output = s->outputs_16bit_lo[index];
-               type = s->output_types.types_16bit_lo[index];
-            }
-         } else {
-            output = s->outputs[slot];
-            type = s->output_types.types[slot];
-         }
-
-         unsigned component = nir_intrinsic_component(intrin);
-         unsigned write_mask = nir_intrinsic_write_mask(intrin);
-         nir_alu_type src_type = nir_intrinsic_src_type(intrin);
-         b->cursor = nir_after_instr(instr);
-
-         nir_def *store_val = intrin->src[0].ssa;
-
-         /* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
-         const bool non_dedicated_16bit = slot < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
-
-         u_foreach_bit (i, write_mask) {
-            unsigned c = component + i;
-            nir_def *store_component = nir_channel(b, intrin->src[0].ssa, i);
-            if (non_dedicated_16bit) {
-               if (sem.high_16bits) {
-                  nir_def *lo = output[c] ? nir_unpack_32_2x16_split_x(b, output[c]) : nir_imm_intN_t(b, 0, 16);
-                  output[c] = nir_pack_32_2x16_split(b, lo, store_component);
-               } else {
-                  nir_def *hi = output[c] ? nir_unpack_32_2x16_split_y(b, output[c]) : nir_imm_intN_t(b, 0, 16);
-                  output[c] = nir_pack_32_2x16_split(b, store_component, hi);
-               }
-               type[c] = nir_type_uint32;
-            } else {
-               output[c] = store_component;
-               type[c] = src_type;
-            }
-         }
-
-         /* remove all store output instructions */
+         ac_nir_gather_prerast_store_output_info(b, intrin, &s->out);
         nir_instr_remove(instr);
      }
   }
@ -2418,9 +2343,9 @@ nogs_export_vertex_params(nir_builder *b, nir_function_impl *impl,
      const unsigned num_outputs =
         gather_vs_outputs(b, outputs,
                           s->options->vs_output_param_offset,
-                           s->outputs,
-                           s->outputs_16bit_lo,
-                           s->outputs_16bit_hi);
+                           s->out.outputs,
+                           s->out.outputs_16bit_lo,
+                           s->out.outputs_16bit_hi);

      if (!num_outputs)
         return;
@ -2438,8 +2363,8 @@ nogs_export_vertex_params(nir_builder *b, nir_function_impl *impl,
      ac_nir_export_parameters(b, s->options->vs_output_param_offset,
                                 b->shader->info.outputs_written,
                                 b->shader->info.outputs_written_16bit,
-                                 s->outputs, s->outputs_16bit_lo,
-                                 s->outputs_16bit_hi);
+                                 s->out.outputs, s->out.outputs_16bit_lo,
+                                 s->out.outputs_16bit_hi);
   }
 }

@ -2608,7 +2533,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option

      nir_def *pos_val = nir_load_var(b, state.position_value_var);
      for (int i = 0; i < 4; i++)
-         state.outputs[VARYING_SLOT_POS][i] = nir_channel(b, pos_val, i);
+         state.out.outputs[VARYING_SLOT_POS][i] = nir_channel(b, pos_val, i);
   }

   /* Gather outputs data and types */
@ -2650,12 +2575,12 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
                          options->clip_cull_dist_mask,
                          !options->has_param_exports,
                          options->force_vrs, !wait_attr_ring,
-                          export_outputs, state.outputs, NULL);
+                          export_outputs, state.out.outputs, NULL);

   nogs_export_vertex_params(b, impl, if_es_thread, num_es_threads, &state);

   if (wait_attr_ring)
-      export_pos0_wait_attr_ring(b, if_es_thread, state.outputs, options);
+      export_pos0_wait_attr_ring(b, if_es_thread, state.out.outputs, options);

   nir_metadata_preserve(impl, nir_metadata_none);
   nir_validate_shader(shader, "after emitting NGG VS/TES");
@ -2773,101 +2698,13 @@ ngg_gs_clear_primflags(nir_builder *b, nir_def *num_vertices, unsigned stream, l
 static bool
 lower_ngg_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_state *s)
 {
-   assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
-   b->cursor = nir_before_instr(&intrin->instr);
-
-   unsigned writemask = nir_intrinsic_write_mask(intrin);
-   unsigned component_offset = nir_intrinsic_component(intrin);
-   nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
-
-   unsigned location = io_sem.location;
-
-   nir_def *store_val = intrin->src[0].ssa;
-   nir_alu_type src_type = nir_intrinsic_src_type(intrin);
-
-   /* Small bitsize components consume the same amount of space as 32-bit components,
-    * but 64-bit ones consume twice as many. (Vulkan spec 15.1.5)
-    *
-    * 64-bit IO has been lowered to multi 32-bit IO.
-    */
-   assert(store_val->bit_size <= 32);
-   assert(nir_alu_type_get_type_size(src_type) == store_val->bit_size);
-
-   /* Get corresponding output variable and usage info. */
-   nir_def **output;
-   nir_alu_type *type;
-   gs_output_info *info;
-   if (location >= VARYING_SLOT_VAR0_16BIT) {
-      unsigned index = location - VARYING_SLOT_VAR0_16BIT;
-      assert(index < 16);
-
-      if (io_sem.high_16bits) {
-         output = s->outputs_16bit_hi[index];
-         type = s->output_types.types_16bit_hi[index];
-         info = s->output_info_16bit_hi + index;
-      } else {
-         output = s->outputs_16bit_lo[index];
-         type = s->output_types.types_16bit_lo[index];
-         info = s->output_info_16bit_lo + index;
-      }
-   } else {
-      assert(location < VARYING_SLOT_MAX);
-      output = s->outputs[location];
-      type = s->output_types.types[location];
-      info = s->output_info + location;
-   }
-
-   for (unsigned comp = 0; comp < store_val->num_components; ++comp) {
-      if (!(writemask & (1 << comp)))
-         continue;
-      unsigned stream = (io_sem.gs_streams >> (comp * 2)) & 0x3;
-      if (!(b->shader->info.gs.active_stream_mask & (1 << stream)))
-         continue;
-
-      unsigned component = component_offset + comp;
-
-      /* The same output component should always belong to the same stream. */
-      assert(!(info->components_mask & (1 << component)) ||
-             ((info->stream >> (component * 2)) & 3) == stream);
-
-      /* Components of the same output slot may belong to different streams. */
-      info->stream |= stream << (component * 2);
-      info->components_mask |= BITFIELD_BIT(component);
-
-      /* Assume we have called nir_lower_io_to_temporaries which store output in the
-       * same block as EmitVertex, so we don't need to use nir_variable for outputs.
-       */
-      nir_def *store_component = nir_channel(b, store_val, comp);
-
-      /* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
-      const bool non_dedicated_16bit = location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
-
-      if (non_dedicated_16bit) {
-         if (io_sem.high_16bits) {
-            nir_def *lo = output[component] ? nir_unpack_32_2x16_split_x(b, output[component]) : nir_imm_intN_t(b, 0, 16);
-            output[component] = nir_pack_32_2x16_split(b, lo, store_component);
-         } else {
-            nir_def *hi = output[component] ? nir_unpack_32_2x16_split_y(b, output[component]) : nir_imm_intN_t(b, 0, 16);
-            output[component] = nir_pack_32_2x16_split(b, store_component, hi);
-         }
-
-         /* Don't care about what type was set first, we mark this as a 32-bit unsigned. */
-         type[component] = nir_type_uint32;
-      } else {
-         output[component] = store_component;
-
-         /* If type is set multiple times, the value must be same. */
-         assert(type[component] == nir_type_invalid || type[component] == src_type);
-         type[component] = src_type;
-      }
-   }
-
+   ac_nir_gather_prerast_store_output_info(b, intrin, &s->out);
   nir_instr_remove(&intrin->instr);
   return true;
 }

 static unsigned
-gs_output_component_mask_with_stream(gs_output_info *info, unsigned stream)
+gs_output_component_mask_with_stream(ac_nir_prerast_per_output_info *info, unsigned stream)
 {
   unsigned mask = info->components_mask;
   if (!mask)
@ -2897,25 +2734,28 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri
   nir_def *current_vtx_per_prim = intrin->src[1].ssa;
   nir_def *gs_emit_vtx_addr = ngg_gs_emit_vertex_addr(b, gs_emit_vtx_idx, s);

+   /* Store generic 32-bit outputs to LDS.
+    * In case of packed 16-bit, we assume that has been already packed into 32 bit slots by now.
+    */
   u_foreach_bit64(slot, b->shader->info.outputs_written) {
-      unsigned packed_location = util_bitcount64((b->shader->info.outputs_written & BITFIELD64_MASK(slot)));
-      gs_output_info *info = &s->output_info[slot];
-      nir_def **output = s->outputs[slot];
+      const unsigned packed_location = util_bitcount64((b->shader->info.outputs_written & BITFIELD64_MASK(slot)));
+      unsigned mask = gs_output_component_mask_with_stream(&s->out.infos[slot], stream);
+
+      nir_def **output = s->out.outputs[slot];
+      nir_def *undef = nir_undef(b, 1, 32);

-      unsigned mask = gs_output_component_mask_with_stream(info, stream);
      while (mask) {
         int start, count;
         u_bit_scan_consecutive_range(&mask, &start, &count);
         nir_def *values[4] = {0};
         for (int c = start; c < start + count; ++c) {
            if (!output[c]) {
-               /* no one write to this output before */
-               values[c - start] = nir_undef(b, 1, 32);
-               continue;
+               /* The shader hasn't written this output. */
+               values[c - start] = undef;
+            } else {
+               assert(output[c]->bit_size == 32);
+               values[c - start] = output[c];
            }
-
-            /* extend 8/16 bit to 32 bit, 64 bit has been lowered */
-            values[c - start] = nir_u2uN(b, output[c], 32);
         }

         nir_def *store_val = nir_vec(b, values, (unsigned)count);
@ -2925,21 +2765,22 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri
      }

      /* Clear all outputs (they are undefined after emit_vertex) */
-      memset(s->outputs[slot], 0, sizeof(s->outputs[slot]));
+      memset(s->out.outputs[slot], 0, sizeof(s->out.outputs[slot]));
   }

-   /* Store 16bit outputs to LDS. */
-   unsigned num_32bit_outputs = util_bitcount64(b->shader->info.outputs_written);
+   const unsigned num_32bit_outputs = util_bitcount64(b->shader->info.outputs_written);
+
+   /* Store dedicated 16-bit outputs to LDS. */
   u_foreach_bit(slot, b->shader->info.outputs_written_16bit) {
-      unsigned packed_location = num_32bit_outputs +
+      const unsigned packed_location = num_32bit_outputs +
         util_bitcount(b->shader->info.outputs_written_16bit & BITFIELD_MASK(slot));

-      unsigned mask_lo = gs_output_component_mask_with_stream(s->output_info_16bit_lo + slot, stream);
-      unsigned mask_hi = gs_output_component_mask_with_stream(s->output_info_16bit_hi + slot, stream);
+      const unsigned mask_lo = gs_output_component_mask_with_stream(s->out.infos_16bit_lo + slot, stream);
+      const unsigned mask_hi = gs_output_component_mask_with_stream(s->out.infos_16bit_hi + slot, stream);
      unsigned mask = mask_lo | mask_hi;

-      nir_def **output_lo = s->outputs_16bit_lo[slot];
-      nir_def **output_hi = s->outputs_16bit_hi[slot];
+      nir_def **output_lo = s->out.outputs_16bit_lo[slot];
+      nir_def **output_hi = s->out.outputs_16bit_hi[slot];
      nir_def *undef = nir_undef(b, 1, 16);

      while (mask) {
@ -2960,8 +2801,8 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri
      }

      /* Clear all outputs (they are undefined after emit_vertex) */
-      memset(s->outputs_16bit_lo[slot], 0, sizeof(s->outputs_16bit_lo[slot]));
-      memset(s->outputs_16bit_hi[slot], 0, sizeof(s->outputs_16bit_hi[slot]));
+      memset(s->out.outputs_16bit_lo[slot], 0, sizeof(s->out.outputs_16bit_lo[slot]));
+      memset(s->out.outputs_16bit_hi[slot], 0, sizeof(s->out.outputs_16bit_hi[slot]));
   }

   /* Calculate and store per-vertex primitive flags based on vertex counts:
@ -3113,11 +2954,10 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
   }

   u_foreach_bit64(slot, b->shader->info.outputs_written) {
-      unsigned packed_location =
+      const unsigned packed_location =
         util_bitcount64((b->shader->info.outputs_written & BITFIELD64_MASK(slot)));

-      gs_output_info *info = &s->output_info[slot];
-      unsigned mask = gs_output_component_mask_with_stream(info, 0);
+      unsigned mask = gs_output_component_mask_with_stream(&s->out.infos[slot], 0);

      while (mask) {
         int start, count;
@ -3128,20 +2968,19 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
                            .align_mul = 4);

         for (int i = 0; i < count; i++)
-            s->outputs[slot][start + i] = nir_channel(b, load, i);
+            s->out.outputs[slot][start + i] = nir_channel(b, load, i);
      }
   }

-   /* 16bit outputs */
-   unsigned num_32bit_outputs = util_bitcount64(b->shader->info.outputs_written);
+   const unsigned num_32bit_outputs = util_bitcount64(b->shader->info.outputs_written);
+
+   /* Dedicated 16-bit outputs. */
   u_foreach_bit(i, b->shader->info.outputs_written_16bit) {
-      unsigned packed_location = num_32bit_outputs +
+      const unsigned packed_location = num_32bit_outputs +
         util_bitcount(b->shader->info.outputs_written_16bit & BITFIELD_MASK(i));

-      gs_output_info *info_lo = s->output_info_16bit_lo + i;
-      gs_output_info *info_hi = s->output_info_16bit_hi + i;
-      unsigned mask_lo = gs_output_component_mask_with_stream(info_lo, 0);
-      unsigned mask_hi = gs_output_component_mask_with_stream(info_hi, 0);
+      const unsigned mask_lo = gs_output_component_mask_with_stream(&s->out.infos_16bit_lo[i], 0);
+      const unsigned mask_hi = gs_output_component_mask_with_stream(&s->out.infos_16bit_hi[i], 0);
      unsigned mask = mask_lo | mask_hi;

      while (mask) {
@ -3157,10 +2996,10 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
            unsigned comp = start + j;

            if (mask_lo & BITFIELD_BIT(comp))
-               s->outputs_16bit_lo[i][comp] = nir_unpack_32_2x16_split_x(b, val);
+               s->out.outputs_16bit_lo[i][comp] = nir_unpack_32_2x16_split_x(b, val);

            if (mask_hi & BITFIELD_BIT(comp))
-               s->outputs_16bit_hi[i][comp] = nir_unpack_32_2x16_split_y(b, val);
+               s->out.outputs_16bit_hi[i][comp] = nir_unpack_32_2x16_split_y(b, val);
         }
      }
   }
@ -3179,7 +3018,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
                          s->options->clip_cull_dist_mask,
                          !s->options->has_param_exports,
                          s->options->force_vrs, !wait_attr_ring,
-                          export_outputs, s->outputs, NULL);
+                          export_outputs, s->out.outputs, NULL);

   nir_pop_if(b, if_vtx_export_thread);

@ -3190,8 +3029,8 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
         vs_output outputs[64];
         unsigned num_outputs = gather_vs_outputs(b, outputs,
                                                  s->options->vs_output_param_offset,
-                                                  s->outputs, s->outputs_16bit_lo,
-                                                  s->outputs_16bit_hi);
+                                                  s->out.outputs, s->out.outputs_16bit_lo,
+                                                  s->out.outputs_16bit_hi);

         if (num_outputs) {
            b->cursor = nir_after_impl(s->impl);
@ -3204,13 +3043,13 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
         ac_nir_export_parameters(b, s->options->vs_output_param_offset,
                                  b->shader->info.outputs_written,
                                  b->shader->info.outputs_written_16bit,
-                                  s->outputs, s->outputs_16bit_lo,
-                                  s->outputs_16bit_hi);
+                                  s->out.outputs, s->out.outputs_16bit_lo,
+                                  s->out.outputs_16bit_hi);
      }
   }

   if (wait_attr_ring)
-      export_pos0_wait_attr_ring(b, if_vtx_export_thread, s->outputs, s->options);
+      export_pos0_wait_attr_ring(b, if_vtx_export_thread, s->out.outputs, s->options);
 }

 static void
@ -3459,7 +3298,7 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
                                       buffer_offsets,
                                       nir_iadd_imm(b, vtx_buffer_idx, i),
                                       exported_vtx_lds_addr[i],
-                                       &s->output_types, false);
+                                       &s->out, false);
         }
      }
      nir_pop_if(b, if_emit);