zink: remove rework_io and revectorization

farewell, you beautiful beasts.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39542>
Utku Iseri 2026-01-26 10:10:48 +01:00 committed by Marge Bot
parent bf80d510c0
commit 2b925a83c2


@@ -5179,513 +5179,6 @@ zink_flat_flags(struct nir_shader *shader)
return flat_flags;
}
struct rework_io_state {
/* these are search criteria */
bool indirect_only;
unsigned location;
nir_variable_mode mode;
mesa_shader_stage stage;
nir_shader *nir;
const char *name;
/* these are found by scanning */
bool arrayed_io;
bool medium_precision;
bool fb_fetch_output;
bool dual_source_blend_index;
uint32_t component_mask;
uint32_t ignored_component_mask;
unsigned array_size;
unsigned bit_size;
unsigned base;
nir_alu_type type;
/* must be last */
char *newname;
};
/* match an existing variable against the rework state */
static nir_variable *
find_rework_var(nir_shader *nir, struct rework_io_state *ris)
{
nir_foreach_variable_with_modes(var, nir, ris->mode) {
const struct glsl_type *type = var->type;
if (nir_is_arrayed_io(var, nir->info.stage))
type = glsl_get_array_element(type);
if (var->data.fb_fetch_output != ris->fb_fetch_output)
continue;
if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
continue;
unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
continue;
unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
assert(!glsl_type_contains_64bit(type));
uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
return var;
}
return NULL;
}
static void
update_io_var_name(struct rework_io_state *ris, const char *name)
{
if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
return;
if (!name)
return;
if (ris->name && !strcmp(ris->name, name))
return;
if (ris->newname && !strcmp(ris->newname, name))
return;
if (ris->newname) {
ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
} else if (ris->name) {
ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
} else {
ris->newname = ralloc_strdup(ris->nir, name);
}
}
/* check/update tracking state for variable info */
static void
update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
{
bool is_load = false;
bool is_input = false;
bool is_interp = false;
filter_io_instr(intr, &is_load, &is_input, &is_interp);
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
unsigned frac = nir_intrinsic_component(intr);
/* the mask of components for the instruction */
uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
/* always check for existing variables first */
struct rework_io_state test = {
.location = ris->location,
.mode = ris->mode,
.stage = ris->stage,
.arrayed_io = io_instr_is_arrayed(intr),
.medium_precision = sem.medium_precision,
.fb_fetch_output = sem.fb_fetch_output,
.dual_source_blend_index = sem.dual_source_blend_index,
.component_mask = cmask,
.array_size = sem.num_slots > 1 ? sem.num_slots : 0,
};
if (find_rework_var(ris->nir, &test))
return;
/* filter ignored components to scan later:
* - ignore no-overlapping-components case
* - always match fbfetch and dual src blend
*/
if (ris->component_mask &&
(!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
ris->ignored_component_mask |= cmask;
return;
}
assert(!ris->indirect_only || sem.num_slots > 1);
if (sem.num_slots > 1)
ris->array_size = MAX2(ris->array_size, sem.num_slots);
assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
ris->arrayed_io = io_instr_is_arrayed(intr);
ris->component_mask |= cmask;
unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
assert(!ris->bit_size || ris->bit_size == bit_size);
ris->bit_size = bit_size;
nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
if (ris->type) {
/* in the case of clashing types, this heuristic guarantees some semblance of a match */
if (ris->type & nir_type_float || type & nir_type_float) {
ris->type = nir_type_float | bit_size;
} else if (ris->type & nir_type_int || type & nir_type_int) {
ris->type = nir_type_int | bit_size;
} else if (ris->type & nir_type_uint || type & nir_type_uint) {
ris->type = nir_type_uint | bit_size;
} else {
assert(bit_size == 1);
ris->type = nir_type_bool;
}
} else {
ris->type = type;
}
update_io_var_name(ris, intr->name);
ris->medium_precision |= sem.medium_precision;
ris->fb_fetch_output |= sem.fb_fetch_output;
ris->dual_source_blend_index |= sem.dual_source_blend_index;
if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
ris->base = nir_intrinsic_base(intr);
}
/* instruction-level scanning for variable data */
static bool
scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
struct rework_io_state *ris = data;
bool is_load = false;
bool is_input = false;
bool is_interp = false;
/* mode-based filtering */
if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
return false;
if (ris->mode == nir_var_shader_in) {
if (!is_input)
return false;
} else {
if (is_input)
return false;
}
/* location-based filtering */
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
return false;
/* only scan indirect i/o when indirect_only is set */
nir_src *src_offset = nir_get_io_offset_src(intr);
if (!nir_src_is_const(*src_offset)) {
if (!ris->indirect_only)
return false;
update_io_var_state(intr, ris);
return false;
}
/* don't scan direct i/o when indirect_only is set */
if (ris->indirect_only)
return false;
update_io_var_state(intr, ris);
return false;
}
/* scan a given i/o slot for state info */
static struct rework_io_state
scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
{
struct rework_io_state ris = {
.location = location,
.mode = mode,
.stage = nir->info.stage,
.nir = nir,
};
struct rework_io_state test;
do {
update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
test = ris;
/* always run indirect scan first to detect potential overlaps */
if (scan_indirects) {
ris.indirect_only = true;
nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
}
ris.indirect_only = false;
nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
/* keep scanning until no changes found */
} while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
return ris;
}
/* create a variable using explicit/scan info */
static void
create_io_var(nir_shader *nir, struct rework_io_state *ris)
{
char name[1024];
assert(ris->component_mask);
if (ris->newname || ris->name) {
snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
/* always use builtin name where possible */
} else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
} else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
} else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
} else {
int c = ffs(ris->component_mask) - 1;
if (c)
snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
else
snprintf(name, sizeof(name), "slot_%u", ris->location);
}
/* calculate vec/array type */
int frac = ffs(ris->component_mask) - 1;
int num_components = util_last_bit(ris->component_mask) - frac;
assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
if (ris->array_size)
vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
if (ris->arrayed_io) {
/* tess size may be unknown with generated tcs */
unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
nir->info.gs.vertices_in :
nir->info.stage == MESA_SHADER_MESH ?
nir->info.mesh.max_primitives_out :
32 /* MAX_PATCH_VERTICES */;
vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
}
nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
var->data.location_frac = frac;
var->data.location = ris->location;
/* gallium vertex inputs use intrinsic 'base' indexing */
if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
var->data.driver_location = ris->base;
bool is_tess_level = (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
(ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
var->data.patch = ris->location >= VARYING_SLOT_PATCH0 || is_tess_level;
/* set flat by default: add_derefs will fill this in later after more shader passes */
if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
var->data.interpolation = INTERP_MODE_FLAT;
var->data.fb_fetch_output = ris->fb_fetch_output;
var->data.index = ris->dual_source_blend_index;
var->data.precision = ris->medium_precision;
if (nir->info.stage == MESA_SHADER_MESH && ris->mode == nir_var_shader_out)
var->data.per_primitive = (nir->info.per_primitive_outputs & BITFIELD64_BIT(ris->location)) > 0;
else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
var->data.per_primitive = (nir->info.per_primitive_inputs & BITFIELD64_BIT(ris->location)) > 0;
/* only clip/cull dist and tess levels are compact */
if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
var->data.compact = is_clipcull_dist(ris->location) || is_tess_level;
}
/* loop the i/o mask and generate variables for specified locations */
static void
loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
{
ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
u_foreach_bit64(slot, mask) {
unsigned location = slot;
if (patch)
location += VARYING_SLOT_PATCH0;
/* this should've been handled explicitly */
assert(is_vertex_input || !is_clipcull_dist(location));
unsigned remaining = 0;
do {
/* scan the slot for usage */
struct rework_io_state ris = scan_io_var_slot(nir, mode, location, indirect);
/* one of these must be true or things have gone very wrong */
assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
/* release builds only */
if (!ris.component_mask)
break;
/* whatever reaches this point is either enough info to create a variable or an existing variable */
if (!find_rework_var(nir, &ris))
create_io_var(nir, &ris);
/* scanning may detect multiple potential variables per location at component offsets: process again */
remaining = ris.ignored_component_mask;
} while (remaining);
}
}
/* for a given mode, generate variables */
static void
rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
{
assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
assert(util_bitcount(mode) == 1);
bool found = false;
/* if no i/o, skip */
if (mode == nir_var_shader_out)
found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
else
found = nir->info.inputs_read || nir->info.patch_inputs_read;
if (!found)
return;
/* use local copies to enable incremental processing */
uint64_t inputs_read = nir->info.inputs_read;
uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
uint64_t outputs_accessed_indirectly = nir->info.outputs_read_indirectly |
nir->info.outputs_written_indirectly;
/* fragment outputs are special: handle separately */
if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
assert(!outputs_accessed_indirectly);
u_foreach_bit64(slot, outputs_accessed) {
struct rework_io_state ris = {
.location = slot,
.mode = mode,
.stage = nir->info.stage,
};
/* explicitly handle builtins */
switch (slot) {
case FRAG_RESULT_DEPTH:
case FRAG_RESULT_STENCIL:
case FRAG_RESULT_SAMPLE_MASK:
ris.bit_size = 32;
ris.component_mask = 0x1;
ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
create_io_var(nir, &ris);
outputs_accessed &= ~BITFIELD64_BIT(slot);
break;
default:
break;
}
}
/* the rest of the outputs can be generated normally */
loop_io_var_mask(nir, mode, false, false, outputs_accessed);
return;
}
/* vertex inputs are special: handle separately */
if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
assert(!inputs_read_indirectly);
u_foreach_bit64(slot, inputs_read) {
/* explicitly handle builtins */
if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
continue;
uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
struct rework_io_state ris = {
.location = slot,
.mode = mode,
.stage = nir->info.stage,
.bit_size = 32,
.component_mask = component_mask,
.type = nir_type_float32,
.newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
};
create_io_var(nir, &ris);
inputs_read &= ~BITFIELD64_BIT(slot);
}
/* the rest of the inputs can be generated normally */
loop_io_var_mask(nir, mode, false, false, inputs_read);
return;
}
/* these are the masks to process based on the mode: nothing "special" as above */
uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
u_foreach_bit64(slot, mask) {
struct rework_io_state ris = {
.location = slot,
.mode = mode,
.stage = nir->info.stage,
.arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
};
/* explicitly handle builtins */
unsigned max_components = 0;
switch (slot) {
case VARYING_SLOT_FOGC:
/* use intr components */
break;
case VARYING_SLOT_POS:
case VARYING_SLOT_CLIP_VERTEX:
case VARYING_SLOT_PNTC:
case VARYING_SLOT_BOUNDING_BOX0:
case VARYING_SLOT_BOUNDING_BOX1:
max_components = 4;
ris.type = nir_type_float32;
break;
case VARYING_SLOT_CLIP_DIST0:
max_components = nir->info.clip_distance_array_size;
assert(max_components);
ris.type = nir_type_float32;
break;
case VARYING_SLOT_CULL_DIST0:
max_components = nir->info.cull_distance_array_size;
assert(max_components);
ris.type = nir_type_float32;
break;
case VARYING_SLOT_CLIP_DIST1:
case VARYING_SLOT_CULL_DIST1:
mask &= ~BITFIELD64_BIT(slot);
indirect_mask &= ~BITFIELD64_BIT(slot);
continue;
case VARYING_SLOT_TESS_LEVEL_OUTER:
max_components = 4;
ris.type = nir_type_float32;
break;
case VARYING_SLOT_TESS_LEVEL_INNER:
max_components = 2;
ris.type = nir_type_float32;
break;
case VARYING_SLOT_PRIMITIVE_ID:
case VARYING_SLOT_LAYER:
case VARYING_SLOT_VIEWPORT:
case VARYING_SLOT_FACE:
case VARYING_SLOT_VIEW_INDEX:
case VARYING_SLOT_VIEWPORT_MASK:
ris.type = nir_type_int32;
max_components = 1;
break;
case VARYING_SLOT_PSIZ:
max_components = 1;
ris.type = nir_type_float32;
break;
default:
break;
}
if (!max_components)
continue;
switch (slot) {
case VARYING_SLOT_TESS_LEVEL_INNER:
/* actually VARYING_SLOT_PRIMITIVE_INDICES */
if (nir->info.stage == MESA_SHADER_MESH) {
switch (nir->info.mesh.primitive_type) {
case MESA_PRIM_POINTS:
max_components = 1;
break;
case MESA_PRIM_LINES:
max_components = 2;
break;
default:
max_components = 3;
break;
}
ris.component_mask = BITFIELD_MASK(max_components);
ris.type = nir_type_int32;
break;
}
FALLTHROUGH;
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_CULL_DIST1:
case VARYING_SLOT_TESS_LEVEL_OUTER:
/* compact arrays */
ris.component_mask = 0x1;
ris.array_size = max_components;
break;
default:
ris.component_mask = BITFIELD_MASK(max_components);
break;
}
ris.bit_size = 32;
create_io_var(nir, &ris);
mask &= ~BITFIELD64_BIT(slot);
/* eliminate clip/cull distance scanning early */
indirect_mask &= ~BITFIELD64_BIT(slot);
}
/* patch i/o */
if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
uint64_t indirect_patch_mask =
mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly
: (nir->info.patch_outputs_read_indirectly |
nir->info.patch_outputs_written_indirectly);
uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
loop_io_var_mask(nir, mode, false, true, patch_mask);
}
/* regular i/o */
loop_io_var_mask(nir, mode, true, false, indirect_mask);
loop_io_var_mask(nir, mode, false, false, mask);
}
static int
zink_type_size(const struct glsl_type *type, bool bindless)
{
@@ -5831,274 +5324,6 @@ fix_vertex_input_locations(nir_shader *nir)
return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
}
struct trivial_revectorize_state {
bool has_xfb;
uint32_t component_mask;
nir_intrinsic_instr *base;
nir_intrinsic_instr *next_emit_vertex;
nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
struct set *deletions;
};
/* always skip xfb; scalarized xfb is preferred */
static bool
intr_has_xfb(nir_intrinsic_instr *intr)
{
if (!nir_intrinsic_has_io_xfb(intr))
return false;
for (unsigned i = 0; i < 2; i++) {
if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
return true;
}
}
return false;
}
/* helper to avoid vectorizing i/o for different vertices */
static nir_intrinsic_instr *
find_next_emit_vertex(nir_intrinsic_instr *intr)
{
bool found = false;
nir_foreach_instr_safe(instr, intr->instr.block) {
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
if (!found && test_intr != intr)
continue;
if (!found) {
assert(intr == test_intr);
found = true;
continue;
}
if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
return test_intr;
}
}
return NULL;
}
/* scan for vectorizable instrs on a given location */
static bool
trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
{
nir_intrinsic_instr *base = state->base;
if (intr == base)
return false;
if (intr->intrinsic != base->intrinsic)
return false;
if (_mesa_set_search(state->deletions, intr))
return false;
bool is_load = false;
bool is_input = false;
bool is_interp = false;
filter_io_instr(intr, &is_load, &is_input, &is_interp);
nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
int c = nir_intrinsic_component(intr);
/* already detected */
if (state->component_mask & BITFIELD_BIT(c))
return false;
/* not a match */
if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
return false;
/* only vectorize when all srcs match */
for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
if (!nir_srcs_equal(intr->src[i], base->src[i]))
return false;
}
/* never match xfb */
state->has_xfb |= intr_has_xfb(intr);
if (state->has_xfb)
return false;
if (nir->info.stage == MESA_SHADER_GEOMETRY) {
/* only match same vertex */
if (state->next_emit_vertex != find_next_emit_vertex(intr))
return false;
}
uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
state->component_mask |= mask;
u_foreach_bit(component, mask)
state->merge[component] = intr;
return true;
}
static bool
trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
bool is_load = false;
bool is_input = false;
bool is_interp = false;
if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
return false;
if (intr->num_components != 1)
return false;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
/* always ignore compact arrays */
switch (sem.location) {
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
case VARYING_SLOT_CULL_DIST0:
case VARYING_SLOT_CULL_DIST1:
case VARYING_SLOT_TESS_LEVEL_INNER:
case VARYING_SLOT_TESS_LEVEL_OUTER:
return false;
default: break;
}
}
/* always ignore to-be-deleted instrs */
if (_mesa_set_search(data, intr))
return false;
/* never vectorize xfb */
if (intr_has_xfb(intr))
return false;
int ic = nir_intrinsic_component(intr);
uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
/* already vectorized */
if (util_bitcount(mask) == 4)
return false;
struct trivial_revectorize_state state = {
.component_mask = mask,
.base = intr,
/* avoid clobbering i/o for different vertices */
.next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
.deletions = data,
};
u_foreach_bit(bit, mask)
state.merge[bit] = intr;
bool progress = false;
nir_foreach_instr(instr, intr->instr.block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
/* no matching across vertex emission */
if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
break;
progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
}
if (!progress || state.has_xfb)
return false;
/* verify nothing crazy happened */
assert(state.component_mask);
for (unsigned i = 0; i < 4; i++) {
assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
}
unsigned first_component = ffs(state.component_mask) - 1;
unsigned num_components = util_bitcount(state.component_mask);
unsigned num_contiguous = 0;
uint32_t contiguous_mask = 0;
for (unsigned i = 0; i < num_components; i++) {
unsigned c = i + first_component;
/* calc mask of contiguous components to vectorize */
if (state.component_mask & BITFIELD_BIT(c)) {
num_contiguous++;
contiguous_mask |= BITFIELD_BIT(c);
}
/* on the first gap or the last component, vectorize */
if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
if (num_contiguous > 1) {
/* reindex to enable easy src/dest index comparison */
nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
/* determine the first/last instr to use for the base (vectorized) load/store */
unsigned first_c = ffs(contiguous_mask) - 1;
nir_intrinsic_instr *base = NULL;
unsigned test_idx = is_load ? UINT32_MAX : 0;
for (unsigned j = 0; j < num_contiguous; j++) {
unsigned merge_c = j + first_c;
nir_intrinsic_instr *merge_intr = state.merge[merge_c];
/* avoid breaking ssa ordering by using:
* - first instr for vectorized load
* - last instr for vectorized store
* this guarantees all srcs have been seen
*/
if ((is_load && merge_intr->def.index < test_idx) ||
(!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
base = merge_intr;
}
}
assert(base);
/* update instr components */
nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
unsigned orig_components = base->num_components;
base->num_components = num_contiguous;
/* do rewrites after loads and before stores */
b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
if (is_load) {
base->def.num_components = num_contiguous;
/* iterate the contiguous loaded components and rewrite merged dests */
for (unsigned j = 0; j < num_contiguous; j++) {
unsigned merge_c = j + first_c;
nir_intrinsic_instr *merge_intr = state.merge[merge_c];
/* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
nir_def_rewrite_uses_after_instr(&merge_intr->def, swiz, merge_intr == base ? nir_def_instr(swiz) : &merge_intr->instr);
j += use_components - 1;
}
} else {
nir_def *comp[NIR_MAX_VEC_COMPONENTS];
/* generate swizzled vec of store components and rewrite store src */
for (unsigned j = 0; j < num_contiguous; j++) {
unsigned merge_c = j + first_c;
nir_intrinsic_instr *merge_intr = state.merge[merge_c];
/* detect if the merged instr stored multiple components and extract them for rewrite */
unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
for (unsigned k = 0; k < use_components; k++)
comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
j += use_components - 1;
}
nir_def *val = nir_vec(b, comp, num_contiguous);
nir_src_rewrite(&base->src[0], val);
nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
}
/* deleting instructions during a foreach explodes the compiler, so delete later */
for (unsigned j = 0; j < num_contiguous; j++) {
unsigned merge_c = j + first_c;
nir_intrinsic_instr *merge_intr = state.merge[merge_c];
if (merge_intr != base)
_mesa_set_add(data, &merge_intr->instr);
}
}
contiguous_mask = 0;
num_contiguous = 0;
}
}
return true;
}
/* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
static bool
trivial_revectorize(nir_shader *nir)
{
struct set deletions;
if (nir->info.stage > MESA_SHADER_FRAGMENT)
return false;
_mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_control_flow, &deletions);
/* now it's safe to delete */
set_foreach_remove(&deletions, entry) {
nir_instr *instr = (void*)entry->key;
nir_instr_remove(instr);
}
_mesa_set_fini(&deletions, NULL);
return progress;
}
static bool
flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data)
{