diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index 6b75446f5d9..fd402b776d0 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -892,6 +892,31 @@ subtract_deref(nir_builder *b, nir_deref_instr *deref, int64_t offset)
       b, deref, nir_imm_intN_t(b, -offset, deref->def.bit_size));
 }
 
+static void
+hoist_base_addr(nir_instr *instr, nir_instr *to_hoist)
+{
+   /* Return if this instruction already dominates the first load. */
+   if (to_hoist->block != instr->block || to_hoist->index <= instr->index)
+      return;
+
+   /* Only the offset calculation (consisting of ALU and load_const)
+    * differs between the vectorized loads.
+    */
+   assert(to_hoist->type == nir_instr_type_load_const ||
+          to_hoist->type == nir_instr_type_alu);
+
+   if (to_hoist->type == nir_instr_type_alu) {
+      /* For ALU, recursively hoist the sources. */
+      nir_alu_instr *alu = nir_instr_as_alu(to_hoist);
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
+         hoist_base_addr(instr, alu->src[i].src.ssa->parent_instr);
+   }
+
+   nir_instr_move(nir_before_instr(instr), to_hoist);
+   to_hoist->index = instr->index;
+   return;
+}
+
 static void
 vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
                 struct entry *low, struct entry *high,
@@ -958,21 +983,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
 
    /* update the offset */
    if (first != low && info->base_src >= 0) {
-      /* let nir_opt_algebraic() remove this addition. this doesn't have much
-       * issues with subtracting 16 from expressions like "(i + 1) * 16" because
-       * nir_opt_algebraic() turns them into "i * 16 + 16" */
       b->cursor = nir_before_instr(first->instr);
 
+      /* Hoist low base addr before first intrinsic. */
+      nir_def *base = low->intrin->src[info->base_src].ssa;
+      hoist_base_addr(first->instr, base->parent_instr);
+      nir_src_rewrite(&first->intrin->src[info->base_src], base);
+
       if (nir_intrinsic_has_offset_shift(first->intrin)) {
-         nir_add_io_offset(b, first->intrin, -(int)(high_start / 8u));
-      } else {
-         /* TODO once all intrinsics that need a scaled offset use
-          * offset_shift, this old path can be removed.
-          */
-         nir_def *new_base = first->intrin->src[info->base_src].ssa;
-         new_base = nir_iadd_imm(
-            b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
-         nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
+         nir_intrinsic_set_offset_shift(first->intrin,
+                                        nir_intrinsic_offset_shift(low->intrin));
       }
    }
 
@@ -1013,7 +1033,7 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
       nir_intrinsic_set_range_base(first->intrin, old_low_range_base);
       nir_intrinsic_set_range(first->intrin, range);
-   } else if (nir_intrinsic_has_base(first->intrin) && info->base_src == -1 && info->deref_src == -1) {
+   } else if (nir_intrinsic_has_base(first->intrin) && info->deref_src == -1) {
       nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
    }
 
@@ -1737,6 +1757,8 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
    unsigned next_index = 0;
 
    nir_foreach_instr_safe(instr, block) {
+      instr->index = next_index++;
+
       if (handle_barrier(ctx, &progress, impl, instr))
          continue;
 
@@ -1760,7 +1782,7 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo
 
       /* create entry */
       struct entry *entry = create_entry(ctx, ctx, info, intrin);
-      entry->index = next_index++;
+      entry->index = next_index;
 
       list_addtail(&entry->head, &ctx->entries[mode_index]);