diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index da3f0cacd81..f3e2ec628ac 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -113,6 +113,7 @@ struct sched_ctx {
    int16_t occupancy_factor;
    int16_t last_SMEM_stall;
    int last_SMEM_dep_idx;
+   int last_VMEM_store_idx;
    MoveState mv;
    bool schedule_pos_exports = true;
    unsigned schedule_pos_export_div = 1;
@@ -1178,9 +1179,15 @@ schedule_position_export(sched_ctx& ctx, Block* block, Instruction* current, int
    }
 }
 
-unsigned
+void
 schedule_VMEM_store(sched_ctx& ctx, Block* block, Instruction* current, int idx)
 {
+   int max_distance = ctx.last_VMEM_store_idx + VMEM_STORE_CLAUSE_MAX_GRAB_DIST;
+   ctx.last_VMEM_store_idx = idx;
+
+   if (max_distance < idx)
+      return;
+
    hazard_query hq;
    init_hazard_query(ctx, &hq);
 
@@ -1191,31 +1198,30 @@ schedule_VMEM_store(sched_ctx& ctx, Block* block, Instruction* current, int idx)
       if (candidate->opcode == aco_opcode::p_logical_start)
          break;
 
-      if (!should_form_clause(current, candidate.get())) {
-         add_to_hazard_query(&hq, candidate.get());
-         ctx.mv.downwards_skip(cursor);
-         k += get_likely_cost(candidate.get());
-         continue;
+      if (should_form_clause(current, candidate.get())) {
+         if (perform_hazard_query(&hq, candidate.get(), false) == hazard_success)
+            ctx.mv.downwards_move_clause(cursor);
+         break;
       }
 
-      if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success)
+      if (candidate->isVMEM() || candidate->isFlatLike())
          break;
-      if (ctx.mv.downwards_move_clause(cursor) != move_success)
-         break;
-   }
 
-   return cursor.insert_idx - cursor.insert_idx_clause - 1;
+      add_to_hazard_query(&hq, candidate.get());
+      ctx.mv.downwards_skip(cursor);
+      k += get_likely_cost(candidate.get());
+   }
 }
 
 void
 schedule_block(sched_ctx& ctx, Program* program, Block* block)
 {
    ctx.last_SMEM_dep_idx = 0;
+   ctx.last_VMEM_store_idx = INT_MIN; /* no store seen yet; adding the grab distance can't overflow */
    ctx.last_SMEM_stall = INT16_MIN;
    ctx.mv.block = block;
 
    /* go through all instructions and find memory loads */
-   unsigned num_stores = 0;
    for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
       Instruction* current = block->instructions[idx].get();
 
@@ -1231,7 +1237,10 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block)
       }
 
       if (current->definitions.empty()) {
-         num_stores += current->isVMEM() || current->isFlatLike() ? 1 : 0;
+         if ((current->isVMEM() || current->isFlatLike()) && program->gfx_level >= GFX11) {
+            ctx.mv.current = current;
+            schedule_VMEM_store(ctx, block, current, idx);
+         }
          continue;
       }
 
@@ -1251,18 +1260,6 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block)
       }
    }
 
-   /* GFX11 benefits from creating VMEM store clauses. */
-   if (num_stores > 1 && program->gfx_level >= GFX11) {
-      for (int idx = block->instructions.size() - 1; idx >= 0; idx--) {
-         Instruction* current = block->instructions[idx].get();
-         if (!current->definitions.empty() || !(current->isVMEM() || current->isFlatLike()))
-            continue;
-
-         ctx.mv.current = current;
-         idx -= schedule_VMEM_store(ctx, block, current, idx);
-      }
-   }
-
    /* resummarize the block's register demand */
    block->register_demand = block->live_in_demand;
    for (const aco_ptr<Instruction>& instr : block->instructions)