diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h
index de571c3c1c4..e7707814b7c 100644
--- a/src/gallium/drivers/lima/ir/gp/gpir.h
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h
@@ -120,6 +120,7 @@ typedef struct {
    int *slots;
    gpir_node_type type;
    bool spillless;
+   bool schedule_first;
    bool may_consume_two_slots;
 } gpir_op_info;
 
@@ -299,14 +300,20 @@ typedef struct gpir_instr {
     *
     * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
     *                          alu_num_slot_needed_by_max +
-    *                          alu_num_slot_needed_by_next_max
+    *                          max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
     * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
     *     alu_num_slot_neede_by_non_cplx_store
+    *
+    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
+    * nodes for the next instruction) but when there is a complex1 node in
+    * this instruction it reduces to 4 to reserve a slot for complex2 in the
+    * next instruction.
     */
    int alu_num_slot_needed_by_store;
    int alu_num_slot_needed_by_non_cplx_store;
    int alu_num_slot_needed_by_max;
-   int alu_num_slot_needed_by_next_max;
+   int alu_num_unscheduled_next_max;
+   int alu_max_allowed_next_max;
 
    /* Used to communicate to the scheduler how many slots need to be cleared
    * up in order to satisfy the invariants.
diff --git a/src/gallium/drivers/lima/ir/gp/instr.c b/src/gallium/drivers/lima/ir/gp/instr.c
index e07a2c9b7c2..45e9d817143 100644
--- a/src/gallium/drivers/lima/ir/gp/instr.c
+++ b/src/gallium/drivers/lima/ir/gp/instr.c
@@ -37,6 +37,7 @@ gpir_instr *gpir_instr_create(gpir_block *block)
    instr->index = block->sched.instr_index++;
    instr->alu_num_slot_free = 6;
    instr->alu_non_cplx_slot_free = 5;
+   instr->alu_max_allowed_next_max = 5;
 
    list_add(&instr->list, &block->instr_list);
    return instr;
@@ -96,6 +97,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
    int non_cplx_store_reduce_slot = 0;
    int max_reduce_slot = node->sched.max_node ? 1 : 0;
    int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
+   int alu_new_max_allowed_next_max =
+      node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;
 
    /* check if this node is child of one store node.
     * complex1 won't be any of this instr's store node's child,
@@ -117,7 +120,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
 
    int slot_difference = instr->alu_num_slot_needed_by_store - store_reduce_slot +
       instr->alu_num_slot_needed_by_max - max_reduce_slot +
-      MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
+      MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -
+           alu_new_max_allowed_next_max, 0) -
       (instr->alu_num_slot_free - consume_slot);
    if (slot_difference > 0) {
       gpir_debug("failed %d because of alu slot\n", node->index);
@@ -141,7 +145,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
    instr->alu_num_slot_needed_by_store -= store_reduce_slot;
    instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;
    instr->alu_num_slot_needed_by_max -= max_reduce_slot;
-   instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
+   instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;
+   instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;
 
    return true;
 }
@@ -165,7 +170,9 @@ static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
    if (node->sched.max_node)
       instr->alu_num_slot_needed_by_max++;
    if (node->sched.next_max_node)
-      instr->alu_num_slot_needed_by_next_max++;
+      instr->alu_num_unscheduled_next_max++;
+   if (node->op == gpir_op_complex1)
+      instr->alu_max_allowed_next_max = 5;
 }
 
 static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
@@ -312,7 +319,7 @@ static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
     */
    int slot_difference = instr->alu_num_slot_needed_by_store + 1 +
       instr->alu_num_slot_needed_by_max +
-      MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
+      MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -
       instr->alu_num_slot_free;
    if (slot_difference > 0) {
       instr->slot_difference = slot_difference;
diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c
index decda5f1246..a8706627f38 100644
--- a/src/gallium/drivers/lima/ir/gp/node.c
+++ b/src/gallium/drivers/lima/ir/gp/node.c
@@ -58,6 +58,7 @@ const gpir_op_info gpir_op_infos[] = {
       .name = "complex2",
       .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_add] = {
       .name = "add",
@@ -154,11 +155,13 @@ const gpir_op_info gpir_op_infos[] = {
       .name = "rcp_impl",
       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_rsqrt_impl] = {
       .name = "rsqrt_impl",
       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_load_uniform] = {
       .name = "ld_uni",
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c
index 35925a1af51..f06089b7992 100644
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -441,7 +441,8 @@ static void schedule_insert_ready_list(sched_ctx *ctx,
 
    struct list_head *insert_pos = &ctx->ready_list;
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
-      if (insert_node->sched.dist > node->sched.dist) {
+      if (insert_node->sched.dist > node->sched.dist ||
+          gpir_op_infos[insert_node->op].schedule_first) {
          insert_pos = &node->list;
          break;
       }
@@ -916,7 +917,7 @@ static void spill_node(sched_ctx *ctx, gpir_node *node, gpir_store_node *store)
       }
       if (node->sched.next_max_node) {
          node->sched.next_max_node = false;
-         ctx->instr->alu_num_slot_needed_by_next_max--;
+         ctx->instr->alu_num_unscheduled_next_max--;
       }
    }
 }
@@ -1153,7 +1154,7 @@ static bool can_use_complex(gpir_node *node)
 
 static void sched_find_max_nodes(sched_ctx *ctx)
 {
-   ctx->instr->alu_num_slot_needed_by_next_max = -5;
+   ctx->instr->alu_num_unscheduled_next_max = 0;
    ctx->instr->alu_num_slot_needed_by_max = 0;
 
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
@@ -1169,7 +1170,7 @@ static void sched_find_max_nodes(sched_ctx *ctx)
       if (node->sched.max_node)
          ctx->instr->alu_num_slot_needed_by_max++;
       if (node->sched.next_max_node)
-         ctx->instr->alu_num_slot_needed_by_next_max++;
+         ctx->instr->alu_num_unscheduled_next_max++;
    }
 }
 
@@ -1179,9 +1180,10 @@ static void sched_find_max_nodes(sched_ctx *ctx)
 static void verify_max_nodes(sched_ctx *ctx)
 {
    int alu_num_slot_needed_by_max = 0;
-   int alu_num_slot_needed_by_next_max = -5;
+   int alu_num_unscheduled_next_max = 0;
    int alu_num_slot_needed_by_store = 0;
    int alu_num_slot_needed_by_non_cplx_store = 0;
+   int alu_max_allowed_next_max = 5;
 
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
       if (!gpir_is_input_node(node))
@@ -1190,7 +1192,7 @@ static void verify_max_nodes(sched_ctx *ctx)
       if (node->sched.max_node)
          alu_num_slot_needed_by_max++;
       if (node->sched.next_max_node)
-         alu_num_slot_needed_by_next_max++;
+         alu_num_unscheduled_next_max++;
       if (used_by_store(node, ctx->instr)) {
          alu_num_slot_needed_by_store++;
         if (node->sched.next_max_node && !node->sched.complex_allowed)
@@ -1198,12 +1200,17 @@ static void verify_max_nodes(sched_ctx *ctx)
       }
    }
 
+   if (ctx->instr->slots[GPIR_INSTR_SLOT_MUL0] &&
+       ctx->instr->slots[GPIR_INSTR_SLOT_MUL0]->op == gpir_op_complex1)
+      alu_max_allowed_next_max = 4;
+
    assert(ctx->instr->alu_num_slot_needed_by_max == alu_num_slot_needed_by_max);
-   assert(ctx->instr->alu_num_slot_needed_by_next_max == alu_num_slot_needed_by_next_max);
+   assert(ctx->instr->alu_num_unscheduled_next_max == alu_num_unscheduled_next_max);
+   assert(ctx->instr->alu_max_allowed_next_max == alu_max_allowed_next_max);
    assert(ctx->instr->alu_num_slot_needed_by_store == alu_num_slot_needed_by_store);
    assert(ctx->instr->alu_num_slot_needed_by_non_cplx_store ==
           alu_num_slot_needed_by_non_cplx_store);
-   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_slot_needed_by_next_max, 0));
+   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0));
    assert(ctx->instr->alu_non_cplx_slot_free >= alu_num_slot_needed_by_max + alu_num_slot_needed_by_non_cplx_store);
 }
 
@@ -1237,6 +1244,13 @@ static bool try_node(sched_ctx *ctx)
          score = schedule_try_node(ctx, node, true);
       }
 
+      /* schedule_first nodes must be scheduled if possible */
+      if (gpir_op_infos[node->op].schedule_first && score != INT_MIN) {
+         best_node = node;
+         best_score = score;
+         break;
+      }
+
       if (score > best_score) {
          best_score = score;
          best_node = node;
@@ -1382,7 +1396,8 @@ static bool sched_move(sched_ctx *ctx)
     * need to insert the move.
     */
 
-   if (ctx->instr->alu_num_slot_needed_by_next_max > 0) {
+   if (ctx->instr->alu_num_unscheduled_next_max >
+       ctx->instr->alu_max_allowed_next_max) {
       list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
          if (!can_place_move(ctx, node))
            continue;
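
Note (illustration, not part of the patch): the behavioral core of the change is that invariant (1) now reserves MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0) ALU slots instead of relying on the old "-5" initialization of alu_num_slot_needed_by_next_max, and alu_max_allowed_next_max drops from 5 to 4 once complex1 sits in the current instruction so that complex2 is guaranteed a slot in the next one. The standalone C sketch below mirrors that arithmetic; the simplified struct fake_instr, the helper invariant_1_holds(), and the local MAX2 definition are made up for illustration, while the real fields live in gpir_instr and the real check is gpir_instr_insert_alu_check().

#include <stdbool.h>
#include <stdio.h>

/* Local stand-in for Mesa's MAX2() macro. */
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Made-up mirror of the gpir_instr bookkeeping fields touched by the patch. */
struct fake_instr {
   int alu_num_slot_free;
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max; /* 5 normally, 4 once complex1 is scheduled */
};

/* Invariant (1) from the updated comment in gpir.h: the free ALU slots must
 * cover the stores, the max nodes, and however many next-max nodes exceed
 * what the next instruction can still accept. */
static bool invariant_1_holds(const struct fake_instr *i)
{
   int needed = i->alu_num_slot_needed_by_store +
                i->alu_num_slot_needed_by_max +
                MAX2(i->alu_num_unscheduled_next_max -
                     i->alu_max_allowed_next_max, 0);
   return i->alu_num_slot_free >= needed;
}

int main(void)
{
   /* Six unscheduled next-max nodes and all five next-instruction max slots
    * still usable: only one ALU slot has to stay free, and it is. */
   struct fake_instr no_complex1 = { 1, 0, 0, 6, 5 };

   /* Same pressure, but complex1 is in this instruction, so only four
    * next-max nodes may be deferred and two slots must stay free. */
   struct fake_instr with_complex1 = { 1, 0, 0, 6, 4 };

   printf("without complex1: %s\n",
          invariant_1_holds(&no_complex1) ? "ok" : "violated");
   printf("with complex1:    %s\n",
          invariant_1_holds(&with_complex1) ? "ok" : "violated");
   return 0;
}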