diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 21d1e16e78e..ba7207c656e 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -75,6 +75,8 @@ struct ir3_legalize_state {
    /* When p0.x-w, a0.x, and a1.x are ready. */
    unsigned pred_ready[4];
    unsigned addr_ready[2];
+
+   unsigned cycle;
 };
 
 struct ir3_legalize_block_data {
@@ -188,8 +190,7 @@ get_ready_slot(struct ir3_legalize_state *state,
 static unsigned
 delay_calc(struct ir3_legalize_ctx *ctx,
            struct ir3_legalize_state *state,
-           struct ir3_instruction *instr,
-           unsigned cycle)
+           struct ir3_instruction *instr)
 {
    /* As far as we know, shader outputs don't need any delay. */
    if (instr->opc == OPC_END || instr->opc == OPC_CHMASK)
@@ -202,7 +203,8 @@ delay_calc(struct ir3_legalize_ctx *ctx,
 
       unsigned elems = post_ra_reg_elems(src);
       unsigned num = post_ra_reg_num(src);
-      unsigned src_cycle = cycle + ir3_src_read_delay(ctx->compiler, instr, n);
+      unsigned src_cycle =
+         state->cycle + ir3_src_read_delay(ctx->compiler, instr, n);
 
       for (unsigned elem = 0; elem < elems; elem++, num++) {
          unsigned ready_cycle =
@@ -224,7 +226,6 @@ static void
 delay_update(struct ir3_legalize_ctx *ctx,
              struct ir3_legalize_state *state,
              struct ir3_instruction *instr,
-             unsigned cycle,
              bool mergedregs)
 {
    if (writes_addr1(instr) && instr->block->in_early_preamble)
@@ -236,7 +237,7 @@ delay_update(struct ir3_legalize_ctx *ctx,
 
       unsigned elems = post_ra_reg_elems(dst);
       unsigned num = post_ra_reg_num(dst);
-      unsigned dst_cycle = cycle;
+      unsigned dst_cycle = state->cycle;
 
       /* sct and swz have scalar destinations and each destination is written in
        * a subsequent cycle.
@@ -432,7 +433,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
    list_replace(&block->instr_list, &instr_list);
    list_inithead(&block->instr_list);
 
-   unsigned cycle = 0;
+   state->cycle = 0;
 
    foreach_instr_safe (n, &instr_list) {
       unsigned i;
@@ -566,10 +567,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          nop->flags |= IR3_INSTR_SS;
          n->flags &= ~IR3_INSTR_SS;
          last_n = nop;
-         cycle++;
+         state->cycle++;
       }
 
-      unsigned delay = delay_calc(ctx, state, n, cycle);
+      unsigned delay = delay_calc(ctx, state, n);
 
       /* NOTE: I think the nopN encoding works for a5xx and
        * probably a4xx, but not a3xx.  So far only tested on
@@ -584,7 +585,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          unsigned transfer = MIN2(delay, 3 - last_n->nop);
          last_n->nop += transfer;
          delay -= transfer;
-         cycle += transfer;
+         state->cycle += transfer;
       }
 
       if ((delay > 0) && last_n && (last_n->opc == OPC_NOP)) {
@@ -592,13 +593,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          unsigned transfer = MIN2(delay, 5 - last_n->repeat);
          last_n->repeat += transfer;
          delay -= transfer;
-         cycle += transfer;
+         state->cycle += transfer;
       }
 
       if (delay > 0) {
          assert(delay <= 6);
          ir3_NOP(&build)->repeat = delay - 1;
-         cycle += delay;
+         state->cycle += delay;
       }
 
       if (ctx->compiler->samgq_workaround &&
@@ -721,12 +722,12 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 
       bool count = count_instruction(n, ctx->compiler);
       if (count)
-         cycle += 1;
+         state->cycle += 1;
 
-      delay_update(ctx, state, n, cycle, mergedregs);
+      delay_update(ctx, state, n, mergedregs);
 
       if (count)
-         cycle += n->repeat + n->nop;
+         state->cycle += n->repeat + n->nop;
 
       if (ctx->early_input_release && is_input(n)) {
          last_input_needs_ss |= (n->opc == OPC_LDLV);
@@ -791,16 +792,16 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
     * cycle offset.
     */
    for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++)
-      state->pred_ready[i] = MAX2(state->pred_ready[i], cycle) - cycle;
+      state->pred_ready[i] = MAX2(state->pred_ready[i], state->cycle) - state->cycle;
    for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) {
       state->alu_nop.full_ready[i] =
-         MAX2(state->alu_nop.full_ready[i], cycle) - cycle;
+         MAX2(state->alu_nop.full_ready[i], state->cycle) - state->cycle;
       state->alu_nop.half_ready[i] =
-         MAX2(state->alu_nop.half_ready[i], cycle) - cycle;
+         MAX2(state->alu_nop.half_ready[i], state->cycle) - state->cycle;
       state->non_alu_nop.full_ready[i] =
-         MAX2(state->non_alu_nop.full_ready[i], cycle) - cycle;
+         MAX2(state->non_alu_nop.full_ready[i], state->cycle) - state->cycle;
       state->non_alu_nop.half_ready[i] =
-         MAX2(state->non_alu_nop.half_ready[i], cycle) - cycle;
+         MAX2(state->non_alu_nop.half_ready[i], state->cycle) - state->cycle;
    }
 
    bd->valid = true;