From f7002802c378a9eb0dfd74150495fff7f75e6c60 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 9 May 2025 11:26:05 +0200 Subject: [PATCH] ir3/legalize: normalize nop state at block start Now that we have the block's final cycle value available in its state, we don't have to subtract it at the end of a block anymore, but we can do it at the beginning when merging it into its successor state. This will save us one iteration over all its ready slots. Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_legalize.c | 35 ++++++++++---------------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index ba7207c656e..d7776b646e2 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -370,19 +370,24 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) state->needs_ss_for_const |= pstate->needs_ss_for_const; state->needs_sy_for_const |= pstate->needs_sy_for_const; - /* Our nop state is the max of the predecessor blocks */ + /* Our nop state is the max of the predecessor blocks. The predecessor nop + * state contains the cycle offset from the start of its block when each + * register becomes ready. But successor blocks need the cycle offset from + * their start, which is the predecessor block's end. Translate the + * cycle offset. 
+ */ for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++) state->pred_ready[i] = MAX2(state->pred_ready[i], - pstate->pred_ready[i]); + MAX2(pstate->pred_ready[i], pstate->cycle) - pstate->cycle); for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) { state->alu_nop.full_ready[i] = MAX2(state->alu_nop.full_ready[i], - pstate->alu_nop.full_ready[i]); + MAX2(pstate->alu_nop.full_ready[i], pstate->cycle) - pstate->cycle); state->alu_nop.half_ready[i] = MAX2(state->alu_nop.half_ready[i], - pstate->alu_nop.half_ready[i]); + MAX2(pstate->alu_nop.half_ready[i], pstate->cycle) - pstate->cycle); state->non_alu_nop.full_ready[i] = MAX2(state->non_alu_nop.full_ready[i], - pstate->non_alu_nop.full_ready[i]); + MAX2(pstate->non_alu_nop.full_ready[i], pstate->cycle) - pstate->cycle); state->non_alu_nop.half_ready[i] = MAX2(state->non_alu_nop.half_ready[i], - pstate->non_alu_nop.half_ready[i]); + MAX2(pstate->non_alu_nop.half_ready[i], pstate->cycle) - pstate->cycle); } } @@ -786,24 +791,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) list_add(&baryf->node, &block->instr_list); } - /* Currently our nop state contains the cycle offset from the start of this - * block when each register becomes ready. But successor blocks need the - * cycle offset from their start, which is this block's end. Translate the - * cycle offset. 
- */ - for (unsigned i = 0; i < ARRAY_SIZE(state->pred_ready); i++) - state->pred_ready[i] = MAX2(state->pred_ready[i], state->cycle) - state->cycle; - for (unsigned i = 0; i < ARRAY_SIZE(state->alu_nop.full_ready); i++) { - state->alu_nop.full_ready[i] = - MAX2(state->alu_nop.full_ready[i], state->cycle) - state->cycle; - state->alu_nop.half_ready[i] = - MAX2(state->alu_nop.half_ready[i], state->cycle) - state->cycle; - state->non_alu_nop.full_ready[i] = - MAX2(state->non_alu_nop.full_ready[i], state->cycle) - state->cycle; - state->non_alu_nop.half_ready[i] = - MAX2(state->non_alu_nop.half_ready[i], state->cycle) - state->cycle; - } - bd->valid = true; if (memcmp(&prev_state, state, sizeof(*state))) {