diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 2bcaa274fac..b11ec10b011 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2118,6 +2118,8 @@ struct log_stream; void ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr); /* delay calculation: */ +unsigned ir3_src_read_delay(struct ir3_compiler *compiler, + struct ir3_instruction *instr, unsigned src_n); int ir3_delayslots(struct ir3_compiler *compiler, struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n, bool soft); diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index caae8ac43a0..f4370fd23fd 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -23,6 +23,28 @@ * src iterators work. */ +/* Return the number of cycles from the start of instr until source src_n is + * read: src_n for gat/swz, 2 for src_n == 2 of mad/madsh, and 0 otherwise. + */ +unsigned +ir3_src_read_delay(struct ir3_compiler *compiler, struct ir3_instruction *instr, + unsigned src_n) +{ + /* gat and swz have scalar sources and each source is read in a subsequent + * cycle. + */ + if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ) { + return src_n; + } + + /* cat3 instructions consume their last source one or two cycles later. */ + if ((is_mad(instr->opc) || is_madsh(instr->opc)) && src_n == 2) { + return 2; + } + + return 0; +} + /* calculate required # of delay slots between the instruction that * assigns a value and the one that consumes */ @@ -85,12 +107,7 @@ ir3_delayslots(struct ir3_compiler *compiler, bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) != (consumer->srcs[n]->flags & IR3_REG_HALF); unsigned penalty = mismatched_half ? 
3 : 0; - if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) { - /* special case, 3rd src to cat3 not required on first cycle */ - return 1 + penalty; - } else { - return 3 + penalty; - } + return 3 + penalty - ir3_src_read_delay(compiler, consumer, n); } } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index c2c5dd42844..202bfac171c 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -178,7 +178,8 @@ get_ready_slot(struct ir3_legalize_state *state, } static unsigned -delay_calc(struct ir3_legalize_state *state, +delay_calc(struct ir3_legalize_ctx *ctx, + struct ir3_legalize_state *state, struct ir3_instruction *instr, unsigned cycle) { @@ -193,19 +194,7 @@ delay_calc(struct ir3_legalize_state *state, unsigned elems = post_ra_reg_elems(src); unsigned num = post_ra_reg_num(src); - unsigned src_cycle = cycle; - - /* gat and swz have scalar sources and each source is read in a - * subsequent cycle. - */ - if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ) - src_cycle += n; - - /* cat3 instructions consume their last source two cycles later, so they - * only need a delay of 1. - */ - if ((is_mad(instr->opc) || is_madsh(instr->opc)) && n == 2) - src_cycle += 2; + unsigned src_cycle = cycle + ir3_src_read_delay(ctx->compiler, instr, n); for (unsigned elem = 0; elem < elems; elem++, num++) { unsigned ready_cycle = @@ -560,7 +549,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) cycle++; } - unsigned delay = delay_calc(state, n, cycle); + unsigned delay = delay_calc(ctx, state, n, cycle); /* NOTE: I think the nopN encoding works for a5xx and * probably a4xx, but not a3xx. So far only tested on