mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 23:00:11 +01:00
ir3: add helper to calculate src read delay
cat3 instructions read their 3rd src later than their first two srcs. This was implemented in two different places: once for scheduling and once for legalization. Extract this logic in a new helper and also add similar logic for gat/swz there (which the scheduling logic failed to account for).

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33183>
This commit is contained in:
parent
e7ac1094f6
commit
2c7c62dfd9
3 changed files with 29 additions and 21 deletions
|
|
@ -2118,6 +2118,8 @@ struct log_stream;
|
|||
void ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr);
|
||||
|
||||
/* delay calculation: */
|
||||
unsigned ir3_src_read_delay(struct ir3_compiler *compiler,
|
||||
struct ir3_instruction *instr, unsigned src_n);
|
||||
int ir3_delayslots(struct ir3_compiler *compiler,
|
||||
struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned n, bool soft);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,28 @@
|
|||
* src iterators work.
|
||||
*/
|
||||
|
||||
/* Return the number of cycles from the start of the instruction until src_n is
|
||||
* read.
|
||||
*/
|
||||
unsigned
|
||||
ir3_src_read_delay(struct ir3_compiler *compiler, struct ir3_instruction *instr,
|
||||
unsigned src_n)
|
||||
{
|
||||
/* gat and swz have scalar sources and each source is read in a subsequent
|
||||
* cycle.
|
||||
*/
|
||||
if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ) {
|
||||
return src_n;
|
||||
}
|
||||
|
||||
/* cat3 instructions consume their last source one or two cycles later. */
|
||||
if ((is_mad(instr->opc) || is_madsh(instr->opc)) && src_n == 2) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* calculate required # of delay slots between the instruction that
|
||||
* assigns a value and the one that consumes
|
||||
*/
|
||||
|
|
@ -85,12 +107,7 @@ ir3_delayslots(struct ir3_compiler *compiler,
|
|||
bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) !=
|
||||
(consumer->srcs[n]->flags & IR3_REG_HALF);
|
||||
unsigned penalty = mismatched_half ? 3 : 0;
|
||||
if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) {
|
||||
/* special case, 3rd src to cat3 not required on first cycle */
|
||||
return 1 + penalty;
|
||||
} else {
|
||||
return 3 + penalty;
|
||||
}
|
||||
return 3 + penalty - ir3_src_read_delay(compiler, consumer, n);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -178,7 +178,8 @@ get_ready_slot(struct ir3_legalize_state *state,
|
|||
}
|
||||
|
||||
static unsigned
|
||||
delay_calc(struct ir3_legalize_state *state,
|
||||
delay_calc(struct ir3_legalize_ctx *ctx,
|
||||
struct ir3_legalize_state *state,
|
||||
struct ir3_instruction *instr,
|
||||
unsigned cycle)
|
||||
{
|
||||
|
|
@ -193,19 +194,7 @@ delay_calc(struct ir3_legalize_state *state,
|
|||
|
||||
unsigned elems = post_ra_reg_elems(src);
|
||||
unsigned num = post_ra_reg_num(src);
|
||||
unsigned src_cycle = cycle;
|
||||
|
||||
/* gat and swz have scalar sources and each source is read in a
|
||||
* subsequent cycle.
|
||||
*/
|
||||
if (instr->opc == OPC_GAT || instr->opc == OPC_SWZ)
|
||||
src_cycle += n;
|
||||
|
||||
/* cat3 instructions consume their last source two cycles later, so they
|
||||
* only need a delay of 1.
|
||||
*/
|
||||
if ((is_mad(instr->opc) || is_madsh(instr->opc)) && n == 2)
|
||||
src_cycle += 2;
|
||||
unsigned src_cycle = cycle + ir3_src_read_delay(ctx->compiler, instr, n);
|
||||
|
||||
for (unsigned elem = 0; elem < elems; elem++, num++) {
|
||||
unsigned ready_cycle =
|
||||
|
|
@ -560,7 +549,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
cycle++;
|
||||
}
|
||||
|
||||
unsigned delay = delay_calc(state, n, cycle);
|
||||
unsigned delay = delay_calc(ctx, state, n, cycle);
|
||||
|
||||
/* NOTE: I think the nopN encoding works for a5xx and
|
||||
* probably a4xx, but not a3xx. So far only tested on
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue