mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
ir3: make delay slots a compiler property
They changed on a7xx so we want to make it configurable. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33183>
This commit is contained in:
parent
2c7c62dfd9
commit
5460be5d33
4 changed files with 35 additions and 8 deletions
|
|
@ -160,6 +160,9 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
compiler->has_branch_and_or = false;
|
||||
compiler->has_rpt_bary_f = false;
|
||||
compiler->has_alias_tex = false;
|
||||
compiler->delay_slots.alu_to_alu = 3;
|
||||
compiler->delay_slots.non_alu = 6;
|
||||
compiler->delay_slots.cat3_src2_read = 2;
|
||||
|
||||
if (compiler->gen >= 6) {
|
||||
compiler->samgq_workaround = true;
|
||||
|
|
|
|||
|
|
@ -289,6 +289,26 @@ struct ir3_compiler {
|
|||
bool has_alias_rt;
|
||||
|
||||
bool reading_shading_rate_requires_smask_quirk;
|
||||
|
||||
struct {
|
||||
/* The number of cycles needed for the result of one ALU operation to be
|
||||
* available to another ALU operation. Only valid when the halfness of the
|
||||
* source and destination match.
|
||||
*/
|
||||
unsigned alu_to_alu;
|
||||
|
||||
/* The number of cycles needed for the result of one instruction to be
|
||||
* available to another. Valid for a0.x, a1.x, and p0.c destinations, ALU
|
||||
* to non-ALU dependencies, and ALU to ALU dependencies with mismatched
|
||||
* halfness.
|
||||
*/
|
||||
unsigned non_alu;
|
||||
|
||||
/* The number of cycles from the start of the instruction until a cat3
|
||||
* instruction reads its 3rd src.
|
||||
*/
|
||||
unsigned cat3_src2_read;
|
||||
} delay_slots;
|
||||
};
|
||||
|
||||
void ir3_compiler_destroy(struct ir3_compiler *compiler);
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ ir3_src_read_delay(struct ir3_compiler *compiler, struct ir3_instruction *instr,
|
|||
|
||||
/* cat3 instructions consume their last source one or two cycles later. */
|
||||
if ((is_mad(instr->opc) || is_madsh(instr->opc)) && src_n == 2) {
|
||||
return 2;
|
||||
return compiler->delay_slots.cat3_src2_read;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -68,7 +68,7 @@ ir3_delayslots(struct ir3_compiler *compiler,
|
|||
return 0;
|
||||
|
||||
if (writes_addr0(assigner) || writes_addr1(assigner))
|
||||
return 6;
|
||||
return compiler->delay_slots.non_alu;
|
||||
|
||||
if (soft && needs_ss(compiler, assigner, consumer))
|
||||
return soft_ss_delay(assigner);
|
||||
|
|
@ -98,7 +98,7 @@ ir3_delayslots(struct ir3_compiler *compiler,
|
|||
/* assigner must be alu: */
|
||||
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
|
||||
is_mem(consumer)) {
|
||||
return 6;
|
||||
return compiler->delay_slots.non_alu;
|
||||
} else {
|
||||
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
|
||||
* a full-reg is read as a half-reg or when a half-reg is read as a
|
||||
|
|
@ -107,7 +107,8 @@ ir3_delayslots(struct ir3_compiler *compiler,
|
|||
bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) !=
|
||||
(consumer->srcs[n]->flags & IR3_REG_HALF);
|
||||
unsigned penalty = mismatched_half ? 3 : 0;
|
||||
return 3 + penalty - ir3_src_read_delay(compiler, consumer, n);
|
||||
return compiler->delay_slots.alu_to_alu + penalty -
|
||||
ir3_src_read_delay(compiler, consumer, n);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -213,7 +213,8 @@ delay_calc(struct ir3_legalize_ctx *ctx,
|
|||
}
|
||||
|
||||
static void
|
||||
delay_update(struct ir3_legalize_state *state,
|
||||
delay_update(struct ir3_legalize_ctx *ctx,
|
||||
struct ir3_legalize_state *state,
|
||||
struct ir3_instruction *instr,
|
||||
unsigned cycle,
|
||||
bool mergedregs)
|
||||
|
|
@ -265,11 +266,13 @@ delay_update(struct ir3_legalize_state *state,
|
|||
reset_ready_slot = true;
|
||||
} else if ((dst->flags & IR3_REG_PREDICATE) ||
|
||||
reg_num(dst) == REG_A0) {
|
||||
delay = 6;
|
||||
delay = ctx->compiler->delay_slots.non_alu;
|
||||
if (!matching_size)
|
||||
continue;
|
||||
} else {
|
||||
delay = (consumer_alu && matching_size) ? 3 : 6;
|
||||
delay = (consumer_alu && matching_size)
|
||||
? ctx->compiler->delay_slots.alu_to_alu
|
||||
: ctx->compiler->delay_slots.non_alu;
|
||||
}
|
||||
|
||||
if (!matching_size) {
|
||||
|
|
@ -697,7 +700,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
if (count)
|
||||
cycle += 1;
|
||||
|
||||
delay_update(state, n, cycle, mergedregs);
|
||||
delay_update(ctx, state, n, cycle, mergedregs);
|
||||
|
||||
if (count)
|
||||
cycle += n->repeat + n->nop;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue