ir3: Enable early preamble

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27462>
This commit is contained in:
Connor Abbott 2023-03-16 20:35:59 +01:00 committed by Marge Bot
parent d8d192f3f4
commit c9c483bf02
4 changed files with 89 additions and 1 deletions

View file

@ -673,6 +673,8 @@ struct ir3_block {
bool reconvergence_point;
bool in_early_preamble;
/* Track instructions which do not write a register but other-
* wise must not be discarded (such as kill, stg, etc)
*/
@ -1486,6 +1488,14 @@ reg_gpr(struct ir3_register *r)
return true;
}
/* Report whether a register operand is the one numbered regid(REG_A0, 1).
 * Operands flagged as constants or immediates never match, whatever their
 * number happens to be.
 */
static inline bool
reg_is_addr1(struct ir3_register *r)
{
   const bool const_or_immed =
      (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) != 0;

   return !const_or_immed && r->num == regid(REG_A0, 1);
}
static inline type_t
half_type(type_t type)
{
@ -1982,6 +1992,9 @@ is_ss_producer(struct ir3_instruction *instr)
return true;
}
if (instr->block->in_early_preamble && writes_addr1(instr))
return true;
return is_sfu(instr) || is_local_mem_load(instr);
}

View file

@ -49,6 +49,7 @@ static const struct debug_named_value shader_debug_options[] = {
{"nopreamble", IR3_DBG_NOPREAMBLE, "Disable the preamble pass"},
{"fullsync", IR3_DBG_FULLSYNC, "Add (sy) + (ss) after each cat5/cat6"},
{"fullnop", IR3_DBG_FULLNOP, "Add nops before each instruction"},
{"noearlypreamble", IR3_DBG_NOEARLYPREAMBLE, "Disable early preambles"},
#if MESA_DEBUG
/* MESA_DEBUG-only options: */
{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},

View file

@ -343,6 +343,7 @@ enum ir3_shader_debug {
IR3_DBG_SHADER_INTERNAL = BITFIELD_BIT(14),
IR3_DBG_FULLSYNC = BITFIELD_BIT(15),
IR3_DBG_FULLNOP = BITFIELD_BIT(16),
IR3_DBG_NOEARLYPREAMBLE = BITFIELD_BIT(17),
/* MESA_DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),

View file

@ -220,6 +220,9 @@ delay_update(struct ir3_legalize_state *state,
unsigned cycle,
bool mergedregs)
{
if (writes_addr1(instr) && instr->block->in_early_preamble)
return;
foreach_dst_n (dst, n, instr) {
unsigned elems = post_ra_reg_elems(dst);
unsigned num = post_ra_reg_num(dst);
@ -495,6 +498,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
apply_ss(n, state, mergedregs);
last_input_needs_ss = false;
}
} else if (reg_is_addr1(reg) && block->in_early_preamble) {
if (regmask_get(&state->needs_ss, reg)) {
apply_ss(n, state, mergedregs);
last_input_needs_ss = false;
}
}
}
@ -507,6 +515,12 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
}
}
/* I'm not exactly sure what this is for, but it seems we need this on every
* mova1 in early preambles.
*/
if (writes_addr1(n) && block->in_early_preamble)
n->srcs[0]->flags |= IR3_REG_R;
/* cat5+ does not have an (ss) bit, if needed we need to
* insert a nop to carry the sync flag. Would be kinda
* clever if we were aware of this during scheduling, but
@ -584,6 +598,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
} else {
regmask_set(&state->needs_ss, dst);
}
} else if (reg_is_addr1(dst) && block->in_early_preamble) {
regmask_set(&state->needs_ss, dst);
}
}
@ -1587,16 +1603,73 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
* a5xx and a6xx do automatically release varying storage at the end.
*/
ctx->early_input_release = true;
struct ir3_block *start_block = ir3_after_preamble(ir);
/* Gather information to determine whether we can enable early preamble.
*/
bool gpr_in_preamble = false;
bool pred_in_preamble = false;
bool relative_in_preamble = false;
bool in_preamble = start_block != ir3_start_block(ir);
bool has_preamble = start_block != ir3_start_block(ir);
foreach_block (block, &ir->block_list) {
if (block == start_block)
in_preamble = false;
foreach_instr (instr, &block->instr_list) {
if (is_input(instr)) {
ctx->has_inputs = true;
if (block != start_block) {
ctx->early_input_release = false;
break;
}
}
if (is_meta(instr))
continue;
foreach_src (reg, instr) {
if (in_preamble) {
if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) &&
is_reg_gpr(reg))
gpr_in_preamble = true;
if (reg->flags & IR3_REG_RELATIV)
relative_in_preamble = true;
}
}
foreach_dst (reg, instr) {
if (is_dest_gpr(reg)) {
if (in_preamble) {
if (!(reg->flags & IR3_REG_SHARED))
gpr_in_preamble = true;
if (reg->flags & IR3_REG_RELATIV)
relative_in_preamble = true;
}
}
}
if (in_preamble && writes_pred(instr)) {
pred_in_preamble = true;
}
}
}
so->info.early_preamble = has_preamble && !gpr_in_preamble &&
!pred_in_preamble && !relative_in_preamble &&
ir->compiler->has_early_preamble &&
!(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE);
/* On a7xx, sync behavior for a1.x is different in the early preamble. RaW
* dependencies must be synchronized with (ss) and there must be an extra
* (r) on the source of the mova1 instruction.
*/
if (so->info.early_preamble && ir->compiler->gen >= 7) {
foreach_block (block, &ir->block_list) {
if (block == start_block)
break;
block->in_early_preamble = true;
}
}