mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 04:50:11 +01:00
ir3: Enable early preamble
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27462>
This commit is contained in:
parent
d8d192f3f4
commit
c9c483bf02
4 changed files with 89 additions and 1 deletions
|
|
@ -673,6 +673,8 @@ struct ir3_block {
|
|||
|
||||
bool reconvergence_point;
|
||||
|
||||
bool in_early_preamble;
|
||||
|
||||
/* Track instructions which do not write a register but other-
|
||||
* wise must not be discarded (such as kill, stg, etc)
|
||||
*/
|
||||
|
|
@ -1486,6 +1488,14 @@ reg_gpr(struct ir3_register *r)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
reg_is_addr1(struct ir3_register *r)
|
||||
{
|
||||
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
|
||||
return false;
|
||||
return r->num == regid(REG_A0, 1);
|
||||
}
|
||||
|
||||
static inline type_t
|
||||
half_type(type_t type)
|
||||
{
|
||||
|
|
@ -1982,6 +1992,9 @@ is_ss_producer(struct ir3_instruction *instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (instr->block->in_early_preamble && writes_addr1(instr))
|
||||
return true;
|
||||
|
||||
return is_sfu(instr) || is_local_mem_load(instr);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ static const struct debug_named_value shader_debug_options[] = {
|
|||
{"nopreamble", IR3_DBG_NOPREAMBLE, "Disable the preamble pass"},
|
||||
{"fullsync", IR3_DBG_FULLSYNC, "Add (sy) + (ss) after each cat5/cat6"},
|
||||
{"fullnop", IR3_DBG_FULLNOP, "Add nops before each instruction"},
|
||||
{"noearlypreamble", IR3_DBG_NOEARLYPREAMBLE, "Disable early preambles"},
|
||||
#if MESA_DEBUG
|
||||
/* MESA_DEBUG-only options: */
|
||||
{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
|
||||
|
|
|
|||
|
|
@ -343,6 +343,7 @@ enum ir3_shader_debug {
|
|||
IR3_DBG_SHADER_INTERNAL = BITFIELD_BIT(14),
|
||||
IR3_DBG_FULLSYNC = BITFIELD_BIT(15),
|
||||
IR3_DBG_FULLNOP = BITFIELD_BIT(16),
|
||||
IR3_DBG_NOEARLYPREAMBLE = BITFIELD_BIT(17),
|
||||
|
||||
/* MESA_DEBUG-only options: */
|
||||
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
|
||||
|
|
|
|||
|
|
@ -220,6 +220,9 @@ delay_update(struct ir3_legalize_state *state,
|
|||
unsigned cycle,
|
||||
bool mergedregs)
|
||||
{
|
||||
if (writes_addr1(instr) && instr->block->in_early_preamble)
|
||||
return;
|
||||
|
||||
foreach_dst_n (dst, n, instr) {
|
||||
unsigned elems = post_ra_reg_elems(dst);
|
||||
unsigned num = post_ra_reg_num(dst);
|
||||
|
|
@ -495,6 +498,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
apply_ss(n, state, mergedregs);
|
||||
last_input_needs_ss = false;
|
||||
}
|
||||
} else if (reg_is_addr1(reg) && block->in_early_preamble) {
|
||||
if (regmask_get(&state->needs_ss, reg)) {
|
||||
apply_ss(n, state, mergedregs);
|
||||
last_input_needs_ss = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -507,6 +515,12 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
}
|
||||
}
|
||||
|
||||
/* I'm not exactly sure what this is for, but it seems we need this on every
|
||||
* mova1 in early preambles.
|
||||
*/
|
||||
if (writes_addr1(n) && block->in_early_preamble)
|
||||
n->srcs[0]->flags |= IR3_REG_R;
|
||||
|
||||
/* cat5+ does not have an (ss) bit, if needed we need to
|
||||
* insert a nop to carry the sync flag. Would be kinda
|
||||
* clever if we were aware of this during scheduling, but
|
||||
|
|
@ -584,6 +598,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
} else {
|
||||
regmask_set(&state->needs_ss, dst);
|
||||
}
|
||||
} else if (reg_is_addr1(dst) && block->in_early_preamble) {
|
||||
regmask_set(&state->needs_ss, dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1587,16 +1603,73 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
|
|||
* a5xx and a6xx do automatically release varying storage at the end.
|
||||
*/
|
||||
ctx->early_input_release = true;
|
||||
|
||||
struct ir3_block *start_block = ir3_after_preamble(ir);
|
||||
|
||||
/* Gather information to determine whether we can enable early preamble.
|
||||
*/
|
||||
bool gpr_in_preamble = false;
|
||||
bool pred_in_preamble = false;
|
||||
bool relative_in_preamble = false;
|
||||
bool in_preamble = start_block != ir3_start_block(ir);
|
||||
bool has_preamble = start_block != ir3_start_block(ir);
|
||||
|
||||
foreach_block (block, &ir->block_list) {
|
||||
if (block == start_block)
|
||||
in_preamble = false;
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
if (is_input(instr)) {
|
||||
ctx->has_inputs = true;
|
||||
if (block != start_block) {
|
||||
ctx->early_input_release = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_meta(instr))
|
||||
continue;
|
||||
|
||||
foreach_src (reg, instr) {
|
||||
if (in_preamble) {
|
||||
if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) &&
|
||||
is_reg_gpr(reg))
|
||||
gpr_in_preamble = true;
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
relative_in_preamble = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach_dst (reg, instr) {
|
||||
if (is_dest_gpr(reg)) {
|
||||
if (in_preamble) {
|
||||
if (!(reg->flags & IR3_REG_SHARED))
|
||||
gpr_in_preamble = true;
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
relative_in_preamble = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (in_preamble && writes_pred(instr)) {
|
||||
pred_in_preamble = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
so->info.early_preamble = has_preamble && !gpr_in_preamble &&
|
||||
!pred_in_preamble && !relative_in_preamble &&
|
||||
ir->compiler->has_early_preamble &&
|
||||
!(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE);
|
||||
|
||||
/* On a7xx, sync behavior for a1.x is different in the early preamble. RaW
|
||||
* dependencies must be synchronized with (ss) and there must be an extra
|
||||
* (r) on the source of the mova1 instruction.
|
||||
*/
|
||||
if (so->info.early_preamble && ir->compiler->gen >= 7) {
|
||||
foreach_block (block, &ir->block_list) {
|
||||
if (block == start_block)
|
||||
break;
|
||||
block->in_early_preamble = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue