From c9c483bf0295ee7cd542d01c0eef939f7aeaec7b Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 16 Mar 2023 20:35:59 +0100 Subject: [PATCH] ir3: Enable early preamble Part-of: --- src/freedreno/ir3/ir3.h | 13 ++++++ src/freedreno/ir3/ir3_compiler.c | 1 + src/freedreno/ir3/ir3_compiler.h | 1 + src/freedreno/ir3/ir3_legalize.c | 75 +++++++++++++++++++++++++++++++- 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index dbd6509b5e6..33eab610218 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -673,6 +673,8 @@ struct ir3_block { bool reconvergence_point; + bool in_early_preamble; + /* Track instructions which do not write a register but other- * wise must not be discarded (such as kill, stg, etc) */ @@ -1486,6 +1488,14 @@ reg_gpr(struct ir3_register *r) return true; } +static inline bool +reg_is_addr1(struct ir3_register *r) +{ + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + return r->num == regid(REG_A0, 1); +} + static inline type_t half_type(type_t type) { @@ -1982,6 +1992,9 @@ is_ss_producer(struct ir3_instruction *instr) return true; } + if (instr->block->in_early_preamble && writes_addr1(instr)) + return true; + return is_sfu(instr) || is_local_mem_load(instr); } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index c8cf7c18136..6a2a2eb1426 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -49,6 +49,7 @@ static const struct debug_named_value shader_debug_options[] = { {"nopreamble", IR3_DBG_NOPREAMBLE, "Disable the preamble pass"}, {"fullsync", IR3_DBG_FULLSYNC, "Add (sy) + (ss) after each cat5/cat6"}, {"fullnop", IR3_DBG_FULLNOP, "Add nops before each instruction"}, + {"noearlypreamble", IR3_DBG_NOEARLYPREAMBLE, "Disable early preambles"}, #if MESA_DEBUG /* MESA_DEBUG-only options: */ {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"}, diff --git 
a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 39f5c775691..1b726dfef91 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -343,6 +343,7 @@ enum ir3_shader_debug { IR3_DBG_SHADER_INTERNAL = BITFIELD_BIT(14), IR3_DBG_FULLSYNC = BITFIELD_BIT(15), IR3_DBG_FULLNOP = BITFIELD_BIT(16), + IR3_DBG_NOEARLYPREAMBLE = BITFIELD_BIT(17), /* MESA_DEBUG-only options: */ IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 99538c9e647..d1129775d42 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -220,6 +220,9 @@ delay_update(struct ir3_legalize_state *state, unsigned cycle, bool mergedregs) { + if (writes_addr1(instr) && instr->block->in_early_preamble) + return; + foreach_dst_n (dst, n, instr) { unsigned elems = post_ra_reg_elems(dst); unsigned num = post_ra_reg_num(dst); @@ -495,6 +498,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) apply_ss(n, state, mergedregs); last_input_needs_ss = false; } + } else if (reg_is_addr1(reg) && block->in_early_preamble) { + if (regmask_get(&state->needs_ss, reg)) { + apply_ss(n, state, mergedregs); + last_input_needs_ss = false; + } } } @@ -507,6 +515,12 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } } + /* I'm not exactly sure what this is for, but it seems we need this on every + * mova1 in early preambles. + */ + if (writes_addr1(n) && block->in_early_preamble) + n->srcs[0]->flags |= IR3_REG_R; + /* cat5+ does not have an (ss) bit, if needed we need to * insert a nop to carry the sync flag.
Would be kinda * clever if we were aware of this during scheduling, but @@ -584,6 +598,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } else { regmask_set(&state->needs_ss, dst); } + } else if (reg_is_addr1(dst) && block->in_early_preamble) { + regmask_set(&state->needs_ss, dst); } } @@ -1587,16 +1603,73 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) * a5xx and a6xx do automatically release varying storage at the end. */ ctx->early_input_release = true; + struct ir3_block *start_block = ir3_after_preamble(ir); + + /* Gather information to determine whether we can enable early preamble. + */ + bool gpr_in_preamble = false; + bool pred_in_preamble = false; + bool relative_in_preamble = false; + bool in_preamble = start_block != ir3_start_block(ir); + bool has_preamble = start_block != ir3_start_block(ir); + foreach_block (block, &ir->block_list) { + if (block == start_block) + in_preamble = false; + foreach_instr (instr, &block->instr_list) { if (is_input(instr)) { ctx->has_inputs = true; if (block != start_block) { ctx->early_input_release = false; - break; } } + + if (is_meta(instr)) + continue; + + foreach_src (reg, instr) { + if (in_preamble) { + if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) && + is_reg_gpr(reg)) + gpr_in_preamble = true; + if (reg->flags & IR3_REG_RELATIV) + relative_in_preamble = true; + } + } + + foreach_dst (reg, instr) { + if (is_dest_gpr(reg)) { + if (in_preamble) { + if (!(reg->flags & IR3_REG_SHARED)) + gpr_in_preamble = true; + if (reg->flags & IR3_REG_RELATIV) + relative_in_preamble = true; + } + } + } + + if (in_preamble && writes_pred(instr)) { + pred_in_preamble = true; + } + } + } + + so->info.early_preamble = has_preamble && !gpr_in_preamble && + !pred_in_preamble && !relative_in_preamble && + ir->compiler->has_early_preamble && + !(ir3_shader_debug & IR3_DBG_NOEARLYPREAMBLE); + + /* On a7xx, sync behavior for a1.x is different in the early preamble. 
RaW + * dependencies must be synchronized with (ss) and there must be an extra + * (r) on the source of the mova1 instruction. + */ + if (so->info.early_preamble && ir->compiler->gen >= 7) { + foreach_block (block, &ir->block_list) { + if (block == start_block) + break; + block->in_early_preamble = true; } }