diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 25556052fef..d0219c1aceb 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1299,58 +1299,6 @@ is_input(struct ir3_instruction *instr) } } -/* Whether non-helper invocations can read the value of helper invocations. We - * cannot insert (eq) before these instructions. - */ -static inline bool -uses_helpers(struct ir3_instruction *instr) -{ - switch (instr->opc) { - /* These require helper invocations to be present */ - case OPC_SAMB: - case OPC_GETLOD: - case OPC_DSX: - case OPC_DSY: - case OPC_DSXPP_1: - case OPC_DSYPP_1: - case OPC_DSXPP_MACRO: - case OPC_DSYPP_MACRO: - case OPC_QUAD_SHUFFLE_BRCST: - case OPC_QUAD_SHUFFLE_HORIZ: - case OPC_QUAD_SHUFFLE_VERT: - case OPC_QUAD_SHUFFLE_DIAG: - case OPC_META_TEX_PREFETCH: - return true; - - /* sam requires helper invocations except for dummy prefetch instructions */ - case OPC_SAM: - return !has_dummy_dst(instr); - - /* Subgroup operations don't require helper invocations to be present, but - * will use helper invocations if they are present. - */ - case OPC_BALLOT_MACRO: - case OPC_ANY_MACRO: - case OPC_ALL_MACRO: - case OPC_READ_FIRST_MACRO: - case OPC_READ_COND_MACRO: - case OPC_MOVMSK: - case OPC_BRCST_ACTIVE: - return true; - - /* Catch lowered READ_FIRST/READ_COND. For elect, don't include the getone - * in the preamble because it doesn't actually matter which fiber is - * selected. 
- */ - case OPC_MOV: - case OPC_ELECT_MACRO: - return instr->flags & IR3_INSTR_NEEDS_HELPERS; - - default: - return false; - } -} - static inline bool is_bool(struct ir3_instruction *instr) { @@ -3302,7 +3250,8 @@ regmask_or_shared(regmask_t *dst, regmask_t *a, regmask_t *b) } static inline void -regmask_set(regmask_t *regmask, struct ir3_register *reg) +regmask_set_masked(regmask_t *regmask, struct ir3_register *reg, + unsigned wrmask) { unsigned size = reg_elem_size(reg); enum ir3_reg_file file; @@ -3311,12 +3260,18 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg) if (reg->flags & IR3_REG_RELATIV) { __regmask_set(regmask, file, n, size * reg->size); } else { - for (unsigned mask = reg->wrmask; mask; mask >>= 1, n += size) + for (unsigned mask = reg->wrmask & wrmask; mask; mask >>= 1, n += size) if (mask & 1) __regmask_set(regmask, file, n, size); } } +static inline void +regmask_set(regmask_t *regmask, struct ir3_register *reg) +{ + regmask_set_masked(regmask, reg, ~0); +} + static inline void regmask_clear(regmask_t *regmask, struct ir3_register *reg) { diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index b9ef0a418f6..e05de9df672 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -1594,6 +1594,208 @@ dbg_expand_rpt(struct ir3 *ir) } } +struct ir3_mark_helpers_data { + bool valid; + regmask_t needs_helpers; +}; + +static void +instr_mark_helpers(struct ir3_mark_helpers_data *bd, + struct ir3_instruction *instr) +{ + if (instr->flags & IR3_INSTR_NEEDS_HELPERS) { + return; + } + + foreach_dst (dst, instr) { + if (dst->flags & (IR3_REG_RT | IR3_REG_DUMMY)) { + continue; + } + + if (regmask_get(&bd->needs_helpers, dst)) { + instr->flags |= IR3_INSTR_NEEDS_HELPERS; + return; + } + } + + switch (instr->opc) { + case OPC_MOVMSK: + case OPC_BRCST_ACTIVE: + case OPC_QUAD_SHUFFLE_BRCST: + case OPC_QUAD_SHUFFLE_HORIZ: + case OPC_QUAD_SHUFFLE_VERT: + case OPC_QUAD_SHUFFLE_DIAG: + case 
OPC_BALL: + case OPC_BANY: + /* Subgroup operations don't require helper invocations to be present, but + * will use helper invocations if they are present. + */ + instr->flags |= IR3_INSTR_NEEDS_HELPERS; + return; + + case OPC_SAM: + case OPC_SAMB: + case OPC_GETLOD: + case OPC_DSX: + case OPC_DSY: + case OPC_DSXPP_1: + case OPC_DSYPP_1: { + if (instr->opc == OPC_SAM && has_dummy_dst(instr)) { + /* sam requires helper invocations except for dummy prefetch + * instructions. + */ + return; + } + + /* These instructions don't use helpers themselves but have a src that + * needs to be calculated using helpers (e.g., the coordinates used to + * calculate derivatives). Mark the src register as needing helpers so + * that we can keep helpers enabled until it is written. + */ + unsigned nsrcs; + + if (instr->opc == OPC_SAM || instr->opc == OPC_SAMB || + instr->opc == OPC_GETLOD) { + nsrcs = (instr->flags & IR3_INSTR_3D) ? 3 : 2; + } else { + /* dsx/dsy: derive the number of sources from the dst wrmask since the + * src itself may use aliases. + */ + nsrcs = util_last_bit(instr->dsts[0]->wrmask); + } + + if (instr->srcs[0]->flags & IR3_REG_FIRST_ALIAS) { + assert(nsrcs <= instr->srcs_count); + + for (unsigned i = 0; i < nsrcs; i++) { + struct ir3_register *src = instr->srcs[i]; + + if (is_reg_gpr(src)) { + regmask_set(&bd->needs_helpers, src); + } + } + } else { + regmask_set_masked(&bd->needs_helpers, instr->srcs[0], MASK(nsrcs)); + } + + break; + } + + default: + break; + } +} + +/* Apply IR3_INSTR_NEEDS_HELPERS to instructions that need helper invocations to + * be active. Note that we don't necessarily apply it to all instructions that + * need helpers, just to the last one in each block, as that gives us enough + * information for inserting (eq) to kill helpers. + * + * We use a backwards data-flow analysis because we cannot always know whether + * an instruction needs helpers by just looking at the opcode. 
For example, + * instructions that calculate (implicit) derivatives don't need helpers to be + * active but the calculation of their src needs to be done with active helpers. + */ +static bool +mark_helpers(struct ir3_legalize_ctx *ctx, struct ir3 *ir, + struct ir3_shader_variant *so) +{ + foreach_block (block, &ir->block_list) { + struct ir3_mark_helpers_data *bd = + ralloc(ctx, struct ir3_mark_helpers_data); + bd->valid = false; + regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs); + block->data = bd; + } + + bool uses_helpers = false; + bool progress; + + do { + progress = false; + + foreach_block_rev (block, &ir->block_list) { + struct ir3_mark_helpers_data *bd = block->data; + + if (bd->valid) { + continue; + } + + struct ir3_mark_helpers_data prev_bd = *bd; + regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs); + bool may_have_needs_helpers_at_entry = true; + + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + struct ir3_block *succ = block->successors[i]; + if (!succ) { + continue; + } + + struct ir3_mark_helpers_data *succ_bd = succ->data; + regmask_or(&bd->needs_helpers, &bd->needs_helpers, + &succ_bd->needs_helpers); + } + + foreach_instr_rev (instr, &block->instr_list) { + instr_mark_helpers(bd, instr); + + /* We only care about the last instruction needing helpers. */ + if (instr->flags & IR3_INSTR_NEEDS_HELPERS) { + uses_helpers = true; + + /* This also means we can stop tracking needs_helpers. This saves + * us from unnecessarily invalidating predecessors. Making sure + * loops are handled correctly is done in helper_sched. + */ + regmask_init(&bd->needs_helpers, ctx->compiler->mergedregs); + may_have_needs_helpers_at_entry = false; + break; + } + } + + bd->valid = true; + + /* We have to invalidate the block's predecessors whenever it has more + * needs_helpers registers than the previous time around because this + * may cause more instructions to be marked as needing helpers in its + * predecessors. 
We don't have to do this when it has fewer + * needs_helpers registers as this won't change anything. This is + * checked using may_have_needs_helpers_at_entry which will be false + * whenever we cleared needs_helpers. + */ + if (may_have_needs_helpers_at_entry && + memcmp(&prev_bd.needs_helpers, &bd->needs_helpers, + sizeof(prev_bd.needs_helpers)) != 0) { + progress = true; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + struct ir3_mark_helpers_data *pred_bd = + block->predecessors[i]->data; + pred_bd->valid = false; + } + } + } + } while (progress); + + struct ir3_block *start_block = ir3_start_block(ir); + struct ir3_mark_helpers_data *start_bd = start_block->data; + + foreach_input (input, ir) { + if (regmask_get(&start_bd->needs_helpers, input->dsts[0])) { + /* If we need helpers for an input reg, we have to make sure helpers + * are enabled when we enter the shader. Just mark the first + * instruction as needing helpers. + */ + struct ir3_instruction *first = ir3_block_get_first_instr(start_block); + first->flags |= IR3_INSTR_NEEDS_HELPERS; + uses_helpers = true; + break; + } + } + + return uses_helpers; +} + struct ir3_helper_block_data { /* Whether helper invocations may be used on any path starting at the * beginning of the block. 
@@ -1618,16 +1820,15 @@ static void helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, struct ir3_shader_variant *so) { - bool non_prefetch_helpers = false; - foreach_block (block, &ir->block_list) { struct ir3_helper_block_data *bd = rzalloc(ctx, struct ir3_helper_block_data); foreach_instr (instr, &block->instr_list) { - if (uses_helpers(instr)) { + if (instr->flags & IR3_INSTR_NEEDS_HELPERS) { bd->uses_helpers_beginning = true; - if (instr->opc != OPC_META_TEX_PREFETCH) { - non_prefetch_helpers = true; + + if (is_terminator(instr)) { + bd->uses_helpers_end = true; } } @@ -1640,28 +1841,9 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, } } - struct ir3_instruction *terminator = ir3_block_get_terminator(block); - if (terminator) { - if (terminator->opc == OPC_BALL || terminator->opc == OPC_BANY || - (terminator->opc == OPC_GETONE && - (terminator->flags & IR3_INSTR_NEEDS_HELPERS))) { - bd->uses_helpers_beginning = true; - bd->uses_helpers_end = true; - non_prefetch_helpers = true; - } - } - block->data = bd; } - /* If only prefetches use helpers then we can disable them in the shader via - * a register setting. - */ - if (!non_prefetch_helpers) { - so->prefetch_end_of_quad = true; - return; - } - bool progress; do { progress = false; @@ -1757,11 +1939,7 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, */ struct ir3_instruction *first_instr = NULL; foreach_instr_rev (instr, &block->instr_list) { - /* Skip prefetches because they actually execute before the block - * starts and at this stage they aren't guaranteed to be at the start - * of the block. - */ - if (uses_helpers(instr) && instr->opc != OPC_META_TEX_PREFETCH) + if (instr->flags & IR3_INSTR_NEEDS_HELPERS) break; first_instr = instr; } @@ -2286,8 +2464,16 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) /* TODO: does (eq) exist before a6xx? 
*/ if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod && - so->compiler->gen >= 6) - helper_sched(ctx, ir, so); + so->compiler->gen >= 6) { + if (mark_helpers(ctx, ir, so)) { + helper_sched(ctx, ir, so); + } else { + /* If no instruction was marked as needing helpers, we can disable + * helper invocations in the shader via a register setting. + */ + so->prefetch_end_of_quad = true; + } + } if (ir3_shader_debug & IR3_DBG_FULLSYNC) { dbg_sync_sched(ir, so);