diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 696d9e0ff30..68f77b46a27 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -1808,83 +1808,101 @@ mark_helpers(struct ir3_legalize_ctx *ctx, struct ir3 *ir, return uses_helpers; } -struct ir3_helper_block_data { - /* Whether helper invocations may be used on any path starting at the +struct ir3_end_of_feature_block_data { + /* Whether the feature may be used on any path starting at the * beginning of the block. */ - bool uses_helpers_beginning; + bool uses_feature_beginning; - /* Whether helper invocations may be used by the end of the block. Branch - * instructions are considered to be "between" blocks, because (eq) has to be - * inserted after them in the successor blocks, so branch instructions using - * helpers will result in uses_helpers_end = true for their block. + /* Whether the feature may be used by the end of the block. Branch + * instructions are considered to be "between" blocks, because the flag has + * to be inserted after them in the successor blocks, so branch instructions + * using the feature will result in uses_feature_end = true for their block. */ - bool uses_helpers_end; + bool uses_feature_end; }; -/* Insert (eq) after the last instruction using the results of helper - * invocations. Use a backwards dataflow analysis to determine at which points - * in the program helper invocations are definitely never used, and then insert - * (eq) at the point where we cross from a point where they may be used to a - * point where they are never used. +/* There are NOP flags which signify that a certain feature will not be used + * after this point. It could be the end of helper invocations (eq) or the end + * of cat5/cat6 usage (eolm)/(eogm). The common denominator is that such flags + * are placed onto a nop and cannot be used inside control flow.
+ * + * Use a backwards dataflow analysis to determine at which points in the + * program the feature is definitely never used, and then insert the + * corresponding flag at the point where we cross from a point where it may be + * used to a point where it is never used. */ -static void -helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, - struct ir3_shader_variant *so) +static bool +feature_usage_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, + struct ir3_shader_variant *so, + bool (*check_instr)(struct ir3_instruction *), + bool (*cheap_instr)(struct ir3_instruction *), + uint32_t new_flag) { + bool uses_feature = false; + foreach_block (block, &ir->block_list) { - struct ir3_helper_block_data *bd = - rzalloc(ctx, struct ir3_helper_block_data); + struct ir3_end_of_feature_block_data *bd = + rzalloc(ctx, struct ir3_end_of_feature_block_data); foreach_instr (instr, &block->instr_list) { - if (instr->flags & IR3_INSTR_NEEDS_HELPERS) { - bd->uses_helpers_beginning = true; + if (check_instr(instr)) { + uses_feature = true; + bd->uses_feature_beginning = true; if (is_terminator(instr)) { - bd->uses_helpers_end = true; + bd->uses_feature_end = true; } } if (instr->opc == OPC_SHPE) { - /* (eq) is not allowed in preambles, mark the whole preamble as - * requiring helpers to avoid putting it there. + /* The flags are not allowed in preambles, mark the whole preamble + * as using the feature to avoid putting them there. */ - bd->uses_helpers_beginning = true; - bd->uses_helpers_end = true; + bd->uses_feature_beginning = true; + bd->uses_feature_end = true; } } block->data = bd; } + /* If the feature was not needed by any instruction, return early. Features + * that aren't used in the shader can generally be disabled at the top level + * (helpers disabled at dispatch, local memory size set to 0), so no need to + * add a NOP just to add the flag. 
+ */ + if (!uses_feature) + return false; + bool progress; do { progress = false; foreach_block_rev (block, &ir->block_list) { - struct ir3_helper_block_data *bd = block->data; + struct ir3_end_of_feature_block_data *bd = block->data; - if (!bd->uses_helpers_beginning) + if (!bd->uses_feature_beginning) continue; for (unsigned i = 0; i < block->physical_predecessors_count; i++) { struct ir3_block *pred = block->physical_predecessors[i]; - struct ir3_helper_block_data *pred_bd = pred->data; - if (!pred_bd->uses_helpers_end) { - pred_bd->uses_helpers_end = true; + struct ir3_end_of_feature_block_data *pred_bd = pred->data; + if (!pred_bd->uses_feature_end) { + pred_bd->uses_feature_end = true; } - if (!pred_bd->uses_helpers_beginning) { - pred_bd->uses_helpers_beginning = true; + if (!pred_bd->uses_feature_beginning) { + pred_bd->uses_feature_beginning = true; progress = true; } } } } while (progress); - /* Now, we need to determine the points where helper invocations become + /* Now, we need to determine the points where the feature becomes * unused. */ foreach_block (block, &ir->block_list) { - struct ir3_helper_block_data *bd = block->data; - if (bd->uses_helpers_end) + struct ir3_end_of_feature_block_data *bd = block->data; + if (bd->uses_feature_end) continue; /* We need to check the predecessors because of situations with critical @@ -1898,10 +1916,10 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, * ... * end * - * The endif block will have uses_helpers_beginning = false and - * uses_helpers_end = false, but because we jump to there from the - * beginning of the if where uses_helpers_end = true, we still want to - * add an (eq) at the beginning of the block: + * The endif block will have uses_feature_beginning = false and + * uses_feature_end = false, but because we jump to there from the + * beginning of the if where uses_feature_end = true, we still want to + * add the flag at the beginning of the block: * * br p0.x, #endif * ... 
@@ -1929,29 +1947,29 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, * ... * end * - * We also need this to make sure we insert (eq) after branches which use - * helper invocations. + * We also need this to make sure we insert the flag after branches which use + * the feature. */ - bool pred_uses_helpers = bd->uses_helpers_beginning; + bool pred_uses_feature = bd->uses_feature_beginning; for (unsigned i = 0; i < block->physical_predecessors_count; i++) { struct ir3_block *pred = block->physical_predecessors[i]; - struct ir3_helper_block_data *pred_bd = pred->data; - if (pred_bd->uses_helpers_end) { - pred_uses_helpers = true; + struct ir3_end_of_feature_block_data *pred_bd = pred->data; + if (pred_bd->uses_feature_end) { + pred_uses_feature = true; break; } } - if (!pred_uses_helpers) + if (!pred_uses_feature) continue; - /* The last use of helpers is somewhere between the beginning and the - * end. first_instr will be the first instruction where helpers are no - * longer required, or NULL if helpers are not required just at the end. + /* The last use of the feature is somewhere between the beginning and the + * end. first_instr will be the first instruction where the feature is no + * longer required, or NULL if the feature is not required just at the end. */ struct ir3_instruction *first_instr = NULL; foreach_instr_rev (instr, &block->instr_list) { - if (instr->flags & IR3_INSTR_NEEDS_HELPERS) + if (check_instr(instr)) break; first_instr = instr; } @@ -1964,18 +1982,12 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, * insert one. */ if (instr->opc == OPC_NOP) { - instr->flags |= IR3_INSTR_EQ; + instr->flags |= new_flag; killed = true; break; } - /* ALU and SFU instructions probably aren't going to benefit much - * from killing helper invocations, because they complete at least - * an entire quad in a cycle and don't access any quad-divergent - * memory, so delay emitting (eq) in the hopes that we find a nop - * afterwards. 
- */ - if (is_alu(instr) || is_sfu(instr)) + if (cheap_instr(instr)) continue; if (instr->opc == OPC_PREDE) continue; @@ -1995,9 +2007,49 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, : ir3_before_terminator(block); struct ir3_builder build = ir3_builder_at(cursor); struct ir3_instruction *nop = ir3_NOP(&build); - nop->flags |= IR3_INSTR_EQ; + nop->flags |= new_flag; } } + + return true; +} + +static bool +uses_helpers(struct ir3_instruction *instr) +{ + return !!(instr->flags & IR3_INSTR_NEEDS_HELPERS); +} + +static bool +is_cheap_for_eq(struct ir3_instruction *instr) +{ + /* ALU and SFU instructions probably aren't going to benefit much + * from killing helper invocations, because they complete at least + * an entire quad in a cycle and don't access any quad-divergent + * memory, so delay emitting (eq) in the hopes that we find a nop + * afterwards. + */ + return is_alu(instr) || is_sfu(instr); +} + +/* Insert (eq) after the last instruction using the results of helper + * invocations. Use a backwards dataflow analysis to determine at which points + * in the program helper invocations are definitely never used, and then insert + * (eq) at the point where we cross from a point where they may be used to a + * point where they are never used. + */ +static void +helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir, + struct ir3_shader_variant *so) +{ + if (!feature_usage_sched(ctx, ir, so, uses_helpers, is_cheap_for_eq, + IR3_INSTR_EQ) && + so->num_sampler_prefetch) { + /* No instruction needs helpers, so if there are sampler prefetches they + * are the only users and helpers can be disabled via a register setting. + */ + so->prefetch_end_of_quad = true; + } } struct ir3_last_block_data {