From b6fe69d855ef4607ac4b265cee86bf811fcfd690 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 29 Sep 2021 13:56:51 +0200 Subject: [PATCH] ir3: Support prefetching with preambles Since the NIR pass runs very late, it needs to be aware of preambles, and when creating the instruction we need to move it to the start block so that RA doesn't overwrite it in the preamble. Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 2 +- src/freedreno/ir3/ir3_legalize.c | 12 ++++++---- .../ir3/ir3_nir_lower_tex_prefetch.c | 23 ++++++++++++++----- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index e63c0ba2d60..481db2b48e7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3171,7 +3171,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) compile_assert(ctx, tex->src[idx].src.is_ssa); - sam = ir3_SAM(b, opc, type, MASK(ncomp), 0, NULL, + sam = ir3_SAM(ctx->in_block, opc, type, MASK(ncomp), 0, NULL, get_barycentric(ctx, IJ_PERSP_PIXEL), 0); sam->prefetch.input_offset = ir3_nir_coord_offset(tex->src[idx].src.ssa); /* make sure not to add irrelevant flags like S2EN */ diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 7ad2addbd85..c884c92f226 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -51,6 +51,7 @@ struct ir3_legalize_ctx { gl_shader_stage type; int max_bary; bool early_input_release; + bool has_inputs; }; struct ir3_legalize_state { @@ -348,7 +349,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) assert(inputs_remaining == 0 || !ctx->early_input_release); - if (has_tex_prefetch && input_count == 0) { + if (has_tex_prefetch && !ctx->has_inputs) { /* texture prefetch, but *no* inputs.. 
we need to insert a * dummy bary.f at the top of the shader to unblock varying * storage: @@ -956,9 +957,12 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) struct ir3_block *start_block = ir3_after_preamble(ir); foreach_block (block, &ir->block_list) { foreach_instr (instr, &block->instr_list) { - if (is_input(instr) && block != start_block) { - ctx->early_input_release = false; - break; + if (is_input(instr)) { + ctx->has_inputs = true; + if (block != start_block) { + ctx->early_input_release = false; + break; + } } } } diff --git a/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c b/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c index a8691f57bfe..76006188b26 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c +++ b/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c @@ -196,14 +196,25 @@ lower_tex_prefetch_block(nir_block *block) static bool lower_tex_prefetch_func(nir_function_impl *impl) { - /* Only instructions in the the outer-most block are considered - * eligible for pre-dispatch, because they need to be move-able - * to the beginning of the shader to avoid locking down the - * register holding the pre-fetched result for too long. + /* Only instructions in the outer-most block are considered eligible for + * pre-dispatch, because they need to be move-able to the beginning of the + * shader to avoid locking down the register holding the pre-fetched result + * for too long. However, if there is a preamble we should skip the preamble + * and only look in the first block after the preamble instead, because that + * corresponds to the first block in the original program and texture fetches + * in the preamble are never pre-dispatchable. 
*/ nir_block *block = nir_start_block(impl); - if (!block) - return false; + + nir_if *nif = nir_block_get_following_if(block); + if (nif) { + nir_instr *cond = nif->condition.ssa->parent_instr; + if (cond->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(cond)->intrinsic == + nir_intrinsic_preamble_start_ir3) { + block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)); + } + } bool progress = lower_tex_prefetch_block(block);