diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index e63c0ba2d60..481db2b48e7 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3171,7 +3171,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) compile_assert(ctx, tex->src[idx].src.is_ssa); - sam = ir3_SAM(b, opc, type, MASK(ncomp), 0, NULL, + sam = ir3_SAM(ctx->in_block, opc, type, MASK(ncomp), 0, NULL, get_barycentric(ctx, IJ_PERSP_PIXEL), 0); sam->prefetch.input_offset = ir3_nir_coord_offset(tex->src[idx].src.ssa); /* make sure not to add irrelevant flags like S2EN */ diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 7ad2addbd85..c884c92f226 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -51,6 +51,7 @@ struct ir3_legalize_ctx { gl_shader_stage type; int max_bary; bool early_input_release; + bool has_inputs; }; struct ir3_legalize_state { @@ -348,7 +349,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) assert(inputs_remaining == 0 || !ctx->early_input_release); - if (has_tex_prefetch && input_count == 0) { + if (has_tex_prefetch && !ctx->has_inputs) { /* texture prefetch, but *no* inputs.. 
we need to insert a * dummy bary.f at the top of the shader to unblock varying * storage: @@ -956,9 +957,12 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) struct ir3_block *start_block = ir3_after_preamble(ir); foreach_block (block, &ir->block_list) { foreach_instr (instr, &block->instr_list) { - if (is_input(instr) && block != start_block) { - ctx->early_input_release = false; - break; + if (is_input(instr)) { + ctx->has_inputs = true; + if (block != start_block) { + ctx->early_input_release = false; + break; + } } } } diff --git a/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c b/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c index a8691f57bfe..76006188b26 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c +++ b/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c @@ -196,14 +196,25 @@ lower_tex_prefetch_block(nir_block *block) static bool lower_tex_prefetch_func(nir_function_impl *impl) { - /* Only instructions in the the outer-most block are considered - * eligible for pre-dispatch, because they need to be move-able - * to the beginning of the shader to avoid locking down the - * register holding the pre-fetched result for too long. + /* Only instructions in the outer-most block are considered eligible for + * pre-dispatch, because they need to be move-able to the beginning of the + * shader to avoid locking down the register holding the pre-fetched result + * for too long. However, if there is a preamble we should skip the preamble + * and only look in the first block after the preamble instead, because that + * corresponds to the first block in the original program and texture fetches + * in the preamble are never pre-dispatchable. 
*/ nir_block *block = nir_start_block(impl); - if (!block) - return false; + + nir_if *nif = nir_block_get_following_if(block); + if (nif) { + nir_instr *cond = nif->condition.ssa->parent_instr; + if (cond->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(cond)->intrinsic == + nir_intrinsic_preamble_start_ir3) { + block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)); + } + } bool progress = lower_tex_prefetch_block(block);