# Review summary of this patch. Everything ahead of the first "diff --git"
# header is commentary and is not part of any hunk.
#
# src/compiler/nir/nir_intrinsics.py
#   Declares three intrinsics: prefetch_sam_ir3, prefetch_tex_ir3 and
#   prefetch_ubo_ir3, all with flags=[CAN_REORDER].  The second positional
#   argument is [1, 1] for the sam variant and [1] for the other two
#   (presumably src_comp -- confirm against intrinsic()'s signature).
#
# src/freedreno/ir3/ir3.h
#   is_load():      OPC_LDC now returns instr->dsts_count > 0 instead of true.
#   uses_helpers(): OPC_SAM now returns instr->dsts_count != 0 instead of true.
#   ir3_SAM():      when opc == OPC_SAM and src0 is NULL, nreg is still
#                   incremented and a source regid(48, 0) flagged
#                   IR3_REG_SHARED is created where src0 would have gone.
#
# src/freedreno/ir3/ir3_compiler_nir.c
#   emit_intrinsic() gains one case per new intrinsic.  They emit a sam (via
#   emit_sam with NULL coordinate sources), a resinfo (ir3_RESINFO) and an
#   ldc (ir3_LDC with an immediate-0 offset) respectively.  Each instruction
#   gets dsts_count = 0 and is appended to ctx->block->keeps.  The resinfo and
#   ldc cases call ir3_handle_bindless_cat6() and set ctx->so->bindless_tex /
#   ctx->so->bindless_ubo when IR3_INSTR_B ends up set.
#
# src/freedreno/ir3/ir3_legalize.c
#   legalize_block(): the needs_sy / needs_ss updates that read n->dsts[0]
#   are now guarded by n->dsts_count > 0.
#   expand_dummy_dests() (new): for every OPC_SAM / OPC_LDC / OPC_RESINFO
#   with dsts_count == 0, creates a destination with INVALID_REG; for OPC_SAM
#   it also sets wrmask = 0b1111.  ir3_legalize() calls it for each block
#   after the helper_sched() call site and before ir3_count_instructions().
#   NOTE(review): expand_dummy_dests() returns true unconditionally, so
#   "progress |= expand_dummy_dests(block)" reports progress even when no
#   instruction was touched -- confirm nothing later relies on an accurate
#   value.
#
# src/freedreno/ir3/ir3_validate.c
#   validate_instr(): for OPC_RESINFO / OPC_RESFMT the destination size check
#   is skipped when dsts_count == 0; the source check is unchanged.
#
# NOTE(review): the hunks below were re-flowed from a whitespace-collapsed
# copy.  Line breaks and blank lines were recovered from the @@ line counts;
# indentation is a best-effort reconstruction -- verify the context lines
# against the tree, or apply with "git apply --ignore-whitespace".
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index d34bb159527..24197079d1d 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1402,6 +1402,11 @@ intrinsic("inclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1],
 intrinsic("exclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1, 1],
           bit_sizes=src0, indices=[REDUCTION_OP])
 
+# IR3-specific intrinsics for prefetching descriptors in preambles.
+intrinsic("prefetch_sam_ir3", [1, 1], flags=[CAN_REORDER])
+intrinsic("prefetch_tex_ir3", [1], flags=[CAN_REORDER])
+intrinsic("prefetch_ubo_ir3", [1], flags=[CAN_REORDER])
+
 # Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
 # within a blend shader to read/write the raw value from the tile buffer,
 # without applying any format conversion in the process. If the shader needs
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index dd5e32d6955..16e63d70e0d 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1151,10 +1151,11 @@ is_load(struct ir3_instruction *instr)
    case OPC_LDP:
    case OPC_L2G:
    case OPC_LDLW:
-   case OPC_LDC:
    case OPC_LDLV:
       /* probably some others too.. */
       return true;
+   case OPC_LDC:
+      return instr->dsts_count > 0;
    default:
       return false;
    }
@@ -1185,7 +1186,6 @@ uses_helpers(struct ir3_instruction *instr)
 {
    switch (instr->opc) {
    /* These require helper invocations to be present */
-   case OPC_SAM:
    case OPC_SAMB:
    case OPC_GETLOD:
    case OPC_DSX:
@@ -1201,6 +1201,10 @@ uses_helpers(struct ir3_instruction *instr)
    case OPC_META_TEX_PREFETCH:
       return true;
 
+   /* sam requires helper invocations except for dummy prefetch instructions */
+   case OPC_SAM:
+      return instr->dsts_count != 0;
+
    /* Subgroup operations don't require helper invocations to be present, but
     * will use helper invocations if they are present.
     */
@@ -2741,7 +2745,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
    if (flags & IR3_INSTR_S2EN) {
       nreg++;
    }
-   if (src0) {
+   if (src0 || opc == OPC_SAM) {
       nreg++;
    }
    if (src1) {
@@ -2756,6 +2760,12 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
    }
    if (src0) {
       __ssa_src(sam, src0, 0);
+   } else if (opc == OPC_SAM) {
+      /* Create a dummy shared source for the coordinate, for the prefetch
+       * case. It needs to be shared so that we don't accidentally disable early
+       * preamble, and this is what the blob does.
+       */
+      ir3_src_create(sam, regid(48, 0), IR3_REG_SHARED);
    }
    if (src1) {
       __ssa_src(sam, src1, 0);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index d9c94982010..19b6d6784f4 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -3042,6 +3042,47 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                  load->push_consts.dst_base + load->push_consts.src_size, 4));
       break;
    }
+   case nir_intrinsic_prefetch_sam_ir3: {
+      struct tex_src_info info =
+         get_bindless_samp_src(ctx, &intr->src[0], &intr->src[1]);
+      struct ir3_instruction *sam =
+         emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL);
+
+      sam->dsts_count = 0;
+      array_insert(ctx->block, ctx->block->keeps, sam);
+      break;
+   }
+   case nir_intrinsic_prefetch_tex_ir3: {
+      struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
+      struct ir3_instruction *resinfo = ir3_RESINFO(b, idx, 0);
+      resinfo->cat6.iim_val = 1;
+      resinfo->cat6.d = 1;
+      resinfo->cat6.type = TYPE_U32;
+      resinfo->cat6.typed = false;
+
+      ir3_handle_bindless_cat6(resinfo, intr->src[0]);
+      if (resinfo->flags & IR3_INSTR_B)
+         ctx->so->bindless_tex = true;
+
+      resinfo->dsts_count = 0;
+      array_insert(ctx->block, ctx->block->keeps, resinfo);
+      break;
+   }
+   case nir_intrinsic_prefetch_ubo_ir3: {
+      struct ir3_instruction *offset = create_immed(ctx->block, 0);
+      struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
+      struct ir3_instruction *ldc = ir3_LDC(b, idx, 0, offset, 0);
+      ldc->cat6.iim_val = 1;
+      ldc->cat6.type = TYPE_U32;
+
+      ir3_handle_bindless_cat6(ldc, intr->src[0]);
+      if (ldc->flags & IR3_INSTR_B)
+         ctx->so->bindless_ubo = true;
+
+      ldc->dsts_count = 0;
+      array_insert(ctx->block, ctx->block->keeps, ldc);
+      break;
+   }
    default:
       ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
                         nir_intrinsic_infos[intr->intrinsic].name);
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 3bb4cfa1232..7116e47add4 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -603,11 +603,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          }
       }
 
-      if (is_tex_or_prefetch(n)) {
+      if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
          regmask_set(&state->needs_sy, n->dsts[0]);
          if (n->opc == OPC_META_TEX_PREFETCH)
             ctx->has_tex_prefetch = true;
-      } else if (n->opc == OPC_RESINFO) {
+      } else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
          regmask_set(&state->needs_ss, n->dsts[0]);
          ir3_NOP(block)->flags |= IR3_INSTR_SS;
          last_input_needs_ss = false;
@@ -791,6 +791,25 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
    return true;
 }
 
+/* Some instructions can take a dummy destination of r63.x, which we model as it
+ * not having a destination in the IR to avoid having special code to handle
+ * this. Insert the dummy destination after everything else is done.
+ */
+static bool
+expand_dummy_dests(struct ir3_block *block)
+{
+   foreach_instr (n, &block->instr_list) {
+      if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) &&
+          n->dsts_count == 0) {
+         struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0);
+         /* Copy the blob's writemask */
+         if (n->opc == OPC_SAM)
+            dst->wrmask = 0b1111;
+      }
+   }
+   return true;
+}
+
 static void
 apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
                              struct ir3_block *block)
@@ -1720,6 +1739,10 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
        so->compiler->gen >= 6)
       helper_sched(ctx, ir, so);
 
+   foreach_block (block, &ir->block_list) {
+      progress |= expand_dummy_dests(block);
+   }
+
    ir3_count_instructions(ir);
    resolve_jumps(ir);
 
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index 1b799e39730..59d369c0a20 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -354,7 +354,8 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
       switch (instr->opc) {
       case OPC_RESINFO:
       case OPC_RESFMT:
-         validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
+         if (instr->dsts_count > 0)
+            validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
          validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
          break;
       case OPC_L2G: