From 294014e196e455a1d068c975a6ae69fe7c7294fb Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 28 Jul 2025 09:02:17 +0200 Subject: [PATCH] ir3: use dummy dst for descriptor prefetches Now that we have the concept of "dummy" registers, we can use it for descriptor prefetches as well. Currently, they are represented as having no dst, and a fixup pass during legalization adds the actual needed dummy dst. This can be prevented by representing their dst using a dummy register from the start. Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.h | 10 ++++++++-- src/freedreno/ir3/ir3_alias.c | 2 +- src/freedreno/ir3/ir3_compiler_nir.c | 17 +++++++++++++--- src/freedreno/ir3/ir3_legalize.c | 29 +++------------------------- 4 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index cbafa6a2834..18dcdd2aea6 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1220,6 +1220,12 @@ is_shared(struct ir3_instruction *instr) return !!(instr->dsts[0]->flags & IR3_REG_SHARED); } +static inline bool +has_dummy_dst(struct ir3_instruction *instr) +{ + return !!(instr->dsts[0]->flags & IR3_REG_DUMMY); +} + static inline bool is_store(struct ir3_instruction *instr) { @@ -1259,7 +1265,7 @@ is_load(struct ir3_instruction *instr) /* probably some others too.. */ return true; case OPC_LDC: - return instr->dsts_count > 0; + return !has_dummy_dst(instr); default: return false; } @@ -1307,7 +1313,7 @@ uses_helpers(struct ir3_instruction *instr) /* sam requires helper invocations except for dummy prefetch instructions */ case OPC_SAM: - return instr->dsts_count != 0; + return !has_dummy_dst(instr); /* Subgroup operations don't require helper invocations to be present, but * will use helper invocations if they are present. 
diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c index 6c3b4208201..d7f4d589018 100644 --- a/src/freedreno/ir3/ir3_alias.c +++ b/src/freedreno/ir3/ir3_alias.c @@ -16,7 +16,7 @@ supports_alias_srcs(struct ir3_instruction *instr) if (is_tex_shuffle(instr)) return false; /* Descriptor prefetches don't support alias.tex. */ - if (instr->opc == OPC_SAM && instr->dsts_count == 0) + if (instr->opc == OPC_SAM && has_dummy_dst(instr)) return false; /* Seems to not always work properly. Blob disables it as well. */ if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_IMM_OFFSET)) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 378a874cbfa..415ee86cd40 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2627,6 +2627,17 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx, return dst; } +static void +make_dst_dummy(struct ir3_instruction *instr) +{ + assert(instr->dsts_count == 1); + + struct ir3_register *dst = instr->dsts[0]; + dst->flags &= ~IR3_REG_SSA; + dst->flags |= IR3_REG_DUMMY; + dst->num = INVALID_REG; +} + static void emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) { @@ -3363,7 +3374,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *sam = emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL); - sam->dsts_count = 0; + make_dst_dummy(sam); array_insert(ctx->block, ctx->block->keeps, sam); break; } @@ -3379,7 +3390,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) if (resinfo->flags & IR3_INSTR_B) ctx->so->bindless_tex = true; - resinfo->dsts_count = 0; + make_dst_dummy(resinfo); array_insert(ctx->block, ctx->block->keeps, resinfo); break; } @@ -3394,7 +3405,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) if (ldc->flags & IR3_INSTR_B) ctx->so->bindless_ubo = true; - ldc->dsts_count = 0; + make_dst_dummy(ldc); array_insert(ctx->block, 
ctx->block->keeps, ldc); break; } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index eb3e3f13f86..cf8a710af31 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -238,9 +238,9 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler, } } - if (is_tex_or_prefetch(n) && n->dsts_count > 0) { + if (is_tex_or_prefetch(n) && !has_dummy_dst(n)) { regmask_set(&state->needs_sy, n->dsts[0]); - } else if (n->opc == OPC_RESINFO && n->dsts_count > 0) { + } else if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) { regmask_set(&state->needs_ss, n->dsts[0]); } else if (is_load(n)) { if (is_local_mem_load(n)) @@ -793,7 +793,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) ctx->has_tex_prefetch = true; } - if (n->opc == OPC_RESINFO && n->dsts_count > 0) { + if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) { ir3_update_legalize_state(state, ctx->compiler, n); n = ir3_NOP(&build); @@ -915,25 +915,6 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block) return true; } -/* Some instructions can take a dummy destination of r63.x, which we model as it - * not having a destination in the IR to avoid having special code to handle - * this. Insert the dummy destination after everything else is done. 
- */ -static bool -expand_dummy_dests(struct ir3_block *block) -{ - foreach_instr (n, &block->instr_list) { - if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) && - n->dsts_count == 0) { - struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0); - /* Copy the blob's writemask */ - if (n->opc == OPC_SAM) - dst->wrmask = 0b1111; - } - } - return true; -} - static void apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block) @@ -2031,10 +2012,6 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) so->compiler->gen >= 6) helper_sched(ctx, ir, so); - foreach_block (block, &ir->block_list) { - progress |= expand_dummy_dests(block); - } - /* Note: insert (last) before alias.tex to have the sources that are actually * read by instructions (as opposed to alias registers) more easily * available.