diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index cbafa6a2834..18dcdd2aea6 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1220,6 +1220,12 @@ is_shared(struct ir3_instruction *instr)
    return !!(instr->dsts[0]->flags & IR3_REG_SHARED);
 }
 
+static inline bool
+has_dummy_dst(struct ir3_instruction *instr)
+{
+   return !!(instr->dsts[0]->flags & IR3_REG_DUMMY);
+}
+
 static inline bool
 is_store(struct ir3_instruction *instr)
 {
@@ -1259,7 +1265,7 @@ is_load(struct ir3_instruction *instr)
       /* probably some others too.. */
       return true;
    case OPC_LDC:
-      return instr->dsts_count > 0;
+      return !has_dummy_dst(instr);
    default:
       return false;
    }
@@ -1307,7 +1313,7 @@ uses_helpers(struct ir3_instruction *instr)
 
    /* sam requires helper invocations except for dummy prefetch instructions */
    case OPC_SAM:
-      return instr->dsts_count != 0;
+      return !has_dummy_dst(instr);
 
    /* Subgroup operations don't require helper invocations to be present, but
     * will use helper invocations if they are present.
diff --git a/src/freedreno/ir3/ir3_alias.c b/src/freedreno/ir3/ir3_alias.c
index 6c3b4208201..d7f4d589018 100644
--- a/src/freedreno/ir3/ir3_alias.c
+++ b/src/freedreno/ir3/ir3_alias.c
@@ -16,7 +16,7 @@ supports_alias_srcs(struct ir3_instruction *instr)
    if (is_tex_shuffle(instr))
       return false;
    /* Descriptor prefetches don't support alias.tex. */
-   if (instr->opc == OPC_SAM && instr->dsts_count == 0)
+   if (instr->opc == OPC_SAM && has_dummy_dst(instr))
       return false;
    /* Seems to not always work properly. Blob disables it as well. */
    if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_IMM_OFFSET))
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 378a874cbfa..415ee86cd40 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2627,6 +2627,17 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
    return dst;
 }
 
+static void
+make_dst_dummy(struct ir3_instruction *instr)
+{
+   assert(instr->dsts_count == 1);
+
+   struct ir3_register *dst = instr->dsts[0];
+   dst->flags &= ~IR3_REG_SSA;
+   dst->flags |= IR3_REG_DUMMY;
+   dst->num = INVALID_REG;
+}
+
 static void
 emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
@@ -3363,7 +3374,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       struct ir3_instruction *sam =
          emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL);
 
-      sam->dsts_count = 0;
+      make_dst_dummy(sam);
       array_insert(ctx->block, ctx->block->keeps, sam);
       break;
    }
@@ -3379,7 +3390,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       if (resinfo->flags & IR3_INSTR_B)
          ctx->so->bindless_tex = true;
 
-      resinfo->dsts_count = 0;
+      make_dst_dummy(resinfo);
       array_insert(ctx->block, ctx->block->keeps, resinfo);
       break;
    }
@@ -3394,7 +3405,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       if (ldc->flags & IR3_INSTR_B)
          ctx->so->bindless_ubo = true;
 
-      ldc->dsts_count = 0;
+      make_dst_dummy(ldc);
       array_insert(ctx->block, ctx->block->keeps, ldc);
       break;
    }
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index eb3e3f13f86..cf8a710af31 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -238,9 +238,9 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
       }
    }
 
-   if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
+   if (is_tex_or_prefetch(n) && !has_dummy_dst(n)) {
       regmask_set(&state->needs_sy, n->dsts[0]);
-   } else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
+   } else if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) {
       regmask_set(&state->needs_ss, n->dsts[0]);
    } else if (is_load(n)) {
       if (is_local_mem_load(n))
@@ -793,7 +793,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          ctx->has_tex_prefetch = true;
       }
 
-      if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
+      if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) {
          ir3_update_legalize_state(state, ctx->compiler, n);
 
          n = ir3_NOP(&build);
@@ -915,25 +915,6 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
    return true;
 }
 
-/* Some instructions can take a dummy destination of r63.x, which we model as it
- * not having a destination in the IR to avoid having special code to handle
- * this. Insert the dummy destination after everything else is done.
- */
-static bool
-expand_dummy_dests(struct ir3_block *block)
-{
-   foreach_instr (n, &block->instr_list) {
-      if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) &&
-          n->dsts_count == 0) {
-         struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0);
-         /* Copy the blob's writemask */
-         if (n->opc == OPC_SAM)
-            dst->wrmask = 0b1111;
-      }
-   }
-   return true;
-}
-
 static void
 apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
                              struct ir3_block *block)
@@ -2031,10 +2012,6 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
        so->compiler->gen >= 6)
       helper_sched(ctx, ir, so);
 
-   foreach_block (block, &ir->block_list) {
-      progress |= expand_dummy_dests(block);
-   }
-
    /* Note: insert (last) before alias.tex to have the sources that are actually
     * read by instructions (as opposed to alias registers) more easily
     * available.