ir3: use dummy dst for descriptor prefetches

Now that we have the concept of "dummy" registers, we can use it
for descriptor prefetches as well. Currently, they are represented as
having no dst, and a fixup pass during legalization adds the actual
needed dummy dst. This can be prevented by representing their dst using
a dummy register from the start.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36365>
This commit is contained in:
Job Noorman 2025-07-28 09:02:17 +02:00
parent 1d45e1431f
commit 294014e196
4 changed files with 26 additions and 32 deletions

View file

@ -1220,6 +1220,12 @@ is_shared(struct ir3_instruction *instr)
return !!(instr->dsts[0]->flags & IR3_REG_SHARED);
}
static inline bool
has_dummy_dst(struct ir3_instruction *instr)
{
return !!(instr->dsts[0]->flags & IR3_REG_DUMMY);
}
static inline bool
is_store(struct ir3_instruction *instr)
{
@ -1259,7 +1265,7 @@ is_load(struct ir3_instruction *instr)
/* probably some others too.. */
return true;
case OPC_LDC:
return instr->dsts_count > 0;
return !has_dummy_dst(instr);
default:
return false;
}
@ -1307,7 +1313,7 @@ uses_helpers(struct ir3_instruction *instr)
/* sam requires helper invocations except for dummy prefetch instructions */
case OPC_SAM:
return instr->dsts_count != 0;
return !has_dummy_dst(instr);
/* Subgroup operations don't require helper invocations to be present, but
* will use helper invocations if they are present.

View file

@ -16,7 +16,7 @@ supports_alias_srcs(struct ir3_instruction *instr)
if (is_tex_shuffle(instr))
return false;
/* Descriptor prefetches don't support alias.tex. */
if (instr->opc == OPC_SAM && instr->dsts_count == 0)
if (instr->opc == OPC_SAM && has_dummy_dst(instr))
return false;
/* Seems to not always work properly. Blob disables it as well. */
if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_IMM_OFFSET))

View file

@ -2627,6 +2627,17 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
return dst;
}
/* Turns the single destination of instr into a dummy destination: the
 * IR3_REG_SSA flag is cleared, IR3_REG_DUMMY is set, and the register number
 * is replaced by INVALID_REG.
 *
 * instr must have exactly one destination (asserted).
 */
static void
make_dst_dummy(struct ir3_instruction *instr)
{
   assert(instr->dsts_count == 1);

   struct ir3_register *dummy = instr->dsts[0];

   dummy->num = INVALID_REG;
   dummy->flags = (dummy->flags & ~IR3_REG_SSA) | IR3_REG_DUMMY;
}
static void
emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
@ -3363,7 +3374,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *sam =
emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL);
sam->dsts_count = 0;
make_dst_dummy(sam);
array_insert(ctx->block, ctx->block->keeps, sam);
break;
}
@ -3379,7 +3390,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
if (resinfo->flags & IR3_INSTR_B)
ctx->so->bindless_tex = true;
resinfo->dsts_count = 0;
make_dst_dummy(resinfo);
array_insert(ctx->block, ctx->block->keeps, resinfo);
break;
}
@ -3394,7 +3405,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
if (ldc->flags & IR3_INSTR_B)
ctx->so->bindless_ubo = true;
ldc->dsts_count = 0;
make_dst_dummy(ldc);
array_insert(ctx->block, ctx->block->keeps, ldc);
break;
}

View file

@ -238,9 +238,9 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
}
}
if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
if (is_tex_or_prefetch(n) && !has_dummy_dst(n)) {
regmask_set(&state->needs_sy, n->dsts[0]);
} else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
} else if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) {
regmask_set(&state->needs_ss, n->dsts[0]);
} else if (is_load(n)) {
if (is_local_mem_load(n))
@ -793,7 +793,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
ctx->has_tex_prefetch = true;
}
if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
if (n->opc == OPC_RESINFO && !has_dummy_dst(n)) {
ir3_update_legalize_state(state, ctx->compiler, n);
n = ir3_NOP(&build);
@ -915,25 +915,6 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
return true;
}
/* sam, ldc and resinfo may be encoded with a dummy destination of r63.x. In
 * the IR such instructions are represented without any destination so that no
 * other code has to special-case them. This runs once everything else is done
 * and attaches the dummy destination to every such instruction in the block.
 *
 * Always returns true.
 */
static bool
expand_dummy_dests(struct ir3_block *block)
{
   foreach_instr (n, &block->instr_list) {
      const bool takes_dummy_dst = n->opc == OPC_SAM || n->opc == OPC_LDC ||
                                   n->opc == OPC_RESINFO;

      if (takes_dummy_dst && n->dsts_count == 0) {
         struct ir3_register *dummy = ir3_dst_create(n, INVALID_REG, 0);

         /* Copy the blob's writemask */
         if (n->opc == OPC_SAM)
            dummy->wrmask = 0b1111;
      }
   }

   return true;
}
static void
apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
struct ir3_block *block)
@ -2031,10 +2012,6 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
so->compiler->gen >= 6)
helper_sched(ctx, ir, so);
foreach_block (block, &ir->block_list) {
progress |= expand_dummy_dests(block);
}
/* Note: insert (last) before alias.tex to have the sources that are actually
* read by instructions (as opposed to alias registers) more easily
* available.