ir3/dce: support partial writes from collects

When alias.rt is used to alias certain output components, we might end
up with a situation where some, but not all, of the components of
collects end up being unused. This is currently not supported, which
means we end up with useless moves (coming from copy lowering) for
aliased output components.

Fix this by adding support for partial wrmasks for collects in DCE. The
wrmasks are initially zeroed out and then updated based on the wrmask of
their users. Sources of collects for which the corresponding dst ends up
being unused are treated as unused as well. This allows us to remove
the useless output moves by simply updating the wrmask of the end
sources.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31222>
This commit is contained in:
Job Noorman 2025-01-22 15:33:48 +01:00 committed by Marge Bot
parent a7a357f91d
commit 144121b6df
4 changed files with 71 additions and 3 deletions

View file

@ -44,6 +44,25 @@ instr_dce(struct ir3_instruction *instr, bool falsedep)
mark_array_use(instr, reg); /* src */
foreach_ssa_src_n (src, i, instr) {
if (!__is_false_dep(instr, i)) {
if (instr->opc == OPC_META_COLLECT &&
!(instr->dsts[0]->wrmask & (1 << i))) {
/* Ignore sources of collects for which the corresponding dst is not
* written since they are unused.
*/
continue;
}
/* Propagate the wrmask of sources to their defs. */
struct ir3_register *src_reg = instr->srcs[i];
src_reg->def->wrmask |= src_reg->wrmask;
if (!src_reg->wrmask) {
/* If no components are read, the def is unused. */
continue;
}
}
instr_dce(src, __is_false_dep(instr, i));
}
}
@ -76,6 +95,41 @@ remove_unused_by_block(struct ir3_block *block)
ir3_instr_remove(instr);
progress = true;
} else if (instr->opc == OPC_META_COLLECT) {
struct ir3_register *dst = instr->dsts[0];
/* Trim unused trailing components. While it's tempting to just remove
* all unused components, this doesn't work for a few reasons. Note
* that currently, collects with unused components are only created
* when certain FS output components are aliased using alias.rt. The
* important part here is that the collect will be used for an output.
* Even if only certain components of an output are written to GPRs, we
* still need to allocate the correct consecutive registers. For
* example, if we only write out.xz, we have to make sure there is
* still a register in between the registers allocated for the x and z
* components. In other words, we have to be able to allocate a base
* register for the output such that all components written to GPRs
* have the correct offset from the base register. So we cannot remove
* any unused holes in the collect. We also cannot remove the leading
* unused components because then RA might decide to put the first used
* component in, say, r0.x, leaving no space to allocate a base
* register. Therefore, we only trim trailing components.
*
* TODO: we could probably trim leading components by having a way to
* request a minimum register number from RA.
*/
instr->srcs_count = util_last_bit(dst->wrmask);
/* Mark sources for which the corresponding dst is not written as
* undef.
*/
foreach_src_n (src, src_n, instr) {
if (!(dst->wrmask & (1 << src_n))) {
src->def = NULL;
src->num = INVALID_REG;
src->flags &= ~(IR3_REG_CONST | IR3_REG_IMMED);
}
}
}
}
return progress;
@ -105,6 +159,13 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
}
instr->flags |= IR3_INSTR_UNUSED;
/* To eliminate unused components in collect, we zero the wrmask and
* update it using the wrmask of its users.
*/
if (instr->opc == OPC_META_COLLECT) {
instr->dsts[0]->wrmask = 0;
}
}
}

View file

@ -475,7 +475,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
}
} else {
assert(reg->wrmask >= 1);
u_foreach_bit (b, reg->wrmask) {
add_reg_dep(state, node, reg, reg->num + b, i, -1);
}

View file

@ -396,7 +396,7 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
"xyzw"[reg_comp(reg)]);
}
if (reg->wrmask > 0x1)
if (reg->wrmask != 0x1)
mesa_log_stream_printf(stream, " (wrmask=0x%x)", reg->wrmask);
}

View file

@ -93,7 +93,15 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
struct ir3_register *src = reg->def;
validate_assert(ctx, _mesa_set_search(ctx->defs, src->instr));
validate_assert(ctx, src->wrmask == reg->wrmask);
if (src->instr->opc == OPC_META_COLLECT) {
/* We only support reading a subset of written components from collects.
*/
validate_assert(ctx, !(reg->wrmask & ~src->wrmask));
} else {
validate_assert(ctx, src->wrmask == reg->wrmask);
}
validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
if (src->flags & IR3_REG_CONST)