mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
ir3/dce: support partial writes from collects
When alias.rt is used to alias certain output components, we might end up with a situation where some, but not all, of the components of collects end up being unused. This is currently not supported, which means we end up with useless moves (coming from copy lowering) for aliased output components. Fix this by adding support for partial wrmasks for collects in DCE. The wrmasks are initially zeroed out and then updated based on the wrmask of their users. Sources of collects for which the corresponding dst ends up being unused are treated as unused as well. This allows us to remove the useless output moves by simply updating the wrmask of the end sources. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31222>
This commit is contained in:
parent
a7a357f91d
commit
144121b6df
4 changed files with 71 additions and 3 deletions
|
|
@ -44,6 +44,25 @@ instr_dce(struct ir3_instruction *instr, bool falsedep)
|
|||
mark_array_use(instr, reg); /* src */
|
||||
|
||||
foreach_ssa_src_n (src, i, instr) {
|
||||
if (!__is_false_dep(instr, i)) {
|
||||
if (instr->opc == OPC_META_COLLECT &&
|
||||
!(instr->dsts[0]->wrmask & (1 << i))) {
|
||||
/* Ignore sources of collects for which the corresponding dst is not
|
||||
* written since they are unused.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Propagate the wrmask of sources to their defs. */
|
||||
struct ir3_register *src_reg = instr->srcs[i];
|
||||
src_reg->def->wrmask |= src_reg->wrmask;
|
||||
|
||||
if (!src_reg->wrmask) {
|
||||
/* If no components are read, the def is unused. */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
instr_dce(src, __is_false_dep(instr, i));
|
||||
}
|
||||
}
|
||||
|
|
@ -76,6 +95,41 @@ remove_unused_by_block(struct ir3_block *block)
|
|||
|
||||
ir3_instr_remove(instr);
|
||||
progress = true;
|
||||
} else if (instr->opc == OPC_META_COLLECT) {
|
||||
struct ir3_register *dst = instr->dsts[0];
|
||||
|
||||
/* Trim unused trailing components. While it's tempting to just remove
|
||||
* all unused components, this doesn't work for a few reasons. Note
|
||||
* that currently, collects with unused components are only created
|
||||
* when certain FS output components are aliased using alias.rt. The
|
||||
* important part here is that the collect will be used for an output.
|
||||
* Even if only certain components of an output are written to GPRs, we
|
||||
* still need to allocate the correct consecutive registers. For
|
||||
* example, if we only write out.xz, we have to make sure there is
|
||||
* still a register in between the registers allocated for the x and z
|
||||
* components. In other words, we have to be able to allocate a base
|
||||
* register for the output such that all components written to GPRs
|
||||
* have the correct offset from the base register. So we cannot remove
|
||||
* any unused holes in the collect. We also cannot remove the leading
|
||||
* unused components because then RA might decide to put the first used
|
||||
* component in, say, r0.x, leaving no space to allocate a base
|
||||
* register. Therefore, we only trim trailing components.
|
||||
*
|
||||
* TODO: we could probably trim leading components by having a way to
|
||||
* request a minimum register number from RA.
|
||||
*/
|
||||
instr->srcs_count = util_last_bit(dst->wrmask);
|
||||
|
||||
/* Mark sources for which the corresponding dst is not written as
|
||||
* undef.
|
||||
*/
|
||||
foreach_src_n (src, src_n, instr) {
|
||||
if (!(dst->wrmask & (1 << src_n))) {
|
||||
src->def = NULL;
|
||||
src->num = INVALID_REG;
|
||||
src->flags &= ~(IR3_REG_CONST | IR3_REG_IMMED);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return progress;
|
||||
|
|
@ -105,6 +159,13 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
|
|||
}
|
||||
|
||||
instr->flags |= IR3_INSTR_UNUSED;
|
||||
|
||||
/* To eliminate unused components in collect, we zero the wrmask and
|
||||
* update it using the wrmask of its users.
|
||||
*/
|
||||
if (instr->opc == OPC_META_COLLECT) {
|
||||
instr->dsts[0]->wrmask = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -475,7 +475,6 @@ calculate_deps(struct ir3_postsched_deps_state *state,
|
|||
add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
|
||||
}
|
||||
} else {
|
||||
assert(reg->wrmask >= 1);
|
||||
u_foreach_bit (b, reg->wrmask) {
|
||||
add_reg_dep(state, node, reg, reg->num + b, i, -1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -396,7 +396,7 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
|
|||
"xyzw"[reg_comp(reg)]);
|
||||
}
|
||||
|
||||
if (reg->wrmask > 0x1)
|
||||
if (reg->wrmask != 0x1)
|
||||
mesa_log_stream_printf(stream, " (wrmask=0x%x)", reg->wrmask);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,15 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
|
|||
struct ir3_register *src = reg->def;
|
||||
|
||||
validate_assert(ctx, _mesa_set_search(ctx->defs, src->instr));
|
||||
validate_assert(ctx, src->wrmask == reg->wrmask);
|
||||
|
||||
if (src->instr->opc == OPC_META_COLLECT) {
|
||||
/* We only support reading a subset of written components from collects.
|
||||
*/
|
||||
validate_assert(ctx, !(reg->wrmask & ~src->wrmask));
|
||||
} else {
|
||||
validate_assert(ctx, src->wrmask == reg->wrmask);
|
||||
}
|
||||
|
||||
validate_assert(ctx, reg_class_flags(src) == reg_class_flags(reg));
|
||||
|
||||
if (src->flags & IR3_REG_CONST)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue