intel/brw: Use correct instruction for value change check when coalescing
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

When a VGRF is copied by several partial MOVs at different offsets, we
reach `channels_remaining == 0` with an `inst` that writes only part of
the VGRF.
Currently, even though we call `can_coalesce_vars()` for each offset
separately, every call is passed that same `inst`, so the check that the
dst value is not changed is only ever done for the offset written by the
instruction that satisfied the `channels_remaining == 0` condition.

Instead, we should remember and use the correct instruction for each
written offset separately.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10916
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35062>
This commit is contained in:
Sviatoslav Peleshko 2025-05-12 00:55:57 +03:00 committed by Marge Bot
parent aae67ab678
commit 0e3e5146cf
2 changed files with 30 additions and 4 deletions

View file

@ -282,8 +282,8 @@ brw_opt_register_coalesce(brw_shader &s)
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
for (int i = 0; i < src_size; i++) {
dst_reg_offset[i] = inst->dst.offset / REG_SIZE + i;
mov[i] = inst;
}
mov[0] = inst;
channels_remaining -= regs_written(inst);
} else {
const int offset = inst->src[0].offset / REG_SIZE;
@ -297,9 +297,10 @@ brw_opt_register_coalesce(brw_shader &s)
channels_remaining = -1;
continue;
}
for (unsigned i = 0; i < MAX2(inst->size_written / REG_SIZE, 1); i++)
for (unsigned i = 0; i < MAX2(inst->size_written / REG_SIZE, 1); i++) {
dst_reg_offset[offset + i] = inst->dst.offset / REG_SIZE + i;
mov[offset] = inst;
mov[offset + i] = inst;
}
channels_remaining -= regs_written(inst);
}
@ -318,7 +319,7 @@ brw_opt_register_coalesce(brw_shader &s)
dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
src_var[i] = live.var_from_vgrf[src_reg] + i;
if (!can_coalesce_vars(devinfo, live, ips, s.cfg, inst, dst_var[i], src_var[i]) ||
if (!can_coalesce_vars(devinfo, live, ips, s.cfg, mov[i], dst_var[i], src_var[i]) ||
would_violate_eot_restriction(s, s.cfg, dst_reg, src_reg)) {
can_coalesce = false;
src_reg = ~0u;

View file

@ -84,3 +84,28 @@ TEST_F(RegisterCoalesceTest, InterfereButContainEachOther)
EXPECT_SHADERS_MATCH(bld, exp);
}
TEST_F(RegisterCoalesceTest, ChangingTemporaryCompoundRegisterNotChangesOriginal)
{
   /* Regression test for coalescing a temporary that is filled by several
    * partial MOVs at different offsets and modified afterwards.
    */
   brw_builder bld = make_shader();

   /* Allocation order is kept: original, then the temporary, then the result. */
   brw_reg original = vgrf(bld, BRW_TYPE_F, 2);
   brw_reg scratch  = vgrf(bld, BRW_TYPE_F, 2);
   brw_reg result   = vgrf(bld, BRW_TYPE_F, 2);

   brw_reg imm_one = brw_imm_f(1.0);
   brw_reg imm_two = brw_imm_f(2.0);

   /* Initialize both parts of the original register. */
   bld.MOV(original, imm_one);
   bld.MOV(offset(original, bld, 1), imm_two);

   /* Copy the original into the temporary with two partial MOVs; the
    * offset-1 part is copied first, the offset-0 part last.
    */
   bld.MOV(offset(scratch, bld, 1), offset(original, bld, 1));
   bld.MOV(scratch, original);

   /* Modify only the offset-1 part of the temporary. */
   bld.ADD(offset(scratch, bld, 1), offset(scratch, bld, 1), imm_one);

   /* Both parts of the original are still read after the temporary changed. */
   bld.ADD(result, original, imm_one);
   bld.ADD(offset(result, bld, 1), offset(original, bld, 1), imm_two);

   /* The pass must leave the shader untouched: as the test name states,
    * changing the temporary must not change the original.
    */
   EXPECT_NO_PROGRESS(brw_opt_register_coalesce, bld);
}