brw: brw_reg::nr for an accumulator is not part of the offset
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Without this, reg_offset will return 1024 for acc0. This causes
has_invalid_dst_region to decide that the destination region is invalid
(because 1024 != 0), and the lowering code tries to treat the floating
point accumulators as integers. It's a mess.

v2: Add and use set_gfx_platform. Suggested by Caio.

Fixes: 937373eb25 ("i965/fs: Handle fixed HW GRF subnr in reg_offset().")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40716>
This commit is contained in:
Ian Romanick 2026-03-27 20:59:38 -07:00 committed by Marge Bot
parent bc848da244
commit cfdb3ddb93
3 changed files with 54 additions and 1 deletions

View file

@ -1424,7 +1424,8 @@ reg_space(const brw_reg &r)
/**
 * Return the offset of register \p r from the start of its register space,
 * in the same units as r.offset and r.subnr.
 *
 * The register number only contributes for files where it selects a
 * position.  It is ignored for ADDRESS, VGRF, IMM and ATTR, and also for
 * ARF accumulators: an accumulator's nr is not part of its offset.  If it
 * were counted, acc0 would report an offset of 1024 rather than 0, and
 * checks that compare the destination offset against zero would wrongly
 * treat the accumulator region as invalid.
 */
static inline unsigned
reg_offset(const brw_reg &r)
{
   /* Files (and the accumulator ARF) whose nr must not be scaled in. */
   const bool nr_is_not_offset =
      r.file == ADDRESS || r.file == VGRF || r.file == IMM ||
      r.file == ATTR || brw_reg_is_arf(r, BRW_ARF_ACCUMULATOR);

   /* UNIFORM registers advance 4 units per nr; everything else advances
    * a full REG_SIZE.
    */
   const unsigned nr = nr_is_not_offset ? 0 : r.nr;
   const unsigned nr_scale = r.file == UNIFORM ? 4 : REG_SIZE;

   /* subnr is only added for the files listed here. */
   const bool has_subnr =
      r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF;
   const unsigned subnr = has_subnr ? r.subnr : 0;

   return nr * nr_scale + r.offset + subnr;
}

View file

@ -128,6 +128,18 @@ protected:
brw_init_isa_info(&compiler->isa, devinfo);
}
/**
 * Point the fixture at the platform called \p name.
 *
 * The name is translated to a PCI device id, the fixture's devinfo is
 * filled in from that id, and the compiler's ISA info is rebuilt from the
 * refreshed devinfo.
 *
 * NOTE(review): the result of intel_get_device_info_from_pci_id() is not
 * inspected here; only devinfo->ver is checked afterwards.
 */
void
set_gfx_platform(const char *name)
{
   /* The name lookup must yield a positive device id. */
   const int device_id = intel_device_name_to_pci_device_id(name);
   assert(device_id > 0);

   intel_get_device_info_from_pci_id(device_id, devinfo);
   assert(devinfo->ver > 0);

   /* The ISA tables are derived from devinfo, so rebuild them. */
   brw_init_isa_info(&compiler->isa, devinfo);
}
brw_builder
make_shader(mesa_shader_stage stage = MESA_SHADER_FRAGMENT,
unsigned dispatch_width = 0)

View file

@ -22,3 +22,43 @@ TEST_F(lower_regioning_test, sel_ud_d_d)
EXPECT_NO_PROGRESS(brw_lower_regioning, bld);
}
/* A SIMD32 exec_all MOV from a BF VGRF into the F-typed accumulator, on
 * dg2, must be changed by both brw_lower_simd_width and
 * brw_lower_regioning.  The expected result is four SIMD8 groups, each
 * writing its own accumulator register: groups 0 and 1 are plain MOVs,
 * groups 2 and 3 first copy their source through a UD temporary using
 * UW-typed MOVs.
 */
TEST_F(lower_regioning_test, bf_to_f_accumulator)
{
   set_gfx_platform("dg2");

   brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 32);
   brw_builder exp = make_shader(MESA_SHADER_FRAGMENT, 32);

   /* Allocated in both shaders, in this order, so register numbers agree. */
   brw_reg bf_src = vgrf(bld, exp, BRW_TYPE_BF, 4);
   brw_reg ud_tmp_a = vgrf(bld, exp, BRW_TYPE_UD);
   brw_reg ud_tmp_b = vgrf(bld, exp, BRW_TYPE_UD);

   brw_reg acc0 = retype(brw_acc_reg(8 * reg_unit(devinfo)),
                         BRW_TYPE_F);

   /* Copy of acc0 with its register number moved to accumulator idx. */
   auto acc_at = [&](unsigned idx) {
      brw_reg acc = acc0;
      acc.nr = BRW_ARF_ACCUMULATOR + idx;
      return acc;
   };

   /* Expected sequence for a group that goes through a UD temporary:
    * UNDEF the temporary, UW-typed MOV of the group's source slice into
    * it, then MOV of the temporary (viewed as BF) into the accumulator.
    */
   auto expect_via_tmp = [&](unsigned grp, const brw_reg &tmp) {
      exp.exec_all().group(8, grp).UNDEF(tmp);
      exp.exec_all().group(8, grp).MOV(
         retype(tmp, BRW_TYPE_UW),
         retype(byte_offset(bf_src, 16 * grp), BRW_TYPE_UW));
      exp.exec_all().group(8, grp).MOV(acc_at(grp),
                                       retype(tmp, BRW_TYPE_BF));
   };

   /* Shader under test: one wide MOV, then the two lowering passes. */
   bld.exec_all().MOV(acc0, bf_src);

   EXPECT_PROGRESS(brw_lower_simd_width, bld);
   EXPECT_PROGRESS(brw_lower_regioning, bld);

   /* Expected shader. */
   exp.exec_all().group(8, 0).MOV(acc0, bf_src);
   exp.exec_all().group(8, 1).MOV(acc_at(1), byte_offset(bf_src, 16));
   expect_via_tmp(2, ud_tmp_a);
   expect_via_tmp(3, ud_tmp_b);

   EXPECT_SHADERS_MATCH(bld, exp);
}