From cfdb3ddb93c02eb1e98e9f634263313fc87693c0 Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Fri, 27 Mar 2026 20:59:38 -0700
Subject: [PATCH] brw: brw_reg::nr for an accumulator is not part of the offset

Without this, reg_offset will return 1024 for acc0. This causes
has_invalid_dst_region to decide that the destination region is invalid
(because 1024 != 0), and the lowering code tries to treat the floating
point accumulators as integers. It's a mess.

v2: Add and use set_gfx_platform. Suggested by Caio.

Fixes: 937373eb25c ("i965/fs: Handle fixed HW GRF subnr in reg_offset().")
Reviewed-by: Caio Oliveira
Part-of:
---
 src/intel/compiler/brw/brw_reg.h              |  3 +-
 src/intel/compiler/brw/test_helpers.h         | 12 ++++++
 .../compiler/brw/test_lower_regioning.cpp     | 40 +++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw/brw_reg.h b/src/intel/compiler/brw/brw_reg.h
index 6f7cb30bc6b..85af34e81e3 100644
--- a/src/intel/compiler/brw/brw_reg.h
+++ b/src/intel/compiler/brw/brw_reg.h
@@ -1424,7 +1424,8 @@ reg_space(const brw_reg &r)
 static inline unsigned
 reg_offset(const brw_reg &r)
 {
-   return (r.file == ADDRESS || r.file == VGRF || r.file == IMM || r.file == ATTR ? 0 : r.nr) *
+   return (r.file == ADDRESS || r.file == VGRF || r.file == IMM ||
+           r.file == ATTR || brw_reg_is_arf(r, BRW_ARF_ACCUMULATOR) ? 0 : r.nr) *
           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
           (r.file == ADDRESS || r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
 }
diff --git a/src/intel/compiler/brw/test_helpers.h b/src/intel/compiler/brw/test_helpers.h
index 25e4155371d..b1ec3cd7143 100644
--- a/src/intel/compiler/brw/test_helpers.h
+++ b/src/intel/compiler/brw/test_helpers.h
@@ -128,6 +128,18 @@ protected:
       brw_init_isa_info(&compiler->isa, devinfo);
    }
 
+   void
+   set_gfx_platform(const char *name)
+   {
+      int pci_id = intel_device_name_to_pci_device_id(name);
+      assert(pci_id > 0);
+
+      intel_get_device_info_from_pci_id(pci_id, devinfo);
+      assert(devinfo->ver > 0);
+
+      brw_init_isa_info(&compiler->isa, devinfo);
+   }
+
    brw_builder
    make_shader(mesa_shader_stage stage = MESA_SHADER_FRAGMENT,
                unsigned dispatch_width = 0)
diff --git a/src/intel/compiler/brw/test_lower_regioning.cpp b/src/intel/compiler/brw/test_lower_regioning.cpp
index a0cfe82da02..114f6bb190d 100644
--- a/src/intel/compiler/brw/test_lower_regioning.cpp
+++ b/src/intel/compiler/brw/test_lower_regioning.cpp
@@ -22,3 +22,43 @@ TEST_F(lower_regioning_test, sel_ud_d_d)
 
    EXPECT_NO_PROGRESS(brw_lower_regioning, bld);
 }
+
+TEST_F(lower_regioning_test, bf_to_f_accumulator)
+{
+   set_gfx_platform("dg2");
+
+   brw_builder bld = make_shader(MESA_SHADER_FRAGMENT, 32);
+   brw_builder exp = make_shader(MESA_SHADER_FRAGMENT, 32);
+
+   brw_reg src0 = vgrf(bld, exp, BRW_TYPE_BF, 4);
+   brw_reg tmp0 = vgrf(bld, exp, BRW_TYPE_UD);
+   brw_reg tmp1 = vgrf(bld, exp, BRW_TYPE_UD);
+   brw_reg acc0 = retype(brw_acc_reg(8 * reg_unit(devinfo)),
+                         BRW_TYPE_F);
+
+   bld.exec_all().MOV(acc0, src0);
+
+   EXPECT_PROGRESS(brw_lower_simd_width, bld);
+   EXPECT_PROGRESS(brw_lower_regioning, bld);
+
+   brw_reg acc1 = acc0;
+   brw_reg acc2 = acc0;
+   brw_reg acc3 = acc0;
+
+   acc1.nr = BRW_ARF_ACCUMULATOR + 1;
+   acc2.nr = BRW_ARF_ACCUMULATOR + 2;
+   acc3.nr = BRW_ARF_ACCUMULATOR + 3;
+
+   exp.exec_all().group(8, 0).MOV(acc0, src0);
+   exp.exec_all().group(8, 1).MOV(acc1, byte_offset(src0, 16));
+   exp.exec_all().group(8, 2).UNDEF(tmp0);
+   exp.exec_all().group(8, 2).MOV(retype(tmp0, BRW_TYPE_UW),
+                                  retype(byte_offset(src0, 32), BRW_TYPE_UW));
+   exp.exec_all().group(8, 2).MOV(acc2, retype(tmp0, BRW_TYPE_BF));
+   exp.exec_all().group(8, 3).UNDEF(tmp1);
+   exp.exec_all().group(8, 3).MOV(retype(tmp1, BRW_TYPE_UW),
+                                  retype(byte_offset(src0, 48), BRW_TYPE_UW));
+   exp.exec_all().group(8, 3).MOV(acc3, retype(tmp1, BRW_TYPE_BF));
+
+   EXPECT_SHADERS_MATCH(bld, exp);
+}