From 64bc538f5e2f73bfb21717f898d7b54aab323adc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 8 Apr 2026 12:02:50 -0700 Subject: [PATCH] intel/brw: Explicitly upcast UB to UW for SHR with vector immediates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HW does not allow an instruction with a vector immediate to have a strided source region that crosses a GRF boundary. Under register pressure, the register allocator may place a temporary register across such a boundary. To resolve this, we now explicitly emit a MOV to upcast the UB payload into a UW VGRF. This ensures the SHR instruction operates on a dense, well-aligned region that satisfies hardware alignment constraints. Below is the portion of the shader exhibiting this issue: Native code for unnamed fragment shader GLSL6 (src_hash 0x9c84a007) (sha1 48745e7dae90d08f8a9bbe4dbf837de23440c841f0344e669cb8af9df79bce58) SIMD32 shader: 44 instructions. 0 loops. 354 cycles. 0:0 spills:fills, 2 sends, scheduled with mode latency-sensitive. Promoted 0 constants. GRF registers: 22. Non-SSA regs (after NIR): 11. 
Compacted 800 to 800 bytes (0%) mov(1) f1<1>UW g0.30<0,1,0>UW { align1 WE_all 1N }; mov(1) f1.1<1>UW g1.30<0,1,0>UW { align1 WE_all 1N I@1 }; mov(32) g2<2>UW g0.20<2,8,0>UW { align1 WE_all }; mov(32) g4<2>UW g0.21<2,8,0>UW { align1 WE_all }; mov(32) g8<2>UW g1.20<2,8,0>UW { align1 WE_all }; mov(32) g10<2>UW g1.21<2,8,0>UW { align1 WE_all }; mov(16) g12<4>UB g0.60<1,8,0>UB { align1 1H }; mov(16) g13<4>UB g1.60<1,8,0>UB { align1 2H }; add(32) g0<1>UW g2<16,8,2>UW 0x01000100V { align1 WE_all I@6 }; add(32) g1<1>UW g4<16,8,2>UW 0x01010000V { align1 WE_all I@6 }; add(32) g2<1>UW g8<16,8,2>UW 0x01000100V { align1 WE_all I@6 }; add(32) g3<1>UW g10<16,8,2>UW 0x01010000V { align1 WE_all I@6 }; shr(16) g4<1>UW g12<32,8,4>UB 0x76543210V { align1 1H I@6 }; mov(16) g14.32<4>UB g13<32,8,4>UB { align1 2H I@6 }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@6 }; mov(16) g5<1>UW g0<16,8,2>UW { align1 1H }; sync nop(1) null<0,1,0>UB { align1 WE_all 1N I@6 }; mov(16) g0<1>UW g1<16,8,2>UW { align1 1H }; sync nop(1) null<0,1,0>UB { align1 WE_all 5N I@6 }; mov(16) g5.16<1>UW g2<16,8,2>UW { align1 2H }; sync nop(1) null<0,1,0>UB { align1 WE_all 5N I@6 }; mov(16) g0.16<1>UW g3<16,8,2>UW { align1 2H }; shr(16) g4.16<1>UW g14.32<32,8,4>UB 0x76543210V { align1 2H I@5 }; ERROR: Invalid register region for source 0. See special restrictions section. Reviewed-by: Ian Romanick Signed-off-by: José Roberto de Souza Part-of: --- src/intel/compiler/brw/brw_from_nir.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index ccd51256ac8..71e54358976 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -194,8 +194,11 @@ emit_system_values_block(nir_to_brw_state &ntb, nir_block *block) */ const struct brw_reg reg = s.devinfo->ver >= 20 ? 
xe2_vec1_grf(i, 15) : brw_vec1_grf(i + 1, 7); + brw_reg mask_uw = hbld.vgrf(BRW_TYPE_UW); + hbld.MOV(mask_uw, stride(retype(reg, BRW_TYPE_UB), 1, 8, 0)); + hbld.SHR(offset(shifted, hbld, i), - stride(retype(reg, BRW_TYPE_UB), 1, 8, 0), + mask_uw, brw_imm_v(0x76543210)); } @@ -3528,8 +3531,11 @@ emit_sampleid_setup(nir_to_brw_state &ntb) */ const struct brw_reg id_reg = devinfo->ver >= 20 ? xe2_vec1_grf(i, 8) : brw_vec1_grf(i + 1, 0); + brw_reg mask_uw = hbld.vgrf(BRW_TYPE_UW); + hbld.MOV(mask_uw, stride(retype(id_reg, BRW_TYPE_UB), 1, 8, 0)); + hbld.SHR(offset(tmp, hbld, i), - stride(retype(id_reg, BRW_TYPE_UB), 1, 8, 0), + mask_uw, brw_imm_v(0x44440000)); }