From 82b1afe7328d77f0afd94180c94a0bf17721c98a Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 26 Mar 2026 13:16:01 -0700 Subject: [PATCH] brw: evict memory for workgroup scope in Xe2 and newer On Xe2 and Xe3, the flushing is necessary due to aliasing of TGM data in L1 memory (HSD 14020414266). On newer platforms, it is necessary for proper post-format data conversion handling (HSD 22020984324). See the Instruction_Fence page (63969) for documentation on the fact that the threadgroup scope ignores flushes. Thanks to Francisco Jerez and Kenneth Graunke for their help with this patch. v2: restrict the flushing to TGM (Lionel). Signed-off-by: Paulo Zanoni --- src/intel/compiler/brw/brw_from_nir.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 13902d4ad67..10745ea3f81 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -4614,7 +4614,7 @@ emit_fence(const brw_builder &bld, enum opcode opcode, static uint32_t lsc_fence_descriptor_for_intrinsic(const struct intel_device_info *devinfo, - nir_intrinsic_instr *instr) + nir_intrinsic_instr *instr, bool is_tgm) { assert(devinfo->has_lsc); @@ -4640,8 +4640,17 @@ lsc_fence_descriptor_for_intrinsic(const struct intel_device_info *devinfo, flush_type = LSC_FLUSH_TYPE_EVICT; break; case SCOPE_WORKGROUP: - scope = LSC_FENCE_THREADGROUP; - flush_type = LSC_FLUSH_TYPE_NONE; + /* On Xe2 and Xe3 we need the eviction due to aliasing of TGM data + * in L1 (HSD 14020414266). On Xe3p we need this due to how data + * post-format conversion happens (HSD 22020984324). + * Also, we have to upgrade the scope to TILE since flush_type is + * ignored for threadgroup fences, which means we'll use the + * values already initialized. 
+ */ + if (devinfo->ver < 20 || !is_tgm) { + scope = LSC_FENCE_THREADGROUP; + flush_type = LSC_FLUSH_TYPE_NONE; + } break; case SCOPE_SHADER_CALL: case SCOPE_INVOCATION: @@ -5090,7 +5099,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, if (devinfo->has_lsc) { assert(devinfo->verx10 >= 125); uint32_t desc = - lsc_fence_descriptor_for_intrinsic(devinfo, instr); + lsc_fence_descriptor_for_intrinsic(devinfo, instr, tgm_fence); if (ugm_fence) { fence_regs[fence_regs_count++] = emit_fence(ubld1, opcode, BRW_SFID_UGM, desc,