intel/brw: move cache_mode assignment to after send->sfid choice

We have code to choose cache_mode before send->sfid is assigned, but after it we have more code to choose cache_mode that relies on send->sfid. Move everything to after the selection of send->sfid so the code to pick cache_mode is all together. I plan to simplify this futher in the next commits, the goal of this patch is to make the next diff easier to read. Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41319>
2026-06-10 07:48:19 +02:00 · 2026-03-11 11:31:02 -07:00 · 2026-03-11 11:31:02 -07:00 · 2fcaa19a96
commit 2fcaa19a96
parent 75a0cd6e2c
1 changed files with 35 additions and 35 deletions
--- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp
@ -1251,41 +1251,6 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
      }
   }

-   /* Bspec: Atomic instruction -> Cache section:
-    *
-    *    Atomic messages are always forced to "un-cacheable" in the L1
-    *    cache.
-    *
-    * Bspec: Overview of memory Access:
-    *
-    *   If a read from a Null tile gets a cache-hit in a virtually-addressed
-    *   GPU cache, then the read may not return zeroes.
-    *
-    * If a shader writes to a null tile and wants to be able to read it back
-    * as zero, it will use the 'volatile' decoration for the access, otherwise
-    * the compiler may choose to optimize things out, breaking the
-    * residencyNonResidentStrict guarantees. Due to the above, we need to make
-    * these operations uncached.
-    */
-   unsigned cache_mode =
-      lsc_opcode_is_atomic(op) ? LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
-      volatile_access ?
-         (devinfo->ver >= 20 ?
-            /* Xe2 has a better L3 that can deal with null tiles.*/
-            (lsc_opcode_is_store(op) ?
-               LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
-               LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
-            /* On older platforms, all caches have to be bypassed. */
-            (lsc_opcode_is_store(op) ?
-               LSC_CACHE(devinfo, STORE, L1UC_L3UC) :
-               LSC_CACHE(devinfo, LOAD, L1UC_L3UC))) :
-      /* Skip L1 for coherent accesses */
-      coherent_access ? (lsc_opcode_is_store(op) ?
-                         LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
-                         LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
-      lsc_opcode_is_store(op) ? LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS) :
-      LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS);
-
   /* If we're a fragment shader, we have to predicate with the sample mask to
    * avoid helper invocations in instructions with side effects, unless they
    * are explicitly required.  One exception is for scratch writes - even
@ -1321,6 +1286,41 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
   }
   assert(send->sfid);

+   /* Bspec: Atomic instruction -> Cache section:
+    *
+    *    Atomic messages are always forced to "un-cacheable" in the L1
+    *    cache.
+    *
+    * Bspec: Overview of memory Access:
+    *
+    *   If a read from a Null tile gets a cache-hit in a virtually-addressed
+    *   GPU cache, then the read may not return zeroes.
+    *
+    * If a shader writes to a null tile and wants to be able to read it back
+    * as zero, it will use the 'volatile' decoration for the access, otherwise
+    * the compiler may choose to optimize things out, breaking the
+    * residencyNonResidentStrict guarantees. Due to the above, we need to make
+    * these operations uncached.
+    */
+   unsigned cache_mode =
+      lsc_opcode_is_atomic(op) ? LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
+      volatile_access ?
+         (devinfo->ver >= 20 ?
+            /* Xe2 has a better L3 that can deal with null tiles.*/
+            (lsc_opcode_is_store(op) ?
+               LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
+               LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
+            /* On older platforms, all caches have to be bypassed. */
+            (lsc_opcode_is_store(op) ?
+               LSC_CACHE(devinfo, STORE, L1UC_L3UC) :
+               LSC_CACHE(devinfo, LOAD, L1UC_L3UC))) :
+      /* Skip L1 for coherent accesses */
+      coherent_access ? (lsc_opcode_is_store(op) ?
+                         LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
+                         LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
+      lsc_opcode_is_store(op) ? LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS) :
+      LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS);
+
   /* Disable LSC data port L1 cache scheme for the TGM load/store for RT
    * shaders. (see HSD 18038444588)
    */