intel/brw: move cache_mode assignment to after send->sfid choice

We have code to choose cache_mode before send->sfid is assigned, but
after it we have more code to choose cache_mode that relies on
send->sfid. Move everything to after the selection of send->sfid so
the code to pick cache_mode is all together. I plan to simplify this
futher in the next commits, the goal of this patch is to make the next
diff easier to read.

Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41319>
This commit is contained in:
Paulo Zanoni 2026-03-11 11:31:02 -07:00 committed by Marge Bot
parent 75a0cd6e2c
commit 2fcaa19a96

View file

@ -1251,41 +1251,6 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
}
}
/* Bspec: Atomic instruction -> Cache section:
*
* Atomic messages are always forced to "un-cacheable" in the L1
* cache.
*
* Bspec: Overview of memory Access:
*
* If a read from a Null tile gets a cache-hit in a virtually-addressed
* GPU cache, then the read may not return zeroes.
*
* If a shader writes to a null tile and wants to be able to read it back
* as zero, it will use the 'volatile' decoration for the access, otherwise
* the compiler may choose to optimize things out, breaking the
* residencyNonResidentStrict guarantees. Due to the above, we need to make
* these operations uncached.
*/
unsigned cache_mode =
lsc_opcode_is_atomic(op) ? LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
volatile_access ?
(devinfo->ver >= 20 ?
/* Xe2 has a better L3 that can deal with null tiles.*/
(lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
/* On older platforms, all caches have to be bypassed. */
(lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3UC) :
LSC_CACHE(devinfo, LOAD, L1UC_L3UC))) :
/* Skip L1 for coherent accesses */
coherent_access ? (lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
lsc_opcode_is_store(op) ? LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS) :
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS);
/* If we're a fragment shader, we have to predicate with the sample mask to
* avoid helper invocations in instructions with side effects, unless they
* are explicitly required. One exception is for scratch writes - even
@ -1321,6 +1286,41 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
}
assert(send->sfid);
/* Bspec: Atomic instruction -> Cache section:
*
* Atomic messages are always forced to "un-cacheable" in the L1
* cache.
*
* Bspec: Overview of memory Access:
*
* If a read from a Null tile gets a cache-hit in a virtually-addressed
* GPU cache, then the read may not return zeroes.
*
* If a shader writes to a null tile and wants to be able to read it back
* as zero, it will use the 'volatile' decoration for the access, otherwise
* the compiler may choose to optimize things out, breaking the
* residencyNonResidentStrict guarantees. Due to the above, we need to make
* these operations uncached.
*/
unsigned cache_mode =
lsc_opcode_is_atomic(op) ? LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
volatile_access ?
(devinfo->ver >= 20 ?
/* Xe2 has a better L3 that can deal with null tiles.*/
(lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
/* On older platforms, all caches have to be bypassed. */
(lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3UC) :
LSC_CACHE(devinfo, LOAD, L1UC_L3UC))) :
/* Skip L1 for coherent accesses */
coherent_access ? (lsc_opcode_is_store(op) ?
LSC_CACHE(devinfo, STORE, L1UC_L3WB) :
LSC_CACHE(devinfo, LOAD, L1UC_L3C)) :
lsc_opcode_is_store(op) ? LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS) :
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS);
/* Disable LSC data port L1 cache scheme for the TGM load/store for RT
* shaders. (see HSD 18038444588)
*/