mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
nak: use MemScope::CTA for shared memory scoped SCOPE_WORKGROUP barriers
CTA synchronizes between all threads within the same workgroup, so we should use that over GPU, which has more severe performance implications. Sadly, it doesn't appear that we can rely on .CTA to work for global memory, so let's keep using GPU for those for now. Speeds up vk_cooperative_matrix by roughly 40%. Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36482>
This commit is contained in:
parent
50c6f31963
commit
67aeacc86b
1 changed file with 14 additions and 2 deletions
|
|
@ -3413,9 +3413,21 @@ impl<'a> ShaderFromNir<'a> {
|
|||
if intrin.memory_scope() != SCOPE_NONE {
|
||||
let mem_scope = match intrin.memory_scope() {
|
||||
SCOPE_INVOCATION | SCOPE_SUBGROUP => MemScope::CTA,
|
||||
SCOPE_WORKGROUP | SCOPE_QUEUE_FAMILY | SCOPE_DEVICE => {
|
||||
MemScope::GPU
|
||||
// A membar.gpu is very expensive so use .cta whenever
|
||||
// possible.
|
||||
// TODO: Figure out under which conditions we can relax
|
||||
// them for global memory/images to CTA.
|
||||
SCOPE_WORKGROUP => {
|
||||
let global_modes = nir_var_image
|
||||
| nir_var_mem_global
|
||||
| nir_var_mem_ssbo;
|
||||
if intrin.memory_modes() & global_modes != 0 {
|
||||
MemScope::GPU
|
||||
} else {
|
||||
MemScope::CTA
|
||||
}
|
||||
}
|
||||
SCOPE_QUEUE_FAMILY | SCOPE_DEVICE => MemScope::GPU,
|
||||
_ => panic!("Unhandled memory scope"),
|
||||
};
|
||||
b.push_op(OpMemBar { scope: mem_scope });
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue