From ffdc0d8e98eeb68abcfff3c48b3691b999305004 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Tue, 18 Feb 2025 19:06:37 -0600
Subject: [PATCH] nak: Use suld.constant when ACCESS_CAN_REORDER is set

This is way faster than suld.sys, which is what we're using today. So
far I haven't seen it matter for anything but texel buffers but it
likely helps some app somewhere.

Backport-to: 25.0
Part-of:
---
 src/nouveau/compiler/nak/from_nir.rs | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index ee547f48cee..e488c582290 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -2338,6 +2338,15 @@ impl<'a> ShaderFromNir<'a> {
                 let coord = self.get_image_coord(intrin, dim);
                 // let sample = self.get_src(&srcs[2]);
 
+                let mem_order = if intrin.intrinsic
+                    == nir_intrinsic_load_global_constant
+                    || (intrin.access() & ACCESS_CAN_REORDER) != 0
+                {
+                    MemOrder::Constant
+                } else {
+                    MemOrder::Strong(MemScope::System)
+                };
+
                 let comps = intrin.num_components;
                 assert!(intrin.def.bit_size() == 32);
                 assert!(comps == 1 || comps == 2 || comps == 4);
@@ -2348,7 +2357,7 @@ impl<'a> ShaderFromNir<'a> {
                     dst: dst.into(),
                     fault: Dst::None,
                     image_dim: dim,
-                    mem_order: MemOrder::Strong(MemScope::System),
+                    mem_order,
                     mem_eviction_priority: self
                         .get_eviction_priority(intrin.access()),
                     mask: (1 << comps) - 1,
@@ -2363,6 +2372,15 @@ impl<'a> ShaderFromNir<'a> {
                 let coord = self.get_image_coord(intrin, dim);
                 // let sample = self.get_src(&srcs[2]);
 
+                let mem_order = if intrin.intrinsic
+                    == nir_intrinsic_load_global_constant
+                    || (intrin.access() & ACCESS_CAN_REORDER) != 0
+                {
+                    MemOrder::Constant
+                } else {
+                    MemOrder::Strong(MemScope::System)
+                };
+
                 let comps = intrin.num_components;
                 assert!(intrin.def.bit_size() == 32);
                 assert!(comps == 5);
@@ -2374,7 +2392,7 @@ impl<'a> ShaderFromNir<'a> {
                     dst: dst.into(),
                     fault: fault.into(),
                     image_dim: dim,
-                    mem_order: MemOrder::Strong(MemScope::System),
+                    mem_order,
                     mem_eviction_priority: self
                         .get_eviction_priority(intrin.access()),
                     mask: (1 << (comps - 1)) - 1,