From 18bf6fb96dd57d7891b6483ef39efe42e05bbaaa Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 29 Jan 2026 22:51:48 +0100 Subject: [PATCH] nir: add NVIDIA's shared memory non-uniform address shift Part-of: --- src/compiler/nir/nir_intrinsics.py | 11 +++++++---- src/compiler/nir/nir_validate.c | 5 +++++ src/compiler/rust/nir.rs | 4 ++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index ae4211251ae..c04593c2c07 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -228,6 +228,9 @@ index("unsigned", "align_offset") # other than bytes (i.e., where the shift is implicit). index("unsigned", "offset_shift") +# Similar to offset_shift except it is applied only to the non-uniform offset src, not the base. +index("unsigned", "offset_shift_nv") + # The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. index("unsigned", "desc_type") @@ -915,7 +918,7 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0, intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP]) @@ -927,7 +930,7 @@ intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) 
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) @@ -1838,8 +1841,8 @@ load("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[C store("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) load("scratch_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) store("scratch_nv", [1], indices=[BASE, ALIGN_MUL, ALIGN_OFFSET]) -load("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) -store("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +load("shared_nv", [1], indices=[BASE, OFFSET_SHIFT_NV, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) +store("shared_nv", [1], indices=[BASE, OFFSET_SHIFT_NV, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) # Same as shared_atomic_add, but with GDS. 
src[] = {store_val, gds_addr, m0} intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE]) diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 216bb49e0eb..fa45b5d9553 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -938,6 +938,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) unsigned min_align = 1 << nir_intrinsic_offset_shift(instr); validate_assert(state, nir_intrinsic_align(instr) >= min_align); } + + if (nir_intrinsic_has_offset_shift_nv(instr)) { + unsigned shift = nir_intrinsic_offset_shift_nv(instr); + validate_assert(state, shift == 0 || (shift >= 2 && shift <= 4)); + } } static void diff --git a/src/compiler/rust/nir.rs b/src/compiler/rust/nir.rs index 13c82d545d5..90d99407078 100644 --- a/src/compiler/rust/nir.rs +++ b/src/compiler/rust/nir.rs @@ -393,6 +393,10 @@ impl nir_intrinsic_instr { pub fn num_matrices(&self) -> u8 { self.get_const_index(NIR_INTRINSIC_NUM_MATRICES) as u8 } + + pub fn offset_shift_nv(&self) -> u8 { + self.get_const_index(NIR_INTRINSIC_OFFSET_SHIFT_NV) as u8 + } } impl nir_intrinsic_info {