nir: add nvidia's shared memory non-uniform address shift

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39709>
This commit is contained in:
Karol Herbst 2026-01-29 22:51:48 +01:00
parent c016346b50
commit 18bf6fb96d
3 changed files with 16 additions and 4 deletions

View file

@ -228,6 +228,9 @@ index("unsigned", "align_offset")
# other than bytes (i.e., where the shift is implicit).
index("unsigned", "offset_shift")
# Similar to offset_shift, except that it is applied only to the non-uniform offset src, not to the base.
index("unsigned", "offset_shift_nv")
# The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic.
index("unsigned", "desc_type")
@ -915,7 +918,7 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP])
@ -927,7 +930,7 @@ intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP],
intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
@ -1838,8 +1841,8 @@ load("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[C
store("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
load("scratch_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
store("scratch_nv", [1], indices=[BASE, ALIGN_MUL, ALIGN_OFFSET])
load("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
store("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
load("shared_nv", [1], indices=[BASE, OFFSET_SHIFT_NV, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
store("shared_nv", [1], indices=[BASE, OFFSET_SHIFT_NV, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])

View file

@ -938,6 +938,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
unsigned min_align = 1 << nir_intrinsic_offset_shift(instr);
validate_assert(state, nir_intrinsic_align(instr) >= min_align);
}
if (nir_intrinsic_has_offset_shift_nv(instr)) {
unsigned shift = nir_intrinsic_offset_shift_nv(instr);
validate_assert(state, shift == 0 || (shift >= 2 && shift <= 4));
}
}
static void

View file

@ -393,6 +393,10 @@ impl nir_intrinsic_instr {
/// Reads the `NIR_INTRINSIC_NUM_MATRICES` constant index of this intrinsic.
///
/// The value is narrowed with an `as` cast, so anything that does not fit
/// in a `u8` is truncated rather than reported.
pub fn num_matrices(&self) -> u8 {
    let raw = self.get_const_index(NIR_INTRINSIC_NUM_MATRICES);
    raw as u8
}
/// Reads the `NIR_INTRINSIC_OFFSET_SHIFT_NV` constant index of this
/// intrinsic.
///
/// The value is narrowed with an `as` cast, so anything that does not fit
/// in a `u8` is truncated rather than reported.
// NOTE(review): NIR validation appears to accept only 0 or 2..=4 for this
// index, so the narrowing should be lossless in practice; confirm.
pub fn offset_shift_nv(&self) -> u8 {
    let raw = self.get_const_index(NIR_INTRINSIC_OFFSET_SHIFT_NV);
    raw as u8
}
}
impl nir_intrinsic_info {