diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7feffc247a2..c67ba03d64c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5595,6 +5595,27 @@ nir_src *nir_get_io_index_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr); nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); +static inline unsigned +nir_get_io_base_size_nv(const nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_global_atomic_nv: + case nir_intrinsic_global_atomic_swap_nv: + case nir_intrinsic_shared_atomic_nv: + case nir_intrinsic_shared_atomic_swap_nv: + case nir_intrinsic_load_global_nv: + case nir_intrinsic_load_scratch_nv: + case nir_intrinsic_load_shared_nv: + case nir_intrinsic_store_global_nv: + case nir_intrinsic_store_scratch_nv: + case nir_intrinsic_store_shared_nv: + return 24; + default: + UNREACHABLE("unknown nvidia intrinsic"); + return -1; + } +} + bool nir_is_shared_access(nir_intrinsic_instr *intr); bool nir_is_output_load(nir_intrinsic_instr *intr); bool nir_is_input_load(nir_intrinsic_instr *intr); diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 93a43961f14..8fc6b019578 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -612,6 +612,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_shared: case nir_intrinsic_load_shared_ir3: + case nir_intrinsic_load_shared_nv: is_divergent = src_divergent(instr->src[0], state) || (options & nir_divergence_uniform_load_tears); break; @@ -619,6 +620,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_global: case nir_intrinsic_load_global_2x32: case nir_intrinsic_load_global_ir3: + case nir_intrinsic_load_global_nv: case nir_intrinsic_load_deref: { if (load_may_tear(state, instr)) { is_divergent = 
true; @@ -880,6 +882,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_helper_invocation: case nir_intrinsic_is_helper_invocation: case nir_intrinsic_load_scratch: + case nir_intrinsic_load_scratch_nv: case nir_intrinsic_deref_atomic: case nir_intrinsic_deref_atomic_swap: case nir_intrinsic_ssbo_atomic: @@ -894,6 +897,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_bindless_image_atomic_swap: case nir_intrinsic_shared_atomic: case nir_intrinsic_shared_atomic_swap: + case nir_intrinsic_shared_atomic_nv: + case nir_intrinsic_shared_atomic_swap_nv: case nir_intrinsic_task_payload_atomic: case nir_intrinsic_task_payload_atomic_swap: case nir_intrinsic_global_atomic: @@ -905,6 +910,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_global_atomic_swap_agx: case nir_intrinsic_global_atomic_2x32: case nir_intrinsic_global_atomic_swap_2x32: + case nir_intrinsic_global_atomic_nv: + case nir_intrinsic_global_atomic_swap_nv: case nir_intrinsic_global_atomic_pco: case nir_intrinsic_atomic_counter_add: case nir_intrinsic_atomic_counter_min: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index cd369bc002f..3eefac92c97 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -905,6 +905,9 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0, # AGX global variants take a 64-bit base address plus a 32-bit offset in words. # The offset is sign-extended or zero-extended based on the SIGN_EXTEND index. # +# NV variants all carry a 24-bit BASE offset that is treated as unsigned when the +# address is a constant 0, and as signed otherwise. +# # PCO global variants use a vec3 for the memory address and data, where component X # has the low 32 address bits, component Y has the high 32 address bits, and component Z # has the data parameter.
@@ -912,21 +915,25 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0, intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND]) +intrinsic("global_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32]) intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND]) +intrinsic("global_atomic_swap_nv", src_comp=[1, 1, 1], 
dest_comp=1, indices=[BASE, ATOMIC_OP]) intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32]) def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True): @@ -1825,6 +1832,15 @@ load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flag # src[] = { value, address, unsigned 32-bit offset }. store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRITE_MASK]) +# src[] = { address } for loads, { value, address } for stores. BASE is a 24-bit offset: +# unsigned if a constant 0 address is given, signed otherwise. +load("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) +store("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +load("scratch_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) +store("scratch_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +load("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) +store("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) + # Same as shared_atomic_add, but with GDS.
src[] = {store_val, gds_addr, m0} intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE]) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 2ec77a09722..f1e99d17fa1 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -979,6 +979,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_load_output: case nir_intrinsic_load_pixel_local: case nir_intrinsic_load_shared: + case nir_intrinsic_load_shared_nv: case nir_intrinsic_load_task_payload: case nir_intrinsic_load_uniform: case nir_intrinsic_load_constant: @@ -988,16 +989,22 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_load_global_2x32: case nir_intrinsic_load_global_constant: case nir_intrinsic_load_global_etna: + case nir_intrinsic_load_global_nv: case nir_intrinsic_load_scratch: + case nir_intrinsic_load_scratch_nv: case nir_intrinsic_load_fs_input_interp_deltas: case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_nv: case nir_intrinsic_shared_atomic_swap: + case nir_intrinsic_shared_atomic_swap_nv: case nir_intrinsic_task_payload_atomic: case nir_intrinsic_task_payload_atomic_swap: case nir_intrinsic_global_atomic: case nir_intrinsic_global_atomic_2x32: + case nir_intrinsic_global_atomic_nv: case nir_intrinsic_global_atomic_swap: case nir_intrinsic_global_atomic_swap_2x32: + case nir_intrinsic_global_atomic_swap_nv: case nir_intrinsic_load_coefficients_agx: case nir_intrinsic_load_shared_block_intel: case nir_intrinsic_load_global_block_intel: @@ -1021,11 +1028,14 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_store_output: case nir_intrinsic_store_pixel_local: case nir_intrinsic_store_shared: + case nir_intrinsic_store_shared_nv: case nir_intrinsic_store_task_payload: case nir_intrinsic_store_global: case nir_intrinsic_store_global_2x32: case nir_intrinsic_store_global_etna: + case 
nir_intrinsic_store_global_nv: case nir_intrinsic_store_scratch: + case nir_intrinsic_store_scratch_nv: case nir_intrinsic_ssbo_atomic: case nir_intrinsic_ssbo_atomic_swap: case nir_intrinsic_ldc_nv: diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index 2212acdfac7..e67d8adeee4 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -240,6 +240,7 @@ node_is_dead(nir_cf_node *node) case nir_intrinsic_load_shared: case nir_intrinsic_load_shared2_amd: + case nir_intrinsic_load_shared_nv: case nir_intrinsic_load_output: case nir_intrinsic_load_pixel_local: case nir_intrinsic_load_per_vertex_output: diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index ae563ae1677..87ee76ae3fe 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -729,6 +729,30 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) } break; + case nir_intrinsic_global_atomic_nv: + case nir_intrinsic_global_atomic_swap_nv: + case nir_intrinsic_shared_atomic_nv: + case nir_intrinsic_shared_atomic_swap_nv: + case nir_intrinsic_load_global_nv: + case nir_intrinsic_load_scratch_nv: + case nir_intrinsic_load_shared_nv: + case nir_intrinsic_store_global_nv: + case nir_intrinsic_store_scratch_nv: + case nir_intrinsic_store_shared_nv: { + int base = nir_intrinsic_base(instr); + nir_src src = *nir_get_io_offset_src(instr); + unsigned const_bits = nir_get_io_base_size_nv(instr); + + if (nir_src_is_const(src) && nir_src_as_int(src) == 0) { + validate_assert(state, base >= 0 && base <= BITFIELD_MASK(const_bits)); + } else { + int32_t max = BITFIELD_MASK(const_bits - 1); + int32_t min = ~BITFIELD_MASK(const_bits - 1); + validate_assert(state, base >= min && base <= max); + } + break; + } + default: break; }