mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-22 20:20:37 +01:00
nir: add nvidia IO intrinsics
Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39525>
This commit is contained in:
parent
24073b66fa
commit
e779538ad2
6 changed files with 79 additions and 0 deletions
|
|
@ -5595,6 +5595,27 @@ nir_src *nir_get_io_index_src(nir_intrinsic_instr *instr);
|
|||
nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr);
|
||||
nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
|
||||
|
||||
static inline unsigned
|
||||
nir_get_io_base_size_nv(const nir_intrinsic_instr *intr)
|
||||
{
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
case nir_intrinsic_shared_atomic_swap_nv:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_scratch_nv:
|
||||
case nir_intrinsic_load_shared_nv:
|
||||
case nir_intrinsic_store_global_nv:
|
||||
case nir_intrinsic_store_scratch_nv:
|
||||
case nir_intrinsic_store_shared_nv:
|
||||
return 24;
|
||||
default:
|
||||
UNREACHABLE("unknown nvidia intrinsic");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
bool nir_is_shared_access(nir_intrinsic_instr *intr);
|
||||
bool nir_is_output_load(nir_intrinsic_instr *intr);
|
||||
bool nir_is_input_load(nir_intrinsic_instr *intr);
|
||||
|
|
|
|||
|
|
@ -612,6 +612,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared_ir3:
|
||||
case nir_intrinsic_load_shared_nv:
|
||||
is_divergent = src_divergent(instr->src[0], state) ||
|
||||
(options & nir_divergence_uniform_load_tears);
|
||||
break;
|
||||
|
|
@ -619,6 +620,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_2x32:
|
||||
case nir_intrinsic_load_global_ir3:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_deref: {
|
||||
if (load_may_tear(state, instr)) {
|
||||
is_divergent = true;
|
||||
|
|
@ -880,6 +882,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_helper_invocation:
|
||||
case nir_intrinsic_is_helper_invocation:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_scratch_nv:
|
||||
case nir_intrinsic_deref_atomic:
|
||||
case nir_intrinsic_deref_atomic_swap:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
|
|
@ -894,6 +897,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_bindless_image_atomic_swap:
|
||||
case nir_intrinsic_shared_atomic:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
case nir_intrinsic_shared_atomic_swap_nv:
|
||||
case nir_intrinsic_task_payload_atomic:
|
||||
case nir_intrinsic_task_payload_atomic_swap:
|
||||
case nir_intrinsic_global_atomic:
|
||||
|
|
@ -905,6 +910,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_global_atomic_swap_agx:
|
||||
case nir_intrinsic_global_atomic_2x32:
|
||||
case nir_intrinsic_global_atomic_swap_2x32:
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_global_atomic_pco:
|
||||
case nir_intrinsic_atomic_counter_add:
|
||||
case nir_intrinsic_atomic_counter_min:
|
||||
|
|
|
|||
|
|
@ -905,6 +905,9 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
|
|||
# AGX global variants take a 64-bit base address plus a 32-bit offset in words.
|
||||
# The offset is sign-extended or zero-extended based on the SIGN_EXTEND index.
|
||||
#
|
||||
# NV variants all carry a 24-bit BASE, which is unsigned when the address is a
# constant 0, and signed otherwise.
|
||||
#
|
||||
# PCO global variants use a vec3 for the memory address and data, where component X
|
||||
# has the low 32 address bits, component Y has the high 32 address bits, and component Z
|
||||
# has the data parameter.
|
||||
|
|
@ -912,21 +915,25 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
|
|||
intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_nv", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True):
|
||||
|
|
@ -1825,6 +1832,15 @@ load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flag
|
|||
# src[] = { value, address, unsigned 32-bit offset }.
|
||||
store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRITE_MASK])
|
||||
|
||||
# src[] = { address }. BASE is a 24-bit offset: unsigned if the address is a
# constant 0, signed otherwise.
|
||||
load("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
store("global_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||
load("scratch_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
store("scratch_nv", [1], indices=[BASE, ALIGN_MUL, ALIGN_OFFSET])
|
||||
load("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
store("shared_nv", [1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||
|
||||
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
|
||||
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||
|
||||
|
|
|
|||
|
|
@ -979,6 +979,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_output:
|
||||
case nir_intrinsic_load_pixel_local:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared_nv:
|
||||
case nir_intrinsic_load_task_payload:
|
||||
case nir_intrinsic_load_uniform:
|
||||
case nir_intrinsic_load_constant:
|
||||
|
|
@ -988,16 +989,22 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_global_2x32:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global_etna:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_scratch_nv:
|
||||
case nir_intrinsic_load_fs_input_interp_deltas:
|
||||
case nir_intrinsic_shared_atomic:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
case nir_intrinsic_shared_atomic_swap_nv:
|
||||
case nir_intrinsic_task_payload_atomic:
|
||||
case nir_intrinsic_task_payload_atomic_swap:
|
||||
case nir_intrinsic_global_atomic:
|
||||
case nir_intrinsic_global_atomic_2x32:
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap:
|
||||
case nir_intrinsic_global_atomic_swap_2x32:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_load_coefficients_agx:
|
||||
case nir_intrinsic_load_shared_block_intel:
|
||||
case nir_intrinsic_load_global_block_intel:
|
||||
|
|
@ -1021,11 +1028,14 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_pixel_local:
|
||||
case nir_intrinsic_store_shared:
|
||||
case nir_intrinsic_store_shared_nv:
|
||||
case nir_intrinsic_store_task_payload:
|
||||
case nir_intrinsic_store_global:
|
||||
case nir_intrinsic_store_global_2x32:
|
||||
case nir_intrinsic_store_global_etna:
|
||||
case nir_intrinsic_store_global_nv:
|
||||
case nir_intrinsic_store_scratch:
|
||||
case nir_intrinsic_store_scratch_nv:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
case nir_intrinsic_ldc_nv:
|
||||
|
|
|
|||
|
|
@ -240,6 +240,7 @@ node_is_dead(nir_cf_node *node)
|
|||
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_shared2_amd:
|
||||
case nir_intrinsic_load_shared_nv:
|
||||
case nir_intrinsic_load_output:
|
||||
case nir_intrinsic_load_pixel_local:
|
||||
case nir_intrinsic_load_per_vertex_output:
|
||||
|
|
|
|||
|
|
@ -729,6 +729,30 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_global_atomic_nv:
|
||||
case nir_intrinsic_global_atomic_swap_nv:
|
||||
case nir_intrinsic_shared_atomic_nv:
|
||||
case nir_intrinsic_shared_atomic_swap_nv:
|
||||
case nir_intrinsic_load_global_nv:
|
||||
case nir_intrinsic_load_scratch_nv:
|
||||
case nir_intrinsic_load_shared_nv:
|
||||
case nir_intrinsic_store_global_nv:
|
||||
case nir_intrinsic_store_scratch_nv:
|
||||
case nir_intrinsic_store_shared_nv: {
|
||||
int base = nir_intrinsic_base(instr);
|
||||
nir_src src = *nir_get_io_offset_src(instr);
|
||||
unsigned const_bits = nir_get_io_base_size_nv(instr);
|
||||
|
||||
if (nir_src_is_const(src) && nir_src_as_int(src) == 0) {
|
||||
validate_assert(state, base >= 0 && base < BITFIELD_MASK(const_bits));
|
||||
} else {
|
||||
int32_t max = BITFIELD_MASK(const_bits - 1);
|
||||
int32_t min = ~BITFIELD_MASK(const_bits - 1);
|
||||
validate_assert(state, base >= min && base < max);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue