nir: Add some new _nv intrinsics

The ldc_nv and ldcx_nv intrinsics correspond to the index and bindless
forms of NVIDIA's LDC instruction, respectively.  ldc_nv is pretty much
load_ubo without some of the unnecessary constant bits while ldcx_nv
takes a 64-bit bindless handle instead of an index.  The others
(pin_cx_handle_nv/unpin_cx_handle_nv and r2ur_nv) give us a little control
over register allocation at the NIR level to ensure that LDCX handles are
placed in uniform registers.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand 2024-05-22 14:40:20 -05:00 committed by Marge Bot
parent ab84cf11c7
commit b107240474
5 changed files with 26 additions and 0 deletions

View file

@ -267,6 +267,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_optimization_barrier_sgpr_amd:
case nir_intrinsic_load_printf_buffer_address:
case nir_intrinsic_load_printf_base_identifier:
case nir_intrinsic_r2ur_nv:
is_divergent = false;
break;
@ -444,6 +445,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:
case nir_intrinsic_ldc_nv:
case nir_intrinsic_ldcx_nv:
is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
instr->src[1].ssa->divergent;
break;

View file

@ -2221,6 +2221,21 @@ intrinsic("dpas_intel", dest_comp=0, src_comp=[0, 0, 0],
flags=[CAN_ELIMINATE])
# NVIDIA-specific intrinsics

# Index form of NVIDIA's LDC instruction.  This is pretty much load_ubo
# without some of the unnecessary constant bits.
# src[] = { index, offset }.
intrinsic("ldc_nv", dest_comp=0, src_comp=[1, 1],
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
flags=[CAN_ELIMINATE, CAN_REORDER])

# [Un]pins an LDCX handle around non-uniform control-flow sections
# These give a little control over register allocation at the NIR level so
# that LDCX handles are placed in uniform registers.
# src[] = { handle }.
intrinsic("pin_cx_handle_nv", src_comp=[1])
intrinsic("unpin_cx_handle_nv", src_comp=[1])

# Explicitly copies a value to a uniform register
# The divergence analysis treats the result of this intrinsic as
# non-divergent.
# src[] = { value }.
intrinsic("r2ur_nv", dest_comp=0, src_comp=[0],
flags=[CAN_ELIMINATE, CAN_REORDER])

# Bindless form of NVIDIA's LDC instruction.  Same as ldc_nv except that it
# takes a 64-bit bindless handle instead of an index.
# src[] = { handle, offset }.
intrinsic("ldcx_nv", dest_comp=0, src_comp=[1, 1],
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
flags=[CAN_ELIMINATE, CAN_REORDER])
intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64],
indices=[ACCESS, BASE], flags=[CAN_ELIMINATE])
intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],

View file

@ -2779,6 +2779,8 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
case nir_intrinsic_store_scratch:
case nir_intrinsic_ssbo_atomic:
case nir_intrinsic_ssbo_atomic_swap:
case nir_intrinsic_ldc_nv:
case nir_intrinsic_ldcx_nv:
return 1;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_per_vertex_output:

View file

@ -386,6 +386,8 @@ intrin_to_variable_mode(nir_intrinsic_op intrin)
{
switch (intrin) {
case nir_intrinsic_load_ubo:
case nir_intrinsic_ldc_nv:
case nir_intrinsic_ldcx_nv:
return nir_var_mem_ubo;
case nir_intrinsic_load_push_constant:
@ -442,6 +444,8 @@ lower_mem_access_instr(nir_builder *b, nir_instr *instr, void *_data)
case nir_intrinsic_load_shared:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_task_payload:
case nir_intrinsic_ldc_nv:
case nir_intrinsic_ldcx_nv:
return lower_mem_load(b, intrin, state->callback, state->cb_data);
case nir_intrinsic_store_global:

View file

@ -102,6 +102,8 @@ get_info(nir_intrinsic_op op)
LOAD(nir_var_mem_ssbo, ssbo_uniform_block_intel, 0, 1, -1)
LOAD(nir_var_mem_shared, shared_uniform_block_intel, -1, 0, -1)
LOAD(nir_var_mem_global, global_constant_uniform_block_intel, -1, 0, -1)
INFO(nir_var_mem_ubo, ldc_nv, false, 0, 1, -1, -1)
INFO(nir_var_mem_ubo, ldcx_nv, false, 0, 1, -1, -1)
default:
break;
#undef ATOMIC