mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
nir: Add some new _nv intrinsics
The ldc_nv and ldcx_nv intrinsics correspond to the indexed and bindless forms of NVIDIA's LDC instruction, respectively. ldc_nv is pretty much load_ubo without some of the unnecessary constant bits, while ldcx_nv takes a 64-bit bindless handle instead of an index. The remaining intrinsics (pin_cx_handle_nv, unpin_cx_handle_nv and r2ur_nv) give us a little control over register allocation at the NIR level to ensure that LDCX handles are placed in uniform registers. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
parent
ab84cf11c7
commit
b107240474
5 changed files with 26 additions and 0 deletions
|
|
@ -267,6 +267,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_optimization_barrier_sgpr_amd:
|
||||
case nir_intrinsic_load_printf_buffer_address:
|
||||
case nir_intrinsic_load_printf_base_identifier:
|
||||
case nir_intrinsic_r2ur_nv:
|
||||
is_divergent = false;
|
||||
break;
|
||||
|
||||
|
|
@ -444,6 +445,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ubo_vec4:
|
||||
case nir_intrinsic_ldc_nv:
|
||||
case nir_intrinsic_ldcx_nv:
|
||||
is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
|
||||
instr->src[1].ssa->divergent;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -2221,6 +2221,21 @@ intrinsic("dpas_intel", dest_comp=0, src_comp=[0, 0, 0],
|
|||
flags=[CAN_ELIMINATE])
|
||||
|
||||
# NVIDIA-specific intrinsics
|
||||
# src[] = { index, offset }.
|
||||
intrinsic("ldc_nv", dest_comp=0, src_comp=[1, 1],
|
||||
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# [Un]pins an LDCX handle around non-uniform control-flow sections
|
||||
# src[] = { handle }.
|
||||
intrinsic("pin_cx_handle_nv", src_comp=[1])
|
||||
intrinsic("unpin_cx_handle_nv", src_comp=[1])
|
||||
# Explicitly copies a value to a uniform register
|
||||
intrinsic("r2ur_nv", dest_comp=0, src_comp=[0],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# src[] = { handle, offset }.
|
||||
intrinsic("ldcx_nv", dest_comp=0, src_comp=[1, 1],
|
||||
indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64],
|
||||
indices=[ACCESS, BASE], flags=[CAN_ELIMINATE])
|
||||
intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],
|
||||
|
|
|
|||
|
|
@ -2779,6 +2779,8 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_store_scratch:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
case nir_intrinsic_ldc_nv:
|
||||
case nir_intrinsic_ldcx_nv:
|
||||
return 1;
|
||||
case nir_intrinsic_store_ssbo:
|
||||
case nir_intrinsic_store_per_vertex_output:
|
||||
|
|
|
|||
|
|
@ -386,6 +386,8 @@ intrin_to_variable_mode(nir_intrinsic_op intrin)
|
|||
{
|
||||
switch (intrin) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_ldc_nv:
|
||||
case nir_intrinsic_ldcx_nv:
|
||||
return nir_var_mem_ubo;
|
||||
|
||||
case nir_intrinsic_load_push_constant:
|
||||
|
|
@ -442,6 +444,8 @@ lower_mem_access_instr(nir_builder *b, nir_instr *instr, void *_data)
|
|||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_task_payload:
|
||||
case nir_intrinsic_ldc_nv:
|
||||
case nir_intrinsic_ldcx_nv:
|
||||
return lower_mem_load(b, intrin, state->callback, state->cb_data);
|
||||
|
||||
case nir_intrinsic_store_global:
|
||||
|
|
|
|||
|
|
@ -102,6 +102,8 @@ get_info(nir_intrinsic_op op)
|
|||
LOAD(nir_var_mem_ssbo, ssbo_uniform_block_intel, 0, 1, -1)
|
||||
LOAD(nir_var_mem_shared, shared_uniform_block_intel, -1, 0, -1)
|
||||
LOAD(nir_var_mem_global, global_constant_uniform_block_intel, -1, 0, -1)
|
||||
INFO(nir_var_mem_ubo, ldc_nv, false, 0, 1, -1, -1)
|
||||
INFO(nir_var_mem_ubo, ldcx_nv, false, 0, 1, -1, -1)
|
||||
default:
|
||||
break;
|
||||
#undef ATOMIC
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue