brw/nir: add new intrinsics to load data from the indirect address

This address is delivered on Gfx12.5+ in compute/mesh/task shaders
from the command stream instruction.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40174>
This commit is contained in:
Lionel Landwerlin 2026-02-20 13:28:27 +02:00 committed by Marge Bot
parent 7b1533414a
commit e14d6b535c
6 changed files with 39 additions and 2 deletions

View file

@@ -365,6 +365,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_urb_output_handle_intel:
case nir_intrinsic_load_ray_query_global_intel:
case nir_intrinsic_load_call_return_address_amd:
case nir_intrinsic_load_indirect_address_intel:
is_divergent = false;
break;
@@ -613,6 +614,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_shared:
case nir_intrinsic_load_shared_ir3:
case nir_intrinsic_load_shared_nv:
case nir_intrinsic_load_shader_indirect_data_intel:
is_divergent = src_divergent(instr->src[0], state) ||
(options & nir_divergence_uniform_load_tears);
break;

View file

@@ -2532,6 +2532,12 @@ intrinsic("store_per_primitive_payload_intel", src_comp=[-1], indices=[BASE, COM
# Number of data items being operated on for a SIMD program.
system_value("simd_width_intel", 1)
# Address delivered in R0[31:6] in compute, mesh & task shaders on Gfx12.5+
# coming from
# (3DSTATE_MESH_SHADER_DATA|3DSTATE_TASK_SHADER_DATA|COMPUTE_WALKER)
# IndirectDataStartAddress
system_value("indirect_address_intel", 1)
# Load a relocatable 32-bit value
intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
indices=[PARAM_IDX, BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
@@ -2633,6 +2639,12 @@ store("urb_vec4_intel", [1, 1, 1], [BASE])
# src[] = { value, address }.
store("urb_lsc_intel", [1], [BASE])
# Load from indirect address delivered in the thread payloads in compute, mesh
# & task shaders on Gfx12.5+
#
# src[] = { offset }.
load("shader_indirect_data_intel", [1], [BASE, RANGE])
# Return a handle for a shader's input or output URB memory.
system_value("urb_input_handle_intel", 1)
system_value("urb_output_handle_intel", 1)

View file

@@ -1015,6 +1015,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
case nir_intrinsic_load_shared_ir3:
case nir_intrinsic_load_push_data_intel:
case nir_intrinsic_vild_nv:
case nir_intrinsic_load_shader_indirect_data_intel:
return 0;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_vec4:

View file

@@ -126,6 +126,7 @@ get_info(nir_intrinsic_op op)
STORE(nir_var_shader_out, urb_lsc_intel, -1, 1, -1, 0, 1)
LOAD(0, urb_vec4_intel, 0, 1, -1, 16)
STORE(nir_var_shader_out, urb_vec4_intel, 1, 2, -1, 0, 16)
LOAD(nir_var_mem_ubo, shader_indirect_data_intel, -1, 0, -1, 1)
default:
break;
#undef ATOMIC
@@ -1613,7 +1614,7 @@ try_vectorize_shared2(struct vectorize_ctx *ctx,
nir_bitcast_vector(&b, nir_channel(&b, new_def, 0), low_bit_size));
nir_def_rewrite_uses(&high->intrin->def,
nir_bitcast_vector(&b, nir_channel(&b, new_def, 1), high_bit_size));
new_entry = create_entry(ctx, get_info(nir_intrinsic_load_shared2_amd), nir_def_as_intrinsic(new_def));
}
/* Add a new entry, so that alias checks stay intact. Remove the old entries,

View file

@@ -1915,6 +1915,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
case nir_intrinsic_load_ssbo_uniform_block_intel:
case nir_intrinsic_load_ubo_uniform_block_intel:
case nir_intrinsic_load_workgroup_id:
case nir_intrinsic_load_indirect_address_intel:
is_scalar = true;
break;
@@ -1924,6 +1925,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
case nir_intrinsic_load_push_data_intel:
case nir_intrinsic_load_inline_data_intel:
case nir_intrinsic_load_shader_indirect_data_intel:
is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar;
break;
@@ -4260,6 +4262,13 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
s.cs_payload().load_subgroup_id(bld, dest);
break;
case nir_intrinsic_load_indirect_address_intel:
(dest.is_scalar ? bld.scalar_group() : bld).AND(
retype(dest, BRW_TYPE_UD),
retype(brw_vec1_grf(0, 0), BRW_TYPE_UD),
brw_imm_ud(INTEL_MASK(31, 6)));
break;
case nir_intrinsic_load_local_invocation_id:
/* This is only used for hardware generated local IDs. */
assert(cs_prog_data->generate_local_id);
@@ -4908,6 +4917,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
case nir_intrinsic_global_atomic_swap:
case nir_intrinsic_load_scratch:
case nir_intrinsic_store_scratch:
case nir_intrinsic_load_shader_indirect_data_intel:
brw_from_nir_emit_memory_access(ntb, bld, xbld, instr);
break;
@@ -6126,6 +6136,14 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
break;
}
case nir_intrinsic_load_shader_indirect_data_intel: {
mode = MEMORY_MODE_CONSTANT;
binding_type = LSC_ADDR_SURFTYPE_FLAT;
srcs[MEMORY_LOGICAL_ADDRESS] =
      memory_address(ntb, bld, instr, binding_type, &address_offset);
no_mask_handle = srcs[MEMORY_LOGICAL_ADDRESS].is_scalar;
break;
}
case nir_intrinsic_load_global_constant_uniform_block_intel:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
@@ -6195,7 +6213,9 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
instr->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||
instr->intrinsic == nir_intrinsic_load_ssbo_uniform_block_intel ||
instr->intrinsic == nir_intrinsic_load_shared_uniform_block_intel ||
instr->intrinsic == nir_intrinsic_load_global_constant_uniform_block_intel;
instr->intrinsic == nir_intrinsic_load_global_constant_uniform_block_intel ||
(instr->intrinsic == nir_intrinsic_load_shader_indirect_data_intel &&
nir_src_is_const(instr->src[0]));
const bool block = convergent_block_load ||
instr->intrinsic == nir_intrinsic_load_global_block_intel ||
instr->intrinsic == nir_intrinsic_load_shared_block_intel ||

View file

@@ -3107,6 +3107,7 @@ lsc_op_for_nir_intrinsic(const nir_intrinsic_instr *intrin)
case nir_intrinsic_load_ssbo_uniform_block_intel:
case nir_intrinsic_load_ubo_uniform_block_intel:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_shader_indirect_data_intel:
return LSC_OP_LOAD;
case nir_intrinsic_store_ssbo: