brw/nir: handle inline_data_intel more like push_data_intel

It's pretty much the same mechanism, except it's a different register
location.

With this change we gain indirect loading support.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
This commit is contained in:
Lionel Landwerlin 2026-02-23 23:02:40 +02:00 committed by Marge Bot
parent 789bb544f5
commit 7f19814414
8 changed files with 22 additions and 25 deletions

View file

@ -304,7 +304,6 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_reloc_const_intel:
case nir_intrinsic_load_btd_global_arg_addr_intel:
case nir_intrinsic_load_btd_local_arg_addr_intel:
case nir_intrinsic_load_inline_data_intel:
case nir_intrinsic_load_simd_width_intel:
case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
case nir_intrinsic_load_lshs_vertex_stride_amd:
@ -785,7 +784,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_texture_handle_kk:
case nir_intrinsic_load_depth_texture_kk:
case nir_intrinsic_load_sampler_handle_kk:
case nir_intrinsic_load_texture_scale: {
case nir_intrinsic_load_texture_scale:
case nir_intrinsic_load_inline_data_intel: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
if (src_divergent(instr->src[i], state)) {

View file

@ -2644,7 +2644,9 @@ system_value("urb_output_handle_intel", 1)
load("urb_input_handle_indexed_intel", [1], [], [CAN_ELIMINATE, CAN_REORDER])
# Inline register delivery (available on Gfx12.5+ for CS/Mesh/Task stages)
load("inline_data_intel", [], [BASE], [CAN_ELIMINATE, CAN_REORDER])
#
# src[] = { offset }
load("inline_data_intel", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
# Load push data on Intel VS,TCS,TES,GS,FS stages
# src[] = { offset }

View file

@ -1493,8 +1493,10 @@ blorp_build_nir_shader(struct blorp_context *blorp,
if (key->dst_samples > 1) {
nir_def *num_layers_data =
nir_load_inline_data_intel(&b, 1, 32,
.base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION);
nir_load_inline_data_intel(
&b, 1, 32, nir_imm_int(&b, 0),
.base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION,
.range = 4);
nir_def *z_pos = nir_umod(&b, nir_channel(&b, store_pos, 2),
num_layers_data);

View file

@ -1911,7 +1911,6 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
case nir_intrinsic_load_btd_local_arg_addr_intel:
case nir_intrinsic_load_btd_shader_type_intel:
case nir_intrinsic_load_global_constant_uniform_block_intel:
case nir_intrinsic_load_inline_data_intel:
case nir_intrinsic_load_reloc_const_intel:
case nir_intrinsic_load_ssbo_uniform_block_intel:
case nir_intrinsic_load_ubo_uniform_block_intel:
@ -1924,6 +1923,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
break;
case nir_intrinsic_load_push_data_intel:
case nir_intrinsic_load_inline_data_intel:
is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar;
break;
@ -4235,8 +4235,6 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_def(ntb, instr->def);
const brw_builder xbld = dest.is_scalar ? bld.scalar_group() : bld;
switch (instr->intrinsic) {
case nir_intrinsic_barrier:
if (nir_intrinsic_memory_scope(instr) != SCOPE_NONE)
@ -4257,17 +4255,6 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
}
break;
case nir_intrinsic_load_inline_data_intel: {
unsigned inline_stride = brw_type_size_bytes(dest.type);
for (unsigned c = 0; c < instr->def.num_components; c++) {
xbld.MOV(offset(dest, xbld, c),
byte_offset(brw_uniform_reg(BRW_INLINE_PARAM_REG, dest.type),
nir_intrinsic_base(instr) +
c * inline_stride));
}
break;
}
case nir_intrinsic_load_subgroup_id:
s.cs_payload().load_subgroup_id(bld, dest);
break;
@ -5298,14 +5285,18 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
break;
}
case nir_intrinsic_load_push_data_intel: {
case nir_intrinsic_load_push_data_intel:
case nir_intrinsic_load_inline_data_intel: {
      /* Offsets are in bytes but they should always be aligned to
       * the type size
       */
unsigned base_offset = nir_intrinsic_base(instr);
assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0);
brw_reg src = brw_uniform_reg(base_offset / REG_SIZE, dest.type);
brw_reg src = brw_uniform_reg(
instr->intrinsic == nir_intrinsic_load_inline_data_intel ?
BRW_INLINE_PARAM_REG : (base_offset / REG_SIZE),
dest.type);
if (nir_src_is_const(instr->src[0])) {
unsigned load_offset = nir_src_as_uint(instr->src[0]);

View file

@ -382,7 +382,9 @@ build_load_uniform(nir_builder *b, unsigned offset,
unsigned num_components, unsigned bit_size)
{
return nir_load_inline_data_intel(b, num_components, bit_size,
.base = offset);
nir_imm_int(b, 0),
.base = offset,
.range = num_components * bit_size / 8);
}
#define load_trampoline_param(b, name, num_components, bit_size) \

View file

@ -115,7 +115,7 @@ lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
if (pdevice->info.verx10 >= 125) {
num_workgroups =
nir_load_inline_data_intel(
b, 3, 32,
b, 3, 32, nir_imm_int(b, 0),
.base = ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET);
} else {
num_workgroups =

View file

@ -15,7 +15,7 @@ anv_nir_lower_unaligned_dispatch(nir_shader *shader)
nir_def *global_idx = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
nir_def *max_unaligned_invocations_x =
nir_load_inline_data_intel(&b, 1, 32,
nir_load_inline_data_intel(&b, 1, 32, nir_imm_int(&b, 0),
.base = ANV_INLINE_PARAM_UNALIGNED_INVOCATIONS_X_OFFSET);
nir_push_if(&b, nir_uge(&b, global_idx, max_unaligned_invocations_x));

View file

@ -978,7 +978,7 @@ static nir_def *
mesh_load_provoking_vertex(nir_builder *b, void *data)
{
return nir_load_inline_data_intel(
b, 1, 32,
b, 1, 32, nir_imm_int(b, 0),
.base = ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX);
}