Mirror of https://gitlab.freedesktop.org/mesa/mesa.git
brw/nir: handle inline_data_intel more like push_data_intel
It's pretty much the same mechanism, except it's a different register location. With this change we gain indirect loading support.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
Parent: 789bb544f5
Commit: 7f19814414
8 changed files with 22 additions and 25 deletions
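The mechanism the message refers to is visible in the nir_intrinsics.py hunk below: load_inline_data_intel now takes an offset source plus BASE/RANGE indices, just like load_push_data_intel, so the offset no longer has to be folded into BASE at compile time. A minimal builder-side sketch of what that enables (not part of this commit; the helper name and the base/range values are made up for illustration):

/* Hypothetical helper (not from the commit): loads one 32-bit dword of
 * Gfx12.5+ inline data at a dynamically computed byte offset.  With the old
 * definition -- load("inline_data_intel", [], [BASE], ...) -- the offset had
 * to be a constant baked into BASE; the new definition adds src[] = { offset }
 * and a RANGE index, so the offset can be a runtime value (indirect load).
 */
#include "nir_builder.h"

static nir_def *
load_inline_dword_indirect(nir_builder *b, nir_def *byte_offset)
{
   /* .base and .range values here are illustrative only. */
   return nir_load_inline_data_intel(b, 1, 32, byte_offset,
                                     .base = 0,
                                     .range = 16);
}

The existing callers updated in this diff keep a constant offset by passing nir_imm_int(b, 0) and leaving the old constant in .base.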
@@ -304,7 +304,6 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_reloc_const_intel:
    case nir_intrinsic_load_btd_global_arg_addr_intel:
    case nir_intrinsic_load_btd_local_arg_addr_intel:
-   case nir_intrinsic_load_inline_data_intel:
    case nir_intrinsic_load_simd_width_intel:
    case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
    case nir_intrinsic_load_lshs_vertex_stride_amd:
@@ -785,7 +784,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_texture_handle_kk:
    case nir_intrinsic_load_depth_texture_kk:
    case nir_intrinsic_load_sampler_handle_kk:
-   case nir_intrinsic_load_texture_scale: {
+   case nir_intrinsic_load_texture_scale:
+   case nir_intrinsic_load_inline_data_intel: {
       unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
       for (unsigned i = 0; i < num_srcs; i++) {
          if (src_divergent(instr->src[i], state)) {
@@ -2644,7 +2644,9 @@ system_value("urb_output_handle_intel", 1)
 load("urb_input_handle_indexed_intel", [1], [], [CAN_ELIMINATE, CAN_REORDER])
 
 # Inline register delivery (available on Gfx12.5+ for CS/Mesh/Task stages)
-load("inline_data_intel", [], [BASE], [CAN_ELIMINATE, CAN_REORDER])
+#
+# src[] = { offset }
+load("inline_data_intel", [1], [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 
 # Load push data on Intel VS,TCS,TES,GS,FS stages
 # src[] = { offset }
@@ -1493,8 +1493,10 @@ blorp_build_nir_shader(struct blorp_context *blorp,
 
    if (key->dst_samples > 1) {
       nir_def *num_layers_data =
-         nir_load_inline_data_intel(&b, 1, 32,
-                                    .base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION);
+         nir_load_inline_data_intel(
+            &b, 1, 32, nir_imm_int(&b, 0),
+            .base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION,
+            .range = 4);
 
       nir_def *z_pos = nir_umod(&b, nir_channel(&b, store_pos, 2),
                                 num_layers_data);
@@ -1911,7 +1911,6 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
    case nir_intrinsic_load_btd_local_arg_addr_intel:
    case nir_intrinsic_load_btd_shader_type_intel:
    case nir_intrinsic_load_global_constant_uniform_block_intel:
-   case nir_intrinsic_load_inline_data_intel:
    case nir_intrinsic_load_reloc_const_intel:
    case nir_intrinsic_load_ssbo_uniform_block_intel:
    case nir_intrinsic_load_ubo_uniform_block_intel:
@@ -1924,6 +1923,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
       break;
 
    case nir_intrinsic_load_push_data_intel:
+   case nir_intrinsic_load_inline_data_intel:
       is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar;
       break;
 
@@ -4235,8 +4235,6 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
       dest = get_nir_def(ntb, instr->def);
 
-   const brw_builder xbld = dest.is_scalar ? bld.scalar_group() : bld;
-
    switch (instr->intrinsic) {
    case nir_intrinsic_barrier:
       if (nir_intrinsic_memory_scope(instr) != SCOPE_NONE)
@@ -4257,17 +4255,6 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
       }
       break;
 
-   case nir_intrinsic_load_inline_data_intel: {
-      unsigned inline_stride = brw_type_size_bytes(dest.type);
-      for (unsigned c = 0; c < instr->def.num_components; c++) {
-         xbld.MOV(offset(dest, xbld, c),
-                  byte_offset(brw_uniform_reg(BRW_INLINE_PARAM_REG, dest.type),
-                              nir_intrinsic_base(instr) +
-                              c * inline_stride));
-      }
-      break;
-   }
-
    case nir_intrinsic_load_subgroup_id:
       s.cs_payload().load_subgroup_id(bld, dest);
       break;
@@ -5298,14 +5285,18 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
       break;
    }
 
-   case nir_intrinsic_load_push_data_intel: {
+   case nir_intrinsic_load_push_data_intel:
+   case nir_intrinsic_load_inline_data_intel: {
      /* Offsets are in bytes but they should always aligned to
       * the type size
       */
      unsigned base_offset = nir_intrinsic_base(instr);
      assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0);
 
-     brw_reg src = brw_uniform_reg(base_offset / REG_SIZE, dest.type);
+     brw_reg src = brw_uniform_reg(
+        instr->intrinsic == nir_intrinsic_load_inline_data_intel ?
+        BRW_INLINE_PARAM_REG : (base_offset / REG_SIZE),
+        dest.type);
 
      if (nir_src_is_const(instr->src[0])) {
         unsigned load_offset = nir_src_as_uint(instr->src[0]);
@@ -382,7 +382,9 @@ build_load_uniform(nir_builder *b, unsigned offset,
                    unsigned num_components, unsigned bit_size)
 {
    return nir_load_inline_data_intel(b, num_components, bit_size,
-                                     .base = offset);
+                                     nir_imm_int(b, 0),
+                                     .base = offset,
+                                     .range = num_components * bit_size / 8);
 }
 
 #define load_trampoline_param(b, name, num_components, bit_size) \
@@ -115,7 +115,7 @@ lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
    if (pdevice->info.verx10 >= 125) {
       num_workgroups =
          nir_load_inline_data_intel(
-            b, 3, 32,
+            b, 3, 32, nir_imm_int(b, 0),
             .base = ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET);
    } else {
       num_workgroups =
@@ -15,7 +15,7 @@ anv_nir_lower_unaligned_dispatch(nir_shader *shader)
 
    nir_def *global_idx = nir_channel(&b, nir_load_global_invocation_id(&b, 32), 0);
    nir_def *max_unaligned_invocations_x =
-      nir_load_inline_data_intel(&b, 1, 32,
+      nir_load_inline_data_intel(&b, 1, 32, nir_imm_int(&b, 0),
                                  .base = ANV_INLINE_PARAM_UNALIGNED_INVOCATIONS_X_OFFSET);
 
    nir_push_if(&b, nir_uge(&b, global_idx, max_unaligned_invocations_x));
@@ -978,7 +978,7 @@ static nir_def *
 mesh_load_provoking_vertex(nir_builder *b, void *data)
 {
    return nir_load_inline_data_intel(
-      b, 1, 32,
+      b, 1, 32, nir_imm_int(b, 0),
       .base = ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX);
 }
 