diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index b6c1e369d4a..1ac2cca7a17 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -365,6 +365,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_urb_output_handle_intel: case nir_intrinsic_load_ray_query_global_intel: case nir_intrinsic_load_call_return_address_amd: + case nir_intrinsic_load_indirect_address_intel: is_divergent = false; break; @@ -613,6 +614,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_shared: case nir_intrinsic_load_shared_ir3: case nir_intrinsic_load_shared_nv: + case nir_intrinsic_load_shader_indirect_data_intel: is_divergent = src_divergent(instr->src[0], state) || (options & nir_divergence_uniform_load_tears); break; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 8cc7fd93def..2afd086a5fa 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2532,6 +2532,12 @@ intrinsic("store_per_primitive_payload_intel", src_comp=[-1], indices=[BASE, COM # Number of data items being operated on for a SIMD program. system_value("simd_width_intel", 1) +# Address delivered in R0[31:6] in compute, mesh & task shaders on Gfx12.5+ +# coming from +# (3DSTATE_MESH_SHADER_DATA|3DSTATE_TASK_SHADER_DATA|COMPUTE_WALKER) +# IndirectDataStartAddress +system_value("indirect_address_intel", 1) + # Load a relocatable 32-bit value intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], indices=[PARAM_IDX, BASE], flags=[CAN_ELIMINATE, CAN_REORDER]) @@ -2633,6 +2639,12 @@ store("urb_vec4_intel", [1, 1, 1], [BASE]) # src[] = { value, address }. store("urb_lsc_intel", [1], [BASE]) +# Load from indirect address delivered in the thread payloads in compute, mesh +# & task shaders on Gfx12.5+ +# +# src[] = { offset }.
+load("shader_indirect_data_intel", [1], [BASE, RANGE]) + # Return a handle for a shader's input or output URB memory. system_value("urb_input_handle_intel", 1) system_value("urb_output_handle_intel", 1) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 95396f9ba42..f6e6bed4931 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1015,6 +1015,7 @@ nir_get_io_offset_src_number(const nir_intrinsic_instr *instr) case nir_intrinsic_load_shared_ir3: case nir_intrinsic_load_push_data_intel: case nir_intrinsic_vild_nv: + case nir_intrinsic_load_shader_indirect_data_intel: return 0; case nir_intrinsic_load_ubo: case nir_intrinsic_load_ubo_vec4: diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 379f7ee2f5f..f32bb7f9f40 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -126,6 +126,7 @@ get_info(nir_intrinsic_op op) STORE(nir_var_shader_out, urb_lsc_intel, -1, 1, -1, 0, 1) LOAD(0, urb_vec4_intel, 0, 1, -1, 16) STORE(nir_var_shader_out, urb_vec4_intel, 1, 2, -1, 0, 16) + LOAD(nir_var_mem_ubo, shader_indirect_data_intel, -1, 0, -1, 1) default: break; #undef ATOMIC @@ -1613,7 +1614,7 @@ try_vectorize_shared2(struct vectorize_ctx *ctx, nir_bitcast_vector(&b, nir_channel(&b, new_def, 0), low_bit_size)); nir_def_rewrite_uses(&high->intrin->def, nir_bitcast_vector(&b, nir_channel(&b, new_def, 1), high_bit_size)); - new_entry = create_entry(ctx, get_info(nir_intrinsic_load_shared2_amd), nir_def_as_intrinsic(new_def)); + new_entry = create_entry(ctx, get_info(nir_intrinsic_load_shared2_amd), nir_def_as_intrinsic(new_def)); } /* Add a new entry, so that alias checks stay intact. 
Remove the old entries, diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index f56380aff41..b2e472176f4 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -1915,6 +1915,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform) case nir_intrinsic_load_ssbo_uniform_block_intel: case nir_intrinsic_load_ubo_uniform_block_intel: case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_indirect_address_intel: is_scalar = true; break; @@ -1924,6 +1925,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform) case nir_intrinsic_load_push_data_intel: case nir_intrinsic_load_inline_data_intel: + case nir_intrinsic_load_shader_indirect_data_intel: is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar; break; @@ -4260,6 +4262,13 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, s.cs_payload().load_subgroup_id(bld, dest); break; + case nir_intrinsic_load_indirect_address_intel: + (dest.is_scalar ? bld.scalar_group() : bld).AND( + retype(dest, BRW_TYPE_UD), + retype(brw_vec1_grf(0, 0), BRW_TYPE_UD), + brw_imm_ud(INTEL_MASK(31, 6))); + break; + case nir_intrinsic_load_local_invocation_id: /* This is only used for hardware generated local IDs. 
*/ assert(cs_prog_data->generate_local_id); @@ -4908,6 +4917,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, case nir_intrinsic_global_atomic_swap: case nir_intrinsic_load_scratch: case nir_intrinsic_store_scratch: + case nir_intrinsic_load_shader_indirect_data_intel: brw_from_nir_emit_memory_access(ntb, bld, xbld, instr); break; @@ -6126,6 +6136,14 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, break; } + case nir_intrinsic_load_shader_indirect_data_intel: { + mode = MEMORY_MODE_CONSTANT; + binding_type = LSC_ADDR_SURFTYPE_FLAT; + srcs[MEMORY_LOGICAL_ADDRESS] = + memory_address(ntb, bld, instr, *binding_type, &address_offset); + no_mask_handle = srcs[MEMORY_LOGICAL_ADDRESS].is_scalar; + break; + } case nir_intrinsic_load_global_constant_uniform_block_intel: case nir_intrinsic_load_global: case nir_intrinsic_load_global_constant: @@ -6195,7 +6213,9 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, instr->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel || instr->intrinsic == nir_intrinsic_load_ssbo_uniform_block_intel || instr->intrinsic == nir_intrinsic_load_shared_uniform_block_intel || - instr->intrinsic == nir_intrinsic_load_global_constant_uniform_block_intel; + instr->intrinsic == nir_intrinsic_load_global_constant_uniform_block_intel || + (instr->intrinsic == nir_intrinsic_load_shader_indirect_data_intel && + nir_src_is_const(instr->src[0])); const bool block = convergent_block_load || instr->intrinsic == nir_intrinsic_load_global_block_intel || instr->intrinsic == nir_intrinsic_load_shared_block_intel || diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 8858903a8a4..f481458cb61 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -3107,6 +3107,7 @@ lsc_op_for_nir_intrinsic(const nir_intrinsic_instr *intrin) case nir_intrinsic_load_ssbo_uniform_block_intel: case nir_intrinsic_load_ubo_uniform_block_intel: case nir_intrinsic_load_scratch: + case 
nir_intrinsic_load_shader_indirect_data_intel: return LSC_OP_LOAD; case nir_intrinsic_store_ssbo: