diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 8f59443f8ea..ee0bf9f38dc 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1350,7 +1350,7 @@ store("ssbo_block_intel", [-1, 1], [WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET] store("shared_block_intel", [1], [BASE, WRITE_MASK, ALIGN_MUL, ALIGN_OFFSET]) # Intrinsics for Intel mesh shading -system_value("mesh_global_arg_addr_intel", 1, bit_sizes=[64]) +system_value("mesh_inline_data_intel", 1, [ALIGN_OFFSET], bit_sizes=[32, 64]) # Intrinsics for Intel bindless thread dispatch system_value("btd_dss_id_intel", 1) diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c index 052f5db847a..d04332de9f8 100644 --- a/src/compiler/nir/nir_lower_shader_calls.c +++ b/src/compiler/nir/nir_lower_shader_calls.c @@ -176,7 +176,7 @@ can_remat_instr(nir_instr *instr, struct brw_bitset *remat) case nir_intrinsic_load_ray_miss_sbt_stride_intel: case nir_intrinsic_load_callable_sbt_addr_intel: case nir_intrinsic_load_callable_sbt_stride_intel: - case nir_intrinsic_load_mesh_global_arg_addr_intel: + case nir_intrinsic_load_mesh_inline_data_intel: /* Notably missing from the above list is btd_local_arg_addr_intel. * This is because the resume shader will have a different local * argument pointer because it has a different BSR. Any access of diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index fd740b357c4..5476f0104b1 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1998,6 +1998,15 @@ brw_compute_first_urb_slot_required(uint64_t inputs_read, return 0; } +/* From InlineData in 3DSTATE_TASK_SHADER_DATA and 3DSTATE_MESH_SHADER_DATA. */ +#define BRW_TASK_MESH_INLINE_DATA_SIZE_DW 8 + +/* InlineData[0-1] is used for Vulkan descriptor. */ +#define BRW_TASK_MESH_PUSH_CONSTANTS_START_DW 2 + +#define BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW \ + (BRW_TASK_MESH_INLINE_DATA_SIZE_DW - BRW_TASK_MESH_PUSH_CONSTANTS_START_DW) + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 264149d6db7..c47162c9b9c 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -48,10 +48,20 @@ brw_nir_lower_load_uniforms_impl(nir_builder *b, nir_instr *instr, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); assert(intrin->intrinsic == nir_intrinsic_load_uniform); - return brw_nir_load_global_const(b, - intrin, - nir_load_mesh_global_arg_addr_intel(b), - 0); + /* Read the first few 32-bit scalars from InlineData. */ + if (nir_src_is_const(intrin->src[0]) && + nir_dest_bit_size(intrin->dest) == 32 && + nir_dest_num_components(intrin->dest) == 1) { + unsigned off = nir_intrinsic_base(intrin) + nir_src_as_uint(intrin->src[0]); + unsigned off_dw = off / 4; + if (off % 4 == 0 && off_dw < BRW_TASK_MESH_PUSH_CONSTANTS_SIZE_DW) { + off_dw += BRW_TASK_MESH_PUSH_CONSTANTS_START_DW; + return nir_load_mesh_inline_data_intel(b, 32, off_dw); + } + } + + return brw_nir_load_global_const(b, intrin, + nir_load_mesh_inline_data_intel(b, 64, 0), 0); } static void @@ -995,10 +1005,12 @@ fs_visitor::nir_emit_task_mesh_intrinsic(const fs_builder &bld, dest = get_nir_dest(instr->dest); switch (instr->intrinsic) { - case nir_intrinsic_load_mesh_global_arg_addr_intel: + case nir_intrinsic_load_mesh_inline_data_intel: assert(payload.num_regs == 3 || payload.num_regs == 4); - /* Passed in the Inline Parameter, the last element of the payload. */ - bld.MOV(dest, retype(brw_vec1_grf(payload.num_regs - 1, 0), dest.type)); + /* Inline Parameter is the last element of the payload. */ + bld.MOV(dest, retype(brw_vec1_grf(payload.num_regs - 1, + nir_intrinsic_align_offset(instr)), + dest.type)); break; case nir_intrinsic_load_draw_id: