diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 2bdebe94028..45f09a43817 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -1495,6 +1495,21 @@ nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
    nir_builder_instr_insert(build, &store->instr);
 }
 
+static inline nir_ssa_def *
+nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
+                         unsigned num_components, unsigned bit_size)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
+   load->num_components = num_components;
+   load->src[0] = nir_src_for_ssa(addr);
+   nir_intrinsic_set_align(load, align, 0);
+   nir_ssa_dest_init(&load->instr, &load->dest,
+                     num_components, bit_size, NULL);
+   nir_builder_instr_insert(build, &load->instr);
+   return &load->dest.ssa;
+}
+
 static inline nir_ssa_def *
 nir_load_param(nir_builder *build, uint32_t param_idx)
 {
diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
index 11fa8e49c07..4198aab78f5 100644
--- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
@@ -135,6 +135,54 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
          nir_instr_remove(instr);
          break;
 
+      case nir_intrinsic_load_uniform: {
+         /* We don't want to lower this in the launch trampoline. */
+         if (stage == MESA_SHADER_COMPUTE)
+            break;
+
+         assert(intrin->dest.is_ssa);
+         assert(intrin->src[0].is_ssa);
+
+         unsigned bit_size = intrin->dest.ssa.bit_size;
+         assert(bit_size >= 8 && bit_size % 8 == 0);
+         unsigned byte_size = bit_size / 8;
+
+         if (nir_src_is_const(intrin->src[0])) {
+            uint64_t offset = BRW_RT_PUSH_CONST_OFFSET +
+                              nir_intrinsic_base(intrin) +
+                              nir_src_as_uint(intrin->src[0]);
+
+            /* Things should be component-aligned. */
+            assert(offset % byte_size == 0);
+
+            unsigned suboffset = offset % 64;
+            uint64_t aligned_offset = offset - suboffset;
+
+            /* Load two just in case we go over a 64B boundary */
+            nir_ssa_def *data[2];
+            for (unsigned i = 0; i < 2; i++) {
+               nir_ssa_def *addr =
+                  nir_iadd_imm(b, nir_load_btd_global_arg_addr_intel(b),
+                               aligned_offset + i * 64);
+               data[i] = nir_load_global_const_block_intel(b, addr, 16);
+            }
+
+            sysval = nir_extract_bits(b, data, 2, suboffset * 8,
+                                      intrin->num_components, bit_size);
+         } else {
+            nir_ssa_def *offset32 =
+               nir_iadd_imm(b, intrin->src[0].ssa,
+                            BRW_RT_PUSH_CONST_OFFSET +
+                            nir_intrinsic_base(intrin));
+            nir_ssa_def *addr =
+               nir_iadd(b, nir_load_btd_global_arg_addr_intel(b),
+                        nir_u2u64(b, offset32));
+            sysval = nir_load_global_constant(b, addr, byte_size,
+                                              intrin->num_components, bit_size);
+         }
+         break;
+      }
+
       case nir_intrinsic_load_ray_launch_id:
          sysval = nir_channels(b, hotzone, 0xe);
          break;
diff --git a/src/intel/compiler/brw_rt.h b/src/intel/compiler/brw_rt.h
index eebb29b1f1b..f153257b6a4 100644
--- a/src/intel/compiler/brw_rt.h
+++ b/src/intel/compiler/brw_rt.h
@@ -31,6 +31,9 @@ extern "C" {
 /** Vulkan defines shaderGroupHandleSize = 32 */
 #define BRW_RT_SBT_HANDLE_SIZE 32
 
+/** Offset after the RT dispatch globals at which "push" constants live */
+#define BRW_RT_PUSH_CONST_OFFSET 128
+
 /** Stride of the resume SBT */
 #define BRW_BTD_RESUME_SBT_STRIDE 8