From 9fa1cdfe7ffd9e7ebd83055e2008f3e4b8ada549 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 6 Aug 2020 22:17:17 -0500
Subject: [PATCH] intel/rt: Implement push constants as global memory reads

They're not really "push" anymore but that's because there is no such
thing as push constants in bindless shaders on Intel.  They should be
fast enough, though.

There is some room for debate here as to whether we want to do the pull
in NIR or push it into the back-end.  The advantage of doing it in the
back-end is that it'd be easier to use MOV_INDIRECT for indirect push
constant access rather than falling back to a dataport message.

Reviewed-by: Caio Marcelo de Oliveira Filho
Part-of:
---
 src/compiler/nir/nir_builder.h                 | 15 ++++++
 .../compiler/brw_nir_lower_rt_intrinsics.c     | 48 +++++++++++++++++++
 src/intel/compiler/brw_rt.h                    |  3 ++
 3 files changed, 66 insertions(+)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 2bdebe94028..45f09a43817 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -1495,6 +1495,21 @@ nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
    nir_builder_instr_insert(build, &store->instr);
 }
 
+static inline nir_ssa_def *
+nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
+                         unsigned num_components, unsigned bit_size)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
+   load->num_components = num_components;
+   load->src[0] = nir_src_for_ssa(addr);
+   nir_intrinsic_set_align(load, align, 0);
+   nir_ssa_dest_init(&load->instr, &load->dest,
+                     num_components, bit_size, NULL);
+   nir_builder_instr_insert(build, &load->instr);
+   return &load->dest.ssa;
+}
+
 static inline nir_ssa_def *
 nir_load_param(nir_builder *build, uint32_t param_idx)
 {
diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
index 11fa8e49c07..4198aab78f5 100644
--- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
@@ -135,6 +135,54 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
             nir_instr_remove(instr);
             break;
 
+         case nir_intrinsic_load_uniform: {
+            /* We don't want to lower this in the launch trampoline. */
+            if (stage == MESA_SHADER_COMPUTE)
+               break;
+
+            assert(intrin->dest.is_ssa);
+            assert(intrin->src[0].is_ssa);
+
+            unsigned bit_size = intrin->dest.ssa.bit_size;
+            assert(bit_size >= 8 && bit_size % 8 == 0);
+            unsigned byte_size = bit_size / 8;
+
+            if (nir_src_is_const(intrin->src[0])) {
+               uint64_t offset = BRW_RT_PUSH_CONST_OFFSET +
+                                 nir_intrinsic_base(intrin) +
+                                 nir_src_as_uint(intrin->src[0]);
+
+               /* Things should be component-aligned. */
+               assert(offset % byte_size == 0);
+
+               unsigned suboffset = offset % 64;
+               uint64_t aligned_offset = offset - suboffset;
+
+               /* Load two just in case we go over a 64B boundary */
+               nir_ssa_def *data[2];
+               for (unsigned i = 0; i < 2; i++) {
+                  nir_ssa_def *addr =
+                     nir_iadd_imm(b, nir_load_btd_global_arg_addr_intel(b),
+                                     aligned_offset + i * 64);
+                  data[i] = nir_load_global_const_block_intel(b, addr, 16);
+               }
+
+               sysval = nir_extract_bits(b, data, 2, suboffset * 8,
+                                         intrin->num_components, bit_size);
+            } else {
+               nir_ssa_def *offset32 =
+                  nir_iadd_imm(b, intrin->src[0].ssa,
+                                  BRW_RT_PUSH_CONST_OFFSET +
+                                  nir_intrinsic_base(intrin));
+               nir_ssa_def *addr =
+                  nir_iadd(b, nir_load_btd_global_arg_addr_intel(b),
+                              nir_u2u64(b, offset32));
+               sysval = nir_load_global_constant(b, addr, byte_size,
+                                                 intrin->num_components, bit_size);
+            }
+            break;
+         }
+
          case nir_intrinsic_load_ray_launch_id:
             sysval = nir_channels(b, hotzone, 0xe);
             break;
diff --git a/src/intel/compiler/brw_rt.h b/src/intel/compiler/brw_rt.h
index eebb29b1f1b..f153257b6a4 100644
--- a/src/intel/compiler/brw_rt.h
+++ b/src/intel/compiler/brw_rt.h
@@ -31,6 +31,9 @@ extern "C" {
 /** Vulkan defines shaderGroupHandleSize = 32 */
 #define BRW_RT_SBT_HANDLE_SIZE 32
 
+/** Offset after the RT dispatch globals at which "push" constants live */
+#define BRW_RT_PUSH_CONST_OFFSET 128
+
 /** Stride of the resume SBT */
 #define BRW_BTD_RESUME_SBT_STRIDE 8
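Side note for reviewers, not part of the patch: the constant-offset path
above rounds the byte offset down to a 64B block boundary, loads two
consecutive 64B blocks in case the value straddles a boundary, and then
extracts the requested component with nir_extract_bits().  Below is a
minimal standalone C sketch of just that addressing math; the name
pull_push_const and the plain byte buffer standing in for the global
memory reads are hypothetical, illustrating the arithmetic rather than
Mesa's API.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mimic the constant-offset lowering: touch memory only in whole 64B
 * blocks (two of them, in case the value crosses a block boundary),
 * then pull the requested bytes back out, like nir_extract_bits().
 */
static uint64_t
pull_push_const(const uint8_t *global, uint64_t offset, unsigned byte_size)
{
   /* Things should be component-aligned, as the patch asserts. */
   assert(byte_size <= 8 && offset % byte_size == 0);

   unsigned suboffset = offset % 64;
   uint64_t aligned_offset = offset - suboffset;

   /* Load two blocks just in case we go over a 64B boundary; these
    * memcpys stand in for nir_load_global_const_block_intel(). */
   uint8_t data[128];
   for (unsigned i = 0; i < 2; i++)
      memcpy(&data[i * 64], &global[aligned_offset + i * 64], 64);

   /* The extract_bits step: pick the component out of the blocks. */
   uint64_t value = 0;
   memcpy(&value, &data[suboffset], byte_size);
   return value;
}

int
main(void)
{
   /* Fake global-argument buffer: RT dispatch globals followed by the
    * "push" constants at BRW_RT_PUSH_CONST_OFFSET (128). */
   uint8_t global[256] = {0};
   const uint32_t push_const = 0xdeadbeef;
   memcpy(&global[128 + 4], &push_const, sizeof(push_const));

   /* base = 0, constant source offset = 4, one 32-bit component */
   uint64_t v = pull_push_const(global, 128 + 0 + 4, 4);
   printf("0x%08" PRIx64 "\n", v); /* prints 0xdeadbeef (little endian) */
   return 0;
}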