From 54dfc08b89a1e1f53afc1b6ef8b3ca164dcdab0f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 22 Dec 2022 17:27:58 +0200 Subject: [PATCH] nir: add a new intrinsic to describe resources accessed on intel Intel HW has multiple ways to access resources like UBO/SSBO/images : - binding tables : a small heap of ~240 surfaces - bindless surfaces : a 64Mb heap of surfaces up to Gfx12+, 4Gb on Gfx12.5+ - surfaces : a 4Gb heap on Gfx12.5+ (mostly unused at the moment, only available through the LSC) For samplers, we have 2 options since Gfx11+ : - samplers indexed from the Dynamic State Heap (4Gb) - samplers indexed from the Bindless Sampler Heap (4Gb) Additionally, our whole push constant promotion mechanism is based around binding table indices. This is problematic if you want to also promote to push constants things that would be accessed through the bindless heap. To solve this issue, we introduce a new intrinsic that will carry a block index that is not based off the binding table index nor the bindless table offset. We will also use this intrinsic to identify whether the buffer/surface index in load_ubo/load_ssbo/store_ssbo/etc... is relative to the binding table or the bindless heap. 
Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/nir/nir.h | 10 ++++++++++ src/compiler/nir/nir_divergence_analysis.c | 3 ++- src/compiler/nir/nir_intrinsics.py | 22 ++++++++++++++++++++++ src/compiler/nir/nir_lower_shader_calls.c | 3 +++ src/compiler/nir/nir_print.c | 18 ++++++++++++++++++ 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0bb04abec5f..a8b0bd365f7 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -255,6 +255,16 @@ typedef enum { nir_ray_query_value_intersection_triangle_vertex_positions } nir_ray_query_value; +/** + * Intel resource flags + */ +typedef enum { + nir_resource_intel_bindless = 1u << 0, + nir_resource_intel_pushable = 1u << 1, + nir_resource_intel_sampler = 1u << 2, + nir_resource_intel_non_uniform = 1u << 3, +} nir_resource_data_intel; + typedef union { bool b; float f32; diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index cf147fd256f..a4af184e8cf 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -446,7 +446,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_desc_set_address_intel: case nir_intrinsic_load_desc_set_dynamic_index_intel: case nir_intrinsic_load_global_constant_bounded: - case nir_intrinsic_load_global_constant_offset: { + case nir_intrinsic_load_global_constant_offset: + case nir_intrinsic_resource_intel: { unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; for (unsigned i = 0; i < num_srcs; i++) { if (instr->src[i].ssa->divergent) { diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index e56a9827d8d..a7da447df11 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -285,6 +285,12 @@ index("unsigned", "flags") # Logical operation of an atomic intrinsic 
index("nir_atomic_op", "atomic_op") +# Block identifier for push promotion +index("unsigned", "resource_block_intel") + +# Various flags describing the resource access +index("nir_resource_data_intel", "resource_access_intel") + intrinsic("nop", flags=[CAN_ELIMINATE]) intrinsic("convert_alu_types", dest_comp=0, src_comp=[0], @@ -1731,6 +1737,22 @@ system_value("simd_width_intel", 1) intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER]) +# 1 component 32bit surface index that can be used for bindless or BTI heaps +# +# This intrinsic is used to figure out which UBO accesses could be promoted to +# push constants. To allow promoting a load_ubo to push constants, we need to +# know that the surface & offset are constants. If we want to use the bindless +# heap for this we have to build the surface index with a pushed constant for +# the descriptor set which prevents us from doing a nir_src_is_const() check. +# With this intrinsic, we can just check the surface_index src with +# nir_src_is_const() and ignore set_offset. 
+# +# src[] = { set_offset, surface_index, array_index } +intrinsic("resource_intel", dest_comp=1, bit_sizes=[32], + src_comp=[1, 1, 1], + indices=[DESC_SET, BINDING, RESOURCE_ACCESS_INTEL, RESOURCE_BLOCK_INTEL], + flags=[CAN_ELIMINATE, CAN_REORDER]) + # 64-bit global address for a Vulkan descriptor set # src[0] = { set } intrinsic("load_desc_set_address_intel", dest_comp=1, bit_sizes=[64], diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c index c30104fac65..0c9f10a0d2d 100644 --- a/src/compiler/nir/nir_lower_shader_calls.c +++ b/src/compiler/nir/nir_lower_shader_calls.c @@ -197,6 +197,9 @@ can_remat_instr(nir_instr *instr, struct sized_bitset *remat) */ return true; + case nir_intrinsic_resource_intel: + return nir_foreach_src(instr, src_is_in_bitset, remat); + default: return false; } diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index ed6e299dc5b..a7fc38c6ffa 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1224,6 +1224,24 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) break; } + case NIR_INTRINSIC_RESOURCE_ACCESS_INTEL: { + fprintf(fp, "resource_intel="); + unsigned int modes = nir_intrinsic_resource_access_intel(instr); + while (modes) { + nir_resource_data_intel i = 1u << u_bit_scan(&modes); + switch (i) { + case nir_resource_intel_bindless: fprintf(fp, "bindless"); break; + case nir_resource_intel_pushable: fprintf(fp, "pushable"); break; + case nir_resource_intel_sampler: fprintf(fp, "sampler"); break; + case nir_resource_intel_non_uniform: + fprintf(fp, "non-uniform"); break; + default: fprintf(fp, "unknown"); break; + } + fprintf(fp, "%s", modes ? "|" : ""); + } + break; + } + default: { unsigned off = info->index_map[idx] - 1; fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]);