nir: add a new intrinsic to describe resources accessed on intel

Intel HW has multiple ways to access resources like UBO/SSBO/images :

   - binding tables : a small heap of ~240 surfaces

   - bindless surfaces : a 64MB heap of surfaces up to Gfx12, 4GB on Gfx12.5+

   - surfaces : a 4GB heap on Gfx12.5+ (mostly unused at the moment,
     only available through the LSC)

For samplers, we have 2 options on Gfx11+ :

   - samplers indexed from the Dynamic State Heap (4GB)

   - samplers indexed from the Bindless Sampler Heap (4GB)

Additionally our whole push constant promotion mechanism is based
around binding table indices. This is problematic if you want to also
promote to push constants things that would be accessed through the
bindless heap.

To solve this issue, we introduce a new intrinsic that will carry a
block index that is not based off the binding table index nor the
bindless table offset.

We will also use this intrinsic to identify whether the buffer/surface
index in load_ubo/load_ssbo/store_ssbo/etc... is relative to the
binding table or the bindless heap.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:
Lionel Landwerlin 2022-12-22 17:27:58 +02:00 committed by Marge Bot
parent b8790e9808
commit 54dfc08b89
5 changed files with 55 additions and 1 deletions

View file

@ -255,6 +255,16 @@ typedef enum {
nir_ray_query_value_intersection_triangle_vertex_positions
} nir_ray_query_value;
/**
 * Intel resource flags
 *
 * Single-bit values meant to be OR'ed together into a mask; this is the
 * type of the resource_access_intel intrinsic index.
 */
typedef enum {
   /* bit 0 - NOTE(review): presumably the surface index is relative to the
    * bindless heap rather than the binding table; confirm with the backend.
    */
   nir_resource_intel_bindless    = 0x1,
   /* bit 1 - NOTE(review): presumably marks accesses that are candidates for
    * push constant promotion; confirm with the backend.
    */
   nir_resource_intel_pushable    = 0x2,
   /* bit 2 - NOTE(review): presumably the resource is a sampler; confirm. */
   nir_resource_intel_sampler     = 0x4,
   /* bit 3 - NOTE(review): presumably the index may differ between
    * invocations; confirm.
    */
   nir_resource_intel_non_uniform = 0x8,
} nir_resource_data_intel;
typedef union {
bool b;
float f32;

View file

@ -446,7 +446,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
case nir_intrinsic_load_desc_set_address_intel:
case nir_intrinsic_load_desc_set_dynamic_index_intel:
case nir_intrinsic_load_global_constant_bounded:
case nir_intrinsic_load_global_constant_offset: {
case nir_intrinsic_load_global_constant_offset:
case nir_intrinsic_resource_intel: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
if (instr->src[i].ssa->divergent) {

View file

@ -285,6 +285,12 @@ index("unsigned", "flags")
# Logical operation of an atomic intrinsic
index("nir_atomic_op", "atomic_op")
# Block identifier used for push constant promotion
index("unsigned", "resource_block_intel")
# Various flags describing the resource access
index("nir_resource_data_intel", "resource_access_intel")
intrinsic("nop", flags=[CAN_ELIMINATE])
intrinsic("convert_alu_types", dest_comp=0, src_comp=[0],
@ -1731,6 +1737,22 @@ system_value("simd_width_intel", 1)
intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER])
# 1 component 32bit surface index that can be used for bindless or BTI heaps
#
# This intrinsic is used to figure out which UBO accesses could be promoted to
# push constants. To allow promoting a load_ubo to push constants, we need to
# know that the surface & offset are constants. If we want to use the bindless
# heap for this we have to build the surface index with a pushed constant for
# the descriptor set which prevents us from doing a nir_src_is_const() check.
# With this intrinsic, we can just check the surface_index src with
# nir_src_is_const() and ignore set_offset.
#
# src[] = { set_offset, surface_index, array_index }
intrinsic("resource_intel", dest_comp=1, bit_sizes=[32],
src_comp=[1, 1, 1],
indices=[DESC_SET, BINDING, RESOURCE_ACCESS_INTEL, RESOURCE_BLOCK_INTEL],
flags=[CAN_ELIMINATE, CAN_REORDER])
# 64-bit global address for a Vulkan descriptor set
# src[0] = { set }
intrinsic("load_desc_set_address_intel", dest_comp=1, bit_sizes=[64],

View file

@ -197,6 +197,9 @@ can_remat_instr(nir_instr *instr, struct sized_bitset *remat)
*/
return true;
case nir_intrinsic_resource_intel:
return nir_foreach_src(instr, src_is_in_bitset, remat);
default:
return false;
}

View file

@ -1224,6 +1224,24 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
break;
}
case NIR_INTRINSIC_RESOURCE_ACCESS_INTEL: {
fprintf(fp, "resource_intel=");
unsigned int modes = nir_intrinsic_resource_access_intel(instr);
while (modes) {
nir_resource_data_intel i = 1u << u_bit_scan(&modes);
switch (i) {
case nir_resource_intel_bindless: fprintf(fp, "bindless"); break;
case nir_resource_intel_pushable: fprintf(fp, "pushable"); break;
case nir_resource_intel_sampler: fprintf(fp, "sampler"); break;
case nir_resource_intel_non_uniform:
fprintf(fp, "non-uniform"); break;
default: fprintf(fp, "unknown"); break;
}
fprintf(fp, "%s", modes ? "|" : "");
}
break;
}
default: {
unsigned off = info->index_map[idx] - 1;
fprintf(fp, "%s=%d", nir_intrinsic_index_names[idx], instr->const_index[off]);