From 172b3a6882cee1500f43c158c77efe428fa6cbc4 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 12 Jun 2025 10:06:51 -0400 Subject: [PATCH 01/32] nir: Add sampler and resource heap system values Reviewed-by: Lionel Landwerlin --- src/compiler/nir/nir.c | 8 ++++++++ src/compiler/nir/nir_divergence_analysis.c | 2 ++ src/compiler/nir/nir_intrinsics.py | 4 ++++ src/compiler/shader_enums.c | 2 ++ src/compiler/shader_enums.h | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 6e1122155c9..4565f249700 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2566,6 +2566,10 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_warp_id_arm; case SYSTEM_VALUE_WARP_MAX_ID_ARM: return nir_intrinsic_load_warp_max_id_arm; + case SYSTEM_VALUE_SAMPLER_HEAP_PTR: + return nir_intrinsic_load_sampler_heap_ptr; + case SYSTEM_VALUE_RESOURCE_HEAP_PTR: + return nir_intrinsic_load_resource_heap_ptr; default: return nir_num_intrinsics; } @@ -2752,6 +2756,10 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_WARP_ID_ARM; case nir_intrinsic_load_warp_max_id_arm: return SYSTEM_VALUE_WARP_MAX_ID_ARM; + case nir_intrinsic_load_sampler_heap_ptr: + return SYSTEM_VALUE_SAMPLER_HEAP_PTR; + case nir_intrinsic_load_resource_heap_ptr: + return SYSTEM_VALUE_RESOURCE_HEAP_PTR; default: UNREACHABLE("intrinsic doesn't produce a system value"); } diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index feafb307088..e5e9a01f92d 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -364,6 +364,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_ray_query_global_intel: case nir_intrinsic_load_call_return_address_amd: case nir_intrinsic_load_indirect_address_intel: + case nir_intrinsic_load_sampler_heap_ptr: + case 
nir_intrinsic_load_resource_heap_ptr: is_divergent = false; break; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 9e22044e999..598b4f3a779 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1426,6 +1426,10 @@ intrinsic("cmat_insert", src_comp=[-1, 1, -1, 1]) intrinsic("cmat_copy", src_comp=[-1, -1]) intrinsic("cmat_transpose", src_comp=[-1, -1]) +# VK_KHR_descriptor_heap +system_value("sampler_heap_ptr", 1, bit_sizes=[64]) +system_value("resource_heap_ptr", 1, bit_sizes=[64]) + # Select an output vertex in a poly GS. Takes the stream-local vertex ID. intrinsic("select_vertex_poly", src_comp=[1], indices=[STREAM_ID]) diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c index 1d5a2bc2438..4e59f5b3e37 100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@ -459,6 +459,8 @@ gl_system_value_name(gl_system_value sysval) ENUM(SYSTEM_VALUE_CORE_MAX_ID_ARM), ENUM(SYSTEM_VALUE_WARP_ID_ARM), ENUM(SYSTEM_VALUE_WARP_MAX_ID_ARM), + ENUM(SYSTEM_VALUE_SAMPLER_HEAP_PTR), + ENUM(SYSTEM_VALUE_RESOURCE_HEAP_PTR), }; STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX); return NAME(sysval); diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 66a77a72466..2b6f920395c 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -969,6 +969,10 @@ typedef enum SYSTEM_VALUE_WARP_ID_ARM, SYSTEM_VALUE_WARP_MAX_ID_ARM, + /* SPV_KHR_sampler_heap */ + SYSTEM_VALUE_SAMPLER_HEAP_PTR, + SYSTEM_VALUE_RESOURCE_HEAP_PTR, + SYSTEM_VALUE_MAX /**< Number of values */ } gl_system_value; From 59378a05708a170582d03dafeb858e573d88350a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 14:57:43 -0400 Subject: [PATCH 02/32] nir: Add intrinsics for descriptor heaps Reviewed-by: Lionel Landwerlin --- src/compiler/nir/nir_divergence_analysis.c | 14 ++++++++ src/compiler/nir/nir_gather_info.c | 5 ++- 
src/compiler/nir/nir_intrinsics.py | 16 +++++++++ src/compiler/nir/nir_lower_io.c | 9 ++--- .../nir/nir_lower_non_uniform_access.c | 10 ++++++ src/compiler/nir/nir_opt_non_uniform_access.c | 10 ++++++ src/compiler/nir/nir_print.c | 34 +++++++++++++++++++ 7 files changed, 93 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index e5e9a01f92d..bf77844c3c5 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -655,9 +655,11 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_samples_identical: case nir_intrinsic_image_deref_samples_identical: case nir_intrinsic_bindless_image_samples_identical: + case nir_intrinsic_image_heap_samples_identical: case nir_intrinsic_image_fragment_mask_load_amd: case nir_intrinsic_image_deref_fragment_mask_load_amd: case nir_intrinsic_bindless_image_fragment_mask_load_amd: + case nir_intrinsic_image_heap_fragment_mask_load_amd: is_divergent = (src_divergent(instr->src[0], state) && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || src_divergent(instr->src[1], state) || @@ -676,9 +678,11 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_load: case nir_intrinsic_image_deref_load: case nir_intrinsic_bindless_image_load: + case nir_intrinsic_image_heap_load: case nir_intrinsic_image_sparse_load: case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_bindless_image_sparse_load: + case nir_intrinsic_image_heap_sparse_load: is_divergent = (src_divergent(instr->src[0], state) && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || src_divergent(instr->src[1], state) || @@ -736,20 +740,27 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_levels: case nir_intrinsic_image_deref_levels: case nir_intrinsic_bindless_image_levels: + case 
nir_intrinsic_image_heap_levels: case nir_intrinsic_image_samples: case nir_intrinsic_image_deref_samples: case nir_intrinsic_bindless_image_samples: + case nir_intrinsic_image_heap_samples: case nir_intrinsic_image_size: case nir_intrinsic_image_deref_size: case nir_intrinsic_bindless_image_size: + case nir_intrinsic_image_heap_size: case nir_intrinsic_image_descriptor_amd: case nir_intrinsic_image_deref_descriptor_amd: + case nir_intrinsic_image_heap_descriptor_amd: case nir_intrinsic_bindless_image_descriptor_amd: case nir_intrinsic_strict_wqm_coord_amd: case nir_intrinsic_copy_deref: case nir_intrinsic_vulkan_resource_index: case nir_intrinsic_vulkan_resource_reindex: case nir_intrinsic_load_vulkan_descriptor: + case nir_intrinsic_load_heap_descriptor: + case nir_intrinsic_load_resource_heap_data: + case nir_intrinsic_global_addr_to_descriptor: case nir_intrinsic_load_input_attachment_target_pan: case nir_intrinsic_load_input_attachment_conv_pan: case nir_intrinsic_load_converted_mem_pan: @@ -763,6 +774,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_sample_positions_amd: case nir_intrinsic_image_deref_load_param_intel: case nir_intrinsic_image_load_raw_intel: + case nir_intrinsic_load_buffer_ptr_deref: case nir_intrinsic_get_ubo_size: case nir_intrinsic_load_ssbo_address: case nir_intrinsic_load_global_bounded: @@ -904,6 +916,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_atomic_swap: case nir_intrinsic_bindless_image_atomic: case nir_intrinsic_bindless_image_atomic_swap: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: case nir_intrinsic_shared_atomic: case nir_intrinsic_shared_atomic_swap: case nir_intrinsic_shared_atomic_nv: diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 0bab69896c3..962030ae982 100644 --- a/src/compiler/nir/nir_gather_info.c +++ 
b/src/compiler/nir/nir_gather_info.c @@ -920,7 +920,10 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) instr->intrinsic == nir_intrinsic_bindless_image_size || instr->intrinsic == nir_intrinsic_bindless_image_samples || instr->intrinsic == nir_intrinsic_get_ubo_size || - instr->intrinsic == nir_intrinsic_get_ssbo_size) + instr->intrinsic == nir_intrinsic_get_ssbo_size || + instr->intrinsic == nir_intrinsic_image_heap_levels || + instr->intrinsic == nir_intrinsic_image_heap_size || + instr->intrinsic == nir_intrinsic_image_heap_samples) shader->info.uses_resource_info_query = true; break; } diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 598b4f3a779..6c856d81388 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -234,6 +234,9 @@ index("unsigned", "offset_shift_nv") # The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. index("unsigned", "desc_type") +# The Vulkan descriptor type according to VkSpirvResourceTypeFlagsEXT. 
+index("unsigned", "resource_type") + # The nir_alu_type of input data to a store or conversion index("nir_alu_type", "src_type") @@ -806,6 +809,8 @@ def image(name, src_comp=[], extra_indices=[], **kwargs): indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS, RANGE_BASE] + extra_indices, **kwargs) intrinsic("bindless_image_" + name, src_comp=[-1] + src_comp, indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS] + extra_indices, **kwargs) + intrinsic("image_heap_" + name, src_comp=[1] + src_comp, + indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS] + extra_indices, **kwargs) image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) @@ -1429,6 +1434,17 @@ intrinsic("cmat_transpose", src_comp=[-1, -1]) # VK_KHR_descriptor_heap system_value("sampler_heap_ptr", 1, bit_sizes=[64]) system_value("resource_heap_ptr", 1, bit_sizes=[64]) +# src[] = { deref }. +load("buffer_ptr_deref", [-1], [ACCESS, RESOURCE_TYPE], + flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. +load("heap_descriptor", [1], [RESOURCE_TYPE], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. +load("resource_heap_data", [1], [ALIGN_MUL, ALIGN_OFFSET], + flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { addr }. +intrinsic("global_addr_to_descriptor", src_comp=[1], dest_comp=0, + indices=[RESOURCE_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) # Select an output vertex in a poly GS. Takes the stream-local vertex ID. intrinsic("select_vertex_poly", src_comp=[1], indices=[STREAM_ID]) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index cbdedca1b77..3fff72cff00 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1094,10 +1094,11 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr) return idx >= 0 ? 
&instr->src[idx] : NULL; } -#define IMG_CASE(name) \ - case nir_intrinsic_image_##name: \ - case nir_intrinsic_image_deref_##name: \ - case nir_intrinsic_bindless_image_##name +#define IMG_CASE(name) \ + case nir_intrinsic_image_##name: \ + case nir_intrinsic_image_deref_##name: \ + case nir_intrinsic_bindless_image_##name: \ + case nir_intrinsic_image_heap_##name /** * Return the index or handle source number for a load/store intrinsic or -1 diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 71705e397e0..94495bc1e05 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -424,6 +424,16 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_image_deref_atomic_swap: case nir_intrinsic_image_deref_samples_identical: case nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_samples: + case nir_intrinsic_image_heap_samples_identical: + case nir_intrinsic_image_heap_fragment_mask_load_amd: if ((options->types & nir_lower_non_uniform_image_access) && lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_image_access)) progress = true; diff --git a/src/compiler/nir/nir_opt_non_uniform_access.c b/src/compiler/nir/nir_opt_non_uniform_access.c index 27fdc4daa04..a5819d5bb80 100644 --- a/src/compiler/nir/nir_opt_non_uniform_access.c +++ b/src/compiler/nir/nir_opt_non_uniform_access.c @@ -87,6 +87,16 @@ is_image_query_intrinsic(nir_intrinsic_instr *intrin) case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: case nir_intrinsic_image_deref_levels: + case 
nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_samples: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_fragment_mask_load_amd: return true; default: diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 0471fb1ea7d..06c8c5b24e7 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1238,6 +1238,33 @@ vulkan_descriptor_type_name(VkDescriptorType type) } } +static const char * +vk_spirv_resource_type_name(VkSpirvResourceTypeFlagBitsEXT type) +{ + switch (type) { + case VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT: + return "sampler"; + case VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT: + return "texture"; + case VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT: + return "RO-image"; + case VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT: + return "RW-image"; + case VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT: + return "texture+sampler"; + case VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT: + return "UBO"; + case VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT: + return "RO-SSBO"; + case VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT: + return "RW-SSBO"; + case VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT: + return "accel-struct"; + default: + return "unknown"; + } +} + static void print_alu_type(nir_alu_type type, print_state *state) { @@ -1425,6 +1452,13 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) break; } + case NIR_INTRINSIC_RESOURCE_TYPE: { + VkSpirvResourceTypeFlagBitsEXT res_type = + nir_intrinsic_resource_type(instr); + fprintf(fp, "resource_type=%s", vk_spirv_resource_type_name(res_type)); + break; + } + case NIR_INTRINSIC_SRC_TYPE: { fprintf(fp, 
"src_type="); print_alu_type(nir_intrinsic_src_type(instr), state); From 5e01059829e04c33c853e8fc7d3c031967970e68 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 14:58:14 -0400 Subject: [PATCH 03/32] nir: Add tex sources for descriptor heaps We also add a new boolean which indicates that the texture op uses an embedded sampler. Reviewed-by: Lionel Landwerlin --- src/compiler/nir/nir.c | 2 ++ src/compiler/nir/nir.h | 12 ++++++++++++ src/compiler/nir/nir_clone.c | 1 + src/compiler/nir/nir_gather_info.c | 4 ++++ src/compiler/nir/nir_instr_set.c | 1 + src/compiler/nir/nir_lower_non_uniform_access.c | 2 ++ src/compiler/nir/nir_lower_tex.c | 4 ++++ src/compiler/nir/nir_opt_non_uniform_access.c | 2 ++ src/compiler/nir/nir_print.c | 6 ++++++ src/compiler/nir/nir_serialize.c | 5 ++++- src/compiler/shader_info.h | 3 +++ 11 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 4565f249700..4e608be35f7 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -3579,6 +3579,8 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) case nir_tex_src_texture_2_handle: case nir_tex_src_sampler_2_handle: case nir_tex_src_block_size: + case nir_tex_src_texture_heap_offset: + case nir_tex_src_sampler_heap_offset: return nir_type_uint; case nir_num_tex_src_types: diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index fb061c88d6d..4d9ba32747d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2362,6 +2362,12 @@ typedef enum nir_tex_src_type { */ nir_tex_src_sampler_handle, + /** Texture descriptor heap offset (in bytes) */ + nir_tex_src_texture_heap_offset, + + /** Sampler descriptor heap offset (in bytes) */ + nir_tex_src_sampler_heap_offset, + /** Tex src intrinsic * * This is an intrinsic used before function inlining i.e. 
before we know @@ -2593,6 +2599,12 @@ typedef struct nir_tex_instr { */ bool sampler_non_uniform; + /** True if this texture instruction uses an embedded sampler. + * + * In this case, sampler_index is the index in embedded sampler table. + */ + bool embedded_sampler; + /** True if the offset is not dynamically uniform */ bool offset_non_uniform; diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 65297e77561..0aa7e95af2e 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -423,6 +423,7 @@ clone_tex(clone_state *state, const nir_tex_instr *tex) ntex->texture_non_uniform = tex->texture_non_uniform; ntex->sampler_non_uniform = tex->sampler_non_uniform; ntex->offset_non_uniform = tex->offset_non_uniform; + ntex->embedded_sampler = tex->embedded_sampler; ntex->backend_flags = tex->backend_flags; diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 962030ae982..4d04549c7f0 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -944,6 +944,9 @@ gather_tex_info(nir_tex_instr *instr, nir_shader *shader) nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle) != -1) shader->info.uses_bindless = true; + if (instr->embedded_sampler) + shader->info.uses_embedded_samplers = true; + if (!nir_tex_instr_is_query(instr) && (instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS)) @@ -1026,6 +1029,7 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) shader->info.bit_sizes_float = 0; shader->info.bit_sizes_int = 0; shader->info.uses_bindless = false; + shader->info.uses_embedded_samplers = false; nir_foreach_variable_with_modes(var, shader, nir_var_image | nir_var_uniform) { if (var->data.bindless) diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index 871699408c3..e45619f8ad1 100644 --- a/src/compiler/nir/nir_instr_set.c +++ 
b/src/compiler/nir/nir_instr_set.c @@ -278,6 +278,7 @@ pack_tex(const nir_tex_instr *instr) PACK(instr->skip_helpers, 1); PACK(instr->texture_non_uniform, 1); PACK(instr->sampler_non_uniform, 1); + PACK(instr->embedded_sampler, 1); PACK(instr->offset_non_uniform, 1); #undef PACK diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 94495bc1e05..9ed656b6f49 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -245,6 +245,7 @@ lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex, case nir_tex_src_texture_handle: case nir_tex_src_texture_deref: case nir_tex_src_texture_2_deref: + case nir_tex_src_texture_heap_offset: if (!tex->texture_non_uniform) continue; if (!(opts->types & base_access_type)) @@ -257,6 +258,7 @@ lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex, case nir_tex_src_sampler_handle: case nir_tex_src_sampler_deref: case nir_tex_src_sampler_2_deref: + case nir_tex_src_sampler_heap_offset: if (!tex->sampler_non_uniform) continue; if (!(opts->types & base_access_type)) diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index ed1ad705ef0..c2d8f7c383b 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -376,6 +376,7 @@ sample_plane(nir_builder *b, nir_tex_instr *tex, int plane, plane_tex->dest_type = nir_type_float | tex->def.bit_size; plane_tex->coord_components = 2; + plane_tex->embedded_sampler = tex->embedded_sampler; plane_tex->texture_index = tex->texture_index; plane_tex->sampler_index = tex->sampler_index; plane_tex->can_speculate = tex->can_speculate; @@ -937,6 +938,7 @@ lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex) txd->sampler_dim = tex->sampler_dim; txd->dest_type = tex->dest_type; txd->coord_components = tex->coord_components; + txd->embedded_sampler = tex->embedded_sampler; txd->texture_index = 
tex->texture_index; txd->sampler_index = tex->sampler_index; txd->is_array = tex->is_array; @@ -980,6 +982,7 @@ lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex) txl->sampler_dim = tex->sampler_dim; txl->dest_type = tex->dest_type; txl->coord_components = tex->coord_components; + txl->embedded_sampler = tex->embedded_sampler; txl->texture_index = tex->texture_index; txl->sampler_index = tex->sampler_index; txl->is_array = tex->is_array; @@ -1237,6 +1240,7 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod; tex_copy->component = tex->component; tex_copy->dest_type = tex->dest_type; + tex_copy->embedded_sampler = tex->embedded_sampler; tex_copy->texture_index = tex->texture_index; tex_copy->sampler_index = tex->sampler_index; tex_copy->backend_flags = tex->backend_flags; diff --git a/src/compiler/nir/nir_opt_non_uniform_access.c b/src/compiler/nir/nir_opt_non_uniform_access.c index a5819d5bb80..ffe58e499de 100644 --- a/src/compiler/nir/nir_opt_non_uniform_access.c +++ b/src/compiler/nir/nir_opt_non_uniform_access.c @@ -219,6 +219,7 @@ opt_non_uniform_tex_access(nir_tex_instr *tex) case nir_tex_src_texture_offset: case nir_tex_src_texture_handle: case nir_tex_src_texture_deref: + case nir_tex_src_texture_heap_offset: if (tex->texture_non_uniform && !nir_src_is_divergent(&tex->src[i].src)) { tex->texture_non_uniform = false; progress = true; @@ -228,6 +229,7 @@ opt_non_uniform_tex_access(nir_tex_instr *tex) case nir_tex_src_sampler_offset: case nir_tex_src_sampler_handle: case nir_tex_src_sampler_deref: + case nir_tex_src_sampler_heap_offset: if (tex->sampler_non_uniform && !nir_src_is_divergent(&tex->src[i].src)) { tex->sampler_non_uniform = false; progress = true; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 06c8c5b24e7..5104e164569 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -2112,6 +2112,12 @@ print_tex_instr(nir_tex_instr 
*instr, print_state *state) case nir_tex_src_box_size: fprintf(fp, "(box_size)"); break; + case nir_tex_src_texture_heap_offset: + fprintf(fp, "(texture_heap_offset)"); + break; + case nir_tex_src_sampler_heap_offset: + fprintf(fp, "(sampler_heap_offset)"); + break; case nir_tex_src_plane: fprintf(fp, "(plane)"); break; diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index e4248c78d86..01bd11023e6 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1374,10 +1374,11 @@ union packed_tex_data { unsigned texture_non_uniform : 1; unsigned sampler_non_uniform : 1; unsigned offset_non_uniform : 1; + unsigned embedded_sampler : 1; unsigned array_is_lowered_cube : 1; unsigned is_gather_implicit_lod : 1; unsigned can_speculate : 1; - unsigned unused : 3; /* Mark unused for valgrind. */ + unsigned unused : 2; /* Mark unused for valgrind. */ } u; }; @@ -1415,6 +1416,7 @@ write_tex(write_ctx *ctx, const nir_tex_instr *tex) .u.texture_non_uniform = tex->texture_non_uniform, .u.sampler_non_uniform = tex->sampler_non_uniform, .u.offset_non_uniform = tex->offset_non_uniform, + .u.embedded_sampler = tex->embedded_sampler, .u.array_is_lowered_cube = tex->array_is_lowered_cube, .u.is_gather_implicit_lod = tex->is_gather_implicit_lod, .u.can_speculate = tex->can_speculate, @@ -1456,6 +1458,7 @@ read_tex(read_ctx *ctx, union packed_instr header) tex->texture_non_uniform = packed.u.texture_non_uniform; tex->sampler_non_uniform = packed.u.sampler_non_uniform; tex->offset_non_uniform = packed.u.offset_non_uniform; + tex->embedded_sampler = packed.u.embedded_sampler; tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod; tex->can_speculate = packed.u.can_speculate; diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index df32b4e6938..65e95699345 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -276,6 +276,9 @@ 
typedef struct shader_info { /* Whether ARB_bindless_texture ops or variables are used */ bool uses_bindless : 1; + /* Whether embedded samplers are used by this shader */ + bool uses_embedded_samplers : 1; + /** * Shared memory types have explicit layout set. Used for * SPV_KHR_workgroup_storage_explicit_layout. From c3e1448d22d2e0920ecb5deb96affc0936075e26 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 3 Feb 2026 17:28:02 +0200 Subject: [PATCH 04/32] nir: improve deref_instr_get_variable So we can get through all the casting inserted by heaps. Signed-off-by: Lionel Landwerlin --- src/compiler/nir/nir.h | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 4d9ba32747d..24eb2c73ae4 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1832,19 +1832,6 @@ nir_deref_instr_parent(const nir_deref_instr *instr) return nir_src_as_deref(instr->parent); } -static inline nir_variable * -nir_deref_instr_get_variable(const nir_deref_instr *instr) -{ - while (instr->deref_type != nir_deref_type_var) { - if (instr->deref_type == nir_deref_type_cast) - return NULL; - - instr = nir_deref_instr_parent(instr); - } - - return instr->var; -} - bool nir_deref_instr_has_indirect(nir_deref_instr *instr); bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr); @@ -1972,12 +1959,6 @@ typedef struct nir_intrinsic_instr { nir_src src[]; } nir_intrinsic_instr; -static inline nir_variable * -nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i) -{ - return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); -} - typedef enum { /* Memory ordering. 
*/ NIR_MEMORY_ACQUIRE = 1 << 0, @@ -4124,6 +4105,26 @@ nir_shader_get_function_for_name(const nir_shader *shader, const char *name) return NULL; } +static inline nir_variable * +nir_deref_instr_get_variable(const nir_deref_instr *instr) +{ + while (instr->deref_type != nir_deref_type_var) { + if (instr->deref_type == nir_deref_type_cast && + !nir_def_is_deref(instr->parent.ssa)) + return NULL; + + instr = nir_deref_instr_parent(instr); + } + + return instr->var; +} + +static inline nir_variable * +nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i) +{ + return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); +} + /* * After all functions are forcibly inlined, these passes remove redundant * functions from a shader and library respectively. From b5b6f10c20d69112b38e295c54af9cc1fa82ff29 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 2 Jul 2025 13:56:47 -0400 Subject: [PATCH 05/32] spirv: Improve the error message for invalid SPIR-V sections Reviewed-by: Lionel Landwerlin --- src/compiler/spirv/spirv_to_nir.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 93bacdf07a4..42be1595730 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -6010,11 +6010,8 @@ vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_poin } static bool -vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +spv_op_is_preamble(SpvOp opcode) { - vtn_set_instruction_result_type(b, opcode, w, count); - switch (opcode) { case SpvOpSource: case SpvOpSourceContinued: @@ -6036,9 +6033,24 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpGroupMemberDecorate: case SpvOpDecorateString: case SpvOpMemberDecorateString: - vtn_fail("Invalid opcode types and variables section"); - break; + return 
true; + default: + return false; + } +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + vtn_fail_if(spv_op_is_preamble(opcode), + "Invalid opcode in the types and variables section: %s", + spirv_op_to_string(opcode)); + + vtn_set_instruction_result_type(b, opcode, w, count); + + switch (opcode) { case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: From 8c049c5338e0e2c09032036a3b41d0ab369bb2fa Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 14:53:55 -0400 Subject: [PATCH 06/32] spirv: Add new SPV_KHR_descriptor_heap Builtins Reviewed-by: Lionel Landwerlin --- src/compiler/spirv/vtn_variables.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 3e5d0ca2f00..cf76dc3b14f 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1339,6 +1339,15 @@ vtn_get_builtin_location(struct vtn_builder *b, set_mode_system_value(b, mode); break; + case SpvBuiltInSamplerHeapEXT: + *location = SYSTEM_VALUE_SAMPLER_HEAP_PTR; + set_mode_system_value(b, mode); + break; + case SpvBuiltInResourceHeapEXT: + *location = SYSTEM_VALUE_RESOURCE_HEAP_PTR; + set_mode_system_value(b, mode); + break; + default: vtn_fail("Unsupported builtin: %s (%u)", spirv_builtin_to_string(builtin), builtin); From 61a416b0efbddadf808ff1f1ffbda63b069759a4 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 15:28:09 -0400 Subject: [PATCH 07/32] spirv: Handle OpTypeBufferKHR Reviewed-by: Lionel Landwerlin --- src/compiler/spirv/spirv_to_nir.c | 21 +++++++++++++++++++++ src/compiler/spirv/vtn_private.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 42be1595730..aed4f1eb0ba 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -465,6 
+465,7 @@ vtn_base_type_to_string(enum vtn_base_type t) CASE(function); CASE(event); CASE(cooperative_matrix); + CASE(buffer); } #undef CASE UNREACHABLE("unknown base type"); @@ -1269,6 +1270,9 @@ vtn_types_compatible(struct vtn_builder *b, case vtn_base_type_cooperative_matrix: return t1->type == t2->type; + case vtn_base_type_buffer: + return t1->storage_class == t2->storage_class; + case vtn_base_type_array: return t1->length == t2->length && vtn_types_compatible(b, t1->array_element, t2->array_element); @@ -1330,6 +1334,7 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) case vtn_base_type_accel_struct: case vtn_base_type_ray_query: case vtn_base_type_cooperative_matrix: + case vtn_base_type_buffer: /* Nothing more to do */ break; @@ -2438,6 +2443,21 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = b->shader->info.cs.ptr_size == 64 ? glsl_int64_t_type() : glsl_int_type(); break; + case SpvOpTypeBufferEXT: { + SpvStorageClass storage_class = w[2]; + vtn_fail_if(storage_class != SpvStorageClassUniform && + storage_class != SpvStorageClassStorageBuffer, + "Storage Class must be Uniform or StorageBuffer."); + + const nir_address_format addr_format = vtn_mode_to_address_format(b, + vtn_storage_class_to_mode(b, storage_class, NULL, NULL)); + + val->type->base_type = vtn_base_type_buffer; + val->type->storage_class = storage_class; + val->type->type = nir_address_format_to_glsl_type(addr_format); + break; + } + case SpvOpTypeDeviceEvent: case SpvOpTypeReserveId: case SpvOpTypeQueue: @@ -6076,6 +6096,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeRayQueryKHR: case SpvOpTypeCooperativeMatrixKHR: case SpvOpTypeUntypedPointerKHR: + case SpvOpTypeBufferEXT: vtn_handle_type(b, opcode, w, count); break; diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index d1d465b2893..e33f798b0f2 100644 --- a/src/compiler/spirv/vtn_private.h +++ 
b/src/compiler/spirv/vtn_private.h @@ -297,6 +297,7 @@ enum vtn_base_type { vtn_base_type_function, vtn_base_type_event, vtn_base_type_cooperative_matrix, + vtn_base_type_buffer, }; struct vtn_type { @@ -364,7 +365,7 @@ struct vtn_type { bool packed:1; }; - /* Members for pointer types */ + /* Members for pointer and buffer types */ struct { /* For regular pointers, the vtn_type of the object pointed to; * for untyped pointers it must be NULL. From 9b080b6c0d7b951dd8995cbc01872194f714990e Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 15:36:41 -0400 Subject: [PATCH 08/32] spirv,vulkan: Implement OpConstantSizeOfKHR Reviewed-by: Lionel Landwerlin --- src/compiler/spirv/nir_spirv.h | 10 ++++++++++ src/compiler/spirv/spirv_to_nir.c | 32 +++++++++++++++++++++++++++++++ src/vulkan/runtime/vk_nir.c | 13 +++++++++++++ 3 files changed, 55 insertions(+) diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h index c7096197dd2..79549bd1760 100644 --- a/src/compiler/spirv/nir_spirv.h +++ b/src/compiler/spirv/nir_spirv.h @@ -111,6 +111,16 @@ struct spirv_to_nir_options { */ uint32_t min_ssbo_alignment; + /* These must be identical to the values set in + * VkPhysicalDeviceDescriptorHeapPropertiesEXT + */ + uint32_t sampler_descriptor_size; + uint32_t sampler_descriptor_alignment; + uint32_t image_descriptor_size; + uint32_t image_descriptor_alignment; + uint32_t buffer_descriptor_size; + uint32_t buffer_descriptor_alignment; + const nir_shader *clc_shader; struct { diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index aed4f1eb0ba..fe09ecb437a 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -3034,6 +3034,37 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, val->is_null_constant = true; break; + case SpvOpConstantSizeOfEXT: { + vtn_fail_if(val->type->type != glsl_uint_type() && + val->type->type != glsl_int_type(), + "Result Type must be a 32-bit 
integer type scalar."); + + struct vtn_type *type = vtn_get_type(b, w[3]); + switch (type->base_type) { + case vtn_base_type_image: + val->constant->values[0].u32 = + align(b->options->image_descriptor_size, + b->options->image_descriptor_alignment); + break; + case vtn_base_type_sampler: + val->constant->values[0].u32 = + align(b->options->sampler_descriptor_size, + b->options->sampler_descriptor_alignment); + break; + case vtn_base_type_accel_struct: + case vtn_base_type_buffer: + val->constant->values[0].u32 = + align(b->options->buffer_descriptor_size, + b->options->buffer_descriptor_alignment); + break; + default: + vtn_fail("Type must be an OpTypeBufferKHR, OpTypeImage, " + "OpTypeAccelerationStructureKHR, OpTypeTensorARM, or " + "OpTypeSampler instruction."); + } + break; + } + default: vtn_fail_with_opcode("Unhandled opcode", opcode); } @@ -6112,6 +6143,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSpecConstantComposite: case SpvOpSpecConstantCompositeReplicateEXT: case SpvOpSpecConstantOp: + case SpvOpConstantSizeOfEXT: vtn_handle_constant(b, opcode, w, count); break; diff --git a/src/vulkan/runtime/vk_nir.c b/src/vulkan/runtime/vk_nir.c index 391b07616e7..457a084e735 100644 --- a/src/vulkan/runtime/vk_nir.c +++ b/src/vulkan/runtime/vk_nir.c @@ -136,6 +136,19 @@ vk_spirv_to_nir(struct vk_device *device, spirv_options_local.debug.func = spirv_nir_debug; spirv_options_local.debug.private_data = (void *)device; + spirv_options_local.sampler_descriptor_size = + device->physical->properties.samplerDescriptorSize; + spirv_options_local.sampler_descriptor_alignment = + device->physical->properties.samplerDescriptorAlignment; + spirv_options_local.image_descriptor_size = + device->physical->properties.imageDescriptorSize; + spirv_options_local.image_descriptor_alignment = + device->physical->properties.imageDescriptorAlignment; + spirv_options_local.buffer_descriptor_size = + 
device->physical->properties.bufferDescriptorSize; + spirv_options_local.buffer_descriptor_alignment = + device->physical->properties.bufferDescriptorAlignment; + uint32_t num_spec_entries = 0; struct nir_spirv_specialization *spec_entries = vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries); From d897061fd1048531517c9753ed9a6520cd3e19a2 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 15:49:46 -0400 Subject: [PATCH 09/32] spirv: Handle ArrayStrideIdKHR and OffsetIdKHR decorations Reviewed-by: Lionel Landwerlin --- src/compiler/spirv/spirv_to_nir.c | 14 ++++++++++++++ src/compiler/spirv/vtn_variables.c | 1 + 2 files changed, 15 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index fe09ecb437a..be87c196a88 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1128,6 +1128,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpDecorateString: case SpvOpMemberDecorateString: case SpvOpExecutionMode: @@ -1142,6 +1143,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, dec->scope = VTN_DEC_DECORATION; break; case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpMemberDecorateString: dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */ @@ -1530,6 +1532,8 @@ array_stride_decoration_cb(struct vtn_builder *b, vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero"); type->stride = dec->operands[0]; } + } else if (dec->decoration == SpvDecorationArrayStrideIdEXT) { + type->stride = vtn_constant_uint(b, dec->operands[0]); } } @@ -1595,6 +1599,12 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->type->offsets[member] = dec->operands[0]; ctx->fields[member].offset = dec->operands[0]; break; + case SpvDecorationOffsetIdEXT: { + uint32_t offset = 
vtn_constant_uint(b, dec->operands[0]); + ctx->type->offsets[member] = offset; + ctx->fields[member].offset = offset; + break; + } case SpvDecorationMatrixStride: /* Handled as a second pass */ break; @@ -1775,6 +1785,7 @@ type_decoration_cb(struct vtn_builder *b, switch (dec->decoration) { case SpvDecorationArrayStride: + case SpvDecorationArrayStrideIdEXT: vtn_assert(type->base_type == vtn_base_type_array || type->base_type == vtn_base_type_pointer); break; @@ -1813,6 +1824,7 @@ type_decoration_cb(struct vtn_builder *b, case SpvDecorationXfbBuffer: case SpvDecorationXfbStride: case SpvDecorationUserSemantic: + case SpvDecorationOffsetIdEXT: vtn_warn("Decoration only allowed for struct members: %s", spirv_decoration_to_string(dec->decoration)); break; @@ -5512,6 +5524,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpGroupDecorate: case SpvOpGroupMemberDecorate: case SpvOpDecorateString: @@ -6080,6 +6093,7 @@ spv_op_is_preamble(SpvOp opcode) case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpGroupDecorate: case SpvOpGroupMemberDecorate: case SpvOpDecorateString: diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index cf76dc3b14f..ea316f8480e 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1468,6 +1468,7 @@ apply_var_decoration(struct vtn_builder *b, case SpvDecorationArrayStride: case SpvDecorationGLSLShared: case SpvDecorationGLSLPacked: + case SpvDecorationArrayStrideIdEXT: break; /* These can apply to a type but we don't care about them */ case SpvDecorationBinding: From 7f21d52fc2c1f06a0ca0d2665ae7e52c3c2a14be Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 31 Jul 2025 17:00:39 -0400 Subject: [PATCH 10/32] spirv: Handle OpBufferPointerKHR --- src/compiler/spirv/spirv_to_nir.c | 1 + 
src/compiler/spirv/vtn_variables.c | 88 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index be87c196a88..f2959e0263d 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -6718,6 +6718,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpUntypedInBoundsAccessChainKHR: case SpvOpUntypedInBoundsPtrAccessChainKHR: case SpvOpUntypedArrayLengthKHR: + case SpvOpBufferPointerEXT: vtn_handle_variables(b, opcode, w, count); break; diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index ea316f8480e..e33fc015b01 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -2703,6 +2703,38 @@ vtn_cast_pointer(struct vtn_builder *b, struct vtn_pointer *p, return casted; } +static void +buffer_ptr_decoration_cb(struct vtn_builder *b, struct vtn_value *val, + int member, const struct vtn_decoration *dec, + void *void_access) +{ + enum gl_access_qualifier *access = void_access; + vtn_assert(member == -1); + + switch (dec->decoration) { + case SpvDecorationNonReadable: + *access |= ACCESS_NON_READABLE; + break; + case SpvDecorationNonWritable: + *access |= ACCESS_NON_WRITEABLE; + break; + case SpvDecorationRestrict: + *access |= ACCESS_RESTRICT; + break; + case SpvDecorationAliased: + *access &= ~ACCESS_RESTRICT; + break; + case SpvDecorationVolatile: + *access |= ACCESS_VOLATILE; + break; + case SpvDecorationCoherent: + *access |= ACCESS_COHERENT; + break; + default: + vtn_fail_with_decoration("Unhandled decoration", dec->decoration); + } +} + void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -2956,6 +2988,62 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpBufferPointerEXT: { + struct vtn_type *res_type = vtn_get_type(b, w[1]); + struct vtn_value *val = 
vtn_untyped_value(b, w[1]); + struct vtn_value *src_val = vtn_value(b, w[3], vtn_value_type_pointer); + struct vtn_pointer *src = vtn_value_to_pointer(b, src_val); + + vtn_fail_if(res_type->base_type != vtn_base_type_pointer, + "Result Type must be a pointer type"); + + enum gl_access_qualifier access = 0; + vtn_foreach_decoration(b, val, buffer_ptr_decoration_cb, &access); + + VkSpirvResourceTypeFlagBitsEXT resource_type; + switch (res_type->storage_class) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + resource_type = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + break; + case SpvStorageClassStorageBuffer: + if (access & ACCESS_NON_WRITEABLE) + resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT; + else + resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + break; + default: + vtn_fail("Result Type must be a pointer type with a Storage Class " + "of Uniform or StorageBuffer."); + } + + const nir_address_format addr_format = vtn_mode_to_address_format(b, + vtn_storage_class_to_mode(b, res_type->storage_class, NULL, NULL)); + + unsigned num_components = nir_address_format_num_components(addr_format); + unsigned bit_size = nir_address_format_bit_size(addr_format); + + struct vtn_type *buffer_type = vtn_zalloc(b, struct vtn_type); + buffer_type->base_type = vtn_base_type_buffer; + buffer_type->storage_class = res_type->storage_class; + buffer_type->type = nir_address_format_to_glsl_type(addr_format); + + /* buffer is always an untyped pointer */ + src = vtn_cast_pointer(b, src, buffer_type); + + /* We know the alignment from the API */ + src = vtn_align_pointer(b, src, b->options->buffer_descriptor_alignment); + + nir_deref_instr *src_deref = vtn_pointer_to_deref(b, src); + nir_def *ptr = nir_load_buffer_ptr_deref(&b->nb, num_components, bit_size, + &src_deref->def, + .access = access, + .resource_type = resource_type); + + vtn_push_pointer(b, w[2], vtn_pointer_from_ssa(b, ptr, res_type)); + 
break; + } + case SpvOpStore: { struct vtn_value *dest_val = vtn_pointer_value(b, w[1]); struct vtn_pointer *dest = vtn_value_to_pointer(b, dest_val); From 4673996f369c9dbf065cca8bdb510b0fdf5de7e7 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 8 Sep 2025 14:30:05 +0200 Subject: [PATCH 11/32] nir: make nir_variable::descriptor_set a 32-bit variable With descriptor heap there is no limit. Signed-off-by: Samuel Pitoiset Reviewed-by: Lionel Landwerlin --- src/compiler/nir/nir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 24eb2c73ae4..ec0362b971e 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -715,7 +715,7 @@ typedef struct nir_variable { /** * Descriptor set binding for sampler or UBO. */ - unsigned descriptor_set : 5; + unsigned descriptor_set; #define NIR_VARIABLE_NO_INDEX ~0 From e95a22bd2004fcad39ccfe42cf3bada49a72185d Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Fri, 25 Apr 2025 18:23:23 +0200 Subject: [PATCH 12/32] spirv,nir: Preserve more information about the descriptor type Descriptor heap mappings need the information to selectively apply mappings (descriptor type masks). 
--- src/compiler/nir/nir.c | 2 ++ src/compiler/nir/nir.h | 4 ++++ src/compiler/nir/nir_intrinsics.py | 2 +- src/compiler/spirv/vtn_variables.c | 38 ++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 4e608be35f7..ba19995172e 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2955,6 +2955,7 @@ nir_chase_binding(nir_src rsrc) res.var = deref->var; res.desc_set = deref->var->data.descriptor_set; res.binding = deref->var->data.binding; + res.resource_type = deref->var->data.resource_type; return res; } else if (deref->deref_type == nir_deref_type_array && is_image) { if (res.num_indices == ARRAY_SIZE(res.indices)) @@ -3045,6 +3046,7 @@ nir_chase_binding(nir_src rsrc) res.success = true; res.desc_set = nir_intrinsic_desc_set(intrin); res.binding = nir_intrinsic_binding(intrin); + res.resource_type = nir_intrinsic_resource_type(intrin); res.num_indices = 1; res.indices[0] = intrin->src[0]; return res; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ec0362b971e..f80a7aa8107 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -712,6 +712,9 @@ typedef struct nir_variable { */ unsigned access : 9; + /* VkSpirvResourceTypeFlagBitsKHR bit index that this variable would have. */ + unsigned resource_type : 9; + /** * Descriptor set binding for sampler or UBO. 
*/ @@ -3157,6 +3160,7 @@ typedef struct nir_binding { nir_variable *var; unsigned desc_set; unsigned binding; + unsigned resource_type; unsigned num_indices; nir_src indices[4]; bool read_first_invocation; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 6c856d81388..e3f4f405a1b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -854,7 +854,7 @@ image("fragment_mask_load_amd", src_comp=[4], dest_comp=1, bit_sizes=[32], flags # corresponds to the tuple (set, binding, index) and computes an index # corresponding to tuple (set, binding, idx + src1). intrinsic("vulkan_resource_index", src_comp=[1], dest_comp=0, - indices=[DESC_SET, BINDING, DESC_TYPE], + indices=[DESC_SET, BINDING, DESC_TYPE, RESOURCE_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) intrinsic("vulkan_resource_reindex", src_comp=[0, 1], dest_comp=0, indices=[DESC_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index e33fc015b01..837ad46512f 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -260,6 +260,7 @@ vtn_variable_resource_index(struct vtn_builder *b, struct vtn_variable *var, nir_intrinsic_set_desc_set(instr, var->descriptor_set); nir_intrinsic_set_binding(instr, var->binding); nir_intrinsic_set_desc_type(instr, vk_desc_type_for_mode(b, var->mode)); + nir_intrinsic_set_resource_type(instr, var->var->data.resource_type); nir_address_format addr_format = vtn_mode_to_address_format(b, var->mode); nir_def_init(&instr->instr, &instr->def, @@ -2462,6 +2463,43 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val, /* Propagate access flags from the OpVariable decorations. 
*/ val->pointer->access |= var->access; + switch (without_array->base_type) { + case vtn_base_type_image: + if (glsl_type_is_image(without_array->glsl_image)) { + if (var->access & ACCESS_NON_WRITEABLE) + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT; + else + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT; + } else { + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT; + } + break; + case vtn_base_type_sampler: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT; + break; + case vtn_base_type_sampled_image: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT; + break; + case vtn_base_type_accel_struct: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT; + break; + default: + switch (var->mode) { + case vtn_variable_mode_ubo: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + break; + case vtn_variable_mode_ssbo: + if (var->access & ACCESS_NON_WRITEABLE) + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT; + else + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + break; + default: + break; + } + break; + } + if ((var->mode == vtn_variable_mode_input || var->mode == vtn_variable_mode_output) && var->var->members) { From 2b1683873db191688b795b7271a53ea7c1f2adf4 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 19 Jun 2025 15:51:58 -0400 Subject: [PATCH 13/32] spirv: Mark DescriptorHeapKHR as implemented --- src/compiler/spirv/spirv_to_nir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index f2959e0263d..c4c7bd99cf5 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -81,6 +81,7 @@ static const struct spirv_capabilities implemented_capabilities = { 
.DenormFlushToZero = true, .DenormPreserve = true, .DerivativeControl = true, + .DescriptorHeapEXT = true, .DeviceGroup = true, .DotProduct = true, .DotProductBFloat16AccVALVE = true, From 16f35ff9327be1bfb6ec0f819aa6d2fdd6d16a44 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 13 Jun 2025 12:28:46 -0400 Subject: [PATCH 14/32] vulkan: Rename some VK_EXT_descriptor_buffer properties --- src/amd/vulkan/radv_physical_device.c | 2 +- src/freedreno/vulkan/tu_device.cc | 4 ++-- src/gallium/frontends/lavapipe/lvp_device.c | 2 +- src/intel/vulkan/anv_physical_device.c | 2 +- src/nouveau/vulkan/nvk_physical_device.c | 2 +- src/vulkan/util/vk_physical_device_properties_gen.py | 1 + 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index e62ef2f43a0..eabb185d25f 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -2119,7 +2119,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev) .imageViewCaptureReplayDescriptorDataSize = 1, .samplerCaptureReplayDescriptorDataSize = 4, .accelerationStructureCaptureReplayDescriptorDataSize = 1, - .samplerDescriptorSize = RADV_SAMPLER_DESC_SIZE, + .EDBsamplerDescriptorSize = RADV_SAMPLER_DESC_SIZE, .combinedImageSamplerDescriptorSize = radv_get_combined_image_sampler_desc_size(pdev), .sampledImageDescriptorSize = radv_get_sampled_image_desc_size(pdev), .storageImageDescriptorSize = RADV_STORAGE_IMAGE_DESC_SIZE, diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 9bff9092562..1e06f8ef910 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1389,7 +1389,7 @@ tu_get_properties(struct tu_physical_device *pdevice, props->samplerCaptureReplayDescriptorDataSize = 0; props->accelerationStructureCaptureReplayDescriptorDataSize = 0; /* Note: these sizes must match descriptor_size() */ - props->samplerDescriptorSize = 
FDL6_TEX_CONST_DWORDS * 4; + props->EDBsamplerDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; props->combinedImageSamplerDescriptorSize = 2 * FDL6_TEX_CONST_DWORDS * 4; props->sampledImageDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; props->storageImageDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; @@ -1535,7 +1535,7 @@ tu_get_properties(struct tu_physical_device *pdevice, props->conservativeRasterizationPostDepthCoverage = false; /* VK_EXT_fragment_density_map_offset */ - props->fragmentDensityOffsetGranularity = (VkExtent2D) { + props->fragmentDensityOffsetGranularity = (VkExtent2D) { TU_FDM_OFFSET_GRANULARITY, TU_FDM_OFFSET_GRANULARITY }; diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 04c96979922..e0dde31dc3e 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -1230,7 +1230,7 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie .imageViewCaptureReplayDescriptorDataSize = 0, .samplerCaptureReplayDescriptorDataSize = 0, .accelerationStructureCaptureReplayDescriptorDataSize = 0, - .samplerDescriptorSize = sizeof(struct lp_descriptor), + .EDBsamplerDescriptorSize = sizeof(struct lp_descriptor), .combinedImageSamplerDescriptorSize = sizeof(struct lp_descriptor), .sampledImageDescriptorSize = sizeof(struct lp_descriptor), .storageImageDescriptorSize = sizeof(struct lp_descriptor), diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 7d838fba1b3..709c84b1d58 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -1714,7 +1714,7 @@ get_properties(const struct anv_physical_device *pdevice, */ props->accelerationStructureCaptureReplayDescriptorDataSize = 0; - props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE; + props->EDBsamplerDescriptorSize = ANV_SAMPLER_STATE_SIZE; props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE 
+ ANV_SAMPLER_STATE_SIZE, ANV_SURFACE_STATE_SIZE); props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE; diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 2ee4f194b27..539cd3bccba 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -1102,7 +1102,7 @@ nvk_get_device_properties(const struct nvk_instance *instance, .samplerCaptureReplayDescriptorDataSize = sizeof(struct nvk_sampler_capture), .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo - .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), + .EDBsamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor), diff --git a/src/vulkan/util/vk_physical_device_properties_gen.py b/src/vulkan/util/vk_physical_device_properties_gen.py index 3ffffbfb26d..8a39fc06526 100644 --- a/src/vulkan/util/vk_physical_device_properties_gen.py +++ b/src/vulkan/util/vk_physical_device_properties_gen.py @@ -58,6 +58,7 @@ RENAMED_PROPERTIES = { ("SubgroupProperties", "supportedStages"): "subgroupSupportedStages", ("SubgroupProperties", "supportedOperations"): "subgroupSupportedOperations", ("SubgroupProperties", "quadOperationsInAllStages"): "subgroupQuadOperationsInAllStages", + ("DescriptorBufferPropertiesEXT", "samplerDescriptorSize"): "EDBsamplerDescriptorSize", } OUT_ARRAYS = { From 91bc5b440b4c660616d990672761afed280b9cc5 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 1 Jul 2025 18:36:23 +0200 Subject: [PATCH 15/32] vulkan/runtime: handle custom border color index with samplers Signed-off-by: Samuel Pitoiset --- src/vulkan/runtime/vk_limits.h | 2 ++ src/vulkan/runtime/vk_sampler.c | 8 ++++++++ src/vulkan/runtime/vk_sampler.h | 5 +++++ 3 files 
changed, 15 insertions(+) diff --git a/src/vulkan/runtime/vk_limits.h b/src/vulkan/runtime/vk_limits.h index 1cb02eabfe8..6950766214a 100644 --- a/src/vulkan/runtime/vk_limits.h +++ b/src/vulkan/runtime/vk_limits.h @@ -96,4 +96,6 @@ */ #define MESA_VK_MAX_MULTIVIEW_VIEW_COUNT 32 +#define MESA_VK_MAX_CUSTOM_BORDER_COLOR ~0 + #endif /* VK_LIMITS_H */ diff --git a/src/vulkan/runtime/vk_sampler.c b/src/vulkan/runtime/vk_sampler.c index d612290b27b..b57f91cf635 100644 --- a/src/vulkan/runtime/vk_sampler.c +++ b/src/vulkan/runtime/vk_sampler.c @@ -25,6 +25,7 @@ #include "vk_sampler.h" #include "vk_device.h" #include "vk_format.h" +#include "vk_limits.h" #include "vk_util.h" #include "vk_ycbcr_conversion.h" @@ -121,6 +122,7 @@ vk_sampler_state_init(struct vk_sampler_state *state, if (!vk_border_color_is_custom(pCreateInfo->borderColor)) state->border_color_value = vk_border_color_value(pCreateInfo->borderColor); state->reduction_mode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; + state->border_color_index = MESA_VK_MAX_CUSTOM_BORDER_COLOR; vk_foreach_struct_const(ext, pCreateInfo->pNext) { switch (ext->sType) { @@ -173,6 +175,12 @@ vk_sampler_state_init(struct vk_sampler_state *state, break; } + case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_INDEX_CREATE_INFO_EXT: { + const VkSamplerCustomBorderColorIndexCreateInfoEXT *bc_info = (void *)ext; + state->border_color_index = bc_info->index; + break; + } + default: break; } diff --git a/src/vulkan/runtime/vk_sampler.h b/src/vulkan/runtime/vk_sampler.h index 12039bb27fa..fca93f48830 100644 --- a/src/vulkan/runtime/vk_sampler.h +++ b/src/vulkan/runtime/vk_sampler.h @@ -90,6 +90,11 @@ struct vk_sampler_state { */ VkClearColorValue border_color_value; + /** + * VkSamplerCustomBorderColorIndexCreateInfo::index. 
+ */ + uint32_t border_color_index; + /** VkSamplerBorderColorComponentMappingCreateInfoEXT::components */ VkComponentMapping border_color_component_mapping; From 67abf9e4762907029d92512190f7bf4195ba5ef8 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 13 Jun 2025 13:10:05 -0400 Subject: [PATCH 16/32] vulkan: Add a lowering pass for descriptor heap mappings Lowers all mappings and embedded samplers to descriptor heaps without mappings. This was based on a pass written by Konstantin Seurer and Mike Blumenkrantz but was basically entirely rewritten and uses different NIR intrinsics. --- src/vulkan/runtime/meson.build | 1 + .../runtime/vk_nir_lower_descriptor_heaps.c | 919 ++++++++++++++++++ .../runtime/vk_nir_lower_descriptor_heaps.h | 81 ++ 3 files changed, 1001 insertions(+) create mode 100644 src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c create mode 100644 src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h diff --git a/src/vulkan/runtime/meson.build b/src/vulkan/runtime/meson.build index 7722b77b420..1f2e9426f57 100644 --- a/src/vulkan/runtime/meson.build +++ b/src/vulkan/runtime/meson.build @@ -262,6 +262,7 @@ vulkan_runtime_files = files( 'vk_meta_draw_rects.c', 'vk_nir.c', 'vk_nir_convert_ycbcr.c', + 'vk_nir_lower_descriptor_heaps.c', 'vk_pipeline.c', 'vk_pipeline_cache.c', 'vk_shader.c', diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c new file mode 100644 index 00000000000..9b2adc8f5a5 --- /dev/null +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c @@ -0,0 +1,919 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "vk_nir_lower_descriptor_heaps.h" + +#include "vk_sampler.h" + +#include "nir_builder.h" +#include "util/u_dynarray.h" +#include "util/hash_table.h" + +struct heap_mapping_ctx { + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info; + + /* Map from vk_sampler_state to indices */ + struct hash_table 
*sampler_idx_map; +}; + +static uint32_t +hash_sampler(const void *_s) +{ + const struct vk_sampler_state *s = _s; + return _mesa_hash_data(s, sizeof(*s)); +} + +static bool +samplers_equal(const void *_a, const void *_b) +{ + const struct vk_sampler_state *a = _a, *b = _b; + return !memcmp(a, b, sizeof(*a)); +} + +static uint32_t +add_embedded_sampler(struct heap_mapping_ctx *ctx, + const VkSamplerCreateInfo *info) +{ + struct vk_sampler_state key; + vk_sampler_state_init(&key, info); + + struct hash_entry *entry = + _mesa_hash_table_search(ctx->sampler_idx_map, &key); + if (entry != NULL) + return (uintptr_t)entry->data; + + uint32_t index = ctx->sampler_idx_map->entries; + + struct vk_sampler_state *state = + ralloc(ctx->sampler_idx_map, struct vk_sampler_state); + *state = key; + + _mesa_hash_table_insert(ctx->sampler_idx_map, state, + (void *)(uintptr_t)index); + + return index; +} + +static nir_def * +load_push(nir_builder *b, unsigned bit_size, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + return nir_load_push_constant(b, 1, bit_size, nir_imm_int(b, offset), + .range = offset + (bit_size / 8)); +} + +static nir_def * +load_indirect(nir_builder *b, unsigned bit_size, nir_def *addr, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + addr = nir_iadd_imm(b, addr, offset); + return nir_load_global_constant(b, 1, bit_size, addr); +} + +static nir_def * +load_shader_record(nir_builder *b, unsigned bit_size, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + nir_def *addr = nir_iadd_imm(b, nir_load_shader_record_ptr(b), offset); + return nir_load_global_constant(b, 1, bit_size, addr); +} + +static nir_def * +unpack_combined_image_sampler(nir_builder *b, nir_def *combined, + bool is_sampler) +{ + assert(combined->bit_size == 32); + if (is_sampler) + return nir_ubitfield_extract_imm(b, combined, 20, 12); + else + return 
nir_ubitfield_extract_imm(b, combined, 0, 20); +} + +nir_def * +vk_build_descriptor_heap_offset(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + VkSpirvResourceTypeFlagBitsEXT resource_type, + uint32_t binding, nir_def *index, + bool is_sampler) +{ + assert(util_is_power_of_two_nonzero(resource_type)); + + if (index == NULL) + index = nir_imm_int(b, 0); + + assert(binding >= mapping->firstBinding); + const uint32_t rel_binding = binding - mapping->firstBinding; + assert(rel_binding < mapping->bindingCount); + nir_def *shader_index = nir_iadd_imm(b, index, rel_binding); + + const bool is_sampled_image = + resource_type == VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT; + + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: { + const VkDescriptorMappingSourceConstantOffsetEXT *data = + &mapping->sourceData.constantOffset; + + uint32_t heap_offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + heap_offset = data->samplerHeapOffset; + } else { + array_stride = data->heapArrayStride; + heap_offset = data->heapOffset; + } + + return nir_iadd_imm(b, nir_imul_imm(b, shader_index, array_stride), + heap_offset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: { + const VkDescriptorMappingSourcePushIndexEXT *data = + &mapping->sourceData.pushIndex; + + nir_def *push_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + push_index = load_push(b, 32, data->samplerPushOffset); + } else { + push_index = load_push(b, 32, data->pushOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + push_index = unpack_combined_image_sampler(b, push_index, is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *push_offset = + nir_imul_imm(b, push_index, 
data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, push_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *push_offset = + nir_imul_imm(b, push_index, data->heapIndexStride); + offset = nir_iadd_imm(b, push_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: { + const VkDescriptorMappingSourceIndirectIndexEXT *data = + &mapping->sourceData.indirectIndex; + + nir_def *indirect_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + nir_def *indirect_addr = load_push(b, 64, data->samplerPushOffset); + indirect_index = load_indirect(b, 32, indirect_addr, + data->samplerAddressOffset); + } else { + nir_def *indirect_addr = load_push(b, 64, data->pushOffset); + indirect_index = load_indirect(b, 32, indirect_addr, + data->addressOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + indirect_index = unpack_combined_image_sampler(b, indirect_index, + is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, indirect_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->heapIndexStride); + offset = nir_iadd_imm(b, indirect_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + const VkDescriptorMappingSourceHeapDataEXT *data = + &mapping->sourceData.heapData; + return nir_iadd_imm(b, load_push(b, 32, data->pushOffset), + data->heapOffset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: 
{ + const VkDescriptorMappingSourceIndirectIndexArrayEXT *data = + &mapping->sourceData.indirectIndexArray; + + nir_def *indirect_addr; + uint32_t addr_offset; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + indirect_addr = load_push(b, 64, data->samplerPushOffset); + addr_offset = data->samplerAddressOffset; + } else { + indirect_addr = load_push(b, 64, data->pushOffset); + addr_offset = data->addressOffset; + } + + /* The shader index goes into the indirect. */ + indirect_addr = nir_iadd(b, indirect_addr, + nir_u2u64(b, nir_imul_imm(b, shader_index, 4))); + nir_def *indirect_index = load_indirect(b, 32, indirect_addr, + addr_offset); + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + indirect_index = unpack_combined_image_sampler(b, indirect_index, + is_sampler); + + if (is_sampled_image && is_sampler) { + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->samplerHeapIndexStride); + return nir_iadd_imm(b, indirect_offset, data->samplerHeapOffset); + } else { + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->heapIndexStride); + return nir_iadd_imm(b, indirect_offset, data->heapOffset); + } + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: { + const VkDescriptorMappingSourceShaderRecordIndexEXT *data = + &mapping->sourceData.shaderRecordIndex; + + nir_def *record_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + record_index = load_shader_record(b, 32, data->samplerShaderRecordOffset); + } else { + record_index = load_shader_record(b, 32, data->shaderRecordOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + record_index = unpack_combined_image_sampler(b, record_index, + is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *record_offset = + nir_imul_imm(b, record_index, 
data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, record_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *record_offset = + nir_imul_imm(b, record_index, data->heapIndexStride); + offset = nir_iadd_imm(b, record_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + default: + return NULL; + } +} + +nir_def * +vk_build_descriptor_heap_address(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + uint32_t binding, nir_def *index) +{ + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + return load_push(b, 64, mapping->sourceData.pushAddressOffset); + + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: { + const VkDescriptorMappingSourceIndirectAddressEXT *data = + &mapping->sourceData.indirectAddress; + + nir_def *addr = load_push(b, 64, data->pushOffset); + return load_indirect(b, 64, addr, data->addressOffset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + return nir_iadd_imm(b, nir_load_shader_record_ptr(b), + mapping->sourceData.shaderRecordDataOffset); + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: + return load_shader_record(b, 64, + mapping->sourceData.shaderRecordAddressOffset); + + default: + return NULL; + } +} + +static nir_deref_instr * +deref_get_root_cast(nir_deref_instr *deref) +{ + while (true) { + if (deref->deref_type == nir_deref_type_var) + return NULL; + + nir_deref_instr *parent = nir_src_as_deref(deref->parent); + if (!parent) + break; + + deref = parent; + } + assert(deref->deref_type == nir_deref_type_cast); + + return deref; +} + +static bool +deref_cast_is_heap_ptr(nir_deref_instr *deref) +{ + assert(deref->deref_type == nir_deref_type_cast); + nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent); + if (intrin == NULL) + return false; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: { + nir_deref_instr 
*deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + if (var == NULL || var->data.mode != nir_var_system_value) + return false; + + return var->data.location == SYSTEM_VALUE_SAMPLER_HEAP_PTR || + var->data.location == SYSTEM_VALUE_RESOURCE_HEAP_PTR; + } + + case nir_intrinsic_load_sampler_heap_ptr: + case nir_intrinsic_load_resource_heap_ptr: + return true; + + default: + return false; + } +} + +static bool +get_deref_resource_binding(nir_deref_instr *deref, + uint32_t *set, uint32_t *binding, + VkSpirvResourceTypeFlagBitsEXT *resource_type, + nir_def **index_out) +{ + nir_def *index = NULL; + if (deref->deref_type == nir_deref_type_array) { + index = deref->arr.index.ssa; + deref = nir_deref_instr_parent(deref); + } + + if (deref->deref_type != nir_deref_type_var) + return false; + + nir_variable *var = deref->var; + + if (var->data.mode != nir_var_uniform && var->data.mode != nir_var_image) + return false; + + /* This should only happen for internal meta shaders */ + if (var->data.resource_type == 0) + return false; + + *set = var->data.descriptor_set; + *binding = var->data.binding; + *resource_type = var->data.resource_type; + if (index_out != NULL) + *index_out = index; + + return true; +} + +static bool +get_buffer_resource_binding(nir_intrinsic_instr *desc_load, + uint32_t *set, uint32_t *binding, + VkSpirvResourceTypeFlagBitsEXT *resource_type) +{ + assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + while (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) + idx_intrin = nir_src_as_intrinsic(idx_intrin->src[0]); + + if (idx_intrin->intrinsic != nir_intrinsic_vulkan_resource_index) + return false; + + *set = nir_intrinsic_desc_set(idx_intrin); + *binding = nir_intrinsic_binding(idx_intrin); + *resource_type = nir_intrinsic_resource_type(idx_intrin); + + return true; +} + +static inline bool 
+buffer_resource_has_zero_index(nir_intrinsic_instr *desc_load) +{ + assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + if (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) + return false; + + assert(idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + if (!nir_src_is_const(idx_intrin->src[0])) + return false; + + return nir_src_as_uint(idx_intrin->src[0]) == 0; +} + +/* This assumes get_buffer_resource_binding() already succeeded */ +static nir_def * +build_buffer_resource_index(nir_builder *b, nir_intrinsic_instr *desc_load) +{ + assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + nir_def *index = nir_imm_int(b, 0); + while (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) { + index = nir_iadd(b, index, idx_intrin->src[1].ssa); + idx_intrin = nir_src_as_intrinsic(idx_intrin->src[0]); + } + + assert(idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + return nir_iadd(b, index, idx_intrin->src[0].ssa); +} + +/** Builds a buffer address for deref chain + * + * This assumes that you can chase the chain all the way back to the original + * vulkan_resource_index intrinsic. + * + * The cursor is not where you left it when this function returns. + */ +static nir_def * +build_buffer_addr_for_deref(nir_builder *b, nir_def *root_addr, + nir_deref_instr *deref, + nir_address_format addr_format) +{ + nir_deref_instr *parent = nir_deref_instr_parent(deref); + if (parent) { + nir_def *addr = + build_buffer_addr_for_deref(b, root_addr, parent, addr_format); + + b->cursor = nir_before_instr(&deref->instr); + return nir_explicit_io_address_from_deref(b, deref, addr, addr_format); + } + + return root_addr; +} + +/* The cursor is not where you left it when this function returns. 
*/ +static nir_def * +build_deref_heap_offset(nir_builder *b, nir_deref_instr *deref, + bool is_sampler, struct heap_mapping_ctx *ctx) +{ + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + nir_def *index; + if (get_deref_resource_binding(deref, &set, &binding, + &resource_type, &index)) { + if (ctx->info == NULL) + return NULL; + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + assert(mapping != NULL); + if (mapping == NULL) + return NULL; + + b->cursor = nir_before_instr(&deref->instr); + + if (index == NULL) + index = nir_imm_int(b, 0); + + return vk_build_descriptor_heap_offset(b, mapping, resource_type, + binding, index, is_sampler); + } else { + nir_deref_instr *root_cast = deref_get_root_cast(deref); + if (root_cast == NULL) + return false; + + if (!deref_cast_is_heap_ptr(root_cast)) + return NULL; + + /* We're building an offset. It starts at zero */ + b->cursor = nir_before_instr(&root_cast->instr); + nir_def *base_addr = nir_imm_int(b, 0); + + return build_buffer_addr_for_deref(b, base_addr, deref, + nir_address_format_32bit_offset); + } +} + +static const VkSamplerCreateInfo * +get_deref_embedded_sampler(nir_deref_instr *sampler, + struct heap_mapping_ctx *ctx) +{ + if (ctx->info == NULL) + return false; + + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + if (!get_deref_resource_binding(sampler, &set, &binding, + &resource_type, NULL)) + return NULL; + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + + return vk_descriptor_heap_embedded_sampler(mapping); +} + +static bool +lower_heaps_tex(nir_builder *b, nir_tex_instr *tex, + struct heap_mapping_ctx *ctx) +{ + const int texture_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + const int sampler_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + bool progress = 
false; + + nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src); + assert(texture != NULL); + + { + nir_def *heap_offset = build_deref_heap_offset(b, texture, false, ctx); + if (heap_offset != NULL) { + nir_src_rewrite(&tex->src[texture_src_idx].src, heap_offset); + tex->src[texture_src_idx].src_type = nir_tex_src_texture_heap_offset; + progress = true; + } + } + + if (nir_tex_instr_need_sampler(tex)) { + /* If this is a combined image/sampler, we may only have an image deref + * source and it's also the sampler deref. + */ + nir_deref_instr *sampler = sampler_src_idx < 0 ? texture : + nir_src_as_deref(tex->src[sampler_src_idx].src); + + const VkSamplerCreateInfo *embedded_sampler = + get_deref_embedded_sampler(sampler, ctx); + if (embedded_sampler == NULL) { + nir_def *heap_offset = build_deref_heap_offset(b, sampler, true, ctx); + if (heap_offset != NULL) { + nir_src_rewrite(&tex->src[sampler_src_idx].src, heap_offset); + tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_heap_offset; + progress = true; + } + } else { + nir_tex_instr_remove_src(tex, sampler_src_idx); + tex->embedded_sampler = true; + tex->sampler_index = add_embedded_sampler(ctx, embedded_sampler); + b->shader->info.uses_embedded_samplers = true; + progress = true; + } + } + + /* Remove unused sampler sources so we don't accidentally reference things + * that don't actually exist. The driver can add it back in if it really + * needs it. 
+ */ + if (progress && sampler_src_idx >= 0 && !nir_tex_instr_need_sampler(tex)) + nir_tex_instr_remove_src(tex, sampler_src_idx); + + return progress; +} + +static bool +lower_heaps_image(nir_builder *b, nir_intrinsic_instr *intrin, + struct heap_mapping_ctx *ctx) +{ + nir_deref_instr *image = nir_src_as_deref(intrin->src[0]); + nir_def *heap_offset = build_deref_heap_offset(b, image, false, ctx); + if (heap_offset == NULL) + return false; + + nir_rewrite_image_intrinsic(intrin, heap_offset, false); + + /* TODO: Roll this into nir_rewrite_image_intrinsic? */ + switch (intrin->intrinsic) { +#define CASE(op) \ + case nir_intrinsic_image_##op: \ + intrin->intrinsic = nir_intrinsic_image_heap_##op; \ + break; + CASE(load) + CASE(sparse_load) + CASE(store) + CASE(atomic) + CASE(atomic_swap) + CASE(size) + CASE(samples) + CASE(load_raw_intel) + CASE(store_raw_intel) + CASE(fragment_mask_load_amd) + CASE(store_block_agx) +#undef CASE + default: + UNREACHABLE("Unhanded image intrinsic"); + } + + return true; +} + +static bool +try_lower_heaps_deref_access(nir_builder *b, nir_intrinsic_instr *intrin, + struct heap_mapping_ctx *ctx) +{ + if (ctx->info == NULL) + return false; + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_deref_instr *root_cast = deref_get_root_cast(deref); + if (root_cast == NULL) + return false; + + nir_intrinsic_instr *desc_load = nir_src_as_intrinsic(root_cast->parent); + if (desc_load == NULL || + desc_load->intrinsic != nir_intrinsic_load_vulkan_descriptor) + return false; + + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + if (!get_buffer_resource_binding(desc_load, &set, &binding, &resource_type)) + return false; + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + if (mapping == NULL) + return false; + + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT: { + assert(nir_deref_mode_is(deref, 
nir_var_mem_ubo)); + assert(intrin->intrinsic == nir_intrinsic_load_deref); + assert(buffer_resource_has_zero_index(desc_load)); + + b->cursor = nir_before_instr(&desc_load->instr); + nir_def *offset = nir_imm_int(b, mapping->sourceData.pushDataOffset); + + /* This moves the cursor */ + offset = build_buffer_addr_for_deref(b, offset, deref, + nir_address_format_32bit_offset); + + const uint32_t range = mapping->sourceData.pushDataOffset + + glsl_get_explicit_size(root_cast->type, false); + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *val = nir_load_push_constant(b, intrin->def.num_components, + intrin->def.bit_size, + offset, .range = range); + nir_def_replace(&intrin->def, val); + return true; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + assert(nir_deref_mode_is(deref, nir_var_mem_ubo)); + assert(intrin->intrinsic == nir_intrinsic_load_deref); + assert(buffer_resource_has_zero_index(desc_load)); + + b->cursor = nir_before_instr(&desc_load->instr); + nir_def *heap_offset = + vk_build_descriptor_heap_offset(b, mapping, resource_type, binding, + NULL /* index */, + false /* is_sampler */); + + /* This moves the cursor */ + heap_offset = build_buffer_addr_for_deref(b, heap_offset, deref, + nir_address_format_32bit_offset); + + uint32_t align_mul, align_offset; + if (!nir_get_explicit_deref_align(deref, true, &align_mul, + &align_offset)) { + /* If we don't have an alignment from the deref, assume scalar */ + assert(glsl_type_is_vector_or_scalar(deref->type) || + glsl_type_is_matrix(deref->type)); + align_mul = glsl_type_is_boolean(deref->type) ? 
+ 4 : glsl_get_bit_size(deref->type) / 8; + align_offset = 0; + } + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *val = nir_load_resource_heap_data(b, intrin->def.num_components, + intrin->def.bit_size, + heap_offset, + .align_mul = align_mul, + .align_offset = align_offset); + nir_def_replace(&intrin->def, val); + return true; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: { + b->cursor = nir_before_instr(&desc_load->instr); + + nir_def *index = build_buffer_resource_index(b, desc_load); + nir_def *addr = + vk_build_descriptor_heap_address(b, mapping, binding, index); + + /* This moves the cursor */ + addr = build_buffer_addr_for_deref(b, addr, deref, + nir_address_format_64bit_global); + + b->cursor = nir_before_instr(&intrin->instr); + nir_lower_explicit_io_instr(b, intrin, addr, + nir_address_format_64bit_global); + return true; + } + + default: + /* We could also handle descriptor offset mapping sources here but + * there's no point. They access a real descriptor so we don't need to + * rewrite them to a different address format like we did for UBOs + * above. We can handle them in lower_load_descriptors. + */ + return false; + } +} + +static bool +lower_heaps_load_buffer_ptr(nir_builder *b, nir_intrinsic_instr *ptr_load, + struct heap_mapping_ctx *ctx) +{ + assert(ptr_load->intrinsic == nir_intrinsic_load_buffer_ptr_deref); + nir_deref_instr *deref = nir_src_as_deref(ptr_load->src[0]); + + nir_deref_instr *root_cast = deref_get_root_cast(deref); + if (!deref_cast_is_heap_ptr(root_cast)) + return false; + + /* We're building an offset. 
It starts at zero */ + b->cursor = nir_before_instr(&root_cast->instr); + nir_def *heap_base_offset = nir_imm_int(b, 0); + + /* This moves the cursor */ + nir_def *heap_offset = + build_buffer_addr_for_deref(b, heap_base_offset, deref, + nir_address_format_32bit_offset); + + const VkSpirvResourceTypeFlagBitsEXT resource_type = + nir_intrinsic_resource_type(ptr_load); + + b->cursor = nir_before_instr(&ptr_load->instr); + nir_def *desc = nir_load_heap_descriptor(b, ptr_load->def.num_components, + ptr_load->def.bit_size, + heap_offset, + .resource_type = resource_type); + + nir_def_replace(&ptr_load->def, desc); + + return true; +} + +static bool +lower_heaps_load_descriptor(nir_builder *b, nir_intrinsic_instr *desc_load, + struct heap_mapping_ctx *ctx) +{ + if (ctx->info == NULL) + return false; + + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + if (!get_buffer_resource_binding(desc_load, &set, &binding, &resource_type)) + return false; /* This must be old school variable pointers */ + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + if (mapping == NULL) + return false; /* Descriptor sets */ + + /* These have to be handled by try_lower_deref_access() */ + if (mapping->source == VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT || + mapping->source == VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT) { + assert(resource_type == VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT); + return false; + } + + b->cursor = nir_before_instr(&desc_load->instr); + nir_def *index = build_buffer_resource_index(b, desc_load); + + /* There are a few mapping sources that are allowed for SSBOs and + * acceleration structures which use addresses. If it's an acceleration + * structure or try_lower_deref_access() fails to catch it, we have to + * load the address and ask the driver to convert the address to a + * descriptor. 
+ */ + nir_def *addr = vk_build_descriptor_heap_address(b, mapping, binding, index); + if (addr != NULL) { + nir_def *desc = + nir_global_addr_to_descriptor(b, desc_load->def.num_components, + desc_load->def.bit_size, addr, + .resource_type = resource_type); + nir_def_replace(&desc_load->def, desc); + return true; + } + + /* Everything else is an offset */ + nir_def *heap_offset = + vk_build_descriptor_heap_offset(b, mapping, resource_type, binding, + index, false /* is_sampler */); + nir_def *desc = nir_load_heap_descriptor(b, desc_load->def.num_components, + desc_load->def.bit_size, + heap_offset, + .resource_type = resource_type); + + nir_def_replace(&desc_load->def, desc); + + return true; +} + +static bool +lower_heaps_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + struct heap_mapping_ctx *ctx) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: + case nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_deref_store_block_agx: + return lower_heaps_image(b, intrin, ctx); + + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_load_deref_block_intel: + case nir_intrinsic_store_deref_block_intel: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: + return try_lower_heaps_deref_access(b, intrin, ctx); + + case nir_intrinsic_load_buffer_ptr_deref: + return lower_heaps_load_buffer_ptr(b, intrin, ctx); + + case nir_intrinsic_load_vulkan_descriptor: + return lower_heaps_load_descriptor(b, intrin, ctx); + + default: + return false; + } +} + +static bool +lower_heaps_instr(nir_builder *b, nir_instr *instr, void 
*data) +{ + switch (instr->type) { + case nir_instr_type_tex: + return lower_heaps_tex(b, nir_instr_as_tex(instr), data); + case nir_instr_type_intrinsic: + return lower_heaps_intrin(b, nir_instr_as_intrinsic(instr), data); + default: + return false; + } +} + +bool +vk_nir_lower_descriptor_heaps( + nir_shader *nir, + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping, + struct vk_sampler_state_array *embedded_samplers_out) +{ + struct heap_mapping_ctx ctx = { + .info = mapping, + .sampler_idx_map = _mesa_hash_table_create(NULL, hash_sampler, + samplers_equal), + }; + + bool progress = + nir_shader_instructions_pass(nir, lower_heaps_instr, + nir_metadata_control_flow, &ctx); + + memset(embedded_samplers_out, 0, sizeof(*embedded_samplers_out)); + + const uint32_t embedded_sampler_count = ctx.sampler_idx_map->entries; + if (embedded_sampler_count > 0) { + embedded_samplers_out->sampler_count = embedded_sampler_count; + embedded_samplers_out->samplers = + malloc(embedded_sampler_count * sizeof(struct vk_sampler_state)); + hash_table_foreach(ctx.sampler_idx_map, entries) { + const struct vk_sampler_state *state = entries->key; + const uint32_t index = (uintptr_t)entries->data; + embedded_samplers_out->samplers[index] = *state; + } + } + + ralloc_free(ctx.sampler_idx_map); + + return progress; +} diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h new file mode 100644 index 00000000000..53f8b5fb4db --- /dev/null +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h @@ -0,0 +1,81 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef VK_NIR_LOWER_DESCRIPTOR_HEAP_MAPPINGS +#define VK_NIR_LOWER_DESCRIPTOR_HEAP_MAPPINGS + +#include "nir.h" +#include + +static inline const VkDescriptorSetAndBindingMappingEXT * +vk_descriptor_heap_mapping(const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + uint32_t set, uint32_t binding, + 
VkSpirvResourceTypeFlagBitsEXT resource_type) +{ + assert(util_is_power_of_two_nonzero(resource_type)); + + for (uint32_t i = 0; i < info->mappingCount; i++) { + const VkDescriptorSetAndBindingMappingEXT *mapping = &info->pMappings[i]; + const uint32_t begin_binding = mapping->firstBinding; + const uint32_t end_binding = + (mapping->firstBinding + mapping->bindingCount) < mapping->firstBinding ? + UINT32_MAX : (mapping->firstBinding + mapping->bindingCount - 1) ; + + if (mapping->descriptorSet == set && + binding >= begin_binding && binding <= end_binding && + mapping->resourceMask & resource_type) + return mapping; + } + + return NULL; +} + +static inline const VkSamplerCreateInfo * +vk_descriptor_heap_embedded_sampler(const VkDescriptorSetAndBindingMappingEXT *mapping) +{ + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: + return mapping->sourceData.constantOffset.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: + return mapping->sourceData.pushIndex.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: + return mapping->sourceData.indirectIndex.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: + return mapping->sourceData.indirectIndexArray.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: + return mapping->sourceData.shaderRecordIndex.pEmbeddedSampler; + default: + return NULL; + } +} + +nir_def * +vk_build_descriptor_heap_offset(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + VkSpirvResourceTypeFlagBitsEXT resource_type, + uint32_t binding, nir_def *index, + bool is_sampler); +nir_def * +vk_build_descriptor_heap_address(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + uint32_t binding, nir_def *index); + +struct vk_sampler_state_array { + struct vk_sampler_state *samplers; + uint32_t sampler_count; +}; + +static inline void 
+vk_sampler_state_array_finish(struct vk_sampler_state_array *arr) +{ + free(arr->samplers); +} + +bool vk_nir_lower_descriptor_heaps( + nir_shader *nir, + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping, + struct vk_sampler_state_array *embedded_samplers_out); + +#endif From 1df60a41f6ddefabc2fc6fb3f3fe47c1db0da0a6 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 16 Jun 2025 14:46:14 -0400 Subject: [PATCH 17/32] vulkan: Support descriptor heaps in vk_nir_convert_ycbcr() The original version of the pass relied on derefs and passing bindings and indices to drivers through the callback. While this works, it's much more convenient with heaps to just take an index into the embedded descriptor table. --- src/vulkan/runtime/vk_nir_convert_ycbcr.c | 78 ++++++++++++++++------- src/vulkan/runtime/vk_nir_convert_ycbcr.h | 6 ++ 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.c b/src/vulkan/runtime/vk_nir_convert_ycbcr.c index 2f25ed6103b..e688fa8da42 100644 --- a/src/vulkan/runtime/vk_nir_convert_ycbcr.c +++ b/src/vulkan/runtime/vk_nir_convert_ycbcr.c @@ -151,19 +151,33 @@ struct ycbcr_state { nir_builder *builder; nir_def *image_size; nir_tex_instr *origin_tex; - nir_deref_instr *tex_deref; + nir_tex_src tex_handle; const struct vk_ycbcr_conversion_state *conversion; const struct vk_format_ycbcr_info *format_ycbcr_info; }; /* TODO: we should probably replace this with a push constant/uniform. 
*/ static nir_def * -get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture) +get_texture_size(struct ycbcr_state *state) { - if (!state->image_size) { - nir_builder *b = state->builder; - state->image_size = nir_i2f32(b, nir_txs(b, .texture_deref = texture)); - } + if (state->image_size) + return state->image_size; + + nir_builder *b = state->builder; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + + tex->op = nir_texop_txs; + tex->sampler_dim = state->origin_tex->sampler_dim, + tex->is_array = state->origin_tex->is_array, + tex->is_shadow = state->origin_tex->is_shadow, + tex->dest_type = nir_type_int32; + + tex->src[0] = state->tex_handle; + + nir_def_init(&tex->instr, &tex->def, nir_tex_instr_dest_size(tex), 32); + nir_builder_instr_insert(b, &tex->instr); + + state->image_size = nir_i2f32(b, &tex->def); return state->image_size; } @@ -185,7 +199,7 @@ implicit_downsampled_coords(struct ycbcr_state *state, { nir_builder *b = state->builder; const struct vk_ycbcr_conversion_state *conversion = state->conversion; - nir_def *image_size = get_texture_size(state, state->tex_deref); + nir_def *image_size = get_texture_size(state); nir_def *comp[4] = { NULL, }; int c; @@ -247,6 +261,7 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state, tex->is_new_style_shadow = old_tex->is_new_style_shadow; tex->component = old_tex->component; + tex->embedded_sampler = old_tex->embedded_sampler; tex->texture_index = old_tex->texture_index; tex->sampler_index = old_tex->sampler_index; tex->is_array = old_tex->is_array; @@ -294,25 +309,40 @@ lower_ycbcr_tex_instr(nir_builder *b, nir_tex_instr *tex, void *_state) tex->op == nir_texop_lod) return false; - int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); - assert(deref_src_idx >= 0); - nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + nir_tex_src tex_handle; + const struct vk_ycbcr_conversion_state *conversion; + if (tex->embedded_sampler) { + const 
int heap_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_texture_heap_offset); + tex_handle = tex->src[heap_src_idx]; - nir_variable *var = nir_deref_instr_get_variable(deref); - uint32_t set = var->data.descriptor_set; - uint32_t binding = var->data.binding; - - assert(tex->texture_index == 0); - unsigned array_index = 0; - if (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - if (!nir_src_is_const(deref->arr.index)) + conversion = state->cb(state->cb_data, + VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS, + tex->sampler_index, 0); + } else { + const int deref_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (deref_src_idx < 0) return false; - array_index = nir_src_as_uint(deref->arr.index); - } - const struct vk_ycbcr_conversion_state *conversion = - state->cb(state->cb_data, set, binding, array_index); + tex_handle = tex->src[deref_src_idx]; + nir_deref_instr *deref = nir_src_as_deref(tex_handle.src); + + nir_variable *var = nir_deref_instr_get_variable(deref); + uint32_t set = var->data.descriptor_set; + uint32_t binding = var->data.binding; + + assert(tex->texture_index == 0); + unsigned array_index = 0; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + if (!nir_src_is_const(deref->arr.index)) + return false; + array_index = nir_src_as_uint(deref->arr.index); + } + + conversion = state->cb(state->cb_data, set, binding, array_index); + } if (conversion == NULL) return false; @@ -357,7 +387,7 @@ lower_ycbcr_tex_instr(nir_builder *b, nir_tex_instr *tex, void *_state) struct ycbcr_state tex_state = { .builder = b, .origin_tex = tex, - .tex_deref = deref, + .tex_handle = tex_handle, .conversion = conversion, .format_ycbcr_info = format_ycbcr_info, }; diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.h b/src/vulkan/runtime/vk_nir_convert_ycbcr.h index 0ff1c1b3e01..915c9cbd465 100644 --- a/src/vulkan/runtime/vk_nir_convert_ycbcr.h +++ 
b/src/vulkan/runtime/vk_nir_convert_ycbcr.h @@ -40,6 +40,12 @@ nir_convert_ycbcr_to_rgb(nir_builder *b, struct vk_ycbcr_conversion; +/** Passed as the set parameter to nir_vk_ycbcr_conversion_lookup_cb() to + * indicate that embedded samplers are being used and that binding is the + * index in the embedded sampler table. + */ +#define VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS UINT32_MAX + typedef const struct vk_ycbcr_conversion_state * (*nir_vk_ycbcr_conversion_lookup_cb)(const void *data, uint32_t set, uint32_t binding, uint32_t array_index); From c855c5d99cf1a4e78b5197dde04ffc3d513e31e1 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 11 Jun 2025 18:14:04 -0400 Subject: [PATCH 18/32] vulkan/pipeline: Allow compiling compute/rt pipelines with a NULL layout --- src/vulkan/runtime/vk_pipeline.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index 164c33ea84f..6a82b6b716f 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -2348,11 +2348,13 @@ vk_get_compute_pipeline_compile_info(struct vk_pipeline_stage *stage, features_blake3); _mesa_blake3_update(&blake3_ctx, features_blake3, sizeof(features_blake3)); - for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { - if (pipeline_layout->set_layouts[i] != NULL) { - _mesa_blake3_update(&blake3_ctx, - pipeline_layout->set_layouts[i]->blake3, - sizeof(pipeline_layout->set_layouts[i]->blake3)); + if (pipeline_layout != NULL) { + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, + pipeline_layout->set_layouts[i]->blake3, + sizeof(pipeline_layout->set_layouts[i]->blake3)); + } } } if (push_range != NULL) @@ -2415,8 +2417,8 @@ vk_pipeline_compile_compute_stage(struct vk_device *device, .next_stage_mask = 0, .nir = nir, .robustness = &stage->precomp->rs, - .set_layout_count = 
pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout ? pipeline_layout->set_layouts : NULL, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -2798,8 +2800,8 @@ hash_rt_parameters(struct mesa_blake3 *blake3_ctx, _mesa_blake3_update(blake3_ctx, &shader_flags, sizeof(shader_flags)); _mesa_blake3_update(blake3_ctx, &rt_flags, sizeof(rt_flags)); - for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { - if (pipeline_layout->set_layouts[i] != NULL) { + if (pipeline_layout != NULL) { + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { _mesa_blake3_update(blake3_ctx, pipeline_layout->set_layouts[i]->blake3, sizeof(pipeline_layout->set_layouts[i]->blake3)); @@ -3209,8 +3211,8 @@ vk_pipeline_compile_rt_shader(struct vk_device *device, .next_stage_mask = 0, .nir = nir, .robustness = &stage->precomp->rs, - .set_layout_count = pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout != NULL ? pipeline_layout->set_layouts : NULL, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -3319,8 +3321,8 @@ vk_pipeline_compile_rt_shader_group(struct vk_device *device, .next_stage_mask = 0, .nir = vk_pipeline_precomp_shader_get_nir(precomp, nir_options), .robustness = &precomp->rs, - .set_layout_count = pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout != NULL ? pipeline_layout->set_layouts : NULL, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? 
push_range : NULL, }; From b37bf9c482aa99230249aa0dda078b3a82ab52ea Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 16 Jun 2025 17:02:50 -0400 Subject: [PATCH 19/32] vulkan/shader: Call vk_nir_lower_descriptor_heaps() Embedded samplers (if present) are passed to the driver as part of the vk_shader_compile_info --- src/vulkan/runtime/vk_shader.c | 41 +++++++++++++++++++++++++++++----- src/vulkan/runtime/vk_shader.h | 4 ++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c index 80431a5c046..d138ee60b67 100644 --- a/src/vulkan/runtime/vk_shader.c +++ b/src/vulkan/runtime/vk_shader.c @@ -29,6 +29,7 @@ #include "vk_descriptor_set_layout.h" #include "vk_device.h" #include "vk_nir.h" +#include "vk_nir_lower_descriptor_heaps.h" #include "vk_physical_device.h" #include "vk_physical_device_features.h" #include "vk_pipeline.h" @@ -245,7 +246,8 @@ cmp_stage_idx(const void *_a, const void *_b) static nir_shader * vk_shader_to_nir(struct vk_device *device, const VkShaderCreateInfoEXT *info, - const struct vk_pipeline_robustness_state *rs) + const struct vk_pipeline_robustness_state *rs, + struct vk_sampler_state_array *embedded_samplers_out) { const struct vk_device_shader_ops *ops = device->shader_ops; @@ -274,6 +276,19 @@ vk_shader_to_nir(struct vk_device *device, if (ops->preprocess_nir != NULL) ops->preprocess_nir(device->physical, nir, rs); + const VkShaderDescriptorSetAndBindingMappingInfoEXT *desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + + bool heaps_progress = false; + NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, + desc_map, embedded_samplers_out); + if (heaps_progress) { + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_uniform | nir_var_image, NULL); + NIR_PASS(_, nir, nir_opt_dce); + } + return nir; } @@ -286,6 +301,7 @@ vk_shader_compile_info_init(struct vk_shader_compile_info *info, struct set_layouts 
*set_layouts, const VkShaderCreateInfoEXT *vk_info, const struct vk_pipeline_robustness_state *rs, + const struct vk_sampler_state_array *es, nir_shader *nir) { for (uint32_t sl = 0; sl < vk_info->setLayoutCount; sl++) { @@ -301,6 +317,8 @@ vk_shader_compile_info_init(struct vk_shader_compile_info *info, .robustness = rs, .set_layout_count = vk_info->setLayoutCount, .set_layouts = set_layouts->set_layouts, + .embedded_sampler_count = es->sampler_count, + .embedded_samplers = es->samplers, .push_constant_range_count = vk_info->pushConstantRangeCount, .push_constant_ranges = vk_info->pPushConstantRanges, }; @@ -600,8 +618,10 @@ vk_common_CreateShadersEXT(VkDevice _device, .idx = i, }; } else { + struct vk_sampler_state_array embedded_samplers = {}; nir_shader *nir = vk_shader_to_nir(device, vk_info, - &vk_robustness_disabled); + &vk_robustness_disabled, + &embedded_samplers); if (nir == NULL) { result = vk_errorf(device, VK_ERROR_UNKNOWN, "Failed to compile shader to NIR"); @@ -611,12 +631,16 @@ vk_common_CreateShadersEXT(VkDevice _device, struct vk_shader_compile_info info; struct set_layouts set_layouts; vk_shader_compile_info_init(&info, &set_layouts, - vk_info, &vk_robustness_disabled, nir); + vk_info, &vk_robustness_disabled, + &embedded_samplers, nir); struct vk_shader *shader; result = vk_compile_shaders(device, 1, &info, NULL /* state */, NULL /* features */, pAllocator, &shader); + + vk_sampler_state_array_finish(&embedded_samplers); + if (result != VK_SUCCESS) break; @@ -636,6 +660,7 @@ vk_common_CreateShadersEXT(VkDevice _device, if (linked_count > 0) { struct set_layouts set_layouts[VK_MAX_LINKED_SHADER_STAGES]; struct vk_shader_compile_info infos[VK_MAX_LINKED_SHADER_STAGES]; + struct vk_sampler_state_array embedded_samplers[VK_MAX_LINKED_SHADER_STAGES]; VkResult result = VK_SUCCESS; /* Sort so we guarantee the driver always gets them in-order */ @@ -643,12 +668,14 @@ vk_common_CreateShadersEXT(VkDevice _device, /* Memset for easy error handling */ 
memset(infos, 0, sizeof(infos)); + memset(embedded_samplers, 0, sizeof(embedded_samplers)); for (uint32_t l = 0; l < linked_count; l++) { const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[linked[l].idx]; nir_shader *nir = vk_shader_to_nir(device, vk_info, - &vk_robustness_disabled); + &vk_robustness_disabled, + &embedded_samplers[l]); if (nir == NULL) { result = vk_errorf(device, VK_ERROR_UNKNOWN, "Failed to compile shader to NIR"); @@ -656,7 +683,8 @@ vk_common_CreateShadersEXT(VkDevice _device, } vk_shader_compile_info_init(&infos[l], &set_layouts[l], - vk_info, &vk_robustness_disabled, nir); + vk_info, &vk_robustness_disabled, + &embedded_samplers[l], nir); } if (result == VK_SUCCESS) { @@ -675,6 +703,9 @@ vk_common_CreateShadersEXT(VkDevice _device, } } + for (uint32_t l = 0; l < linked_count; l++) + vk_sampler_state_array_finish(&embedded_samplers[l]); + if (first_fail_or_success == VK_SUCCESS) first_fail_or_success = result; } diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h index b4334ef171d..5670c5bd3be 100644 --- a/src/vulkan/runtime/vk_shader.h +++ b/src/vulkan/runtime/vk_shader.h @@ -46,6 +46,7 @@ struct vk_features; struct vk_physical_device; struct vk_pipeline; struct vk_pipeline_robustness_state; +struct vk_sampler_state; bool vk_validate_shader_binaries(void); @@ -94,6 +95,9 @@ struct vk_shader_compile_info { uint32_t set_layout_count; struct vk_descriptor_set_layout * const *set_layouts; + uint32_t embedded_sampler_count; + const struct vk_sampler_state* embedded_samplers; + uint32_t push_constant_range_count; const VkPushConstantRange *push_constant_ranges; }; From e87904a693af51b5744c7a23a0c33af6fe6ab946 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 11 Jun 2025 19:34:07 -0400 Subject: [PATCH 20/32] vulkan: Add a vk_hash_descriptor_heap_mappings() helper --- .../runtime/vk_nir_lower_descriptor_heaps.c | 148 ++++++++++++++++++ .../runtime/vk_nir_lower_descriptor_heaps.h | 5 + 2 files changed, 153 
insertions(+) diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c index 9b2adc8f5a5..065890546fe 100644 --- a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c @@ -11,6 +11,154 @@ #include "util/u_dynarray.h" #include "util/hash_table.h" +static void +hash_embedded_sampler(struct mesa_blake3 *ctx, + const struct VkSamplerCreateInfo *info) +{ + if (info != NULL) { + struct vk_sampler_state state; + vk_sampler_state_init(&state, info); + _mesa_blake3_update(ctx, &state, sizeof(state)); + } +} + +void +vk_hash_descriptor_heap_mappings( + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + blake3_hash blake3_out) +{ + struct mesa_blake3 ctx; + _mesa_blake3_init(&ctx); + +#define HASH(ctx, x) _mesa_blake3_update(ctx, &(x), sizeof(x)) + + for (uint32_t i = 0; i < info->mappingCount; i++) { + const VkDescriptorSetAndBindingMappingEXT *mapping = &info->pMappings[i]; + HASH(&ctx, mapping->descriptorSet); + HASH(&ctx, mapping->firstBinding); + HASH(&ctx, mapping->bindingCount); + HASH(&ctx, mapping->resourceMask); + HASH(&ctx, mapping->source); + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: { + const VkDescriptorMappingSourceConstantOffsetEXT *data = + &mapping->sourceData.constantOffset; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: { + const VkDescriptorMappingSourcePushIndexEXT *data = + &mapping->sourceData.pushIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + 
HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: { + const VkDescriptorMappingSourceIndirectIndexEXT *data = + &mapping->sourceData.indirectIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerAddressOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + const VkDescriptorMappingSourceHeapDataEXT *data = + &mapping->sourceData.heapData; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT: + HASH(&ctx, mapping->sourceData.pushDataOffset); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + HASH(&ctx, mapping->sourceData.pushAddressOffset); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: { + const VkDescriptorMappingSourceIndirectAddressEXT *data = + &mapping->sourceData.indirectAddress; + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: { + const VkDescriptorMappingSourceIndirectIndexArrayEXT *data = + &mapping->sourceData.indirectIndexArray; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + HASH(&ctx, data->heapIndexStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, 
data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerAddressOffset); + HASH(&ctx, data->samplerHeapIndexStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: { + const VkDescriptorMappingSourceShaderRecordIndexEXT *data = + &mapping->sourceData.shaderRecordIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->shaderRecordOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerShaderRecordOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + HASH(&ctx, mapping->sourceData.shaderRecordDataOffset); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: + HASH(&ctx, mapping->sourceData.shaderRecordAddressOffset); + break; + + default: + UNREACHABLE("Unsupported descriptor mapping source"); + } + } + + _mesa_blake3_final(&ctx, blake3_out); +} + +#undef HASH + struct heap_mapping_ctx { const VkShaderDescriptorSetAndBindingMappingInfoEXT *info; diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h index 53f8b5fb4db..93a79543291 100644 --- a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h @@ -8,6 +8,7 @@ #include "nir.h" #include +#include "util/mesa-blake3.h" static inline const VkDescriptorSetAndBindingMappingEXT * vk_descriptor_heap_mapping(const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, @@ -51,6 +52,10 @@ vk_descriptor_heap_embedded_sampler(const VkDescriptorSetAndBindingMappingEXT *m } } +void vk_hash_descriptor_heap_mappings( + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + blake3_hash blake3_out); + nir_def * 
vk_build_descriptor_heap_offset(nir_builder *b, const VkDescriptorSetAndBindingMappingEXT *mapping, From 0b26186792b415b75d3566b58e2aa49b25671070 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 17 Jun 2025 11:42:06 -0400 Subject: [PATCH 21/32] vulkan/pipeline: Reorder vk_pipeline_precomp_shader_deserialize() --- src/vulkan/runtime/vk_pipeline.c | 40 ++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index 6a82b6b716f..5d8f3fa4903 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -790,6 +790,25 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, const void *key_data, size_t key_size, struct blob_reader *blob) { + const mesa_shader_stage stage = blob_read_uint32(blob); + + struct vk_pipeline_robustness_state rs; + blob_copy_bytes(blob, &rs, sizeof(rs)); + + struct vk_pipeline_tess_info tess; + blob_copy_bytes(blob, &tess, sizeof(tess)); + + blake3_hash blake3; + blob_copy_bytes(blob, blake3, sizeof(blake3)); + + uint64_t nir_size = blob_read_uint64(blob); + if (blob->overrun || nir_size > SIZE_MAX) + return NULL; + + const void *nir_data = blob_read_bytes(blob, nir_size); + if (blob->overrun) + return NULL; + struct vk_pipeline_precomp_shader *shader = vk_zalloc(&device->alloc, sizeof(*shader), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -804,28 +823,19 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, shader->cache_key, sizeof(shader->cache_key)); - shader->stage = blob_read_uint32(blob); - blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs)); - blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess)); - - uint64_t nir_size = blob_read_uint64(blob); - if (blob->overrun || nir_size > SIZE_MAX) - goto fail_shader; - - const void *nir_data = blob_read_bytes(blob, nir_size); - if (blob->overrun) - goto fail_shader; + shader->stage = stage; + shader->rs = rs; + shader->tess = 
tess; + memcpy(shader->cache_key, blake3, sizeof(blake3)); blob_init(&shader->nir_blob); blob_write_bytes(&shader->nir_blob, nir_data, nir_size); if (shader->nir_blob.out_of_memory) - goto fail_nir_blob; + goto fail_cache_obj; return &shader->cache_obj; -fail_nir_blob: - blob_finish(&shader->nir_blob); -fail_shader: +fail_cache_obj: vk_pipeline_cache_object_finish(&shader->cache_obj); vk_free(&device->alloc, shader); From c283dc94c534a56318afe9cc4d3aadcf51773049 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 12 Jun 2025 09:33:29 -0400 Subject: [PATCH 22/32] vulkan/pipeline: Call vk_nir_lower_descriptor_heaps() As with vk_shader_object, vk_nir_lower_descriptor_heaps() is called right after the driver preprocess step. The resulting mapping and embedded samplers are then baked into the pre-compile shader. --- src/vulkan/runtime/vk_pipeline.c | 84 ++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 10 deletions(-) diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index 5d8f3fa4903..cf31cebb98f 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -31,9 +31,11 @@ #include "vk_graphics_state.h" #include "vk_log.h" #include "vk_nir.h" +#include "vk_nir_lower_descriptor_heaps.h" #include "vk_physical_device.h" #include "vk_physical_device_features.h" #include "vk_pipeline_layout.h" +#include "vk_sampler.h" #include "vk_shader.h" #include "vk_shader_module.h" #include "vk_util.h" @@ -296,6 +298,16 @@ vk_pipeline_hash_shader_stage_blake3(VkPipelineCreateFlags2KHR pipeline_flags, info->pSpecializationInfo->dataSize); } + const VkShaderDescriptorSetAndBindingMappingInfoEXT *desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + if (desc_map != NULL) { + blake3_hash desc_map_blake3; + vk_hash_descriptor_heap_mappings(desc_map, desc_map_blake3); + + _mesa_blake3_update(&ctx, desc_map_blake3, sizeof(desc_map_blake3)); + } + uint32_t 
req_subgroup_size = get_required_subgroup_size(info); _mesa_blake3_update(&ctx, &req_subgroup_size, sizeof(req_subgroup_size)); @@ -700,6 +712,9 @@ struct vk_pipeline_precomp_shader { /* Tessellation info if the shader is a tessellation shader */ struct vk_pipeline_tess_info tess; + uint32_t embedded_sampler_count; + struct vk_sampler_state *embedded_samplers; + struct blob nir_blob; }; @@ -730,7 +745,9 @@ static struct vk_pipeline_precomp_shader * vk_pipeline_precomp_shader_create(struct vk_device *device, const void *key_data, size_t key_size, const struct vk_pipeline_robustness_state *rs, - nir_shader *nir) + nir_shader *nir, + const uint32_t embedded_sampler_count, + const struct vk_sampler_state *embedded_samplers) { struct blob blob; blob_init(&blob); @@ -740,10 +757,12 @@ vk_pipeline_precomp_shader_create(struct vk_device *device, if (blob.out_of_memory) goto fail_blob; - struct vk_pipeline_precomp_shader *shader = - vk_zalloc(&device->alloc, sizeof(*shader), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (shader == NULL) + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_pipeline_precomp_shader, shader, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_sampler_state, samplers, + embedded_sampler_count); + if (!vk_multialloc_zalloc(&ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) goto fail_blob; assert(sizeof(shader->cache_key) == key_size); @@ -759,6 +778,11 @@ vk_pipeline_precomp_shader_create(struct vk_device *device, vk_pipeline_gather_nir_tess_info(nir, &shader->tess); + shader->embedded_sampler_count = embedded_sampler_count; + shader->embedded_samplers = samplers; + for (uint32_t i = 0; i < embedded_sampler_count; i++) + shader->embedded_samplers[i] = embedded_samplers[i]; + shader->nir_blob = blob; return shader; @@ -779,6 +803,10 @@ vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj, blob_write_uint32(blob, shader->stage); blob_write_bytes(blob, &shader->rs, sizeof(shader->rs)); blob_write_bytes(blob, &shader->tess, 
sizeof(shader->tess)); + blob_write_uint32(blob, shader->embedded_sampler_count); + blob_write_bytes(blob, shader->embedded_samplers, + shader->embedded_sampler_count * + sizeof(*shader->embedded_samplers)); blob_write_uint64(blob, shader->nir_blob.size); blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size); @@ -798,6 +826,11 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, struct vk_pipeline_tess_info tess; blob_copy_bytes(blob, &tess, sizeof(tess)); + const uint32_t embedded_sampler_count = blob_read_uint32(blob); + const struct vk_sampler_state *embedded_samplers = + blob_read_bytes(blob, embedded_sampler_count * + sizeof(*embedded_samplers)); + blake3_hash blake3; blob_copy_bytes(blob, blake3, sizeof(blake3)); @@ -809,10 +842,12 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, if (blob->overrun) return NULL; - struct vk_pipeline_precomp_shader *shader = - vk_zalloc(&device->alloc, sizeof(*shader), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (shader == NULL) + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_pipeline_precomp_shader, shader, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_sampler_state, samplers, + embedded_sampler_count); + if (!vk_multialloc_zalloc(&ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) return NULL; assert(sizeof(shader->cache_key) == key_size); @@ -826,6 +861,12 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, shader->stage = stage; shader->rs = rs; shader->tess = tess; + + shader->embedded_sampler_count = embedded_sampler_count; + shader->embedded_samplers = samplers; + for (uint32_t i = 0; i < embedded_sampler_count; i++) + shader->embedded_samplers[i] = embedded_samplers[i]; + memcpy(shader->cache_key, blake3, sizeof(blake3)); blob_init(&shader->nir_blob); @@ -967,10 +1008,25 @@ vk_pipeline_precompile_shader(struct vk_device *device, if (ops->preprocess_nir != NULL) ops->preprocess_nir(device->physical, nir, &rs); + const 
VkShaderDescriptorSetAndBindingMappingInfoEXT *desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + struct vk_sampler_state_array embedded_samplers; + bool heaps_progress = false; + NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, + desc_map, &embedded_samplers); + if (heaps_progress) { + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_uniform | nir_var_image, NULL); + NIR_PASS(_, nir, nir_opt_dce); + } + stage->precomp = vk_pipeline_precomp_shader_create(device, stage->precomp_key, sizeof(stage->precomp_key), - &rs, nir); + &rs, nir, + embedded_samplers.sampler_count, + embedded_samplers.samplers); ralloc_free(nir); if (stage->precomp == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1792,6 +1848,8 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, .robustness = &stage->precomp->rs, .set_layout_count = compile_info->set_layout_count, .set_layouts = compile_info->set_layouts, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -2429,6 +2487,8 @@ vk_pipeline_compile_compute_stage(struct vk_device *device, .robustness = &stage->precomp->rs, .set_layout_count = pipeline_layout ? pipeline_layout->set_count : 0, .set_layouts = pipeline_layout ? pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -3223,6 +3283,8 @@ vk_pipeline_compile_rt_shader(struct vk_device *device, .robustness = &stage->precomp->rs, .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, .set_layouts = pipeline_layout != NULL ? 
pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -3333,6 +3395,8 @@ vk_pipeline_compile_rt_shader_group(struct vk_device *device, .robustness = &precomp->rs, .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, .set_layouts = pipeline_layout != NULL ? pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = precomp->embedded_sampler_count, + .embedded_samplers = precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; From 03775e68852696f062e41c68946703e57bd01898 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 11 Jun 2025 14:30:56 -0400 Subject: [PATCH 23/32] vulkan: Add a common implementation of GetPhysicalDeviceDescriptorSizeKHR --- src/vulkan/runtime/vk_physical_device.c | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/vulkan/runtime/vk_physical_device.c b/src/vulkan/runtime/vk_physical_device.c index ad11a03810a..d272d05979e 100644 --- a/src/vulkan/runtime/vk_physical_device.c +++ b/src/vulkan/runtime/vk_physical_device.c @@ -324,3 +324,34 @@ vk_common_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, return vk_outarray_status(&out); } + +VKAPI_ATTR VkDeviceSize VKAPI_CALL +vk_common_GetPhysicalDeviceDescriptorSizeEXT(VkPhysicalDevice physicalDevice, + VkDescriptorType descriptorType) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + switch (descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return pdevice->properties.samplerDescriptorSize; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM: + case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM: + return pdevice->properties.imageDescriptorSize; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV: + return pdevice->properties.bufferDescriptorSize; + + default: + UNREACHABLE("Invalid descriptor type in GetPhysicalDeviceDescriptorSizeEXT"); + return 0; + } +} From fed12a17bb6aa7ee873431be6062a4e585bd43cb Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 13 Jun 2025 13:21:31 -0400 Subject: [PATCH 24/32] vulkan: Add a no-op implementation of [Un]RegisterCustomBorderColor() --- src/vulkan/runtime/vk_device.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/vulkan/runtime/vk_device.c b/src/vulkan/runtime/vk_device.c index a2ffe734ff9..09ee230b5a8 100644 --- a/src/vulkan/runtime/vk_device.c +++ b/src/vulkan/runtime/vk_device.c @@ -636,6 +636,23 @@ vk_common_DeviceWaitIdle(VkDevice _device) return VK_SUCCESS; } +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_RegisterCustomBorderColorEXT(VkDevice device, + const VkSamplerCustomBorderColorCreateInfoEXT* pBorderColor, + VkBool32 requestIndex, + uint32_t *pIndex) +{ + if (requestIndex) + *pIndex = 0; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_UnregisterCustomBorderColorEXT(VkDevice device, + uint32_t index) +{ } + VkResult vk_device_copy_semaphore_payloads(struct vk_device *device, uint32_t wait_semaphore_count, From 1b8394a374d06947bddb697ae3f711d5450a6b4f Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 11 Jun 2025 13:41:09 -0400 Subject: [PATCH 25/32] vulkan: Add a vk_buffer_address_range() helper --- src/vulkan/runtime/vk_buffer.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/vulkan/runtime/vk_buffer.h b/src/vulkan/runtime/vk_buffer.h index 
d9b7330da85..73abba977b5 100644 --- a/src/vulkan/runtime/vk_buffer.h +++ b/src/vulkan/runtime/vk_buffer.h @@ -86,6 +86,23 @@ vk_buffer_range(const struct vk_buffer *buffer, } } +static inline VkDeviceAddressRangeEXT +vk_buffer_address_range(const struct vk_buffer *buffer, + VkDeviceSize offset, VkDeviceSize range) +{ + /* Since we're returning a size along with the address, it's safe for this + * helper to automatically handle null descriptor cases by returning a zero + * address and size. + */ + if (buffer == NULL || range == 0) + return (VkDeviceAddressRangeEXT) { .size = 0 }; + + return (VkDeviceAddressRangeEXT) { + .address = vk_buffer_address(buffer, offset), + .size = vk_buffer_range(buffer, offset, range), + }; +} + #ifdef __cplusplus } #endif From f953f89f7e75310b9f20b2d42c10a09501b7a8c2 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 5 Sep 2025 22:53:05 -0400 Subject: [PATCH 26/32] SPIR-V\ hacks --- src/compiler/spirv/spirv_to_nir.c | 4 +++ src/compiler/spirv/vtn_variables.c | 22 ++++++++++++---- .../runtime/vk_nir_lower_descriptor_heaps.c | 26 ++++++++++++++++--- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index c4c7bd99cf5..3d8b3900281 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1378,6 +1378,10 @@ vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type, */ return b->shader->info.has_transform_feedback_varyings; + case vtn_variable_mode_uniform: + /* These are used for descriptor heaps in Vulkan */ + return b->options->environment == NIR_SPIRV_VULKAN; + case vtn_variable_mode_ssbo: case vtn_variable_mode_phys_ssbo: case vtn_variable_mode_ubo: diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 837ad46512f..dc962438724 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -1341,12 +1341,12 @@ 
vtn_get_builtin_location(struct vtn_builder *b, break; case SpvBuiltInSamplerHeapEXT: + vtn_assert(*mode == nir_var_uniform); *location = SYSTEM_VALUE_SAMPLER_HEAP_PTR; - set_mode_system_value(b, mode); break; case SpvBuiltInResourceHeapEXT: + vtn_assert(*mode == nir_var_uniform); *location = SYSTEM_VALUE_RESOURCE_HEAP_PTR; - set_mode_system_value(b, mode); break; default: @@ -1797,6 +1797,13 @@ vtn_storage_class_to_mode(struct vtn_builder *b, nir_mode = nir_var_mem_global; break; case SpvStorageClassUniformConstant: + /* This can happen with descriptor heaps and it's UBO */ + if (interface_type == NULL) { + mode = vtn_variable_mode_uniform; + nir_mode = nir_var_uniform; + break; + } + /* interface_type is only NULL when OpTypeForwardPointer is used and * OpTypeForwardPointer can only be used for struct types, not images or * acceleration structures. @@ -1804,8 +1811,7 @@ vtn_storage_class_to_mode(struct vtn_builder *b, if (interface_type) interface_type = vtn_type_without_array(interface_type); - if (interface_type && - interface_type->base_type == vtn_base_type_image && + if (interface_type->base_type == vtn_base_type_image && glsl_type_is_image(interface_type->glsl_image)) { mode = vtn_variable_mode_image; nir_mode = nir_var_image; @@ -2790,7 +2796,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const bool untyped = opcode == SpvOpUntypedVariableKHR; struct vtn_type *ptr_type = vtn_get_type(b, w[1]); - struct vtn_type *data_type = untyped ? vtn_get_type(b, w[4]) : ptr_type->pointed; + struct vtn_type *data_type = + untyped && count > 4 ? 
vtn_get_type(b, w[4]) : ptr_type->pointed; + if (data_type == NULL) { + data_type = vtn_zalloc(b, struct vtn_type); + data_type->base_type = vtn_base_type_void; + data_type->type = glsl_void_type(); + } SpvStorageClass storage_class = w[3]; diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c index 065890546fe..4cbe3e14089 100644 --- a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c @@ -479,6 +479,14 @@ deref_get_root_cast(nir_deref_instr *deref) return deref; } +static bool +var_is_heap_ptr(nir_variable *var) +{ + return var->data.mode == nir_var_uniform && + (var->data.location == SYSTEM_VALUE_SAMPLER_HEAP_PTR || + var->data.location == SYSTEM_VALUE_RESOURCE_HEAP_PTR); +} + static bool deref_cast_is_heap_ptr(nir_deref_instr *deref) { @@ -890,6 +898,18 @@ try_lower_heaps_deref_access(nir_builder *b, nir_intrinsic_instr *intrin, } } +static inline nir_variable * +get_variable(const nir_deref_instr *deref) +{ + while (deref->deref_type != nir_deref_type_var) { + deref = nir_deref_instr_parent(deref); + if (deref == NULL) + return NULL; + } + + return deref->var; +} + static bool lower_heaps_load_buffer_ptr(nir_builder *b, nir_intrinsic_instr *ptr_load, struct heap_mapping_ctx *ctx) @@ -897,12 +917,12 @@ lower_heaps_load_buffer_ptr(nir_builder *b, nir_intrinsic_instr *ptr_load, assert(ptr_load->intrinsic == nir_intrinsic_load_buffer_ptr_deref); nir_deref_instr *deref = nir_src_as_deref(ptr_load->src[0]); - nir_deref_instr *root_cast = deref_get_root_cast(deref); - if (!deref_cast_is_heap_ptr(root_cast)) + nir_variable *var = get_variable(deref); + if (var == NULL || !var_is_heap_ptr(var)) return false; /* We're building an offset. 
It starts at zero */ - b->cursor = nir_before_instr(&root_cast->instr); + b->cursor = nir_before_impl(b->impl); nir_def *heap_base_offset = nir_imm_int(b, 0); /* This moves the cursor */ From ddb3090c69010fabf22e25d72384f1f688ad5dab Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 3 Feb 2026 14:17:23 +0200 Subject: [PATCH 27/32] spirv: workaround invalid derefs generated by untyped pointers Clone the deref chains for given modes. This is ugly but there is something fundamentally wrong with the current approach for descriptors. Or we need to make the NIR validation a bit more relax to let this stuff slide. --- src/compiler/spirv/spirv_to_nir.c | 98 ++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 3d8b3900281..89def448463 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -681,6 +681,69 @@ spirv_to_gl_access_qualifier(struct vtn_builder *b, } } +static nir_deref_instr * +clone_deref_chain_for_mode(struct vtn_builder *b, + nir_def *value, + nir_variable_mode mode, + const glsl_type *type, + enum pipe_format image_format) +{ + assert(nir_def_is_deref(value)); + + nir_deref_instr *deref = nir_def_as_deref(value); + if (deref->modes & mode) + return deref; + + nir_deref_path path; + nir_deref_path_init(&path, deref, NULL); + + nir_deref_instr *r = NULL; + for (unsigned i = 0; path.path[i] != NULL; i++) { + nir_deref_instr *d = path.path[i]; + switch (d->deref_type) { + case nir_deref_type_var: { + nir_variable *var = nir_variable_clone(d->var, b->shader); + var->type = type; + var->data.mode = mode; + if (glsl_type_is_image(type)) + var->data.image.format = image_format; + nir_shader_add_variable(b->shader, var); + r = nir_build_deref_var(&b->nb, var); + r->modes = mode; + break; + } + + case nir_deref_type_array: + r = nir_build_deref_array(&b->nb, r, d->arr.index.ssa); + break; + + case 
nir_deref_type_ptr_as_array: + r = nir_build_deref_ptr_as_array(&b->nb, r, d->arr.index.ssa); + break; + + case nir_deref_type_struct: + r = nir_build_deref_struct(&b->nb, r, d->strct.index); + break; + + case nir_deref_type_cast: + r = nir_build_deref_cast_with_alignment(&b->nb, &r->def, mode, + d->type, + d->cast.ptr_stride, + d->cast.align_mul, + d->cast.align_offset); + break; + + default: + UNREACHABLE("invalid type"); + return NULL; + } + } + + nir_deref_path_finish(&path); + + return r; +} + static nir_deref_instr * vtn_get_image(struct vtn_builder *b, uint32_t value_id, enum gl_access_qualifier *access) @@ -689,10 +752,13 @@ vtn_get_image(struct vtn_builder *b, uint32_t value_id, vtn_assert(type->base_type == vtn_base_type_image); if (access) *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier); + nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ? nir_var_image : nir_var_uniform; - return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id), - mode, type->glsl_image, 0); + nir_def *value = &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, value_id), mode, + type->glsl_image, type->image_format)->def; + return nir_build_deref_cast(&b->nb, value, mode, type->glsl_image, 0); } static void @@ -710,8 +776,11 @@ vtn_get_sampler(struct vtn_builder *b, uint32_t value_id) { struct vtn_type *type = vtn_get_value_type(b, value_id); vtn_assert(type->base_type == vtn_base_type_sampler); - return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id), - nir_var_uniform, glsl_bare_sampler_type(), 0); + nir_def *value = &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, value_id), nir_var_uniform, + glsl_bare_sampler_type(), PIPE_FORMAT_NONE)->def; + return nir_build_deref_cast(&b->nb, value, nir_var_uniform, + glsl_bare_sampler_type(), 0); } nir_def * @@ -738,6 +807,11 @@ vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id) struct vtn_type *type = vtn_get_value_type(b, value_id); vtn_assert(type->base_type == 
vtn_base_type_sampled_image); nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id); + nir_alu_instr *si_alu_vec2 = nir_def_as_alu(si_vec2); + assert(si_alu_vec2->src[0].swizzle[0] == 0); + assert(si_alu_vec2->src[1].swizzle[0] == 0); + nir_def *image = si_alu_vec2->src[0].src.ssa; + nir_def *sampler = si_alu_vec2->src[1].src.ssa; /* Even though this is a sampled image, we can end up here with a storage * image because OpenCL doesn't distinguish between the two. @@ -747,11 +821,17 @@ vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id) nir_var_image : nir_var_uniform; struct vtn_sampled_image si = { NULL, }; - si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0), - image_mode, image_type, 0); - si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1), - nir_var_uniform, - glsl_bare_sampler_type(), 0); + si.image = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, image, image_mode, image_type, type->image_format)->def, + image_mode, image_type, 0); + si.sampler = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, sampler, nir_var_uniform, glsl_bare_sampler_type(), PIPE_FORMAT_NONE)->def, + nir_var_uniform, + glsl_bare_sampler_type(), 0); return si; } From 097e0d8d30be4e70a34aeb7ffd611b91f1744446 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 5 Feb 2026 18:02:21 +0200 Subject: [PATCH 28/32] spirv: handle OpUntypedImageTexelPointerEXT Signed-off-by: Lionel Landwerlin --- src/compiler/spirv/spirv_to_nir.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 89def448463..3128a428049 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -4311,6 +4311,22 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, val->image->sample = vtn_get_nir_ssa(b, w[5]); val->image->lod = nir_imm_int(&b->nb, 0); return; + } else if (opcode == 
SpvOpUntypedImageTexelPointerEXT) { + struct vtn_type *type = vtn_get_value_type(b, w[3]); + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = vtn_alloc(b, struct vtn_image_pointer); + + val->image->image = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, w[4]), nir_var_image, type->glsl_image, type->image_format)->def, + nir_var_image, + type->glsl_image, 0); + val->image->coord = get_image_coord(b, w[5]); + val->image->sample = vtn_get_nir_ssa(b, w[6]); + val->image->lod = nir_imm_int(&b->nb, 0); + return; } struct vtn_image_pointer image; @@ -6846,6 +6862,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpImageTexelPointer: case SpvOpImageQueryFormat: case SpvOpImageQueryOrder: + case SpvOpUntypedImageTexelPointerEXT: vtn_handle_image(b, opcode, w, count); break; From b7503886ac361adaa2b2b18f46d91e7f4abd713f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 11 Nov 2025 13:10:25 +0200 Subject: [PATCH 29/32] fixup: vulkan,nir: deal with untyped heap image variables --- .../runtime/vk_nir_lower_descriptor_heaps.c | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c index 4cbe3e14089..479cbcca1e1 100644 --- a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c @@ -461,6 +461,21 @@ vk_build_descriptor_heap_address(nir_builder *b, } } +static nir_deref_instr * +deref_get_root_image_cast(nir_deref_instr *deref) +{ + while (true) { + nir_deref_instr *parent = nir_src_as_deref(deref->parent); + if (!parent || parent->deref_type == nir_deref_type_var) + break; + + deref = parent; + } + assert(deref->deref_type == nir_deref_type_cast); + + return deref; +} + static nir_deref_instr * deref_get_root_cast(nir_deref_instr *deref) { @@ -653,11 +668,15 @@ 
build_deref_heap_offset(nir_builder *b, nir_deref_instr *deref, return vk_build_descriptor_heap_offset(b, mapping, resource_type, binding, index, is_sampler); } else { - nir_deref_instr *root_cast = deref_get_root_cast(deref); + nir_deref_instr *root_cast = deref_get_root_image_cast(deref); if (root_cast == NULL) - return false; + return NULL; - if (!deref_cast_is_heap_ptr(root_cast)) + nir_variable *var = nir_deref_instr_get_variable(nir_deref_instr_parent(root_cast)); + assert(var != NULL); + if (var->data.mode != nir_var_uniform || + (var->data.location != SYSTEM_VALUE_SAMPLER_HEAP_PTR && + var->data.location != SYSTEM_VALUE_RESOURCE_HEAP_PTR)) return NULL; /* We're building an offset. It starts at zero */ From 0ff3e80e99a5f9a1ef99685d41299ec15cffa36e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 14 Aug 2025 14:31:00 +0300 Subject: [PATCH 30/32] nir/spirv/vulkan: track usage of descriptor heap in shaders Signed-off-by: Lionel Landwerlin --- src/compiler/nir/nir_gather_info.c | 36 ++++++++++++++++++++++++++++++ src/compiler/nir/nir_print.c | 1 + src/compiler/shader_info.h | 7 +++++- src/compiler/spirv/spirv_to_nir.c | 4 ++++ src/vulkan/runtime/vk_pipeline.c | 1 + src/vulkan/runtime/vk_shader.c | 1 + 6 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 4d04549c7f0..86d1371e23a 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -396,6 +396,7 @@ intrinsic_is_bindless(nir_intrinsic_instr *instr) case nir_intrinsic_bindless_image_atomic_swap: case nir_intrinsic_bindless_image_descriptor_amd: case nir_intrinsic_bindless_image_format: + case nir_intrinsic_bindless_image_levels: case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_load_raw_intel: case nir_intrinsic_bindless_image_order: @@ -413,6 +414,36 @@ intrinsic_is_bindless(nir_intrinsic_instr *instr) return false; } +static bool 
+intrinsic_is_heap(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_global_addr_to_descriptor: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_descriptor_amd: + case nir_intrinsic_image_heap_format: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_load_raw_intel: + case nir_intrinsic_image_heap_order: + case nir_intrinsic_image_heap_samples: + case nir_intrinsic_image_heap_samples_identical: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_store_raw_intel: + case nir_intrinsic_load_buffer_ptr_deref: + case nir_intrinsic_load_heap_descriptor: + case nir_intrinsic_load_resource_heap_data: + case nir_intrinsic_load_sampler_heap_ptr: + return true; + default: + break; + } + return false; +} + static void gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) { @@ -883,6 +914,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) } default: + shader->info.use_descriptor_heap |= intrinsic_is_heap(instr); shader->info.uses_bindless |= intrinsic_is_bindless(instr); if (nir_intrinsic_writes_external_memory(instr)) shader->info.writes_memory = true; @@ -944,6 +976,10 @@ gather_tex_info(nir_tex_instr *instr, nir_shader *shader) nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle) != -1) shader->info.uses_bindless = true; + if (nir_tex_instr_src_index(instr, nir_tex_src_texture_heap_offset) != -1 || + nir_tex_instr_src_index(instr, nir_tex_src_sampler_heap_offset) != -1) + shader->info.use_descriptor_heap = true; + if (instr->embedded_sampler) shader->info.uses_embedded_samplers = true; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 5104e164569..af3fcf10dd4 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ 
-2961,6 +2961,7 @@ print_shader_info(const struct shader_info *info, FILE *fp) print_nz_bool(fp, "flrp_lowered", info->flrp_lowered); print_nz_bool(fp, "io_lowered", info->io_lowered); print_nz_bool(fp, "writes_memory", info->writes_memory); + print_nz_bool(fp, "use_descriptor_heap", info->use_descriptor_heap); print_nz_unsigned(fp, "derivative_group", info->derivative_group); switch (info->stage) { diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index 65e95699345..c71bb1fc8b6 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -338,7 +338,12 @@ typedef struct shader_info { * generate NaNs, and the only way the GPU saw one was to possibly feed it * in as a uniform. */ - bool use_legacy_math_rules; + bool use_legacy_math_rules:1; + + /** + * Whether the shader uses descriptor heaps + */ + bool use_descriptor_heap:1; /* * Arrangement of invocations used to calculate derivatives in diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 3128a428049..3dbb7b3474c 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -5532,6 +5532,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, spirv_capability_to_string(cap)); break; + case SpvCapabilityDescriptorHeapEXT: + b->shader->info.use_descriptor_heap = true; + break; + default: vtn_fail_if(!spirv_capabilities_get(&implemented_capabilities, cap), "Unimplemented SPIR-V capability: %s (%u)", diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index cf31cebb98f..9144df77585 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -1016,6 +1016,7 @@ vk_pipeline_precompile_shader(struct vk_device *device, NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, desc_map, &embedded_samplers); if (heaps_progress) { + nir->info.use_descriptor_heap = true; NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_uniform | nir_var_image, 
NULL); NIR_PASS(_, nir, nir_opt_dce); diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c index d138ee60b67..5ee20396712 100644 --- a/src/vulkan/runtime/vk_shader.c +++ b/src/vulkan/runtime/vk_shader.c @@ -284,6 +284,7 @@ vk_shader_to_nir(struct vk_device *device, NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, desc_map, embedded_samplers_out); if (heaps_progress) { + nir->info.use_descriptor_heap = true; NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_uniform | nir_var_image, NULL); NIR_PASS(_, nir, nir_opt_dce); From 21cd47e5eddbaf5122235e91ef72567aff90ab11 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 17 Feb 2026 13:11:59 +0200 Subject: [PATCH 31/32] nir/lower_non_uniform: add heap support Signed-off-by: Lionel Landwerlin --- src/compiler/nir/nir_opt_non_uniform_access.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_non_uniform_access.c b/src/compiler/nir/nir_opt_non_uniform_access.c index ffe58e499de..a4ecabc8e78 100644 --- a/src/compiler/nir/nir_opt_non_uniform_access.c +++ b/src/compiler/nir/nir_opt_non_uniform_access.c @@ -24,10 +24,21 @@ #include "nir.h" #include "nir_builder.h" +#include "vulkan/vulkan_core.h" + static bool is_ubo_intrinsic(nir_intrinsic_instr *intrin) { - return intrin->intrinsic == nir_intrinsic_load_ubo; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + return true; + + case nir_intrinsic_load_buffer_ptr_deref: + return nir_intrinsic_resource_type(intrin) == VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + + default: + return false; + } } static bool @@ -40,6 +51,10 @@ is_ssbo_intrinsic(nir_intrinsic_instr *intrin) case nir_intrinsic_ssbo_atomic_swap: return true; + case nir_intrinsic_load_buffer_ptr_deref: + return nir_intrinsic_resource_type(intrin) == VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT || + nir_intrinsic_resource_type(intrin) == 
VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + default: return false; } From fc5c6b97502c09e191b5075306b27e82e4dc3aea Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 4 Feb 2026 17:23:00 +0200 Subject: [PATCH 32/32] nir: add a pass to tag non uniform accesses Inferring the information not provided with VK_EXT_descriptor_heap. Signed-off-by: Lionel Landwerlin --- src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_opt_non_uniform_access.c | 134 ++++++++++++++++++ 2 files changed, 135 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f80a7aa8107..4bfdee5f822 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6210,6 +6210,7 @@ bool nir_has_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_a bool nir_opt_non_uniform_access(nir_shader *shader); bool nir_lower_non_uniform_access(nir_shader *shader, const nir_lower_non_uniform_access_options *options); +bool nir_tag_non_uniform_accesses(nir_shader *shader); typedef struct nir_lower_idiv_options { /* Whether 16-bit floating point arithmetic should be allowed in 8-bit diff --git a/src/compiler/nir/nir_opt_non_uniform_access.c b/src/compiler/nir/nir_opt_non_uniform_access.c index a4ecabc8e78..2452812ec65 100644 --- a/src/compiler/nir/nir_opt_non_uniform_access.c +++ b/src/compiler/nir/nir_opt_non_uniform_access.c @@ -119,6 +119,21 @@ is_image_query_intrinsic(nir_intrinsic_instr *intrin) } } +static bool +is_deref_intrinsic(nir_intrinsic_instr *intrin) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: + return true; + + default: + return false; + } +} + static bool has_non_uniform_tex_access(nir_tex_instr *tex, enum nir_lower_non_uniform_access_type types) { @@ -319,3 +334,122 @@ nir_opt_non_uniform_access(nir_shader *shader) return progress; } + +static bool +tag_non_uniform_tex_access(nir_tex_instr *tex) +{ + bool 
progress = false; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_offset: + case nir_tex_src_texture_handle: + case nir_tex_src_texture_deref: + case nir_tex_src_texture_heap_offset: + if (nir_src_is_divergent(&tex->src[i].src)) { + tex->texture_non_uniform = true; + progress = true; + } + break; + + case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: + case nir_tex_src_sampler_deref: + case nir_tex_src_sampler_heap_offset: + if (nir_src_is_divergent(&tex->src[i].src)) { + tex->sampler_non_uniform = true; + progress = true; + } + break; + + case nir_tex_src_offset: + if (nir_src_is_divergent(&tex->src[i].src)) { + tex->offset_non_uniform = true; + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} + +static bool +tag_non_uniform_access_intrin(nir_intrinsic_instr *intrin, unsigned handle_src) +{ + if (has_non_uniform_access_intrin(intrin)) + return false; + + if (!nir_src_is_divergent(&intrin->src[handle_src])) + return false; + + nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) | ACCESS_NON_UNIFORM); + + return true; +} + +static bool +tag_non_uniform_deref_intrin(nir_intrinsic_instr *intrin) +{ + nir_deref_instr *deref = nir_def_as_deref(intrin->src[0].ssa); + + if (!nir_deref_mode_is_one_of(deref, + nir_var_mem_ubo | + nir_var_mem_ssbo)) + return false; + + assert(deref); + while (deref && + deref->deref_type != nir_deref_type_var && + deref->deref_type != nir_deref_type_cast) + deref = nir_deref_instr_parent(deref); + assert(deref); + + if (!nir_src_is_divergent(&deref->parent)) + return false; + + nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) | ACCESS_NON_UNIFORM); + + return true; +} + +static bool +nir_tag_non_uniform_access_instr(nir_builder *b, nir_instr *instr, UNUSED void *user_data) +{ + switch (instr->type) { + case nir_instr_type_tex: + return tag_non_uniform_tex_access(nir_instr_as_tex(instr)); + + case 
nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (is_ubo_intrinsic(intrin) || is_ssbo_intrinsic(intrin) || + is_image_access_intrinsic(intrin) || is_image_query_intrinsic(intrin) || + intrin->intrinsic == nir_intrinsic_get_ssbo_size) + return tag_non_uniform_access_intrin(intrin, nir_get_io_index_src_number(intrin)); + if (is_deref_intrinsic(intrin)) + return tag_non_uniform_deref_intrin(intrin); + break; + } + + default: + /* Nothing to do */ + break; + } + + return false; +} + +bool +nir_tag_non_uniform_accesses(nir_shader *shader) +{ + nir_divergence_analysis(shader); + + return nir_shader_instructions_pass(shader, + nir_tag_non_uniform_access_instr, + nir_metadata_live_defs | + nir_metadata_instr_index | + nir_metadata_control_flow, NULL); +}