diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index e62ef2f43a0..eabb185d25f 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -2119,7 +2119,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev) .imageViewCaptureReplayDescriptorDataSize = 1, .samplerCaptureReplayDescriptorDataSize = 4, .accelerationStructureCaptureReplayDescriptorDataSize = 1, - .samplerDescriptorSize = RADV_SAMPLER_DESC_SIZE, + .EDBsamplerDescriptorSize = RADV_SAMPLER_DESC_SIZE, .combinedImageSamplerDescriptorSize = radv_get_combined_image_sampler_desc_size(pdev), .sampledImageDescriptorSize = radv_get_sampled_image_desc_size(pdev), .storageImageDescriptorSize = RADV_STORAGE_IMAGE_DESC_SIZE, diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 60d5d24a707..e50ddeed698 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2566,6 +2566,10 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_warp_id_arm; case SYSTEM_VALUE_WARP_MAX_ID_ARM: return nir_intrinsic_load_warp_max_id_arm; + case SYSTEM_VALUE_SAMPLER_HEAP_PTR: + return nir_intrinsic_load_sampler_heap_ptr; + case SYSTEM_VALUE_RESOURCE_HEAP_PTR: + return nir_intrinsic_load_resource_heap_ptr; default: return nir_num_intrinsics; } @@ -2752,6 +2756,10 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_WARP_ID_ARM; case nir_intrinsic_load_warp_max_id_arm: return SYSTEM_VALUE_WARP_MAX_ID_ARM; + case nir_intrinsic_load_sampler_heap_ptr: + return SYSTEM_VALUE_SAMPLER_HEAP_PTR; + case nir_intrinsic_load_resource_heap_ptr: + return SYSTEM_VALUE_RESOURCE_HEAP_PTR; default: UNREACHABLE("intrinsic doesn't produce a system value"); } @@ -2947,6 +2955,7 @@ nir_chase_binding(nir_src rsrc) res.var = deref->var; res.desc_set = deref->var->data.descriptor_set; res.binding = deref->var->data.binding; + res.resource_type = deref->var->data.resource_type; return res; 
} else if (deref->deref_type == nir_deref_type_array && is_image) { if (res.num_indices == ARRAY_SIZE(res.indices)) @@ -3037,6 +3046,7 @@ nir_chase_binding(nir_src rsrc) res.success = true; res.desc_set = nir_intrinsic_desc_set(intrin); res.binding = nir_intrinsic_binding(intrin); + res.resource_type = nir_intrinsic_resource_type(intrin); res.num_indices = 1; res.indices[0] = intrin->src[0]; return res; @@ -3571,6 +3581,8 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) case nir_tex_src_texture_2_handle: case nir_tex_src_sampler_2_handle: case nir_tex_src_block_size: + case nir_tex_src_texture_heap_offset: + case nir_tex_src_sampler_heap_offset: return nir_type_uint; case nir_num_tex_src_types: diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7c552657292..dd61edb03ca 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -712,10 +712,13 @@ typedef struct nir_variable { */ unsigned access : 9; + /* VkSpirvResourceTypeFlagBitsEXT bit index that this variable would have. */ + unsigned resource_type : 9; + /** * Descriptor set binding for sampler or UBO.
*/ - unsigned descriptor_set : 5; + unsigned descriptor_set; #define NIR_VARIABLE_NO_INDEX ~0 @@ -1832,19 +1835,6 @@ nir_deref_instr_parent(const nir_deref_instr *instr) return nir_src_as_deref(instr->parent); } -static inline nir_variable * -nir_deref_instr_get_variable(const nir_deref_instr *instr) -{ - while (instr->deref_type != nir_deref_type_var) { - if (instr->deref_type == nir_deref_type_cast) - return NULL; - - instr = nir_deref_instr_parent(instr); - } - - return instr->var; -} - bool nir_deref_instr_has_indirect(nir_deref_instr *instr); bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr); @@ -1972,12 +1962,6 @@ typedef struct nir_intrinsic_instr { nir_src src[]; } nir_intrinsic_instr; -static inline nir_variable * -nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i) -{ - return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); -} - typedef enum { /* Memory ordering. */ NIR_MEMORY_ACQUIRE = 1 << 0, @@ -2363,6 +2347,12 @@ typedef enum nir_tex_src_type { */ nir_tex_src_sampler_handle, + /** Texture descriptor heap offset (in bytes) */ + nir_tex_src_texture_heap_offset, + + /** Sampler descriptor heap offset (in bytes) */ + nir_tex_src_sampler_heap_offset, + /** Tex src intrinsic * * This is an intrinsic used before function inlining i.e. before we know @@ -2594,6 +2584,12 @@ typedef struct nir_tex_instr { */ bool sampler_non_uniform; + /** True if this texture instruction uses an embedded sampler. + * + * In this case, sampler_index is the index in embedded sampler table. 
+ */ + bool embedded_sampler; + /** True if the offset is not dynamically uniform */ bool offset_non_uniform; @@ -3165,6 +3161,7 @@ typedef struct nir_binding { nir_variable *var; unsigned desc_set; unsigned binding; + unsigned resource_type; unsigned num_indices; nir_src indices[4]; bool read_first_invocation; @@ -4113,6 +4110,26 @@ nir_shader_get_function_for_name(const nir_shader *shader, const char *name) return NULL; } +static inline nir_variable * +nir_deref_instr_get_variable(const nir_deref_instr *instr) +{ + while (instr->deref_type != nir_deref_type_var) { + if (instr->deref_type == nir_deref_type_cast && + !nir_def_is_deref(instr->parent.ssa)) + return NULL; + + instr = nir_deref_instr_parent(instr); + } + + return instr->var; +} + +static inline nir_variable * +nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i) +{ + return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); +} + /* * After all functions are forcibly inlined, these passes remove redundant * functions from a shader and library respectively. 
@@ -6193,6 +6210,7 @@ bool nir_has_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_a bool nir_opt_non_uniform_access(nir_shader *shader); bool nir_lower_non_uniform_access(nir_shader *shader, const nir_lower_non_uniform_access_options *options); +bool nir_tag_non_uniform_accesses(nir_shader *shader); typedef struct nir_lower_idiv_options { /* Whether 16-bit floating point arithmetic should be allowed in 8-bit diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 65297e77561..0aa7e95af2e 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -423,6 +423,7 @@ clone_tex(clone_state *state, const nir_tex_instr *tex) ntex->texture_non_uniform = tex->texture_non_uniform; ntex->sampler_non_uniform = tex->sampler_non_uniform; ntex->offset_non_uniform = tex->offset_non_uniform; + ntex->embedded_sampler = tex->embedded_sampler; ntex->backend_flags = tex->backend_flags; diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index feafb307088..bf77844c3c5 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -364,6 +364,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_ray_query_global_intel: case nir_intrinsic_load_call_return_address_amd: case nir_intrinsic_load_indirect_address_intel: + case nir_intrinsic_load_sampler_heap_ptr: + case nir_intrinsic_load_resource_heap_ptr: is_divergent = false; break; @@ -653,9 +655,11 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_samples_identical: case nir_intrinsic_image_deref_samples_identical: case nir_intrinsic_bindless_image_samples_identical: + case nir_intrinsic_image_heap_samples_identical: case nir_intrinsic_image_fragment_mask_load_amd: case nir_intrinsic_image_deref_fragment_mask_load_amd: case nir_intrinsic_bindless_image_fragment_mask_load_amd: + case 
nir_intrinsic_image_heap_fragment_mask_load_amd: is_divergent = (src_divergent(instr->src[0], state) && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || src_divergent(instr->src[1], state) || @@ -674,9 +678,11 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_load: case nir_intrinsic_image_deref_load: case nir_intrinsic_bindless_image_load: + case nir_intrinsic_image_heap_load: case nir_intrinsic_image_sparse_load: case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_bindless_image_sparse_load: + case nir_intrinsic_image_heap_sparse_load: is_divergent = (src_divergent(instr->src[0], state) && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || src_divergent(instr->src[1], state) || @@ -734,20 +740,27 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_levels: case nir_intrinsic_image_deref_levels: case nir_intrinsic_bindless_image_levels: + case nir_intrinsic_image_heap_levels: case nir_intrinsic_image_samples: case nir_intrinsic_image_deref_samples: case nir_intrinsic_bindless_image_samples: + case nir_intrinsic_image_heap_samples: case nir_intrinsic_image_size: case nir_intrinsic_image_deref_size: case nir_intrinsic_bindless_image_size: + case nir_intrinsic_image_heap_size: case nir_intrinsic_image_descriptor_amd: case nir_intrinsic_image_deref_descriptor_amd: + case nir_intrinsic_image_heap_descriptor_amd: case nir_intrinsic_bindless_image_descriptor_amd: case nir_intrinsic_strict_wqm_coord_amd: case nir_intrinsic_copy_deref: case nir_intrinsic_vulkan_resource_index: case nir_intrinsic_vulkan_resource_reindex: case nir_intrinsic_load_vulkan_descriptor: + case nir_intrinsic_load_heap_descriptor: + case nir_intrinsic_load_resource_heap_data: + case nir_intrinsic_global_addr_to_descriptor: case nir_intrinsic_load_input_attachment_target_pan: case nir_intrinsic_load_input_attachment_conv_pan: case nir_intrinsic_load_converted_mem_pan: @@ 
-761,6 +774,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_sample_positions_amd: case nir_intrinsic_image_deref_load_param_intel: case nir_intrinsic_image_load_raw_intel: + case nir_intrinsic_load_buffer_ptr_deref: case nir_intrinsic_get_ubo_size: case nir_intrinsic_load_ssbo_address: case nir_intrinsic_load_global_bounded: @@ -902,6 +916,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_image_atomic_swap: case nir_intrinsic_bindless_image_atomic: case nir_intrinsic_bindless_image_atomic_swap: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: case nir_intrinsic_shared_atomic: case nir_intrinsic_shared_atomic_swap: case nir_intrinsic_shared_atomic_nv: diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 0bab69896c3..86d1371e23a 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -396,6 +396,7 @@ intrinsic_is_bindless(nir_intrinsic_instr *instr) case nir_intrinsic_bindless_image_atomic_swap: case nir_intrinsic_bindless_image_descriptor_amd: case nir_intrinsic_bindless_image_format: + case nir_intrinsic_bindless_image_levels: case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_load_raw_intel: case nir_intrinsic_bindless_image_order: @@ -413,6 +414,36 @@ intrinsic_is_bindless(nir_intrinsic_instr *instr) return false; } +static bool +intrinsic_is_heap(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_global_addr_to_descriptor: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_descriptor_amd: + case nir_intrinsic_image_heap_format: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_load_raw_intel: + case nir_intrinsic_image_heap_order: + case nir_intrinsic_image_heap_samples: + 
case nir_intrinsic_image_heap_samples_identical: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_store_raw_intel: + case nir_intrinsic_load_buffer_ptr_deref: + case nir_intrinsic_load_heap_descriptor: + case nir_intrinsic_load_resource_heap_data: + case nir_intrinsic_load_sampler_heap_ptr: + return true; + default: + break; + } + return false; +} + static void gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) { @@ -883,6 +914,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) } default: + shader->info.use_descriptor_heap |= intrinsic_is_heap(instr); shader->info.uses_bindless |= intrinsic_is_bindless(instr); if (nir_intrinsic_writes_external_memory(instr)) shader->info.writes_memory = true; @@ -920,7 +952,10 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) instr->intrinsic == nir_intrinsic_bindless_image_size || instr->intrinsic == nir_intrinsic_bindless_image_samples || instr->intrinsic == nir_intrinsic_get_ubo_size || - instr->intrinsic == nir_intrinsic_get_ssbo_size) + instr->intrinsic == nir_intrinsic_get_ssbo_size || + instr->intrinsic == nir_intrinsic_image_heap_levels || + instr->intrinsic == nir_intrinsic_image_heap_size || + instr->intrinsic == nir_intrinsic_image_heap_samples) shader->info.uses_resource_info_query = true; break; } @@ -941,6 +976,13 @@ gather_tex_info(nir_tex_instr *instr, nir_shader *shader) nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle) != -1) shader->info.uses_bindless = true; + if (nir_tex_instr_src_index(instr, nir_tex_src_texture_heap_offset) != -1 || + nir_tex_instr_src_index(instr, nir_tex_src_sampler_heap_offset) != -1) + shader->info.use_descriptor_heap = true; + + if (instr->embedded_sampler) + shader->info.uses_embedded_samplers = true; + if (!nir_tex_instr_is_query(instr) && (instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || instr->sampler_dim 
== GLSL_SAMPLER_DIM_SUBPASS_MS)) @@ -1023,6 +1065,7 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) shader->info.bit_sizes_float = 0; shader->info.bit_sizes_int = 0; shader->info.uses_bindless = false; + shader->info.uses_embedded_samplers = false; nir_foreach_variable_with_modes(var, shader, nir_var_image | nir_var_uniform) { if (var->data.bindless) diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index b163b361fb1..60be9d92410 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -278,6 +278,7 @@ pack_tex(const nir_tex_instr *instr) PACK(instr->skip_helpers, 1); PACK(instr->texture_non_uniform, 1); PACK(instr->sampler_non_uniform, 1); + PACK(instr->embedded_sampler, 1); PACK(instr->offset_non_uniform, 1); #undef PACK diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 374ff0e97c8..c0330216e15 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -245,6 +245,9 @@ index("unsigned", "offset_shift_nv") # The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. index("unsigned", "desc_type") +# The Vulkan resource type according to VkSpirvResourceTypeFlagsEXT.
+index("unsigned", "resource_type") + # The nir_alu_type of input data to a store or conversion index("nir_alu_type", "src_type") @@ -816,6 +819,8 @@ def image(name, src_comp=[], extra_indices=[], **kwargs): indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS, RANGE_BASE] + extra_indices, **kwargs) intrinsic("bindless_image_" + name, src_comp=[-1] + src_comp, indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS] + extra_indices, **kwargs) + intrinsic("image_heap_" + name, src_comp=[1] + src_comp, + indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS] + extra_indices, **kwargs) image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) @@ -859,7 +864,7 @@ image("fragment_mask_load_amd", src_comp=[4], dest_comp=1, bit_sizes=[32], flags # corresponds to the tuple (set, binding, index) and computes an index # corresponding to tuple (set, binding, idx + src1). intrinsic("vulkan_resource_index", src_comp=[1], dest_comp=0, - indices=[DESC_SET, BINDING, DESC_TYPE], + indices=[DESC_SET, BINDING, DESC_TYPE, RESOURCE_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) intrinsic("vulkan_resource_reindex", src_comp=[0, 1], dest_comp=0, indices=[DESC_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) @@ -1436,6 +1441,21 @@ intrinsic("cmat_insert", src_comp=[-1, 1, -1, 1]) intrinsic("cmat_copy", src_comp=[-1, -1]) intrinsic("cmat_transpose", src_comp=[-1, -1]) +# VK_EXT_descriptor_heap +system_value("sampler_heap_ptr", 1, bit_sizes=[64]) +system_value("resource_heap_ptr", 1, bit_sizes=[64]) +# src[] = { deref }. +load("buffer_ptr_deref", [-1], [ACCESS, RESOURCE_TYPE], + flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. +load("heap_descriptor", [1], [RESOURCE_TYPE], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { offset }. +load("resource_heap_data", [1], [ALIGN_MUL, ALIGN_OFFSET], + flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { addr }.
+intrinsic("global_addr_to_descriptor", src_comp=[1], dest_comp=0, + indices=[RESOURCE_TYPE], flags=[CAN_ELIMINATE, CAN_REORDER]) + # Select an output vertex in a poly GS. Takes the stream-local vertex ID. intrinsic("select_vertex_poly", src_comp=[1], indices=[STREAM_ID]) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index cbdedca1b77..3fff72cff00 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1094,10 +1094,11 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr) return idx >= 0 ? &instr->src[idx] : NULL; } -#define IMG_CASE(name) \ - case nir_intrinsic_image_##name: \ - case nir_intrinsic_image_deref_##name: \ - case nir_intrinsic_bindless_image_##name +#define IMG_CASE(name) \ + case nir_intrinsic_image_##name: \ + case nir_intrinsic_image_deref_##name: \ + case nir_intrinsic_bindless_image_##name: \ + case nir_intrinsic_image_heap_##name /** * Return the index or handle source number for a load/store intrinsic or -1 diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 71705e397e0..9ed656b6f49 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -245,6 +245,7 @@ lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex, case nir_tex_src_texture_handle: case nir_tex_src_texture_deref: case nir_tex_src_texture_2_deref: + case nir_tex_src_texture_heap_offset: if (!tex->texture_non_uniform) continue; if (!(opts->types & base_access_type)) @@ -257,6 +258,7 @@ lower_non_uniform_tex_access(struct nu_state *state, nir_tex_instr *tex, case nir_tex_src_sampler_handle: case nir_tex_src_sampler_deref: case nir_tex_src_sampler_2_deref: + case nir_tex_src_sampler_heap_offset: if (!tex->sampler_non_uniform) continue; if (!(opts->types & base_access_type)) @@ -424,6 +426,16 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case 
nir_intrinsic_image_deref_atomic_swap: case nir_intrinsic_image_deref_samples_identical: case nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_samples: + case nir_intrinsic_image_heap_samples_identical: + case nir_intrinsic_image_heap_fragment_mask_load_amd: if ((options->types & nir_lower_non_uniform_image_access) && lower_non_uniform_access_intrin(&state, intrin, 0, nir_lower_non_uniform_image_access)) progress = true; diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index ed1ad705ef0..c2d8f7c383b 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -376,6 +376,7 @@ sample_plane(nir_builder *b, nir_tex_instr *tex, int plane, plane_tex->dest_type = nir_type_float | tex->def.bit_size; plane_tex->coord_components = 2; + plane_tex->embedded_sampler = tex->embedded_sampler; plane_tex->texture_index = tex->texture_index; plane_tex->sampler_index = tex->sampler_index; plane_tex->can_speculate = tex->can_speculate; @@ -937,6 +938,7 @@ lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex) txd->sampler_dim = tex->sampler_dim; txd->dest_type = tex->dest_type; txd->coord_components = tex->coord_components; + txd->embedded_sampler = tex->embedded_sampler; txd->texture_index = tex->texture_index; txd->sampler_index = tex->sampler_index; txd->is_array = tex->is_array; @@ -980,6 +982,7 @@ lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex) txl->sampler_dim = tex->sampler_dim; txl->dest_type = tex->dest_type; txl->coord_components = tex->coord_components; + txl->embedded_sampler = tex->embedded_sampler; txl->texture_index = tex->texture_index; txl->sampler_index = tex->sampler_index; txl->is_array = 
tex->is_array; @@ -1237,6 +1240,7 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod; tex_copy->component = tex->component; tex_copy->dest_type = tex->dest_type; + tex_copy->embedded_sampler = tex->embedded_sampler; tex_copy->texture_index = tex->texture_index; tex_copy->sampler_index = tex->sampler_index; tex_copy->backend_flags = tex->backend_flags; diff --git a/src/compiler/nir/nir_opt_non_uniform_access.c b/src/compiler/nir/nir_opt_non_uniform_access.c index 27fdc4daa04..2452812ec65 100644 --- a/src/compiler/nir/nir_opt_non_uniform_access.c +++ b/src/compiler/nir/nir_opt_non_uniform_access.c @@ -24,10 +24,21 @@ #include "nir.h" #include "nir_builder.h" +#include "vulkan/vulkan_core.h" + static bool is_ubo_intrinsic(nir_intrinsic_instr *intrin) { - return intrin->intrinsic == nir_intrinsic_load_ubo; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + return true; + + case nir_intrinsic_load_buffer_ptr_deref: + return nir_intrinsic_resource_type(intrin) == VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + + default: + return false; + } } static bool @@ -40,6 +51,10 @@ is_ssbo_intrinsic(nir_intrinsic_instr *intrin) case nir_intrinsic_ssbo_atomic_swap: return true; + case nir_intrinsic_load_buffer_ptr_deref: + return nir_intrinsic_resource_type(intrin) == VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT || + nir_intrinsic_resource_type(intrin) == VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + default: return false; } @@ -87,6 +102,31 @@ is_image_query_intrinsic(nir_intrinsic_instr *intrin) case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: case nir_intrinsic_image_deref_levels: + case nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_heap_load: + case nir_intrinsic_image_heap_sparse_load: + case nir_intrinsic_image_heap_store: + case nir_intrinsic_image_heap_atomic: + case nir_intrinsic_image_heap_atomic_swap: + 
case nir_intrinsic_image_heap_size: + case nir_intrinsic_image_heap_samples: + case nir_intrinsic_image_heap_levels: + case nir_intrinsic_image_heap_fragment_mask_load_amd: + return true; + + default: + return false; + } +} + +static bool +is_deref_intrinsic(nir_intrinsic_instr *intrin) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: return true; default: @@ -209,6 +249,7 @@ opt_non_uniform_tex_access(nir_tex_instr *tex) case nir_tex_src_texture_offset: case nir_tex_src_texture_handle: case nir_tex_src_texture_deref: + case nir_tex_src_texture_heap_offset: if (tex->texture_non_uniform && !nir_src_is_divergent(&tex->src[i].src)) { tex->texture_non_uniform = false; progress = true; @@ -218,6 +259,7 @@ opt_non_uniform_tex_access(nir_tex_instr *tex) case nir_tex_src_sampler_offset: case nir_tex_src_sampler_handle: case nir_tex_src_sampler_deref: + case nir_tex_src_sampler_heap_offset: if (tex->sampler_non_uniform && !nir_src_is_divergent(&tex->src[i].src)) { tex->sampler_non_uniform = false; progress = true; @@ -292,3 +334,122 @@ nir_opt_non_uniform_access(nir_shader *shader) return progress; } + +static bool +tag_non_uniform_tex_access(nir_tex_instr *tex) +{ + bool progress = false; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_texture_offset: + case nir_tex_src_texture_handle: + case nir_tex_src_texture_deref: + case nir_tex_src_texture_heap_offset: + if (nir_src_is_divergent(&tex->src[i].src)) { + tex->texture_non_uniform = true; + progress = true; + } + break; + + case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: + case nir_tex_src_sampler_deref: + case nir_tex_src_sampler_heap_offset: + if (nir_src_is_divergent(&tex->src[i].src)) { + tex->sampler_non_uniform = true; + progress = true; + } + break; + + case nir_tex_src_offset: + if 
(nir_src_is_divergent(&tex->src[i].src)) { + tex->offset_non_uniform = true; + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} + +static bool +tag_non_uniform_access_intrin(nir_intrinsic_instr *intrin, unsigned handle_src) +{ + if (has_non_uniform_access_intrin(intrin)) + return false; + + if (!nir_src_is_divergent(&intrin->src[handle_src])) + return false; + + nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) | ACCESS_NON_UNIFORM); + + return true; +} + +static bool +tag_non_uniform_deref_intrin(nir_intrinsic_instr *intrin) +{ + nir_deref_instr *deref = nir_def_as_deref(intrin->src[0].ssa); + + if (!nir_deref_mode_is_one_of(deref, + nir_var_mem_ubo | + nir_var_mem_ssbo)) + return false; + + assert(deref); + while (deref && + deref->deref_type != nir_deref_type_var && + deref->deref_type != nir_deref_type_cast) + deref = nir_deref_instr_parent(deref); + assert(deref); + + if (!nir_src_is_divergent(&deref->parent)) + return false; + + nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) | ACCESS_NON_UNIFORM); + + return true; +} + +static bool +nir_tag_non_uniform_access_instr(nir_builder *b, nir_instr *instr, UNUSED void *user_data) +{ + switch (instr->type) { + case nir_instr_type_tex: + return tag_non_uniform_tex_access(nir_instr_as_tex(instr)); + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (is_ubo_intrinsic(intrin) || is_ssbo_intrinsic(intrin) || + is_image_access_intrinsic(intrin) || is_image_query_intrinsic(intrin) || + intrin->intrinsic == nir_intrinsic_get_ssbo_size) + return tag_non_uniform_access_intrin(intrin, nir_get_io_index_src_number(intrin)); + if (is_deref_intrinsic(intrin)) + return tag_non_uniform_deref_intrin(intrin); + break; + } + + default: + /* Nothing to do */ + break; + } + + return false; +} + +bool +nir_tag_non_uniform_accesses(nir_shader *shader) +{ + nir_divergence_analysis(shader); + + return 
nir_shader_instructions_pass(shader, + nir_tag_non_uniform_access_instr, + nir_metadata_live_defs | + nir_metadata_instr_index | + nir_metadata_control_flow, NULL); +} diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 387190a57cd..c1d97024065 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1238,6 +1238,33 @@ vulkan_descriptor_type_name(VkDescriptorType type) } } +static const char * +vk_spirv_resource_type_name(VkSpirvResourceTypeFlagBitsEXT type) +{ + switch (type) { + case VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT: + return "sampler"; + case VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT: + return "texture"; + case VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT: + return "RO-image"; + case VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT: + return "RW-image"; + case VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT: + return "texture+sampler"; + case VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT: + return "UBO"; + case VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT: + return "RO-SSBO"; + case VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT: + return "RW-SSBO"; + case VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT: + return "accel-struct"; + default: + return "unknown"; + } +} + static void print_alu_type(nir_alu_type type, print_state *state) { @@ -1425,6 +1452,13 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) break; } + case NIR_INTRINSIC_RESOURCE_TYPE: { + VkSpirvResourceTypeFlagBitsEXT res_type = + nir_intrinsic_resource_type(instr); + fprintf(fp, "resource_type=%s", vk_spirv_resource_type_name(res_type)); + break; + } + case NIR_INTRINSIC_SRC_TYPE: { fprintf(fp, "src_type="); print_alu_type(nir_intrinsic_src_type(instr), state); @@ -2075,6 +2109,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_box_size: fprintf(fp, "(box_size)"); break; + case nir_tex_src_texture_heap_offset: + fprintf(fp, "(texture_heap_offset)"); + 
break; + case nir_tex_src_sampler_heap_offset: + fprintf(fp, "(sampler_heap_offset)"); + break; case nir_tex_src_plane: fprintf(fp, "(plane)"); break; @@ -2918,6 +2958,7 @@ print_shader_info(const struct shader_info *info, FILE *fp) print_nz_bool(fp, "flrp_lowered", info->flrp_lowered); print_nz_bool(fp, "io_lowered", info->io_lowered); print_nz_bool(fp, "writes_memory", info->writes_memory); + print_nz_bool(fp, "use_descriptor_heap", info->use_descriptor_heap); print_nz_unsigned(fp, "derivative_group", info->derivative_group); switch (info->stage) { diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c index 24fb6ca16d3..5d9f12a6842 100644 --- a/src/compiler/nir/nir_serialize.c +++ b/src/compiler/nir/nir_serialize.c @@ -1374,10 +1374,11 @@ union packed_tex_data { unsigned texture_non_uniform : 1; unsigned sampler_non_uniform : 1; unsigned offset_non_uniform : 1; + unsigned embedded_sampler : 1; unsigned array_is_lowered_cube : 1; unsigned is_gather_implicit_lod : 1; unsigned can_speculate : 1; - unsigned unused : 3; /* Mark unused for valgrind. */ + unsigned unused : 2; /* Mark unused for valgrind. 
*/ } u; }; @@ -1415,6 +1416,7 @@ write_tex(write_ctx *ctx, const nir_tex_instr *tex) .u.texture_non_uniform = tex->texture_non_uniform, .u.sampler_non_uniform = tex->sampler_non_uniform, .u.offset_non_uniform = tex->offset_non_uniform, + .u.embedded_sampler = tex->embedded_sampler, .u.array_is_lowered_cube = tex->array_is_lowered_cube, .u.is_gather_implicit_lod = tex->is_gather_implicit_lod, .u.can_speculate = tex->can_speculate, @@ -1456,6 +1458,7 @@ read_tex(read_ctx *ctx, union packed_instr header) tex->texture_non_uniform = packed.u.texture_non_uniform; tex->sampler_non_uniform = packed.u.sampler_non_uniform; tex->offset_non_uniform = packed.u.offset_non_uniform; + tex->embedded_sampler = packed.u.embedded_sampler; tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod; tex->can_speculate = packed.u.can_speculate; diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c index 1d5a2bc2438..4e59f5b3e37 100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@ -459,6 +459,8 @@ gl_system_value_name(gl_system_value sysval) ENUM(SYSTEM_VALUE_CORE_MAX_ID_ARM), ENUM(SYSTEM_VALUE_WARP_ID_ARM), ENUM(SYSTEM_VALUE_WARP_MAX_ID_ARM), + ENUM(SYSTEM_VALUE_SAMPLER_HEAP_PTR), + ENUM(SYSTEM_VALUE_RESOURCE_HEAP_PTR), }; STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX); return NAME(sysval); diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 66a77a72466..2b6f920395c 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -969,6 +969,10 @@ typedef enum SYSTEM_VALUE_WARP_ID_ARM, SYSTEM_VALUE_WARP_MAX_ID_ARM, + /* SPV_EXT_descriptor_heap */ + SYSTEM_VALUE_SAMPLER_HEAP_PTR, + SYSTEM_VALUE_RESOURCE_HEAP_PTR, + SYSTEM_VALUE_MAX /**< Number of values */ } gl_system_value; diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index df32b4e6938..c71bb1fc8b6 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h
@@ -276,6 +276,9 @@ typedef struct shader_info { /* Whether ARB_bindless_texture ops or variables are used */ bool uses_bindless : 1; + /* Number of embedded samplers used by this shader */ + bool uses_embedded_samplers : 1; + /** * Shared memory types have explicit layout set. Used for * SPV_KHR_workgroup_storage_explicit_layout. @@ -335,7 +338,12 @@ typedef struct shader_info { * generate NaNs, and the only way the GPU saw one was to possibly feed it * in as a uniform. */ - bool use_legacy_math_rules; + bool use_legacy_math_rules:1; + + /** + * Whether the shader uses descriptor heaps + */ + bool use_descriptor_heap:1; /* * Arrangement of invocations used to calculate derivatives in diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h index c7096197dd2..79549bd1760 100644 --- a/src/compiler/spirv/nir_spirv.h +++ b/src/compiler/spirv/nir_spirv.h @@ -111,6 +111,16 @@ struct spirv_to_nir_options { */ uint32_t min_ssbo_alignment; + /* These must be identical to the values set in + * VkPhysicalDeviceDescriptorHeapPropertiesEXT + */ + uint32_t sampler_descriptor_size; + uint32_t sampler_descriptor_alignment; + uint32_t image_descriptor_size; + uint32_t image_descriptor_alignment; + uint32_t buffer_descriptor_size; + uint32_t buffer_descriptor_alignment; + const nir_shader *clc_shader; struct { diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 93bacdf07a4..3dbb7b3474c 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -81,6 +81,7 @@ static const struct spirv_capabilities implemented_capabilities = { .DenormFlushToZero = true, .DenormPreserve = true, .DerivativeControl = true, + .DescriptorHeapEXT = true, .DeviceGroup = true, .DotProduct = true, .DotProductBFloat16AccVALVE = true, @@ -465,6 +466,7 @@ vtn_base_type_to_string(enum vtn_base_type t) CASE(function); CASE(event); CASE(cooperative_matrix); + CASE(buffer); } #undef CASE UNREACHABLE("unknown base type"); @@ 
-679,6 +681,69 @@ spirv_to_gl_access_qualifier(struct vtn_builder *b, } } +static nir_deref_instr * +clone_deref_chain_for_mode(struct vtn_builder *b, + nir_def *value, + nir_variable_mode mode, + const glsl_type *type, + enum pipe_format image_format) +{ + assert(nir_def_is_deref(value)); + + nir_deref_instr *deref = nir_def_as_deref(value); + if (deref->modes & mode) + return deref; + + nir_deref_path path; + nir_deref_path_init(&path, deref, NULL); + + nir_deref_instr *r = NULL; + for (unsigned i = 0; path.path[i] != NULL; i++) { + nir_deref_instr *d = path.path[i]; + switch (d->deref_type) { + case nir_deref_type_var: { + nir_variable *var = nir_variable_clone(d->var, b->shader); + var->type = type; + var->data.mode = mode; + if (glsl_type_is_image(type)) + var->data.image.format = image_format; + nir_shader_add_variable(b->shader, var); + r = nir_build_deref_var(&b->nb, var); + r->modes = mode; + break; + } + + case nir_deref_type_array: + r = nir_build_deref_array(&b->nb, r, d->arr.index.ssa); + break; + + case nir_deref_type_ptr_as_array: + r = nir_build_deref_ptr_as_array(&b->nb, r, d->arr.index.ssa); + break; + + case nir_deref_type_struct: + r = nir_build_deref_struct(&b->nb, r, d->strct.index); + break; + + case nir_deref_type_cast: + r = nir_build_deref_cast_with_alignment(&b->nb, &r->def, mode, + d->type, + d->cast.ptr_stride, + d->cast.align_mul, + d->cast.align_offset); + break; + + default: + UNREACHABLE("invalid type"); + return NULL; + } + } + + nir_deref_path_finish(&path); + + return r; +} + static nir_deref_instr * vtn_get_image(struct vtn_builder *b, uint32_t value_id, enum gl_access_qualifier *access) @@ -687,10 +752,13 @@ vtn_get_image(struct vtn_builder *b, uint32_t value_id, vtn_assert(type->base_type == vtn_base_type_image); if (access) *access |= spirv_to_gl_access_qualifier(b, type->access_qualifier); + nir_variable_mode mode = glsl_type_is_image(type->glsl_image) ? 
nir_var_image : nir_var_uniform; - return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id), - mode, type->glsl_image, 0); + nir_def *value = &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, value_id), mode, + type->glsl_image, type->image_format)->def; + return nir_build_deref_cast(&b->nb, value, mode, type->glsl_image, 0); } static void @@ -708,8 +776,11 @@ vtn_get_sampler(struct vtn_builder *b, uint32_t value_id) { struct vtn_type *type = vtn_get_value_type(b, value_id); vtn_assert(type->base_type == vtn_base_type_sampler); - return nir_build_deref_cast(&b->nb, vtn_get_nir_ssa(b, value_id), - nir_var_uniform, glsl_bare_sampler_type(), 0); + nir_def *value = &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, value_id), nir_var_uniform, + glsl_bare_sampler_type(), PIPE_FORMAT_NONE)->def; + return nir_build_deref_cast(&b->nb, value, nir_var_uniform, + glsl_bare_sampler_type(), 0); } nir_def * @@ -736,6 +807,11 @@ vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id) struct vtn_type *type = vtn_get_value_type(b, value_id); vtn_assert(type->base_type == vtn_base_type_sampled_image); nir_def *si_vec2 = vtn_get_nir_ssa(b, value_id); + nir_alu_instr *si_alu_vec2 = nir_def_as_alu(si_vec2); + assert(si_alu_vec2->src[0].swizzle[0] == 0); + assert(si_alu_vec2->src[1].swizzle[0] == 0); + nir_def *image = si_alu_vec2->src[0].src.ssa; + nir_def *sampler = si_alu_vec2->src[1].src.ssa; /* Even though this is a sampled image, we can end up here with a storage * image because OpenCL doesn't distinguish between the two. 
@@ -745,11 +821,17 @@ vtn_get_sampled_image(struct vtn_builder *b, uint32_t value_id) nir_var_image : nir_var_uniform; struct vtn_sampled_image si = { NULL, }; - si.image = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 0), - image_mode, image_type, 0); - si.sampler = nir_build_deref_cast(&b->nb, nir_channel(&b->nb, si_vec2, 1), - nir_var_uniform, - glsl_bare_sampler_type(), 0); + si.image = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, image, image_mode, image_type, type->image_format)->def, + image_mode, image_type, 0); + si.sampler = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, sampler, nir_var_uniform, glsl_bare_sampler_type(), PIPE_FORMAT_NONE)->def, + nir_var_uniform, + glsl_bare_sampler_type(), 0); return si; } @@ -1127,6 +1209,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpDecorateString: case SpvOpMemberDecorateString: case SpvOpExecutionMode: @@ -1141,6 +1224,7 @@ vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, dec->scope = VTN_DEC_DECORATION; break; case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpMemberDecorateString: dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); vtn_fail_if(dec->scope < VTN_DEC_STRUCT_MEMBER0, /* overflow */ @@ -1269,6 +1353,9 @@ vtn_types_compatible(struct vtn_builder *b, case vtn_base_type_cooperative_matrix: return t1->type == t2->type; + case vtn_base_type_buffer: + return t1->storage_class == t2->storage_class; + case vtn_base_type_array: return t1->length == t2->length && vtn_types_compatible(b, t1->array_element, t2->array_element); @@ -1330,6 +1417,7 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) case vtn_base_type_accel_struct: case vtn_base_type_ray_query: case vtn_base_type_cooperative_matrix: + case vtn_base_type_buffer: /* Nothing more to do */ break; @@ -1370,6 +1458,10 @@ 
vtn_type_needs_explicit_layout(struct vtn_builder *b, struct vtn_type *type, */ return b->shader->info.has_transform_feedback_varyings; + case vtn_variable_mode_uniform: + /* These are used for descriptor heaps in Vulkan */ + return b->options->environment == NIR_SPIRV_VULKAN; + case vtn_variable_mode_ssbo: case vtn_variable_mode_phys_ssbo: case vtn_variable_mode_ubo: @@ -1525,6 +1617,8 @@ array_stride_decoration_cb(struct vtn_builder *b, vtn_fail_if(dec->operands[0] == 0, "ArrayStride must be non-zero"); type->stride = dec->operands[0]; } + } else if (dec->decoration == SpvDecorationArrayStrideIdEXT) { + type->stride = vtn_constant_uint(b, dec->operands[0]); } } @@ -1590,6 +1684,12 @@ struct_member_decoration_cb(struct vtn_builder *b, ctx->type->offsets[member] = dec->operands[0]; ctx->fields[member].offset = dec->operands[0]; break; + case SpvDecorationOffsetIdEXT: { + uint32_t offset = vtn_constant_uint(b, dec->operands[0]); + ctx->type->offsets[member] = offset; + ctx->fields[member].offset = offset; + break; + } case SpvDecorationMatrixStride: /* Handled as a second pass */ break; @@ -1770,6 +1870,7 @@ type_decoration_cb(struct vtn_builder *b, switch (dec->decoration) { case SpvDecorationArrayStride: + case SpvDecorationArrayStrideIdEXT: vtn_assert(type->base_type == vtn_base_type_array || type->base_type == vtn_base_type_pointer); break; @@ -1808,6 +1909,7 @@ type_decoration_cb(struct vtn_builder *b, case SpvDecorationXfbBuffer: case SpvDecorationXfbStride: case SpvDecorationUserSemantic: + case SpvDecorationOffsetIdEXT: vtn_warn("Decoration only allowed for struct members: %s", spirv_decoration_to_string(dec->decoration)); break; @@ -2438,6 +2540,21 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->type = b->shader->info.cs.ptr_size == 64 ? 
glsl_int64_t_type() : glsl_int_type(); break; + case SpvOpTypeBufferEXT: { + SpvStorageClass storage_class = w[2]; + vtn_fail_if(storage_class != SpvStorageClassUniform && + storage_class != SpvStorageClassStorageBuffer, + "Storage Class must be Uniform or StorageBuffer."); + + const nir_address_format addr_format = vtn_mode_to_address_format(b, + vtn_storage_class_to_mode(b, storage_class, NULL, NULL)); + + val->type->base_type = vtn_base_type_buffer; + val->type->storage_class = storage_class; + val->type->type = nir_address_format_to_glsl_type(addr_format); + break; + } + case SpvOpTypeDeviceEvent: case SpvOpTypeReserveId: case SpvOpTypeQueue: @@ -3014,6 +3131,37 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, val->is_null_constant = true; break; + case SpvOpConstantSizeOfEXT: { + vtn_fail_if(val->type->type != glsl_uint_type() && + val->type->type != glsl_int_type(), + "Result Type must be a 32-bit integer type scalar."); + + struct vtn_type *type = vtn_get_type(b, w[3]); + switch (type->base_type) { + case vtn_base_type_image: + val->constant->values[0].u32 = + align(b->options->image_descriptor_size, + b->options->image_descriptor_alignment); + break; + case vtn_base_type_sampler: + val->constant->values[0].u32 = + align(b->options->sampler_descriptor_size, + b->options->sampler_descriptor_alignment); + break; + case vtn_base_type_accel_struct: + case vtn_base_type_buffer: + val->constant->values[0].u32 = + align(b->options->buffer_descriptor_size, + b->options->buffer_descriptor_alignment); + break; + default: + vtn_fail("Type must be an OpTypeBufferKHR, OpTypeImage, " + "OpTypeAccelerationStructureKHR, OpTypeTensorARM, or " + "OpTypeSampler instruction."); + } + break; + } + default: vtn_fail_with_opcode("Unhandled opcode", opcode); } @@ -4163,6 +4311,22 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, val->image->sample = vtn_get_nir_ssa(b, w[5]); val->image->lod = nir_imm_int(&b->nb, 0); return; + } else if (opcode == 
SpvOpUntypedImageTexelPointerEXT) { + struct vtn_type *type = vtn_get_value_type(b, w[3]); + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = vtn_alloc(b, struct vtn_image_pointer); + + val->image->image = nir_build_deref_cast( + &b->nb, + &clone_deref_chain_for_mode( + b, vtn_get_nir_ssa(b, w[4]), nir_var_image, type->glsl_image, type->image_format)->def, + nir_var_image, + type->glsl_image, 0); + val->image->coord = get_image_coord(b, w[5]); + val->image->sample = vtn_get_nir_ssa(b, w[6]); + val->image->lod = nir_imm_int(&b->nb, 0); + return; } struct vtn_image_pointer image; @@ -5368,6 +5532,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, spirv_capability_to_string(cap)); break; + case SpvCapabilityDescriptorHeapEXT: + b->shader->info.use_descriptor_heap = true; + break; + default: vtn_fail_if(!spirv_capabilities_get(&implemented_capabilities, cap), "Unimplemented SPIR-V capability: %s (%u)", @@ -5461,6 +5629,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpGroupDecorate: case SpvOpGroupMemberDecorate: case SpvOpDecorateString: @@ -6010,11 +6179,8 @@ vtn_handle_execution_mode_id(struct vtn_builder *b, struct vtn_value *entry_poin } static bool -vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, - const uint32_t *w, unsigned count) +spv_op_is_preamble(SpvOp opcode) { - vtn_set_instruction_result_type(b, opcode, w, count); - switch (opcode) { case SpvOpSource: case SpvOpSourceContinued: @@ -6032,13 +6198,29 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpDecorate: case SpvOpDecorateId: case SpvOpMemberDecorate: + case SpvOpMemberDecorateIdEXT: case SpvOpGroupDecorate: case SpvOpGroupMemberDecorate: case SpvOpDecorateString: case SpvOpMemberDecorateString: - vtn_fail("Invalid opcode types and 
variables section"); - break; + return true; + default: + return false; + } +} + +static bool +vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + vtn_fail_if(spv_op_is_preamble(opcode), + "Invalid opcode in the types and variables section: %s", + spirv_op_to_string(opcode)); + + vtn_set_instruction_result_type(b, opcode, w, count); + + switch (opcode) { case SpvOpTypeVoid: case SpvOpTypeBool: case SpvOpTypeInt: @@ -6064,6 +6246,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeRayQueryKHR: case SpvOpTypeCooperativeMatrixKHR: case SpvOpTypeUntypedPointerKHR: + case SpvOpTypeBufferEXT: vtn_handle_type(b, opcode, w, count); break; @@ -6079,6 +6262,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpSpecConstantComposite: case SpvOpSpecConstantCompositeReplicateEXT: case SpvOpSpecConstantOp: + case SpvOpConstantSizeOfEXT: vtn_handle_constant(b, opcode, w, count); break; @@ -6639,6 +6823,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpUntypedInBoundsAccessChainKHR: case SpvOpUntypedInBoundsPtrAccessChainKHR: case SpvOpUntypedArrayLengthKHR: + case SpvOpBufferPointerEXT: vtn_handle_variables(b, opcode, w, count); break; @@ -6681,6 +6866,7 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, case SpvOpImageTexelPointer: case SpvOpImageQueryFormat: case SpvOpImageQueryOrder: + case SpvOpUntypedImageTexelPointerEXT: vtn_handle_image(b, opcode, w, count); break; diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index d1d465b2893..e33f798b0f2 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -297,6 +297,7 @@ enum vtn_base_type { vtn_base_type_function, vtn_base_type_event, vtn_base_type_cooperative_matrix, + vtn_base_type_buffer, }; struct vtn_type { @@ -364,7 +365,7 @@ struct vtn_type { bool packed:1; }; - /* 
Members for pointer types */ + /* Members for pointer and buffer types */ struct { /* For regular pointers, the vtn_type of the object pointed to; * for untyped pointers it must be NULL. diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 3e5d0ca2f00..dc962438724 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -260,6 +260,7 @@ vtn_variable_resource_index(struct vtn_builder *b, struct vtn_variable *var, nir_intrinsic_set_desc_set(instr, var->descriptor_set); nir_intrinsic_set_binding(instr, var->binding); nir_intrinsic_set_desc_type(instr, vk_desc_type_for_mode(b, var->mode)); + nir_intrinsic_set_resource_type(instr, var->var->data.resource_type); nir_address_format addr_format = vtn_mode_to_address_format(b, var->mode); nir_def_init(&instr->instr, &instr->def, @@ -1339,6 +1340,15 @@ vtn_get_builtin_location(struct vtn_builder *b, set_mode_system_value(b, mode); break; + case SpvBuiltInSamplerHeapEXT: + vtn_assert(*mode == nir_var_uniform); + *location = SYSTEM_VALUE_SAMPLER_HEAP_PTR; + break; + case SpvBuiltInResourceHeapEXT: + vtn_assert(*mode == nir_var_uniform); + *location = SYSTEM_VALUE_RESOURCE_HEAP_PTR; + break; + default: vtn_fail("Unsupported builtin: %s (%u)", spirv_builtin_to_string(builtin), builtin); @@ -1459,6 +1469,7 @@ apply_var_decoration(struct vtn_builder *b, case SpvDecorationArrayStride: case SpvDecorationGLSLShared: case SpvDecorationGLSLPacked: + case SpvDecorationArrayStrideIdEXT: break; /* These can apply to a type but we don't care about them */ case SpvDecorationBinding: @@ -1786,6 +1797,13 @@ vtn_storage_class_to_mode(struct vtn_builder *b, nir_mode = nir_var_mem_global; break; case SpvStorageClassUniformConstant: + /* This can happen with descriptor heaps and it's UBO */ + if (interface_type == NULL) { + mode = vtn_variable_mode_uniform; + nir_mode = nir_var_uniform; + break; + } + /* interface_type is only NULL when OpTypeForwardPointer is used and * 
OpTypeForwardPointer can only be used for struct types, not images or * acceleration structures. @@ -1793,8 +1811,7 @@ vtn_storage_class_to_mode(struct vtn_builder *b, if (interface_type) interface_type = vtn_type_without_array(interface_type); - if (interface_type && - interface_type->base_type == vtn_base_type_image && + if (interface_type->base_type == vtn_base_type_image && glsl_type_is_image(interface_type->glsl_image)) { mode = vtn_variable_mode_image; nir_mode = nir_var_image; @@ -2452,6 +2469,43 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val, /* Propagate access flags from the OpVariable decorations. */ val->pointer->access |= var->access; + switch (without_array->base_type) { + case vtn_base_type_image: + if (glsl_type_is_image(without_array->glsl_image)) { + if (var->access & ACCESS_NON_WRITEABLE) + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_IMAGE_BIT_EXT; + else + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_IMAGE_BIT_EXT; + } else { + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_SAMPLED_IMAGE_BIT_EXT; + } + break; + case vtn_base_type_sampler: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_SAMPLER_BIT_EXT; + break; + case vtn_base_type_sampled_image: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT; + break; + case vtn_base_type_accel_struct: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_ACCELERATION_STRUCTURE_BIT_EXT; + break; + default: + switch (var->mode) { + case vtn_variable_mode_ubo: + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + break; + case vtn_variable_mode_ssbo: + if (var->access & ACCESS_NON_WRITEABLE) + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT; + else + var->var->data.resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + break; + default: + break; + } + break; + } + if ((var->mode == vtn_variable_mode_input || 
var->mode == vtn_variable_mode_output) && var->var->members) { @@ -2693,6 +2747,38 @@ vtn_cast_pointer(struct vtn_builder *b, struct vtn_pointer *p, return casted; } +static void +buffer_ptr_decoration_cb(struct vtn_builder *b, struct vtn_value *val, + int member, const struct vtn_decoration *dec, + void *void_access) +{ + enum gl_access_qualifier *access = void_access; + vtn_assert(member == -1); + + switch (dec->decoration) { + case SpvDecorationNonReadable: + *access |= ACCESS_NON_READABLE; + break; + case SpvDecorationNonWritable: + *access |= ACCESS_NON_WRITEABLE; + break; + case SpvDecorationRestrict: + *access |= ACCESS_RESTRICT; + break; + case SpvDecorationAliased: + *access &= ~ACCESS_RESTRICT; + break; + case SpvDecorationVolatile: + *access |= ACCESS_VOLATILE; + break; + case SpvDecorationCoherent: + *access |= ACCESS_COHERENT; + break; + default: + vtn_fail_with_decoration("Unhandled decoration", dec->decoration); + } +} + void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -2710,7 +2796,13 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const bool untyped = opcode == SpvOpUntypedVariableKHR; struct vtn_type *ptr_type = vtn_get_type(b, w[1]); - struct vtn_type *data_type = untyped ? vtn_get_type(b, w[4]) : ptr_type->pointed; + struct vtn_type *data_type = + untyped && count > 4 ? 
vtn_get_type(b, w[4]) : ptr_type->pointed; + if (data_type == NULL) { + data_type = vtn_zalloc(b, struct vtn_type); + data_type->base_type = vtn_base_type_void; + data_type->type = glsl_void_type(); + } SpvStorageClass storage_class = w[3]; @@ -2946,6 +3038,62 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, break; } + case SpvOpBufferPointerEXT: { + struct vtn_type *res_type = vtn_get_type(b, w[1]); + struct vtn_value *val = vtn_untyped_value(b, w[1]); + struct vtn_value *src_val = vtn_value(b, w[3], vtn_value_type_pointer); + struct vtn_pointer *src = vtn_value_to_pointer(b, src_val); + + vtn_fail_if(res_type->base_type != vtn_base_type_pointer, + "Result Type must be a pointer type"); + + enum gl_access_qualifier access = 0; + vtn_foreach_decoration(b, val, buffer_ptr_decoration_cb, &access); + + VkSpirvResourceTypeFlagBitsEXT resource_type; + switch (res_type->storage_class) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + resource_type = VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT; + break; + case SpvStorageClassStorageBuffer: + if (access & ACCESS_NON_WRITEABLE) + resource_type = VK_SPIRV_RESOURCE_TYPE_READ_ONLY_STORAGE_BUFFER_BIT_EXT; + else + resource_type = VK_SPIRV_RESOURCE_TYPE_READ_WRITE_STORAGE_BUFFER_BIT_EXT; + break; + default: + vtn_fail("Result Type must be a pointer type with a Storage Class " + "of Uniform or StorageBuffer."); + } + + const nir_address_format addr_format = vtn_mode_to_address_format(b, + vtn_storage_class_to_mode(b, res_type->storage_class, NULL, NULL)); + + unsigned num_components = nir_address_format_num_components(addr_format); + unsigned bit_size = nir_address_format_bit_size(addr_format); + + struct vtn_type *buffer_type = vtn_zalloc(b, struct vtn_type); + buffer_type->base_type = vtn_base_type_buffer; + buffer_type->storage_class = res_type->storage_class; + buffer_type->type = nir_address_format_to_glsl_type(addr_format); + + /* buffer is always an untyped pointer */ + src = 
vtn_cast_pointer(b, src, buffer_type); + + /* We know the alignment from the API */ + src = vtn_align_pointer(b, src, b->options->buffer_descriptor_alignment); + + nir_deref_instr *src_deref = vtn_pointer_to_deref(b, src); + nir_def *ptr = nir_load_buffer_ptr_deref(&b->nb, num_components, bit_size, + &src_deref->def, + .access = access, + .resource_type = resource_type); + + vtn_push_pointer(b, w[2], vtn_pointer_from_ssa(b, ptr, res_type)); + break; + } + case SpvOpStore: { struct vtn_value *dest_val = vtn_pointer_value(b, w[1]); struct vtn_pointer *dest = vtn_value_to_pointer(b, dest_val); diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 9bff9092562..1e06f8ef910 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1389,7 +1389,7 @@ tu_get_properties(struct tu_physical_device *pdevice, props->samplerCaptureReplayDescriptorDataSize = 0; props->accelerationStructureCaptureReplayDescriptorDataSize = 0; /* Note: these sizes must match descriptor_size() */ - props->samplerDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; + props->EDBsamplerDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; props->combinedImageSamplerDescriptorSize = 2 * FDL6_TEX_CONST_DWORDS * 4; props->sampledImageDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; props->storageImageDescriptorSize = FDL6_TEX_CONST_DWORDS * 4; @@ -1535,7 +1535,7 @@ tu_get_properties(struct tu_physical_device *pdevice, props->conservativeRasterizationPostDepthCoverage = false; /* VK_EXT_fragment_density_map_offset */ - props->fragmentDensityOffsetGranularity = (VkExtent2D) { + props->fragmentDensityOffsetGranularity = (VkExtent2D) { TU_FDM_OFFSET_GRANULARITY, TU_FDM_OFFSET_GRANULARITY }; diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 04c96979922..e0dde31dc3e 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -1230,7 +1230,7 @@ 
lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie .imageViewCaptureReplayDescriptorDataSize = 0, .samplerCaptureReplayDescriptorDataSize = 0, .accelerationStructureCaptureReplayDescriptorDataSize = 0, - .samplerDescriptorSize = sizeof(struct lp_descriptor), + .EDBsamplerDescriptorSize = sizeof(struct lp_descriptor), .combinedImageSamplerDescriptorSize = sizeof(struct lp_descriptor), .sampledImageDescriptorSize = sizeof(struct lp_descriptor), .storageImageDescriptorSize = sizeof(struct lp_descriptor), diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 7d838fba1b3..709c84b1d58 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -1714,7 +1714,7 @@ get_properties(const struct anv_physical_device *pdevice, */ props->accelerationStructureCaptureReplayDescriptorDataSize = 0; - props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE; + props->EDBsamplerDescriptorSize = ANV_SAMPLER_STATE_SIZE; props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE, ANV_SURFACE_STATE_SIZE); props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE; diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 2ee4f194b27..539cd3bccba 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -1102,7 +1102,7 @@ nvk_get_device_properties(const struct nvk_instance *instance, .samplerCaptureReplayDescriptorDataSize = sizeof(struct nvk_sampler_capture), .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo - .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), + .EDBsamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor), .storageImageDescriptorSize 
= sizeof(struct nvk_storage_image_descriptor), diff --git a/src/vulkan/runtime/meson.build b/src/vulkan/runtime/meson.build index 7722b77b420..1f2e9426f57 100644 --- a/src/vulkan/runtime/meson.build +++ b/src/vulkan/runtime/meson.build @@ -262,6 +262,7 @@ vulkan_runtime_files = files( 'vk_meta_draw_rects.c', 'vk_nir.c', 'vk_nir_convert_ycbcr.c', + 'vk_nir_lower_descriptor_heaps.c', 'vk_pipeline.c', 'vk_pipeline_cache.c', 'vk_shader.c', diff --git a/src/vulkan/runtime/vk_buffer.h b/src/vulkan/runtime/vk_buffer.h index d9b7330da85..73abba977b5 100644 --- a/src/vulkan/runtime/vk_buffer.h +++ b/src/vulkan/runtime/vk_buffer.h @@ -86,6 +86,23 @@ vk_buffer_range(const struct vk_buffer *buffer, } } +static inline VkDeviceAddressRangeEXT +vk_buffer_address_range(const struct vk_buffer *buffer, + VkDeviceSize offset, VkDeviceSize range) +{ + /* Since we're returning a size along with the address, it's safe for this + * helper to automatically handle null descriptor cases by returning a zero + * address and size. 
+ */ + if (buffer == NULL || range == 0) + return (VkDeviceAddressRangeEXT) { .size = 0 }; + + return (VkDeviceAddressRangeEXT) { + .address = vk_buffer_address(buffer, offset), + .size = vk_buffer_range(buffer, offset, range), + }; +} + #ifdef __cplusplus } #endif diff --git a/src/vulkan/runtime/vk_device.c b/src/vulkan/runtime/vk_device.c index a2ffe734ff9..09ee230b5a8 100644 --- a/src/vulkan/runtime/vk_device.c +++ b/src/vulkan/runtime/vk_device.c @@ -636,6 +636,23 @@ vk_common_DeviceWaitIdle(VkDevice _device) return VK_SUCCESS; } +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_RegisterCustomBorderColorEXT(VkDevice device, + const VkSamplerCustomBorderColorCreateInfoEXT* pBorderColor, + VkBool32 requestIndex, + uint32_t *pIndex) +{ + if (requestIndex) + *pIndex = 0; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_UnregisterCustomBorderColorEXT(VkDevice device, + uint32_t index) +{ } + VkResult vk_device_copy_semaphore_payloads(struct vk_device *device, uint32_t wait_semaphore_count, diff --git a/src/vulkan/runtime/vk_limits.h b/src/vulkan/runtime/vk_limits.h index 1cb02eabfe8..6950766214a 100644 --- a/src/vulkan/runtime/vk_limits.h +++ b/src/vulkan/runtime/vk_limits.h @@ -96,4 +96,6 @@ */ #define MESA_VK_MAX_MULTIVIEW_VIEW_COUNT 32 +#define MESA_VK_MAX_CUSTOM_BORDER_COLOR ~0 + #endif /* VK_LIMITS_H */ diff --git a/src/vulkan/runtime/vk_nir.c b/src/vulkan/runtime/vk_nir.c index 391b07616e7..457a084e735 100644 --- a/src/vulkan/runtime/vk_nir.c +++ b/src/vulkan/runtime/vk_nir.c @@ -136,6 +136,19 @@ vk_spirv_to_nir(struct vk_device *device, spirv_options_local.debug.func = spirv_nir_debug; spirv_options_local.debug.private_data = (void *)device; + spirv_options_local.sampler_descriptor_size = + device->physical->properties.samplerDescriptorSize; + spirv_options_local.sampler_descriptor_alignment = + device->physical->properties.samplerDescriptorAlignment; + spirv_options_local.image_descriptor_size = + device->physical->properties.imageDescriptorSize; 
+ spirv_options_local.image_descriptor_alignment = + device->physical->properties.imageDescriptorAlignment; + spirv_options_local.buffer_descriptor_size = + device->physical->properties.bufferDescriptorSize; + spirv_options_local.buffer_descriptor_alignment = + device->physical->properties.bufferDescriptorAlignment; + uint32_t num_spec_entries = 0; struct nir_spirv_specialization *spec_entries = vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries); diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.c b/src/vulkan/runtime/vk_nir_convert_ycbcr.c index 2f25ed6103b..e688fa8da42 100644 --- a/src/vulkan/runtime/vk_nir_convert_ycbcr.c +++ b/src/vulkan/runtime/vk_nir_convert_ycbcr.c @@ -151,19 +151,33 @@ struct ycbcr_state { nir_builder *builder; nir_def *image_size; nir_tex_instr *origin_tex; - nir_deref_instr *tex_deref; + nir_tex_src tex_handle; const struct vk_ycbcr_conversion_state *conversion; const struct vk_format_ycbcr_info *format_ycbcr_info; }; /* TODO: we should probably replace this with a push constant/uniform. 
*/ static nir_def * -get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture) +get_texture_size(struct ycbcr_state *state) { - if (!state->image_size) { - nir_builder *b = state->builder; - state->image_size = nir_i2f32(b, nir_txs(b, .texture_deref = texture)); - } + if (state->image_size) + return state->image_size; + + nir_builder *b = state->builder; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + + tex->op = nir_texop_txs; + tex->sampler_dim = state->origin_tex->sampler_dim, + tex->is_array = state->origin_tex->is_array, + tex->is_shadow = state->origin_tex->is_shadow, + tex->dest_type = nir_type_int32; + + tex->src[0] = state->tex_handle; + + nir_def_init(&tex->instr, &tex->def, nir_tex_instr_dest_size(tex), 32); + nir_builder_instr_insert(b, &tex->instr); + + state->image_size = nir_i2f32(b, &tex->def); return state->image_size; } @@ -185,7 +199,7 @@ implicit_downsampled_coords(struct ycbcr_state *state, { nir_builder *b = state->builder; const struct vk_ycbcr_conversion_state *conversion = state->conversion; - nir_def *image_size = get_texture_size(state, state->tex_deref); + nir_def *image_size = get_texture_size(state); nir_def *comp[4] = { NULL, }; int c; @@ -247,6 +261,7 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state, tex->is_new_style_shadow = old_tex->is_new_style_shadow; tex->component = old_tex->component; + tex->embedded_sampler = old_tex->embedded_sampler; tex->texture_index = old_tex->texture_index; tex->sampler_index = old_tex->sampler_index; tex->is_array = old_tex->is_array; @@ -294,25 +309,40 @@ lower_ycbcr_tex_instr(nir_builder *b, nir_tex_instr *tex, void *_state) tex->op == nir_texop_lod) return false; - int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); - assert(deref_src_idx >= 0); - nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + nir_tex_src tex_handle; + const struct vk_ycbcr_conversion_state *conversion; + if (tex->embedded_sampler) { + const 
int heap_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_texture_heap_offset); + tex_handle = tex->src[heap_src_idx]; - nir_variable *var = nir_deref_instr_get_variable(deref); - uint32_t set = var->data.descriptor_set; - uint32_t binding = var->data.binding; - - assert(tex->texture_index == 0); - unsigned array_index = 0; - if (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - if (!nir_src_is_const(deref->arr.index)) + conversion = state->cb(state->cb_data, + VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS, + tex->sampler_index, 0); + } else { + const int deref_src_idx = + nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (deref_src_idx < 0) return false; - array_index = nir_src_as_uint(deref->arr.index); - } - const struct vk_ycbcr_conversion_state *conversion = - state->cb(state->cb_data, set, binding, array_index); + tex_handle = tex->src[deref_src_idx]; + nir_deref_instr *deref = nir_src_as_deref(tex_handle.src); + + nir_variable *var = nir_deref_instr_get_variable(deref); + uint32_t set = var->data.descriptor_set; + uint32_t binding = var->data.binding; + + assert(tex->texture_index == 0); + unsigned array_index = 0; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + if (!nir_src_is_const(deref->arr.index)) + return false; + array_index = nir_src_as_uint(deref->arr.index); + } + + conversion = state->cb(state->cb_data, set, binding, array_index); + } if (conversion == NULL) return false; @@ -357,7 +387,7 @@ lower_ycbcr_tex_instr(nir_builder *b, nir_tex_instr *tex, void *_state) struct ycbcr_state tex_state = { .builder = b, .origin_tex = tex, - .tex_deref = deref, + .tex_handle = tex_handle, .conversion = conversion, .format_ycbcr_info = format_ycbcr_info, }; diff --git a/src/vulkan/runtime/vk_nir_convert_ycbcr.h b/src/vulkan/runtime/vk_nir_convert_ycbcr.h index 0ff1c1b3e01..915c9cbd465 100644 --- a/src/vulkan/runtime/vk_nir_convert_ycbcr.h +++ 
b/src/vulkan/runtime/vk_nir_convert_ycbcr.h @@ -40,6 +40,12 @@ nir_convert_ycbcr_to_rgb(nir_builder *b, struct vk_ycbcr_conversion; +/** Passed as the set parameter to nir_vk_ycbcr_conversion_lookup_cb() to + * indicate that embedded samplers are being used and that binding is the + * index in the embedded sampler table. + */ +#define VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS UINT32_MAX + typedef const struct vk_ycbcr_conversion_state * (*nir_vk_ycbcr_conversion_lookup_cb)(const void *data, uint32_t set, uint32_t binding, uint32_t array_index); diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c new file mode 100644 index 00000000000..479cbcca1e1 --- /dev/null +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.c @@ -0,0 +1,1106 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "vk_nir_lower_descriptor_heaps.h" + +#include "vk_sampler.h" + +#include "nir_builder.h" +#include "util/u_dynarray.h" +#include "util/hash_table.h" + +static void +hash_embedded_sampler(struct mesa_blake3 *ctx, + const struct VkSamplerCreateInfo *info) +{ + if (info != NULL) { + struct vk_sampler_state state; + vk_sampler_state_init(&state, info); + _mesa_blake3_update(ctx, &state, sizeof(state)); + } +} + +void +vk_hash_descriptor_heap_mappings( + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + blake3_hash blake3_out) +{ + struct mesa_blake3 ctx; + _mesa_blake3_init(&ctx); + +#define HASH(ctx, x) _mesa_blake3_update(ctx, &(x), sizeof(x)) + + for (uint32_t i = 0; i < info->mappingCount; i++) { + const VkDescriptorSetAndBindingMappingEXT *mapping = &info->pMappings[i]; + HASH(&ctx, mapping->descriptorSet); + HASH(&ctx, mapping->firstBinding); + HASH(&ctx, mapping->bindingCount); + HASH(&ctx, mapping->resourceMask); + HASH(&ctx, mapping->source); + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: { + const 
VkDescriptorMappingSourceConstantOffsetEXT *data = + &mapping->sourceData.constantOffset; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: { + const VkDescriptorMappingSourcePushIndexEXT *data = + &mapping->sourceData.pushIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: { + const VkDescriptorMappingSourceIndirectIndexEXT *data = + &mapping->sourceData.indirectIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerAddressOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + const VkDescriptorMappingSourceHeapDataEXT *data = + &mapping->sourceData.heapData; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT: + HASH(&ctx, mapping->sourceData.pushDataOffset); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + HASH(&ctx, mapping->sourceData.pushAddressOffset); 
+ break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: { + const VkDescriptorMappingSourceIndirectAddressEXT *data = + &mapping->sourceData.indirectAddress; + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: { + const VkDescriptorMappingSourceIndirectIndexArrayEXT *data = + &mapping->sourceData.indirectIndexArray; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->pushOffset); + HASH(&ctx, data->addressOffset); + HASH(&ctx, data->heapIndexStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerPushOffset); + HASH(&ctx, data->samplerAddressOffset); + HASH(&ctx, data->samplerHeapIndexStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: { + const VkDescriptorMappingSourceShaderRecordIndexEXT *data = + &mapping->sourceData.shaderRecordIndex; + HASH(&ctx, data->heapOffset); + HASH(&ctx, data->shaderRecordOffset); + HASH(&ctx, data->heapIndexStride); + HASH(&ctx, data->heapArrayStride); + hash_embedded_sampler(&ctx, data->pEmbeddedSampler); + HASH(&ctx, data->useCombinedImageSamplerIndex); + HASH(&ctx, data->samplerHeapOffset); + HASH(&ctx, data->samplerShaderRecordOffset); + HASH(&ctx, data->samplerHeapIndexStride); + HASH(&ctx, data->samplerHeapArrayStride); + break; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + HASH(&ctx, mapping->sourceData.shaderRecordDataOffset); + break; + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: + HASH(&ctx, mapping->sourceData.shaderRecordAddressOffset); + break; + + default: + UNREACHABLE("Unsupported descriptor mapping source"); + } + } + + _mesa_blake3_final(&ctx, blake3_out); +} + +#undef HASH + +struct heap_mapping_ctx { + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info; + + /* Map from vk_sampler_state to 
indices */ + struct hash_table *sampler_idx_map; +}; + +static uint32_t +hash_sampler(const void *_s) +{ + const struct vk_sampler_state *s = _s; + return _mesa_hash_data(s, sizeof(*s)); +} + +static bool +samplers_equal(const void *_a, const void *_b) +{ + const struct vk_sampler_state *a = _a, *b = _b; + return !memcmp(a, b, sizeof(*a)); +} + +static uint32_t +add_embedded_sampler(struct heap_mapping_ctx *ctx, + const VkSamplerCreateInfo *info) +{ + struct vk_sampler_state key; + vk_sampler_state_init(&key, info); + + struct hash_entry *entry = + _mesa_hash_table_search(ctx->sampler_idx_map, &key); + if (entry != NULL) + return (uintptr_t)entry->data; + + uint32_t index = ctx->sampler_idx_map->entries; + + struct vk_sampler_state *state = + ralloc(ctx->sampler_idx_map, struct vk_sampler_state); + *state = key; + + _mesa_hash_table_insert(ctx->sampler_idx_map, state, + (void *)(uintptr_t)index); + + return index; +} + +static nir_def * +load_push(nir_builder *b, unsigned bit_size, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + return nir_load_push_constant(b, 1, bit_size, nir_imm_int(b, offset), + .range = offset + (bit_size / 8)); +} + +static nir_def * +load_indirect(nir_builder *b, unsigned bit_size, nir_def *addr, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + addr = nir_iadd_imm(b, addr, offset); + return nir_load_global_constant(b, 1, bit_size, addr); +} + +static nir_def * +load_shader_record(nir_builder *b, unsigned bit_size, unsigned offset) +{ + assert(bit_size % 8 == 0); + assert(offset % (bit_size / 8) == 0); + nir_def *addr = nir_iadd_imm(b, nir_load_shader_record_ptr(b), offset); + return nir_load_global_constant(b, 1, bit_size, addr); +} + +static nir_def * +unpack_combined_image_sampler(nir_builder *b, nir_def *combined, + bool is_sampler) +{ + assert(combined->bit_size == 32); + if (is_sampler) + return nir_ubitfield_extract_imm(b, combined, 20, 12); + else + 
return nir_ubitfield_extract_imm(b, combined, 0, 20); +} + +nir_def * +vk_build_descriptor_heap_offset(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + VkSpirvResourceTypeFlagBitsEXT resource_type, + uint32_t binding, nir_def *index, + bool is_sampler) +{ + assert(util_is_power_of_two_nonzero(resource_type)); + + if (index == NULL) + index = nir_imm_int(b, 0); + + assert(binding >= mapping->firstBinding); + const uint32_t rel_binding = binding - mapping->firstBinding; + assert(rel_binding < mapping->bindingCount); + nir_def *shader_index = nir_iadd_imm(b, index, rel_binding); + + const bool is_sampled_image = + resource_type == VK_SPIRV_RESOURCE_TYPE_COMBINED_SAMPLED_IMAGE_BIT_EXT; + + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: { + const VkDescriptorMappingSourceConstantOffsetEXT *data = + &mapping->sourceData.constantOffset; + + uint32_t heap_offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + heap_offset = data->samplerHeapOffset; + } else { + array_stride = data->heapArrayStride; + heap_offset = data->heapOffset; + } + + return nir_iadd_imm(b, nir_imul_imm(b, shader_index, array_stride), + heap_offset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: { + const VkDescriptorMappingSourcePushIndexEXT *data = + &mapping->sourceData.pushIndex; + + nir_def *push_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + push_index = load_push(b, 32, data->samplerPushOffset); + } else { + push_index = load_push(b, 32, data->pushOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + push_index = unpack_combined_image_sampler(b, push_index, is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *push_offset = + nir_imul_imm(b, push_index, 
data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, push_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *push_offset = + nir_imul_imm(b, push_index, data->heapIndexStride); + offset = nir_iadd_imm(b, push_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: { + const VkDescriptorMappingSourceIndirectIndexEXT *data = + &mapping->sourceData.indirectIndex; + + nir_def *indirect_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + nir_def *indirect_addr = load_push(b, 64, data->samplerPushOffset); + indirect_index = load_indirect(b, 32, indirect_addr, + data->samplerAddressOffset); + } else { + nir_def *indirect_addr = load_push(b, 64, data->pushOffset); + indirect_index = load_indirect(b, 32, indirect_addr, + data->addressOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + indirect_index = unpack_combined_image_sampler(b, indirect_index, + is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, indirect_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->heapIndexStride); + offset = nir_iadd_imm(b, indirect_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + const VkDescriptorMappingSourceHeapDataEXT *data = + &mapping->sourceData.heapData; + return nir_iadd_imm(b, load_push(b, 32, data->pushOffset), + data->heapOffset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: 
{ + const VkDescriptorMappingSourceIndirectIndexArrayEXT *data = + &mapping->sourceData.indirectIndexArray; + + nir_def *indirect_addr; + uint32_t addr_offset; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + indirect_addr = load_push(b, 64, data->samplerPushOffset); + addr_offset = data->samplerAddressOffset; + } else { + indirect_addr = load_push(b, 64, data->pushOffset); + addr_offset = data->addressOffset; + } + + /* The shader index goes into the indirect. */ + indirect_addr = nir_iadd(b, indirect_addr, + nir_u2u64(b, nir_imul_imm(b, shader_index, 4))); + nir_def *indirect_index = load_indirect(b, 32, indirect_addr, + addr_offset); + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + indirect_index = unpack_combined_image_sampler(b, indirect_index, + is_sampler); + + if (is_sampled_image && is_sampler) { + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->samplerHeapIndexStride); + return nir_iadd_imm(b, indirect_offset, data->samplerHeapOffset); + } else { + nir_def *indirect_offset = + nir_imul_imm(b, indirect_index, data->heapIndexStride); + return nir_iadd_imm(b, indirect_offset, data->heapOffset); + } + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: { + const VkDescriptorMappingSourceShaderRecordIndexEXT *data = + &mapping->sourceData.shaderRecordIndex; + + nir_def *record_index; + if (is_sampled_image && is_sampler && + !data->useCombinedImageSamplerIndex) { + record_index = load_shader_record(b, 32, data->samplerShaderRecordOffset); + } else { + record_index = load_shader_record(b, 32, data->shaderRecordOffset); + } + + if (data->useCombinedImageSamplerIndex && is_sampled_image) + record_index = unpack_combined_image_sampler(b, record_index, + is_sampler); + + nir_def *offset; + uint32_t array_stride; + if (is_sampled_image && is_sampler) { + array_stride = data->samplerHeapArrayStride; + nir_def *record_offset = + nir_imul_imm(b, record_index, 
data->samplerHeapIndexStride); + offset = nir_iadd_imm(b, record_offset, data->samplerHeapOffset); + } else { + array_stride = data->heapArrayStride; + nir_def *record_offset = + nir_imul_imm(b, record_index, data->heapIndexStride); + offset = nir_iadd_imm(b, record_offset, data->heapOffset); + } + + return nir_iadd(b, offset, nir_imul_imm(b, shader_index, array_stride)); + } + + default: + return NULL; + } +} + +nir_def * +vk_build_descriptor_heap_address(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + uint32_t binding, nir_def *index) +{ + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + return load_push(b, 64, mapping->sourceData.pushAddressOffset); + + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: { + const VkDescriptorMappingSourceIndirectAddressEXT *data = + &mapping->sourceData.indirectAddress; + + nir_def *addr = load_push(b, 64, data->pushOffset); + return load_indirect(b, 64, addr, data->addressOffset); + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + return nir_iadd_imm(b, nir_load_shader_record_ptr(b), + mapping->sourceData.shaderRecordDataOffset); + + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: + return load_shader_record(b, 64, + mapping->sourceData.shaderRecordAddressOffset); + + default: + return NULL; + } +} + +static nir_deref_instr * +deref_get_root_image_cast(nir_deref_instr *deref) +{ + while (true) { + nir_deref_instr *parent = nir_src_as_deref(deref->parent); + if (!parent || parent->deref_type == nir_deref_type_var) + break; + + deref = parent; + } + assert(deref->deref_type == nir_deref_type_cast); + + return deref; +} + +static nir_deref_instr * +deref_get_root_cast(nir_deref_instr *deref) +{ + while (true) { + if (deref->deref_type == nir_deref_type_var) + return NULL; + + nir_deref_instr *parent = nir_src_as_deref(deref->parent); + if (!parent) + break; + + deref = parent; + } + assert(deref->deref_type == nir_deref_type_cast); + + 
return deref; +} + +static bool +var_is_heap_ptr(nir_variable *var) +{ + return var->data.mode == nir_var_uniform && + (var->data.location == SYSTEM_VALUE_SAMPLER_HEAP_PTR || + var->data.location == SYSTEM_VALUE_RESOURCE_HEAP_PTR); +} + +static bool +deref_cast_is_heap_ptr(nir_deref_instr *deref) +{ + assert(deref->deref_type == nir_deref_type_cast); + nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent); + if (intrin == NULL) + return false; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + if (var == NULL || var->data.mode != nir_var_system_value) + return false; + + return var->data.location == SYSTEM_VALUE_SAMPLER_HEAP_PTR || + var->data.location == SYSTEM_VALUE_RESOURCE_HEAP_PTR; + } + + case nir_intrinsic_load_sampler_heap_ptr: + case nir_intrinsic_load_resource_heap_ptr: + return true; + + default: + return false; + } +} + +static bool +get_deref_resource_binding(nir_deref_instr *deref, + uint32_t *set, uint32_t *binding, + VkSpirvResourceTypeFlagBitsEXT *resource_type, + nir_def **index_out) +{ + nir_def *index = NULL; + if (deref->deref_type == nir_deref_type_array) { + index = deref->arr.index.ssa; + deref = nir_deref_instr_parent(deref); + } + + if (deref->deref_type != nir_deref_type_var) + return false; + + nir_variable *var = deref->var; + + if (var->data.mode != nir_var_uniform && var->data.mode != nir_var_image) + return false; + + /* This should only happen for internal meta shaders */ + if (var->data.resource_type == 0) + return false; + + *set = var->data.descriptor_set; + *binding = var->data.binding; + *resource_type = var->data.resource_type; + if (index_out != NULL) + *index_out = index; + + return true; +} + +static bool +get_buffer_resource_binding(nir_intrinsic_instr *desc_load, + uint32_t *set, uint32_t *binding, + VkSpirvResourceTypeFlagBitsEXT *resource_type) +{ + 
assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + while (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) + idx_intrin = nir_src_as_intrinsic(idx_intrin->src[0]); + + if (idx_intrin->intrinsic != nir_intrinsic_vulkan_resource_index) + return false; + + *set = nir_intrinsic_desc_set(idx_intrin); + *binding = nir_intrinsic_binding(idx_intrin); + *resource_type = nir_intrinsic_resource_type(idx_intrin); + + return true; +} + +static inline bool +buffer_resource_has_zero_index(nir_intrinsic_instr *desc_load) +{ + assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + if (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) + return false; + + assert(idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + if (!nir_src_is_const(idx_intrin->src[0])) + return false; + + return nir_src_as_uint(idx_intrin->src[0]) == 0; +} + +/* This assumes get_buffer_resource_binding() already succeeded */ +static nir_def * +build_buffer_resource_index(nir_builder *b, nir_intrinsic_instr *desc_load) +{ + assert(desc_load->intrinsic == nir_intrinsic_load_vulkan_descriptor); + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(desc_load->src[0]); + + nir_def *index = nir_imm_int(b, 0); + while (idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) { + index = nir_iadd(b, index, idx_intrin->src[1].ssa); + idx_intrin = nir_src_as_intrinsic(idx_intrin->src[0]); + } + + assert(idx_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + return nir_iadd(b, index, idx_intrin->src[0].ssa); +} + +/** Builds a buffer address for deref chain + * + * This assumes that you can chase the chain all the way back to the original + * vulkan_resource_index intrinsic. + * + * The cursor is not where you left it when this function returns. 
+ */ +static nir_def * +build_buffer_addr_for_deref(nir_builder *b, nir_def *root_addr, + nir_deref_instr *deref, + nir_address_format addr_format) +{ + nir_deref_instr *parent = nir_deref_instr_parent(deref); + if (parent) { + nir_def *addr = + build_buffer_addr_for_deref(b, root_addr, parent, addr_format); + + b->cursor = nir_before_instr(&deref->instr); + return nir_explicit_io_address_from_deref(b, deref, addr, addr_format); + } + + return root_addr; +} + +/* The cursor is not where you left it when this function returns. */ +static nir_def * +build_deref_heap_offset(nir_builder *b, nir_deref_instr *deref, + bool is_sampler, struct heap_mapping_ctx *ctx) +{ + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + nir_def *index; + if (get_deref_resource_binding(deref, &set, &binding, + &resource_type, &index)) { + if (ctx->info == NULL) + return NULL; + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + assert(mapping != NULL); + if (mapping == NULL) + return NULL; + + b->cursor = nir_before_instr(&deref->instr); + + if (index == NULL) + index = nir_imm_int(b, 0); + + return vk_build_descriptor_heap_offset(b, mapping, resource_type, + binding, index, is_sampler); + } else { + nir_deref_instr *root_cast = deref_get_root_image_cast(deref); + if (root_cast == NULL) + return NULL; + + nir_variable *var = nir_deref_instr_get_variable(nir_deref_instr_parent(root_cast)); + assert(var != NULL); + if (var->data.mode != nir_var_uniform || + (var->data.location != SYSTEM_VALUE_SAMPLER_HEAP_PTR && + var->data.location != SYSTEM_VALUE_RESOURCE_HEAP_PTR)) + return NULL; + + /* We're building an offset. 
It starts at zero */
+      b->cursor = nir_before_instr(&root_cast->instr);
+      nir_def *base_addr = nir_imm_int(b, 0);
+
+      return build_buffer_addr_for_deref(b, base_addr, deref,
+                                         nir_address_format_32bit_offset);
+   }
+}
+
+static const VkSamplerCreateInfo *
+get_deref_embedded_sampler(nir_deref_instr *sampler,
+                           struct heap_mapping_ctx *ctx)
+{
+   if (ctx->info == NULL)
+      return NULL;
+
+   uint32_t set, binding;
+   VkSpirvResourceTypeFlagBitsEXT resource_type;
+   if (!get_deref_resource_binding(sampler, &set, &binding,
+                                   &resource_type, NULL))
+      return NULL;
+
+   const VkDescriptorSetAndBindingMappingEXT *mapping =
+      vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type);
+
+   return vk_descriptor_heap_embedded_sampler(mapping);
+}
+
+static bool
+lower_heaps_tex(nir_builder *b, nir_tex_instr *tex,
+                struct heap_mapping_ctx *ctx)
+{
+   const int texture_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   const int sampler_src_idx =
+      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   bool progress = false;
+
+   nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
+   assert(texture != NULL);
+
+   {
+      nir_def *heap_offset = build_deref_heap_offset(b, texture, false, ctx);
+      if (heap_offset != NULL) {
+         nir_src_rewrite(&tex->src[texture_src_idx].src, heap_offset);
+         tex->src[texture_src_idx].src_type = nir_tex_src_texture_heap_offset;
+         progress = true;
+      }
+   }
+
+   if (nir_tex_instr_need_sampler(tex)) {
+      /* If this is a combined image/sampler, we may only have an image deref
+       * source and it's also the sampler deref.
+       */
+      nir_deref_instr *sampler = sampler_src_idx < 0 ?
texture : + nir_src_as_deref(tex->src[sampler_src_idx].src); + + const VkSamplerCreateInfo *embedded_sampler = + get_deref_embedded_sampler(sampler, ctx); + if (embedded_sampler == NULL) { + nir_def *heap_offset = build_deref_heap_offset(b, sampler, true, ctx); + if (heap_offset != NULL) { + nir_src_rewrite(&tex->src[sampler_src_idx].src, heap_offset); + tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_heap_offset; + progress = true; + } + } else { + nir_tex_instr_remove_src(tex, sampler_src_idx); + tex->embedded_sampler = true; + tex->sampler_index = add_embedded_sampler(ctx, embedded_sampler); + b->shader->info.uses_embedded_samplers = true; + progress = true; + } + } + + /* Remove unused sampler sources so we don't accidentally reference things + * that don't actually exist. The driver can add it back in if it really + * needs it. + */ + if (progress && sampler_src_idx >= 0 && !nir_tex_instr_need_sampler(tex)) + nir_tex_instr_remove_src(tex, sampler_src_idx); + + return progress; +} + +static bool +lower_heaps_image(nir_builder *b, nir_intrinsic_instr *intrin, + struct heap_mapping_ctx *ctx) +{ + nir_deref_instr *image = nir_src_as_deref(intrin->src[0]); + nir_def *heap_offset = build_deref_heap_offset(b, image, false, ctx); + if (heap_offset == NULL) + return false; + + nir_rewrite_image_intrinsic(intrin, heap_offset, false); + + /* TODO: Roll this into nir_rewrite_image_intrinsic? 
*/
+   switch (intrin->intrinsic) {
+#define CASE(op)                                            \
+   case nir_intrinsic_image_##op:                           \
+      intrin->intrinsic = nir_intrinsic_image_heap_##op;    \
+      break;
+   CASE(load)
+   CASE(sparse_load)
+   CASE(store)
+   CASE(atomic)
+   CASE(atomic_swap)
+   CASE(size)
+   CASE(samples)
+   CASE(load_raw_intel)
+   CASE(store_raw_intel)
+   CASE(fragment_mask_load_amd)
+   CASE(store_block_agx)
+#undef CASE
+   default:
+      UNREACHABLE("Unhandled image intrinsic");
+   }
+
+   return true;
+}
+
+static bool
+try_lower_heaps_deref_access(nir_builder *b, nir_intrinsic_instr *intrin,
+                             struct heap_mapping_ctx *ctx)
+{
+   if (ctx->info == NULL)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_deref_instr *root_cast = deref_get_root_cast(deref);
+   if (root_cast == NULL)
+      return false;
+
+   nir_intrinsic_instr *desc_load = nir_src_as_intrinsic(root_cast->parent);
+   if (desc_load == NULL ||
+       desc_load->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   uint32_t set, binding;
+   VkSpirvResourceTypeFlagBitsEXT resource_type;
+   if (!get_buffer_resource_binding(desc_load, &set, &binding, &resource_type))
+      return false;
+
+   const VkDescriptorSetAndBindingMappingEXT *mapping =
+      vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type);
+   if (mapping == NULL)
+      return false;
+
+   switch (mapping->source) {
+   case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT: {
+      assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+      assert(intrin->intrinsic == nir_intrinsic_load_deref);
+      assert(buffer_resource_has_zero_index(desc_load));
+
+      b->cursor = nir_before_instr(&desc_load->instr);
+      nir_def *offset = nir_imm_int(b, mapping->sourceData.pushDataOffset);
+
+      /* This moves the cursor */
+      offset = build_buffer_addr_for_deref(b, offset, deref,
+                                           nir_address_format_32bit_offset);
+
+      const uint32_t range = mapping->sourceData.pushDataOffset +
+                             glsl_get_explicit_size(root_cast->type, false);
+
+      b->cursor = nir_before_instr(&intrin->instr);
+      nir_def *val =
nir_load_push_constant(b, intrin->def.num_components, + intrin->def.bit_size, + offset, .range = range); + nir_def_replace(&intrin->def, val); + return true; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT: { + assert(nir_deref_mode_is(deref, nir_var_mem_ubo)); + assert(intrin->intrinsic == nir_intrinsic_load_deref); + assert(buffer_resource_has_zero_index(desc_load)); + + b->cursor = nir_before_instr(&desc_load->instr); + nir_def *heap_offset = + vk_build_descriptor_heap_offset(b, mapping, resource_type, binding, + NULL /* index */, + false /* is_sampler */); + + /* This moves the cursor */ + heap_offset = build_buffer_addr_for_deref(b, heap_offset, deref, + nir_address_format_32bit_offset); + + uint32_t align_mul, align_offset; + if (!nir_get_explicit_deref_align(deref, true, &align_mul, + &align_offset)) { + /* If we don't have an alignment from the deref, assume scalar */ + assert(glsl_type_is_vector_or_scalar(deref->type) || + glsl_type_is_matrix(deref->type)); + align_mul = glsl_type_is_boolean(deref->type) ? 
+ 4 : glsl_get_bit_size(deref->type) / 8; + align_offset = 0; + } + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *val = nir_load_resource_heap_data(b, intrin->def.num_components, + intrin->def.bit_size, + heap_offset, + .align_mul = align_mul, + .align_offset = align_offset); + nir_def_replace(&intrin->def, val); + return true; + } + + case VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_ADDRESS_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_INDIRECT_ADDRESS_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_DATA_EXT: + case VK_DESCRIPTOR_MAPPING_SOURCE_SHADER_RECORD_ADDRESS_EXT: { + b->cursor = nir_before_instr(&desc_load->instr); + + nir_def *index = build_buffer_resource_index(b, desc_load); + nir_def *addr = + vk_build_descriptor_heap_address(b, mapping, binding, index); + + /* This moves the cursor */ + addr = build_buffer_addr_for_deref(b, addr, deref, + nir_address_format_64bit_global); + + b->cursor = nir_before_instr(&intrin->instr); + nir_lower_explicit_io_instr(b, intrin, addr, + nir_address_format_64bit_global); + return true; + } + + default: + /* We could also handle descriptor offset mapping sources here but + * there's no point. They access a real descriptor so we don't need to + * rewrite them to a different address format like we did for UBOs + * above. We can handle them in lower_load_descriptors. + */ + return false; + } +} + +static inline nir_variable * +get_variable(const nir_deref_instr *deref) +{ + while (deref->deref_type != nir_deref_type_var) { + deref = nir_deref_instr_parent(deref); + if (deref == NULL) + return NULL; + } + + return deref->var; +} + +static bool +lower_heaps_load_buffer_ptr(nir_builder *b, nir_intrinsic_instr *ptr_load, + struct heap_mapping_ctx *ctx) +{ + assert(ptr_load->intrinsic == nir_intrinsic_load_buffer_ptr_deref); + nir_deref_instr *deref = nir_src_as_deref(ptr_load->src[0]); + + nir_variable *var = get_variable(deref); + if (var == NULL || !var_is_heap_ptr(var)) + return false; + + /* We're building an offset. 
It starts at zero */ + b->cursor = nir_before_impl(b->impl); + nir_def *heap_base_offset = nir_imm_int(b, 0); + + /* This moves the cursor */ + nir_def *heap_offset = + build_buffer_addr_for_deref(b, heap_base_offset, deref, + nir_address_format_32bit_offset); + + const VkSpirvResourceTypeFlagBitsEXT resource_type = + nir_intrinsic_resource_type(ptr_load); + + b->cursor = nir_before_instr(&ptr_load->instr); + nir_def *desc = nir_load_heap_descriptor(b, ptr_load->def.num_components, + ptr_load->def.bit_size, + heap_offset, + .resource_type = resource_type); + + nir_def_replace(&ptr_load->def, desc); + + return true; +} + +static bool +lower_heaps_load_descriptor(nir_builder *b, nir_intrinsic_instr *desc_load, + struct heap_mapping_ctx *ctx) +{ + if (ctx->info == NULL) + return false; + + uint32_t set, binding; + VkSpirvResourceTypeFlagBitsEXT resource_type; + if (!get_buffer_resource_binding(desc_load, &set, &binding, &resource_type)) + return false; /* This must be old school variable pointers */ + + const VkDescriptorSetAndBindingMappingEXT *mapping = + vk_descriptor_heap_mapping(ctx->info, set, binding, resource_type); + if (mapping == NULL) + return false; /* Descriptor sets */ + + /* These have to be handled by try_lower_deref_access() */ + if (mapping->source == VK_DESCRIPTOR_MAPPING_SOURCE_PUSH_DATA_EXT || + mapping->source == VK_DESCRIPTOR_MAPPING_SOURCE_RESOURCE_HEAP_DATA_EXT) { + assert(resource_type == VK_SPIRV_RESOURCE_TYPE_UNIFORM_BUFFER_BIT_EXT); + return false; + } + + b->cursor = nir_before_instr(&desc_load->instr); + nir_def *index = build_buffer_resource_index(b, desc_load); + + /* There are a few mapping sources that are allowed for SSBOs and + * acceleration structures which use addresses. If it's an acceleration + * structure or try_lower_deref_access() fails to catch it, we have to + * load the address and ask the driver to convert the address to a + * descriptor. 
+ */ + nir_def *addr = vk_build_descriptor_heap_address(b, mapping, binding, index); + if (addr != NULL) { + nir_def *desc = + nir_global_addr_to_descriptor(b, desc_load->def.num_components, + desc_load->def.bit_size, addr, + .resource_type = resource_type); + nir_def_replace(&desc_load->def, desc); + return true; + } + + /* Everything else is an offset */ + nir_def *heap_offset = + vk_build_descriptor_heap_offset(b, mapping, resource_type, binding, + index, false /* is_sampler */); + nir_def *desc = nir_load_heap_descriptor(b, desc_load->def.num_components, + desc_load->def.bit_size, + heap_offset, + .resource_type = resource_type); + + nir_def_replace(&desc_load->def, desc); + + return true; +} + +static bool +lower_heaps_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + struct heap_mapping_ctx *ctx) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: + case nir_intrinsic_image_deref_fragment_mask_load_amd: + case nir_intrinsic_image_deref_store_block_agx: + return lower_heaps_image(b, intrin, ctx); + + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_load_deref_block_intel: + case nir_intrinsic_store_deref_block_intel: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: + return try_lower_heaps_deref_access(b, intrin, ctx); + + case nir_intrinsic_load_buffer_ptr_deref: + return lower_heaps_load_buffer_ptr(b, intrin, ctx); + + case nir_intrinsic_load_vulkan_descriptor: + return lower_heaps_load_descriptor(b, intrin, ctx); + + default: + return false; + } +} + +static bool +lower_heaps_instr(nir_builder *b, nir_instr *instr, void 
*data) +{ + switch (instr->type) { + case nir_instr_type_tex: + return lower_heaps_tex(b, nir_instr_as_tex(instr), data); + case nir_instr_type_intrinsic: + return lower_heaps_intrin(b, nir_instr_as_intrinsic(instr), data); + default: + return false; + } +} + +bool +vk_nir_lower_descriptor_heaps( + nir_shader *nir, + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping, + struct vk_sampler_state_array *embedded_samplers_out) +{ + struct heap_mapping_ctx ctx = { + .info = mapping, + .sampler_idx_map = _mesa_hash_table_create(NULL, hash_sampler, + samplers_equal), + }; + + bool progress = + nir_shader_instructions_pass(nir, lower_heaps_instr, + nir_metadata_control_flow, &ctx); + + memset(embedded_samplers_out, 0, sizeof(*embedded_samplers_out)); + + const uint32_t embedded_sampler_count = ctx.sampler_idx_map->entries; + if (embedded_sampler_count > 0) { + embedded_samplers_out->sampler_count = embedded_sampler_count; + embedded_samplers_out->samplers = + malloc(embedded_sampler_count * sizeof(struct vk_sampler_state)); + hash_table_foreach(ctx.sampler_idx_map, entries) { + const struct vk_sampler_state *state = entries->key; + const uint32_t index = (uintptr_t)entries->data; + embedded_samplers_out->samplers[index] = *state; + } + } + + ralloc_free(ctx.sampler_idx_map); + + return progress; +} diff --git a/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h new file mode 100644 index 00000000000..93a79543291 --- /dev/null +++ b/src/vulkan/runtime/vk_nir_lower_descriptor_heaps.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef VK_NIR_LOWER_DESCRIPTOR_HEAP_MAPPINGS +#define VK_NIR_LOWER_DESCRIPTOR_HEAP_MAPPINGS + +#include "nir.h" +#include <vulkan/vulkan_core.h> +#include "util/mesa-blake3.h" + +static inline const VkDescriptorSetAndBindingMappingEXT * +vk_descriptor_heap_mapping(const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + uint32_t set, uint32_t binding, 
+ VkSpirvResourceTypeFlagBitsEXT resource_type) +{ + assert(util_is_power_of_two_nonzero(resource_type)); + + for (uint32_t i = 0; i < info->mappingCount; i++) { + const VkDescriptorSetAndBindingMappingEXT *mapping = &info->pMappings[i]; + const uint32_t begin_binding = mapping->firstBinding; + const uint32_t end_binding = + (mapping->firstBinding + mapping->bindingCount) < mapping->firstBinding ? + UINT32_MAX : (mapping->firstBinding + mapping->bindingCount - 1) ; + + if (mapping->descriptorSet == set && + binding >= begin_binding && binding <= end_binding && + mapping->resourceMask & resource_type) + return mapping; + } + + return NULL; +} + +static inline const VkSamplerCreateInfo * +vk_descriptor_heap_embedded_sampler(const VkDescriptorSetAndBindingMappingEXT *mapping) +{ + switch (mapping->source) { + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_CONSTANT_OFFSET_EXT: + return mapping->sourceData.constantOffset.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_PUSH_INDEX_EXT: + return mapping->sourceData.pushIndex.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_EXT: + return mapping->sourceData.indirectIndex.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_INDIRECT_INDEX_ARRAY_EXT: + return mapping->sourceData.indirectIndexArray.pEmbeddedSampler; + case VK_DESCRIPTOR_MAPPING_SOURCE_HEAP_WITH_SHADER_RECORD_INDEX_EXT: + return mapping->sourceData.shaderRecordIndex.pEmbeddedSampler; + default: + return NULL; + } +} + +void vk_hash_descriptor_heap_mappings( + const VkShaderDescriptorSetAndBindingMappingInfoEXT *info, + blake3_hash blake3_out); + +nir_def * +vk_build_descriptor_heap_offset(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + VkSpirvResourceTypeFlagBitsEXT resource_type, + uint32_t binding, nir_def *index, + bool is_sampler); +nir_def * +vk_build_descriptor_heap_address(nir_builder *b, + const VkDescriptorSetAndBindingMappingEXT *mapping, + uint32_t binding, nir_def *index); 
+ +struct vk_sampler_state_array { + struct vk_sampler_state *samplers; + uint32_t sampler_count; +}; + +static inline void +vk_sampler_state_array_finish(struct vk_sampler_state_array *arr) +{ + free(arr->samplers); +} + +bool vk_nir_lower_descriptor_heaps( + nir_shader *nir, + const VkShaderDescriptorSetAndBindingMappingInfoEXT *mapping, + struct vk_sampler_state_array *embedded_samplers_out); + +#endif diff --git a/src/vulkan/runtime/vk_physical_device.c b/src/vulkan/runtime/vk_physical_device.c index ad11a03810a..d272d05979e 100644 --- a/src/vulkan/runtime/vk_physical_device.c +++ b/src/vulkan/runtime/vk_physical_device.c @@ -324,3 +324,34 @@ vk_common_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, return vk_outarray_status(&out); } + +VKAPI_ATTR VkDeviceSize VKAPI_CALL +vk_common_GetPhysicalDeviceDescriptorSizeEXT(VkPhysicalDevice physicalDevice, + VkDescriptorType descriptorType) +{ + VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); + + switch (descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return pdevice->properties.samplerDescriptorSize; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM: + case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM: + return pdevice->properties.imageDescriptorSize; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV: + return pdevice->properties.bufferDescriptorSize; + + default: + UNREACHABLE("Invalid descriptor type in GetPhysicalDeviceDescriptorSizeEXT"); + return 0; + } +} diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index 0ffb5b8a712..c049bb0d6cf 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ 
b/src/vulkan/runtime/vk_pipeline.c @@ -31,9 +31,11 @@ #include "vk_graphics_state.h" #include "vk_log.h" #include "vk_nir.h" +#include "vk_nir_lower_descriptor_heaps.h" #include "vk_physical_device.h" #include "vk_physical_device_features.h" #include "vk_pipeline_layout.h" +#include "vk_sampler.h" #include "vk_shader.h" #include "vk_shader_module.h" #include "vk_util.h" @@ -296,6 +298,16 @@ vk_pipeline_hash_shader_stage_blake3(VkPipelineCreateFlags2KHR pipeline_flags, info->pSpecializationInfo->dataSize); } + const VkShaderDescriptorSetAndBindingMappingInfoEXT *desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + if (desc_map != NULL) { + blake3_hash desc_map_blake3; + vk_hash_descriptor_heap_mappings(desc_map, desc_map_blake3); + + _mesa_blake3_update(&ctx, desc_map_blake3, sizeof(desc_map_blake3)); + } + uint32_t req_subgroup_size = get_required_subgroup_size(info); _mesa_blake3_update(&ctx, &req_subgroup_size, sizeof(req_subgroup_size)); @@ -700,6 +712,9 @@ struct vk_pipeline_precomp_shader { /* Tessellation info if the shader is a tessellation shader */ struct vk_pipeline_tess_info tess; + uint32_t embedded_sampler_count; + struct vk_sampler_state *embedded_samplers; + struct blob nir_blob; }; @@ -730,7 +745,9 @@ static struct vk_pipeline_precomp_shader * vk_pipeline_precomp_shader_create(struct vk_device *device, const void *key_data, size_t key_size, const struct vk_pipeline_robustness_state *rs, - nir_shader *nir) + nir_shader *nir, + const uint32_t embedded_sampler_count, + const struct vk_sampler_state *embedded_samplers) { struct blob blob; blob_init(&blob); @@ -740,10 +757,12 @@ vk_pipeline_precomp_shader_create(struct vk_device *device, if (blob.out_of_memory) goto fail_blob; - struct vk_pipeline_precomp_shader *shader = - vk_zalloc(&device->alloc, sizeof(*shader), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (shader == NULL) + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct 
vk_pipeline_precomp_shader, shader, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_sampler_state, samplers, + embedded_sampler_count); + if (!vk_multialloc_zalloc(&ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) goto fail_blob; assert(sizeof(shader->cache_key) == key_size); @@ -759,6 +778,11 @@ vk_pipeline_precomp_shader_create(struct vk_device *device, vk_pipeline_gather_nir_tess_info(nir, &shader->tess); + shader->embedded_sampler_count = embedded_sampler_count; + shader->embedded_samplers = samplers; + for (uint32_t i = 0; i < embedded_sampler_count; i++) + shader->embedded_samplers[i] = embedded_samplers[i]; + shader->nir_blob = blob; return shader; @@ -779,6 +803,10 @@ vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj, blob_write_uint32(blob, shader->stage); blob_write_bytes(blob, &shader->rs, sizeof(shader->rs)); blob_write_bytes(blob, &shader->tess, sizeof(shader->tess)); + blob_write_uint32(blob, shader->embedded_sampler_count); + blob_write_bytes(blob, shader->embedded_samplers, + shader->embedded_sampler_count * + sizeof(*shader->embedded_samplers)); blob_write_uint64(blob, shader->nir_blob.size); blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size); @@ -790,10 +818,36 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, const void *key_data, size_t key_size, struct blob_reader *blob) { - struct vk_pipeline_precomp_shader *shader = - vk_zalloc(&device->alloc, sizeof(*shader), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (shader == NULL) + const mesa_shader_stage stage = blob_read_uint32(blob); + + struct vk_pipeline_robustness_state rs; + blob_copy_bytes(blob, &rs, sizeof(rs)); + + struct vk_pipeline_tess_info tess; + blob_copy_bytes(blob, &tess, sizeof(tess)); + + const uint32_t embedded_sampler_count = blob_read_uint32(blob); + const struct vk_sampler_state *embedded_samplers = + blob_read_bytes(blob, embedded_sampler_count * + sizeof(*embedded_samplers)); + + blake3_hash blake3; + 
blob_copy_bytes(blob, blake3, sizeof(blake3)); + + uint64_t nir_size = blob_read_uint64(blob); + if (blob->overrun || nir_size > SIZE_MAX) + return NULL; + + const void *nir_data = blob_read_bytes(blob, nir_size); + if (blob->overrun) + return NULL; + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_pipeline_precomp_shader, shader, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_sampler_state, samplers, + embedded_sampler_count); + if (!vk_multialloc_zalloc(&ma, &device->alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) return NULL; assert(sizeof(shader->cache_key) == key_size); @@ -804,28 +858,25 @@ vk_pipeline_precomp_shader_deserialize(struct vk_device *device, shader->cache_key, sizeof(shader->cache_key)); - shader->stage = blob_read_uint32(blob); - blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs)); - blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess)); + shader->stage = stage; + shader->rs = rs; + shader->tess = tess; - uint64_t nir_size = blob_read_uint64(blob); - if (blob->overrun || nir_size > SIZE_MAX) - goto fail_shader; + shader->embedded_sampler_count = embedded_sampler_count; + shader->embedded_samplers = samplers; + for (uint32_t i = 0; i < embedded_sampler_count; i++) + shader->embedded_samplers[i] = embedded_samplers[i]; - const void *nir_data = blob_read_bytes(blob, nir_size); - if (blob->overrun) - goto fail_shader; + memcpy(shader->cache_key, blake3, sizeof(blake3)); blob_init(&shader->nir_blob); blob_write_bytes(&shader->nir_blob, nir_data, nir_size); if (shader->nir_blob.out_of_memory) - goto fail_nir_blob; + goto fail_cache_obj; return &shader->cache_obj; -fail_nir_blob: - blob_finish(&shader->nir_blob); -fail_shader: +fail_cache_obj: vk_pipeline_cache_object_finish(&shader->cache_obj); vk_free(&device->alloc, shader); @@ -957,10 +1008,26 @@ vk_pipeline_precompile_shader(struct vk_device *device, if (ops->preprocess_nir != NULL) ops->preprocess_nir(device->physical, nir, &rs); + const VkShaderDescriptorSetAndBindingMappingInfoEXT 
*desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + struct vk_sampler_state_array embedded_samplers; + bool heaps_progress = false; + NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, + desc_map, &embedded_samplers); + if (heaps_progress) { + nir->info.use_descriptor_heap = true; + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_uniform | nir_var_image, NULL); + NIR_PASS(_, nir, nir_opt_dce); + } + stage->precomp = vk_pipeline_precomp_shader_create(device, stage->precomp_key, sizeof(stage->precomp_key), - &rs, nir); + &rs, nir, + embedded_samplers.sampler_count, + embedded_samplers.samplers); ralloc_free(nir); if (stage->precomp == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1782,6 +1849,8 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, .robustness = &stage->precomp->rs, .set_layout_count = compile_info->set_layout_count, .set_layouts = compile_info->set_layouts, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? 
push_range : NULL, }; @@ -2348,11 +2417,13 @@ vk_get_compute_pipeline_compile_info(struct vk_pipeline_stage *stage, features_blake3); _mesa_blake3_update(&blake3_ctx, features_blake3, sizeof(features_blake3)); - for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { - if (pipeline_layout->set_layouts[i] != NULL) { - _mesa_blake3_update(&blake3_ctx, - pipeline_layout->set_layouts[i]->blake3, - sizeof(pipeline_layout->set_layouts[i]->blake3)); + if (pipeline_layout != NULL) { + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { + _mesa_blake3_update(&blake3_ctx, + pipeline_layout->set_layouts[i]->blake3, + sizeof(pipeline_layout->set_layouts[i]->blake3)); + } } } if (push_range != NULL) @@ -2415,8 +2486,10 @@ vk_pipeline_compile_compute_stage(struct vk_device *device, .next_stage_mask = 0, .nir = nir, .robustness = &stage->precomp->rs, - .set_layout_count = pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout ? pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? 
push_range : NULL, }; @@ -2798,8 +2871,8 @@ hash_rt_parameters(struct mesa_blake3 *blake3_ctx, _mesa_blake3_update(blake3_ctx, &shader_flags, sizeof(shader_flags)); _mesa_blake3_update(blake3_ctx, &rt_flags, sizeof(rt_flags)); - for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { - if (pipeline_layout->set_layouts[i] != NULL) { + for (uint32_t i = 0; pipeline_layout != NULL && i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { _mesa_blake3_update(blake3_ctx, pipeline_layout->set_layouts[i]->blake3, sizeof(pipeline_layout->set_layouts[i]->blake3)); @@ -3209,8 +3282,10 @@ vk_pipeline_compile_rt_shader(struct vk_device *device, .next_stage_mask = 0, .nir = nir, .robustness = &stage->precomp->rs, - .set_layout_count = pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout != NULL ? pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = stage->precomp->embedded_sampler_count, + .embedded_samplers = stage->precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? push_range : NULL, }; @@ -3319,8 +3394,10 @@ vk_pipeline_compile_rt_shader_group(struct vk_device *device, .next_stage_mask = 0, .nir = vk_pipeline_precomp_shader_get_nir(precomp, nir_options), .robustness = &precomp->rs, - .set_layout_count = pipeline_layout->set_count, - .set_layouts = pipeline_layout->set_layouts, + .set_layout_count = pipeline_layout != NULL ? pipeline_layout->set_count : 0, + .set_layouts = pipeline_layout != NULL ? pipeline_layout->set_layouts : NULL, + .embedded_sampler_count = precomp->embedded_sampler_count, + .embedded_samplers = precomp->embedded_samplers, .push_constant_range_count = push_range != NULL, .push_constant_ranges = push_range != NULL ? 
push_range : NULL, }; diff --git a/src/vulkan/runtime/vk_sampler.c b/src/vulkan/runtime/vk_sampler.c index d612290b27b..b57f91cf635 100644 --- a/src/vulkan/runtime/vk_sampler.c +++ b/src/vulkan/runtime/vk_sampler.c @@ -25,6 +25,7 @@ #include "vk_sampler.h" #include "vk_device.h" #include "vk_format.h" +#include "vk_limits.h" #include "vk_util.h" #include "vk_ycbcr_conversion.h" @@ -121,6 +122,7 @@ vk_sampler_state_init(struct vk_sampler_state *state, if (!vk_border_color_is_custom(pCreateInfo->borderColor)) state->border_color_value = vk_border_color_value(pCreateInfo->borderColor); state->reduction_mode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; + state->border_color_index = MESA_VK_MAX_CUSTOM_BORDER_COLOR; vk_foreach_struct_const(ext, pCreateInfo->pNext) { switch (ext->sType) { @@ -173,6 +175,12 @@ vk_sampler_state_init(struct vk_sampler_state *state, break; } + case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_INDEX_CREATE_INFO_EXT: { + const VkSamplerCustomBorderColorIndexCreateInfoEXT *bc_info = (void *)ext; + state->border_color_index = bc_info->index; + break; + } + default: break; } diff --git a/src/vulkan/runtime/vk_sampler.h b/src/vulkan/runtime/vk_sampler.h index 12039bb27fa..fca93f48830 100644 --- a/src/vulkan/runtime/vk_sampler.h +++ b/src/vulkan/runtime/vk_sampler.h @@ -90,6 +90,11 @@ struct vk_sampler_state { */ VkClearColorValue border_color_value; + /** + * VkSamplerCustomBorderColorIndexCreateInfo::index. 
+ */ + uint32_t border_color_index; + /** VkSamplerBorderColorComponentMappingCreateInfoEXT::components */ VkComponentMapping border_color_component_mapping; diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c index 80431a5c046..5ee20396712 100644 --- a/src/vulkan/runtime/vk_shader.c +++ b/src/vulkan/runtime/vk_shader.c @@ -29,6 +29,7 @@ #include "vk_descriptor_set_layout.h" #include "vk_device.h" #include "vk_nir.h" +#include "vk_nir_lower_descriptor_heaps.h" #include "vk_physical_device.h" #include "vk_physical_device_features.h" #include "vk_pipeline.h" @@ -245,7 +246,8 @@ cmp_stage_idx(const void *_a, const void *_b) static nir_shader * vk_shader_to_nir(struct vk_device *device, const VkShaderCreateInfoEXT *info, - const struct vk_pipeline_robustness_state *rs) + const struct vk_pipeline_robustness_state *rs, + struct vk_sampler_state_array *embedded_samplers_out) { const struct vk_device_shader_ops *ops = device->shader_ops; @@ -274,6 +276,20 @@ vk_shader_to_nir(struct vk_device *device, if (ops->preprocess_nir != NULL) ops->preprocess_nir(device->physical, nir, rs); + const VkShaderDescriptorSetAndBindingMappingInfoEXT *desc_map = + vk_find_struct_const(info->pNext, + SHADER_DESCRIPTOR_SET_AND_BINDING_MAPPING_INFO_EXT); + + bool heaps_progress = false; + NIR_PASS(heaps_progress, nir, vk_nir_lower_descriptor_heaps, + desc_map, embedded_samplers_out); + if (heaps_progress) { + nir->info.use_descriptor_heap = true; + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_uniform | nir_var_image, NULL); + NIR_PASS(_, nir, nir_opt_dce); + } + return nir; } @@ -286,6 +302,7 @@ vk_shader_compile_info_init(struct vk_shader_compile_info *info, struct set_layouts *set_layouts, const VkShaderCreateInfoEXT *vk_info, const struct vk_pipeline_robustness_state *rs, + const struct vk_sampler_state_array *es, nir_shader *nir) { for (uint32_t sl = 0; sl < vk_info->setLayoutCount; sl++) { @@ -301,6 +318,8 @@ vk_shader_compile_info_init(struct 
vk_shader_compile_info *info, .robustness = rs, .set_layout_count = vk_info->setLayoutCount, .set_layouts = set_layouts->set_layouts, + .embedded_sampler_count = es->sampler_count, + .embedded_samplers = es->samplers, .push_constant_range_count = vk_info->pushConstantRangeCount, .push_constant_ranges = vk_info->pPushConstantRanges, }; @@ -600,8 +619,10 @@ vk_common_CreateShadersEXT(VkDevice _device, .idx = i, }; } else { + struct vk_sampler_state_array embedded_samplers = {}; nir_shader *nir = vk_shader_to_nir(device, vk_info, - &vk_robustness_disabled); + &vk_robustness_disabled, + &embedded_samplers); if (nir == NULL) { result = vk_errorf(device, VK_ERROR_UNKNOWN, "Failed to compile shader to NIR"); @@ -611,12 +632,16 @@ vk_common_CreateShadersEXT(VkDevice _device, struct vk_shader_compile_info info; struct set_layouts set_layouts; vk_shader_compile_info_init(&info, &set_layouts, - vk_info, &vk_robustness_disabled, nir); + vk_info, &vk_robustness_disabled, + &embedded_samplers, nir); struct vk_shader *shader; result = vk_compile_shaders(device, 1, &info, NULL /* state */, NULL /* features */, pAllocator, &shader); + + vk_sampler_state_array_finish(&embedded_samplers); + if (result != VK_SUCCESS) break; @@ -636,6 +661,7 @@ vk_common_CreateShadersEXT(VkDevice _device, if (linked_count > 0) { struct set_layouts set_layouts[VK_MAX_LINKED_SHADER_STAGES]; struct vk_shader_compile_info infos[VK_MAX_LINKED_SHADER_STAGES]; + struct vk_sampler_state_array embedded_samplers[VK_MAX_LINKED_SHADER_STAGES]; VkResult result = VK_SUCCESS; /* Sort so we guarantee the driver always gets them in-order */ @@ -643,12 +669,14 @@ vk_common_CreateShadersEXT(VkDevice _device, /* Memset for easy error handling */ memset(infos, 0, sizeof(infos)); + memset(embedded_samplers, 0, sizeof(embedded_samplers)); for (uint32_t l = 0; l < linked_count; l++) { const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[linked[l].idx]; nir_shader *nir = vk_shader_to_nir(device, vk_info, - 
&vk_robustness_disabled); + &vk_robustness_disabled, + &embedded_samplers[l]); if (nir == NULL) { result = vk_errorf(device, VK_ERROR_UNKNOWN, "Failed to compile shader to NIR"); @@ -656,7 +684,8 @@ vk_common_CreateShadersEXT(VkDevice _device, } vk_shader_compile_info_init(&infos[l], &set_layouts[l], - vk_info, &vk_robustness_disabled, nir); + vk_info, &vk_robustness_disabled, + &embedded_samplers[l], nir); } if (result == VK_SUCCESS) { @@ -675,6 +704,9 @@ vk_common_CreateShadersEXT(VkDevice _device, } } + for (uint32_t l = 0; l < linked_count; l++) + vk_sampler_state_array_finish(&embedded_samplers[l]); + if (first_fail_or_success == VK_SUCCESS) first_fail_or_success = result; } diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h index b4334ef171d..5670c5bd3be 100644 --- a/src/vulkan/runtime/vk_shader.h +++ b/src/vulkan/runtime/vk_shader.h @@ -46,6 +46,7 @@ struct vk_features; struct vk_physical_device; struct vk_pipeline; struct vk_pipeline_robustness_state; +struct vk_sampler_state; bool vk_validate_shader_binaries(void); @@ -94,6 +95,9 @@ struct vk_shader_compile_info { uint32_t set_layout_count; struct vk_descriptor_set_layout * const *set_layouts; + uint32_t embedded_sampler_count; + const struct vk_sampler_state* embedded_samplers; + uint32_t push_constant_range_count; const VkPushConstantRange *push_constant_ranges; }; diff --git a/src/vulkan/util/vk_physical_device_properties_gen.py b/src/vulkan/util/vk_physical_device_properties_gen.py index 3ffffbfb26d..8a39fc06526 100644 --- a/src/vulkan/util/vk_physical_device_properties_gen.py +++ b/src/vulkan/util/vk_physical_device_properties_gen.py @@ -58,6 +58,7 @@ RENAMED_PROPERTIES = { ("SubgroupProperties", "supportedStages"): "subgroupSupportedStages", ("SubgroupProperties", "supportedOperations"): "subgroupSupportedOperations", ("SubgroupProperties", "quadOperationsInAllStages"): "subgroupQuadOperationsInAllStages", + ("DescriptorBufferPropertiesEXT", "samplerDescriptorSize"): 
"EDBsamplerDescriptorSize", } OUT_ARRAYS = {