diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt index 4258e2f4ff5..c5895225675 100644 --- a/src/freedreno/ci/freedreno-a618-fails.txt +++ b/src/freedreno/ci/freedreno-a618-fails.txt @@ -15,6 +15,22 @@ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash @@ -23,6 +39,22 @@ gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachme gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash 
+gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash spill-dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bool,Fail spill-dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bool_requiredsubgroupsize128,Fail diff --git a/src/freedreno/ci/freedreno-a630-asan-fails.txt b/src/freedreno/ci/freedreno-a630-asan-fails.txt index 25d7994d5e5..7f45d307e71 100644 --- a/src/freedreno/ci/freedreno-a630-asan-fails.txt +++ b/src/freedreno/ci/freedreno-a630-asan-fails.txt @@ -1 +1,28 @@ dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.12,Crash + +# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3759 +# deqp-vk: ../src/freedreno/vulkan/tu_pipeline.c:3894: tu_pipeline_builder_init_graphics: Assertion `subpass->color_count == 0 || !create_info->pColorBlendState || subpass->color_count == create_info->pColorBlendState->attachmentCount' failed +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash 
+dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 99a9feacd54..5ca2147794e 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -431,6 +431,22 @@ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash @@ -439,6 +455,22 @@ gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachme gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash 
gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash # https://gitlab.freedesktop.org/mesa/mesa/-/issues/7152 spec@ext_transform_feedback@builtin-varyings gl_culldistance,Fail diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 797aac513e6..3c2fde55cae 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -615,17 +615,19 @@ ir3_trim_constlen(struct ir3_shader_variant **variants, { unsigned constlens[MESA_SHADER_STAGES] = {}; + bool shared_consts_enable = false; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (variants[i]) + if (variants[i]) { constlens[i] = variants[i]->constlen; + shared_consts_enable = + ir3_const_state(variants[i])->shared_consts_enable; + } } uint32_t trimmed = 0; STATIC_ASSERT(MESA_SHADER_STAGES <= 8 * sizeof(trimmed)); - bool shared_consts_enable = - ir3_const_state(variants[MESA_SHADER_VERTEX])->shared_consts_enable; - /* Use a hw quirk for geometry shared consts, not matched with actual * shared consts size (on a6xx). */ diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c index d9d4a051f8f..14d8b4b07da 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.c +++ b/src/freedreno/vulkan/tu_descriptor_set.c @@ -410,6 +410,46 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, * just multiple descriptor set layouts pasted together. 
*/ +void +tu_pipeline_layout_init(struct tu_pipeline_layout *layout) +{ + unsigned dynamic_offset_size = 0; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + assert(set < MAX_SETS); + layout->set[set].dynamic_offset_start = dynamic_offset_size; + + if (layout->set[set].layout) + dynamic_offset_size += layout->set[set].layout->dynamic_offset_size; + } + + layout->dynamic_offset_size = dynamic_offset_size; + + /* We only care about INDEPENDENT_SETS for dynamic-offset descriptors, + * where all the descriptors from all the sets are combined into one set + * and we have to provide the dynamic_offset_start dynamically with fast + * linking. + */ + if (dynamic_offset_size == 0) { + layout->independent_sets = false; + } + + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + for (unsigned s = 0; s < layout->num_sets; s++) { + if (layout->set[s].layout) + sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout); + _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start, + sizeof(layout->set[s].dynamic_offset_start)); + } + _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets)); + _mesa_sha1_update(&ctx, &layout->push_constant_size, + sizeof(layout->push_constant_size)); + _mesa_sha1_update(&ctx, &layout->independent_sets, + sizeof(layout->independent_sets)); + _mesa_sha1_final(&ctx, layout->sha1); +} + VKAPI_ATTR VkResult VKAPI_CALL tu_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, @@ -428,23 +468,16 @@ tu_CreatePipelineLayout(VkDevice _device, return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); layout->num_sets = pCreateInfo->setLayoutCount; - layout->dynamic_offset_size = 0; - - unsigned dynamic_offset_size = 0; - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]); assert(set < MAX_SETS); layout->set[set].layout = set_layout; - layout->set[set].dynamic_offset_start = dynamic_offset_size; - vk_descriptor_set_layout_ref(&set_layout->vk); - - dynamic_offset_size += set_layout->dynamic_offset_size; + if (set_layout) + vk_descriptor_set_layout_ref(&set_layout->vk); } - layout->dynamic_offset_size = dynamic_offset_size; layout->push_constant_size = 0; for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { @@ -454,18 +487,10 @@ tu_CreatePipelineLayout(VkDevice _device, } layout->push_constant_size = align(layout->push_constant_size, 16); + layout->independent_sets = + pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT; - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - for (unsigned s = 0; s < layout->num_sets; s++) { - sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout); - _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start, - sizeof(layout->set[s].dynamic_offset_start)); - } - _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets)); - _mesa_sha1_update(&ctx, &layout->push_constant_size, - sizeof(layout->push_constant_size)); - _mesa_sha1_final(&ctx, layout->sha1); + tu_pipeline_layout_init(layout); *pPipelineLayout = tu_pipeline_layout_to_handle(layout); @@ -483,8 +508,10 @@ tu_DestroyPipelineLayout(VkDevice _device, if (!pipeline_layout) return; - for (uint32_t i = 0; i < pipeline_layout->num_sets; i++) - vk_descriptor_set_layout_unref(&device->vk, &pipeline_layout->set[i].layout->vk); + for (uint32_t i = 0; i < pipeline_layout->num_sets; i++) { + if (pipeline_layout->set[i].layout) + vk_descriptor_set_layout_unref(&device->vk, 
&pipeline_layout->set[i].layout->vk); + } vk_object_free(&device->vk, pAllocator, pipeline_layout); } diff --git a/src/freedreno/vulkan/tu_descriptor_set.h b/src/freedreno/vulkan/tu_descriptor_set.h index 6b7b1bafdae..270afcfdc30 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.h +++ b/src/freedreno/vulkan/tu_descriptor_set.h @@ -93,6 +93,8 @@ struct tu_pipeline_layout uint32_t dynamic_offset_start; } set[MAX_SETS]; + bool independent_sets; + uint32_t num_sets; uint32_t push_constant_size; uint32_t dynamic_offset_size; @@ -102,6 +104,8 @@ struct tu_pipeline_layout VK_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT) +void tu_pipeline_layout_init(struct tu_pipeline_layout *layout); + struct tu_descriptor_set { struct vk_object_base base; diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 894d9875b24..c63610dd277 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -238,11 +238,14 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_tooling_info = true, .EXT_inline_uniform_block = true, .EXT_mutable_descriptor_type = true, + .KHR_pipeline_library = true, + .EXT_graphics_pipeline_library = true, }; } static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = { &tu_shaders_ops, + &tu_nir_shaders_ops, NULL, }; @@ -906,6 +909,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->multiDraw = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: { + VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT *features = + (VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT *)ext; + features->graphicsPipelineLibrary = true; + break; + } default: break; @@ -1371,6 +1380,13 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, properties->maxMultiDrawCount = 2048; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: { + VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props = + (VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext; + props->graphicsPipelineLibraryFastLinking = true; + props->graphicsPipelineLibraryIndependentInterpolationDecoration = true; + break; + } default: break; } diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 08659a45cfb..68b59b65466 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -15,6 +15,7 @@ #include "main/menums.h" #include "nir/nir.h" #include "nir/nir_builder.h" +#include "nir/nir_serialize.h" #include "spirv/nir_spirv.h" #include "util/debug.h" #include "util/mesa-sha1.h" @@ -243,11 +244,15 @@ struct tu_pipeline_builder struct tu_device *device; void *mem_ctx; struct vk_pipeline_cache *cache; - struct tu_pipeline_layout *layout; const VkAllocationCallbacks *alloc; const VkGraphicsPipelineCreateInfo *create_info; - struct tu_compiled_shaders *shaders; + struct tu_pipeline_layout layout; + + struct tu_compiled_shaders *compiled_shaders; + + struct tu_const_state const_state[MESA_SHADER_FRAGMENT + 1]; + struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1]; struct ir3_shader_variant *binning_variant; uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1]; uint64_t binning_vs_iova; @@ -260,6 +265,7 @@ struct tu_pipeline_builder /* these states are affectd by rasterizer_discard */ bool depth_clip_disable; bool use_color_attachments; + bool attachment_state_valid; VkFormat color_attachment_formats[MAX_RTS]; 
VkFormat depth_attachment_format; uint32_t multiview_mask; @@ -268,6 +274,24 @@ struct tu_pipeline_builder bool subpass_feedback_loop_color; bool subpass_feedback_loop_ds; bool feedback_loop_may_involve_textures; + + /* Each library defines at least one piece of state in + * VkGraphicsPipelineLibraryFlagsEXT, and libraries cannot overlap, so + * there can be at most as many libraries as pieces of state, of which + * there are currently 4. + */ +#define MAX_LIBRARIES 4 + + unsigned num_libraries; + struct tu_pipeline *libraries[MAX_LIBRARIES]; + + /* This is just the state that we are compiling now, whereas the final + * pipeline will include the state from the libraries. + */ + VkGraphicsPipelineLibraryFlagsEXT state; + + /* The stages we are compiling now. */ + VkShaderStageFlags active_stages; }; static bool @@ -648,6 +672,30 @@ tu6_emit_xs(struct tu_cs *cs, } } +static void +tu6_emit_dynamic_offset(struct tu_cs *cs, + const struct ir3_shader_variant *xs, + struct tu_pipeline_builder *builder) +{ + if (!xs || builder->const_state[xs->type].dynamic_offset_loc == UINT32_MAX) + return; + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 3 + MAX_SETS); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(builder->const_state[xs->type].dynamic_offset_loc / 4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(xs->type)) | + CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(MAX_SETS, 4))); + tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + for (unsigned i = 0; i < MAX_SETS; i++) { + unsigned dynamic_offset_start = + builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4); + tu_cs_emit(cs, i < builder->layout.num_sets ? 
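+                 /* sets not present in the layout have no dynamic
+                  * descriptors, so emit a zero start offset for them */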
dynamic_offset_start : 0); + } +} + static void tu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable) { @@ -1767,7 +1815,7 @@ tu6_emit_program_config(struct tu_cs *cs, STATIC_ASSERT(MESA_SHADER_VERTEX == 0); - bool shared_consts_enable = tu6_shared_constants_enable(builder->layout, + bool shared_consts_enable = tu6_shared_constants_enable(&builder->layout, builder->device->compiler); tu6_emit_shared_consts_enable(cs, shared_consts_enable); @@ -1780,7 +1828,7 @@ tu6_emit_program_config(struct tu_cs *cs, .gfx_ibo = true, .gfx_shared_const = shared_consts_enable)); for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { - tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]); + tu6_emit_xs_config(cs, stage, builder->variants[stage]); } } @@ -1790,15 +1838,14 @@ tu6_emit_program(struct tu_cs *cs, bool binning_pass, struct tu_pipeline *pipeline) { - const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; const struct ir3_shader_variant *bs = builder->binning_variant; - const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; - const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL]; - const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY]; - const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; + const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL]; + const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY]; + const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT]; gl_shader_stage stage = MESA_SHADER_VERTEX; - uint32_t cps_per_patch = builder->create_info->pTessellationState ? - builder->create_info->pTessellationState->patchControlPoints : 0; + uint32_t cps_per_patch = pipeline->tess.patch_control_points; bool multi_pos_output = vs->multi_pos_output; /* Don't use the binning pass variant when GS is present because we don't @@ -1807,20 +1854,22 @@ tu6_emit_program(struct tu_cs *cs, if (binning_pass && !gs) { vs = bs; tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova); + tu6_emit_dynamic_offset(cs, bs, builder); stage++; } for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { - const struct ir3_shader_variant *xs = builder->shaders->variants[stage]; + const struct ir3_shader_variant *xs = builder->variants[stage]; if (stage == MESA_SHADER_FRAGMENT && binning_pass) fs = xs = NULL; tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]); + tu6_emit_dynamic_offset(cs, xs, builder); } - uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1; - uint32_t multiview_cntl = builder->multiview_mask ? + uint32_t multiview_views = util_logbase2(pipeline->rast.multiview_mask) + 1; + uint32_t multiview_cntl = pipeline->rast.multiview_mask ? 
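+      /* the mask is read from pipeline->rast so that it can come from a
+       * pre-rasterization library when fast-linking */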
A6XX_PC_MULTIVIEW_CNTL_ENABLE | A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) | COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS) @@ -1845,7 +1894,7 @@ tu6_emit_program(struct tu_cs *cs, if (multiview_cntl && builder->device->physical_device->info->a6xx.supports_multiview_mask) { tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1); - tu_cs_emit(cs, builder->multiview_mask); + tu_cs_emit(cs, pipeline->rast.multiview_mask); } tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); @@ -2366,6 +2415,38 @@ tu_setup_pvtmem(struct tu_device *dev, return VK_SUCCESS; } +static bool +contains_all_shader_state(VkGraphicsPipelineLibraryFlagsEXT state) +{ + return (state & + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) == + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT); +} + +/* Return true if this pipeline contains all of the GPL stages listed but none + * of the libraries it uses do, so this is "the first time" that all of them + * are defined together. This is useful for state that needs to be combined + * from multiple GPL stages. + */ + +static bool +set_combined_state(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline, + VkGraphicsPipelineLibraryFlagsEXT state) +{ + if ((pipeline->state & state) != state) + return false; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + if ((builder->libraries[i]->state & state) == state) + return false; + } + + return true; +} + static VkResult tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline *pipeline, @@ -2373,38 +2454,49 @@ tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline_builder *builder, struct ir3_shader_variant *compute) { - uint32_t size = 1024 + tu6_load_state_size(pipeline, layout); + uint32_t size = 1024; /* graphics case: */ if (builder) { - size += TU6_EMIT_VERTEX_INPUT_MAX_DWORDS + - 2 * TU6_EMIT_VFD_DEST_MAX_DWORDS; - - for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - if (builder->shaders->variants[i]) { - size += builder->shaders->variants[i]->info.size / 4; - } + if (builder->state & + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { + size += TU6_EMIT_VERTEX_INPUT_MAX_DWORDS; } - size += builder->binning_variant->info.size / 4; + if (set_combined_state(builder, pipeline, + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) { + size += 2 * TU6_EMIT_VFD_DEST_MAX_DWORDS; + size += tu6_load_state_size(pipeline, layout); - builder->additional_cs_reserve_size = 0; - for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - struct ir3_shader_variant *variant = builder->shaders->variants[i]; - if (variant) { - builder->additional_cs_reserve_size += - tu_xs_get_additional_cs_size_dwords(variant); - - if (variant->binning) { - builder->additional_cs_reserve_size += - tu_xs_get_additional_cs_size_dwords(variant->binning); + for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) { + if (builder->variants[i]) { + size += builder->variants[i]->info.size / 4; } } - } - /* The additional size is used twice, once per tu6_emit_program() call. 
*/ - size += builder->additional_cs_reserve_size * 2; + size += builder->binning_variant->info.size / 4; + + builder->additional_cs_reserve_size = 0; + for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { + struct ir3_shader_variant *variant = builder->variants[i]; + if (variant) { + builder->additional_cs_reserve_size += + tu_xs_get_additional_cs_size_dwords(variant); + + if (variant->binning) { + builder->additional_cs_reserve_size += + tu_xs_get_additional_cs_size_dwords(variant->binning); + } + } + } + + /* The additional size is used twice, once per tu6_emit_program() call. */ + size += builder->additional_cs_reserve_size * 2; + } } else { + size += tu6_load_state_size(pipeline, layout); + size += compute->info.size / 4; size += tu_xs_get_additional_cs_size_dwords(compute); @@ -2436,20 +2528,42 @@ tu_pipeline_allocate_cs(struct tu_device *dev, static void tu_pipeline_shader_key_init(struct ir3_shader_key *key, const struct tu_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pipeline_info) + struct tu_pipeline_builder *builder, + nir_shader **nir) { - for (uint32_t i = 0; i < pipeline_info->stageCount; i++) { - if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) { + /* We set this after we compile to NIR because we need the prim mode */ + key->tessellation = IR3_TESS_NONE; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + if (!(builder->libraries[i]->state & + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))) + continue; + + const struct ir3_shader_key *library_key = + &builder->libraries[i]->ir3_key; + + if (library_key->tessellation != IR3_TESS_NONE) + key->tessellation = library_key->tessellation; + key->has_gs |= library_key->has_gs; + key->sample_shading |= library_key->sample_shading; + } + + for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { + if (builder->create_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) { key->has_gs = true; break; } } - if (pipeline_info->pRasterizationState->rasterizerDiscardEnable && - !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD))) + if (!(builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) return; - const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState; + if (builder->rasterizer_discard) + return; + + const VkPipelineMultisampleStateCreateInfo *msaa_info = + builder->create_info->pMultisampleState; /* The 1.3.215 spec says: * @@ -2475,12 +2589,9 @@ tu_pipeline_shader_key_init(struct ir3_shader_key *key, * just checked in tu6_emit_fs_inputs. We will also copy the value to * tu_shader_key::force_sample_interp in a bit. 
*/ - if (msaa_info->sampleShadingEnable && + if (msaa_info && msaa_info->sampleShadingEnable && (msaa_info->minSampleShading * msaa_info->rasterizationSamples) > 1.0f) key->sample_shading = true; - - /* We set this after we compile to NIR because we need the prim mode */ - key->tessellation = IR3_TESS_NONE; } static uint32_t @@ -2624,12 +2735,21 @@ tu_shader_key_init(struct tu_shader_key *key, static void tu_hash_stage(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stage, + const nir_shader *nir, const struct tu_shader_key *key) { - unsigned char stage_hash[SHA1_DIGEST_LENGTH]; - vk_pipeline_hash_shader_stage(stage, stage_hash); - _mesa_sha1_update(ctx, stage_hash, sizeof(stage_hash)); + if (nir) { + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, nir, true); + _mesa_sha1_update(ctx, blob.data, blob.size); + blob_finish(&blob); + } else { + unsigned char stage_hash[SHA1_DIGEST_LENGTH]; + vk_pipeline_hash_shader_stage(stage, stage_hash); + _mesa_sha1_update(ctx, stage_hash, sizeof(stage_hash)); + } _mesa_sha1_update(ctx, key, sizeof(*key)); } @@ -2647,9 +2767,11 @@ tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler) static void tu_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages, + nir_shader *const *nir, const struct tu_pipeline_layout *layout, const struct tu_shader_key *keys, const struct ir3_shader_key *ir3_key, + VkGraphicsPipelineLibraryFlagsEXT state, const struct ir3_compiler *compiler) { struct mesa_sha1 ctx; @@ -2662,10 +2784,11 @@ tu_hash_shaders(unsigned char *hash, _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key)); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { - if (stages[i]) { - tu_hash_stage(&ctx, stages[i], &keys[i]); + if (stages[i] || nir[i]) { + tu_hash_stage(&ctx, stages[i], nir[i], &keys[i]); } } + _mesa_sha1_update(&ctx, &state, sizeof(state)); tu_hash_compiler(&ctx, compiler); _mesa_sha1_final(&ctx, hash); } @@ -2684,7 +2807,7 @@ tu_hash_compute(unsigned char *hash, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); - tu_hash_stage(&ctx, stage, key); + tu_hash_stage(&ctx, stage, NULL, key); tu_hash_compiler(&ctx, compiler); _mesa_sha1_final(&ctx, hash); @@ -2708,6 +2831,9 @@ tu_shaders_destroy(struct vk_pipeline_cache_object *object) for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) ralloc_free(shaders->variants[i]); + for (unsigned i = 0; i < ARRAY_SIZE(shaders->safe_const_variants); i++) + ralloc_free(shaders->safe_const_variants[i]); + vk_pipeline_cache_object_finish(&shaders->base); vk_free(&object->device->alloc, shaders); } @@ -2753,6 +2879,13 @@ tu_shaders_serialize(struct vk_pipeline_cache_object *object, } else { blob_write_uint8(blob, 0); } + + if (shaders->safe_const_variants[i]) { + blob_write_uint8(blob, 1); + ir3_store_variant(blob, shaders->safe_const_variants[i]); + } else { + blob_write_uint8(blob, 0); + } } return true; @@ -2774,10 +2907,13 @@ tu_shaders_deserialize(struct vk_device *_device, shaders->active_desc_sets = blob_read_uint8(blob); for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { - bool has_shader = blob_read_uint8(blob); - if (has_shader) { + if (blob_read_uint8(blob)) { shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); } + + if (blob_read_uint8(blob)) { + shaders->safe_const_variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); + } } return &shaders->base; @@ -2806,6 +2942,117 @@ tu_pipeline_cache_insert(struct vk_pipeline_cache *cache, return container_of(object, struct 
tu_compiled_shaders, base); } +static bool +tu_nir_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob); + +static struct vk_pipeline_cache_object * +tu_nir_shaders_deserialize(struct vk_device *device, + const void *key_data, size_t key_size, + struct blob_reader *blob); + +static void +tu_nir_shaders_destroy(struct vk_pipeline_cache_object *object) +{ + struct tu_nir_shaders *shaders = + container_of(object, struct tu_nir_shaders, base); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) + ralloc_free(shaders->nir[i]); + + vk_pipeline_cache_object_finish(&shaders->base); + vk_free(&object->device->alloc, shaders); +} + +const struct vk_pipeline_cache_object_ops tu_nir_shaders_ops = { + .serialize = tu_nir_shaders_serialize, + .deserialize = tu_nir_shaders_deserialize, + .destroy = tu_nir_shaders_destroy, +}; + +static struct tu_nir_shaders * +tu_nir_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct tu_nir_shaders, shaders, 1); + VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size); + + if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + memcpy(obj_key_data, key_data, key_size); + vk_pipeline_cache_object_init(&dev->vk, &shaders->base, + &tu_nir_shaders_ops, obj_key_data, key_size); + + return shaders; +} + +static bool +tu_nir_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct tu_nir_shaders *shaders = + container_of(object, struct tu_nir_shaders, base); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) { + if (shaders->nir[i]) { + blob_write_uint8(blob, 1); + nir_serialize(blob, shaders->nir[i], true); + } else { + blob_write_uint8(blob, 0); + } + } + + return true; +} + +static struct vk_pipeline_cache_object * +tu_nir_shaders_deserialize(struct vk_device *_device, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct tu_device *dev = container_of(_device, struct tu_device, vk); + struct tu_nir_shaders *shaders = + tu_nir_shaders_init(dev, key_data, key_size); + + if (!shaders) + return NULL; + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) { + if (blob_read_uint8(blob)) { + shaders->nir[i] = + nir_deserialize(NULL, ir3_get_compiler_options(dev->compiler), blob); + } + } + + return &shaders->base; +} + +static struct tu_nir_shaders * +tu_nir_cache_lookup(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + bool *application_cache_hit) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &tu_nir_shaders_ops, application_cache_hit); + if (object) + return container_of(object, struct tu_nir_shaders, base); + else + return NULL; +} + +static struct tu_nir_shaders * +tu_nir_cache_insert(struct vk_pipeline_cache *cache, + struct tu_nir_shaders *shaders) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_add_object(cache, &shaders->base); + return container_of(object, struct tu_nir_shaders, base); +} + + static VkResult tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -2825,45 +3072,79 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, const VkPipelineCreationFeedbackCreateInfo *creation_feedback = vk_find_struct_const(builder->create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + bool must_compile = + builder->state & 
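+      /* an FS-state pipeline always has something to compile: even with no
+       * fragment stage supplied we must create an empty placeholder FS
+       * below */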
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); stage_infos[stage] = &builder->create_info->pStages[i]; - - pipeline->active_stages |= builder->create_info->pStages[i].stage; + must_compile = true; } - if (tu6_shared_constants_enable(builder->layout, builder->device->compiler)) { + if (tu6_shared_constants_enable(&builder->layout, builder->device->compiler)) { pipeline->shared_consts = (struct tu_push_constant_range) { .lo = 0, - .dwords = builder->layout->push_constant_size / 4, + .dwords = builder->layout.push_constant_size / 4, }; } + nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL }; + struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { }; for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(keys); stage++) { tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device); } - struct ir3_shader_key ir3_key = {}; - tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info); + if (builder->create_info->flags & + VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; - keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; - keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; - keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; + for (unsigned j = 0; j < ARRAY_SIZE(library->shaders); j++) { + if (library->shaders[j].nir) { + assert(!nir[j]); + nir[j] = nir_shader_clone(NULL, library->shaders[j].nir); + keys[j] = library->shaders[j].key; + must_compile = true; + } + } + } + } + + struct ir3_shader_key ir3_key = {}; + tu_pipeline_shader_key_init(&ir3_key, pipeline, builder, nir); + + struct tu_compiled_shaders *compiled_shaders = NULL; + struct tu_nir_shaders *nir_shaders = NULL; + if (!must_compile) + goto done; + + if (builder->state & + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; + } + + if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; + keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; + } unsigned char pipeline_sha1[20]; - tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler); + tu_hash_shaders(pipeline_sha1, stage_infos, nir, &builder->layout, keys, + &ir3_key, builder->state, compiler); + + unsigned char nir_sha1[21]; + memcpy(nir_sha1, pipeline_sha1, sizeof(pipeline_sha1)); + nir_sha1[20] = 'N'; const bool executable_info = builder->create_info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL }; - struct tu_compiled_shaders *compiled_shaders; - if (!executable_info) { + bool cache_hit = false; bool application_cache_hit = false; compiled_shaders = @@ -2871,12 +3152,31 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, sizeof(pipeline_sha1), &application_cache_hit); + cache_hit = !!compiled_shaders; + + /* If the user asks us to keep the NIR around, we need to have it for a + * successful cache hit. If we only have a "partial" cache hit, then we + * still need to recompile in order to get the NIR. 
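+       * The NIR objects are cached under nir_sha1, which is the shader
+       * hash with a trailing 'N' byte appended.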
+       */
+      if (compiled_shaders &&
+          (builder->create_info->flags &
+           VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) {
+         bool nir_application_cache_hit = false;
+         nir_shaders =
+            tu_nir_cache_lookup(builder->cache, &nir_sha1,
+                                sizeof(nir_sha1),
+                                &nir_application_cache_hit);
+
+         application_cache_hit &= nir_application_cache_hit;
+         cache_hit &= !!nir_shaders;
+      }
+
       if (application_cache_hit && builder->cache != builder->device->mem_cache) {
          pipeline_feedback.flags |=
             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
       }
 
-      if (compiled_shaders)
+      if (cache_hit)
          goto done;
    }
 
@@ -2885,8 +3185,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       return VK_PIPELINE_COMPILE_REQUIRED;
    }
 
-   nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL };
-
    struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL };
 
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
@@ -2907,7 +3205,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
    }
 
-   if (!nir[MESA_SHADER_FRAGMENT]) {
+   if (!nir[MESA_SHADER_FRAGMENT] &&
+       (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
       const nir_shader_compiler_options *nir_options =
          ir3_get_compiler_options(builder->device->compiler);
       nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
@@ -2929,6 +3228,32 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
 
    tu_link_shaders(builder, nir, ARRAY_SIZE(nir));
 
+   if (builder->create_info->flags &
+       VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) {
+      nir_shaders =
+         tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (!nir[stage])
+            continue;
+
+         nir_shaders->nir[stage] = nir_shader_clone(NULL, nir[stage]);
+      }
+
+      nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
+
+      if (compiled_shaders)
+         goto done;
+   }
+
+   compiled_shaders =
+      tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
+
+   if (!compiled_shaders) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto fail;
+   }
+
    uint32_t desc_sets = 0;
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
        stage < ARRAY_SIZE(nir); stage++) {
@@ -2939,7 +3264,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
 
       struct tu_shader *shader =
          tu_shader_create(builder->device, nir[stage], &keys[stage],
-                          builder->layout, builder->alloc);
+                          &builder->layout, builder->alloc);
       if (!shader) {
          result = VK_ERROR_OUT_OF_HOST_MEMORY;
          goto fail;
@@ -2972,20 +3297,18 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
    }
 
+   /* In the tess-but-not-FS case we don't know whether the FS will read
+    * PrimID so we need to unconditionally store it.
+    */
+   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
+      ir3_key.tcs_store_primid = true;
+
    struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY];
    if (!last_shader)
       last_shader = shaders[MESA_SHADER_TESS_EVAL];
    if (!last_shader)
       last_shader = shaders[MESA_SHADER_VERTEX];
 
-   compiled_shaders =
-      tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
-
-   if (!compiled_shaders) {
-      result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      goto fail;
-   }
-
    compiled_shaders->active_desc_sets = desc_sets;
 
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
@@ -3028,9 +3351,19 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
          }
 
          stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+      } else if (contains_all_shader_state(builder->state)) {
+         compiled_shaders->safe_const_variants[stage] =
+            ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
+                                      executable_info);
+         if (!compiled_shaders->safe_const_variants[stage]) {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+            goto fail;
+         }
       }
    }
 
+   ir3_key.safe_constlen = false;
+
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
         stage < ARRAY_SIZE(nir); stage++) {
       if (shaders[stage]) {
@@ -3041,42 +3374,133 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    compiled_shaders =
       tu_pipeline_cache_insert(builder->cache, compiled_shaders);
 
-done:
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-        stage < ARRAY_SIZE(nir); stage++) {
-      if (compiled_shaders->variants[stage]) {
-         tu_append_executable(pipeline, compiled_shaders->variants[stage],
-                              nir_initial_disasm[stage]);
+done:;
+
+   struct ir3_shader_variant *safe_const_variants[ARRAY_SIZE(nir)] = { NULL };
+   nir_shader *post_link_nir[ARRAY_SIZE(nir)] = { NULL };
+
+   if (compiled_shaders) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (compiled_shaders->variants[stage]) {
+            tu_append_executable(pipeline, compiled_shaders->variants[stage],
+                                 nir_initial_disasm[stage]);
+            builder->variants[stage] = compiled_shaders->variants[stage];
+            safe_const_variants[stage] =
+               compiled_shaders->safe_const_variants[stage];
+            builder->const_state[stage] =
+               compiled_shaders->const_state[stage];
+         }
       }
    }
 
-   struct ir3_shader_variant *vs =
-      compiled_shaders->variants[MESA_SHADER_VERTEX];
-
-   struct ir3_shader_variant *variant;
-   if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) {
-      tu_append_executable(pipeline, vs->binning, NULL);
-      variant = vs->binning;
-   } else {
-      variant = vs;
+   if (nir_shaders) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (nir_shaders->nir[stage]) {
+            post_link_nir[stage] = nir_shaders->nir[stage];
+         }
+      }
    }
 
-   builder->binning_variant = variant;
+   /* In the case where we're building a library without link-time
+    * optimization but with sub-libraries that retain LTO info, we should
+    * retain it ourselves in case another pipeline includes us with LTO.
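+    * Note that the sub-library's NIR is reused by pointer here rather
+    * than cloned.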
+ */ + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(library->shaders); stage++) { + if (!post_link_nir[stage] && library->shaders[stage].nir) { + post_link_nir[stage] = library->shaders[stage].nir; + keys[stage] = library->shaders[stage].key; + } + } + } - builder->shaders = compiled_shaders; + if (!(builder->create_info->flags & + VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT)) { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(library->shaders); stage++) { + if (library->shaders[stage].variant) { + assert(!builder->variants[stage]); + builder->variants[stage] = library->shaders[stage].variant; + safe_const_variants[stage] = + library->shaders[stage].safe_const_variant; + builder->const_state[stage] = + library->shaders[stage].const_state; + post_link_nir[stage] = library->shaders[stage].nir; + } + } + } - pipeline->active_desc_sets = compiled_shaders->active_desc_sets; - if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { + /* Because we added more variants, we need to trim constlen again. + */ + if (builder->num_libraries > 0) { + uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler); + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(builder->variants); stage++) { + if (safe_constlens & (1u << stage)) + builder->variants[stage] = safe_const_variants[stage]; + } + } + } + + if (compiled_shaders) + pipeline->active_desc_sets = compiled_shaders->active_desc_sets; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + pipeline->active_desc_sets |= library->active_desc_sets; + } + + if (compiled_shaders && compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { pipeline->tess.patch_type = compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation; } + if (contains_all_shader_state(pipeline->state)) { + struct ir3_shader_variant *vs = + builder->variants[MESA_SHADER_VERTEX]; + + struct ir3_shader_variant *variant; + if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) { + tu_append_executable(pipeline, vs->binning, NULL); + variant = vs->binning; + } else { + variant = vs; + } + + builder->binning_variant = variant; + + builder->compiled_shaders = compiled_shaders; + + /* It doesn't make much sense to use RETAIN_LINK_TIME_OPTIMIZATION_INFO + * when compiling all stages, but make sure we don't leak. 
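+       * The pipeline cache still holds its own reference, so the cached
+       * NIR is not destroyed here.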
+ */ + if (nir_shaders) + vk_pipeline_cache_object_unref(&nir_shaders->base); + } else { + pipeline->compiled_shaders = compiled_shaders; + pipeline->nir_shaders = nir_shaders; + pipeline->ir3_key = ir3_key; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(pipeline->shaders); stage++) { + pipeline->shaders[stage].nir = post_link_nir[stage]; + pipeline->shaders[stage].key = keys[stage]; + pipeline->shaders[stage].const_state = builder->const_state[stage]; + pipeline->shaders[stage].variant = builder->variants[stage]; + pipeline->shaders[stage].safe_const_variant = + safe_const_variants[stage]; + } + } + pipeline_feedback.duration = os_time_get_nano() - pipeline_start; if (creation_feedback) { *creation_feedback->pPipelineCreationFeedback = pipeline_feedback; - assert(builder->create_info->stageCount == - creation_feedback->pipelineStageCreationFeedbackCount); for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { gl_shader_stage s = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); @@ -3097,6 +3521,9 @@ fail: if (compiled_shaders) vk_pipeline_cache_object_unref(&compiled_shaders->base); + if (nir_shaders) + vk_pipeline_cache_object_unref(&nir_shaders->base); + return result; } @@ -3227,6 +3654,155 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, } } +static void +tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineLibraryCreateInfoKHR *library_info = + vk_find_struct_const(builder->create_info->pNext, + PIPELINE_LIBRARY_CREATE_INFO_KHR); + + if (library_info) { + assert(library_info->libraryCount <= MAX_LIBRARIES); + builder->num_libraries = library_info->libraryCount; + for (unsigned i = 0; i < library_info->libraryCount; i++) { + TU_FROM_HANDLE(tu_pipeline, library, library_info->pLibraries[i]); + builder->libraries[i] = library; + } + } + + /* Merge in the state from libraries. The program state is a bit special + * and is handled separately. 
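+    * It is only copied from a library that itself contains all of the
+    * shader state (see contains_all_shader_state()).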
+ */ + pipeline->state = builder->state; + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + pipeline->state |= library->state; + + uint32_t library_dynamic_state = 0; + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { + pipeline->vi = library->vi; + pipeline->ia = library->ia; + library_dynamic_state |= + BIT(TU_DYNAMIC_STATE_VERTEX_INPUT) | + BIT(TU_DYNAMIC_STATE_VB_STRIDE) | + BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY) | + BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); + pipeline->shared_consts = library->shared_consts; + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + pipeline->tess = library->tess; + pipeline->rast = library->rast; + pipeline->viewport = library->viewport; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_VIEWPORT) | + BIT(VK_DYNAMIC_STATE_SCISSOR) | + BIT(VK_DYNAMIC_STATE_LINE_WIDTH) | + BIT(VK_DYNAMIC_STATE_DEPTH_BIAS) | + BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD); + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + pipeline->ds = library->ds; + pipeline->lrz.fs = library->lrz.fs; + pipeline->lrz.force_disable_mask |= library->lrz.force_disable_mask; + pipeline->lrz.force_late_z |= library->lrz.force_late_z; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | + BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | + BIT(VK_DYNAMIC_STATE_STENCIL_REFERENCE) | + BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL) | + BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL) | + BIT(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + pipeline->shared_consts = library->shared_consts; + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { + pipeline->blend = library->blend; + pipeline->output = library->output; + pipeline->lrz.force_disable_mask |= library->lrz.force_disable_mask; + pipeline->lrz.force_late_z |= library->lrz.force_late_z; + pipeline->prim_order = library->prim_order; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_BLEND_CONSTANTS) | + BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS) | + BIT(TU_DYNAMIC_STATE_BLEND) | + BIT(TU_DYNAMIC_STATE_LOGIC_OP) | + BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE); + } + + if ((library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && + (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + pipeline->prim_order = library->prim_order; + } + + pipeline->dynamic_state_mask = + (pipeline->dynamic_state_mask & ~library_dynamic_state) | + (library->dynamic_state_mask & library_dynamic_state); + + u_foreach_bit (i, library_dynamic_state & ~library->dynamic_state_mask) { + if (i >= TU_DYNAMIC_STATE_COUNT) + break; + + pipeline->dynamic_state[i] = library->dynamic_state[i]; + } + + if (contains_all_shader_state(library->state)) { + pipeline->program = library->program; + pipeline->load_state = library->load_state; + } + } +} + +static void +tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + TU_FROM_HANDLE(tu_pipeline_layout, layout, builder->create_info->layout); + + if (layout) { + /* Note: it's still valid to have a layout even if there are libraries. + * This allows the app to e.g. overwrite an INDEPENDENT_SET layout with + * a non-INDEPENDENT_SET layout which may make us use a faster path, + * currently this just affects dynamic offset descriptors. 
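+       * When no complete layout is provided, the per-set layouts are
+       * merged from the libraries instead (see below).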
+ */ + builder->layout = *layout; + } else { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + builder->layout.num_sets = MAX2(builder->layout.num_sets, + library->num_sets); + for (unsigned j = 0; j < library->num_sets; j++) { + if (library->layouts[i]) + builder->layout.set[i].layout = library->layouts[i]; + } + + builder->layout.push_constant_size = pipeline->push_constant_size; + builder->layout.independent_sets |= pipeline->independent_sets; + } + + tu_pipeline_layout_init(&builder->layout); + } + + if (builder->create_info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) { + pipeline->num_sets = builder->layout.num_sets; + for (unsigned i = 0; i < pipeline->num_sets; i++) { + pipeline->layouts[i] = builder->layout.set[i].layout; + if (pipeline->layouts[i]) + vk_descriptor_set_layout_ref(&pipeline->layouts[i]->vk); + } + pipeline->push_constant_size = builder->layout.push_constant_size; + pipeline->independent_sets = builder->layout.independent_sets; + } +} + static void tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, struct tu_const_state *const_state, @@ -3266,13 +3842,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, tu6_emit_program(&prog_cs, builder, true, pipeline); pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); - for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - if (!builder->shaders->variants[i]) + for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { + if (!builder->variants[i]) continue; tu_pipeline_set_linkage(&pipeline->program.link[i], - &builder->shaders->const_state[i], - builder->shaders->variants[i]); + &builder->const_state[i], + builder->variants[i]); } } @@ -3377,8 +3953,8 @@ static void tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) { - if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || - !(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + if (!(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || + !(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) return; const VkPipelineTessellationStateCreateInfo *tess_info = @@ -3390,7 +3966,7 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); pipeline->tess.upper_left_domain_origin = !domain_info || domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; - const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; pipeline->tess.param_stride = hs->output_size * 4; } @@ -3516,6 +4092,8 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); pipeline->rast.provoking_vertex_last = provoking_vtx_state && provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + + pipeline->rast.multiview_mask = builder->multiview_mask; } static void @@ -3532,13 +4110,12 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, */ const VkPipelineDepthStencilStateCreateInfo *ds_info = builder->create_info->pDepthStencilState; - const enum pipe_format pipe_format = - 
+   pipeline->rast.multiview_mask = builder->multiview_mask;
 }
 
 static void
@@ -3532,13 +4110,12 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    */
    const VkPipelineDepthStencilStateCreateInfo *ds_info =
       builder->create_info->pDepthStencilState;
-   const enum pipe_format pipe_format =
-      vk_format_to_pipe_format(builder->depth_attachment_format);
    uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
    struct tu_cs cs;
 
-   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
-       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
+   if (!builder->attachment_state_valid ||
+       (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+        builder->depth_attachment_format != VK_FORMAT_S8_UINT)) {
       if (ds_info->depthTestEnable) {
          rb_depth_cntl |=
             A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE |
@@ -3557,19 +4134,10 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
 
       if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable)
          tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl);
-
-      pipeline->output.depth_cpp_per_sample = util_format_get_component_bits(
-         pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8;
-   } else {
-      /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
-       * to 0 when this pipeline is used, as enabling depth test when there
-       * is no depth attachment is a problem (at least for the S8_UINT case)
-       */
-      if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
-         pipeline->output.rb_depth_cntl_disable = true;
    }
 
-   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
+   if (!builder->attachment_state_valid ||
+       builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
       const VkStencilOpState *front = &ds_info->front;
       const VkStencilOpState *back = &ds_info->back;
 
@@ -3590,9 +4158,6 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
          A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
       }
 
-      pipeline->output.stencil_cpp_per_sample = util_format_get_component_bits(
-         pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8;
-
       pipeline->ds.raster_order_attachment_access =
          ds_info->flags &
          (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
@@ -3615,7 +4180,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    pipeline->ds.rb_stencil_cntl = rb_stencil_cntl;
 
    /* the remaining draw states aren't used if there is no d/s, leave them empty */
-   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
+   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED &&
+       builder->attachment_state_valid)
       return;
 
    if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
@@ -3640,8 +4206,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
          .bfref = ds_info->back.reference & 0xff));
    }
 
-   if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) {
-      const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
+   if (builder->variants[MESA_SHADER_FRAGMENT]) {
+      const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
       if (fs->has_kill) {
          pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
       }
@@ -3651,6 +4217,26 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    }
 }
 
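+/* Emit a zeroed RB_DEPTH_CNTL draw state for pipelines that must have the
+ * depth test disabled (no depth attachment, or S8_UINT only). If the
+ * register is dynamic instead, tu_pipeline_static_state() bails and the
+ * zeroing is deferred to draw time via output.rb_depth_cntl_disable.
+ */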
+static void
+tu_pipeline_builder_parse_ds_disable(struct tu_pipeline_builder *builder,
+                                     struct tu_pipeline *pipeline)
+{
+   if (builder->rasterizer_discard)
+      return;
+
+   /* If RB_DEPTH_CNTL is static state, then we can disable it ahead of time.
+    * However we only know whether RB_DEPTH_CNTL is dynamic in the fragment
+    * shader state, and we only know whether it needs to be force-disabled in
+    * the output interface state.
+    */
+   struct tu_cs cs;
+   if (pipeline->output.rb_depth_cntl_disable &&
+       tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
+      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
+      tu_cs_emit(&cs, 0);
+   }
+}
+
 static void
 tu_pipeline_builder_parse_multisample_and_color_blend(
    struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
@@ -3704,6 +4290,27 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
       VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM;
    }
 
+   const enum pipe_format ds_pipe_format =
+      vk_format_to_pipe_format(builder->depth_attachment_format);
+
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
+      pipeline->output.depth_cpp_per_sample = util_format_get_component_bits(
+         ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8;
+   } else {
+      /* We need to make sure RB_DEPTH_CNTL is set to 0 when this pipeline is
+       * used, regardless of whether it's linked with a fragment shader
+       * pipeline that has an enabled depth test or whether RB_DEPTH_CNTL is
+       * set dynamically.
+       */
+      pipeline->output.rb_depth_cntl_disable = true;
+   }
+
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
+      pipeline->output.stencil_cpp_per_sample = util_format_get_component_bits(
+         ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8;
+   }
+
    struct tu_cs cs;
    tu6_emit_rb_mrt_controls(pipeline, blend_info,
                             builder->color_attachment_formats,
@@ -3848,9 +4455,21 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
    if (pipeline->pvtmem_bo)
       tu_bo_finish(dev, pipeline->pvtmem_bo);
 
+   if (pipeline->compiled_shaders)
+      vk_pipeline_cache_object_unref(&pipeline->compiled_shaders->base);
+
+   if (pipeline->nir_shaders)
+      vk_pipeline_cache_object_unref(&pipeline->nir_shaders->base);
+
+   for (unsigned i = 0; i < pipeline->num_sets; i++) {
+      if (pipeline->layouts[i])
+         vk_descriptor_set_layout_unref(&dev->vk, &pipeline->layouts[i]->vk);
+   }
+
    ralloc_free(pipeline->executables_mem_ctx);
 }
 
 static VkResult
 tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
                           struct tu_pipeline **pipeline)
@@ -3865,66 +4484,118 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
    (*pipeline)->executables_mem_ctx = ralloc_context(NULL);
    util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
 
-   /* compile and upload shaders */
-   result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
+   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
+   tu_pipeline_builder_parse_libraries(builder, *pipeline);
+
+   VkShaderStageFlags stages = 0;
+   for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
+      stages |= builder->create_info->pStages[i].stage;
    }
+   builder->active_stages = stages;
 
-   result = tu_pipeline_allocate_cs(builder->device, *pipeline,
-                                    builder->layout, builder, NULL);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
-   }
+   (*pipeline)->active_stages = stages;
+   for (unsigned i = 0; i < builder->num_libraries; i++)
+      (*pipeline)->active_stages |= builder->libraries[i]->active_stages;
 
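+   /* If a library contained all shader state, parse_libraries() above
+    * already copied its program and load_state, so program.state.size is
+    * nonzero and compilation can be skipped entirely.
+    */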
+   /* Compile and upload shaders unless a library has already done that. */
+   if ((*pipeline)->program.state.size == 0) {
+      tu_pipeline_builder_parse_layout(builder, *pipeline);
+
-   builder->binning_vs_iova =
-      tu_upload_variant(*pipeline, builder->binning_variant);
-
-   /* Setup private memory. Note that because we're sharing the same private
-    * memory for all stages, all stages must use the same config, or else
-    * fibers from one stage might overwrite fibers in another.
-    */
-
-   uint32_t pvtmem_size = 0;
-   bool per_wave = true;
-   for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
-      if (builder->shaders->variants[i]) {
-         pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size);
-         if (!builder->shaders->variants[i]->pvtmem_per_wave)
-            per_wave = false;
+      result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
+      if (result != VK_SUCCESS) {
+         vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+         return result;
       }
    }
 
-   if (builder->binning_variant) {
-      pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
-      if (!builder->binning_variant->pvtmem_per_wave)
-         per_wave = false;
+   result = tu_pipeline_allocate_cs(builder->device, *pipeline,
+                                    &builder->layout, builder, NULL);
+   if (result != VK_SUCCESS) {
+      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+      return result;
+   }
+
+   /* This has to come before emitting the program so that
+    * pipeline->tess.patch_control_points and pipeline->rast.multiview_mask
+    * are always set.
+    */
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+      tu_pipeline_builder_parse_tessellation(builder, *pipeline);
+      (*pipeline)->rast.multiview_mask = builder->multiview_mask;
    }
 
-   result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
-                            pvtmem_size, per_wave);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
+   if (set_combined_state(builder, *pipeline,
+                          VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++)
+         builder->shader_iova[i] =
+            tu_upload_variant(*pipeline, builder->variants[i]);
+
+      builder->binning_vs_iova =
+         tu_upload_variant(*pipeline, builder->binning_variant);
+
+      /* Setup private memory. Note that because we're sharing the same private
+       * memory for all stages, all stages must use the same config, or else
+       * fibers from one stage might overwrite fibers in another.
+       */
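+      /* For example (illustrative): if the VS wants 512 bytes of per-fiber
+       * private memory and the FS wants 256 bytes per-wave, the shared
+       * config must be 512 bytes with per_wave = false.
+       */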
+
+      uint32_t pvtmem_size = 0;
+      bool per_wave = true;
+      for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
+         if (builder->variants[i]) {
+            pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size);
+            if (!builder->variants[i]->pvtmem_per_wave)
+               per_wave = false;
+         }
+      }
+
+      if (builder->binning_variant) {
+         pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
+         if (!builder->binning_variant->pvtmem_per_wave)
+            per_wave = false;
+      }
+
+      result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
+                               pvtmem_size, per_wave);
+      if (result != VK_SUCCESS) {
+         vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+         return result;
+      }
+
+      tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
+      tu6_emit_load_state(*pipeline, &builder->layout);
+   }
 
-   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
-   tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
-   tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
-   tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
-   tu_pipeline_builder_parse_tessellation(builder, *pipeline);
-   tu_pipeline_builder_parse_viewport(builder, *pipeline);
-   tu_pipeline_builder_parse_rasterization(builder, *pipeline);
-   tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
-   tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
-   tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
-   tu6_emit_load_state(*pipeline, builder->layout);
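+   /* The remaining state is parsed only for the subsets this pipeline or
+    * library was asked to compile; anything else was already copied from
+    * the libraries in tu_pipeline_builder_parse_libraries().
+    */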
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) {
+      tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
+      tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
+   }
+
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+      tu_pipeline_builder_parse_viewport(builder, *pipeline);
+      tu_pipeline_builder_parse_rasterization(builder, *pipeline);
+   }
+
+   if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
+      tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
+   }
+
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) {
+      tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
+   }
+
+   if (set_combined_state(builder, *pipeline,
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
+      tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
+      tu_pipeline_builder_parse_ds_disable(builder, *pipeline);
+   }
 
    return VK_SUCCESS;
 }
@@ -3932,8 +4603,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
 static void
 tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
 {
-   if (builder->shaders)
-      vk_pipeline_cache_object_unref(&builder->shaders->base);
+   if (builder->compiled_shaders)
+      vk_pipeline_cache_object_unref(&builder->compiled_shaders->base);
    ralloc_free(builder->mem_ctx);
 }
 
@@ -3945,17 +4616,49 @@ tu_pipeline_builder_init_graphics(
    const VkGraphicsPipelineCreateInfo *create_info,
    const VkAllocationCallbacks *alloc)
 {
-   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
-
    *builder = (struct tu_pipeline_builder) {
       .device = dev,
       .mem_ctx = ralloc_context(NULL),
      .cache = cache,
      .create_info = create_info,
      .alloc = alloc,
-      .layout = layout,
   };
 
+   const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info =
+      vk_find_struct_const(builder->create_info->pNext,
+                           GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
+
+   const VkPipelineLibraryCreateInfoKHR *library_info =
+      vk_find_struct_const(builder->create_info->pNext,
+                           PIPELINE_LIBRARY_CREATE_INFO_KHR);
+
+   if (gpl_info) {
+      builder->state = gpl_info->flags;
+   } else {
+      /* Implement this bit of spec text:
+       *
+       *    If this structure is omitted, and either
+       *    VkGraphicsPipelineCreateInfo::flags includes
+       *    VK_PIPELINE_CREATE_LIBRARY_BIT_KHR or the
+       *    VkGraphicsPipelineCreateInfo::pNext chain includes a
+       *    VkPipelineLibraryCreateInfoKHR structure with a libraryCount
+       *    greater than 0, it is as if flags is 0. Otherwise if this
+       *    structure is omitted, it is as if flags includes all possible
+       *    subsets of the graphics pipeline (i.e. a complete graphics
+       *    pipeline).
+       */
+      if ((library_info && library_info->libraryCount > 0) ||
+          (builder->create_info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+         builder->state = 0;
+      } else {
+         builder->state =
+            VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
+      }
+   }
+
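+   /* For example, a library created with only
+    * VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT compiles
+    * just that subset, while a plain monolithic pipeline ends up with all
+    * four bits set.
+    */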
    bool rasterizer_discard_dynamic = false;
    if (create_info->pDynamicState) {
       for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) {
@@ -3968,76 +4671,120 @@ tu_pipeline_builder_init_graphics(
    }
 
    builder->rasterizer_discard =
+      (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) &&
       builder->create_info->pRasterizationState->rasterizerDiscardEnable &&
       !rasterizer_discard_dynamic;
 
-   const VkPipelineRenderingCreateInfo *rendering_info =
-      vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO);
+   if (builder->state &
+       (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
+      const VkPipelineRenderingCreateInfo *rendering_info =
+         vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO);
 
-   if (unlikely(dev->instance->debug_flags & TU_DEBUG_DYNAMIC) && !rendering_info)
-      rendering_info = vk_get_pipeline_rendering_create_info(create_info);
+      if (unlikely(dev->instance->debug_flags & TU_DEBUG_DYNAMIC) && !rendering_info)
+         rendering_info = vk_get_pipeline_rendering_create_info(create_info);
 
-   if (rendering_info) {
-      builder->subpass_raster_order_attachment_access = false;
-      builder->subpass_feedback_loop_ds = false;
-      builder->subpass_feedback_loop_color = false;
-
-      builder->multiview_mask = rendering_info->viewMask;
-
-      const VkRenderingSelfDependencyInfoMESA *self_dependency =
-         vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA);
-
-      if (self_dependency) {
-         builder->subpass_feedback_loop_ds =
-            self_dependency->depthSelfDependency ||
-            self_dependency->stencilSelfDependency;
-         builder->subpass_feedback_loop_color =
-            self_dependency->colorSelfDependencies;
-      }
-
-      if (!builder->rasterizer_discard) {
-         builder->depth_attachment_format =
-            rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ?
-            rendering_info->stencilAttachmentFormat :
-            rendering_info->depthAttachmentFormat;
+      /* Get multiview_mask, which is only used for shaders */
+      if (builder->state &
+          (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
+         if (rendering_info) {
+            builder->multiview_mask = rendering_info->viewMask;
+         } else {
+            const struct tu_render_pass *pass =
+               tu_render_pass_from_handle(create_info->renderPass);
+            const struct tu_subpass *subpass =
+               &pass->subpasses[create_info->subpass];
+            builder->multiview_mask = subpass->multiview_mask;
+         }
+      }
 
-      for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) {
-         builder->color_attachment_formats[i] =
-            rendering_info->pColorAttachmentFormats[i];
-         if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
+      /* Get the attachment state. This is valid:
+       *
+       * - With classic renderpasses, when either fragment shader or fragment
+       *   output interface state is being compiled. This includes when we
+       *   emulate classic renderpasses using dynamic rendering with the
+       *   debug flag.
+       * - With dynamic rendering (renderPass is NULL) only when compiling the
+       *   output interface state.
+       *
+       * We only actually need this for the fragment output interface state,
+       * but the spec also requires us to skip parsing depth/stencil state
+       * when the attachment state is defined *and* no depth/stencil
+       * attachment is used, so we have to parse it for fragment shader
+       * state when possible. Life is pain.
+       */
+      if (((builder->state &
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) ||
+           ((builder->state &
+             VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
+            builder->create_info->renderPass)) &&
+          rendering_info) {
+         builder->subpass_raster_order_attachment_access = false;
+         builder->subpass_feedback_loop_ds = false;
+         builder->subpass_feedback_loop_color = false;
+
+         const VkRenderingSelfDependencyInfoMESA *self_dependency =
+            vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA);
+
+         if (self_dependency) {
+            builder->subpass_feedback_loop_ds =
+               self_dependency->depthSelfDependency ||
+               self_dependency->stencilSelfDependency;
+            builder->subpass_feedback_loop_color =
+               self_dependency->colorSelfDependencies;
+         }
+
+         if (!builder->rasterizer_discard) {
+            builder->depth_attachment_format =
+               rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ?
+               rendering_info->stencilAttachmentFormat :
+               rendering_info->depthAttachmentFormat;
+
+            for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) {
+               builder->color_attachment_formats[i] =
+                  rendering_info->pColorAttachmentFormats[i];
+               if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
+                  builder->use_color_attachments = true;
+               }
+            }
+         }
+
+         builder->attachment_state_valid = true;
+      } else if ((builder->state &
+                  (VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+                   VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) &&
+                 create_info->renderPass != VK_NULL_HANDLE) {
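+         /* Classic renderpass (or emulated) path: pull the raster order,
+          * feedback loops, and attachment formats straight from the subpass.
+          */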
+         const struct tu_render_pass *pass =
+            tu_render_pass_from_handle(create_info->renderPass);
+         const struct tu_subpass *subpass =
+            &pass->subpasses[create_info->subpass];
+
+         builder->subpass_raster_order_attachment_access =
+            subpass->raster_order_attachment_access;
+         builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
+         builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
+
+         if (!builder->rasterizer_discard) {
+            const uint32_t a = subpass->depth_stencil_attachment.attachment;
+            builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
+               pass->attachments[a].format : VK_FORMAT_UNDEFINED;
+
+            assert(subpass->color_count == 0 ||
+                   !create_info->pColorBlendState ||
+                   subpass->color_count == create_info->pColorBlendState->attachmentCount);
+            for (uint32_t i = 0; i < subpass->color_count; i++) {
+               const uint32_t a = subpass->color_attachments[i].attachment;
+               if (a == VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               builder->color_attachment_formats[i] = pass->attachments[a].format;
+               builder->use_color_attachments = true;
+            }
+         }
-            builder->use_color_attachments = true;
-         }
-      }
-   } else {
-      const struct tu_render_pass *pass =
-         tu_render_pass_from_handle(create_info->renderPass);
-      const struct tu_subpass *subpass =
-         &pass->subpasses[create_info->subpass];
-
-      builder->subpass_raster_order_attachment_access =
-         subpass->raster_order_attachment_access;
-      builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
-      builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
-
-      builder->multiview_mask = subpass->multiview_mask;
-
-      if (!builder->rasterizer_discard) {
-         const uint32_t a = subpass->depth_stencil_attachment.attachment;
-         builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
-            pass->attachments[a].format : VK_FORMAT_UNDEFINED;
-
-         assert(subpass->color_count == 0 ||
-                !create_info->pColorBlendState ||
-                subpass->color_count == create_info->pColorBlendState->attachmentCount);
-         for (uint32_t i = 0; i < subpass->color_count; i++) {
-            const uint32_t a = subpass->color_attachments[i].attachment;
-            if (a == VK_ATTACHMENT_UNUSED)
-               continue;
-
-            builder->color_attachment_formats[i] = pass->attachments[a].format;
-            builder->use_color_attachments = true;
-         }
-      }
+
+         builder->attachment_state_valid = true;
+      }
+   }
 }
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index a06599fadf5..6b26bd2a9fc 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -60,9 +60,22 @@ struct tu_compiled_shaders
    uint8_t active_desc_sets;
 
    struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
+
+   struct ir3_shader_variant *safe_const_variants[MESA_SHADER_STAGES];
+};
+
+struct tu_nir_shaders
+{
+   struct vk_pipeline_cache_object base;
+
+   /* This is optional, and is only filled out when a library pipeline is
+    * compiled with RETAIN_LINK_TIME_OPTIMIZATION_INFO.
+    */
+   nir_shader *nir[MESA_SHADER_STAGES];
 };
 
 extern const struct vk_pipeline_cache_object_ops tu_shaders_ops;
+extern const struct vk_pipeline_cache_object_ops tu_nir_shaders_ops;
 
 static bool inline
 tu6_shared_constants_enable(const struct tu_pipeline_layout *layout,
@@ -111,6 +124,8 @@ struct tu_pipeline
    uint32_t dynamic_state_mask;
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
 
+   /* Which library subsets this pipeline (or library) contains. */
+   VkGraphicsPipelineLibraryFlagsEXT state;
+
    /* for dynamic states which use the same register: */
    struct {
       uint32_t gras_su_cntl, gras_su_cntl_mask;
@@ -119,6 +134,8 @@ struct tu_pipeline
       enum a5xx_line_mode line_mode;
       bool provoking_vertex_last;
 
+      uint32_t multiview_mask;
+
       struct tu_draw_state state;
    } rast;
 
@@ -209,6 +226,31 @@ struct tu_pipeline
       bool z_negative_one_to_one;
    } viewport;
 
+   /* Used only for libraries. compiled_shaders only contains variants
+    * compiled by this pipeline, and it owns them, so when it is freed they
+    * disappear. Similarly, nir_shaders owns the link-time NIR. shaders
+    * points to the shaders from this pipeline and all libraries included in
+    * it, for convenience.
+    */
+   struct tu_compiled_shaders *compiled_shaders;
+   struct tu_nir_shaders *nir_shaders;
+   struct {
+      nir_shader *nir;
+      struct tu_shader_key key;
+      struct tu_const_state const_state;
+      struct ir3_shader_variant *variant, *safe_const_variant;
+   } shaders[MESA_SHADER_FRAGMENT + 1];
+
+   struct ir3_shader_key ir3_key;
+
+   /* Used for libraries, to stitch together an overall layout for the final
+    * pipeline.
+    */
+   struct tu_descriptor_set_layout *layouts[MAX_SETS];
+   unsigned num_sets;
+   unsigned push_constant_size;
+   bool independent_sets;
+
    void *executables_mem_ctx; /* tu_pipeline_executable */
    struct util_dynarray executables;
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index b8e0f28c81a..4f9d847e230 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -176,19 +176,32 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
    struct tu_descriptor_set_binding_layout *binding_layout =
       &set_layout->binding[binding];
-   uint32_t base;
+   nir_ssa_def *base;
 
    shader->active_desc_sets |= 1u << set;
 
    switch (binding_layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      base = (layout->set[set].dynamic_offset_start +
-              binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS);
+      if (layout->independent_sets) {
+         /* With independent sets, we don't know
+          * layout->set[set].dynamic_offset_start until after link time,
+          * which with fast linking means after the shader is compiled. We
+          * have to get it from the const file instead.
+          */
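+         /* Illustrative: for set 2 this loads one 32-bit word at const
+          * location dynamic_offset_loc + 2 and adds it to the binding's
+          * static offset below.
+          */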
+         base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS));
+         nir_ssa_def *dynamic_offset_start =
+            nir_load_uniform(b, 1, 32, nir_imm_int(b, 0),
+                             .base = shader->const_state.dynamic_offset_loc + set);
+         base = nir_iadd(b, base, dynamic_offset_start);
+      } else {
+         base = nir_imm_int(b, (layout->set[set].dynamic_offset_start +
+                                binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
+      }
       set = MAX_SETS;
       break;
    default:
-      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
+      base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
       break;
    }
 
@@ -204,7 +217,7 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    }
 
    nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
-                               nir_iadd(b, nir_imm_int(b, base),
+                               nir_iadd(b, base,
                                         nir_ishl(b, vulkan_idx, shift)),
                                shift);
 
@@ -658,6 +671,13 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
       align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
             dev->compiler->const_upload_unit);
 
+   if (layout->independent_sets) {
+      const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
+      reserved_consts_vec4 += DIV_ROUND_UP(MAX_SETS, 4);
+   } else {
+      const_state->dynamic_offset_loc = UINT32_MAX;
+   }
+
    tu_shader->reserved_user_consts_vec4 = reserved_consts_vec4;
 
    struct lower_instr_params params = {
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 6f529cde058..69e0a20683b 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -21,6 +21,7 @@ struct tu_push_constant_range
 struct tu_const_state
 {
    struct tu_push_constant_range push_consts;
+   uint32_t dynamic_offset_loc;
 };
 
 struct tu_shader