diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt index 4258e2f4ff5..c5895225675 100644 --- a/src/freedreno/ci/freedreno-a618-fails.txt +++ b/src/freedreno/ci/freedreno-a618-fails.txt @@ -15,6 +15,22 @@ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash @@ -23,6 +39,22 @@ gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachme gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash 
+gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash spill-dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bool,Fail spill-dEQP-VK.subgroups.ballot_broadcast.compute.subgroupbroadcast_bool_requiredsubgroupsize128,Fail diff --git a/src/freedreno/ci/freedreno-a630-asan-fails.txt b/src/freedreno/ci/freedreno-a630-asan-fails.txt index 25d7994d5e5..7f45d307e71 100644 --- a/src/freedreno/ci/freedreno-a630-asan-fails.txt +++ b/src/freedreno/ci/freedreno-a630-asan-fails.txt @@ -1 +1,28 @@ dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.12,Crash + +# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3759 +# deqp-vk: ../src/freedreno/vulkan/tu_pipeline.c:3894: tu_pipeline_builder_init_graphics: Assertion `subpass->color_count == 0 || !create_info->pColorBlendState || subpass->color_count == create_info->pColorBlendState->attachmentCount' failed +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash 
+dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 99a9feacd54..5ca2147794e 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -431,6 +431,22 @@ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_ dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash @@ -439,6 +455,22 @@ gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachme gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash 
gmem-dEQP-VK.pipeline.monolithic.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.pipeline_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments4_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more1,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments5_more3,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more0,Crash +gmem-dEQP-VK.pipeline.fast_linked_library.color_write_enable_maxa.cwe_after_bind.attachments6_more1,Crash # https://gitlab.freedesktop.org/mesa/mesa/-/issues/7152 spec@ext_transform_feedback@builtin-varyings gl_culldistance,Fail diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 797aac513e6..3c2fde55cae 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -615,17 +615,19 @@ ir3_trim_constlen(struct ir3_shader_variant **variants, { unsigned constlens[MESA_SHADER_STAGES] = {}; + bool shared_consts_enable = false; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (variants[i]) + if (variants[i]) { constlens[i] = variants[i]->constlen; + shared_consts_enable = + ir3_const_state(variants[i])->shared_consts_enable; + } } uint32_t trimmed = 0; STATIC_ASSERT(MESA_SHADER_STAGES <= 8 * sizeof(trimmed)); - bool shared_consts_enable = - ir3_const_state(variants[MESA_SHADER_VERTEX])->shared_consts_enable; - /* Use a hw quirk for geometry shared consts, not matched with actual * shared consts size (on a6xx). */ diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c index d9d4a051f8f..14d8b4b07da 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.c +++ b/src/freedreno/vulkan/tu_descriptor_set.c @@ -410,6 +410,46 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, * just multiple descriptor set layouts pasted together. 
*/ +void +tu_pipeline_layout_init(struct tu_pipeline_layout *layout) +{ + unsigned dynamic_offset_size = 0; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + assert(set < MAX_SETS); + layout->set[set].dynamic_offset_start = dynamic_offset_size; + + if (layout->set[set].layout) + dynamic_offset_size += layout->set[set].layout->dynamic_offset_size; + } + + layout->dynamic_offset_size = dynamic_offset_size; + + /* We only care about INDEPENDENT_SETS for dynamic-offset descriptors, + * where all the descriptors from all the sets are combined into one set + * and we have to provide the dynamic_offset_start dynamically with fast + * linking. + */ + if (dynamic_offset_size == 0) { + layout->independent_sets = false; + } + + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + for (unsigned s = 0; s < layout->num_sets; s++) { + if (layout->set[s].layout) + sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout); + _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start, + sizeof(layout->set[s].dynamic_offset_start)); + } + _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets)); + _mesa_sha1_update(&ctx, &layout->push_constant_size, + sizeof(layout->push_constant_size)); + _mesa_sha1_update(&ctx, &layout->independent_sets, + sizeof(layout->independent_sets)); + _mesa_sha1_final(&ctx, layout->sha1); +} + VKAPI_ATTR VkResult VKAPI_CALL tu_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, @@ -428,23 +468,16 @@ tu_CreatePipelineLayout(VkDevice _device, return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); layout->num_sets = pCreateInfo->setLayoutCount; - layout->dynamic_offset_size = 0; - - unsigned dynamic_offset_size = 0; - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]); assert(set < MAX_SETS); layout->set[set].layout = set_layout; - layout->set[set].dynamic_offset_start = dynamic_offset_size; - vk_descriptor_set_layout_ref(&set_layout->vk); - - dynamic_offset_size += set_layout->dynamic_offset_size; + if (set_layout) + vk_descriptor_set_layout_ref(&set_layout->vk); } - layout->dynamic_offset_size = dynamic_offset_size; layout->push_constant_size = 0; for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { @@ -454,18 +487,10 @@ tu_CreatePipelineLayout(VkDevice _device, } layout->push_constant_size = align(layout->push_constant_size, 16); + layout->independent_sets = + pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT; - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - for (unsigned s = 0; s < layout->num_sets; s++) { - sha1_update_descriptor_set_layout(&ctx, layout->set[s].layout); - _mesa_sha1_update(&ctx, &layout->set[s].dynamic_offset_start, - sizeof(layout->set[s].dynamic_offset_start)); - } - _mesa_sha1_update(&ctx, &layout->num_sets, sizeof(layout->num_sets)); - _mesa_sha1_update(&ctx, &layout->push_constant_size, - sizeof(layout->push_constant_size)); - _mesa_sha1_final(&ctx, layout->sha1); + tu_pipeline_layout_init(layout); *pPipelineLayout = tu_pipeline_layout_to_handle(layout); @@ -483,8 +508,10 @@ tu_DestroyPipelineLayout(VkDevice _device, if (!pipeline_layout) return; - for (uint32_t i = 0; i < pipeline_layout->num_sets; i++) - vk_descriptor_set_layout_unref(&device->vk, &pipeline_layout->set[i].layout->vk); + for (uint32_t i = 0; i < pipeline_layout->num_sets; i++) { + if (pipeline_layout->set[i].layout) + vk_descriptor_set_layout_unref(&device->vk, 
&pipeline_layout->set[i].layout->vk); + } vk_object_free(&device->vk, pAllocator, pipeline_layout); } diff --git a/src/freedreno/vulkan/tu_descriptor_set.h b/src/freedreno/vulkan/tu_descriptor_set.h index 6b7b1bafdae..270afcfdc30 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.h +++ b/src/freedreno/vulkan/tu_descriptor_set.h @@ -93,6 +93,8 @@ struct tu_pipeline_layout uint32_t dynamic_offset_start; } set[MAX_SETS]; + bool independent_sets; + uint32_t num_sets; uint32_t push_constant_size; uint32_t dynamic_offset_size; @@ -102,6 +104,8 @@ struct tu_pipeline_layout VK_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT) +void tu_pipeline_layout_init(struct tu_pipeline_layout *layout); + struct tu_descriptor_set { struct vk_object_base base; diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 894d9875b24..c63610dd277 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -238,11 +238,14 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_tooling_info = true, .EXT_inline_uniform_block = true, .EXT_mutable_descriptor_type = true, + .KHR_pipeline_library = true, + .EXT_graphics_pipeline_library = true, }; } static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = { &tu_shaders_ops, + &tu_nir_shaders_ops, NULL, }; @@ -906,6 +909,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->multiDraw = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: { + VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT *features = + (VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT *)ext; + features->graphicsPipelineLibrary = true; + break; + } default: break; @@ -1371,6 +1380,13 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, properties->maxMultiDrawCount = 2048; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: { + VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *props = + (VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT *)ext; + props->graphicsPipelineLibraryFastLinking = true; + props->graphicsPipelineLibraryIndependentInterpolationDecoration = true; + break; + } default: break; } diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 08659a45cfb..68b59b65466 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -15,6 +15,7 @@ #include "main/menums.h" #include "nir/nir.h" #include "nir/nir_builder.h" +#include "nir/nir_serialize.h" #include "spirv/nir_spirv.h" #include "util/debug.h" #include "util/mesa-sha1.h" @@ -243,11 +244,15 @@ struct tu_pipeline_builder struct tu_device *device; void *mem_ctx; struct vk_pipeline_cache *cache; - struct tu_pipeline_layout *layout; const VkAllocationCallbacks *alloc; const VkGraphicsPipelineCreateInfo *create_info; - struct tu_compiled_shaders *shaders; + struct tu_pipeline_layout layout; + + struct tu_compiled_shaders *compiled_shaders; + + struct tu_const_state const_state[MESA_SHADER_FRAGMENT + 1]; + struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1]; struct ir3_shader_variant *binning_variant; uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1]; uint64_t binning_vs_iova; @@ -260,6 +265,7 @@ struct tu_pipeline_builder /* these states are affectd by rasterizer_discard */ bool depth_clip_disable; bool use_color_attachments; + bool attachment_state_valid; VkFormat color_attachment_formats[MAX_RTS]; 
VkFormat depth_attachment_format; uint32_t multiview_mask; @@ -268,6 +274,24 @@ struct tu_pipeline_builder bool subpass_feedback_loop_color; bool subpass_feedback_loop_ds; bool feedback_loop_may_involve_textures; + + /* Each library defines at least one piece of state in + * VkGraphicsPipelineLibraryFlagsEXT, and libraries cannot overlap, so + * there can be at most as many libraries as pieces of state, of which + * there are currently 4. + */ +#define MAX_LIBRARIES 4 + + unsigned num_libraries; + struct tu_pipeline *libraries[MAX_LIBRARIES]; + + /* This is just the state that we are compiling now, whereas the final + * pipeline will include the state from the libraries. + */ + VkGraphicsPipelineLibraryFlagsEXT state; + + /* The stages we are compiling now. */ + VkShaderStageFlags active_stages; }; static bool @@ -648,6 +672,30 @@ tu6_emit_xs(struct tu_cs *cs, } } +static void +tu6_emit_dynamic_offset(struct tu_cs *cs, + const struct ir3_shader_variant *xs, + struct tu_pipeline_builder *builder) +{ + if (!xs || builder->const_state[xs->type].dynamic_offset_loc == UINT32_MAX) + return; + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(xs->type), 3 + MAX_SETS); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(builder->const_state[xs->type].dynamic_offset_loc / 4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(xs->type)) | + CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(MAX_SETS, 4))); + tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + for (unsigned i = 0; i < MAX_SETS; i++) { + unsigned dynamic_offset_start = + builder->layout.set[i].dynamic_offset_start / (A6XX_TEX_CONST_DWORDS * 4); + tu_cs_emit(cs, i < builder->layout.num_sets ? 
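+                 /* sets not present in the layout have no dynamic
+                  * descriptors, so emit a zero start offset for them */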
dynamic_offset_start : 0); + } +} + static void tu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable) { @@ -1767,7 +1815,7 @@ tu6_emit_program_config(struct tu_cs *cs, STATIC_ASSERT(MESA_SHADER_VERTEX == 0); - bool shared_consts_enable = tu6_shared_constants_enable(builder->layout, + bool shared_consts_enable = tu6_shared_constants_enable(&builder->layout, builder->device->compiler); tu6_emit_shared_consts_enable(cs, shared_consts_enable); @@ -1780,7 +1828,7 @@ tu6_emit_program_config(struct tu_cs *cs, .gfx_ibo = true, .gfx_shared_const = shared_consts_enable)); for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { - tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]); + tu6_emit_xs_config(cs, stage, builder->variants[stage]); } } @@ -1790,15 +1838,14 @@ tu6_emit_program(struct tu_cs *cs, bool binning_pass, struct tu_pipeline *pipeline) { - const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; const struct ir3_shader_variant *bs = builder->binning_variant; - const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; - const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL]; - const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY]; - const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; + const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL]; + const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY]; + const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT]; gl_shader_stage stage = MESA_SHADER_VERTEX; - uint32_t cps_per_patch = builder->create_info->pTessellationState ? - builder->create_info->pTessellationState->patchControlPoints : 0; + uint32_t cps_per_patch = pipeline->tess.patch_control_points; bool multi_pos_output = vs->multi_pos_output; /* Don't use the binning pass variant when GS is present because we don't @@ -1807,20 +1854,22 @@ tu6_emit_program(struct tu_cs *cs, if (binning_pass && !gs) { vs = bs; tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova); + tu6_emit_dynamic_offset(cs, bs, builder); stage++; } for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { - const struct ir3_shader_variant *xs = builder->shaders->variants[stage]; + const struct ir3_shader_variant *xs = builder->variants[stage]; if (stage == MESA_SHADER_FRAGMENT && binning_pass) fs = xs = NULL; tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]); + tu6_emit_dynamic_offset(cs, xs, builder); } - uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1; - uint32_t multiview_cntl = builder->multiview_mask ? + uint32_t multiview_views = util_logbase2(pipeline->rast.multiview_mask) + 1; + uint32_t multiview_cntl = pipeline->rast.multiview_mask ? 
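+      /* the mask is read from pipeline->rast so that it can come from a
+       * pre-rasterization library when fast-linking */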
A6XX_PC_MULTIVIEW_CNTL_ENABLE | A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) | COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS) @@ -1845,7 +1894,7 @@ tu6_emit_program(struct tu_cs *cs, if (multiview_cntl && builder->device->physical_device->info->a6xx.supports_multiview_mask) { tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1); - tu_cs_emit(cs, builder->multiview_mask); + tu_cs_emit(cs, pipeline->rast.multiview_mask); } tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); @@ -2366,6 +2415,38 @@ tu_setup_pvtmem(struct tu_device *dev, return VK_SUCCESS; } +static bool +contains_all_shader_state(VkGraphicsPipelineLibraryFlagsEXT state) +{ + return (state & + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) == + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT); +} + +/* Return true if this pipeline contains all of the GPL stages listed but none + * of the libraries it uses do, so this is "the first time" that all of them + * are defined together. This is useful for state that needs to be combined + * from multiple GPL stages. + */ + +static bool +set_combined_state(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline, + VkGraphicsPipelineLibraryFlagsEXT state) +{ + if ((pipeline->state & state) != state) + return false; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + if ((builder->libraries[i]->state & state) == state) + return false; + } + + return true; +} + static VkResult tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline *pipeline, @@ -2373,38 +2454,49 @@ tu_pipeline_allocate_cs(struct tu_device *dev, struct tu_pipeline_builder *builder, struct ir3_shader_variant *compute) { - uint32_t size = 1024 + tu6_load_state_size(pipeline, layout); + uint32_t size = 1024; /* graphics case: */ if (builder) { - size += TU6_EMIT_VERTEX_INPUT_MAX_DWORDS + - 2 * TU6_EMIT_VFD_DEST_MAX_DWORDS; - - for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - if (builder->shaders->variants[i]) { - size += builder->shaders->variants[i]->info.size / 4; - } + if (builder->state & + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { + size += TU6_EMIT_VERTEX_INPUT_MAX_DWORDS; } - size += builder->binning_variant->info.size / 4; + if (set_combined_state(builder, pipeline, + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) { + size += 2 * TU6_EMIT_VFD_DEST_MAX_DWORDS; + size += tu6_load_state_size(pipeline, layout); - builder->additional_cs_reserve_size = 0; - for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - struct ir3_shader_variant *variant = builder->shaders->variants[i]; - if (variant) { - builder->additional_cs_reserve_size += - tu_xs_get_additional_cs_size_dwords(variant); - - if (variant->binning) { - builder->additional_cs_reserve_size += - tu_xs_get_additional_cs_size_dwords(variant->binning); + for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) { + if (builder->variants[i]) { + size += builder->variants[i]->info.size / 4; } } - } - /* The additional size is used twice, once per tu6_emit_program() call. 
*/ - size += builder->additional_cs_reserve_size * 2; + size += builder->binning_variant->info.size / 4; + + builder->additional_cs_reserve_size = 0; + for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { + struct ir3_shader_variant *variant = builder->variants[i]; + if (variant) { + builder->additional_cs_reserve_size += + tu_xs_get_additional_cs_size_dwords(variant); + + if (variant->binning) { + builder->additional_cs_reserve_size += + tu_xs_get_additional_cs_size_dwords(variant->binning); + } + } + } + + /* The additional size is used twice, once per tu6_emit_program() call. */ + size += builder->additional_cs_reserve_size * 2; + } } else { + size += tu6_load_state_size(pipeline, layout); + size += compute->info.size / 4; size += tu_xs_get_additional_cs_size_dwords(compute); @@ -2436,20 +2528,42 @@ tu_pipeline_allocate_cs(struct tu_device *dev, static void tu_pipeline_shader_key_init(struct ir3_shader_key *key, const struct tu_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pipeline_info) + struct tu_pipeline_builder *builder, + nir_shader **nir) { - for (uint32_t i = 0; i < pipeline_info->stageCount; i++) { - if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) { + /* We set this after we compile to NIR because we need the prim mode */ + key->tessellation = IR3_TESS_NONE; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + if (!(builder->libraries[i]->state & + (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))) + continue; + + const struct ir3_shader_key *library_key = + &builder->libraries[i]->ir3_key; + + if (library_key->tessellation != IR3_TESS_NONE) + key->tessellation = library_key->tessellation; + key->has_gs |= library_key->has_gs; + key->sample_shading |= library_key->sample_shading; + } + + for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { + if (builder->create_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) { key->has_gs = true; break; } } - if (pipeline_info->pRasterizationState->rasterizerDiscardEnable && - !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD))) + if (!(builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) return; - const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState; + if (builder->rasterizer_discard) + return; + + const VkPipelineMultisampleStateCreateInfo *msaa_info = + builder->create_info->pMultisampleState; /* The 1.3.215 spec says: * @@ -2475,12 +2589,9 @@ tu_pipeline_shader_key_init(struct ir3_shader_key *key, * just checked in tu6_emit_fs_inputs. We will also copy the value to * tu_shader_key::force_sample_interp in a bit. 
*/ - if (msaa_info->sampleShadingEnable && + if (msaa_info && msaa_info->sampleShadingEnable && (msaa_info->minSampleShading * msaa_info->rasterizationSamples) > 1.0f) key->sample_shading = true; - - /* We set this after we compile to NIR because we need the prim mode */ - key->tessellation = IR3_TESS_NONE; } static uint32_t @@ -2624,12 +2735,21 @@ tu_shader_key_init(struct tu_shader_key *key, static void tu_hash_stage(struct mesa_sha1 *ctx, const VkPipelineShaderStageCreateInfo *stage, + const nir_shader *nir, const struct tu_shader_key *key) { - unsigned char stage_hash[SHA1_DIGEST_LENGTH]; - vk_pipeline_hash_shader_stage(stage, stage_hash); - _mesa_sha1_update(ctx, stage_hash, sizeof(stage_hash)); + if (nir) { + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, nir, true); + _mesa_sha1_update(ctx, blob.data, blob.size); + blob_finish(&blob); + } else { + unsigned char stage_hash[SHA1_DIGEST_LENGTH]; + vk_pipeline_hash_shader_stage(stage, stage_hash); + _mesa_sha1_update(ctx, stage_hash, sizeof(stage_hash)); + } _mesa_sha1_update(ctx, key, sizeof(*key)); } @@ -2647,9 +2767,11 @@ tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler) static void tu_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages, + nir_shader *const *nir, const struct tu_pipeline_layout *layout, const struct tu_shader_key *keys, const struct ir3_shader_key *ir3_key, + VkGraphicsPipelineLibraryFlagsEXT state, const struct ir3_compiler *compiler) { struct mesa_sha1 ctx; @@ -2662,10 +2784,11 @@ tu_hash_shaders(unsigned char *hash, _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key)); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { - if (stages[i]) { - tu_hash_stage(&ctx, stages[i], &keys[i]); + if (stages[i] || nir[i]) { + tu_hash_stage(&ctx, stages[i], nir[i], &keys[i]); } } + _mesa_sha1_update(&ctx, &state, sizeof(state)); tu_hash_compiler(&ctx, compiler); _mesa_sha1_final(&ctx, hash); } @@ -2684,7 +2807,7 @@ tu_hash_compute(unsigned char *hash, if (layout) _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); - tu_hash_stage(&ctx, stage, key); + tu_hash_stage(&ctx, stage, NULL, key); tu_hash_compiler(&ctx, compiler); _mesa_sha1_final(&ctx, hash); @@ -2708,6 +2831,9 @@ tu_shaders_destroy(struct vk_pipeline_cache_object *object) for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) ralloc_free(shaders->variants[i]); + for (unsigned i = 0; i < ARRAY_SIZE(shaders->safe_const_variants); i++) + ralloc_free(shaders->safe_const_variants[i]); + vk_pipeline_cache_object_finish(&shaders->base); vk_free(&object->device->alloc, shaders); } @@ -2753,6 +2879,13 @@ tu_shaders_serialize(struct vk_pipeline_cache_object *object, } else { blob_write_uint8(blob, 0); } + + if (shaders->safe_const_variants[i]) { + blob_write_uint8(blob, 1); + ir3_store_variant(blob, shaders->safe_const_variants[i]); + } else { + blob_write_uint8(blob, 0); + } } return true; @@ -2774,10 +2907,13 @@ tu_shaders_deserialize(struct vk_device *_device, shaders->active_desc_sets = blob_read_uint8(blob); for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { - bool has_shader = blob_read_uint8(blob); - if (has_shader) { + if (blob_read_uint8(blob)) { shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); } + + if (blob_read_uint8(blob)) { + shaders->safe_const_variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); + } } return &shaders->base; @@ -2806,6 +2942,117 @@ tu_pipeline_cache_insert(struct vk_pipeline_cache *cache, return container_of(object, struct 
tu_compiled_shaders, base); } +static bool +tu_nir_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob); + +static struct vk_pipeline_cache_object * +tu_nir_shaders_deserialize(struct vk_device *device, + const void *key_data, size_t key_size, + struct blob_reader *blob); + +static void +tu_nir_shaders_destroy(struct vk_pipeline_cache_object *object) +{ + struct tu_nir_shaders *shaders = + container_of(object, struct tu_nir_shaders, base); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) + ralloc_free(shaders->nir[i]); + + vk_pipeline_cache_object_finish(&shaders->base); + vk_free(&object->device->alloc, shaders); +} + +const struct vk_pipeline_cache_object_ops tu_nir_shaders_ops = { + .serialize = tu_nir_shaders_serialize, + .deserialize = tu_nir_shaders_deserialize, + .destroy = tu_nir_shaders_destroy, +}; + +static struct tu_nir_shaders * +tu_nir_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct tu_nir_shaders, shaders, 1); + VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size); + + if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + memcpy(obj_key_data, key_data, key_size); + vk_pipeline_cache_object_init(&dev->vk, &shaders->base, + &tu_nir_shaders_ops, obj_key_data, key_size); + + return shaders; +} + +static bool +tu_nir_shaders_serialize(struct vk_pipeline_cache_object *object, + struct blob *blob) +{ + struct tu_nir_shaders *shaders = + container_of(object, struct tu_nir_shaders, base); + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) { + if (shaders->nir[i]) { + blob_write_uint8(blob, 1); + nir_serialize(blob, shaders->nir[i], true); + } else { + blob_write_uint8(blob, 0); + } + } + + return true; +} + +static struct vk_pipeline_cache_object * +tu_nir_shaders_deserialize(struct vk_device *_device, + const void *key_data, size_t key_size, + struct blob_reader *blob) +{ + struct tu_device *dev = container_of(_device, struct tu_device, vk); + struct tu_nir_shaders *shaders = + tu_nir_shaders_init(dev, key_data, key_size); + + if (!shaders) + return NULL; + + for (unsigned i = 0; i < ARRAY_SIZE(shaders->nir); i++) { + if (blob_read_uint8(blob)) { + shaders->nir[i] = + nir_deserialize(NULL, ir3_get_compiler_options(dev->compiler), blob); + } + } + + return &shaders->base; +} + +static struct tu_nir_shaders * +tu_nir_cache_lookup(struct vk_pipeline_cache *cache, + const void *key_data, size_t key_size, + bool *application_cache_hit) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_lookup_object(cache, key_data, key_size, + &tu_nir_shaders_ops, application_cache_hit); + if (object) + return container_of(object, struct tu_nir_shaders, base); + else + return NULL; +} + +static struct tu_nir_shaders * +tu_nir_cache_insert(struct vk_pipeline_cache *cache, + struct tu_nir_shaders *shaders) +{ + struct vk_pipeline_cache_object *object = + vk_pipeline_cache_add_object(cache, &shaders->base); + return container_of(object, struct tu_nir_shaders, base); +} + + static VkResult tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -2825,45 +3072,79 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, const VkPipelineCreationFeedbackCreateInfo *creation_feedback = vk_find_struct_const(builder->create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + bool must_compile = + builder->state & 
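+      /* an FS-state pipeline always has something to compile: even with no
+       * fragment stage supplied we must create an empty placeholder FS
+       * below */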
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); stage_infos[stage] = &builder->create_info->pStages[i]; - - pipeline->active_stages |= builder->create_info->pStages[i].stage; + must_compile = true; } - if (tu6_shared_constants_enable(builder->layout, builder->device->compiler)) { + if (tu6_shared_constants_enable(&builder->layout, builder->device->compiler)) { pipeline->shared_consts = (struct tu_push_constant_range) { .lo = 0, - .dwords = builder->layout->push_constant_size / 4, + .dwords = builder->layout.push_constant_size / 4, }; } + nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL }; + struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { }; for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < ARRAY_SIZE(keys); stage++) { tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device); } - struct ir3_shader_key ir3_key = {}; - tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info); + if (builder->create_info->flags & + VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; - keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; - keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; - keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; + for (unsigned j = 0; j < ARRAY_SIZE(library->shaders); j++) { + if (library->shaders[j].nir) { + assert(!nir[j]); + nir[j] = nir_shader_clone(NULL, library->shaders[j].nir); + keys[j] = library->shaders[j].key; + must_compile = true; + } + } + } + } + + struct ir3_shader_key ir3_key = {}; + tu_pipeline_shader_key_init(&ir3_key, pipeline, builder, nir); + + struct tu_compiled_shaders *compiled_shaders = NULL; + struct tu_nir_shaders *nir_shaders = NULL; + if (!must_compile) + goto done; + + if (builder->state & + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; + } + + if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; + keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; + } unsigned char pipeline_sha1[20]; - tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler); + tu_hash_shaders(pipeline_sha1, stage_infos, nir, &builder->layout, keys, + &ir3_key, builder->state, compiler); + + unsigned char nir_sha1[21]; + memcpy(nir_sha1, pipeline_sha1, sizeof(pipeline_sha1)); + nir_sha1[20] = 'N'; const bool executable_info = builder->create_info->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL }; - struct tu_compiled_shaders *compiled_shaders; - if (!executable_info) { + bool cache_hit = false; bool application_cache_hit = false; compiled_shaders = @@ -2871,12 +3152,31 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, sizeof(pipeline_sha1), &application_cache_hit); + cache_hit = !!compiled_shaders; + + /* If the user asks us to keep the NIR around, we need to have it for a + * successful cache hit. If we only have a "partial" cache hit, then we + * still need to recompile in order to get the NIR. 
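+       * The NIR objects are cached under nir_sha1, which is the shader
+       * hash with a trailing 'N' byte appended.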
+       */
+      if (compiled_shaders &&
+          (builder->create_info->flags &
+           VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) {
+         bool nir_application_cache_hit = false;
+         nir_shaders =
+            tu_nir_cache_lookup(builder->cache, &nir_sha1,
+                                sizeof(nir_sha1),
+                                &nir_application_cache_hit);
+
+         application_cache_hit &= nir_application_cache_hit;
+         cache_hit &= !!nir_shaders;
+      }
+
       if (application_cache_hit && builder->cache != builder->device->mem_cache) {
          pipeline_feedback.flags |=
             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
       }
 
-      if (compiled_shaders)
+      if (cache_hit)
          goto done;
    }
 
@@ -2885,8 +3185,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       return VK_PIPELINE_COMPILE_REQUIRED;
    }
 
-   nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL };
-
    struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL };
 
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
@@ -2907,7 +3205,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
    }
 
-   if (!nir[MESA_SHADER_FRAGMENT]) {
+   if (!nir[MESA_SHADER_FRAGMENT] &&
+       (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
       const nir_shader_compiler_options *nir_options =
          ir3_get_compiler_options(builder->device->compiler);
       nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
@@ -2929,6 +3228,32 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
 
    tu_link_shaders(builder, nir, ARRAY_SIZE(nir));
 
+   if (builder->create_info->flags &
+       VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) {
+      nir_shaders =
+         tu_nir_shaders_init(builder->device, &nir_sha1, sizeof(nir_sha1));
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (!nir[stage])
+            continue;
+
+         nir_shaders->nir[stage] = nir_shader_clone(NULL, nir[stage]);
+      }
+
+      nir_shaders = tu_nir_cache_insert(builder->cache, nir_shaders);
+
+      if (compiled_shaders)
+         goto done;
+   }
+
+   compiled_shaders =
+      tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
+
+   if (!compiled_shaders) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto fail;
+   }
+
    uint32_t desc_sets = 0;
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
        stage < ARRAY_SIZE(nir); stage++) {
@@ -2939,7 +3264,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
 
       struct tu_shader *shader =
          tu_shader_create(builder->device, nir[stage], &keys[stage],
-                          builder->layout, builder->alloc);
+                          &builder->layout, builder->alloc);
       if (!shader) {
          result = VK_ERROR_OUT_OF_HOST_MEMORY;
          goto fail;
@@ -2972,20 +3297,18 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
    }
 
+   /* In the tess-but-not-FS case we don't know whether the FS will read
+    * PrimID so we need to unconditionally store it.
+    */
+   if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
+      ir3_key.tcs_store_primid = true;
+
    struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY];
    if (!last_shader)
       last_shader = shaders[MESA_SHADER_TESS_EVAL];
    if (!last_shader)
       last_shader = shaders[MESA_SHADER_VERTEX];
 
-   compiled_shaders =
-      tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
-
-   if (!compiled_shaders) {
-      result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      goto fail;
-   }
-
    compiled_shaders->active_desc_sets = desc_sets;
 
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
@@ -3028,9 +3351,19 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
          }
 
          stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
+      } else if (contains_all_shader_state(builder->state)) {
+         compiled_shaders->safe_const_variants[stage] =
+            ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
+                                      executable_info);
+         if (!compiled_shaders->safe_const_variants[stage]) {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+            goto fail;
+         }
       }
    }
 
+   ir3_key.safe_constlen = false;
+
    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
         stage < ARRAY_SIZE(nir); stage++) {
       if (shaders[stage]) {
@@ -3041,42 +3374,133 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    compiled_shaders =
       tu_pipeline_cache_insert(builder->cache, compiled_shaders);
 
-done:
-   for (gl_shader_stage stage = MESA_SHADER_VERTEX;
-        stage < ARRAY_SIZE(nir); stage++) {
-      if (compiled_shaders->variants[stage]) {
-         tu_append_executable(pipeline, compiled_shaders->variants[stage],
-                              nir_initial_disasm[stage]);
+done:;
+
+   struct ir3_shader_variant *safe_const_variants[ARRAY_SIZE(nir)] = { NULL };
+   nir_shader *post_link_nir[ARRAY_SIZE(nir)] = { NULL };
+
+   if (compiled_shaders) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (compiled_shaders->variants[stage]) {
+            tu_append_executable(pipeline, compiled_shaders->variants[stage],
+                                 nir_initial_disasm[stage]);
+            builder->variants[stage] = compiled_shaders->variants[stage];
+            safe_const_variants[stage] =
+               compiled_shaders->safe_const_variants[stage];
+            builder->const_state[stage] =
+               compiled_shaders->const_state[stage];
+         }
       }
    }
 
-   struct ir3_shader_variant *vs =
-      compiled_shaders->variants[MESA_SHADER_VERTEX];
-
-   struct ir3_shader_variant *variant;
-   if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) {
-      tu_append_executable(pipeline, vs->binning, NULL);
-      variant = vs->binning;
-   } else {
-      variant = vs;
+   if (nir_shaders) {
+      for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+           stage < ARRAY_SIZE(nir); stage++) {
+         if (nir_shaders->nir[stage]) {
+            post_link_nir[stage] = nir_shaders->nir[stage];
+         }
+      }
    }
 
-   builder->binning_variant = variant;
+   /* In the case where we're building a library without link-time
+    * optimization but with sub-libraries that retain LTO info, we should
+    * retain it ourselves in case another pipeline includes us with LTO.
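+    * Note that the sub-library's NIR is reused by pointer here rather
+    * than cloned.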
+ */ + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(library->shaders); stage++) { + if (!post_link_nir[stage] && library->shaders[stage].nir) { + post_link_nir[stage] = library->shaders[stage].nir; + keys[stage] = library->shaders[stage].key; + } + } + } - builder->shaders = compiled_shaders; + if (!(builder->create_info->flags & + VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT)) { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(library->shaders); stage++) { + if (library->shaders[stage].variant) { + assert(!builder->variants[stage]); + builder->variants[stage] = library->shaders[stage].variant; + safe_const_variants[stage] = + library->shaders[stage].safe_const_variant; + builder->const_state[stage] = + library->shaders[stage].const_state; + post_link_nir[stage] = library->shaders[stage].nir; + } + } + } - pipeline->active_desc_sets = compiled_shaders->active_desc_sets; - if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { + /* Because we added more variants, we need to trim constlen again. + */ + if (builder->num_libraries > 0) { + uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler); + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(builder->variants); stage++) { + if (safe_constlens & (1u << stage)) + builder->variants[stage] = safe_const_variants[stage]; + } + } + } + + if (compiled_shaders) + pipeline->active_desc_sets = compiled_shaders->active_desc_sets; + + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + pipeline->active_desc_sets |= library->active_desc_sets; + } + + if (compiled_shaders && compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { pipeline->tess.patch_type = compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation; } + if (contains_all_shader_state(pipeline->state)) { + struct ir3_shader_variant *vs = + builder->variants[MESA_SHADER_VERTEX]; + + struct ir3_shader_variant *variant; + if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) { + tu_append_executable(pipeline, vs->binning, NULL); + variant = vs->binning; + } else { + variant = vs; + } + + builder->binning_variant = variant; + + builder->compiled_shaders = compiled_shaders; + + /* It doesn't make much sense to use RETAIN_LINK_TIME_OPTIMIZATION_INFO + * when compiling all stages, but make sure we don't leak. 
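+       * The pipeline cache still holds its own reference, so the cached
+       * NIR is not destroyed here.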
+ */ + if (nir_shaders) + vk_pipeline_cache_object_unref(&nir_shaders->base); + } else { + pipeline->compiled_shaders = compiled_shaders; + pipeline->nir_shaders = nir_shaders; + pipeline->ir3_key = ir3_key; + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < ARRAY_SIZE(pipeline->shaders); stage++) { + pipeline->shaders[stage].nir = post_link_nir[stage]; + pipeline->shaders[stage].key = keys[stage]; + pipeline->shaders[stage].const_state = builder->const_state[stage]; + pipeline->shaders[stage].variant = builder->variants[stage]; + pipeline->shaders[stage].safe_const_variant = + safe_const_variants[stage]; + } + } + pipeline_feedback.duration = os_time_get_nano() - pipeline_start; if (creation_feedback) { *creation_feedback->pPipelineCreationFeedback = pipeline_feedback; - assert(builder->create_info->stageCount == - creation_feedback->pipelineStageCreationFeedbackCount); for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { gl_shader_stage s = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); @@ -3097,6 +3521,9 @@ fail: if (compiled_shaders) vk_pipeline_cache_object_unref(&compiled_shaders->base); + if (nir_shaders) + vk_pipeline_cache_object_unref(&nir_shaders->base); + return result; } @@ -3227,6 +3654,155 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, } } +static void +tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + const VkPipelineLibraryCreateInfoKHR *library_info = + vk_find_struct_const(builder->create_info->pNext, + PIPELINE_LIBRARY_CREATE_INFO_KHR); + + if (library_info) { + assert(library_info->libraryCount <= MAX_LIBRARIES); + builder->num_libraries = library_info->libraryCount; + for (unsigned i = 0; i < library_info->libraryCount; i++) { + TU_FROM_HANDLE(tu_pipeline, library, library_info->pLibraries[i]); + builder->libraries[i] = library; + } + } + + /* Merge in the state from libraries. The program state is a bit special + * and is handled separately. 
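+    * It is only copied from a library that itself contains all of the
+    * shader state (see contains_all_shader_state()).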
+ */ + pipeline->state = builder->state; + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + pipeline->state |= library->state; + + uint32_t library_dynamic_state = 0; + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { + pipeline->vi = library->vi; + pipeline->ia = library->ia; + library_dynamic_state |= + BIT(TU_DYNAMIC_STATE_VERTEX_INPUT) | + BIT(TU_DYNAMIC_STATE_VB_STRIDE) | + BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY) | + BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); + pipeline->shared_consts = library->shared_consts; + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) { + pipeline->tess = library->tess; + pipeline->rast = library->rast; + pipeline->viewport = library->viewport; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_VIEWPORT) | + BIT(VK_DYNAMIC_STATE_SCISSOR) | + BIT(VK_DYNAMIC_STATE_LINE_WIDTH) | + BIT(VK_DYNAMIC_STATE_DEPTH_BIAS) | + BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD); + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) { + pipeline->ds = library->ds; + pipeline->lrz.fs = library->lrz.fs; + pipeline->lrz.force_disable_mask |= library->lrz.force_disable_mask; + pipeline->lrz.force_late_z |= library->lrz.force_late_z; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | + BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | + BIT(VK_DYNAMIC_STATE_STENCIL_REFERENCE) | + BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL) | + BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL) | + BIT(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + pipeline->shared_consts = library->shared_consts; + } + + if (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { + pipeline->blend = library->blend; + pipeline->output = library->output; + pipeline->lrz.force_disable_mask |= library->lrz.force_disable_mask; + pipeline->lrz.force_late_z |= library->lrz.force_late_z; + pipeline->prim_order = library->prim_order; + library_dynamic_state |= + BIT(VK_DYNAMIC_STATE_BLEND_CONSTANTS) | + BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS) | + BIT(TU_DYNAMIC_STATE_BLEND) | + BIT(TU_DYNAMIC_STATE_LOGIC_OP) | + BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE); + } + + if ((library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && + (library->state & + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) { + pipeline->prim_order = library->prim_order; + } + + pipeline->dynamic_state_mask = + (pipeline->dynamic_state_mask & ~library_dynamic_state) | + (library->dynamic_state_mask & library_dynamic_state); + + u_foreach_bit (i, library_dynamic_state & ~library->dynamic_state_mask) { + if (i >= TU_DYNAMIC_STATE_COUNT) + break; + + pipeline->dynamic_state[i] = library->dynamic_state[i]; + } + + if (contains_all_shader_state(library->state)) { + pipeline->program = library->program; + pipeline->load_state = library->load_state; + } + } +} + +static void +tu_pipeline_builder_parse_layout(struct tu_pipeline_builder *builder, + struct tu_pipeline *pipeline) +{ + TU_FROM_HANDLE(tu_pipeline_layout, layout, builder->create_info->layout); + + if (layout) { + /* Note: it's still valid to have a layout even if there are libraries. + * This allows the app to e.g. overwrite an INDEPENDENT_SET layout with + * a non-INDEPENDENT_SET layout which may make us use a faster path, + * currently this just affects dynamic offset descriptors. 
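+       * When no complete layout is provided, the per-set layouts are
+       * merged from the libraries instead (see below).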
+ */ + builder->layout = *layout; + } else { + for (unsigned i = 0; i < builder->num_libraries; i++) { + struct tu_pipeline *library = builder->libraries[i]; + builder->layout.num_sets = MAX2(builder->layout.num_sets, + library->num_sets); + for (unsigned j = 0; j < library->num_sets; j++) { + if (library->layouts[i]) + builder->layout.set[i].layout = library->layouts[i]; + } + + builder->layout.push_constant_size = pipeline->push_constant_size; + builder->layout.independent_sets |= pipeline->independent_sets; + } + + tu_pipeline_layout_init(&builder->layout); + } + + if (builder->create_info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) { + pipeline->num_sets = builder->layout.num_sets; + for (unsigned i = 0; i < pipeline->num_sets; i++) { + pipeline->layouts[i] = builder->layout.set[i].layout; + if (pipeline->layouts[i]) + vk_descriptor_set_layout_ref(&pipeline->layouts[i]->vk); + } + pipeline->push_constant_size = builder->layout.push_constant_size; + pipeline->independent_sets = builder->layout.independent_sets; + } +} + static void tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, struct tu_const_state *const_state, @@ -3266,13 +3842,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, tu6_emit_program(&prog_cs, builder, true, pipeline); pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); - for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { - if (!builder->shaders->variants[i]) + for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) { + if (!builder->variants[i]) continue; tu_pipeline_set_linkage(&pipeline->program.link[i], - &builder->shaders->const_state[i], - builder->shaders->variants[i]); + &builder->const_state[i], + builder->variants[i]); } } @@ -3377,8 +3953,8 @@ static void tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) { - if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || - !(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + if (!(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || + !(builder->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) return; const VkPipelineTessellationStateCreateInfo *tess_info = @@ -3390,7 +3966,7 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); pipeline->tess.upper_left_domain_origin = !domain_info || domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; - const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL]; pipeline->tess.param_stride = hs->output_size * 4; } @@ -3516,6 +4092,8 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); pipeline->rast.provoking_vertex_last = provoking_vtx_state && provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; + + pipeline->rast.multiview_mask = builder->multiview_mask; } static void @@ -3532,13 +4110,12 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, */ const VkPipelineDepthStencilStateCreateInfo *ds_info = builder->create_info->pDepthStencilState; - const enum pipe_format pipe_format = - 
+   pipeline->rast.multiview_mask = builder->multiview_mask;
 }
 
 static void
@@ -3532,13 +4110,12 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    */
    const VkPipelineDepthStencilStateCreateInfo *ds_info =
       builder->create_info->pDepthStencilState;
-   const enum pipe_format pipe_format =
-      vk_format_to_pipe_format(builder->depth_attachment_format);
    uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
    struct tu_cs cs;
 
-   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
-       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
+   if (!builder->attachment_state_valid ||
+       (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+        builder->depth_attachment_format != VK_FORMAT_S8_UINT)) {
       if (ds_info->depthTestEnable) {
          rb_depth_cntl |=
             A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE |
@@ -3557,19 +4134,10 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
 
       if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable)
          tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl);
-
-      pipeline->output.depth_cpp_per_sample = util_format_get_component_bits(
-         pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8;
-   } else {
-      /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
-       * to 0 when this pipeline is used, as enabling depth test when there
-       * is no depth attachment is a problem (at least for the S8_UINT case)
-       */
-      if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
-         pipeline->output.rb_depth_cntl_disable = true;
    }
 
-   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
+   if (!builder->attachment_state_valid ||
+       builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
       const VkStencilOpState *front = &ds_info->front;
       const VkStencilOpState *back = &ds_info->back;
 
@@ -3590,9 +4158,6 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
          A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
       }
 
-      pipeline->output.stencil_cpp_per_sample = util_format_get_component_bits(
-         pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8;
-
       pipeline->ds.raster_order_attachment_access =
          ds_info->flags &
          (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
@@ -3615,7 +4180,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    pipeline->ds.rb_stencil_cntl = rb_stencil_cntl;
 
    /* the remaining draw states aren't used if there is no d/s, leave them empty */
-   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
+   if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED &&
+       builder->attachment_state_valid)
       return;
 
    if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
@@ -3640,8 +4206,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
          .bfref = ds_info->back.reference & 0xff));
    }
 
-   if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) {
-      const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
+   if (builder->variants[MESA_SHADER_FRAGMENT]) {
+      const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
       if (fs->has_kill) {
          pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
       }
@@ -3651,6 +4217,26 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
    }
 }
 
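+/* Emit a zeroed RB_DEPTH_CNTL draw state for pipelines that must have the
+ * depth test disabled (no depth attachment, or S8_UINT only). If the
+ * register is dynamic instead, tu_pipeline_static_state() bails and the
+ * zeroing is deferred to draw time via output.rb_depth_cntl_disable.
+ */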
+static void
+tu_pipeline_builder_parse_ds_disable(struct tu_pipeline_builder *builder,
+                                     struct tu_pipeline *pipeline)
+{
+   if (builder->rasterizer_discard)
+      return;
+
+   /* If RB_DEPTH_CNTL is static state, then we can disable it ahead of time.
+    * However we only know whether RB_DEPTH_CNTL is dynamic in the fragment
+    * shader state, and we only know whether it needs to be force-disabled in
+    * the output interface state.
+    */
+   struct tu_cs cs;
+   if (pipeline->output.rb_depth_cntl_disable &&
+       tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
+      tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
+      tu_cs_emit(&cs, 0);
+   }
+}
+
 static void
 tu_pipeline_builder_parse_multisample_and_color_blend(
    struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
@@ -3704,6 +4290,27 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
       VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM;
    }
 
+   const enum pipe_format ds_pipe_format =
+      vk_format_to_pipe_format(builder->depth_attachment_format);
+
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+       builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
+      pipeline->output.depth_cpp_per_sample = util_format_get_component_bits(
+         ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8;
+   } else {
+      /* We need to make sure RB_DEPTH_CNTL is set to 0 when this pipeline is
+       * used, regardless of whether it's linked with a fragment shader
+       * pipeline that has an enabled depth test or whether RB_DEPTH_CNTL is
+       * set dynamically.
+       */
+      pipeline->output.rb_depth_cntl_disable = true;
+   }
+
+   if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
+      pipeline->output.stencil_cpp_per_sample = util_format_get_component_bits(
+         ds_pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8;
+   }
+
    struct tu_cs cs;
    tu6_emit_rb_mrt_controls(pipeline, blend_info,
                             builder->color_attachment_formats,
@@ -3848,9 +4455,21 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
    if (pipeline->pvtmem_bo)
       tu_bo_finish(dev, pipeline->pvtmem_bo);
 
+   if (pipeline->compiled_shaders)
+      vk_pipeline_cache_object_unref(&pipeline->compiled_shaders->base);
+
+   if (pipeline->nir_shaders)
+      vk_pipeline_cache_object_unref(&pipeline->nir_shaders->base);
+
+   for (unsigned i = 0; i < pipeline->num_sets; i++) {
+      if (pipeline->layouts[i])
+         vk_descriptor_set_layout_unref(&dev->vk, &pipeline->layouts[i]->vk);
+   }
+
    ralloc_free(pipeline->executables_mem_ctx);
 }
 
 static VkResult
 tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
                           struct tu_pipeline **pipeline)
@@ -3865,66 +4484,118 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
    (*pipeline)->executables_mem_ctx = ralloc_context(NULL);
    util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
 
-   /* compile and upload shaders */
-   result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
+   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
+   tu_pipeline_builder_parse_libraries(builder, *pipeline);
+
+   VkShaderStageFlags stages = 0;
+   for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
+      stages |= builder->create_info->pStages[i].stage;
    }
+   builder->active_stages = stages;
 
-   result = tu_pipeline_allocate_cs(builder->device, *pipeline,
-                                    builder->layout, builder, NULL);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
-   }
+   (*pipeline)->active_stages = stages;
+   for (unsigned i = 0; i < builder->num_libraries; i++)
+      (*pipeline)->active_stages |= builder->libraries[i]->active_stages;
 
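+   /* If a library contained all shader state, parse_libraries() above
+    * already copied its program and load_state, so program.state.size is
+    * nonzero and compilation can be skipped entirely.
+    */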
+   /* Compile and upload shaders unless a library has already done that. */
+   if ((*pipeline)->program.state.size == 0) {
+      tu_pipeline_builder_parse_layout(builder, *pipeline);
+
-   builder->binning_vs_iova =
-      tu_upload_variant(*pipeline, builder->binning_variant);
-
-   /* Setup private memory. Note that because we're sharing the same private
-    * memory for all stages, all stages must use the same config, or else
-    * fibers from one stage might overwrite fibers in another.
-    */
-
-   uint32_t pvtmem_size = 0;
-   bool per_wave = true;
-   for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
-      if (builder->shaders->variants[i]) {
-         pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size);
-         if (!builder->shaders->variants[i]->pvtmem_per_wave)
-            per_wave = false;
+      result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
+      if (result != VK_SUCCESS) {
+         vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+         return result;
       }
    }
 
-   if (builder->binning_variant) {
-      pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
-      if (!builder->binning_variant->pvtmem_per_wave)
-         per_wave = false;
+   result = tu_pipeline_allocate_cs(builder->device, *pipeline,
+                                    &builder->layout, builder, NULL);
+   if (result != VK_SUCCESS) {
+      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+      return result;
+   }
+
+   /* This has to come before emitting the program so that
+    * pipeline->tess.patch_control_points and pipeline->rast.multiview_mask
+    * are always set.
+    */
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+      tu_pipeline_builder_parse_tessellation(builder, *pipeline);
+      (*pipeline)->rast.multiview_mask = builder->multiview_mask;
    }
 
-   result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
-                            pvtmem_size, per_wave);
-   if (result != VK_SUCCESS) {
-      vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
-      return result;
+   if (set_combined_state(builder, *pipeline,
+                          VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++)
+         builder->shader_iova[i] =
+            tu_upload_variant(*pipeline, builder->variants[i]);
+
+      builder->binning_vs_iova =
+         tu_upload_variant(*pipeline, builder->binning_variant);
+
+      /* Setup private memory. Note that because we're sharing the same private
+       * memory for all stages, all stages must use the same config, or else
+       * fibers from one stage might overwrite fibers in another.
+       */
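+      /* For example (illustrative): if the VS wants 512 bytes of per-fiber
+       * private memory and the FS wants 256 bytes per-wave, the shared
+       * config must be 512 bytes with per_wave = false.
+       */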
+
+      uint32_t pvtmem_size = 0;
+      bool per_wave = true;
+      for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
+         if (builder->variants[i]) {
+            pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size);
+            if (!builder->variants[i]->pvtmem_per_wave)
+               per_wave = false;
+         }
+      }
+
+      if (builder->binning_variant) {
+         pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
+         if (!builder->binning_variant->pvtmem_per_wave)
+            per_wave = false;
+      }
+
+      result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
+                               pvtmem_size, per_wave);
+      if (result != VK_SUCCESS) {
+         vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
+         return result;
+      }
+
+      tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
+      tu6_emit_load_state(*pipeline, &builder->layout);
+   }
 
-   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
-   tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
-   tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
-   tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
-   tu_pipeline_builder_parse_tessellation(builder, *pipeline);
-   tu_pipeline_builder_parse_viewport(builder, *pipeline);
-   tu_pipeline_builder_parse_rasterization(builder, *pipeline);
-   tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
-   tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
-   tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
-   tu6_emit_load_state(*pipeline, builder->layout);
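+   /* The remaining state is parsed only for the subsets this pipeline or
+    * library was asked to compile; anything else was already copied from
+    * the libraries in tu_pipeline_builder_parse_libraries().
+    */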
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) {
+      tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
+      tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
+   }
+
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
+      tu_pipeline_builder_parse_viewport(builder, *pipeline);
+      tu_pipeline_builder_parse_rasterization(builder, *pipeline);
+   }
+
+   if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
+      tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
+   }
+
+   if (builder->state &
+       VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) {
+      tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
+   }
+
+   if (set_combined_state(builder, *pipeline,
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
+      tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
+      tu_pipeline_builder_parse_ds_disable(builder, *pipeline);
+   }
 
    return VK_SUCCESS;
 }
@@ -3932,8 +4603,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
 static void
 tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
 {
-   if (builder->shaders)
-      vk_pipeline_cache_object_unref(&builder->shaders->base);
+   if (builder->compiled_shaders)
+      vk_pipeline_cache_object_unref(&builder->compiled_shaders->base);
    ralloc_free(builder->mem_ctx);
 }
 
@@ -3945,17 +4616,49 @@ tu_pipeline_builder_init_graphics(
    const VkGraphicsPipelineCreateInfo *create_info,
    const VkAllocationCallbacks *alloc)
 {
-   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
-
    *builder = (struct tu_pipeline_builder) {
       .device = dev,
       .mem_ctx = ralloc_context(NULL),
      .cache = cache,
      .create_info = create_info,
      .alloc = alloc,
-      .layout = layout,
   };
 
+   const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info =
+      vk_find_struct_const(builder->create_info->pNext,
+                           GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
+
+   const VkPipelineLibraryCreateInfoKHR *library_info =
+      vk_find_struct_const(builder->create_info->pNext,
+                           PIPELINE_LIBRARY_CREATE_INFO_KHR);
+
+   if (gpl_info) {
+      builder->state = gpl_info->flags;
+   } else {
+      /* Implement this bit of spec text:
+       *
+       *    If this structure is omitted, and either
+       *    VkGraphicsPipelineCreateInfo::flags includes
+       *    VK_PIPELINE_CREATE_LIBRARY_BIT_KHR or the
+       *    VkGraphicsPipelineCreateInfo::pNext chain includes a
+       *    VkPipelineLibraryCreateInfoKHR structure with a libraryCount
+       *    greater than 0, it is as if flags is 0. Otherwise if this
+       *    structure is omitted, it is as if flags includes all possible
+       *    subsets of the graphics pipeline (i.e. a complete graphics
+       *    pipeline).
+       */
+      if ((library_info && library_info->libraryCount > 0) ||
+          (builder->create_info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+         builder->state = 0;
+      } else {
+         builder->state =
+            VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
+      }
+   }
+
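+   /* For example, a library created with only
+    * VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT compiles
+    * just that subset, while a plain monolithic pipeline ends up with all
+    * four bits set.
+    */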
    bool rasterizer_discard_dynamic = false;
    if (create_info->pDynamicState) {
       for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) {
@@ -3968,76 +4671,120 @@ tu_pipeline_builder_init_graphics(
    }
 
    builder->rasterizer_discard =
+      (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) &&
       builder->create_info->pRasterizationState->rasterizerDiscardEnable &&
       !rasterizer_discard_dynamic;
 
-   const VkPipelineRenderingCreateInfo *rendering_info =
-      vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO);
+   if (builder->state &
+       (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) {
+      const VkPipelineRenderingCreateInfo *rendering_info =
+         vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO);
 
-   if (unlikely(dev->instance->debug_flags & TU_DEBUG_DYNAMIC) && !rendering_info)
-      rendering_info = vk_get_pipeline_rendering_create_info(create_info);
+      if (unlikely(dev->instance->debug_flags & TU_DEBUG_DYNAMIC) && !rendering_info)
+         rendering_info = vk_get_pipeline_rendering_create_info(create_info);
 
-   if (rendering_info) {
-      builder->subpass_raster_order_attachment_access = false;
-      builder->subpass_feedback_loop_ds = false;
-      builder->subpass_feedback_loop_color = false;
-
-      builder->multiview_mask = rendering_info->viewMask;
-
-      const VkRenderingSelfDependencyInfoMESA *self_dependency =
-         vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA);
-
-      if (self_dependency) {
-         builder->subpass_feedback_loop_ds =
-            self_dependency->depthSelfDependency ||
-            self_dependency->stencilSelfDependency;
-         builder->subpass_feedback_loop_color =
-            self_dependency->colorSelfDependencies;
-      }
-
-      if (!builder->rasterizer_discard) {
-         builder->depth_attachment_format =
-            rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ?
-            rendering_info->stencilAttachmentFormat :
-            rendering_info->depthAttachmentFormat;
+      /* Get multiview_mask, which is only used for shaders */
+      if (builder->state &
+          (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
+           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
+         if (rendering_info) {
+            builder->multiview_mask = rendering_info->viewMask;
+         } else {
+            const struct tu_render_pass *pass =
+               tu_render_pass_from_handle(create_info->renderPass);
+            const struct tu_subpass *subpass =
+               &pass->subpasses[create_info->subpass];
+            builder->multiview_mask = subpass->multiview_mask;
+         }
+      }
 
-      for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) {
-         builder->color_attachment_formats[i] =
-            rendering_info->pColorAttachmentFormats[i];
-         if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
+      /* Get the attachment state. This is valid:
+       *
+       * - With classic renderpasses, when either fragment shader or fragment
+       *   output interface state is being compiled. This includes when we
+       *   emulate classic renderpasses using dynamic rendering with the
+       *   debug flag.
+       * - With dynamic rendering (renderPass is NULL) only when compiling the
+       *   output interface state.
+       *
+       * We only actually need this for the fragment output interface state,
+       * but the spec also requires us to skip parsing depth/stencil state
+       * when the attachment state is defined *and* no depth/stencil
+       * attachment is used, so we have to parse it for fragment shader
+       * state when possible. Life is pain.
+       */
+      if (((builder->state &
+            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) ||
+           ((builder->state &
+             VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
+            builder->create_info->renderPass)) &&
+          rendering_info) {
+         builder->subpass_raster_order_attachment_access = false;
+         builder->subpass_feedback_loop_ds = false;
+         builder->subpass_feedback_loop_color = false;
+
+         const VkRenderingSelfDependencyInfoMESA *self_dependency =
+            vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA);
+
+         if (self_dependency) {
+            builder->subpass_feedback_loop_ds =
+               self_dependency->depthSelfDependency ||
+               self_dependency->stencilSelfDependency;
+            builder->subpass_feedback_loop_color =
+               self_dependency->colorSelfDependencies;
+         }
+
+         if (!builder->rasterizer_discard) {
+            builder->depth_attachment_format =
+               rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ?
+               rendering_info->stencilAttachmentFormat :
+               rendering_info->depthAttachmentFormat;
+
+            for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) {
+               builder->color_attachment_formats[i] =
+                  rendering_info->pColorAttachmentFormats[i];
+               if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
+                  builder->use_color_attachments = true;
+               }
+            }
+         }
+
+         builder->attachment_state_valid = true;
+      } else if ((builder->state &
+                  (VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
+                   VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)) &&
+                 create_info->renderPass != VK_NULL_HANDLE) {
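+         /* Classic renderpass (or emulated) path: pull the raster order,
+          * feedback loops, and attachment formats straight from the subpass.
+          */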
+         const struct tu_render_pass *pass =
+            tu_render_pass_from_handle(create_info->renderPass);
+         const struct tu_subpass *subpass =
+            &pass->subpasses[create_info->subpass];
+
+         builder->subpass_raster_order_attachment_access =
+            subpass->raster_order_attachment_access;
+         builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
+         builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
+
+         if (!builder->rasterizer_discard) {
+            const uint32_t a = subpass->depth_stencil_attachment.attachment;
+            builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
+               pass->attachments[a].format : VK_FORMAT_UNDEFINED;
+
+            assert(subpass->color_count == 0 ||
+                   !create_info->pColorBlendState ||
+                   subpass->color_count == create_info->pColorBlendState->attachmentCount);
+            for (uint32_t i = 0; i < subpass->color_count; i++) {
+               const uint32_t a = subpass->color_attachments[i].attachment;
+               if (a == VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               builder->color_attachment_formats[i] = pass->attachments[a].format;
+               builder->use_color_attachments = true;
+            }
+         }
-            builder->use_color_attachments = true;
-         }
-      }
-   } else {
-      const struct tu_render_pass *pass =
-         tu_render_pass_from_handle(create_info->renderPass);
-      const struct tu_subpass *subpass =
-         &pass->subpasses[create_info->subpass];
-
-      builder->subpass_raster_order_attachment_access =
-         subpass->raster_order_attachment_access;
-      builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
-      builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
-
-      builder->multiview_mask = subpass->multiview_mask;
-
-      if (!builder->rasterizer_discard) {
-         const uint32_t a = subpass->depth_stencil_attachment.attachment;
-         builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
-            pass->attachments[a].format : VK_FORMAT_UNDEFINED;
-
-         assert(subpass->color_count == 0 ||
-                !create_info->pColorBlendState ||
-                subpass->color_count == create_info->pColorBlendState->attachmentCount);
-         for (uint32_t i = 0; i < subpass->color_count; i++) {
-            const uint32_t a = subpass->color_attachments[i].attachment;
-            if (a == VK_ATTACHMENT_UNUSED)
-               continue;
-
-            builder->color_attachment_formats[i] = pass->attachments[a].format;
-            builder->use_color_attachments = true;
-         }
-      }
+
+         builder->attachment_state_valid = true;
+      }
+   }
 }
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index a06599fadf5..6b26bd2a9fc 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -60,9 +60,22 @@ struct tu_compiled_shaders
    uint8_t active_desc_sets;
 
    struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
+
+   struct ir3_shader_variant *safe_const_variants[MESA_SHADER_STAGES];
+};
+
+struct tu_nir_shaders
+{
+   struct vk_pipeline_cache_object base;
+
+   /* This is optional, and is only filled out when a library pipeline is
+    * compiled with RETAIN_LINK_TIME_OPTIMIZATION_INFO.
+    */
+   nir_shader *nir[MESA_SHADER_STAGES];
 };
 
 extern const struct vk_pipeline_cache_object_ops tu_shaders_ops;
+extern const struct vk_pipeline_cache_object_ops tu_nir_shaders_ops;
 
 static bool inline
 tu6_shared_constants_enable(const struct tu_pipeline_layout *layout,
@@ -111,6 +124,8 @@ struct tu_pipeline
    uint32_t dynamic_state_mask;
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
 
+   /* Which library subsets this pipeline (or library) contains. */
+   VkGraphicsPipelineLibraryFlagsEXT state;
+
    /* for dynamic states which use the same register: */
    struct {
       uint32_t gras_su_cntl, gras_su_cntl_mask;
@@ -119,6 +134,8 @@ struct tu_pipeline
       enum a5xx_line_mode line_mode;
       bool provoking_vertex_last;
 
+      uint32_t multiview_mask;
+
       struct tu_draw_state state;
    } rast;
 
@@ -209,6 +226,31 @@ struct tu_pipeline
       bool z_negative_one_to_one;
    } viewport;
 
+   /* Used only for libraries. compiled_shaders only contains variants
+    * compiled by this pipeline, and it owns them, so when it is freed they
+    * disappear. Similarly, nir_shaders owns the link-time NIR. shaders
+    * points to the shaders from this pipeline and all libraries included in
+    * it, for convenience.
+    */
+   struct tu_compiled_shaders *compiled_shaders;
+   struct tu_nir_shaders *nir_shaders;
+   struct {
+      nir_shader *nir;
+      struct tu_shader_key key;
+      struct tu_const_state const_state;
+      struct ir3_shader_variant *variant, *safe_const_variant;
+   } shaders[MESA_SHADER_FRAGMENT + 1];
+
+   struct ir3_shader_key ir3_key;
+
+   /* Used for libraries, to stitch together an overall layout for the final
+    * pipeline.
+    */
+   struct tu_descriptor_set_layout *layouts[MAX_SETS];
+   unsigned num_sets;
+   unsigned push_constant_size;
+   bool independent_sets;
+
    void *executables_mem_ctx; /* tu_pipeline_executable */
    struct util_dynarray executables;
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index b8e0f28c81a..4f9d847e230 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -176,19 +176,32 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
    struct tu_descriptor_set_binding_layout *binding_layout =
       &set_layout->binding[binding];
-   uint32_t base;
+   nir_ssa_def *base;
 
    shader->active_desc_sets |= 1u << set;
 
    switch (binding_layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      base = (layout->set[set].dynamic_offset_start +
-              binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS);
+      if (layout->independent_sets) {
+         /* With independent sets, we don't know
+          * layout->set[set].dynamic_offset_start until after link time,
+          * which with fast linking means after the shader is compiled. We
+          * have to get it from the const file instead.
+          */
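+         /* Illustrative: for set 2 this loads one 32-bit word at const
+          * location dynamic_offset_loc + 2 and adds it to the binding's
+          * static offset below.
+          */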
+         base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS));
+         nir_ssa_def *dynamic_offset_start =
+            nir_load_uniform(b, 1, 32, nir_imm_int(b, 0),
+                             .base = shader->const_state.dynamic_offset_loc + set);
+         base = nir_iadd(b, base, dynamic_offset_start);
+      } else {
+         base = nir_imm_int(b, (layout->set[set].dynamic_offset_start +
+                                binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
+      }
       set = MAX_SETS;
       break;
    default:
-      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
+      base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
       break;
    }
 
@@ -204,7 +217,7 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    }
 
    nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
-                               nir_iadd(b, nir_imm_int(b, base),
+                               nir_iadd(b, base,
                                         nir_ishl(b, vulkan_idx, shift)),
                                shift);
 
@@ -658,6 +671,13 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
       align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
             dev->compiler->const_upload_unit);
 
+   if (layout->independent_sets) {
+      const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
+      reserved_consts_vec4 += DIV_ROUND_UP(MAX_SETS, 4);
+   } else {
+      const_state->dynamic_offset_loc = UINT32_MAX;
+   }
+
    tu_shader->reserved_user_consts_vec4 = reserved_consts_vec4;
 
    struct lower_instr_params params = {
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 6f529cde058..69e0a20683b 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -21,6 +21,7 @@ struct tu_push_constant_range
 struct tu_const_state
 {
    struct tu_push_constant_range push_consts;
+   uint32_t dynamic_offset_loc;
 };
 
 struct tu_shader