nvk: Switch to shader objects

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27024>
Faith Ekstrand 2024-01-11 18:18:54 -06:00 committed by Marge Bot
parent 4001658c18
commit 813b253939
12 changed files with 745 additions and 786 deletions
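For orientation, here is a minimal application-side sketch of the VK_EXT_shader_object flow that the rewritten driver paths serve. The snippet is illustrative and not part of the commit: creating the shader reaches nvk_compile_shaders() and binding it reaches nvk_cmd_bind_shaders(), which routes to nvk_cmd_bind_compute_shader() or nvk_cmd_bind_graphics_shader() in the hunks below. The device, command buffer, SPIR-V blob, and descriptor set layout are assumed to exist; error handling is omitted.

#include <vulkan/vulkan.h>

static void
bind_compute_shader_object(VkDevice dev, VkCommandBuffer cmd,
                           const uint32_t *spirv, size_t spirv_size,
                           VkDescriptorSetLayout set_layout)
{
   /* VK_EXT_shader_object entry points come from the device loader. */
   PFN_vkCreateShadersEXT create_shaders =
      (PFN_vkCreateShadersEXT)vkGetDeviceProcAddr(dev, "vkCreateShadersEXT");
   PFN_vkCmdBindShadersEXT bind_shaders =
      (PFN_vkCmdBindShadersEXT)vkGetDeviceProcAddr(dev, "vkCmdBindShadersEXT");

   const VkShaderCreateInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
      .codeSize = spirv_size,
      .pCode = spirv,
      .pName = "main",
      .setLayoutCount = 1,
      .pSetLayouts = &set_layout,
   };

   /* No VkPipeline: the driver compiles a vk_shader directly. */
   VkShaderEXT shader;
   create_shaders(dev, 1, &info, NULL, &shader);

   /* Shaders are bound per stage; the common runtime dispatches this to
    * the driver's cmd_bind_shaders hook. */
   const VkShaderStageFlagBits stage = VK_SHADER_STAGE_COMPUTE_BIT;
   bind_shaders(cmd, 1, &stage, &shader);
}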

View file

@@ -16,7 +16,6 @@ nvk_files = files(
'nvk_cmd_pool.c',
'nvk_cmd_pool.h',
'nvk_codegen.c',
'nvk_compute_pipeline.c',
'nvk_descriptor_set.h',
'nvk_descriptor_set.c',
'nvk_descriptor_set_layout.c',
@@ -31,7 +30,6 @@ nvk_files = files(
'nvk_event.h',
'nvk_format.c',
'nvk_format.h',
'nvk_graphics_pipeline.c',
'nvk_heap.c',
'nvk_heap.h',
'nvk_image.c',
@@ -45,8 +43,6 @@ nvk_files = files(
'nvk_nir_lower_descriptors.c',
'nvk_physical_device.c',
'nvk_physical_device.h',
'nvk_pipeline.c',
'nvk_pipeline.h',
'nvk_private.h',
'nvk_query_pool.c',
'nvk_query_pool.h',

View file

@@ -13,7 +13,7 @@
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"
@@ -551,33 +551,27 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}
VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
uint32_t stage_count,
const gl_shader_stage *stages,
struct vk_shader ** const shaders)
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(nvk_pipeline, pipeline, _pipeline);
struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
if(!pipeline->shaders[s])
continue;
if (pipeline->shaders[s]->info.slm_size)
nvk_device_ensure_slm(dev, pipeline->shaders[s]->info.slm_size);
}
for (uint32_t i = 0; i < stage_count; i++) {
struct nvk_shader *shader =
container_of(shaders[i], struct nvk_shader, vk);
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
assert(pipeline->type == NVK_PIPELINE_GRAPHICS);
nvk_cmd_bind_graphics_pipeline(cmd, (void *)pipeline);
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
assert(pipeline->type == NVK_PIPELINE_COMPUTE);
nvk_cmd_bind_compute_pipeline(cmd, (void *)pipeline);
break;
default:
unreachable("Unhandled bind point");
if (shader != NULL && shader->info.slm_size > 0)
nvk_device_ensure_slm(dev, shader->info.slm_size);
if (stages[i] == MESA_SHADER_COMPUTE ||
stages[i] == MESA_SHADER_KERNEL)
nvk_cmd_bind_compute_shader(cmd, shader);
else
nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
}
}

View file

@@ -24,6 +24,7 @@ struct nvk_cmd_pool;
struct nvk_image_view;
struct nvk_push_descriptor_set;
struct nvk_shader;
struct vk_shader;
struct nvk_sample_location {
uint8_t x_u4:4;
@@ -102,9 +103,11 @@ struct nvk_rendering_state {
struct nvk_graphics_state {
struct nvk_rendering_state render;
struct nvk_graphics_pipeline *pipeline;
struct nvk_descriptor_state descriptors;
uint32_t shaders_dirty;
struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
/* Used for meta save/restore */
struct nvk_addr_range vb0;
@@ -114,8 +117,8 @@ struct nvk_graphics_state {
};
struct nvk_compute_state {
struct nvk_compute_pipeline *pipeline;
struct nvk_descriptor_state descriptors;
struct nvk_shader *shader;
};
struct nvk_cmd_push {
@@ -209,10 +212,17 @@ void nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
void nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd);
void nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd);
void nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_graphics_pipeline *pipeline);
void nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_compute_pipeline *pipeline);
void nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
uint32_t stage_count,
const gl_shader_stage *stages,
struct vk_shader ** const shaders);
void nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
const gl_shader_stage stage,
struct nvk_shader *shader);
void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
struct nvk_shader *shader);
void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
struct nvk_addr_range addr_range);

View file

@@ -9,7 +9,7 @@
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "nouveau_context.h"
@@ -41,6 +41,11 @@
#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
VkResult
nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
{
@@ -97,6 +102,129 @@ nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd)
memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
}
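/* Round a shared memory size in bytes up to the next size the SM supports
 * (8/16/32/64/96 KiB) and convert it to the value the QMD's
 * *_SM_CONFIG_SHARED_MEM_SIZE fields expect: the size in 4 KiB units, plus
 * one.  For example, a 20 KiB request rounds up to 32 KiB and yields
 * (32768 / 4096) + 1 = 9.
 */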
static int
gv100_sm_config_smem_size(uint32_t size)
{
if (size > 64 * 1024) size = 96 * 1024;
else if (size > 32 * 1024) size = 64 * 1024;
else if (size > 16 * 1024) size = 32 * 1024;
else if (size > 8 * 1024) size = 16 * 1024;
else size = 8 * 1024;
return (size / 4096) + 1;
}
#define nvk_qmd_init_base(qmd, shader, class_id, version_major, version_minor) \
do { \
QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
shader->info.cs.local_size[0]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
shader->info.cs.local_size[1]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
shader->info.cs.local_size[2]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
align(shader->info.slm_size, 0x10)); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
align(shader->info.cs.smem_size, 0x100)); \
} while (0)
static void
nva0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, A0C0, 00, 06);
if (shader->info.cs.smem_size <= (16 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
else if (shader->info.cs.smem_size <= (32 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
else if (shader->info.cs.smem_size <= (48 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
else
unreachable("Invalid shared memory size");
uint64_t addr = shader->hdr_addr;
assert(addr < 0xffffffff);
NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
}
static void
nvc0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C0C0, 02, 01);
uint64_t addr = shader->hdr_addr;
assert(addr < 0xffffffff);
NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
}
static void
nvc3c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C3C0, 02, 02);
NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
/* those are all QMD 2.2+ */
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
uint64_t addr = shader->hdr_addr;
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}
static void
nvc6c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C6C0, 03, 00);
NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
/* those are all QMD 2.2+ */
NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
uint64_t addr = shader->hdr_addr;
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}
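/* Fill the QMD (compute launch descriptor) for the given shader, using the
 * QMD layout that matches the device's compute class.
 */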
static void
nvk_qmd_init(struct nvk_physical_device *pdev,
uint32_t *qmd, const struct nvk_shader *shader)
{
if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
nvc6c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
nvc3c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
nvc0c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
nva0c0_qmd_init(qmd, shader);
else
unreachable("Unknown GPU generation");
}
static void
nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
uint32_t x, uint32_t y, uint32_t z)
@@ -171,18 +299,16 @@ nvc6c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
void
nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_compute_pipeline *pipeline)
nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
struct nvk_shader *shader)
{
cmd->state.cs.pipeline = pipeline;
cmd->state.cs.shader = shader;
}
static uint32_t
nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
{
const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_COMPUTE];
const struct nvk_shader *shader = cmd->state.cs.shader;
return shader->info.cs.local_size[0] *
shader->info.cs.local_size[1] *
@@ -196,7 +322,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
const struct nvk_shader *shader = cmd->state.cs.shader;
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
VkResult result;
@@ -224,7 +350,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
uint32_t qmd[128];
memset(qmd, 0, sizeof(qmd));
memcpy(qmd, pipeline->qmd_template, sizeof(pipeline->qmd_template));
nvk_qmd_init(pdev, qmd, shader);
if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
nvc6c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
@@ -244,8 +370,6 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
desc->root.cs.group_count[2]);
}
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_COMPUTE];
for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];

View file

@@ -11,7 +11,7 @@
#include "nvk_image_view.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "nil_format.h"
#include "util/bitpack_helpers.h"
@@ -370,13 +370,6 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
}
for (uint32_t i = 0; i < 6; i++) {
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
.enable = ENABLE_FALSE,
.type = i,
});
}
for (uint32_t group = 0; group < 5; group++) {
for (uint32_t slot = 0; slot < 16; slot++) {
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
@@ -495,6 +488,8 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
nvk_cmd_buffer_dirty_render_pass(cmd);
}
}
cmd->state.gfx.shaders_dirty = ~0;
}
void
@@ -514,6 +509,8 @@ nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd)
struct nvk_rendering_state render_save = cmd->state.gfx.render;
memset(&cmd->state.gfx, 0, sizeof(cmd->state.gfx));
cmd->state.gfx.render = render_save;
cmd->state.gfx.shaders_dirty = ~0;
}
static void
@@ -951,23 +948,223 @@ nvk_CmdEndRendering(VkCommandBuffer commandBuffer)
}
void
nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_graphics_pipeline *pipeline)
nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
const gl_shader_stage stage,
struct nvk_shader *shader)
{
cmd->state.gfx.pipeline = pipeline;
vk_cmd_set_dynamic_graphics_state(&cmd->vk, &pipeline->dynamic);
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
assert(stage < ARRAY_SIZE(cmd->state.gfx.shaders));
if (cmd->state.gfx.shaders[stage] == shader)
return;
cmd->state.gfx.shaders[stage] = shader;
cmd->state.gfx.shaders_dirty |= BITFIELD_BIT(stage);
/* When a pipeline with tess shaders is bound we need to re-upload the
* tessellation parameters at flush_ts_state, as the domain origin can be
* dynamic.
*/
if (nvk_shader_is_enabled(pipeline->base.shaders[MESA_SHADER_TESS_EVAL])) {
BITSET_SET(cmd->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
if (stage == MESA_SHADER_TESS_EVAL)
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
/* Emitting SET_HYBRID_ANTI_ALIAS_CONTROL requires the fragment shader */
if (stage == MESA_SHADER_FRAGMENT)
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
}
static uint32_t
mesa_to_nv9097_shader_type(gl_shader_stage stage)
{
static const uint32_t mesa_to_nv9097[] = {
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
};
assert(stage < ARRAY_SIZE(mesa_to_nv9097));
return mesa_to_nv9097[stage];
}
static uint32_t
nvk_pipeline_bind_group(gl_shader_stage stage)
{
return stage;
}
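/* Re-emit hardware shader state for any stages dirtied since the last draw:
 * SET_PIPELINE_SHADER/PROGRAM for each changed shader type, the
 * fragment-shader side state, the transform feedback layout, and the layer,
 * point-size, and clip/cull controls taken from the last enabled
 * pre-rasterization stage.
 */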
static void
nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
{
if (cmd->state.gfx.shaders_dirty == 0)
return;
/* Map shader types to shaders */
struct nvk_shader *type_shader[6] = { NULL, };
uint32_t types_dirty = 0;
const uint32_t gfx_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
BITFIELD_BIT(MESA_SHADER_GEOMETRY) |
BITFIELD_BIT(MESA_SHADER_FRAGMENT);
u_foreach_bit(stage, cmd->state.gfx.shaders_dirty & gfx_stages) {
uint32_t type = mesa_to_nv9097_shader_type(stage);
types_dirty |= BITFIELD_BIT(type);
/* Only copy non-NULL shaders because mesh/task alias with vertex and
* tessellation stages.
*/
if (cmd->state.gfx.shaders[stage] != NULL) {
assert(type < ARRAY_SIZE(type_shader));
assert(type_shader[type] == NULL);
type_shader[type] = cmd->state.gfx.shaders[stage];
}
}
struct nv_push *p = nvk_cmd_buffer_push(cmd, pipeline->push_dw_count);
nv_push_raw(p, pipeline->push_data, pipeline->push_dw_count);
u_foreach_bit(type, types_dirty) {
struct nvk_shader *shader = type_shader[type];
/* We always map index == type */
const uint32_t idx = type;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
.enable = shader != NULL,
.type = type,
});
if (shader == NULL)
continue;
uint64_t addr = shader->hdr_addr;
if (nvk_cmd_buffer_3d_cls(cmd) >= VOLTA_A) {
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
} else {
assert(addr < 0xffffffff);
P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
}
P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
P_NVC397_SET_PIPELINE_BINDING(p, idx,
nvk_pipeline_bind_group(shader->info.stage));
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
p = nvk_cmd_buffer_push(cmd, 9);
P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);
P_NV9097_SET_SUBTILING_PERF_KNOB_A(p, {
.fraction_of_spm_register_file_per_subtile = 0x10,
.fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
.fraction_of_spm_triangle_ram_per_subtile = 0x16,
.fraction_of_max_quads_per_subtile = 0x20,
});
P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
shader->info.fs.early_fragment_tests);
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
shader->info.fs.post_depth_coverage);
} else {
assert(!shader->info.fs.post_depth_coverage);
}
P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
.z_min_unbounded_enable = shader->info.fs.writes_depth,
.z_max_unbounded_enable = shader->info.fs.writes_depth,
});
}
}
const uint32_t vtg_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
BITFIELD_BIT(MESA_SHADER_GEOMETRY);
const uint32_t vtgm_stages = vtg_stages | BITFIELD_BIT(MESA_SHADER_MESH);
if (cmd->state.gfx.shaders_dirty & vtg_stages) {
struct nak_xfb_info *xfb = NULL;
u_foreach_bit(stage, vtg_stages) {
if (cmd->state.gfx.shaders[stage] != NULL)
xfb = &cmd->state.gfx.shaders[stage]->info.vtg.xfb;
}
if (xfb == NULL) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
for (uint8_t b = 0; b < 4; b++)
P_IMMD(p, NV9097, SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(b), 0);
} else {
for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
const uint8_t attr_count = xfb->attr_count[b];
/* upload packed varying indices in multiples of 4 bytes */
const uint32_t n = DIV_ROUND_UP(attr_count, 4);
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5 + n);
P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
if (n > 0) {
P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
}
}
}
}
if (cmd->state.gfx.shaders_dirty & vtgm_stages) {
struct nvk_shader *last_vtgm = NULL;
u_foreach_bit(stage, vtgm_stages) {
if (cmd->state.gfx.shaders[stage] != NULL)
last_vtgm = cmd->state.gfx.shaders[stage];
}
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
P_IMMD(p, NV9097, SET_RT_LAYER, {
.v = 0,
.control = last_vtgm->info.vtg.writes_layer ?
CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
CONTROL_V_SELECTS_LAYER,
});
P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
.enable = last_vtgm->info.vtg.writes_point_size,
.slot = 0,
});
const uint8_t clip_enable = last_vtgm->info.vtg.clip_enable;
const uint8_t cull_enable = last_vtgm->info.vtg.cull_enable;
P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
.plane0 = ((clip_enable | cull_enable) >> 0) & 1,
.plane1 = ((clip_enable | cull_enable) >> 1) & 1,
.plane2 = ((clip_enable | cull_enable) >> 2) & 1,
.plane3 = ((clip_enable | cull_enable) >> 3) & 1,
.plane4 = ((clip_enable | cull_enable) >> 4) & 1,
.plane5 = ((clip_enable | cull_enable) >> 5) & 1,
.plane6 = ((clip_enable | cull_enable) >> 6) & 1,
.plane7 = ((clip_enable | cull_enable) >> 7) & 1,
});
P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
.plane0 = (cull_enable >> 0) & 1,
.plane1 = (cull_enable >> 1) & 1,
.plane2 = (cull_enable >> 2) & 1,
.plane3 = (cull_enable >> 3) & 1,
.plane4 = (cull_enable >> 4) & 1,
.plane5 = (cull_enable >> 5) & 1,
.plane6 = (cull_enable >> 6) & 1,
.plane7 = (cull_enable >> 7) & 1,
});
}
cmd->state.gfx.shaders_dirty = 0;
}
static void
@@ -1045,11 +1242,10 @@ nvk_flush_ts_state(struct nvk_cmd_buffer *cmd)
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
const struct nvk_graphics_pipeline *pipeline= cmd->state.gfx.pipeline;
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_TESS_EVAL];
cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
if (nvk_shader_is_enabled(shader)) {
if (shader != NULL) {
enum nak_ts_prims prims = shader->info.ts.prims;
/* When the origin is lower-left, we have to flip the winding order */
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
@@ -1433,9 +1629,10 @@ nvk_flush_ms_state(struct nvk_cmd_buffer *cmd)
dyn->ms.rasterization_samples == render->samples);
}
const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
struct nvk_shader *fs = cmd->state.gfx.shaders[MESA_SHADER_FRAGMENT];
const float min_sample_shading = fs != NULL ? fs->min_sample_shading : 0;
uint32_t min_samples = ceilf(dyn->ms.rasterization_samples *
pipeline->min_sample_shading);
min_sample_shading);
min_samples = util_next_power_of_two(MAX2(1, min_samples));
P_IMMD(p, NV9097, SET_HYBRID_ANTI_ALIAS_CONTROL, {
@@ -1923,7 +2120,6 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
VkResult result;
@@ -1952,8 +2148,8 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
/* Find cbuf maps for the 5 cbuf groups */
const struct nvk_shader *cbuf_shaders[5] = { NULL, };
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const struct nvk_shader *shader = pipeline->base.shaders[stage];
if (!shader || shader->code_size == 0)
const struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
if (shader == NULL)
continue;
uint32_t group = nvk_cbuf_binding_for_stage(stage);
@@ -2053,6 +2249,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
static void
nvk_flush_gfx_state(struct nvk_cmd_buffer *cmd)
{
nvk_flush_shaders(cmd);
nvk_flush_dynamic_state(cmd);
nvk_flush_descriptors(cmd);
}

View file

@@ -60,7 +60,7 @@ struct nvk_meta_save {
struct vk_vertex_input_state _dynamic_vi;
struct vk_sample_locations_state _dynamic_sl;
struct vk_dynamic_graphics_state dynamic;
struct nvk_graphics_pipeline *pipeline;
struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
struct nvk_addr_range vb0;
struct nvk_descriptor_set *desc0;
bool has_push_desc0;
@@ -76,7 +76,9 @@ nvk_meta_begin(struct nvk_cmd_buffer *cmd,
save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
save->pipeline = cmd->state.gfx.pipeline;
STATIC_ASSERT(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
save->vb0 = cmd->state.gfx.vb0;
save->desc0 = cmd->state.gfx.descriptors.sets[0];
@@ -148,8 +150,12 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
cmd->vk.dynamic_graphics_state.set,
sizeof(cmd->vk.dynamic_graphics_state.set));
if (save->pipeline)
nvk_cmd_bind_graphics_pipeline(cmd, save->pipeline);
for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
if (stage == MESA_SHADER_COMPUTE)
continue;
nvk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
}
nvk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);

View file

@@ -8,6 +8,7 @@
#include "nvk_entrypoints.h"
#include "nvk_instance.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "vk_pipeline_cache.h"
#include "vulkan/wsi/wsi_common.h"
@@ -146,6 +147,8 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_alloc;
dev->vk.shader_ops = &nvk_device_shader_ops;
drmDevicePtr drm_device = NULL;
int ret = drmGetDeviceFromDevId(pdev->render_dev, 0, &drm_device);
if (ret != 0) {

View file

@@ -1,417 +0,0 @@
/*
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#include "nvk_pipeline.h"
#include "nvk_device.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "vk_nir.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "nv_push.h"
#include "nouveau_context.h"
#include "compiler/spirv/nir_spirv.h"
#include "nvk_cl9097.h"
#include "nvk_clb197.h"
#include "nvk_clc397.h"
static void
emit_pipeline_xfb_state(struct nv_push *p, const struct nak_xfb_info *xfb)
{
for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
const uint8_t attr_count = xfb->attr_count[b];
P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
/* upload packed varying indices in multiples of 4 bytes */
const uint32_t n = DIV_ROUND_UP(attr_count, 4);
if (n > 0) {
P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
}
}
}
static const uint32_t mesa_to_nv9097_shader_type[] = {
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
};
static void
merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
*
* "PointMode. Controls generation of points rather than triangles
* or lines. This functionality defaults to disabled, and is
* enabled if either shader stage includes the execution mode.
*
* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
* and OutputVertices, it says:
*
* "One mode must be set in at least one of the tessellation
* shader stages."
*
* So, the fields can be set in either the TCS or TES, but they must
* agree if set in both. Our backend looks at TES, so bitwise-or in
* the values from the TCS.
*/
assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
tcs_info->tess.spacing == tes_info->tess.spacing);
tes_info->tess.spacing |= tcs_info->tess.spacing;
assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
tes_info->tess.ccw |= tcs_info->tess.ccw;
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
/* Copy the merged info back to the TCS */
tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
tcs_info->tess.spacing = tes_info->tess.spacing;
tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
tcs_info->tess.ccw = tes_info->tess.ccw;
tcs_info->tess.point_mode = tes_info->tess.point_mode;
}
VkResult
nvk_graphics_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
struct nvk_graphics_pipeline *pipeline;
VkResult result = VK_SUCCESS;
pipeline = (void *)nvk_pipeline_zalloc(dev, NVK_PIPELINE_GRAPHICS,
sizeof(*pipeline), pAllocator);
if (pipeline == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
VkPipelineCreateFlags2KHR pipeline_flags =
vk_graphics_pipeline_create_flags(pCreateInfo);
if (pipeline_flags &
VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
cache = NULL;
struct vk_graphics_pipeline_all_state all;
struct vk_graphics_pipeline_state state = {};
result = vk_graphics_pipeline_state_fill(&dev->vk, &state, pCreateInfo,
NULL, 0, &all, NULL, 0, NULL);
assert(result == VK_SUCCESS);
VkPipelineCreationFeedbackEXT pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
};
VkPipelineCreationFeedbackEXT stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
int64_t pipeline_start = os_time_get_nano();
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
const VkPipelineShaderStageCreateInfo *infos[MESA_SHADER_STAGES] = {};
nir_shader *nir[MESA_SHADER_STAGES] = {};
struct vk_pipeline_robustness_state robustness[MESA_SHADER_STAGES];
struct vk_pipeline_cache_object *cache_objs[MESA_SHADER_STAGES] = {};
struct nak_fs_key fs_key_tmp, *fs_key = NULL;
nvk_populate_fs_key(&fs_key_tmp, &state);
fs_key = &fs_key_tmp;
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
infos[stage] = sinfo;
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
vk_pipeline_robustness_state_fill(&dev->vk, &robustness[stage],
pCreateInfo->pNext, sinfo->pNext);
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
unsigned char sha1[SHA1_DIGEST_LENGTH];
nvk_hash_shader(sha1, sinfo, &robustness[stage],
state.rp->view_mask != 0, pipeline_layout,
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
if (cache) {
bool cache_hit = false;
cache_objs[stage] = vk_pipeline_cache_lookup_object(cache, &sha1, sizeof(sha1),
&nvk_shader_ops, &cache_hit);
pipeline->base.shaders[stage] =
container_of(cache_objs[stage], struct nvk_shader, base);
if (cache_hit && cache != dev->mem_cache)
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
if (!cache_objs[stage] &&
pCreateInfo->flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
result = VK_PIPELINE_COMPILE_REQUIRED;
goto fail;
}
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL || cache_objs[stage])
continue;
result = nvk_shader_stage_to_nir(dev, sinfo, &robustness[stage],
cache, NULL, &nir[stage]);
if (result != VK_SUCCESS)
goto fail;
}
if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_TESS_EVAL]) {
merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
if (!cache_objs[stage]) {
int64_t stage_start = os_time_get_nano();
unsigned char sha1[SHA1_DIGEST_LENGTH];
nvk_hash_shader(sha1, sinfo, &robustness[stage],
state.rp->view_mask != 0, pipeline_layout,
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
struct nvk_shader *shader = nvk_shader_init(dev, sha1, SHA1_DIGEST_LENGTH);
if(shader == NULL) {
result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
nvk_lower_nir(dev, nir[stage], &robustness[stage],
state.rp->view_mask != 0,
pipeline_layout->set_count,
pipeline_layout->set_layouts,
&shader->cbuf_map);
result = nvk_compile_nir(dev, nir[stage],
pipeline_flags, &robustness[stage],
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL,
cache, shader);
if (result == VK_SUCCESS) {
cache_objs[stage] = &shader->base;
if (cache)
cache_objs[stage] = vk_pipeline_cache_add_object(cache,
cache_objs[stage]);
stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
pipeline->base.shaders[stage] =
container_of(cache_objs[stage], struct nvk_shader, base);
}
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
ralloc_free(nir[stage]);
}
if (result != VK_SUCCESS)
goto fail;
result = nvk_shader_upload(dev, pipeline->base.shaders[stage]);
if (result != VK_SUCCESS)
goto fail;
}
struct nv_push push;
nv_push_init(&push, pipeline->push_data, ARRAY_SIZE(pipeline->push_data));
struct nv_push *p = &push;
bool force_max_samples = false;
struct nvk_shader *last_geom = NULL;
for (gl_shader_stage stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
struct nvk_shader *shader = pipeline->base.shaders[stage];
uint32_t idx = mesa_to_nv9097_shader_type[stage];
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
.enable = nvk_shader_is_enabled(shader),
.type = mesa_to_nv9097_shader_type[stage],
});
if (!nvk_shader_is_enabled(shader))
continue;
if (stage != MESA_SHADER_FRAGMENT)
last_geom = shader;
uint64_t addr = shader->hdr_addr;
if (dev->pdev->info.cls_eng3d >= VOLTA_A) {
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
} else {
assert(addr < 0xffffffff);
P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
}
P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
P_NVC397_SET_PIPELINE_BINDING(p, idx, nvk_cbuf_binding_for_stage(stage));
switch (stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
break;
case MESA_SHADER_FRAGMENT:
P_IMMD(p, NV9097, SET_SUBTILING_PERF_KNOB_A, {
.fraction_of_spm_register_file_per_subtile = 0x10,
.fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
.fraction_of_spm_triangle_ram_per_subtile = 0x16,
.fraction_of_max_quads_per_subtile = 0x20,
});
P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
shader->info.fs.early_fragment_tests);
if (dev->pdev->info.cls_eng3d >= MAXWELL_B) {
P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
shader->info.fs.post_depth_coverage);
} else {
assert(!shader->info.fs.post_depth_coverage);
}
P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
.z_min_unbounded_enable = shader->info.fs.writes_depth,
.z_max_unbounded_enable = shader->info.fs.writes_depth,
});
/* If we're using the incoming sample mask and doing sample shading,
* we have to do sample shading "to the max", otherwise there's no
* way to tell which sets of samples are covered by the current
* invocation.
*/
force_max_samples = shader->info.fs.reads_sample_mask ||
shader->info.fs.uses_sample_shading;
break;
default:
unreachable("Unsupported shader stage");
}
}
const uint8_t clip_cull = last_geom->info.vtg.clip_enable |
last_geom->info.vtg.cull_enable;
if (clip_cull) {
P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
.plane0 = (clip_cull >> 0) & 1,
.plane1 = (clip_cull >> 1) & 1,
.plane2 = (clip_cull >> 2) & 1,
.plane3 = (clip_cull >> 3) & 1,
.plane4 = (clip_cull >> 4) & 1,
.plane5 = (clip_cull >> 5) & 1,
.plane6 = (clip_cull >> 6) & 1,
.plane7 = (clip_cull >> 7) & 1,
});
P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
.plane0 = (last_geom->info.vtg.cull_enable >> 0) & 1,
.plane1 = (last_geom->info.vtg.cull_enable >> 1) & 1,
.plane2 = (last_geom->info.vtg.cull_enable >> 2) & 1,
.plane3 = (last_geom->info.vtg.cull_enable >> 3) & 1,
.plane4 = (last_geom->info.vtg.cull_enable >> 4) & 1,
.plane5 = (last_geom->info.vtg.cull_enable >> 5) & 1,
.plane6 = (last_geom->info.vtg.cull_enable >> 6) & 1,
.plane7 = (last_geom->info.vtg.cull_enable >> 7) & 1,
});
}
/* TODO: prog_selects_layer */
P_IMMD(p, NV9097, SET_RT_LAYER, {
.v = 0,
.control = last_geom->info.vtg.writes_layer ?
CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
CONTROL_V_SELECTS_LAYER,
});
P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
.enable = last_geom->info.vtg.writes_point_size,
.slot = 0,
});
emit_pipeline_xfb_state(&push, &last_geom->info.vtg.xfb);
pipeline->push_dw_count = nv_push_dw_count(&push);
if (force_max_samples)
pipeline->min_sample_shading = 1;
else if (state.ms != NULL && state.ms->sample_shading_enable)
pipeline->min_sample_shading = CLAMP(state.ms->min_sample_shading, 0, 1);
else
pipeline->min_sample_shading = 0;
pipeline->dynamic.vi = &pipeline->_dynamic_vi;
pipeline->dynamic.ms.sample_locations = &pipeline->_dynamic_sl;
vk_dynamic_graphics_state_fill(&pipeline->dynamic, &state);
pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
if (creation_feedback) {
*creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
int fb_count = creation_feedback->pipelineStageCreationFeedbackCount;
if (pCreateInfo->stageCount == fb_count) {
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo =
&pCreateInfo->pStages[i];
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
creation_feedback->pPipelineStageCreationFeedbacks[i] =
stage_feedbacks[stage];
}
}
}
*pPipeline = nvk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail:
vk_object_free(&dev->vk, pAllocator, pipeline);
return result;
}

View file

@@ -1,74 +0,0 @@
/*
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#ifndef NVK_PIPELINE_H
#define NVK_PIPELINE_H 1
#include "nvk_private.h"
#include "nvk_shader.h"
#include "vk_graphics_state.h"
#include "vk_object.h"
struct vk_pipeline_cache;
enum nvk_pipeline_type {
NVK_PIPELINE_GRAPHICS,
NVK_PIPELINE_COMPUTE,
};
struct nvk_pipeline {
struct vk_object_base base;
enum nvk_pipeline_type type;
struct nvk_shader *shaders[MESA_SHADER_STAGES];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_pipeline, base, VkPipeline,
VK_OBJECT_TYPE_PIPELINE)
void
nvk_pipeline_free(struct nvk_device *dev,
struct nvk_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator);
struct nvk_pipeline *
nvk_pipeline_zalloc(struct nvk_device *dev,
enum nvk_pipeline_type type, size_t size,
const VkAllocationCallbacks *pAllocator);
struct nvk_compute_pipeline {
struct nvk_pipeline base;
uint32_t qmd_template[64];
};
VkResult
nvk_compute_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline);
struct nvk_graphics_pipeline {
struct nvk_pipeline base;
uint32_t push_data[192];
uint32_t push_dw_count;
float min_sample_shading;
struct vk_vertex_input_state _dynamic_vi;
struct vk_sample_locations_state _dynamic_sl;
struct vk_dynamic_graphics_state dynamic;
};
VkResult
nvk_graphics_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline);
#endif

View file

@@ -11,7 +11,6 @@
#include "nvk_event.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "vk_meta.h"
#include "vk_pipeline.h"
@@ -973,12 +972,13 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
}
/* Save pipeline and push constants */
struct nvk_compute_pipeline *pipeline_save = cmd->state.cs.pipeline;
struct nvk_shader *shader_save = cmd->state.cs.shader;
uint8_t push_save[NVK_MAX_PUSH_SIZE];
memcpy(push_save, desc->root.push, NVK_MAX_PUSH_SIZE);
nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
dev->vk.dispatch_table.CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline);
nvk_CmdPushConstants(nvk_cmd_buffer_to_handle(cmd), layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push), &push);
@@ -986,11 +986,8 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
nvk_CmdDispatchBase(nvk_cmd_buffer_to_handle(cmd), 0, 0, 0, 1, 1, 1);
/* Restore pipeline and push constants */
if (pipeline_save) {
nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE,
nvk_pipeline_to_handle(&pipeline_save->base));
}
if (shader_save)
nvk_cmd_bind_compute_shader(cmd, shader_save);
memcpy(desc->root.push, push_save, NVK_MAX_PUSH_SIZE);
}

View file

@@ -8,12 +8,11 @@
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_sampler.h"
#include "nvk_shader.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_pipeline_cache.h"
#include "vk_pipeline_layout.h"
#include "vk_shader_module.h"
#include "vk_ycbcr_conversion.h"
@@ -186,7 +185,7 @@ nvk_preprocess_nir(struct vk_physical_device *vk_pdev, nir_shader *nir)
nvk_cg_preprocess_nir(nir);
}
void
static void
nvk_populate_fs_key(struct nak_fs_key *key,
const struct vk_graphics_pipeline_state *state)
{
@@ -195,6 +194,9 @@ nvk_populate_fs_key(struct nak_fs_key *key,
key->sample_locations_cb = 0;
key->sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations);
if (state == NULL)
return;
if (state->pipeline_flags &
VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
key->zs_self_dep = true;
@@ -208,6 +210,25 @@ nvk_populate_fs_key(struct nak_fs_key *key,
key->force_sample_shading = true;
}
static void
nvk_hash_graphics_state(struct vk_physical_device *device,
const struct vk_graphics_pipeline_state *state,
VkShaderStageFlags stages,
blake3_hash blake3_out)
{
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
struct nak_fs_key key;
nvk_populate_fs_key(&key, state);
_mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
const bool is_multiview = state->rp->view_mask != 0;
_mesa_blake3_update(&blake3_ctx, &is_multiview, sizeof(is_multiview));
}
_mesa_blake3_final(&blake3_ctx, blake3_out);
}
static bool
lower_load_global_constant_offset_instr(nir_builder *b,
nir_intrinsic_instr *intrin,
@@ -290,52 +311,6 @@ lookup_ycbcr_conversion(const void *_state, uint32_t set,
&sampler->vk.ycbcr_conversion->state : NULL;
}
VkResult
nvk_shader_stage_to_nir(struct nvk_device *dev,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
struct vk_pipeline_cache *cache,
void *mem_ctx, struct nir_shader **nir_out)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
const nir_shader_compiler_options *nir_options =
nvk_get_nir_options(&pdev->vk, stage, rstate);
unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
vk_pipeline_hash_shader_stage(sinfo, rstate, stage_sha1);
if (cache == NULL)
cache = dev->mem_cache;
nir_shader *nir = vk_pipeline_cache_lookup_nir(cache, stage_sha1,
sizeof(stage_sha1),
nir_options, NULL,
mem_ctx);
if (nir != NULL) {
*nir_out = nir;
return VK_SUCCESS;
}
const struct spirv_to_nir_options spirv_options =
nvk_get_spirv_options(&pdev->vk, stage, rstate);
VkResult result = vk_pipeline_shader_stage_to_nir(&dev->vk, sinfo,
&spirv_options,
nir_options,
mem_ctx, &nir);
if (result != VK_SUCCESS)
return result;
nvk_preprocess_nir(&dev->pdev->vk, nir);
vk_pipeline_cache_add_nir(cache, stage_sha1, sizeof(stage_sha1), nir);
*nir_out = nir;
return VK_SUCCESS;
}
static inline bool
nir_has_image_var(nir_shader *nir)
{
@@ -493,13 +468,13 @@ nvk_shader_dump(struct nvk_shader *shader)
static VkResult
nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
VkShaderCreateFlagsEXT shader_flags,
const struct vk_pipeline_robustness_state *rs,
const struct nak_fs_key *fs_key,
struct nvk_shader *shader)
{
const bool dump_asm =
pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;
nir_variable_mode robust2_modes = 0;
if (rs->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
@@ -515,38 +490,18 @@ nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
return VK_SUCCESS;
}
struct nvk_shader *
nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct nvk_shader, shader, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_key_data, key_size);
if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&dev->vk, &shader->base,
&nvk_shader_ops, obj_key_data, key_size);
return shader;
}
VkResult
static VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
VkShaderCreateFlagsEXT shader_flags,
const struct vk_pipeline_robustness_state *rs,
const struct nak_fs_key *fs_key,
struct vk_pipeline_cache *cache,
struct nvk_shader *shader)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
VkResult result;
if (use_nak(pdev, nir->info.stage)) {
result = nvk_compile_nir_with_nak(pdev, nir, pipeline_flags, rs,
result = nvk_compile_nir_with_nak(pdev, nir, shader_flags, rs,
fs_key, shader);
} else {
result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
@@ -555,7 +510,7 @@ nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
return result;
if (nir->constant_data_size > 0) {
uint32_t data_align = nvk_min_cbuf_alignment(&dev->pdev->info);
uint32_t data_align = nvk_min_cbuf_alignment(&pdev->info);
uint32_t data_size = align(nir->constant_data_size, data_align);
void *data = malloc(data_size);
@@ -650,11 +605,15 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
return result;
}
void
nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader)
static const struct vk_shader_ops nvk_shader_ops;
static void
nvk_shader_destroy(struct vk_device *vk_dev,
struct vk_shader *vk_shader,
const VkAllocationCallbacks* pAllocator)
{
if (shader == NULL)
return;
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
if (shader->upload_size > 0) {
nvk_heap_free(dev, &dev->shader_heap,
@@ -671,127 +630,330 @@ nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader)
free((void *)shader->data_ptr);
vk_free(&dev->vk.alloc, shader);
vk_shader_free(&dev->vk, pAllocator, &shader->vk);
}
void
nvk_hash_shader(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rs,
bool is_multiview,
const struct vk_pipeline_layout *layout,
const struct nak_fs_key *fs_key)
static VkResult
nvk_compile_shader(struct nvk_device *dev,
struct vk_shader_compile_info *info,
const struct vk_graphics_pipeline_state *state,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shader_out)
{
struct mesa_sha1 ctx;
struct nvk_shader *shader;
VkResult result;
_mesa_sha1_init(&ctx);
/* We consume the NIR, regardless of success or failure */
nir_shader *nir = info->nir;
unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
vk_pipeline_hash_shader_stage(sinfo, rs, stage_sha1);
shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info->stage,
pAllocator, sizeof(*shader));
if (shader == NULL) {
ralloc_free(nir);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
_mesa_sha1_update(&ctx, stage_sha1, sizeof(stage_sha1));
/* TODO: Multiview with ESO */
const bool is_multiview = state && state->rp->view_mask != 0;
_mesa_sha1_update(&ctx, &is_multiview, sizeof(is_multiview));
nvk_lower_nir(dev, nir, info->robustness, is_multiview,
info->set_layout_count, info->set_layouts,
&shader->cbuf_map);
if (layout) {
_mesa_sha1_update(&ctx, &layout->create_flags,
sizeof(layout->create_flags));
_mesa_sha1_update(&ctx, &layout->set_count, sizeof(layout->set_count));
for (int i = 0; i < layout->set_count; i++) {
struct nvk_descriptor_set_layout *set =
vk_to_nvk_descriptor_set_layout(layout->set_layouts[i]);
_mesa_sha1_update(&ctx, &set->vk.blake3, sizeof(set->vk.blake3));
struct nak_fs_key fs_key_tmp, *fs_key = NULL;
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nvk_populate_fs_key(&fs_key_tmp, state);
fs_key = &fs_key_tmp;
}
result = nvk_compile_nir(dev, nir, info->flags, info->robustness,
fs_key, shader);
ralloc_free(nir);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
result = nvk_shader_upload(dev, shader);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
if (info->stage == MESA_SHADER_FRAGMENT) {
if (shader->info.fs.reads_sample_mask ||
shader->info.fs.uses_sample_shading) {
shader->min_sample_shading = 1;
} else if (state != NULL && state->ms != NULL &&
state->ms->sample_shading_enable) {
shader->min_sample_shading =
CLAMP(state->ms->min_sample_shading, 0, 1);
} else {
shader->min_sample_shading = 0;
}
}
if(fs_key)
_mesa_sha1_update(&ctx, fs_key, sizeof(*fs_key));
*shader_out = &shader->vk;
_mesa_sha1_final(&ctx, hash);
return VK_SUCCESS;
}
static bool
nvk_shader_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
static struct vk_pipeline_cache_object *
nvk_shader_deserialize(struct vk_pipeline_cache *cache,
const void *key_data,
size_t key_size,
struct blob_reader *blob);
void
nvk_shader_destroy(struct vk_device *_dev,
struct vk_pipeline_cache_object *object)
static VkResult
nvk_compile_shaders(struct vk_device *vk_dev,
uint32_t shader_count,
struct vk_shader_compile_info *infos,
const struct vk_graphics_pipeline_state *state,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shaders_out)
{
struct nvk_device *dev =
container_of(_dev, struct nvk_device, vk);
struct nvk_shader *shader =
container_of(object, struct nvk_shader, base);
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
nvk_shader_finish(dev, shader);
for (uint32_t i = 0; i < shader_count; i++) {
VkResult result = nvk_compile_shader(dev, &infos[i], state,
pAllocator, &shaders_out[i]);
if (result != VK_SUCCESS) {
/* Clean up all the shaders before this point */
for (uint32_t j = 0; j < i; j++)
nvk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
/* Clean up all the NIR after this point */
for (uint32_t j = i + 1; j < shader_count; j++)
ralloc_free(infos[j].nir);
/* Memset the output array */
memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
return result;
}
}
return VK_SUCCESS;
}
const struct vk_pipeline_cache_object_ops nvk_shader_ops = {
.serialize = nvk_shader_serialize,
.deserialize = nvk_shader_deserialize,
.destroy = nvk_shader_destroy,
};
static VkResult
nvk_deserialize_shader(struct vk_device *vk_dev,
struct blob_reader *blob,
uint32_t binary_version,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shader_out)
{
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
struct nvk_shader *shader;
VkResult result;
struct nak_shader_info info;
blob_copy_bytes(blob, &info, sizeof(info));
struct nvk_cbuf_map cbuf_map;
blob_copy_bytes(blob, &cbuf_map, sizeof(cbuf_map));
float min_sample_shading;
blob_copy_bytes(blob, &min_sample_shading, sizeof(min_sample_shading));
const uint32_t code_size = blob_read_uint32(blob);
const uint32_t data_size = blob_read_uint32(blob);
if (blob->overrun)
return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info.stage,
pAllocator, sizeof(*shader));
if (shader == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
shader->info = info;
shader->cbuf_map = cbuf_map;
shader->min_sample_shading = min_sample_shading;
shader->code_size = code_size;
shader->data_size = data_size;
shader->code_ptr = malloc(code_size);
if (shader->code_ptr == NULL) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
shader->data_ptr = malloc(data_size);
if (shader->data_ptr == NULL) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
blob_copy_bytes(blob, (void *)shader->code_ptr, shader->code_size);
blob_copy_bytes(blob, (void *)shader->data_ptr, shader->data_size);
if (blob->overrun) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
}
result = nvk_shader_upload(dev, shader);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
*shader_out = &shader->vk;
return VK_SUCCESS;
}
static bool
nvk_shader_serialize(struct vk_pipeline_cache_object *object,
nvk_shader_serialize(struct vk_device *vk_dev,
const struct vk_shader *vk_shader,
struct blob *blob)
{
struct nvk_shader *shader =
container_of(object, struct nvk_shader, base);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
/* We can't currently cache assembly */
if (shader->nak != NULL && shader->nak->asm_str != NULL)
return false;
blob_write_bytes(blob, &shader->info, sizeof(shader->info));
blob_write_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
blob_write_bytes(blob, &shader->min_sample_shading,
sizeof(shader->min_sample_shading));
blob_write_uint32(blob, shader->code_size);
blob_write_uint32(blob, shader->data_size);
blob_write_bytes(blob, shader->code_ptr, shader->code_size);
blob_write_bytes(blob, shader->data_ptr, shader->data_size);
return !blob->out_of_memory;
}
#define WRITE_STR(field, ...) ({ \
memset(field, 0, sizeof(field)); \
UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
assert(i > 0 && i < sizeof(field)); \
})
static VkResult
nvk_shader_get_executable_properties(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t *executable_count,
VkPipelineExecutablePropertiesKHR *properties)
{
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
properties, executable_count);
vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
props->stages = mesa_to_vk_shader_stage(shader->info.stage);
props->subgroupSize = 32;
WRITE_STR(props->name, "%s",
_mesa_shader_stage_to_string(shader->info.stage));
WRITE_STR(props->description, "%s shader",
_mesa_shader_stage_to_string(shader->info.stage));
}
return vk_outarray_status(&out);
}
static VkResult
nvk_shader_get_executable_statistics(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t executable_index,
uint32_t *statistic_count,
VkPipelineExecutableStatisticKHR *statistics)
{
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
statistics, statistic_count);
assert(executable_index == 0);
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "Code Size");
WRITE_STR(stat->description,
"Size of the compiled shader binary, in bytes");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->code_size;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "Number of GPRs");
WRITE_STR(stat->description, "Number of GPRs used by this pipeline");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->info.num_gprs;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "SLM Size");
WRITE_STR(stat->description,
"Size of shader local (scratch) memory, in bytes");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->info.slm_size;
}
return vk_outarray_status(&out);
}
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
const char *data)
{
ir->isText = VK_TRUE;
size_t data_len = strlen(data) + 1;
if (ir->pData == NULL) {
ir->dataSize = data_len;
return true;
}
strncpy(ir->pData, data, ir->dataSize);
if (ir->dataSize < data_len)
return false;
ir->dataSize = data_len;
return true;
}
static struct vk_pipeline_cache_object *
nvk_shader_deserialize(struct vk_pipeline_cache *cache,
const void *key_data,
size_t key_size,
struct blob_reader *blob)
static VkResult
nvk_shader_get_executable_internal_representations(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t executable_index,
uint32_t *internal_representation_count,
VkPipelineExecutableInternalRepresentationKHR *internal_representations)
{
struct nvk_device *dev =
container_of(cache->base.device, struct nvk_device, vk);
struct nvk_shader *shader =
nvk_shader_init(dev, key_data, key_size);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
internal_representations,
internal_representation_count);
bool incomplete_text = false;
if (!shader)
return NULL;
assert(executable_index == 0);
blob_copy_bytes(blob, &shader->info, sizeof(shader->info));
blob_copy_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
if (shader->nak != NULL && shader->nak->asm_str != NULL) {
vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
WRITE_STR(ir->name, "NAK assembly");
WRITE_STR(ir->description, "NAK assembly");
if (!write_ir_text(ir, shader->nak->asm_str))
incomplete_text = true;
}
}
shader->code_size = blob_read_uint32(blob);
void *code_ptr = malloc(shader->code_size);
if (!code_ptr)
goto fail;
blob_copy_bytes(blob, code_ptr, shader->code_size);
shader->code_ptr = code_ptr;
shader->data_size = blob_read_uint32(blob);
void *data_ptr = malloc(shader->data_size);
if (!data_ptr)
goto fail;
blob_copy_bytes(blob, data_ptr, shader->data_size);
shader->data_ptr = data_ptr;
return &shader->base;
fail:
/* nvk_shader_destroy frees both shader and shader->xfb */
nvk_shader_destroy(cache->base.device, &shader->base);
return NULL;
return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}
static const struct vk_shader_ops nvk_shader_ops = {
.destroy = nvk_shader_destroy,
.serialize = nvk_shader_serialize,
.get_executable_properties = nvk_shader_get_executable_properties,
.get_executable_statistics = nvk_shader_get_executable_statistics,
.get_executable_internal_representations =
nvk_shader_get_executable_internal_representations,
};
const struct vk_device_shader_ops nvk_device_shader_ops = {
.get_nir_options = nvk_get_nir_options,
.get_spirv_options = nvk_get_spirv_options,
.preprocess_nir = nvk_preprocess_nir,
.hash_graphics_state = nvk_hash_graphics_state,
.compile = nvk_compile_shaders,
.deserialize = nvk_deserialize_shader,
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
.cmd_bind_shaders = nvk_cmd_bind_shaders,
};

View file

@@ -14,6 +14,8 @@
#include "nir.h"
#include "nouveau_bo.h"
#include "vk_shader.h"
struct nak_shader_bin;
struct nvk_device;
struct nvk_physical_device;
@@ -57,11 +59,14 @@ struct nvk_cbuf_map {
};
struct nvk_shader {
struct vk_pipeline_cache_object base;
struct vk_shader vk;
struct nak_shader_info info;
struct nvk_cbuf_map cbuf_map;
/* Only relevant for fragment shaders */
float min_sample_shading;
struct nak_shader_bin *nak;
const void *code_ptr;
uint32_t code_size;
@@ -84,11 +89,7 @@ struct nvk_shader {
uint64_t data_addr;
};
static inline bool
nvk_shader_is_enabled(const struct nvk_shader *shader)
{
return shader && shader->upload_size > 0;
}
extern const struct vk_device_shader_ops nvk_device_shader_ops;
VkShaderStageFlags nvk_nak_stages(const struct nv_device_info *info);
@@ -115,18 +116,6 @@ nvk_nir_lower_descriptors(nir_shader *nir,
uint32_t set_layout_count,
struct vk_descriptor_set_layout * const *set_layouts,
struct nvk_cbuf_map *cbuf_map_out);
VkResult
nvk_shader_stage_to_nir(struct nvk_device *dev,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
struct vk_pipeline_cache *cache,
void *mem_ctx, struct nir_shader **nir_out);
void
nvk_populate_fs_key(struct nak_fs_key *key,
const struct vk_graphics_pipeline_state *state);
void
nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
const struct vk_pipeline_robustness_state *rs,
@@ -135,37 +124,9 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
struct vk_descriptor_set_layout * const *set_layouts,
struct nvk_cbuf_map *cbuf_map_out);
VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
const struct vk_pipeline_robustness_state *rstate,
const struct nak_fs_key *fs_key,
struct vk_pipeline_cache *cache,
struct nvk_shader *shader);
VkResult
nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader);
struct nvk_shader *
nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size);
extern const struct vk_pipeline_cache_object_ops nvk_shader_ops;
void
nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader);
void
nvk_hash_shader(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
bool is_multiview,
const struct vk_pipeline_layout *layout,
const struct nak_fs_key *fs_key);
void
nvk_shader_destroy(struct vk_device *dev,
struct vk_pipeline_cache_object *object);
/* Codegen wrappers.
*
* TODO: Delete these once NAK supports everything.