From 68885511d2c64951600307e8d0041c7de3d0bc64 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 30 May 2024 11:20:32 +0300 Subject: [PATCH] anv: add support for indirect execution set Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/genxml/meson.build | 2 + src/intel/vulkan/anv_dgc_set.c | 336 +++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_genX.h | 4 + src/intel/vulkan/anv_private.h | 43 +++++ src/intel/vulkan/anv_util.c | 30 +++ src/intel/vulkan/genX_shader.c | 69 +++++++ src/intel/vulkan/meson.build | 1 + 7 files changed, 485 insertions(+) create mode 100644 src/intel/vulkan/anv_dgc_set.c diff --git a/src/intel/genxml/meson.build b/src/intel/genxml/meson.build index 5b934a6c531..ccc38c66c84 100644 --- a/src/intel/genxml/meson.build +++ b/src/intel/genxml/meson.build @@ -51,6 +51,8 @@ genX_bits_included_symbols = [ '3DSTATE_STENCIL_BUFFER::Surface Pitch', '3DSTATE_HIER_DEPTH_BUFFER::Surface Base Address', '3DSTATE_HIER_DEPTH_BUFFER::Surface Pitch', + '3DSTATE_DS', + '3DSTATE_HS', '3DSTATE_CLEAR_PARAMS', '3DSTATE_SO_BUFFER::Surface Base Address', '3DSTATE_SO_BUFFER::Stream Offset', diff --git a/src/intel/vulkan/anv_dgc_set.c b/src/intel/vulkan/anv_dgc_set.c new file mode 100644 index 00000000000..47c942fd4a6 --- /dev/null +++ b/src/intel/vulkan/anv_dgc_set.c @@ -0,0 +1,336 @@ +/* + * Copyright 2024 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "genxml/genX_bits.h" + +#include "anv_private.h" + +enum anv_dgc_stage +anv_vk_stage_to_dgc_stage(VkShaderStageFlags vk_stage) +{ + switch (vk_stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + return ANV_DGC_STAGE_VERTEX; + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + return ANV_DGC_STAGE_TESS_CTRL; + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + return ANV_DGC_STAGE_TESS_EVAL; + case VK_SHADER_STAGE_GEOMETRY_BIT: + return ANV_DGC_STAGE_GEOMETRY; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return ANV_DGC_STAGE_FRAGMENT; + case 
VK_SHADER_STAGE_TASK_BIT_EXT: + return ANV_DGC_STAGE_TASK; + case VK_SHADER_STAGE_MESH_BIT_EXT: + return ANV_DGC_STAGE_MESH; + case VK_SHADER_STAGE_COMPUTE_BIT: + return ANV_DGC_STAGE_COMPUTE; + default: + UNREACHABLE("Unhandled stage"); + } +} + +uint32_t +anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages) +{ + uint32_t gen_stages = 0; + anv_foreach_vk_stage(stage, vk_stages) + gen_stages |= BITFIELD_BIT(anv_vk_stage_to_dgc_stage(stage)); + return gen_stages; +} + +void +anv_write_gfx_indirect_descriptor(struct anv_device *device, + struct anv_dgc_gfx_descriptor *descriptor, + struct anv_cmd_graphics_state *gfx) +{ + struct anv_dgc_push_stage_state empty_push = {}; + + if (intel_needs_workaround(device->info, 16011107343) && + gfx->shaders[MESA_SHADER_TESS_CTRL] != NULL) { + memcpy(&descriptor->final_commands[descriptor->final_commands_size], + gfx->dyn_state.packed.hs, + _3DSTATE_HS_length(device->info) * 4); + descriptor->final_commands_size += _3DSTATE_HS_length(device->info) * 4; + } + + if (intel_needs_workaround(device->info, 22018402687) && + gfx->shaders[MESA_SHADER_TESS_EVAL] != NULL) { + memcpy(&descriptor->final_commands[descriptor->final_commands_size], + gfx->dyn_state.packed.ds, + _3DSTATE_DS_length(device->info) * 4); + descriptor->final_commands_size += _3DSTATE_DS_length(device->info) * 4; + } + assert(descriptor->final_commands_size <= sizeof(descriptor->final_commands)); + + anv_foreach_vk_stage(vk_stage, ANV_GRAPHICS_STAGE_BITS) { + enum anv_dgc_stage gen_stage = anv_vk_stage_to_dgc_stage(vk_stage); + enum mesa_shader_stage stage = vk_to_mesa_shader_stage(vk_stage); + + if ((gfx->active_stages & vk_stage) == 0) { + descriptor->push_constants.stages[gen_stage] = empty_push; + continue; + } + + const struct anv_pipeline_bind_map *bind_map = + &gfx->shaders[stage]->bind_map; + if ((bind_map->push_ranges[0].length == 0 || + bind_map->push_ranges[0].set != ANV_DESCRIPTOR_SET_PUSH_CONSTANTS) && + bind_map->inline_dwords_count == 0) { + 
descriptor->push_constants.stages[gen_stage] = empty_push; + continue; + } + + if (stage == MESA_SHADER_MESH && + intel_needs_workaround(device->info, 18019110168)) { + const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx); + descriptor->wa_18019110168_remapping_table_offset = + gfx->shaders[MESA_SHADER_MESH]->kernel.offset + + mesh_prog_data->wa_18019110168_mapping_offset; + } + + if (stage == MESA_SHADER_MESH || stage == MESA_SHADER_TASK) { + descriptor->push_constants.stages[gen_stage].bindless.inline_dwords_count = + bind_map->inline_dwords_count; + assert(sizeof(bind_map->inline_dwords) == + sizeof(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords)); + memcpy(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords, + bind_map->inline_dwords, sizeof(bind_map->inline_dwords)); + } else { + for (uint32_t i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) { + const struct anv_push_range *range = &bind_map->push_ranges[i]; + if (range->length == 0) + break; + + /* We should have compiled all the indirectly bindable shaders in + * such a way that these are the only types of push constants we should + * see. 
+ */ + assert(range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS || + range->set == ANV_DESCRIPTOR_SET_DESCRIPTORS || + range->set == ANV_DESCRIPTOR_SET_NULL || + range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING); + + struct anv_dgc_push_stage_slot *slot = + &descriptor->push_constants.stages[gen_stage].legacy.slots[i]; + + slot->push_data_size = 32 * range->length; + + slot->push_data_offset = 32 * range->start; + slot->type = ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS; + descriptor->push_constants.stages[gen_stage].legacy.n_slots++; + } + } + descriptor->push_constants.active_stages |= 1u << gen_stage; + } +} + +static void +write_cs_set_entry(struct anv_device *device, + struct anv_indirect_execution_set *indirect_set, + uint32_t entry, struct anv_shader *shader) +{ + struct anv_dgc_cs_descriptor descriptor; + anv_genX(device->info, write_cs_descriptor)(&descriptor, device, shader); + + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data_const(shader->prog_data); + + if (device->info->verx10 < 125) + anv_reloc_list_append(&indirect_set->relocs, &shader->relocs); + + memcpy(indirect_set->bo->map + entry * indirect_set->stride, + &descriptor, sizeof(descriptor)); + + indirect_set->uses_systolic |= prog_data->uses_systolic; + indirect_set->max_scratch = MAX2(indirect_set->max_scratch, + prog_data->base.total_scratch); + indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries, + shader->prog_data->ray_queries); +} + +static void +write_rt_set_entry(struct anv_indirect_execution_set *indirect_set, + uint32_t entry, struct vk_pipeline *pipeline) +{ + indirect_set->max_scratch = MAX2(indirect_set->max_scratch, + vk_pipeline_get_rt_scratch_size(pipeline)); + indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries, + vk_pipeline_get_rt_ray_queries(pipeline)); +} + +VkResult anv_CreateIndirectExecutionSetEXT( + VkDevice _device, + const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + 
VkIndirectExecutionSetEXT* pIndirectExecutionSet) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_indirect_execution_set *indirect_set = + vk_object_zalloc(&device->vk, pAllocator, + sizeof(struct anv_indirect_execution_set), + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT); + if (indirect_set == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + anv_reloc_list_init(&indirect_set->relocs, + pAllocator ? pAllocator : &device->vk.alloc, + device->physical->uses_relocs); + if (result != VK_SUCCESS) + goto fail_object; + + struct vk_pipeline *vk_pipeline = NULL; + struct vk_shader *vk_shader = NULL; + VkPipelineBindPoint bind_point; + uint32_t entry_count; + if (pCreateInfo->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) { + entry_count = pCreateInfo->info.pPipelineInfo->maxPipelineCount; + vk_pipeline = + vk_pipeline_from_handle(pCreateInfo->info.pPipelineInfo->initialPipeline); + bind_point = vk_pipeline->bind_point; + if (vk_pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) + vk_shader = vk_pipeline_get_shader(vk_pipeline, MESA_SHADER_COMPUTE); + } else { + entry_count = pCreateInfo->info.pShaderInfo->maxShaderCount; + vk_shader = + vk_shader_from_handle(pCreateInfo->info.pShaderInfo->pInitialShaders[0]); + bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + } + + enum anv_bo_alloc_flags alloc_flags = + ANV_BO_ALLOC_CAPTURE | + ANV_BO_ALLOC_MAPPED | + ANV_BO_ALLOC_HOST_CACHED_COHERENT; + + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + + /* Alignment required for + * MEDIA_INTERFACE_DESCRIPTOR_LOAD::InterfaceDescriptorDataStartAddress + */ + STATIC_ASSERT(sizeof(struct anv_dgc_cs_descriptor) % 64 == 0); + + indirect_set->stride = sizeof(struct anv_dgc_cs_descriptor); + + uint32_t size = align(entry_count * indirect_set->stride, 4096); + + /* Generations up to Gfx12.0 have a structure describing the compute + * 
shader that needs to live in the dynamic state heap. + */ + if (device->info->verx10 <= 120) + alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL; + + result = anv_device_alloc_bo(device, "indirect-exec-set", size, + alloc_flags, 0 /* explicit_address */, + &indirect_set->bo); + if (result != VK_SUCCESS) + goto fail_relocs; + + indirect_set->bind_map = anv_pipeline_bind_map_clone( + device, pAllocator, &shader->bind_map); + if (indirect_set->bind_map == NULL) { + result = vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, + "Fail to allocate bind map"); + goto fail_bo; + } + + write_cs_set_entry(device, indirect_set, 0, shader); + break; + } + + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: { + VK_FROM_HANDLE(vk_pipeline, pipeline, + pCreateInfo->info.pPipelineInfo->initialPipeline); + write_rt_set_entry(indirect_set, 0, pipeline); + break; + } + + default: + UNREACHABLE("Unsupported indirect pipeline type"); + } + + *pIndirectExecutionSet = anv_indirect_execution_set_to_handle(indirect_set); + + return VK_SUCCESS; + + fail_bo: + anv_device_release_bo(device, indirect_set->bo); + fail_relocs: + anv_reloc_list_finish(&indirect_set->relocs); + fail_object: + vk_object_free(&device->vk, pAllocator, indirect_set); + return result; +} + +void anv_DestroyIndirectExecutionSetEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + vk_free2(&device->vk.alloc, pAllocator, indirect_set->bind_map); + anv_reloc_list_finish(&indirect_set->relocs); + if (indirect_set->bo) + anv_device_release_bo(device, indirect_set->bo); + vk_object_free(&device->vk, pAllocator, indirect_set); +} + +void anv_UpdateIndirectExecutionSetPipelineEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetPipelineEXT* 
pExecutionSetWrites) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutionSetWrites[i].pipeline); + + switch (pipeline->bind_point) { + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct vk_shader *vk_shader = + vk_pipeline_get_shader(pipeline, MESA_SHADER_COMPUTE); + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + write_cs_set_entry(device, indirect_set, + pExecutionSetWrites[i].index, shader); + break; + } + + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: + write_rt_set_entry(indirect_set, pExecutionSetWrites[i].index, pipeline); + break; + + default: + UNREACHABLE("Unsupported indirect pipeline type"); + } + } +} + +void anv_UpdateIndirectExecutionSetShaderEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(vk_shader, vk_shader, pExecutionSetWrites[i].shader); + assert(vk_shader->stage == MESA_SHADER_COMPUTE); + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + write_cs_set_entry(device, indirect_set, + pExecutionSetWrites[i].index, shader); + } +} diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 4421b2693e6..6ebc4f83b71 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -548,6 +548,10 @@ void genX(write_rt_shader_group)(struct anv_device *device, uint32_t shader_count, void *output); +void genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc, + struct anv_device *device, + struct anv_shader *shader); + uint32_t 
genX(shader_cmd_size)(struct anv_device *device, mesa_shader_stage stage); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cfc8628b67e..abca42b8e07 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -54,6 +54,7 @@ #include "compiler/brw/brw_rt.h" #include "ds/intel_driver_ds.h" #include "dev/virtio/intel_virtio.h" +#include "shaders/libintel_shaders.h" #include "util/bitset.h" #include "util/bitscan.h" #include "util/cache_ops.h" @@ -1116,6 +1117,11 @@ struct anv_pipeline_bind_map { uint8_t inferred_behavior; }; +struct anv_pipeline_bind_map * +anv_pipeline_bind_map_clone(struct anv_device *device, + const VkAllocationCallbacks *alloc, + const struct anv_pipeline_bind_map *src); + struct anv_push_descriptor_info { /* A bitfield of descriptors used. */ uint32_t used_descriptors; @@ -5193,6 +5199,10 @@ struct anv_event { #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) +#define ANV_VK_STAGE_MASK (ANV_GRAPHICS_STAGE_BITS | \ + ANV_RT_STAGE_BITS | \ + VK_SHADER_STAGE_COMPUTE_BIT) + #define anv_foreach_stage(stage, stage_bits) \ u_foreach_bit(stage, (stage_bits & ANV_STAGE_MASK)) @@ -6539,6 +6549,36 @@ static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_poo pool->khr_perf_preamble_stride * pass; } +struct anv_indirect_execution_set { + struct vk_object_base base; + + struct anv_pipeline_bind_map *bind_map; + + /** List of all the scratch buffers on < Gfx12.5 */ + struct anv_reloc_list relocs; + + struct anv_bo *bo; + + bool uses_systolic; + + uint32_t stride; + + uint32_t max_final_commands_size; + + /** Maximum scratch space for shaders */ + uint32_t max_scratch; + /** Maximum number of ray queries used by shaders */ + uint32_t max_ray_queries; +}; + +void anv_write_gfx_indirect_descriptor(struct anv_device *device, + struct anv_dgc_gfx_descriptor *descriptor, + struct anv_cmd_graphics_state *gfx); + +enum anv_dgc_stage anv_vk_stage_to_dgc_stage(VkShaderStageFlags 
vk_stage); + +uint32_t anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages); + struct anv_vid_mem { struct anv_device_memory *mem; VkDeviceSize offset; @@ -6949,6 +6989,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base, VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_indirect_execution_set, base, + VkIndirectExecutionSetEXT, + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT) #define anv_genX(devinfo, thing) ({ \ __typeof(&gfx9_##thing) genX_thing; \ diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 583baf4c6d3..52964667595 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -574,3 +574,33 @@ anv_device_finish_rt_shaders(struct anv_device *device) if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) return; } + +struct anv_pipeline_bind_map * +anv_pipeline_bind_map_clone(struct anv_device *device, + const VkAllocationCallbacks *alloc, + const struct anv_pipeline_bind_map *src) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_bind_map, bind_map, 1); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surfaces, src->surface_count); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, samplers, src->sampler_count); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding, embedded_samplers, src->embedded_sampler_count); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + memcpy(bind_map, src, sizeof(*src)); + + memcpy(surfaces, src->surface_to_descriptor, + sizeof(*surfaces) * src->surface_count); + bind_map->surface_to_descriptor = surfaces; + memcpy(samplers, src->sampler_to_descriptor, + sizeof(*samplers) * src->sampler_count); + bind_map->sampler_to_descriptor = samplers; + memcpy(embedded_samplers, src->embedded_sampler_to_binding, + sizeof(*embedded_samplers) * 
src->embedded_sampler_count); + bind_map->embedded_sampler_to_binding = embedded_samplers; + + return bind_map; +} diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c index 6cb46bc7acf..410e5aa244d 100644 --- a/src/intel/vulkan/genX_shader.c +++ b/src/intel/vulkan/genX_shader.c @@ -1272,6 +1272,75 @@ emit_cs_shader(struct anv_batch *batch, #endif } +void +genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc, + struct anv_device *device, + struct anv_shader *shader) +{ + const struct anv_pipeline_bind_map *bind_map = &shader->bind_map; + const struct anv_push_range *push_range = &bind_map->push_ranges[0]; + + *desc = (struct anv_dgc_cs_descriptor) { + .push_data_offset = 32 * (push_range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS ? + push_range->start : 0), + }; + + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data_const(shader->prog_data); + const struct intel_cs_dispatch_info dispatch = + brw_cs_get_dispatch_info(device->info, prog_data, NULL); + + desc->right_mask = dispatch.right_mask; + desc->threads = dispatch.threads; + desc->simd_size = dispatch.simd_size; + +#if GFX_VERx10 >= 125 + GENX(COMPUTE_WALKER_pack)(NULL, desc->gfx125.compute_walker, + &(struct GENX(COMPUTE_WALKER)) { + GENX(COMPUTE_WALKER_header), + .body = { + .PostSync.MOCS = anv_mocs(device, NULL, 0), + }, + }); + + assert(sizeof(desc->gfx125.compute_walker) > + sizeof(shader->cs.gfx125.compute_walker_body)); + for (uint32_t i = 0; i < ARRAY_SIZE(shader->cs.gfx125.compute_walker_body); i++) + desc->gfx125.compute_walker[1 + i] |= shader->cs.gfx125.compute_walker_body[i]; + desc->gfx125.inline_dwords_count = bind_map->inline_dwords_count; + assert(sizeof(desc->gfx125.inline_dwords) == + sizeof(bind_map->inline_dwords)); + memcpy(desc->gfx125.inline_dwords, + bind_map->inline_dwords, + sizeof(bind_map->inline_dwords)); + +#else + assert(sizeof(desc->gfx9.media_vfe_state) == + shader->cs.gfx9.vfe.len * 4); + 
assert(sizeof(desc->gfx9.interface_descriptor_data) == + sizeof(shader->cs.gfx9.idd)); + + memcpy(desc->gfx9.media_vfe_state, + &shader->cmd_data[shader->cs.gfx9.vfe.offset], + shader->cs.gfx9.vfe.len * 4); + memcpy(desc->gfx9.interface_descriptor_data, + shader->cs.gfx9.idd, + sizeof(desc->gfx9.interface_descriptor_data)); + + desc->gfx9.n_threads = dispatch.threads; + desc->gfx9.cross_thread_push_size = prog_data->push.cross_thread.size; + desc->gfx9.per_thread_push_size = prog_data->push.per_thread.size; + desc->gfx9.subgroup_id_offset = + offsetof(struct anv_push_constants, cs.subgroup_id) - + (32 * push_range->start + prog_data->push.cross_thread.size); + + GENX(GPGPU_WALKER_pack)(NULL, desc->gfx9.gpgpu_walker, + &(struct GENX(GPGPU_WALKER)) { + GENX(GPGPU_WALKER_header), + }); +#endif +} + void genX(init_instructions)(struct anv_physical_device *device) { diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index b4ee5e2e775..1b1adbc203b 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -160,6 +160,7 @@ libanv_files = files( 'anv_cmd_buffer.c', 'anv_descriptor_set.c', 'anv_device.c', + 'anv_dgc_set.c', 'anv_embedded_sampler.c', 'anv_event.c', 'anv_formats.c',