From 68885511d2c64951600307e8d0041c7de3d0bc64 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 30 May 2024 11:20:32 +0300 Subject: [PATCH] anv: add support for indirect execution set Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/genxml/meson.build | 2 + src/intel/vulkan/anv_dgc_set.c | 336 +++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_genX.h | 4 + src/intel/vulkan/anv_private.h | 43 +++++ src/intel/vulkan/anv_util.c | 30 +++ src/intel/vulkan/genX_shader.c | 69 +++++++ src/intel/vulkan/meson.build | 1 + 7 files changed, 485 insertions(+) create mode 100644 src/intel/vulkan/anv_dgc_set.c diff --git a/src/intel/genxml/meson.build b/src/intel/genxml/meson.build index 5b934a6c531..ccc38c66c84 100644 --- a/src/intel/genxml/meson.build +++ b/src/intel/genxml/meson.build @@ -51,6 +51,8 @@ genX_bits_included_symbols = [ '3DSTATE_STENCIL_BUFFER::Surface Pitch', '3DSTATE_HIER_DEPTH_BUFFER::Surface Base Address', '3DSTATE_HIER_DEPTH_BUFFER::Surface Pitch', + '3DSTATE_DS', + '3DSTATE_HS', '3DSTATE_CLEAR_PARAMS', '3DSTATE_SO_BUFFER::Surface Base Address', '3DSTATE_SO_BUFFER::Stream Offset', diff --git a/src/intel/vulkan/anv_dgc_set.c b/src/intel/vulkan/anv_dgc_set.c new file mode 100644 index 00000000000..47c942fd4a6 --- /dev/null +++ b/src/intel/vulkan/anv_dgc_set.c @@ -0,0 +1,336 @@ +/* + * Copyright 2024 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "genxml/genX_bits.h" + +#include "anv_private.h" + +enum anv_dgc_stage +anv_vk_stage_to_dgc_stage(VkShaderStageFlags vk_stage) +{ + switch (vk_stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + return ANV_DGC_STAGE_VERTEX; + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + return ANV_DGC_STAGE_TESS_CTRL; + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + return ANV_DGC_STAGE_TESS_EVAL; + case VK_SHADER_STAGE_GEOMETRY_BIT: + return ANV_DGC_STAGE_GEOMETRY; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return ANV_DGC_STAGE_FRAGMENT; + case 
VK_SHADER_STAGE_TASK_BIT_EXT: + return ANV_DGC_STAGE_TASK; + case VK_SHADER_STAGE_MESH_BIT_EXT: + return ANV_DGC_STAGE_MESH; + case VK_SHADER_STAGE_COMPUTE_BIT: + return ANV_DGC_STAGE_COMPUTE; + default: + UNREACHABLE("Unhandled stage"); + } +} + +uint32_t +anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages) +{ + uint32_t gen_stages = 0; + anv_foreach_vk_stage(stage, vk_stages) + gen_stages |= BITFIELD_BIT(anv_vk_stage_to_dgc_stage(stage)); + return gen_stages; +} + +void +anv_write_gfx_indirect_descriptor(struct anv_device *device, + struct anv_dgc_gfx_descriptor *descriptor, + struct anv_cmd_graphics_state *gfx) +{ + struct anv_dgc_push_stage_state empty_push = {}; + + if (intel_needs_workaround(device->info, 16011107343) && + gfx->shaders[MESA_SHADER_TESS_CTRL] != NULL) { + memcpy(&descriptor->final_commands[descriptor->final_commands_size], + gfx->dyn_state.packed.hs, + _3DSTATE_HS_length(device->info) * 4); + descriptor->final_commands_size += _3DSTATE_HS_length(device->info) * 4; + } + + if (intel_needs_workaround(device->info, 22018402687) && + gfx->shaders[MESA_SHADER_TESS_EVAL] != NULL) { + memcpy(&descriptor->final_commands[descriptor->final_commands_size], + gfx->dyn_state.packed.ds, + _3DSTATE_DS_length(device->info) * 4); + descriptor->final_commands_size += _3DSTATE_DS_length(device->info) * 4; + } + assert(descriptor->final_commands_size <= sizeof(descriptor->final_commands)); + + anv_foreach_vk_stage(vk_stage, ANV_GRAPHICS_STAGE_BITS) { + enum anv_dgc_stage gen_stage = anv_vk_stage_to_dgc_stage(vk_stage); + enum mesa_shader_stage stage = vk_to_mesa_shader_stage(vk_stage); + + if ((gfx->active_stages & vk_stage) == 0) { + descriptor->push_constants.stages[gen_stage] = empty_push; + continue; + } + + const struct anv_pipeline_bind_map *bind_map = + &gfx->shaders[stage]->bind_map; + if ((bind_map->push_ranges[0].length == 0 || + bind_map->push_ranges[0].set != ANV_DESCRIPTOR_SET_PUSH_CONSTANTS) && + bind_map->inline_dwords_count == 0) { + 
descriptor->push_constants.stages[gen_stage] = empty_push; + continue; + } + + if (stage == MESA_SHADER_MESH && + intel_needs_workaround(device->info, 18019110168)) { + const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx); + descriptor->wa_18019110168_remapping_table_offset = + gfx->shaders[MESA_SHADER_MESH]->kernel.offset + + mesh_prog_data->wa_18019110168_mapping_offset; + } + + if (stage == MESA_SHADER_MESH || stage == MESA_SHADER_TASK) { + descriptor->push_constants.stages[gen_stage].bindless.inline_dwords_count = + bind_map->inline_dwords_count; + assert(sizeof(bind_map->inline_dwords) == + sizeof(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords)); + memcpy(descriptor->push_constants.stages[gen_stage].bindless.inline_dwords, + bind_map->inline_dwords, sizeof(bind_map->inline_dwords)); + } else { + for (uint32_t i = 0; i < ARRAY_SIZE(bind_map->push_ranges); i++) { + const struct anv_push_range *range = &bind_map->push_ranges[i]; + if (range->length == 0) + break; + + /* We should have compiled all the indirectly bindable shaders in + * such a way that these are the only types of push constants we should + * see. 
+ */ + assert(range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS || + range->set == ANV_DESCRIPTOR_SET_DESCRIPTORS || + range->set == ANV_DESCRIPTOR_SET_NULL || + range->set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING); + + struct anv_dgc_push_stage_slot *slot = + &descriptor->push_constants.stages[gen_stage].legacy.slots[i]; + + slot->push_data_size = 32 * range->length; + + slot->push_data_offset = 32 * range->start; + slot->type = ANV_DGC_PUSH_SLOT_TYPE_PUSH_CONSTANTS; + descriptor->push_constants.stages[gen_stage].legacy.n_slots++; + } + } + descriptor->push_constants.active_stages |= 1u << gen_stage; + } +} + +static void +write_cs_set_entry(struct anv_device *device, + struct anv_indirect_execution_set *indirect_set, + uint32_t entry, struct anv_shader *shader) +{ + struct anv_dgc_cs_descriptor descriptor; + anv_genX(device->info, write_cs_descriptor)(&descriptor, device, shader); + + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data_const(shader->prog_data); + + if (device->info->verx10 < 125) + anv_reloc_list_append(&indirect_set->relocs, &shader->relocs); + + memcpy(indirect_set->bo->map + entry * indirect_set->stride, + &descriptor, sizeof(descriptor)); + + indirect_set->uses_systolic |= prog_data->uses_systolic; + indirect_set->max_scratch = MAX2(indirect_set->max_scratch, + prog_data->base.total_scratch); + indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries, + shader->prog_data->ray_queries); +} + +static void +write_rt_set_entry(struct anv_indirect_execution_set *indirect_set, + uint32_t entry, struct vk_pipeline *pipeline) +{ + indirect_set->max_scratch = MAX2(indirect_set->max_scratch, + vk_pipeline_get_rt_scratch_size(pipeline)); + indirect_set->max_ray_queries = MAX2(indirect_set->max_ray_queries, + vk_pipeline_get_rt_ray_queries(pipeline)); +} + +VkResult anv_CreateIndirectExecutionSetEXT( + VkDevice _device, + const VkIndirectExecutionSetCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + 
VkIndirectExecutionSetEXT* pIndirectExecutionSet) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_indirect_execution_set *indirect_set = + vk_object_zalloc(&device->vk, pAllocator, + sizeof(struct anv_indirect_execution_set), + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT); + if (indirect_set == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + anv_reloc_list_init(&indirect_set->relocs, + pAllocator ? pAllocator : &device->vk.alloc, + device->physical->uses_relocs); + if (result != VK_SUCCESS) + goto fail_object; + + struct vk_pipeline *vk_pipeline = NULL; + struct vk_shader *vk_shader = NULL; + VkPipelineBindPoint bind_point; + uint32_t entry_count; + if (pCreateInfo->type == VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT) { + entry_count = pCreateInfo->info.pPipelineInfo->maxPipelineCount; + vk_pipeline = + vk_pipeline_from_handle(pCreateInfo->info.pPipelineInfo->initialPipeline); + bind_point = vk_pipeline->bind_point; + if (vk_pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) + vk_shader = vk_pipeline_get_shader(vk_pipeline, MESA_SHADER_COMPUTE); + } else { + entry_count = pCreateInfo->info.pShaderInfo->maxShaderCount; + vk_shader = + vk_shader_from_handle(pCreateInfo->info.pShaderInfo->pInitialShaders[0]); + bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + } + + enum anv_bo_alloc_flags alloc_flags = + ANV_BO_ALLOC_CAPTURE | + ANV_BO_ALLOC_MAPPED | + ANV_BO_ALLOC_HOST_CACHED_COHERENT; + + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + + /* Alignment required for + * MEDIA_INTERFACE_DESCRIPTOR_LOAD::InterfaceDescriptorDataStartAddress + */ + STATIC_ASSERT(sizeof(struct anv_dgc_cs_descriptor) % 64 == 0); + + indirect_set->stride = sizeof(struct anv_dgc_cs_descriptor); + + uint32_t size = align(entry_count * indirect_set->stride, 4096); + + /* Generations up to Gfx12.0 have a structure describing the compute + * 
shader that needs to live in the dynamic state heap. + */ + if (device->info->verx10 <= 120) + alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL; + + result = anv_device_alloc_bo(device, "indirect-exec-set", size, + alloc_flags, 0 /* explicit_address */, + &indirect_set->bo); + if (result != VK_SUCCESS) + goto fail_relocs; + + indirect_set->bind_map = anv_pipeline_bind_map_clone( + device, pAllocator, &shader->bind_map); + if (indirect_set->bind_map == NULL) { + result = vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, + "Fail to allocate bind map"); + goto fail_bo; + } + + write_cs_set_entry(device, indirect_set, 0, shader); + break; + } + + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: { + VK_FROM_HANDLE(vk_pipeline, pipeline, + pCreateInfo->info.pPipelineInfo->initialPipeline); + write_rt_set_entry(indirect_set, 0, pipeline); + break; + } + + default: + UNREACHABLE("Unsupported indirect pipeline type"); + } + + *pIndirectExecutionSet = anv_indirect_execution_set_to_handle(indirect_set); + + return VK_SUCCESS; + + fail_bo: + anv_device_release_bo(device, indirect_set->bo); + fail_relocs: + anv_reloc_list_finish(&indirect_set->relocs); + fail_object: + vk_object_free(&device->vk, pAllocator, indirect_set); + return result; +} + +void anv_DestroyIndirectExecutionSetEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + vk_free2(&device->vk.alloc, pAllocator, indirect_set->bind_map); + anv_reloc_list_finish(&indirect_set->relocs); + if (indirect_set->bo) + anv_device_release_bo(device, indirect_set->bo); + vk_object_free(&device->vk, pAllocator, indirect_set); +} + +void anv_UpdateIndirectExecutionSetPipelineEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetPipelineEXT* 
pExecutionSetWrites) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutionSetWrites[i].pipeline); + + switch (pipeline->bind_point) { + case VK_PIPELINE_BIND_POINT_COMPUTE: { + struct vk_shader *vk_shader = + vk_pipeline_get_shader(pipeline, MESA_SHADER_COMPUTE); + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + write_cs_set_entry(device, indirect_set, + pExecutionSetWrites[i].index, shader); + break; + } + + case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: + write_rt_set_entry(indirect_set, pExecutionSetWrites[i].index, pipeline); + break; + + default: + UNREACHABLE("Unsupported indirect pipeline type"); + } + } +} + +void anv_UpdateIndirectExecutionSetShaderEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetShaderEXT* pExecutionSetWrites) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_indirect_execution_set, indirect_set, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(vk_shader, vk_shader, pExecutionSetWrites[i].shader); + assert(vk_shader->stage == MESA_SHADER_COMPUTE); + struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); + write_cs_set_entry(device, indirect_set, + pExecutionSetWrites[i].index, shader); + } +} diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 4421b2693e6..6ebc4f83b71 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -548,6 +548,10 @@ void genX(write_rt_shader_group)(struct anv_device *device, uint32_t shader_count, void *output); +void genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc, + struct anv_device *device, + struct anv_shader *shader); + uint32_t 
genX(shader_cmd_size)(struct anv_device *device, mesa_shader_stage stage); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cfc8628b67e..abca42b8e07 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -54,6 +54,7 @@ #include "compiler/brw/brw_rt.h" #include "ds/intel_driver_ds.h" #include "dev/virtio/intel_virtio.h" +#include "shaders/libintel_shaders.h" #include "util/bitset.h" #include "util/bitscan.h" #include "util/cache_ops.h" @@ -1116,6 +1117,11 @@ struct anv_pipeline_bind_map { uint8_t inferred_behavior; }; +struct anv_pipeline_bind_map * +anv_pipeline_bind_map_clone(struct anv_device *device, + const VkAllocationCallbacks *alloc, + const struct anv_pipeline_bind_map *src); + struct anv_push_descriptor_info { /* A bitfield of descriptors used. */ uint32_t used_descriptors; @@ -5193,6 +5199,10 @@ struct anv_event { #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1) +#define ANV_VK_STAGE_MASK (ANV_GRAPHICS_STAGE_BITS | \ + ANV_RT_STAGE_BITS | \ + VK_SHADER_STAGE_COMPUTE_BIT) + #define anv_foreach_stage(stage, stage_bits) \ u_foreach_bit(stage, (stage_bits & ANV_STAGE_MASK)) @@ -6539,6 +6549,36 @@ static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_poo pool->khr_perf_preamble_stride * pass; } +struct anv_indirect_execution_set { + struct vk_object_base base; + + struct anv_pipeline_bind_map *bind_map; + + /** List of all the scratch buffers on < Gfx12.5 */ + struct anv_reloc_list relocs; + + struct anv_bo *bo; + + bool uses_systolic; + + uint32_t stride; + + uint32_t max_final_commands_size; + + /** Maximum scratch space for shaders */ + uint32_t max_scratch; + /** Maximum number of ray queries used by shaders */ + uint32_t max_ray_queries; +}; + +void anv_write_gfx_indirect_descriptor(struct anv_device *device, + struct anv_dgc_gfx_descriptor *descriptor, + struct anv_cmd_graphics_state *gfx); + +enum anv_dgc_stage anv_vk_stage_to_dgc_stage(VkShaderStageFlags 
vk_stage); + +uint32_t anv_vk_stages_to_generated_stages(VkShaderStageFlags vk_stages); + struct anv_vid_mem { struct anv_device_memory *mem; VkDeviceSize offset; @@ -6949,6 +6989,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base, VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR) +VK_DEFINE_NONDISP_HANDLE_CASTS(anv_indirect_execution_set, base, + VkIndirectExecutionSetEXT, + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT) #define anv_genX(devinfo, thing) ({ \ __typeof(&gfx9_##thing) genX_thing; \ diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 583baf4c6d3..52964667595 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -574,3 +574,33 @@ anv_device_finish_rt_shaders(struct anv_device *device) if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline) return; } + +struct anv_pipeline_bind_map * +anv_pipeline_bind_map_clone(struct anv_device *device, + const VkAllocationCallbacks *alloc, + const struct anv_pipeline_bind_map *src) +{ + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_bind_map, bind_map, 1); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surfaces, src->surface_count); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, samplers, src->sampler_count); + VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding, embedded_samplers, src->embedded_sampler_count); + + if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, alloc, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) + return NULL; + + memcpy(bind_map, src, sizeof(*src)); + + memcpy(surfaces, src->surface_to_descriptor, + sizeof(*surfaces) * src->surface_count); + bind_map->surface_to_descriptor = surfaces; + memcpy(samplers, src->sampler_to_descriptor, + sizeof(*samplers) * src->sampler_count); + bind_map->sampler_to_descriptor = samplers; + memcpy(embedded_samplers, src->embedded_sampler_to_binding, + sizeof(*embedded_samplers) * 
src->embedded_sampler_count); + bind_map->embedded_sampler_to_binding = embedded_samplers; + + return bind_map; +} diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c index 6cb46bc7acf..410e5aa244d 100644 --- a/src/intel/vulkan/genX_shader.c +++ b/src/intel/vulkan/genX_shader.c @@ -1272,6 +1272,75 @@ emit_cs_shader(struct anv_batch *batch, #endif } +void +genX(write_cs_descriptor)(struct anv_dgc_cs_descriptor *desc, + struct anv_device *device, + struct anv_shader *shader) +{ + const struct anv_pipeline_bind_map *bind_map = &shader->bind_map; + const struct anv_push_range *push_range = &bind_map->push_ranges[0]; + + *desc = (struct anv_dgc_cs_descriptor) { + .push_data_offset = 32 * (push_range->set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS ? + push_range->start : 0), + }; + + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data_const(shader->prog_data); + const struct intel_cs_dispatch_info dispatch = + brw_cs_get_dispatch_info(device->info, prog_data, NULL); + + desc->right_mask = dispatch.right_mask; + desc->threads = dispatch.threads; + desc->simd_size = dispatch.simd_size; + +#if GFX_VERx10 >= 125 + GENX(COMPUTE_WALKER_pack)(NULL, desc->gfx125.compute_walker, + &(struct GENX(COMPUTE_WALKER)) { + GENX(COMPUTE_WALKER_header), + .body = { + .PostSync.MOCS = anv_mocs(device, NULL, 0), + }, + }); + + assert(sizeof(desc->gfx125.compute_walker) > + sizeof(shader->cs.gfx125.compute_walker_body)); + for (uint32_t i = 0; i < ARRAY_SIZE(shader->cs.gfx125.compute_walker_body); i++) + desc->gfx125.compute_walker[1 + i] |= shader->cs.gfx125.compute_walker_body[i]; + desc->gfx125.inline_dwords_count = bind_map->inline_dwords_count; + assert(sizeof(desc->gfx125.inline_dwords) == + sizeof(bind_map->inline_dwords)); + memcpy(desc->gfx125.inline_dwords, + bind_map->inline_dwords, + sizeof(bind_map->inline_dwords)); + +#else + assert(sizeof(desc->gfx9.media_vfe_state) == + shader->cs.gfx9.vfe.len * 4); + 
assert(sizeof(desc->gfx9.interface_descriptor_data) == + sizeof(shader->cs.gfx9.idd)); + + memcpy(desc->gfx9.media_vfe_state, + &shader->cmd_data[shader->cs.gfx9.vfe.offset], + shader->cs.gfx9.vfe.len * 4); + memcpy(desc->gfx9.interface_descriptor_data, + shader->cs.gfx9.idd, + sizeof(desc->gfx9.interface_descriptor_data)); + + desc->gfx9.n_threads = dispatch.threads; + desc->gfx9.cross_thread_push_size = prog_data->push.cross_thread.size; + desc->gfx9.per_thread_push_size = prog_data->push.per_thread.size; + desc->gfx9.subgroup_id_offset = + offsetof(struct anv_push_constants, cs.subgroup_id) - + (32 * push_range->start + prog_data->push.cross_thread.size); + + GENX(GPGPU_WALKER_pack)(NULL, desc->gfx9.gpgpu_walker, + &(struct GENX(GPGPU_WALKER)) { + GENX(GPGPU_WALKER_header), + }); +#endif +} + void genX(init_instructions)(struct anv_physical_device *device) { diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index b4ee5e2e775..1b1adbc203b 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -160,6 +160,7 @@ libanv_files = files( 'anv_cmd_buffer.c', 'anv_descriptor_set.c', 'anv_device.c', + 'anv_dgc_set.c', 'anv_embedded_sampler.c', 'anv_event.c', 'anv_formats.c',