diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build index dc0072bbf99..bcb217c4608 100644 --- a/src/nouveau/vulkan/meson.build +++ b/src/nouveau/vulkan/meson.build @@ -41,6 +41,8 @@ nvk_files = files( 'nvk_image.h', 'nvk_image_view.c', 'nvk_image_view.h', + 'nvk_indirect_execution_set.c', + 'nvk_indirect_execution_set.h', 'nvk_instance.c', 'nvk_instance.h', 'nvk_mme.c', diff --git a/src/nouveau/vulkan/nvk_indirect_execution_set.c b/src/nouveau/vulkan/nvk_indirect_execution_set.c new file mode 100644 index 00000000000..cd327fc302c --- /dev/null +++ b/src/nouveau/vulkan/nvk_indirect_execution_set.c @@ -0,0 +1,382 @@ +/* + * Copyright © 2024 Collabora Ltd. and Red Hat Inc. + * SPDX-License-Identifier: MIT + */ +#include "nvk_indirect_execution_set.h" + +#include "nvk_cmd_buffer.h" +#include "nvk_entrypoints.h" +#include "nvk_device.h" +#include "nvk_shader.h" +#include "vk_pipeline.h" + +static void * +nvk_ies_map(struct nvk_indirect_execution_set *ies, uint32_t index) +{ + assert(index < ies->count); + return ies->mem->map + (index * (size_t)ies->stride_B); +} + +void +nvk_ies_cs_qmd_init(struct nvk_physical_device *pdev, + struct nvk_ies_cs_qmd *qmd, + struct nvk_shader *shader) +{ + struct nak_qmd_info qmd_info = { + .addr = shader->hdr_addr, + .smem_size = shader->info.cs.smem_size, + .smem_max = NVK_MAX_SHARED_SIZE, + }; + + assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs)); + for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) { + const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c]; + switch (cbuf->type) { + case NVK_CBUF_TYPE_ROOT_DESC: + /* This one gets patched with the actual address */ + assert(c == 0); + qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) { + .index = c, + .addr = 0xc0ffee000, + .size = sizeof(struct nvk_root_descriptor_table), + }; + break; + + case NVK_CBUF_TYPE_SHADER_DATA: + qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) { + .index = c, + .addr = 
shader->data_addr, + .size = shader->data_size, + }; + break; + + default: + unreachable("Unsupported cbuf type"); + } + } + + nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, + qmd->qmd, sizeof(qmd->qmd)); +} + +static void +nvk_ies_set_cs(struct nvk_device *dev, + struct nvk_indirect_execution_set *ies, + uint32_t index, + struct nvk_shader *shader) +{ + struct nvk_ies_cs_qmd qmd = {}; + nvk_ies_cs_qmd_init(nvk_device_physical(dev), &qmd, shader); + + assert(sizeof(qmd) <= ies->stride_B); + memcpy(nvk_ies_map(ies, index), &qmd, sizeof(qmd)); +} + +uint16_t +nvk_ies_gfx_pipeline_max_dw_count(struct nvk_physical_device *pdev, + VkShaderStageFlags stages) +{ + gl_shader_stage last_vtgm = MESA_SHADER_VERTEX; + u_foreach_bit(s, stages) { + gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s); + if (stage != MESA_SHADER_FRAGMENT) + last_vtgm = stage; + } + + uint16_t push_dw = 0; + u_foreach_bit(s, stages) { + gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s); + push_dw += nvk_max_shader_push_dw(pdev, stage, stage == last_vtgm); + } + + return push_dw; +} + +static uint32_t +nvk_ies_stride_gfx_pipeline(struct nvk_physical_device *pdev, + VkShaderStageFlags stages) +{ + return sizeof(struct nvk_ies_gfx_pipeline) + + (4 * nvk_ies_gfx_pipeline_max_dw_count(pdev, stages)); +} + +static void +nvk_ies_set_gfx_pipeline(struct nvk_device *dev, + struct nvk_indirect_execution_set *ies, + uint32_t index, + struct vk_pipeline *pipeline) +{ + gl_shader_stage last_vtgm = MESA_SHADER_VERTEX; + struct nvk_shader *type_shader[6] = {}; + u_foreach_bit(s, pipeline->stages) { + gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s); + struct vk_shader *vk_shader = vk_pipeline_get_shader(pipeline, stage); + struct nvk_shader *shader = + container_of(vk_shader, struct nvk_shader, vk); + + assert(shader->info.stage == stage); + + if (stage != MESA_SHADER_FRAGMENT) + last_vtgm = stage; + + uint32_t type = mesa_to_nv9097_shader_type(stage); + type_shader[type] = shader; + } + + 
void *map = nvk_ies_map(ies, index); + + uint16_t dw_count = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(type_shader); i++) { + if (type_shader[i] == NULL) + continue; + + const uint16_t s_dw_count = type_shader[i]->info.stage == last_vtgm + ? type_shader[i]->vtgm_push_dw_count + : type_shader[i]->push_dw_count; + memcpy(map + sizeof(struct nvk_ies_gfx_pipeline) + dw_count * 4, + type_shader[i]->push_dw, s_dw_count * 4); + dw_count += s_dw_count; + } + + struct nvk_ies_gfx_pipeline hdr = { + .dw_count = dw_count, + }; + memcpy(map, &hdr, sizeof(hdr)); +} + +uint16_t +nvk_ies_gfx_shader_max_dw_count(struct nvk_physical_device *pdev, + VkShaderStageFlags stages, + bool last_vtgm) +{ + /* Each entry is a single shader so take the max */ + uint16_t max_push_dw = 0; + u_foreach_bit(s, stages) { + gl_shader_stage stage = vk_to_mesa_shader_stage(1 << s); + uint16_t push_dw = nvk_max_shader_push_dw(pdev, stage, last_vtgm); + max_push_dw = MAX2(max_push_dw, push_dw); + } + + return max_push_dw; +} + +static uint32_t +nvk_ies_stride_gfx_shader(struct nvk_physical_device *pdev, + VkShaderStageFlags stages) +{ + return sizeof(struct nvk_ies_gfx_shader) + + (4 * nvk_ies_gfx_shader_max_dw_count(pdev, stages, true)); +} + +static void +nvk_ies_set_gfx_shader(struct nvk_device *dev, + struct nvk_indirect_execution_set *ies, + uint32_t index, + struct nvk_shader *shader) +{ + struct nvk_ies_gfx_shader hdr = { + .dw_count = shader->push_dw_count, + .vtgm_dw_count = shader->vtgm_push_dw_count, + }; + + void *map = nvk_ies_map(ies, index); + memcpy(map, &hdr, sizeof(hdr)); + memcpy(map + sizeof(hdr), shader->push_dw, + 4 * MAX2(shader->push_dw_count, shader->vtgm_push_dw_count)); +} + +static void +nvk_ies_set_pipeline(struct nvk_device *dev, + struct nvk_indirect_execution_set *ies, + uint32_t index, struct vk_pipeline *pipeline) +{ + switch (ies->type) { + case NVK_IES_TYPE_CS_QMD: { + struct vk_shader *vk_shader = + vk_pipeline_get_shader(pipeline, MESA_SHADER_COMPUTE); + struct 
nvk_shader *shader = + container_of(vk_shader, struct nvk_shader, vk); + nvk_ies_set_cs(dev, ies, index, shader); + break; + } + + case NVK_IES_TYPE_GFX_PIPELINE: { + nvk_ies_set_gfx_pipeline(dev, ies, index, pipeline); + break; + } + + default: + unreachable("Invalid indirect execution set type"); + } +} + +static void +nvk_ies_set_shader(struct nvk_device *dev, + struct nvk_indirect_execution_set *ies, + uint32_t index, struct nvk_shader *shader) +{ + switch (ies->type) { + case NVK_IES_TYPE_CS_QMD: { + nvk_ies_set_cs(dev, ies, index, shader); + break; + } + + case NVK_IES_TYPE_GFX_SHADER: { + nvk_ies_set_gfx_shader(dev, ies, index, shader); + break; + } + + default: + unreachable("Invalid indirect execution set type"); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +nvk_CreateIndirectExecutionSetEXT(VkDevice _device, + const VkIndirectExecutionSetCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkIndirectExecutionSetEXT *pIndirectExecutionSet) +{ + VK_FROM_HANDLE(nvk_device, dev, _device); + struct nvk_physical_device *pdev = nvk_device_physical(dev); + VkResult result; + + struct nvk_indirect_execution_set *ies = + vk_object_zalloc(&dev->vk, pAllocator, sizeof(*ies), + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT); + if (ies == NULL) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + switch (pCreateInfo->type) { + case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT: { + VK_FROM_HANDLE(vk_pipeline, pipeline, + pCreateInfo->info.pPipelineInfo->initialPipeline); + + ies->count = pCreateInfo->info.pPipelineInfo->maxPipelineCount; + if (pipeline->stages & VK_SHADER_STAGE_COMPUTE_BIT) { + assert(pipeline->stages == VK_SHADER_STAGE_COMPUTE_BIT); + ies->type = NVK_IES_TYPE_CS_QMD; + ies->stride_B = sizeof(struct nvk_ies_cs_qmd); + } else if (pipeline->stages & NVK_SHADER_STAGE_GRAPHICS_BITS) { + assert(!(pipeline->stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS)); + ies->type = NVK_IES_TYPE_GFX_PIPELINE; + ies->stride_B = 
nvk_ies_stride_gfx_pipeline(pdev, pipeline->stages); + } else { + unreachable("Unknown shader stage"); + } + break; + } + + case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT: { + const VkIndirectExecutionSetShaderInfoEXT *info = + pCreateInfo->info.pShaderInfo; + + VkShaderStageFlags stages = 0; + for (uint32_t i = 0; i < info->shaderCount; i++) { + VK_FROM_HANDLE(nvk_shader, shader, info->pInitialShaders[i]); + stages |= mesa_to_vk_shader_stage(shader->vk.stage); + } + + ies->count = info->maxShaderCount; + if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { + assert(stages == VK_SHADER_STAGE_COMPUTE_BIT); + ies->type = NVK_IES_TYPE_CS_QMD; + ies->stride_B = sizeof(struct nvk_ies_cs_qmd); + } else if (stages & NVK_SHADER_STAGE_GRAPHICS_BITS) { + assert(!(stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS)); + ies->type = NVK_IES_TYPE_GFX_SHADER; + ies->stride_B = nvk_ies_stride_gfx_shader(pdev, stages); + } else { + unreachable("Unknown shader stage"); + } + break; + } + + default: + unreachable("Unknown indirect execution set info type"); + } + + size_t size = ies->count * (size_t)ies->stride_B; + result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base, + size, 0, NVKMD_MEM_LOCAL, + NVKMD_MEM_MAP_WR, &ies->mem); + if (result != VK_SUCCESS) { + vk_object_free(&dev->vk, pAllocator, ies); + return result; + } + + switch (pCreateInfo->type) { + case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT: { + VK_FROM_HANDLE(vk_pipeline, pipeline, + pCreateInfo->info.pPipelineInfo->initialPipeline); + nvk_ies_set_pipeline(dev, ies, 0, pipeline); + break; + } + + case VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT: { + const VkIndirectExecutionSetShaderInfoEXT *info = + pCreateInfo->info.pShaderInfo; + + for (uint32_t i = 0; i < info->shaderCount; i++) { + VK_FROM_HANDLE(nvk_shader, shader, info->pInitialShaders[i]); + nvk_ies_set_shader(dev, ies, i, shader); + } + break; + } + + default: + unreachable("Unknown indirect execution set info type"); + } + + 
*pIndirectExecutionSet = nvk_indirect_execution_set_to_handle(ies); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +nvk_DestroyIndirectExecutionSetEXT(VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(nvk_device, dev, _device); + VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet); + + if (ies == NULL) + return; + + nvkmd_mem_unref(ies->mem); + + vk_object_free(&dev->vk, pAllocator, ies); +} + +VKAPI_ATTR void VKAPI_CALL +nvk_UpdateIndirectExecutionSetPipelineEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetPipelineEXT *pExecutionSetWrites) +{ + VK_FROM_HANDLE(nvk_device, dev, _device); + VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutionSetWrites[i].pipeline); + nvk_ies_set_pipeline(dev, ies, pExecutionSetWrites[i].index, pipeline); + } +} + +VKAPI_ATTR void VKAPI_CALL +nvk_UpdateIndirectExecutionSetShaderEXT( + VkDevice _device, + VkIndirectExecutionSetEXT indirectExecutionSet, + uint32_t executionSetWriteCount, + const VkWriteIndirectExecutionSetShaderEXT *pExecutionSetWrites) +{ + VK_FROM_HANDLE(nvk_device, dev, _device); + VK_FROM_HANDLE(nvk_indirect_execution_set, ies, indirectExecutionSet); + + for (uint32_t i = 0; i < executionSetWriteCount; i++) { + VK_FROM_HANDLE(nvk_shader, shader, pExecutionSetWrites[i].shader); + nvk_ies_set_shader(dev, ies, pExecutionSetWrites[i].index, shader); + } +} diff --git a/src/nouveau/vulkan/nvk_indirect_execution_set.h b/src/nouveau/vulkan/nvk_indirect_execution_set.h new file mode 100644 index 00000000000..d98f8542612 --- /dev/null +++ b/src/nouveau/vulkan/nvk_indirect_execution_set.h @@ -0,0 +1,79 @@ +/* + * Copyright © 2024 Collabora Ltd. and Red Hat Inc. 
+ * SPDX-License-Identifier: MIT + */ +#ifndef NVK_INDIRECT_EXECUTION_SET_H +#define NVK_INDIRECT_EXECUTION_SET_H 1 + +#include "nvk_private.h" + +struct nvk_physical_device; +struct nvk_shader; +struct nvkmd_mem; + +enum nvk_ies_type { + NVK_IES_TYPE_CS_QMD, + NVK_IES_TYPE_GFX_SHADER, + NVK_IES_TYPE_GFX_PIPELINE, +}; + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct nvk_ies_cs_qmd { + uint32_t qmd[64]; +}; +PRAGMA_DIAGNOSTIC_POP + +static inline uint16_t +nvk_ies_cs_qmd_max_dw_count(struct nvk_physical_device *pdev) +{ + return 64; +} + +void nvk_ies_cs_qmd_init(struct nvk_physical_device *pdev, + struct nvk_ies_cs_qmd *qmd, + struct nvk_shader *shader); + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct nvk_ies_gfx_shader { + uint16_t dw_count; + uint16_t vtgm_dw_count; + uint32_t push[]; +}; +PRAGMA_DIAGNOSTIC_POP +static_assert(sizeof(struct nvk_ies_gfx_shader) == 4, + "nvk_ies_gfx_shader has no holes"); + +uint16_t nvk_ies_gfx_shader_max_dw_count(struct nvk_physical_device *pdev, + VkShaderStageFlags stages, + bool last_vtgm); + +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct nvk_ies_gfx_pipeline { + uint32_t dw_count; + uint32_t push[]; +}; +PRAGMA_DIAGNOSTIC_POP +static_assert(sizeof(struct nvk_ies_gfx_pipeline) == 4, + "nvk_ies_gfx_pipeline has no holes"); + +uint16_t nvk_ies_gfx_pipeline_max_dw_count(struct nvk_physical_device *pdev, + VkShaderStageFlags stages); + +struct nvk_indirect_execution_set { + struct vk_object_base base; + + enum nvk_ies_type type; + uint32_t stride_B; + uint32_t count; + + struct nvkmd_mem *mem; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_indirect_execution_set, base, + VkIndirectExecutionSetEXT, + VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT); + +#endif diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h index ec091afb879..80d3a033700 100644 --- a/src/nouveau/vulkan/nvk_shader.h +++ b/src/nouveau/vulkan/nvk_shader.h @@ -118,6 +118,9 @@ struct nvk_shader 
{ uint32_t *push_dw; }; +VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_shader, vk.base, VkShaderEXT, + VK_OBJECT_TYPE_SHADER_EXT); + extern const struct vk_device_shader_ops nvk_device_shader_ops; VkShaderStageFlags nvk_nak_stages(const struct nv_device_info *info);