diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 4503d0631a2..5f781fbfef6 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -49,6 +49,7 @@ nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
 
    nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
    nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
+   nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
    util_dynarray_fini(&cmd->pushes);
    vk_command_buffer_finish(&cmd->vk);
    vk_free(&pool->vk.alloc, cmd);
@@ -82,6 +83,7 @@ nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
 
    list_inithead(&cmd->owned_mem);
    list_inithead(&cmd->owned_gart_mem);
+   list_inithead(&cmd->owned_qmd);
    util_dynarray_init(&cmd->pushes, NULL);
 
    *cmd_buffer_out = &cmd->vk;
@@ -104,6 +106,7 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
 
    nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
    nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
+   nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
    cmd->upload_mem = NULL;
    cmd->push_mem = NULL;
    cmd->push_mem_limit = NULL;
@@ -290,6 +293,52 @@ nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
    return VK_SUCCESS;
 }
 
+VkResult
+nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
+                         uint32_t size, uint32_t alignment,
+                         uint64_t *addr, void **ptr)
+{
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
+   /* On Maxwell B and later, we have INVALIDATE_SKED_CACHES so we can just
+    * allocate from wherever we want (the upload stream in this case).
+    */
+   if (pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
+      return nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, ptr);
+
+   /* The GPU compute scheduler (SKED) has a cache. Maxwell B added the
+    * INVALIDATE_SKED_CACHES instruction to manage the SKED cache. We call
+    * that at the top of every command buffer so that we always pick up
+    * whatever QMDs we've written from the CPU fresh. On Maxwell A and
+    * earlier, the SKED cache still exists in some form but we have no way to
+    * invalidate it. If a compute shader has been dispatched from a QMD at an
+    * address that's no longer valid, the SKED cache can fault. To work
+    * around this, we have a QMD heap on the device and we allocate QMDs from
+    * that on Maxwell A and earlier.
+    *
+    * Prior to Maxwell B, the GPU doesn't seem to need any sort of SKED cache
+    * invalidation to pick up new writes from the CPU. However, we do still
+    * have to worry about faults that may be caused by the SKED cache
+    * containing a stale address. Just allocating all QMDs from a central
+    * heap which never throws memory away seems to be sufficient for this.
+    */
+   assert(size <= NVK_CMD_QMD_SIZE);
+   assert(alignment <= NVK_CMD_QMD_SIZE);
+
+   struct nvk_cmd_qmd *qmd;
+   VkResult result = nvk_cmd_pool_alloc_qmd(nvk_cmd_buffer_pool(cmd), &qmd);
+   if (unlikely(result != VK_SUCCESS))
+      return result;
+
+   list_addtail(&qmd->link, &cmd->owned_qmd);
+
+   *addr = qmd->addr;
+   *ptr = qmd->map;
+
+   return VK_SUCCESS;
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                        const VkCommandBufferBeginInfo *pBeginInfo)
@@ -573,7 +622,7 @@ nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
    }
 
    if ((barriers & NVK_BARRIER_INVALIDATE_QMD_DATA) &&
-       pdev->info.cls_eng3d >= MAXWELL_COMPUTE_B)
+       pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
       P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
 }
 
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index 03c5120862d..a001f82398a 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -216,6 +216,7 @@ struct nvk_cmd_buffer {
     */
    struct list_head owned_mem;
    struct list_head owned_gart_mem;
+   struct list_head owned_qmd;
 
    struct nvk_cmd_mem *upload_mem;
    uint32_t upload_offset;
@@ -342,6 +343,10 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
 
 VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                           uint64_t *addr);
+VkResult nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
+                                  uint32_t size, uint32_t alignment,
+                                  uint64_t *addr, void **ptr);
+
 void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                             const VkDependencyInfo *dep,
                             bool wait);
diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index c9dad593d6a..b777638dce3 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -190,9 +190,13 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
       uint32_t qmd[64];
       nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
 
-      result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
+      void *qmd_map;
+      result = nvk_cmd_buffer_alloc_qmd(cmd, sizeof(qmd), 0x100,
+                                        &qmd_addr, &qmd_map);
       if (unlikely(result != VK_SUCCESS))
          return result;
+
+      memcpy(qmd_map, qmd, sizeof(qmd));
    }
 
    *qmd_addr_out = qmd_addr;
diff --git a/src/nouveau/vulkan/nvk_cmd_pool.c b/src/nouveau/vulkan/nvk_cmd_pool.c
index e16a02682f6..c652c520861 100644
--- a/src/nouveau/vulkan/nvk_cmd_pool.c
+++ b/src/nouveau/vulkan/nvk_cmd_pool.c
@@ -44,6 +44,39 @@ nvk_cmd_mem_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_mem *mem)
    vk_free(&pool->vk.alloc, mem);
 }
 
+static VkResult
+nvk_cmd_qmd_create(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd **qmd_out)
+{
+   struct nvk_device *dev = nvk_cmd_pool_device(pool);
+   struct nvk_cmd_qmd *qmd;
+   VkResult result;
+
+   qmd = vk_zalloc(&pool->vk.alloc, sizeof(*qmd), 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (qmd == NULL)
+      return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = nvk_heap_alloc(dev, &dev->qmd_heap,
+                           NVK_CMD_QMD_SIZE, NVK_CMD_QMD_SIZE,
+                           &qmd->addr, &qmd->map);
+   if (result != VK_SUCCESS) {
+      vk_free(&pool->vk.alloc, qmd);
+      return result;
+   }
+
+   *qmd_out = qmd;
+   return VK_SUCCESS;
+}
+
+static void
+nvk_cmd_qmd_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd *qmd)
+{
+   struct nvk_device *dev = nvk_cmd_pool_device(pool);
+
+   nvk_heap_free(dev, &dev->qmd_heap, qmd->addr, NVK_CMD_QMD_SIZE);
+   vk_free(&pool->vk.alloc, qmd);
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 nvk_CreateCommandPool(VkDevice _device,
                       const VkCommandPoolCreateInfo *pCreateInfo,
@@ -67,6 +100,7 @@ nvk_CreateCommandPool(VkDevice _device,
 
    list_inithead(&pool->free_mem);
    list_inithead(&pool->free_gart_mem);
+   list_inithead(&pool->free_qmd);
 
    *pCmdPool = nvk_cmd_pool_to_handle(pool);
 
@@ -85,6 +119,10 @@ nvk_cmd_pool_destroy_mem(struct nvk_cmd_pool *pool)
       nvk_cmd_mem_destroy(pool, mem);
    list_inithead(&pool->free_gart_mem);
+
+   list_for_each_entry_safe(struct nvk_cmd_qmd, qmd, &pool->free_qmd, link)
+      nvk_cmd_qmd_destroy(pool, qmd);
+   list_inithead(&pool->free_qmd);
 }
 
 VkResult
@@ -108,6 +146,21 @@ nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool, bool force_gart,
    return nvk_cmd_mem_create(pool, force_gart, mem_out);
 }
 
+VkResult
+nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
+                       struct nvk_cmd_qmd **qmd_out)
+{
+   if (!list_is_empty(&pool->free_qmd)) {
+      struct nvk_cmd_qmd *qmd =
+         list_first_entry(&pool->free_qmd, struct nvk_cmd_qmd, link);
+      list_del(&qmd->link);
+      *qmd_out = qmd;
+      return VK_SUCCESS;
+   }
+
+   return nvk_cmd_qmd_create(pool, qmd_out);
+}
+
 void
 nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
                            struct list_head *mem_list)
@@ -124,6 +177,14 @@ nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
    list_inithead(mem_list);
 }
 
+void
+nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
+                           struct list_head *qmd_list)
+{
+   list_splicetail(qmd_list, &pool->free_qmd);
+   list_inithead(qmd_list);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 nvk_DestroyCommandPool(VkDevice _device,
                        VkCommandPool commandPool,
diff --git a/src/nouveau/vulkan/nvk_cmd_pool.h b/src/nouveau/vulkan/nvk_cmd_pool.h
index 01f14079ffb..6d556ef3952 100644
--- a/src/nouveau/vulkan/nvk_cmd_pool.h
+++ b/src/nouveau/vulkan/nvk_cmd_pool.h
@@ -21,12 +21,23 @@ struct nvk_cmd_mem {
    struct list_head link;
 };
 
+#define NVK_CMD_QMD_SIZE 256
+
+struct nvk_cmd_qmd {
+   uint64_t addr;
+   void *map;
+
+   /** Link in nvk_cmd_pool::free_qmd or nvk_cmd_buffer::owned_qmd */
+   struct list_head link;
+};
+
 struct nvk_cmd_pool {
    struct vk_command_pool vk;
 
    /** List of nvk_cmd_mem */
    struct list_head free_mem;
    struct list_head free_gart_mem;
+   struct list_head free_qmd;
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_cmd_pool, vk.base, VkCommandPool,
@@ -41,9 +52,13 @@ nvk_cmd_pool_device(struct nvk_cmd_pool *pool)
 
 VkResult nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool, bool force_gart,
                                 struct nvk_cmd_mem **mem_out);
+VkResult nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
+                                struct nvk_cmd_qmd **qmd_out);
 void nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
                                 struct list_head *mem_list);
 void nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
                                      struct list_head *mem_list);
+void nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
+                                struct list_head *mem_list);
 
 #endif /* NVK_CMD_POOL_H */
diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c
index 38d4c9e17ca..df05a9f20e4 100644
--- a/src/nouveau/vulkan/nvk_device.c
+++ b/src/nouveau/vulkan/nvk_device.c
@@ -16,6 +16,7 @@
 
 #include "cl9097.h"
 #include "clb097.h"
+#include "clb197.h"
 #include "clc397.h"
 
 static void
@@ -228,6 +229,14 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
    if (result != VK_SUCCESS)
       goto fail_shader_heap;
 
+   if (pdev->info.cls_eng3d < MAXWELL_B) {
+      result = nvk_heap_init(dev, &dev->qmd_heap,
+                             NVKMD_MEM_LOCAL, NVKMD_MEM_MAP_WR,
+                             0 /* overalloc */, false /* contiguous */);
+      if (result != VK_SUCCESS)
+         goto fail_event_heap;
+   }
+
    nvk_slm_area_init(&dev->slm);
 
    if (pdev->info.cls_eng3d >= FERMI_A &&
@@ -271,6 +280,9 @@ fail_vab_memory:
    nvkmd_mem_unref(dev->vab_memory);
 fail_slm:
    nvk_slm_area_finish(&dev->slm);
+   if (pdev->info.cls_eng3d < MAXWELL_B)
+      nvk_heap_finish(dev, &dev->qmd_heap);
+fail_event_heap:
    nvk_heap_finish(dev, &dev->event_heap);
 fail_shader_heap:
    nvk_heap_finish(dev, &dev->shader_heap);
@@ -301,6 +313,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!dev)
       return;
 
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
    if (dev->copy_queries)
       vk_shader_destroy(&dev->vk, &dev->copy_queries->vk, &dev->vk.alloc);
 
@@ -316,6 +330,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    nvk_upload_queue_sync(dev, &dev->upload);
 
    nvk_slm_area_finish(&dev->slm);
+   if (pdev->info.cls_eng3d < MAXWELL_B)
+      nvk_heap_finish(dev, &dev->qmd_heap);
    nvk_heap_finish(dev, &dev->event_heap);
    nvk_heap_finish(dev, &dev->shader_heap);
    nvk_edb_bview_cache_finish(dev, &dev->edb_bview_cache);
diff --git a/src/nouveau/vulkan/nvk_device.h b/src/nouveau/vulkan/nvk_device.h
index 4e38adc58f4..30c99ecc4e4 100644
--- a/src/nouveau/vulkan/nvk_device.h
+++ b/src/nouveau/vulkan/nvk_device.h
@@ -46,6 +46,7 @@ struct nvk_device {
    struct nvk_edb_bview_cache edb_bview_cache;
    struct nvk_heap shader_heap;
    struct nvk_heap event_heap;
+   struct nvk_heap qmd_heap;
    struct nvk_slm_area slm;
    struct nvkmd_mem *vab_memory;
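
The pool-level recycling added above mirrors the existing nvk_cmd_mem free-list scheme: command buffers own QMD slots while recording, and on reset or destroy those slots go back onto the pool's free list rather than back to the heap, so an address that may still sit in the SKED cache always stays backed by valid memory. The sketch below illustrates that pattern with hypothetical, simplified types (qmd_slot, qmd_pool, cmd_buf are illustrative stand-ins, not the driver's nvk_cmd_qmd/nvk_cmd_pool structs), assuming a plain singly-linked free list in place of util/list.h.

/* Hypothetical, simplified sketch of the QMD free-list recycling pattern;
 * the real code uses nvk_cmd_pool_alloc_qmd()/nvk_cmd_pool_free_qmd_list()
 * and the device-level qmd_heap. */
#include <stdint.h>
#include <stdlib.h>

struct qmd_slot {
   uint64_t addr;          /* GPU address; never handed back to the heap */
   void *map;              /* CPU mapping used to fill in the QMD */
   struct qmd_slot *next;  /* link in pool->free_slots or cmd->owned_slots */
};

struct qmd_pool { struct qmd_slot *free_slots; };
struct cmd_buf  { struct qmd_slot *owned_slots; };

/* Reuse a recycled slot if one is available; otherwise carve a fresh one
 * out of a heap that never shrinks (stands in for nvk_heap_alloc()). */
static struct qmd_slot *
pool_alloc_slot(struct qmd_pool *pool)
{
   if (pool->free_slots) {
      struct qmd_slot *slot = pool->free_slots;
      pool->free_slots = slot->next;
      return slot;
   }
   return calloc(1, sizeof(struct qmd_slot));
}

/* On command buffer reset/destroy: splice every owned slot back onto the
 * pool's free list.  The GPU addresses stay allocated, so a stale SKED
 * cache entry can never point at freed memory. */
static void
cmd_release_slots(struct qmd_pool *pool, struct cmd_buf *cmd)
{
   while (cmd->owned_slots) {
      struct qmd_slot *slot = cmd->owned_slots;
      cmd->owned_slots = slot->next;
      slot->next = pool->free_slots;
      pool->free_slots = slot;
   }
}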