diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c
index e164b4921fa..dbe891ff5ea 100644
--- a/src/nouveau/vulkan/nvk_device.c
+++ b/src/nouveau/vulkan/nvk_device.c
@@ -196,7 +196,7 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
     */
    result = nvk_heap_init(dev, &dev->shader_heap,
                           NOUVEAU_WS_BO_LOCAL | NOUVEAU_WS_BO_NO_SHARE,
-                          NOUVEAU_WS_BO_WR,
+                          0 /* map_flags */,
                           4096 /* overalloc */,
                           dev->pdev->info.cls_eng3d < VOLTA_A);
    if (result != VK_SUCCESS)
diff --git a/src/nouveau/vulkan/nvk_heap.c b/src/nouveau/vulkan/nvk_heap.c
index 9548a35067d..6f9e2fe4b1e 100644
--- a/src/nouveau/vulkan/nvk_heap.c
+++ b/src/nouveau/vulkan/nvk_heap.c
@@ -162,7 +162,10 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
    } else {
       *addr_out = heap->bos[bo_idx].bo->offset + bo_offset;
    }
-   *map_out = (char *)heap->bos[bo_idx].map + bo_offset;
+   if (map_out != NULL) {
+      assert(heap->bos[bo_idx].map != NULL);
+      *map_out = (char *)heap->bos[bo_idx].map + bo_offset;
+   }
 
    return VK_SUCCESS;
 }
@@ -215,15 +218,38 @@ nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
                 uint64_t *addr_out)
 {
    simple_mtx_lock(&heap->mutex);
-
-   void *map;
    VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
-                                           addr_out, &map);
-   if (result == VK_SUCCESS)
-      memcpy(map, data, size);
+                                           addr_out, NULL /* map */);
    simple_mtx_unlock(&heap->mutex);
 
-   return result;
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Now, kick off an upload of the shader data.
+    *
+    * This is a queued operation that the driver ensures happens before any
+    * more client work via semaphores. Because this is asynchronous and heap
+    * allocations are synchronous, we have to be a bit careful here. The heap
+    * only ever tracks the current known CPU state of everything while the
+    * upload queue makes that state valid at some point in the future.
+    *
+    * This can be especially tricky for very fast upload/free cycles such as
+    * if the client compiles a shader, throws it away without using it, and
+    * then compiles another shader that ends up at the same address. What
+    * makes this all correct is the fact that everything on the upload
+    * queue happens in a well-defined device-wide order. In this case the
+    * first shader will get uploaded and then the second will get uploaded
+    * over top of it. As long as we don't free the memory out from under the
+    * upload queue, everything will end up in the correct state by the time
+    * the client's shaders actually execute.
+    */
+   result = nvk_upload_queue_upload(dev, &dev->upload, *addr_out, data, size);
+   if (result != VK_SUCCESS) {
+      nvk_heap_free(dev, heap, *addr_out, size);
+      return result;
+   }
+
+   return VK_SUCCESS;
 }
 
 void
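
For context, here is a minimal caller-side sketch of the upload/free cycle that the new comment describes. It is not part of the patch: it only uses entry points visible in this diff (nvk_heap_upload, nvk_heap_free), but the data/size/alignment parameter order of nvk_heap_upload is inferred from the hunk, the header names are assumed, and example_upload_free_cycle, shader_a, and shader_b are made-up names for illustration.

/* Hypothetical sketch of the fast upload/free cycle discussed in the comment
 * added by this patch.  Parameter order of nvk_heap_upload() and the header
 * names are assumptions; shader_a/shader_b are made-up example payloads.
 */
#include "nvk_device.h"
#include "nvk_heap.h"

static VkResult
example_upload_free_cycle(struct nvk_device *dev, struct nvk_heap *heap,
                          const void *shader_a, size_t size_a,
                          const void *shader_b, size_t size_b)
{
   uint64_t addr_a, addr_b;

   /* First shader: the heap hands back a GPU address immediately (its
    * CPU-side view of the heap), while the actual copy is queued on
    * dev->upload and happens later in device order.
    */
   VkResult result = nvk_heap_upload(dev, heap, shader_a, size_a,
                                     256 /* alignment, assumed */, &addr_a);
   if (result != VK_SUCCESS)
      return result;

   /* The client throws the shader away without ever using it.  The heap may
    * hand the same range out again, but the already-queued copy of shader_a
    * is not cancelled.
    */
   nvk_heap_free(dev, heap, addr_a, size_a);

   /* The second shader may land at the same address.  Because uploads run in
    * a well-defined device-wide order, shader_b's copy executes after
    * shader_a's, so the memory holds shader_b by the time any client work
    * that uses it runs.
    */
   return nvk_heap_upload(dev, heap, shader_b, size_b,
                          256 /* alignment, assumed */, &addr_b);
}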