nvk: Allocate QMDs from a heap on Maxwell A and earlier

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34127>
Faith Ekstrand 2025-03-12 11:24:12 -05:00 committed by Marge Bot
parent 94787116b1
commit 7939331dde
7 changed files with 153 additions and 2 deletions

View file

@@ -49,6 +49,7 @@ nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
util_dynarray_fini(&cmd->pushes);
vk_command_buffer_finish(&cmd->vk);
vk_free(&pool->vk.alloc, cmd);
@@ -82,6 +83,7 @@ nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
list_inithead(&cmd->owned_mem);
list_inithead(&cmd->owned_gart_mem);
list_inithead(&cmd->owned_qmd);
util_dynarray_init(&cmd->pushes, NULL);
*cmd_buffer_out = &cmd->vk;
@@ -104,6 +106,7 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
cmd->upload_mem = NULL;
cmd->push_mem = NULL;
cmd->push_mem_limit = NULL;
@@ -290,6 +293,52 @@ nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
return VK_SUCCESS;
}
VkResult
nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
uint32_t size, uint32_t alignment,
uint64_t *addr, void **ptr)
{
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
/* On Maxwell B and later, we have INVALIDATE_SKED_CACHES, so we can just
* allocate from wherever we want (the upload stream in this case).
*/
if (pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
return nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, ptr);
/* The GPU compute scheduler (SKED) has a cache. Maxwell B added the
* INVALIDATE_SKED_CACHES instruction to manage that cache, and we emit it
* at the top of every command buffer so that we always pick up fresh
* copies of whatever QMDs we've written from the CPU. On Maxwell A and
* earlier, the SKED cache still exists in some form but we have no way to
* invalidate it. If a compute shader has been dispatched from a QMD at an
* address that's no longer valid, the SKED cache can fault. To work
* around this, on Maxwell A and earlier we allocate QMDs from a dedicated
* QMD heap on the device.
*
* Prior to Maxwell B, the GPU doesn't seem to need any sort of SKED cache
* invalidation to pick up new writes from the CPU. However, we still have
* to worry about faults caused by the SKED cache holding a stale address.
* Allocating all QMDs from a central heap that never throws memory away
* seems to be sufficient to avoid those faults.
*/
assert(size <= NVK_CMD_QMD_SIZE);
assert(alignment <= NVK_CMD_QMD_SIZE);
struct nvk_cmd_qmd *qmd;
VkResult result = nvk_cmd_pool_alloc_qmd(nvk_cmd_buffer_pool(cmd), &qmd);
if (unlikely(result != VK_SUCCESS))
return result;
list_addtail(&qmd->link, &cmd->owned_qmd);
*addr = qmd->addr;
*ptr = qmd->map;
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
@@ -573,7 +622,7 @@ nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
}
if ((barriers & NVK_BARRIER_INVALIDATE_QMD_DATA) &&
pdev->info.cls_eng3d >= MAXWELL_COMPUTE_B)
pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
}
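
For reference, the Maxwell B+ half of the comment above boils down to emitting one compute-class method when command buffer recording starts. Here is a minimal sketch of that path; nvk_cmd_buffer_push() and its two-dword count are assumptions used only for illustration, while P_IMMD, NVB1C0, INVALIDATE_SKED_CACHES, and the device/physical-device helpers are taken from the diff itself:

/* Sketch (not part of this patch): invalidate the SKED cache at the top of
 * a command buffer on Maxwell B+ so QMDs written from the CPU are seen
 * fresh.  nvk_cmd_buffer_push() is assumed to return a nv_push context
 * with room for the requested number of dwords. */
static void
sked_invalidate_at_begin(struct nvk_cmd_buffer *cmd)
{
   const struct nvk_physical_device *pdev =
      nvk_device_physical(nvk_cmd_buffer_device(cmd));

   /* There is no INVALIDATE_SKED_CACHES before Maxwell B; those GPUs use
    * the QMD heap instead. */
   if (pdev->info.cls_compute < MAXWELL_COMPUTE_B)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
}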

View file

@@ -216,6 +216,7 @@ struct nvk_cmd_buffer {
*/
struct list_head owned_mem;
struct list_head owned_gart_mem;
struct list_head owned_qmd;
struct nvk_cmd_mem *upload_mem;
uint32_t upload_offset;
@@ -342,6 +343,10 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
uint64_t *addr);
VkResult nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
uint32_t size, uint32_t alignment,
uint64_t *addr, void **ptr);
void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
const VkDependencyInfo *dep,
bool wait);

View file

@@ -190,9 +190,13 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
uint32_t qmd[64];
nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
void *qmd_map;
result = nvk_cmd_buffer_alloc_qmd(cmd, sizeof(qmd), 0x100,
&qmd_addr, &qmd_map);
if (unlikely(result != VK_SUCCESS))
return result;
memcpy(qmd_map, qmd, sizeof(qmd));
}
*qmd_addr_out = qmd_addr;

View file

@@ -44,6 +44,39 @@ nvk_cmd_mem_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_mem *mem)
vk_free(&pool->vk.alloc, mem);
}
static VkResult
nvk_cmd_qmd_create(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd **qmd_out)
{
struct nvk_device *dev = nvk_cmd_pool_device(pool);
struct nvk_cmd_qmd *qmd;
VkResult result;
qmd = vk_zalloc(&pool->vk.alloc, sizeof(*qmd), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (qmd == NULL)
return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
result = nvk_heap_alloc(dev, &dev->qmd_heap,
NVK_CMD_QMD_SIZE, NVK_CMD_QMD_SIZE,
&qmd->addr, &qmd->map);
if (result != VK_SUCCESS) {
vk_free(&pool->vk.alloc, qmd);
return result;
}
*qmd_out = qmd;
return VK_SUCCESS;
}
static void
nvk_cmd_qmd_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd *qmd)
{
struct nvk_device *dev = nvk_cmd_pool_device(pool);
nvk_heap_free(dev, &dev->qmd_heap, qmd->addr, NVK_CMD_QMD_SIZE);
vk_free(&pool->vk.alloc, qmd);
}
VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateCommandPool(VkDevice _device,
const VkCommandPoolCreateInfo *pCreateInfo,
@@ -67,6 +100,7 @@ nvk_CreateCommandPool(VkDevice _device,
list_inithead(&pool->free_mem);
list_inithead(&pool->free_gart_mem);
list_inithead(&pool->free_qmd);
*pCmdPool = nvk_cmd_pool_to_handle(pool);
@@ -85,6 +119,10 @@ nvk_cmd_pool_destroy_mem(struct nvk_cmd_pool *pool)
nvk_cmd_mem_destroy(pool, mem);
list_inithead(&pool->free_gart_mem);
list_for_each_entry_safe(struct nvk_cmd_qmd, qmd, &pool->free_qmd, link)
nvk_cmd_qmd_destroy(pool, qmd);
list_inithead(&pool->free_qmd);
}
VkResult
@@ -108,6 +146,21 @@ nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool, bool force_gart,
return nvk_cmd_mem_create(pool, force_gart, mem_out);
}
VkResult
nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
struct nvk_cmd_qmd **qmd_out)
{
if (!list_is_empty(&pool->free_qmd)) {
struct nvk_cmd_qmd *qmd =
list_first_entry(&pool->free_qmd, struct nvk_cmd_qmd, link);
list_del(&qmd->link);
*qmd_out = qmd;
return VK_SUCCESS;
}
return nvk_cmd_qmd_create(pool, qmd_out);
}
void
nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
struct list_head *mem_list)
@@ -124,6 +177,14 @@ nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
list_inithead(mem_list);
}
void
nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
struct list_head *qmd_list)
{
list_splicetail(qmd_list, &pool->free_qmd);
list_inithead(qmd_list);
}
VKAPI_ATTR void VKAPI_CALL
nvk_DestroyCommandPool(VkDevice _device,
VkCommandPool commandPool,
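
Taken together, the pool code above is what gives the QMD heap the "never throws memory away" behavior described in nvk_cmd_buffer_alloc_qmd: a QMD slot bounces between a command buffer's owned_qmd list and the pool's free_qmd list, and is only returned to dev->qmd_heap when the pool itself is destroyed. A rough lifecycle sketch, using only functions that appear in this diff:

/* Lifecycle of one QMD slot on Maxwell A and earlier (sketch):
 *
 *   dispatch -> nvk_cmd_upload_qmd()
 *                 -> nvk_cmd_buffer_alloc_qmd()
 *                      -> nvk_cmd_pool_alloc_qmd()   pop pool->free_qmd, or
 *                         nvk_cmd_qmd_create()       carve NVK_CMD_QMD_SIZE
 *                                                    from dev->qmd_heap
 *                 slot is added to cmd->owned_qmd
 *
 *   reset / free of the command buffer
 *                 -> nvk_cmd_pool_free_qmd_list()    splice owned_qmd back
 *                                                    onto pool->free_qmd;
 *                                                    heap memory is kept
 *
 *   destroy of the command pool
 *                 -> nvk_cmd_qmd_destroy()           nvk_heap_free() back to
 *                                                    dev->qmd_heap
 */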

View file

@@ -21,12 +21,23 @@ struct nvk_cmd_mem {
struct list_head link;
};
#define NVK_CMD_QMD_SIZE 256
struct nvk_cmd_qmd {
uint64_t addr;
void *map;
/** Link in nvk_cmd_pool::free_qmd or nvk_cmd_buffer::owned_qmd */
struct list_head link;
};
struct nvk_cmd_pool {
struct vk_command_pool vk;
/** List of nvk_cmd_mem */
struct list_head free_mem;
struct list_head free_gart_mem;
struct list_head free_qmd;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_cmd_pool, vk.base, VkCommandPool,
@@ -41,9 +52,13 @@ nvk_cmd_pool_device(struct nvk_cmd_pool *pool)
VkResult nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool,
bool force_gart,
struct nvk_cmd_mem **mem_out);
VkResult nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
struct nvk_cmd_qmd **qmd_out);
void nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
struct list_head *mem_list);
void nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
struct list_head *mem_list);
void nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
struct list_head *qmd_list);
#endif /* NVK_CMD_POOL_H */

View file

@@ -16,6 +16,7 @@
#include "cl9097.h"
#include "clb097.h"
#include "clb197.h"
#include "clc397.h"
static void
@@ -228,6 +229,14 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_shader_heap;
if (pdev->info.cls_eng3d < MAXWELL_B) {
result = nvk_heap_init(dev, &dev->qmd_heap,
NVKMD_MEM_LOCAL, NVKMD_MEM_MAP_WR,
0 /* overalloc */, false /* contiguous */);
if (result != VK_SUCCESS)
goto fail_event_heap;
}
nvk_slm_area_init(&dev->slm);
if (pdev->info.cls_eng3d >= FERMI_A &&
@@ -271,6 +280,9 @@ fail_vab_memory:
nvkmd_mem_unref(dev->vab_memory);
fail_slm:
nvk_slm_area_finish(&dev->slm);
if (pdev->info.cls_eng3d < MAXWELL_B)
nvk_heap_finish(dev, &dev->qmd_heap);
fail_event_heap:
nvk_heap_finish(dev, &dev->event_heap);
fail_shader_heap:
nvk_heap_finish(dev, &dev->shader_heap);
@@ -301,6 +313,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
if (!dev)
return;
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
if (dev->copy_queries)
vk_shader_destroy(&dev->vk, &dev->copy_queries->vk, &dev->vk.alloc);
@@ -316,6 +330,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
nvk_upload_queue_sync(dev, &dev->upload);
nvk_slm_area_finish(&dev->slm);
if (pdev->info.cls_eng3d < MAXWELL_B)
nvk_heap_finish(dev, &dev->qmd_heap);
nvk_heap_finish(dev, &dev->event_heap);
nvk_heap_finish(dev, &dev->shader_heap);
nvk_edb_bview_cache_finish(dev, &dev->edb_bview_cache);

View file

@@ -46,6 +46,7 @@ struct nvk_device {
struct nvk_edb_bview_cache edb_bview_cache;
struct nvk_heap shader_heap;
struct nvk_heap event_heap;
struct nvk_heap qmd_heap;
struct nvk_slm_area slm;
struct nvkmd_mem *vab_memory;