nvk: Allocate QMDs from a heap on Maxwell A and earlier
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34127>

This commit is contained in:
parent 94787116b1
commit 7939331dde

7 changed files with 153 additions and 2 deletions

@@ -49,6 +49,7 @@ nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
    nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
    nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
+   nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
    util_dynarray_fini(&cmd->pushes);
 
    vk_command_buffer_finish(&cmd->vk);
    vk_free(&pool->vk.alloc, cmd);

@@ -82,6 +83,7 @@ nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
    list_inithead(&cmd->owned_mem);
    list_inithead(&cmd->owned_gart_mem);
+   list_inithead(&cmd->owned_qmd);
    util_dynarray_init(&cmd->pushes, NULL);
 
    *cmd_buffer_out = &cmd->vk;

@@ -104,6 +106,7 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
    nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
    nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
+   nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
    cmd->upload_mem = NULL;
    cmd->push_mem = NULL;
    cmd->push_mem_limit = NULL;

@@ -290,6 +293,52 @@ nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
    return VK_SUCCESS;
 }
 
+VkResult
+nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
+                         uint32_t size, uint32_t alignment,
+                         uint64_t *addr, void **ptr)
+{
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
+   /* On Maxwell B and later, we have INVALIDATE_SKED_CACHES so we can just
+    * allocate from wherever we want (the upload stream in this case).
+    */
+   if (pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
+      return nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, ptr);
+
+   /* The GPU compute scheduler (SKED) has a cache.  Maxwell B added the
+    * INVALIDATE_SKED_CACHES instruction to manage the SKED cache.  We call
+    * that at the top of every command buffer so that we always pick up
+    * whatever QMDs we've written from the CPU fresh.  On Maxwell A and
+    * earlier, the SKED cache still exists in some form but we have no way
+    * to invalidate it.  If a compute shader has been dispatched from a QMD
+    * at an address that's no longer valid, the SKED cache can fault.  To
+    * work around this, we have a QMD heap on the device and we allocate
+    * QMDs from that on Maxwell A and earlier.
+    *
+    * Prior to Maxwell B, the GPU doesn't seem to need any sort of SKED
+    * cache invalidation to pick up new writes from the CPU.  However, we do
+    * still have to worry about faults that may be caused by the SKED cache
+    * containing a stale address.  Just allocating all QMDs from a central
+    * heap which never throws memory away seems to be sufficient for this.
+    */
+   assert(size <= NVK_CMD_QMD_SIZE);
+   assert(alignment <= NVK_CMD_QMD_SIZE);
+
+   struct nvk_cmd_qmd *qmd;
+   VkResult result = nvk_cmd_pool_alloc_qmd(nvk_cmd_buffer_pool(cmd), &qmd);
+   if (unlikely(result != VK_SUCCESS))
+      return result;
+
+   list_addtail(&qmd->link, &cmd->owned_qmd);
+
+   *addr = qmd->addr;
+   *ptr = qmd->map;
+
+   return VK_SUCCESS;
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                        const VkCommandBufferBeginInfo *pBeginInfo)
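
For context, here is the calling pattern the new helper expects, mirroring the nvk_cmd_upload_qmd change further down: allocate a slot, write the QMD through the returned CPU mapping, then hand the GPU address to the dispatch. This is illustration only, not part of the commit; emit_qmd_dispatch() is a hypothetical stand-in and the snippet assumes the same headers as the file being patched.

/* Illustration only (not in the diff): expected use of
 * nvk_cmd_buffer_alloc_qmd().  emit_qmd_dispatch() is a hypothetical
 * stand-in for whatever consumes the QMD's GPU address.
 */
static VkResult
write_one_qmd(struct nvk_cmd_buffer *cmd, const uint32_t qmd[64])
{
   uint64_t qmd_addr;
   void *qmd_map;

   /* 64 dwords = 256 bytes; size and alignment both satisfy the
    * NVK_CMD_QMD_SIZE asserts on the Maxwell A path.
    */
   VkResult result = nvk_cmd_buffer_alloc_qmd(cmd, 64 * sizeof(uint32_t),
                                              0x100, &qmd_addr, &qmd_map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(qmd_map, qmd, 64 * sizeof(uint32_t));   /* CPU writes the QMD */
   emit_qmd_dispatch(cmd, qmd_addr);               /* GPU consumes addr */
   return VK_SUCCESS;
}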

@@ -573,7 +622,7 @@ nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
    }
 
    if ((barriers & NVK_BARRIER_INVALIDATE_QMD_DATA) &&
-       pdev->info.cls_eng3d >= MAXWELL_COMPUTE_B)
+       pdev->info.cls_compute >= MAXWELL_COMPUTE_B)
       P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
 }
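
Note the bug fix folded into this hunk: INVALIDATE_SKED_CACHES is emitted as a compute-class method (NVB1C0, the Maxwell B compute class), so the guard now compares pdev->info.cls_compute, not the 3D class number cls_eng3d, against MAXWELL_COMPUTE_B.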

@@ -216,6 +216,7 @@ struct nvk_cmd_buffer {
     */
    struct list_head owned_mem;
    struct list_head owned_gart_mem;
+   struct list_head owned_qmd;
 
    struct nvk_cmd_mem *upload_mem;
    uint32_t upload_offset;

@@ -342,6 +343,10 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
 VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                           uint64_t *addr);
 
+VkResult nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
+                                  uint32_t size, uint32_t alignment,
+                                  uint64_t *addr, void **ptr);
+
 void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                             const VkDependencyInfo *dep,
                             bool wait);

@@ -190,9 +190,13 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
       uint32_t qmd[64];
       nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
 
-      result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
+      void *qmd_map;
+      result = nvk_cmd_buffer_alloc_qmd(cmd, sizeof(qmd), 0x100,
+                                        &qmd_addr, &qmd_map);
       if (unlikely(result != VK_SUCCESS))
          return result;
+
+      memcpy(qmd_map, qmd, sizeof(qmd));
    }
 
    *qmd_addr_out = qmd_addr;
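
Here sizeof(qmd) is 64 × 4 = 256 bytes, exactly NVK_CMD_QMD_SIZE, and the requested 0x100 alignment matches as well, so the asserts in nvk_cmd_buffer_alloc_qmd() hold. Unlike nvk_cmd_buffer_upload_data(), which copied on the caller's behalf, the new helper hands back a CPU mapping, hence the explicit memcpy().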

@@ -44,6 +44,39 @@ nvk_cmd_mem_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_mem *mem)
    vk_free(&pool->vk.alloc, mem);
 }
 
+static VkResult
+nvk_cmd_qmd_create(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd **qmd_out)
+{
+   struct nvk_device *dev = nvk_cmd_pool_device(pool);
+   struct nvk_cmd_qmd *qmd;
+   VkResult result;
+
+   qmd = vk_zalloc(&pool->vk.alloc, sizeof(*qmd), 8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (qmd == NULL)
+      return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = nvk_heap_alloc(dev, &dev->qmd_heap,
+                           NVK_CMD_QMD_SIZE, NVK_CMD_QMD_SIZE,
+                           &qmd->addr, &qmd->map);
+   if (result != VK_SUCCESS) {
+      vk_free(&pool->vk.alloc, qmd);
+      return result;
+   }
+
+   *qmd_out = qmd;
+   return VK_SUCCESS;
+}
+
+static void
+nvk_cmd_qmd_destroy(struct nvk_cmd_pool *pool, struct nvk_cmd_qmd *qmd)
+{
+   struct nvk_device *dev = nvk_cmd_pool_device(pool);
+
+   nvk_heap_free(dev, &dev->qmd_heap, qmd->addr, NVK_CMD_QMD_SIZE);
+   vk_free(&pool->vk.alloc, qmd);
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 nvk_CreateCommandPool(VkDevice _device,
                       const VkCommandPoolCreateInfo *pCreateInfo,
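
nvk_cmd_qmd_create() pairs a small host-side tracking struct (vk_zalloc) with a fixed-size slot from dev->qmd_heap, unwinding the host struct if the heap allocation fails; nvk_cmd_qmd_destroy() releases the two halves in the same pairing. Because every slot uses NVK_CMD_QMD_SIZE for both size and alignment, the heap behaves like a fixed-size slab and, per the comment above, never throws memory away while the device is alive.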

@@ -67,6 +100,7 @@ nvk_CreateCommandPool(VkDevice _device,
 
    list_inithead(&pool->free_mem);
    list_inithead(&pool->free_gart_mem);
+   list_inithead(&pool->free_qmd);
 
    *pCmdPool = nvk_cmd_pool_to_handle(pool);
 
@@ -85,6 +119,10 @@ nvk_cmd_pool_destroy_mem(struct nvk_cmd_pool *pool)
       nvk_cmd_mem_destroy(pool, mem);
 
    list_inithead(&pool->free_gart_mem);
+
+   list_for_each_entry_safe(struct nvk_cmd_qmd, qmd, &pool->free_qmd, link)
+      nvk_cmd_qmd_destroy(pool, qmd);
+   list_inithead(&pool->free_qmd);
 }
 
 VkResult

@@ -108,6 +146,21 @@ nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool, bool force_gart,
    return nvk_cmd_mem_create(pool, force_gart, mem_out);
 }
 
+VkResult
+nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
+                       struct nvk_cmd_qmd **qmd_out)
+{
+   if (!list_is_empty(&pool->free_qmd)) {
+      struct nvk_cmd_qmd *qmd =
+         list_first_entry(&pool->free_qmd, struct nvk_cmd_qmd, link);
+      list_del(&qmd->link);
+      *qmd_out = qmd;
+      return VK_SUCCESS;
+   }
+
+   return nvk_cmd_qmd_create(pool, qmd_out);
+}
+
 void
 nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
                            struct list_head *mem_list)

@@ -124,6 +177,14 @@ nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
    list_inithead(mem_list);
 }
 
+void
+nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
+                           struct list_head *qmd_list)
+{
+   list_splicetail(qmd_list, &pool->free_qmd);
+   list_inithead(qmd_list);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 nvk_DestroyCommandPool(VkDevice _device,
                        VkCommandPool commandPool,
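
Taken together these pool hooks form a simple recycler: QMD slots migrate between the pool's free_qmd list and each command buffer's owned_qmd list, and nothing is ever handed back to dev->qmd_heap. A sketch of the lifecycle (illustration only; 'pool' and 'cmd' are hypothetical, already-created objects):

/* Illustration only: lifecycle of one QMD slot across the new pool hooks. */
static void
qmd_lifecycle_sketch(struct nvk_cmd_pool *pool, struct nvk_cmd_buffer *cmd)
{
   struct nvk_cmd_qmd *qmd;

   /* Record time: pop a recycled slot off pool->free_qmd, or create a
    * fresh one from dev->qmd_heap, and let the command buffer own it.
    */
   if (nvk_cmd_pool_alloc_qmd(pool, &qmd) == VK_SUCCESS)
      list_addtail(&qmd->link, &cmd->owned_qmd);

   /* Reset/free time: splice the whole owned list back onto the pool's
    * free list in O(1).  The device addresses stay valid forever, which
    * is the property the Maxwell A SKED cache workaround relies on.
    */
   nvk_cmd_pool_free_qmd_list(pool, &cmd->owned_qmd);
}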

@@ -21,12 +21,23 @@ struct nvk_cmd_mem {
    struct list_head link;
 };
 
+#define NVK_CMD_QMD_SIZE 256
+
+struct nvk_cmd_qmd {
+   uint64_t addr;
+   void *map;
+
+   /** Link in nvk_cmd_pool::free_qmd or nvk_cmd_buffer::owned_qmd */
+   struct list_head link;
+};
+
 struct nvk_cmd_pool {
    struct vk_command_pool vk;
 
    /** List of nvk_cmd_mem */
    struct list_head free_mem;
    struct list_head free_gart_mem;
+   struct list_head free_qmd;
 };
 
 VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_cmd_pool, vk.base, VkCommandPool,
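
NVK_CMD_QMD_SIZE of 256 matches the uint32_t qmd[64] staging array in nvk_cmd_upload_qmd above (64 × 4 bytes). If one wanted to tie the two together at compile time, a guard along these lines would do; this is hypothetical and not part of the commit:

/* Hypothetical compile-time guard (not in the diff): keep the heap slot
 * size in sync with the 64-dword QMD staging buffer.
 */
#include <assert.h>  /* static_assert (C11) */
static_assert(NVK_CMD_QMD_SIZE == 64 * sizeof(uint32_t),
              "QMD heap slot must hold a full 64-dword QMD");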

@@ -41,9 +52,13 @@ nvk_cmd_pool_device(struct nvk_cmd_pool *pool)
 VkResult nvk_cmd_pool_alloc_mem(struct nvk_cmd_pool *pool,
                                 bool force_gart,
                                 struct nvk_cmd_mem **mem_out);
+VkResult nvk_cmd_pool_alloc_qmd(struct nvk_cmd_pool *pool,
+                                struct nvk_cmd_qmd **qmd_out);
 
 void nvk_cmd_pool_free_mem_list(struct nvk_cmd_pool *pool,
                                 struct list_head *mem_list);
 void nvk_cmd_pool_free_gart_mem_list(struct nvk_cmd_pool *pool,
                                      struct list_head *mem_list);
+void nvk_cmd_pool_free_qmd_list(struct nvk_cmd_pool *pool,
+                                struct list_head *qmd_list);
 
 #endif /* NVK_CMD_POOL_H */

@@ -16,6 +16,7 @@
 
 #include "cl9097.h"
 #include "clb097.h"
+#include "clb197.h"
 #include "clc397.h"
 
 static void

@@ -228,6 +229,14 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
    if (result != VK_SUCCESS)
       goto fail_shader_heap;
 
+   if (pdev->info.cls_eng3d < MAXWELL_B) {
+      result = nvk_heap_init(dev, &dev->qmd_heap,
+                             NVKMD_MEM_LOCAL, NVKMD_MEM_MAP_WR,
+                             0 /* overalloc */, false /* contiguous */);
+      if (result != VK_SUCCESS)
+         goto fail_event_heap;
+   }
+
    nvk_slm_area_init(&dev->slm);
 
    if (pdev->info.cls_eng3d >= FERMI_A &&

@@ -271,6 +280,9 @@ fail_vab_memory:
    nvkmd_mem_unref(dev->vab_memory);
 fail_slm:
    nvk_slm_area_finish(&dev->slm);
+   if (pdev->info.cls_eng3d < MAXWELL_B)
+      nvk_heap_finish(dev, &dev->qmd_heap);
 fail_event_heap:
    nvk_heap_finish(dev, &dev->event_heap);
 fail_shader_heap:
    nvk_heap_finish(dev, &dev->shader_heap);
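
The heap is only initialized when pdev->info.cls_eng3d < MAXWELL_B, and both teardown sites (the creation error path here and nvk_DestroyDevice below) guard nvk_heap_finish() with the same condition, so the heap is never finished without having been initialized. These device-level checks use the 3D class where the allocation path uses the compute class (MAXWELL_COMPUTE_B); both presumably select the same pre-Maxwell-B hardware generation.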

@@ -301,6 +313,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!dev)
       return;
 
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+
    if (dev->copy_queries)
       vk_shader_destroy(&dev->vk, &dev->copy_queries->vk, &dev->vk.alloc);
 
@@ -316,6 +330,8 @@ nvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    nvk_upload_queue_sync(dev, &dev->upload);
 
    nvk_slm_area_finish(&dev->slm);
+   if (pdev->info.cls_eng3d < MAXWELL_B)
+      nvk_heap_finish(dev, &dev->qmd_heap);
    nvk_heap_finish(dev, &dev->event_heap);
    nvk_heap_finish(dev, &dev->shader_heap);
    nvk_edb_bview_cache_finish(dev, &dev->edb_bview_cache);

@@ -46,6 +46,7 @@ struct nvk_device {
    struct nvk_edb_bview_cache edb_bview_cache;
    struct nvk_heap shader_heap;
    struct nvk_heap event_heap;
+   struct nvk_heap qmd_heap;
    struct nvk_slm_area slm;
    struct nvkmd_mem *vab_memory;