nvk: Use an nvkmd_mem for the SLM area

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30033>
This commit is contained in:
Faith Ekstrand 2024-07-03 14:42:20 -05:00 committed by Marge Bot
parent d8d2ba9666
commit b86079d61e
4 changed files with 42 additions and 40 deletions

View file

@@ -35,24 +35,24 @@ static void
nvk_slm_area_finish(struct nvk_slm_area *area)
{
simple_mtx_destroy(&area->mutex);
if (area->bo)
nouveau_ws_bo_destroy(area->bo);
if (area->mem)
nvkmd_mem_unref(area->mem);
}
struct nouveau_ws_bo *
nvk_slm_area_get_bo_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
uint32_t *bytes_per_tpc_out)
struct nvkmd_mem *
nvk_slm_area_get_mem_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
uint32_t *bytes_per_tpc_out)
{
simple_mtx_lock(&area->mutex);
struct nouveau_ws_bo *bo = area->bo;
if (bo)
nouveau_ws_bo_ref(bo);
struct nvkmd_mem *mem = area->mem;
if (mem)
nvkmd_mem_ref(mem);
*bytes_per_warp_out = area->bytes_per_warp;
*bytes_per_tpc_out = area->bytes_per_tpc;
simple_mtx_unlock(&area->mutex);
return bo;
return mem;
}
static VkResult
@@ -61,6 +61,8 @@ nvk_slm_area_ensure(struct nvk_device *dev,
uint32_t bytes_per_thread)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
VkResult result;
assert(bytes_per_thread < (1 << 24));
/* TODO: Volta+doesn't use CRC */
@@ -98,28 +100,28 @@ nvk_slm_area_ensure(struct nvk_device *dev,
*/
size = align64(size, 0x20000);
struct nouveau_ws_bo *bo =
nouveau_ws_bo_new(dev->ws_dev, size, 0,
NOUVEAU_WS_BO_LOCAL | NOUVEAU_WS_BO_NO_SHARE);
if (bo == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
struct nvkmd_mem *mem;
result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base, size, 0,
NVKMD_MEM_LOCAL | NVKMD_MEM_NO_SHARE, &mem);
if (result != VK_SUCCESS)
return result;
struct nouveau_ws_bo *unref_bo;
struct nvkmd_mem *unref_mem;
simple_mtx_lock(&area->mutex);
if (bytes_per_tpc <= area->bytes_per_tpc) {
/* We lost the race, throw away our BO */
assert(area->bytes_per_warp == bytes_per_warp);
unref_bo = bo;
unref_mem = mem;
} else {
unref_bo = area->bo;
area->bo = bo;
unref_mem = area->mem;
area->mem = mem;
area->bytes_per_warp = bytes_per_warp;
area->bytes_per_tpc = bytes_per_tpc;
}
simple_mtx_unlock(&area->mutex);
if (unref_bo)
nouveau_ws_bo_destroy(unref_bo);
if (unref_mem)
nvkmd_mem_unref(unref_mem);
return VK_SUCCESS;
}

View file

@@ -17,19 +17,20 @@
struct nvk_physical_device;
struct nvkmd_dev;
struct nvkmd_mem;
struct vk_pipeline_cache;
struct nvk_slm_area {
simple_mtx_t mutex;
struct nouveau_ws_bo *bo;
struct nvkmd_mem *mem;
uint32_t bytes_per_warp;
uint32_t bytes_per_tpc;
};
struct nouveau_ws_bo *
nvk_slm_area_get_bo_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
uint32_t *bytes_per_mp_out);
struct nvkmd_mem *
nvk_slm_area_get_mem_ref(struct nvk_slm_area *area,
uint32_t *bytes_per_warp_out,
uint32_t *bytes_per_mp_out);
struct nvk_device {
struct vk_device vk;

View file

@@ -35,8 +35,8 @@ nvk_queue_state_finish(struct nvk_device *dev,
nvkmd_mem_unref(qs->images.mem);
if (qs->samplers.mem)
nvkmd_mem_unref(qs->samplers.mem);
if (qs->slm.bo)
nouveau_ws_bo_destroy(qs->slm.bo);
if (qs->slm.mem)
nvkmd_mem_unref(qs->slm.mem);
if (qs->push.bo) {
nouveau_ws_bo_unmap(qs->push.bo, qs->push.bo_map);
nouveau_ws_bo_destroy(qs->push.bo);
@@ -61,7 +61,6 @@ nvk_queue_state_update(struct nvk_device *dev,
struct nvk_queue_state *qs)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
struct nouveau_ws_bo *bo;
struct nvkmd_mem *mem;
uint32_t alloc_count, bytes_per_warp, bytes_per_tpc;
bool dirty = false;
@@ -92,19 +91,19 @@ nvk_queue_state_update(struct nvk_device *dev,
nvkmd_mem_unref(mem);
}
bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
mem = nvk_slm_area_get_mem_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
if (qs->slm.mem != mem || qs->slm.bytes_per_warp != bytes_per_warp ||
qs->slm.bytes_per_tpc != bytes_per_tpc) {
if (qs->slm.bo)
nouveau_ws_bo_destroy(qs->slm.bo);
qs->slm.bo = bo;
if (qs->slm.mem)
nvkmd_mem_unref(qs->slm.mem);
qs->slm.mem = mem;
qs->slm.bytes_per_warp = bytes_per_warp;
qs->slm.bytes_per_tpc = bytes_per_tpc;
dirty = true;
} else {
/* No change */
if (bo)
nouveau_ws_bo_destroy(bo);
if (mem)
nvkmd_mem_unref(mem);
}
/* TODO: We're currently depending on kernel reference counting to protect
@@ -171,9 +170,9 @@ nvk_queue_state_update(struct nvk_device *dev,
});
}
if (qs->slm.bo) {
const uint64_t slm_addr = qs->slm.bo->offset;
const uint64_t slm_size = qs->slm.bo->size;
if (qs->slm.mem) {
const uint64_t slm_addr = qs->slm.mem->va->addr;
const uint64_t slm_size = qs->slm.mem->size_B;
const uint64_t slm_per_warp = qs->slm.bytes_per_warp;
const uint64_t slm_per_tpc = qs->slm.bytes_per_tpc;
assert(!(slm_per_tpc & 0x7fff));

View file

@@ -28,7 +28,7 @@ struct nvk_queue_state {
} samplers;
struct {
struct nouveau_ws_bo *bo;
struct nvkmd_mem *mem;
uint32_t bytes_per_warp;
uint32_t bytes_per_tpc;
} slm;