diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index f78f6368216..077349db396 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -55,6 +55,15 @@ nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
    if (pdev->info.cls_compute == MAXWELL_COMPUTE_A)
       P_IMMD(p, NVB0C0, SET_SELECT_MAXWELL_TEXTURE_HEADERS, V_TRUE);
 
+   if (pdev->info.cls_compute < VOLTA_COMPUTE_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
+      P_NVA0C0_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NVA0C0_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    return VK_SUCCESS;
 }
 
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 978d5c284c6..90fdd50a117 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -367,6 +367,15 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
 
    P_IMMD(p, NV9097, SET_CT_MRT_ENABLE, V_TRUE);
 
+   if (pdev->info.cls_eng3d < VOLTA_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
+      P_NV9097_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    for (uint32_t i = 0; i < 6; i++) {
       P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
          .enable = ENABLE_FALSE,
diff --git a/src/nouveau/vulkan/nvk_heap.c b/src/nouveau/vulkan/nvk_heap.c
index f7763579d0b..d00b936091f 100644
--- a/src/nouveau/vulkan/nvk_heap.c
+++ b/src/nouveau/vulkan/nvk_heap.c
@@ -26,7 +26,17 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
       heap->bo_flags |= NOUVEAU_WS_BO_MAP;
    heap->map_flags = map_flags;
    heap->overalloc = overalloc;
-   heap->contiguous = contiguous;
+
+   if (contiguous) {
+      heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0,
+                                             NVK_HEAP_MAX_SIZE,
+                                             0, false /* bda */,
+                                             false /* sparse */);
+      if (heap->base_addr == 0) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Failed to allocate VMA for heap");
+      }
+   }
 
    simple_mtx_init(&heap->mutex, mtx_plain);
    util_vma_heap_init(&heap->heap, 0, 0);
@@ -41,12 +51,21 @@ void
 nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
 {
    for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (heap->base_addr != 0) {
+         nouveau_ws_bo_unbind_vma(dev->ws_dev, heap->bos[bo_idx].addr,
+                                  heap->bos[bo_idx].bo->size);
+      }
       nouveau_ws_bo_unmap(heap->bos[bo_idx].bo, heap->bos[bo_idx].map);
       nouveau_ws_bo_destroy(heap->bos[bo_idx].bo);
    }
 
    util_vma_heap_finish(&heap->heap);
    simple_mtx_destroy(&heap->mutex);
+
+   if (heap->base_addr != 0) {
+      nouveau_ws_free_vma(dev->ws_dev, heap->base_addr, NVK_HEAP_MAX_SIZE,
+                          false /* bda */, false /* sparse */);
+   }
 }
 
 static uint64_t
@@ -74,115 +93,41 @@ vma_bo_offset(uint64_t offset)
 static VkResult
 nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   VkResult result;
-
-   if (heap->contiguous) {
-      if (heap->total_size >= NVK_HEAP_MAX_SIZE) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      const uint64_t new_bo_size =
-         MAX2(heap->total_size * 2, NVK_HEAP_MIN_SIZE);
-
-      void *new_bo_map;
-      struct nouveau_ws_bo *new_bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &new_bo_map);
-      if (new_bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      if (heap->bo_count > 0) {
-         assert(heap->bo_count == 1);
-         struct nouveau_ws_bo *old_bo = heap->bos[0].bo;
-
-         assert(util_is_power_of_two_nonzero64(heap->total_size));
-         assert(heap->total_size >= NVK_HEAP_MIN_SIZE);
-         assert(heap->total_size <= old_bo->size);
-         assert(heap->total_size < new_bo_size);
-
-         unsigned line_bytes = MIN2(heap->total_size, 1 << 17);
-         assert(heap->total_size % line_bytes == 0);
-         unsigned line_count = heap->total_size / line_bytes;
-
-         uint32_t push_dw[12];
-         struct nv_push push;
-         nv_push_init(&push, push_dw, ARRAY_SIZE(push_dw));
-         struct nv_push *p = &push;
-
-         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
-         P_NV90B5_OFFSET_IN_UPPER(p, old_bo->offset >> 32);
-         P_NV90B5_OFFSET_IN_LOWER(p, old_bo->offset & 0xffffffff);
-         P_NV90B5_OFFSET_OUT_UPPER(p, new_bo->offset >> 32);
-         P_NV90B5_OFFSET_OUT_LOWER(p, new_bo->offset & 0xffffffff);
-         P_NV90B5_PITCH_IN(p, line_bytes);
-         P_NV90B5_PITCH_OUT(p, line_bytes);
-         P_NV90B5_LINE_LENGTH_IN(p, line_bytes);
-         P_NV90B5_LINE_COUNT(p, line_count);
-
-         P_IMMD(p, NV90B5, LAUNCH_DMA, {
-            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
-            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
-            .flush_enable = FLUSH_ENABLE_TRUE,
-            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
-            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
-         });
-
-         struct nouveau_ws_bo *push_bos[] = { new_bo, old_bo, };
-         result = nvk_queue_submit_simple(&dev->queue,
-                                          nv_push_dw_count(&push), push_dw,
-                                          ARRAY_SIZE(push_bos), push_bos);
-         if (result != VK_SUCCESS) {
-            nouveau_ws_bo_unmap(new_bo, new_bo_map);
-            nouveau_ws_bo_destroy(new_bo);
-            return result;
-         }
-
-         nouveau_ws_bo_unmap(heap->bos[0].bo, heap->bos[0].map);
-         nouveau_ws_bo_destroy(heap->bos[0].bo);
-      }
-
-      uint64_t vma = encode_vma(0, heap->total_size);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->total_size);
-
-      heap->total_size = new_bo_size;
-      heap->bo_count = 1;
-      heap->bos[0].bo = new_bo;
-      heap->bos[0].map = new_bo_map;
-
-      return VK_SUCCESS;
-   } else {
-      if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      /* First two BOs are MIN_SIZE, double after that */
-      const uint64_t new_bo_size =
-         NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
-
-      heap->bos[heap->bo_count].bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &heap->bos[heap->bo_count].map);
-      if (heap->bos[heap->bo_count].bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      uint64_t vma = encode_vma(heap->bo_count, 0);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size);
-
-      heap->total_size += new_bo_size;
-      heap->bo_count++;
-
-      return VK_SUCCESS;
+   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Heap has already hit its maximum size");
    }
+
+   /* First two BOs are MIN_SIZE, double after that */
+   const uint64_t new_bo_size =
+      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
+
+   heap->bos[heap->bo_count].bo =
+      nouveau_ws_bo_new_mapped(dev->ws_dev,
+                               new_bo_size + heap->overalloc, 0,
+                               heap->bo_flags, heap->map_flags,
+                               &heap->bos[heap->bo_count].map);
+   if (heap->bos[heap->bo_count].bo == NULL) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Failed to allocate a heap BO: %m");
+   }
+
+   if (heap->base_addr != 0) {
+      heap->bos[heap->bo_count].addr = heap->base_addr + heap->total_size;
+      nouveau_ws_bo_bind_vma(dev->ws_dev, heap->bos[heap->bo_count].bo,
+                             heap->bos[heap->bo_count].addr,
+                             new_bo_size, 0, 0);
+   } else {
+      heap->bos[heap->bo_count].addr = heap->bos[heap->bo_count].bo->offset;
+   }
+
+   uint64_t vma = encode_vma(heap->bo_count, 0);
+   util_vma_heap_free(&heap->heap, vma, new_bo_size);
+
+   heap->total_size += new_bo_size;
+   heap->bo_count++;
+
+   return VK_SUCCESS;
 }
 
 static VkResult
@@ -201,7 +146,7 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
       assert(bo_offset + size + heap->overalloc <=
              heap->bos[bo_idx].bo->size);
 
-      if (heap->contiguous) {
+      if (heap->base_addr != 0) {
          assert(bo_idx == 0);
         *addr_out = bo_offset;
      } else {
@@ -224,31 +169,21 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
 {
    assert(addr + size > addr);
 
-   if (heap->contiguous) {
-      assert(heap->bo_count == 1);
-      uint64_t bo_offset = addr;
+   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (addr < heap->bos[bo_idx].bo->offset)
+         continue;
 
-      assert(bo_offset + size <= heap->bos[0].bo->size);
-      uint64_t vma = encode_vma(0, bo_offset);
+      uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
+      if (bo_offset >= heap->bos[bo_idx].bo->size)
+         continue;
+
+      assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
+      uint64_t vma = encode_vma(bo_idx, bo_offset);
 
       util_vma_heap_free(&heap->heap, vma, size);
-   } else {
-      for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
-         if (addr < heap->bos[bo_idx].bo->offset)
-            continue;
-
-         uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
-         if (bo_offset >= heap->bos[bo_idx].bo->size)
-            continue;
-
-         assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
-         uint64_t vma = encode_vma(bo_idx, bo_offset);
-
-         util_vma_heap_free(&heap->heap, vma, size);
-         return;
-      }
-      assert(!"Failed to find heap BO");
+      return;
    }
+   assert(!"Failed to find heap BO");
 }
 
 VkResult
@@ -256,12 +191,6 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
                uint64_t size, uint32_t alignment,
                uint64_t *addr_out, void **map_out)
 {
-   /* We can't return maps from contiguous heaps because the the map may go
-    * away at any time when the lock isn't taken and we don't want to trust
-    * the caller with racy maps.
-    */
-   assert(!heap->contiguous);
-
    simple_mtx_lock(&heap->mutex);
    VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                            addr_out, map_out);
diff --git a/src/nouveau/vulkan/nvk_heap.h b/src/nouveau/vulkan/nvk_heap.h
index b7c280ab006..510952e7155 100644
--- a/src/nouveau/vulkan/nvk_heap.h
+++ b/src/nouveau/vulkan/nvk_heap.h
@@ -23,17 +23,20 @@ struct nvk_device;
 struct nvk_heap_bo {
    struct nouveau_ws_bo *bo;
    void *map;
+   uint64_t addr;
 };
 
 struct nvk_heap {
    enum nouveau_ws_bo_flags bo_flags;
    enum nouveau_ws_bo_map_flags map_flags;
    uint32_t overalloc;
-   bool contiguous;
 
    simple_mtx_t mutex;
    struct util_vma_heap heap;
 
+   /* Base address for contiguous heaps, 0 otherwise */
+   uint64_t base_addr;
+
    uint64_t total_size;
 
    uint32_t bo_count;
@@ -58,19 +61,11 @@ VkResult nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
 void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
                    uint64_t addr, uint64_t size);
 
-static inline struct nouveau_ws_bo *
-nvk_heap_get_contiguous_bo_ref(struct nvk_heap *heap)
+static inline uint64_t
+nvk_heap_contiguous_base_address(struct nvk_heap *heap)
 {
-   assert(heap->contiguous);
-   assert(heap->bo_count <= 1);
-
-   simple_mtx_lock(&heap->mutex);
-   struct nouveau_ws_bo *bo = heap->bos[0].bo;
-   if (bo)
-      nouveau_ws_bo_ref(bo);
-   simple_mtx_unlock(&heap->mutex);
-
-   return bo;
+   assert(heap->base_addr != 0);
+   return heap->base_addr;
 }
 
 #endif /* define NVK_HEAP_H */
diff --git a/src/nouveau/vulkan/nvk_queue.c b/src/nouveau/vulkan/nvk_queue.c
index a4851dd90f9..1946aa4996f 100644
--- a/src/nouveau/vulkan/nvk_queue.c
+++ b/src/nouveau/vulkan/nvk_queue.c
@@ -35,8 +35,6 @@ nvk_queue_state_finish(struct nvk_device *dev,
       nouveau_ws_bo_destroy(qs->images.bo);
    if (qs->samplers.bo)
       nouveau_ws_bo_destroy(qs->samplers.bo);
-   if (qs->shaders.bo)
-      nouveau_ws_bo_destroy(qs->shaders.bo);
    if (qs->slm.bo)
      nouveau_ws_bo_destroy(qs->slm.bo);
   if (qs->push.bo) {
@@ -90,19 +88,6 @@ nvk_queue_state_update(struct nvk_device *dev,
         nouveau_ws_bo_destroy(bo);
   }
 
-   if (dev->shader_heap.contiguous) {
-      bo = nvk_heap_get_contiguous_bo_ref(&dev->shader_heap);
-      if (qs->shaders.bo != bo) {
-         if (qs->shaders.bo)
-            nouveau_ws_bo_destroy(qs->shaders.bo);
-         qs->shaders.bo = bo;
-         dirty = true;
-      } else {
-         if (bo)
-            nouveau_ws_bo_destroy(bo);
-      }
-   }
-
    bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
    if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
        qs->slm.bytes_per_tpc != bytes_per_tpc) {
@@ -182,20 +167,6 @@ nvk_queue_state_update(struct nvk_device *dev,
       });
    }
 
-   if (qs->shaders.bo) {
-      /* Compute */
-      assert(dev->pdev->info.cls_compute < VOLTA_COMPUTE_A);
-      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
-      P_NVA0C0_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NVA0C0_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-
-      /* 3D */
-      assert(dev->pdev->info.cls_eng3d < VOLTA_A);
-      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
-      P_NV9097_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NV9097_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-   }
-
    if (qs->slm.bo) {
       const uint64_t slm_addr = qs->slm.bo->offset;
       const uint64_t slm_size = qs->slm.bo->size;
diff --git a/src/nouveau/vulkan/nvk_queue.h b/src/nouveau/vulkan/nvk_queue.h
index d97ff00503c..5ec3d2c53a4 100644
--- a/src/nouveau/vulkan/nvk_queue.h
+++ b/src/nouveau/vulkan/nvk_queue.h
@@ -26,10 +26,6 @@ struct nvk_queue_state {
       uint32_t alloc_count;
    } samplers;
 
-   struct {
-      struct nouveau_ws_bo *bo;
-   } shaders;
-
    struct {
       struct nouveau_ws_bo *bo;
       uint32_t bytes_per_warp;
diff --git a/src/nouveau/winsys/nouveau_bo.c b/src/nouveau/winsys/nouveau_bo.c
index 6ee022b14a3..17f152870a8 100644
--- a/src/nouveau/winsys/nouveau_bo.c
+++ b/src/nouveau/winsys/nouveau_bo.c
@@ -63,6 +63,10 @@ nouveau_ws_alloc_vma(struct nouveau_ws_device *dev,
 {
    assert(dev->has_vm_bind);
 
+   /* if the caller doesn't care, use the GPU page size */
+   if (align == 0)
+      align = 0x1000;
+
    uint64_t offset;
    simple_mtx_lock(&dev->vma_mutex);
    if (bda_capture_replay) {
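
Note (illustrative only, not part of the patch): the heap changes above reserve a fixed VA range of NVK_HEAP_MAX_SIZE for contiguous heaps at init time and bind each new BO immediately after the previous one, so the pre-Volta SET_PROGRAM_REGION base can be written once at command-buffer init instead of being reprogrammed whenever the heap BO changes. The standalone sketch below only models that address arithmetic; the names (model_heap, HEAP_MAX_SIZE, HEAP_MIN_SIZE) and constants are made up for illustration and are not NVK code.

/* Standalone model of the contiguous-heap addressing scheme. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define HEAP_MAX_SIZE (64ull << 20)   /* cap on the reserved VA range */
#define HEAP_MIN_SIZE (1ull << 20)    /* size of the first BO */

struct model_heap {
   uint64_t base_addr;   /* start of the reserved VA range */
   uint64_t total_size;  /* bytes currently backed by BOs */
};

/* Bind one more BO at the end of the range, mirroring the idea in
 * nvk_heap_grow_locked(): new BO address = base_addr + total_size. */
static uint64_t
model_heap_grow(struct model_heap *heap, uint64_t bo_size)
{
   assert(heap->total_size + bo_size <= HEAP_MAX_SIZE);
   uint64_t bo_addr = heap->base_addr + heap->total_size;
   heap->total_size += bo_size;
   return bo_addr;
}

int main(void)
{
   /* Pretend the kernel handed us this VA range (nouveau_ws_alloc_vma()). */
   struct model_heap heap = { .base_addr = 0x100000000ull, .total_size = 0 };

   uint64_t bo0 = model_heap_grow(&heap, HEAP_MIN_SIZE);
   uint64_t bo1 = model_heap_grow(&heap, HEAP_MIN_SIZE);

   /* Because BOs are bound back to back, a shader at heap offset O always
    * lives at base_addr + O, so SET_PROGRAM_REGION can be written once with
    * base_addr and never needs updating as the heap grows. */
   printf("BO0 at %#llx, BO1 at %#llx (contiguous: %d)\n",
          (unsigned long long)bo0, (unsigned long long)bo1,
          bo1 == bo0 + HEAP_MIN_SIZE);
   return 0;
}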