diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index f78f6368216..077349db396 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -55,6 +55,15 @@ nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
    if (pdev->info.cls_compute == MAXWELL_COMPUTE_A)
       P_IMMD(p, NVB0C0, SET_SELECT_MAXWELL_TEXTURE_HEADERS, V_TRUE);
 
+   if (pdev->info.cls_compute < VOLTA_COMPUTE_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
+      P_NVA0C0_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NVA0C0_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    return VK_SUCCESS;
 }
 
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 978d5c284c6..90fdd50a117 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -367,6 +367,15 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
 
    P_IMMD(p, NV9097, SET_CT_MRT_ENABLE, V_TRUE);
 
+   if (pdev->info.cls_eng3d < VOLTA_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
+      P_NV9097_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    for (uint32_t i = 0; i < 6; i++) {
       P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
          .enable = ENABLE_FALSE,
diff --git a/src/nouveau/vulkan/nvk_heap.c b/src/nouveau/vulkan/nvk_heap.c
index f7763579d0b..d00b936091f 100644
--- a/src/nouveau/vulkan/nvk_heap.c
+++ b/src/nouveau/vulkan/nvk_heap.c
@@ -26,7 +26,17 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
       heap->bo_flags |= NOUVEAU_WS_BO_MAP;
    heap->map_flags = map_flags;
    heap->overalloc = overalloc;
-   heap->contiguous = contiguous;
+
+   if (contiguous) {
+      heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0,
+                                             NVK_HEAP_MAX_SIZE,
+                                             0, false /* bda */,
+                                             false /* sparse */);
+      if (heap->base_addr == 0) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Failed to allocate VMA for heap");
+      }
+   }
 
    simple_mtx_init(&heap->mutex, mtx_plain);
    util_vma_heap_init(&heap->heap, 0, 0);
@@ -41,12 +51,21 @@ void
 nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
 {
    for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (heap->base_addr != 0) {
+         nouveau_ws_bo_unbind_vma(dev->ws_dev, heap->bos[bo_idx].addr,
+                                  heap->bos[bo_idx].bo->size);
+      }
       nouveau_ws_bo_unmap(heap->bos[bo_idx].bo, heap->bos[bo_idx].map);
       nouveau_ws_bo_destroy(heap->bos[bo_idx].bo);
    }
 
    util_vma_heap_finish(&heap->heap);
    simple_mtx_destroy(&heap->mutex);
+
+   if (heap->base_addr != 0) {
+      nouveau_ws_free_vma(dev->ws_dev, heap->base_addr, NVK_HEAP_MAX_SIZE,
+                          false /* bda */, false /* sparse */);
+   }
 }
 
 static uint64_t
@@ -74,115 +93,41 @@ vma_bo_offset(uint64_t offset)
 static VkResult
 nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   VkResult result;
-
-   if (heap->contiguous) {
-      if (heap->total_size >= NVK_HEAP_MAX_SIZE) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      const uint64_t new_bo_size =
-         MAX2(heap->total_size * 2, NVK_HEAP_MIN_SIZE);
-
-      void *new_bo_map;
-      struct nouveau_ws_bo *new_bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &new_bo_map);
-      if (new_bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      if (heap->bo_count > 0) {
-         assert(heap->bo_count == 1);
-         struct nouveau_ws_bo *old_bo = heap->bos[0].bo;
-
-         assert(util_is_power_of_two_nonzero64(heap->total_size));
-         assert(heap->total_size >= NVK_HEAP_MIN_SIZE);
-         assert(heap->total_size <= old_bo->size);
-         assert(heap->total_size < new_bo_size);
-
-         unsigned line_bytes = MIN2(heap->total_size, 1 << 17);
-         assert(heap->total_size % line_bytes == 0);
-         unsigned line_count = heap->total_size / line_bytes;
-
-         uint32_t push_dw[12];
-         struct nv_push push;
-         nv_push_init(&push, push_dw, ARRAY_SIZE(push_dw));
-         struct nv_push *p = &push;
-
-         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
-         P_NV90B5_OFFSET_IN_UPPER(p, old_bo->offset >> 32);
-         P_NV90B5_OFFSET_IN_LOWER(p, old_bo->offset & 0xffffffff);
-         P_NV90B5_OFFSET_OUT_UPPER(p, new_bo->offset >> 32);
-         P_NV90B5_OFFSET_OUT_LOWER(p, new_bo->offset & 0xffffffff);
-         P_NV90B5_PITCH_IN(p, line_bytes);
-         P_NV90B5_PITCH_OUT(p, line_bytes);
-         P_NV90B5_LINE_LENGTH_IN(p, line_bytes);
-         P_NV90B5_LINE_COUNT(p, line_count);
-
-         P_IMMD(p, NV90B5, LAUNCH_DMA, {
-            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
-            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
-            .flush_enable = FLUSH_ENABLE_TRUE,
-            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
-            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
-         });
-
-         struct nouveau_ws_bo *push_bos[] = { new_bo, old_bo, };
-         result = nvk_queue_submit_simple(&dev->queue,
-                                          nv_push_dw_count(&push), push_dw,
-                                          ARRAY_SIZE(push_bos), push_bos);
-         if (result != VK_SUCCESS) {
-            nouveau_ws_bo_unmap(new_bo, new_bo_map);
-            nouveau_ws_bo_destroy(new_bo);
-            return result;
-         }
-
-         nouveau_ws_bo_unmap(heap->bos[0].bo, heap->bos[0].map);
-         nouveau_ws_bo_destroy(heap->bos[0].bo);
-      }
-
-      uint64_t vma = encode_vma(0, heap->total_size);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->total_size);
-
-      heap->total_size = new_bo_size;
-      heap->bo_count = 1;
-      heap->bos[0].bo = new_bo;
-      heap->bos[0].map = new_bo_map;
-
-      return VK_SUCCESS;
-   } else {
-      if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      /* First two BOs are MIN_SIZE, double after that */
-      const uint64_t new_bo_size =
-         NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
-
-      heap->bos[heap->bo_count].bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &heap->bos[heap->bo_count].map);
-      if (heap->bos[heap->bo_count].bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      uint64_t vma = encode_vma(heap->bo_count, 0);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size);
-
-      heap->total_size += new_bo_size;
-      heap->bo_count++;
-
-      return VK_SUCCESS;
+   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Heap has already hit its maximum size");
    }
+
+   /* First two BOs are MIN_SIZE, double after that */
+   const uint64_t new_bo_size =
+      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
+
+   heap->bos[heap->bo_count].bo =
+      nouveau_ws_bo_new_mapped(dev->ws_dev,
+                               new_bo_size + heap->overalloc, 0,
+                               heap->bo_flags, heap->map_flags,
+                               &heap->bos[heap->bo_count].map);
+   if (heap->bos[heap->bo_count].bo == NULL) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Failed to allocate a heap BO: %m");
+   }
+
+   if (heap->base_addr != 0) {
+      heap->bos[heap->bo_count].addr = heap->base_addr + heap->total_size;
+      nouveau_ws_bo_bind_vma(dev->ws_dev, heap->bos[heap->bo_count].bo,
+                             heap->bos[heap->bo_count].addr,
+                             new_bo_size, 0, 0);
+   } else {
+      heap->bos[heap->bo_count].addr = heap->bos[heap->bo_count].bo->offset;
+   }
+
+   uint64_t vma = encode_vma(heap->bo_count, 0);
+   util_vma_heap_free(&heap->heap, vma, new_bo_size);
+
+   heap->total_size += new_bo_size;
+   heap->bo_count++;
+
+   return VK_SUCCESS;
 }
 
 static VkResult
@@ -201,7 +146,7 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
       assert(bo_offset + size + heap->overalloc <=
              heap->bos[bo_idx].bo->size);
 
-      if (heap->contiguous) {
+      if (heap->base_addr != 0) {
          assert(bo_idx == 0);
         *addr_out = bo_offset;
      } else {
@@ -224,31 +169,21 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
 {
    assert(addr + size > addr);
 
-   if (heap->contiguous) {
-      assert(heap->bo_count == 1);
-      uint64_t bo_offset = addr;
+   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (addr < heap->bos[bo_idx].bo->offset)
+         continue;
 
-      assert(bo_offset + size <= heap->bos[0].bo->size);
-      uint64_t vma = encode_vma(0, bo_offset);
+      uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
+      if (bo_offset >= heap->bos[bo_idx].bo->size)
+         continue;
+
+      assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
+      uint64_t vma = encode_vma(bo_idx, bo_offset);
 
       util_vma_heap_free(&heap->heap, vma, size);
-   } else {
-      for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
-         if (addr < heap->bos[bo_idx].bo->offset)
-            continue;
-
-         uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
-         if (bo_offset >= heap->bos[bo_idx].bo->size)
-            continue;
-
-         assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
-         uint64_t vma = encode_vma(bo_idx, bo_offset);
-
-         util_vma_heap_free(&heap->heap, vma, size);
-         return;
-      }
-      assert(!"Failed to find heap BO");
+      return;
    }
+   assert(!"Failed to find heap BO");
 }
 
 VkResult
@@ -256,12 +191,6 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
                uint64_t size, uint32_t alignment,
                uint64_t *addr_out, void **map_out)
 {
-   /* We can't return maps from contiguous heaps because the the map may go
-    * away at any time when the lock isn't taken and we don't want to trust
-    * the caller with racy maps.
-    */
-   assert(!heap->contiguous);
-
    simple_mtx_lock(&heap->mutex);
    VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                            addr_out, map_out);
diff --git a/src/nouveau/vulkan/nvk_heap.h b/src/nouveau/vulkan/nvk_heap.h
index b7c280ab006..510952e7155 100644
--- a/src/nouveau/vulkan/nvk_heap.h
+++ b/src/nouveau/vulkan/nvk_heap.h
@@ -23,17 +23,20 @@ struct nvk_device;
 struct nvk_heap_bo {
    struct nouveau_ws_bo *bo;
    void *map;
+   uint64_t addr;
 };
 
 struct nvk_heap {
    enum nouveau_ws_bo_flags bo_flags;
    enum nouveau_ws_bo_map_flags map_flags;
    uint32_t overalloc;
-   bool contiguous;
 
    simple_mtx_t mutex;
    struct util_vma_heap heap;
 
+   /* Base address for contiguous heaps, 0 otherwise */
+   uint64_t base_addr;
+
    uint64_t total_size;
 
    uint32_t bo_count;
@@ -58,19 +61,11 @@ VkResult nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
 void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
                    uint64_t addr, uint64_t size);
 
-static inline struct nouveau_ws_bo *
-nvk_heap_get_contiguous_bo_ref(struct nvk_heap *heap)
+static inline uint64_t
+nvk_heap_contiguous_base_address(struct nvk_heap *heap)
 {
-   assert(heap->contiguous);
-   assert(heap->bo_count <= 1);
-
-   simple_mtx_lock(&heap->mutex);
-   struct nouveau_ws_bo *bo = heap->bos[0].bo;
-   if (bo)
-      nouveau_ws_bo_ref(bo);
-   simple_mtx_unlock(&heap->mutex);
-
-   return bo;
+   assert(heap->base_addr != 0);
+   return heap->base_addr;
 }
 
 #endif /* define NVK_HEAP_H */
diff --git a/src/nouveau/vulkan/nvk_queue.c b/src/nouveau/vulkan/nvk_queue.c
index a4851dd90f9..1946aa4996f 100644
--- a/src/nouveau/vulkan/nvk_queue.c
+++ b/src/nouveau/vulkan/nvk_queue.c
@@ -35,8 +35,6 @@ nvk_queue_state_finish(struct nvk_device *dev,
       nouveau_ws_bo_destroy(qs->images.bo);
    if (qs->samplers.bo)
       nouveau_ws_bo_destroy(qs->samplers.bo);
-   if (qs->shaders.bo)
-      nouveau_ws_bo_destroy(qs->shaders.bo);
    if (qs->slm.bo)
      nouveau_ws_bo_destroy(qs->slm.bo);
   if (qs->push.bo) {
@@ -90,19 +88,6 @@ nvk_queue_state_update(struct nvk_device *dev,
         nouveau_ws_bo_destroy(bo);
   }
 
-   if (dev->shader_heap.contiguous) {
-      bo = nvk_heap_get_contiguous_bo_ref(&dev->shader_heap);
-      if (qs->shaders.bo != bo) {
-         if (qs->shaders.bo)
-            nouveau_ws_bo_destroy(qs->shaders.bo);
-         qs->shaders.bo = bo;
-         dirty = true;
-      } else {
-         if (bo)
-            nouveau_ws_bo_destroy(bo);
-      }
-   }
-
    bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
    if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
        qs->slm.bytes_per_tpc != bytes_per_tpc) {
@@ -182,20 +167,6 @@ nvk_queue_state_update(struct nvk_device *dev,
       });
    }
 
-   if (qs->shaders.bo) {
-      /* Compute */
-      assert(dev->pdev->info.cls_compute < VOLTA_COMPUTE_A);
-      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
-      P_NVA0C0_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NVA0C0_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-
-      /* 3D */
-      assert(dev->pdev->info.cls_eng3d < VOLTA_A);
-      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
-      P_NV9097_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NV9097_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-   }
-
    if (qs->slm.bo) {
       const uint64_t slm_addr = qs->slm.bo->offset;
       const uint64_t slm_size = qs->slm.bo->size;
diff --git a/src/nouveau/vulkan/nvk_queue.h b/src/nouveau/vulkan/nvk_queue.h
index d97ff00503c..5ec3d2c53a4 100644
--- a/src/nouveau/vulkan/nvk_queue.h
+++ b/src/nouveau/vulkan/nvk_queue.h
@@ -26,10 +26,6 @@ struct nvk_queue_state {
       uint32_t alloc_count;
    } samplers;
 
-   struct {
-      struct nouveau_ws_bo *bo;
-   } shaders;
-
    struct {
       struct nouveau_ws_bo *bo;
       uint32_t bytes_per_warp;
diff --git a/src/nouveau/winsys/nouveau_bo.c b/src/nouveau/winsys/nouveau_bo.c
index 6ee022b14a3..17f152870a8 100644
--- a/src/nouveau/winsys/nouveau_bo.c
+++ b/src/nouveau/winsys/nouveau_bo.c
@@ -63,6 +63,10 @@ nouveau_ws_alloc_vma(struct nouveau_ws_device *dev,
 {
    assert(dev->has_vm_bind);
 
+   /* if the caller doesn't care, use the GPU page size */
+   if (align == 0)
+      align = 0x1000;
+
    uint64_t offset;
    simple_mtx_lock(&dev->vma_mutex);
    if (bda_capture_replay) {
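
Note (illustrative only, not part of the patch): the heap changes above reserve a fixed VA range of NVK_HEAP_MAX_SIZE for contiguous heaps at init time and bind each new BO immediately after the previous one, so the pre-Volta SET_PROGRAM_REGION base can be written once at command-buffer init instead of being reprogrammed whenever the heap BO changes. The standalone sketch below only models that address arithmetic; the names (model_heap, HEAP_MAX_SIZE, HEAP_MIN_SIZE) and constants are made up for illustration and are not NVK code.

/* Standalone model of the contiguous-heap addressing scheme. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define HEAP_MAX_SIZE (64ull << 20)   /* cap on the reserved VA range */
#define HEAP_MIN_SIZE (1ull << 20)    /* size of the first BO */

struct model_heap {
   uint64_t base_addr;   /* start of the reserved VA range */
   uint64_t total_size;  /* bytes currently backed by BOs */
};

/* Bind one more BO at the end of the range, mirroring the idea in
 * nvk_heap_grow_locked(): new BO address = base_addr + total_size. */
static uint64_t
model_heap_grow(struct model_heap *heap, uint64_t bo_size)
{
   assert(heap->total_size + bo_size <= HEAP_MAX_SIZE);
   uint64_t bo_addr = heap->base_addr + heap->total_size;
   heap->total_size += bo_size;
   return bo_addr;
}

int main(void)
{
   /* Pretend the kernel handed us this VA range (nouveau_ws_alloc_vma()). */
   struct model_heap heap = { .base_addr = 0x100000000ull, .total_size = 0 };

   uint64_t bo0 = model_heap_grow(&heap, HEAP_MIN_SIZE);
   uint64_t bo1 = model_heap_grow(&heap, HEAP_MIN_SIZE);

   /* Because BOs are bound back to back, a shader at heap offset O always
    * lives at base_addr + O, so SET_PROGRAM_REGION can be written once with
    * base_addr and never needs updating as the heap grows. */
   printf("BO0 at %#llx, BO1 at %#llx (contiguous: %d)\n",
          (unsigned long long)bo0, (unsigned long long)bo1,
          bo1 == bo0 + HEAP_MIN_SIZE);
   return 0;
}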