diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h index dd9ee6197a6..d94c6db3792 100644 --- a/src/intel/dev/intel_device_info.h +++ b/src/intel/dev/intel_device_info.h @@ -215,6 +215,16 @@ enum intel_wa_steppings intel_device_info_wa_stepping(struct intel_device_info * uint32_t intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo); uint32_t intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo); +static inline unsigned +intel_device_info_get_max_engine_prefetch(const struct intel_device_info *devinfo) +{ + unsigned max_prefetch = 0; + for (unsigned engine = INTEL_ENGINE_CLASS_RENDER; + engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++) + max_prefetch = MAX2(max_prefetch, devinfo->engine_class_prefetch[engine]); + return max_prefetch; +} + /** * True if this device supports the Extended Bindless Surface Offset mode, * which offers 26-bit surface handles, instead of 20-bit. This effectively diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index cb1d044f0a4..55c9a3221bd 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -401,6 +401,25 @@ anv_device_finish_descriptors_view(struct anv_device *device) device->descriptor_view_state); } +static VkResult +anv_device_bind_null_va(struct anv_device *device, + struct anv_va_range *range, + enum anv_vm_bind_op op) +{ + struct anv_vm_bind bind = { + .address = range->addr, + .size = range->size, + .op = op, + }; + struct anv_sparse_submission submit = { + .binds = &bind, + .binds_len = 1, + .binds_capacity = 1, + }; + return device->kmd_backend->vm_bind(device, &submit, + ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE); +} + static VkResult anv_device_init_vma_heaps(struct anv_device *device) { @@ -416,6 +435,16 @@ anv_device_init_vma_heaps(struct anv_device *device) device->physical->va.high_heap.addr, device->physical->va.high_heap.size); + /* Reduce the usable size of the null 
initialized heap by enough pages so + * that no batch buffers get placed where the CS could end up prefetching + * beyond the limit of the null pages. + */ + unsigned max_prefetch = intel_device_info_get_max_engine_prefetch(device->info); + max_prefetch = align(max_prefetch, device->info->mem_alignment); + util_vma_heap_init(&device->vma_null_initialized, + device->physical->va.null_initialized_heap.addr, + device->physical->va.null_initialized_heap.size - max_prefetch); + if (device->physical->indirect_descriptors) { util_vma_heap_init(&device->vma_desc, device->physical->va.indirect_descriptor_pool.addr, @@ -442,6 +471,7 @@ anv_device_init_vma_heaps(struct anv_device *device) static void anv_device_finish_vma_heaps(struct anv_device *device) { + util_vma_heap_finish(&device->vma_null_initialized); util_vma_heap_finish(&device->vma_trtt); util_vma_heap_finish(&device->vma_dynamic_visible); util_vma_heap_finish(&device->vma_desc); @@ -843,12 +873,18 @@ VkResult anv_CreateDevice( goto fail_vmas; } + result = anv_device_bind_null_va(device, + &device->physical->va.null_initialized_heap, + ANV_VM_BIND); + if (result != VK_SUCCESS) + goto fail_mutex; + if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) anv_memory_trace_init(device); result = anv_bo_cache_init(&device->bo_cache, device); if (result != VK_SUCCESS) - goto fail_mutex; + goto fail_null_vma_init; if (!anv_slab_bo_init(device)) goto fail_cache; @@ -1281,6 +1317,10 @@ VkResult anv_CreateDevice( anv_slab_bo_deinit(device); fail_cache: anv_bo_cache_finish(&device->bo_cache); + fail_null_vma_init: + anv_device_bind_null_va(device, + &device->physical->va.null_initialized_heap, + ANV_VM_UNBIND); fail_mutex: pthread_mutex_destroy(&device->mutex); fail_vmas: @@ -1485,6 +1525,9 @@ anv_vma_heap_for_flags(struct anv_device *device, if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL) return &device->vma_dynamic_visible; + if (alloc_flags & ANV_BO_ALLOC_NULL_INITIALIZED_HEAP) + return 
&device->vma_null_initialized; + return &device->vma_hi; } @@ -1539,7 +1582,8 @@ anv_vma_free(struct anv_device *device, vma_heap == &device->vma_hi || vma_heap == &device->vma_desc || vma_heap == &device->vma_dynamic_visible || - vma_heap == &device->vma_trtt); + vma_heap == &device->vma_trtt || + vma_heap == &device->vma_null_initialized); const uint64_t addr_48b = intel_48b_address(address); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7e463895805..dae4cab16a4 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -474,6 +474,13 @@ enum anv_bo_alloc_flags { /** Specifies that this bo is a slab parent */ ANV_BO_ALLOC_SLAB_PARENT = (1 << 22), + + /** Specifies that the bo should be allocated from a special heap that maps + * all unused pages to null. We also reserve extra space at the end of the + * heap so that we can put batch buffers in it without the CS prefetching + * beyond the limit of the mapped null pages (causing a page fault). + */ + ANV_BO_ALLOC_NULL_INITIALIZED_HEAP = (1 << 23), }; /** Specifies that the BO should be cached and coherent. 
*/ @@ -492,12 +499,14 @@ enum anv_bo_alloc_flags { #define ANV_BO_ALLOC_BATCH_BUFFER_FLAGS (ANV_BO_ALLOC_MAPPED | \ ANV_BO_ALLOC_HOST_CACHED_COHERENT | \ - ANV_BO_ALLOC_CAPTURE) + ANV_BO_ALLOC_CAPTURE | \ + ANV_BO_ALLOC_NULL_INITIALIZED_HEAP) #define ANV_BO_ALLOC_BATCH_BUFFER_INTERNAL_FLAGS (ANV_BO_ALLOC_MAPPED | \ ANV_BO_ALLOC_HOST_COHERENT | \ ANV_BO_ALLOC_INTERNAL | \ - ANV_BO_ALLOC_CAPTURE) + ANV_BO_ALLOC_CAPTURE | \ + ANV_BO_ALLOC_NULL_INITIALIZED_HEAP) struct anv_bo { const char *name; @@ -1688,6 +1697,10 @@ struct anv_physical_device { * Push descriptor with descriptor buffers */ struct anv_va_range push_descriptor_buffer_pool; + /** + * Null page initialized heap + */ + struct anv_va_range null_initialized_heap; /** * AUX-TT */ @@ -2604,6 +2617,7 @@ struct anv_device { pthread_mutex_t vma_mutex; struct util_vma_heap vma_lo; struct util_vma_heap vma_hi; + struct util_vma_heap vma_null_initialized; struct util_vma_heap vma_desc; struct util_vma_heap vma_dynamic_visible; struct util_vma_heap vma_trtt; diff --git a/src/intel/vulkan/anv_va.c b/src/intel/vulkan/anv_va.c index 5e25f3a8f44..447a6aaeb47 100644 --- a/src/intel/vulkan/anv_va.c +++ b/src/intel/vulkan/anv_va.c @@ -62,6 +62,7 @@ anv_device_print_vas(struct anv_physical_device *device) PRINT_HEAP(dynamic_state_pool); PRINT_HEAP(dynamic_visible_pool); PRINT_HEAP(push_descriptor_buffer_pool); + PRINT_HEAP(null_initialized_heap); PRINT_HEAP(high_heap); PRINT_HEAP(trtt); } @@ -144,8 +145,9 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device) if (device->info.verx10 >= 125) address = va_add(&device->va.push_descriptor_buffer_pool, address, _1Gb - 4096); - address = align64(address, device->info.mem_alignment); + address = align64(address, _1Gb); address = va_add(&device->va.aux_tt_pool, address, 2 * _1Gb); + address = va_add(&device->va.null_initialized_heap, address, _1Gb * 8); /* What's left to do for us is to set va.high_heap and va.trtt without * overlap, but there are a few things to 
be considered: diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c index 06dbd5a4dd5..d6291f28fd2 100644 --- a/src/intel/vulkan/xe/anv_kmd_backend.c +++ b/src/intel/vulkan/xe/anv_kmd_backend.c @@ -390,13 +390,26 @@ xe_vm_bind_bo(struct anv_device *device, struct anv_bo *bo) static VkResult xe_vm_unbind_bo(struct anv_device *device, struct anv_bo *bo) { - struct anv_vm_bind bind = { - .bo = bo, - .address = 0, - .bo_offset = 0, - .size = 0, - .op = ANV_VM_UNBIND_ALL, - }; + struct anv_vm_bind bind; + if (bo->alloc_flags & ANV_BO_ALLOC_NULL_INITIALIZED_HEAP) { + bind = (struct anv_vm_bind) { + .address = bo->offset, + .size = bo->actual_size, + .op = ANV_VM_BIND, + }; + } else if (bo->from_host_ptr) { + bind = (struct anv_vm_bind) { + .bo = bo, + .address = bo->offset, + .size = bo->actual_size, + .op = ANV_VM_UNBIND, + }; + } else { + bind = (struct anv_vm_bind) { + .bo = bo, + .op = ANV_VM_UNBIND_ALL, + }; + } struct anv_sparse_submission submit = { .queue = NULL, .binds = &bind, @@ -405,11 +418,6 @@ xe_vm_unbind_bo(struct anv_device *device, struct anv_bo *bo) .wait_count = 0, .signal_count = 0, }; - if (bo->from_host_ptr) { - bind.address = bo->offset; - bind.size = bo->actual_size; - bind.op = ANV_VM_UNBIND; - } return xe_vm_bind_op(device, &submit, ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE); }