From 35831dded5f8e463583d6cda2a4b6db378fca8a5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 29 Feb 2024 09:41:53 +0200 Subject: [PATCH] anv: increase maxResourceDescriptorBufferRange on DG2+ The current helper anv_physical_device_bindless_heap_size() artificially limits the surface heap size on DG2+ to 128MB. The HW is actually 4GB capable, but we have a workaround that requires the dynamic state heap to overlap with the bindless surface state heap. The actual limit therefore comes from our virtual address space setup, and it differs between descriptor buffers and regular descriptors. Signed-off-by: Lionel Landwerlin Fixes: fe037dec6e ("anv: expose VK_EXT_descriptor_buffer") Part-of: --- src/intel/vulkan/anv_device.c | 5 +++-- src/intel/vulkan/anv_private.h | 11 +++++++++-- src/intel/vulkan/genX_cmd_buffer.c | 6 +++--- src/intel/vulkan/genX_init_state.c | 3 ++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 3487e36f906..426cf523823 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1078,7 +1078,7 @@ get_properties_1_2(const struct anv_physical_device *pdevice, * advertise a larger limit here. 
*/ const unsigned max_bindless_views = - anv_physical_device_bindless_heap_size(pdevice) / ANV_SURFACE_STATE_SIZE; + anv_physical_device_bindless_heap_size(pdevice, false) / ANV_SURFACE_STATE_SIZE; p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views; p->shaderUniformBufferArrayNonUniformIndexingNative = false; p->shaderSampledImageArrayNonUniformIndexingNative = false; @@ -1567,7 +1567,8 @@ get_properties(const struct anv_physical_device *pdevice, props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE; props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor); props->maxSamplerDescriptorBufferRange = pdevice->va.descriptor_buffer_pool.size; - props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice); + props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice, + true); props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.descriptor_buffer_pool.size; props->descriptorBufferAddressSpaceSize = pdevice->va.descriptor_buffer_pool.size; props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.descriptor_buffer_pool.size; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0c816c9630d..c7ad35125f3 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1224,10 +1224,17 @@ struct anv_physical_device { }; static inline uint32_t -anv_physical_device_bindless_heap_size(const struct anv_physical_device *device) +anv_physical_device_bindless_heap_size(const struct anv_physical_device *device, + bool descriptor_buffer) { + /* Pre-Gfx12.5, the HW bindless surface heap is only 64MB. After it's 4GB, + * but we have some workarounds that require 2 heaps to overlap, so the + * size is dictated by our VA allocation. + */ return device->uses_ex_bso ? - 128 * 1024 * 1024 /* 128 MiB */ : + (descriptor_buffer ? 
+ device->va.descriptor_buffer_pool.size : + device->va.bindless_surface_state_pool.size) : 64 * 1024 * 1024 /* 64 MiB */; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1f2d4710d11..ca5ad262a30 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -217,7 +217,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) MIN2(device->physical->va.descriptor_buffer_pool.size - (cmd_buffer->state.descriptor_buffers.surfaces_address - device->physical->va.descriptor_buffer_pool.addr), - anv_physical_device_bindless_heap_size(device->physical)) : + anv_physical_device_bindless_heap_size(device->physical, true)) : (device->workaround_bo->size - device->workaround_address.offset); sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { .offset = surfaces_addr, @@ -265,7 +265,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) device->physical->va.bindless_surface_state_pool.addr, }; sba.BindlessSurfaceStateSize = - anv_physical_device_bindless_heap_size(device->physical) / + anv_physical_device_bindless_heap_size(device->physical, false) / ANV_SURFACE_STATE_SIZE - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; @@ -2647,7 +2647,7 @@ update_descriptor_set_surface_state(struct anv_cmd_buffer *cmd_buffer, pipe_state->descriptor_buffers[set_idx].buffer_offset; const uint64_t set_size = MIN2(va_range->size - (descriptor_set_addr - va_range->addr), - anv_physical_device_bindless_heap_size(device)); + anv_physical_device_bindless_heap_size(device, true)); if (descriptor_set_addr != pipe_state->descriptor_buffers[set_idx].address) { pipe_state->descriptor_buffers[set_idx].address = descriptor_set_addr; diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index b5e03212fa7..0076f89a39a 100644 --- a/src/intel/vulkan/genX_init_state.c +++ 
b/src/intel/vulkan/genX_init_state.c @@ -287,7 +287,8 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) device->physical->va.bindless_surface_state_pool.addr, }; sba.BindlessSurfaceStateSize = - anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; + anv_physical_device_bindless_heap_size(device->physical, false) / + ANV_SURFACE_STATE_SIZE - 1; sba.BindlessSurfaceStateMOCS = mocs; sba.BindlessSurfaceStateBaseAddressModifyEnable = true; } else {