anv: add new heap/pool for descriptor buffers

We'll use a new heap and a new pool for descriptor buffers. The heap
will hold descriptor buffer allocations, while the pool will only be
used on Gfx12.5+ for push descriptors.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22151>
Author: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: 2023-03-03 17:07:44 +02:00 (committed by Marge Bot)
Parent: fc1aeb57bb
Commit: c6a91f1695
7 changed files with 133 additions and 38 deletions
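Before diving into the hunks, a minimal, self-contained sketch of the design may help. The types, names, and addresses below are simplified stand-ins invented for illustration (the real definitions live in the driver headers): BOs tagged for descriptor buffers get routed to a dedicated VMA heap, while push descriptors on Gfx12.5+ bypass BO allocation entirely and are sub-allocated from the new state pool.

    /* Sketch only: simplified stand-ins for the anv types; addresses are
     * invented for illustration. */
    #include <stdint.h>
    #include <stdio.h>

    #define ALLOC_DESCRIPTOR_BUFFER_POOL (1u << 21) /* mirrors ANV_BO_ALLOC_* */

    struct vma_heap { const char *name; uint64_t addr, size; };

    struct device {
       struct vma_heap vma_hi;       /* general client allocations */
       struct vma_heap vma_desc_buf; /* new: descriptor buffer heap */
       int verx10;                   /* 125 on Gfx12.5 */
    };

    /* Route a BO to a VA heap, as anv_vma_heap_for_flags() does below. */
    static struct vma_heap *
    heap_for_flags(struct device *dev, uint32_t alloc_flags)
    {
       if (alloc_flags & ALLOC_DESCRIPTOR_BUFFER_POOL)
          return &dev->vma_desc_buf;
       return &dev->vma_hi;
    }

    int main(void)
    {
       struct device dev = {
          .vma_hi       = { "vma_hi",       0x400000000ull, 1ull << 32 },
          .vma_desc_buf = { "vma_desc_buf", 0x300000000ull, 2ull << 30 },
          .verx10       = 125,
       };

       /* Memory types flagged descriptor_buffer add the alloc flag, so
        * their BOs land in the descriptor buffer heap. */
       printf("descriptor buffer BO -> %s\n",
              heap_for_flags(&dev, ALLOC_DESCRIPTOR_BUFFER_POOL)->name);

       /* Push descriptors on Gfx12.5+ don't allocate a BO here at all:
        * they are sub-allocated from push_descriptor_buffer_pool through
        * a per-command-buffer state stream (first hunk below). */
       if (dev.verx10 >= 125)
          printf("push descriptors -> push_descriptor_buffer_pool stream\n");
       return 0;
    }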

@@ -157,6 +157,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
&device->general_state_pool, 16384);
anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
&device->indirect_push_descriptor_pool, 4096);
anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
&device->push_descriptor_buffer_pool, 4096);
int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
sizeof(struct anv_bo *));
@@ -209,6 +211,7 @@ destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
anv_state_stream_finish(&cmd_buffer->general_state_stream);
anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
@@ -280,6 +283,10 @@ reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->device->indirect_push_descriptor_pool,
4096);
anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
&cmd_buffer->device->push_descriptor_buffer_pool, 4096);
while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
anv_device_release_bo(cmd_buffer->device, *bo);

@@ -1845,6 +1845,34 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
if (result != VK_SUCCESS)
return result;
/* Replicate all non-protected memory types for descriptor buffers, because
* we want to identify such memory allocations and place them in the right
* memory heap.
*/
device->memory.default_buffer_mem_types =
BITFIELD_RANGE(0, device->memory.type_count);
device->memory.protected_mem_types = 0;
device->memory.desc_buffer_mem_types = 0;
uint32_t base_types_count = device->memory.type_count;
for (int i = 0; i < base_types_count; i++) {
if (device->memory.types[i].propertyFlags &
VK_MEMORY_PROPERTY_PROTECTED_BIT) {
device->memory.protected_mem_types |= BITFIELD_BIT(i);
continue;
}
assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
device->memory.desc_buffer_mem_types |=
BITFIELD_BIT(device->memory.type_count);
struct anv_memory_type *new_type =
&device->memory.types[device->memory.type_count++];
*new_type = device->memory.types[i];
new_type->descriptor_buffer = true;
}
for (unsigned i = 0; i < device->memory.type_count; i++) {
VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
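As a worked example of the replication loop above, assume a hypothetical device exposing three base memory types with the last one protected; the sketch below reproduces the mask computation and prints the resulting bitfields.

    /* Hypothetical worked example of the replication loop above: three
     * base memory types, the last one protected.  Only the two
     * non-protected types get descriptor-buffer twins. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BITFIELD_BIT(b)      (1u << (b))
    #define BITFIELD_RANGE(b, c) (((1u << (c)) - 1) << (b))

    int main(void)
    {
       bool protected_type[8] = { false, false, true }; /* types 0..2 */
       uint32_t type_count = 3;

       uint32_t default_types = BITFIELD_RANGE(0, type_count); /* 0b00111 */
       uint32_t protected_types = 0, desc_buffer_types = 0;

       uint32_t base_count = type_count;
       for (uint32_t i = 0; i < base_count; i++) {
          if (protected_type[i]) {
             protected_types |= BITFIELD_BIT(i);
             continue;
          }
          /* Each non-protected type is duplicated at the end of the
           * list with descriptor_buffer = true. */
          desc_buffer_types |= BITFIELD_BIT(type_count);
          protected_type[type_count++] = false;
       }

       printf("default:     0x%02x\n", default_types);     /* 0x07 */
       printf("protected:   0x%02x\n", protected_types);   /* 0x04 */
       printf("desc buffer: 0x%02x\n", desc_buffer_types); /* 0x18: types 3,4 */
       return 0;
    }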
@@ -3276,6 +3304,13 @@ VkResult anv_CreateDevice(
device->physical->va.bindless_surface_state_pool.size);
}
/* Always initialized because the memory types point to this heap and they
* live on the physical device.
*/
util_vma_heap_init(&device->vma_desc_buf,
device->physical->va.descriptor_buffer_pool.addr,
device->physical->va.descriptor_buffer_pool.size);
util_vma_heap_init(&device->vma_samplers,
device->physical->va.sampler_state_pool.addr,
device->physical->va.sampler_state_pool.size);
@@ -3461,11 +3496,28 @@ VkResult anv_CreateDevice(
goto fail_binding_table_pool;
}
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125) {
/* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the only
* way we can wire push descriptors is through the bindless heap. This
* state pool is a 1Gb carve-out of the 4Gb HW heap.
*/
result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
&(struct anv_state_pool_params) {
.name = "push descriptor buffer state pool",
.base_address = device->physical->va.push_descriptor_buffer_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.push_descriptor_buffer_pool.size,
});
if (result != VK_SUCCESS)
goto fail_indirect_push_descriptor_pool;
}
if (device->info->has_aux_map) {
device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
&physical_device->info);
if (!device->aux_map_ctx)
-goto fail_indirect_push_descriptor_pool;
+goto fail_push_descriptor_buffer_pool;
}
result = anv_device_alloc_bo(device, "workaround", 8192,
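For illustration, here is a toy model of the state stream that sub-allocates from this pool. It is not the driver's allocator, and the base address is invented; it only shows the bump-allocation pattern within fixed-size blocks, matching the 4096-byte block size used for the pool and the per-command-buffer streams.

    /* Toy model only: a bump allocator over fixed-size blocks, standing
     * in for anv_state_stream on top of push_descriptor_buffer_pool.
     * The base address is invented. */
    #include <stdint.h>
    #include <stdio.h>

    struct toy_stream {
       uint64_t base;  /* block base VA, handed out by the pool */
       uint32_t next;  /* bump pointer within the block */
       uint32_t block; /* block size (4096 in the hunks above) */
    };

    static uint64_t toy_stream_alloc(struct toy_stream *s, uint32_t size,
                                     uint32_t align)
    {
       s->next = (s->next + align - 1) & ~(align - 1);
       if (s->next + size > s->block)
          return 0; /* a real stream would fetch a fresh block from the pool */
       uint64_t addr = s->base + s->next;
       s->next += size;
       return addr;
    }

    int main(void)
    {
       struct toy_stream s = { .base = 0x380000000ull, .block = 4096 };
       /* Two push descriptor sets recorded back to back: */
       printf("set 0 at 0x%llx\n",
              (unsigned long long)toy_stream_alloc(&s, 256, 64));
       printf("set 1 at 0x%llx\n",
              (unsigned long long)toy_stream_alloc(&s, 256, 64));
       return 0;
    }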
@@ -3721,6 +3773,10 @@ VkResult anv_CreateDevice(
intel_aux_map_finish(device->aux_map_ctx);
device->aux_map_ctx = NULL;
}
fail_push_descriptor_buffer_pool:
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125)
anv_state_pool_finish(&device->push_descriptor_buffer_pool);
fail_indirect_push_descriptor_pool:
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->indirect_push_descriptor_pool);
@@ -3754,6 +3810,7 @@ VkResult anv_CreateDevice(
util_vma_heap_finish(&device->vma_trtt);
if (!device->physical->indirect_descriptors)
util_vma_heap_finish(&device->vma_samplers);
util_vma_heap_finish(&device->vma_desc_buf);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
@@ -3851,6 +3908,9 @@ void anv_DestroyDevice(
device->aux_map_ctx = NULL;
}
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125)
anv_state_pool_finish(&device->push_descriptor_buffer_pool);
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->indirect_push_descriptor_pool);
anv_state_pool_finish(&device->binding_table_pool);
@@ -3872,6 +3932,7 @@ void anv_DestroyDevice(
util_vma_heap_finish(&device->vma_trtt);
if (!device->physical->indirect_descriptors)
util_vma_heap_finish(&device->vma_samplers);
util_vma_heap_finish(&device->vma_desc_buf);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
@@ -3933,6 +3994,9 @@ anv_vma_heap_for_flags(struct anv_device *device,
if (alloc_flags & ANV_BO_ALLOC_TRTT)
return &device->vma_trtt;
if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL)
return &device->vma_desc_buf;
if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
return &device->vma_lo;
@@ -3959,6 +4023,7 @@ anv_vma_alloc(struct anv_device *device,
if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
assert(*out_vma_heap == &device->vma_hi ||
*out_vma_heap == &device->vma_desc_buf ||
*out_vma_heap == &device->vma_trtt);
if (client_address) {
@@ -3994,6 +4059,7 @@ anv_vma_free(struct anv_device *device,
assert(vma_heap == &device->vma_lo ||
vma_heap == &device->vma_hi ||
vma_heap == &device->vma_desc ||
vma_heap == &device->vma_desc_buf ||
vma_heap == &device->vma_samplers ||
vma_heap == &device->vma_trtt);
@@ -4173,6 +4239,9 @@ VkResult anv_AllocateMemory(
}
}
if (mem_type->descriptor_buffer)
alloc_flags |= ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL;
if (mem->vk.ahardware_buffer) {
result = anv_import_ahw_memory(_device, mem);
if (result != VK_SUCCESS)
@@ -4716,19 +4785,16 @@ anv_get_buffer_memory_requirements(struct anv_device *device,
* supported memory type for the resource. The bit `1<<i` is set if and
* only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
* structure for the physical device is supported.
+ *
+ * We have special memory types for descriptor buffers.
*/
-uint32_t memory_types = 0;
-for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-   /* Have the protected buffer bit match only the memory types with the
-    * equivalent bit.
-    */
-   if (!!(flags & VK_BUFFER_CREATE_PROTECTED_BIT) !=
-       !!(device->physical->memory.types[i].propertyFlags &
-          VK_MEMORY_PROPERTY_PROTECTED_BIT))
-      continue;
-   memory_types |= 1ull << i;
-}
+uint32_t memory_types =
+   (flags & VK_BUFFER_CREATE_PROTECTED_BIT) ?
+   device->physical->memory.protected_mem_types :
+   ((usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+              VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) ?
+    device->physical->memory.desc_buffer_mem_types :
+    device->physical->memory.default_buffer_mem_types);
/* The GPU appears to write back to main memory in cachelines. Writes to
* one buffer should not clobber writes to another buffer, so make sure
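Tying the masks together, the sketch below is a hypothetical condensation of the selection above. The flag values are stand-ins for the Vulkan enums, and the masks are the ones from the earlier worked example.

    /* Hypothetical condensation of the selection above: protected wins,
     * then descriptor buffer usage, else the default mask.  Flag values
     * are stand-ins for the Vulkan enums. */
    #include <stdint.h>
    #include <stdio.h>

    #define CREATE_PROTECTED        (1u << 0)
    #define USAGE_RESOURCE_DESC_BUF (1u << 1)
    #define USAGE_SAMPLER_DESC_BUF  (1u << 2)

    struct memory_masks {
       uint32_t default_buffer, desc_buffer, protected;
    };

    static uint32_t
    buffer_memory_types(const struct memory_masks *m, uint32_t flags,
                        uint32_t usage)
    {
       if (flags & CREATE_PROTECTED)
          return m->protected;
       if (usage & (USAGE_RESOURCE_DESC_BUF | USAGE_SAMPLER_DESC_BUF))
          return m->desc_buffer;
       return m->default_buffer;
    }

    int main(void)
    {
       /* Masks from the earlier worked example: types 0-2 base, 3-4 twins. */
       struct memory_masks m = { 0x07, 0x18, 0x04 };
       printf("plain buffer:      0x%02x\n", buffer_memory_types(&m, 0, 0));
       printf("descriptor buffer: 0x%02x\n",
              buffer_memory_types(&m, 0, USAGE_RESOURCE_DESC_BUF));
       printf("protected buffer:  0x%02x\n",
              buffer_memory_types(&m, CREATE_PROTECTED, 0));
       return 0;
    }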

@@ -1986,18 +1986,10 @@ anv_image_get_memory_requirements(struct anv_device *device,
* only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
* structure for the physical device is supported.
*/
-uint32_t memory_types = 0;
-for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-   /* Have the protected image bit match only the memory types with the
-    * equivalent bit.
-    */
-   if (!!(image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) !=
-       !!(device->physical->memory.types[i].propertyFlags &
-          VK_MEMORY_PROPERTY_PROTECTED_BIT))
-      continue;
-   memory_types |= 1ull << i;
-}
+uint32_t memory_types =
+   (image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) ?
+   device->physical->memory.protected_mem_types :
+   device->physical->memory.default_buffer_mem_types;
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
switch (ext->sType) {

@@ -454,6 +454,9 @@ enum anv_bo_alloc_flags {
* aligned to the AUX-TT requirements.
*/
ANV_BO_ALLOC_AUX_CCS = (1 << 20),
/** For descriptor buffer pools */
ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL = (1 << 21),
};
/** Specifies that the BO should be cached and coherent. */
@@ -936,6 +939,8 @@ struct anv_memory_type {
/* Standard bits passed on to the client */
VkMemoryPropertyFlags propertyFlags;
uint32_t heapIndex;
/* Whether this is a memory type for descriptor buffers */
bool descriptor_buffer;
};
struct anv_memory_heap {
@@ -1085,6 +1090,12 @@ struct anv_physical_device {
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
bool need_flush;
#endif
/** Mask of memory types for normal allocations */
uint32_t default_buffer_mem_types;
/** Mask of memory types for descriptor buffers */
uint32_t desc_buffer_mem_types;
/** Mask of memory types for protected buffers/images */
uint32_t protected_mem_types;
} memory;
struct {
@@ -1133,6 +1144,14 @@ struct anv_physical_device {
* Instruction state pool
*/
struct anv_va_range instruction_state_pool;
/**
* Descriptor buffers
*/
struct anv_va_range descriptor_buffer_pool;
/**
* Push descriptor with descriptor buffers
*/
struct anv_va_range push_descriptor_buffer_pool;
/**
* Client heap
*/
@@ -1681,6 +1700,7 @@ struct anv_device {
struct util_vma_heap vma_lo;
struct util_vma_heap vma_hi;
struct util_vma_heap vma_desc;
struct util_vma_heap vma_desc_buf;
struct util_vma_heap vma_samplers;
struct util_vma_heap vma_trtt;
@@ -1707,6 +1727,7 @@ struct anv_device {
struct anv_state_pool internal_surface_state_pool;
struct anv_state_pool bindless_surface_state_pool;
struct anv_state_pool indirect_push_descriptor_pool;
struct anv_state_pool push_descriptor_buffer_pool;
struct anv_state_reserved_pool custom_border_colors;
@@ -3702,6 +3723,7 @@ struct anv_cmd_buffer {
struct anv_state_stream dynamic_state_stream;
struct anv_state_stream general_state_stream;
struct anv_state_stream indirect_push_descriptor_stream;
struct anv_state_stream push_descriptor_buffer_stream;
VkCommandBufferUsageFlags usage_flags;

@@ -61,6 +61,8 @@ anv_device_print_vas(struct anv_physical_device *device)
PRINT_HEAP(indirect_descriptor_pool);
PRINT_HEAP(indirect_push_descriptor_pool);
PRINT_HEAP(instruction_state_pool);
PRINT_HEAP(descriptor_buffer_pool);
PRINT_HEAP(push_descriptor_buffer_pool);
PRINT_HEAP(high_heap);
PRINT_HEAP(trtt);
}
@@ -143,6 +145,15 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
address = align64(address, _4Gb);
address = va_add(&device->va.instruction_state_pool, address, 2 * _1Gb);
address = align64(address, _4Gb);
address = va_add(&device->va.descriptor_buffer_pool, address, 2 * _1Gb);
assert(device->va.descriptor_buffer_pool.addr % _4Gb == 0);
if (device->info.verx10 >= 125)
address = va_add(&device->va.push_descriptor_buffer_pool, address, _1Gb - 4096);
assert(device->va.descriptor_buffer_pool.addr ==
align64(device->va.descriptor_buffer_pool.addr, 4 * _1Gb));
/* What's left to do for us is to set va.high_heap and va.trtt without
* overlap, but there are a few things to be considered:
*
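The bookkeeping above can be checked with a little arithmetic. The sketch below re-implements align64()/va_add() under assumed semantics (va_add() reserves the range and returns the advanced address, matching its use above) and an invented starting address.

    /* Self-contained sketch of the VA bookkeeping above.  The starting
     * address is an assumption; the real value depends on the pools laid
     * out earlier in the function. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define _1Gb (1ull << 30)
    #define _4Gb (4ull << 30)

    static uint64_t align64(uint64_t v, uint64_t a)
    {
       return (v + a - 1) & ~(a - 1);
    }

    struct va_range { uint64_t addr, size; };

    /* Reserve [address, address + size) and advance the running address. */
    static uint64_t va_add(struct va_range *r, uint64_t address, uint64_t size)
    {
       r->addr = address;
       r->size = size;
       return address + size;
    }

    int main(void)
    {
       struct va_range desc_buf_pool = {0}, push_desc_buf_pool = {0};
       uint64_t address = align64(0x280000000ull /* assumed */, _4Gb);

       address = va_add(&desc_buf_pool, address, 2 * _1Gb);
       assert(desc_buf_pool.addr % _4Gb == 0);

       /* Gfx12.5+ only: 2Gb + (1Gb - 4096) keeps both pools inside a
        * single 4Gb-aligned range, matching the carve-out comment in
        * the anv_CreateDevice hunk. */
       address = va_add(&push_desc_buf_pool, address, _1Gb - 4096);

       printf("descriptor buffers:     0x%09llx + 0x%llx\n",
              (unsigned long long)desc_buf_pool.addr,
              (unsigned long long)desc_buf_pool.size);
       printf("push descriptor states: 0x%09llx + 0x%llx\n",
              (unsigned long long)push_desc_buf_pool.addr,
              (unsigned long long)push_desc_buf_pool.size);
       return 0;
    }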

@@ -371,19 +371,10 @@ anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_video_session, vid, videoSession);
-uint32_t memory_types = 0;
-for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-   /* Have the protected buffer bit match only the memory types with the
-    * equivalent bit.
-    */
-   if (!!(vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) !=
-       !!(device->physical->memory.types[i].propertyFlags &
-          VK_MEMORY_PROPERTY_PROTECTED_BIT))
-      continue;
-   memory_types |= 1ull << i;
-}
+uint32_t memory_types =
+   (vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) ?
+   device->physical->memory.protected_mem_types :
+   device->physical->memory.default_buffer_mem_types;
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
get_h264_video_session_mem_reqs(vid,

@@ -425,6 +425,12 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
if (result != VK_SUCCESS)
return result;
if (device->physical->va.push_descriptor_buffer_pool.size > 0) {
result = pin_state_pool(device, execbuf, &device->push_descriptor_buffer_pool);
if (result != VK_SUCCESS)
return result;
}
/* Add the BOs for all user allocated memory objects, because we can't
* track update-after-bind updates of VK_EXT_descriptor_indexing and
* because of how sparse resources work.