Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-29 08:00:12 +01:00)
anv: add new heap/pool for descriptor buffers
We'll use a new heap & a new pool for descriptor buffers. The heap will hold descriptor buffers, while the pool will only be used on Gfx12.5+ for push descriptors.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22151>
Parent commit: fc1aeb57bb
This commit: c6a91f1695
7 changed files with 133 additions and 38 deletions
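For context, here is a hedged application-side sketch (not part of the diff) of how VK_EXT_descriptor_buffer interacts with the new memory types: a buffer created with descriptor-buffer usage reports only the replicated descriptor-buffer memory types in memoryTypeBits, so the backing allocation ends up in the new heap. The function name and the host-visible type selection are illustrative only; it assumes a device with VK_EXT_descriptor_buffer and buffer device address enabled, and omits error handling.

#include <vulkan/vulkan.h>

/* Application-side sketch: create a resource descriptor buffer and back it
 * with one of the memory types the driver now reports for descriptor
 * buffers. */
VkBuffer
create_descriptor_buffer(VkPhysicalDevice phys_dev, VkDevice device,
                         VkDeviceSize size, VkDeviceMemory *out_mem)
{
   const VkBufferCreateInfo buf_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
               VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
   };
   VkBuffer buf;
   vkCreateBuffer(device, &buf_info, NULL, &buf);

   /* With this commit, memoryTypeBits for such a buffer only contains the
    * replicated descriptor-buffer memory types, so whichever type we pick
    * below places the BO in the dedicated descriptor-buffer VMA heap. */
   VkMemoryRequirements reqs;
   vkGetBufferMemoryRequirements(device, buf, &reqs);

   VkPhysicalDeviceMemoryProperties props;
   vkGetPhysicalDeviceMemoryProperties(phys_dev, &props);
   uint32_t type_index = 0;
   for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
      if ((reqs.memoryTypeBits & (1u << i)) &&
          (props.memoryTypes[i].propertyFlags &
           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
         type_index = i;
         break;
      }
   }

   const VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
   };
   const VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = reqs.size,
      .memoryTypeIndex = type_index,
   };
   vkAllocateMemory(device, &alloc_info, NULL, out_mem);
   vkBindBufferMemory(device, buf, *out_mem, 0);
   return buf;
}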
@@ -157,6 +157,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
                          &device->general_state_pool, 16384);
    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
                          &device->indirect_push_descriptor_pool, 4096);
+   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
+                         &device->push_descriptor_buffer_pool, 4096);
 
    int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
                                     sizeof(struct anv_bo *));
@@ -209,6 +211,7 @@ destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
    anv_state_stream_finish(&cmd_buffer->general_state_stream);
    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
+   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
 
    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
@@ -280,6 +283,10 @@ reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                           &cmd_buffer->device->indirect_push_descriptor_pool,
                           4096);
 
+   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
+   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
+                         &cmd_buffer->device->push_descriptor_buffer_pool, 4096);
+
    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
       anv_device_release_bo(cmd_buffer->device, *bo);
@@ -1845,6 +1845,34 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
    if (result != VK_SUCCESS)
       return result;
 
+   /* Replicate all non protected memory types for descriptor buffers because
+    * we want to identify memory allocations to place them in the right memory
+    * heap.
+    */
+   device->memory.default_buffer_mem_types =
+      BITFIELD_RANGE(0, device->memory.type_count);
+   device->memory.protected_mem_types = 0;
+   device->memory.desc_buffer_mem_types = 0;
+
+   uint32_t base_types_count = device->memory.type_count;
+   for (int i = 0; i < base_types_count; i++) {
+      if (device->memory.types[i].propertyFlags &
+          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
+         device->memory.protected_mem_types |= BITFIELD_BIT(i);
+         continue;
+      }
+
+      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
+
+      device->memory.desc_buffer_mem_types |=
+         BITFIELD_BIT(device->memory.type_count);
+
+      struct anv_memory_type *new_type =
+         &device->memory.types[device->memory.type_count++];
+      *new_type = device->memory.types[i];
+      new_type->descriptor_buffer = true;
+   }
+
    for (unsigned i = 0; i < device->memory.type_count; i++) {
       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
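The replication loop above duplicates every non-protected memory type and records three masks that later code selects from. A self-contained sketch of that bookkeeping, using stand-in types and local BIT/BIT_RANGE helpers rather than the driver's BITFIELD macros (the three base types in the example are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(b)       (1u << (b))
#define BIT_RANGE(n) ((n) >= 32 ? ~0u : BIT(n) - 1)   /* bits [0, n) */

struct mem_type { bool is_protected; bool descriptor_buffer; };

int main(void)
{
   /* Hypothetical starting point: three base types, type 2 is protected. */
   struct mem_type types[16] = {
      [0] = {0}, [1] = {0}, [2] = { .is_protected = true },
   };
   uint32_t count = 3;

   uint32_t default_mask = BIT_RANGE(count); /* the base types: 0b0111 */
   uint32_t protected_mask = 0;
   uint32_t desc_buffer_mask = 0;

   /* Duplicate every non-protected type; the copy is flagged as the
    * descriptor-buffer variant so allocations can be told apart later. */
   uint32_t base_count = count;
   for (uint32_t i = 0; i < base_count; i++) {
      if (types[i].is_protected) {
         protected_mask |= BIT(i);
         continue;
      }
      desc_buffer_mask |= BIT(count);
      types[count] = types[i];
      types[count].descriptor_buffer = true;
      count++;
   }

   /* Prints: default=0x07 protected=0x04 desc_buf=0x18 */
   printf("default=0x%02x protected=0x%02x desc_buf=0x%02x\n",
          (unsigned)default_mask, (unsigned)protected_mask,
          (unsigned)desc_buffer_mask);
   return 0;
}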
@@ -3276,6 +3304,13 @@ VkResult anv_CreateDevice(
                           device->physical->va.bindless_surface_state_pool.size);
    }
 
+   /* Always initialized because the memory types point to this and they
+    * are on the physical device.
+    */
+   util_vma_heap_init(&device->vma_desc_buf,
+                      device->physical->va.descriptor_buffer_pool.addr,
+                      device->physical->va.descriptor_buffer_pool.size);
+
    util_vma_heap_init(&device->vma_samplers,
                       device->physical->va.sampler_state_pool.addr,
                       device->physical->va.sampler_state_pool.size);
@@ -3461,11 +3496,28 @@ VkResult anv_CreateDevice(
          goto fail_binding_table_pool;
    }
 
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125) {
+      /* On Gfx12.5+ because of the bindless stages (Mesh, Task, RT), the only
+       * way we can wire push descriptors is through the bindless heap. This
+       * state pool is a 1Gb carve out of the 4Gb HW heap.
+       */
+      result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
+                                   &(struct anv_state_pool_params) {
+                                      .name = "push descriptor buffer state pool",
+                                      .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
+                                      .block_size = 4096,
+                                      .max_size = device->physical->va.push_descriptor_buffer_pool.size,
+                                   });
+      if (result != VK_SUCCESS)
+         goto fail_indirect_push_descriptor_pool;
+   }
+
    if (device->info->has_aux_map) {
       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
                                                &physical_device->info);
       if (!device->aux_map_ctx)
-         goto fail_indirect_push_descriptor_pool;
+         goto fail_push_descriptor_buffer_pool;
    }
 
    result = anv_device_alloc_bo(device, "workaround", 8192,
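The new pool follows the driver's usual state-pool/state-stream layering: the device reserves a fixed VA carve-out, and each command buffer bump-allocates its push descriptor data from its own stream on top of it. A minimal, self-contained sketch of that pattern (the types, addresses and sizes here are illustrative, not the driver's internals; real pools recycle blocks rather than bumping forever):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for a fixed-VA carve-out and a per-command-buffer
 * bump allocator layered on top of it. */
struct carveout_pool {
   uint64_t base_va; /* start of the reserved VA range */
   uint64_t size;    /* e.g. a ~1 GiB carve-out of a 4 GiB HW heap */
   uint64_t next;    /* next free offset */
};

struct alloc { uint64_t va; uint64_t offset; };

static struct alloc
pool_alloc(struct carveout_pool *pool, uint64_t size, uint64_t align)
{
   uint64_t offset = (pool->next + align - 1) & ~(align - 1);
   pool->next = offset + size;
   return (struct alloc) { .va = pool->base_va + offset, .offset = offset };
}

int main(void)
{
   /* Hypothetical 4 GiB-aligned base address for the carve-out. */
   struct carveout_pool push_pool = {
      .base_va = 0x0000000400000000ull,
      .size = (1ull << 30) - 4096,
   };

   /* Each push descriptor set recorded into a command buffer grabs a chunk. */
   struct alloc set0 = pool_alloc(&push_pool, 256, 64);
   struct alloc set1 = pool_alloc(&push_pool, 256, 64);
   printf("set0 @ 0x%llx, set1 @ 0x%llx\n",
          (unsigned long long)set0.va, (unsigned long long)set1.va);
   return 0;
}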
@@ -3721,6 +3773,10 @@ VkResult anv_CreateDevice(
       intel_aux_map_finish(device->aux_map_ctx);
       device->aux_map_ctx = NULL;
    }
+ fail_push_descriptor_buffer_pool:
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125)
+      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
  fail_indirect_push_descriptor_pool:
    if (device->physical->indirect_descriptors)
       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
@@ -3754,6 +3810,7 @@ VkResult anv_CreateDevice(
    util_vma_heap_finish(&device->vma_trtt);
    if (!device->physical->indirect_descriptors)
       util_vma_heap_finish(&device->vma_samplers);
+   util_vma_heap_finish(&device->vma_desc_buf);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3851,6 +3908,9 @@ void anv_DestroyDevice(
       device->aux_map_ctx = NULL;
    }
 
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125)
+      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
    if (device->physical->indirect_descriptors)
       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
    anv_state_pool_finish(&device->binding_table_pool);
@@ -3872,6 +3932,7 @@ void anv_DestroyDevice(
    util_vma_heap_finish(&device->vma_trtt);
    if (!device->physical->indirect_descriptors)
       util_vma_heap_finish(&device->vma_samplers);
+   util_vma_heap_finish(&device->vma_desc_buf);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3933,6 +3994,9 @@ anv_vma_heap_for_flags(struct anv_device *device,
    if (alloc_flags & ANV_BO_ALLOC_TRTT)
       return &device->vma_trtt;
 
+   if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL)
+      return &device->vma_desc_buf;
+
    if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
       return &device->vma_lo;
 
@@ -3959,6 +4023,7 @@ anv_vma_alloc(struct anv_device *device,
 
    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
       assert(*out_vma_heap == &device->vma_hi ||
+             *out_vma_heap == &device->vma_desc_buf ||
              *out_vma_heap == &device->vma_trtt);
 
       if (client_address) {
@@ -3994,6 +4059,7 @@ anv_vma_free(struct anv_device *device,
    assert(vma_heap == &device->vma_lo ||
           vma_heap == &device->vma_hi ||
           vma_heap == &device->vma_desc ||
+          vma_heap == &device->vma_desc_buf ||
           vma_heap == &device->vma_samplers ||
           vma_heap == &device->vma_trtt);
 
@@ -4173,6 +4239,9 @@ VkResult anv_AllocateMemory(
       }
    }
 
+   if (mem_type->descriptor_buffer)
+      alloc_flags |= ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL;
+
    if (mem->vk.ahardware_buffer) {
       result = anv_import_ahw_memory(_device, mem);
       if (result != VK_SUCCESS)
@@ -4716,19 +4785,16 @@ anv_get_buffer_memory_requirements(struct anv_device *device,
     * supported memory type for the resource. The bit `1<<i` is set if and
     * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
     * structure for the physical device is supported.
+    *
+    * We have special memory types for descriptor buffers.
     */
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected buffer bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(flags & VK_BUFFER_CREATE_PROTECTED_BIT) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
+   uint32_t memory_types =
+      (flags & VK_BUFFER_CREATE_PROTECTED_BIT) ?
+      device->physical->memory.protected_mem_types :
+      ((usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+                 VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) ?
+       device->physical->memory.desc_buffer_mem_types :
+       device->physical->memory.default_buffer_mem_types);
 
    /* The GPU appears to write back to main memory in cachelines. Writes to a
     * buffers should not clobber with writes to another buffers so make sure
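The per-call loop is gone; the reported memoryTypeBits is now a straight lookup into the masks computed at physical-device init. A stand-alone sketch of that selection logic (field and function names are illustrative stand-ins, not the driver's API):

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for the per-physical-device masks computed at init time. */
struct mem_masks {
   uint32_t default_types;     /* all base memory types               */
   uint32_t desc_buffer_types; /* replicated descriptor-buffer types  */
   uint32_t protected_types;   /* types with the PROTECTED bit        */
};

/* Pick the memoryTypeBits to report for a buffer: protected buffers may only
 * use protected types, descriptor buffers only the replicated types, and
 * everything else the regular types. */
uint32_t
buffer_memory_types(const struct mem_masks *m,
                    bool is_protected, bool is_descriptor_buffer)
{
   if (is_protected)
      return m->protected_types;
   if (is_descriptor_buffer)
      return m->desc_buffer_types;
   return m->default_types;
}

As in the diff, the protected check wins over the descriptor-buffer check, so a protected buffer never ends up in a replicated descriptor-buffer memory type.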
@@ -1986,18 +1986,10 @@ anv_image_get_memory_requirements(struct anv_device *device,
     * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
     * structure for the physical device is supported.
     */
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected image bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
+   uint32_t memory_types =
+      (image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) ?
+      device->physical->memory.protected_mem_types :
+      device->physical->memory.default_buffer_mem_types;
 
    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
       switch (ext->sType) {
@@ -454,6 +454,9 @@ enum anv_bo_alloc_flags {
     * aligned to the AUX-TT requirements.
     */
    ANV_BO_ALLOC_AUX_CCS = (1 << 20),
+
+   /** For descriptor buffer pools */
+   ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL = (1 << 21),
 };
 
 /** Specifies that the BO should be cached and coherent. */
@@ -936,6 +939,8 @@ struct anv_memory_type {
    /* Standard bits passed on to the client */
    VkMemoryPropertyFlags propertyFlags;
    uint32_t heapIndex;
+   /* Whether this is the descriptor buffer memory type */
+   bool descriptor_buffer;
 };
 
 struct anv_memory_heap {
@@ -1085,6 +1090,12 @@ struct anv_physical_device {
 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
       bool need_flush;
 #endif
+      /** Mask of memory types of normal allocations */
+      uint32_t default_buffer_mem_types;
+      /** Mask of memory types of descriptor buffers */
+      uint32_t desc_buffer_mem_types;
+      /** Mask of memory types of protected buffers/images */
+      uint32_t protected_mem_types;
    } memory;
 
    struct {
@@ -1133,6 +1144,14 @@ struct anv_physical_device {
        * Instruction state pool
        */
       struct anv_va_range instruction_state_pool;
+      /**
+       * Descriptor buffers
+       */
+      struct anv_va_range descriptor_buffer_pool;
+      /**
+       * Push descriptor with descriptor buffers
+       */
+      struct anv_va_range push_descriptor_buffer_pool;
       /**
        * Client heap
        */
@@ -1681,6 +1700,7 @@ struct anv_device {
    struct util_vma_heap vma_lo;
    struct util_vma_heap vma_hi;
    struct util_vma_heap vma_desc;
+   struct util_vma_heap vma_desc_buf;
    struct util_vma_heap vma_samplers;
    struct util_vma_heap vma_trtt;
 
@@ -1707,6 +1727,7 @@ struct anv_device {
    struct anv_state_pool internal_surface_state_pool;
    struct anv_state_pool bindless_surface_state_pool;
    struct anv_state_pool indirect_push_descriptor_pool;
+   struct anv_state_pool push_descriptor_buffer_pool;
 
    struct anv_state_reserved_pool custom_border_colors;
 
@@ -3702,6 +3723,7 @@ struct anv_cmd_buffer {
    struct anv_state_stream dynamic_state_stream;
    struct anv_state_stream general_state_stream;
    struct anv_state_stream indirect_push_descriptor_stream;
+   struct anv_state_stream push_descriptor_buffer_stream;
 
    VkCommandBufferUsageFlags usage_flags;
 
@@ -61,6 +61,8 @@ anv_device_print_vas(struct anv_physical_device *device)
    PRINT_HEAP(indirect_descriptor_pool);
    PRINT_HEAP(indirect_push_descriptor_pool);
    PRINT_HEAP(instruction_state_pool);
+   PRINT_HEAP(descriptor_buffer_pool);
+   PRINT_HEAP(push_descriptor_buffer_pool);
    PRINT_HEAP(high_heap);
    PRINT_HEAP(trtt);
 }
@@ -143,6 +145,15 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
    address = align64(address, _4Gb);
    address = va_add(&device->va.instruction_state_pool, address, 2 * _1Gb);
 
+   address = align64(address, _4Gb);
+   address = va_add(&device->va.descriptor_buffer_pool, address, 2 * _1Gb);
+   assert(device->va.descriptor_buffer_pool.addr % _4Gb == 0);
+   if (device->info.verx10 >= 125)
+      address = va_add(&device->va.push_descriptor_buffer_pool, address, _1Gb - 4096);
+
+   assert(device->va.descriptor_buffer_pool.addr ==
+          align64(device->va.descriptor_buffer_pool.addr, 4 * _1Gb));
+
    /* What's left to do for us is to set va.high_heap and va.trtt without
     * overlap, but there are a few things to be considered:
     *
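To make the layout concrete, here is a small sketch of the alignment and carve-out arithmetic used above, with made-up addresses and local re-implementations of align64/va_add (the real code's _1Gb/_4Gb constants and va ranges live in the driver; everything here is illustrative):

#include <stdint.h>
#include <stdio.h>

#define GiB (1ull << 30)

struct va_range { uint64_t addr, size; };

static uint64_t align64(uint64_t v, uint64_t a) { return (v + a - 1) & ~(a - 1); }

static uint64_t va_add(struct va_range *r, uint64_t addr, uint64_t size)
{
   r->addr = addr;
   r->size = size;
   return addr + size;
}

int main(void)
{
   /* Hypothetical running address after the instruction state pool. */
   uint64_t address = 0x0000000240000000ull;
   struct va_range descriptor_buffer_pool, push_descriptor_buffer_pool;

   /* The descriptor buffer pool starts on a 4 GiB boundary (the hardware
    * bindless heap is addressed relative to a 4 GiB-aligned base). */
   address = align64(address, 4 * GiB);
   address = va_add(&descriptor_buffer_pool, address, 2 * GiB);

   /* On Gfx12.5+, push descriptors get a ~1 GiB carve-out right after it,
    * trimmed by one page. */
   address = va_add(&push_descriptor_buffer_pool, address, 1 * GiB - 4096);

   printf("desc buf pool:  0x%llx size 0x%llx\n",
          (unsigned long long)descriptor_buffer_pool.addr,
          (unsigned long long)descriptor_buffer_pool.size);
   printf("push desc pool: 0x%llx size 0x%llx\n",
          (unsigned long long)push_descriptor_buffer_pool.addr,
          (unsigned long long)push_descriptor_buffer_pool.size);
   return 0;
}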
@@ -371,19 +371,10 @@ anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_video_session, vid, videoSession);
 
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected buffer bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
-
+   uint32_t memory_types =
+      (vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) ?
+      device->physical->memory.protected_mem_types :
+      device->physical->memory.default_buffer_mem_types;
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
       get_h264_video_session_mem_reqs(vid,
@@ -425,6 +425,12 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
    if (result != VK_SUCCESS)
       return result;
 
+   if (device->physical->va.push_descriptor_buffer_pool.size > 0) {
+      result = pin_state_pool(device, execbuf, &device->push_descriptor_buffer_pool);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    /* Add the BOs for all user allocated memory objects because we can't
     * track after binding updates of VK_EXT_descriptor_indexing and due to how
     * sparse resources work.