diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 841d09c1d96..5346f82176f 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -157,6 +157,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
                          &device->general_state_pool, 16384);
    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
                          &device->indirect_push_descriptor_pool, 4096);
+   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
+                         &device->push_descriptor_buffer_pool, 4096);
 
    int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
                                     sizeof(struct anv_bo *));
@@ -209,6 +211,7 @@ destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
    anv_state_stream_finish(&cmd_buffer->general_state_stream);
    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
+   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
 
    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
@@ -280,6 +283,10 @@ reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                          &cmd_buffer->device->indirect_push_descriptor_pool,
                          4096);
 
+   anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
+   anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
+                         &cmd_buffer->device->push_descriptor_buffer_pool, 4096);
+
    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
      anv_device_release_bo(cmd_buffer->device, *bo);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index ce483508884..ba16ca9df48 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1845,6 +1845,34 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
    if (result != VK_SUCCESS)
       return result;
 
+   /* Replicate all non-protected memory types for descriptor buffers, so
+    * that we can identify those allocations and place them in the right
+    * memory heap.
+    */
+   device->memory.default_buffer_mem_types =
+      BITFIELD_RANGE(0, device->memory.type_count);
+   device->memory.protected_mem_types = 0;
+   device->memory.desc_buffer_mem_types = 0;
+
+   uint32_t base_types_count = device->memory.type_count;
+   for (int i = 0; i < base_types_count; i++) {
+      if (device->memory.types[i].propertyFlags &
+          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
+         device->memory.protected_mem_types |= BITFIELD_BIT(i);
+         continue;
+      }
+
+      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
+
+      device->memory.desc_buffer_mem_types |=
+         BITFIELD_BIT(device->memory.type_count);
+
+      struct anv_memory_type *new_type =
+         &device->memory.types[device->memory.type_count++];
+      *new_type = device->memory.types[i];
+      new_type->descriptor_buffer = true;
+   }
+
    for (unsigned i = 0; i < device->memory.type_count; i++) {
       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
@@ -3276,6 +3304,13 @@ VkResult anv_CreateDevice(
                          device->physical->va.bindless_surface_state_pool.size);
    }
 
+   /* Always initialized because the memory types point to this heap and
+    * they are on the physical device.
+    */
+   util_vma_heap_init(&device->vma_desc_buf,
+                      device->physical->va.descriptor_buffer_pool.addr,
+                      device->physical->va.descriptor_buffer_pool.size);
+
    util_vma_heap_init(&device->vma_samplers,
                       device->physical->va.sampler_state_pool.addr,
                       device->physical->va.sampler_state_pool.size);
@@ -3461,11 +3496,28 @@ VkResult anv_CreateDevice(
       goto fail_binding_table_pool;
    }
 
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125) {
+      /* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the
+       * only way we can wire up push descriptors is through the bindless
+       * heap. This state pool is a 1Gb carve-out of the 4Gb HW heap.
+       */
+      result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
+                                   &(struct anv_state_pool_params) {
+                                      .name = "push descriptor buffer state pool",
+                                      .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
+                                      .block_size = 4096,
+                                      .max_size = device->physical->va.push_descriptor_buffer_pool.size,
+                                   });
+      if (result != VK_SUCCESS)
+         goto fail_indirect_push_descriptor_pool;
+   }
+
    if (device->info->has_aux_map) {
       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
                                                &physical_device->info);
       if (!device->aux_map_ctx)
-         goto fail_indirect_push_descriptor_pool;
+         goto fail_push_descriptor_buffer_pool;
    }
 
    result = anv_device_alloc_bo(device, "workaround", 8192,
@@ -3721,6 +3773,10 @@ VkResult anv_CreateDevice(
       intel_aux_map_finish(device->aux_map_ctx);
       device->aux_map_ctx = NULL;
    }
+ fail_push_descriptor_buffer_pool:
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125)
+      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
 fail_indirect_push_descriptor_pool:
    if (device->physical->indirect_descriptors)
       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
@@ -3754,6 +3810,7 @@ VkResult anv_CreateDevice(
    util_vma_heap_finish(&device->vma_trtt);
    if (!device->physical->indirect_descriptors)
       util_vma_heap_finish(&device->vma_samplers);
+   util_vma_heap_finish(&device->vma_desc_buf);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3851,6 +3908,9 @@ void anv_DestroyDevice(
       device->aux_map_ctx = NULL;
    }
 
+   if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
+       device->info->verx10 >= 125)
+      anv_state_pool_finish(&device->push_descriptor_buffer_pool);
    if (device->physical->indirect_descriptors)
       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
    anv_state_pool_finish(&device->binding_table_pool);
@@ -3872,6 +3932,7 @@ void anv_DestroyDevice(
    util_vma_heap_finish(&device->vma_trtt);
    if (!device->physical->indirect_descriptors)
       util_vma_heap_finish(&device->vma_samplers);
+   util_vma_heap_finish(&device->vma_desc_buf);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3933,6 +3994,9 @@ anv_vma_heap_for_flags(struct anv_device *device,
    if (alloc_flags & ANV_BO_ALLOC_TRTT)
       return &device->vma_trtt;
 
+   if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL)
+      return &device->vma_desc_buf;
+
    if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
       return &device->vma_lo;
 
@@ -3959,6 +4023,7 @@ anv_vma_alloc(struct anv_device *device,
 
    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
       assert(*out_vma_heap == &device->vma_hi ||
+             *out_vma_heap == &device->vma_desc_buf ||
              *out_vma_heap == &device->vma_trtt);
 
       if (client_address) {
@@ -3994,6 +4059,7 @@ anv_vma_free(struct anv_device *device,
    assert(vma_heap == &device->vma_lo ||
          vma_heap == &device->vma_hi ||
          vma_heap == &device->vma_desc ||
+         vma_heap == &device->vma_desc_buf ||
          vma_heap == &device->vma_samplers ||
          vma_heap == &device->vma_trtt);
 
@@ -4173,6 +4239,9 @@ VkResult anv_AllocateMemory(
       }
    }
 
+   if (mem_type->descriptor_buffer)
+      alloc_flags |= ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL;
+
    if (mem->vk.ahardware_buffer) {
       result = anv_import_ahw_memory(_device, mem);
       if (result != VK_SUCCESS)
@@ -4716,19 +4785,16 @@ anv_get_buffer_memory_requirements(struct anv_device *device,
    * supported memory type for the resource. The bit `1<<i` is set if and
    * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    * structure for the physical device is supported.
    */
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected buffer bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(flags & VK_BUFFER_CREATE_PROTECTED_BIT) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
+   uint32_t memory_types =
+      (flags & VK_BUFFER_CREATE_PROTECTED_BIT) ?
+      device->physical->memory.protected_mem_types :
+      ((usage & (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+                 VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) ?
+       device->physical->memory.desc_buffer_mem_types :
+       device->physical->memory.default_buffer_mem_types);
 
    /* The GPU appears to write back to main memory in cachelines. Writes to a
    * buffers should not clobber with writes to another buffers so make sure
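The init_heaps hunk above is easiest to sanity-check in isolation: every non-protected base type gains a duplicate flagged as descriptor_buffer, and three masks describe the three populations. Below is a standalone sketch of that mask arithmetic, with the BITFIELD helpers re-derived locally and a made-up three-type table (two regular types, one protected) standing in for real hardware data:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for mesa's BITFIELD_BIT()/BITFIELD_RANGE(). */
#define BIT(b)       (1u << (b))
#define RANGE(lo, n) ((((n) == 32 ? 0u : BIT(n)) - 1) << (lo))

#define PROTECTED 0x1 /* stands in for VK_MEMORY_PROPERTY_PROTECTED_BIT */

int main(void)
{
   /* Hypothetical base types: two regular, one protected. */
   uint32_t flags[8] = { 0x0, 0x0, PROTECTED };
   uint32_t type_count = 3;

   uint32_t default_types   = RANGE(0, type_count);
   uint32_t protected_types = 0;
   uint32_t desc_buf_types  = 0;

   /* Mirror the loop in anv_physical_device_init_heaps(): protected types
    * are only recorded; every other type is appended again at the end of
    * the array and the copy becomes the descriptor-buffer variant. */
   uint32_t base_count = type_count;
   for (uint32_t i = 0; i < base_count; i++) {
      if (flags[i] & PROTECTED) {
         protected_types |= BIT(i);
         continue;
      }
      desc_buf_types |= BIT(type_count);
      flags[type_count++] = flags[i];
   }

   /* Types 0 and 1 are duplicated as 3 and 4; type 2 stays protected-only. */
   assert(default_types   == 0x07);
   assert(protected_types == 0x04);
   assert(desc_buf_types  == 0x18);
   printf("default=0x%x protected=0x%x desc_buf=0x%x\n",
          default_types, protected_types, desc_buf_types);
   return 0;
}

With the masks shaped this way, anv_AllocateMemory() can route an allocation into the descriptor-buffer VA heap purely from the memory type index the application picked.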
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 5ba5713c9e7..57d86489394 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1986,18 +1986,10 @@ anv_image_get_memory_requirements(struct anv_device *device,
    * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    * structure for the physical device is supported.
    */
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected image bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
+   uint32_t memory_types =
+      (image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT) ?
+      device->physical->memory.protected_mem_types :
+      device->physical->memory.default_buffer_mem_types;
 
    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
       switch (ext->sType) {
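The memoryTypeBits contract restated in the comment above is what applications key off; for reference, a minimal sketch of the standard selection loop on the consumer side (a pure function over VkPhysicalDeviceMemoryProperties, no anv internals assumed):

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Classic memoryTypeBits walk: bit i of type_bits says memory type i is
 * legal for the resource, so the protected/descriptor-buffer masks the
 * driver computes above simply pre-filter which bits an application can
 * ever see for a given resource. */
int32_t
find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                 uint32_t type_bits, VkMemoryPropertyFlags wanted)
{
   for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props->memoryTypes[i].propertyFlags & wanted) == wanted)
         return (int32_t)i;
   }
   return -1; /* no compatible type */
}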
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 484d7070cec..d7f0aa272cb 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -454,6 +454,9 @@ enum anv_bo_alloc_flags {
    * aligned to the AUX-TT requirements.
    */
    ANV_BO_ALLOC_AUX_CCS = (1 << 20),
+
+   /** For descriptor buffer pools */
+   ANV_BO_ALLOC_DESCRIPTOR_BUFFER_POOL = (1 << 21),
 };
 
 /** Specifies that the BO should be cached and coherent. */
@@ -936,6 +939,8 @@ struct anv_memory_type {
    /* Standard bits passed on to the client */
    VkMemoryPropertyFlags propertyFlags;
    uint32_t heapIndex;
+   /* Whether this is the descriptor buffer memory type */
+   bool descriptor_buffer;
 };
 
 struct anv_memory_heap {
@@ -1085,6 +1090,12 @@ struct anv_physical_device {
 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
       bool need_flush;
 #endif
+      /** Mask of memory types of normal allocations */
+      uint32_t default_buffer_mem_types;
+      /** Mask of memory types of descriptor buffers */
+      uint32_t desc_buffer_mem_types;
+      /** Mask of memory types of protected buffers/images */
+      uint32_t protected_mem_types;
    } memory;
 
    struct {
@@ -1133,6 +1144,14 @@ struct anv_physical_device {
       /**
        * Instruction state pool
        */
       struct anv_va_range instruction_state_pool;
+      /**
+       * Descriptor buffers
+       */
+      struct anv_va_range descriptor_buffer_pool;
+      /**
+       * Push descriptor with descriptor buffers
+       */
+      struct anv_va_range push_descriptor_buffer_pool;
       /**
        * Client heap
        */
@@ -1681,6 +1700,7 @@ struct anv_device {
    struct util_vma_heap vma_lo;
    struct util_vma_heap vma_hi;
    struct util_vma_heap vma_desc;
+   struct util_vma_heap vma_desc_buf;
    struct util_vma_heap vma_samplers;
    struct util_vma_heap vma_trtt;
 
@@ -1707,6 +1727,7 @@ struct anv_device {
    struct anv_state_pool internal_surface_state_pool;
    struct anv_state_pool bindless_surface_state_pool;
    struct anv_state_pool indirect_push_descriptor_pool;
+   struct anv_state_pool push_descriptor_buffer_pool;
 
    struct anv_state_reserved_pool custom_border_colors;
 
@@ -3702,6 +3723,7 @@ struct anv_cmd_buffer {
    struct anv_state_stream dynamic_state_stream;
    struct anv_state_stream general_state_stream;
    struct anv_state_stream indirect_push_descriptor_stream;
+   struct anv_state_stream push_descriptor_buffer_stream;
 
    VkCommandBufferUsageFlags usage_flags;
 
diff --git a/src/intel/vulkan/anv_va.c b/src/intel/vulkan/anv_va.c
index f7caa822c69..5e0f5db4d0c 100644
--- a/src/intel/vulkan/anv_va.c
+++ b/src/intel/vulkan/anv_va.c
@@ -61,6 +61,8 @@ anv_device_print_vas(struct anv_physical_device *device)
    PRINT_HEAP(indirect_descriptor_pool);
    PRINT_HEAP(indirect_push_descriptor_pool);
    PRINT_HEAP(instruction_state_pool);
+   PRINT_HEAP(descriptor_buffer_pool);
+   PRINT_HEAP(push_descriptor_buffer_pool);
    PRINT_HEAP(high_heap);
    PRINT_HEAP(trtt);
 }
@@ -143,6 +145,15 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
    address = align64(address, _4Gb);
    address = va_add(&device->va.instruction_state_pool, address, 2 * _1Gb);
 
+   address = align64(address, _4Gb);
+   address = va_add(&device->va.descriptor_buffer_pool, address, 2 * _1Gb);
+   assert(device->va.descriptor_buffer_pool.addr % _4Gb == 0);
+   if (device->info.verx10 >= 125)
+      address = va_add(&device->va.push_descriptor_buffer_pool, address, _1Gb - 4096);
+
+   assert(device->va.descriptor_buffer_pool.addr ==
+          align64(device->va.descriptor_buffer_pool.addr, 4 * _1Gb));
+
    /* What's left to do for us is to set va.high_heap and va.trtt without
     * overlap, but there are a few things to be considered:
     *
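The asserts added to anv_physical_device_init_va_ranges() encode the constraint behind the anv_CreateDevice() comment: the descriptor buffer range starts on a 4Gb boundary and, together with the push descriptor carve-out, stays inside a single 4Gb window, so offsets from the aligned base always fit in 32 bits. A standalone check of that arithmetic, using an arbitrary example cursor rather than the driver's real running address:

#include <assert.h>
#include <stdint.h>

#define _1Gb (1ull << 30)
#define _4Gb (1ull << 32)

static uint64_t
align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1); /* a must be a power of two */
}

int main(void)
{
   uint64_t address = 5 * _1Gb;       /* example cursor after earlier pools */

   address = align64(address, _4Gb);  /* 4Gb-aligned descriptor heap base */
   uint64_t desc_buf_addr = address;
   address += 2 * _1Gb;               /* descriptor_buffer_pool */
   address += _1Gb - 4096;            /* push_descriptor_buffer_pool */

   assert(desc_buf_addr % _4Gb == 0);
   /* 2Gb + (1Gb - 4096) < 4Gb: both pools live in one 4Gb window, matching
    * the "1Gb carve-out of the 4Gb HW heap" comment above. */
   assert(address - desc_buf_addr < _4Gb);
   return 0;
}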
diff --git a/src/intel/vulkan/anv_video.c b/src/intel/vulkan/anv_video.c
index 401aeb28115..070c1806cc3 100644
--- a/src/intel/vulkan/anv_video.c
+++ b/src/intel/vulkan/anv_video.c
@@ -371,19 +371,10 @@ anv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_video_session, vid, videoSession);
 
-   uint32_t memory_types = 0;
-   for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
-      /* Have the protected buffer bit match only the memory types with the
-       * equivalent bit.
-       */
-      if (!!(vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) !=
-          !!(device->physical->memory.types[i].propertyFlags &
-             VK_MEMORY_PROPERTY_PROTECTED_BIT))
-         continue;
-
-      memory_types |= 1ull << i;
-   }
-
+   uint32_t memory_types =
+      (vid->vk.flags & VK_VIDEO_SESSION_CREATE_PROTECTED_CONTENT_BIT_KHR) ?
+      device->physical->memory.protected_mem_types :
+      device->physical->memory.default_buffer_mem_types;
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
       get_h264_video_session_mem_reqs(vid,
diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c
index 743a6b765eb..77d24a89308 100644
--- a/src/intel/vulkan/i915/anv_batch_chain.c
+++ b/src/intel/vulkan/i915/anv_batch_chain.c
@@ -425,6 +425,12 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
    if (result != VK_SUCCESS)
       return result;
 
+   if (device->physical->va.push_descriptor_buffer_pool.size > 0) {
+      result = pin_state_pool(device, execbuf, &device->push_descriptor_buffer_pool);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    /* Add the BOs for all user allocated memory objects because we can't
    * track after binding updates of VK_EXT_descriptor_indexing and due to how
    * sparse resources work.
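For context on who ever sees desc_buffer_mem_types: only buffers created with the VK_EXT_descriptor_buffer usage bits report the replicated types, per the anv_get_buffer_memory_requirements() hunk. A hedged sketch of the application-side flow (error handling elided; the helper name and buffer size are placeholders):

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Creating a resource-descriptor buffer: the usage flags below are the
 * ones anv_get_buffer_memory_requirements() now checks before reporting
 * the replicated descriptor-buffer memory types in memoryTypeBits. */
VkBuffer
create_descriptor_buffer(VkDevice device, VkDeviceSize size,
                         VkMemoryRequirements *reqs /* out */)
{
   const VkBufferCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
               VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
   };
   VkBuffer buffer = VK_NULL_HANDLE;
   if (vkCreateBuffer(device, &info, NULL, &buffer) != VK_SUCCESS)
      return VK_NULL_HANDLE;

   /* memoryTypeBits here carries only the descriptor-buffer types, which
    * in turn steers the allocation into vma_desc_buf on the anv side. */
   vkGetBufferMemoryRequirements(device, buffer, reqs);
   return buffer;
}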