From 6368c1445f44e3c05b399d9e279d36a79a1a6bcc Mon Sep 17 00:00:00 2001
From: Paulo Zanoni
Date: Tue, 18 Apr 2023 17:26:05 -0700
Subject: [PATCH] anv/sparse: add the initial code for Sparse Resources

This giant patch implements a huge chunk of the Vulkan Sparse Resources
API. I previously had this as a nice series of many smaller patches
that evolved as xe.ko added more features, but once I was asked to
squash some of the major reworks I realized I wouldn't easily be able
to rewrite history, so I just squashed basically the whole series into
a giant patch. I may end up splitting this again later if I find a way
to properly do it.

If we want to support the DX12 API through vkd3d we need to support
part of the Sparse Resources API. If we don't, a bunch of Steam games
won't work.

For now we only support the xe.ko backend, but the vast majority of the
code is KMD-independent, so an i915.ko implementation would reuse most
of what's here, just extending the part that binds and unbinds memory.

v2+: There's no way to sanely track the version history of this patch
in this commit message. Please refer to Gitlab.

Reviewed-by: Lionel Landwerlin
Signed-off-by: Paulo Zanoni
Part-of:
---
 src/intel/vulkan/anv_batch_chain.c | 108 ++++-
 src/intel/vulkan/anv_device.c      | 115 +++--
 src/intel/vulkan/anv_formats.c     | 120 +++++-
 src/intel/vulkan/anv_image.c       | 284 ++++++++++--
 src/intel/vulkan/anv_pipeline.c    |   1 +
 src/intel/vulkan/anv_private.h     | 103 +++++
 src/intel/vulkan/anv_sparse.c      | 670 +++++++++++++++++++++++++++++
 src/intel/vulkan/genX_cmd_buffer.c |  68 ++-
 src/intel/vulkan/meson.build       |   1 +
 9 files changed, 1360 insertions(+), 110 deletions(-)
 create mode 100644 src/intel/vulkan/anv_sparse.c

diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index fec72e2b2e2..d5c8681e44c 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1344,23 +1344,106 @@ can_chain_query_pools(struct anv_query_pool *p1, struct anv_query_pool *p2)
 }
 
 static VkResult
-anv_queue_submit_locked(struct anv_queue *queue,
-                        struct vk_queue_submit *submit,
-                        struct anv_utrace_submit *utrace_submit)
+anv_queue_submit_sparse_bind_locked(struct anv_queue *queue,
+                                    struct vk_queue_submit *submit)
 {
+   struct anv_device *device = queue->device;
    VkResult result;
 
-   if (unlikely((submit->buffer_bind_count ||
-                 submit->image_opaque_bind_count ||
-                 submit->image_bind_count))) {
+   /* When fake sparse is enabled, while we do accept creating "sparse"
+    * resources we can't really handle sparse submission. Fake sparse is
+    * supposed to be used by applications that request sparse to be enabled
+    * but don't actually *use* it.
+    */
+   if (!device->physical->has_sparse) {
       if (INTEL_DEBUG(DEBUG_SPARSE))
         fprintf(stderr, "=== application submitting sparse operations: "
                 "buffer_bind:%d image_opaque_bind:%d image_bind:%d\n",
                 submit->buffer_bind_count, submit->image_opaque_bind_count,
                 submit->image_bind_count);
-      fprintf(stderr, "Error: Using sparse operation. Sparse binding not supported.\n");
+      return vk_queue_set_lost(&queue->vk, "Sparse binding not supported");
    }
 
+   device->using_sparse = true;
+
+   assert(submit->command_buffer_count == 0);
+
+   /* TODO: make both the syncs and signals be passed as part of the vm_bind
+    * ioctl so they can be waited asynchronously. For now this doesn't matter
+    * as we're doing synchronous vm_bind, but later when we make it async this
+    * will make a difference.
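+    *
+    * As a rough sketch (not what this patch implements), an async version
+    * would hand the submit's waits and signals to the backend instead of
+    * blocking here, through some hypothetical entry point like:
+    *
+    *    device->kmd_backend->vm_bind_async(device, num_binds, binds,
+    *                                       submit->waits, submit->wait_count,
+    *                                       submit->signals,
+    *                                       submit->signal_count);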
+ */ + result = vk_sync_wait_many(&device->vk, submit->wait_count, submit->waits, + VK_SYNC_WAIT_COMPLETE, INT64_MAX); + if (result != VK_SUCCESS) + return vk_queue_set_lost(&queue->vk, "vk_sync_wait failed"); + + /* Do the binds */ + for (uint32_t i = 0; i < submit->buffer_bind_count; i++) { + VkSparseBufferMemoryBindInfo *bind_info = &submit->buffer_binds[i]; + ANV_FROM_HANDLE(anv_buffer, buffer, bind_info->buffer); + + assert(anv_buffer_is_sparse(buffer)); + + for (uint32_t j = 0; j < bind_info->bindCount; j++) { + result = anv_sparse_bind_resource_memory(device, + &buffer->sparse_data, + &bind_info->pBinds[j]); + if (result != VK_SUCCESS) + return result; + } + } + + for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++) { + VkSparseImageOpaqueMemoryBindInfo *bind_info = + &submit->image_opaque_binds[i]; + ANV_FROM_HANDLE(anv_image, image, bind_info->image); + + assert(anv_image_is_sparse(image)); + assert(!image->disjoint); + struct anv_sparse_binding_data *sparse_data = + &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN].sparse_data; + + for (uint32_t j = 0; j < bind_info->bindCount; j++) { + result = anv_sparse_bind_resource_memory(device, sparse_data, + &bind_info->pBinds[j]); + if (result != VK_SUCCESS) + return result; + } + } + + for (uint32_t i = 0; i < submit->image_bind_count; i++) { + VkSparseImageMemoryBindInfo *bind_info = &submit->image_binds[i]; + ANV_FROM_HANDLE(anv_image, image, bind_info->image); + + assert(anv_image_is_sparse(image)); + assert(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT); + + for (uint32_t j = 0; j < bind_info->bindCount; j++) { + result = anv_sparse_bind_image_memory(queue, image, + &bind_info->pBinds[j]); + if (result != VK_SUCCESS) + return result; + } + } + + for (uint32_t i = 0; i < submit->signal_count; i++) { + struct vk_sync_signal *s = &submit->signals[i]; + result = vk_sync_signal(&device->vk, s->sync, s->signal_value); + if (result != VK_SUCCESS) + return vk_queue_set_lost(&queue->vk, "vk_sync_signal failed"); + } + + return VK_SUCCESS; +} + +static VkResult +anv_queue_submit_cmd_buffers_locked(struct anv_queue *queue, + struct vk_queue_submit *submit, + struct anv_utrace_submit *utrace_submit) +{ + VkResult result; + if (submit->command_buffer_count == 0) { result = anv_queue_exec_locked(queue, submit->wait_count, submit->waits, 0 /* cmd_buffer_count */, @@ -1477,7 +1560,16 @@ anv_queue_submit(struct vk_queue *vk_queue, pthread_mutex_lock(&device->mutex); uint64_t start_ts = intel_ds_begin_submit(&queue->ds); - result = anv_queue_submit_locked(queue, submit, utrace_submit); + + if (submit->buffer_bind_count || + submit->image_opaque_bind_count || + submit->image_bind_count) { + result = anv_queue_submit_sparse_bind_locked(queue, submit); + } else { + result = anv_queue_submit_cmd_buffers_locked(queue, submit, + utrace_submit); + } + /* Take submission ID under lock */ intel_ds_end_submit(&queue->ds, start_ts); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index b83d38bec6e..292dcb65670 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -411,6 +411,9 @@ get_features(const struct anv_physical_device *pdevice, const bool mesh_shader = pdevice->vk.supported_extensions.EXT_mesh_shader; + const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse || + pdevice->has_sparse; + *features = (struct vk_features) { /* Vulkan 1.0 */ .robustBufferAccess = true, @@ -461,17 +464,17 @@ get_features(const struct anv_physical_device *pdevice, .shaderFloat64 = 
pdevice->info.has_64bit_float, .shaderInt64 = true, .shaderInt16 = true, - .shaderResourceResidency = pdevice->instance->has_fake_sparse, .shaderResourceMinLod = true, - .sparseBinding = pdevice->instance->has_fake_sparse, - .sparseResidencyBuffer = pdevice->instance->has_fake_sparse, - .sparseResidencyImage2D = pdevice->instance->has_fake_sparse, - .sparseResidencyImage3D = pdevice->instance->has_fake_sparse, + .shaderResourceResidency = has_sparse_or_fake, + .sparseBinding = has_sparse_or_fake, + .sparseResidencyAliased = has_sparse_or_fake, + .sparseResidencyBuffer = has_sparse_or_fake, + .sparseResidencyImage2D = has_sparse_or_fake, + .sparseResidencyImage3D = has_sparse_or_fake, .sparseResidency2Samples = false, .sparseResidency4Samples = false, .sparseResidency8Samples = false, .sparseResidency16Samples = false, - .sparseResidencyAliased = pdevice->instance->has_fake_sparse, .variableMultisampleRate = true, .inheritedQueries = true, @@ -1123,7 +1126,8 @@ static void anv_physical_device_init_queue_families(struct anv_physical_device *pdevice) { uint32_t family_count = 0; - VkQueueFlags sparse_flags = pdevice->instance->has_fake_sparse ? + VkQueueFlags sparse_flags = (pdevice->instance->has_fake_sparse || + pdevice->has_sparse) ? VK_QUEUE_SPARSE_BINDING_BIT : 0; if (pdevice->engine_info) { @@ -1393,6 +1397,9 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE; + device->has_sparse = device->info.kmd_type == INTEL_KMD_TYPE_XE && + debug_get_bool_option("ANV_SPARSE", false); + device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) || driQueryOptionb(&instance->dri_options, "always_flush_cache"); @@ -1668,6 +1675,9 @@ void anv_GetPhysicalDeviceProperties( const uint32_t max_workgroup_size = MIN2(1024, 32 * devinfo->max_cs_workgroup_threads); + const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse || + pdevice->has_sparse; + VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -1685,7 +1695,7 @@ void anv_GetPhysicalDeviceProperties( .maxMemoryAllocationCount = UINT32_MAX, .maxSamplerAllocationCount = 64 * 1024, .bufferImageGranularity = 1, - .sparseAddressSpaceSize = pdevice->instance->has_fake_sparse ? (1uLL << 48) : 0, + .sparseAddressSpaceSize = has_sparse_or_fake ? 
(1uLL << 48) : 0, .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, @@ -1811,11 +1821,11 @@ void anv_GetPhysicalDeviceProperties( VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, .limits = limits, .sparseProperties = { - .residencyStandard2DBlockShape = pdevice->instance->has_fake_sparse, - .residencyStandard2DMultisampleBlockShape = pdevice->instance->has_fake_sparse, - .residencyStandard3DBlockShape = pdevice->instance->has_fake_sparse, + .residencyStandard2DBlockShape = has_sparse_or_fake, + .residencyStandard2DMultisampleBlockShape = false, + .residencyStandard3DBlockShape = has_sparse_or_fake, .residencyAlignedMipSize = false, - .residencyNonResidentStrict = pdevice->instance->has_fake_sparse, + .residencyNonResidentStrict = has_sparse_or_fake, }, }; @@ -4322,6 +4332,7 @@ anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo) ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer); assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO); + assert(!anv_buffer_is_sparse(buffer)); if (mem) { assert(pBindInfo->memoryOffset < mem->vk.size); @@ -4346,22 +4357,6 @@ VkResult anv_BindBufferMemory2( return VK_SUCCESS; } -VkResult anv_QueueBindSparse( - VkQueue _queue, - uint32_t bindInfoCount, - const VkBindSparseInfo* pBindInfo, - VkFence fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - if (vk_device_is_lost(&queue->device->vk)) - return VK_ERROR_DEVICE_LOST; - - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); - - return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT); -} - // Event functions VkResult anv_CreateEvent( @@ -4446,6 +4441,7 @@ static void anv_get_buffer_memory_requirements(struct anv_device *device, VkDeviceSize size, VkBufferUsageFlags usage, + bool is_sparse, VkMemoryRequirements2* pMemoryRequirements) { /* The Vulkan spec (git aaed022) says: @@ -4463,6 +4459,18 @@ anv_get_buffer_memory_requirements(struct anv_device *device, */ uint32_t alignment = 64; + /* From the spec, section "Sparse Buffer and Fully-Resident Image Block + * Size": + * "The sparse block size in bytes for sparse buffers and fully-resident + * images is reported as VkMemoryRequirements::alignment. alignment + * represents both the memory alignment requirement and the binding + * granularity (in bytes) for sparse resources." 
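+    *
+    * As a concrete example: with our 64k ANV_SPARSE_BLOCK_SIZE, a sparse
+    * buffer created with size 100 * 1024 reports alignment = 65536 and
+    * size = align64(100 * 1024, 65536) = 131072.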
+ */ + if (is_sparse) { + alignment = ANV_SPARSE_BLOCK_SIZE; + size = align64(size, alignment); + } + pMemoryRequirements->memoryRequirements.size = size; pMemoryRequirements->memoryRequirements.alignment = alignment; @@ -4500,17 +4508,21 @@ void anv_GetDeviceBufferMemoryRequirementsKHR( VkMemoryRequirements2* pMemoryRequirements) { ANV_FROM_HANDLE(anv_device, device, _device); + const bool is_sparse = + pInfo->pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT; - if (INTEL_DEBUG(DEBUG_SPARSE) && pInfo->pCreateInfo->flags & - (VK_BUFFER_CREATE_SPARSE_BINDING_BIT | - VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT | - VK_BUFFER_CREATE_SPARSE_ALIASED_BIT)) + if (!device->physical->has_sparse && + INTEL_DEBUG(DEBUG_SPARSE) && + pInfo->pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT | + VK_BUFFER_CREATE_SPARSE_ALIASED_BIT)) fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__, __LINE__, pInfo->pCreateInfo->flags); anv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->usage, + is_sparse, pMemoryRequirements); } @@ -4523,10 +4535,11 @@ VkResult anv_CreateBuffer( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_buffer *buffer; - if (INTEL_DEBUG(DEBUG_SPARSE) && (pCreateInfo->flags & - (VK_BUFFER_CREATE_SPARSE_BINDING_BIT | - VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT | - VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))) + if (!device->physical->has_sparse && + INTEL_DEBUG(DEBUG_SPARSE) && + pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT | + VK_BUFFER_CREATE_SPARSE_ALIASED_BIT)) fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__, __LINE__, pCreateInfo->flags); @@ -4544,6 +4557,27 @@ VkResult anv_CreateBuffer( return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); buffer->address = ANV_NULL_ADDRESS; + if (anv_buffer_is_sparse(buffer)) { + const VkBufferOpaqueCaptureAddressCreateInfo *opaque_addr_info = + vk_find_struct_const(pCreateInfo->pNext, + BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO); + enum anv_bo_alloc_flags alloc_flags = 0; + uint64_t client_address = 0; + + if (opaque_addr_info) { + alloc_flags = ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS; + client_address = opaque_addr_info->opaqueCaptureAddress; + } + + VkResult result = anv_init_sparse_bindings(device, buffer->vk.size, + &buffer->sparse_data, + alloc_flags, client_address, + &buffer->address); + if (result != VK_SUCCESS) { + vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk); + return result; + } + } *pBuffer = anv_buffer_to_handle(buffer); @@ -4561,6 +4595,11 @@ void anv_DestroyBuffer( if (!buffer) return; + if (anv_buffer_is_sparse(buffer)) { + assert(buffer->address.offset == buffer->sparse_data.address); + anv_free_sparse_bindings(device, &buffer->sparse_data); + } + vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk); } @@ -4579,7 +4618,9 @@ uint64_t anv_GetBufferOpaqueCaptureAddress( VkDevice device, const VkBufferDeviceAddressInfo* pInfo) { - return 0; + ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer); + + return anv_address_physical(buffer->address); } uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress( diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 6fd9a8987ba..03d41d26da8 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -1784,33 +1784,113 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties2( return result; } -void anv_GetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - 
VkFormat format, - VkImageType type, - VkSampleCountFlagBits samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t* pNumProperties, - VkSparseImageFormatProperties* pProperties) -{ - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); - /* Sparse images are not yet supported. */ - *pNumProperties = 0; -} - void anv_GetPhysicalDeviceSparseImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2* pProperties) { - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct intel_device_info *devinfo = &physical_device->info; + VkImageAspectFlags aspects = vk_format_aspects(pFormatInfo->format); + VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, props, + pProperties, pPropertyCount); - /* Sparse images are not yet supported. */ - *pPropertyCount = 0; + if (!physical_device->has_sparse) { + if (INTEL_DEBUG(DEBUG_SPARSE)) + fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); + return; + } + + vk_foreach_struct_const(ext, pFormatInfo->pNext) + anv_debug_ignored_stype(ext->sType); + + if (anv_sparse_image_check_support(physical_device, + VK_IMAGE_CREATE_SPARSE_BINDING_BIT | + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT, + pFormatInfo->tiling, + pFormatInfo->samples, + pFormatInfo->type, + pFormatInfo->format) != VK_SUCCESS) { + return; + } + + VkExtent3D ds_granularity = {}; + VkSparseImageFormatProperties2 *ds_props_ptr = NULL; + + u_foreach_bit(b, aspects) { + VkImageAspectFlagBits aspect = 1 << b; + + const uint32_t plane = + anv_aspect_to_plane(vk_format_aspects(pFormatInfo->format), aspect); + struct anv_format_plane anv_format_plane = + anv_get_format_plane(devinfo, pFormatInfo->format, plane, + pFormatInfo->tiling); + enum isl_format isl_format = anv_format_plane.isl_format; + assert(isl_format != ISL_FORMAT_UNSUPPORTED); + + VkImageCreateFlags vk_create_flags = + VK_IMAGE_CREATE_SPARSE_BINDING_BIT | + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; + + isl_surf_usage_flags_t isl_usage = + anv_image_choose_isl_surf_usage(vk_create_flags, pFormatInfo->usage, + 0, aspect); + + const enum isl_surf_dim isl_surf_dim = + pFormatInfo->type == VK_IMAGE_TYPE_1D ? ISL_SURF_DIM_1D : + pFormatInfo->type == VK_IMAGE_TYPE_2D ? ISL_SURF_DIM_2D : + ISL_SURF_DIM_3D; + + struct isl_surf isl_surf; + bool ok = isl_surf_init(&physical_device->isl_dev, &isl_surf, + .dim = isl_surf_dim, + .format = isl_format, + .width = 1, + .height = 1, + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = pFormatInfo->samples, + .min_alignment_B = 0, + .row_pitch_B = 0, + .usage = isl_usage, + .tiling_flags = ISL_TILING_ANY_MASK); + if (!ok) { + /* There's no way to return an error code! */ + assert(false); + *pPropertyCount = 0; + return; + } + + VkSparseImageFormatProperties format_props = + anv_sparse_calc_image_format_properties(physical_device, aspect, + pFormatInfo->type, + &isl_surf); + + /* If both depth and stencil are the same, unify them if possible. 
*/ + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!ds_props_ptr) { + ds_granularity = format_props.imageGranularity; + } else if (ds_granularity.width == + format_props.imageGranularity.width && + ds_granularity.height == + format_props.imageGranularity.height && + ds_granularity.depth == + format_props.imageGranularity.depth) { + ds_props_ptr->properties.aspectMask |= aspect; + continue; + } + } + + vk_outarray_append_typed(VkSparseImageFormatProperties2, &props, p) { + p->properties = format_props; + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) + ds_props_ptr = p; + } + } } void anv_GetPhysicalDeviceExternalBufferProperties( diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 5fa0a1aa678..d8e8220d826 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -56,8 +56,9 @@ memory_range_end(struct anv_image_memory_range memory_range) * Get binding for VkImagePlaneMemoryRequirementsInfo, * VkBindImagePlaneMemoryInfo and VkDeviceImageMemoryRequirements. */ -static struct anv_image_binding * -image_aspect_to_binding(struct anv_image *image, VkImageAspectFlags aspect) +struct anv_image_binding * +anv_image_aspect_to_binding(struct anv_image *image, + VkImageAspectFlags aspect) { uint32_t plane = 0; @@ -200,11 +201,11 @@ memory_range_merge(struct anv_image_memory_range *a, a->size = MAX2(a->size, b.offset + b.size); } -static isl_surf_usage_flags_t -choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, - VkImageUsageFlags vk_usage, - isl_surf_usage_flags_t isl_extra_usage, - VkImageAspectFlagBits aspect) +isl_surf_usage_flags_t +anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, + VkImageUsageFlags vk_usage, + isl_surf_usage_flags_t isl_extra_usage, + VkImageAspectFlagBits aspect) { isl_surf_usage_flags_t isl_usage = isl_extra_usage; @@ -223,6 +224,10 @@ choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, if (vk_usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) isl_usage |= ISL_SURF_USAGE_CPB_BIT; + if (vk_create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) + isl_usage |= ISL_SURF_USAGE_SPARSE_BIT | + ISL_SURF_USAGE_DISABLE_AUX_BIT; + if (vk_usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR || vk_usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) isl_usage |= ISL_SURF_USAGE_VIDEO_DECODE_BIT; @@ -665,6 +670,10 @@ add_aux_surface_if_supported(struct anv_device *device, if ((isl_extra_usage_flags & ISL_SURF_USAGE_DISABLE_AUX_BIT)) return VK_SUCCESS; + /* TODO: consider whether compression with sparse is workable. */ + if (anv_image_is_sparse(image)) + return VK_SUCCESS; + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { /* We don't advertise that depth buffers could be used as storage * images. 
@@ -1180,8 +1189,8 @@ add_all_surfaces_implicit_layout( VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect); isl_surf_usage_flags_t isl_usage = - choose_isl_surf_usage(image->vk.create_flags, vk_usage, - isl_extra_usage_flags, aspect); + anv_image_choose_isl_surf_usage(image->vk.create_flags, vk_usage, + isl_extra_usage_flags, aspect); result = add_primary_surface(device, image, plane, plane_format, ANV_OFFSET_IMPLICIT, plane_stride, @@ -1387,6 +1396,63 @@ alloc_private_binding(struct anv_device *device, return result; } +static void +anv_image_finish_sparse_bindings(struct anv_image *image) +{ + struct anv_device *device = + container_of(image->vk.base.device, struct anv_device, vk); + + assert(anv_image_is_sparse(image)); + + for (int i = 0; i < ANV_IMAGE_MEMORY_BINDING_END; i++) { + struct anv_image_binding *b = &image->bindings[i]; + + if (b->sparse_data.size != 0) { + assert(b->memory_range.size == b->sparse_data.size); + assert(b->address.offset == b->sparse_data.address); + anv_free_sparse_bindings(device, &b->sparse_data); + } + } +} + +static VkResult MUST_CHECK +anv_image_init_sparse_bindings(struct anv_image *image) +{ + struct anv_device *device = + container_of(image->vk.base.device, struct anv_device, vk); + VkResult result; + + assert(anv_image_is_sparse(image)); + + for (int i = 0; i < ANV_IMAGE_MEMORY_BINDING_END; i++) { + struct anv_image_binding *b = &image->bindings[i]; + + if (b->memory_range.size != 0) { + assert(b->sparse_data.size == 0); + + /* From the spec, Custom Sparse Image Block Shapes section: + * "... the size in bytes of the custom sparse image block shape + * will be reported in VkMemoryRequirements::alignment." + * + * ISL should have set this for us, so just assert it here. + */ + assert(b->memory_range.alignment == ANV_SPARSE_BLOCK_SIZE); + assert(b->memory_range.size % ANV_SPARSE_BLOCK_SIZE == 0); + + result = anv_init_sparse_bindings(device, + b->memory_range.size, + &b->sparse_data, 0, 0, + &b->address); + if (result != VK_SUCCESS) { + anv_image_finish_sparse_bindings(image); + return result; + } + } + } + + return VK_SUCCESS; +} + VkResult anv_image_init(struct anv_device *device, struct anv_image *image, const struct anv_image_create_info *create_info) @@ -1502,6 +1568,12 @@ anv_image_init(struct anv_device *device, struct anv_image *image, can_fast_clear_with_non_zero_color(device->info, image, p, fmt_list); } + if (anv_image_is_sparse(image)) { + r = anv_image_init_sparse_bindings(image); + if (r != VK_SUCCESS) + goto fail; + } + return VK_SUCCESS; fail: @@ -1515,6 +1587,9 @@ anv_image_finish(struct anv_image *image) struct anv_device *device = container_of(image->vk.base.device, struct anv_device, vk); + if (anv_image_is_sparse(image)) + anv_image_finish_sparse_bindings(image); + if (image->from_gralloc) { assert(!image->disjoint); assert(image->n_planes == 1); @@ -1549,6 +1624,18 @@ anv_image_init_from_create_info(struct anv_device *device, const VkImageCreateInfo *pCreateInfo, bool no_private_binding_alloc) { + if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) { + VkResult result = + anv_sparse_image_check_support(device->physical, + pCreateInfo->flags, + pCreateInfo->tiling, + pCreateInfo->samples, + pCreateInfo->imageType, + pCreateInfo->format); + if (result != VK_SUCCESS) + return result; + } + const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); if (gralloc_info) @@ -1583,10 +1670,11 @@ VkResult anv_CreateImage( { ANV_FROM_HANDLE(anv_device, device, _device); 
- if (INTEL_DEBUG(DEBUG_SPARSE) && (pCreateInfo->flags & - (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | - VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | - VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))) + if (!device->physical->has_sparse && + INTEL_DEBUG(DEBUG_SPARSE) && + pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | + VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)) fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__, __LINE__, pCreateInfo->flags); @@ -1733,7 +1821,7 @@ anv_image_get_memory_requirements(struct anv_device *device, if (image->disjoint) { assert(util_bitcount(aspects) == 1); assert(aspects & image->vk.aspects); - binding = image_aspect_to_binding(image, aspects); + binding = anv_image_aspect_to_binding(image, aspects); } else { assert(aspects == image->vk.aspects); binding = &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN]; @@ -1784,10 +1872,11 @@ void anv_GetDeviceImageMemoryRequirementsKHR( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_image image = { 0 }; - if (INTEL_DEBUG(DEBUG_SPARSE) && (pInfo->pCreateInfo->flags & - (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | - VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | - VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))) + if (!device->physical->has_sparse && + INTEL_DEBUG(DEBUG_SPARSE) && + pInfo->pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | + VK_IMAGE_CREATE_SPARSE_ALIASED_BIT)) fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__, __LINE__, pInfo->pCreateInfo->flags); @@ -1800,39 +1889,158 @@ void anv_GetDeviceImageMemoryRequirementsKHR( anv_image_get_memory_requirements(device, &image, aspects, pMemoryRequirements); + anv_image_finish(&image); } -void anv_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +static void +anv_image_get_sparse_memory_requirements( + struct anv_device *device, + struct anv_image *image, + VkImageAspectFlags aspects, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) { - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); - *pSparseMemoryRequirementCount = 0; + VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, reqs, + pSparseMemoryRequirements, + pSparseMemoryRequirementCount); + + /* From the spec: + * "The sparse image must have been created using the + * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT flag to retrieve valid sparse + * image memory requirements." 
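+    *
+    * In practice this means an image created with only
+    * VK_IMAGE_CREATE_SPARSE_BINDING_BIT is bound through opaque binds
+    * alone, so we return an empty set of requirements for it below.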
+ */ + if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) + return; + + VkSparseImageMemoryRequirements ds_mem_reqs = {}; + VkSparseImageMemoryRequirements2 *ds_reqs_ptr = NULL; + + u_foreach_bit(b, aspects) { + VkImageAspectFlagBits aspect = 1 << b; + const uint32_t plane = anv_image_aspect_to_plane(image, aspect); + struct isl_surf *surf = &image->planes[plane].primary_surface.isl; + + VkSparseImageFormatProperties format_props = + anv_sparse_calc_image_format_properties(device->physical, aspect, + image->vk.image_type, surf); + + uint32_t miptail_first_lod; + VkDeviceSize miptail_size, miptail_offset, miptail_stride; + anv_sparse_calc_miptail_properties(device, image, aspect, + &miptail_first_lod, &miptail_size, + &miptail_offset, &miptail_stride); + + VkSparseImageMemoryRequirements mem_reqs = { + .formatProperties = format_props, + .imageMipTailFirstLod = miptail_first_lod, + .imageMipTailSize = miptail_size, + .imageMipTailOffset = miptail_offset, + .imageMipTailStride = miptail_stride, + }; + + /* If both depth and stencil are the same, unify them if possible. */ + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!ds_reqs_ptr) { + ds_mem_reqs = mem_reqs; + } else if (ds_mem_reqs.formatProperties.imageGranularity.width == + mem_reqs.formatProperties.imageGranularity.width && + ds_mem_reqs.formatProperties.imageGranularity.height == + mem_reqs.formatProperties.imageGranularity.height && + ds_mem_reqs.formatProperties.imageGranularity.depth == + mem_reqs.formatProperties.imageGranularity.depth && + ds_mem_reqs.imageMipTailFirstLod == + mem_reqs.imageMipTailFirstLod && + ds_mem_reqs.imageMipTailSize == + mem_reqs.imageMipTailSize && + ds_mem_reqs.imageMipTailOffset == + mem_reqs.imageMipTailOffset && + ds_mem_reqs.imageMipTailStride == + mem_reqs.imageMipTailStride) { + ds_reqs_ptr->memoryRequirements.formatProperties.aspectMask |= + aspect; + continue; + } + } + + vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &reqs, r) { + r->memoryRequirements = mem_reqs; + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)) + ds_reqs_ptr = r; + } + } } void anv_GetImageSparseMemoryRequirements2( - VkDevice device, + VkDevice _device, const VkImageSparseMemoryRequirementsInfo2* pInfo, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) { - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); - *pSparseMemoryRequirementCount = 0; + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, pInfo->image); + + if (!anv_sparse_residency_is_enabled(device)) { + if (!device->physical->has_sparse && INTEL_DEBUG(DEBUG_SPARSE)) + fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); + + *pSparseMemoryRequirementCount = 0; + return; + } + + anv_image_get_sparse_memory_requirements(device, image, image->vk.aspects, + pSparseMemoryRequirementCount, + pSparseMemoryRequirements); } -void anv_GetDeviceImageSparseMemoryRequirementsKHR( - VkDevice device, - const VkDeviceImageMemoryRequirements* pInfo, +void anv_GetDeviceImageSparseMemoryRequirements( + VkDevice _device, + const VkDeviceImageMemoryRequirements* pInfo, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) { - if (INTEL_DEBUG(DEBUG_SPARSE)) - fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__); - *pSparseMemoryRequirementCount = 0; + ANV_FROM_HANDLE(anv_device, device, 
_device);
+   struct anv_image image = { 0 };
+
+   if (!anv_sparse_residency_is_enabled(device)) {
+      if (!device->physical->has_sparse && INTEL_DEBUG(DEBUG_SPARSE))
+         fprintf(stderr, "=== [%s:%d] [%s]\n", __FILE__, __LINE__, __func__);
+
+      *pSparseMemoryRequirementCount = 0;
+      return;
+   }
+
+   /* This function is similar to anv_GetDeviceImageMemoryRequirementsKHR, in
+    * that it actually creates an image, gets the properties and then
+    * destroys the image.
+    *
+    * We could one day refactor things to allow us to gather the properties
+    * without having to actually create the image, maybe by reworking ISL to
+    * separate creation from parameter computing.
+    */
+
+   ASSERTED VkResult result =
+      anv_image_init_from_create_info(device, &image, pInfo->pCreateInfo,
+                                      true /* no_private_binding_alloc */);
+   assert(result == VK_SUCCESS);
+
+   /* The spec says:
+    *   "planeAspect is a VkImageAspectFlagBits value specifying the aspect
+    *    corresponding to the image plane to query. This parameter is ignored
+    *    unless pCreateInfo::tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+    *    or pCreateInfo::flags has VK_IMAGE_CREATE_DISJOINT_BIT set."
+    */
+   VkImageAspectFlags aspects =
+      (pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT) ||
+      (pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+         ? pInfo->planeAspect : image.vk.aspects;
+
+   anv_image_get_sparse_memory_requirements(device, &image, aspects,
+                                            pSparseMemoryRequirementCount,
+                                            pSparseMemoryRequirements);
+
+   anv_image_finish(&image);
 }
 
 VkResult anv_BindImageMemory2(
@@ -1848,6 +2056,8 @@ VkResult anv_BindImageMemory2(
       ANV_FROM_HANDLE(anv_image, image, bind_info->image);
       bool did_bind = false;
 
+      assert(!anv_image_is_sparse(image));
+
       /* Resolve will alter the image's aspects, do this first. */
       if (mem && mem->vk.ahardware_buffer)
          resolve_ahw_image(device, image, mem);
@@ -1872,7 +2082,7 @@ VkResult anv_BindImageMemory2(
             break;
 
          struct anv_image_binding *binding =
-            image_aspect_to_binding(image, plane_info->planeAspect);
+            anv_image_aspect_to_binding(image, plane_info->planeAspect);
 
          binding->address = (struct anv_address) {
             .bo = mem->bo,
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 831a1ab4156..bb2607feb5e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -184,6 +184,7 @@ anv_shader_stage_to_nir(struct anv_device *device,
       .ray_tracing_position_fetch = rt_enabled,
       .shader_clock = true,
       .shader_viewport_index_layer = true,
+      .sparse_residency = pdevice->has_sparse,
       .stencil_export = true,
       .storage_8bit = true,
       .storage_16bit = true,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index b825ce4bcad..85137521a2d 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -916,6 +916,11 @@ struct anv_physical_device {
    /** Whether the i915 driver has the ability to create VM objects */
    bool has_vm_control;
 
+   /** True if we have the means to do sparse binding (e.g., a kernel driver
+    * with a vm_bind ioctl).
+    */
+   bool has_sparse;
+
    /**/
    bool uses_ex_bso;
@@ -1648,6 +1653,14 @@ struct anv_device {
     * Command pool for companion RCS command buffer.
     */
    VkCommandPool companion_rcs_cmd_pool;
+
+   /* This is true if the user ever bound a sparse resource to memory. This
+    * is used for a workaround that makes every memoryBarrier flush more
+    * things than it should. Many applications request the sparse features
+    * to be enabled but don't use them, and some create sparse resources
+    * but never use them.
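+    *
+    * See cmd_buffer_barrier() in genX_cmd_buffer.c: once this is set, any
+    * barrier whose source access mask includes a write also gets the
+    * flushes needed for sparse (NULL-bound) regions.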
+ */ + bool using_sparse; }; static inline uint32_t @@ -2576,13 +2589,86 @@ const struct anv_descriptor_set_layout * anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout, uint8_t *desc_idx); +struct anv_sparse_binding_data { + uint64_t address; + uint64_t size; + + /* This is kept only because it's given to us by vma_alloc() and need to be + * passed back to vma_free(), we have no other particular use for it + */ + struct util_vma_heap *vma_heap; +}; + +#define ANV_SPARSE_BLOCK_SIZE (64 * 1024) + +static inline bool +anv_sparse_binding_is_enabled(struct anv_device *device) +{ + return device->vk.enabled_features.sparseBinding; +} + +static inline bool +anv_sparse_residency_is_enabled(struct anv_device *device) +{ + return device->vk.enabled_features.sparseResidencyBuffer || + device->vk.enabled_features.sparseResidencyImage2D || + device->vk.enabled_features.sparseResidencyImage3D || + device->vk.enabled_features.sparseResidency2Samples || + device->vk.enabled_features.sparseResidency4Samples || + device->vk.enabled_features.sparseResidency8Samples || + device->vk.enabled_features.sparseResidency16Samples || + device->vk.enabled_features.sparseResidencyAliased; +} + +VkResult anv_init_sparse_bindings(struct anv_device *device, + uint64_t size, + struct anv_sparse_binding_data *sparse, + enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, + struct anv_address *out_address); +VkResult anv_free_sparse_bindings(struct anv_device *device, + struct anv_sparse_binding_data *sparse); +VkResult anv_sparse_bind_resource_memory(struct anv_device *device, + struct anv_sparse_binding_data *data, + const VkSparseMemoryBind *bind_); +VkResult anv_sparse_bind_image_memory(struct anv_queue *queue, + struct anv_image *image, + const VkSparseImageMemoryBind *bind); + +VkSparseImageFormatProperties +anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice, + VkImageAspectFlags aspect, + VkImageType vk_image_type, + struct isl_surf *surf); +void anv_sparse_calc_miptail_properties(struct anv_device *device, + struct anv_image *image, + VkImageAspectFlags vk_aspect, + uint32_t *imageMipTailFirstLod, + VkDeviceSize *imageMipTailSize, + VkDeviceSize *imageMipTailOffset, + VkDeviceSize *imageMipTailStride); +VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice, + VkImageCreateFlags flags, + VkImageTiling tiling, + VkSampleCountFlagBits samples, + VkImageType type, + VkFormat format); + struct anv_buffer { struct vk_buffer vk; /* Set when bound */ struct anv_address address; + + struct anv_sparse_binding_data sparse_data; }; +static inline bool +anv_buffer_is_sparse(struct anv_buffer *buffer) +{ + return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT; +} + enum anv_cmd_dirty_bits { ANV_CMD_DIRTY_PIPELINE = 1 << 0, ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1, @@ -4472,6 +4558,7 @@ struct anv_image { struct anv_image_binding { struct anv_image_memory_range memory_range; struct anv_address address; + struct anv_sparse_binding_data sparse_data; } bindings[ANV_IMAGE_MEMORY_BINDING_END]; /** @@ -4525,6 +4612,12 @@ struct anv_image { struct list_head link; }; +static inline bool +anv_image_is_sparse(struct anv_image *image) +{ + return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT; +} + static inline bool anv_image_is_externally_shared(const struct anv_image *image) { @@ -4748,6 +4841,10 @@ anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer, struct anv_state state, const struct anv_image *image); 
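+/* Formerly the static image_aspect_to_binding() in anv_image.c; exported
+ * here so the sparse code can also map an image aspect to the
+ * anv_image_binding backing it.
+ */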
+struct anv_image_binding *
+anv_image_aspect_to_binding(struct anv_image *image,
+                            VkImageAspectFlags aspect);
+
 void
 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *image,
@@ -4809,6 +4906,12 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                  bool predicate);
 
+isl_surf_usage_flags_t
+anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
+                                VkImageUsageFlags vk_usage,
+                                isl_surf_usage_flags_t isl_extra_usage,
+                                VkImageAspectFlagBits aspect);
+
 void
 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
                          struct anv_address address,
diff --git a/src/intel/vulkan/anv_sparse.c b/src/intel/vulkan/anv_sparse.c
new file mode 100644
index 00000000000..6a39557b119
--- /dev/null
+++ b/src/intel/vulkan/anv_sparse.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+/* Sparse binding handling.
+ *
+ * There is one main structure passed around all over this file:
+ *
+ * - struct anv_sparse_binding_data: every resource (VkBuffer or VkImage) has
+ *   a pointer to an instance of this structure. It contains the virtual
+ *   memory address (VMA) used by the binding operations (which is different
+ *   from the VMA used by the anv_bo it's bound to) and the VMA range size.
+ *   We do not keep a record of our list of bindings (which ranges were
+ *   bound to which buffers).
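+ *
+ * What makes the machinery work: anv_init_sparse_bindings() starts by
+ * binding the resource's entire VMA range to NULL (the "NULL binding"
+ * below), so reads from unbound regions return zeros and writes to them
+ * get discarded, and vkQueueBindSparse() later rebinds pieces of that
+ * range to real BOs.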
+ */ + +static VkOffset3D +vk_offset3d_px_to_el(const VkOffset3D offset_px, + const struct isl_format_layout *layout) +{ + return (VkOffset3D) { + .x = offset_px.x / layout->bw, + .y = offset_px.y / layout->bh, + .z = offset_px.z / layout->bd, + }; +} + +static VkOffset3D +vk_offset3d_el_to_px(const VkOffset3D offset_el, + const struct isl_format_layout *layout) +{ + return (VkOffset3D) { + .x = offset_el.x * layout->bw, + .y = offset_el.y * layout->bh, + .z = offset_el.z * layout->bd, + }; +} + +static VkExtent3D +vk_extent3d_px_to_el(const VkExtent3D extent_px, + const struct isl_format_layout *layout) +{ + return (VkExtent3D) { + .width = extent_px.width / layout->bw, + .height = extent_px.height / layout->bh, + .depth = extent_px.depth / layout->bd, + }; +} + +static VkExtent3D +vk_extent3d_el_to_px(const VkExtent3D extent_el, + const struct isl_format_layout *layout) +{ + return (VkExtent3D) { + .width = extent_el.width * layout->bw, + .height = extent_el.height * layout->bh, + .depth = extent_el.depth * layout->bd, + }; +} + +static bool +isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling) +{ + return tiling == ISL_TILING_64 || + tiling == ISL_TILING_ICL_Ys || + tiling == ISL_TILING_SKL_Ys; +} + +static VkExtent3D +anv_sparse_get_standard_image_block_shape(enum isl_format format, + VkImageType image_type, + uint16_t texel_size) +{ + const struct isl_format_layout *layout = isl_format_get_layout(format); + VkExtent3D block_shape = { .width = 0, .height = 0, .depth = 0 }; + + switch (image_type) { + case VK_IMAGE_TYPE_1D: + /* 1D images don't have a standard block format. */ + assert(false); + break; + case VK_IMAGE_TYPE_2D: + switch (texel_size) { + case 8: + block_shape = (VkExtent3D) { .width = 256, .height = 256, .depth = 1 }; + break; + case 16: + block_shape = (VkExtent3D) { .width = 256, .height = 128, .depth = 1 }; + break; + case 32: + block_shape = (VkExtent3D) { .width = 128, .height = 128, .depth = 1 }; + break; + case 64: + block_shape = (VkExtent3D) { .width = 128, .height = 64, .depth = 1 }; + break; + case 128: + block_shape = (VkExtent3D) { .width = 64, .height = 64, .depth = 1 }; + break; + default: + fprintf(stderr, "unexpected texel_size %d\n", texel_size); + assert(false); + } + break; + case VK_IMAGE_TYPE_3D: + switch (texel_size) { + case 8: + block_shape = (VkExtent3D) { .width = 64, .height = 32, .depth = 32 }; + break; + case 16: + block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 32 }; + break; + case 32: + block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 16 }; + break; + case 64: + block_shape = (VkExtent3D) { .width = 32, .height = 16, .depth = 16 }; + break; + case 128: + block_shape = (VkExtent3D) { .width = 16, .height = 16, .depth = 16 }; + break; + default: + fprintf(stderr, "unexpected texel_size %d\n", texel_size); + assert(false); + } + break; + default: + fprintf(stderr, "unexpected image_type %d\n", image_type); + assert(false); + } + + return vk_extent3d_el_to_px(block_shape, layout); +} + +VkResult +anv_init_sparse_bindings(struct anv_device *device, + uint64_t size_, + struct anv_sparse_binding_data *sparse, + enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, + struct anv_address *out_address) +{ + uint64_t size = align64(size_, ANV_SPARSE_BLOCK_SIZE); + + sparse->address = anv_vma_alloc(device, size, ANV_SPARSE_BLOCK_SIZE, + alloc_flags, + intel_48b_address(client_address), + &sparse->vma_heap); + sparse->size = size; + + out_address->bo = NULL; + out_address->offset = sparse->address; 
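+   /* Bind the whole VMA range to a NULL backing first: until real memory
+    * gets bound on top of it, reads of any part of the resource return
+    * zeros and writes are discarded.
+    */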
+
+   struct anv_vm_bind bind = {
+      .bo = NULL, /* That's a NULL binding. */
+      .address = sparse->address,
+      .bo_offset = 0,
+      .size = size,
+      .op = ANV_VM_BIND,
+   };
+   int rc = device->kmd_backend->vm_bind(device, 1, &bind);
+   if (rc) {
+      anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
+      return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "failed to bind sparse buffer");
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_free_sparse_bindings(struct anv_device *device,
+                         struct anv_sparse_binding_data *sparse)
+{
+   if (!sparse->address)
+      return VK_SUCCESS;
+
+   struct anv_vm_bind unbind = {
+      .bo = 0,
+      .address = sparse->address,
+      .bo_offset = 0,
+      .size = sparse->size,
+      .op = ANV_VM_UNBIND,
+   };
+   int ret = device->kmd_backend->vm_bind(device, 1, &unbind);
+   if (ret)
+      return vk_errorf(device, VK_ERROR_UNKNOWN,
+                       "failed to unbind vm for sparse resource\n");
+
+   anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
+
+   return VK_SUCCESS;
+}
+
+static VkExtent3D
+anv_sparse_calc_block_shape(struct anv_physical_device *pdevice,
+                            struct isl_surf *surf)
+{
+   const struct isl_format_layout *layout =
+      isl_format_get_layout(surf->format);
+   const int Bpb = layout->bpb / 8;
+
+   struct isl_tile_info tile_info;
+   isl_surf_get_tile_info(surf, &tile_info);
+
+   VkExtent3D block_shape_el = {
+      .width = tile_info.logical_extent_el.width,
+      .height = tile_info.logical_extent_el.height,
+      .depth = tile_info.logical_extent_el.depth,
+   };
+   VkExtent3D block_shape_px = vk_extent3d_el_to_px(block_shape_el, layout);
+
+   if (surf->tiling == ISL_TILING_LINEAR) {
+      uint32_t elements_per_row = surf->row_pitch_B /
+                                  (block_shape_el.width * Bpb);
+      uint32_t rows_per_tile = ANV_SPARSE_BLOCK_SIZE /
+                               (elements_per_row * Bpb);
+      assert(rows_per_tile * elements_per_row * Bpb == ANV_SPARSE_BLOCK_SIZE);
+
+      block_shape_px = (VkExtent3D) {
+         .width = elements_per_row * layout->bw,
+         .height = rows_per_tile * layout->bh,
+         .depth = layout->bd,
+      };
+   }
+
+   return block_shape_px;
+}
+
+VkSparseImageFormatProperties
+anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
+                                        VkImageAspectFlags aspect,
+                                        VkImageType vk_image_type,
+                                        struct isl_surf *surf)
+{
+   const struct isl_format_layout *isl_layout =
+      isl_format_get_layout(surf->format);
+   const int bpb = isl_layout->bpb;
+   assert(bpb == 8 || bpb == 16 || bpb == 32 || bpb == 64 || bpb == 128);
+   const int Bpb = bpb / 8;
+
+   VkExtent3D granularity = anv_sparse_calc_block_shape(pdevice, surf);
+   bool is_standard = false;
+   bool is_known_nonstandard_format = false;
+
+   if (vk_image_type != VK_IMAGE_TYPE_1D) {
+      VkExtent3D std_shape =
+         anv_sparse_get_standard_image_block_shape(surf->format,
+                                                   vk_image_type, bpb);
+      /* YUV formats don't work with Tile64, which is required if we want to
+       * claim standard block shapes. The spec requires us to support all
+       * non-compressed color formats that non-sparse supports, so we can't
+       * just say YUV formats are not supported by Sparse. So we end up
+       * supporting this format and anv_sparse_calc_miptail_properties() will
+       * say that everything is part of the miptail.
+       *
+       * For more details on the hardware restriction, please check
+       * isl_gfx125_filter_tiling().
+ */ + if (pdevice->info.verx10 >= 125 && isl_format_is_yuv(surf->format)) + is_known_nonstandard_format = true; + + is_standard = granularity.width == std_shape.width && + granularity.height == std_shape.height && + granularity.depth == std_shape.depth; + + assert(is_standard || is_known_nonstandard_format); + } + + uint32_t block_size = granularity.width * granularity.height * + granularity.depth * Bpb; + bool wrong_block_size = block_size != ANV_SPARSE_BLOCK_SIZE; + + return (VkSparseImageFormatProperties) { + .aspectMask = aspect, + .imageGranularity = granularity, + .flags = ((is_standard || is_known_nonstandard_format) ? 0 : + VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT) | + (wrong_block_size ? VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT : + 0), + }; +} + +/* The miptail is supposed to be this region where the tiniest mip levels + * are squished together in one single page, which should save us some memory. + * It's a hardware feature which our hardware supports on certain tiling + * formats - the ones we always want to use for sparse resources. + * + * For sparse, the main feature of the miptail is that it only supports opaque + * binds, so you either bind the whole miptail or you bind nothing at all, + * there are no subresources inside it to separately bind. While the idea is + * that the miptail as reported by sparse should match what our hardware does, + * in practice we can say in our sparse functions that certain mip levels are + * part of the miptail while from the point of view of our hardwared they + * aren't. + * + * If we detect we're using the sparse-friendly tiling formats and ISL + * supports miptails for them, we can just trust the miptail level set by ISL + * and things can proceed as The Spec intended. + * + * However, if that's not the case, we have to go on a best-effort policy. We + * could simply declare that every mip level is part of the miptail and be + * done, but since that kinda defeats the purpose of Sparse we try to find + * what level we really should be reporting as the first miptail level based + * on the alignments of the surface subresources. + */ +void +anv_sparse_calc_miptail_properties(struct anv_device *device, + struct anv_image *image, + VkImageAspectFlags vk_aspect, + uint32_t *imageMipTailFirstLod, + VkDeviceSize *imageMipTailSize, + VkDeviceSize *imageMipTailOffset, + VkDeviceSize *imageMipTailStride) +{ + assert(__builtin_popcount(vk_aspect) == 1); + const uint32_t plane = anv_image_aspect_to_plane(image, vk_aspect); + struct isl_surf *surf = &image->planes[plane].primary_surface.isl; + uint64_t binding_plane_offset = + image->planes[plane].primary_surface.memory_range.offset; + const struct isl_format_layout *isl_layout = + isl_format_get_layout(surf->format); + const int Bpb = isl_layout->bpb / 8; + struct isl_tile_info tile_info; + isl_surf_get_tile_info(surf, &tile_info); + uint32_t tile_size = tile_info.logical_extent_el.width * Bpb * + tile_info.logical_extent_el.height * + tile_info.logical_extent_el.depth; + + uint64_t layer1_offset; + uint32_t x_off, y_off; + + /* Treat the whole thing as a single miptail. We should have already + * reported this image as VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT. 
+    *
+    * In theory we could try to make ISL massage the alignments so that we
+    * could at least claim mip level 0 to be not part of the miptail, but
+    * that could end up wasting a lot of memory, so it's better to do
+    * nothing and focus our efforts into making things use the appropriate
+    * tiling formats that give us the standard block shapes.
+    */
+   if (tile_size != ANV_SPARSE_BLOCK_SIZE)
+      goto out_everything_is_miptail;
+
+   assert(surf->tiling != ISL_TILING_LINEAR);
+
+   if (image->vk.array_layers == 1) {
+      layer1_offset = surf->size_B;
+   } else {
+      isl_surf_get_image_offset_B_tile_sa(surf, 0, 1, 0, &layer1_offset,
+                                          &x_off, &y_off);
+      if (x_off || y_off)
+         goto out_everything_is_miptail;
+   }
+   assert(layer1_offset % tile_size == 0);
+
+   /* We could try to do better here, but there's not really any point since
+    * we should be supporting the appropriate tiling formats everywhere.
+    */
+   if (!isl_tiling_supports_standard_block_shapes(surf->tiling))
+      goto out_everything_is_miptail;
+
+   int miptail_first_level = surf->miptail_start_level;
+   if (miptail_first_level >= image->vk.mip_levels)
+      goto out_no_miptail;
+
+   uint64_t miptail_offset = 0;
+   isl_surf_get_image_offset_B_tile_sa(surf, miptail_first_level, 0, 0,
+                                       &miptail_offset,
+                                       &x_off, &y_off);
+   assert(x_off == 0 && y_off == 0);
+   assert(miptail_offset % tile_size == 0);
+
+   *imageMipTailFirstLod = miptail_first_level;
+   *imageMipTailSize = tile_size;
+   *imageMipTailOffset = binding_plane_offset + miptail_offset;
+   *imageMipTailStride = layer1_offset;
+   return;
+
+out_no_miptail:
+   *imageMipTailFirstLod = image->vk.mip_levels;
+   *imageMipTailSize = 0;
+   *imageMipTailOffset = 0;
+   *imageMipTailStride = 0;
+   return;
+
+out_everything_is_miptail:
+   *imageMipTailFirstLod = 0;
+   *imageMipTailSize = surf->size_B;
+   *imageMipTailOffset = binding_plane_offset;
+   *imageMipTailStride = 0;
+   return;
+}
+
+static struct anv_vm_bind
+vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data *sparse,
+                       const struct VkSparseMemoryBind *vk_bind)
+{
+   struct anv_vm_bind anv_bind = {
+      .bo = NULL,
+      .address = sparse->address + vk_bind->resourceOffset,
+      .bo_offset = 0,
+      .size = vk_bind->size,
+      .op = ANV_VM_BIND,
+   };
+
+   assert(vk_bind->size);
+   assert(vk_bind->resourceOffset + vk_bind->size <= sparse->size);
+
+   if (vk_bind->memory != VK_NULL_HANDLE) {
+      anv_bind.bo = anv_device_memory_from_handle(vk_bind->memory)->bo;
+      anv_bind.bo_offset = vk_bind->memoryOffset;
+      assert(vk_bind->memoryOffset + vk_bind->size <= anv_bind.bo->size);
+   }
+
+   return anv_bind;
+}
+
+VkResult
+anv_sparse_bind_resource_memory(struct anv_device *device,
+                                struct anv_sparse_binding_data *sparse,
+                                const VkSparseMemoryBind *vk_bind)
+{
+   struct anv_vm_bind bind = vk_bind_to_anv_vm_bind(sparse, vk_bind);
+
+   int rc = device->kmd_backend->vm_bind(device, 1, &bind);
+   if (rc) {
+      return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "failed to bind sparse buffer");
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_sparse_bind_image_memory(struct anv_queue *queue,
+                             struct anv_image *image,
+                             const VkSparseImageMemoryBind *bind)
+{
+   struct anv_device *device = queue->device;
+   VkImageAspectFlags aspect = bind->subresource.aspectMask;
+   uint32_t mip_level = bind->subresource.mipLevel;
+   uint32_t array_layer = bind->subresource.arrayLayer;
+
+   assert(__builtin_popcount(aspect) == 1);
+   assert(!(bind->flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT));
+
+   struct anv_image_binding *img_binding = image->disjoint ?
+ anv_image_aspect_to_binding(image, aspect) : + &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN]; + struct anv_sparse_binding_data *sparse_data = &img_binding->sparse_data; + + const uint32_t plane = anv_image_aspect_to_plane(image, aspect); + struct isl_surf *surf = &image->planes[plane].primary_surface.isl; + uint64_t binding_plane_offset = + image->planes[plane].primary_surface.memory_range.offset; + const struct isl_format_layout *layout = + isl_format_get_layout(surf->format); + struct isl_tile_info tile_info; + isl_surf_get_tile_info(surf, &tile_info); + + VkExtent3D block_shape_px = + anv_sparse_calc_block_shape(device->physical, surf); + VkExtent3D block_shape_el = vk_extent3d_px_to_el(block_shape_px, layout); + + /* Both bind->offset and bind->extent are in pixel units. */ + VkOffset3D bind_offset_el = vk_offset3d_px_to_el(bind->offset, layout); + + /* The spec says we only really need to align if for a given coordinate + * offset + extent equals the corresponding dimensions of the image + * subresource, but all the other non-aligned usage is invalid, so just + * align everything. + */ + VkExtent3D bind_extent_px = { + .width = ALIGN_NPOT(bind->extent.width, block_shape_px.width), + .height = ALIGN_NPOT(bind->extent.height, block_shape_px.height), + .depth = ALIGN_NPOT(bind->extent.depth, block_shape_px.depth), + }; + VkExtent3D bind_extent_el = vk_extent3d_px_to_el(bind_extent_px, layout); + + /* A sparse block should correspond to our tile size, so this has to be + * either 4k or 64k depending on the tiling format. */ + const uint64_t block_size_B = block_shape_el.width * (layout->bpb / 8) * + block_shape_el.height * + block_shape_el.depth; + /* How many blocks are necessary to form a whole line on this image? */ + const uint32_t blocks_per_line = surf->row_pitch_B / (layout->bpb / 8) / + block_shape_el.width; + /* The loop below will try to bind a whole line of blocks at a time as + * they're guaranteed to be contiguous, so we calculate how many blocks + * that is and how big is each block to figure the bind size of a whole + * line. + * + * TODO: if we're binding mip_level 0 and bind_extent_el.width is the total + * line, the whole rectangle is contiguous so we could do this with a + * single bind instead of per-line. We should figure out how common this is + * and consider implementing this special-case. 
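+    *
+    * Concretely, the condition would be bind_offset_el.x == 0 with
+    * line_bind_size_in_blocks == blocks_per_line: every bound line then
+    * covers a full row of blocks, so consecutive lines are contiguous and
+    * one vm_bind of line_bind_size times the number of lines would cover
+    * the whole rectangle.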
+ */ + uint64_t line_bind_size_in_blocks = bind_extent_el.width / + block_shape_el.width; + uint64_t line_bind_size = line_bind_size_in_blocks * block_size_B; + assert(line_bind_size_in_blocks != 0); + assert(line_bind_size != 0); + + uint64_t memory_offset = bind->memoryOffset; + for (uint32_t z = bind_offset_el.z; + z < bind_offset_el.z + bind_extent_el.depth; + z += block_shape_el.depth) { + uint64_t subresource_offset_B; + uint32_t subresource_x_offset, subresource_y_offset; + isl_surf_get_image_offset_B_tile_sa(surf, mip_level, array_layer, z, + &subresource_offset_B, + &subresource_x_offset, + &subresource_y_offset); + assert(subresource_x_offset == 0 && subresource_y_offset == 0); + assert(subresource_offset_B % block_size_B == 0); + + for (uint32_t y = bind_offset_el.y; + y < bind_offset_el.y + bind_extent_el.height; + y+= block_shape_el.height) { + uint32_t line_block_offset = y / block_shape_el.height * + blocks_per_line; + uint64_t line_start_B = subresource_offset_B + + line_block_offset * block_size_B; + uint64_t bind_offset_B = line_start_B + + (bind_offset_el.x / block_shape_el.width) * + block_size_B; + + VkSparseMemoryBind opaque_bind = { + .resourceOffset = binding_plane_offset + bind_offset_B, + .size = line_bind_size, + .memory = bind->memory, + .memoryOffset = memory_offset, + .flags = bind->flags, + }; + + memory_offset += line_bind_size; + + assert(line_start_B % block_size_B == 0); + assert(opaque_bind.resourceOffset % block_size_B == 0); + assert(opaque_bind.size % block_size_B == 0); + + struct anv_vm_bind bind = vk_bind_to_anv_vm_bind(sparse_data, + &opaque_bind); + int rc = device->kmd_backend->vm_bind(device, 1, &bind); + if (rc) { + return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY, + "failed to bind sparse buffer"); + } + } + } + + return VK_SUCCESS; +} + +VkResult +anv_sparse_image_check_support(struct anv_physical_device *pdevice, + VkImageCreateFlags flags, + VkImageTiling tiling, + VkSampleCountFlagBits samples, + VkImageType type, + VkFormat vk_format) +{ + assert(flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT); + + /* The spec says: + * "A sparse image created using VK_IMAGE_CREATE_SPARSE_BINDING_BIT (but + * not VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) supports all formats that + * non-sparse usage supports, and supports both VK_IMAGE_TILING_OPTIMAL + * and VK_IMAGE_TILING_LINEAR tiling." + */ + if (!(flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) + return VK_SUCCESS; + + /* From here on, these are the rules: + * "A sparse image created using VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT + * supports all non-compressed color formats with power-of-two element + * size that non-sparse usage supports. Additional formats may also be + * supported and can be queried via + * vkGetPhysicalDeviceSparseImageFormatProperties. + * VK_IMAGE_TILING_LINEAR tiling is not supported." + */ + + /* While the spec itself says linear is not supported (see above), deqp-vk + * tries anyway to create linear sparse images, so we have to check for it. + * This is also said in VUID-VkImageCreateInfo-tiling-04121: + * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain + * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT" + */ + if (tiling == VK_IMAGE_TILING_LINEAR) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* TODO: not supported yet. 
+
+VkResult
+anv_sparse_image_check_support(struct anv_physical_device *pdevice,
+ VkImageCreateFlags flags,
+ VkImageTiling tiling,
+ VkSampleCountFlagBits samples,
+ VkImageType type,
+ VkFormat vk_format)
+{
+ assert(flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT);
+
+ /* The spec says:
+ * "A sparse image created using VK_IMAGE_CREATE_SPARSE_BINDING_BIT (but
+ * not VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) supports all formats that
+ * non-sparse usage supports, and supports both VK_IMAGE_TILING_OPTIMAL
+ * and VK_IMAGE_TILING_LINEAR tiling."
+ */
+ if (!(flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
+ return VK_SUCCESS;
+
+ /* From here on, these are the rules:
+ * "A sparse image created using VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
+ * supports all non-compressed color formats with power-of-two element
+ * size that non-sparse usage supports. Additional formats may also be
+ * supported and can be queried via
+ * vkGetPhysicalDeviceSparseImageFormatProperties.
+ * VK_IMAGE_TILING_LINEAR tiling is not supported."
+ */
+
+ /* While the spec itself says linear is not supported (see above), deqp-vk
+ * still tries to create linear sparse images, so we have to check for it.
+ * This is also covered by VUID-VkImageCreateInfo-tiling-04121:
+ * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain
+ * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
+ */
+ if (tiling == VK_IMAGE_TILING_LINEAR)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* TODO: multisampled sparse images are not supported yet. */
+ if (samples != VK_SAMPLE_COUNT_1_BIT)
+ return VK_ERROR_FEATURE_NOT_PRESENT;
+
+ /* While the Vulkan spec allows us to support depth/stencil sparse images
+ * everywhere, sometimes we're not able to have them with the tiling
+ * formats that give us the standard block shapes. Having standard block
+ * shapes is higher priority than supporting depth/stencil sparse images.
+ *
+ * Please see ISL's filter_tiling() functions for accurate explanations of
+ * why depth/stencil images are not always supported with the tiling
+ * formats we want. In short: depth/stencil support in our HW is limited
+ * to 2D, and we can't build a 2D view of a 3D image with these tiling
+ * formats because the address swizzling is different.
+ */
+ VkImageAspectFlags aspects = vk_format_aspects(vk_format);
+ if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ /* For 125+, isl_gfx125_filter_tiling() claims 3D is not supported.
+ * For the previous platforms, isl_gfx6_filter_tiling() says only 2D is
+ * supported.
+ */
+ if (pdevice->info.verx10 >= 125) {
+ if (type == VK_IMAGE_TYPE_3D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ } else {
+ if (type != VK_IMAGE_TYPE_2D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ }
+ }
+
+ const struct anv_format *anv_format = anv_get_format(vk_format);
+ if (!anv_format)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ for (int p = 0; p < anv_format->n_planes; p++) {
+ enum isl_format isl_format = anv_format->planes[p].isl_format;
+
+ if (isl_format == ISL_FORMAT_UNSUPPORTED)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ const struct isl_format_layout *isl_layout =
+ isl_format_get_layout(isl_format);
+
+ /* As quoted above, we only need to support the power-of-two formats.
+ * The problem with the non-power-of-two formats is that we need an
+ * integer number of pixels to fit into a sparse block, so we'd need the
+ * sparse block sizes to be, for example, 192k for 24bpp.
+ *
+ * TODO: add support for these formats.
+ */
+ if (isl_layout->bpb != 8 && isl_layout->bpb != 16 &&
+ isl_layout->bpb != 32 && isl_layout->bpb != 64 &&
+ isl_layout->bpb != 128)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ }
+
+ return VK_SUCCESS;
+}
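+
+/* Concrete instance of the bpb rule above (an illustration, not an
+ * exhaustive list): VK_FORMAT_R8G8B8A8_UNORM (32bpb) is accepted, while
+ * VK_FORMAT_R8G8B8_UNORM (24bpb) is rejected, since a 64k block cannot
+ * hold an integer number of its 3-byte texels.
+ */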
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 92f59bac005..417d0ed9327 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3845,11 +3845,35 @@ mask_is_shader_write(const VkAccessFlags2 access)
 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT));
 }
 
+static inline bool
+mask_is_write(const VkAccessFlags2 access)
+{
+ return access & (VK_ACCESS_2_SHADER_WRITE_BIT |
+ VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_2_TRANSFER_WRITE_BIT |
+ VK_ACCESS_2_HOST_WRITE_BIT |
+ VK_ACCESS_2_MEMORY_WRITE_BIT |
+ VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
+ VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR |
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+ VK_ACCESS_2_VIDEO_ENCODE_WRITE_BIT_KHR |
+#endif
+ VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
+ VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT |
+ VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV |
+ VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR |
+ VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT |
+ VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV);
+}
+
 static void
 cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
 const VkDependencyInfo *dep_info,
 const char *reason)
 {
+ struct anv_device *device = cmd_buffer->device;
+
 /* XXX: Right now, we're really dumb and just flush whatever categories
 * the app asks for. One of these days we may make this a bit better
 * but right now that's all the hardware allows for in most areas.
@@ -3857,6 +3881,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
 VkAccessFlags2 src_flags = 0;
 VkAccessFlags2 dst_flags = 0;
+ bool apply_sparse_flushes = false;
+
 if (anv_cmd_buffer_is_video_queue(cmd_buffer))
 return;
@@ -3873,21 +3899,34 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
 cmd_buffer->state.queries.buffer_write_bits |=
 ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
 }
+
+ /* There's no way of knowing if this memory barrier is related to sparse
+ * buffers! This is pretty horrible.
+ */
+ if (device->using_sparse && mask_is_write(src_flags))
+ apply_sparse_flushes = true;
 }
 for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
- src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
- dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
+ const VkBufferMemoryBarrier2 *buf_barrier =
+ &dep_info->pBufferMemoryBarriers[i];
+ ANV_FROM_HANDLE(anv_buffer, buffer, buf_barrier->buffer);
+
+ src_flags |= buf_barrier->srcAccessMask;
+ dst_flags |= buf_barrier->dstAccessMask;
 /* Shader writes to buffers that could then be written by a transfer
 * command (including queries).
 */
- if (stage_is_shader(dep_info->pBufferMemoryBarriers[i].srcStageMask) &&
- mask_is_shader_write(dep_info->pBufferMemoryBarriers[i].srcAccessMask) &&
- stage_is_transfer(dep_info->pBufferMemoryBarriers[i].dstStageMask)) {
+ if (stage_is_shader(buf_barrier->srcStageMask) &&
+ mask_is_shader_write(buf_barrier->srcAccessMask) &&
+ stage_is_transfer(buf_barrier->dstStageMask)) {
 cmd_buffer->state.queries.buffer_write_bits |=
 ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
 }
+
+ if (anv_buffer_is_sparse(buffer) && mask_is_write(src_flags))
+ apply_sparse_flushes = true;
 }
 for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
@@ -3951,7 +3990,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
 anv_foreach_image_aspect_bit(aspect_bit, image, aspects) {
 VkImageAspectFlagBits aspect = 1UL << aspect_bit;
 if (anv_layout_has_untracked_aux_writes(
- cmd_buffer->device->info,
+ device->info,
 image, aspect,
 img_barrier->newLayout,
 cmd_buffer->queue_family->queueFlags)) {
@@ -3963,11 +4002,24 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
 }
 }
 }
+
+ if (anv_image_is_sparse(image) && mask_is_write(src_flags))
+ apply_sparse_flushes = true;
 }
 enum anv_pipe_bits bits =
- anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
- anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+ anv_pipe_flush_bits_for_access_flags(device, src_flags) |
+ anv_pipe_invalidate_bits_for_access_flags(device, dst_flags);
+
+ /* Our HW implementation of the sparse feature lives in the GAM unit (the
+ * interface between all the GPU caches and external memory). As a result,
+ * writes to NULL-bound images & buffers that should be ignored are
+ * actually still visible in the caches. The only way to make reads of
+ * NULL-bound regions correctly return 0s is to evict the caches, forcing
+ * them to be repopulated with 0s.
+ */
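+ /* Illustrative scenario (hypothetical): a compute shader storage-writes
+ * a partially-resident buffer and a later transfer reads an unbound
+ * region of it; without the flush below, the read could hit stale cache
+ * lines instead of returning 0s.
+ */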
+ if (apply_sparse_flushes)
+ bits |= ANV_PIPE_FLUSH_BITS;
 if (dst_flags & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)
 genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index dc47ad582e8..60058c8cd50 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -182,6 +182,7 @@ libanv_files = files(
 'anv_pipeline_cache.c',
 'anv_private.h',
 'anv_queue.c',
+ 'anv_sparse.c',
 'anv_util.c',
 'anv_utrace.c',
 'anv_va.c',