From 37595775a0e1d63ad99cb9adb67ee5c35ee8c9ff Mon Sep 17 00:00:00 2001 From: Lars-Ivar Hesselberg Simonsen Date: Fri, 4 Apr 2025 15:11:41 +0200 Subject: [PATCH] panvk: Add barrier for interleaved ZS copy cmds When executing CopyBufferToImage or CopyImage with multiple regions of both depth and stencil aspects targeting an interleaved depth stencil image, we must split the regions into one copy-command for each aspect and add a barrier between them to avoid a write-after-write race. Reviewed-by: Boris Brezillon Fixes: 5067921349a ("panvk: Switch to vk_meta") Part-of: --- src/panfrost/vulkan/panvk_vX_cmd_meta.c | 160 +++++++++++++++++++++++- 1 file changed, 158 insertions(+), 2 deletions(-) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index 337de1743f9..25bb84d707e 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -1,5 +1,6 @@ /* * Copyright © 2021 Collabora Ltd. + * Copyright © 2025 Arm Ltd. * SPDX-License-Identifier: MIT */ @@ -267,6 +268,80 @@ panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer, panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); } +static bool +lower_copy_buffer_to_image( + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) +{ + VK_FROM_HANDLE(panvk_image, dst_img, pCopyBufferToImageInfo->dstImage); + + const VkImageAspectFlags zs_mask = + (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + /* Only required for interleaved depth stencil that are not multi-planar */ + if (vk_format_aspects(dst_img->vk.format) != zs_mask || + dst_img->plane_count > 1) + return false; + + uint32_t num_depth_regions = 0, num_stencil_regions = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) { + const VkImageAspectFlags aspect_mask = + pCopyBufferToImageInfo->pRegions[i].imageSubresource.aspectMask; + assert((aspect_mask & ~zs_mask) == 0); + if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + num_depth_regions++; + else + num_stencil_regions++; + } + + /* If we have both depth and stencil writes to an interleaved depth stencil + * image, we must split the writes per aspect with a barrier between them to + * avoid a write-after-write race. */ + const bool lowering_needed = (num_depth_regions && num_stencil_regions); + if (!lowering_needed) + return false; + + VkCopyBufferToImageInfo2 adjusted_info = *pCopyBufferToImageInfo; + STACK_ARRAY(VkBufferImageCopy2, depth_regions, num_depth_regions); + STACK_ARRAY(VkBufferImageCopy2, stencil_regions, num_stencil_regions); + + uint32_t depth_idx = 0, stencil_idx = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) { + const VkImageAspectFlags aspect_mask = + pCopyBufferToImageInfo->pRegions[i].imageSubresource.aspectMask; + + if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + depth_regions[depth_idx++] = pCopyBufferToImageInfo->pRegions[i]; + else + stencil_regions[stencil_idx++] = pCopyBufferToImageInfo->pRegions[i]; + } + + adjusted_info.regionCount = num_depth_regions; + adjusted_info.pRegions = depth_regions; + panvk_per_arch(CmdCopyBufferToImage2)(commandBuffer, &adjusted_info); + + const VkMemoryBarrier2 mem_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT, + .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT, + .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT}; + const VkDependencyInfo dep_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = 1, + .pMemoryBarriers = &mem_barrier, + }; + panvk_per_arch(CmdPipelineBarrier2)(commandBuffer, &dep_info); + + adjusted_info.regionCount = num_stencil_regions; + adjusted_info.pRegions = stencil_regions; + panvk_per_arch(CmdCopyBufferToImage2)(commandBuffer, &adjusted_info); + + STACK_ARRAY_FINISH(depth_regions); + STACK_ARRAY_FINISH(stencil_regions); + + return true; +} + VKAPI_ATTR void VKAPI_CALL panvk_per_arch(CmdCopyBufferToImage2)( VkCommandBuffer commandBuffer, @@ -277,8 +352,12 @@ panvk_per_arch(CmdCopyBufferToImage2)( VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage); struct vk_meta_copy_image_properties img_props = panvk_meta_copy_get_image_properties(img); - bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, img); + /* Early out if this operation was lowered. */ + if (lower_copy_buffer_to_image(commandBuffer, pCopyBufferToImageInfo)) + return; + + bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, img); if (use_gfx_pipeline) { struct panvk_cmd_meta_graphics_save_ctx save = {0}; @@ -346,6 +425,79 @@ panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save); } +static bool +lower_copy_image(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2 *pCopyImageInfo) +{ + VK_FROM_HANDLE(panvk_image, dst_img, pCopyImageInfo->dstImage); + + const VkImageAspectFlags zs_mask = + (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + /* Only required for interleaved depth stencil that are not multi-planar */ + if (vk_format_aspects(dst_img->vk.format) != zs_mask || + dst_img->plane_count > 1) + return false; + + uint32_t num_depth_regions = 0, num_stencil_regions = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) { + const VkImageAspectFlags aspect_mask = + pCopyImageInfo->pRegions[i].dstSubresource.aspectMask; + assert((aspect_mask & ~zs_mask) == 0); + if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + num_depth_regions++; + else + num_stencil_regions++; + } + + /* If we have both depth and stencil writes to an interleaved depth stencil + * image, we must split the writes per aspect with a barrier between them to + * avoid a write-after-write race. */ + const bool lowering_needed = (num_depth_regions && num_stencil_regions); + if (!lowering_needed) + return false; + + VkCopyImageInfo2 adjusted_info = *pCopyImageInfo; + STACK_ARRAY(VkImageCopy2, depth_regions, num_depth_regions); + STACK_ARRAY(VkImageCopy2, stencil_regions, num_stencil_regions); + + uint32_t depth_idx = 0, stencil_idx = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) { + const VkImageAspectFlags aspect_mask = + pCopyImageInfo->pRegions[i].dstSubresource.aspectMask; + + if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + depth_regions[depth_idx++] = pCopyImageInfo->pRegions[i]; + else + stencil_regions[stencil_idx++] = pCopyImageInfo->pRegions[i]; + } + + adjusted_info.regionCount = num_depth_regions; + adjusted_info.pRegions = depth_regions; + panvk_per_arch(CmdCopyImage2)(commandBuffer, &adjusted_info); + + const VkMemoryBarrier2 mem_barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT, + .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT, + .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT}; + const VkDependencyInfo dep_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .memoryBarrierCount = 1, + .pMemoryBarriers = &mem_barrier, + }; + panvk_per_arch(CmdPipelineBarrier2)(commandBuffer, &dep_info); + + adjusted_info.regionCount = num_stencil_regions; + adjusted_info.pRegions = stencil_regions; + panvk_per_arch(CmdCopyImage2)(commandBuffer, &adjusted_info); + + STACK_ARRAY_FINISH(depth_regions); + STACK_ARRAY_FINISH(stencil_regions); + + return true; +} + VKAPI_ATTR void VKAPI_CALL panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo) @@ -358,8 +510,12 @@ panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer, panvk_meta_copy_get_image_properties(src_img); struct vk_meta_copy_image_properties dst_img_props = panvk_meta_copy_get_image_properties(dst_img); - bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, dst_img); + /* Early out if this operation was lowered. */ + if (lower_copy_image(commandBuffer, pCopyImageInfo)) + return; + + bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, dst_img); if (use_gfx_pipeline) { struct panvk_cmd_meta_graphics_save_ctx save = {0};