panvk: Add barrier for interleaved ZS copy cmds

When executing CopyBufferToImage or CopyImage with multiple regions of
both depth and stencil aspects targeting an interleaved depth stencil
image, we must split the regions into one copy-command for each aspect
and add a barrier between them to avoid a write-after-write race.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Fixes: 5067921349 ("panvk: Switch to vk_meta")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34384>
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2025-04-04 15:11:41 +02:00 committed by Marge Bot
parent ef9e7cb3f5
commit 37595775a0

View file

@ -1,5 +1,6 @@
/*
* Copyright © 2021 Collabora Ltd.
* Copyright © 2025 Arm Ltd.
* SPDX-License-Identifier: MIT
*/
@ -267,6 +268,80 @@ panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save);
}
static bool
lower_copy_buffer_to_image(
VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
VK_FROM_HANDLE(panvk_image, dst_img, pCopyBufferToImageInfo->dstImage);
const VkImageAspectFlags zs_mask =
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
/* Only required for interleaved depth stencil that are not multi-planar */
if (vk_format_aspects(dst_img->vk.format) != zs_mask ||
dst_img->plane_count > 1)
return false;
uint32_t num_depth_regions = 0, num_stencil_regions = 0;
for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
const VkImageAspectFlags aspect_mask =
pCopyBufferToImageInfo->pRegions[i].imageSubresource.aspectMask;
assert((aspect_mask & ~zs_mask) == 0);
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
num_depth_regions++;
else
num_stencil_regions++;
}
/* If we have both depth and stencil writes to an interleaved depth stencil
* image, we must split the writes per aspect with a barrier between them to
* avoid a write-after-write race. */
const bool lowering_needed = (num_depth_regions && num_stencil_regions);
if (!lowering_needed)
return false;
VkCopyBufferToImageInfo2 adjusted_info = *pCopyBufferToImageInfo;
STACK_ARRAY(VkBufferImageCopy2, depth_regions, num_depth_regions);
STACK_ARRAY(VkBufferImageCopy2, stencil_regions, num_stencil_regions);
uint32_t depth_idx = 0, stencil_idx = 0;
for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
const VkImageAspectFlags aspect_mask =
pCopyBufferToImageInfo->pRegions[i].imageSubresource.aspectMask;
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
depth_regions[depth_idx++] = pCopyBufferToImageInfo->pRegions[i];
else
stencil_regions[stencil_idx++] = pCopyBufferToImageInfo->pRegions[i];
}
adjusted_info.regionCount = num_depth_regions;
adjusted_info.pRegions = depth_regions;
panvk_per_arch(CmdCopyBufferToImage2)(commandBuffer, &adjusted_info);
const VkMemoryBarrier2 mem_barrier = {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT};
const VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.memoryBarrierCount = 1,
.pMemoryBarriers = &mem_barrier,
};
panvk_per_arch(CmdPipelineBarrier2)(commandBuffer, &dep_info);
adjusted_info.regionCount = num_stencil_regions;
adjusted_info.pRegions = stencil_regions;
panvk_per_arch(CmdCopyBufferToImage2)(commandBuffer, &adjusted_info);
STACK_ARRAY_FINISH(depth_regions);
STACK_ARRAY_FINISH(stencil_regions);
return true;
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdCopyBufferToImage2)(
VkCommandBuffer commandBuffer,
@ -277,8 +352,12 @@ panvk_per_arch(CmdCopyBufferToImage2)(
VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
struct vk_meta_copy_image_properties img_props =
panvk_meta_copy_get_image_properties(img);
bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, img);
/* Early out if this operation was lowered. */
if (lower_copy_buffer_to_image(commandBuffer, pCopyBufferToImageInfo))
return;
bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, img);
if (use_gfx_pipeline) {
struct panvk_cmd_meta_graphics_save_ctx save = {0};
@ -346,6 +425,79 @@ panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
panvk_per_arch(cmd_meta_compute_end)(cmdbuf, &save);
}
static bool
lower_copy_image(VkCommandBuffer commandBuffer,
const VkCopyImageInfo2 *pCopyImageInfo)
{
VK_FROM_HANDLE(panvk_image, dst_img, pCopyImageInfo->dstImage);
const VkImageAspectFlags zs_mask =
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
/* Only required for interleaved depth stencil that are not multi-planar */
if (vk_format_aspects(dst_img->vk.format) != zs_mask ||
dst_img->plane_count > 1)
return false;
uint32_t num_depth_regions = 0, num_stencil_regions = 0;
for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) {
const VkImageAspectFlags aspect_mask =
pCopyImageInfo->pRegions[i].dstSubresource.aspectMask;
assert((aspect_mask & ~zs_mask) == 0);
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
num_depth_regions++;
else
num_stencil_regions++;
}
/* If we have both depth and stencil writes to an interleaved depth stencil
* image, we must split the writes per aspect with a barrier between them to
* avoid a write-after-write race. */
const bool lowering_needed = (num_depth_regions && num_stencil_regions);
if (!lowering_needed)
return false;
VkCopyImageInfo2 adjusted_info = *pCopyImageInfo;
STACK_ARRAY(VkImageCopy2, depth_regions, num_depth_regions);
STACK_ARRAY(VkImageCopy2, stencil_regions, num_stencil_regions);
uint32_t depth_idx = 0, stencil_idx = 0;
for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) {
const VkImageAspectFlags aspect_mask =
pCopyImageInfo->pRegions[i].dstSubresource.aspectMask;
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
depth_regions[depth_idx++] = pCopyImageInfo->pRegions[i];
else
stencil_regions[stencil_idx++] = pCopyImageInfo->pRegions[i];
}
adjusted_info.regionCount = num_depth_regions;
adjusted_info.pRegions = depth_regions;
panvk_per_arch(CmdCopyImage2)(commandBuffer, &adjusted_info);
const VkMemoryBarrier2 mem_barrier = {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
.dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT};
const VkDependencyInfo dep_info = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.memoryBarrierCount = 1,
.pMemoryBarriers = &mem_barrier,
};
panvk_per_arch(CmdPipelineBarrier2)(commandBuffer, &dep_info);
adjusted_info.regionCount = num_stencil_regions;
adjusted_info.pRegions = stencil_regions;
panvk_per_arch(CmdCopyImage2)(commandBuffer, &adjusted_info);
STACK_ARRAY_FINISH(depth_regions);
STACK_ARRAY_FINISH(stencil_regions);
return true;
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
const VkCopyImageInfo2 *pCopyImageInfo)
@ -358,8 +510,12 @@ panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
panvk_meta_copy_get_image_properties(src_img);
struct vk_meta_copy_image_properties dst_img_props =
panvk_meta_copy_get_image_properties(dst_img);
bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, dst_img);
/* Early out if this operation was lowered. */
if (lower_copy_image(commandBuffer, pCopyImageInfo))
return;
bool use_gfx_pipeline = copy_to_image_use_gfx_pipeline(dev, dst_img);
if (use_gfx_pipeline) {
struct panvk_cmd_meta_graphics_save_ctx save = {0};