diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index b54d32133a9..2652eb195ee 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -4185,7 +4185,8 @@ gfx_write_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages,
 }
 
 static enum tu_cmd_access_mask
-vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only, bool gmem)
+vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
+             VkPipelineStageFlags2 stages, bool image_only, bool gmem)
 {
    BITMASK_ENUM(tu_cmd_access_mask) mask = 0;
 
@@ -4800,9 +4801,11 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
    VkPipelineStageFlags2 dst_stage_vk = sanitize_dst_stage(barrier->dst_stage_mask);
    BITMASK_ENUM(tu_cmd_access_mask) src_flags =
-      vk2tu_access(barrier->src_access_mask, src_stage_vk, false, false);
+      vk2tu_access(barrier->src_access_mask, barrier->src_access_mask2,
+                   src_stage_vk, false, false);
    BITMASK_ENUM(tu_cmd_access_mask) dst_flags =
-      vk2tu_access(barrier->dst_access_mask, dst_stage_vk, false, false);
+      vk2tu_access(barrier->dst_access_mask, barrier->dst_access_mask2,
+                   dst_stage_vk, false, false);
 
    if (barrier->incoherent_ccu_color)
       src_flags |= TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE;
 
@@ -7462,33 +7465,57 @@ tu_barrier(struct tu_cmd_buffer *cmd,
       const VkDependencyInfo *dep_info = &dep_infos[dep_idx];
 
       for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+         const VkMemoryBarrier2 *barrier = &dep_info->pMemoryBarriers[i];
          VkPipelineStageFlags2 sanitized_src_stage =
-            sanitize_src_stage(dep_info->pMemoryBarriers[i].srcStageMask);
+            sanitize_src_stage(barrier->srcStageMask);
          VkPipelineStageFlags2 sanitized_dst_stage =
-            sanitize_dst_stage(dep_info->pMemoryBarriers[i].dstStageMask);
-         src_flags |= vk2tu_access(dep_info->pMemoryBarriers[i].srcAccessMask,
+            sanitize_dst_stage(barrier->dstStageMask);
+
+         VkAccessFlags3KHR src_access_mask2 = 0, dst_access_mask2 = 0;
+         const VkMemoryBarrierAccessFlags3KHR *access3 =
+            vk_find_struct_const(barrier->pNext, MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
+         if (access3) {
+            src_access_mask2 = access3->srcAccessMask3;
+            dst_access_mask2 = access3->dstAccessMask3;
+         }
+
+         src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
                                    sanitized_src_stage, false, gmem);
-         dst_flags |= vk2tu_access(dep_info->pMemoryBarriers[i].dstAccessMask,
+         dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
                                    sanitized_dst_stage, false, gmem);
          srcStage |= sanitized_src_stage;
          dstStage |= sanitized_dst_stage;
       }
 
       for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+         const VkBufferMemoryBarrier2 *barrier =
+            &dep_info->pBufferMemoryBarriers[i];
          VkPipelineStageFlags2 sanitized_src_stage =
-            sanitize_src_stage(dep_info->pBufferMemoryBarriers[i].srcStageMask);
+            sanitize_src_stage(barrier->srcStageMask);
          VkPipelineStageFlags2 sanitized_dst_stage =
-            sanitize_dst_stage(dep_info->pBufferMemoryBarriers[i].dstStageMask);
-         src_flags |= vk2tu_access(dep_info->pBufferMemoryBarriers[i].srcAccessMask,
+            sanitize_dst_stage(barrier->dstStageMask);
+
+         VkAccessFlags3KHR src_access_mask2 = 0, dst_access_mask2 = 0;
+         const VkMemoryBarrierAccessFlags3KHR *access3 =
+            vk_find_struct_const(barrier->pNext, MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
+         if (access3) {
+            src_access_mask2 = access3->srcAccessMask3;
+            dst_access_mask2 = access3->dstAccessMask3;
+         }
+
+         src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
                                    sanitized_src_stage, false, gmem);
-         dst_flags |= vk2tu_access(dep_info->pBufferMemoryBarriers[i].dstAccessMask,
+         dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
                                    sanitized_dst_stage, false, gmem);
          srcStage |= sanitized_src_stage;
          dstStage |= sanitized_dst_stage;
       }
 
       for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
-         VkImageLayout old_layout = dep_info->pImageMemoryBarriers[i].oldLayout;
+         const VkImageMemoryBarrier2 *barrier =
+            &dep_info->pImageMemoryBarriers[i];
+
+         VkImageLayout old_layout = barrier->oldLayout;
          if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
            /* The underlying memory for this image may have been used earlier
             * within the same queue submission for a different image, which
@@ -7497,7 +7524,7 @@ tu_barrier(struct tu_cmd_buffer *cmd,
             * to the image. We don't want these entries being flushed later and
             * overwriting the actual image, so we need to flush the CCU.
             */
-         VK_FROM_HANDLE(tu_image, image, dep_info->pImageMemoryBarriers[i].image);
+         VK_FROM_HANDLE(tu_image, image, barrier->image);
 
          if (vk_format_is_depth_or_stencil(image->vk.format)) {
             src_flags |= TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE;
@@ -7506,12 +7533,21 @@ tu_barrier(struct tu_cmd_buffer *cmd,
            }
         }
          VkPipelineStageFlags2 sanitized_src_stage =
-            sanitize_src_stage(dep_info->pImageMemoryBarriers[i].srcStageMask);
+            sanitize_src_stage(barrier->srcStageMask);
          VkPipelineStageFlags2 sanitized_dst_stage =
-            sanitize_dst_stage(dep_info->pImageMemoryBarriers[i].dstStageMask);
-         src_flags |= vk2tu_access(dep_info->pImageMemoryBarriers[i].srcAccessMask,
+            sanitize_dst_stage(barrier->dstStageMask);
+
+         VkAccessFlags3KHR src_access_mask2 = 0, dst_access_mask2 = 0;
+         const VkMemoryBarrierAccessFlags3KHR *access3 =
+            vk_find_struct_const(barrier->pNext, MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
+         if (access3) {
+            src_access_mask2 = access3->srcAccessMask3;
+            dst_access_mask2 = access3->dstAccessMask3;
+         }
+
+         src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
                                    sanitized_src_stage, true, gmem);
-         dst_flags |= vk2tu_access(dep_info->pImageMemoryBarriers[i].dstAccessMask,
+         dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
                                    sanitized_dst_stage, true, gmem);
          srcStage |= sanitized_src_stage;
          dstStage |= sanitized_dst_stage;
diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc
index a238217b63f..1c9da7b4229 100644
--- a/src/freedreno/vulkan/tu_pass.cc
+++ b/src/freedreno/vulkan/tu_pass.cc
@@ -48,6 +48,15 @@ tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
    VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
    VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
    VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
+   VkAccessFlags3KHR src_access_mask2 = 0, dst_access_mask2 = 0;
+   if (barrier) {
+      const VkMemoryBarrierAccessFlags3KHR *access3 =
+         vk_find_struct_const(barrier->pNext, MEMORY_BARRIER_ACCESS_FLAGS_3_KHR);
+      if (access3) {
+         src_access_mask2 = access3->srcAccessMask3;
+         dst_access_mask2 = access3->dstAccessMask3;
+      }
+   }
 
    /* We can conceptually break down the process of rewriting a sysmem
    * renderpass into a gmem one into two parts:
@@ -89,6 +98,8 @@ tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
       dst_barrier->dst_stage_mask |= dst_stage_mask;
       dst_barrier->src_access_mask |= src_access_mask;
       dst_barrier->dst_access_mask |= dst_access_mask;
+      dst_barrier->src_access_mask2 |= src_access_mask2;
+      dst_barrier->dst_access_mask2 |= dst_access_mask2;
    }
 
    /* We currently only care about undefined layouts, because we have to
diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h
index a01eea8c150..c3e4d1169c1 100644
--- a/src/freedreno/vulkan/tu_pass.h
+++ b/src/freedreno/vulkan/tu_pass.h
@@ -26,7 +26,9 @@ struct tu_subpass_barrier {
    VkPipelineStageFlags2 src_stage_mask;
    VkPipelineStageFlags2 dst_stage_mask;
    VkAccessFlags2 src_access_mask;
+   VkAccessFlags3KHR src_access_mask2;
    VkAccessFlags2 dst_access_mask;
+   VkAccessFlags3KHR dst_access_mask2;
    bool incoherent_ccu_color, incoherent_ccu_depth;
 };
 