From ed621992668d041b696839a242a34ef5eda4af86 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 15 Jun 2023 14:04:14 +0300 Subject: [PATCH] anv: track buffer writes from shaders for query results writes In the following sequence: - write buffer B with a shader - barrier on buffer from shader-write to transfer - vkCmdCopyQueryPoolResults to buffer B The barrier should take care of ordering things between the shader writes and vkCmdCopyQueryPoolResults. The problem is that vkCmdCopyQueryPoolResults runs on the command streamer and that is not coherent or synchronized in the same way as shaders. This change marks the barrier as potentially containing pending buffer writes for queries so that we can insert the necessary flush for vkCmdCopyQueryPoolResults later. Signed-off-by: Lionel Landwerlin Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9013 Cc: mesa-stable Reviewed-by: Rohan Garg Part-of: (cherry picked from commit cab849562517631db7303f0bdf08d21e60f59ad8) --- .pick_status.json | 2 +- src/intel/vulkan/genX_cmd_buffer.c | 51 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index 94ddf37120d..c678a263384 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -688,7 +688,7 @@ "description": "anv: track buffer writes from shaders for query results writes", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5adc91a6034..b7fe5627aab 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -4065,6 +4065,37 @@ genX(CmdExecuteCommands)( } } +static inline bool +stage_is_shader(const VkPipelineStageFlags2 stage) +{ + return (stage & (VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + 
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | + VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | + VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT)); +} + +static inline bool +stage_is_transfer(const VkPipelineStageFlags2 stage) +{ + return (stage & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | + VK_PIPELINE_STAGE_2_TRANSFER_BIT)); +} + +static inline bool +mask_is_shader_write(const VkAccessFlags2 access) +{ + return (access & (VK_ACCESS_2_SHADER_WRITE_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT)); +} + static void cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info, @@ -4083,11 +4114,31 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask; dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask; + + /* Shader writes to buffers that could then be written by a transfer + * command (including queries). + */ + if (stage_is_shader(dep_info->pMemoryBarriers[i].srcStageMask) && + mask_is_shader_write(dep_info->pMemoryBarriers[i].srcAccessMask) && + stage_is_transfer(dep_info->pMemoryBarriers[i].dstStageMask)) { + cmd_buffer->state.pending_query_bits |= + ANV_QUERY_COMPUTE_WRITES_PENDING_BITS; + } } for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask; dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask; + + /* Shader writes to buffers that could then be written by a transfer + * command (including queries). 
+ */ + if (stage_is_shader(dep_info->pBufferMemoryBarriers[i].srcStageMask) && + mask_is_shader_write(dep_info->pBufferMemoryBarriers[i].srcAccessMask) && + stage_is_transfer(dep_info->pBufferMemoryBarriers[i].dstStageMask)) { + cmd_buffer->state.pending_query_bits |= + ANV_QUERY_COMPUTE_WRITES_PENDING_BITS; + } } for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {