From f262865a90c947cbc66dfc26baf78987d396d408 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 2 Sep 2025 11:33:24 +0300 Subject: [PATCH] anv: fix pipeline barriers with pre-rasterization stages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-rasterization stages need a CS stall if they need to wait on the flushes from a PIPE_CONTROL. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Tapani Pälli Part-of: --- src/intel/vulkan/genX_cmd_buffer.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 74cd2a4216a..33325da7197 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -27,6 +27,7 @@ #include "anv_private.h" #include "anv_measure.h" #include "vk_render_pass.h" +#include "vk_synchronization.h" #include "vk_util.h" #include "util/format_srgb.h" @@ -4563,6 +4564,9 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer, } } + src_stages = vk_expand_src_stage_flags2(src_stages); + dst_stages = vk_expand_dst_stage_flags2(dst_stages); + enum anv_pipe_bits bits = anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags, src_flags3) | anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags, dst_flags3); @@ -4571,9 +4575,7 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 pb_stall_stages = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT; if (anv_cmd_buffer_is_render_queue(cmd_buffer)) { /* On a render queue, the following stages can also use a pixel shader. */ @@ -4584,12 +4586,18 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer, VK_PIPELINE_STAGE_2_CLEAR_BIT; } VkPipelineStageFlags2 cs_stall_stages = + VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | - VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR; if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) { /* On a compute queue, the following stages can also use a compute * shader. @@ -4610,13 +4618,18 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer, /* Prior to Gfx20, we can restrict pb-stall/cs-stall to some pipeline * modes. Gfx20 doesn't do pipeline switches so we have to assume the worse * case. + * + * To use a PB-stall we need both destination stages to be contained to the + * fragment shader stages. That way the HW can hold the fragment shader + * dispatch until the synchronization operation happened. */ const bool needs_pb_stall = anv_cmd_buffer_is_render_queue(cmd_buffer) && #if GFX_VER < 20 cmd_buffer->state.current_pipeline == _3D && #endif - (src_stages & pb_stall_stages); + (dst_stages & ~pb_stall_stages) == 0 && + (dst_stages & pb_stall_stages); if (needs_pb_stall) { bits |= GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : @@ -4624,10 +4637,7 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer, } const bool needs_cs_stall = anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) && -#if GFX_VER < 20 - cmd_buffer->state.current_pipeline == GPGPU && -#endif - (src_stages & cs_stall_stages); + (dst_stages & cs_stall_stages); if (needs_cs_stall) bits |= ANV_PIPE_CS_STALL_BIT;