anv: fix pipeline barriers with pre-rasterization stages
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Pre-rasterization stages need a CS stall if they need to wait on the
flushes from a PIPE_CONTROL.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37132>
This commit is contained in:
Lionel Landwerlin 2025-09-02 11:33:24 +03:00 committed by Marge Bot
parent 8a9baab791
commit f262865a90

View file

@ -27,6 +27,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "vk_render_pass.h"
#include "vk_synchronization.h"
#include "vk_util.h"
#include "util/format_srgb.h"
@ -4563,6 +4564,9 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
}
}
src_stages = vk_expand_src_stage_flags2(src_stages);
dst_stages = vk_expand_dst_stage_flags2(dst_stages);
enum anv_pipe_bits bits =
anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags, src_flags3) |
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags, dst_flags3);
@ -4571,9 +4575,7 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
VkPipelineStageFlags2 pb_stall_stages =
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT;
if (anv_cmd_buffer_is_render_queue(cmd_buffer)) {
/* On a render queue, the following stages can also use a pixel shader.
*/
@ -4584,12 +4586,18 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
VK_PIPELINE_STAGE_2_CLEAR_BIT;
}
VkPipelineStageFlags2 cs_stall_stages =
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR;
if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
/* On a compute queue, the following stages can also use a compute
* shader.
@ -4610,13 +4618,18 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
/* Prior to Gfx20, we can restrict pb-stall/cs-stall to some pipeline
* modes. Gfx20 doesn't do pipeline switches so we have to assume the worse
* case.
*
* To use a PB-stall we need both destination stages to be contained to the
* fragment shader stages. That way the HW can hold the fragment shader
* dispatch until the synchronization operation happened.
*/
const bool needs_pb_stall =
anv_cmd_buffer_is_render_queue(cmd_buffer) &&
#if GFX_VER < 20
cmd_buffer->state.current_pipeline == _3D &&
#endif
(src_stages & pb_stall_stages);
(dst_stages & ~pb_stall_stages) == 0 &&
(dst_stages & pb_stall_stages);
if (needs_pb_stall) {
bits |= GFX_VERx10 >= 125 ?
ANV_PIPE_PSS_STALL_SYNC_BIT :
@ -4624,10 +4637,7 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
}
const bool needs_cs_stall =
anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) &&
#if GFX_VER < 20
cmd_buffer->state.current_pipeline == GPGPU &&
#endif
(src_stages & cs_stall_stages);
(dst_stages & cs_stall_stages);
if (needs_cs_stall)
bits |= ANV_PIPE_CS_STALL_BIT;