From 57e2b272d5d5b572a5a9b41be04cc3fffd1e59ad Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 20 Mar 2026 10:57:37 +0100 Subject: [PATCH] radv: emit PFP_SYNC_ME right after STRMOUT_BUFFER_UPDATE is emitted This is likely less frequent than the draws, and it's only needed when the VA is used. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 21322a80db1..e99e344cf18 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -15496,6 +15496,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radv_cmd_stream *cs = cmd_buffer->cs; + bool needs_pfp_sync_me = false; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); @@ -15532,6 +15533,8 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou va += vk_buffer_address(&buffer->vk, counter_buffer_offset); radv_cs_add_buffer(device->ws, cs->b, buffer->bo); + + needs_pfp_sync_me = true; } if (pdev->info.gfx_level >= GFX12) { @@ -15573,6 +15576,15 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou assert(cs->b->cdw <= cdw_max); + if (needs_pfp_sync_me && pdev->info.has_load_ctx_reg_pkt) { + /* Make sure that PFP waits for ME to avoid a race condition because the data is written by + * STRMOUT_BUFFER_UPDATE/COPY_DATA in ME, but LOAD_CONTEXT_REG_INDEX loads the value from + * memory between PFP and ME. + */ + radeon_check_space(device->ws, cs->b, 2); + ac_emit_cp_pfp_sync_me(cs->b, false); + } + radv_set_streamout_enable(cmd_buffer, false); } @@ -15602,12 +15614,6 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_end(); if (pdev->info.has_load_ctx_reg_pkt) { - /* Make sure that PFP waits for ME to avoid a race condition because the data is written by - * STRMOUT_BUFFER_UPDATE in ME, but LOAD_CONTEXT_REG_INDEX loads the value from memory between - * PFP and ME. - */ - ac_emit_cp_pfp_sync_me(cs->b, false); - ac_emit_cp_load_context_reg_index(cs->b, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE, 1, draw_info->strmout_va, false); } else {