diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index cae08e1ca85..2bdbdec81e0 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -15333,10 +15333,19 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - if (pdev->info.gfx_level >= GFX12) + if (pdev->info.gfx_level >= GFX12) { radv_init_streamout_state(cmd_buffer); - else if (!pdev->use_ngg_streamout) + + /* Invalidate L2 in case the buffer filled size needs to be saved because COPY_DATA isn't + * coherent with L2. + */ + if (pdev->info.cp_sdma_ge_use_system_memory_scope) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2; + radv_emit_cache_flush(cmd_buffer); + } + } else if (!pdev->use_ngg_streamout) { radv_flush_vgt_streamout(cmd_buffer); + } ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10); @@ -15534,8 +15543,8 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_end(); if (gfx_level >= GFX10) { - /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption - * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. + /* Make sure that PFP waits for ME to avoid a race condition because the data is written by + * STRMOUT_BUFFER_UPDATE in ME, but LOAD_CONTEXT_REG_INDEX is in PFP. */ ac_emit_cp_pfp_sync_me(cs->b, false);