From d9420eed9ecad6a3a1cb37718cf8bff06872041d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 10 Mar 2026 18:51:55 +0100 Subject: [PATCH 1/2] radv: fix missing L2 cache invalidation with streamout on GFX12 COPY_DATA emitted from the CP isn't coherent with L2, so L2 must be invalidated in case the buffer filled size needs to be copied. This fixes rare and random flickering with Mafia 3 Definitive Edition on RDNA4. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14697 Cc: mesa-stable Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index cae08e1ca85..e1cf5d8a513 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -15333,10 +15333,19 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - if (pdev->info.gfx_level >= GFX12) + if (pdev->info.gfx_level >= GFX12) { radv_init_streamout_state(cmd_buffer); - else if (!pdev->use_ngg_streamout) + + /* Invalidate L2 in case the buffer filled size needs to be saved because COPY_DATA isn't + * coherent with L2.
+ */ + if (pdev->info.cp_sdma_ge_use_system_memory_scope) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2; + radv_emit_cache_flush(cmd_buffer); + } + } else if (!pdev->use_ngg_streamout) { radv_flush_vgt_streamout(cmd_buffer); + } ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10); From dfdaf6a27728c4f837d0b637fe2544754dd80ac7 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 10 Mar 2026 18:21:58 +0100 Subject: [PATCH 2/2] radv: rewrite a comment explaining why PFP waits for ME with streamout Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e1cf5d8a513..2bdbdec81e0 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -15543,8 +15543,8 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d radeon_end(); if (gfx_level >= GFX10) { - /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption - * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. + /* Make sure that PFP waits for ME to avoid a race condition because the data is written by + * STRMOUT_BUFFER_UPDATE in ME, but LOAD_CONTEXT_REG_INDEX is in PFP. */ ac_emit_cp_pfp_sync_me(cs->b, false);