From 0e31cb83ce8025588e6ff78076b20a561ccd0991 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 10 Mar 2026 18:51:55 +0100 Subject: [PATCH] radv: fix missing L2 cache invalidation with streamout on GFX12 COPY_DATA emitted from the CP isn't coherent with L2, so L2 must be invalidated when the buffer filled size needs to be copied. This fixes rare and random flickering with Mafia 3 Definitive Edition on RDNA4. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14697 Cc: mesa-stable Signed-off-by: Samuel Pitoiset (cherry picked from commit d9420eed9ecad6a3a1cb37718cf8bff06872041d) Part-of: --- .pick_status.json | 2 +- src/amd/vulkan/radv_cmd_buffer.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index e448b59b7b8..9d2cf1032c8 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -194,7 +194,7 @@ "description": "radv: fix missing L2 cache invalidation with streamout on GFX12", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 5591b90cca7..f0f1f247a09 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -15170,10 +15170,19 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - if (pdev->info.gfx_level >= GFX12) + if (pdev->info.gfx_level >= GFX12) { radv_init_streamout_state(cmd_buffer); - else if (!pdev->use_ngg_streamout) + + /* Invalidate L2 in case the buffer filled size needs to be saved because COPY_DATA isn't + * coherent with L2.
+ */ + if (pdev->info.cp_sdma_ge_use_system_memory_scope) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2; + radv_emit_cache_flush(cmd_buffer); + } + } else if (!pdev->use_ngg_streamout) { radv_flush_vgt_streamout(cmd_buffer); + } ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10);