Merge branch 'radv_fix_xfb_sync_gfx12' into 'main'

radv: fix missing L2 cache invalidation with streamout on GFX12

Closes #14697

See merge request mesa/mesa!40327
This commit is contained in:
Samuel Pitoiset 2026-03-11 08:35:16 +01:00
commit 7c4e0ea485

View file

@@ -15333,10 +15333,19 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-if (pdev->info.gfx_level >= GFX12)
+if (pdev->info.gfx_level >= GFX12) {
 radv_init_streamout_state(cmd_buffer);
-else if (!pdev->use_ngg_streamout)
+/* Invalidate L2 in case the buffer filled size needs to be saved because COPY_DATA isn't
+ * coherent with L2.
+ */
+if (pdev->info.cp_sdma_ge_use_system_memory_scope) {
+cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
+radv_emit_cache_flush(cmd_buffer);
+}
+} else if (!pdev->use_ngg_streamout) {
 radv_flush_vgt_streamout(cmd_buffer);
+}
ASSERTED unsigned cdw_max = radeon_check_space(device->ws, cs->b, MAX_SO_BUFFERS * 10);
@@ -15534,8 +15543,8 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
radeon_end();
if (gfx_level >= GFX10) {
-/* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
- * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
+/* Make sure that PFP waits for ME to avoid a race condition because the data is written by
+ * STRMOUT_BUFFER_UPDATE in ME, but LOAD_CONTEXT_REG_INDEX is in PFP.
*/
ac_emit_cp_pfp_sync_me(cs->b, false);