radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync

This fixes random GPU hangs on gfx12 due to incoherent indirect buffer data,
causing random indirect vertex and instance counts, which timeouts if
the random numbers are large.

Fixes: a8abbbb172 - radeonsi: remove r600_pipe_common.h

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30503>
(cherry picked from commit 83b88c54ba)
This commit is contained in:
Marek Olšák 2024-07-30 16:30:14 -04:00 committed by Eric Engestrom
parent d05e39f304
commit 7b7c32b4dc
5 changed files with 8 additions and 6 deletions

View file

@ -644,7 +644,7 @@
"description": "radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "a8abbbb172ea69453ac5bbb6a97c3497eda4ca53",
"notes": null

View file

@ -1208,7 +1208,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
si_resource(info->indirect)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(info->indirect)->TC_L2_dirty = false;
}

View file

@ -949,6 +949,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
gcr_cntl = 0; /* all done */
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
} else {
/* GFX10 */
@ -1002,6 +1003,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31;
/* Flush caches and wait for the caches to assert idle.

View file

@ -1698,7 +1698,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que
/* Settings this in the render cond atom is too late,
* so set it here. */
if (sctx->gfx_level <= GFX8) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
}

View file

@ -2136,7 +2136,7 @@ static void si_draw(struct pipe_context *ctx,
si_resource(indexbuf)->TC_L2_dirty) {
/* GFX8-GFX11 reads index buffers through TC L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indexbuf)->TC_L2_dirty = false;
}
@ -2149,14 +2149,14 @@ static void si_draw(struct pipe_context *ctx,
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indirect->buffer)->TC_L2_dirty = false;
}
if (indirect->indirect_draw_count &&
si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}