radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync

This fixes random GPU hangs on gfx12 due to incoherent indirect buffer data,
causing random indirect vertex and instance counts, which lead to timeouts if
the random numbers are large.

Fixes: a8abbbb172 - radeonsi: remove r600_pipe_common.h

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30503>
(cherry picked from commit 83b88c54ba)
This commit is contained in:
Marek Olšák 2024-07-30 16:30:14 -04:00 committed by Eric Engestrom
parent 5499c943cd
commit 53005aead9
5 changed files with 8 additions and 6 deletions

View file

@ -4584,7 +4584,7 @@
"description": "radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "a8abbbb172ea69453ac5bbb6a97c3497eda4ca53",
"notes": null

View file

@ -996,7 +996,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
if (info->indirect) {
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
if (sctx->gfx_level <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(info->indirect)->TC_L2_dirty = false;
}

View file

@ -937,6 +937,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
gcr_cntl = 0; /* all done */
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
} else {
/* GFX10 */
@ -990,6 +991,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31;
/* Flush caches and wait for the caches to assert idle.

View file

@ -1464,7 +1464,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
if (sscreen->info.gfx_level <= GFX8) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_L2;
sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WB_L2;
sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
}
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))

View file

@ -2070,7 +2070,7 @@ static void si_draw(struct pipe_context *ctx,
} else if (GFX_VERSION <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) {
/* GFX8 reads index buffers through TC L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indexbuf)->TC_L2_dirty = false;
}
@ -2083,14 +2083,14 @@ static void si_draw(struct pipe_context *ctx,
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
if (GFX_VERSION <= GFX8) {
if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indirect->buffer)->TC_L2_dirty = false;
}
if (indirect->indirect_draw_count &&
si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}