diff --git a/.pick_status.json b/.pick_status.json index d1cdd8bf6c3..5e90dcc4a51 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -644,7 +644,7 @@ "description": "radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "a8abbbb172ea69453ac5bbb6a97c3497eda4ca53", "notes": null diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index b1a569d780e..98bfa4ecaa7 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1208,7 +1208,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */ if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) && si_resource(info->indirect)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(info->indirect)->TC_L2_dirty = false; } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 14f7ba767b0..9b11eaf678e 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -949,6 +949,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) } gcr_cntl = 0; /* all done */ + /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ flags &= ~SI_CONTEXT_PFP_SYNC_ME; } else { /* GFX10 */ @@ -1002,6 +1003,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) /* Ignore fields that only modify the behavior of other fields. */ if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { + /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31; /* Flush caches and wait for the caches to assert idle. diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index c54be078cff..18c19b58063 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1698,7 +1698,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que /* Settings this in the render cond atom is too late, * so set it here. */ if (sctx->gfx_level <= GFX8) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 33c8e7769bb..bd2c936306b 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2136,7 +2136,7 @@ static void si_draw(struct pipe_context *ctx, si_resource(indexbuf)->TC_L2_dirty) { /* GFX8-GFX11 reads index buffers through TC L2, so it doesn't * need this. */ - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indexbuf)->TC_L2_dirty = false; } @@ -2149,14 +2149,14 @@ static void si_draw(struct pipe_context *ctx, /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */ if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) { if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indirect->buffer)->TC_L2_dirty = false; } if (indirect->indirect_draw_count && si_resource(indirect->indirect_draw_count)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; }