diff --git a/.pick_status.json b/.pick_status.json index f85ab88faea..eeddcd2de41 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4584,7 +4584,7 @@ "description": "radeonsi: fix buffer coherency issues on gfx6-8,12 due to missing PFP->ME sync", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "a8abbbb172ea69453ac5bbb6a97c3497eda4ca53", "notes": null diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 76f1837aec5..a0ecd96f1e9 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -996,7 +996,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if (info->indirect) { /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (sctx->gfx_level <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(info->indirect)->TC_L2_dirty = false; } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 95e6966570f..9f2a6e985b8 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -937,6 +937,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) } gcr_cntl = 0; /* all done */ + /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ flags &= ~SI_CONTEXT_PFP_SYNC_ME; } else { /* GFX10 */ @@ -990,6 +991,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) /* Ignore fields that only modify the behavior of other fields. */ if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { + /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31; /* Flush caches and wait for the caches to assert idle. diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 85f9d3943fa..26e32fec861 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1464,7 +1464,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE; if (sscreen->info.gfx_level <= GFX8) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_L2; - sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WB_L2; + sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; } if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 5ac40469a6c..8944be91c84 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2070,7 +2070,7 @@ static void si_draw(struct pipe_context *ctx, } else if (GFX_VERSION <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) { /* GFX8 reads index buffers through TC L2, so it doesn't * need this. */ - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indexbuf)->TC_L2_dirty = false; } @@ -2083,14 +2083,14 @@ static void si_draw(struct pipe_context *ctx, /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (GFX_VERSION <= GFX8) { if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indirect->buffer)->TC_L2_dirty = false; } if (indirect->indirect_draw_count && si_resource(indirect->indirect_draw_count)->TC_L2_dirty) { - sctx->flags |= SI_CONTEXT_WB_L2; + sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; }