radeonsi: factor the PFP_SYNC_ME packet emission into si_cp_pfp_sync_me()

Adds si_cp_pfp_sync_me(cs) to si_cp_utils.c (declared in si_pipe.h). It opens
a radeon_begin()/radeon_end() section on the given command buffer and emits
PKT3(PKT3_PFP_SYNC_ME, 0, 0) followed by a 0 dword.

The four open-coded copies of that sequence are replaced by calls to it:
  - si_emit_cp_dma()          (si_cp_dma.c)   radeon_end() is moved ahead of
                                              the call so the helper can open
                                              its own begin/end section.
  - si_cp_acquire_mem()       (si_cp_utils.c)
  - gfx10_emit_cache_flush()  (si_gfx_cs.c)
  - gfx6_emit_cache_flush()   (si_gfx_cs.c)

NOTE(review): the line structure of this patch was rebuilt from a copy whose
newlines had been collapsed. Hunk line counts match the @@ headers, but the
leading indentation of hunk lines is reconstructed (3-space indent assumed).
If a context line fails to match, apply with whitespace tolerance, e.g.
`git apply --ignore-whitespace` or `patch -l`.

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 309973e3724..8c1d509b31b 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -116,17 +116,15 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
       radeon_emit((dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
       radeon_emit(command);
    }
+   radeon_end();
 
    /* CP DMA is executed in ME, but index buffers are read by PFP.
     * This ensures that ME (CP DMA) is idle before PFP starts fetching
     * indices. If we wanted to execute CP DMA in PFP, this packet
     * should precede it.
     */
-   if (sctx->has_graphics && flags & CP_DMA_PFP_SYNC_ME) {
-      radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-      radeon_emit(0);
-   }
-   radeon_end();
+   if (sctx->has_graphics && flags & CP_DMA_PFP_SYNC_ME)
+      si_cp_pfp_sync_me(cs);
 }
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx, struct radeon_cmdbuf *cs)
diff --git a/src/gallium/drivers/radeonsi/si_cp_utils.c b/src/gallium/drivers/radeonsi/si_cp_utils.c
index 4c1f65151ed..9b9a6001e88 100644
--- a/src/gallium/drivers/radeonsi/si_cp_utils.c
+++ b/src/gallium/drivers/radeonsi/si_cp_utils.c
@@ -189,11 +189,15 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
       if (!compute_ib)
          sctx->context_roll = true;
 
-      if (engine == V_580_CP_PFP) {
-         radeon_begin(cs);
-         radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-         radeon_emit(0);
-         radeon_end();
-      }
+      if (engine == V_580_CP_PFP)
+         si_cp_pfp_sync_me(cs);
    }
 }
+
+void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs)
+{
+   radeon_begin(cs);
+   radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(0);
+   radeon_end();
+}
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 5c558aee0a3..c39a7c626d2 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -906,10 +906,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
       si_cp_acquire_mem(ctx, cs, gcr_cntl,
                         flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
    } else if (flags & SI_CONTEXT_PFP_SYNC_ME) {
-      radeon_begin_again(cs);
-      radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-      radeon_emit(0);
-      radeon_end();
+      si_cp_pfp_sync_me(cs);
    }
 
    radeon_begin_again(cs);
@@ -1140,12 +1137,8 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
       /* This might be needed even without any cache flags, such as when doing buffer stores
        * to an index buffer.
        */
-      if (flags & SI_CONTEXT_PFP_SYNC_ME) {
-         radeon_begin(cs);
-         radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-         radeon_emit(0);
-         radeon_end();
-      }
+      if (flags & SI_CONTEXT_PFP_SYNC_ME)
+         si_cp_pfp_sync_me(cs);
    }
 
    if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index fdae9da4f64..35a2104142a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1570,6 +1570,7 @@ void si_cp_release_acquire_mem_pws(struct si_context *sctx, struct radeon_cmdbuf
                                    unsigned sqtt_flush_flags);
 void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned gcr_cntl,
                        unsigned engine);
+void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs);
 
 /* si_debug.c */
 void si_gather_context_rolls(struct si_context *sctx);