radeonsi: add si_cp_pfp_sync_me

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31168>
This commit is contained in:
Marek Olšák 2024-08-07 00:08:11 -04:00
parent 2d64e8d333
commit fe18c09703
4 changed files with 17 additions and 21 deletions

View file

@ -116,17 +116,15 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
radeon_emit((dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
radeon_emit(command);
}
radeon_end();
/* CP DMA is executed in ME, but index buffers are read by PFP.
* This ensures that ME (CP DMA) is idle before PFP starts fetching
* indices. If we wanted to execute CP DMA in PFP, this packet
* should precede it.
*/
if (sctx->has_graphics && flags & CP_DMA_PFP_SYNC_ME) {
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
}
radeon_end();
if (sctx->has_graphics && flags & CP_DMA_PFP_SYNC_ME)
si_cp_pfp_sync_me(cs);
}
void si_cp_dma_wait_for_idle(struct si_context *sctx, struct radeon_cmdbuf *cs)

View file

@ -189,11 +189,15 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
if (!compute_ib)
sctx->context_roll = true;
if (engine == V_580_CP_PFP) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
}
if (engine == V_580_CP_PFP)
si_cp_pfp_sync_me(cs);
}
}
/* Emit a PFP_SYNC_ME packet into the given command buffer.
 *
 * The packet written here is two dwords: the PKT3 header for
 * PKT3_PFP_SYNC_ME followed by a single zero dword.
 *
 * Callers use it when the PFP must not run ahead of work executed in the ME,
 * e.g. so that CP DMA (executed in ME) is idle before the PFP starts fetching
 * indices, or after buffer stores to an index buffer.
 *
 * This helper does not check whether the context has graphics; callers that
 * need such a check perform it before calling.
 *
 * cs: command buffer that receives the packet.
 */
void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs)
{
radeon_begin(cs);
/* Packet header; the count argument of 0 goes with the one dword emitted below. */
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
/* The only payload dword, always written as 0 here. */
radeon_emit(0);
radeon_end();
}

View file

@ -906,10 +906,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
si_cp_acquire_mem(ctx, cs, gcr_cntl,
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
} else if (flags & SI_CONTEXT_PFP_SYNC_ME) {
radeon_begin_again(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
si_cp_pfp_sync_me(cs);
}
radeon_begin_again(cs);
@ -1140,12 +1137,8 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
/* This might be needed even without any cache flags, such as when doing buffer stores
* to an index buffer.
*/
if (flags & SI_CONTEXT_PFP_SYNC_ME) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
}
if (flags & SI_CONTEXT_PFP_SYNC_ME)
si_cp_pfp_sync_me(cs);
}
if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) {

View file

@ -1570,6 +1570,7 @@ void si_cp_release_acquire_mem_pws(struct si_context *sctx, struct radeon_cmdbuf
unsigned sqtt_flush_flags);
void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned gcr_cntl,
unsigned engine);
void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs);
/* si_debug.c */
void si_gather_context_rolls(struct si_context *sctx);