radeonsi: add gfx10+ support into si_cp_acquire_mem

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31168>
This commit is contained in:
Marek Olšák 2024-08-06 23:55:22 -04:00
parent a42d9db1b6
commit 862a5f7abe
2 changed files with 20 additions and 21 deletions

View file

@ -142,7 +142,20 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
assert(gcr_cntl);
if (sctx->gfx_level >= GFX10) {
/* TODO */
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
unsigned engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0;
/* Flush caches. This doesn't wait for idle. */
radeon_begin(cs);
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
radeon_emit(engine_flag); /* which engine to use */
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
radeon_emit(0x01ffffff); /* CP_COHER_SIZE_HI */
radeon_emit(0); /* CP_COHER_BASE */
radeon_emit(0); /* CP_COHER_BASE_HI */
radeon_emit(0x0000000A); /* POLL_INTERVAL */
radeon_emit(gcr_cntl); /* GCR_CNTL */
radeon_end();
} else {
bool compute_ib = !sctx->has_graphics;

View file

@ -840,10 +840,9 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
ctx->num_cs_flushes++;
ctx->compute_is_busy = false;
}
radeon_end();
if (cb_db_event) {
radeon_end();
if (ctx->gfx_level >= GFX11) {
si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_586_GLI_INV);
@ -900,33 +899,20 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags);
}
}
radeon_begin_again(cs);
}
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31;
/* Flush caches and wait for the caches to assert idle.
* The cache flush is executed in the ME, but the PFP waits
* for completion.
*/
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
radeon_emit(dont_sync_pfp); /* CP_COHER_CNTL */
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */
radeon_emit(0); /* CP_COHER_BASE */
radeon_emit(0); /* CP_COHER_BASE_HI */
radeon_emit(0x0000000A); /* POLL_INTERVAL */
radeon_emit(gcr_cntl); /* GCR_CNTL */
si_cp_acquire_mem(ctx, cs, gcr_cntl,
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
} else if (flags & SI_CONTEXT_PFP_SYNC_ME) {
/* Synchronize PFP with ME. (this stalls PFP) */
radeon_begin_again(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
}
radeon_begin_again(cs);
if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) {
radeon_event_write(V_028A90_PIPELINESTAT_START);
ctx->pipeline_stats_enabled = 1;