From 862a5f7abe3cd3c27deb6be4fe2c02b33561aedf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 6 Aug 2024 23:55:22 -0400 Subject: [PATCH] radeonsi: add gfx10+ support into si_cp_acquire_mem Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_cp_utils.c | 15 ++++++++++++- src/gallium/drivers/radeonsi/si_gfx_cs.c | 26 +++++----------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_utils.c b/src/gallium/drivers/radeonsi/si_cp_utils.c index b54488d833b..4c1f65151ed 100644 --- a/src/gallium/drivers/radeonsi/si_cp_utils.c +++ b/src/gallium/drivers/radeonsi/si_cp_utils.c @@ -142,7 +142,20 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign assert(gcr_cntl); if (sctx->gfx_level >= GFX10) { - /* TODO */ + /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ + unsigned engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0; + + /* Flush caches. This doesn't wait for idle. */ + radeon_begin(cs); + radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); + radeon_emit(engine_flag); /* which engine to use */ + radeon_emit(0xffffffff); /* CP_COHER_SIZE */ + radeon_emit(0x01ffffff); /* CP_COHER_SIZE_HI */ + radeon_emit(0); /* CP_COHER_BASE */ + radeon_emit(0); /* CP_COHER_BASE_HI */ + radeon_emit(0x0000000A); /* POLL_INTERVAL */ + radeon_emit(gcr_cntl); /* GCR_CNTL */ + radeon_end(); } else { bool compute_ib = !sctx->has_graphics; diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index c049c4ebe77..8703b4d1bac 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -840,10 +840,9 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) ctx->num_cs_flushes++; ctx->compute_is_busy = false; } + radeon_end(); if (cb_db_event) { - radeon_end(); - if (ctx->gfx_level >= GFX11) { si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_586_GLI_INV); @@ -900,33 +899,20 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags); } } - - radeon_begin_again(cs); } /* Ignore fields that only modify the behavior of other fields. */ if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { - /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ - unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31; - - /* Flush caches and wait for the caches to assert idle. - * The cache flush is executed in the ME, but the PFP waits - * for completion. - */ - radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - radeon_emit(dont_sync_pfp); /* CP_COHER_CNTL */ - radeon_emit(0xffffffff); /* CP_COHER_SIZE */ - radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */ - radeon_emit(0); /* CP_COHER_BASE */ - radeon_emit(0); /* CP_COHER_BASE_HI */ - radeon_emit(0x0000000A); /* POLL_INTERVAL */ - radeon_emit(gcr_cntl); /* GCR_CNTL */ + si_cp_acquire_mem(ctx, cs, gcr_cntl, + flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME); } else if (flags & SI_CONTEXT_PFP_SYNC_ME) { - /* Synchronize PFP with ME. (this stalls PFP) */ + radeon_begin_again(cs); radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(0); + radeon_end(); } + radeon_begin_again(cs); if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) { radeon_event_write(V_028A90_PIPELINESTAT_START); ctx->pipeline_stats_enabled = 1;