ac,radeonsi,radv: use V_581A_* engine sel for non-pws acquire_mem packet
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

V_581B_CP_PFP and V_581B_CP_ME are for the PWS acquire_mem packet. The
current code does not cause any problems because we don't pass the engine
arg directly to the acquire_mem packet. But use the native V_581A_* values
for the non-PWS packet so the code is cleaner and consistent.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41069>
This commit is contained in:
Qiang Yu 2026-04-21 15:02:35 +08:00 committed by Marge Bot
parent 89c1bf34ed
commit b41cd59790
6 changed files with 14 additions and 13 deletions

View file

@ -403,7 +403,8 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
enum amd_ip_type ip_type, uint32_t engine,
uint32_t gcr_cntl)
{
assert(ip_type != AMD_IP_GFX || (engine == V_581B_CP_PFP || engine == V_581B_CP_ME));
assert(ip_type != AMD_IP_GFX ||
(engine == V_581A_PREFETCH_PARSER || engine == V_581A_MICRO_ENGINE));
assert(gcr_cntl);
ac_cmdbuf_begin(cs);
@ -411,7 +412,7 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
if (gfx_level >= GFX10) {
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
const uint32_t engine_flag =
ip_type == AMD_IP_GFX && engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0;
ip_type == AMD_IP_GFX ? S_581A_ENGINE_SEL(engine) : 0;
const uint32_t coher_size_hi =
gfx_level >= GFX11 && ip_type == AMD_IP_GFX ? 0xffffff : 0xff;

View file

@ -208,7 +208,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, gcr_cntl);
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, gcr_cntl);
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
!is_mec) {
@ -392,7 +392,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
}
if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
cp_coher_cntl = 0;
@ -406,14 +406,14 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
*
* WB doesn't work without NC.
*/
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
}
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
@ -425,7 +425,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
* Therefore, it should be last. Done in PFP.
*/
if (cp_coher_cntl)
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl);
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl);
radeon_begin(cs);

View file

@ -791,7 +791,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui
/* Writeback L2 because CP isn't coherent with L2 on GFX6-8. */
if (cs->ws->info.gfx_level == GFX8) {
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581B_CP_ME,
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581A_MICRO_ENGINE,
S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
}

View file

@ -296,7 +296,7 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
si_cp_acquire_mem(ctx, cs, gcr_cntl,
flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME);
flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE);
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {
si_cp_pfp_sync_me(cs);
}
@ -457,7 +457,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
*
* GFX6-GFX7 don't support L2 write-back.
*/
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME;
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE;
if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) {
/* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */
@ -485,7 +485,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
S_0301F0_TC_NC_ACTION_ENA(1),
/* If this is not the last ACQUIRE_MEM, flush in ME.
* We only want to synchronize with PFP in the last ACQUIRE_MEM. */
last_acquire_mem ? engine : V_581B_CP_ME);
last_acquire_mem ? engine : V_581A_MICRO_ENGINE);
if (last_acquire_mem)
flags &= ~SI_BARRIER_PFP_SYNC_ME;

View file

@ -59,7 +59,7 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
if (!compute_ib)
sctx->context_roll = true;
if (engine == V_581B_CP_PFP)
if (engine == V_581A_PREFETCH_PARSER)
si_cp_pfp_sync_me(cs);
}
}

View file

@ -61,7 +61,7 @@ static void si_pc_wait_idle(struct si_context *sctx)
radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
radeon_end();
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581B_CP_PFP);
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581A_PREFETCH_PARSER);
}
static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)