From b41cd5979024419d2ffe020520cccf786e12b138 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Tue, 21 Apr 2026 15:02:35 +0800 Subject: [PATCH] ac,radeonsi,radv: use V_581A_* engine sel for non-pws acquire_mem packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V_581B_CP_PFP and V_581B_CP_ME are for the PWS acquire_mem packet. The current code does not cause any problems because we don't pass the engine arg directly to the acquire_mem packet, but use the native V_581A_* values for cleaner code. Reviewed-by: Samuel Pitoiset Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_cmdbuf_cp.c | 5 +++-- src/amd/vulkan/radv_cs.c | 10 +++++----- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 2 +- src/gallium/drivers/radeonsi/si_barrier.c | 6 +++--- src/gallium/drivers/radeonsi/si_cp_utils.c | 2 +- src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c index 9ee85b9fca4..18ada0a84ab 100644 --- a/src/amd/common/ac_cmdbuf_cp.c +++ b/src/amd/common/ac_cmdbuf_cp.c @@ -403,7 +403,8 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, enum amd_ip_type ip_type, uint32_t engine, uint32_t gcr_cntl) { - assert(ip_type != AMD_IP_GFX || (engine == V_581B_CP_PFP || engine == V_581B_CP_ME)); + assert(ip_type != AMD_IP_GFX || + (engine == V_581A_PREFETCH_PARSER || engine == V_581A_MICRO_ENGINE)); assert(gcr_cntl); ac_cmdbuf_begin(cs); @@ -411,7 +412,7 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, if (gfx_level >= GFX10) { /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ const uint32_t engine_flag = - ip_type == AMD_IP_GFX && engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0; + ip_type == AMD_IP_GFX ? S_581A_ENGINE_SEL(engine) : 0; const uint32_t coher_size_hi = gfx_level >= GFX11 && ip_type == AMD_IP_GFX ? 
0xffffff : 0xff; diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c index 71d8ccccdd4..c086820b177 100644 --- a/src/amd/vulkan/radv_cs.c +++ b/src/amd/vulkan/radv_cs.c @@ -208,7 +208,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev /* Ignore fields that only modify the behavior of other fields. */ if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, gcr_cntl); + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, gcr_cntl); } else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && !is_mec) { @@ -392,7 +392,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e } if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8)); cp_coher_cntl = 0; @@ -406,14 +406,14 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e * * WB doesn't work without NC. 
*/ - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0; } if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; @@ -425,7 +425,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e * Therefore, it should be last. Done in PFP. */ if (cp_coher_cntl) - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl); + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl); radeon_begin(cs); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 86486b9e10e..0acb18e639a 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -791,7 +791,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui /* Writeback L2 because CP isn't coherent with L2 on GFX6-8. 
*/ if (cs->ws->info.gfx_level == GFX8) { - ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581B_CP_ME, + ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581A_MICRO_ENGINE, S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); } diff --git a/src/gallium/drivers/radeonsi/si_barrier.c b/src/gallium/drivers/radeonsi/si_barrier.c index b983886182c..41a1d7392c7 100644 --- a/src/gallium/drivers/radeonsi/si_barrier.c +++ b/src/gallium/drivers/radeonsi/si_barrier.c @@ -296,7 +296,7 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) /* Ignore fields that only modify the behavior of other fields. */ if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) { si_cp_acquire_mem(ctx, cs, gcr_cntl, - flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME); + flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE); } else if (flags & SI_BARRIER_PFP_SYNC_ME) { si_cp_pfp_sync_me(cs); } @@ -457,7 +457,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs) * * GFX6-GFX7 don't support L2 write-back. */ - unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME; + unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE; if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) { /* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */ @@ -485,7 +485,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs) S_0301F0_TC_NC_ACTION_ENA(1), /* If this is not the last ACQUIRE_MEM, flush in ME. * We only want to synchronize with PFP in the last ACQUIRE_MEM. */ - last_acquire_mem ? engine : V_581B_CP_ME); + last_acquire_mem ? 
engine : V_581A_MICRO_ENGINE); if (last_acquire_mem) flags &= ~SI_BARRIER_PFP_SYNC_ME; diff --git a/src/gallium/drivers/radeonsi/si_cp_utils.c b/src/gallium/drivers/radeonsi/si_cp_utils.c index f5f0a4d8444..419925b8ec5 100644 --- a/src/gallium/drivers/radeonsi/si_cp_utils.c +++ b/src/gallium/drivers/radeonsi/si_cp_utils.c @@ -59,7 +59,7 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign if (!compute_ib) sctx->context_roll = true; - if (engine == V_581B_CP_PFP) + if (engine == V_581A_PREFETCH_PARSER) si_cp_pfp_sync_me(cs); } } diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 06c33996c78..bbe2c1698f5 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -61,7 +61,7 @@ static void si_pc_wait_idle(struct si_context *sctx) radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); radeon_end(); - si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581B_CP_PFP); + si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581A_PREFETCH_PARSER); } static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)