mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 20:18:12 +02:00
ac,radeonsi,radv: use V_581A_* engine sel for non-pws acquire_mem packet
V_581B_CP_PFP and V_581B_CP_ME are meant for the PWS acquire_mem packet. The current code does not cause any problems because the engine argument is never passed directly to the acquire_mem packet, but use the native V_581A_* values for cleaner code. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41069>
This commit is contained in:
parent
89c1bf34ed
commit
b41cd59790
6 changed files with 14 additions and 13 deletions
|
|
@ -403,7 +403,8 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
enum amd_ip_type ip_type, uint32_t engine,
|
||||
uint32_t gcr_cntl)
|
||||
{
|
||||
assert(ip_type != AMD_IP_GFX || (engine == V_581B_CP_PFP || engine == V_581B_CP_ME));
|
||||
assert(ip_type != AMD_IP_GFX ||
|
||||
(engine == V_581A_PREFETCH_PARSER || engine == V_581A_MICRO_ENGINE));
|
||||
assert(gcr_cntl);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
|
|
@ -411,7 +412,7 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
if (gfx_level >= GFX10) {
|
||||
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
|
||||
const uint32_t engine_flag =
|
||||
ip_type == AMD_IP_GFX && engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0;
|
||||
ip_type == AMD_IP_GFX ? S_581A_ENGINE_SEL(engine) : 0;
|
||||
const uint32_t coher_size_hi =
|
||||
gfx_level >= GFX11 && ip_type == AMD_IP_GFX ? 0xffffff : 0xff;
|
||||
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
|
||||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, gcr_cntl);
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, gcr_cntl);
|
||||
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
|
||||
!is_mec) {
|
||||
|
|
@ -392,7 +392,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
}
|
||||
|
||||
if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
|
||||
cp_coher_cntl = 0;
|
||||
|
|
@ -406,14 +406,14 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
*
|
||||
* WB doesn't work without NC.
|
||||
*/
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER,
|
||||
cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
|
|
@ -425,7 +425,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
* Therefore, it should be last. Done in PFP.
|
||||
*/
|
||||
if (cp_coher_cntl)
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl);
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581A_PREFETCH_PARSER, cp_coher_cntl);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -791,7 +791,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui
|
|||
|
||||
/* Writeback L2 because CP isn't coherent with L2 on GFX6-8. */
|
||||
if (cs->ws->info.gfx_level == GFX8) {
|
||||
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581B_CP_ME,
|
||||
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581A_MICRO_ENGINE,
|
||||
S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
|
||||
si_cp_acquire_mem(ctx, cs, gcr_cntl,
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME);
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE);
|
||||
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
|
|
@ -457,7 +457,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
*
|
||||
* GFX6-GFX7 don't support L2 write-back.
|
||||
*/
|
||||
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME;
|
||||
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581A_PREFETCH_PARSER : V_581A_MICRO_ENGINE;
|
||||
|
||||
if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) {
|
||||
/* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */
|
||||
|
|
@ -485,7 +485,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
S_0301F0_TC_NC_ACTION_ENA(1),
|
||||
/* If this is not the last ACQUIRE_MEM, flush in ME.
|
||||
* We only want to synchronize with PFP in the last ACQUIRE_MEM. */
|
||||
last_acquire_mem ? engine : V_581B_CP_ME);
|
||||
last_acquire_mem ? engine : V_581A_MICRO_ENGINE);
|
||||
|
||||
if (last_acquire_mem)
|
||||
flags &= ~SI_BARRIER_PFP_SYNC_ME;
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
|
|||
if (!compute_ib)
|
||||
sctx->context_roll = true;
|
||||
|
||||
if (engine == V_581B_CP_PFP)
|
||||
if (engine == V_581A_PREFETCH_PARSER)
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ static void si_pc_wait_idle(struct si_context *sctx)
|
|||
radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
|
||||
radeon_end();
|
||||
|
||||
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581B_CP_PFP);
|
||||
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581A_PREFETCH_PARSER);
|
||||
}
|
||||
|
||||
static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue