amd: don't use non-existent GL1 packet fields on gfx12

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38093>
This commit is contained in:
Marek Olšák 2025-10-27 15:52:12 -04:00 committed by Marge Bot
parent 12062110ab
commit 484a36302d
4 changed files with 26 additions and 19 deletions

View file

@@ -194,7 +194,7 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
/* Extract GCR_CNTL fields because the encoding is different in RELEASE_MEM. */
assert(G_586_GLI_INV(gcr_cntl) == 0);
assert(G_586_GL1_RANGE(gcr_cntl) == 0);
assert(gfx_level >= GFX12 || G_586_GL1_RANGE(gcr_cntl) == 0);
const uint32_t glm_wb = G_586_GLM_WB(gcr_cntl);
const uint32_t glm_inv = G_586_GLM_INV(gcr_cntl);
const uint32_t glk_wb = G_586_GLK_WB(gcr_cntl);
@@ -213,9 +213,8 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, 6, 0));
ac_cmdbuf_emit(S_490_EVENT_TYPE(event_type) |
S_490_EVENT_INDEX(ts ? 5 : 6) |
(gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv)) |
(gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GL1_INV(gl1_inv)) |
S_490_GLV_INV(glv_inv) |
S_490_GL1_INV(gl1_inv) |
S_490_GL2_INV(gl2_inv) |
S_490_GL2_WB(gl2_wb) |
S_490_SEQ(gcr_seq) |

View file

@@ -3015,8 +3015,8 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i
ac_pm4_cmd_add(pm4, 0); /* INT_CTXID */
unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) |
(info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1)) |
S_586_GL1_INV(1) | S_586_GLV_INV(1) |
(info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1) | S_586_GL1_INV(1)) |
S_586_GLV_INV(1) |
S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL);
/* Wait for the PWS counter. */

View file

@@ -53,15 +53,21 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
*sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
}
if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
gcr_cntl |= S_586_GLK_INV(1);
*sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
}
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
gcr_cntl |= S_586_GLV_INV(1);
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1;
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
}
if (flush_bits & (RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE) && gfx_level < GFX12) {
gcr_cntl |= S_586_GL1_INV(1);
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L1;
}
if (flush_bits & RADV_CMD_FLAG_INV_L2) {
/* Writeback and invalidate everything in L2. */
gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1);
@@ -150,11 +156,10 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
/* Send an event that flushes caches. */
ac_emit_cp_release_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, gcr_cntl);
gcr_cntl &=
C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
gcr_cntl &= C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
if (gfx_level < GFX12)
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV;
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV;
/* Wait for the event and invalidate remaining caches if needed. */
ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_580_CP_PFP, 0, gcr_cntl);
@@ -205,7 +210,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
}
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) {
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, gcr_cntl);
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&

View file

@@ -150,9 +150,11 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
if (flags & SI_BARRIER_INV_ICACHE)
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
if (flags & SI_BARRIER_INV_SMEM)
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
gcr_cntl |= S_586_GLK_INV(1);
if (flags & SI_BARRIER_INV_VMEM)
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
gcr_cntl |= S_586_GLV_INV(1);
if (ctx->gfx_level < GFX12 && flags & (SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM))
gcr_cntl |= S_586_GL1_INV(1);
/* The L2 cache ops are:
* - INV: - invalidate lines that reflect memory (were loaded from memory)
@@ -239,15 +241,16 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
unsigned gcr_seq = G_586_SEQ(gcr_cntl);
gcr_cntl &= C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
gcr_cntl &= C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
if (ctx->gfx_level < GFX12)
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV;
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV;
si_cp_release_mem(ctx, cs, cb_db_event,
(ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv)) |
(ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) |
S_490_GL1_INV(gl1_inv)) |
S_490_GLV_INV(glv_inv) |
S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
S_490_SEQ(gcr_seq),
EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number,
@@ -280,7 +283,7 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) {
si_cp_acquire_mem(ctx, cs, gcr_cntl,
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {