diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c index a445cd30aea..2046149daf6 100644 --- a/src/amd/common/ac_cmdbuf_cp.c +++ b/src/amd/common/ac_cmdbuf_cp.c @@ -194,7 +194,7 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx /* Extract GCR_CNTL fields because the encoding is different in RELEASE_MEM. */ assert(G_586_GLI_INV(gcr_cntl) == 0); - assert(G_586_GL1_RANGE(gcr_cntl) == 0); + assert(gfx_level >= GFX12 || G_586_GL1_RANGE(gcr_cntl) == 0); const uint32_t glm_wb = G_586_GLM_WB(gcr_cntl); const uint32_t glm_inv = G_586_GLM_INV(gcr_cntl); const uint32_t glk_wb = G_586_GLK_WB(gcr_cntl); @@ -213,9 +213,8 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); ac_cmdbuf_emit(S_490_EVENT_TYPE(event_type) | S_490_EVENT_INDEX(ts ? 5 : 6) | - (gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv)) | + (gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GL1_INV(gl1_inv)) | S_490_GLV_INV(glv_inv) | - S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq) | diff --git a/src/amd/common/ac_shadowed_regs.c b/src/amd/common/ac_shadowed_regs.c index b7b48eb4bac..6b62770c6aa 100644 --- a/src/amd/common/ac_shadowed_regs.c +++ b/src/amd/common/ac_shadowed_regs.c @@ -3015,8 +3015,8 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i ac_pm4_cmd_add(pm4, 0); /* INT_CTXID */ unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) | - (info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1)) | - S_586_GL1_INV(1) | S_586_GLV_INV(1) | + (info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1) | S_586_GL1_INV(1)) | + S_586_GLV_INV(1) | S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL); /* Wait for the PWS counter. */ diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c index 874afe2f0df..8d374f13ba9 100644 --- a/src/amd/vulkan/radv_cs.c +++ b/src/amd/vulkan/radv_cs.c @@ -53,15 +53,21 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE; } if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) { - gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1); + gcr_cntl |= S_586_GLK_INV(1); *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0; } if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { - gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1); + gcr_cntl |= S_586_GLV_INV(1); - *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1; + *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0; } + if (flush_bits & (RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE) && gfx_level < GFX12) { + gcr_cntl |= S_586_GL1_INV(1); + + *sqtt_flush_bits |= RGP_FLUSH_INVAL_L1; + } + if (flush_bits & RADV_CMD_FLAG_INV_L2) { /* Writeback and invalidate everything in L2. */ gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1); @@ -150,11 +156,10 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev /* Send an event that flushes caches. */ ac_emit_cp_release_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, gcr_cntl); - gcr_cntl &= - C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ if (gfx_level < GFX12) - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV; + gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV; /* Wait for the event and invalidate remaining caches if needed. */ ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_580_CP_PFP, 0, gcr_cntl); @@ -205,7 +210,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev } /* Ignore fields that only modify the behavior of other fields. */ - if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { + if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) { ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, gcr_cntl); } else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && diff --git a/src/gallium/drivers/radeonsi/si_barrier.c b/src/gallium/drivers/radeonsi/si_barrier.c index e7052d6dc2d..86f75f592b1 100644 --- a/src/gallium/drivers/radeonsi/si_barrier.c +++ b/src/gallium/drivers/radeonsi/si_barrier.c @@ -150,9 +150,11 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) if (flags & SI_BARRIER_INV_ICACHE) gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); if (flags & SI_BARRIER_INV_SMEM) - gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1); + gcr_cntl |= S_586_GLK_INV(1); if (flags & SI_BARRIER_INV_VMEM) - gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1); + gcr_cntl |= S_586_GLV_INV(1); + if (ctx->gfx_level < GFX12 && flags & (SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM)) + gcr_cntl |= S_586_GL1_INV(1); /* The L2 cache ops are: * - INV: - invalidate lines that reflect memory (were loaded from memory) @@ -239,15 +241,16 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); unsigned gcr_seq = G_586_SEQ(gcr_cntl); - gcr_cntl &= C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ if (ctx->gfx_level < GFX12) - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV; + gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV; si_cp_release_mem(ctx, cs, cb_db_event, - (ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv)) | + (ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | + S_490_GL1_INV(gl1_inv)) | S_490_GLV_INV(glv_inv) | - S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | + S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq), EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number, @@ -280,7 +283,7 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) } /* Ignore fields that only modify the behavior of other fields. */ - if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { + if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) { si_cp_acquire_mem(ctx, cs, gcr_cntl, flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME); } else if (flags & SI_BARRIER_PFP_SYNC_ME) {