mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 01:20:17 +01:00
radeonsi: rename SI_CONTEXT_* flags to SI_BARRIER_* flags
Some of the definition names are changed completely.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31193>
This commit is contained in:
parent
6f25ace87e
commit
ce72376641
19 changed files with 203 additions and 206 deletions
|
|
@ -348,7 +348,7 @@ static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct s
|
|||
|
||||
/* TODO: Range-invalidate GL2 */
|
||||
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,19 +38,19 @@ static void prepare_cb_db_flushes(struct si_context *ctx, unsigned *flags)
|
|||
/* Don't flush CB and DB if there have been no draw calls. */
|
||||
if (ctx->num_draw_calls == ctx->last_cb_flush_num_draw_calls &&
|
||||
ctx->num_decompress_calls == ctx->last_cb_flush_num_decompress_calls)
|
||||
*flags &= ~SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
*flags &= ~SI_BARRIER_SYNC_AND_INV_CB;
|
||||
|
||||
if (ctx->num_draw_calls == ctx->last_db_flush_num_draw_calls &&
|
||||
ctx->num_decompress_calls == ctx->last_db_flush_num_decompress_calls)
|
||||
*flags &= ~SI_CONTEXT_FLUSH_AND_INV_DB;
|
||||
*flags &= ~SI_BARRIER_SYNC_AND_INV_DB;
|
||||
|
||||
/* Track the last flush. */
|
||||
if (*flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
|
||||
if (*flags & SI_BARRIER_SYNC_AND_INV_CB) {
|
||||
ctx->num_cb_cache_flushes++;
|
||||
ctx->last_cb_flush_num_draw_calls = ctx->num_draw_calls;
|
||||
ctx->last_cb_flush_num_decompress_calls = ctx->num_decompress_calls;
|
||||
}
|
||||
if (*flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
|
||||
if (*flags & SI_BARRIER_SYNC_AND_INV_DB) {
|
||||
ctx->num_db_cache_flushes++;
|
||||
ctx->last_db_flush_num_draw_calls = ctx->num_draw_calls;
|
||||
ctx->last_db_flush_num_decompress_calls = ctx->num_decompress_calls;
|
||||
|
|
@ -68,30 +68,30 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
|
||||
if (!ctx->has_graphics) {
|
||||
/* Only process compute flags. */
|
||||
flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
|
||||
SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA |
|
||||
SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
flags &= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
|
||||
SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA |
|
||||
SI_BARRIER_SYNC_CS;
|
||||
}
|
||||
|
||||
/* We don't need these. */
|
||||
assert(!(flags & SI_CONTEXT_FLUSH_AND_INV_DB_META));
|
||||
assert(!(flags & SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META));
|
||||
|
||||
prepare_cb_db_flushes(ctx, &flags);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
if (flags & SI_CONTEXT_VGT_FLUSH)
|
||||
if (flags & SI_BARRIER_EVENT_VGT_FLUSH)
|
||||
radeon_event_write(V_028A90_VGT_FLUSH);
|
||||
|
||||
if (flags & SI_CONTEXT_INV_ICACHE)
|
||||
if (flags & SI_BARRIER_INV_ICACHE)
|
||||
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
|
||||
if (flags & SI_CONTEXT_INV_SCACHE) {
|
||||
if (flags & SI_BARRIER_INV_SMEM) {
|
||||
/* TODO: When writing to the SMEM L1 cache, we need to set SEQ
|
||||
* to FORWARD when both L1 and L2 are written out (WB or INV).
|
||||
*/
|
||||
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
|
||||
}
|
||||
if (flags & SI_CONTEXT_INV_VCACHE)
|
||||
if (flags & SI_BARRIER_INV_VMEM)
|
||||
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
|
||||
|
||||
/* The L2 cache ops are:
|
||||
|
|
@ -104,39 +104,39 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
*
|
||||
* GLM doesn't support WB alone. If WB is set, INV must be set too.
|
||||
*/
|
||||
if (flags & SI_CONTEXT_INV_L2) {
|
||||
if (flags & SI_BARRIER_INV_L2) {
|
||||
/* Writeback and invalidate everything in L2. */
|
||||
gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) |
|
||||
(ctx->gfx_level < GFX12 ? S_586_GLM_INV(1) | S_586_GLM_WB(1) : 0);
|
||||
ctx->num_L2_invalidates++;
|
||||
} else if (flags & SI_CONTEXT_WB_L2) {
|
||||
} else if (flags & SI_BARRIER_WB_L2) {
|
||||
gcr_cntl |= S_586_GL2_WB(1) |
|
||||
(ctx->gfx_level < GFX12 ? S_586_GLM_WB(1) | S_586_GLM_INV(1) : 0);
|
||||
} else if (flags & SI_CONTEXT_INV_L2_METADATA) {
|
||||
} else if (flags & SI_BARRIER_INV_L2_METADATA) {
|
||||
assert(ctx->gfx_level < GFX12);
|
||||
gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
|
||||
}
|
||||
|
||||
if (flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) {
|
||||
if (flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) {
|
||||
/* Flush CMASK/FMASK/DCC. Will wait for idle later. */
|
||||
if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB)
|
||||
if (ctx->gfx_level < GFX12 && flags & SI_BARRIER_SYNC_AND_INV_CB)
|
||||
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
|
||||
|
||||
/* Gfx11 can't flush DB_META and should use a TS event instead. */
|
||||
/* Flush HTILE. Will wait for idle later. */
|
||||
if (ctx->gfx_level < GFX12 && ctx->gfx_level != GFX11 &&
|
||||
flags & SI_CONTEXT_FLUSH_AND_INV_DB)
|
||||
flags & SI_BARRIER_SYNC_AND_INV_DB)
|
||||
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
|
||||
|
||||
/* First flush CB/DB, then L1/L2. */
|
||||
gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
|
||||
|
||||
if ((flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) ==
|
||||
(SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) {
|
||||
if ((flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) ==
|
||||
(SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) {
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
} else if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
|
||||
} else if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
|
||||
cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
|
||||
} else if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
|
||||
} else if (flags & SI_BARRIER_SYNC_AND_INV_DB) {
|
||||
if (ctx->gfx_level == GFX11)
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
else
|
||||
|
|
@ -146,18 +146,18 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
} else {
|
||||
/* Wait for graphics shaders to go idle if requested. */
|
||||
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
|
||||
if (flags & SI_BARRIER_SYNC_PS) {
|
||||
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
|
||||
/* Only count explicit shader flushes, not implicit ones. */
|
||||
ctx->num_vs_flushes++;
|
||||
ctx->num_ps_flushes++;
|
||||
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
|
||||
} else if (flags & SI_BARRIER_SYNC_VS) {
|
||||
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
|
||||
ctx->num_vs_flushes++;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && ctx->compute_is_busy) {
|
||||
if (flags & SI_BARRIER_SYNC_CS && ctx->compute_is_busy) {
|
||||
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
|
||||
ctx->num_cs_flushes++;
|
||||
ctx->compute_is_busy = false;
|
||||
|
|
@ -170,13 +170,13 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
|
||||
/* Wait for the event and invalidate remaining caches if needed. */
|
||||
si_cp_acquire_mem_pws(ctx, cs, cb_db_event,
|
||||
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME,
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME,
|
||||
gcr_cntl & ~C_586_GLI_INV, /* keep only GLI_INV */
|
||||
0, flags);
|
||||
|
||||
gcr_cntl = 0; /* all done */
|
||||
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
|
||||
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
|
||||
flags &= ~SI_BARRIER_PFP_SYNC_ME;
|
||||
} else {
|
||||
/* GFX10 */
|
||||
struct si_resource *wait_mem_scratch =
|
||||
|
|
@ -226,16 +226,16 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
|
||||
si_cp_acquire_mem(ctx, cs, gcr_cntl,
|
||||
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
|
||||
} else if (flags & SI_CONTEXT_PFP_SYNC_ME) {
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
|
||||
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
|
||||
radeon_begin_again(cs);
|
||||
if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) {
|
||||
if (flags & SI_BARRIER_EVENT_PIPELINESTAT_START && ctx->pipeline_stats_enabled != 1) {
|
||||
radeon_event_write(V_028A90_PIPELINESTAT_START);
|
||||
ctx->pipeline_stats_enabled = 1;
|
||||
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && ctx->pipeline_stats_enabled != 0) {
|
||||
} else if (flags & SI_BARRIER_EVENT_PIPELINESTAT_STOP && ctx->pipeline_stats_enabled != 0) {
|
||||
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
|
||||
ctx->pipeline_stats_enabled = 0;
|
||||
}
|
||||
|
|
@ -253,13 +253,13 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
|
||||
if (!sctx->has_graphics) {
|
||||
/* Only process compute flags. */
|
||||
flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
|
||||
SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA |
|
||||
SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
flags &= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
|
||||
SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA |
|
||||
SI_BARRIER_SYNC_CS;
|
||||
}
|
||||
|
||||
uint32_t cp_coher_cntl = 0;
|
||||
const uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB);
|
||||
const uint32_t flush_cb_db = flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB);
|
||||
|
||||
assert(sctx->gfx_level <= GFX9);
|
||||
|
||||
|
|
@ -273,13 +273,13 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
* to add a workaround for it.
|
||||
*/
|
||||
|
||||
if (flags & SI_CONTEXT_INV_ICACHE)
|
||||
if (flags & SI_BARRIER_INV_ICACHE)
|
||||
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
|
||||
if (flags & SI_CONTEXT_INV_SCACHE)
|
||||
if (flags & SI_BARRIER_INV_SMEM)
|
||||
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
|
||||
|
||||
if (sctx->gfx_level <= GFX8) {
|
||||
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
|
||||
if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
|
||||
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
|
||||
S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
|
||||
S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) |
|
||||
|
|
@ -291,18 +291,18 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
si_cp_release_mem(sctx, cs, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_INT_SEL_NONE, EOP_DATA_SEL_DISCARD, NULL, 0, 0, SI_NOT_QUERY);
|
||||
}
|
||||
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
|
||||
if (flags & SI_BARRIER_SYNC_AND_INV_DB)
|
||||
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
|
||||
}
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
|
||||
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
|
||||
if (flags & SI_BARRIER_SYNC_AND_INV_CB)
|
||||
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
|
||||
|
||||
/* Flush HTILE. SURFACE_SYNC will wait for idle. */
|
||||
if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META))
|
||||
if (flags & (SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META))
|
||||
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
|
||||
|
||||
/* Wait for shader engines to go idle.
|
||||
|
|
@ -316,25 +316,25 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
* bindings.
|
||||
*/
|
||||
if (sctx->gfx_level <= GFX8 || !flush_cb_db) {
|
||||
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
|
||||
if (flags & SI_BARRIER_SYNC_PS) {
|
||||
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
|
||||
/* Only count explicit shader flushes, not implicit ones done by SURFACE_SYNC. */
|
||||
sctx->num_vs_flushes++;
|
||||
sctx->num_ps_flushes++;
|
||||
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
|
||||
} else if (flags & SI_BARRIER_SYNC_VS) {
|
||||
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
|
||||
sctx->num_vs_flushes++;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && sctx->compute_is_busy) {
|
||||
if (flags & SI_BARRIER_SYNC_CS && sctx->compute_is_busy) {
|
||||
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
|
||||
sctx->num_cs_flushes++;
|
||||
sctx->compute_is_busy = false;
|
||||
}
|
||||
|
||||
/* VGT state synchronization. */
|
||||
if (flags & SI_CONTEXT_VGT_FLUSH)
|
||||
if (flags & SI_BARRIER_EVENT_VGT_FLUSH)
|
||||
radeon_event_write(V_028A90_VGT_FLUSH);
|
||||
|
||||
radeon_end();
|
||||
|
|
@ -348,10 +348,10 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
|
||||
/* Set the CB/DB flush event. */
|
||||
switch (flush_cb_db) {
|
||||
case SI_CONTEXT_FLUSH_AND_INV_CB:
|
||||
case SI_BARRIER_SYNC_AND_INV_CB:
|
||||
cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
|
||||
break;
|
||||
case SI_CONTEXT_FLUSH_AND_INV_DB:
|
||||
case SI_BARRIER_SYNC_AND_INV_DB:
|
||||
cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
|
||||
break;
|
||||
default:
|
||||
|
|
@ -373,17 +373,17 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
*/
|
||||
tc_flags = 0;
|
||||
|
||||
if (flags & SI_CONTEXT_INV_L2_METADATA) {
|
||||
if (flags & SI_BARRIER_INV_L2_METADATA) {
|
||||
tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA;
|
||||
}
|
||||
|
||||
/* Ideally flush L2 together with CB/DB. */
|
||||
if (flags & SI_CONTEXT_INV_L2) {
|
||||
if (flags & SI_BARRIER_INV_L2) {
|
||||
/* Writeback and invalidate everything in L2 & L1. */
|
||||
tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA;
|
||||
|
||||
/* Clear the flags. */
|
||||
flags &= ~(SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2);
|
||||
flags &= ~(SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2);
|
||||
sctx->num_L2_invalidates++;
|
||||
}
|
||||
|
||||
|
|
@ -416,9 +416,9 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
*
|
||||
* GFX6-GFX7 don't support L2 write-back.
|
||||
*/
|
||||
unsigned engine = flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME;
|
||||
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME;
|
||||
|
||||
if (flags & SI_CONTEXT_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_CONTEXT_WB_L2)) {
|
||||
if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) {
|
||||
/* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */
|
||||
si_cp_acquire_mem(sctx, cs,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
|
|
@ -428,7 +428,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
/* L1 invalidation and L2 writeback must be done separately, because both operations can't
|
||||
* be done together.
|
||||
*/
|
||||
if (flags & SI_CONTEXT_WB_L2) {
|
||||
if (flags & SI_BARRIER_WB_L2) {
|
||||
/* WB = write-back
|
||||
* NC = apply to non-coherent MTYPEs
|
||||
* (i.e. MTYPE <= 1, which is what we use everywhere)
|
||||
|
|
@ -438,7 +438,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
* If we get here, the only flag that can't be executed together with WB_L2 is VMEM cache
|
||||
* invalidation.
|
||||
*/
|
||||
bool last_acquire_mem = !(flags & SI_CONTEXT_INV_VCACHE);
|
||||
bool last_acquire_mem = !(flags & SI_BARRIER_INV_VMEM);
|
||||
|
||||
si_cp_acquire_mem(sctx, cs,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) |
|
||||
|
|
@ -448,33 +448,33 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
last_acquire_mem ? engine : V_580_CP_ME);
|
||||
|
||||
if (last_acquire_mem)
|
||||
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
|
||||
flags &= ~SI_BARRIER_PFP_SYNC_ME;
|
||||
cp_coher_cntl = 0;
|
||||
sctx->num_L2_writebacks++;
|
||||
}
|
||||
|
||||
if (flags & SI_CONTEXT_INV_VCACHE)
|
||||
if (flags & SI_BARRIER_INV_VMEM)
|
||||
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
|
||||
|
||||
/* If there are still some cache flags left... */
|
||||
if (cp_coher_cntl) {
|
||||
si_cp_acquire_mem(sctx, cs, cp_coher_cntl, engine);
|
||||
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
|
||||
flags &= ~SI_BARRIER_PFP_SYNC_ME;
|
||||
}
|
||||
|
||||
/* This might be needed even without any cache flags, such as when doing buffer stores
|
||||
* to an index buffer.
|
||||
*/
|
||||
if (flags & SI_CONTEXT_PFP_SYNC_ME)
|
||||
if (flags & SI_BARRIER_PFP_SYNC_ME)
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
|
||||
if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) {
|
||||
if (flags & SI_BARRIER_EVENT_PIPELINESTAT_START && sctx->pipeline_stats_enabled != 1) {
|
||||
radeon_begin(cs);
|
||||
radeon_event_write(V_028A90_PIPELINESTAT_START);
|
||||
radeon_end();
|
||||
sctx->pipeline_stats_enabled = 1;
|
||||
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && sctx->pipeline_stats_enabled != 0) {
|
||||
} else if (flags & SI_BARRIER_EVENT_PIPELINESTAT_STOP && sctx->pipeline_stats_enabled != 0) {
|
||||
radeon_begin(cs);
|
||||
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
|
||||
radeon_end();
|
||||
|
|
@ -533,12 +533,12 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
|
|||
if (!si_is_buffer_idle(sctx, buf, RADEON_USAGE_WRITE |
|
||||
(writable_buffers_mask & BITFIELD_BIT(i) ? RADEON_USAGE_READ : 0))) {
|
||||
if (buf->bind_history & ps_mask)
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS;
|
||||
else
|
||||
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
|
||||
|
||||
if (buf->bind_history & cs_mask)
|
||||
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -553,12 +553,12 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
|
|||
if (!si_is_buffer_idle(sctx, img, RADEON_USAGE_WRITE | (writable ? RADEON_USAGE_READ : 0))) {
|
||||
si_make_CB_shader_coherent(sctx, images[i].resource->nr_samples, true,
|
||||
((struct si_texture*)images[i].resource)->surface.u.gfx9.color.dcc.pipe_aligned);
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
|
||||
}
|
||||
}
|
||||
|
||||
/* Invalidate the VMEM cache only. The SMEM cache isn't used by shader buffers. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -569,18 +569,18 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
|
|||
unsigned num_images,
|
||||
const struct pipe_image_view *images)
|
||||
{
|
||||
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
|
||||
|
||||
if (num_images) {
|
||||
/* Make sure image stores are visible to CB, which doesn't use L2 on GFX6-8. */
|
||||
sctx->barrier_flags |= sctx->gfx_level <= GFX8 ? SI_CONTEXT_WB_L2 : 0;
|
||||
sctx->barrier_flags |= sctx->gfx_level <= GFX8 ? SI_BARRIER_WB_L2 : 0;
|
||||
/* Make sure image stores are visible to all CUs. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
|
||||
}
|
||||
|
||||
/* Make sure buffer stores are visible to all CUs and also as index/indirect buffers. */
|
||||
if (num_buffers)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM | SI_BARRIER_PFP_SYNC_ME;
|
||||
|
||||
/* We must set L2_cache_dirty for buffers because:
|
||||
* - GFX6,12: CP DMA doesn't use L2.
|
||||
|
|
@ -600,7 +600,7 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
|
|||
images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
|
||||
(sctx->screen->always_allow_dcc_stores ||
|
||||
images[i].access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE)) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -659,10 +659,10 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
|||
if (!flags)
|
||||
return;
|
||||
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
|
||||
|
||||
if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM;
|
||||
|
||||
/* VMEM cache contents are written back to L2 automatically at the end of waves, but
|
||||
* the contents of other VMEM caches might still be stale.
|
||||
|
|
@ -671,38 +671,38 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
|||
*/
|
||||
if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |
|
||||
PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER))
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
|
||||
|
||||
if (flags & (PIPE_BARRIER_INDEX_BUFFER | PIPE_BARRIER_INDIRECT_BUFFER))
|
||||
sctx->barrier_flags |= SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_PFP_SYNC_ME;
|
||||
|
||||
/* Index buffers use L2 since GFX8 */
|
||||
if (flags & PIPE_BARRIER_INDEX_BUFFER &&
|
||||
(sctx->gfx_level <= GFX7 || sctx->screen->info.cp_sdma_ge_use_system_memory_scope))
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2;
|
||||
|
||||
/* Indirect buffers use L2 since GFX9. */
|
||||
if (flags & PIPE_BARRIER_INDIRECT_BUFFER &&
|
||||
(sctx->gfx_level <= GFX8 || sctx->screen->info.cp_sdma_ge_use_system_memory_scope))
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2;
|
||||
|
||||
/* MSAA color images are flushed in si_decompress_textures when needed.
|
||||
* Shaders never write to depth/stencil images.
|
||||
*/
|
||||
if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
|
||||
|
||||
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
|
||||
if (sctx->screen->info.tcc_rb_non_coherent)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
else /* We don't know which shaders do image stores with DCC: */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
|
||||
} else if (sctx->gfx_level == GFX9) {
|
||||
/* We have to invalidate L2 for MSAA and when DCC can have pipe_aligned=0. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
} else if (sctx->gfx_level <= GFX8) {
|
||||
/* CB doesn't use L2 on GFX6-8. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -729,7 +729,7 @@ static void si_set_sampler_depth_decompress_mask(struct si_context *sctx, struct
|
|||
void si_fb_barrier_before_rendering(struct si_context *sctx)
|
||||
{
|
||||
/* Wait for all shaders because all image loads must finish before CB/DB can write there. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_CS | SI_BARRIER_SYNC_PS;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -800,7 +800,7 @@ void si_fb_barrier_after_rendering(struct si_context *sctx, unsigned flags)
|
|||
*
|
||||
* This seems to fix them:
|
||||
*/
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_INV_L2;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
} else if (sctx->gfx_level == GFX9) {
|
||||
|
|
@ -810,7 +810,7 @@ void si_fb_barrier_after_rendering(struct si_context *sctx, unsigned flags)
|
|||
* - render with DEPTH_BEFORE_SHADER=1
|
||||
* Flushing DB metadata works around the problem.
|
||||
*/
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -507,7 +507,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
|
|||
/* Required before and after FMASK and DCC_DECOMPRESS. */
|
||||
if (custom_blend == sctx->custom_blend_fmask_decompress ||
|
||||
custom_blend == sctx->custom_blend_dcc_decompress) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -517,7 +517,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
|
|||
|
||||
if (custom_blend == sctx->custom_blend_fmask_decompress ||
|
||||
custom_blend == sctx->custom_blend_dcc_decompress) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -1070,7 +1070,7 @@ static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_inf
|
|||
enum pipe_format format)
|
||||
{
|
||||
/* Required before and after CB_RESOLVE. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
||||
si_blitter_begin(
|
||||
|
|
|
|||
|
|
@ -67,11 +67,11 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
|
|||
}
|
||||
|
||||
/* Invalidate the VMEM cache because we always use compute. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
|
||||
|
||||
/* GFX6-8: CB and DB don't use L2. */
|
||||
if (sctx->gfx_level <= GFX8)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
||||
|
|
@ -104,11 +104,11 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
|
|||
}
|
||||
|
||||
/* Wait for idle. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
|
||||
|
||||
/* GFX6-8: CB and DB don't use L2. */
|
||||
if (sctx->gfx_level <= GFX8)
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2;
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
|
@ -1209,7 +1209,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
|
||||
/* ZRANGE_PRECISION register of a bound surface will change so we
|
||||
* must flush the DB caches. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
/* Update DB_DEPTH_CLEAR. */
|
||||
|
|
@ -1245,7 +1245,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
* The root cause is unknown.
|
||||
*/
|
||||
if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1175,7 +1175,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
|
|||
info->block[0] * info->block[1] * info->block[2] > 256;
|
||||
|
||||
if (cs_regalloc_hang) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -1214,7 +1214,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
|
|||
/* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
|
||||
if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
|
||||
si_resource(info->indirect)->L2_cache_dirty) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
si_resource(info->indirect)->L2_cache_dirty = false;
|
||||
}
|
||||
|
|
@ -1309,7 +1309,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
|
|||
trace_si_end_compute(&sctx->trace, info->grid[0], info->grid[1], info->grid[2]);
|
||||
|
||||
if (cs_regalloc_hang) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@
|
|||
|
||||
static void si_compute_begin_internal(struct si_context *sctx, bool render_condition_enabled)
|
||||
{
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
if (sctx->num_hw_pipestat_streamout_queries) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -32,9 +32,9 @@ static void si_compute_begin_internal(struct si_context *sctx, bool render_condi
|
|||
|
||||
static void si_compute_end_internal(struct si_context *sctx)
|
||||
{
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
if (sctx->num_hw_pipestat_streamout_queries) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -345,7 +345,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
|
|||
assert(sctx->gfx_level < GFX12);
|
||||
|
||||
/* Flush and wait for CB before retiling DCC. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
||||
/* Set the DCC buffer. */
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
|||
assert(size && size % 4 == 0);
|
||||
|
||||
if (!cp_dma_use_L2(sctx)) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
@ -234,7 +234,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||
assert(dst && src);
|
||||
|
||||
if (!cp_dma_use_L2(sctx)) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1897,7 +1897,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
|
|||
/* Wait for graphics/compute to be idle before updating the resident
|
||||
* descriptors directly in memory, in case the GPU is using them.
|
||||
*/
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
|
||||
si_emit_barrier_direct(sctx);
|
||||
|
||||
util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
|
||||
|
|
@ -1921,11 +1921,11 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
|
|||
}
|
||||
|
||||
/* Invalidate scalar L0 because the cache doesn't know that L2 changed. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_SMEM;
|
||||
|
||||
/* TODO: Range-invalidate GL2 */
|
||||
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
|
||||
sctx->bindless_descriptors_dirty = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
|
|||
struct radeon_cmdbuf *cs = &ctx->gfx_cs;
|
||||
struct radeon_winsys *ws = ctx->ws;
|
||||
struct si_screen *sscreen = ctx->screen;
|
||||
const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
const unsigned wait_ps_cs = SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
|
||||
unsigned wait_flags = 0;
|
||||
|
||||
if (ctx->gfx_flush_in_progress)
|
||||
|
|
@ -146,7 +146,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
|
|||
* and make this process guilty of hanging.
|
||||
*/
|
||||
if (ctx->gfx_level >= GFX12)
|
||||
wait_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
|
||||
wait_flags |= SI_BARRIER_SYNC_VS;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -462,16 +462,16 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
*
|
||||
* TODO: Do we also need to invalidate CB & DB caches?
|
||||
*/
|
||||
ctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
ctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
if (ctx->gfx_level < GFX10)
|
||||
ctx->barrier_flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
|
||||
ctx->barrier_flags |= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM;
|
||||
|
||||
/* Disable pipeline stats if there are no active queries. */
|
||||
ctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS & ~SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
ctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START & ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
if (ctx->num_hw_pipestat_streamout_queries)
|
||||
ctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
|
||||
ctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
else
|
||||
ctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
ctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
|
||||
ctx->pipeline_stats_enabled = -1; /* indicate that the current hw state is unknown */
|
||||
|
||||
|
|
@ -479,7 +479,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
* When switching NGG->legacy, we need to flush VGT for certain hw generations.
|
||||
*/
|
||||
if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg)
|
||||
ctx->barrier_flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
ctx->barrier_flags |= SI_BARRIER_EVENT_VGT_FLUSH;
|
||||
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.barrier);
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state);
|
||||
|
|
|
|||
|
|
@ -47,41 +47,38 @@ struct ac_llvm_compiler;
|
|||
/* Alignment for optimal CP DMA performance. */
|
||||
#define SI_CPDMA_ALIGNMENT 32
|
||||
|
||||
/* Pipeline & streamout query controls. */
|
||||
#define SI_CONTEXT_START_PIPELINE_STATS (1 << 0)
|
||||
#define SI_CONTEXT_STOP_PIPELINE_STATS (1 << 1)
|
||||
/* gap */
|
||||
/* Pipeline & streamout query start/stop events. */
|
||||
#define SI_BARRIER_EVENT_PIPELINESTAT_START BITFIELD_BIT(0)
|
||||
#define SI_BARRIER_EVENT_PIPELINESTAT_STOP BITFIELD_BIT(1)
|
||||
/* Events only used by workarounds. These shouldn't be used for API barriers. */
|
||||
#define SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META BITFIELD_BIT(2)
|
||||
#define SI_BARRIER_EVENT_VGT_FLUSH BITFIELD_BIT(3)
|
||||
/* PFP waits for ME to finish. Used to sync for index and indirect buffers and render condition. */
|
||||
#define SI_BARRIER_PFP_SYNC_ME BITFIELD_BIT(4)
|
||||
/* Instruction cache. */
|
||||
#define SI_CONTEXT_INV_ICACHE (1 << 3)
|
||||
/* Scalar cache. (GFX6-9: scalar L1; GFX10: scalar L0)
|
||||
#define SI_BARRIER_INV_ICACHE BITFIELD_BIT(5)
|
||||
/* Scalar cache. (GFX6-9: scalar L1; GFX10+: scalar L0)
|
||||
* GFX10: This also invalidates the L1 shader array cache. */
|
||||
#define SI_CONTEXT_INV_SCACHE (1 << 4)
|
||||
/* Vector cache. (GFX6-9: vector L1; GFX10: vector L0)
|
||||
#define SI_BARRIER_INV_SMEM BITFIELD_BIT(6)
|
||||
/* Vector cache. (GFX6-9: vector L1; GFX10+: vector L0)
|
||||
* GFX10: This also invalidates the L1 shader array cache. */
|
||||
#define SI_CONTEXT_INV_VCACHE (1 << 5)
|
||||
#define SI_BARRIER_INV_VMEM BITFIELD_BIT(7)
|
||||
/* L2 cache + L2 metadata cache writeback & invalidate.
|
||||
* GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
|
||||
#define SI_CONTEXT_INV_L2 (1 << 6)
|
||||
* GFX6-8: Used by shaders only. GFX9+: Used by everything. */
|
||||
#define SI_BARRIER_INV_L2 BITFIELD_BIT(8)
|
||||
/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
|
||||
* Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
|
||||
* GFX6-7 will do complete invalidation, because the writeback is unsupported. */
|
||||
#define SI_CONTEXT_WB_L2 (1 << 7)
|
||||
/* Writeback & invalidate the L2 metadata cache only. It can only be coupled with
|
||||
* a CB or DB flush. */
|
||||
#define SI_CONTEXT_INV_L2_METADATA (1 << 8)
|
||||
* GFX6-7 will do complete invalidation because the writeback is unsupported. */
|
||||
#define SI_BARRIER_WB_L2 BITFIELD_BIT(9)
|
||||
/* Writeback & invalidate the L2 metadata cache only. */
|
||||
#define SI_BARRIER_INV_L2_METADATA BITFIELD_BIT(10)
|
||||
/* These wait for shaders to finish. (SYNC_VS = wait for the whole geometry pipeline to finish) */
|
||||
#define SI_BARRIER_SYNC_VS BITFIELD_BIT(11)
|
||||
#define SI_BARRIER_SYNC_PS BITFIELD_BIT(12)
|
||||
#define SI_BARRIER_SYNC_CS BITFIELD_BIT(13)
|
||||
/* Framebuffer caches. */
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_DB (1 << 9)
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (1 << 10)
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_CB (1 << 11)
|
||||
/* Engine synchronization. */
|
||||
#define SI_CONTEXT_VS_PARTIAL_FLUSH (1 << 12)
|
||||
#define SI_CONTEXT_PS_PARTIAL_FLUSH (1 << 13)
|
||||
#define SI_CONTEXT_CS_PARTIAL_FLUSH (1 << 14)
|
||||
#define SI_CONTEXT_VGT_FLUSH (1 << 15)
|
||||
/* gap */
|
||||
/* PFP waits for ME to finish. Used to sync for index and indirect buffers and render
|
||||
* condition. It's typically set when doing a VS/PS/CS partial flush for buffers. */
|
||||
#define SI_CONTEXT_PFP_SYNC_ME (1 << 17)
|
||||
#define SI_BARRIER_SYNC_AND_INV_DB BITFIELD_BIT(14)
|
||||
#define SI_BARRIER_SYNC_AND_INV_CB BITFIELD_BIT(15)
|
||||
|
||||
#define SI_PREFETCH_LS (1 << 1)
|
||||
#define SI_PREFETCH_HS (1 << 2)
|
||||
|
|
@ -1881,26 +1878,26 @@ static inline void si_saved_cs_reference(struct si_saved_cs **dst, struct si_sav
|
|||
static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
|
||||
bool shaders_read_metadata, bool dcc_pipe_aligned)
|
||||
{
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_INV_VMEM;
|
||||
sctx->force_shader_coherency.with_cb = false;
|
||||
|
||||
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
|
||||
if (sctx->screen->info.tcc_rb_non_coherent)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
else if (shaders_read_metadata)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
|
||||
} else if (sctx->gfx_level == GFX9) {
|
||||
/* Single-sample color is coherent with shaders on GFX9, but
|
||||
* L2 metadata must be flushed if shaders read metadata.
|
||||
* (DCC, CMASK).
|
||||
*/
|
||||
if (num_samples >= 2 || (shaders_read_metadata && !dcc_pipe_aligned))
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
else if (shaders_read_metadata)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
|
||||
} else if (sctx->gfx_level <= GFX8) {
|
||||
/* GFX6-GFX8 */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
}
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
|
@ -1909,26 +1906,26 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
|
|||
static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
|
||||
bool include_stencil, bool shaders_read_metadata)
|
||||
{
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_INV_VMEM;
|
||||
sctx->force_shader_coherency.with_db = false;
|
||||
|
||||
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
|
||||
if (sctx->screen->info.tcc_rb_non_coherent)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
else if (shaders_read_metadata)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
|
||||
} else if (sctx->gfx_level == GFX9) {
|
||||
/* Single-sample depth (not stencil) is coherent with shaders
|
||||
* on GFX9, but L2 metadata must be flushed if shaders read
|
||||
* metadata.
|
||||
*/
|
||||
if (num_samples >= 2 || include_stencil)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
else if (shaders_read_metadata)
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
|
||||
} else if (sctx->gfx_level <= GFX8) {
|
||||
/* GFX6-GFX8 */
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
}
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
|
|
|||
|
|
@ -888,12 +888,12 @@ static void si_update_hw_pipeline_stats(struct si_context *sctx, unsigned type,
|
|||
|
||||
/* Enable/disable pipeline stats if we have any queries. */
|
||||
if (diff == 1 && sctx->num_hw_pipestat_streamout_queries == 1) {
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
} else if (diff == -1 && sctx->num_hw_pipestat_streamout_queries == 0) {
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
}
|
||||
|
|
@ -1599,8 +1599,8 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q
|
|||
break;
|
||||
}
|
||||
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
|
||||
(sctx->gfx_level <= GFX8 ? SI_CONTEXT_INV_L2 : 0);
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
|
||||
(sctx->gfx_level <= GFX8 ? SI_BARRIER_INV_L2 : 0);
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
||||
for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
|
||||
|
|
@ -1698,7 +1698,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que
|
|||
/* Setting this in the render cond atom is too late,
|
||||
* so set it here. */
|
||||
if (sctx->gfx_level <= GFX8) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -988,7 +988,7 @@ static void post_upload_binary(struct si_screen *sscreen, struct si_shader *shad
|
|||
si_cp_dma_copy_buffer(upload_ctx, &shader->bo->b.b, staging, 0, staging_offset,
|
||||
binary_size);
|
||||
si_barrier_after_simple_buffer_op(upload_ctx, 0, &shader->bo->b.b, staging);
|
||||
upload_ctx->barrier_flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_L2;
|
||||
upload_ctx->barrier_flags |= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_L2;
|
||||
|
||||
#if 0 /* debug: validate whether the copy was successful */
|
||||
uint32_t *dst_binary = malloc(binary_size);
|
||||
|
|
|
|||
|
|
@ -93,8 +93,8 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
|||
if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
|
||||
/* Some chips with disabled RBs should wait for idle because FINISH_DONE
|
||||
* doesn't work. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB |
|
||||
SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB |
|
||||
SI_BARRIER_SYNC_CS;
|
||||
sctx->emit_barrier(sctx, cs);
|
||||
}
|
||||
|
||||
|
|
@ -140,10 +140,10 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
si_cp_dma_wait_for_idle(sctx, cs);
|
||||
|
||||
/* Make sure to wait-for-idle before starting SQTT. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
|
||||
SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
|
||||
SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
|
||||
SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM |
|
||||
SI_BARRIER_INV_VMEM | SI_BARRIER_INV_L2 |
|
||||
SI_BARRIER_PFP_SYNC_ME;
|
||||
sctx->emit_barrier(sctx, cs);
|
||||
|
||||
si_inhibit_clockgating(sctx, cs, true);
|
||||
|
|
@ -200,10 +200,10 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
sctx->screen->info.never_send_perfcounter_stop);
|
||||
|
||||
/* Make sure to wait-for-idle before stopping SQTT. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
|
||||
SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
|
||||
SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
|
||||
SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM |
|
||||
SI_BARRIER_INV_VMEM | SI_BARRIER_INV_L2 |
|
||||
SI_BARRIER_PFP_SYNC_ME;
|
||||
sctx->emit_barrier(sctx, cs);
|
||||
|
||||
si_emit_sqtt_stop(sctx, cs, ip_type);
|
||||
|
|
@ -620,30 +620,30 @@ void si_sqtt_describe_barrier_end(struct si_context *sctx, struct radeon_cmdbuf
|
|||
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
|
||||
marker.cb_id = 0;
|
||||
|
||||
if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH)
|
||||
if (flags & SI_BARRIER_SYNC_VS)
|
||||
marker.vs_partial_flush = true;
|
||||
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH)
|
||||
if (flags & SI_BARRIER_SYNC_PS)
|
||||
marker.ps_partial_flush = true;
|
||||
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH)
|
||||
if (flags & SI_BARRIER_SYNC_CS)
|
||||
marker.cs_partial_flush = true;
|
||||
|
||||
if (flags & SI_CONTEXT_PFP_SYNC_ME)
|
||||
if (flags & SI_BARRIER_PFP_SYNC_ME)
|
||||
marker.pfp_sync_me = true;
|
||||
|
||||
if (flags & SI_CONTEXT_INV_VCACHE)
|
||||
if (flags & SI_BARRIER_INV_VMEM)
|
||||
marker.inval_tcp = true;
|
||||
if (flags & SI_CONTEXT_INV_ICACHE)
|
||||
if (flags & SI_BARRIER_INV_ICACHE)
|
||||
marker.inval_sqI = true;
|
||||
if (flags & SI_CONTEXT_INV_SCACHE)
|
||||
if (flags & SI_BARRIER_INV_SMEM)
|
||||
marker.inval_sqK = true;
|
||||
if (flags & SI_CONTEXT_INV_L2)
|
||||
if (flags & SI_BARRIER_INV_L2)
|
||||
marker.inval_tcc = true;
|
||||
|
||||
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
|
||||
if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
|
||||
marker.inval_cb = true;
|
||||
marker.flush_cb = true;
|
||||
}
|
||||
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
|
||||
if (flags & SI_BARRIER_SYNC_AND_INV_DB) {
|
||||
marker.inval_db = true;
|
||||
marker.flush_db = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1756,14 +1756,14 @@ static void si_set_active_query_state(struct pipe_context *ctx, bool enable)
|
|||
if (enable) {
|
||||
/* Disable pipeline stats if there are no active queries. */
|
||||
if (sctx->num_hw_pipestat_streamout_queries) {
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
} else {
|
||||
if (sctx->num_hw_pipestat_streamout_queries) {
|
||||
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
|
||||
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
|
||||
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -907,7 +907,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
instance_count, 2, sctx->patch_vertices)) {
|
||||
/* The cache flushes should have been emitted already. */
|
||||
assert(sctx->barrier_flags == 0);
|
||||
sctx->barrier_flags = SI_CONTEXT_VGT_FLUSH;
|
||||
sctx->barrier_flags = SI_BARRIER_EVENT_VGT_FLUSH;
|
||||
si_emit_barrier_direct(sctx);
|
||||
}
|
||||
}
|
||||
|
|
@ -2121,7 +2121,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
index_size = 2;
|
||||
|
||||
/* GFX6-7 don't read index buffers through L2. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
si_resource(indexbuf)->L2_cache_dirty = false;
|
||||
} else if (!IS_DRAW_VERTEX_STATE && info->has_user_indices) {
|
||||
|
|
@ -2144,7 +2144,7 @@ static void si_draw(struct pipe_context *ctx,
|
|||
si_resource(indexbuf)->L2_cache_dirty) {
|
||||
/* GFX8-GFX11 read index buffers through L2, so they don't
|
||||
* need this. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
si_resource(indexbuf)->L2_cache_dirty = false;
|
||||
}
|
||||
|
|
@ -2157,14 +2157,14 @@ static void si_draw(struct pipe_context *ctx,
|
|||
/* Indirect buffers use L2 on GFX9-GFX11, but not other hw. */
|
||||
if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
|
||||
if (indirect->buffer && si_resource(indirect->buffer)->L2_cache_dirty) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
si_resource(indirect->buffer)->L2_cache_dirty = false;
|
||||
}
|
||||
|
||||
if (indirect->indirect_draw_count &&
|
||||
si_resource(indirect->indirect_draw_count)->L2_cache_dirty) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
si_resource(indirect->indirect_draw_count)->L2_cache_dirty = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3769,7 +3769,7 @@ bool si_update_ngg(struct si_context *sctx)
|
|||
* pointers are set.
|
||||
*/
|
||||
if (sctx->screen->info.has_vgt_flush_ngg_legacy_bug && !new_ngg) {
|
||||
sctx->barrier_flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_EVENT_VGT_FLUSH;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
|
||||
if (sctx->gfx_level == GFX10) {
|
||||
|
|
|
|||
|
|
@ -98,12 +98,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
|
|||
* VS_PARTIAL_FLUSH is required if the buffers are going to be
|
||||
* used as an input immediately.
|
||||
*/
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
|
||||
SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
|
||||
SI_BARRIER_SYNC_VS | SI_BARRIER_PFP_SYNC_ME;
|
||||
|
||||
/* Make the streamout state buffer available to the CP for resuming and DrawTF. */
|
||||
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope)
|
||||
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_WB_L2;
|
||||
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
}
|
||||
|
|
@ -227,8 +227,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
|
|||
/* All readers of the streamout targets need to be finished before we can
|
||||
* start writing to them.
|
||||
*/
|
||||
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
|
||||
SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
} else {
|
||||
si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
|
||||
|
|
@ -371,7 +371,7 @@ void si_emit_streamout_end(struct si_context *sctx)
|
|||
|
||||
if (sctx->gfx_level >= GFX11) {
|
||||
/* Wait for streamout to finish before reading GDS_STRMOUT registers. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
|
||||
si_emit_barrier_direct(sctx);
|
||||
} else {
|
||||
si_flush_vgt_streamout(sctx);
|
||||
|
|
@ -387,7 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
|
|||
COPY_DATA_REG, NULL,
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
/* For DrawTF reading buf_filled_size: */
|
||||
sctx->barrier_flags |= SI_CONTEXT_PFP_SYNC_ME;
|
||||
sctx->barrier_flags |= SI_BARRIER_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
|
||||
} else {
|
||||
uint64_t va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
|
||||
|
|
|
|||
|
|
@ -537,7 +537,7 @@ void si_test_blit_perf(struct si_screen *sscreen)
|
|||
case METHOD_DEFAULT:
|
||||
if (test_flavor == TEST_FB_CLEAR) {
|
||||
ctx->clear(ctx, PIPE_CLEAR_COLOR, NULL, clear_color, 0, 0);
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_INV_L2;
|
||||
} else {
|
||||
ctx->clear_render_target(ctx, dst_surf, clear_color,
|
||||
dst_box.x, dst_box.y,
|
||||
|
|
@ -640,10 +640,10 @@ void si_test_blit_perf(struct si_screen *sscreen)
|
|||
}
|
||||
|
||||
/* Wait for idle after all tests. */
|
||||
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
||||
SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
SI_CONTEXT_INV_L2 | SI_CONTEXT_INV_SCACHE |
|
||||
SI_CONTEXT_INV_VCACHE;
|
||||
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB |
|
||||
SI_BARRIER_SYNC_CS |
|
||||
SI_BARRIER_INV_L2 | SI_BARRIER_INV_SMEM |
|
||||
SI_BARRIER_INV_VMEM;
|
||||
si_emit_barrier_direct(sctx);
|
||||
|
||||
ctx->end_query(ctx, q);
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
|
|||
si_barrier_after_simple_buffer_op(sctx, 0, dst, src);
|
||||
}
|
||||
|
||||
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
|
||||
sctx->barrier_flags |= SI_BARRIER_INV_L2;
|
||||
}
|
||||
|
||||
ctx->end_query(ctx, q);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue