radeonsi: rename SI_CONTEXT_* flags to SI_BARRIER_* flags

Some of the definition names are changed completely.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31193>
Marek Olšák 2024-08-23 20:38:40 -04:00 committed by Marge Bot
parent 6f25ace87e
commit ce72376641
19 changed files with 203 additions and 206 deletions
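
For reference, the rename implied by the hunks below can be summarized as hypothetical compatibility defines. These are illustrative only and are not part of this commit; the SI_CONTEXT_* names are simply removed and replaced at every use site.

/* Hypothetical old-name -> new-name aliases (NOT added by this commit). */
#define SI_CONTEXT_INV_ICACHE            SI_BARRIER_INV_ICACHE
#define SI_CONTEXT_INV_SCACHE            SI_BARRIER_INV_SMEM
#define SI_CONTEXT_INV_VCACHE            SI_BARRIER_INV_VMEM
#define SI_CONTEXT_INV_L2                SI_BARRIER_INV_L2
#define SI_CONTEXT_WB_L2                 SI_BARRIER_WB_L2
#define SI_CONTEXT_INV_L2_METADATA       SI_BARRIER_INV_L2_METADATA
#define SI_CONTEXT_FLUSH_AND_INV_CB      SI_BARRIER_SYNC_AND_INV_CB
#define SI_CONTEXT_FLUSH_AND_INV_DB      SI_BARRIER_SYNC_AND_INV_DB
#define SI_CONTEXT_FLUSH_AND_INV_DB_META SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META
#define SI_CONTEXT_VS_PARTIAL_FLUSH      SI_BARRIER_SYNC_VS
#define SI_CONTEXT_PS_PARTIAL_FLUSH      SI_BARRIER_SYNC_PS
#define SI_CONTEXT_CS_PARTIAL_FLUSH      SI_BARRIER_SYNC_CS
#define SI_CONTEXT_VGT_FLUSH             SI_BARRIER_EVENT_VGT_FLUSH
#define SI_CONTEXT_PFP_SYNC_ME           SI_BARRIER_PFP_SYNC_ME
#define SI_CONTEXT_START_PIPELINE_STATS  SI_BARRIER_EVENT_PIPELINESTAT_START
#define SI_CONTEXT_STOP_PIPELINE_STATS   SI_BARRIER_EVENT_PIPELINESTAT_STOP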

@ -348,7 +348,7 @@ static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct s
/* TODO: Range-invalidate GL2 */
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) {
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}

@ -38,19 +38,19 @@ static void prepare_cb_db_flushes(struct si_context *ctx, unsigned *flags)
/* Don't flush CB and DB if there have been no draw calls. */
if (ctx->num_draw_calls == ctx->last_cb_flush_num_draw_calls &&
ctx->num_decompress_calls == ctx->last_cb_flush_num_decompress_calls)
*flags &= ~SI_CONTEXT_FLUSH_AND_INV_CB;
*flags &= ~SI_BARRIER_SYNC_AND_INV_CB;
if (ctx->num_draw_calls == ctx->last_db_flush_num_draw_calls &&
ctx->num_decompress_calls == ctx->last_db_flush_num_decompress_calls)
*flags &= ~SI_CONTEXT_FLUSH_AND_INV_DB;
*flags &= ~SI_BARRIER_SYNC_AND_INV_DB;
/* Track the last flush. */
if (*flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
if (*flags & SI_BARRIER_SYNC_AND_INV_CB) {
ctx->num_cb_cache_flushes++;
ctx->last_cb_flush_num_draw_calls = ctx->num_draw_calls;
ctx->last_cb_flush_num_decompress_calls = ctx->num_decompress_calls;
}
if (*flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
if (*flags & SI_BARRIER_SYNC_AND_INV_DB) {
ctx->num_db_cache_flushes++;
ctx->last_db_flush_num_draw_calls = ctx->num_draw_calls;
ctx->last_db_flush_num_decompress_calls = ctx->num_decompress_calls;
@ -68,30 +68,30 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
if (!ctx->has_graphics) {
/* Only process compute flags. */
flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA |
SI_CONTEXT_CS_PARTIAL_FLUSH;
flags &= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA |
SI_BARRIER_SYNC_CS;
}
/* We don't need these. */
assert(!(flags & SI_CONTEXT_FLUSH_AND_INV_DB_META));
assert(!(flags & SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META));
prepare_cb_db_flushes(ctx, &flags);
radeon_begin(cs);
if (flags & SI_CONTEXT_VGT_FLUSH)
if (flags & SI_BARRIER_EVENT_VGT_FLUSH)
radeon_event_write(V_028A90_VGT_FLUSH);
if (flags & SI_CONTEXT_INV_ICACHE)
if (flags & SI_BARRIER_INV_ICACHE)
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
if (flags & SI_CONTEXT_INV_SCACHE) {
if (flags & SI_BARRIER_INV_SMEM) {
/* TODO: When writing to the SMEM L1 cache, we need to set SEQ
* to FORWARD when both L1 and L2 are written out (WB or INV).
*/
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
}
if (flags & SI_CONTEXT_INV_VCACHE)
if (flags & SI_BARRIER_INV_VMEM)
gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
/* The L2 cache ops are:
@ -104,39 +104,39 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
*
* GLM doesn't support WB alone. If WB is set, INV must be set too.
*/
if (flags & SI_CONTEXT_INV_L2) {
if (flags & SI_BARRIER_INV_L2) {
/* Writeback and invalidate everything in L2. */
gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) |
(ctx->gfx_level < GFX12 ? S_586_GLM_INV(1) | S_586_GLM_WB(1) : 0);
ctx->num_L2_invalidates++;
} else if (flags & SI_CONTEXT_WB_L2) {
} else if (flags & SI_BARRIER_WB_L2) {
gcr_cntl |= S_586_GL2_WB(1) |
(ctx->gfx_level < GFX12 ? S_586_GLM_WB(1) | S_586_GLM_INV(1) : 0);
} else if (flags & SI_CONTEXT_INV_L2_METADATA) {
} else if (flags & SI_BARRIER_INV_L2_METADATA) {
assert(ctx->gfx_level < GFX12);
gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
}
if (flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) {
if (flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) {
/* Flush CMASK/FMASK/DCC. Will wait for idle later. */
if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB)
if (ctx->gfx_level < GFX12 && flags & SI_BARRIER_SYNC_AND_INV_CB)
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
/* Gfx11 can't flush DB_META and should use a TS event instead. */
/* Flush HTILE. Will wait for idle later. */
if (ctx->gfx_level < GFX12 && ctx->gfx_level != GFX11 &&
flags & SI_CONTEXT_FLUSH_AND_INV_DB)
flags & SI_BARRIER_SYNC_AND_INV_DB)
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
/* First flush CB/DB, then L1/L2. */
gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
if ((flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) ==
(SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) {
if ((flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) ==
(SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) {
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
} else if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
} else if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
} else if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
} else if (flags & SI_BARRIER_SYNC_AND_INV_DB) {
if (ctx->gfx_level == GFX11)
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
else
@ -146,18 +146,18 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
} else {
/* Wait for graphics shaders to go idle if requested. */
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
if (flags & SI_BARRIER_SYNC_PS) {
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
/* Only count explicit shader flushes, not implicit ones. */
ctx->num_vs_flushes++;
ctx->num_ps_flushes++;
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
} else if (flags & SI_BARRIER_SYNC_VS) {
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
ctx->num_vs_flushes++;
}
}
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && ctx->compute_is_busy) {
if (flags & SI_BARRIER_SYNC_CS && ctx->compute_is_busy) {
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
ctx->num_cs_flushes++;
ctx->compute_is_busy = false;
@ -170,13 +170,13 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* Wait for the event and invalidate remaining caches if needed. */
si_cp_acquire_mem_pws(ctx, cs, cb_db_event,
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME,
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME,
gcr_cntl & ~C_586_GLI_INV, /* keep only GLI_INV */
0, flags);
gcr_cntl = 0; /* all done */
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
flags &= ~SI_BARRIER_PFP_SYNC_ME;
} else {
/* GFX10 */
struct si_resource *wait_mem_scratch =
@ -226,16 +226,16 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
si_cp_acquire_mem(ctx, cs, gcr_cntl,
flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
} else if (flags & SI_CONTEXT_PFP_SYNC_ME) {
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {
si_cp_pfp_sync_me(cs);
}
radeon_begin_again(cs);
if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) {
if (flags & SI_BARRIER_EVENT_PIPELINESTAT_START && ctx->pipeline_stats_enabled != 1) {
radeon_event_write(V_028A90_PIPELINESTAT_START);
ctx->pipeline_stats_enabled = 1;
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && ctx->pipeline_stats_enabled != 0) {
} else if (flags & SI_BARRIER_EVENT_PIPELINESTAT_STOP && ctx->pipeline_stats_enabled != 0) {
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
ctx->pipeline_stats_enabled = 0;
}
@ -253,13 +253,13 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
if (!sctx->has_graphics) {
/* Only process compute flags. */
flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA |
SI_CONTEXT_CS_PARTIAL_FLUSH;
flags &= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA |
SI_BARRIER_SYNC_CS;
}
uint32_t cp_coher_cntl = 0;
const uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB);
const uint32_t flush_cb_db = flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB);
assert(sctx->gfx_level <= GFX9);
@ -273,13 +273,13 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
* to add a workaround for it.
*/
if (flags & SI_CONTEXT_INV_ICACHE)
if (flags & SI_BARRIER_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
if (flags & SI_CONTEXT_INV_SCACHE)
if (flags & SI_BARRIER_INV_SMEM)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (sctx->gfx_level <= GFX8) {
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) |
@ -291,18 +291,18 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_cp_release_mem(sctx, cs, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM,
EOP_INT_SEL_NONE, EOP_DATA_SEL_DISCARD, NULL, 0, 0, SI_NOT_QUERY);
}
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
if (flags & SI_BARRIER_SYNC_AND_INV_DB)
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
}
radeon_begin(cs);
/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
if (flags & SI_BARRIER_SYNC_AND_INV_CB)
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
/* Flush HTILE. SURFACE_SYNC will wait for idle. */
if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META))
if (flags & (SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META))
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
/* Wait for shader engines to go idle.
@ -316,25 +316,25 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
* bindings.
*/
if (sctx->gfx_level <= GFX8 || !flush_cb_db) {
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
if (flags & SI_BARRIER_SYNC_PS) {
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
/* Only count explicit shader flushes, not implicit ones done by SURFACE_SYNC. */
sctx->num_vs_flushes++;
sctx->num_ps_flushes++;
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
} else if (flags & SI_BARRIER_SYNC_VS) {
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
sctx->num_vs_flushes++;
}
}
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && sctx->compute_is_busy) {
if (flags & SI_BARRIER_SYNC_CS && sctx->compute_is_busy) {
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
sctx->num_cs_flushes++;
sctx->compute_is_busy = false;
}
/* VGT state synchronization. */
if (flags & SI_CONTEXT_VGT_FLUSH)
if (flags & SI_BARRIER_EVENT_VGT_FLUSH)
radeon_event_write(V_028A90_VGT_FLUSH);
radeon_end();
@ -348,10 +348,10 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
case SI_CONTEXT_FLUSH_AND_INV_CB:
case SI_BARRIER_SYNC_AND_INV_CB:
cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
break;
case SI_CONTEXT_FLUSH_AND_INV_DB:
case SI_BARRIER_SYNC_AND_INV_DB:
cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
break;
default:
@ -373,17 +373,17 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
*/
tc_flags = 0;
if (flags & SI_CONTEXT_INV_L2_METADATA) {
if (flags & SI_BARRIER_INV_L2_METADATA) {
tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA;
}
/* Ideally flush L2 together with CB/DB. */
if (flags & SI_CONTEXT_INV_L2) {
if (flags & SI_BARRIER_INV_L2) {
/* Writeback and invalidate everything in L2 & L1. */
tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA;
/* Clear the flags. */
flags &= ~(SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2);
flags &= ~(SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2);
sctx->num_L2_invalidates++;
}
@ -416,9 +416,9 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
*
* GFX6-GFX7 don't support L2 write-back.
*/
unsigned engine = flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME;
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME;
if (flags & SI_CONTEXT_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_CONTEXT_WB_L2)) {
if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) {
/* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */
si_cp_acquire_mem(sctx, cs,
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
@ -428,7 +428,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
/* L1 invalidation and L2 writeback must be done separately, because both operations can't
* be done together.
*/
if (flags & SI_CONTEXT_WB_L2) {
if (flags & SI_BARRIER_WB_L2) {
/* WB = write-back
* NC = apply to non-coherent MTYPEs
* (i.e. MTYPE <= 1, which is what we use everywhere)
@ -438,7 +438,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
* If we get here, the only flag that can't be executed together with WB_L2 is VMEM cache
* invalidation.
*/
bool last_acquire_mem = !(flags & SI_CONTEXT_INV_VCACHE);
bool last_acquire_mem = !(flags & SI_BARRIER_INV_VMEM);
si_cp_acquire_mem(sctx, cs,
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) |
@ -448,33 +448,33 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
last_acquire_mem ? engine : V_580_CP_ME);
if (last_acquire_mem)
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
flags &= ~SI_BARRIER_PFP_SYNC_ME;
cp_coher_cntl = 0;
sctx->num_L2_writebacks++;
}
if (flags & SI_CONTEXT_INV_VCACHE)
if (flags & SI_BARRIER_INV_VMEM)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
/* If there are still some cache flags left... */
if (cp_coher_cntl) {
si_cp_acquire_mem(sctx, cs, cp_coher_cntl, engine);
flags &= ~SI_CONTEXT_PFP_SYNC_ME;
flags &= ~SI_BARRIER_PFP_SYNC_ME;
}
/* This might be needed even without any cache flags, such as when doing buffer stores
* to an index buffer.
*/
if (flags & SI_CONTEXT_PFP_SYNC_ME)
if (flags & SI_BARRIER_PFP_SYNC_ME)
si_cp_pfp_sync_me(cs);
}
if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) {
if (flags & SI_BARRIER_EVENT_PIPELINESTAT_START && sctx->pipeline_stats_enabled != 1) {
radeon_begin(cs);
radeon_event_write(V_028A90_PIPELINESTAT_START);
radeon_end();
sctx->pipeline_stats_enabled = 1;
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && sctx->pipeline_stats_enabled != 0) {
} else if (flags & SI_BARRIER_EVENT_PIPELINESTAT_STOP && sctx->pipeline_stats_enabled != 0) {
radeon_begin(cs);
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
radeon_end();
@ -533,12 +533,12 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
if (!si_is_buffer_idle(sctx, buf, RADEON_USAGE_WRITE |
(writable_buffers_mask & BITFIELD_BIT(i) ? RADEON_USAGE_READ : 0))) {
if (buf->bind_history & ps_mask)
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS;
else
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
if (buf->bind_history & cs_mask)
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
}
}
@ -553,12 +553,12 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
if (!si_is_buffer_idle(sctx, img, RADEON_USAGE_WRITE | (writable ? RADEON_USAGE_READ : 0))) {
si_make_CB_shader_coherent(sctx, images[i].resource->nr_samples, true,
((struct si_texture*)images[i].resource)->surface.u.gfx9.color.dcc.pipe_aligned);
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
}
}
/* Invalidate the VMEM cache only. The SMEM cache isn't used by shader buffers. */
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -569,18 +569,18 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
unsigned num_images,
const struct pipe_image_view *images)
{
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
if (num_images) {
/* Make sure image stores are visible to CB, which doesn't use L2 on GFX6-8. */
sctx->barrier_flags |= sctx->gfx_level <= GFX8 ? SI_CONTEXT_WB_L2 : 0;
sctx->barrier_flags |= sctx->gfx_level <= GFX8 ? SI_BARRIER_WB_L2 : 0;
/* Make sure image stores are visible to all CUs. */
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
}
/* Make sure buffer stores are visible to all CUs and also as index/indirect buffers. */
if (num_buffers)
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM | SI_BARRIER_PFP_SYNC_ME;
/* We must set L2_cache_dirty for buffers because:
* - GFX6,12: CP DMA doesn't use L2.
@ -600,7 +600,7 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
(sctx->screen->always_allow_dcc_stores ||
images[i].access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE)) {
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
break;
}
}
@ -659,10 +659,10 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
if (!flags)
return;
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM;
/* VMEM cache contents are written back to L2 automatically at the end of waves, but
* the contents of other VMEM caches might still be stale.
@ -671,38 +671,38 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
*/
if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |
PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER))
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
if (flags & (PIPE_BARRIER_INDEX_BUFFER | PIPE_BARRIER_INDIRECT_BUFFER))
sctx->barrier_flags |= SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_PFP_SYNC_ME;
/* Index buffers use L2 since GFX8 */
if (flags & PIPE_BARRIER_INDEX_BUFFER &&
(sctx->gfx_level <= GFX7 || sctx->screen->info.cp_sdma_ge_use_system_memory_scope))
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
sctx->barrier_flags |= SI_BARRIER_WB_L2;
/* Indirect buffers use L2 since GFX9. */
if (flags & PIPE_BARRIER_INDIRECT_BUFFER &&
(sctx->gfx_level <= GFX8 || sctx->screen->info.cp_sdma_ge_use_system_memory_scope))
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
sctx->barrier_flags |= SI_BARRIER_WB_L2;
/* MSAA color images are flushed in si_decompress_textures when needed.
* Shaders never write to depth/stencil images.
*/
if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
if (sctx->screen->info.tcc_rb_non_coherent)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
else /* We don't know which shaders do image stores with DCC: */
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
} else if (sctx->gfx_level == GFX9) {
/* We have to invalidate L2 for MSAA and when DCC can have pipe_aligned=0. */
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
} else if (sctx->gfx_level <= GFX8) {
/* CB doesn't use L2 on GFX6-8. */
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
sctx->barrier_flags |= SI_BARRIER_WB_L2;
}
}
@ -729,7 +729,7 @@ static void si_set_sampler_depth_decompress_mask(struct si_context *sctx, struct
void si_fb_barrier_before_rendering(struct si_context *sctx)
{
/* Wait for all shaders because all image loads must finish before CB/DB can write there. */
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_CS | SI_BARRIER_SYNC_PS;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -800,7 +800,7 @@ void si_fb_barrier_after_rendering(struct si_context *sctx, unsigned flags)
*
* This seems to fix them:
*/
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_INV_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
} else if (sctx->gfx_level == GFX9) {
@ -810,7 +810,7 @@ void si_fb_barrier_after_rendering(struct si_context *sctx, unsigned flags)
* - render with DEPTH_BEFORE_SHADER=1
* Flushing DB metadata works around the problem.
*/
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
sctx->barrier_flags |= SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
}

@ -507,7 +507,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
/* Required before and after FMASK and DCC_DECOMPRESS. */
if (custom_blend == sctx->custom_blend_fmask_decompress ||
custom_blend == sctx->custom_blend_dcc_decompress) {
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -517,7 +517,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
if (custom_blend == sctx->custom_blend_fmask_decompress ||
custom_blend == sctx->custom_blend_dcc_decompress) {
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -1070,7 +1070,7 @@ static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_inf
enum pipe_format format)
{
/* Required before and after CB_RESOLVE. */
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_blitter_begin(

@ -67,11 +67,11 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
}
/* Invalidate the VMEM cache because we always use compute. */
sctx->barrier_flags |= SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_VMEM;
/* GFX6-8: CB and DB don't use L2. */
if (sctx->gfx_level <= GFX8)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
@ -104,11 +104,11 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
}
/* Wait for idle. */
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
/* GFX6-8: CB and DB don't use L2. */
if (sctx->gfx_level <= GFX8)
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
sctx->barrier_flags |= SI_BARRIER_WB_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -1209,7 +1209,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
/* ZRANGE_PRECISION register of a bound surface will change so we
* must flush the DB caches. */
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
/* Update DB_DEPTH_CLEAR. */
@ -1245,7 +1245,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
* The root cause is unknown.
*/
if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5) {
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
}

@ -1175,7 +1175,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
info->block[0] * info->block[1] * info->block[2] > 256;
if (cs_regalloc_hang) {
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -1214,7 +1214,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
/* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
si_resource(info->indirect)->L2_cache_dirty) {
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_resource(info->indirect)->L2_cache_dirty = false;
}
@ -1309,7 +1309,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
trace_si_end_compute(&sctx->trace, info->grid[0], info->grid[1], info->grid[2]);
if (cs_regalloc_hang) {
sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_CS;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
}

@ -14,9 +14,9 @@
static void si_compute_begin_internal(struct si_context *sctx, bool render_condition_enabled)
{
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
if (sctx->num_hw_pipestat_streamout_queries) {
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -32,9 +32,9 @@ static void si_compute_begin_internal(struct si_context *sctx, bool render_condi
static void si_compute_end_internal(struct si_context *sctx)
{
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
if (sctx->num_hw_pipestat_streamout_queries) {
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -345,7 +345,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
assert(sctx->gfx_level < GFX12);
/* Flush and wait for CB before retiling DCC. */
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
/* Set the DCC buffer. */

@ -151,7 +151,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
assert(size && size % 4 == 0);
if (!cp_dma_use_L2(sctx)) {
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -234,7 +234,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
assert(dst && src);
if (!cp_dma_use_L2(sctx)) {
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}

@ -1897,7 +1897,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
/* Wait for graphics/compute to be idle before updating the resident
* descriptors directly in memory, in case the GPU is using them.
*/
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
si_emit_barrier_direct(sctx);
util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
@ -1921,11 +1921,11 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
}
/* Invalidate scalar L0 because the cache doesn't know that L2 changed. */
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE;
sctx->barrier_flags |= SI_BARRIER_INV_SMEM;
/* TODO: Range-invalidate GL2 */
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
sctx->bindless_descriptors_dirty = false;
}

@ -78,7 +78,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
struct radeon_cmdbuf *cs = &ctx->gfx_cs;
struct radeon_winsys *ws = ctx->ws;
struct si_screen *sscreen = ctx->screen;
const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
const unsigned wait_ps_cs = SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS;
unsigned wait_flags = 0;
if (ctx->gfx_flush_in_progress)
@ -146,7 +146,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
* and make this process guilty of hanging.
*/
if (ctx->gfx_level >= GFX12)
wait_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
wait_flags |= SI_BARRIER_SYNC_VS;
}
}
@ -462,16 +462,16 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
*
* TODO: Do we also need to invalidate CB & DB caches?
*/
ctx->barrier_flags |= SI_CONTEXT_INV_L2;
ctx->barrier_flags |= SI_BARRIER_INV_L2;
if (ctx->gfx_level < GFX10)
ctx->barrier_flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
ctx->barrier_flags |= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM;
/* Disable pipeline stats if there are no active queries. */
ctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS & ~SI_CONTEXT_STOP_PIPELINE_STATS;
ctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START & ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
if (ctx->num_hw_pipestat_streamout_queries)
ctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
ctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
else
ctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
ctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
ctx->pipeline_stats_enabled = -1; /* indicate that the current hw state is unknown */
@ -479,7 +479,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
* When switching NGG->legacy, we need to flush VGT for certain hw generations.
*/
if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg)
ctx->barrier_flags |= SI_CONTEXT_VGT_FLUSH;
ctx->barrier_flags |= SI_BARRIER_EVENT_VGT_FLUSH;
si_mark_atom_dirty(ctx, &ctx->atoms.s.barrier);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state);

@ -47,41 +47,38 @@ struct ac_llvm_compiler;
/* Alignment for optimal CP DMA performance. */
#define SI_CPDMA_ALIGNMENT 32
/* Pipeline & streamout query controls. */
#define SI_CONTEXT_START_PIPELINE_STATS (1 << 0)
#define SI_CONTEXT_STOP_PIPELINE_STATS (1 << 1)
/* gap */
/* Pipeline & streamout query start/stop events. */
#define SI_BARRIER_EVENT_PIPELINESTAT_START BITFIELD_BIT(0)
#define SI_BARRIER_EVENT_PIPELINESTAT_STOP BITFIELD_BIT(1)
/* Events only used by workarounds. These shouldn't be used for API barriers. */
#define SI_BARRIER_EVENT_FLUSH_AND_INV_DB_META BITFIELD_BIT(2)
#define SI_BARRIER_EVENT_VGT_FLUSH BITFIELD_BIT(3)
/* PFP waits for ME to finish. Used to sync for index and indirect buffers and render condition. */
#define SI_BARRIER_PFP_SYNC_ME BITFIELD_BIT(4)
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (1 << 3)
/* Scalar cache. (GFX6-9: scalar L1; GFX10: scalar L0)
#define SI_BARRIER_INV_ICACHE BITFIELD_BIT(5)
/* Scalar cache. (GFX6-9: scalar L1; GFX10+: scalar L0)
* GFX10: This also invalidates the L1 shader array cache. */
#define SI_CONTEXT_INV_SCACHE (1 << 4)
/* Vector cache. (GFX6-9: vector L1; GFX10: vector L0)
#define SI_BARRIER_INV_SMEM BITFIELD_BIT(6)
/* Vector cache. (GFX6-9: vector L1; GFX10+: vector L0)
* GFX10: This also invalidates the L1 shader array cache. */
#define SI_CONTEXT_INV_VCACHE (1 << 5)
#define SI_BARRIER_INV_VMEM BITFIELD_BIT(7)
/* L2 cache + L2 metadata cache writeback & invalidate.
* GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
#define SI_CONTEXT_INV_L2 (1 << 6)
* GFX6-8: Used by shaders only. GFX9+: Used by everything. */
#define SI_BARRIER_INV_L2 BITFIELD_BIT(8)
/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
* Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
* GFX6-7 will do complete invalidation, because the writeback is unsupported. */
#define SI_CONTEXT_WB_L2 (1 << 7)
/* Writeback & invalidate the L2 metadata cache only. It can only be coupled with
* a CB or DB flush. */
#define SI_CONTEXT_INV_L2_METADATA (1 << 8)
* GFX6-7 will do complete invalidation because the writeback is unsupported. */
#define SI_BARRIER_WB_L2 BITFIELD_BIT(9)
/* Writeback & invalidate the L2 metadata cache only. */
#define SI_BARRIER_INV_L2_METADATA BITFIELD_BIT(10)
/* These wait for shaders to finish. (SYNC_VS = wait for the whole geometry pipeline to finish) */
#define SI_BARRIER_SYNC_VS BITFIELD_BIT(11)
#define SI_BARRIER_SYNC_PS BITFIELD_BIT(12)
#define SI_BARRIER_SYNC_CS BITFIELD_BIT(13)
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_DB (1 << 9)
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (1 << 10)
#define SI_CONTEXT_FLUSH_AND_INV_CB (1 << 11)
/* Engine synchronization. */
#define SI_CONTEXT_VS_PARTIAL_FLUSH (1 << 12)
#define SI_CONTEXT_PS_PARTIAL_FLUSH (1 << 13)
#define SI_CONTEXT_CS_PARTIAL_FLUSH (1 << 14)
#define SI_CONTEXT_VGT_FLUSH (1 << 15)
/* gap */
/* PFP waits for ME to finish. Used to sync for index and indirect buffers and render
* condition. It's typically set when doing a VS/PS/CS partial flush for buffers. */
#define SI_CONTEXT_PFP_SYNC_ME (1 << 17)
#define SI_BARRIER_SYNC_AND_INV_DB BITFIELD_BIT(14)
#define SI_BARRIER_SYNC_AND_INV_CB BITFIELD_BIT(15)
#define SI_PREFETCH_LS (1 << 1)
#define SI_PREFETCH_HS (1 << 2)
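
A minimal usage sketch under the new names, mirroring the pattern used throughout this commit: set the wanted SI_BARRIER_* bits in sctx->barrier_flags and dirty the barrier atom so the next draw/dispatch emits it. The helper name below is hypothetical and not part of the change; it assumes it sits alongside the driver code where struct si_context and si_mark_atom_dirty() are visible.

/* Hypothetical helper (not part of this commit): make compute-shader buffer
 * writes visible to later shader reads using the renamed flags. */
static inline void example_sync_after_cs_buffer_write(struct si_context *sctx)
{
   sctx->barrier_flags |= SI_BARRIER_SYNC_CS |   /* wait for compute shaders */
                          SI_BARRIER_INV_SMEM |  /* invalidate the scalar cache */
                          SI_BARRIER_INV_VMEM;   /* invalidate the vector cache */
   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
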
@ -1881,26 +1878,26 @@ static inline void si_saved_cs_reference(struct si_saved_cs **dst, struct si_sav
static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
bool shaders_read_metadata, bool dcc_pipe_aligned)
{
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_INV_VMEM;
sctx->force_shader_coherency.with_cb = false;
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
if (sctx->screen->info.tcc_rb_non_coherent)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
else if (shaders_read_metadata)
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
} else if (sctx->gfx_level == GFX9) {
/* Single-sample color is coherent with shaders on GFX9, but
* L2 metadata must be flushed if shaders read metadata.
* (DCC, CMASK).
*/
if (num_samples >= 2 || (shaders_read_metadata && !dcc_pipe_aligned))
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
else if (shaders_read_metadata)
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
} else if (sctx->gfx_level <= GFX8) {
/* GFX6-GFX8 */
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
}
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
@ -1909,26 +1906,26 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
bool include_stencil, bool shaders_read_metadata)
{
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_DB | SI_BARRIER_INV_VMEM;
sctx->force_shader_coherency.with_db = false;
if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX12) {
if (sctx->screen->info.tcc_rb_non_coherent)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
else if (shaders_read_metadata)
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
} else if (sctx->gfx_level == GFX9) {
/* Single-sample depth (not stencil) is coherent with shaders
* on GFX9, but L2 metadata must be flushed if shaders read
* metadata.
*/
if (num_samples >= 2 || include_stencil)
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
else if (shaders_read_metadata)
sctx->barrier_flags |= SI_CONTEXT_INV_L2_METADATA;
sctx->barrier_flags |= SI_BARRIER_INV_L2_METADATA;
} else if (sctx->gfx_level <= GFX8) {
/* GFX6-GFX8 */
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
}
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);

@ -888,12 +888,12 @@ static void si_update_hw_pipeline_stats(struct si_context *sctx, unsigned type,
/* Enable/disable pipeline stats if we have any queries. */
if (diff == 1 && sctx->num_hw_pipestat_streamout_queries == 1) {
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
} else if (diff == -1 && sctx->num_hw_pipestat_streamout_queries == 0) {
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
}
@ -1599,8 +1599,8 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q
break;
}
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
(sctx->gfx_level <= GFX8 ? SI_CONTEXT_INV_L2 : 0);
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
(sctx->gfx_level <= GFX8 ? SI_BARRIER_INV_L2 : 0);
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
@ -1698,7 +1698,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que
/* Settings this in the render cond atom is too late,
* so set it here. */
if (sctx->gfx_level <= GFX8) {
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}

@ -988,7 +988,7 @@ static void post_upload_binary(struct si_screen *sscreen, struct si_shader *shad
si_cp_dma_copy_buffer(upload_ctx, &shader->bo->b.b, staging, 0, staging_offset,
binary_size);
si_barrier_after_simple_buffer_op(upload_ctx, 0, &shader->bo->b.b, staging);
upload_ctx->barrier_flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_L2;
upload_ctx->barrier_flags |= SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_L2;
#if 0 /* debug: validate whether the copy was successful */
uint32_t *dst_binary = malloc(binary_size);

@ -93,8 +93,8 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
/* Some chips with disabled RBs should wait for idle because FINISH_DONE
* doesn't work. */
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_CS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB |
SI_BARRIER_SYNC_CS;
sctx->emit_barrier(sctx, cs);
}
@ -140,10 +140,10 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_cp_dma_wait_for_idle(sctx, cs);
/* Make sure to wait-for-idle before starting SQTT. */
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM |
SI_BARRIER_INV_VMEM | SI_BARRIER_INV_L2 |
SI_BARRIER_PFP_SYNC_ME;
sctx->emit_barrier(sctx, cs);
si_inhibit_clockgating(sctx, cs, true);
@ -200,10 +200,10 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
sctx->screen->info.never_send_perfcounter_stop);
/* Make sure to wait-for-idle before stopping SQTT. */
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
SI_BARRIER_INV_ICACHE | SI_BARRIER_INV_SMEM |
SI_BARRIER_INV_VMEM | SI_BARRIER_INV_L2 |
SI_BARRIER_PFP_SYNC_ME;
sctx->emit_barrier(sctx, cs);
si_emit_sqtt_stop(sctx, cs, ip_type);
@ -620,30 +620,30 @@ void si_sqtt_describe_barrier_end(struct si_context *sctx, struct radeon_cmdbuf
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
marker.cb_id = 0;
if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH)
if (flags & SI_BARRIER_SYNC_VS)
marker.vs_partial_flush = true;
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH)
if (flags & SI_BARRIER_SYNC_PS)
marker.ps_partial_flush = true;
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH)
if (flags & SI_BARRIER_SYNC_CS)
marker.cs_partial_flush = true;
if (flags & SI_CONTEXT_PFP_SYNC_ME)
if (flags & SI_BARRIER_PFP_SYNC_ME)
marker.pfp_sync_me = true;
if (flags & SI_CONTEXT_INV_VCACHE)
if (flags & SI_BARRIER_INV_VMEM)
marker.inval_tcp = true;
if (flags & SI_CONTEXT_INV_ICACHE)
if (flags & SI_BARRIER_INV_ICACHE)
marker.inval_sqI = true;
if (flags & SI_CONTEXT_INV_SCACHE)
if (flags & SI_BARRIER_INV_SMEM)
marker.inval_sqK = true;
if (flags & SI_CONTEXT_INV_L2)
if (flags & SI_BARRIER_INV_L2)
marker.inval_tcc = true;
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
if (flags & SI_BARRIER_SYNC_AND_INV_CB) {
marker.inval_cb = true;
marker.flush_cb = true;
}
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
if (flags & SI_BARRIER_SYNC_AND_INV_DB) {
marker.inval_db = true;
marker.flush_db = true;
}

@ -1756,14 +1756,14 @@ static void si_set_active_query_state(struct pipe_context *ctx, bool enable)
if (enable) {
/* Disable pipeline stats if there are no active queries. */
if (sctx->num_hw_pipestat_streamout_queries) {
sctx->barrier_flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags |= SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_STOP;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_START;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
} else {
if (sctx->num_hw_pipestat_streamout_queries) {
sctx->barrier_flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
sctx->barrier_flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->barrier_flags &= ~SI_BARRIER_EVENT_PIPELINESTAT_START;
sctx->barrier_flags |= SI_BARRIER_EVENT_PIPELINESTAT_STOP;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
}

@ -907,7 +907,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
instance_count, 2, sctx->patch_vertices)) {
/* The cache flushes should have been emitted already. */
assert(sctx->barrier_flags == 0);
sctx->barrier_flags = SI_CONTEXT_VGT_FLUSH;
sctx->barrier_flags = SI_BARRIER_EVENT_VGT_FLUSH;
si_emit_barrier_direct(sctx);
}
}
@ -2121,7 +2121,7 @@ static void si_draw(struct pipe_context *ctx,
index_size = 2;
/* GFX6-7 don't read index buffers through L2. */
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_resource(indexbuf)->L2_cache_dirty = false;
} else if (!IS_DRAW_VERTEX_STATE && info->has_user_indices) {
@ -2144,7 +2144,7 @@ static void si_draw(struct pipe_context *ctx,
si_resource(indexbuf)->L2_cache_dirty) {
/* GFX8-GFX11 reads index buffers through L2, so it doesn't
* need this. */
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_resource(indexbuf)->L2_cache_dirty = false;
}
@ -2157,14 +2157,14 @@ static void si_draw(struct pipe_context *ctx,
/* Indirect buffers use L2 on GFX9-GFX11, but not other hw. */
if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
if (indirect->buffer && si_resource(indirect->buffer)->L2_cache_dirty) {
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_resource(indirect->buffer)->L2_cache_dirty = false;
}
if (indirect->indirect_draw_count &&
si_resource(indirect->indirect_draw_count)->L2_cache_dirty) {
sctx->barrier_flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_WB_L2 | SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
si_resource(indirect->indirect_draw_count)->L2_cache_dirty = false;
}

@ -3769,7 +3769,7 @@ bool si_update_ngg(struct si_context *sctx)
* pointers are set.
*/
if (sctx->screen->info.has_vgt_flush_ngg_legacy_bug && !new_ngg) {
sctx->barrier_flags |= SI_CONTEXT_VGT_FLUSH;
sctx->barrier_flags |= SI_BARRIER_EVENT_VGT_FLUSH;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
if (sctx->gfx_level == GFX10) {

@ -98,12 +98,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
* VS_PARTIAL_FLUSH is required if the buffers are going to be
* used as an input immediately.
*/
sctx->barrier_flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM |
SI_BARRIER_SYNC_VS | SI_BARRIER_PFP_SYNC_ME;
/* Make the streamout state buffer available to the CP for resuming and DrawTF. */
if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope)
sctx->barrier_flags |= SI_CONTEXT_WB_L2;
sctx->barrier_flags |= SI_BARRIER_WB_L2;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
}
@ -227,8 +227,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
/* All readers of the streamout targets need to be finished before we can
* start writing to them.
*/
sctx->barrier_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_SYNC_PS | SI_BARRIER_SYNC_CS |
SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
} else {
si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
@ -371,7 +371,7 @@ void si_emit_streamout_end(struct si_context *sctx)
if (sctx->gfx_level >= GFX11) {
/* Wait for streamout to finish before reading GDS_STRMOUT registers. */
sctx->barrier_flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
sctx->barrier_flags |= SI_BARRIER_SYNC_VS;
si_emit_barrier_direct(sctx);
} else {
si_flush_vgt_streamout(sctx);
@ -387,7 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
COPY_DATA_REG, NULL,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
/* For DrawTF reading buf_filled_size: */
sctx->barrier_flags |= SI_CONTEXT_PFP_SYNC_ME;
sctx->barrier_flags |= SI_BARRIER_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
} else {
uint64_t va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;

@ -537,7 +537,7 @@ void si_test_blit_perf(struct si_screen *sscreen)
case METHOD_DEFAULT:
if (test_flavor == TEST_FB_CLEAR) {
ctx->clear(ctx, PIPE_CLEAR_COLOR, NULL, clear_color, 0, 0);
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_INV_L2;
} else {
ctx->clear_render_target(ctx, dst_surf, clear_color,
dst_box.x, dst_box.y,
@ -640,10 +640,10 @@ void si_test_blit_perf(struct si_screen *sscreen)
}
/* Wait for idle after all tests. */
sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_L2 | SI_CONTEXT_INV_SCACHE |
SI_CONTEXT_INV_VCACHE;
sctx->barrier_flags |= SI_BARRIER_SYNC_AND_INV_CB |
SI_BARRIER_SYNC_CS |
SI_BARRIER_INV_L2 | SI_BARRIER_INV_SMEM |
SI_BARRIER_INV_VMEM;
si_emit_barrier_direct(sctx);
ctx->end_query(ctx, q);

@ -257,7 +257,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
si_barrier_after_simple_buffer_op(sctx, 0, dst, src);
}
sctx->barrier_flags |= SI_CONTEXT_INV_L2;
sctx->barrier_flags |= SI_BARRIER_INV_L2;
}
ctx->end_query(ctx, q);