radeonsi: rename "cache_flush" -> "barrier"

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31193>
Marek Olšák, 2024-08-23 06:08:46 -04:00 (committed by Marge Bot)
parent 214b4a119d
commit dac99e75af
18 changed files with 71 additions and 71 deletions
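
The rename is mechanical: the "cache flush" path emits more than cache flushes (partial flushes, VGT flushes, pipeline-stat toggles, PFP sync), so "barrier" describes it better. The name mapping visible in this diff:

   sctx->atoms.s.cache_flush      ->  sctx->atoms.s.barrier
   sctx->emit_cache_flush         ->  sctx->emit_barrier
   si_emit_cache_flush_direct()   ->  si_emit_barrier_direct()
   gfx6_emit_cache_flush()        ->  gfx6_emit_barrier()
   gfx10_emit_cache_flush()       ->  gfx10_emit_barrier()
   si_emit_cache_flush_state()    ->  si_emit_barrier_as_atom()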

View file

@@ -349,7 +349,7 @@ static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct s
    /* TODO: Range-invalidate GL2 */
    if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) {
       sctx->flags |= SI_CONTEXT_INV_L2;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    struct gfx11_sh_query_buffer *qbuf = query->first;

View file

@@ -508,7 +508,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
    if (custom_blend == sctx->custom_blend_fmask_decompress ||
        custom_blend == sctx->custom_blend_dcc_decompress) {
       sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    si_blitter_begin(sctx, SI_DECOMPRESS);
@@ -518,7 +518,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
    if (custom_blend == sctx->custom_blend_fmask_decompress ||
        custom_blend == sctx->custom_blend_dcc_decompress) {
       sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    /* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass
@@ -1071,7 +1071,7 @@ static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_inf
 {
    /* Required before and after CB_RESOLVE. */
    sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
    si_blitter_begin(
       sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));

View file

@@ -73,7 +73,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
    if (sctx->gfx_level <= GFX8)
       sctx->flags |= SI_CONTEXT_INV_L2;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
    /* Execute clears. */
    for (unsigned i = 0; i < num_clears; i++) {
@@ -110,7 +110,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
    if (sctx->gfx_level <= GFX8)
       sctx->flags |= SI_CONTEXT_WB_L2;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex)
@@ -1210,7 +1210,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
          /* ZRANGE_PRECISION register of a bound surface will change so we
           * must flush the DB caches. */
          sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
       /* Update DB_DEPTH_CLEAR. */
       zstex->depth_clear_value[level] = depth;
@@ -1246,7 +1246,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers,
        */
       if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5) {
          sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
    }

View file

@@ -1176,7 +1176,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
    if (cs_regalloc_hang) {
       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    if (program->ir_type != PIPE_SHADER_IR_NATIVE && program->shader.compilation_failed)
@@ -1216,7 +1216,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
       if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
           si_resource(info->indirect)->TC_L2_dirty) {
          sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
          si_resource(info->indirect)->TC_L2_dirty = false;
       }
    }
@@ -1269,7 +1269,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
    /* Registers that are not read from memory should be set before this: */
    if (sctx->flags)
-      si_emit_cache_flush_direct(sctx);
+      si_emit_barrier_direct(sctx);
 
    if (sctx->has_graphics && si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
       sctx->atoms.s.render_cond.emit(sctx, -1);
@@ -1312,7 +1312,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
    if (cs_regalloc_hang) {
       sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 }

View file

@@ -82,7 +82,7 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags,
    /* Invalidate the VMEM cache only. The SMEM cache isn't used by shader buffers. */
    sctx->flags |= SI_CONTEXT_INV_VCACHE;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
@@ -129,7 +129,7 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
       }
    }
 
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 static void si_set_dst_src_barrier_buffers(struct pipe_shader_buffer *buffers,
@@ -165,7 +165,7 @@ static void si_compute_begin_internal(struct si_context *sctx, bool render_condi
    sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
    if (sctx->num_hw_pipestat_streamout_queries) {
       sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    if (!render_condition_enabled)
@@ -183,7 +183,7 @@ static void si_compute_end_internal(struct si_context *sctx)
    sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
    if (sctx->num_hw_pipestat_streamout_queries) {
       sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    sctx->render_cond_enabled = sctx->render_cond;
@@ -494,7 +494,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
    /* Flush and wait for CB before retiling DCC. */
    sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
    /* Set the DCC buffer. */
    assert(tex->surface.meta_offset && tex->surface.meta_offset <= UINT_MAX);

View file

@@ -125,7 +125,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
     * Also wait for the previous CP DMA operations.
     */
    if (*is_first && sctx->flags)
-      si_emit_cache_flush_direct(sctx);
+      si_emit_barrier_direct(sctx);
 
    if (*is_first && !(*packet_flags & CP_DMA_CLEAR))
       *packet_flags |= CP_DMA_RAW_WAIT;
@@ -152,7 +152,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
    if (!cp_dma_use_L2(sctx)) {
       sctx->flags |= SI_CONTEXT_INV_L2;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    /* Mark the buffer range of destination as valid (initialized),
@@ -235,7 +235,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
    if (!cp_dma_use_L2(sctx)) {
       sctx->flags |= SI_CONTEXT_INV_L2;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    /* Mark the buffer range of destination as valid (initialized),

View file

@@ -1641,7 +1641,7 @@ static void si_mark_bindless_descriptors_dirty(struct si_context *sctx)
    /* gfx_shader_pointers uploads bindless descriptors. */
    si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
    /* gfx_shader_pointers can flag cache flags, so we need to dirty this too. */
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 /* Update all buffer bindings where the buffer is bound, including
@@ -1898,7 +1898,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
     * descriptors directly in memory, in case the GPU is using them.
     */
    sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
-   si_emit_cache_flush_direct(sctx);
+   si_emit_barrier_direct(sctx);
 
    util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
       unsigned desc_slot = (*tex_handle)->desc_slot;

View file

@@ -167,7 +167,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
    /* Wait for draw calls to finish if needed. */
    if (wait_flags) {
       ctx->flags |= wait_flags;
-      si_emit_cache_flush_direct(ctx);
+      si_emit_barrier_direct(ctx);
    }
 
    ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs;
@@ -481,7 +481,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
    if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg)
       ctx->flags |= SI_CONTEXT_VGT_FLUSH;
-   si_mark_atom_dirty(ctx, &ctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(ctx, &ctx->atoms.s.barrier);
    si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state);
 
    if (ctx->screen->attribute_pos_prim_ring) {
@@ -735,7 +735,7 @@ static void prepare_cb_db_flushes(struct si_context *ctx, unsigned *flags)
    }
 }
 
-void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
+void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
 {
    uint32_t gcr_cntl = 0;
    unsigned cb_db_event = 0;
@@ -922,7 +922,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
    ctx->flags = 0;
 }
 
-void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
+void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
 {
    uint32_t flags = sctx->flags;

View file

@@ -635,9 +635,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
    /* Initialize context functions used by graphics and compute. */
    if (sctx->gfx_level >= GFX10)
-      sctx->emit_cache_flush = gfx10_emit_cache_flush;
+      sctx->emit_barrier = gfx10_emit_barrier;
    else
-      sctx->emit_cache_flush = gfx6_emit_cache_flush;
+      sctx->emit_barrier = gfx6_emit_barrier;
 
    sctx->b.emit_string_marker = si_emit_string_marker;
    sctx->b.set_debug_callback = si_set_debug_callback;

View file

@@ -955,7 +955,7 @@ struct si_context {
       struct si_resource *csa;
    } shadowing;
 
-   void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs);
+   void (*emit_barrier)(struct si_context *ctx, struct radeon_cmdbuf *cs);
 
    struct blitter_context *blitter;
    void *noop_blend;
@@ -1593,8 +1593,8 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx);
 void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs);
 void si_trace_emit(struct si_context *sctx);
 void si_emit_ts(struct si_context *sctx, struct si_resource* buffer, unsigned int offset);
-void gfx10_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs);
-void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs);
+void gfx10_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs);
+void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs);
 /* Replace the sctx->b.draw_vbo function with a wrapper. This can be use to implement
  * optimizations without affecting the normal draw_vbo functions perf.
  */
@@ -1896,7 +1896,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
       sctx->flags |= SI_CONTEXT_INV_L2;
    }
 
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
@@ -1924,7 +1924,7 @@ static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned
       sctx->flags |= SI_CONTEXT_INV_L2;
    }
 
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 static inline bool si_can_sample_zs(struct si_texture *tex, bool stencil_sampler)
@@ -2199,18 +2199,18 @@ si_set_rasterized_prim(struct si_context *sctx, enum mesa_prim rast_prim,
 /* There are 3 ways to flush caches and all of them are correct.
  *
  * 1) sctx->flags |= ...;
- *    si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); // deferred
+ *    si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); // deferred
  *
  * 2) sctx->flags |= ...;
- *    si_emit_cache_flush_direct(sctx); // immediate
+ *    si_emit_barrier_direct(sctx); // immediate
  *
  * 3) sctx->flags |= ...;
- *    sctx->emit_cache_flush(sctx, cs); // immediate (2 is better though)
+ *    sctx->emit_barrier(sctx, cs); // immediate (2 is better though)
  */
-static inline void si_emit_cache_flush_direct(struct si_context *sctx)
+static inline void si_emit_barrier_direct(struct si_context *sctx)
 {
-   sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
-   sctx->dirty_atoms &= ~SI_ATOM_BIT(cache_flush);
+   sctx->emit_barrier(sctx, &sctx->gfx_cs);
+   sctx->dirty_atoms &= ~SI_ATOM_BIT(barrier);
 }
 
 #define PRINT_ERR(fmt, args...) \
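
For reference, the three equivalent patterns from the comment above, as they read after this rename (a minimal sketch using the post-rename names, not an excerpt from the tree; SI_CONTEXT_INV_VCACHE stands in for any flag):

   /* 1) Deferred: mark the barrier atom dirty; it is emitted together
    *    with the other dirty atoms at the next draw/dispatch. */
   sctx->flags |= SI_CONTEXT_INV_VCACHE;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);

   /* 2) Immediate: emit now and clear the atom's dirty bit so the
    *    barrier isn't emitted a second time. */
   sctx->flags |= SI_CONTEXT_INV_VCACHE;
   si_emit_barrier_direct(sctx);

   /* 3) Immediate through the per-generation callback; (2) is preferred
    *    since it also clears the dirty bit. */
   sctx->flags |= SI_CONTEXT_INV_VCACHE;
   sctx->emit_barrier(sctx, &sctx->gfx_cs);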

View file

@@ -890,11 +890,11 @@ static void si_update_hw_pipeline_stats(struct si_context *sctx, unsigned type,
       if (diff == 1 && sctx->num_hw_pipestat_streamout_queries == 1) {
          sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
          sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       } else if (diff == -1 && sctx->num_hw_pipestat_streamout_queries == 0) {
          sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
          sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
    }
 }
@@ -1601,7 +1601,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q
    sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
                   (sctx->gfx_level <= GFX8 ? SI_CONTEXT_INV_L2 : 0);
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
    for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
       if (query->b.type != PIPE_QUERY_TIMESTAMP) {
@@ -1699,7 +1699,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que
        * so set it here. */
       if (sctx->gfx_level <= GFX8) {
          sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
 
       sctx->render_cond_enabled = old_render_cond_enabled;

View file

@@ -95,7 +95,7 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
        * doesn't work. */
       sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB |
                      SI_CONTEXT_CS_PARTIAL_FLUSH;
-      sctx->emit_cache_flush(sctx, cs);
+      sctx->emit_barrier(sctx, cs);
    }
 
    ac_sqtt_emit_wait(&sscreen->info, pm4, sctx->sqtt, is_compute_queue);
@@ -144,7 +144,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
                   SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
                   SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
                   SI_CONTEXT_PFP_SYNC_ME;
-   sctx->emit_cache_flush(sctx, cs);
+   sctx->emit_barrier(sctx, cs);
 
    si_inhibit_clockgating(sctx, cs, true);
@@ -204,7 +204,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
                   SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
                   SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
                   SI_CONTEXT_PFP_SYNC_ME;
-   sctx->emit_cache_flush(sctx, cs);
+   sctx->emit_barrier(sctx, cs);
 
    si_emit_sqtt_stop(sctx, cs, ip_type);

View file

@@ -1758,13 +1758,13 @@ static void si_set_active_query_state(struct pipe_context *ctx, bool enable)
       if (sctx->num_hw_pipestat_streamout_queries) {
          sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
          sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
    } else {
       if (sctx->num_hw_pipestat_streamout_queries) {
          sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
          sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
    }
 
@@ -2633,7 +2633,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
     * Wait for PS because: texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
     */
    sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
    /* DB caches are flushed on demand (using si_decompress_textures) except the cases below. */
    if (sctx->gfx_level >= GFX12) {
@@ -2661,7 +2661,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
           * This seems to fix them:
           */
          sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       }
    } else if (sctx->gfx_level == GFX9) {
       /* It appears that DB metadata "leaks" in a sequence of:
@@ -2671,7 +2671,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        * Flushing DB metadata works around the problem.
        */
       sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    /* Take the maximum of the old and new count. If the new count is lower,
@@ -4990,7 +4990,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
        flags & (PIPE_BARRIER_INDEX_BUFFER | PIPE_BARRIER_INDIRECT_BUFFER))
       sctx->flags |= SI_CONTEXT_WB_L2;
 
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 }
 
 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
@@ -5003,9 +5003,9 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
    return si_create_blend_state_mode(&sctx->b, &blend, mode);
 }
 
-static void si_emit_cache_flush_state(struct si_context *sctx, unsigned index)
+static void si_emit_barrier_as_atom(struct si_context *sctx, unsigned index)
 {
-   sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
+   sctx->emit_barrier(sctx, &sctx->gfx_cs);
 }
 
 static void si_pm4_emit_sqtt_pipeline(struct si_context *sctx, unsigned index)
@@ -5056,7 +5056,7 @@ void si_init_state_functions(struct si_context *sctx)
    sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
    sctx->atoms.s.clip_state.emit = si_emit_clip_state;
    sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;
-   sctx->atoms.s.cache_flush.emit = si_emit_cache_flush_state;
+   sctx->atoms.s.barrier.emit = si_emit_barrier_as_atom;
 
    sctx->b.create_blend_state = si_create_blend_state;
    sctx->b.bind_blend_state = si_bind_blend_state;

View file

@@ -236,9 +236,9 @@ union si_state_atoms {
       struct si_atom ngg_cull_state;
       struct si_atom vgt_pipeline_state;
      struct si_atom tess_io_layout;
-      struct si_atom cache_flush;
-      struct si_atom streamout_begin; /* this must be done after cache_flush */
-      struct si_atom render_cond;     /* this must be after cache_flush */
+      struct si_atom barrier;
+      struct si_atom streamout_begin; /* this must be done after barrier */
+      struct si_atom render_cond;     /* this must be after barrier */
       struct si_atom spi_ge_ring_state; /* this must be last because it waits for idle. */
    } s;
    struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
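
Context for the ordering comments above (a sketch, not code from this commit): the atoms union is aliased as an array, and dirty atoms are emitted in declaration order by scanning the dirty mask from the lowest bit, which is what makes "after barrier" hold: barrier is declared before streamout_begin and render_cond, so its flushes and waits land in the command stream first. Assuming a 64-bit dirty mask and Mesa's u_bit_scan64 helper, the emit loop looks roughly like:

   uint64_t dirty = sctx->dirty_atoms;
   while (dirty) {
      unsigned i = u_bit_scan64(&dirty);   /* lowest set bit first */
      sctx->atoms.array[i].emit(sctx, i);  /* barrier before streamout_begin/render_cond */
   }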

View file

@@ -908,7 +908,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
          /* The cache flushes should have been emitted already. */
          assert(sctx->flags == 0);
          sctx->flags = SI_CONTEXT_VGT_FLUSH;
-         si_emit_cache_flush_direct(sctx);
+         si_emit_barrier_direct(sctx);
       }
    }
@@ -2122,7 +2122,7 @@ static void si_draw(struct pipe_context *ctx,
             /* GFX6-7 don't read index buffers through TC L2. */
             sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-            si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
             si_resource(indexbuf)->TC_L2_dirty = false;
          }
       } else if (!IS_DRAW_VERTEX_STATE && info->has_user_indices) {
          unsigned start_offset;
@@ -2145,7 +2145,7 @@ static void si_draw(struct pipe_context *ctx,
             /* GFX8-GFX11 reads index buffers through L2, so it doesn't
              * need this. */
             sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-            si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
             si_resource(indexbuf)->TC_L2_dirty = false;
          }
       }
@@ -2158,14 +2158,14 @@ static void si_draw(struct pipe_context *ctx,
       if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
          if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
            sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-            si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
             si_resource(indirect->buffer)->TC_L2_dirty = false;
          }
 
          if (indirect->indirect_draw_count &&
             si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
             sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
-            si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+            si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
             si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
          }
       }
@@ -2307,7 +2307,7 @@ static void si_draw(struct pipe_context *ctx,
       (sctx, indirect, prim, index_size, instance_count, primitive_restart,
        info->restart_index, min_direct_count);
 
-   /* <-- CUs are idle here if the cache_flush state waited. */
+   /* <-- CUs are idle here if the barrier atom waited. */
 
    /* This must be done after si_emit_all_states, which can affect this. */
    si_emit_vs_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE, HAS_SH_PAIRS_PACKED>

View file

@@ -3770,7 +3770,7 @@ bool si_update_ngg(struct si_context *sctx)
     */
    if (sctx->screen->info.has_vgt_flush_ngg_legacy_bug && !new_ngg) {
       sctx->flags |= SI_CONTEXT_VGT_FLUSH;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
 
       if (sctx->gfx_level == GFX10) {
          /* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 */

View file

@@ -105,7 +105,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
       if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope)
          sctx->flags |= SI_CONTEXT_WB_L2;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    }
 
    /* TODO: This is a hack that fixes these failures. It shouldn't be necessary.
@@ -229,7 +229,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
       */
      sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
                     SI_CONTEXT_PFP_SYNC_ME;
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
    } else {
       si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
       si_set_streamout_enable(sctx, false);
@@ -372,7 +372,7 @@ void si_emit_streamout_end(struct si_context *sctx)
    if (sctx->gfx_level >= GFX11) {
       /* Wait for streamout to finish before reading GDS_STRMOUT registers. */
       sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
-      si_emit_cache_flush_direct(sctx);
+      si_emit_barrier_direct(sctx);
    } else {
       si_flush_vgt_streamout(sctx);
    }
@@ -388,7 +388,7 @@ void si_emit_streamout_end(struct si_context *sctx)
                             (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
 
          /* For DrawTF reading buf_filled_size: */
         sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
-         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+         si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
       } else {
          uint64_t va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;

View file

@@ -516,7 +516,7 @@ void si_test_blit_perf(struct si_screen *sscreen)
          fb.nr_cbufs = 1;
          fb.cbufs[0] = dst_surf;
          ctx->set_framebuffer_state(ctx, &fb);
-         si_emit_cache_flush_direct(sctx);
+         si_emit_barrier_direct(sctx);
       }
    }
@@ -644,7 +644,7 @@ void si_test_blit_perf(struct si_screen *sscreen)
                         SI_CONTEXT_CS_PARTIAL_FLUSH |
                         SI_CONTEXT_INV_L2 | SI_CONTEXT_INV_SCACHE |
                         SI_CONTEXT_INV_VCACHE;
-         si_emit_cache_flush_direct(sctx);
+         si_emit_barrier_direct(sctx);
          ctx->end_query(ctx, q);
 
          pipe_surface_reference(&dst_surf, NULL);