diff --git a/src/gallium/drivers/radeonsi/gfx11_query.c b/src/gallium/drivers/radeonsi/gfx11_query.c index 569f1f309de..4e01b06e307 100644 --- a/src/gallium/drivers/radeonsi/gfx11_query.c +++ b/src/gallium/drivers/radeonsi/gfx11_query.c @@ -349,7 +349,7 @@ static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct s /* TODO: Range-invalidate GL2 */ if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) { sctx->flags |= SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } struct gfx11_sh_query_buffer *qbuf = query->first; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 5c46dac16ce..d3c6919d200 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -508,7 +508,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture if (custom_blend == sctx->custom_blend_fmask_decompress || custom_blend == sctx->custom_blend_dcc_decompress) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } si_blitter_begin(sctx, SI_DECOMPRESS); @@ -518,7 +518,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture if (custom_blend == sctx->custom_blend_fmask_decompress || custom_blend == sctx->custom_blend_dcc_decompress) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass @@ -1071,7 +1071,7 @@ static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_inf { /* Required before and after CB_RESOLVE. */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_blitter_begin( sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index e4e55017824..7caccbcc85a 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -73,7 +73,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info, if (sctx->gfx_level <= GFX8) sctx->flags |= SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); /* Execute clears. */ for (unsigned i = 0; i < num_clears; i++) { @@ -110,7 +110,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info, if (sctx->gfx_level <= GFX8) sctx->flags |= SI_CONTEXT_WB_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex) @@ -1210,7 +1210,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers, /* ZRANGE_PRECISION register of a bound surface will change so we * must flush the DB caches. */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* Update DB_DEPTH_CLEAR. */ zstex->depth_clear_value[level] = depth; @@ -1246,7 +1246,7 @@ static void gfx6_clear(struct pipe_context *ctx, unsigned buffers, */ if (sctx->gfx_level == GFX11 || sctx->gfx_level == GFX11_5) { sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 817dd1c3569..e148f6d803a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1176,7 +1176,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if (cs_regalloc_hang) { sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } if (program->ir_type != PIPE_SHADER_IR_NATIVE && program->shader.compilation_failed) @@ -1216,7 +1216,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_resource(info->indirect)->TC_L2_dirty = false; } } @@ -1269,7 +1269,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info /* Registers that are not read from memory should be set before this: */ if (sctx->flags) - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); if (sctx->has_graphics && si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { sctx->atoms.s.render_cond.emit(sctx, -1); @@ -1312,7 +1312,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info if (cs_regalloc_hang) { sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 36fc36c314a..928b568d23e 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -82,7 +82,7 @@ void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags, /* Invalidate the VMEM cache only. The SMEM cache isn't used by shader buffers. */ sctx->flags |= SI_CONTEXT_INV_VCACHE; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags, @@ -129,7 +129,7 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags, } } - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } static void si_set_dst_src_barrier_buffers(struct pipe_shader_buffer *buffers, @@ -165,7 +165,7 @@ static void si_compute_begin_internal(struct si_context *sctx, bool render_condi sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; if (sctx->num_hw_pipestat_streamout_queries) { sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } if (!render_condition_enabled) @@ -183,7 +183,7 @@ static void si_compute_end_internal(struct si_context *sctx) sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; if (sctx->num_hw_pipestat_streamout_queries) { sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } sctx->render_cond_enabled = sctx->render_cond; @@ -494,7 +494,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex) /* Flush and wait for CB before retiling DCC. */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); /* Set the DCC buffer. */ assert(tex->surface.meta_offset && tex->surface.meta_offset <= UINT_MAX); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 2ab9f1a61aa..6172227cb40 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -125,7 +125,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst * Also wait for the previous CP DMA operations. */ if (*is_first && sctx->flags) - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); if (*is_first && !(*packet_flags & CP_DMA_CLEAR)) *packet_flags |= CP_DMA_RAW_WAIT; @@ -152,7 +152,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, if (!cp_dma_use_L2(sctx)) { sctx->flags |= SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* Mark the buffer range of destination as valid (initialized), @@ -235,7 +235,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, if (!cp_dma_use_L2(sctx)) { sctx->flags |= SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* Mark the buffer range of destination as valid (initialized), diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index acbf90f40c9..676e3f159ae 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1641,7 +1641,7 @@ static void si_mark_bindless_descriptors_dirty(struct si_context *sctx) /* gfx_shader_pointers uploads bindless descriptors. */ si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers); /* gfx_shader_pointers can flag cache flags, so we need to dirty this too. */ - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* Update all buffer bindings where the buffer is bound, including @@ -1898,7 +1898,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx) * descriptors directly in memory, in case the GPU is using them. */ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { unsigned desc_slot = (*tex_handle)->desc_slot; diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index b81a96acdc4..498a2901e48 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -167,7 +167,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h /* Wait for draw calls to finish if needed. */ if (wait_flags) { ctx->flags |= wait_flags; - si_emit_cache_flush_direct(ctx); + si_emit_barrier_direct(ctx); } ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs; @@ -481,7 +481,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg) ctx->flags |= SI_CONTEXT_VGT_FLUSH; - si_mark_atom_dirty(ctx, &ctx->atoms.s.cache_flush); + si_mark_atom_dirty(ctx, &ctx->atoms.s.barrier); si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state); if (ctx->screen->attribute_pos_prim_ring) { @@ -735,7 +735,7 @@ static void prepare_cb_db_flushes(struct si_context *ctx, unsigned *flags) } } -void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) +void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) { uint32_t gcr_cntl = 0; unsigned cb_db_event = 0; @@ -922,7 +922,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) ctx->flags = 0; } -void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) +void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs) { uint32_t flags = sctx->flags; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 22502697d76..35139efea6e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -635,9 +635,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign /* Initialize context functions used by graphics and compute. */ if (sctx->gfx_level >= GFX10) - sctx->emit_cache_flush = gfx10_emit_cache_flush; + sctx->emit_barrier = gfx10_emit_barrier; else - sctx->emit_cache_flush = gfx6_emit_cache_flush; + sctx->emit_barrier = gfx6_emit_barrier; sctx->b.emit_string_marker = si_emit_string_marker; sctx->b.set_debug_callback = si_set_debug_callback; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e2b32cf6517..91260d8d711 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -955,7 +955,7 @@ struct si_context { struct si_resource *csa; } shadowing; - void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs); + void (*emit_barrier)(struct si_context *ctx, struct radeon_cmdbuf *cs); struct blitter_context *blitter; void *noop_blend; @@ -1593,8 +1593,8 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx); void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs); void si_trace_emit(struct si_context *sctx); void si_emit_ts(struct si_context *sctx, struct si_resource* buffer, unsigned int offset); -void gfx10_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs); -void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs); +void gfx10_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs); +void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs); /* Replace the sctx->b.draw_vbo function with a wrapper. This can be use to implement * optimizations without affecting the normal draw_vbo functions perf. */ @@ -1896,7 +1896,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned sctx->flags |= SI_CONTEXT_INV_L2; } - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, @@ -1924,7 +1924,7 @@ static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned sctx->flags |= SI_CONTEXT_INV_L2; } - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } static inline bool si_can_sample_zs(struct si_texture *tex, bool stencil_sampler) @@ -2199,18 +2199,18 @@ si_set_rasterized_prim(struct si_context *sctx, enum mesa_prim rast_prim, /* There are 3 ways to flush caches and all of them are correct. * * 1) sctx->flags |= ...; - * si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); // deferred + * si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); // deferred * * 2) sctx->flags |= ...; - * si_emit_cache_flush_direct(sctx); // immediate + * si_emit_barrier_direct(sctx); // immediate * * 3) sctx->flags |= ...; - * sctx->emit_cache_flush(sctx, cs); // immediate (2 is better though) + * sctx->emit_barrier(sctx, cs); // immediate (2 is better though) */ -static inline void si_emit_cache_flush_direct(struct si_context *sctx) +static inline void si_emit_barrier_direct(struct si_context *sctx) { - sctx->emit_cache_flush(sctx, &sctx->gfx_cs); - sctx->dirty_atoms &= ~SI_ATOM_BIT(cache_flush); + sctx->emit_barrier(sctx, &sctx->gfx_cs); + sctx->dirty_atoms &= ~SI_ATOM_BIT(barrier); } #define PRINT_ERR(fmt, args...) \ diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 10b4097d592..1932bda7a60 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -890,11 +890,11 @@ static void si_update_hw_pipeline_stats(struct si_context *sctx, unsigned type, if (diff == 1 && sctx->num_hw_pipestat_streamout_queries == 1) { sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } else if (diff == -1 && sctx->num_hw_pipestat_streamout_queries == 0) { sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } } @@ -1601,7 +1601,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | (sctx->gfx_level <= GFX8 ? SI_CONTEXT_INV_L2 : 0); - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) { if (query->b.type != PIPE_QUERY_TIMESTAMP) { @@ -1699,7 +1699,7 @@ static void si_render_condition(struct pipe_context *ctx, struct pipe_query *que * so set it here. */ if (sctx->gfx_level <= GFX8) { sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } sctx->render_cond_enabled = old_render_cond_enabled; diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index c64843cf052..9b82755eb8a 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -95,7 +95,7 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs, * doesn't work. */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_CS_PARTIAL_FLUSH; - sctx->emit_cache_flush(sctx, cs); + sctx->emit_barrier(sctx, cs); } ac_sqtt_emit_wait(&sscreen->info, pm4, sctx->sqtt, is_compute_queue); @@ -144,7 +144,7 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs) SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME; - sctx->emit_cache_flush(sctx, cs); + sctx->emit_barrier(sctx, cs); si_inhibit_clockgating(sctx, cs, true); @@ -204,7 +204,7 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs) SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME; - sctx->emit_cache_flush(sctx, cs); + sctx->emit_barrier(sctx, cs); si_emit_sqtt_stop(sctx, cs, ip_type); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8a6b757a513..1908fd1a7e8 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1758,13 +1758,13 @@ static void si_set_active_query_state(struct pipe_context *ctx, bool enable) if (sctx->num_hw_pipestat_streamout_queries) { sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } else { if (sctx->num_hw_pipestat_streamout_queries) { sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } @@ -2633,7 +2633,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * Wait for PS because: texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*) */ sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); /* DB caches are flushed on demand (using si_decompress_textures) except the cases below. */ if (sctx->gfx_level >= GFX12) { @@ -2661,7 +2661,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * This seems to fix them: */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } } else if (sctx->gfx_level == GFX9) { /* It appears that DB metadata "leaks" in a sequence of: @@ -2671,7 +2671,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * Flushing DB metadata works around the problem. */ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* Take the maximum of the old and new count. If the new count is lower, @@ -4990,7 +4990,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) flags & (PIPE_BARRIER_INDEX_BUFFER | PIPE_BARRIER_INDIRECT_BUFFER)) sctx->flags |= SI_CONTEXT_WB_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) @@ -5003,9 +5003,9 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) return si_create_blend_state_mode(&sctx->b, &blend, mode); } -static void si_emit_cache_flush_state(struct si_context *sctx, unsigned index) +static void si_emit_barrier_as_atom(struct si_context *sctx, unsigned index) { - sctx->emit_cache_flush(sctx, &sctx->gfx_cs); + sctx->emit_barrier(sctx, &sctx->gfx_cs); } static void si_pm4_emit_sqtt_pipeline(struct si_context *sctx, unsigned index) @@ -5056,7 +5056,7 @@ void si_init_state_functions(struct si_context *sctx) sctx->atoms.s.clip_regs.emit = si_emit_clip_regs; sctx->atoms.s.clip_state.emit = si_emit_clip_state; sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref; - sctx->atoms.s.cache_flush.emit = si_emit_cache_flush_state; + sctx->atoms.s.barrier.emit = si_emit_barrier_as_atom; sctx->b.create_blend_state = si_create_blend_state; sctx->b.bind_blend_state = si_bind_blend_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ad5f647834b..bc16d91cb11 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -236,9 +236,9 @@ union si_state_atoms { struct si_atom ngg_cull_state; struct si_atom vgt_pipeline_state; struct si_atom tess_io_layout; - struct si_atom cache_flush; - struct si_atom streamout_begin; /* this must be done after cache_flush */ - struct si_atom render_cond; /* this must be after cache_flush */ + struct si_atom barrier; + struct si_atom streamout_begin; /* this must be done after barrier */ + struct si_atom render_cond; /* this must be after barrier */ struct si_atom spi_ge_ring_state; /* this must be last because it waits for idle. */ } s; struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)]; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 56a4b15930b..0985c271b93 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -908,7 +908,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* The cache flushes should have been emitted already. */ assert(sctx->flags == 0); sctx->flags = SI_CONTEXT_VGT_FLUSH; - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); } } @@ -2122,7 +2122,7 @@ static void si_draw(struct pipe_context *ctx, /* GFX6-7 don't read index buffers through TC L2. */ sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_resource(indexbuf)->TC_L2_dirty = false; } else if (!IS_DRAW_VERTEX_STATE && info->has_user_indices) { unsigned start_offset; @@ -2145,7 +2145,7 @@ static void si_draw(struct pipe_context *ctx, /* GFX8-GFX11 reads index buffers through L2, so it doesn't * need this. */ sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_resource(indexbuf)->TC_L2_dirty = false; } } @@ -2158,14 +2158,14 @@ static void si_draw(struct pipe_context *ctx, if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) { if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_resource(indirect->buffer)->TC_L2_dirty = false; } if (indirect->indirect_draw_count && si_resource(indirect->indirect_draw_count)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; } } @@ -2307,7 +2307,7 @@ static void si_draw(struct pipe_context *ctx, (sctx, indirect, prim, index_size, instance_count, primitive_restart, info->restart_index, min_direct_count); - /* <-- CUs are idle here if the cache_flush state waited. */ + /* <-- CUs are idle here if the barrier atom waited. */ /* This must be done after si_emit_all_states, which can affect this. */ si_emit_vs_state diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 69ca9f39910..952a956dc19 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3770,7 +3770,7 @@ bool si_update_ngg(struct si_context *sctx) */ if (sctx->screen->info.has_vgt_flush_ngg_legacy_bug && !new_ngg) { sctx->flags |= SI_CONTEXT_VGT_FLUSH; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); if (sctx->gfx_level == GFX10) { /* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 */ diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index dafd5f92942..766fe6f72ef 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -105,7 +105,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) sctx->flags |= SI_CONTEXT_WB_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } /* TODO: This is a hack that fixes these failures. It shouldn't be necessary. @@ -229,7 +229,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ */ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } else { si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false); si_set_streamout_enable(sctx, false); @@ -372,7 +372,7 @@ void si_emit_streamout_end(struct si_context *sctx) if (sctx->gfx_level >= GFX11) { /* Wait for streamout to finish before reading GDS_STRMOUT registers. */ sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH; - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); } else { si_flush_vgt_streamout(sctx); } @@ -388,7 +388,7 @@ void si_emit_streamout_end(struct si_context *sctx) (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); /* For DrawTF reading buf_filled_size: */ sctx->flags |= SI_CONTEXT_PFP_SYNC_ME; - si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush); + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); } else { uint64_t va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; diff --git a/src/gallium/drivers/radeonsi/si_test_blit_perf.c b/src/gallium/drivers/radeonsi/si_test_blit_perf.c index d419c6289f6..d215d4c1340 100644 --- a/src/gallium/drivers/radeonsi/si_test_blit_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_blit_perf.c @@ -516,7 +516,7 @@ void si_test_blit_perf(struct si_screen *sscreen) fb.nr_cbufs = 1; fb.cbufs[0] = dst_surf; ctx->set_framebuffer_state(ctx, &fb); - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); } } @@ -644,7 +644,7 @@ void si_test_blit_perf(struct si_screen *sscreen) SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_L2 | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE; - si_emit_cache_flush_direct(sctx); + si_emit_barrier_direct(sctx); ctx->end_query(ctx, q); pipe_surface_reference(&dst_surf, NULL);