From aa9fe1e423c3cfcdeadb5fe84461bcfe9233f332 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 8 Dec 2020 10:57:45 +0100 Subject: [PATCH] radeonsi: pass radeon_cmdbuf to emit_cache_flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/radeonsi/si_compute.c | 2 +- src/gallium/drivers/radeonsi/si_cp_dma.c | 2 +- src/gallium/drivers/radeonsi/si_descriptors.c | 2 +- src/gallium/drivers/radeonsi/si_gfx_cs.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_state.h | 4 ++-- .../drivers/radeonsi/si_state_draw.cpp | 21 ++++++++----------- .../drivers/radeonsi/si_state_streamout.c | 2 +- .../drivers/radeonsi/si_test_dma_perf.c | 4 ++-- 9 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index abf11665fb8..775d92421f2 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -886,7 +886,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info /* Registers that are not read from memory should be set before this: */ if (sctx->flags) - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); if (sctx->has_graphics && si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { sctx->atoms.s.render_cond.emit(sctx); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 18b5ed01a6c..d1b764e6bae 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -179,7 +179,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst * Also wait for the previous CP DMA operations. */ if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->flags) - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first && !(*packet_flags & CP_DMA_CLEAR)) *packet_flags |= CP_DMA_RAW_WAIT; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index c54a58ace86..4b9aeaff4d5 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1723,7 +1723,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx) * descriptors directly in memory, in case the GPU is using them. */ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { unsigned desc_slot = (*tex_handle)->desc_slot; diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 3218620eecd..08042c93725 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -146,7 +146,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h /* Wait for draw calls to finish if needed. */ if (wait_flags) { ctx->flags |= wait_flags; - ctx->emit_cache_flush(ctx); + ctx->emit_cache_flush(ctx, &ctx->gfx_cs); } ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 2f4da9ecec0..3f63c7ad190 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -934,7 +934,7 @@ struct si_context { void *sh_query_result_shader; struct si_resource *shadowed_regs; - void (*emit_cache_flush)(struct si_context *ctx); + void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs); struct blitter_context *blitter; void *noop_blend; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9a3ca2ac0cc..9eb798e0fd7 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -592,8 +592,8 @@ bool si_update_ngg(struct si_context *sctx); void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl); void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx); -void gfx10_emit_cache_flush(struct si_context *sctx); -void si_emit_cache_flush(struct si_context *sctx); +void gfx10_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs); +void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs); void si_trace_emit(struct si_context *sctx); void si_init_draw_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index e51dde169f0..e942c3cabb3 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1173,10 +1173,8 @@ void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx) *sctx->last_pkt3_write_data = PKT3(PKT3_NOP, 3, 0); } -extern "C" -void gfx10_emit_cache_flush(struct si_context *ctx) +void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) { - struct radeon_cmdbuf *cs = &ctx->gfx_cs; uint32_t gcr_cntl = 0; unsigned cb_db_event = 0; unsigned flags = ctx->flags; @@ -1318,7 +1316,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx) EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number, SI_NOT_QUERY); - si_cp_wait_mem(ctx, &ctx->gfx_cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); + si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); } /* Ignore fields that only modify the behavior of other fields. */ @@ -1354,9 +1352,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx) } extern "C" -void si_emit_cache_flush(struct si_context *sctx) +void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) { - struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint32_t flags = sctx->flags; if (!sctx->has_graphics) { @@ -1545,7 +1542,7 @@ void si_emit_cache_flush(struct si_context *sctx) /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) * WB must be set on GFX8+ when TC_ACTION is set. */ - si_emit_surface_sync(sctx, &sctx->gfx_cs, + si_emit_surface_sync(sctx, cs, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); cp_coher_cntl = 0; @@ -1562,21 +1559,21 @@ void si_emit_cache_flush(struct si_context *sctx) * WB doesn't work without NC. */ si_emit_surface_sync( - sctx, &sctx->gfx_cs, + sctx, cs, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; sctx->num_L2_writebacks++; } if (flags & SI_CONTEXT_INV_VCACHE) { /* Invalidate per-CU VMEM L1. */ - si_emit_surface_sync(sctx, &sctx->gfx_cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); + si_emit_surface_sync(sctx, cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; } } /* If TC flushes haven't cleared this... */ if (cp_coher_cntl) - si_emit_surface_sync(sctx, &sctx->gfx_cs, cp_coher_cntl); + si_emit_surface_sync(sctx, cs, cp_coher_cntl); if (is_barrier) si_prim_discard_signal_next_compute_ib_start(sctx); @@ -2347,7 +2344,7 @@ static void si_draw_vbo(struct pipe_context *ctx, si_emit_all_states (sctx, info, indirect, prim, instance_count, min_direct_count, primitive_restart, masked_atoms); - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); /* <-- CUs are idle here. */ if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { @@ -2378,7 +2375,7 @@ static void si_draw_vbo(struct pipe_context *ctx, * states, and draw at the end. */ if (sctx->flags) - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); /* Only prefetch the API VS and VBO descriptors. */ if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask) diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index f52e4b1b70d..4c38746ed16 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -207,7 +207,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); if (wait_now) - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); } static void gfx10_emit_streamout_begin(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index ad03f5c532c..d9214b58948 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -172,7 +172,7 @@ void si_test_dma_perf(struct si_screen *sscreen) sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB; - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); struct pipe_query *q = ctx->create_query(ctx, query_type, 0); ctx->begin_query(ctx, q); @@ -235,7 +235,7 @@ void si_test_dma_perf(struct si_screen *sscreen) sctx->flags |= SI_CONTEXT_INV_VCACHE | (cache_policy == L2_LRU ? 0 : SI_CONTEXT_INV_L2) | SI_CONTEXT_CS_PARTIAL_FLUSH; - sctx->emit_cache_flush(sctx); + sctx->emit_cache_flush(sctx, &sctx->gfx_cs); } ctx->end_query(ctx, q);