From 1ee394c34de94907e4192fc11f93719651b4a2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 22 Aug 2024 16:27:09 -0400 Subject: [PATCH] radeonsi: move barriers out of si_clear_buffer Some places need no barriers, while other places only need the barrier after. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_buffer.c | 4 +++- .../drivers/radeonsi/si_compute_blit.c | 22 ++++++++++--------- src/gallium/drivers/radeonsi/si_pipe.c | 5 ++++- .../radeonsi/si_test_image_copy_region.c | 17 +++++++++++--- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 38f449dd553..4917cbdb346 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -194,8 +194,10 @@ bool si_alloc_resource(struct si_screen *sscreen, struct si_resource *res) struct si_context *ctx = si_get_aux_context(&sscreen->aux_context.general); uint32_t value = 0; - si_clear_buffer(ctx, &res->b.b, 0, res->bo_size, &value, 4, SI_OP_SYNC_AFTER, + unsigned flags = SI_OP_SYNC_AFTER; + si_clear_buffer(ctx, &res->b.b, 0, res->bo_size, &value, 4, flags, SI_AUTO_SELECT_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(ctx, flags, &res->b.b, NULL); si_put_aux_context_flush(&sscreen->aux_context.general); } diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 5025e005e3c..250f66d5029 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -406,15 +406,11 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped)) clear_value = &clamped; - si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL); - if (method != SI_CP_DMA_CLEAR_METHOD && si_compute_clear_copy_buffer(sctx, dst, offset, NULL, 0, 
size, clear_value, clear_value_size, flags, 0, - method == SI_AUTO_SELECT_CLEAR_METHOD)) { - si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL); + method == SI_AUTO_SELECT_CLEAR_METHOD)) return; - } uint64_t aligned_size = size & ~3ull; if (aligned_size) { @@ -423,8 +419,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, offset, aligned_size, *clear_value, flags); } - si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL); - offset += aligned_size; size -= aligned_size; @@ -449,8 +443,13 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx, struct pipe_resource unsigned offset, unsigned size, const void *clear_value, int clear_value_size) { - si_clear_buffer((struct si_context *)ctx, dst, offset, size, (uint32_t *)clear_value, - clear_value_size, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD); + struct si_context *sctx = (struct si_context *)ctx; + unsigned flags = SI_OP_SYNC_BEFORE_AFTER; + + si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL); + si_clear_buffer(sctx, dst, offset, size, (uint32_t *)clear_value, clear_value_size, flags, + SI_AUTO_SELECT_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL); } void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, @@ -698,10 +697,13 @@ void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex /* Clear FMASK to identity. */ struct si_texture *stex = (struct si_texture *)tex; + unsigned op_flags = SI_OP_SYNC_AFTER; + si_clear_buffer(sctx, tex, stex->surface.fmask_offset, stex->surface.fmask_size, (uint32_t *)&fmask_expand_values[log_fragments][log_samples - 1], - log_fragments >= 2 && log_samples == 4 ? 8 : 4, SI_OP_SYNC_AFTER, + log_fragments >= 2 && log_samples == 4 ? 
8 : 4, op_flags, SI_AUTO_SELECT_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(sctx, op_flags, tex, NULL); } void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3b9fdb2a98f..4ea8d4c7b2c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -829,8 +829,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign * for some reason when the compute codepath is used. */ uint32_t clear_value = 0; + unsigned op_flags = SI_OP_SYNC_AFTER; + si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, - &clear_value, 4, SI_OP_SYNC_AFTER, SI_CP_DMA_CLEAR_METHOD); + &clear_value, 4, op_flags, SI_CP_DMA_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(sctx, op_flags, sctx->null_const_buf.buffer, NULL); } if (!(flags & SI_CONTEXT_FLAG_AUX)) { diff --git a/src/gallium/drivers/radeonsi/si_test_image_copy_region.c b/src/gallium/drivers/radeonsi/si_test_image_copy_region.c index 198a33ebdaf..c212af51cb5 100644 --- a/src/gallium/drivers/radeonsi/si_test_image_copy_region.c +++ b/src/gallium/drivers/radeonsi/si_test_image_copy_region.c @@ -537,8 +537,12 @@ void si_test_image_copy_region(struct si_screen *sscreen) /* clear dst pixels */ uint32_t zero = 0; - si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, SI_OP_SYNC_BEFORE_AFTER, + unsigned flags = SI_OP_SYNC_BEFORE_AFTER; + + si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL); + si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, flags, SI_AUTO_SELECT_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL); for (j = 0; j < num_partial_copies; j++) { int width, height, depth; @@ -715,10 +719,17 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags) /* clear dst pixels */ uint32_t zero = 0; + unsigned flags = 
SI_OP_SYNC_BEFORE_AFTER; + + /* Using 2 consecutive barrier calls results in a single merged barrier for both resources. */ + si_barrier_before_simple_buffer_op(sctx, flags, gfx_dst, NULL); + si_barrier_before_simple_buffer_op(sctx, flags, comp_dst, NULL); si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero, - 4, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD); + 4, flags, SI_AUTO_SELECT_CLEAR_METHOD); si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero, - 4, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD); + 4, flags, SI_AUTO_SELECT_CLEAR_METHOD); + si_barrier_after_simple_buffer_op(sctx, flags, gfx_dst, NULL); + si_barrier_after_simple_buffer_op(sctx, flags, comp_dst, NULL); /* TODO: These two fix quite a lot of BCn cases. */ /*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,