radeonsi: move barriers out of si_clear_buffer

Some callers need no barriers at all, while other callers only need the
barrier after the clear.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31193>
This commit is contained in:
Marek Olšák 2024-08-22 16:27:09 -04:00 committed by Marge Bot
parent 80a6f568e2
commit 1ee394c34d
4 changed files with 33 additions and 15 deletions

View file

@ -194,8 +194,10 @@ bool si_alloc_resource(struct si_screen *sscreen, struct si_resource *res)
struct si_context *ctx = si_get_aux_context(&sscreen->aux_context.general);
uint32_t value = 0;
si_clear_buffer(ctx, &res->b.b, 0, res->bo_size, &value, 4, SI_OP_SYNC_AFTER,
unsigned flags = SI_OP_SYNC_AFTER;
si_clear_buffer(ctx, &res->b.b, 0, res->bo_size, &value, 4, flags,
SI_AUTO_SELECT_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(ctx, flags, &res->b.b, NULL);
si_put_aux_context_flush(&sscreen->aux_context.general);
}

View file

@ -406,15 +406,11 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped))
clear_value = &clamped;
si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL);
if (method != SI_CP_DMA_CLEAR_METHOD &&
si_compute_clear_copy_buffer(sctx, dst, offset, NULL, 0, size, clear_value,
clear_value_size, flags, 0,
method == SI_AUTO_SELECT_CLEAR_METHOD)) {
si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL);
method == SI_AUTO_SELECT_CLEAR_METHOD))
return;
}
uint64_t aligned_size = size & ~3ull;
if (aligned_size) {
@ -423,8 +419,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, offset, aligned_size, *clear_value, flags);
}
si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL);
offset += aligned_size;
size -= aligned_size;
@ -449,8 +443,13 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx, struct pipe_resource
unsigned offset, unsigned size, const void *clear_value,
int clear_value_size)
{
si_clear_buffer((struct si_context *)ctx, dst, offset, size, (uint32_t *)clear_value,
clear_value_size, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD);
struct si_context *sctx = (struct si_context *)ctx;
unsigned flags = SI_OP_SYNC_BEFORE_AFTER;
si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL);
si_clear_buffer(sctx, dst, offset, size, (uint32_t *)clear_value, clear_value_size, flags,
SI_AUTO_SELECT_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL);
}
void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src,
@ -698,10 +697,13 @@ void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex
/* Clear FMASK to identity. */
struct si_texture *stex = (struct si_texture *)tex;
unsigned op_flags = SI_OP_SYNC_AFTER;
si_clear_buffer(sctx, tex, stex->surface.fmask_offset, stex->surface.fmask_size,
(uint32_t *)&fmask_expand_values[log_fragments][log_samples - 1],
log_fragments >= 2 && log_samples == 4 ? 8 : 4, SI_OP_SYNC_AFTER,
log_fragments >= 2 && log_samples == 4 ? 8 : 4, op_flags,
SI_AUTO_SELECT_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(sctx, op_flags, tex, NULL);
}
void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex,

View file

@ -829,8 +829,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
* for some reason when the compute codepath is used.
*/
uint32_t clear_value = 0;
unsigned op_flags = SI_OP_SYNC_AFTER;
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0,
&clear_value, 4, SI_OP_SYNC_AFTER, SI_CP_DMA_CLEAR_METHOD);
&clear_value, 4, op_flags, SI_CP_DMA_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(sctx, op_flags, sctx->null_const_buf.buffer, NULL);
}
if (!(flags & SI_CONTEXT_FLAG_AUX)) {

View file

@ -537,8 +537,12 @@ void si_test_image_copy_region(struct si_screen *sscreen)
/* clear dst pixels */
uint32_t zero = 0;
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, SI_OP_SYNC_BEFORE_AFTER,
unsigned flags = SI_OP_SYNC_BEFORE_AFTER;
si_barrier_before_simple_buffer_op(sctx, flags, dst, NULL);
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, flags,
SI_AUTO_SELECT_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(sctx, flags, dst, NULL);
for (j = 0; j < num_partial_copies; j++) {
int width, height, depth;
@ -715,10 +719,17 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
/* clear dst pixels */
uint32_t zero = 0;
unsigned flags = SI_OP_SYNC_BEFORE_AFTER;
/* Using 2 consecutive barrier calls results in a single merged barrier for both resources. */
si_barrier_before_simple_buffer_op(sctx, flags, gfx_dst, NULL);
si_barrier_before_simple_buffer_op(sctx, flags, comp_dst, NULL);
si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero,
4, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD);
4, flags, SI_AUTO_SELECT_CLEAR_METHOD);
si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero,
4, SI_OP_SYNC_BEFORE_AFTER, SI_AUTO_SELECT_CLEAR_METHOD);
4, flags, SI_AUTO_SELECT_CLEAR_METHOD);
si_barrier_after_simple_buffer_op(sctx, flags, gfx_dst, NULL);
si_barrier_after_simple_buffer_op(sctx, flags, comp_dst, NULL);
/* TODO: These two fix quite a lot of BCn cases. */
/*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,