radeonsi: add barrier helpers for simple internal buffer ops

These just take dst and src parameters instead of lists of buffers and
images.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31193>
This commit is contained in:
Marek Olšák 2024-08-22 14:51:34 -04:00 committed by Marge Bot
parent 999b254ca8
commit 5d607348a3
3 changed files with 43 additions and 26 deletions

View file

@ -159,6 +159,34 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
}
}
static void si_set_dst_src_barrier_buffers(struct pipe_shader_buffer *buffers,
struct pipe_resource *dst, struct pipe_resource *src)
{
assert(dst);
memset(buffers, 0, sizeof(buffers[0]) * 2);
/* Only the "buffer" field is going to be used. */
buffers[0].buffer = dst;
buffers[1].buffer = src;
}
/* This is for simple buffer ops that have 1 dst and 0-1 src. */
void si_barrier_before_simple_buffer_op(struct si_context *sctx, unsigned flags,
struct pipe_resource *dst, struct pipe_resource *src)
{
struct pipe_shader_buffer barrier_buffers[2];
si_set_dst_src_barrier_buffers(barrier_buffers, dst, src);
si_barrier_before_internal_op(sctx, flags, src ? 2 : 1, barrier_buffers, 0x1, 0, NULL);
}
/* This is for simple buffer ops that have 1 dst and 0-1 src. */
void si_barrier_after_simple_buffer_op(struct si_context *sctx, unsigned flags,
struct pipe_resource *dst, struct pipe_resource *src)
{
struct pipe_shader_buffer barrier_buffers[2];
si_set_dst_src_barrier_buffers(barrier_buffers, dst, src);
si_barrier_after_internal_op(sctx, flags, src ? 2 : 1, barrier_buffers, 0x1, 0, NULL);
}
static void si_compute_begin_internal(struct si_context *sctx, unsigned flags)
{
sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
@ -354,12 +382,10 @@ bool si_compute_clear_copy_buffer(struct si_context *sctx, struct pipe_resource
struct pipe_grid_info grid = {};
set_work_size(&grid, dispatch.workgroup_size, 1, 1, dispatch.num_threads, 1, 1);
unsigned writable_bitmask = is_copy ? 0x2 : 0x1;
si_barrier_before_internal_op(sctx, flags, dispatch.num_ssbos, sb, writable_bitmask, 0, NULL);
si_barrier_before_simple_buffer_op(sctx, flags, dst, src);
si_launch_grid_internal_ssbos(sctx, &grid, shader, flags, dispatch.num_ssbos, sb,
writable_bitmask);
si_barrier_after_internal_op(sctx, flags, dispatch.num_ssbos, sb, writable_bitmask, 0, NULL);
is_copy ? 0x2 : 0x1);
si_barrier_after_simple_buffer_op(sctx, flags, dst, src);
return true;
}
@ -457,9 +483,9 @@ void si_compute_shorten_ubyte_buffer(struct si_context *sctx, struct pipe_resour
sb[1].buffer_offset = src_offset;
sb[1].buffer_size = count;
si_barrier_before_internal_op(sctx, flags, 2, sb, 0x1, 0, NULL);
si_barrier_before_simple_buffer_op(sctx, flags, dst, src);
si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_ubyte_to_ushort, flags, 2, sb, 0x1);
si_barrier_after_internal_op(sctx, flags, 2, sb, 0x1, 0, NULL);
si_barrier_after_simple_buffer_op(sctx, flags, dst, src);
}
static void si_compute_save_and_bind_images(struct si_context *sctx, unsigned num_images,
@ -546,9 +572,9 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
unsigned flags = SI_OP_SYNC_BEFORE;
si_barrier_before_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL);
si_barrier_before_simple_buffer_op(sctx, flags, sb.buffer, NULL);
si_launch_grid_internal_ssbos(sctx, &info, *shader, flags, 1, &sb, 0x1);
si_barrier_after_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL);
si_barrier_after_simple_buffer_op(sctx, flags, sb.buffer, NULL);
/* Don't flush caches. L2 will be flushed by the kernel fence. */
}

View file

@ -156,12 +156,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
}
struct pipe_shader_buffer barrier_buffer;
barrier_buffer.buffer = dst;
barrier_buffer.buffer_offset = MIN2(offset, UINT32_MAX);
barrier_buffer.buffer_size = MIN2(size, UINT32_MAX);
si_barrier_before_internal_op(sctx, user_flags, 1, &barrier_buffer, 0x1, 0, NULL);
si_barrier_before_simple_buffer_op(sctx, user_flags, dst, NULL);
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
@ -192,7 +187,7 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
va += byte_count;
}
si_barrier_after_internal_op(sctx, user_flags, 1, &barrier_buffer, 0x1, 0, NULL);
si_barrier_after_simple_buffer_op(sctx, user_flags, dst, NULL);
sctx->num_cp_dma_calls++;
}
@ -250,15 +245,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
}
struct pipe_shader_buffer barrier_buffers[2];
barrier_buffers[0].buffer = dst;
barrier_buffers[0].buffer_offset = MIN2(dst_offset, UINT32_MAX);
barrier_buffers[0].buffer_size = MIN2(size, UINT32_MAX);
barrier_buffers[1].buffer = src;
barrier_buffers[1].buffer_offset = MIN2(src_offset, UINT32_MAX);
barrier_buffers[1].buffer_size = MIN2(size, UINT32_MAX);
si_barrier_before_internal_op(sctx, user_flags, 2, barrier_buffers, 0x1, 0, NULL);
si_barrier_before_simple_buffer_op(sctx, user_flags, dst, src);
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
@ -357,7 +344,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (realign_size)
si_cp_dma_realign_engine(sctx, realign_size, user_flags, &is_first);
si_barrier_after_internal_op(sctx, user_flags, 2, barrier_buffers, 0x1, 0, NULL);
si_barrier_after_simple_buffer_op(sctx, user_flags, dst, src);
sctx->num_cp_dma_calls++;
}

View file

@ -1484,6 +1484,10 @@ void si_barrier_after_internal_op(struct si_context *sctx, unsigned flags,
unsigned writable_buffers_mask,
unsigned num_images,
const struct pipe_image_view *images);
void si_barrier_before_simple_buffer_op(struct si_context *sctx, unsigned flags,
struct pipe_resource *dst, struct pipe_resource *src);
void si_barrier_after_simple_buffer_op(struct si_context *sctx, unsigned flags,
struct pipe_resource *dst, struct pipe_resource *src);
bool si_should_blit_clamp_to_edge(const struct pipe_blit_info *info, unsigned coord_mask);
void si_launch_grid_internal_ssbos(struct si_context *sctx, struct pipe_grid_info *info,
void *shader, unsigned flags, unsigned num_buffers,