radeonsi: don't sync PS or CS before (clear|copy)_buffer based on bind history

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13400>
This commit is contained in:
Marek Olšák 2021-10-16 13:02:58 -04:00 committed by Marge Bot
parent 4bc8c2590e
commit 61ebdcfc29
3 changed files with 44 additions and 1 deletions

View file

@ -59,11 +59,43 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
}
}
/**
 * Relax the "sync before" flags of a buffer operation based on the bind
 * history of the buffers involved.
 *
 * Nothing is changed unless dst (and src, when given) are PIPE_BUFFER
 * resources.
 *
 * \param sctx   context (not referenced by this function)
 * \param dst    destination resource
 * \param src    source resource, or NULL when the operation has none
 * \param flags  SI_OP_* flags; updated in place
 */
static void si_improve_sync_flags(struct si_context *sctx, struct pipe_resource *dst,
                                  struct pipe_resource *src, unsigned *flags)
{
   /* Bind history is only consulted for buffers. */
   if (dst->target != PIPE_BUFFER)
      return;
   if (src && src->target != PIPE_BUFFER)
      return;

   /* Every kind of buffer binding a compute shader can have. */
   const unsigned compute_binds = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_COMPUTE) |
                                  SI_BIND_SHADER_BUFFER(PIPE_SHADER_COMPUTE) |
                                  SI_BIND_IMAGE_BUFFER(PIPE_SHADER_COMPUTE) |
                                  SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_COMPUTE);

   /* Every kind of buffer binding a fragment shader can have. */
   const unsigned fragment_binds = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_FRAGMENT) |
                                   SI_BIND_SHADER_BUFFER(PIPE_SHADER_FRAGMENT) |
                                   SI_BIND_IMAGE_BUFFER(PIPE_SHADER_FRAGMENT) |
                                   SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_FRAGMENT);

   /* Union of the bind history of all buffers taking part in the operation. */
   unsigned history = si_resource(dst)->bind_history;
   if (src)
      history |= si_resource(src)->bind_history;

   unsigned updated = *flags;

   /* No compute binding was ever recorded: the CS sync can be dropped. */
   if (!(history & compute_binds))
      updated &= ~SI_OP_SYNC_CS_BEFORE;

   /* No fragment binding was ever recorded: replace the requested PS sync
    * with SI_OP_SYNC_GE_BEFORE, which only syncs VS, TCS, TES and GS.
    * NOTE(review): presumably the GE sync is kept because only fragment
    * bindings were ruled out here - confirm.
    */
   if ((updated & SI_OP_SYNC_PS_BEFORE) && !(history & fragment_binds))
      updated = (updated & ~SI_OP_SYNC_PS_BEFORE) | SI_OP_SYNC_GE_BEFORE;

   *flags = updated;
}
void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
void *shader, unsigned flags)
{
/* Wait for previous shaders to finish. */
if (flags & SI_OP_SYNC_GE_BEFORE)
sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH;
if (flags & SI_OP_SYNC_PS_BEFORE)
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
@ -315,6 +347,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (!size)
return;
si_improve_sync_flags(sctx, dst, NULL, &flags);
ASSERTED unsigned clear_alignment = MIN2(clear_value_size, 4);
assert(clear_value_size != 3 && clear_value_size != 6); /* 12 is allowed. */
@ -404,6 +438,8 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size);
uint64_t compute_min_size = 8 * 1024;
si_improve_sync_flags(sctx, dst, src, &flags);
/* Only use compute for VRAM copies on dGPUs. */
if (sctx->screen->info.has_dedicated_vram && si_resource(dst)->domains & RADEON_DOMAIN_VRAM &&
si_resource(src)->domains & RADEON_DOMAIN_VRAM && size > compute_min_size &&

View file

@ -196,6 +196,9 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
assert(size && size % 4 == 0);
if (user_flags & SI_OP_SYNC_GE_BEFORE)
sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
if (user_flags & SI_OP_SYNC_CS_BEFORE)
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
@ -337,6 +340,9 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
}
}
if (user_flags & SI_OP_SYNC_GE_BEFORE)
sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
if (user_flags & SI_OP_SYNC_CS_BEFORE)
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;

View file

@ -1367,6 +1367,7 @@ void si_init_clear_functions(struct si_context *sctx);
#define SI_OP_CS_IMAGE (1 << 5)
#define SI_OP_CS_RENDER_COND_ENABLE (1 << 6)
#define SI_OP_CPDMA_SKIP_CHECK_CS_SPACE (1 << 7) /* don't call need_cs_space */
#define SI_OP_SYNC_GE_BEFORE (1 << 8) /* only sync VS, TCS, TES, GS */
unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
enum si_cache_policy cache_policy);