diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 696aa8d12bc..b5dd93b6489 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -59,11 +59,43 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, } } +static void si_improve_sync_flags(struct si_context *sctx, struct pipe_resource *dst, + struct pipe_resource *src, unsigned *flags) +{ + if (dst->target != PIPE_BUFFER || (src && src->target != PIPE_BUFFER)) + return; + + const unsigned cs_mask = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_COMPUTE) | + SI_BIND_SHADER_BUFFER(PIPE_SHADER_COMPUTE) | + SI_BIND_IMAGE_BUFFER(PIPE_SHADER_COMPUTE) | + SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_COMPUTE); + + const unsigned ps_mask = SI_BIND_CONSTANT_BUFFER(PIPE_SHADER_FRAGMENT) | + SI_BIND_SHADER_BUFFER(PIPE_SHADER_FRAGMENT) | + SI_BIND_IMAGE_BUFFER(PIPE_SHADER_FRAGMENT) | + SI_BIND_SAMPLER_BUFFER(PIPE_SHADER_FRAGMENT); + + unsigned bind_history = si_resource(dst)->bind_history | + (src ? si_resource(src)->bind_history : 0); + + /* Clear SI_OP_SYNC_CS_BEFORE if the buffer has never been used with a CS. */ + if (*flags & SI_OP_SYNC_CS_BEFORE && !(bind_history & cs_mask)) + *flags &= ~SI_OP_SYNC_CS_BEFORE; + + /* Clear SI_OP_SYNC_PS_BEFORE if the buffer has never been used with a PS. */ + if (*flags & SI_OP_SYNC_PS_BEFORE && !(bind_history & ps_mask)) { + *flags &= ~SI_OP_SYNC_PS_BEFORE; + *flags |= SI_OP_SYNC_GE_BEFORE; + } +} + void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info, void *shader, unsigned flags) { - /* Wait for previous shaders to finish. */ + if (flags & SI_OP_SYNC_GE_BEFORE) + sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH; + if (flags & SI_OP_SYNC_PS_BEFORE) sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; @@ -315,6 +347,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, if (!size) return; + si_improve_sync_flags(sctx, dst, NULL, &flags); + ASSERTED unsigned clear_alignment = MIN2(clear_value_size, 4); assert(clear_value_size != 3 && clear_value_size != 6); /* 12 is allowed. */ @@ -404,6 +438,8 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size); uint64_t compute_min_size = 8 * 1024; + si_improve_sync_flags(sctx, dst, src, &flags); + /* Only use compute for VRAM copies on dGPUs. */ if (sctx->screen->info.has_dedicated_vram && si_resource(dst)->domains & RADEON_DOMAIN_VRAM && si_resource(src)->domains & RADEON_DOMAIN_VRAM && size > compute_min_size && diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index ca22306209e..f37f74a32b6 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -196,6 +196,9 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, assert(size && size % 4 == 0); + if (user_flags & SI_OP_SYNC_GE_BEFORE) + sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME; + if (user_flags & SI_OP_SYNC_CS_BEFORE) sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME; @@ -337,6 +340,9 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, } } + if (user_flags & SI_OP_SYNC_GE_BEFORE) + sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME; + if (user_flags & SI_OP_SYNC_CS_BEFORE) sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 82a7f117ba1..a66ceecacdf 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1367,6 +1367,7 @@ void si_init_clear_functions(struct si_context *sctx); #define SI_OP_CS_IMAGE (1 << 5) #define SI_OP_CS_RENDER_COND_ENABLE (1 << 6) #define SI_OP_CPDMA_SKIP_CHECK_CS_SPACE (1 << 7) /* don't call need_cs_space */ +#define SI_OP_SYNC_GE_BEFORE (1 << 8) /* only sync VS, TCS, TES, GS */ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, enum si_cache_policy cache_policy);