mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 12:28:07 +02:00
radeonsi: fix read from compute / write from draw sync
A compute dispatch should see the result of a previous draw command. radeonsi was missing this implicit sync, causing rendering artifacts: the compute shader was reading from a texture still being written to by the previous draw. Framebuffer BOs are marked with RADEON_USAGE_NEEDS_IMPLICIT_SYNC, so compute jobs will sync. v2: use RADEON_USAGE_NEEDS_IMPLICIT_SYNC v3: unconditionally make CB coherent after a flush Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com> (v3) Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v3) Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4032 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2878 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1336 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8869>
This commit is contained in:
parent
a8373b3d38
commit
bddc0e023c
5 changed files with 67 additions and 4 deletions
|
|
@ -98,7 +98,12 @@ enum radeon_bo_usage
|
||||||
/* The winsys ensures that the CS submission will be scheduled after
|
/* The winsys ensures that the CS submission will be scheduled after
|
||||||
* previously flushed CSs referencing this BO in a conflicting way.
|
* previously flushed CSs referencing this BO in a conflicting way.
|
||||||
*/
|
*/
|
||||||
RADEON_USAGE_SYNCHRONIZED = 8
|
RADEON_USAGE_SYNCHRONIZED = 8,
|
||||||
|
|
||||||
|
/* When used, an implicit sync is done to make sure a compute shader
|
||||||
|
* will read the written values from a previous draw.
|
||||||
|
*/
|
||||||
|
RADEON_USAGE_NEEDS_IMPLICIT_SYNC = 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum radeon_map_flags
|
enum radeon_map_flags
|
||||||
|
|
|
||||||
|
|
@ -823,6 +823,47 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
|
||||||
radeon_end();
|
radeon_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool si_check_needs_implicit_sync(struct si_context *sctx)
|
||||||
|
{
|
||||||
|
/* If the compute shader is going to read from a texture/image written by a
|
||||||
|
* previous draw, we must wait for its completion before continuing.
|
||||||
|
* Buffers and image stores (from the draw) are not taken into consideration
|
||||||
|
* because that's the app responsibility.
|
||||||
|
*
|
||||||
|
* The OpenGL 4.6 spec says:
|
||||||
|
*
|
||||||
|
* buffer object and texture stores performed by shaders are not
|
||||||
|
* automatically synchronized
|
||||||
|
*/
|
||||||
|
struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;
|
||||||
|
struct si_samplers *samplers = &sctx->samplers[PIPE_SHADER_COMPUTE];
|
||||||
|
unsigned mask = samplers->enabled_mask & info->base.textures_used;
|
||||||
|
|
||||||
|
while (mask) {
|
||||||
|
int i = u_bit_scan(&mask);
|
||||||
|
struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
|
||||||
|
|
||||||
|
struct si_resource *res = si_resource(sview->base.texture);
|
||||||
|
if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf,
|
||||||
|
RADEON_USAGE_NEEDS_IMPLICIT_SYNC))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
|
||||||
|
mask = u_bit_consecutive(0, info->base.num_images);
|
||||||
|
|
||||||
|
while (mask) {
|
||||||
|
int i = u_bit_scan(&mask);
|
||||||
|
struct pipe_image_view *sview = &images->views[i];
|
||||||
|
|
||||||
|
struct si_resource *res = si_resource(sview->resource);
|
||||||
|
if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf,
|
||||||
|
RADEON_USAGE_NEEDS_IMPLICIT_SYNC))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
|
static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
|
||||||
{
|
{
|
||||||
struct si_context *sctx = (struct si_context *)ctx;
|
struct si_context *sctx = (struct si_context *)ctx;
|
||||||
|
|
@ -849,6 +890,11 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
|
||||||
if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
|
if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
|
||||||
si_update_fb_dirtiness_after_rendering(sctx);
|
si_update_fb_dirtiness_after_rendering(sctx);
|
||||||
sctx->last_num_draw_calls = sctx->num_draw_calls;
|
sctx->last_num_draw_calls = sctx->num_draw_calls;
|
||||||
|
|
||||||
|
if (sctx->force_cb_shader_coherent || si_check_needs_implicit_sync(sctx))
|
||||||
|
si_make_CB_shader_coherent(sctx, 0,
|
||||||
|
sctx->framebuffer.CB_has_shader_readable_metadata,
|
||||||
|
sctx->framebuffer.all_DCC_pipe_aligned);
|
||||||
}
|
}
|
||||||
|
|
||||||
si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
|
si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
|
||||||
|
|
|
||||||
|
|
@ -551,6 +551,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
||||||
ctx->index_ring_base = ctx->index_ring_size_per_ib;
|
ctx->index_ring_base = ctx->index_ring_size_per_ib;
|
||||||
|
|
||||||
ctx->index_ring_offset = 0;
|
ctx->index_ring_offset = 0;
|
||||||
|
|
||||||
|
/* All buffer references are removed on a flush, so si_check_needs_implicit_sync
|
||||||
|
* cannot determine if si_make_CB_shader_coherent() needs to be called.
|
||||||
|
* ctx->force_cb_shader_coherent will be cleared by the first call to
|
||||||
|
* si_make_CB_shader_coherent.
|
||||||
|
*/
|
||||||
|
ctx->force_cb_shader_coherent = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl)
|
void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl)
|
||||||
|
|
|
||||||
|
|
@ -1266,6 +1266,8 @@ struct si_context {
|
||||||
struct list_head shader_query_buffers;
|
struct list_head shader_query_buffers;
|
||||||
unsigned num_active_shader_queries;
|
unsigned num_active_shader_queries;
|
||||||
|
|
||||||
|
bool force_cb_shader_coherent;
|
||||||
|
|
||||||
/* Statistics gathering for the DCC enablement heuristic. It can't be
|
/* Statistics gathering for the DCC enablement heuristic. It can't be
|
||||||
* in si_texture because si_texture can be shared by multiple
|
* in si_texture because si_texture can be shared by multiple
|
||||||
* contexts. This is for back buffers only. We shouldn't get too many
|
* contexts. This is for back buffers only. We shouldn't get too many
|
||||||
|
|
@ -1745,6 +1747,7 @@ static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned
|
||||||
bool shaders_read_metadata, bool dcc_pipe_aligned)
|
bool shaders_read_metadata, bool dcc_pipe_aligned)
|
||||||
{
|
{
|
||||||
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;
|
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;
|
||||||
|
sctx->force_cb_shader_coherent = false;
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX10) {
|
if (sctx->chip_class >= GFX10) {
|
||||||
if (sctx->screen->info.tcc_harvested)
|
if (sctx->screen->info.tcc_harvested)
|
||||||
|
|
|
||||||
|
|
@ -2955,17 +2955,19 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
|
||||||
|
|
||||||
tex = (struct si_texture *)cb->base.texture;
|
tex = (struct si_texture *)cb->base.texture;
|
||||||
radeon_add_to_buffer_list(
|
radeon_add_to_buffer_list(
|
||||||
sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE,
|
sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
|
||||||
tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER);
|
tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER);
|
||||||
|
|
||||||
if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
|
if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
|
||||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, RADEON_USAGE_READWRITE,
|
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,
|
||||||
|
RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
|
||||||
RADEON_PRIO_SEPARATE_META);
|
RADEON_PRIO_SEPARATE_META);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tex->dcc_separate_buffer)
|
if (tex->dcc_separate_buffer)
|
||||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer,
|
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer,
|
||||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META);
|
RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
|
||||||
|
RADEON_PRIO_SEPARATE_META);
|
||||||
|
|
||||||
/* Compute mutable surface parameters. */
|
/* Compute mutable surface parameters. */
|
||||||
cb_color_base = tex->buffer.gpu_address >> 8;
|
cb_color_base = tex->buffer.gpu_address >> 8;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue