From 0be3900b8d372e6e00f03db4d00c3433ade0f605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 25 Aug 2024 14:15:49 -0400 Subject: [PATCH] radeonsi: move DB synchronization into si_fb_barrier_after_rendering Now these workarounds are applied by every caller that passes SI_FB_BARRIER_SYNC_DB (framebuffer state changes, compute dispatches that need DB coherency, and depth decompression of the bound zsbuf), not only by si_set_framebuffer_state. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_barrier.c | 39 +++++++++++++++++++++ src/gallium/drivers/radeonsi/si_blit.c | 2 +- src/gallium/drivers/radeonsi/si_compute.c | 8 ++--- src/gallium/drivers/radeonsi/si_pipe.h | 2 ++ src/gallium/drivers/radeonsi/si_state.c | 41 +---------------------- 5 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_barrier.c b/src/gallium/drivers/radeonsi/si_barrier.c index 5c8cddbfd51..7fd8ac4086d 100644 --- a/src/gallium/drivers/radeonsi/si_barrier.c +++ b/src/gallium/drivers/radeonsi/si_barrier.c @@ -768,6 +768,45 @@ void si_fb_barrier_after_rendering(struct si_context *sctx, unsigned flags) sctx->framebuffer.all_DCC_pipe_aligned); } } + + if (flags & SI_FB_BARRIER_SYNC_DB && sctx->framebuffer.state.zsbuf) { + /* DB caches are flushed on demand (using si_decompress_textures) except the cases below. */ + if (sctx->gfx_level >= GFX12) { + si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false); + } else if (sctx->generate_mipmap_for_depth) { + /* u_blitter doesn't invoke depth decompression when it does multiple blits in a row, + * but the only case when it matters for DB is when doing generate_mipmap, which writes Z, + * which is always uncompressed. So here we flush DB manually between individual + * generate_mipmap blits. 
+ */ + si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); + } else if (sctx->screen->info.family == CHIP_NAVI33) { + struct si_surface *old_zsurf = (struct si_surface *)sctx->framebuffer.state.zsbuf; + struct si_texture *old_ztex = (struct si_texture *)old_zsurf->base.texture; + + if (old_ztex->upgraded_depth) { + /* TODO: some failures related to hyperz appeared after 969ed851 on nv33: + * - piglit tex-miplevel-selection + * - KHR-GL46.direct_state_access.framebuffers_texture_attachment + * - GTF-GL46.gtf30.GL3Tests.blend_minmax.blend_minmax_draw + * - KHR-GL46.direct_state_access.framebuffers_texture_layer_attachment + * + * This seems to fix them: + */ + sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2; + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); + } + } else if (sctx->gfx_level == GFX9) { + /* It appears that DB metadata "leaks" in a sequence of: + * - depth clear + * - DCC decompress for shader image writes (with DB disabled) + * - render with DEPTH_BEFORE_SHADER=1 + * Flushing DB metadata works around the problem. 
+ */ + sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; + si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); + } + } } void si_init_barrier_functions(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index b8e3f78074b..a130e847f59 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -933,7 +933,7 @@ void si_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *t */ if (sctx->framebuffer.state.zsbuf && sctx->framebuffer.state.zsbuf->u.tex.level == level && sctx->framebuffer.state.zsbuf->texture == tex) - si_fb_barrier_after_rendering(sctx, 0); + si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_DB); si_decompress_depth(sctx, stex, planes, level, level, first_layer, last_layer); } else if (stex->surface.fmask_size || stex->cmask_buffer || diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index b81784d0ae0..b7759eeb69b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1193,15 +1193,15 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info (sctx->force_shader_coherency.with_db || si_check_needs_implicit_sync(sctx, RADEON_USAGE_DB_NEEDS_IMPLICIT_SYNC)); - si_fb_barrier_after_rendering(sctx, sync_cb ? SI_FB_BARRIER_SYNC_CB : 0); + si_fb_barrier_after_rendering(sctx, + (sync_cb ? SI_FB_BARRIER_SYNC_CB : 0) | + (sync_db ? 
SI_FB_BARRIER_SYNC_DB : 0)); if (sync_cb) sctx->num_draw_calls_sh_coherent.with_cb = sctx->num_draw_calls; - if (sync_db) { + if (sync_db) sctx->num_draw_calls_sh_coherent.with_db = sctx->num_draw_calls; - si_make_DB_shader_coherent(sctx, 0, false, false); - } } if (sctx->gfx_level < GFX11) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b5feae5d739..8b64d400523 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1365,6 +1365,8 @@ struct si_context { /* si_barrier.c */ #define SI_FB_BARRIER_SYNC_CB BITFIELD_BIT(0) +#define SI_FB_BARRIER_SYNC_DB BITFIELD_BIT(1) +#define SI_FB_BARRIER_SYNC_ALL BITFIELD_RANGE(0, 2) void si_barrier_before_internal_op(struct si_context *sctx, unsigned flags, unsigned num_buffers, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index eabcc536009..de3f84fd225 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2547,7 +2547,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, return; } - si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_CB); + si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_ALL); /* Disable DCC if the formats are incompatible. */ if (sctx->gfx_level >= GFX8 && sctx->gfx_level < GFX11) { @@ -2575,45 +2575,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->barrier_flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); - /* DB caches are flushed on demand (using si_decompress_textures) except the cases below. 
*/ - if (sctx->gfx_level >= GFX12) { - si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false); - } else if (sctx->generate_mipmap_for_depth) { - /* u_blitter doesn't invoke depth decompression when it does multiple - * blits in a row, but the only case when it matters for DB is when - * doing generate_mipmap. So here we flush DB manually between - * individual generate_mipmap blits. - * Note that lower mipmap levels aren't compressed. - */ - si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); - } else if (old_has_zsbuf && - sctx->gfx_level == GFX11 && sctx->screen->info.family == CHIP_NAVI33) { - struct si_surface *old_zsurf = (struct si_surface *)sctx->framebuffer.state.zsbuf; - struct si_texture *old_ztex = (struct si_texture *)old_zsurf->base.texture; - - if (old_ztex->upgraded_depth) { - /* TODO: some failures related to hyperz appeared after 969ed851 on nv33: - * - piglit tex-miplevel-selection - * - KHR-GL46.direct_state_access.framebuffers_texture_attachment - * - GTF-GL46.gtf30.GL3Tests.blend_minmax.blend_minmax_draw - * - KHR-GL46.direct_state_access.framebuffers_texture_layer_attachment - * - * This seems to fix them: - */ - sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_L2; - si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); - } - } else if (sctx->gfx_level == GFX9) { - /* It appears that DB metadata "leaks" in a sequence of: - * - depth clear - * - DCC decompress for shader image writes (with DB disabled) - * - render with DEPTH_BEFORE_SHADER=1 - * Flushing DB metadata works around the problem. - */ - sctx->barrier_flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; - si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier); - } - /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. */