diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 75eda35217d..6f79adc7385 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -488,6 +488,22 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture custom_blend == sctx->custom_blend_dcc_decompress) sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; + /* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass + * separately because FMASK decompression doesn't eliminate DCC fast clear. This makes + * render->texture transitions more expensive. It can be disabled by + * allow_dcc_msaa_clear_to_reg_for_bpp. + * + * TODO: When we get here, change the compression to TC-compatible on the next clear + * to disable both the FMASK decompression and fast clear elimination passes. + */ + if (sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)] && + custom_blend == sctx->custom_blend_fmask_decompress && + vi_dcc_enabled(tex, level)) { + si_blitter_begin(sctx, SI_DECOMPRESS); + util_blitter_custom_color(sctx->blitter, cbsurf, sctx->custom_blend_eliminate_fastclear); + si_blitter_end(sctx); + } + pipe_surface_reference(&cbsurf, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index ee69e872c4f..adeff6f4b73 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -644,8 +644,8 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode)) continue; - /* TODO: This DCC+CMASK clear doesn't work with MSAA. 
*/ - if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer && eliminate_needed) + if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed && + !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)]) continue; assert(num_clears < ARRAY_SIZE(info)); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9181aef7615..ede75601f4a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1204,6 +1204,14 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING)); sscreen->use_ngg_streamout = false; + /* Only set this for the cases that are known to work, which are: + * - GFX9 if bpp >= 4 (in bytes) + */ + if (sscreen->info.chip_class == GFX9) { + for (unsigned bpp_log2 = util_logbase2(4); bpp_log2 <= util_logbase2(16); bpp_log2++) + sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true; + } + /* Only enable primitive binning on APUs by default. 
*/ if (sscreen->info.chip_class >= GFX10) { sscreen->dpbb_allowed = true; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b18dd62e486..12d806093b2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -557,6 +557,7 @@ struct si_screen { bool use_ngg; bool use_ngg_culling; bool use_ngg_streamout; + bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */ struct { #define OPT_BOOL(name, dflt, description) bool name : 1; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index a99d55b8670..578714ea775 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -233,8 +233,7 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac case GFX9: /* DCC clear for 4x and 8x MSAA textures unimplemented. */ - if (ptex->nr_storage_samples >= 4 || - (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4)) + if (ptex->nr_storage_samples >= 4) flags |= RADEON_SURF_DISABLE_DCC; break;