radeonsi: fix and enable full DCC with MSAA 2x on gfx9

This enables fast clear with any clear color (not just 0/1) for bpp >= 32.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10003>
This commit is contained in:
Marek Olšák 2021-03-19 16:14:23 -04:00 committed by Marge Bot
parent 7e68fae25f
commit 8b95f51ef1
5 changed files with 28 additions and 4 deletions

View file

@ -488,6 +488,22 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture
custom_blend == sctx->custom_blend_dcc_decompress) custom_blend == sctx->custom_blend_dcc_decompress)
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
/* When running FMASK decompression with DCC, we need to run the "eliminate fast clear" pass
* separately because FMASK decompression doesn't eliminate DCC fast clear. This makes
* render->texture transitions more expensive. The extra pass can be disabled by setting
* allow_dcc_msaa_clear_to_reg_for_bpp to false for the affected bpp.
*
* TODO: When we get here, change the compression to TC-compatible on the next clear
* to disable both the FMASK decompression and fast clear elimination passes.
*/
if (sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)] &&
custom_blend == sctx->custom_blend_fmask_decompress &&
vi_dcc_enabled(tex, level)) {
si_blitter_begin(sctx, SI_DECOMPRESS);
util_blitter_custom_color(sctx->blitter, cbsurf, sctx->custom_blend_eliminate_fastclear);
si_blitter_end(sctx);
}
pipe_surface_reference(&cbsurf, NULL); pipe_surface_reference(&cbsurf, NULL);
} }

View file

@ -644,8 +644,8 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode)) if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))
continue; continue;
/* TODO: This DCC+CMASK clear doesn't work with MSAA. */ if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&
if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer && eliminate_needed) !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])
continue; continue;
assert(num_clears < ARRAY_SIZE(info)); assert(num_clears < ARRAY_SIZE(info));

View file

@ -1204,6 +1204,14 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING)); sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING));
sscreen->use_ngg_streamout = false; sscreen->use_ngg_streamout = false;
/* Only set allow_dcc_msaa_clear_to_reg_for_bpp for the cases that are known to work, which are:
* - GFX9 if bytes per pixel >= 4 (i.e. 4, 8, and 16 bytes per pixel)
*/
if (sscreen->info.chip_class == GFX9) {
for (unsigned bpp_log2 = util_logbase2(4); bpp_log2 <= util_logbase2(16); bpp_log2++)
sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true;
}
/* Only enable primitive binning on APUs by default. */ /* Only enable primitive binning on APUs by default. */
if (sscreen->info.chip_class >= GFX10) { if (sscreen->info.chip_class >= GFX10) {
sscreen->dpbb_allowed = true; sscreen->dpbb_allowed = true;

View file

@ -557,6 +557,7 @@ struct si_screen {
bool use_ngg; bool use_ngg;
bool use_ngg_culling; bool use_ngg_culling;
bool use_ngg_streamout; bool use_ngg_streamout;
bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */
struct { struct {
#define OPT_BOOL(name, dflt, description) bool name : 1; #define OPT_BOOL(name, dflt, description) bool name : 1;

View file

@ -233,8 +233,7 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
case GFX9: case GFX9:
/* DCC clear for 4x and 8x MSAA textures unimplemented. */ /* DCC clear for 4x and 8x MSAA textures unimplemented. */
if (ptex->nr_storage_samples >= 4 || if (ptex->nr_storage_samples >= 4)
(sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4))
flags |= RADEON_SURF_DISABLE_DCC; flags |= RADEON_SURF_DISABLE_DCC;
break; break;