radeonsi: enable DCC stores on gfx10.3 APUs for better performance

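There is just one hardware bug that we need to handle: on chips that set has_image_load_dcc_bug, WRITE_COMPRESS_ENABLE is cleared in the image descriptor when the shader does not store to that image.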

The NO_DCC_FB debug flag (``nodccfb``) was unused, so it is removed.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12449>
This commit is contained in:
Marek Olšák 2021-08-17 19:01:37 -04:00 committed by Marge Bot
parent c88a546550
commit 34a2c75310
5 changed files with 33 additions and 5 deletions

View file

@ -697,8 +697,6 @@ radeonsi driver environment variables
Disable DCC.
``nodccclear``
Disable DCC fast clear.
``nodccfb``
Disable separate DCC on the main framebuffer
``nodccmsaa``
Disable DCC for MSAA
``nodpbb``

View file

@ -741,6 +741,9 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i
bool uses_dcc = vi_dcc_enabled(tex, level);
unsigned access = view->access;
if (uses_dcc && screen->always_allow_dcc_stores)
access |= SI_IMAGE_ACCESS_ALLOW_DCC_STORE;
assert(!tex->is_depth);
assert(fmask_desc || tex->surface.fmask_offset == 0);
@ -782,7 +785,7 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i
view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc);
si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level,
util_format_get_blockwidth(view->format),
false, view->access, desc);
false, access, desc);
}
}

View file

@ -109,7 +109,8 @@ static const struct debug_named_value radeonsi_debug_options[] = {
{"nodisplaydcc", DBG(NO_DISPLAY_DCC), "Disable display DCC"},
{"nodcc", DBG(NO_DCC), "Disable DCC."},
{"nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear."},
{"nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer"},
{"nodccstore", DBG(NO_DCC_STORE), "Disable DCC stores"},
{"dccstore", DBG(DCC_STORE), "Enable DCC stores"},
{"nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA"},
{"nofmask", DBG(NO_FMASK), "Disable MSAA compression"},
@ -1260,6 +1261,14 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->allow_dcc_msaa_clear_to_reg_for_bpp[bpp_log2] = true;
}
/* DCC stores run at about 50% of the performance of uncompressed stores,
 * and sometimes even less than that, so enabling them on dGPUs is risky.
 * By default they are only enabled on gfx10.3+ chips without dedicated VRAM.
 */
sscreen->always_allow_dcc_stores = !(sscreen->debug_flags & DBG(NO_DCC_STORE)) &&
((sscreen->info.chip_class >= GFX10_3 &&
!sscreen->info.has_dedicated_vram) ||
sscreen->debug_flags & DBG(DCC_STORE));
sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) &&
(sscreen->info.chip_class >= GFX10 ||
/* Only enable primitive binning on gfx9 APUs by default. */

View file

@ -237,7 +237,8 @@ enum
DBG_NO_DISPLAY_DCC,
DBG_NO_DCC,
DBG_NO_DCC_CLEAR,
DBG_NO_DCC_FB,
DBG_NO_DCC_STORE,
DBG_DCC_STORE,
DBG_NO_DCC_MSAA,
DBG_NO_FMASK,
@ -550,6 +551,7 @@ struct si_screen {
bool use_ngg_culling;
bool use_ngg_streamout;
bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */
bool always_allow_dcc_stores;
struct {
#define OPT_BOOL(name, dflt, description) bool name : 1;

View file

@ -150,6 +150,17 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rs
}
}
/**
 * Build IR that returns a copy of the descriptor vector "rsrc" in which
 * element 6 has been ANDed with C_00A018_WRITE_COMPRESS_ENABLE.
 *
 * NOTE(review): C_00A018_WRITE_COMPRESS_ENABLE is presumably the "clear" mask
 * for the WRITE_COMPRESS_ENABLE field (all bits set except that field), so
 * the AND turns write compression off -- confirm against the register header.
 *
 * \param ctx   shader context providing the LLVM builder and the i32 type
 * \param rsrc  descriptor vector; element 6 must be an i32
 * \return the descriptor with element 6 replaced by the masked value
 */
static LLVMValueRef force_write_compress_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
{
   /* Index of the descriptor dword to modify, and the mask applied to it. */
   LLVMValueRef dword_index = LLVMConstInt(ctx->ac.i32, 6, 0);
   LLVMValueRef keep_mask = LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, 0);

   /* Read the dword, mask it, and write the result back into the same slot. */
   LLVMValueRef dword = LLVMBuildExtractElement(ctx->ac.builder, rsrc, dword_index, "");
   LLVMValueRef masked = LLVMBuildAnd(ctx->ac.builder, dword, keep_mask, "");

   return LLVMBuildInsertElement(ctx->ac.builder, rsrc, masked, dword_index, "");
}
/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
* adjust "index" to point to FMASK. */
static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueRef list,
@ -173,6 +184,11 @@ static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueR
if (desc_type == AC_DESC_IMAGE && uses_store && ctx->ac.chip_class <= GFX9)
rsrc = force_dcc_off(ctx, rsrc);
if (desc_type == AC_DESC_IMAGE && !uses_store &&
ctx->screen->always_allow_dcc_stores && ctx->screen->info.has_image_load_dcc_bug)
rsrc = force_write_compress_off(ctx, rsrc);
return rsrc;
}