radeonsi: allow and finish TC-compatible MSAA HTILE

This improves performance for Catia by 4%.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13603>
This commit is contained in:
Marek Olšák 2021-10-29 15:36:05 -04:00 committed by Marge Bot
parent 3baeaac64b
commit c0f723ce2b
3 changed files with 17 additions and 5 deletions

View file

@ -371,6 +371,10 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
*/
S_00A018_WRITE_COMPRESS_ENABLE(ac_surface_supports_dcc_image_stores(sscreen->info.chip_class, &tex->surface) &&
(access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE));
/* TC-compatible MSAA HTILE requires ITERATE_256. */
if (tex->is_depth && tex->buffer.b.b.nr_samples >= 2)
state[6] |= S_00A018_ITERATE_256(1);
}
state[7] = meta_va >> 16;

View file

@ -3293,10 +3293,10 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE |
(zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
: RADEON_PRIO_DEPTH_BUFFER));
bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS);
/* Set fields dependent on tc_compat_htile. */
if (sctx->chip_class >= GFX9 &&
vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
if (sctx->chip_class >= GFX9 && tc_compat_htile) {
unsigned max_zplanes = 4;
if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
@ -3305,8 +3305,17 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
if (sctx->chip_class >= GFX10) {
db_z_info |= S_028040_ITERATE_FLUSH(1);
db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled);
bool iterate256 = tex->buffer.b.b.nr_samples >= 2;
db_z_info |= S_028040_ITERATE_FLUSH(1) |
S_028040_ITERATE_256(iterate256);
db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
S_028044_ITERATE_256(iterate256);
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
!tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4) {
max_zplanes = 1;
}
} else {
db_z_info |= S_028038_ITERATE_FLUSH(1);
db_stencil_info |= S_02803C_ITERATE_FLUSH(1);

View file

@ -1232,7 +1232,6 @@ si_texture_create_with_modifier(struct pipe_screen *screen,
sscreen->info.family != CHIP_TONGA && sscreen->info.family != CHIP_ICELAND &&
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
!(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth &&
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
is_zs;
enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ, tc_compatible_htile);