radeonsi: flush DB caches only when transitioning from DB to texturing

Reuse the mechanism of si_decompress_textures: for TC-compatible depth
textures, skip the actual decompression there and only flag the DB cache
flush.

This removes a lot of unnecessary DB cache flushes.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2017-06-15 00:34:08 +02:00
parent fdca690e91
commit 2263610827
5 changed files with 56 additions and 25 deletions

View file

@@ -344,10 +344,6 @@ si_decompress_depth(struct si_context *sctx,
}
}
assert(!tex->tc_compatible_htile || levels_z == 0);
assert(!tex->tc_compatible_htile || levels_s == 0 ||
!r600_can_sample_zs(tex, true));
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
@@ -384,10 +380,30 @@ si_decompress_depth(struct si_context *sctx,
}
if (inplace_planes) {
si_blit_decompress_zs_in_place(
sctx, tex,
levels_z, levels_s,
first_layer, last_layer);
if (!tex->tc_compatible_htile) {
si_blit_decompress_zs_in_place(
sctx, tex,
levels_z, levels_s,
first_layer, last_layer);
}
/* Only in-place decompression needs to flush DB caches, or
* when we don't decompress but TC-compatible planes are dirty.
*/
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_VMEM_L1;
/* If we flush DB caches for TC-compatible depth, the dirty
* state becomes 0 for the whole mipmap tree and all planes.
* (there is nothing else to flush)
*/
if (tex->tc_compatible_htile) {
if (r600_can_sample_zs(tex, false))
tex->dirty_level_mask = 0;
if (r600_can_sample_zs(tex, true))
tex->stencil_dirty_level_mask = 0;
}
}
}
@@ -1352,11 +1368,15 @@ static boolean si_generate_mipmap(struct pipe_context *ctx,
rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
last_level - base_level);
sctx->generate_mipmap_for_depth = rtex->is_depth;
si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
util_blitter_generate_mipmap(sctx->blitter, tex, format,
base_level, last_level,
first_layer, last_layer);
si_blitter_end(ctx);
sctx->generate_mipmap_for_depth = false;
return true;
}

View file

@@ -584,12 +584,14 @@ static bool color_needs_decompression(struct r600_texture *rtex)
(rtex->cmask.size || rtex->dcc_offset));
}
static bool depth_needs_decompression(struct r600_texture *rtex,
struct si_sampler_view *sview)
static bool depth_needs_decompression(struct r600_texture *rtex)
{
return rtex->db_compatible &&
(!rtex->tc_compatible_htile ||
!r600_can_sample_zs(rtex, sview->is_stencil_sampler));
/* If the depth/stencil texture is TC-compatible, no decompression
* will be done. The decompression function will only flush DB caches
* to make it coherent with shaders. That's necessary because the driver
* doesn't flush DB caches in any other case.
*/
return rtex->db_compatible;
}
static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
@@ -633,9 +635,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
if (depth_needs_decompression(rtex, rview)) {
if (depth_needs_decompression(rtex)) {
samplers->needs_depth_decompress_mask |= 1u << slot;
} else {
samplers->needs_depth_decompress_mask &= ~(1u << slot);
@@ -2470,7 +2471,7 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx,
struct r600_texture *rtex =
(struct r600_texture *)sview->base.texture;
if (depth_needs_decompression(rtex, sview)) {
if (depth_needs_decompression(rtex)) {
util_dynarray_append(
&sctx->resident_tex_needs_depth_decompress,
struct si_texture_handle *,

View file

@@ -362,6 +362,7 @@ struct si_context {
bool db_stencil_clear:1;
bool db_stencil_disable_expclear:1;
bool occlusion_queries_disabled:1;
bool generate_mipmap_for_depth:1;
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;

View file

@@ -2525,15 +2525,26 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
* the only client not using TC that can change textures is
* the framebuffer.
*
* Flush all CB and DB caches here because all buffers can be used
* for write by both TC (with shader image stores) and CB/DB.
* Wait for compute shaders because of possible transitions:
* - FB write -> shader read
* - shader write -> FB read
*
* DB caches are flushed on demand (using si_decompress_textures).
*/
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_CS_PARTIAL_FLUSH;
/* u_blitter doesn't invoke depth decompression when it does multiple
* blits in a row, but the only case when it matters for DB is when
* doing generate_mipmap. So here we flush DB manually between
* individual generate_mipmap blits.
* Note that lower mipmap levels aren't compressed.
*/
if (sctx->generate_mipmap_for_depth)
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
@@ -3990,9 +4001,9 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
/* Depth and stencil are flushed in si_decompress_textures when needed. */
if (flags & PIPE_BARRIER_FRAMEBUFFER)
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB;
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
if (flags & (PIPE_BARRIER_FRAMEBUFFER |
PIPE_BARRIER_INDIRECT_BUFFER))

View file

@@ -1402,11 +1402,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
if (!rtex->tc_compatible_htile)
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
(!rtex->tc_compatible_htile || !rtex->can_sample_s))
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {