radeonsi/gfx9: don't flush L2 metadata for CB if not needed

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2017-08-19 15:06:22 +02:00
parent 5b62eb237c
commit aa64e24cb1
4 changed files with 38 additions and 17 deletions

View file

@ -413,7 +413,8 @@ si_decompress_depth(struct si_context *sctx,
* The DB->CB copy uses CB for the final writes.
*/
if (copy_planes && tex->resource.b.b.nr_samples > 1)
si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
false);
}
static void
@ -524,7 +525,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
}
sctx->decompression_enabled = false;
si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
vi_dcc_enabled(rtex, first_level));
}
static void
@ -1213,7 +1215,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
si_blitter_end(&sctx->b.b);
/* Flush caches for possible texturing. */
si_make_CB_shader_coherent(sctx, 1);
si_make_CB_shader_coherent(sctx, 1, false);
}
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,

View file

@ -58,7 +58,10 @@
/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
* invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
/* gaps */
/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
* a CB or DB flush. */
#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5)
/* gap */
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7)
#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8)
@ -198,6 +201,7 @@ struct si_framebuffer {
ubyte dirty_cbufs;
bool dirty_zsbuf;
bool any_dst_linear;
bool CB_has_shader_readable_metadata;
};
struct si_clip_state {
@ -612,14 +616,25 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
}
static inline void
si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples)
si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
bool shaders_read_metadata)
{
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_INV_VMEM_L1;
/* Single-sample color is coherent with shaders on GFX9. */
if (sctx->b.chip_class <= VI || num_samples >= 2)
if (sctx->b.chip_class >= GFX9) {
/* Single-sample color is coherent with shaders on GFX9, but
* L2 metadata (DCC, CMASK) must be flushed if shaders read
* that metadata.
*/
if (num_samples >= 2)
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
else if (shaders_read_metadata)
sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
} else {
/* SI-CI-VI */
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
}
}
static inline void

View file

@ -2573,7 +2573,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
*/
if (sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs)
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
sctx->framebuffer.CB_has_shader_readable_metadata);
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
@ -2608,6 +2609,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
sctx->framebuffer.any_dst_linear = false;
sctx->framebuffer.CB_has_shader_readable_metadata = false;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
@ -2642,6 +2644,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (rtex->surface.is_linear)
sctx->framebuffer.any_dst_linear = true;
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
sctx->framebuffer.CB_has_shader_readable_metadata = true;
r600_context_add_resource_size(ctx, surf->base.texture);
p_atomic_inc(&rtex->framebuffers_bound);
@ -4022,7 +4027,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
/* Multisample surfaces are flushed in si_decompress_textures. */
if (sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs)
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
sctx->framebuffer.CB_has_shader_readable_metadata);
}
/* This only ensures coherency for shader image/buffer stores. */
@ -4067,8 +4073,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
/* Single-sample color is coherent with TC on GFX9. */
if (sctx->screen->b.chip_class <= VI)
if (sctx->b.chip_class <= VI)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}

View file

@ -988,13 +988,12 @@ void si_emit_cache_flush(struct si_context *sctx)
* TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
* TCL1 = invalidate L1
*/
tc_flags = 0;
/* When flushing CB or DB, L2 metadata should always be
* invalidated before texturing. Invalidating L2 data is not
* needed in some cases.
*/
tc_flags = EVENT_TC_ACTION_ENA |
EVENT_TC_MD_ACTION_ENA;
if (rctx->flags & SI_CONTEXT_INV_L2_METADATA) {
tc_flags = EVENT_TC_ACTION_ENA |
EVENT_TC_MD_ACTION_ENA;
}
/* Ideally flush TC together with CB/DB. */
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {