mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 07:18:17 +02:00
radeonsi/gfx9: don't flush TC L2 between rendering and texturing if not needed
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
287b0a28f4
commit
5b62eb237c
3 changed files with 47 additions and 34 deletions
|
|
@@ -391,29 +391,29 @@ si_decompress_depth(struct si_context *sctx,
|
||||||
/* Only in-place decompression needs to flush DB caches, or
|
/* Only in-place decompression needs to flush DB caches, or
|
||||||
* when we don't decompress but TC-compatible planes are dirty.
|
* when we don't decompress but TC-compatible planes are dirty.
|
||||||
*/
|
*/
|
||||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
|
si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
inplace_planes & PIPE_MASK_S);
|
||||||
SI_CONTEXT_INV_VMEM_L1;
|
|
||||||
|
|
||||||
/* If we flush DB caches for TC-compatible depth, the dirty
|
/* If we flush DB caches for TC-compatible depth, the dirty
|
||||||
* state becomes 0 for the whole mipmap tree and all planes.
|
* state becomes 0 for the whole mipmap tree and all planes.
|
||||||
* (there is nothing else to flush)
|
* (there is nothing else to flush)
|
||||||
*/
|
*/
|
||||||
if (tex->tc_compatible_htile) {
|
if (tex->tc_compatible_htile) {
|
||||||
if (r600_can_sample_zs(tex, false))
|
/* Only clear the mask that we are flushing, because
|
||||||
|
* si_make_DB_shader_coherent() can treat depth and
|
||||||
|
* stencil differently.
|
||||||
|
*/
|
||||||
|
if (inplace_planes & PIPE_MASK_Z)
|
||||||
tex->dirty_level_mask = 0;
|
tex->dirty_level_mask = 0;
|
||||||
if (r600_can_sample_zs(tex, true))
|
if (inplace_planes & PIPE_MASK_S)
|
||||||
tex->stencil_dirty_level_mask = 0;
|
tex->stencil_dirty_level_mask = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* set_framebuffer_state takes care of coherency for single-sample.
|
/* set_framebuffer_state takes care of coherency for single-sample.
|
||||||
* The DB->CB copy uses CB for the final writes.
|
* The DB->CB copy uses CB for the final writes.
|
||||||
*/
|
*/
|
||||||
if (copy_planes && tex->resource.b.b.nr_samples > 1) {
|
if (copy_planes && tex->resource.b.b.nr_samples > 1)
|
||||||
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
|
si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_FLUSH_AND_INV_CB;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@@ -524,10 +524,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
sctx->decompression_enabled = false;
|
sctx->decompression_enabled = false;
|
||||||
|
si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
|
||||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_INV_VMEM_L1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@@ -1216,9 +1213,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
|
||||||
si_blitter_end(&sctx->b.b);
|
si_blitter_end(&sctx->b.b);
|
||||||
|
|
||||||
/* Flush caches for possible texturing. */
|
/* Flush caches for possible texturing. */
|
||||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
si_make_CB_shader_coherent(sctx, 1);
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_INV_VMEM_L1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
|
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
|
||||||
|
|
|
||||||
|
|
@@ -611,4 +611,27 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
|
||||||
*dst = src;
|
*dst = src;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples)
|
||||||
|
{
|
||||||
|
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
||||||
|
SI_CONTEXT_INV_VMEM_L1;
|
||||||
|
|
||||||
|
/* Single-sample color is coherent with shaders on GFX9. */
|
||||||
|
if (sctx->b.chip_class <= VI || num_samples >= 2)
|
||||||
|
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
|
||||||
|
bool include_stencil)
|
||||||
|
{
|
||||||
|
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
|
||||||
|
SI_CONTEXT_INV_VMEM_L1;
|
||||||
|
|
||||||
|
/* Single-sample depth (not stencil) is coherent with shaders on GFX9. */
|
||||||
|
if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil)
|
||||||
|
sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@@ -2572,11 +2572,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
* Only flush and wait for CB if there is actually a bound color buffer.
|
* Only flush and wait for CB if there is actually a bound color buffer.
|
||||||
*/
|
*/
|
||||||
if (sctx->framebuffer.nr_samples <= 1 &&
|
if (sctx->framebuffer.nr_samples <= 1 &&
|
||||||
sctx->framebuffer.state.nr_cbufs) {
|
sctx->framebuffer.state.nr_cbufs)
|
||||||
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
|
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_FLUSH_AND_INV_CB;
|
|
||||||
}
|
|
||||||
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||||
|
|
||||||
/* u_blitter doesn't invoke depth decompression when it does multiple
|
/* u_blitter doesn't invoke depth decompression when it does multiple
|
||||||
|
|
@@ -2585,11 +2583,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
* individual generate_mipmap blits.
|
* individual generate_mipmap blits.
|
||||||
* Note that lower mipmap levels aren't compressed.
|
* Note that lower mipmap levels aren't compressed.
|
||||||
*/
|
*/
|
||||||
if (sctx->generate_mipmap_for_depth) {
|
if (sctx->generate_mipmap_for_depth)
|
||||||
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
|
si_make_DB_shader_coherent(sctx, 1, false);
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_FLUSH_AND_INV_DB;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Take the maximum of the old and new count. If the new count is lower,
|
/* Take the maximum of the old and new count. If the new count is lower,
|
||||||
* dirtying is needed to disable the unbound colorbuffers.
|
* dirtying is needed to disable the unbound colorbuffers.
|
||||||
|
|
@@ -4026,11 +4021,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
|
||||||
|
|
||||||
/* Multisample surfaces are flushed in si_decompress_textures. */
|
/* Multisample surfaces are flushed in si_decompress_textures. */
|
||||||
if (sctx->framebuffer.nr_samples <= 1 &&
|
if (sctx->framebuffer.nr_samples <= 1 &&
|
||||||
sctx->framebuffer.state.nr_cbufs) {
|
sctx->framebuffer.state.nr_cbufs)
|
||||||
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
|
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples);
|
||||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
|
||||||
SI_CONTEXT_FLUSH_AND_INV_CB;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This only ensures coherency for shader image/buffer stores. */
|
/* This only ensures coherency for shader image/buffer stores. */
|
||||||
|
|
@@ -4073,8 +4065,11 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
||||||
if (flags & PIPE_BARRIER_FRAMEBUFFER &&
|
if (flags & PIPE_BARRIER_FRAMEBUFFER &&
|
||||||
sctx->framebuffer.nr_samples <= 1 &&
|
sctx->framebuffer.nr_samples <= 1 &&
|
||||||
sctx->framebuffer.state.nr_cbufs) {
|
sctx->framebuffer.state.nr_cbufs) {
|
||||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||||
SI_CONTEXT_WRITEBACK_GLOBAL_L2;
|
|
||||||
|
/* Single-sample color is coherent with TC on GFX9. */
|
||||||
|
if (sctx->screen->b.chip_class <= VI)
|
||||||
|
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
|
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue