freedreno/a6xx: Per-subpass LRZ

Allow the LRZ buffer to be re-allocated if a mid-frame depth clear
starts a new subpass.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22895>
Authored by Rob Clark on 2023-04-22 12:44:59 -07:00; committed by Marge Bot
parent a77406b72b
commit 3738969710
3 changed files with 65 additions and 15 deletions
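
In short: a depth clear that lands mid-batch now opens a new subpass, and that subpass gets its own LRZ buffer instead of LRZ being invalidated for the rest of the frame. A minimal sketch of the swap, assuming the fd_bo_new()/fd_bo_ref()/fd_bo_del() APIs already used by the driver; the helper name and size math here are illustrative, not the actual clear-path code from this commit:

/* Illustrative sketch only: give the resource a fresh LRZ buffer when
 * a depth clear starts a new subpass; earlier subpasses keep their own
 * reference to the old buffer.
 */
static void
start_subpass_lrz(struct fd_context *ctx, struct fd_batch *batch,
                  struct fd_resource *zsbuf)
{
   if (zsbuf->lrz)
      fd_bo_del(zsbuf->lrz);   /* drop only the resource's reference */

   /* hypothetical size math, standing in for the real LRZ layout: */
   uint32_t size = zsbuf->lrz_pitch * zsbuf->lrz_height * 2;
   zsbuf->lrz = fd_bo_new(ctx->screen->dev, size, 0, "lrz");

   /* the new subpass pins the new buffer (cf. fd_batch_set_fb()): */
   batch->subpass->lrz = fd_bo_ref(zsbuf->lrz);
}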


@@ -183,21 +183,6 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
@@ -245,6 +230,33 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
   }
}

static void
emit_lrz(struct fd_batch *batch, struct fd_batch_subpass *subpass)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring = batch->gmem;

   if (!subpass->lrz) {
      OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(),
              A6XX_GRAS_LRZ_BUFFER_PITCH(),
              A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      return;
   }

   /* When swapping LRZ buffers we need to flush the LRZ cache..
    * We possibly don't need this during the binning pass; it
    * appears that the corruption happens on the read side, ie.
    * we change the LRZ buffer after a sub-pass, but get a
    * cache hit on stale data from the previous LRZ buffer.
    */
   fd6_emit_lrz_flush(ring);

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
   OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = subpass->lrz),
           A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_pitch),
           A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
}

static bool
use_hw_binning(struct fd_batch *batch)
{
@@ -786,6 +798,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   foreach_subpass (subpass, batch) {
      emit_lrz(batch, subpass);
      fd6_emit_ib(ring, subpass->draw);
   }
   trace_end_binning_ib(&batch->trace, ring);
@@ -1525,6 +1538,8 @@ fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
         trace_end_clears(&batch->trace, batch->gmem);
      }

      emit_lrz(batch, subpass);

      fd6_emit_ib(batch->gmem, subpass->draw);
   }
@@ -1740,6 +1755,8 @@ fd6_emit_sysmem(struct fd_batch *batch)
      struct pipe_framebuffer_state *pfb = &batch->framebuffer;

      update_render_cntl<CHIP>(batch, pfb, false);

      emit_lrz(batch, subpass);

      fd6_emit_ib(ring, subpass->draw);
   }
}
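
One note on the flush in emit_lrz() above: fd6_emit_lrz_flush() is a thin helper; as best I can tell from fd6_emit.h around this time, it amounts to writing an LRZ_FLUSH event, roughly:

static inline void
fd6_emit_lrz_flush(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, LRZ_FLUSH);
}

The event drains the LRZ cache, so the subsequently re-programmed GRAS_LRZ_BUFFER_BASE cannot hit stale lines from the previous buffer, which is exactly the read-side corruption the comment describes.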


@@ -83,6 +83,8 @@ subpass_destroy(struct fd_batch_subpass *subpass)
   if (subpass->subpass_clears)
      fd_ringbuffer_del(subpass->subpass_clears);

   list_del(&subpass->node);

   if (subpass->lrz)
      fd_bo_del(subpass->lrz);

   free(subpass);
}
@@ -380,8 +382,24 @@ fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb
   assert(!batch->nondraw);

   util_copy_framebuffer_state(&batch->framebuffer, pfb);

   if (!pfb->zsbuf)
      return;

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

   /* Switching back to a batch we'd previously started constructing shouldn't
    * result in a different lrz.  The dependency tracking should avoid another
    * batch writing/clearing our depth buffer.
    */
   if (batch->subpass->lrz) {
      assert(batch->subpass->lrz == zsbuf->lrz);
   } else if (zsbuf->lrz) {
      batch->subpass->lrz = fd_bo_ref(zsbuf->lrz);
   }
}
/* NOTE: could drop the last ref to batch
*/
void
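
The fd_bo_ref() taken in fd_batch_set_fb() is balanced by the fd_bo_del() added to subpass_destroy() above; that pairing is what lets a subpass outlive a mid-frame LRZ swap:

/* Reference pairing introduced in this file:
 *
 *   fd_batch_set_fb():  subpass->lrz = fd_bo_ref(zsbuf->lrz);
 *   subpass_destroy():  fd_bo_del(subpass->lrz);
 *
 * so each subpass keeps whichever LRZ BO was current when it started
 * alive, even after fd_resource::lrz is swapped to a new buffer.
 */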


@@ -51,6 +51,12 @@ struct fd_batch_result;
 * can be split out into another sub-pass. At gmem time, the appropriate
 * sysmem or gmem clears can be interleaved with the CP_INDIRECT_BUFFER
 * to the subpass's draw cmdstream.
 *
 * For depth clears, a replacement LRZ buffer can be allocated (the clear
 * is still inserted into the prologue cmdstream, since it needs to be
 * executed even in sysmem or if we aren't binning, and later batches
 * could depend on the LRZ state). The alternative would be to invalidate
 * LRZ for draws after the start of the new subpass.
 */
struct fd_batch_subpass {
   struct list_head node;
@@ -73,6 +79,15 @@ struct fd_batch_subpass {
    * always come at the start of a subpass).
    */
   unsigned num_draws;

   /**
    * If a subpass starts with an LRZ clear, it gets a new LRZ buffer.
    * fd_resource::lrz always tracks the current LRZ buffer, but at
    * binning/gmem time we need to know which LRZ buffer was current
    * when the subpass's draws were emitted, which is what is tracked
    * here.
    */
   struct fd_bo *lrz;
};
/**
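
Putting the pieces together: every consumer loop (the binning, gmem tile, and sysmem paths in the fd6_gmem hunks above) now re-programs LRZ state per subpass before kicking that subpass's draws; condensed:

foreach_subpass (subpass, batch) {
   /* point GRAS_LRZ_BUFFER_BASE at the LRZ BO this subpass was
    * recorded against (or zero the LRZ regs if there is none),
    * flushing the LRZ cache before reprogramming:
    */
   emit_lrz(batch, subpass);

   /* then execute the subpass's draw cmdstream: */
   fd6_emit_ib(batch->gmem, subpass->draw);
}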