freedreno/a6xx: Split tile loads and clears

This gives better visibility in Perfetto and prepares for the next
commit, where we could have per-subpass clears.

While we are at it, start adopting Vulkan terms for tile load/store. No
need to be pointlessly different.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22895>
This commit is contained in:
Rob Clark 2023-04-22 10:36:36 -07:00 committed by Marge Bot
parent 10f625eb13
commit c613bf1f14
7 changed files with 108 additions and 69 deletions

View file

@ -88,7 +88,7 @@ static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = batch->tile_fini;
struct fd_ringbuffer *ring = batch->tile_store;
struct fd_resource *rsc = fd_resource(psurf->texture);
uint32_t offset =
fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
@ -141,9 +141,9 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
struct fd_ringbuffer *ring;
batch->tile_fini =
batch->tile_store =
fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
ring = batch->tile_fini;
ring = batch->tile_store;
fd2_emit_vertex_bufs(ring, 0x9c,
(struct fd2_vertex_buf[]){
@ -223,7 +223,7 @@ prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
static void
fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
fd2_emit_ib(batch->gmem, batch->tile_fini);
fd2_emit_ib(batch->gmem, batch->tile_store);
}
/* transfer from system memory to gmem */

View file

@ -848,9 +848,9 @@ emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
OUT_RING(ring, A6XX_RB_BLIT_GMEM_MSAA_CNTL_SAMPLES(samples));
}
static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_setup(struct fd_batch *batch);
template <chip CHIP>
static void prepare_tile_fini_ib(struct fd_batch *batch);
static void prepare_tile_fini(struct fd_batch *batch);
/* before first tile */
template <chip CHIP>
@ -874,8 +874,8 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
fd6_cache_inv(batch, ring);
prepare_tile_setup_ib(batch);
prepare_tile_fini_ib<CHIP>(batch);
prepare_tile_setup(batch);
prepare_tile_fini<CHIP>(batch);
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x0);
@ -1316,18 +1316,23 @@ emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
}
static void
prepare_tile_setup_ib(struct fd_batch *batch)
prepare_tile_setup(struct fd_batch *batch)
{
if (!(batch->restore || batch->fast_cleared))
return;
if (batch->restore) {
batch->tile_loads =
fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
batch->tile_setup =
fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
set_blit_scissor(batch, batch->tile_loads);
emit_restore_blits(batch, batch->tile_loads);
}
set_blit_scissor(batch, batch->tile_setup);
if (batch->fast_cleared) {
batch->tile_clears =
fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
emit_restore_blits(batch, batch->tile_setup);
emit_clears(batch, batch->tile_setup);
set_blit_scissor(batch, batch->tile_clears);
emit_clears(batch, batch->tile_clears);
}
}
/*
@ -1342,10 +1347,16 @@ fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
if (batch->tile_setup) {
trace_start_clear_restore(&batch->trace, batch->gmem, batch->fast_cleared, batch->restore);
emit_conditional_ib(batch, tile, batch->tile_setup);
trace_end_clear_restore(&batch->trace, batch->gmem);
if (batch->tile_loads) {
trace_start_tile_loads(&batch->trace, batch->gmem, batch->restore);
emit_conditional_ib(batch, tile, batch->tile_loads);
trace_end_tile_loads(&batch->trace, batch->gmem);
}
if (batch->tile_clears) {
trace_start_clears(&batch->trace, batch->gmem, batch->fast_cleared);
emit_conditional_ib(batch, tile, batch->tile_clears);
trace_end_clears(&batch->trace, batch->gmem);
}
}
@ -1465,15 +1476,16 @@ emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
template <chip CHIP>
static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
prepare_tile_fini(struct fd_batch *batch)
assert_dt
{
const struct fd_gmem_stateobj *gmem = batch->gmem_state;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
struct fd_ringbuffer *ring;
batch->tile_fini =
batch->tile_store =
fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
ring = batch->tile_fini;
ring = batch->tile_store;
set_blit_scissor(batch, ring);
@ -1540,10 +1552,10 @@ fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
emit_marker6(ring, 7);
if (batch->tile_fini) {
trace_start_resolve(&batch->trace, batch->gmem, batch->resolve);
emit_conditional_ib(batch, tile, batch->tile_fini);
trace_end_resolve(&batch->trace, batch->gmem);
if (batch->tile_store) {
trace_start_tile_stores(&batch->trace, batch->gmem, batch->resolve);
emit_conditional_ib(batch, tile, batch->tile_store);
trace_end_tile_stores(&batch->trace, batch->gmem);
}
}
@ -1581,7 +1593,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
struct pipe_box box2d;
u_box_2d(0, 0, pfb->width, pfb->height, &box2d);
trace_start_clear_restore(&batch->trace, ring, buffers, 0);
trace_start_clears(&batch->trace, ring, buffers);
if (buffers & PIPE_CLEAR_COLOR) {
for (int i = 0; i < pfb->nr_cbufs; i++) {
@ -1626,7 +1638,7 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
fd_wfi(batch, ring);
trace_end_clear_restore(&batch->trace, ring);
trace_end_clears(&batch->trace, ring);
}
template <chip CHIP>

View file

@ -153,14 +153,19 @@ cleanup_submit(struct fd_batch *batch)
batch->epilogue = NULL;
}
if (batch->tile_setup) {
fd_ringbuffer_del(batch->tile_setup);
batch->tile_setup = NULL;
if (batch->tile_loads) {
fd_ringbuffer_del(batch->tile_loads);
batch->tile_loads = NULL;
}
if (batch->tile_fini) {
fd_ringbuffer_del(batch->tile_fini);
batch->tile_fini = NULL;
if (batch->tile_clears) {
fd_ringbuffer_del(batch->tile_clears);
batch->tile_clears = NULL;
}
if (batch->tile_store) {
fd_ringbuffer_del(batch->tile_store);
batch->tile_store = NULL;
}
fd_submit_del(batch->submit);

View file

@ -206,8 +206,9 @@ struct fd_batch {
/** epilogue cmdstream (executed after all tiles): */
struct fd_ringbuffer *epilogue;
struct fd_ringbuffer *tile_setup;
struct fd_ringbuffer *tile_fini;
struct fd_ringbuffer *tile_loads;
struct fd_ringbuffer *tile_clears;
struct fd_ringbuffer *tile_store;
union pipe_color_union clear_color[MAX_RENDER_TARGETS];
double clear_depth;

View file

@ -464,35 +464,51 @@ fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
}
void
fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_start_clear_restore *payload)
fd_start_clears(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_start_clears *payload)
{
stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
stage_start(pctx, ts_ns, CLEAR_STAGE_ID);
}
void
fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
fd_end_clears(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_end_clears *payload)
{
stage_end(pctx, ts_ns, CLEAR_STAGE_ID);
}
void
fd_start_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_start_tile_loads *payload)
{
stage_start(pctx, ts_ns, TILE_LOAD_STAGE_ID);
}
void
fd_end_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_end_tile_loads *payload)
{
stage_end(pctx, ts_ns, TILE_LOAD_STAGE_ID);
}
void
fd_start_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_end_clear_restore *payload)
const struct trace_start_tile_stores *payload)
{
stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
stage_start(pctx, ts_ns, TILE_STORE_STAGE_ID);
}
void
fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_start_resolve *payload)
fd_end_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_end_tile_stores *payload)
{
stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
}
void
fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
const void *flush_data,
const struct trace_end_resolve *payload)
{
stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
stage_end(pctx, ts_ns, TILE_STORE_STAGE_ID);
}
void

View file

@ -40,8 +40,9 @@ enum fd_stage_id {
BYPASS_STAGE_ID,
BLIT_STAGE_ID,
COMPUTE_STAGE_ID,
CLEAR_RESTORE_STAGE_ID,
RESOLVE_STAGE_ID,
CLEAR_STAGE_ID,
TILE_LOAD_STAGE_ID,
TILE_STORE_STAGE_ID,
STATE_RESTORE_STAGE_ID,
VSC_OVERFLOW_STAGE_ID,
PROLOGUE_STAGE_ID,
@ -59,8 +60,9 @@ static const struct {
[BYPASS_STAGE_ID] = {"Render", "Rendering to system memory"},
[BLIT_STAGE_ID] = {"Blit", "Performing a Blit operation"},
[COMPUTE_STAGE_ID] = {"Compute", "Compute job"},
[CLEAR_RESTORE_STAGE_ID] = {"Clear/Restore", "Clear (sysmem) or per-tile clear or restore (GMEM)"},
[RESOLVE_STAGE_ID] = {"Resolve", "Per tile resolve (GMEM to system memory"},
[CLEAR_STAGE_ID] = {"Clear", "Clear (sysmem) or per-tile clear (GMEM)"},
[TILE_LOAD_STAGE_ID] = {"Tile Load", "Per tile load (system memory to GMEM)"},
[TILE_STORE_STAGE_ID] = {"Tile Store", "Per tile store (GMEM to system memory)"},
[STATE_RESTORE_STAGE_ID] = {"State Restore", "Setup at the beginning of new cmdstream buffer"},
[VSC_OVERFLOW_STAGE_ID] = {"VSC Overflow Test", ""},
[PROLOGUE_STAGE_ID] = {"Prologue", "Preemble cmdstream (executed once before first tile)"},

View file

@ -123,17 +123,20 @@ begin_end_tp('binning_ib')
begin_end_tp('vsc_overflow_test')
begin_end_tp('prologue')
# For GMEM pass, where this could either be a clear or restore
begin_end_tp('clear_restore',
args=[TracepointArg(type='uint16_t', var='fast_cleared', c_format='0x%x'),
TracepointArg(type='uint16_t', var='restore', c_format='0x%x')],
tp_print=['fast_cleared: 0x%x, restore=0x%x', '__entry->fast_cleared',
'__entry->restore'],
# Either sysmem or gmem clears
begin_end_tp('clears',
args=[TracepointArg(type='uint16_t', var='fast_cleared', c_format='0x%x')],
tp_print=['fast_cleared: 0x%x', '__entry->fast_cleared'],
)
begin_end_tp('resolve',
args=[TracepointArg(type='uint16_t', var='stored', c_format='0x%x')],
tp_print=['stored: 0x%x', '__entry->stored'],
begin_end_tp('tile_loads',
args=[TracepointArg(type='uint16_t', var='load', c_format='0x%x')],
tp_print=['load=0x%x', '__entry->load'],
)
begin_end_tp('tile_stores',
args=[TracepointArg(type='uint16_t', var='store', c_format='0x%x')],
tp_print=['store: 0x%x', '__entry->store'],
)
singular_tp('start_tile',