freedreno: add non-draw batches for compute/blit

Get rid of "gmem" (i.e. tiling) ringbuffer, and just emit setup commands
directly to "draw" ringbuffer for compute (and in future for blits not
using the 3d pipe).  This way we can have a simple flat cmdstream buffer
and bypass setup related to 3d pipe.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2017-11-24 10:37:22 -05:00
parent 2697480c92
commit b852c3bf67
12 changed files with 82 additions and 32 deletions

View file

@ -120,6 +120,35 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
fd5_emit_shader(ring, v);
}
static void
emit_setup(struct fd_context *ctx)
{
struct fd_ringbuffer *ring = ctx->batch->draw;
fd5_emit_restore(ctx->batch, ring);
fd5_emit_lrz_flush(ring);
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x0);
OUT_PKT7(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, UNK_19);
OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
fd_wfi(ctx->batch, ring);
OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */
OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
}
static void
fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
{
@ -128,6 +157,8 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
struct ir3_shader_variant *v;
struct fd_ringbuffer *ring = ctx->batch->draw;
emit_setup(ctx);
v = ir3_shader_variant(so->shader, key, &ctx->debug);
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)

View file

@ -55,12 +55,16 @@ batch_init(struct fd_batch *batch)
}
batch->draw = fd_ringbuffer_new(ctx->pipe, size);
batch->binning = fd_ringbuffer_new(ctx->pipe, size);
batch->gmem = fd_ringbuffer_new(ctx->pipe, size);
if (!batch->nondraw) {
batch->binning = fd_ringbuffer_new(ctx->pipe, size);
batch->gmem = fd_ringbuffer_new(ctx->pipe, size);
fd_ringbuffer_set_parent(batch->gmem, NULL);
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
fd_ringbuffer_set_parent(batch->binning, batch->gmem);
fd_ringbuffer_set_parent(batch->gmem, NULL);
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
fd_ringbuffer_set_parent(batch->binning, batch->gmem);
} else {
fd_ringbuffer_set_parent(batch->draw, NULL);
}
batch->in_fence_fd = -1;
batch->fence = fd_fence_create(batch);
@ -89,7 +93,7 @@ batch_init(struct fd_batch *batch)
}
struct fd_batch *
fd_batch_create(struct fd_context *ctx)
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
@ -100,6 +104,7 @@ fd_batch_create(struct fd_context *ctx)
pipe_reference_init(&batch->reference, 1);
batch->ctx = ctx;
batch->nondraw = nondraw;
batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
@ -123,8 +128,13 @@ batch_fini(struct fd_batch *batch)
fd_fence_ref(NULL, &batch->fence, NULL);
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
if (!batch->nondraw) {
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
} else {
debug_assert(!batch->binning);
debug_assert(!batch->gmem);
}
if (batch->lrz_clear) {
fd_ringbuffer_del(batch->lrz_clear);
batch->lrz_clear = NULL;
@ -326,6 +336,7 @@ fd_batch_flush(struct fd_batch *batch, bool sync, bool force)
* up used_resources
*/
struct fd_batch *tmp = NULL;
fd_batch_reference(&tmp, batch);
batch_flush(tmp, force);
if (sync)

View file

@ -93,6 +93,8 @@ struct fd_batch {
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
} cleared, partial_cleared, restore, resolve;
/* is this a non-draw batch (ie compute/blit which has no pfb state)? */
bool nondraw : 1;
bool needs_flush : 1;
bool blit : 1;
bool back_blit : 1; /* only blit so far is resource shadowing back-blit */
@ -202,7 +204,7 @@ struct fd_batch {
uint32_t dependents_mask;
};
struct fd_batch * fd_batch_create(struct fd_context *ctx);
struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
void fd_batch_reset(struct fd_batch *batch);
void fd_batch_sync(struct fd_batch *batch);

View file

@ -316,7 +316,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
idx--; /* bit zero returns 1 for ffs() */
batch = fd_batch_create(ctx);
batch = fd_batch_create(ctx, false);
if (!batch)
goto out;

View file

@ -155,9 +155,10 @@ fd_context_destroy(struct pipe_context *pctx)
fd_pipe_del(ctx->pipe);
if (fd_mesa_debug & (FD_DBG_BSTAT | FD_DBG_MSGS)) {
printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_restore=%u\n",
printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
(uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
(uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_restore);
(uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
(uint32_t)ctx->stats.batch_restore);
}
FREE(ctx);

View file

@ -215,7 +215,7 @@ struct fd_context {
uint64_t prims_emitted;
uint64_t prims_generated;
uint64_t draw_calls;
uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore;
uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
uint64_t staging_uploads, shadow_uploads;
} stats;
@ -304,7 +304,7 @@ struct fd_context {
/* draw: */
bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned index_offset);
unsigned index_offset);
bool (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);

View file

@ -459,7 +459,7 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
struct fd_batch *batch, *save_batch = NULL;
unsigned i;
batch = fd_batch_create(ctx);
batch = fd_batch_create(ctx, true);
fd_batch_reference(&save_batch, ctx->batch);
fd_batch_reference(&ctx->batch, batch);
@ -493,9 +493,10 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
mtx_unlock(&ctx->screen->lock);
batch->needs_flush = true;
ctx->launch_grid(ctx, info);
fd_gmem_flush_compute(batch);
fd_batch_flush(batch, false, false);
fd_batch_reference(&ctx->batch, save_batch);
fd_batch_reference(&save_batch, NULL);

View file

@ -372,13 +372,15 @@ render_sysmem(struct fd_batch *batch)
static void
flush_ring(struct fd_batch *batch)
{
/* for compute/blit batch, there is no batch->gmem, only batch->draw: */
struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
uint32_t timestamp;
int out_fence_fd = -1;
fd_ringbuffer_flush2(batch->gmem, batch->in_fence_fd,
fd_ringbuffer_flush2(ring, batch->in_fence_fd,
batch->needs_out_fence_fd ? &out_fence_fd : NULL);
timestamp = fd_ringbuffer_timestamp(batch->gmem);
timestamp = fd_ringbuffer_timestamp(ring);
fd_fence_populate(batch->fence, timestamp, out_fence_fd);
}
@ -389,8 +391,9 @@ fd_gmem_render_tiles(struct fd_batch *batch)
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
bool sysmem = false;
if (ctx->emit_sysmem_prep) {
if (batch->cleared || batch->gmem_reason || (batch->num_draws > 5)) {
if (ctx->emit_sysmem_prep && !batch->nondraw) {
if (batch->cleared || batch->gmem_reason ||
((batch->num_draws > 5) && !batch->blit)) {
DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
batch->cleared, batch->gmem_reason, batch->num_draws);
} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
@ -407,7 +410,10 @@ fd_gmem_render_tiles(struct fd_batch *batch)
ctx->stats.batch_total++;
if (sysmem) {
if (batch->nondraw) {
DBG("%p: rendering non-draw", batch);
ctx->stats.batch_nondraw++;
} else if (sysmem) {
DBG("%p: rendering sysmem %ux%u (%s/%s)",
batch, pfb->width, pfb->height,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
@ -447,13 +453,6 @@ fd_gmem_render_noop(struct fd_batch *batch)
flush_ring(batch);
}
void
fd_gmem_flush_compute(struct fd_batch *batch)
{
render_sysmem(batch);
flush_ring(batch);
}
/* tile needs restore if it isn't completely contained within the
* cleared scissor:
*/

View file

@ -64,7 +64,6 @@ struct fd_batch;
void fd_gmem_render_tiles(struct fd_batch *batch);
void fd_gmem_render_noop(struct fd_batch *batch);
void fd_gmem_flush_compute(struct fd_batch *batch);
bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
uint32_t buffers);

View file

@ -127,6 +127,7 @@ fd_get_driver_query_info(struct pipe_screen *pscreen,
{"batches", FD_QUERY_BATCH_TOTAL, {0}},
{"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
{"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
{"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}},
{"restores", FD_QUERY_BATCH_RESTORE, {0}},
{"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
{"staging", FD_QUERY_STAGING_UPLOADS, {0}},

View file

@ -60,9 +60,10 @@ fd_query(struct pipe_query *pq)
#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* texture/buffer uploads using staging blit */
#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads that shadowed rsc */
#define FD_QUERY_BATCH_NONDRAW (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* compute/blit batches */
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */
#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads using staging blit */
#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 7) /* texture/buffer uploads that shadowed rsc */
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);

View file

@ -65,6 +65,8 @@ read_counter(struct fd_context *ctx, int type)
return ctx->stats.batch_sysmem;
case FD_QUERY_BATCH_GMEM:
return ctx->stats.batch_gmem;
case FD_QUERY_BATCH_NONDRAW:
return ctx->stats.batch_nondraw;
case FD_QUERY_BATCH_RESTORE:
return ctx->stats.batch_restore;
case FD_QUERY_STAGING_UPLOADS:
@ -82,6 +84,7 @@ is_rate_query(struct fd_query *q)
case FD_QUERY_BATCH_TOTAL:
case FD_QUERY_BATCH_SYSMEM:
case FD_QUERY_BATCH_GMEM:
case FD_QUERY_BATCH_NONDRAW:
case FD_QUERY_BATCH_RESTORE:
case FD_QUERY_STAGING_UPLOADS:
case FD_QUERY_SHADOW_UPLOADS:
@ -147,6 +150,7 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
case FD_QUERY_BATCH_TOTAL:
case FD_QUERY_BATCH_SYSMEM:
case FD_QUERY_BATCH_GMEM:
case FD_QUERY_BATCH_NONDRAW:
case FD_QUERY_BATCH_RESTORE:
case FD_QUERY_STAGING_UPLOADS:
case FD_QUERY_SHADOW_UPLOADS: