asahi: Add batch tracking logic

We already have the notion of an agx_batch, which encapsulates a render
pass. Extend the logic to allow multiple in-flight batches per context, avoiding
a flush in set_framebuffer_state and improving performance for certain
applications designed for IMRs (immediate-mode renderers) that ping-pong unnecessarily between FBOs. I
don't have such an application immediately in mind, but I wanted to get this
flag-day out of the way while the driver is still small and flexible.

The driver was written from day 1 with batch tracking in mind, so this is a
relatively small change to actually wire it up, but there are lots of little
details to get right.

The code itself is mostly a copy/paste of panfrost, which in turn draws
inspiration from freedreno and v3d.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19865>
Author: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date:   2022-11-17 18:10:11 -05:00 (committed by Marge Bot)
Parent: de1eb9400f
Commit: d7511ad784

4 changed files with 327 additions and 150 deletions
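
For orientation before the diff, here is a minimal, self-contained sketch of the scheme this commit wires up: each context owns a small fixed array of batch slots, a bitmask of active slots, and a monotonically increasing seqnum; a draw looks up the batch for the current framebuffer, reuses a matching in-flight batch, takes a free slot, or flushes the least recently used batch to make room. All names here (batch, context, get_batch, flush_batch, fb_id, MAX_BATCHES) are simplified stand-ins for the agx_* code in the diff below, not driver code.

```c
#include <stdint.h>

#define MAX_BATCHES 2

/* Simplified stand-ins for struct agx_batch / struct agx_context. */
struct batch {
   uint64_t seqnum; /* bumped on every (re)use; the smallest value is the LRU */
   int fb_id;       /* stand-in for the framebuffer-state key */
};

struct context {
   uint64_t seqnum;                 /* global counter used for LRU ordering */
   struct batch slots[MAX_BATCHES];
   uint32_t active;                 /* bitmask of in-flight slots */
};

/* Stand-in for agx_flush_batch(): submit the work and free the slot. */
static void flush_batch(struct context *ctx, struct batch *b)
{
   ctx->active &= ~(1u << (unsigned)(b - ctx->slots));
}

/* Simplified analogue of agx_get_batch_for_framebuffer() below. */
static struct batch *get_batch(struct context *ctx, int fb_id)
{
   struct batch *b = NULL;

   /* 1. Reuse an in-flight batch that already targets this framebuffer. */
   for (unsigned i = 0; i < MAX_BATCHES; ++i) {
      if ((ctx->active & (1u << i)) && ctx->slots[i].fb_id == fb_id) {
         ctx->slots[i].seqnum = ++ctx->seqnum; /* refresh its LRU position */
         return &ctx->slots[i];
      }
   }

   /* 2. Otherwise grab a free slot, if any. */
   for (unsigned i = 0; i < MAX_BATCHES; ++i) {
      if (!(ctx->active & (1u << i))) {
         b = &ctx->slots[i];
         break;
      }
   }

   /* 3. All slots busy: flush the least recently used batch to free one. */
   if (!b) {
      for (unsigned i = 0; i < MAX_BATCHES; ++i) {
         if (!b || ctx->slots[i].seqnum < b->seqnum)
            b = &ctx->slots[i];
      }
      flush_batch(ctx, b);
   }

   /* (Re)initialize the slot, analogous to agx_batch_init(). */
   b->fb_id = fb_id;
   b->seqnum = ++ctx->seqnum;
   ctx->active |= 1u << (unsigned)(b - ctx->slots);
   return b;
}
```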


@@ -1,16 +1,202 @@
/*
* Copyright 2022 Alyssa Rosenzweig
* Copyright 2019-2020 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "agx_state.h"
void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
#define foreach_batch(ctx, idx) \
BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)
static unsigned
agx_batch_idx(struct agx_batch *batch)
{
/* TODO: Turn into loop when we support multiple batches */
if (ctx->batch) {
struct agx_batch *batch = ctx->batch;
return batch - batch->ctx->batches.slots;
}
bool
agx_batch_is_active(struct agx_batch *batch)
{
return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
}
static void
agx_batch_init(struct agx_context *ctx,
const struct pipe_framebuffer_state *key,
struct agx_batch *batch)
{
struct agx_device *dev = agx_device(ctx->base.screen);
batch->ctx = ctx;
util_copy_framebuffer_state(&batch->key, key);
batch->seqnum = ++ctx->batches.seqnum;
agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_SHADER, true);
/* These allocations can happen only once and will just be zeroed (not freed)
* during batch clean up. The memory is owned by the context.
*/
if (!batch->bo_list.set) {
batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
batch->bo_list.word_count = 128;
} else {
memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
}
if (!batch->encoder) {
batch->encoder = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
batch->encoder_current = batch->encoder->ptr.cpu;
batch->encoder_end = batch->encoder_current + batch->encoder->size;
} else {
batch->encoder_current = batch->encoder->ptr.cpu;
batch->encoder_end = batch->encoder_current + batch->encoder->size;
}
if (!batch->scissor.bo) {
batch->scissor.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
}
if (!batch->depth_bias.bo) {
batch->depth_bias.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
}
batch->clear = 0;
batch->draw = 0;
batch->load = 0;
batch->clear_depth = 0;
batch->clear_stencil = 0;
batch->scissor.count = 0;
batch->depth_bias.count = 0;
batch->varyings = 0;
/* We need to emit prim state at the start. Max collides with all. */
batch->reduced_prim = PIPE_PRIM_MAX;
if (batch->key.zsbuf) {
agx_batch_writes(batch, agx_resource(key->zsbuf->texture));
}
for (unsigned i = 0; i < key->nr_cbufs; ++i) {
agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture));
}
unsigned batch_idx = agx_batch_idx(batch);
BITSET_SET(ctx->batches.active, batch_idx);
agx_batch_init_state(batch);
}
void
agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
{
struct agx_device *dev = agx_device(ctx->base.screen);
assert(batch->ctx == ctx);
if (ctx->batch == batch)
ctx->batch = NULL;
/* There is no more writer for anything we wrote recorded on this context */
hash_table_foreach(ctx->writer, ent) {
if (ent->data == batch)
_mesa_hash_table_remove(ctx->writer, ent);
}
int handle;
AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
agx_bo_unreference(agx_lookup_bo(dev, handle));
}
agx_pool_cleanup(&batch->pool);
agx_pool_cleanup(&batch->pipeline_pool);
util_unreference_framebuffer_state(&batch->key);
unsigned batch_idx = agx_batch_idx(batch);
BITSET_CLEAR(ctx->batches.active, batch_idx);
}
static struct agx_batch *
agx_get_batch_for_framebuffer(struct agx_context *ctx,
const struct pipe_framebuffer_state *state)
{
/* Look for a matching batch */
unsigned i;
foreach_batch(ctx, i) {
struct agx_batch *candidate = &ctx->batches.slots[i];
if (util_framebuffer_state_equal(&candidate->key, state)) {
/* We found a match, increase the seqnum for the LRU
* eviction logic.
*/
candidate->seqnum = ++ctx->batches.seqnum;
return candidate;
}
}
/* Look for a free batch */
struct agx_batch *batch = NULL;
for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
if (!BITSET_TEST(ctx->batches.active, i)) {
batch = &ctx->batches.slots[i];
break;
}
}
/* Else, evict something */
if (!batch) {
for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
struct agx_batch *candidate = &ctx->batches.slots[i];
if (!batch || batch->seqnum > candidate->seqnum)
batch = candidate;
}
agx_flush_batch(ctx, batch);
}
/* Batch is now free */
agx_batch_init(ctx, state, batch);
return batch;
}
struct agx_batch *
agx_get_batch(struct agx_context *ctx)
{
if (!ctx->batch) {
ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
agx_dirty_all(ctx);
}
assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
return ctx->batch;
}
void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
if (reason)
perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
unsigned idx;
foreach_batch(ctx, idx) {
agx_flush_batch(ctx, &ctx->batches.slots[idx]);
}
}
static void
agx_flush_readers_except(struct agx_context *ctx,
struct agx_resource *rsrc,
struct agx_batch *except,
const char *reason)
{
unsigned idx;
foreach_batch(ctx, idx) {
struct agx_batch *batch = &ctx->batches.slots[idx];
if (batch == except)
continue;
if (agx_batch_uses_bo(batch, rsrc->bo)) {
perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
@@ -19,20 +205,38 @@ agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char
}
}
void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
static void
agx_flush_writer_except(struct agx_context *ctx,
struct agx_resource *rsrc,
struct agx_batch *except,
const char *reason)
{
struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
if (ent) {
if (ent && ent->data != except) {
perf_debug_ctx(ctx, "Flush writer due to: %s\n", reason);
agx_flush_batch(ctx, ent->data);
}
}
void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
{
agx_flush_readers_except(ctx, rsrc, NULL, reason);
}
void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
{
agx_flush_writer_except(ctx, rsrc, NULL, reason);
}
void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
/* Hazard: read-after-write */
agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch");
agx_batch_add_bo(batch, rsrc->bo);
if (rsrc->separate_stencil)
@@ -45,12 +249,15 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
struct agx_context *ctx = batch->ctx;
struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch");
/* Nothing to do if we're already writing */
if (ent && ent->data == batch)
return;
/* Flush the old writer if there is one */
agx_flush_writer(ctx, rsrc, "Multiple writers");
/* Hazard: write-after-write, write-after-read */
if (ent)
agx_flush_writer(ctx, rsrc, "Multiple writers");
/* Write is strictly stronger than a read */
agx_batch_reads(batch, rsrc);
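
The hazard handling above boils down to a few rules: a batch that reads a resource must first flush any other batch writing it (read-after-write); a batch that writes a resource must flush every other batch reading it and any previous writer (write-after-read, write-after-write); and a write also counts as a read. Below is a condensed, self-contained model of those rules using made-up stand-in types (resource, batch, batch_read, batch_write); the real code uses the ctx->writer hash table and per-batch BO bitsets shown above.

```c
#include <stdbool.h>

#define MAX_BATCHES 2
#define MAX_READS   16

struct batch;

struct resource {
   struct batch *writer; /* plays the role of the ctx->writer hash table */
};

struct batch {
   bool active;
   struct resource *reads[MAX_READS]; /* stand-in for the per-batch BO bitset */
   unsigned num_reads;
};

static struct batch batches[MAX_BATCHES];

static void flush(struct batch *b)
{
   /* Submitting a batch resolves all of its outstanding reads and writes
    * (the real agx_batch_cleanup() also drops the batch from ctx->writer). */
   b->active = false;
   b->num_reads = 0;
}

static bool reads(struct batch *b, struct resource *r)
{
   for (unsigned i = 0; i < b->num_reads; ++i) {
      if (b->reads[i] == r)
         return true;
   }
   return false;
}

/* Analogue of agx_batch_reads(): read-after-write hazard. */
static void batch_read(struct batch *b, struct resource *r)
{
   if (r->writer && r->writer != b) {
      flush(r->writer);
      r->writer = NULL;
   }
   if (b->num_reads < MAX_READS)
      b->reads[b->num_reads++] = r; /* agx_batch_add_bo() analogue */
}

/* Analogue of agx_batch_writes(): write-after-read and write-after-write. */
static void batch_write(struct batch *b, struct resource *r)
{
   /* No other batch may keep reading the old contents. */
   for (unsigned i = 0; i < MAX_BATCHES; ++i) {
      if (batches[i].active && &batches[i] != b && reads(&batches[i], r))
         flush(&batches[i]);
   }

   if (r->writer == b)
      return; /* already the writer, nothing to do */

   if (r->writer)
      flush(r->writer); /* at most one outstanding writer per resource */

   r->writer = b;

   /* A write is strictly stronger than a read. */
   batch_read(b, r);
}
```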


@@ -659,7 +659,7 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct agx_context *ctx = agx_context(pctx);
struct agx_batch *batch = ctx->batch;
struct agx_batch *batch = agx_get_batch(ctx);
unsigned fastclear = buffers & ~(batch->draw | batch->load);
unsigned slowclear = buffers & ~fastclear;
@@ -690,11 +690,11 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
assert((batch->draw & slowclear) == slowclear);
}
static void
agx_flush_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
agx_flush_writer(agx_context(ctx), agx_resource(resource), "flush_resource");
}
/*
@@ -710,7 +710,7 @@ agx_flush(struct pipe_context *pctx,
if (fence)
*fence = NULL;
agx_flush_batch(ctx, ctx->batch);
agx_flush_all(ctx, "Gallium flush");
}
void
@@ -718,9 +718,13 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
{
struct agx_device *dev = agx_device(ctx->base.screen);
assert(agx_batch_is_active(batch));
/* Nothing to do */
if (!(batch->draw | batch->clear))
if (!(batch->draw | batch->clear)) {
agx_batch_cleanup(ctx, batch);
return;
}
/* Finalize the encoder */
uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
@@ -761,7 +765,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
pipeline_store =
agx_build_store_pipeline(batch,
dev->internal.store,
agx_pool_upload(&batch->pool, ctx->render_target[0], sizeof(ctx->render_target)));
agx_batch_upload_pbe(batch, 0));
}
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
@@ -851,37 +855,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
agxdecode_next_frame();
}
AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
agx_bo_unreference(agx_lookup_bo(dev, handle));
}
/* There is no more writer for anything we wrote recorded on this context */
hash_table_foreach(ctx->writer, ent) {
if (ent->data == batch)
_mesa_hash_table_remove(ctx->writer, ent);
}
memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
agx_pool_cleanup(&batch->pool);
agx_pool_cleanup(&batch->pipeline_pool);
agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true);
batch->clear = 0;
batch->draw = 0;
batch->load = 0;
batch->encoder_current = batch->encoder->ptr.cpu;
batch->encoder_end = batch->encoder_current + batch->encoder->size;
batch->scissor.count = 0;
agx_dirty_all(ctx);
agx_batch_init_state(batch);
/* After resetting the batch, rebind the framebuffer so we update resource
* tracking logic and the BO lists.
*
* XXX: This is a hack to workaround lack of proper batch tracking.
*/
ctx->base.set_framebuffer_state(&ctx->base, &ctx->framebuffer);
agx_batch_cleanup(ctx, batch);
}
static void
@@ -919,20 +893,6 @@ agx_create_context(struct pipe_screen *screen,
pctx->screen = screen;
pctx->priv = priv;
ctx->batch = rzalloc(ctx, struct agx_batch);
ctx->batch->ctx = ctx;
ctx->batch->bo_list.set = rzalloc_array(ctx->batch, BITSET_WORD, 128);
ctx->batch->bo_list.word_count = 128;
agx_pool_init(&ctx->batch->pool,
agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true);
agx_pool_init(&ctx->batch->pipeline_pool,
agx_device(screen), AGX_MEMORY_TYPE_SHADER, true);
ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu;
ctx->batch->encoder_end = ctx->batch->encoder_current + ctx->batch->encoder->size;
ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
ctx->batch->depth_bias.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
ctx->writer = _mesa_pointer_hash_table_create(ctx);
/* Upload fixed shaders (TODO: compile them?) */


@@ -781,63 +781,61 @@ agx_set_framebuffer_state(struct pipe_context *pctx,
if (!state)
return;
/* XXX: eliminate this flush with batch tracking logic */
agx_flush_all(ctx, "Framebuffer switch");
util_copy_framebuffer_state(&ctx->framebuffer, state);
util_copy_framebuffer_state(&ctx->batch->key, state);
ctx->dirty = ~0;
if (state->zsbuf)
agx_batch_writes(ctx->batch, agx_resource(state->zsbuf->texture));
for (unsigned i = 0; i < state->nr_cbufs; ++i) {
struct pipe_surface *surf = state->cbufs[i];
struct agx_resource *tex = agx_resource(surf->texture);
const struct util_format_description *desc =
util_format_description(surf->format);
unsigned level = surf->u.tex.level;
unsigned layer = surf->u.tex.first_layer;
agx_batch_writes(ctx->batch, tex);
assert(surf->u.tex.last_layer == layer);
agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) {
cfg.layout = agx_translate_layout(tex->layout.tiling);
cfg.channels = agx_pixel_format[surf->format].channels;
cfg.type = agx_pixel_format[surf->format].type;
assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;
if (desc->nr_channels >= 2)
cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;
if (desc->nr_channels >= 3)
cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;
if (desc->nr_channels >= 4)
cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;
cfg.width = state->width;
cfg.height = state->height;
cfg.level = surf->u.tex.level;
cfg.buffer = agx_map_texture_gpu(tex, layer);
cfg.unk_mipmapped = tex->mipmapped;
if (tex->layout.tiling == AIL_TILING_LINEAR) {
cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
cfg.levels = 1;
} else {
cfg.unk_tiled = true;
cfg.levels = tex->base.last_level + 1;
}
};
}
ctx->batch = NULL;
agx_dirty_all(ctx);
}
uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt)
{
struct pipe_surface *surf = batch->key.cbufs[rt];
struct agx_resource *tex = agx_resource(surf->texture);
const struct util_format_description *desc =
util_format_description(surf->format);
unsigned level = surf->u.tex.level;
unsigned layer = surf->u.tex.first_layer;
assert(surf->u.tex.last_layer == layer);
struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, AGX_RENDER_TARGET_LENGTH, 256);
agx_pack(T.cpu, RENDER_TARGET, cfg) {
cfg.layout = agx_translate_layout(tex->layout.tiling);
cfg.channels = agx_pixel_format[surf->format].channels;
cfg.type = agx_pixel_format[surf->format].type;
assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;
if (desc->nr_channels >= 2)
cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;
if (desc->nr_channels >= 3)
cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;
if (desc->nr_channels >= 4)
cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;
cfg.width = batch->key.width;
cfg.height = batch->key.height;
cfg.level = surf->u.tex.level;
cfg.buffer = agx_map_texture_gpu(tex, layer);
cfg.unk_mipmapped = tex->mipmapped;
if (tex->layout.tiling == AIL_TILING_LINEAR) {
cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
cfg.levels = 1;
} else {
cfg.unk_tiled = true;
cfg.levels = tex->base.last_level + 1;
}
};
return T.gpu;
}
/* Likewise constant buffers, textures, and samplers are handled in a common
* per-draw path, with dirty tracking to reduce the costs involved.
*/
@@ -1224,18 +1222,20 @@ agx_update_vs(struct agx_context *ctx)
}
static bool
agx_update_fs(struct agx_context *ctx)
agx_update_fs(struct agx_batch *batch)
{
struct agx_context *ctx = batch->ctx;
struct asahi_shader_key key = {
.nr_cbufs = ctx->batch->key.nr_cbufs,
.nr_cbufs = batch->key.nr_cbufs,
.clip_plane_enable = ctx->rast->base.clip_plane_enable,
};
if (ctx->batch->reduced_prim == PIPE_PRIM_POINTS)
if (batch->reduced_prim == PIPE_PRIM_POINTS)
key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable;
for (unsigned i = 0; i < key.nr_cbufs; ++i) {
struct pipe_surface *surf = ctx->batch->key.cbufs[i];
struct pipe_surface *surf = batch->key.cbufs[i];
if (surf) {
enum pipe_format fmt = surf->format;
@@ -1557,9 +1557,6 @@ agx_batch_init_state(struct agx_batch *batch)
agx_ppp_fini(&out, &ppp);
batch->encoder_current = out;
/* We need to emit prim state at the start. Max collides with all. */
batch->reduced_prim = PIPE_PRIM_MAX;
}
static enum agx_object_type
@@ -1586,9 +1583,10 @@ agx_pass_type_for_shader(struct agx_shader_info *info)
#define MAX_PPP_UPDATES 2
static uint8_t *
agx_encode_state(struct agx_context *ctx, uint8_t *out,
agx_encode_state(struct agx_batch *batch, uint8_t *out,
bool is_lines, bool is_points)
{
struct agx_context *ctx = batch->ctx;
struct agx_rasterizer *rast = ctx->rast;
unsigned ppp_updates = 0;
@@ -1613,7 +1611,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH;
agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_1, cfg) {
cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->vs, PIPE_SHADER_VERTEX);
cfg.pipeline = agx_build_pipeline(batch, ctx->vs, PIPE_SHADER_VERTEX);
}
out += AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH;
@@ -1634,17 +1632,17 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
out += 4;
}
struct agx_pool *pool = &ctx->batch->pool;
struct agx_pool *pool = &batch->pool;
struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs;
unsigned zbias = 0;
if (ctx->rast->base.offset_tri) {
zbias = agx_upload_depth_bias(ctx->batch, &ctx->rast->base);
zbias = agx_upload_depth_bias(batch, &ctx->rast->base);
ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS;
}
if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) {
agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport,
agx_upload_viewport_scissor(pool, batch, &out, &ctx->viewport,
ctx->rast->base.scissor ? &ctx->scissor : NULL,
zbias);
}
@@ -1652,7 +1650,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
bool varyings_dirty = false;
if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) {
ctx->batch->varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool,
batch->varyings = agx_link_varyings_vs_fs(&batch->pipeline_pool,
&ctx->vs->info.varyings.vs,
&ctx->fs->info.varyings.fs,
ctx->rast->base.flatshade_first);
@@ -1774,13 +1772,13 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
if (IS_DIRTY(FS) || varyings_dirty) {
unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->fs, PIPE_SHADER_FRAGMENT),
cfg.pipeline = agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT),
cfg.uniform_register_count = ctx->fs->info.push_count;
cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs;
cfg.texture_state_register_count = frag_tex_count;
cfg.sampler_state_register_count = frag_tex_count;
cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
cfg.cf_bindings = ctx->batch->varyings;
cfg.cf_bindings = batch->varyings;
/* XXX: This is probably wrong */
cfg.unknown_30 = frag_tex_count >= 4;
@@ -1883,18 +1881,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
}
struct agx_context *ctx = agx_context(pctx);
struct agx_batch *batch = ctx->batch;
struct agx_batch *batch = agx_get_batch(ctx);
if (agx_scissor_culls_everything(ctx))
return;
#ifndef NDEBUG
/* For debugging dirty tracking, mark all state as dirty every draw, forcing
* everything to be re-emitted fresh.
*/
if (unlikely(agx_device(pctx->screen)->debug & AGX_DBG_DIRTY))
agx_dirty_all(ctx);
#endif
agx_dirty_all(ctx);
/* Dirty track the reduced prim: lines vs points vs triangles */
enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);
@@ -1902,8 +1894,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
batch->reduced_prim = reduced_prim;
/* TODO: masks */
ctx->batch->draw |= ~0;
ctx->batch->load |= ~0;
batch->draw |= ~0;
batch->load |= ~0;
/* TODO: These are expensive calls, consider finer dirty tracking */
if (agx_update_vs(ctx))
@@ -1911,7 +1903,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
else if (ctx->stage[PIPE_SHADER_VERTEX].dirty)
ctx->dirty |= AGX_DIRTY_VS;
if (agx_update_fs(ctx))
if (agx_update_fs(batch))
ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG;
else if (ctx->stage[PIPE_SHADER_FRAGMENT].dirty)
ctx->dirty |= AGX_DIRTY_FS;
@@ -1939,7 +1931,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
AGX_INDEX_LIST_START_LENGTH +
AGX_INDEX_LIST_BUFFER_SIZE_LENGTH);
uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
uint8_t *out = agx_encode_state(batch, batch->encoder_current,
reduced_prim == PIPE_PRIM_LINES,
reduced_prim == PIPE_PRIM_POINTS);
@@ -2008,6 +2000,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
assert(batch->encoder_current <= batch->encoder_end &&
"Failed to reserve sufficient space in encoder");
ctx->dirty = 0;
assert(batch == agx_get_batch(ctx) && "batch should not change under us");
}
void agx_init_state_functions(struct pipe_context *ctx);


@@ -95,6 +95,7 @@ struct agx_array {
struct agx_batch {
struct agx_context *ctx;
struct pipe_framebuffer_state key;
uint64_t seqnum;
/* PIPE_CLEAR_* bitmask */
uint32_t clear, draw, load;
@@ -174,11 +175,24 @@ enum agx_dirty {
AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
};
#define AGX_MAX_BATCHES (2)
struct agx_context {
struct pipe_context base;
struct agx_compiled_shader *vs, *fs;
uint32_t dirty;
/* Set of batches. When full, the LRU entry (the batch with the smallest
* seqnum) is flushed to free a slot.
*/
struct {
uint64_t seqnum;
struct agx_batch slots[AGX_MAX_BATCHES];
/** Set of active batches for faster traversal */
BITSET_DECLARE(active, AGX_MAX_BATCHES);
} batches;
struct agx_batch *batch;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
@@ -204,8 +218,6 @@ struct agx_context {
struct util_debug_callback debug;
bool is_noop;
uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH];
struct blitter_context *blitter;
/* Map of agx_resource to agx_batch that writes that resource */
@@ -350,6 +362,9 @@ uint64_t
agx_push_location(struct agx_batch *batch, struct agx_push push,
enum pipe_shader_type stage);
bool
agx_batch_is_active(struct agx_batch *batch);
uint64_t
agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);
@@ -360,6 +375,9 @@ agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
uint64_t
agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);
uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
* hot paths. To achieve this we model BO lists by bit sets */
@@ -383,7 +401,7 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
{
/* Double the size of the BO list if we run out, this is amortized O(1) */
if (unlikely(bo->handle > agx_batch_bo_list_bits(batch))) {
batch->bo_list.set = rerzalloc(batch, batch->bo_list.set, BITSET_WORD,
batch->bo_list.set = rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD,
batch->bo_list.word_count,
batch->bo_list.word_count * 2);
batch->bo_list.word_count *= 2;
@@ -408,6 +426,7 @@ agx_batch_num_bo(struct agx_batch *batch)
BITSET_FOREACH_SET(handle, (batch)->bo_list.set, agx_batch_bo_list_bits(batch))
void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
void agx_flush_all(struct agx_context *ctx, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
@@ -415,6 +434,9 @@ void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const
void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc);
struct agx_batch *agx_get_batch(struct agx_context *ctx);
void agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch);
/* Blit shaders */
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
@@ -426,12 +448,6 @@ void agx_blit(struct pipe_context *pipe,
void agx_internal_shaders(struct agx_device *dev);
/* Batch logic */
static void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
ctx->base.flush(&ctx->base, NULL, 0);
}
void
agx_batch_init_state(struct agx_batch *batch);
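
Finally, a hedged usage sketch of the public helpers declared in this header, from the point of view of hypothetical callers: cpu_access_example and gpu_access_example are illustrative names only, while the agx_* calls and their signatures are the ones declared above.

```c
#include "agx_state.h" /* assumed include providing the declarations above */

/* CPU access: before the CPU reads a resource, the batch writing it must be
 * flushed; before the CPU writes it, batches still reading it must go too.
 */
static void cpu_access_example(struct agx_context *ctx,
                               struct agx_resource *rsrc, bool write)
{
   if (write)
      agx_flush_readers(ctx, rsrc, "CPU write to resource");

   agx_flush_writer(ctx, rsrc, "CPU access to resource");

   /* ... map rsrc->bo and access it on the CPU ... */
}

/* GPU access from a draw: fetch (or create) the batch for the current
 * framebuffer and record the dependency; the hazard tracking handles
 * flushing other batches as needed.
 */
static void gpu_access_example(struct agx_context *ctx,
                               struct agx_resource *rsrc, bool write)
{
   struct agx_batch *batch = agx_get_batch(ctx);

   if (write)
      agx_batch_writes(batch, rsrc);
   else
      agx_batch_reads(batch, rsrc);
}
```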