asahi: Add batch tracking logic
We already have the notion of an agx_batch, which encapsulates a render pass.
Extend the logic to allow multiple in-flight batches per context, avoiding a
flush in set_framebuffer_state and improving performance for certain
applications designed for IMRs that ping-pong unnecessarily between FBOs. I
don't have such an application immediately in mind, but I wanted to get this
flag-day out of the way while the driver is still small and flexible.

The driver was written from day 1 with batch tracking in mind, so this is a
relatively small change to actually wire it up, but there are lots of little
details to get right.

The code itself is mostly a copy/paste of panfrost, which in turn draws
inspiration from freedreno and v3d.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19865>
parent de1eb9400f
commit d7511ad784

4 changed files with 327 additions and 150 deletions
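The sketch below is a minimal, self-contained illustration of the batch lookup policy the commit message describes: reuse a batch whose framebuffer key matches, otherwise take a free slot, otherwise flush and evict the least-recently-used batch (the one with the smallest seqnum). The types and names here (struct batch, struct fb_key, get_batch, MAX_BATCHES) are simplified stand-ins for illustration only; the driver's actual definitions follow in the diff.

/* Minimal standalone sketch of the LRU batch lookup, with stand-in types. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_BATCHES 2

struct fb_key { int width, height; };   /* stands in for a framebuffer state key */

struct batch {
   struct fb_key key;
   uint64_t seqnum;   /* bumped on every use; smallest value == least recently used */
   bool active;
};

struct context {
   struct batch slots[MAX_BATCHES];
   uint64_t seqnum;
   unsigned flushes;
};

static bool key_equal(const struct fb_key *a, const struct fb_key *b)
{
   return a->width == b->width && a->height == b->height;
}

static struct batch *get_batch(struct context *ctx, struct fb_key key)
{
   /* 1. Reuse an active batch with a matching framebuffer key */
   for (unsigned i = 0; i < MAX_BATCHES; ++i) {
      if (ctx->slots[i].active && key_equal(&ctx->slots[i].key, &key)) {
         ctx->slots[i].seqnum = ++ctx->seqnum;
         return &ctx->slots[i];
      }
   }

   /* 2. Otherwise take a free slot */
   struct batch *batch = NULL;
   for (unsigned i = 0; i < MAX_BATCHES; ++i) {
      if (!ctx->slots[i].active) {
         batch = &ctx->slots[i];
         break;
      }
   }

   /* 3. Otherwise evict the LRU batch (smallest seqnum), flushing it first */
   if (!batch) {
      for (unsigned i = 0; i < MAX_BATCHES; ++i) {
         if (!batch || ctx->slots[i].seqnum < batch->seqnum)
            batch = &ctx->slots[i];
      }
      ctx->flushes++;   /* a real driver would submit the evicted batch here */
   }

   batch->key = key;
   batch->seqnum = ++ctx->seqnum;
   batch->active = true;
   return batch;
}

int main(void)
{
   struct context ctx = {0};
   struct fb_key a = {1920, 1080}, b = {256, 256};

   /* An IMR-style app ping-ponging between two FBOs: with two slots, the
    * lookup keeps reusing the same two batches and never has to flush. */
   for (int i = 0; i < 8; ++i)
      get_batch(&ctx, (i & 1) ? b : a);

   printf("flushes: %u\n", ctx.flushes);   /* prints 0 */
   return 0;
}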
@@ -1,16 +1,202 @@
/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_state.h"

void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
#define foreach_batch(ctx, idx) \
   BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)

static unsigned
agx_batch_idx(struct agx_batch *batch)
{
   /* TODO: Turn into loop when we support multiple batches */
   if (ctx->batch) {
      struct agx_batch *batch = ctx->batch;
   return batch - batch->ctx->batches.slots;
   }

bool
agx_batch_is_active(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
}

static void
agx_batch_init(struct agx_context *ctx,
               const struct pipe_framebuffer_state *key,
               struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   batch->ctx = ctx;
   util_copy_framebuffer_state(&batch->key, key);
   batch->seqnum = ++ctx->batches.seqnum;

   agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
   agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_SHADER, true);

   /* These allocations can happen only once and will just be zeroed (not freed)
    * during batch clean up. The memory is owned by the context.
    */
   if (!batch->bo_list.set) {
      batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
      batch->bo_list.word_count = 128;
   } else {
      memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
   }

   if (!batch->encoder) {
      batch->encoder = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
      batch->encoder_current = batch->encoder->ptr.cpu;
      batch->encoder_end = batch->encoder_current + batch->encoder->size;
   } else {
      batch->encoder_current = batch->encoder->ptr.cpu;
      batch->encoder_end = batch->encoder_current + batch->encoder->size;
   }

   if (!batch->scissor.bo) {
      batch->scissor.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
   }

   if (!batch->depth_bias.bo) {
      batch->depth_bias.bo = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
   }

   batch->clear = 0;
   batch->draw = 0;
   batch->load = 0;
   batch->clear_depth = 0;
   batch->clear_stencil = 0;
   batch->scissor.count = 0;
   batch->depth_bias.count = 0;
   batch->varyings = 0;

   /* We need to emit prim state at the start. Max collides with all. */
   batch->reduced_prim = PIPE_PRIM_MAX;

   if (batch->key.zsbuf) {
      agx_batch_writes(batch, agx_resource(key->zsbuf->texture));
   }

   for (unsigned i = 0; i < key->nr_cbufs; ++i) {
      agx_batch_writes(batch, agx_resource(key->cbufs[i]->texture));
   }

   unsigned batch_idx = agx_batch_idx(batch);
   BITSET_SET(ctx->batches.active, batch_idx);

   agx_batch_init_state(batch);
}

void
agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   assert(batch->ctx == ctx);

   if (ctx->batch == batch)
      ctx->batch = NULL;

   /* There is no more writer for anything we wrote recorded on this context */
   hash_table_foreach(ctx->writer, ent) {
      if (ent->data == batch)
         _mesa_hash_table_remove(ctx->writer, ent);
   }

   int handle;
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      agx_bo_unreference(agx_lookup_bo(dev, handle));
   }

   agx_pool_cleanup(&batch->pool);
   agx_pool_cleanup(&batch->pipeline_pool);
   util_unreference_framebuffer_state(&batch->key);

   unsigned batch_idx = agx_batch_idx(batch);
   BITSET_CLEAR(ctx->batches.active, batch_idx);
}

static struct agx_batch *
agx_get_batch_for_framebuffer(struct agx_context *ctx,
                              const struct pipe_framebuffer_state *state)
{
   /* Look if we have a matching batch */
   unsigned i;
   foreach_batch(ctx, i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];

      if (util_framebuffer_state_equal(&candidate->key, state)) {
         /* We found a match, increase the seqnum for the LRU
          * eviction logic.
          */
         candidate->seqnum = ++ctx->batches.seqnum;
         return candidate;
      }
   }

   /* Look if we have a free batch */
   struct agx_batch *batch = NULL;
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (!BITSET_TEST(ctx->batches.active, i)) {
         batch = &ctx->batches.slots[i];
         break;
      }
   }

   /* Else, evict something */
   if (!batch) {
      for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
         struct agx_batch *candidate = &ctx->batches.slots[i];

         if (!batch || batch->seqnum > candidate->seqnum)
            batch = candidate;
      }

      agx_flush_batch(ctx, batch);
   }

   /* Batch is now free */
   agx_batch_init(ctx, state, batch);
   return batch;
}

struct agx_batch *
agx_get_batch(struct agx_context *ctx)
{
   if (!ctx->batch) {
      ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
      agx_dirty_all(ctx);
   }

   assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
   return ctx->batch;
}

void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

   unsigned idx;
   foreach_batch(ctx, idx) {
      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }
}

static void
agx_flush_readers_except(struct agx_context *ctx,
                         struct agx_resource *rsrc,
                         struct agx_batch *except,
                         const char *reason)
{
   unsigned idx;

   foreach_batch(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (batch == except)
         continue;

      if (agx_batch_uses_bo(batch, rsrc->bo)) {
         perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
@@ -19,20 +205,38 @@ agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char
      }
   }

void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
static void
agx_flush_writer_except(struct agx_context *ctx,
                        struct agx_resource *rsrc,
                        struct agx_batch *except,
                        const char *reason)
{
   struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);

   if (ent) {
   if (ent && ent->data != except) {
      perf_debug_ctx(ctx, "Flush writer due to: %s\n", reason);
      agx_flush_batch(ctx, ent->data);
   }
}

void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason);
}

void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason);
}

void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
   /* Hazard: read-after-write */
   agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch");

   agx_batch_add_bo(batch, rsrc->bo);

   if (rsrc->separate_stencil)
@@ -45,12 +249,15 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
   struct agx_context *ctx = batch->ctx;
   struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);

   agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch");

   /* Nothing to do if we're already writing */
   if (ent && ent->data == batch)
      return;

   /* Flush the old writer if there is one */
   agx_flush_writer(ctx, rsrc, "Multiple writers");
   /* Hazard: write-after-write, write-after-read */
   if (ent)
      agx_flush_writer(ctx, rsrc, "Multiple writers");

   /* Write is strictly stronger than a read */
   agx_batch_reads(batch, rsrc);
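Below is a standalone sketch of the read/write hazard rules that agx_batch_reads() and agx_batch_writes() implement above, under simplified assumptions: each resource remembers which batch (if any) currently writes it, each batch tracks the resources it touches, and "flush" is only a stub that prints and resets. The types and helpers (struct resource, batch_reads, batch_writes, flush) are illustrative stand-ins, not driver code; the exact flush ordering in the driver differs slightly.

/* Reading flushes a foreign writer (read-after-write); writing flushes
 * foreign readers and any foreign writer (write-after-read, write-after-write). */
#include <stdbool.h>
#include <stdio.h>

#define N_BATCHES 2
#define N_RESOURCES 4

struct resource { int writer; };                 /* index of writing batch, or -1 */
struct batch    { bool uses[N_RESOURCES]; };     /* resources this batch touches */

static struct batch    batches[N_BATCHES];
static struct resource resources[N_RESOURCES];

static void flush(int b, const char *reason)
{
   printf("flush batch %d: %s\n", b, reason);
   for (int r = 0; r < N_RESOURCES; ++r) {
      batches[b].uses[r] = false;
      if (resources[r].writer == b)
         resources[r].writer = -1;
   }
}

static void batch_reads(int b, int r)
{
   /* Hazard: read-after-write, so flush a foreign writer first */
   if (resources[r].writer >= 0 && resources[r].writer != b)
      flush(resources[r].writer, "read-after-write");

   batches[b].uses[r] = true;
}

static void batch_writes(int b, int r)
{
   /* Hazard: write-after-read, so flush foreign readers */
   for (int other = 0; other < N_BATCHES; ++other)
      if (other != b && batches[other].uses[r])
         flush(other, "write-after-read");

   /* Hazard: write-after-write, so flush a remaining foreign writer */
   if (resources[r].writer >= 0 && resources[r].writer != b)
      flush(resources[r].writer, "write-after-write");

   batch_reads(b, r);            /* a write is strictly stronger than a read */
   resources[r].writer = b;
}

int main(void)
{
   for (int r = 0; r < N_RESOURCES; ++r)
      resources[r].writer = -1;

   batch_writes(0, 0);   /* batch 0 renders to resource 0 */
   batch_reads(1, 0);    /* batch 1 samples it: batch 0 is flushed first */
   return 0;
}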
@@ -659,7 +659,7 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
          const union pipe_color_union *color, double depth, unsigned stencil)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = ctx->batch;
   struct agx_batch *batch = agx_get_batch(ctx);

   unsigned fastclear = buffers & ~(batch->draw | batch->load);
   unsigned slowclear = buffers & ~fastclear;

@@ -690,11 +690,11 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
      assert((batch->draw & slowclear) == slowclear);
}

static void
agx_flush_resource(struct pipe_context *ctx,
                   struct pipe_resource *resource)
{
   agx_flush_writer(agx_context(ctx), agx_resource(resource), "flush_resource");
}

/*

@@ -710,7 +710,7 @@ agx_flush(struct pipe_context *pctx,
   if (fence)
      *fence = NULL;

   agx_flush_batch(ctx, ctx->batch);
   agx_flush_all(ctx, "Gallium flush");
}

void

@@ -718,9 +718,13 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   assert(agx_batch_is_active(batch));

   /* Nothing to do */
   if (!(batch->draw | batch->clear))
   if (!(batch->draw | batch->clear)) {
      agx_batch_cleanup(ctx, batch);
      return;
   }

   /* Finalize the encoder */
   uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };

@@ -761,7 +765,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
      pipeline_store =
         agx_build_store_pipeline(batch,
                                  dev->internal.store,
                                  agx_pool_upload(&batch->pool, ctx->render_target[0], sizeof(ctx->render_target)));
                                  agx_batch_upload_pbe(batch, 0));
   }

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {

@@ -851,37 +855,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
      agxdecode_next_frame();
   }

   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      agx_bo_unreference(agx_lookup_bo(dev, handle));
   }

   /* There is no more writer for anything we wrote recorded on this context */
   hash_table_foreach(ctx->writer, ent) {
      if (ent->data == batch)
         _mesa_hash_table_remove(ctx->writer, ent);
   }

   memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
   agx_pool_cleanup(&batch->pool);
   agx_pool_cleanup(&batch->pipeline_pool);
   agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
   agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true);
   batch->clear = 0;
   batch->draw = 0;
   batch->load = 0;
   batch->encoder_current = batch->encoder->ptr.cpu;
   batch->encoder_end = batch->encoder_current + batch->encoder->size;
   batch->scissor.count = 0;

   agx_dirty_all(ctx);
   agx_batch_init_state(batch);

   /* After resetting the batch, rebind the framebuffer so we update resource
    * tracking logic and the BO lists.
    *
    * XXX: This is a hack to workaround lack of proper batch tracking.
    */
   ctx->base.set_framebuffer_state(&ctx->base, &ctx->framebuffer);
   agx_batch_cleanup(ctx, batch);
}

static void

@@ -919,20 +893,6 @@ agx_create_context(struct pipe_screen *screen,
   pctx->screen = screen;
   pctx->priv = priv;

   ctx->batch = rzalloc(ctx, struct agx_batch);
   ctx->batch->ctx = ctx;
   ctx->batch->bo_list.set = rzalloc_array(ctx->batch, BITSET_WORD, 128);
   ctx->batch->bo_list.word_count = 128;
   agx_pool_init(&ctx->batch->pool,
                 agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true);
   agx_pool_init(&ctx->batch->pipeline_pool,
                 agx_device(screen), AGX_MEMORY_TYPE_SHADER, true);
   ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
   ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu;
   ctx->batch->encoder_end = ctx->batch->encoder_current + ctx->batch->encoder->size;
   ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);
   ctx->batch->depth_bias.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER);

   ctx->writer = _mesa_pointer_hash_table_create(ctx);

   /* Upload fixed shaders (TODO: compile them?) */
@@ -781,63 +781,61 @@ agx_set_framebuffer_state(struct pipe_context *pctx,
   if (!state)
      return;

   /* XXX: eliminate this flush with batch tracking logic */
   agx_flush_all(ctx, "Framebuffer switch");

   util_copy_framebuffer_state(&ctx->framebuffer, state);
   util_copy_framebuffer_state(&ctx->batch->key, state);
   ctx->dirty = ~0;

   if (state->zsbuf)
      agx_batch_writes(ctx->batch, agx_resource(state->zsbuf->texture));

   for (unsigned i = 0; i < state->nr_cbufs; ++i) {
      struct pipe_surface *surf = state->cbufs[i];
      struct agx_resource *tex = agx_resource(surf->texture);
      const struct util_format_description *desc =
         util_format_description(surf->format);
      unsigned level = surf->u.tex.level;
      unsigned layer = surf->u.tex.first_layer;

      agx_batch_writes(ctx->batch, tex);

      assert(surf->u.tex.last_layer == layer);

      agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) {
         cfg.layout = agx_translate_layout(tex->layout.tiling);
         cfg.channels = agx_pixel_format[surf->format].channels;
         cfg.type = agx_pixel_format[surf->format].type;

         assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
         cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;

         if (desc->nr_channels >= 2)
            cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;

         if (desc->nr_channels >= 3)
            cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;

         if (desc->nr_channels >= 4)
            cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;

         cfg.width = state->width;
         cfg.height = state->height;
         cfg.level = surf->u.tex.level;
         cfg.buffer = agx_map_texture_gpu(tex, layer);
         cfg.unk_mipmapped = tex->mipmapped;

         if (tex->layout.tiling == AIL_TILING_LINEAR) {
            cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
            cfg.levels = 1;
         } else {
            cfg.unk_tiled = true;
            cfg.levels = tex->base.last_level + 1;
         }
      };
   }
   ctx->batch = NULL;
   agx_dirty_all(ctx);
}

uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt)
{
   struct pipe_surface *surf = batch->key.cbufs[rt];
   struct agx_resource *tex = agx_resource(surf->texture);
   const struct util_format_description *desc =
      util_format_description(surf->format);
   unsigned level = surf->u.tex.level;
   unsigned layer = surf->u.tex.first_layer;

   assert(surf->u.tex.last_layer == layer);

   struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, AGX_RENDER_TARGET_LENGTH, 256);

   agx_pack(T.cpu, RENDER_TARGET, cfg) {
      cfg.layout = agx_translate_layout(tex->layout.tiling);
      cfg.channels = agx_pixel_format[surf->format].channels;
      cfg.type = agx_pixel_format[surf->format].type;

      assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
      cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3;

      if (desc->nr_channels >= 2)
         cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3;

      if (desc->nr_channels >= 3)
         cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3;

      if (desc->nr_channels >= 4)
         cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3;

      cfg.width = batch->key.width;
      cfg.height = batch->key.height;
      cfg.level = surf->u.tex.level;
      cfg.buffer = agx_map_texture_gpu(tex, layer);
      cfg.unk_mipmapped = tex->mipmapped;

      if (tex->layout.tiling == AIL_TILING_LINEAR) {
         cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4;
         cfg.levels = 1;
      } else {
         cfg.unk_tiled = true;
         cfg.levels = tex->base.last_level + 1;
      }
   };

   return T.gpu;
}

/* Likewise constant buffers, textures, and samplers are handled in a common
 * per-draw path, with dirty tracking to reduce the costs involved.
 */
@@ -1224,18 +1222,20 @@ agx_update_vs(struct agx_context *ctx)
}

static bool
agx_update_fs(struct agx_context *ctx)
agx_update_fs(struct agx_batch *batch)
{
   struct agx_context *ctx = batch->ctx;

   struct asahi_shader_key key = {
      .nr_cbufs = ctx->batch->key.nr_cbufs,
      .nr_cbufs = batch->key.nr_cbufs,
      .clip_plane_enable = ctx->rast->base.clip_plane_enable,
   };

   if (ctx->batch->reduced_prim == PIPE_PRIM_POINTS)
   if (batch->reduced_prim == PIPE_PRIM_POINTS)
      key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable;

   for (unsigned i = 0; i < key.nr_cbufs; ++i) {
      struct pipe_surface *surf = ctx->batch->key.cbufs[i];
      struct pipe_surface *surf = batch->key.cbufs[i];

      if (surf) {
         enum pipe_format fmt = surf->format;

@@ -1557,9 +1557,6 @@ agx_batch_init_state(struct agx_batch *batch)

   agx_ppp_fini(&out, &ppp);
   batch->encoder_current = out;

   /* We need to emit prim state at the start. Max collides with all. */
   batch->reduced_prim = PIPE_PRIM_MAX;
}

static enum agx_object_type

@@ -1586,9 +1583,10 @@ agx_pass_type_for_shader(struct agx_shader_info *info)
#define MAX_PPP_UPDATES 2

static uint8_t *
agx_encode_state(struct agx_context *ctx, uint8_t *out,
agx_encode_state(struct agx_batch *batch, uint8_t *out,
                 bool is_lines, bool is_points)
{
   struct agx_context *ctx = batch->ctx;
   struct agx_rasterizer *rast = ctx->rast;
   unsigned ppp_updates = 0;

@@ -1613,7 +1611,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
   out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH;

   agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_1, cfg) {
      cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->vs, PIPE_SHADER_VERTEX);
      cfg.pipeline = agx_build_pipeline(batch, ctx->vs, PIPE_SHADER_VERTEX);
   }
   out += AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH;

@@ -1634,17 +1632,17 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
      out += 4;
   }

   struct agx_pool *pool = &ctx->batch->pool;
   struct agx_pool *pool = &batch->pool;
   struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs;
   unsigned zbias = 0;

   if (ctx->rast->base.offset_tri) {
      zbias = agx_upload_depth_bias(ctx->batch, &ctx->rast->base);
      zbias = agx_upload_depth_bias(batch, &ctx->rast->base);
      ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS;
   }

   if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) {
      agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport,
      agx_upload_viewport_scissor(pool, batch, &out, &ctx->viewport,
                                  ctx->rast->base.scissor ? &ctx->scissor : NULL,
                                  zbias);
   }

@@ -1652,7 +1650,7 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
   bool varyings_dirty = false;

   if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) {
      ctx->batch->varyings = agx_link_varyings_vs_fs(&ctx->batch->pipeline_pool,
      batch->varyings = agx_link_varyings_vs_fs(&batch->pipeline_pool,
                                                     &ctx->vs->info.varyings.vs,
                                                     &ctx->fs->info.varyings.fs,
                                                     ctx->rast->base.flatshade_first);

@@ -1774,13 +1772,13 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
   if (IS_DIRTY(FS) || varyings_dirty) {
      unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
      agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
         cfg.pipeline = agx_build_pipeline(ctx->batch, ctx->fs, PIPE_SHADER_FRAGMENT),
         cfg.pipeline = agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT),
         cfg.uniform_register_count = ctx->fs->info.push_count;
         cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs;
         cfg.texture_state_register_count = frag_tex_count;
         cfg.sampler_state_register_count = frag_tex_count;
         cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
         cfg.cf_bindings = ctx->batch->varyings;
         cfg.cf_bindings = batch->varyings;

         /* XXX: This is probably wrong */
         cfg.unknown_30 = frag_tex_count >= 4;

@@ -1883,18 +1881,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
   }

   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = ctx->batch;
   struct agx_batch *batch = agx_get_batch(ctx);

   if (agx_scissor_culls_everything(ctx))
      return;

#ifndef NDEBUG
   /* For debugging dirty tracking, mark all state as dirty every draw, forcing
    * everything to be re-emitted fresh.
    */
   if (unlikely(agx_device(pctx->screen)->debug & AGX_DBG_DIRTY))
      agx_dirty_all(ctx);
#endif
      agx_dirty_all(ctx);

   /* Dirty track the reduced prim: lines vs points vs triangles */
   enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);

@@ -1902,8 +1894,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
      batch->reduced_prim = reduced_prim;

   /* TODO: masks */
   ctx->batch->draw |= ~0;
   ctx->batch->load |= ~0;
   batch->draw |= ~0;
   batch->load |= ~0;

   /* TODO: These are expensive calls, consider finer dirty tracking */
   if (agx_update_vs(ctx))

@@ -1911,7 +1903,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
   else if (ctx->stage[PIPE_SHADER_VERTEX].dirty)
      ctx->dirty |= AGX_DIRTY_VS;

   if (agx_update_fs(ctx))
   if (agx_update_fs(batch))
      ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG;
   else if (ctx->stage[PIPE_SHADER_FRAGMENT].dirty)
      ctx->dirty |= AGX_DIRTY_FS;

@@ -1939,7 +1931,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
                            AGX_INDEX_LIST_START_LENGTH +
                            AGX_INDEX_LIST_BUFFER_SIZE_LENGTH);

   uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
   uint8_t *out = agx_encode_state(batch, batch->encoder_current,
                                   reduced_prim == PIPE_PRIM_LINES,
                                   reduced_prim == PIPE_PRIM_POINTS);

@@ -2008,6 +2000,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
   assert(batch->encoder_current <= batch->encoder_end &&
          "Failed to reserve sufficient space in encoder");
   ctx->dirty = 0;

   assert(batch == agx_get_batch(ctx) && "batch should not change under us");
}

void agx_init_state_functions(struct pipe_context *ctx);
@@ -95,6 +95,7 @@ struct agx_array {
struct agx_batch {
   struct agx_context *ctx;
   struct pipe_framebuffer_state key;
   uint64_t seqnum;

   /* PIPE_CLEAR_* bitmask */
   uint32_t clear, draw, load;

@@ -174,11 +175,24 @@ enum agx_dirty {
   AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
};

#define AGX_MAX_BATCHES (2)

struct agx_context {
   struct pipe_context base;
   struct agx_compiled_shader *vs, *fs;
   uint32_t dirty;

   /* Set of batches. When full, the LRU entry (the batch with the smallest
    * seqnum) is flushed to free a slot.
    */
   struct {
      uint64_t seqnum;
      struct agx_batch slots[AGX_MAX_BATCHES];

      /** Set of active batches for faster traversal */
      BITSET_DECLARE(active, AGX_MAX_BATCHES);
   } batches;

   struct agx_batch *batch;

   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];

@@ -204,8 +218,6 @@ struct agx_context {
   struct util_debug_callback debug;
   bool is_noop;

   uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH];

   struct blitter_context *blitter;

   /* Map of agx_resource to agx_batch that writes that resource */

@@ -350,6 +362,9 @@ uint64_t
agx_push_location(struct agx_batch *batch, struct agx_push push,
                  enum pipe_shader_type stage);

bool
agx_batch_is_active(struct agx_batch *batch);

uint64_t
agx_build_clear_pipeline(struct agx_batch *batch, uint32_t code, uint64_t clear_buf);

@@ -360,6 +375,9 @@ agx_build_store_pipeline(struct agx_batch *batch, uint32_t code,
uint64_t
agx_build_reload_pipeline(struct agx_batch *batch, uint32_t code, struct pipe_surface *surf);

uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);

/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
 * hot paths. To achieve this we model BO lists by bit sets */

@@ -383,7 +401,7 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
{
   /* Double the size of the BO list if we run out, this is amortized O(1) */
   if (unlikely(bo->handle > agx_batch_bo_list_bits(batch))) {
      batch->bo_list.set = rerzalloc(batch, batch->bo_list.set, BITSET_WORD,
      batch->bo_list.set = rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD,
                                     batch->bo_list.word_count,
                                     batch->bo_list.word_count * 2);
      batch->bo_list.word_count *= 2;
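For reference, here is a minimal sketch of the amortized-O(1) BO list pattern that agx_batch_add_bo() uses: membership is a bit set indexed by BO handle, and the set doubles whenever a handle exceeds the current capacity. The names below (struct bo_list, bo_list_add, bo_list_contains) are hypothetical stand-ins, not the driver's API, and plain realloc/memset stands in for the ralloc helpers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct bo_list {
   uint32_t *words;        /* one bit per BO handle */
   unsigned word_count;    /* capacity in 32-bit words */
};

static void bo_list_add(struct bo_list *list, unsigned handle)
{
   /* Double the backing store until the handle fits: amortized O(1). */
   if (handle >= list->word_count * 32) {
      unsigned new_count = list->word_count ? list->word_count : 4;
      while (handle >= new_count * 32)
         new_count *= 2;

      list->words = realloc(list->words, new_count * sizeof(uint32_t));
      memset(list->words + list->word_count, 0,
             (new_count - list->word_count) * sizeof(uint32_t));
      list->word_count = new_count;
   }

   list->words[handle / 32] |= 1u << (handle % 32);
}

static bool bo_list_contains(const struct bo_list *list, unsigned handle)
{
   if (handle >= list->word_count * 32)
      return false;

   return list->words[handle / 32] & (1u << (handle % 32));
}

int main(void)
{
   struct bo_list list = {0};

   bo_list_add(&list, 3);
   bo_list_add(&list, 1000);   /* forces the set to grow */

   printf("%d %d %d\n", bo_list_contains(&list, 3),
          bo_list_contains(&list, 1000), bo_list_contains(&list, 4));
   free(list.words);
   return 0;
}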
@@ -408,6 +426,7 @@ agx_batch_num_bo(struct agx_batch *batch)
   BITSET_FOREACH_SET(handle, (batch)->bo_list.set, agx_batch_bo_list_bits(batch))

void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
void agx_flush_all(struct agx_context *ctx, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);

@@ -415,6 +434,9 @@ void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const
void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc);

struct agx_batch *agx_get_batch(struct agx_context *ctx);
void agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch);

/* Blit shaders */
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,

@@ -426,12 +448,6 @@ void agx_blit(struct pipe_context *pipe,
void agx_internal_shaders(struct agx_device *dev);

/* Batch logic */
static void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
   perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
   ctx->base.flush(&ctx->base, NULL, 0);
}

void
agx_batch_init_state(struct agx_batch *batch);