diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.c b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.c
new file mode 100644
index 00000000000..6f82b3a1024
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright © 2023 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define FD_BO_NO_HARDPIN 1
+
+#include "freedreno_batch.h"
+
+#include "fd6_barrier.h"
+#include "fd6_context.h"
+
+/* TODO probably more of the various fd6_event_write() should be
+ * consolidated here.
+ */
+
+/**
+ * Tell whether @evt is one of the timestamp ("_TS") events listed below,
+ * ie. one that event_write() emits with an address and seqno payload.
+ */
+static bool
+event_has_timestamp(enum vgt_event_type evt)
+{
+   switch (evt) {
+   case CACHE_FLUSH_TS:
+   case WT_DONE_TS:
+   case RB_DONE_TS:
+   case PC_CCU_FLUSH_DEPTH_TS:
+   case PC_CCU_FLUSH_COLOR_TS:
+   case PC_CCU_RESOLVE_TS:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
+ * Emit a CP_EVENT_WRITE packet for @evt into @ring.
+ *
+ * A timestamp event has a 4 dword payload: the event, ADDR_LO/HI built
+ * from control_ptr(), and a freshly incremented context seqno which is
+ * also the return value.  Any other event is one dword and returns zero.
+ */
+static uint32_t
+event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
+            enum vgt_event_type evt)
+{
+   struct fd6_context *fd6_ctx;
+   uint32_t seqno;
+
+   if (!event_has_timestamp(evt)) {
+      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
+      return 0;
+   }
+
+   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
+   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
+
+   fd6_ctx = fd6_context(ctx);
+   seqno = ++fd6_ctx->seqno;
+   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
+   OUT_RING(ring, seqno);
+
+   return seqno;
+}
+
+/**
+ * Emit the events and wait packets requested by the FD6_* bits in @flushes.
+ *
+ * Emission order is CCU flushes, CCU invalidates, cache flush, cache
+ * invalidate, and then the CP_WAIT_* packets.
+ */
+static void
+fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
+                 unsigned flushes)
+{
+   /* Experiments show that invalidating CCU while it still has data in it
+    * doesn't work, so make sure to always flush before invalidating in case
+    * any data remains that hasn't yet been made available through a barrier.
+    * However it does seem to work for UCHE.  The table is walked in order,
+    * so entries must stay sorted flush-before-invalidate.
+    */
+   static const struct {
+      unsigned mask;
+      enum vgt_event_type evt;
+   } events[] = {
+      { FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR, PC_CCU_FLUSH_COLOR_TS },
+      { FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH, PC_CCU_FLUSH_DEPTH_TS },
+      { FD6_INVALIDATE_CCU_COLOR, PC_CCU_INVALIDATE_COLOR },
+      { FD6_INVALIDATE_CCU_DEPTH, PC_CCU_INVALIDATE_DEPTH },
+      { FD6_FLUSH_CACHE, CACHE_FLUSH_TS },
+      { FD6_INVALIDATE_CACHE, CACHE_INVALIDATE },
+   };
+   const unsigned num_events = sizeof(events) / sizeof(events[0]);
+   bool wfi;
+
+   for (unsigned i = 0; i < num_events; i++) {
+      if (flushes & events[i].mask)
+         event_write(ctx, ring, events[i].evt);
+   }
+
+   if (flushes & FD6_WAIT_MEM_WRITES)
+      OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
+
+   /* With has_ccu_flush_bug, FD6_FLUSH_CCU_* bits also force a WFI: */
+   wfi = (flushes & FD6_WAIT_FOR_IDLE) != 0;
+   if (!wfi && ctx->screen->info->a6xx.has_ccu_flush_bug)
+      wfi = (flushes & (FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH)) != 0;
+   if (wfi)
+      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+   if (flushes & FD6_WAIT_FOR_ME)
+      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+}
+
+/**
+ * Emit the flushes accumulated in batch->barrier into the batch's draw
+ * ring, then clear the pending bits.
+ */
+void
+fd6_barrier_flush(struct fd_batch *batch)
+{
+   const unsigned pending = batch->barrier;
+
+   fd6_emit_flushes(batch->ctx, batch->draw, pending);
+   batch->barrier = 0;
+}
+
+/**
+ * Accumulate @flushes on the context's current batch, to be emitted by
+ * the next fd6_barrier_flush() on that batch.
+ */
+static void
+add_flushes(struct pipe_context *pctx, unsigned flushes)
+   assert_dt
+{
+   struct fd_batch *batch = NULL;
+
+   fd_batch_reference(&batch, fd_context(pctx)->batch);
+
+   /* Without a current batch there is nothing to record, a batch flush
+    * is already a sufficient barrier:
+    */
+   if (batch) {
+      batch->barrier |= flushes;
+      fd_batch_reference(&batch, NULL);
+   }
+}
+
+/**
+ * pipe_context::texture_barrier() implementation.
+ *
+ * A sampler barrier flushes the context.  A framebuffer barrier is
+ * deferred: the required flushes are recorded on the current batch.
+ */
+static void
+fd6_texture_barrier(struct pipe_context *pctx, unsigned flags)
+   in_dt
+{
+   const unsigned fb_flushes = FD6_WAIT_FOR_IDLE | FD6_WAIT_FOR_ME |
+                               FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH |
+                               FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE;
+
+   if (flags & PIPE_TEXTURE_BARRIER_SAMPLER) {
+      /* If we are sampling from the fb, we could get away with treating
+       * this as a PIPE_TEXTURE_BARRIER_FRAMEBUFFER in sysmem mode, but
+       * that won't work out in gmem mode because we don't patch the tex
+       * state outside of the case that the frag shader tells us it is
+       * an fb-read. And in particular, the fb-read case guarantees us
+       * that the read will be from the same texel, but the fb-bound-as-
+       * tex case does not.
+       *
+       * We could try to be clever here and detect if zsbuf/cbuf[n] is
+       * bound as a texture, but that doesn't really help if it is bound
+       * as a texture after the barrier without a lot of extra book-
+       * keeping. So hopefully no one calls glTextureBarrierNV() just
+       * for lolz.
+       */
+      pctx->flush(pctx, NULL, 0);
+      return;
+   }
+
+   add_flushes(pctx,
+               (flags & PIPE_TEXTURE_BARRIER_FRAMEBUFFER) ? fb_flushes : 0);
+}
+
+/**
+ * pipe_context::memory_barrier() implementation: map the PIPE_BARRIER_*
+ * bits in @flags to FD6_* flushes and record them on the current batch.
+ */
+static void
+fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
+   in_dt
+{
+   /* Barriers that only need a wait-for-idle: */
+   const unsigned wfi_barriers = PIPE_BARRIER_SHADER_BUFFER |
+                                 PIPE_BARRIER_IMAGE |
+                                 PIPE_BARRIER_CONSTANT_BUFFER |
+                                 PIPE_BARRIER_VERTEX_BUFFER |
+                                 PIPE_BARRIER_INDEX_BUFFER |
+                                 PIPE_BARRIER_STREAMOUT_BUFFER;
+   /* Barriers that additionally need a cache flush: */
+   const unsigned cache_barriers = PIPE_BARRIER_TEXTURE |
+                                   PIPE_BARRIER_INDIRECT_BUFFER |
+                                   PIPE_BARRIER_UPDATE_BUFFER |
+                                   PIPE_BARRIER_UPDATE_TEXTURE;
+   unsigned flushes = 0;
+
+   if (flags & wfi_barriers)
+      flushes |= FD6_WAIT_FOR_IDLE;
+
+   if (flags & cache_barriers)
+      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
+
+   /* The framebuffer case shares fd6_texture_barrier()'s flush set: */
+   if (flags & PIPE_BARRIER_FRAMEBUFFER)
+      fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
+
+   add_flushes(pctx, flushes);
+}
+
+/**
+ * Install the a6xx texture_barrier/memory_barrier hooks on @pctx.
+ */
+void
+fd6_barrier_init(struct pipe_context *pctx)
+{
+   pctx->memory_barrier = fd6_memory_barrier;
+   pctx->texture_barrier = fd6_texture_barrier;
+}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h
new file mode 100644
index 00000000000..74217d851d3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2023 Google, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef FD6_BARRIER_H_ +#define FD6_BARRIER_H_ + +#include "freedreno_context.h" + +/** + * Various flush operations that could be needed, with what is emitted for each + */ +enum fd6_flush { + FD6_FLUSH_CCU_COLOR = BIT(0), /* PC_CCU_FLUSH_COLOR_TS */ + FD6_FLUSH_CCU_DEPTH = BIT(1), /* PC_CCU_FLUSH_DEPTH_TS */ + FD6_INVALIDATE_CCU_COLOR = BIT(2), /* color flush, then PC_CCU_INVALIDATE_COLOR */ + FD6_INVALIDATE_CCU_DEPTH = BIT(3), /* depth flush, then PC_CCU_INVALIDATE_DEPTH */ + FD6_FLUSH_CACHE = BIT(4), /* CACHE_FLUSH_TS */ + FD6_INVALIDATE_CACHE = BIT(5), /* CACHE_INVALIDATE */ + FD6_WAIT_MEM_WRITES = BIT(6), /* CP_WAIT_MEM_WRITES */ + FD6_WAIT_FOR_IDLE = BIT(7), /* CP_WAIT_FOR_IDLE */ + FD6_WAIT_FOR_ME = BIT(8), /* CP_WAIT_FOR_ME */ +}; + +void fd6_barrier_flush(struct fd_batch *batch) assert_dt; + +void fd6_barrier_init(struct pipe_context *pctx); + +#endif /* FD6_BARRIER_H_ */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 0414971c3e7..bf4c377079d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -33,6 +33,7 @@ #include "freedreno_resource.h" #include "freedreno_tracepoints.h" +#include "fd6_barrier.h" #include "fd6_compute.h" #include "fd6_const.h" #include "fd6_context.h" @@ -181,6 +182,9 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ + if (ctx->batch->barrier) + fd6_barrier_flush(ctx->batch); + if (info->indirect) { struct fd_resource *rsc = fd_resource(info->indirect); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 8fa3d974d88..1a14afbe126 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -30,6 +30,7 @@ #include "freedreno_query_acc.h" #include "freedreno_state.h" +#include "fd6_barrier.h" #include "fd6_blend.h" #include "fd6_blitter.h" #include "fd6_compute.h" @@ -229,7 +230,6 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd6_gmem_init(pctx); fd6_texture_init(pctx); fd6_prog_init(pctx); - fd6_emit_init(pctx);
fd6_query_context_init(pctx); setup_state_map(&fd6_ctx->base); @@ -243,6 +243,9 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, /* after fd_context_init() to override set_shader_images() */ fd6_image_init(pctx); + /* after fd_context_init() to override memory_barrier/texture_barrier(): */ + fd6_barrier_init(pctx); + util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true); pctx->delete_vertex_elements_state = fd6_vertex_state_delete; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 525dcd81468..c82d9a52092 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -35,6 +35,7 @@ #include "freedreno_resource.h" #include "freedreno_state.h" +#include "fd6_barrier.h" #include "fd6_context.h" #include "fd6_draw.h" #include "fd6_emit.h" @@ -326,6 +327,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, if (emit.dirty_groups) fd6_emit_3d_state(ring, &emit); + if (ctx->batch->barrier) + fd6_barrier_flush(ctx->batch); + /* for debug after a lock up, write a unique counter value * to scratch7 for each draw, to make it easier to match up * register dumps to cmdstream. The combination of IB diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 98577637fc8..3ce2d0e36e2 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1131,48 +1131,6 @@ fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst, } } -/* this is *almost* the same as fd6_cache_flush().. which I guess - * could be re-worked to be something a bit more generic w/ param - * indicating what needs to be flushed.. although that would mean - * figuring out which events trigger what state to flush.. 
- */ -static void -fd6_framebuffer_barrier(struct fd_context *ctx) assert_dt -{ - struct fd6_context *fd6_ctx = fd6_context(ctx); - struct fd_batch *batch = fd_context_batch_locked(ctx); - struct fd_ringbuffer *ring = batch->draw; - unsigned seqno; - - fd_batch_needs_flush(batch); - - seqno = fd6_event_write(batch, ring, RB_DONE_TS, true); - - OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); - OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | - CP_WAIT_REG_MEM_0_POLL_MEMORY); - OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0)); - OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); - - fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true); - fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true); - - seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true); - fd_wfi(batch, ring); - - fd6_event_write(batch, ring, CACHE_INVALIDATE, false); - - OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4); - OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0)); - OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); - OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno)); - - fd_batch_unlock_submit(batch); - fd_batch_reference(&batch, NULL); -} - void fd6_emit_init_screen(struct pipe_screen *pscreen) { @@ -1180,10 +1138,3 @@ fd6_emit_init_screen(struct pipe_screen *pscreen) screen->emit_ib = fd6_emit_ib; screen->mem_to_mem = fd6_mem_to_mem; } - -void -fd6_emit_init(struct pipe_context *pctx) disable_thread_safety_analysis -{ - struct fd_context *ctx = fd_context(pctx); - ctx->framebuffer_barrier = fd6_framebuffer_barrier; -} diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index f60d1b82c21..6a67877425c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -324,7 +324,6 @@ void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd6_emit_restore(struct fd_batch *batch, struct 
fd_ringbuffer *ring); void fd6_emit_init_screen(struct pipe_screen *pscreen); -void fd6_emit_init(struct pipe_context *pctx); static inline void fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index bfccc2f0b8f..e045c588400 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -114,6 +114,9 @@ struct fd_batch { */ const struct fd_gmem_stateobj *gmem_state; + /* Driver specific barrier/flush flags: */ + unsigned barrier; + /* A calculated "draw cost" value for the batch, which tries to * estimate the bandwidth-per-sample of all the draws according * to: diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index b341d2baa8e..937de2277a7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -156,15 +156,6 @@ out: static void fd_texture_barrier(struct pipe_context *pctx, unsigned flags) in_dt { - if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) { - struct fd_context *ctx = fd_context(pctx); - - if (ctx->framebuffer_barrier) { - ctx->framebuffer_barrier(ctx); - return; - } - } - /* On devices that could sample from GMEM we could possibly do better. * Or if we knew that we were doing GMEM bypass we could just emit a * cache flush, perhaps? 
But we don't know if future draws would cause diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 2a3623ab3ff..ebc806f3183 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -510,9 +510,6 @@ struct fd_context { void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc, enum pipe_format format) dt; - /* handling for barriers: */ - void (*framebuffer_barrier)(struct fd_context *ctx) dt; - /* logger: */ void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset); diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 9dc2cdbb885..a5bb13ca692 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -178,6 +178,8 @@ files_libfreedreno = files( 'a5xx/fd5_texture.h', 'a5xx/fd5_zsa.c', 'a5xx/fd5_zsa.h', + 'a6xx/fd6_barrier.c', + 'a6xx/fd6_barrier.h', 'a6xx/fd6_blend.c', 'a6xx/fd6_blend.h', 'a6xx/fd6_blitter.c',