freedreno/a6xx: Split preamble for gmem vs sysmem

Possibly overkill currently, if we only preempt on bin boundaries.  But
might as well be complete in case that ever changes on the kernel side.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38450>
This commit is contained in:
Rob Clark 2025-11-02 09:37:29 -08:00 committed by Marge Bot
parent 701eb1ce8d
commit e0b6f97b9c
5 changed files with 35 additions and 22 deletions

View file

@ -44,8 +44,11 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt
if (fd6_ctx->sample_locations_disable_stateobj)
fd_ringbuffer_del(fd6_ctx->sample_locations_disable_stateobj);
if (fd6_ctx->preamble)
fd_ringbuffer_del(fd6_ctx->preamble);
if (fd6_ctx->sysmem_preamble)
fd_ringbuffer_del(fd6_ctx->sysmem_preamble);
if (fd6_ctx->gmem_preamble)
fd_ringbuffer_del(fd6_ctx->gmem_preamble);
if (fd6_ctx->restore)
fd_ringbuffer_del(fd6_ctx->restore);
@ -315,7 +318,8 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
fd6_ctx->sample_locations_disable_stateobj = crb;
fd6_ctx->preamble = fd6_build_preemption_preamble<CHIP>(&fd6_ctx->base);
fd6_ctx->sysmem_preamble = fd6_build_preemption_preamble<CHIP>(&fd6_ctx->base, false);
fd6_ctx->gmem_preamble = fd6_build_preemption_preamble<CHIP>(&fd6_ctx->base, true);
fd_cs restore(fd6_ctx->base.pipe, 0x1000);
fd6_emit_static_regs<CHIP>(restore, &fd6_ctx->base);

View file

@ -94,7 +94,8 @@ struct fd6_context {
struct fd_ringbuffer *sample_locations_disable_stateobj;
/* pre-baked stateobj for preamble: */
struct fd_ringbuffer *preamble, *restore;
struct fd_ringbuffer *sysmem_preamble, *gmem_preamble;
struct fd_ringbuffer *restore;
/* storage for ctx->last.key: */
struct ir3_shader_key last_key;

View file

@ -1109,20 +1109,6 @@ fd6_emit_restore(fd_cs &cs, struct fd_batch *batch)
fd6_emit_ib<CHIP>(cs, fd6_context(ctx)->restore);
fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, false);
uint32_t dwords;
fd_pkt7(cs, CP_SET_AMBLE, 3)
.add(fd6_context(ctx)->preamble, 0, &dwords)
.add(CP_SET_AMBLE_2(.dwords = dwords, .type = BIN_PREAMBLE_AMBLE_TYPE));
fd_pkt7(cs, CP_SET_AMBLE, 3)
.add(CP_SET_AMBLE_ADDR())
.add(CP_SET_AMBLE_2(.type = PREAMBLE_AMBLE_TYPE));
fd_pkt7(cs, CP_SET_AMBLE, 3)
.add(CP_SET_AMBLE_ADDR())
.add(CP_SET_AMBLE_2(.type = POSTAMBLE_AMBLE_TYPE));
if (!batch->nondraw) {
trace_end_state_restore(&batch->trace, cs);
}

View file

@ -1050,17 +1050,17 @@ fd7_emit_static_binning_regs(fd_cs &cs, bool gmem)
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_preemption_preamble(struct fd_context *ctx)
fd6_build_preemption_preamble(struct fd_context *ctx, bool gmem)
{
struct fd_screen *screen = ctx->screen;
fd_cs cs(ctx->pipe, 0x1000);
fd6_emit_static_regs<CHIP>(cs, ctx);
fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, true);
fd6_emit_gmem_cache_cntl<CHIP>(cs, screen, gmem);
if (CHIP >= A7XX) {
fd7_emit_static_binning_regs<CHIP>(cs, true);
fd7_emit_static_binning_regs<CHIP>(cs, gmem);
}
/* TODO use CP_MEM_TO_SCRATCH_MEM on a7xx. The VSC scratch mem should be
@ -1080,6 +1080,26 @@ fd6_build_preemption_preamble(struct fd_context *ctx)
}
FD_GENX(fd6_build_preemption_preamble);
/*
 * Program the three CP_SET_AMBLE slots for a batch.
 *
 * The BIN_PREAMBLE slot is pointed at one of the two pre-baked preamble
 * stateobjs held by the fd6 context: gmem_preamble when 'gmem' is true,
 * sysmem_preamble otherwise.  The PREAMBLE and POSTAMBLE slots are written
 * with a default CP_SET_AMBLE_ADDR() and no dword count (presumably leaving
 * those slots empty -- confirm against the CP_SET_AMBLE packet definition).
 */
static void
emit_preamble(struct fd_context *ctx, fd_cs &cs, bool gmem)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *bin_amble;
   uint32_t bin_amble_dwords;

   if (gmem)
      bin_amble = fd6_ctx->gmem_preamble;
   else
      bin_amble = fd6_ctx->sysmem_preamble;

   /* The first .add() fills in bin_amble_dwords, which the second .add()
    * in the same chain then consumes, so the order of the calls matters.
    */
   fd_pkt7(cs, CP_SET_AMBLE, 3)
      .add(bin_amble, 0, &bin_amble_dwords)
      .add(CP_SET_AMBLE_2(.dwords = bin_amble_dwords, .type = BIN_PREAMBLE_AMBLE_TYPE));

   fd_pkt7(cs, CP_SET_AMBLE, 3)
      .add(CP_SET_AMBLE_ADDR())
      .add(CP_SET_AMBLE_2(.type = PREAMBLE_AMBLE_TYPE));

   fd_pkt7(cs, CP_SET_AMBLE, 3)
      .add(CP_SET_AMBLE_ADDR())
      .add(CP_SET_AMBLE_2(.type = POSTAMBLE_AMBLE_TYPE));
}
/* before first tile */
template <chip CHIP>
static void
@ -1093,6 +1113,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
emit_lrz_clears<CHIP>(batch);
fd6_emit_restore<CHIP>(cs, batch);
emit_preamble(batch->ctx, cs, true);
fd6_event_write<CHIP>(batch->ctx, cs, FD_LRZ_FLUSH);
@ -1973,6 +1994,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
emit_lrz_clears<CHIP>(batch);
fd6_emit_restore<CHIP>(cs, batch);
/* sysmem path: install the sysmem preamble, not the gmem one */
emit_preamble(batch->ctx, cs, false);
fd6_event_write<CHIP>(batch->ctx, cs, FD_LRZ_FLUSH);
if (batch->prologue) {

View file

@ -13,7 +13,7 @@
#include "pipe/p_context.h"
template <chip CHIP>
struct fd_ringbuffer *fd6_build_preemption_preamble(struct fd_context *ctx);
struct fd_ringbuffer *fd6_build_preemption_preamble(struct fd_context *ctx, bool gmem);
template <chip CHIP>
void fd6_gmem_init(struct pipe_context *pctx);