panvk/csf: add PANLIB_BARRIER_CSF_WAIT, to insert WAIT after precomp
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

This is useful for running multiple dependent precomp shaders in a row.
We could do this using PANLIB_BARRIER_CSF_SYNC and then immediately wait
on the syncobj, but it's a little silly to do that on the same subqueue.

Signed-off-by: Olivia Lee <olivia.lee@collabora.com>
Acked-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37971>
This commit is contained in:
Olivia Lee 2025-10-06 10:25:18 -07:00 committed by Marge Bot
parent f5e0d90231
commit 4a2df9127f
2 changed files with 32 additions and 1 deletions

View file

@ -13,6 +13,8 @@ enum panlib_barrier {
PANLIB_BARRIER_JM_SUPPRESS_PREFETCH = (1 << 1),
/* On panvk, increment the syncobj on the compute subqueue */
PANLIB_BARRIER_CSF_SYNC = (1 << 2),
/* On panvk, insert a WAIT on the compute dispatch */
PANLIB_BARRIER_CSF_WAIT = (1 << 3),
};
struct panlib_precomp_grid {

View file

@ -23,8 +23,13 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
enum libpan_shaders_program idx, void *data,
size_t data_size)
{
enum panlib_barrier supported_barriers = PANLIB_BARRIER_CSF_SYNC;
enum panlib_barrier supported_barriers =
PANLIB_BARRIER_CSF_SYNC | PANLIB_BARRIER_CSF_WAIT;
assert(!(barrier & ~supported_barriers) && "Unsupported barrier flags");
assert(!(barrier & PANLIB_BARRIER_CSF_SYNC &&
barrier & PANLIB_BARRIER_CSF_WAIT) &&
"Cannot use both CSF_SYNC and CSF_WAIT barriers simultaneously");
struct panvk_cmd_buffer *cmdbuf = ctx->cmdbuf;
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
@ -164,6 +169,30 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
#endif
++cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].relative_sync_point;
} else if (barrier & PANLIB_BARRIER_CSF_WAIT) {
#if PAN_ARCH >= 11
cs_wait_indirect(b);
#else
struct cs_index iter_sb = cs_scratch_reg32(b, 0);
struct cs_index cmp_scratch = cs_scratch_reg32(b, 1);
cs_load32_to(b, iter_sb, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, iter_sb));
cs_match(b, iter_sb, cmp_scratch) {
#define CASE(x) \
cs_case(b, SB_ITER(x)) { \
cs_wait_slot(b, SB_ITER(x)); \
}
CASE(0)
CASE(1)
CASE(2)
CASE(3)
CASE(4)
#undef CASE
}
#endif
}
/* XXX: clobbers the registers instead to avoid recreating them when calling