panvk: Make most end work instrumentation synchronous

Most of it is tracking work that ends synchronously anyway.
For example, in emit_barrier_insert_waits, cs_sync64_wait is synchronous,
so there is no need to defer the timestamp write on any SBs.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Olivia Lee <olivia.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36161>
This commit is contained in:
Christoph Pillmayer 2025-07-15 14:08:18 +00:00 committed by Marge Bot
parent 8255af9f54
commit 9c104fa026
5 changed files with 44 additions and 20 deletions

View file

@@ -67,3 +67,11 @@ void panvk_per_arch(panvk_instr_end_work)(
enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf,
enum panvk_instr_work_type work_type,
const struct panvk_instr_end_args *const args);
/**
* Mark the end of async work with an immediate scoreboard mask.
*/
void panvk_per_arch(panvk_instr_end_work_async)(
enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf,
enum panvk_instr_work_type work_type,
const struct panvk_instr_end_args *const args, unsigned int wait_mask);

View file

@@ -380,9 +380,10 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
.group_size_y = shader->cs.local_size.y,
.group_size_z = shader->cs.local_size.z,
}};
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_COMPUTE, cmdbuf,
PANVK_INSTR_WORK_TYPE_DISPATCH,
&instr_info);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
panvk_per_arch(panvk_instr_end_work_async)(
PANVK_SUBQUEUE_COMPUTE, cmdbuf, PANVK_INSTR_WORK_TYPE_DISPATCH,
&instr_info, dev->csf.sb.all_iters_mask);
}
VKAPI_ATTR void VKAPI_CALL
@@ -401,10 +402,11 @@ panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
cmd_dispatch(cmdbuf, &info);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_instr_end_args instr_info = {.dispatch_indirect = {
.buffer_gpu = buffer_gpu,
}};
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_COMPUTE, cmdbuf,
PANVK_INSTR_WORK_TYPE_DISPATCH_INDIRECT,
&instr_info);
panvk_per_arch(panvk_instr_end_work_async)(
PANVK_SUBQUEUE_COMPUTE, cmdbuf, PANVK_INSTR_WORK_TYPE_DISPATCH_INDIRECT,
&instr_info, dev->csf.sb.all_iters_mask);
}

View file

@@ -3445,10 +3445,11 @@ panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
.flags = cmdbuf->state.gfx.render.flags,
.fb = &cmdbuf->state.gfx.render.fb.info,
}};
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf,
PANVK_INSTR_WORK_TYPE_RENDER,
&instr_info);
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_FRAGMENT, cmdbuf,
PANVK_INSTR_WORK_TYPE_RENDER,
&instr_info);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
panvk_per_arch(panvk_instr_end_work_async)(
PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf, PANVK_INSTR_WORK_TYPE_RENDER,
&instr_info, dev->csf.sb.all_iters_mask);
panvk_per_arch(panvk_instr_end_work_async)(
PANVK_SUBQUEUE_FRAGMENT, cmdbuf, PANVK_INSTR_WORK_TYPE_RENDER,
&instr_info, dev->csf.sb.all_iters_mask);
}

View file

@@ -108,10 +108,18 @@ panvk_per_arch(panvk_instr_end_work)(
enum panvk_instr_work_type work_type,
const struct panvk_instr_end_args *const args)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
panvk_per_arch(panvk_instr_end_work_async)(id, cmdbuf, work_type, args, 0);
}
void
panvk_per_arch(panvk_instr_end_work_async)(
enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf,
enum panvk_instr_work_type work_type,
const struct panvk_instr_end_args *const args, unsigned int wait_mask)
{
struct panvk_utrace_cs_info cs_info = {
.cmdbuf = cmdbuf,
.ts_wait_mask = dev->csf.sb.all_iters_mask | SB_MASK(DEFERRED_FLUSH),
.ts_wait_mask = wait_mask,
};
switch (work_type) {

View file

@@ -65,8 +65,10 @@ panvk_per_arch(cmd_meta_compute_end)(
cmdbuf->state.compute.desc_state.push_sets[0];
#if PAN_ARCH >= 10
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_COMPUTE, cmdbuf,
PANVK_INSTR_WORK_TYPE_META, NULL);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
panvk_per_arch(panvk_instr_end_work_async)(PANVK_SUBQUEUE_COMPUTE, cmdbuf,
PANVK_INSTR_WORK_TYPE_META, NULL,
dev->csf.sb.all_iters_mask);
#endif
cmdbuf->state.compute.desc_state.sets[0] = save_ctx->set0;
@@ -140,10 +142,13 @@ panvk_per_arch(cmd_meta_gfx_end)(
cmdbuf->state.gfx.desc_state.push_sets[0];
#if PAN_ARCH >= 10
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf,
PANVK_INSTR_WORK_TYPE_META, NULL);
panvk_per_arch(panvk_instr_end_work)(PANVK_SUBQUEUE_FRAGMENT, cmdbuf,
PANVK_INSTR_WORK_TYPE_META, NULL);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
panvk_per_arch(panvk_instr_end_work_async)(
PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf, PANVK_INSTR_WORK_TYPE_META, NULL,
dev->csf.sb.all_iters_mask);
panvk_per_arch(panvk_instr_end_work_async)(PANVK_SUBQUEUE_FRAGMENT, cmdbuf,
PANVK_INSTR_WORK_TYPE_META, NULL,
dev->csf.sb.all_iters_mask);
#endif
cmdbuf->state.gfx.desc_state.sets[0] = save_ctx->set0;