panvk: remove debug_syncobjs

We can rely on panvk_per_arch(queue_check_status) to detect device lost.
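For reference, the submit-time validation this replaces (see the last hunk below) now reduces to a status query; roughly:

   /* Host-side check after this change: instead of comparing per-subqueue
    * debug syncobj seqnos, a forced-sync submit just asks the kernel whether
    * the group faulted, which marks the queue lost on error. */
   if (submit->force_sync)
      panvk_per_arch(gpu_queue_check_status)(&queue->vk);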

Because we no longer emit cs_sync32_add from finish_cs to increment the
debug syncobj, if an instr between the last draw/dispatch and
end-of-stream causes a CS error, the CS error is ignored.  This is fine
because the instr should have no side effect and the kernel emits
ERROR_BARRIER to recover from the CS error.

If that is undesirable, we can restore the old behavior by emitting
cs_sync64_add from finish_cs to increment the regular syncobj (and fix
cs_progress_seqno_reg) when PANVK_DEBUG is set.
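As a rough illustration only, such a fallback in finish_cs could be modeled on the cs_sync32_add block removed below; cs_move64_to, struct panvk_cs_sync64, and the exact cs_sync64_add signature are assumptions here, not taken from the tree:

   /* Hypothetical PANVK_DEBUG-only fallback: bump the regular syncobj from
    * finish_cs so forced-sync submits still observe per-subqueue progress.
    * cs_sync64_add arguments are assumed to mirror the removed cs_sync32_add
    * call; cs_progress_seqno_reg would also need fixing, as noted above. */
   if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
      struct cs_index sync_addr = cs_scratch_reg64(b, 0);
      struct cs_index one = cs_scratch_reg64(b, 2);

      cs_move64_to(b, one, 1);
      cs_load64_to(b, sync_addr, cs_subqueue_ctx_reg(b),
                   offsetof(struct panvk_cs_subqueue_context, syncobjs));
      cs_add64(b, sync_addr, sync_addr,
               sizeof(struct panvk_cs_sync64) * subqueue);
      cs_wait_slots(b, dev->csf.sb.all_mask);
      cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, one, sync_addr, cs_now());
   }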

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35606>
Chia-I Wu 2025-06-17 15:41:27 -07:00 committed by Marge Bot
parent 6f21c4a549
commit 7868a551ff
4 changed files with 2 additions and 73 deletions

@@ -124,7 +124,6 @@ struct panvk_cs_subqueue_context {
uint32_t layer_count;
} tiler_oom_ctx;
struct {
uint64_t syncobjs;
struct {
uint64_t cs;
} tracebuf;

@@ -73,7 +73,6 @@ struct panvk_gpu_queue {
struct panvk_tiler_heap tiler_heap;
struct panvk_desc_ringbuf render_desc_ringbuf;
struct panvk_priv_mem syncobjs;
struct panvk_priv_mem debug_syncobjs;
struct {
struct vk_sync *sync;

@@ -155,38 +155,6 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
cs_wait_slot(b, SB_ID(IMM_FLUSH));
/* If we're in sync/trace mode, we signal the debug object. */
if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
struct cs_index debug_sync_addr = cs_scratch_reg64(b, 0);
struct cs_index one = cs_scratch_reg32(b, 2);
struct cs_index error = cs_scratch_reg32(b, 3);
struct cs_index cmp_scratch = cs_scratch_reg32(b, 2);
cs_move32_to(b, one, 1);
cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b),
offsetof(struct panvk_cs_subqueue_context, debug.syncobjs));
cs_add64(b, debug_sync_addr, debug_sync_addr,
sizeof(struct panvk_cs_sync32) * subqueue);
cs_load32_to(b, error, debug_sync_addr,
offsetof(struct panvk_cs_sync32, error));
cs_wait_slots(b, dev->csf.sb.all_mask);
if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, one,
debug_sync_addr, cs_now());
cs_match(b, error, cmp_scratch) {
cs_case(b, 0) {
/* Do nothing. */
}
cs_default(b) {
/* Overwrite the sync error with the first error we encountered. */
cs_store32(b, error, debug_sync_addr,
offsetof(struct panvk_cs_sync32, error));
cs_flush_stores(b);
}
}
}
/* If this is a secondary command buffer, we don't poison the reg file to
* preserve the render pass context. We also don't poison the reg file if the
* last render pass was suspended. In practice we could preserve only the

@@ -381,7 +381,6 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
*cs_ctx = (struct panvk_cs_subqueue_context){
.syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs),
.debug.syncobjs = panvk_priv_mem_dev_addr(queue->debug_syncobjs),
.debug.tracebuf.cs = subq->tracebuf.addr.dev,
#if PAN_ARCH == 10
/* Iterator scoreboard will be picked in CS and wrap back to SB_ITER(0) on
@@ -529,7 +528,6 @@ cleanup_queue(struct panvk_gpu_queue *queue)
finish_render_desc_ringbuf(queue);
panvk_pool_free_mem(&queue->debug_syncobjs);
panvk_pool_free_mem(&queue->syncobjs);
}
@@ -553,18 +551,6 @@ init_queue(struct panvk_gpu_queue *queue)
return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to allocate subqueue sync objects");
if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) {
alloc_info.size =
ALIGN_POT(sizeof(struct panvk_cs_sync32), 64) * PANVK_SUBQUEUE_COUNT,
queue->debug_syncobjs =
panvk_pool_alloc_mem(&dev->mempools.rw_nc, alloc_info);
if (!panvk_priv_mem_host_addr(queue->debug_syncobjs)) {
result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Failed to allocate subqueue sync objects");
goto err_cleanup_queue;
}
}
result = init_render_desc_ringbuf(queue);
if (result != VK_SUCCESS)
goto err_cleanup_queue;
@@ -1035,14 +1021,6 @@ panvk_queue_submit_init_signals(struct panvk_queue_submit *submit,
.syncs = DRM_PANTHOR_OBJ_ARRAY(1, &submit->signal_ops[signal_op++]),
};
}
if (submit->force_sync) {
struct panvk_cs_sync32 *debug_syncs =
panvk_priv_mem_host_addr(queue->debug_syncobjs);
assert(debug_syncs);
memset(debug_syncs, 0, sizeof(*debug_syncs) * PANVK_SUBQUEUE_COUNT);
}
}
static VkResult
@@ -1182,23 +1160,8 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
pandecode_next_frame(decode_ctx);
/* validate last after the command streams are dumped */
if (submit->force_sync) {
struct panvk_cs_sync32 *debug_syncs =
panvk_priv_mem_host_addr(queue->debug_syncobjs);
uint32_t debug_sync_points[PANVK_SUBQUEUE_COUNT] = {0};
for (uint32_t i = 0; i < submit->qsubmit_count; i++) {
const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i];
if (qsubmit->stream_size)
debug_sync_points[qsubmit->queue_index]++;
}
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
if (debug_syncs[i].seqno != debug_sync_points[i] ||
debug_syncs[i].error != 0)
vk_queue_set_lost(&queue->vk, "Incomplete job or timeout");
}
}
if (submit->force_sync)
panvk_per_arch(gpu_queue_check_status)(&queue->vk);
}
VkResult