From 7868a551fff2834b68d1d5cee275fdca8d482a30 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 17 Jun 2025 15:41:27 -0700 Subject: [PATCH] panvk: remove debug_syncobjs We can rely on panvk_per_arch(gpu_queue_check_status) to detect device lost. Because we no longer emit cs_sync32_add from finish_cs to increment debug syncobj, if an instr between the last draw/dispatch and end-of-stream causes a CS error, the CS error is ignored. This is fine because the instr should have no side effect and the kernel emits ERROR_BARRIER to recover from the CS error. If that is undesirable, we can restore the old behavior by emitting cs_sync64_add from finish_cs to increment regular syncobj (and fix cs_progress_seqno_reg) when PANVK_DEBUG is set. Signed-off-by: Chia-I Wu Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- src/panfrost/vulkan/csf/panvk_cmd_buffer.h | 1 - src/panfrost/vulkan/csf/panvk_queue.h | 1 - src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c | 32 --------------- src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c | 41 +------------------ 4 files changed, 2 insertions(+), 73 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index 915c5d7f8a2..8cd0542fb00 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -124,7 +124,6 @@ struct panvk_cs_subqueue_context { uint32_t layer_count; } tiler_oom_ctx; struct { - uint64_t syncobjs; struct { uint64_t cs; } tracebuf; diff --git a/src/panfrost/vulkan/csf/panvk_queue.h b/src/panfrost/vulkan/csf/panvk_queue.h index defd514f56e..b79a1674094 100644 --- a/src/panfrost/vulkan/csf/panvk_queue.h +++ b/src/panfrost/vulkan/csf/panvk_queue.h @@ -73,7 +73,6 @@ struct panvk_gpu_queue { struct panvk_tiler_heap tiler_heap; struct panvk_desc_ringbuf render_desc_ringbuf; struct panvk_priv_mem syncobjs; - struct panvk_priv_mem debug_syncobjs; struct { struct vk_sync *sync; diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c 
b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index eb80ae9fbae..9d5d6c802d4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -155,38 +155,6 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue) cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH))); cs_wait_slot(b, SB_ID(IMM_FLUSH)); - /* If we're in sync/trace more, we signal the debug object. */ - if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) { - struct cs_index debug_sync_addr = cs_scratch_reg64(b, 0); - struct cs_index one = cs_scratch_reg32(b, 2); - struct cs_index error = cs_scratch_reg32(b, 3); - struct cs_index cmp_scratch = cs_scratch_reg32(b, 2); - - cs_move32_to(b, one, 1); - cs_load64_to(b, debug_sync_addr, cs_subqueue_ctx_reg(b), - offsetof(struct panvk_cs_subqueue_context, debug.syncobjs)); - cs_add64(b, debug_sync_addr, debug_sync_addr, - sizeof(struct panvk_cs_sync32) * subqueue); - cs_load32_to(b, error, debug_sync_addr, - offsetof(struct panvk_cs_sync32, error)); - cs_wait_slots(b, dev->csf.sb.all_mask); - if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) - cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, one, - debug_sync_addr, cs_now()); - cs_match(b, error, cmp_scratch) { - cs_case(b, 0) { - /* Do nothing. */ - } - - cs_default(b) { - /* Overwrite the sync error with the first error we encountered. */ - cs_store32(b, error, debug_sync_addr, - offsetof(struct panvk_cs_sync32, error)); - cs_flush_stores(b); - } - } - } - /* If this is a secondary command buffer, we don't poison the reg file to * preserve the render pass context. We also don't poison the reg file if the * last render pass was suspended. 
In practice we could preserve only the diff --git a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c index 2a10041d043..98601f00ea4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c @@ -381,7 +381,6 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) *cs_ctx = (struct panvk_cs_subqueue_context){ .syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs), - .debug.syncobjs = panvk_priv_mem_dev_addr(queue->debug_syncobjs), .debug.tracebuf.cs = subq->tracebuf.addr.dev, #if PAN_ARCH == 10 /* Iterator scoreboard will be picked in CS and wrap back to SB_ITER(0) on @@ -529,7 +528,6 @@ cleanup_queue(struct panvk_gpu_queue *queue) finish_render_desc_ringbuf(queue); - panvk_pool_free_mem(&queue->debug_syncobjs); panvk_pool_free_mem(&queue->syncobjs); } @@ -553,18 +551,6 @@ init_queue(struct panvk_gpu_queue *queue) return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to allocate subqueue sync objects"); - if (instance->debug_flags & (PANVK_DEBUG_SYNC | PANVK_DEBUG_TRACE)) { - alloc_info.size = - ALIGN_POT(sizeof(struct panvk_cs_sync32), 64) * PANVK_SUBQUEUE_COUNT, - queue->debug_syncobjs = - panvk_pool_alloc_mem(&dev->mempools.rw_nc, alloc_info); - if (!panvk_priv_mem_host_addr(queue->debug_syncobjs)) { - result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "Failed to allocate subqueue sync objects"); - goto err_cleanup_queue; - } - } - result = init_render_desc_ringbuf(queue); if (result != VK_SUCCESS) goto err_cleanup_queue; @@ -1035,14 +1021,6 @@ panvk_queue_submit_init_signals(struct panvk_queue_submit *submit, .syncs = DRM_PANTHOR_OBJ_ARRAY(1, &submit->signal_ops[signal_op++]), }; } - - if (submit->force_sync) { - struct panvk_cs_sync32 *debug_syncs = - panvk_priv_mem_host_addr(queue->debug_syncobjs); - - assert(debug_syncs); - memset(debug_syncs, 0, sizeof(*debug_syncs) * PANVK_SUBQUEUE_COUNT); - } } static VkResult @@ -1182,23 
+1160,8 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit) pandecode_next_frame(decode_ctx); /* validate last after the command streams are dumped */ - if (submit->force_sync) { - struct panvk_cs_sync32 *debug_syncs = - panvk_priv_mem_host_addr(queue->debug_syncobjs); - uint32_t debug_sync_points[PANVK_SUBQUEUE_COUNT] = {0}; - - for (uint32_t i = 0; i < submit->qsubmit_count; i++) { - const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i]; - if (qsubmit->stream_size) - debug_sync_points[qsubmit->queue_index]++; - } - - for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { - if (debug_syncs[i].seqno != debug_sync_points[i] || - debug_syncs[i].error != 0) - vk_queue_set_lost(&queue->vk, "Incomplete job or timeout"); - } - } + if (submit->force_sync) + panvk_per_arch(gpu_queue_check_status)(&queue->vk); } VkResult