From 2dd27c647bef63bc92397f86328cf6f2bfedb7ec Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 28 Jul 2025 16:55:45 -0400 Subject: [PATCH] panvk: Use WB maps for command buffer memory Reviewed-by: Boris Brezillon Reviewed-by: Christoph Pillmayer Part-of: --- src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c | 9 ++++++-- src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c | 17 ++++++++++++-- src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c | 5 ++++- src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c | 22 ++++++++++++++++--- 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 12ba5e22fe4..804add2d7b7 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -267,6 +267,9 @@ panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer) } } + panvk_pool_flush_maps(&cmdbuf->cs_pool); + panvk_pool_flush_maps(&cmdbuf->desc_pool); + return vk_command_buffer_end(&cmdbuf->vk); } @@ -924,7 +927,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level, &cmdbuf->state.gfx.dynamic.sl; struct panvk_pool_properties cs_pool_props = { - .create_flags = 0, + .create_flags = + panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP), .slab_size = 64 * 1024, .label = "Command buffer CS pool", .prealloc = false, @@ -934,7 +938,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level, panvk_pool_init(&cmdbuf->cs_pool, device, &pool->cs_bo_pool, NULL, &cs_pool_props); struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, + .create_flags = + panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP), .slab_size = 64 * 1024, .label = "Command buffer descriptor pool", .prealloc = false, diff --git a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c index 85388092649..1368f1fab2a 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c @@ -1200,15 +1200,28 @@ panvk_queue_submit_process_signals(struct panvk_queue_submit *submit, } static void -panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit) +panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit, + const struct vk_queue_submit *vk_submit) { struct panvk_gpu_queue *queue = submit->queue; + struct panvk_device *dev = to_panvk_device(queue->vk.base.device); struct pandecode_context *decode_ctx = submit->dev->debug.decode_ctx; if (PANVK_DEBUG(TRACE)) { const struct pan_kmod_dev_props *props = &submit->phys_dev->kmod.dev->props; + /* First we invalidate all desc buffers to make sure we see GPU updates + * on those. */ + for (uint32_t i = 0; i < vk_submit->command_buffer_count; i++) { + struct panvk_cmd_buffer *cmdbuf = container_of( + vk_submit->command_buffers[i], struct panvk_cmd_buffer, vk); + + panvk_pool_invalidate_maps(&cmdbuf->desc_pool); + } + + pan_kmod_flush_bo_map_syncs(dev->kmod.dev); + for (uint32_t i = 0; i < submit->qsubmit_count; i++) { const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i]; if (!qsubmit->stream_size) @@ -1282,7 +1295,7 @@ panvk_per_arch(gpu_queue_submit)(struct vk_queue *vk_queue, struct vk_queue_subm goto out; panvk_queue_submit_process_signals(&submit, vk_submit); - panvk_queue_submit_process_debug(&submit); + panvk_queue_submit_process_debug(&submit, vk_submit); out: panvk_queue_submit_cleanup_storage(&submit, &stack_storage); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 9f41a13077e..8b5ddbfde8b 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -319,6 +319,8 @@ panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer) panvk_per_arch(cmd_close_batch)(cmdbuf); + panvk_pool_flush_maps(&cmdbuf->desc_pool); + return vk_command_buffer_end(&cmdbuf->vk); } @@ -434,7 +436,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level, &cmdbuf->state.gfx.dynamic.sl; struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, + .create_flags = + panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP), .slab_size = 64 * 1024, .label = "Command buffer descriptor pool", .prealloc = true, diff --git a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c index 70e822c3b9f..95bee2974c7 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c @@ -30,8 +30,10 @@ #include "drm-uapi/panfrost_drm.h" static void -panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batch, - uint32_t *bos, unsigned nr_bos, uint32_t *in_fences, +panvk_queue_submit_batch(struct panvk_gpu_queue *queue, + struct panvk_cmd_buffer *cmdbuf, + struct panvk_batch *batch, uint32_t *bos, + unsigned nr_bos, uint32_t *in_fences, unsigned nr_in_fences) { struct panvk_device *dev = to_panvk_device(queue->vk.base.device); @@ -54,6 +56,10 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc for (uint32_t i = 0; i < batch->fb.layer_count; i++) memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs)); } + + /* We don't keep track of BO <-> job relationship, so let's just flush the + * whole desc pool for now. */ + panvk_pool_flush_maps(&cmdbuf->desc_pool); } /* Flush pending synchronization requests before submitting the job, to @@ -77,6 +83,11 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc ret = drmSyncobjWait(dev->drm_fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); assert(!ret); + + /* If we want to read the descriptors back, we need to invalidate the + * whole desc pool, otherwise we might end up with stale data. */ + panvk_pool_invalidate_maps(&cmdbuf->desc_pool); + pan_kmod_flush_bo_map_syncs(dev->kmod.dev); } if (PANVK_DEBUG(TRACE)) { @@ -115,6 +126,11 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc ret = drmSyncobjWait(dev->drm_fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); assert(!ret); + + /* If we want to read the descriptors back, we need to invalidate the + * whole desc pool, otherwise we might end up with stale data. */ + panvk_pool_invalidate_maps(&cmdbuf->desc_pool); + pan_kmod_flush_bo_map_syncs(dev->kmod.dev); } if (PANVK_DEBUG(TRACE)) @@ -288,7 +304,7 @@ panvk_per_arch(gpu_queue_submit)(struct vk_queue *vk_queue, struct vk_queue_subm panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences); - panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, + panvk_queue_submit_batch(queue, cmdbuf, batch, bos, nr_bos, in_fences, nr_in_fences); panvk_signal_event_syncobjs(queue, batch);