panvk: Use write-back (WB) cached CPU maps for command buffer memory

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36385>
This commit is contained in:
Faith Ekstrand 2025-07-28 16:55:45 -04:00 committed by Boris Brezillon
parent f860c7bdf1
commit 2dd27c647b
4 changed files with 45 additions and 8 deletions

View file

@ -267,6 +267,9 @@ panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
}
}
panvk_pool_flush_maps(&cmdbuf->cs_pool);
panvk_pool_flush_maps(&cmdbuf->desc_pool);
return vk_command_buffer_end(&cmdbuf->vk);
}
@ -924,7 +927,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
&cmdbuf->state.gfx.dynamic.sl;
struct panvk_pool_properties cs_pool_props = {
.create_flags = 0,
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 64 * 1024,
.label = "Command buffer CS pool",
.prealloc = false,
@ -934,7 +938,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
panvk_pool_init(&cmdbuf->cs_pool, device, &pool->cs_bo_pool, NULL, &cs_pool_props);
struct panvk_pool_properties desc_pool_props = {
.create_flags = 0,
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 64 * 1024,
.label = "Command buffer descriptor pool",
.prealloc = false,

View file

@ -1200,15 +1200,28 @@ panvk_queue_submit_process_signals(struct panvk_queue_submit *submit,
}
static void
panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit,
const struct vk_queue_submit *vk_submit)
{
struct panvk_gpu_queue *queue = submit->queue;
struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
struct pandecode_context *decode_ctx = submit->dev->debug.decode_ctx;
if (PANVK_DEBUG(TRACE)) {
const struct pan_kmod_dev_props *props =
&submit->phys_dev->kmod.dev->props;
/* First we invalidate all desc buffers to make sure we see GPU updates
* on those. */
for (uint32_t i = 0; i < vk_submit->command_buffer_count; i++) {
struct panvk_cmd_buffer *cmdbuf = container_of(
vk_submit->command_buffers[i], struct panvk_cmd_buffer, vk);
panvk_pool_invalidate_maps(&cmdbuf->desc_pool);
}
pan_kmod_flush_bo_map_syncs(dev->kmod.dev);
for (uint32_t i = 0; i < submit->qsubmit_count; i++) {
const struct drm_panthor_queue_submit *qsubmit = &submit->qsubmits[i];
if (!qsubmit->stream_size)
@ -1282,7 +1295,7 @@ panvk_per_arch(gpu_queue_submit)(struct vk_queue *vk_queue, struct vk_queue_subm
goto out;
panvk_queue_submit_process_signals(&submit, vk_submit);
panvk_queue_submit_process_debug(&submit);
panvk_queue_submit_process_debug(&submit, vk_submit);
out:
panvk_queue_submit_cleanup_storage(&submit, &stack_storage);

View file

@ -319,6 +319,8 @@ panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_pool_flush_maps(&cmdbuf->desc_pool);
return vk_command_buffer_end(&cmdbuf->vk);
}
@ -434,7 +436,8 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
&cmdbuf->state.gfx.dynamic.sl;
struct panvk_pool_properties desc_pool_props = {
.create_flags = 0,
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 64 * 1024,
.label = "Command buffer descriptor pool",
.prealloc = true,

View file

@ -30,8 +30,10 @@
#include "drm-uapi/panfrost_drm.h"
static void
panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batch,
uint32_t *bos, unsigned nr_bos, uint32_t *in_fences,
panvk_queue_submit_batch(struct panvk_gpu_queue *queue,
struct panvk_cmd_buffer *cmdbuf,
struct panvk_batch *batch, uint32_t *bos,
unsigned nr_bos, uint32_t *in_fences,
unsigned nr_in_fences)
{
struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
@ -54,6 +56,10 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
for (uint32_t i = 0; i < batch->fb.layer_count; i++)
memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
}
/* We don't keep track of BO <-> job relationship, so let's just flush the
* whole desc pool for now. */
panvk_pool_flush_maps(&cmdbuf->desc_pool);
}
/* Flush pending synchronization requests before submitting the job, to
@ -77,6 +83,11 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
ret = drmSyncobjWait(dev->drm_fd, &submit.out_sync, 1, INT64_MAX, 0,
NULL);
assert(!ret);
/* If we want to read the descriptors back, we need to invalidate the
* whole desc pool, otherwise we might end up with stale data. */
panvk_pool_invalidate_maps(&cmdbuf->desc_pool);
pan_kmod_flush_bo_map_syncs(dev->kmod.dev);
}
if (PANVK_DEBUG(TRACE)) {
@ -115,6 +126,11 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
ret = drmSyncobjWait(dev->drm_fd, &submit.out_sync, 1, INT64_MAX, 0,
NULL);
assert(!ret);
/* If we want to read the descriptors back, we need to invalidate the
* whole desc pool, otherwise we might end up with stale data. */
panvk_pool_invalidate_maps(&cmdbuf->desc_pool);
pan_kmod_flush_bo_map_syncs(dev->kmod.dev);
}
if (PANVK_DEBUG(TRACE))
@ -288,7 +304,7 @@ panvk_per_arch(gpu_queue_submit)(struct vk_queue *vk_queue, struct vk_queue_subm
panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);
panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences,
panvk_queue_submit_batch(queue, cmdbuf, batch, bos, nr_bos, in_fences,
nr_in_fences);
panvk_signal_event_syncobjs(queue, batch);