From 05006c21dd95d850dd464cb92cbc0e2c33c75ded Mon Sep 17 00:00:00 2001
From: Christoph Pillmayer
Date: Wed, 6 Aug 2025 12:54:56 +0000
Subject: [PATCH] panvk/utrace: Alloc utrace copy buf from userspace heap

utrace requires some memory to allocate buffers for timestamps and
indirect data each submit. It is expensive to allocate it from the
kernel each time. Instead, allocate a big bo upfront and hand it out in
small pieces later using util_vma_heap.

Reviewed-by: Lars-Ivar Hesselberg Simonsen
Part-of:
---
 src/panfrost/vulkan/csf/panvk_vX_utrace.c | 73 ++++++++++++++++++++---
 src/panfrost/vulkan/panvk_device.h        |  5 ++
 src/panfrost/vulkan/panvk_utrace.c        | 58 ++++++++++++++----
 src/panfrost/vulkan/panvk_utrace.h        |  7 +++
 4 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/src/panfrost/vulkan/csf/panvk_vX_utrace.c b/src/panfrost/vulkan/csf/panvk_vX_utrace.c
index e5e7610b1ec..d7c18d5a3c1 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_utrace.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_utrace.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2024 Google LLC
+ * Copyright 2025 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -10,7 +11,6 @@
 #include "genxml/cs_builder.h"
 #include "panvk_cmd_buffer.h"
 #include "panvk_device.h"
-#include "panvk_priv_bo.h"
 
 static void
 cmd_write_timestamp(const struct panvk_device *dev, struct cs_builder *b,
@@ -134,8 +134,8 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
    struct panvk_cmd_buffer *cmdbuf = cs_info->cmdbuf;
    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
    struct cs_builder *b = get_builder(cmdbuf, ut);
-   const struct panvk_priv_bo *bo = timestamps;
-   const uint64_t addr = bo->addr.dev + offset_B;
+   const struct panvk_utrace_buf *buf = timestamps;
+   const uint64_t addr = buf->dev + offset_B;
 
    cmd_write_timestamp(dev, b, addr, *cs_info->ts_async_op);
 }
@@ -149,8 +149,8 @@ panvk_utrace_capture_data(struct u_trace *ut, void *cs, void *dst_buffer,
     * panvk_cmd_buffer so we can pass additional parameters. */
    struct panvk_utrace_cs_info *cs_info = cs;
    struct cs_builder *b = get_builder(cs_info->cmdbuf, ut);
-   const struct panvk_priv_bo *dst_bo = dst_buffer;
-   const uint64_t dst_addr = dst_bo->addr.dev + dst_offset_B;
+   const struct panvk_utrace_buf *dst_buf = dst_buffer;
+   const uint64_t dst_addr = dst_buf->dev + dst_offset_B;
    const uint64_t src_addr = src_offset_B;
 
    /* src_offset_B is absolute, src_buffer is used to indicate register capture */
@@ -165,6 +165,29 @@ panvk_utrace_capture_data(struct u_trace *ut, void *cs, void *dst_buffer,
                  cs_info->capture_data_wait_for_ts);
 }
 
+/* Size in bytes of the BO backing the utrace copy-buffer heap. Taken from
+ * PANVK_UTRACE_CLONE_MEM_SIZE (decimal, or hex with a 0x prefix) when that
+ * parses to a non-zero value, 10 MB otherwise. */
+static uint32_t
+get_utrace_clone_mem_size(void)
+{
+   const char *v = getenv("PANVK_UTRACE_CLONE_MEM_SIZE");
+   if (v) {
+      uint32_t size = 0;
+      sscanf(v, "%u", &size);
+      if (size > 0) {
+         return size;
+      }
+      sscanf(v, "0x%x", &size);
+      if (size > 0) {
+         mesa_logi("selected utrace mem size = 0x%x (%u) hex", size, size);
+         return size;
+      }
+   }
+   /* 10 MB default */
+   return 0xa00000;
+}
+
 void
 panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
 {
@@ -174,12 +197,44 @@ panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
                         panvk_utrace_record_ts, panvk_utrace_read_ts,
                         panvk_utrace_capture_data, panvk_utrace_get_data,
                         panvk_utrace_delete_flush_data);
+
+   /* Initialize the lock before anything that can fail so that
+    * utrace_context_fini can destroy it unconditionally. */
+   simple_mtx_init(&dev->utrace.copy_buf_heap_lock, mtx_plain);
+
+   VkResult result = panvk_priv_bo_create(dev, get_utrace_clone_mem_size(), 0,
+                                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
+                                          &dev->utrace.copy_buf_heap_bo);
+   assert(result == VK_SUCCESS);
+   if (result != VK_SUCCESS) {
+      /* Mark the heap as absent so utrace_context_fini skips its teardown. */
+      dev->utrace.copy_buf_heap_bo = NULL;
+      return;
+   }
+
+   simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
+   util_vma_heap_init(&dev->utrace.copy_buf_heap,
+                      dev->utrace.copy_buf_heap_bo->addr.dev,
+                      dev->utrace.copy_buf_heap_bo->bo->size);
+   simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
 }
 
 void
 panvk_per_arch(utrace_context_fini)(struct panvk_device *dev)
 {
    u_trace_context_fini(&dev->utrace.utctx);
+
+   /* The heap only exists if its backing BO was created at init time. */
+   if (dev->utrace.copy_buf_heap_bo) {
+      simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
+      util_vma_heap_finish(&dev->utrace.copy_buf_heap);
+      simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
+
+      panvk_priv_bo_unref(dev->utrace.copy_buf_heap_bo);
+      dev->utrace.copy_buf_heap_bo = NULL;
+   }
+
+   simple_mtx_destroy(&dev->utrace.copy_buf_heap_lock);
 }
 
 void
@@ -189,10 +244,10 @@ panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
                                    uint64_t to_offset, uint64_t size_B)
 {
    struct cs_builder *b = cmdstream;
-   const struct panvk_priv_bo *src_bo = ts_from;
-   const struct panvk_priv_bo *dst_bo = ts_to;
-   const uint64_t src_addr = src_bo->addr.dev + from_offset;
-   const uint64_t dst_addr = dst_bo->addr.dev + to_offset;
+   const struct panvk_utrace_buf *src_buf = ts_from;
+   const struct panvk_utrace_buf *dst_buf = ts_to;
+   const uint64_t src_addr = src_buf->dev + from_offset;
+   const uint64_t dst_addr = dst_buf->dev + to_offset;
 
    cmd_copy_data(b, dst_addr, src_addr, size_B, false);
 }
diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h
index f3a43d84c1f..0174ce39331 100644
--- a/src/panfrost/vulkan/panvk_device.h
+++ b/src/panfrost/vulkan/panvk_device.h
@@ -24,6 +24,7 @@
 
 #include "util/pan_ir.h"
 #include "util/perf/u_trace.h"
+#include "util/simple_mtx.h"
 #include "util/u_printf.h"
 #include "util/vma.h"
 
@@ -98,6 +99,10 @@ struct panvk_device {
 #ifdef HAVE_PERFETTO
       struct panvk_utrace_perfetto utp;
 #endif
+      /* Timestamp + indirect data storage */
+      struct util_vma_heap copy_buf_heap;
+      struct panvk_priv_bo *copy_buf_heap_bo;
+      simple_mtx_t copy_buf_heap_lock;
    } utrace;
 
    struct panvk_device_draw_context* draw_ctx;
diff --git a/src/panfrost/vulkan/panvk_utrace.c b/src/panfrost/vulkan/panvk_utrace.c
index 34a3b85af47..9a542799480 100644
--- a/src/panfrost/vulkan/panvk_utrace.c
+++ b/src/panfrost/vulkan/panvk_utrace.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2024 Google LLC
+ * Copyright 2025 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -23,20 +24,57 @@ void *
 panvk_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
 {
    struct panvk_device *dev = to_dev(utctx);
-   struct panvk_priv_bo *bo;
 
-   if (panvk_priv_bo_create(dev, size_B, 0, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
-                            &bo) != VK_SUCCESS)
+   /* This memory is also used to write CSF commands, therefore we align to a
+    * cache line. */
+   const uint64_t alignment = 0x40;
+
+   /* Allocate the host-side descriptor first so that a malloc failure never
+    * leaves a heap range reserved with nobody left to free it. */
+   struct panvk_utrace_buf *container = malloc(sizeof(*container));
+   if (!container)
+      return NULL;
+
+   simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
+   const uint64_t addr_dev =
+      util_vma_heap_alloc(&dev->utrace.copy_buf_heap, size_B, alignment);
+   simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
+
+   if (!addr_dev) {
+      mesa_loge("Couldn't allocate utrace buffer (size = 0x%" PRIx64 "). "
+                "Provide larger PANVK_UTRACE_CLONE_MEM_SIZE (current = 0x%zx)",
+                size_B, dev->utrace.copy_buf_heap_bo->bo->size);
+      free(container);
       return NULL;
+   }
 
-   return bo;
+   /* The heap hands out device addresses; translate to the matching offset
+    * in the BO's host mapping. */
+   void *addr_host = dev->utrace.copy_buf_heap_bo->addr.host + addr_dev -
+                     dev->utrace.copy_buf_heap_bo->addr.dev;
+
+   *container = (struct panvk_utrace_buf){
+      .host = addr_host,
+      .dev = addr_dev,
+      .size = size_B,
+   };
+
+   return container;
 }
 
+/* Return the heap range described by buffer to the copy-buffer heap and free
+ * the host-side descriptor. */
 void
 panvk_utrace_delete_buffer(struct u_trace_context *utctx, void *buffer)
 {
-   struct panvk_priv_bo *bo = buffer;
-   panvk_priv_bo_unref(bo);
+   struct panvk_device *dev = to_dev(utctx);
+   struct panvk_utrace_buf *buf = buffer;
+
+   simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
+   util_vma_heap_free(&dev->utrace.copy_buf_heap, buf->dev, buf->size);
+   simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
+
+   free(buffer);
 }
 
 uint64_t
@@ -47,7 +85,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
    const struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
    const struct pan_kmod_dev_props *props = &pdev->kmod.props;
-   const struct panvk_priv_bo *bo = timestamps;
+   const struct panvk_utrace_buf *buf = timestamps;
    struct panvk_utrace_flush_data *data = flush_data;
 
    assert(props->timestamp_frequency);
@@ -62,7 +100,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
       data->wait_value = 0;
    }
 
-   const uint64_t *ts_ptr = bo->addr.host + offset_B;
+   const uint64_t *ts_ptr = buf->host + offset_B;
    uint64_t ts = *ts_ptr;
    if (ts != U_TRACE_NO_TIMESTAMP)
       ts = (ts * NSEC_PER_SEC) / props->timestamp_frequency;
@@ -74,8 +112,8 @@ const void *
 panvk_utrace_get_data(struct u_trace_context *utctx, void *buffer,
                       uint64_t offset_B, uint32_t size_B)
 {
-   const struct panvk_priv_bo *bo = buffer;
-   return bo->addr.host + offset_B;
+   const struct panvk_utrace_buf *buf = buffer;
+   return buf->host + offset_B;
 }
 
 void
diff --git a/src/panfrost/vulkan/panvk_utrace.h b/src/panfrost/vulkan/panvk_utrace.h
index a0783fabc8c..13fde739837 100644
--- a/src/panfrost/vulkan/panvk_utrace.h
+++ b/src/panfrost/vulkan/panvk_utrace.h
@@ -23,6 +23,13 @@ struct panvk_utrace_flush_data {
    struct panvk_pool clone_pool;
 };
 
+/* One sub-allocation from the utrace copy-buffer heap. */
+struct panvk_utrace_buf {
+   uint64_t dev;  /* device address of the range */
+   uint64_t size; /* size of the range in bytes */
+   void *host;    /* host pointer to the same range */
+};
+
 void *panvk_utrace_create_buffer(struct u_trace_context *utctx,
                                  uint64_t size_B);
 