mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-12 05:40:15 +01:00
panvk/utrace: Alloc utrace copy buf from userspace heap
utrace needs memory for its timestamp and indirect-data buffers on every submit, and allocating that memory from the kernel each time is expensive. Instead, allocate one big BO upfront and hand it out in small pieces later using util_vma_heap.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36609>
This commit is contained in:
parent
c1b9396403
commit
05006c21dd
4 changed files with 101 additions and 19 deletions
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright 2024 Google LLC
|
||||
* Copyright 2025 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -10,7 +11,6 @@
|
|||
#include "genxml/cs_builder.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
|
||||
static void
|
||||
cmd_write_timestamp(const struct panvk_device *dev, struct cs_builder *b,
|
||||
|
|
@ -134,8 +134,8 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
|
|||
struct panvk_cmd_buffer *cmdbuf = cs_info->cmdbuf;
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct cs_builder *b = get_builder(cmdbuf, ut);
|
||||
const struct panvk_priv_bo *bo = timestamps;
|
||||
const uint64_t addr = bo->addr.dev + offset_B;
|
||||
const struct panvk_utrace_buf *buf = timestamps;
|
||||
const uint64_t addr = buf->dev + offset_B;
|
||||
|
||||
cmd_write_timestamp(dev, b, addr, *cs_info->ts_async_op);
|
||||
}
|
||||
|
|
@ -149,8 +149,8 @@ panvk_utrace_capture_data(struct u_trace *ut, void *cs, void *dst_buffer,
|
|||
* panvk_cmd_buffer so we can pass additional parameters. */
|
||||
struct panvk_utrace_cs_info *cs_info = cs;
|
||||
struct cs_builder *b = get_builder(cs_info->cmdbuf, ut);
|
||||
const struct panvk_priv_bo *dst_bo = dst_buffer;
|
||||
const uint64_t dst_addr = dst_bo->addr.dev + dst_offset_B;
|
||||
const struct panvk_utrace_buf *dst_buf = dst_buffer;
|
||||
const uint64_t dst_addr = dst_buf->dev + dst_offset_B;
|
||||
const uint64_t src_addr = src_offset_B;
|
||||
|
||||
/* src_offset_B is absolute, src_buffer is used to indicate register capture */
|
||||
|
|
@ -165,6 +165,26 @@ panvk_utrace_capture_data(struct u_trace *ut, void *cs, void *dst_buffer,
|
|||
cs_info->capture_data_wait_for_ts);
|
||||
}
|
||||
|
||||
/**
 * Return the size, in bytes, requested for the BO that backs the utrace
 * copy-buffer heap.
 *
 * The size can be overridden with the PANVK_UTRACE_CLONE_MEM_SIZE environment
 * variable, written either as a decimal number or as a hexadecimal number
 * prefixed with "0x"/"0X". Characters following the number are ignored.
 *
 * A missing, unparsable, negative, zero or out-of-range (> UINT32_MAX) value
 * selects the 10 MB default.
 */
static uint32_t
get_utrace_clone_mem_size(void)
{
   /* 10 MB default */
   const uint32_t default_size = 0xa00000;

   const char *v = getenv("PANVK_UTRACE_CLONE_MEM_SIZE");
   if (!v)
      return default_size;

   /* Skip leading blanks so the sign and prefix checks below look at the
    * first significant character. */
   while (*v == ' ' || *v == '\t')
      v++;

   /* strtoull() silently negates "-N"; a negative size is never valid. */
   if (*v == '-')
      return default_size;

   const int is_hex = v[0] == '0' && (v[1] == 'x' || v[1] == 'X');

   /* Unlike sscanf("%u"), strtoull() has defined behavior on overflow: it
    * saturates to ULLONG_MAX, which the range check below rejects. */
   char *end = NULL;
   const unsigned long long parsed = strtoull(v, &end, is_hex ? 16 : 10);
   if (end == v || parsed == 0 || parsed > UINT32_MAX)
      return default_size;

   const uint32_t size = (uint32_t)parsed;

   if (is_hex) {
      mesa_logi("selected utrace mem size = 0x%x (%u) hex", size, size);
   }

   return size;
}
|
||||
|
||||
void
|
||||
panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
|
||||
{
|
||||
|
|
@ -174,12 +194,35 @@ panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
|
|||
panvk_utrace_record_ts, panvk_utrace_read_ts,
|
||||
panvk_utrace_capture_data, panvk_utrace_get_data,
|
||||
panvk_utrace_delete_flush_data);
|
||||
|
||||
VkResult result = panvk_priv_bo_create(dev, get_utrace_clone_mem_size(), 0,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
|
||||
&dev->utrace.copy_buf_heap_bo);
|
||||
assert(result == VK_SUCCESS);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
simple_mtx_init(&dev->utrace.copy_buf_heap_lock, mtx_plain);
|
||||
|
||||
simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
|
||||
util_vma_heap_init(&dev->utrace.copy_buf_heap,
|
||||
dev->utrace.copy_buf_heap_bo->addr.dev,
|
||||
dev->utrace.copy_buf_heap_bo->bo->size);
|
||||
simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(utrace_context_fini)(struct panvk_device *dev)
|
||||
{
|
||||
u_trace_context_fini(&dev->utrace.utctx);
|
||||
|
||||
simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
|
||||
util_vma_heap_finish(&dev->utrace.copy_buf_heap);
|
||||
simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
|
||||
|
||||
panvk_priv_bo_unref(dev->utrace.copy_buf_heap_bo);
|
||||
|
||||
simple_mtx_destroy(&dev->utrace.copy_buf_heap_lock);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -189,10 +232,10 @@ panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
|
|||
uint64_t to_offset, uint64_t size_B)
|
||||
{
|
||||
struct cs_builder *b = cmdstream;
|
||||
const struct panvk_priv_bo *src_bo = ts_from;
|
||||
const struct panvk_priv_bo *dst_bo = ts_to;
|
||||
const uint64_t src_addr = src_bo->addr.dev + from_offset;
|
||||
const uint64_t dst_addr = dst_bo->addr.dev + to_offset;
|
||||
const struct panvk_utrace_buf *src_buf = ts_from;
|
||||
const struct panvk_utrace_buf *dst_buf = ts_to;
|
||||
const uint64_t src_addr = src_buf->dev + from_offset;
|
||||
const uint64_t dst_addr = dst_buf->dev + to_offset;
|
||||
|
||||
cmd_copy_data(b, dst_addr, src_addr, size_B, false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
#include "util/pan_ir.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/u_printf.h"
|
||||
#include "util/vma.h"
|
||||
|
||||
|
|
@ -98,6 +99,10 @@ struct panvk_device {
|
|||
#ifdef HAVE_PERFETTO
|
||||
struct panvk_utrace_perfetto utp;
|
||||
#endif
|
||||
/* Timestamp + indirect data storage */
|
||||
struct util_vma_heap copy_buf_heap;
|
||||
struct panvk_priv_bo *copy_buf_heap_bo;
|
||||
simple_mtx_t copy_buf_heap_lock;
|
||||
} utrace;
|
||||
|
||||
struct panvk_device_draw_context* draw_ctx;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright 2024 Google LLC
|
||||
* Copyright 2025 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -23,20 +24,47 @@ void *
|
|||
panvk_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
|
||||
{
|
||||
struct panvk_device *dev = to_dev(utctx);
|
||||
struct panvk_priv_bo *bo;
|
||||
|
||||
if (panvk_priv_bo_create(dev, size_B, 0, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
|
||||
&bo) != VK_SUCCESS)
|
||||
/* This memory is also used to write CSF commands, therefore we align to a
|
||||
* cache line. */
|
||||
const uint64_t alignment = 0x40;
|
||||
|
||||
simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
|
||||
const uint64_t addr_dev =
|
||||
util_vma_heap_alloc(&dev->utrace.copy_buf_heap, size_B, alignment);
|
||||
simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
|
||||
|
||||
if (!addr_dev) {
|
||||
mesa_loge("Couldn't allocate utrace buffer (size = 0x%" PRIx64 ")."
|
||||
"Provide larger PANVK_UTRACE_CLONE_MEM_SIZE (current = 0x%zx)",
|
||||
size_B, dev->utrace.copy_buf_heap_bo->bo->size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return bo;
|
||||
struct panvk_utrace_buf *container = malloc(sizeof(struct panvk_utrace_buf));
|
||||
void *addr_host = dev->utrace.copy_buf_heap_bo->addr.host + addr_dev -
|
||||
dev->utrace.copy_buf_heap_bo->addr.dev;
|
||||
|
||||
*container = (struct panvk_utrace_buf){
|
||||
.host = addr_host,
|
||||
.dev = addr_dev,
|
||||
.size = size_B,
|
||||
};
|
||||
|
||||
return container;
|
||||
}
|
||||
|
||||
void
|
||||
panvk_utrace_delete_buffer(struct u_trace_context *utctx, void *buffer)
|
||||
{
|
||||
struct panvk_priv_bo *bo = buffer;
|
||||
panvk_priv_bo_unref(bo);
|
||||
struct panvk_device *dev = to_dev(utctx);
|
||||
struct panvk_utrace_buf *buf = buffer;
|
||||
|
||||
simple_mtx_lock(&dev->utrace.copy_buf_heap_lock);
|
||||
util_vma_heap_free(&dev->utrace.copy_buf_heap, buf->dev, buf->size);
|
||||
simple_mtx_unlock(&dev->utrace.copy_buf_heap_lock);
|
||||
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
|
@ -47,7 +75,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
|
|||
const struct panvk_physical_device *pdev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
const struct pan_kmod_dev_props *props = &pdev->kmod.props;
|
||||
const struct panvk_priv_bo *bo = timestamps;
|
||||
const struct panvk_utrace_buf *buf = timestamps;
|
||||
struct panvk_utrace_flush_data *data = flush_data;
|
||||
|
||||
assert(props->timestamp_frequency);
|
||||
|
|
@ -62,7 +90,7 @@ panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
|
|||
data->wait_value = 0;
|
||||
}
|
||||
|
||||
const uint64_t *ts_ptr = bo->addr.host + offset_B;
|
||||
const uint64_t *ts_ptr = buf->host + offset_B;
|
||||
uint64_t ts = *ts_ptr;
|
||||
if (ts != U_TRACE_NO_TIMESTAMP)
|
||||
ts = (ts * NSEC_PER_SEC) / props->timestamp_frequency;
|
||||
|
|
@ -74,8 +102,8 @@ const void *
|
|||
panvk_utrace_get_data(struct u_trace_context *utctx, void *buffer,
|
||||
uint64_t offset_B, uint32_t size_B)
|
||||
{
|
||||
const struct panvk_priv_bo *bo = buffer;
|
||||
return bo->addr.host + offset_B;
|
||||
const struct panvk_utrace_buf *buf = buffer;
|
||||
return buf->host + offset_B;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -23,6 +23,12 @@ struct panvk_utrace_flush_data {
|
|||
struct panvk_pool clone_pool;
|
||||
};
|
||||
|
||||
/* Describes one utrace buffer as returned by panvk_utrace_create_buffer():
 * a range sub-allocated from the device's utrace copy-buffer heap. */
struct panvk_utrace_buf {
|
||||
uint64_t dev; /* Device address of the range, as returned by util_vma_heap_alloc(). */
|
||||
uint64_t size; /* Size in bytes passed at creation; handed back to util_vma_heap_free(). */
|
||||
void *host; /* Host pointer for the same range: heap BO host base plus (dev - heap BO device base). */
|
||||
};
|
||||
|
||||
void *panvk_utrace_create_buffer(struct u_trace_context *utctx,
|
||||
uint64_t size_B);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue