freedreno/drm: Add perfetto memory tracing

The design of the perfetto memory event is a bit more vk specific, but
we can abuse it to get a breakdown of memory usage for various purposes.
The memory_type parameter is (ab)used to get buffer vs image memory
split out into its own track/graph.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28058>
This commit is contained in:
Rob Clark 2024-02-16 13:54:01 -08:00 committed by Marge Bot
parent a3fb2b07aa
commit 9936e91808
9 changed files with 274 additions and 7 deletions

View file

@ -27,6 +27,7 @@
#include "util/os_mman.h"
#include "freedreno_drmif.h"
#include "freedreno_drm_perfetto.h"
#include "freedreno_priv.h"
simple_mtx_t table_lock = SIMPLE_MTX_INITIALIZER;
@ -143,9 +144,9 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
if (size < FD_BO_HEAP_BLOCK_SIZE) {
uint32_t alloc_flags = flags & ~_FD_BO_HINTS;
if ((alloc_flags == 0) && dev->default_heap)
bo = fd_bo_heap_alloc(dev->default_heap, size);
bo = fd_bo_heap_alloc(dev->default_heap, size, flags);
else if ((alloc_flags == RING_FLAGS) && dev->ring_heap)
bo = fd_bo_heap_alloc(dev->ring_heap, size);
bo = fd_bo_heap_alloc(dev->ring_heap, size, flags);
if (bo)
return bo;
}
@ -169,6 +170,8 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
bo->alloc_flags = flags;
fd_alloc_log(bo, FD_ALLOC_NONE, FD_ALLOC_ACTIVE);
return bo;
}
@ -387,6 +390,7 @@ fd_bo_del(struct fd_bo *bo)
bo_finalize(bo);
dev_flush(dev);
fd_alloc_log(bo, FD_ALLOC_ACTIVE, FD_ALLOC_NONE);
bo_del(bo);
}
@ -420,6 +424,7 @@ fd_bo_del_array(struct fd_bo **bos, int count)
*/
for (int i = 0; i < count; i++) {
fd_alloc_log(bos[i], FD_ALLOC_ACTIVE, FD_ALLOC_NONE);
bo_del(bos[i]);
}
}

View file

@ -25,6 +25,7 @@
*/
#include "freedreno_drmif.h"
#include "freedreno_drm_perfetto.h"
#include "freedreno_priv.h"
#define FD_BO_CACHE_STATS 0
@ -172,6 +173,7 @@ fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time)
bo_remove_from_bucket(bucket, bo);
bucket->expired++;
list_addtail(&bo->node, &freelist);
fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_NONE);
cnt++;
}
@ -258,11 +260,13 @@ retry:
if (bo->funcs->madvise(bo, true) <= 0) {
/* we've lost the backing pages, delete and try again: */
list_addtail(&bo->node, &freelist);
fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_NONE);
goto retry;
}
p_atomic_set(&bo->refcnt, 1);
bo->reloc_flags = FD_RELOC_FLAGS_INIT;
bucket->hits++;
fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_ACTIVE);
return bo;
}
bucket->misses++;
@ -301,6 +305,7 @@ fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo)
bucket->count++;
simple_mtx_unlock(&cache->lock);
fd_alloc_log(bo, FD_ALLOC_ACTIVE, FD_ALLOC_CACHE);
fd_bo_cache_cleanup(cache, time.tv_sec);
return 0;

View file

@ -22,6 +22,7 @@
*/
#include "freedreno_drmif.h"
#include "freedreno_drm_perfetto.h"
#include "freedreno_priv.h"
struct sa_bo {
@ -135,6 +136,9 @@ sa_release(struct fd_bo *bo)
util_vma_heap_free(&s->heap->heap, s->offset, bo->size);
/* The BO has already been moved ACTIVE->NONE, now move it back to heap: */
fd_alloc_log(bo, FD_ALLOC_NONE, FD_ALLOC_HEAP);
/* Drop our reference to the backing block object: */
fd_bo_del(s->heap->blocks[block_idx(s)]);
@ -210,7 +214,7 @@ heap_clean(struct fd_bo_heap *heap, bool idle)
}
struct fd_bo *
fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags)
{
heap_clean(heap, true);
@ -245,7 +249,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
mesa_logi("alloc: %08x-%x idx=%d", s->offset, size, idx);
if (!heap->blocks[idx]) {
heap->blocks[idx] = fd_bo_new(
heap->dev, FD_BO_HEAP_BLOCK_SIZE, heap->flags,
heap->dev, FD_BO_HEAP_BLOCK_SIZE, heap->flags | _FD_BO_HINT_HEAP,
"heap-%x-block-%u", heap->flags, idx);
if (heap->flags == RING_FLAGS)
fd_bo_mark_for_dump(heap->blocks[idx]);
@ -259,7 +263,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
bo->size = size;
bo->funcs = &heap_bo_funcs;
bo->handle = 1; /* dummy handle to make fd_bo_init_common() happy */
bo->alloc_flags = heap->flags;
bo->alloc_flags = flags;
/* Pre-initialize mmap ptr, to avoid trying to os_mmap() */
bo->map = ((uint8_t *)fd_bo_map(heap->blocks[idx])) + block_offset(s);
@ -268,5 +272,7 @@ fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
bo->handle = FD_BO_SUBALLOC_HANDLE;
fd_alloc_log(bo, FD_ALLOC_HEAP, FD_ALLOC_ACTIVE);
return bo;
}

View file

@ -31,6 +31,7 @@
#include "util/os_file.h"
#include "freedreno_drmif.h"
#include "freedreno_drm_perfetto.h"
#include "freedreno_priv.h"
struct fd_device *msm_device_new(int fd, drmVersionPtr version);
@ -88,6 +89,8 @@ out:
if (!dev)
return NULL;
fd_drm_perfetto_init();
p_atomic_set(&dev->refcnt, 1);
dev->fd = fd;
dev->handle_table =

View file

@ -0,0 +1,171 @@
/*
* Copyright © 2024 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <perfetto.h>
#include "freedreno_drm_perfetto.h"
#include "freedreno_drmif.h"
#include "util/log.h"
#include "util/perf/u_perfetto.h"
#include "util/simple_mtx.h"
/**
 * Perfetto data source through which fd_alloc_log() emits memory events.
 *
 * The lifecycle callbacks carry no state of their own; start and stop are
 * merely logged.
 */
class FdMemoryDataSource : public perfetto::DataSource<FdMemoryDataSource> {
public:
   /* Nothing to configure at setup time. */
   void OnSetup(const SetupArgs &) override {}

   void OnStart(const StartArgs &) override
   {
      PERFETTO_LOG("Memory tracing started");
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Memory tracing stopped");
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdMemoryDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdMemoryDataSource);
extern "C" void
fd_drm_perfetto_init(void)
{
util_perfetto_init();
perfetto::DataSourceDescriptor dsd;
dsd.set_name("gpu.memory.msm");
FdMemoryDataSource::Register(dsd);
}
/**
 * Log a BO moving from one allocation category to another.
 *
 * @param bo    the buffer object; its size, iova and alloc_flags are read
 * @param from  the category the BO is leaving
 * @param to    the category the BO is entering
 *
 * Only transitions into or out of FD_ALLOC_ACTIVE are emitted as perfetto
 * memory events.  Any other transition is only seen by the (compile-time
 * disabled) MEMORY_DEBUGGING accounting below.
 */
extern "C" void
fd_alloc_log(struct fd_bo *bo, enum fd_alloc_category from, enum fd_alloc_category to)
{
   /* Special case for BOs that back heap chunks, they don't immediately
    * transition to active, despite what the caller thinks:
    */
   if (bo->alloc_flags & _FD_BO_HINT_HEAP) {
      if (to == FD_ALLOC_ACTIVE) {
         to = FD_ALLOC_HEAP;
      } else if (from == FD_ALLOC_ACTIVE) {
         from = FD_ALLOC_HEAP;
      }
   }

   /* Flip to 1 to get a running per-category / per-usage size summary in
    * the log, emitted at most once per second:
    */
#define MEMORY_DEBUGGING 0
   if (MEMORY_DEBUGGING) {
      static simple_mtx_t lock = SIMPLE_MTX_INITIALIZER;

      assert(bo->size);

      simple_mtx_lock(&lock);

      /* Bytes currently accounted to each fd_alloc_category: */
      static uint32_t sizes[4];
      /* Bytes of ACTIVE memory, split by usage hint: */
      static uint32_t size_buffer, size_image, size_command, size_internal, *size_cat;

      if (from != FD_ALLOC_NONE) {
         assert(sizes[from] >= bo->size);
         sizes[from] -= bo->size;
      }

      if (to != FD_ALLOC_NONE) {
         sizes[to] += bo->size;
      }

      if (bo->alloc_flags & FD_BO_HINT_BUFFER) {
         size_cat = &size_buffer;
      } else if (bo->alloc_flags & FD_BO_HINT_IMAGE) {
         size_cat = &size_image;
      } else if (bo->alloc_flags & FD_BO_HINT_COMMAND) {
         size_cat = &size_command;
      } else {
         size_cat = &size_internal;
      }

      if (to == FD_ALLOC_ACTIVE) {
         *size_cat += bo->size;
      } else if (from == FD_ALLOC_ACTIVE) {
         assert(*size_cat >= bo->size);
         *size_cat -= bo->size;
      }

      static time_t last_time;
      struct timespec time;

      clock_gettime(CLOCK_MONOTONIC, &time);

      /* Rate-limit the summary to one line per (monotonic) second: */
      if (last_time != time.tv_sec) {
         mesa_logi("active=%'u, heap=%'u, cache=%'u, buffer=%'u, image=%'u, command=%'u, internal=%'u",
                   sizes[FD_ALLOC_ACTIVE], sizes[FD_ALLOC_HEAP], sizes[FD_ALLOC_CACHE],
                   size_buffer, size_image, size_command, size_internal);
         last_time = time.tv_sec;
      }

      simple_mtx_unlock(&lock);
   }

   /* Only transitions that enter or leave ACTIVE are traced: */
   if ((to != FD_ALLOC_ACTIVE) && (from != FD_ALLOC_ACTIVE))
      return;

   FdMemoryDataSource::Trace([=](FdMemoryDataSource::TraceContext tctx) {
      /* Sample the clock once, so the packet and the event it carries
       * agree on the timestamp:
       */
      const auto now_ns = perfetto::base::GetBootTimeNs().count();

      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(now_ns);

      auto event = packet->set_vulkan_memory_event();

      event->set_timestamp(now_ns);
      event->set_memory_size(bo->size);
      event->set_memory_address(bo->iova);
      event->set_allocation_scope(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SCOPE_COMMAND);
      event->set_pid(getpid());

      /* memory_type is given a distinct value per usage (1=buffer, 2=image,
       * 3=everything else) in addition to the event source:
       */
      if (bo->alloc_flags & FD_BO_HINT_BUFFER) {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_BUFFER);
         event->set_memory_type(1);
      } else if (bo->alloc_flags & FD_BO_HINT_IMAGE) {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_IMAGE);
         event->set_memory_type(2);
      } else {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_DRIVER);
         event->set_memory_type(3);
      }

      if (bo->alloc_flags & (FD_BO_HINT_BUFFER | FD_BO_HINT_IMAGE)) {
         /* For IMAGE/BUFFER, the trace processor is looking for BIND/DESTROY_BOUND: */
         if (to == FD_ALLOC_ACTIVE) {
            event->set_operation(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
         } else {
            event->set_operation(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND);
         }
      } else {
         /* For SOURCE_DRIVER, the relevant ops are CREATE/DESTROY */
         if (to == FD_ALLOC_ACTIVE) {
            event->set_operation(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
         } else {
            event->set_operation(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
         }
      }
   });
}

View file

@ -0,0 +1,64 @@
/*
* Copyright © 2024 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef FREEDRENO_DRM_PERFETTO_H_
#define FREEDRENO_DRM_PERFETTO_H_
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Allocation categories used for memory event logging.
 *
 * Every memory event describes a BO moving from one category to another.
 * For example, a fresh allocation from the kernel is NONE -> ACTIVE, a
 * freed buffer that gets parked in the BO cache is ACTIVE -> CACHE, and
 * if it is later dropped from the cache, CACHE -> NONE.
 */
enum fd_alloc_category {
   FD_ALLOC_NONE   = 0, /* freed / not allocated */
   FD_ALLOC_HEAP   = 1, /* unused bo heap memory */
   FD_ALLOC_CACHE  = 2, /* unused bo cache memory */
   FD_ALLOC_ACTIVE = 3, /* actively used */
};
/* Only pointers to the BO are passed around here, so a forward
 * declaration is sufficient:
 */
struct fd_bo;

#ifdef HAVE_PERFETTO

/* Out-of-line implementations, used when HAVE_PERFETTO is defined: */
void fd_drm_perfetto_init(void);
void fd_alloc_log(struct fd_bo *bo, enum fd_alloc_category from,
                  enum fd_alloc_category to);

#else

/* Without HAVE_PERFETTO both entry points are empty inline functions, so
 * callers do not need their own #ifdef:
 */
static inline void fd_drm_perfetto_init(void) {}

static inline void
fd_alloc_log(struct fd_bo *bo, enum fd_alloc_category from,
             enum fd_alloc_category to)
{}

#endif
#ifdef __cplusplus
} /* end of extern "C" */
#endif
#endif /* FREEDRENO_DRM_PERFETTO_H_ */

View file

@ -152,9 +152,13 @@ int fd_fence_wait(struct fd_fence *f);
*/
#define FD_BO_HINT_BUFFER BITSET_BIT(8)
#define FD_BO_HINT_IMAGE BITSET_BIT(9)
/* Part of RING_FLAGS, ie. carried by ring heap allocations: */
#define FD_BO_HINT_COMMAND BITSET_BIT(10)
/* Set by fd_bo_heap_alloc() on the BOs that back a heap block, and checked
 * by fd_alloc_log() so those are accounted as HEAP rather than ACTIVE:
 */
#define _FD_BO_HINT_HEAP BITSET_BIT(11)
/* Mask of all the hint bits above.  bo_new() masks these off before
 * comparing the remaining flags to decide whether a heap can be used:
 */
#define _FD_BO_HINTS ( \
FD_BO_HINT_BUFFER | \
FD_BO_HINT_IMAGE | \
FD_BO_HINT_COMMAND | \
_FD_BO_HINT_HEAP | \
0)
/*

View file

@ -60,7 +60,7 @@ extern simple_mtx_t fence_lock;
#define SUBALLOC_SIZE (32 * 1024)
/* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
#define SUBALLOC_ALIGNMENT 64
#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT)
#define RING_FLAGS (FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT | FD_BO_HINT_COMMAND)
/*
* Stupid/simple growable array implementation:
@ -192,7 +192,7 @@ struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
void fd_bo_heap_destroy(struct fd_bo_heap *heap);
struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size);
struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size, uint32_t flags);
static inline uint32_t
submit_offset(struct fd_bo *bo, uint32_t offset)

View file

@ -43,6 +43,15 @@ libfreedreno_drm_deps = [
dep_valgrind,
]
# The perfetto dependency and the tracing implementation are only added
# when perfetto support is enabled:
if with_perfetto
libfreedreno_drm_deps += dep_perfetto
libfreedreno_drm_files += 'freedreno_drm_perfetto.cc'
endif
# The header file is part of the build in either case; when HAVE_PERFETTO
# is not defined it only provides empty inline stubs:
libfreedreno_drm_files += 'freedreno_drm_perfetto.h'
libfreedreno_drm_msm_files = files(
'msm/msm_bo.c',
'msm/msm_device.c',