From 748654e02a791760a0bfbd39652f44da4a4e1edb Mon Sep 17 00:00:00 2001
From: Julia Zhang
Date: Sat, 21 Dec 2024 00:18:17 +0800
Subject: [PATCH] amd/ds: implement bo_create function

Implement the amdgpu_bo_create method for the AMDPerf class, with which
the amd pps driver can create a buffer to query perfctr values.

The export of the KMS handle is now checked in release builds (it was
previously only asserted), and a failed CPU map unmaps the GPU VA range
before tearing the buffer down.

Signed-off-by: Julia Zhang
---
 src/amd/ds/amd_pps_perf.cc | 133 +++++++++++++++++++++++++++++++++++++
 src/amd/ds/amd_pps_perf.h  |  56 ++++++++++++++++
 2 files changed, 189 insertions(+)

diff --git a/src/amd/ds/amd_pps_perf.cc b/src/amd/ds/amd_pps_perf.cc
index a6cb1007bb2..3a7a6cefb8d 100644
--- a/src/amd/ds/amd_pps_perf.cc
+++ b/src/amd/ds/amd_pps_perf.cc
@@ -112,3 +112,136 @@ bool AMDPerf::amdgpu_dev_info_init(int drm_fd)
 
    return true;
 }
+
+/* Allocate a GPU buffer object, map it into the GPU VA space and the CPU
+ * address space, and return it through out_bo.
+ *
+ * size/alignment are in bytes, domain is an AMDGPU_GEM_DOMAIN_* mask and
+ * flags selects RADEON_FLAG_* behaviour (write-combined CPU mapping,
+ * 32-bit VA range, replayable VA range).
+ *
+ * Returns 0 on success, -1 if the wrapper struct cannot be allocated, or
+ * the error code of the failing libdrm call.
+ */
+int AMDPerf::amdgpu_bo_create(uint64_t size, uint64_t alignment,
+                              uint32_t domain, enum radeon_bo_flag flags,
+                              struct pps_amdgpu_bo **out_bo, uint8_t priority)
+{
+   int ret;
+   void *data;
+   uint32_t kms_handle = 0;
+   uint64_t va = 0;
+   ac_drm_bo buf_handle;
+   amdgpu_va_handle va_handle;
+   struct pps_amdgpu_bo *bo;
+   struct amdgpu_bo_alloc_request request = {0};
+
+   bo = CALLOC_STRUCT(pps_amdgpu_bo);
+   if (!bo)
+      return -1;
+
+   /* Align the VA range to the PTE fragment size for large buffers so the
+    * kernel can use larger page-table fragments.
+    */
+   unsigned virt_alignment = alignment;
+   if (size >= info.pte_fragment_size)
+      virt_alignment = MAX2(virt_alignment, info.pte_fragment_size);
+
+   uint64_t va_flags = AMDGPU_VA_RANGE_HIGH |
+                       (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
+                       (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
+   ret = ac_drm_va_range_alloc(dev, amdgpu_gpu_va_range_general, size,
+                               virt_alignment, 0, &va, &va_handle, va_flags);
+   if (ret)
+      goto error_va_alloc;
+
+   bo->base.va = va;
+   bo->base.size = size;
+   bo->va_handle = va_handle;
+   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
+
+   request.alloc_size = size;
+   request.phys_alignment = alignment;
+   /* VRAM requests also list GTT so the kernel has a fallback heap. */
+   if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+   }
+
+   if (domain & AMDGPU_GEM_DOMAIN_GTT)
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+
+   /* The buffer is CPU-mapped below, so CPU access is always required. */
+   request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+   if (flags & RADEON_FLAG_GTT_WC)
+      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+   ret = ac_drm_bo_alloc(dev, &request, &buf_handle);
+   if (ret) {
+      fprintf(stderr, "amd/pps: Failed to allocate a buffer:\n");
+      fprintf(stderr, "amd/pps: size : %" PRIu64 " bytes\n", size);
+      fprintf(stderr, "amd/pps: alignment : %" PRIu64 " bytes\n", alignment);
+      fprintf(stderr, "amd/pps: domains : %" PRIu32 "\n", domain);
+      goto error_bo_alloc;
+   }
+
+   /* A failed export leaves kms_handle == 0; unlike the previous assert()
+    * this is also checked in release builds.
+    */
+   ret = ac_drm_bo_export(dev, buf_handle, amdgpu_bo_handle_type_kms, &kms_handle);
+   if (ret)
+      goto error_va_map;
+
+   va_flags = AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
+   size = align64(size, getpagesize());
+
+   ret = ac_drm_bo_va_op_raw(dev, kms_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
+   if (ret)
+      goto error_va_map;
+
+   ret = ac_drm_bo_cpu_map(dev, buf_handle, &data);
+   if (ret) {
+      /* Undo the VA mapping established just above. */
+      ac_drm_bo_va_op_raw(dev, kms_handle, 0, size, va, va_flags, AMDGPU_VA_OP_UNMAP);
+      goto error_va_map;
+   }
+
+   bo->bo = buf_handle;
+   bo->bo_handle = kms_handle;
+   bo->base.domain = domain;
+   bo->base.use_global_list = false;
+   bo->priority = priority;
+   bo->cpu_map = (uint8_t *)data;
+
+   *out_bo = bo;
+
+   return 0;
+
+error_va_map:
+   ac_drm_bo_free(dev, buf_handle);
+error_bo_alloc:
+   ac_drm_va_range_free(bo->va_handle);
+error_va_alloc:
+   FREE(bo);
+   return ret;
+}
+
+/* Unmap and free a buffer previously created with amdgpu_bo_create(). */
+void AMDPerf::amdgpu_bo_destroy(struct pps_amdgpu_bo *bo)
+{
+   if (bo->cpu_map) {
+      ac_drm_bo_cpu_unmap(dev, bo->bo);
+      bo->cpu_map = NULL;
+   }
+
+   /* Mirror the page-aligned size and access flags that were used for the
+    * VA mapping at creation time.
+    */
+   uint64_t ib_size = align64(bo->base.size, getpagesize());
+   uint64_t flags = AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
+
+   ac_drm_bo_va_op_raw(dev, bo->bo_handle, 0, ib_size, bo->base.va, flags, AMDGPU_VA_OP_UNMAP);
+   ac_drm_bo_free(dev, bo->bo);
+   ac_drm_va_range_free(bo->va_handle);
+   FREE(bo);
+}
diff --git a/src/amd/ds/amd_pps_perf.h b/src/amd/ds/amd_pps_perf.h
index bd1d69285f0..7afaf2651de 100644
--- a/src/amd/ds/amd_pps_perf.h
+++ b/src/amd/ds/amd_pps_perf.h
@@ -11,8 +11,60 @@
 #include "util/list.h"
 #include "drm-uapi/amdgpu_drm.h"
 
+#define PPS_BO_PRIORITY_CS 31
+
 enum { MAX_RINGS_PER_TYPE = 8 };
 
+/* Buffer allocation flags, mirroring the radeon winsys flag names. */
+enum radeon_bo_flag {
+   RADEON_FLAG_GTT_WC = (1 << 0),
+   RADEON_FLAG_CPU_ACCESS = (1 << 1),
+   RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+   RADEON_FLAG_VIRTUAL = (1 << 3),
+   RADEON_FLAG_VA_UNCACHED = (1 << 4),
+   RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+   RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
+   RADEON_FLAG_READ_ONLY = (1 << 7),
+   RADEON_FLAG_32BIT = (1 << 8),
+   RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
+   RADEON_FLAG_ZERO_VRAM = (1 << 10),
+   RADEON_FLAG_REPLAYABLE = (1 << 11),
+   RADEON_FLAG_DISCARDABLE = (1 << 12),
+};
+
+struct pps_amdgpu_map_range {
+   uint64_t offset;
+   uint64_t size;
+   struct pps_amdgpu_bo *bo;
+   uint64_t bo_offset;
+};
+
+/* GPU-VA/size bookkeeping shared with radeon-style code. */
+struct radeon_bo {
+   uint64_t va;
+   uint64_t size;
+   bool is_local;
+   bool vram_no_cpu_access;
+   bool use_global_list;
+   uint32_t domain;
+};
+
+struct pps_amdgpu_bo {
+   struct radeon_bo base;
+   amdgpu_va_handle va_handle;
+   bool is_virtual;
+   uint8_t priority;
+
+   /* NOTE(review): anonymous struct member is a compiler extension in
+    * C++; confirm all supported compilers accept it.
+    */
+   struct {
+      ac_drm_bo bo;
+      uint32_t bo_handle;
+      void *cpu_map;
+   };
+};
+
 class AMDPerf
 {
 private:
@@ -33,4 +85,8 @@ public:
    bool is_dev_initialized();
    bool amd_perf_init(int drm_fd, bool is_virtio);
    void amd_perf_destroy();
+   int amdgpu_bo_create(uint64_t size, uint64_t alignment, uint32_t domain,
+                        enum radeon_bo_flag flags, struct pps_amdgpu_bo **out_bo,
+                        uint8_t priority);
+   void amdgpu_bo_destroy(struct pps_amdgpu_bo *bo);
 };