tu/drm: Factor out shared helpers
Factor out a few things that we can re-use between virtio and msm backends.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23533>
parent c14a13d40d
commit 811f332d81
4 changed files with 456 additions and 411 deletions

src/freedreno/vulkan/meson.build

@@ -78,7 +78,7 @@ endif

if freedreno_kmds.contains('msm')
  tu_flags += '-DTU_HAS_MSM'
  libtu_files += files('tu_knl_drm_msm.cc')
  libtu_files += files('tu_knl_drm_msm.cc', 'tu_knl_drm.cc')
  tu_deps += dep_libdrm
endif

src/freedreno/vulkan/tu_knl_drm.cc (new file)

@@ -0,0 +1,387 @@
/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "tu_knl_drm.h"
#include "tu_device.h"

static inline void
tu_sync_cacheline_to_gpu(void const *p __attribute__((unused)))
{
#if DETECT_ARCH_AARCH64
   /* Clean data cache. */
   __asm volatile("dc cvac, %0" : : "r" (p) : "memory");
#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
   __builtin_ia32_clflush(p);
#elif DETECT_ARCH_ARM
   /* DCCMVAC - same as DC CVAC on aarch64.
    * Seems to be illegal to call from userspace.
    */
   //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory");
   unreachable("Cache line clean is unsupported on ARMv7");
#endif
}

static inline void
tu_sync_cacheline_from_gpu(void const *p __attribute__((unused)))
{
#if DETECT_ARCH_AARCH64
   /* Clean and Invalidate data cache, there is no separate Invalidate. */
   __asm volatile("dc civac, %0" : : "r" (p) : "memory");
#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
   __builtin_ia32_clflush(p);
#elif DETECT_ARCH_ARM
   /* DCCIMVAC - same as DC CIVAC on aarch64.
    * Seems to be illegal to call from userspace.
    */
   //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory");
   unreachable("Cache line invalidate is unsupported on ARMv7");
#endif
}

void
tu_sync_cache_bo(struct tu_device *dev,
                 struct tu_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size,
                 enum tu_mem_sync_op op)
{
   uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size;
   char *start = (char *) bo->map + offset;
   char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size);

   start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1));

   for (; start < end; start += level1_dcache_size) {
      if (op == TU_MEM_SYNC_CACHE_TO_GPU) {
         tu_sync_cacheline_to_gpu(start);
      } else {
         tu_sync_cacheline_from_gpu(start);
      }
   }
}

static VkResult
sync_cache(VkDevice _device,
           enum tu_mem_sync_op op,
           uint32_t count,
           const VkMappedMemoryRange *ranges)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (!device->physical_device->has_cached_non_coherent_memory) {
      tu_finishme(
         "data cache clean and invalidation are unsupported on this arch!");
      return VK_SUCCESS;
   }

   for (uint32_t i = 0; i < count; i++) {
      TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
      tu_sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op);
   }

   return VK_SUCCESS;
}

VkResult
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
                     pMemoryRanges);
}

VkResult
tu_InvalidateMappedMemoryRanges(VkDevice _device,
                                uint32_t memoryRangeCount,
                                const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
                     pMemoryRanges);
}

int
tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}

void
tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map)
      munmap(bo->map, bo->size);

   tu_debug_bos_del(dev, bo);

   mtx_lock(&dev->bo_mutex);
   dev->bo_count--;
   dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];

   struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
   exchanging_bo->bo_list_idx = bo->bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
#ifdef TU_HAS_VIRTIO
      vma->res_id = bo->res_id;
#endif
      vma->iova = bo->iova;
      vma->size = bo->size;
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so we don't want to free the BO pointer, instead we want to reset it
       * to 0, to signal that array entry as being free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      /* Note that virtgpu GEM_CLOSE path is a bit different, but it does
       * not use the !has_set_iova path so we can ignore that
       */
      struct drm_gem_close req = {
         .handle = gem_handle,
      };

      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}

uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);

   return syncobj;
}

static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);

   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}

static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* syncobj absolute timeouts are signed. clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}

/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;
   struct tu_timeline_sync *submitted_syncs[wait_count];

   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* return error covering timeout */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}

const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};
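
For context, the cache-maintenance path above is what services the standard Vulkan flush/invalidate entry points on cached, non-coherent memory. A minimal application-side sketch (not part of this commit; the device and mem handles are assumed to exist and to refer to host-visible, cached, non-coherent memory that has been mapped):

VkMappedMemoryRange range = {
   .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   .memory = mem,           /* VkDeviceMemory mapped earlier */
   .offset = 0,
   .size = VK_WHOLE_SIZE,   /* VK_WHOLE_SIZE is handled in tu_sync_cache_bo() */
};
/* Ends up in tu_FlushMappedMemoryRanges() -> sync_cache() -> tu_sync_cache_bo(). */
vkFlushMappedMemoryRanges(device, 1, &range);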

src/freedreno/vulkan/tu_knl_drm.h (new file)

@@ -0,0 +1,64 @@
/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#ifndef TU_KNL_DRM_H
#define TU_KNL_DRM_H

#include "tu_knl.h"
#include "drm-uapi/msm_drm.h"

#include "vk_util.h"

#include "util/timespec.h"

enum tu_mem_sync_op {
   TU_MEM_SYNC_CACHE_TO_GPU,
   TU_MEM_SYNC_CACHE_FROM_GPU,
};

void
tu_sync_cache_bo(struct tu_device *dev,
                 struct tu_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size,
                 enum tu_mem_sync_op op);

int tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo);
void tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo);

static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   tv->tv_sec = t.tv_sec + ns / 1000000000;
   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}

static inline bool
fence_before(uint32_t a, uint32_t b)
{
   return (int32_t)(a - b) < 0;
}

extern const struct vk_sync_type tu_timeline_sync_type;

static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync)
{
   return sync->type == &tu_timeline_sync_type;
}

static inline struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync)
{
   assert(sync->type == &tu_timeline_sync_type);
   return container_of(sync, struct tu_timeline_sync, base);
}

uint32_t tu_syncobj_from_vk_sync(struct vk_sync *sync);

#endif
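
The point of this header is that each kernel backend can point its tu_knl function table at the shared helpers instead of carrying its own copies. The msm wiring is visible in the tu_knl_drm_msm.cc hunk below; a virtio backend would wire up the same entries. A sketch only, where the table name virtio_knl_funcs and the omitted members are assumptions for illustration, not part of this commit:

static const struct tu_knl virtio_knl_funcs = {
   /* backend-specific callbacks (bo_init, bo_map, queue_submit, ...) omitted */
   .bo_export_dmabuf = tu_drm_export_dmabuf,
   .bo_finish = tu_drm_bo_finish,
};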

src/freedreno/vulkan/tu_knl_drm_msm.cc

@@ -17,13 +17,12 @@
#include "drm-uapi/msm_drm.h"
#include "util/u_debug.h"
#include "util/hash_table.h"
#include "util/timespec.h"
#include "util/os_time.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_device.h"
#include "tu_dynamic_rendering.h"
#include "tu_knl_drm.h"

struct tu_queue_submit
{

@@ -260,28 +259,6 @@ tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
   return req.value;
}

enum tu_mem_sync_op
{
   TU_MEM_SYNC_CACHE_TO_GPU,
   TU_MEM_SYNC_CACHE_FROM_GPU,
};

static void
sync_cache_bo(struct tu_device *dev,
              struct tu_bo *bo,
              VkDeviceSize offset,
              VkDeviceSize size,
              enum tu_mem_sync_op op);

static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   tv->tv_sec = t.tv_sec + ns / 1000000000;
   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}

static VkResult
tu_wait_fence(struct tu_device *dev,
              uint32_t queue_id,

@@ -601,7 +578,7 @@ msm_bo_init(struct tu_device *dev,
       *
       * MSM already does this automatically for uncached (MSM_BO_WC) memory.
       */
      sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
      tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
   }

   return result;

@@ -658,16 +635,6 @@ msm_bo_init_dmabuf(struct tu_device *dev,
   return result;
}

static int
msm_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}

static VkResult
msm_bo_map(struct tu_device *dev, struct tu_bo *bo)
{

@@ -696,353 +663,6 @@ msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
   mtx_unlock(&dev->bo_mutex);
}

static void
msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map)
      munmap(bo->map, bo->size);

   tu_debug_bos_del(dev, bo);

   mtx_lock(&dev->bo_mutex);
   dev->bo_count--;
   dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];

   struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
   exchanging_bo->bo_list_idx = bo->bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
      vma->iova = bo->iova;
      vma->size = bo->size;
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so we don't want to free the BO pointer, instead we want to reset it
       * to 0, to signal that array entry as being free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      tu_gem_close(dev, gem_handle);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}

static inline void
tu_sync_cacheline_to_gpu(void const *p __attribute__((unused)))
{
#if DETECT_ARCH_AARCH64
   /* Clean data cache. */
   __asm volatile("dc cvac, %0" : : "r" (p) : "memory");
#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
   __builtin_ia32_clflush(p);
#elif DETECT_ARCH_ARM
   /* DCCMVAC - same as DC CVAC on aarch64.
    * Seems to be illegal to call from userspace.
    */
   //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory");
   unreachable("Cache line clean is unsupported on ARMv7");
#endif
}

static inline void
tu_sync_cacheline_from_gpu(void const *p __attribute__((unused)))
{
#if DETECT_ARCH_AARCH64
   /* Clean and Invalidate data cache, there is no separate Invalidate. */
   __asm volatile("dc civac, %0" : : "r" (p) : "memory");
#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
   __builtin_ia32_clflush(p);
#elif DETECT_ARCH_ARM
   /* DCCIMVAC - same as DC CIVAC on aarch64.
    * Seems to be illegal to call from userspace.
    */
   //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory");
   unreachable("Cache line invalidate is unsupported on ARMv7");
#endif
}

static void
sync_cache_bo(struct tu_device *dev,
              struct tu_bo *bo,
              VkDeviceSize offset,
              VkDeviceSize size,
              enum tu_mem_sync_op op)
{
   uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size;
   char *start = (char *) bo->map + offset;
   char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size);

   start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1));

   for (; start < end; start += level1_dcache_size) {
      if (op == TU_MEM_SYNC_CACHE_TO_GPU) {
         tu_sync_cacheline_to_gpu(start);
      } else {
         tu_sync_cacheline_from_gpu(start);
      }
   }
}

static VkResult
sync_cache(VkDevice _device,
           enum tu_mem_sync_op op,
           uint32_t count,
           const VkMappedMemoryRange *ranges)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (!device->physical_device->has_cached_non_coherent_memory) {
      tu_finishme(
         "data cache clean and invalidation are unsupported on this arch!");
      return VK_SUCCESS;
   }

   for (uint32_t i = 0; i < count; i++) {
      TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
      sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op);
   }

   return VK_SUCCESS;
}

VkResult
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
                     pMemoryRanges);
}

VkResult
tu_InvalidateMappedMemoryRanges(VkDevice _device,
                                uint32_t memoryRangeCount,
                                const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
                     pMemoryRanges);
}

static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync);
static struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync);

static uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);

   return syncobj;
}

static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);

   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}

static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* syncobj absolute timeouts are signed. clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}

/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;
   struct tu_timeline_sync *submitted_syncs[wait_count];

   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* return error covering timeout */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}

static VkResult
tu_queue_submit_create_locked(struct tu_queue *queue,
                              struct vk_queue_submit *vk_submit,

@@ -1418,40 +1038,14 @@ static const struct tu_knl msm_knl_funcs = {
   .submitqueue_close = msm_submitqueue_close,
   .bo_init = msm_bo_init,
   .bo_init_dmabuf = msm_bo_init_dmabuf,
   .bo_export_dmabuf = msm_bo_export_dmabuf,
   .bo_export_dmabuf = tu_drm_export_dmabuf,
   .bo_map = msm_bo_map,
   .bo_allow_dump = msm_bo_allow_dump,
   .bo_finish = msm_bo_finish,
   .bo_finish = tu_drm_bo_finish,
   .device_wait_u_trace = msm_device_wait_u_trace,
   .queue_submit = msm_queue_submit,
};

static const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};

static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync)
{
   return sync->type == &tu_timeline_sync_type;
}

static struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync)
{
   assert(sync->type == &tu_timeline_sync_type);
   return container_of(sync, struct tu_timeline_sync, base);
}

VkResult
tu_knl_drm_msm_load(struct tu_instance *instance,
                    int fd, struct _drmVersion *version,