From b20ff5ce1412ed926d6e5c0ddb39542f81e45af4 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:06:48 +0300 Subject: [PATCH 01/15] vulkan: Add VkD3DDDI* structure definitions These are required for implementing Direct3D usermode drivers on top of Vulkan. D3D runtime provides callbacks and handles and expects drivers to use those instead of directly calling D3DKMT* functions --- include/vulkan/vulkan_d3dddi.h | 53 ++++++++++++++++++++++++++++ src/virtio/vulkan/vn_device_memory.c | 1 + 2 files changed, 54 insertions(+) create mode 100644 include/vulkan/vulkan_d3dddi.h diff --git a/include/vulkan/vulkan_d3dddi.h b/include/vulkan/vulkan_d3dddi.h new file mode 100644 index 00000000000..cfae046a1af --- /dev/null +++ b/include/vulkan/vulkan_d3dddi.h @@ -0,0 +1,53 @@ +#ifndef VULKAN_D3DDDI_H_ +#define VULKAN_D3DDDI_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#define VK_STRUCTURE_TYPE_D3DDDI_CALLBACKS ((VkStructureType)4281808695u) +#define VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE ((VkStructureType)4281808696u) +#define VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE ((VkStructureType)4281808697u) +
typedef struct { + VkStructureType sType; + void *pNext; + + LUID AdapterLuid; + + HANDLE hRTAdapter; // in: Runtime handle + HANDLE hRTDevice; // in: Runtime handle + const D3DDDI_ADAPTERCALLBACKS *pAdapterCallbacks; // in: Pointer to runtime callbacks that invoke kernel + const D3DDDI_DEVICECALLBACKS *pKTCallbacks; // in: Pointer to runtime callbacks that invoke kernel + const DXGI_DDI_BASE_CALLBACKS *pDXGIBaseCallbacks; // in: The driver should record this pointer for later use + + D3D10DDI_HRTCORELAYER hRTCoreLayer; // in: CoreLayer handle + const D3D11DDI_CORELAYER_DEVICECALLBACKS* p11UMCallbacks; // in: callbacks that stay in usermode + + HANDLE hContext; // out: Context handle +} VkD3DDDICallbacks; + +typedef struct { + VkStructureType sType; + void *pNext; + HANDLE hRTResource; + const 
D3D10DDIARG_CREATERESOURCE *pCreateResource; +} VkD3DDDICreateResource; + +typedef struct { + VkStructureType sType; + void *pNext; + HANDLE hRTResource; + const D3D10DDIARG_OPENRESOURCE *pOpenResource; + const void *pResourceInfo; /* VIOGPU_RES_INFO_REQ */ +} VkD3DDDIOpenResource; + +#define VK_STRUCTURE_TYPE_D3DDDI_CALLBACKS_cast VkD3DDDICallbacks +#define VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE_cast VkD3DDDICreateResource +#define VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE_cast VkD3DDDIOpenResource + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c index 213830aa9be..0330463ccf7 100644 --- a/src/virtio/vulkan/vn_device_memory.c +++ b/src/virtio/vulkan/vn_device_memory.c @@ -304,6 +304,7 @@ vn_device_memory_alloc(struct vn_device *dev, alloc_info = vn_device_memory_fix_alloc_info( alloc_info, renderer_handle_type, has_guest_vram, &local_info); + // FIXME: this is slightly wrong for Windows /* ensure correct blob flags */ mem_vk->export_handle_types = renderer_handle_type; } From e189e78d8c4d32fa28107bccb13c241ef74ef9c0 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:11:03 +0300 Subject: [PATCH 02/15] vulkan/runtime: Implement VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR --- src/vulkan/runtime/vk_device.c | 9 +++++++++ src/vulkan/runtime/vk_physical_device.c | 1 + 2 files changed, 10 insertions(+) diff --git a/src/vulkan/runtime/vk_device.c b/src/vulkan/runtime/vk_device.c index a2ffe734ff9..34092431fdd 100644 --- a/src/vulkan/runtime/vk_device.c +++ b/src/vulkan/runtime/vk_device.c @@ -248,6 +248,7 @@ vk_device_init(struct vk_device *device, device->enabled_extensions.EXT_calibrated_timestamps) { /* sorted by preference */ const VkTimeDomainKHR calibrate_domains[] = { + VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, }; @@ -840,6 +841,14 @@ 
vk_device_get_timestamp(struct vk_device *device, VkTimeDomainKHR domain, return VK_SUCCESS; fail: +#else + if (domain == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR) { + LARGE_INTEGER ts; + if (QueryPerformanceCounter(&ts)) { + *timestamp = ts.QuadPart; + return VK_SUCCESS; + } + } #endif /* _WIN32 */ return VK_ERROR_FEATURE_NOT_PRESENT; } diff --git a/src/vulkan/runtime/vk_physical_device.c b/src/vulkan/runtime/vk_physical_device.c index ad11a03810a..83f358e112c 100644 --- a/src/vulkan/runtime/vk_physical_device.c +++ b/src/vulkan/runtime/vk_physical_device.c @@ -297,6 +297,7 @@ vk_common_GetPhysicalDeviceCalibrateableTimeDomainsKHR( const VkTimeDomainKHR host_time_domains[] = { VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, + VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR, }; for (uint32_t i = 0; i < ARRAY_SIZE(host_time_domains); i++) { const VkTimeDomainKHR domain = host_time_domains[i]; From d812c7e0855c517c9ecfa6a689cbd9fbcb27e23d Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:13:05 +0300 Subject: [PATCH 03/15] [FIXME] vulkan/wsi: Workaround failed assertions when running DXVK --- src/vulkan/wsi/wsi_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index 44367b6b80b..10669f8dfba 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -3257,7 +3257,8 @@ wsi_WaitForPresentKHR(VkDevice device, VkSwapchainKHR _swapchain, uint64_t presentId, uint64_t timeout) { VK_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); - assert(swapchain->wait_for_present); + //assert(swapchain->wait_for_present); + if (!swapchain->wait_for_present) return VK_SUCCESS; return swapchain->wait_for_present(swapchain, presentId, timeout); } @@ -3266,7 +3267,8 @@ wsi_WaitForPresent2KHR(VkDevice device, VkSwapchainKHR _swapchain, const VkPresentWait2InfoKHR *info) { 
VK_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); - assert(swapchain->wait_for_present2); + //assert(swapchain->wait_for_present2); + if (!swapchain->wait_for_present2) return VK_SUCCESS; return swapchain->wait_for_present2(swapchain, info->presentId, info->timeout); } From fa534d0e036a3802acf59a92a263f6ecf7629d3b Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:14:18 +0300 Subject: [PATCH 04/15] include/winddk: Fix .gitignore for case-sensitive filesystems d3d10TokenizedProgramFormat.hpp and d3d11TokenizedProgramFormat.hpp have mixed-case naming in latest EWDK --- include/winddk/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/winddk/.gitignore b/include/winddk/.gitignore index 14e87aa12db..fcc125cfd7c 100644 --- a/include/winddk/.gitignore +++ b/include/winddk/.gitignore @@ -1,6 +1,8 @@ d3d10tokenizedprogramformat.hpp +d3d10TokenizedProgramFormat.hpp d3d10umddi.h d3d11tokenizedprogramformat.hpp +d3d11TokenizedProgramFormat.hpp d3dkmddi.h d3dkmdt.h d3dkmthk.h From a362bcfe70a0256317893463f1e929c72e181a8f Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:17:20 +0300 Subject: [PATCH 05/15] venus: print file and line in vn_log_result Useful for debugging to quickly pin-point the exact place where error is returned --- src/virtio/vulkan/vn_common.c | 4 +++- src/virtio/vulkan/vn_common.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index 2b1aa74683e..d34f5230a32 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -103,9 +103,11 @@ vn_log(struct vn_instance *instance, const char *format, ...) 
VkResult vn_log_result(struct vn_instance *instance, VkResult result, + const char *file, + int line, const char *where) { - vn_log(instance, "%s: %s", where, vk_Result_to_str(result)); + vn_log(instance, "%s:%d: %s: %s", file, line, where, vk_Result_to_str(result)); return result; } diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h index 25bd012f24f..ca525c9f50c 100644 --- a/src/virtio/vulkan/vn_common.h +++ b/src/virtio/vulkan/vn_common.h @@ -66,7 +66,7 @@ #define VN_PERF(category) (unlikely(vn_env.perf & VN_PERF_##category)) #define vn_error(instance, error) \ - (VN_DEBUG(RESULT) ? vn_log_result((instance), (error), __func__) : (error)) + (VN_DEBUG(RESULT) ? vn_log_result((instance), (error), __FILE__, __LINE__, __func__) : (error)) #define vn_result(instance, result) \ ((result) >= VK_SUCCESS ? (result) : vn_error((instance), (result))) @@ -326,6 +326,8 @@ vn_log(struct vn_instance *instance, const char *format, ...) VkResult vn_log_result(struct vn_instance *instance, VkResult result, + const char *file, + int line, const char *where); #define VN_REFCOUNT_INIT(val) \ From eb670ef70af2d3bf3f35ebdfccb868698058b442 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:24:09 +0300 Subject: [PATCH 06/15] venus: Expose VkInstanceCreateInfo to vn_renderer_create --- src/virtio/vulkan/vn_instance.c | 6 +++--- src/virtio/vulkan/vn_renderer.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c index 8a3f9af873a..5ec19337dbf 100644 --- a/src/virtio/vulkan/vn_instance.c +++ b/src/virtio/vulkan/vn_instance.c @@ -167,11 +167,11 @@ vn_instance_init_ring(struct vn_instance *instance) } static VkResult -vn_instance_init_renderer(struct vn_instance *instance) +vn_instance_init_renderer(struct vn_instance *instance, const VkInstanceCreateInfo *pCreateInfo) { const VkAllocationCallbacks *alloc = 
&instance->base.vk.alloc; - VkResult result = vn_renderer_create(instance, alloc, &instance->renderer); + VkResult result = vn_renderer_create(instance, alloc, pCreateInfo, &instance->renderer); if (result != VK_SUCCESS) return result; @@ -314,7 +314,7 @@ vn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, goto out_mtx_destroy; } - result = vn_instance_init_renderer(instance); + result = vn_instance_init_renderer(instance, pCreateInfo); if (result == VK_ERROR_INITIALIZATION_FAILED) { assert(!instance->renderer); *pInstance = instance_handle; diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h index 08f34df0454..a5372761761 100644 --- a/src/virtio/vulkan/vn_renderer.h +++ b/src/virtio/vulkan/vn_renderer.h @@ -244,6 +244,7 @@ vn_renderer_create_vtest(struct vn_instance *instance, static inline VkResult vn_renderer_create(struct vn_instance *instance, const VkAllocationCallbacks *alloc, + const VkInstanceCreateInfo *pCreateInfo, struct vn_renderer **renderer) { #ifdef HAVE_LIBDRM From 64c6a5db45e83c268887807b3b84c9f17689148c Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:39:09 +0300 Subject: [PATCH 07/15] venus: Filter Windows extensions from host renderer list --- src/virtio/vulkan/vn_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c index 35796c6b398..e2f9426d72e 100644 --- a/src/virtio/vulkan/vn_device.c +++ b/src/virtio/vulkan/vn_device.c @@ -196,6 +196,12 @@ find_extension_names(const char *const *exts, uint32_t ext_count, const char *name) { +#ifdef VK_USE_PLATFORM_WIN32_KHR + /* Filter win32 extensions as they're fully implemented in the driver */ + if (strstr(name, "win32")) + return true; +#endif + for (uint32_t i = 0; i < ext_count; i++) { if (!strcmp(exts[i], name)) return true; From 08b16f030b00469ff47a213ca487ec6f81770a9f Mon Sep 17 00:00:00 2001 From: anonymix007 
<48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:41:20 +0300 Subject: [PATCH 08/15] venus: Introduce VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE --- src/virtio/vulkan/vn_queue.c | 50 +++++++++++++++++++++++++++++++++--- src/virtio/vulkan/vn_queue.h | 10 ++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index 9756142fd58..145f8d3e585 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -25,6 +25,21 @@ #include "vn_renderer.h" #include "vn_wsi.h" +#ifdef VK_USE_PLATFORM_WIN32_KHR +#include +static inline VkResult sync_wait_handle(void *handle, int timeout) +{ + switch (WaitForSingleObject(handle, timeout)) { + case WAIT_OBJECT_0: + return VK_SUCCESS; + case WAIT_TIMEOUT: + return VK_NOT_READY; + default: + return VK_ERROR_DEVICE_LOST; + } +} +#endif + /* queue commands */ struct vn_submit_info_pnext_fix { @@ -398,7 +413,11 @@ vn_queue_submission_fix_batch_semaphores(struct vn_queue_submission *submit, struct vn_semaphore *sem = vn_semaphore_from_handle(sem_handle); const struct vn_sync_payload *payload = sem->payload; +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (payload->type != VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE) +#else if (payload->type != VN_SYNC_TYPE_IMPORTED_SYNC_FD) +#endif continue; if (!vn_semaphore_wait_external(dev, sem)) @@ -1539,8 +1558,13 @@ static void vn_sync_payload_release(UNUSED struct vn_device *dev, struct vn_sync_payload *payload) { - if (payload->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD && payload->fd >= 0) +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE && payload->handle != NULL) + CloseHandle(payload->handle); +#else + if (payload->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD && is_fd_valid(payload->fd)) close(payload->fd); +#endif payload->type = VN_SYNC_TYPE_INVALID; } @@ -1763,12 +1787,19 @@ vn_GetFenceStatus(VkDevice device, VkFence _fence) result = 
vn_call_vkGetFenceStatus(dev->primary_ring, device, _fence); } break; +#ifdef VK_USE_PLATFORM_WIN32_KHR + case VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE: + //vn_log(dev->instance, "waiting for handle %p", payload->handle); + result = payload->handle != NULL ? sync_wait_handle(payload->handle, 0) : VK_SUCCESS; + break; +#else case VN_SYNC_TYPE_IMPORTED_SYNC_FD: - if (payload->fd < 0 || sync_wait(payload->fd, 0) == 0) + if (!is_fd_valid(payload->fd) || sync_wait(payload->fd, 0) == 0) result = VK_SUCCESS; else result = errno == ETIME ? VK_NOT_READY : VK_ERROR_DEVICE_LOST; break; +#endif default: UNREACHABLE("unexpected fence payload type"); break; @@ -1911,7 +1942,7 @@ vn_create_sync_file(struct vn_device *dev, *out_fd = vn_renderer_sync_export_syncobj(dev->renderer, sync, true); vn_renderer_sync_destroy(dev->renderer, sync); - return *out_fd >= 0 ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS; + return is_fd_valid(*out_fd) ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS; } static inline bool @@ -1920,7 +1951,7 @@ vn_sync_valid_fd(int fd) /* the special value -1 for fd is treated like a valid sync file descriptor * referring to an object that has already signaled */ - return (fd >= 0 && sync_valid_fd(fd)) || fd == -1; + return (is_fd_valid(fd) && sync_valid_fd(fd)) || fd == -1; } VKAPI_ATTR VkResult VKAPI_CALL @@ -2015,12 +2046,23 @@ vn_semaphore_wait_external(struct vn_device *dev, struct vn_semaphore *sem) { struct vn_sync_payload *temp = &sem->temporary; + +#ifdef VK_USE_PLATFORM_WIN32_KHR + assert(temp->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE); + + if (temp->handle != NULL) { + //vn_log(dev->instance, "waiting for handle %p", temp->handle); + if (sync_wait_handle(temp->handle, INFINITE) != VK_SUCCESS) + return false; + } +#else assert(temp->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD); if (temp->fd >= 0) { if (sync_wait(temp->fd, -1)) return false; } +#endif vn_sync_payload_release(dev, &sem->temporary); sem->payload = &sem->permanent; diff --git a/src/virtio/vulkan/vn_queue.h 
b/src/virtio/vulkan/vn_queue.h index d4baefba062..75682054fa4 100644 --- a/src/virtio/vulkan/vn_queue.h +++ b/src/virtio/vulkan/vn_queue.h @@ -72,15 +72,25 @@ enum vn_sync_type { /* device object */ VN_SYNC_TYPE_DEVICE_ONLY, +#ifndef VK_USE_PLATFORM_WIN32_KHR /* payload is an imported sync file */ VN_SYNC_TYPE_IMPORTED_SYNC_FD, +#else + /* payload is an imported Win32 event handle */ + VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE, +#endif }; struct vn_sync_payload { enum vn_sync_type type; +#ifndef VK_USE_PLATFORM_WIN32_KHR /* If type is VN_SYNC_TYPE_IMPORTED_SYNC_FD, fd is a sync file. */ int fd; +#else + /* If type is VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE, fd is a Win32 event handle. */ + void *handle; +#endif }; /* For external fences and external semaphores submitted to be signaled. The From beb0e6a135c9bff953741c0c2fd8e0013742a331 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:44:27 +0300 Subject: [PATCH 09/15] venus: Implement VK_KHR_external_semaphore_win32 and VK_KHR_external_fence_win32 --- src/virtio/vulkan/vn_device.c | 11 ++ src/virtio/vulkan/vn_physical_device.c | 8 + src/virtio/vulkan/vn_queue.c | 214 +++++++++++++++++++++++++ src/virtio/vulkan/vn_renderer.h | 30 ++++ 4 files changed, 263 insertions(+) diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c index e2f9426d72e..d5accf59e6c 100644 --- a/src/virtio/vulkan/vn_device.c +++ b/src/virtio/vulkan/vn_device.c @@ -376,6 +376,17 @@ vn_device_fix_create_info(const struct vn_device *dev, block_exts[block_count++] = VK_EXT_PCI_BUS_INFO_EXTENSION_NAME; } +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (app_exts->KHR_external_fence_win32) { + /* see vn_physical_device_get_native_extensions */ + block_exts[block_count++] = VK_KHR_EXTERNAL_FENCE_WIN32_EXTENSION_NAME; + } + if (app_exts->KHR_external_semaphore_win32) { + /* see vn_physical_device_get_native_extensions */ + block_exts[block_count++] = 
VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME; + } +#endif + assert(extra_count <= ARRAY_SIZE(extra_exts)); assert(block_count <= ARRAY_SIZE(block_exts)); diff --git a/src/virtio/vulkan/vn_physical_device.c b/src/virtio/vulkan/vn_physical_device.c index 9c13fc9e060..c14047c3379 100644 --- a/src/virtio/vulkan/vn_physical_device.c +++ b/src/virtio/vulkan/vn_physical_device.c @@ -1171,19 +1171,27 @@ vn_physical_device_get_native_extensions( if (physical_dev->instance->renderer->info.has_external_sync && physical_dev->renderer_sync_fd.fence_exportable) { +#if DETECT_OS_WINDOWS + exts->KHR_external_fence_win32 = true; +#else if (physical_dev->external_fence_handles == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) { exts->KHR_external_fence_fd = true; } +#endif } if (physical_dev->instance->renderer->info.has_external_sync && physical_dev->renderer_sync_fd.semaphore_importable && physical_dev->renderer_sync_fd.semaphore_exportable) { +#if DETECT_OS_WINDOWS + exts->KHR_external_semaphore_win32 = true; +#else if (physical_dev->external_binary_semaphore_handles == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) { exts->KHR_external_semaphore_fd = true; } +#endif } #ifdef VK_USE_PLATFORM_ANDROID_KHR diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index 145f8d3e585..58b32e4ba71 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -1900,6 +1900,128 @@ vn_WaitForFences(VkDevice device, return vn_result(dev->instance, result); } + + +#ifdef VK_USE_PLATFORM_WIN32_KHR +static VkResult +vn_create_sync_handle(struct vn_device *dev, + struct vn_sync_payload_external *external_payload, + HANDLE *out_handle) +{ + struct vn_renderer_sync *sync; + VkResult result = vn_renderer_sync_create(dev->renderer, 0, + VN_RENDERER_SYNC_BINARY, &sync); + if (result != VK_SUCCESS) + return vn_error(dev->instance, result); + + struct vn_renderer_submit_batch batch = { + .syncs = &sync, + .sync_values = &(const uint64_t){ 1 }, + .sync_count = 1, + 
.ring_idx = external_payload->ring_idx, + }; + + uint32_t local_data[8]; + struct vn_cs_encoder local_enc = + VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data)); + if (external_payload->ring_seqno_valid) { + const uint64_t ring_id = vn_ring_get_id(dev->primary_ring); + vn_encode_vkWaitRingSeqnoMESA(&local_enc, 0, ring_id, + external_payload->ring_seqno); + batch.cs_data = local_data; + batch.cs_size = vn_cs_encoder_get_len(&local_enc); + } + + const struct vn_renderer_submit submit = { + .batches = &batch, + .batch_count = 1, + }; + result = vn_renderer_submit(dev->renderer, &submit); + if (result != VK_SUCCESS) { + vn_renderer_sync_destroy(dev->renderer, sync); + return vn_error(dev->instance, result); + } + + *out_handle = vn_renderer_sync_export_handle(dev->renderer, sync); + vn_renderer_sync_destroy(dev->renderer, sync); + + return *out_handle != NULL ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS; +} + +VkResult +vn_ImportFenceWin32HandleKHR(VkDevice device, + const VkImportFenceWin32HandleInfoKHR *pImportFenceWin32HandleInfo) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + struct vn_fence *fence = vn_fence_from_handle(pImportFenceWin32HandleInfo->fence); + ASSERTED const bool is_handle = pImportFenceWin32HandleInfo->handleType == + VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + void *handle = pImportFenceWin32HandleInfo->handle; + const LPCWSTR name = pImportFenceWin32HandleInfo->name; + + assert(is_handle); + + if ((handle == NULL && name == NULL) && (handle != NULL && name != NULL)) + return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + + struct vn_sync_payload *temp = &fence->temporary; + vn_sync_payload_release(dev, temp); + temp->type = VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE; + temp->handle = name != NULL ? 
CreateEventW(NULL, FALSE, FALSE, name) : handle; + fence->payload = temp; + //vn_log(dev->instance, "created handle %p", temp->handle); + + return VK_SUCCESS; +} + +VkResult +vn_GetFenceWin32HandleKHR(VkDevice device, + const VkFenceGetWin32HandleInfoKHR *pGetWin32HandleInfo, + HANDLE *pHandle) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + struct vn_fence *fence = vn_fence_from_handle(pGetWin32HandleInfo->fence); + const bool is_handle = + pGetWin32HandleInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + struct vn_sync_payload *payload = fence->payload; + VkResult result; + + assert(is_handle); + assert(dev->physical_device->renderer_sync_fd.fence_exportable); + + HANDLE handle = NULL; + if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY) { + result = vn_create_sync_handle(dev, &fence->external_payload, &handle); + if (result != VK_SUCCESS) + return vn_error(dev->instance, result); + + vn_async_vkResetFenceResourceMESA(dev->primary_ring, device, + pGetWin32HandleInfo->fence); + + vn_sync_payload_release(dev, &fence->temporary); + fence->payload = &fence->permanent; + } else { + assert(payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE); + + /* transfer ownership of imported sync fd to save a dup */ + handle = payload->handle; + payload->handle = NULL; + + /* reset host fence in case in signaled state before import */ + result = vn_ResetFences(device, 1, &pGetWin32HandleInfo->fence); + if (result != VK_SUCCESS) { + /* transfer sync fd ownership back on error */ + payload->handle = handle; + return result; + } + } + + *pHandle = handle; + return VK_SUCCESS; +} +#else static VkResult vn_create_sync_file(struct vn_device *dev, struct vn_sync_payload_external *external_payload, @@ -2025,6 +2147,7 @@ vn_GetFenceFdKHR(VkDevice device, *pFd = fd; return VK_SUCCESS; } +#endif /* semaphore commands */ @@ -2451,6 +2574,96 @@ vn_WaitSemaphores(VkDevice device, return vn_result(dev->instance, result); } +#ifdef 
VK_USE_PLATFORM_WIN32_KHR +VKAPI_ATTR VkResult VKAPI_CALL +vn_ImportSemaphoreWin32HandleKHR(VkDevice device, + const VkImportSemaphoreWin32HandleInfoKHR *pImportSemaphoreWin32HandleInfo) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + struct vn_semaphore *sem = + vn_semaphore_from_handle(pImportSemaphoreWin32HandleInfo->semaphore); + ASSERTED const bool is_handle = + pImportSemaphoreWin32HandleInfo->handleType == + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + void *handle = pImportSemaphoreWin32HandleInfo->handle; + const LPCWSTR name = pImportSemaphoreWin32HandleInfo->name; + + assert(is_handle); + + if ((handle == NULL && name == NULL) && (handle != NULL && name != NULL)) + return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + + struct vn_sync_payload *temp = &sem->temporary; + vn_sync_payload_release(dev, temp); + temp->type = VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE; + temp->handle = name != NULL ? CreateEventW(NULL, FALSE, FALSE, name) : handle; + sem->payload = temp; + //vn_log(dev->instance, "created handle %p", temp->handle); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vn_GetSemaphoreWin32HandleKHR(VkDevice device, + const VkSemaphoreGetWin32HandleInfoKHR *pGetWin32HandleInfo, + HANDLE *pHandle) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + struct vn_semaphore *sem = vn_semaphore_from_handle(pGetWin32HandleInfo->semaphore); + const bool is_handle = + pGetWin32HandleInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT; + struct vn_sync_payload *payload = sem->payload; + + assert(is_handle); + assert(dev->physical_device->renderer_sync_fd.semaphore_exportable); + assert(dev->physical_device->renderer_sync_fd.semaphore_importable); + + HANDLE handle = NULL; + if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY) { + VkResult result = vn_create_sync_handle(dev, &sem->external_payload, &handle); + if (result != VK_SUCCESS) + return 
vn_error(dev->instance, result); + + vn_wsi_sync_wait_handle(dev, handle); + } else { + assert(payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE); + + /* transfer ownership of imported sync handle to save a dup */ + handle = payload->handle; + payload->handle = NULL; + } + + /* When payload->type is VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE, the current + * payload is from a prior temporary sync_fd import. The permanent + * payload of the sempahore might be in signaled state. So we do an + * import here to ensure later wait operation is legit. With resourceId + * 0, renderer does a signaled sync_fd -1 payload import on the host + * semaphore. + */ + if (payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE) { + const VkImportSemaphoreResourceInfoMESA res_info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_RESOURCE_INFO_MESA, + .semaphore = pGetWin32HandleInfo->semaphore, + .resourceId = 0, + }; + vn_async_vkImportSemaphoreResourceMESA(dev->primary_ring, device, + &res_info); + } + + /* perform wait operation on the host semaphore */ + vn_async_vkWaitSemaphoreResourceMESA(dev->primary_ring, device, + pGetWin32HandleInfo->semaphore); + + vn_sync_payload_release(dev, &sem->temporary); + sem->payload = &sem->permanent; + + *pHandle = handle; + return VK_SUCCESS; +} + +#else VKAPI_ATTR VkResult VKAPI_CALL vn_ImportSemaphoreFdKHR( VkDevice device, const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) @@ -2536,6 +2749,7 @@ vn_GetSemaphoreFdKHR(VkDevice device, *pFd = fd; return VK_SUCCESS; } +#endif /* event commands */ diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h index a5372761761..979c8c227d8 100644 --- a/src/virtio/vulkan/vn_renderer.h +++ b/src/virtio/vulkan/vn_renderer.h @@ -194,16 +194,27 @@ struct vn_renderer_sync_ops { uint32_t flags, struct vn_renderer_sync **out_sync); +#ifdef VK_USE_PLATFORM_WIN32_KHR + VkResult (*create_from_handle)(struct vn_renderer *renderer, + void *handle, + struct vn_renderer_sync **out_sync); +#else 
VkResult (*create_from_syncobj)(struct vn_renderer *renderer, int fd, bool sync_file, struct vn_renderer_sync **out_sync); +#endif void (*destroy)(struct vn_renderer *renderer, struct vn_renderer_sync *sync); +#ifdef VK_USE_PLATFORM_WIN32_KHR + void *(*export_handle)(struct vn_renderer *renderer, + struct vn_renderer_sync *sync); +#else int (*export_syncobj)(struct vn_renderer *renderer, struct vn_renderer_sync *sync, bool sync_file); +#endif /* reset the counter */ VkResult (*reset)(struct vn_renderer *renderer, @@ -421,6 +432,15 @@ vn_renderer_sync_create(struct vn_renderer *renderer, return renderer->sync_ops.create(renderer, initial_val, flags, out_sync); } +#ifdef VK_USE_PLATFORM_WIN32_KHR +static inline VkResult +vn_renderer_sync_create_from_handle(struct vn_renderer *renderer, + void *handle, + struct vn_renderer_sync **out_sync) +{ + return renderer->sync_ops.create_from_handle(renderer, handle, out_sync); +} +#else static inline VkResult vn_renderer_sync_create_from_syncobj(struct vn_renderer *renderer, int fd, @@ -430,6 +450,7 @@ vn_renderer_sync_create_from_syncobj(struct vn_renderer *renderer, return renderer->sync_ops.create_from_syncobj(renderer, fd, sync_file, out_sync); } +#endif static inline void vn_renderer_sync_destroy(struct vn_renderer *renderer, @@ -438,6 +459,14 @@ vn_renderer_sync_destroy(struct vn_renderer *renderer, renderer->sync_ops.destroy(renderer, sync); } +#ifdef VK_USE_PLATFORM_WIN32_KHR +static inline void * +vn_renderer_sync_export_handle(struct vn_renderer *renderer, + struct vn_renderer_sync *sync) +{ + return renderer->sync_ops.export_handle(renderer, sync); +} +#else static inline int vn_renderer_sync_export_syncobj(struct vn_renderer *renderer, struct vn_renderer_sync *sync, @@ -445,6 +474,7 @@ vn_renderer_sync_export_syncobj(struct vn_renderer *renderer, { return renderer->sync_ops.export_syncobj(renderer, sync, sync_file); } +#endif static inline VkResult vn_renderer_sync_reset(struct vn_renderer *renderer, From 
41b565c78ee9a6cd5e595d4b71717ced32a95604 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:48:54 +0300 Subject: [PATCH 10/15] venus: Silence VK_ERROR_FORMAT_NOT_SUPPORTED log spam in vn_GetPhysicalDeviceImageFormatProperties2 --- src/virtio/vulkan/vn_physical_device.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/virtio/vulkan/vn_physical_device.c b/src/virtio/vulkan/vn_physical_device.c index c14047c3379..f08d273f8eb 100644 --- a/src/virtio/vulkan/vn_physical_device.c +++ b/src/virtio/vulkan/vn_physical_device.c @@ -2804,7 +2804,7 @@ vn_GetPhysicalDeviceImageFormatProperties2( pImageFormatInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); if (wsi_info && !vn_wsi_validate_image_format_info(physical_dev, pImageFormatInfo)) { - return vn_error(physical_dev->instance, VK_ERROR_FORMAT_NOT_SUPPORTED); + return VK_ERROR_FORMAT_NOT_SUPPORTED; } const VkPhysicalDeviceExternalImageFormatInfo *external_info = @@ -2814,8 +2814,7 @@ vn_GetPhysicalDeviceImageFormatProperties2( if (!external_info->handleType) { external_info = NULL; } else if (!(external_info->handleType & supported_handle_types)) { - return vn_error(physical_dev->instance, - VK_ERROR_FORMAT_NOT_SUPPORTED); + return VK_ERROR_FORMAT_NOT_SUPPORTED; } /* Fully resolve AHB image format query on the driver side. 
*/ @@ -2838,16 +2837,14 @@ vn_GetPhysicalDeviceImageFormatProperties2( VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT && pImageFormatInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { - return vn_error(physical_dev->instance, - VK_ERROR_FORMAT_NOT_SUPPORTED); + return VK_ERROR_FORMAT_NOT_SUPPORTED; } if (external_info->handleType != renderer_handle_type) { pImageFormatInfo = vn_physical_device_fix_image_format_info( pImageFormatInfo, renderer_handle_type, &local_info); if (!pImageFormatInfo) { - return vn_error(physical_dev->instance, - VK_ERROR_FORMAT_NOT_SUPPORTED); + return VK_ERROR_FORMAT_NOT_SUPPORTED; } } } @@ -2892,6 +2889,10 @@ vn_GetPhysicalDeviceImageFormatProperties2( } } + /* Silence the log spam */ + if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) + return result; + return vn_result(physical_dev->instance, result); } From a13b31bee002766d356bfdcbc5262ca4ac1bb6c6 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:50:32 +0300 Subject: [PATCH 11/15] venus: Implement VK_KHR_external_memory_win32 --- src/virtio/vulkan/vn_device.c | 4 + src/virtio/vulkan/vn_device_memory.c | 155 ++++++++++++++++++++++++- src/virtio/vulkan/vn_device_memory.h | 2 + src/virtio/vulkan/vn_physical_device.c | 7 +- src/virtio/vulkan/vn_renderer.h | 54 +++++++++ 5 files changed, 218 insertions(+), 4 deletions(-) diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c index d5accf59e6c..acf5d10f882 100644 --- a/src/virtio/vulkan/vn_device.c +++ b/src/virtio/vulkan/vn_device.c @@ -385,6 +385,10 @@ vn_device_fix_create_info(const struct vn_device *dev, /* see vn_physical_device_get_native_extensions */ block_exts[block_count++] = VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME; } + if (app_exts->KHR_external_memory_win32) { + /* see vn_physical_device_get_native_extensions */ + block_exts[block_count++] = VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME; + } #endif assert(extra_count <= 
ARRAY_SIZE(extra_exts)); diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c index 0330463ccf7..6484647dc41 100644 --- a/src/virtio/vulkan/vn_device_memory.c +++ b/src/virtio/vulkan/vn_device_memory.c @@ -106,6 +106,58 @@ vn_device_memory_bo_fini(struct vn_device *dev, struct vn_device_memory *mem) } } +#ifdef VK_USE_PLATFORM_WIN32_KHR +static VkResult +vn_device_memory_import_handle(struct vn_device *dev, + struct vn_device_memory *mem, + const VkMemoryAllocateInfo *alloc_info, + bool is_kmt, + void *handle) +{ + const VkMemoryType *mem_type = + &dev->physical_device->memory_properties + .memoryTypes[alloc_info->memoryTypeIndex]; + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(alloc_info->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + const bool is_dedicated = + dedicated_info && (dedicated_info->image != VK_NULL_HANDLE || + dedicated_info->buffer != VK_NULL_HANDLE); + + struct vn_renderer_bo *bo; + VkResult result = vn_renderer_bo_create_from_handle( + dev->renderer, is_dedicated ? 
alloc_info->allocationSize : 0, + mem->base.id, is_kmt, handle, mem_type->propertyFlags, alloc_info, &bo); + if (result != VK_SUCCESS) + return result; + + vn_ring_roundtrip(dev->primary_ring); + + const VkImportMemoryResourceInfoMESA import_memory_resource_info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_RESOURCE_INFO_MESA, + .pNext = alloc_info->pNext, + .resourceId = bo->res_id, + }; + const VkMemoryAllocateInfo memory_allocate_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_memory_resource_info, + .allocationSize = alloc_info->allocationSize, + .memoryTypeIndex = alloc_info->memoryTypeIndex, + }; + result = vn_device_memory_alloc_simple(dev, mem, &memory_allocate_info); + if (result != VK_SUCCESS) { + vn_renderer_bo_unref(dev->renderer, bo); + return result; + } + + if (!is_kmt) { + /* need to close import fd on success to avoid fd leak */ + CloseHandle((HANDLE) handle); + } + mem->base_bo = bo; + + return VK_SUCCESS; +} +#else VkResult vn_device_memory_import_dma_buf(struct vn_device *dev, struct vn_device_memory *mem, @@ -148,6 +200,7 @@ vn_device_memory_import_dma_buf(struct vn_device *dev, return VK_SUCCESS; } +#endif static VkResult vn_device_memory_alloc_guest_vram(struct vn_device *dev, @@ -368,10 +421,10 @@ vn_AllocateMemory(VkDevice device, vn_object_set_id(mem, vn_get_next_obj_id(), VK_OBJECT_TYPE_DEVICE_MEMORY); + VkResult result; +#ifndef VK_USE_PLATFORM_WIN32_KHR const VkImportMemoryFdInfoKHR *import_fd_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - - VkResult result; if (mem->base.vk.ahardware_buffer) { result = vn_android_device_import_ahb(dev, mem, pAllocateInfo); } else if (import_fd_info) { @@ -382,6 +435,17 @@ vn_AllocateMemory(VkDevice device, if (result == VK_SUCCESS) vn_wsi_memory_info_init(mem, pAllocateInfo); } +#else + const VkImportMemoryWin32HandleInfoKHR *import_win32_info = + vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR); + if 
(import_win32_info) { + const bool is_kmt = !(import_win32_info->handleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT); + result = vn_device_memory_import_handle(dev, mem, pAllocateInfo, + is_kmt, import_win32_info->handle); + } else { + result = vn_device_memory_alloc(dev, mem, pAllocateInfo); + } +#endif vn_device_memory_emit_report(dev, mem, /* is_alloc */ true, result); @@ -549,6 +613,92 @@ vn_GetDeviceMemoryCommitment(VkDevice device, pCommittedMemoryInBytes); } +#ifdef VK_USE_PLATFORM_WIN32_KHR +VKAPI_ATTR VkResult VKAPI_CALL +vn_GetMemoryWin32HandleKHR(VkDevice device, + const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo, + HANDLE *pHandle) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + struct vn_device_memory *mem = + vn_device_memory_from_handle(pGetWin32HandleInfo->memory); + + /* At the moment, we support only the below handle type. */ + assert(pGetWin32HandleInfo->handleType & + (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT)); + assert(mem->base_bo); + const bool is_kmt = !(pGetWin32HandleInfo->handleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT); + *pHandle = vn_renderer_bo_export_handle(dev->renderer, mem->base_bo, is_kmt); + if (*pHandle == NULL) + return vn_error(dev->instance, VK_ERROR_TOO_MANY_OBJECTS); + + return VK_SUCCESS; +} + +static VkResult +vn_get_memory_handle_properties(struct vn_device *dev, + bool is_kmt, + void *handle, + void *alloc_info, + uint32_t *out_mem_type_bits) +{ + VkDevice device = vn_device_to_handle(dev); + + struct vn_renderer_bo *bo; + VkResult result = vn_renderer_bo_create_from_handle( + dev->renderer, 0 /* size */, 0 /* id */, is_kmt, handle, 0 /* flags */, alloc_info, &bo); + if (result != VK_SUCCESS) { + vn_log(dev->instance, "bo_create_from_handle failed"); + return result; + } + + vn_ring_roundtrip(dev->primary_ring); + + VkMemoryResourcePropertiesMESA props = { + .sType = 
VK_STRUCTURE_TYPE_MEMORY_RESOURCE_PROPERTIES_MESA, + }; + result = vn_call_vkGetMemoryResourcePropertiesMESA( + dev->primary_ring, device, bo->res_id, &props); + vn_renderer_bo_unref(dev->renderer, bo); + if (result != VK_SUCCESS) { + vn_log(dev->instance, "vkGetMemoryResourcePropertiesMESA failed"); + return result; + } + + *out_mem_type_bits = props.memoryTypeBits; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vn_GetMemoryWin32HandlePropertiesKHR(VkDevice device, + VkExternalMemoryHandleTypeFlagBits handleType, + HANDLE handle, + VkMemoryWin32HandlePropertiesKHR *pMemoryWin32HandleProperties) +{ + VN_TRACE_FUNC(); + struct vn_device *dev = vn_device_from_handle(device); + uint32_t mem_type_bits = 0; + VkResult result = VK_SUCCESS; + + if (handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT && + handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) + return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + + const bool is_kmt = handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; + + result = vn_get_memory_handle_properties( + dev, is_kmt, handle, pMemoryWin32HandleProperties, &mem_type_bits); + if (result != VK_SUCCESS) + return vn_error(dev->instance, result); + + pMemoryWin32HandleProperties->memoryTypeBits = mem_type_bits; + + return VK_SUCCESS; +} +#else VKAPI_ATTR VkResult VKAPI_CALL vn_GetMemoryFdKHR(VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo, @@ -626,3 +776,4 @@ vn_GetMemoryFdPropertiesKHR(VkDevice device, return VK_SUCCESS; } +#endif diff --git a/src/virtio/vulkan/vn_device_memory.h b/src/virtio/vulkan/vn_device_memory.h index d2b74b297fe..0374ebb0d3a 100644 --- a/src/virtio/vulkan/vn_device_memory.h +++ b/src/virtio/vulkan/vn_device_memory.h @@ -57,6 +57,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vn_device_memory, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) +#ifndef VK_USE_PLATFORM_WIN32_KHR VkResult vn_device_memory_import_dma_buf(struct vn_device *dev, struct vn_device_memory *mem, 
@@ -67,5 +68,6 @@ VkResult vn_get_memory_dma_buf_properties(struct vn_device *dev, int fd, uint32_t *out_mem_type_bits); +#endif #endif /* VN_DEVICE_MEMORY_H */ diff --git a/src/virtio/vulkan/vn_physical_device.c b/src/virtio/vulkan/vn_physical_device.c index f08d273f8eb..3355409b7aa 100644 --- a/src/virtio/vulkan/vn_physical_device.c +++ b/src/virtio/vulkan/vn_physical_device.c @@ -1216,10 +1216,13 @@ vn_physical_device_get_native_extensions( } #else /* VK_USE_PLATFORM_ANDROID_KHR */ if (physical_dev->external_memory.renderer_handle_type) { -#if !DETECT_OS_WINDOWS +#if DETECT_OS_WINDOWS + exts->KHR_external_memory_win32 = true; + exts->KHR_win32_keyed_mutex = true; +#else exts->KHR_external_memory_fd = true; exts->EXT_external_memory_dma_buf = true; -#endif /* !DETECT_OS_WINDOWS */ +#endif /* DETECT_OS_WINDOWS */ } #endif /* VK_USE_PLATFORM_ANDROID_KHR */ diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h index 979c8c227d8..134ccfc9685 100644 --- a/src/virtio/vulkan/vn_renderer.h +++ b/src/virtio/vulkan/vn_renderer.h @@ -154,16 +154,34 @@ struct vn_renderer_bo_ops { VkExternalMemoryHandleTypeFlags external_handles, struct vn_renderer_bo **out_bo); +#ifdef VK_USE_PLATFORM_WIN32_KHR + VkResult (*create_from_handle)(struct vn_renderer *renderer, + VkDeviceSize size, + /* externally allocated handles might not have a valid id */ + vn_object_id mem_id, + bool is_kmt, + void *handle, + VkMemoryPropertyFlags flags, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo); +#else VkResult (*create_from_dma_buf)(struct vn_renderer *renderer, VkDeviceSize size, int fd, VkMemoryPropertyFlags flags, struct vn_renderer_bo **out_bo); +#endif bool (*destroy)(struct vn_renderer *renderer, struct vn_renderer_bo *bo); +#ifdef VK_USE_PLATFORM_WIN32_KHR + void *(*export_handle)(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + bool is_kmt); +#else int (*export_dma_buf)(struct vn_renderer *renderer, struct vn_renderer_bo *bo); 
+#endif int (*export_sync_file)(struct vn_renderer *renderer, struct vn_renderer_bo *bo); @@ -347,6 +365,31 @@ vn_renderer_bo_create_from_device_memory( return VK_SUCCESS; } +#ifdef VK_USE_PLATFORM_WIN32_KHR +static inline VkResult +vn_renderer_bo_create_from_handle(struct vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + bool is_kmt, + void *handle, + VkMemoryPropertyFlags flags, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo) +{ + struct vn_renderer_bo *bo; + VkResult result = + renderer->bo_ops.create_from_handle(renderer, size, mem_id, is_kmt, handle, flags, alloc_info, &bo); + if (result != VK_SUCCESS) + return result; + + assert(vn_refcount_is_valid(&bo->refcount)); + assert(bo->res_id); + assert(!bo->mmap_size || bo->mmap_size >= size); + + *out_bo = bo; + return VK_SUCCESS; +} +#else static inline VkResult vn_renderer_bo_create_from_dma_buf(struct vn_renderer *renderer, VkDeviceSize size, @@ -367,6 +410,7 @@ vn_renderer_bo_create_from_dma_buf(struct vn_renderer *renderer, *out_bo = bo; return VK_SUCCESS; } +#endif static inline struct vn_renderer_bo * vn_renderer_bo_ref(struct vn_renderer *renderer, struct vn_renderer_bo *bo) @@ -383,12 +427,22 @@ vn_renderer_bo_unref(struct vn_renderer *renderer, struct vn_renderer_bo *bo) return false; } +#ifdef VK_USE_PLATFORM_WIN32_KHR +static inline void * +vn_renderer_bo_export_handle(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + bool is_kmt) +{ + return renderer->bo_ops.export_handle(renderer, bo, is_kmt); +} +#else static inline int vn_renderer_bo_export_dma_buf(struct vn_renderer *renderer, struct vn_renderer_bo *bo) { return renderer->bo_ops.export_dma_buf(renderer, bo); } +#endif static inline int vn_renderer_bo_export_sync_file(struct vn_renderer *renderer, From 6dd50cf3c15052807861bf2cad5de2c28e0f144c Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 4 Jan 2026 18:12:24 +0300 Subject: [PATCH 12/15] 
venus: Expose VkMemoryAllocateInfo to vn_renderer_bo_create_from_device_memory --- src/virtio/vulkan/vn_device_memory.c | 11 ++++++----- src/virtio/vulkan/vn_renderer.h | 4 +++- src/virtio/vulkan/vn_renderer_virtgpu.c | 1 + src/virtio/vulkan/vn_renderer_vtest.c | 1 + 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c index 6484647dc41..8f984dd0390 100644 --- a/src/virtio/vulkan/vn_device_memory.c +++ b/src/virtio/vulkan/vn_device_memory.c @@ -83,7 +83,8 @@ vn_device_memory_wait_alloc(struct vn_device *dev, } static inline VkResult -vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem) +vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem, + const VkMemoryAllocateInfo *alloc_info) { VkResult result = vn_device_memory_wait_alloc(dev, mem); if (result != VK_SUCCESS) @@ -94,7 +95,7 @@ vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem) .memoryTypes[mem_vk->memory_type_index]; return vn_renderer_bo_create_from_device_memory( dev->renderer, mem_vk->size, mem->base.id, mem_type->propertyFlags, - mem_vk->export_handle_types, &mem->base_bo); + mem_vk->export_handle_types, alloc_info, &mem->base_bo); } static inline void @@ -222,7 +223,7 @@ vn_device_memory_alloc_guest_vram(struct vn_device *dev, VkResult result = vn_renderer_bo_create_from_device_memory( dev->renderer, mem_vk->size, mem->base.id, flags, - mem_vk->export_handle_types, &mem->base_bo); + mem_vk->export_handle_types, alloc_info, &mem->base_bo); if (result != VK_SUCCESS) { return result; } @@ -260,7 +261,7 @@ vn_device_memory_alloc_export(struct vn_device *dev, if (result != VK_SUCCESS) return result; - result = vn_device_memory_bo_init(dev, mem); + result = vn_device_memory_bo_init(dev, mem, alloc_info); if (result != VK_SUCCESS) { vn_device_memory_free_simple(dev, mem); return result; @@ -520,7 +521,7 @@ vn_MapMemory2(VkDevice device, * the extension. 
*/ if (need_bo) { - result = vn_device_memory_bo_init(dev, mem); + result = vn_device_memory_bo_init(dev, mem, NULL); if (result != VK_SUCCESS) return vn_error(dev->instance, result); } diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h index 134ccfc9685..24b489f2cfa 100644 --- a/src/virtio/vulkan/vn_renderer.h +++ b/src/virtio/vulkan/vn_renderer.h @@ -152,6 +152,7 @@ struct vn_renderer_bo_ops { vn_object_id mem_id, VkMemoryPropertyFlags flags, VkExternalMemoryHandleTypeFlags external_handles, + const VkMemoryAllocateInfo *alloc_info, struct vn_renderer_bo **out_bo); #ifdef VK_USE_PLATFORM_WIN32_KHR @@ -349,11 +350,12 @@ vn_renderer_bo_create_from_device_memory( vn_object_id mem_id, VkMemoryPropertyFlags flags, VkExternalMemoryHandleTypeFlags external_handles, + const VkMemoryAllocateInfo *alloc_info, struct vn_renderer_bo **out_bo) { struct vn_renderer_bo *bo; VkResult result = renderer->bo_ops.create_from_device_memory( - renderer, size, mem_id, flags, external_handles, &bo); + renderer, size, mem_id, flags, external_handles, alloc_info, &bo); if (result != VK_SUCCESS) return result; diff --git a/src/virtio/vulkan/vn_renderer_virtgpu.c b/src/virtio/vulkan/vn_renderer_virtgpu.c index 9e233b560d3..2bdbb63a48c 100644 --- a/src/virtio/vulkan/vn_renderer_virtgpu.c +++ b/src/virtio/vulkan/vn_renderer_virtgpu.c @@ -1302,6 +1302,7 @@ virtgpu_bo_create_from_device_memory( vn_object_id mem_id, VkMemoryPropertyFlags flags, VkExternalMemoryHandleTypeFlags external_handles, + const VkMemoryAllocateInfo *alloc_info, struct vn_renderer_bo **out_bo) { struct virtgpu *gpu = (struct virtgpu *)renderer; diff --git a/src/virtio/vulkan/vn_renderer_vtest.c b/src/virtio/vulkan/vn_renderer_vtest.c index 5fad0b02f84..274ce5f5a8c 100644 --- a/src/virtio/vulkan/vn_renderer_vtest.c +++ b/src/virtio/vulkan/vn_renderer_vtest.c @@ -750,6 +750,7 @@ vtest_bo_create_from_device_memory( vn_object_id mem_id, VkMemoryPropertyFlags flags, VkExternalMemoryHandleTypeFlags 
external_handles, + const VkMemoryAllocateInfo *alloc_info, struct vn_renderer_bo **out_bo) { struct vtest *vtest = (struct vtest *)renderer; From a5155be34d1fac011b81545556fb2b91798a01db Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 4 Jan 2026 18:15:24 +0300 Subject: [PATCH 13/15] venus: Handle D3DDDI structures in VkMemoryAllocateInfo --- src/virtio/vulkan/vn_device_memory.c | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c index 8f984dd0390..116865b8db8 100644 --- a/src/virtio/vulkan/vn_device_memory.c +++ b/src/virtio/vulkan/vn_device_memory.c @@ -22,6 +22,14 @@ #include "vn_renderer.h" #include "vn_renderer_util.h" +#ifdef VK_USE_PLATFORM_WIN32_KHR +#define _D3D10_CONSTANTS +#define _D3D10_1_CONSTANTS +#include +#include +#include +#endif + /* device memory commands */ static inline VkResult @@ -286,6 +294,10 @@ struct vn_device_memory_alloc_info { VkMemoryAllocateFlagsInfo flags; VkMemoryDedicatedAllocateInfo dedicated; VkMemoryOpaqueCaptureAddressAllocateInfo capture; +#ifdef VK_USE_PLATFORM_WIN32_KHR + VkD3DDDICreateResource d3d_create; + VkD3DDDIOpenResource d3d_open; +#endif }; static const VkMemoryAllocateInfo * @@ -321,10 +333,22 @@ vn_device_memory_fix_alloc_info( memcpy(&local_info->capture, src, sizeof(local_info->capture)); next = &local_info->capture; break; + default: break; } + /* FIXME: -Werror=switch */ +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (src->sType == VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE) { + memcpy(&local_info->d3d_create, src, sizeof(local_info->d3d_create)); + next = &local_info->d3d_create; + } else if (src->sType == VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE) { + memcpy(&local_info->d3d_open, src, sizeof(local_info->d3d_open)); + next = &local_info->d3d_open; + } +#endif + if (next) { cur->pNext = next; cur = next; @@ -363,6 +387,16 @@ vn_device_memory_alloc(struct vn_device 
*dev, mem_vk->export_handle_types = renderer_handle_type; } +#ifdef VK_USE_PLATFORM_WIN32_KHR + const bool need_bo_now = + vk_find_struct_const(alloc_info, D3DDDI_CREATE_RESOURCE) != NULL || + vk_find_struct_const(alloc_info, D3DDDI_OPEN_RESOURCE) != NULL; + + if (need_bo_now) { + return vn_device_memory_alloc_export(dev, mem, alloc_info); + } +#endif + if (has_guest_vram && (host_visible || export_alloc)) { return vn_device_memory_alloc_guest_vram(dev, mem, alloc_info); } else if (export_alloc) { From 15aa5756eb089eaefb4dbc37119645749fb2cb91 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:52:21 +0300 Subject: [PATCH 14/15] venus: Support sw wsi on Windows --- src/virtio/vulkan/vn_instance.c | 4 +- src/virtio/vulkan/vn_wsi.c | 69 ++++++++++++++++++++++++++++++++- src/virtio/vulkan/vn_wsi.h | 9 +++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c index 5ec19337dbf..a02f1ec0417 100644 --- a/src/virtio/vulkan/vn_instance.c +++ b/src/virtio/vulkan/vn_instance.c @@ -56,7 +56,9 @@ static const struct vk_instance_extension_table #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT .EXT_acquire_xlib_display = true, #endif -#ifndef VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_WIN32_KHR + .KHR_win32_surface = true, +#else .EXT_headless_surface = true, #endif #ifdef VK_USE_PLATFORM_DISPLAY_KHR diff --git a/src/virtio/vulkan/vn_wsi.c b/src/virtio/vulkan/vn_wsi.c index 45b4cd1e515..ee2cde70056 100644 --- a/src/virtio/vulkan/vn_wsi.c +++ b/src/virtio/vulkan/vn_wsi.c @@ -19,6 +19,10 @@ #include "vn_physical_device.h" #include "vn_queue.h" +#ifdef VK_USE_PLATFORM_WIN32_KHR +#include +#endif + #ifndef DRM_FORMAT_MOD_LINEAR #define DRM_FORMAT_MOD_LINEAR 0 #endif @@ -251,6 +255,7 @@ vn_wsi_memory_info_init(struct vn_device_memory *mem, } } +#ifndef VK_USE_PLATFORM_WIN32_KHR static uint32_t vn_modifier_plane_count(struct vn_physical_device 
*physical_dev, VkFormat format, @@ -292,11 +297,13 @@ vn_modifier_plane_count(struct vn_physical_device *physical_dev, STACK_ARRAY_FINISH(modifier_props); return plane_count; } +#endif bool vn_wsi_validate_image_format_info(struct vn_physical_device *physical_dev, const VkPhysicalDeviceImageFormatInfo2 *info) { +#ifndef VK_USE_PLATFORM_WIN32_KHR const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *modifier_info = vk_find_struct_const( info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT); @@ -347,7 +354,7 @@ vn_wsi_validate_image_format_info(struct vn_physical_device *physical_dev, return false; } } - +#endif return true; } @@ -407,6 +414,7 @@ vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue) return vn_ResetFences(dev_handle, 1, &queue->async_present.fence); } +#ifndef VK_USE_PLATFORM_WIN32_KHR void vn_wsi_sync_wait(struct vn_device *dev, int fd) { @@ -435,6 +443,36 @@ vn_wsi_sync_wait(struct vn_device *dev, int fd) simple_mtx_lock(&queue->async_present.queue_mutex); } } +#else +void +vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle) +{ + if (dev->renderer->info.has_implicit_fencing) + return; + + const pid_t tid = vn_gettid(); + struct vn_queue *queue = NULL; + for (uint32_t i = 0; i < dev->queue_count; i++) { + if (dev->queues[i].async_present.initialized && + dev->queues[i].async_present.tid == tid) { + queue = &dev->queues[i]; + break; + } + } + + if (queue) { + simple_mtx_unlock(&queue->async_present.queue_mutex); + vn_wsi_chains_unlock(dev, queue->async_present.info, /*all=*/false); + } + + WaitForSingleObject(handle, INFINITE); + + if (queue) { + vn_wsi_chains_lock(dev, queue->async_present.info, /*all=*/false); + simple_mtx_lock(&queue->async_present.queue_mutex); + } +} +#endif void vn_wsi_flush(struct vn_queue *queue) @@ -853,6 +891,33 @@ vn_AcquireNextImage2KHR(VkDevice device, if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) return vn_error(dev->instance, result); +#ifdef VK_USE_PLATFORM_WIN32_KHR + /* XXX 
this relies on renderer side doing implicit fencing */ + if (pAcquireInfo->semaphore != VK_NULL_HANDLE) { + const VkImportSemaphoreWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR, + .semaphore = pAcquireInfo->semaphore, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + .handle = CreateEventA(NULL, TRUE, TRUE, NULL), + }; + //vn_log(dev->instance, "created handle %p", info.handle); + result = vn_ImportSemaphoreWin32HandleKHR(device, &info); + } + + if (result == VK_SUCCESS && pAcquireInfo->fence != VK_NULL_HANDLE) { + const VkImportFenceWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_WIN32_HANDLE_INFO_KHR, + .fence = pAcquireInfo->fence, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + .handle = CreateEventA(NULL, TRUE, TRUE, NULL), + }; + //vn_log(dev->instance, "created handle %p", info.handle); + result = vn_ImportFenceWin32HandleKHR(device, &info); + } + +#else int sync_fd = -1; if (!dev->renderer->info.has_implicit_fencing) { VkDeviceMemory mem_handle = @@ -932,6 +997,8 @@ out: close(sem_fd); if (fence_fd >= 0) close(fence_fd); +#endif + return vn_result(dev->instance, result); } diff --git a/src/virtio/vulkan/vn_wsi.h b/src/virtio/vulkan/vn_wsi.h index 7edf6affc7b..714bf033627 100644 --- a/src/virtio/vulkan/vn_wsi.h +++ b/src/virtio/vulkan/vn_wsi.h @@ -42,8 +42,13 @@ vn_wsi_validate_image_format_info( VkResult vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue); +#ifndef VK_USE_PLATFORM_WIN32_KHR void vn_wsi_sync_wait(struct vn_device *dev, int fd); +#else +void +vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle); +#endif void vn_wsi_flush(struct vn_queue *queue); @@ -91,7 +96,11 @@ vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue) } static inline void +#ifndef VK_USE_PLATFORM_WIN32_KHR vn_wsi_sync_wait(struct vn_device *dev, int 
fd) +#else +vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle) +#endif { return; } From 50c7355e371e1e2cad19c0cc134d44085d9b2f19 Mon Sep 17 00:00:00 2001 From: anonymix007 <48598263+anonymix007@users.noreply.github.com> Date: Sun, 30 Nov 2025 18:53:08 +0300 Subject: [PATCH 15/15] venus: Add Windows renderer backend --- src/virtio/virtio-gpu/wddm_hw.h | 323 +++ src/virtio/vulkan/meson.build | 13 + src/virtio/vulkan/vn_renderer.h | 11 + src/virtio/vulkan/vn_renderer_virtgpu_win32.c | 2274 +++++++++++++++++ 4 files changed, 2621 insertions(+) create mode 100644 src/virtio/virtio-gpu/wddm_hw.h create mode 100644 src/virtio/vulkan/vn_renderer_virtgpu_win32.c diff --git a/src/virtio/virtio-gpu/wddm_hw.h b/src/virtio/virtio-gpu/wddm_hw.h new file mode 100644 index 00000000000..13bcf414bea --- /dev/null +++ b/src/virtio/virtio-gpu/wddm_hw.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2019-2020 Red Hat, Inc. + * + * Written By: Vadim Rozenfeld + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met : + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and / or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of their contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#pragma once + +#include + +#pragma pack(1) +typedef struct _VIOGPU_BOX +{ + ULONG x; + ULONG y; + ULONG z; + ULONG width; + ULONG height; + ULONG depth; +} VIOGPU_BOX; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_BLOB_INFO { + ULONG width; + ULONG height; + ULONG format; + ULONG bind; // Same as virgl + ULONG strides[4]; + ULONG offsets[4]; +} VIOGPU_BLOB_INFO, *PVIOGPU_BLOB_INFO; +#pragma pack() + +// ================= QueryAdapterInfo UMDRIVERPRIVATE +#define VIOGPU_IAM 0x56696f475055 // Identifier for queryadapterinfo (VioGPU as hex) + +#define VIOGPU_CAPSET_GFXSTREAM_VULKAN 3 +#define VIOGPU_CAPSET_VENUS 4 + +typedef struct _VIOGPU_ADAPTERINFO +{ + ULONGLONG IamVioGPU; // Should be set by driver to VIOGPU_IAM + struct + { + UINT Supports3d : 1; + UINT HasShmem : 1; + UINT Reserved : 30; + } Flags; + ULONGLONG SupportedCapsetIDs; + LUID AdapterLuid; +} VIOGPU_ADAPTERINFO; + +// ================= ESCAPES +#define VIOGPU_GET_DEVICE_ID 0x000 +#define VIOGPU_GET_CUSTOM_RESOLUTION 0x001 +#define VIOGPU_GET_CAPS 0x002 +#define VIOGPU_GET_PCI_INFO 0x003 + +#define VIOGPU_RES_INFO 0x100 +#define VIOGPU_RES_BUSY 0x101 +#define VIOGPU_RES_BLOB_SET_INFO 0x102 + 
+#define VIOGPU_CTX_INIT 0x200 + +#define VIOGPU_BLIT_INIT 0x300 + +#pragma pack(1) +typedef struct _VIOGPU_DISP_MODE +{ + USHORT XResolution; + USHORT YResolution; +} VIOGPU_DISP_MODE, *PVIOGPU_DISP_MODE; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_PARAM_REQ +{ + ULONG ParamId; + UINT64 Value; +} VIOGPU_PARAM_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_CAPSET_REQ +{ + ULONG CapsetId; + ULONG Version; + ULONG Size; + UCHAR *Capset; +} VIOGPU_CAPSET_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_PCI_INFO_REQ +{ + ULONG Domain; + ULONG Bus; + ULONG Dev; + ULONG Func; +} VIOGPU_PCI_INFO_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_RES_INFO_REQ +{ + D3DKMT_HANDLE ResHandle; + ULONG Id; + + BOOL IsBlob; + BOOL IsCreated; + BOOL InfoValid; + + VIOGPU_BLOB_INFO Info; + + ULONG BlobMem; + ULONGLONG BlobId; + ULONGLONG Size; +} VIOGPU_RES_INFO_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_RES_BUSY_REQ +{ + D3DKMT_HANDLE ResHandle; + BOOL Wait; + BOOL IsBusy; +} VIOGPU_RES_BUSY_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct { + D3DKMT_HANDLE ResHandle; + VIOGPU_BLOB_INFO Info; +} VIOGPU_RES_BLOB_SET_INFO_REQ, *PVIOGPU_RES_BLOB_SET_INFO_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_CTX_INIT_REQ +{ + UINT CapsetID; + UINT NumRings; + UCHAR DebugName[64]; +} VIOGPU_CTX_INIT_REQ; +#pragma pack() + +typedef struct _VIOGPU_BLIT_PRESENT VIOGPU_BLIT_PRESENT, *PVIOGPU_BLIT_PRESENT; + +#pragma pack(1) +typedef struct _VIOGPU_BLIT_INIT_REQ +{ + HANDLE EventUM; + HANDLE EventKM; + PVIOGPU_BLIT_PRESENT pBlitPresent; +} VIOGPU_BLIT_INIT_REQ; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_ESCAPE +{ + USHORT Type; + USHORT DataLength; + union { + ULONG Id; + VIOGPU_DISP_MODE Resolution; + VIOGPU_PARAM_REQ Parameter; + VIOGPU_CAPSET_REQ Capset; + VIOGPU_PCI_INFO_REQ PciInfo; + + VIOGPU_RES_INFO_REQ ResourceInfo; + VIOGPU_RES_BUSY_REQ ResourceBusy; + 
VIOGPU_RES_BLOB_SET_INFO_REQ BlobInfoSet; + + VIOGPU_CTX_INIT_REQ CtxInit; + + VIOGPU_BLIT_INIT_REQ BlitInit; + } DUMMYUNIONNAME; +} VIOGPU_ESCAPE, *PVIOGPU_ESCAPE; +#pragma pack() + +// ================= CreateResource +#pragma pack(1) +typedef struct _VIOGPU_RESOURCE_3D_OPTIONS +{ + ULONG target; + ULONG format; + ULONG bind; + ULONG width; + ULONG height; + ULONG depth; + ULONG array_size; + ULONG last_level; + ULONG nr_samples; + ULONG flags; +} VIOGPU_RESOURCE_3D_OPTIONS; +#pragma pack() + +#define VIOGPU_BLOB_MEM_GUEST 0x0001 +#define VIOGPU_BLOB_MEM_HOST3D 0x0002 +#define VIOGPU_BLOB_MEM_HOST3D_GUEST 0x0003 + +#define VIOGPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIOGPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +//#define VIOGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 +#pragma pack(1) +typedef struct _VIOGPU_RESOURCE_BLOB_OPTIONS +{ + ULONG blob_mem; + ULONG blob_flags; + ULONGLONG blob_id; +} VIOGPU_RESOURCE_BLOB_OPTIONS; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_CREATE_RESOURCE_EXCHANGE +{ + ULONG magic; +} VIOGPU_CREATE_RESOURCE_EXCHANGE; +#pragma pack() + +#define VIOGPU_RESOURCE_TYPE_3D 0 +#define VIOGPU_RESOURCE_TYPE_BLOB 1 +#pragma pack(1) +typedef struct _VIOGPU_CREATE_ALLOCATION_EXCHANGE +{ + ULONG Type; + union { + VIOGPU_RESOURCE_3D_OPTIONS Options3D; + VIOGPU_RESOURCE_BLOB_OPTIONS OptionsBlob; + }; + ULONGLONG Size; +} VIOGPU_CREATE_ALLOCATION_EXCHANGE; +#pragma pack() + +// ================= BLIT + +#pragma pack(1) +struct _VIOGPU_BLIT_PRESENT +{ + struct { + void *resource; + RECT rect; + } src; + struct { + VIOGPU_CREATE_ALLOCATION_EXCHANGE alloc; + VIOGPU_RES_INFO_REQ res_info; + RECT rect; + } dst; +}; +#pragma pack() + +// ================= COMMAND BUFFER +#define VIOGPU_CMD_NOP 0x0 +#define VIOGPU_CMD_SUBMIT 0x1 // Submit Command to virgl +#define VIOGPU_CMD_TRANSFER_TO_HOST 0x2 // Transfer resource to host +#define VIOGPU_CMD_TRANSFER_FROM_HOST 0x3 // Transfer resource to host +#define VIOGPU_CMD_MAP_BLOB 0x4 // Map blob resource +#define 
VIOGPU_CMD_UNMAP_BLOB 0x5 // Unmap blob resource + +//#define VIOGPU_CMD_SUBMIT_UM 0x6 + +// #define VIOGPU_EXECBUF_FENCE_FD_IN 0x01 +// #define VIOGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIOGPU_EXECBUF_RING_IDX 0x04 +#define VIOGPU_EXECBUF_VIRGL 0x08 +// #define VIOGPU_EXECBUF_FLAGS (VIOGPU_EXECBUF_FENCE_FD_IN | VIOGPU_EXECBUF_FENCE_FD_OUT | VIOGPU_EXECBUF_RING_IDX) + +#pragma pack(1) +typedef struct _VIOGPU_COMMAND_HDR +{ + UINT type; + UINT size; + UINT flags; + UINT ring_idx; +} VIOGPU_COMMAND_HDR; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_TRANSFER_CMD +{ + ULONG res_id; + + VIOGPU_BOX box; + + ULONGLONG offset; + ULONG level; + ULONG stride; + ULONG layer_stride; +} VIOGPU_TRANSFER_CMD; +#pragma pack() + +#pragma pack(1) +typedef struct _VIOGPU_BEGIN_UM_BLIT_CMD +{ + RECT src, dst; +} VIOGPU_BEGIN_UM_BLIT_CMD; +#pragma pack() + +#define BASE_NAMED_OBJECTS L"\\BaseNamedObjects\\" +#define GLOBAL_OBJECTS L"Global\\" +#define RESOLUTION_EVENT_NAME L"VioGpuResolutionEvent" diff --git a/src/virtio/vulkan/meson.build b/src/virtio/vulkan/meson.build index e9e9dca83bb..74900f2e369 100644 --- a/src/virtio/vulkan/meson.build +++ b/src/virtio/vulkan/meson.build @@ -105,6 +105,8 @@ vn_link_args = [ vulkan_icd_link_args, ] +vn_kwargs = {} + vn_libs = [] if not with_platform_windows @@ -136,6 +138,16 @@ if with_platform_android vn_deps += [dep_android, idep_u_gralloc] endif +if with_platform_windows + libvn_files += files('vn_renderer_virtgpu_win32.c') + vn_incs += inc_winddk + vn_link_args += '-static' + vn_kwargs = { + 'vs_module_defs': vulkan_api_def, + 'name_prefix': '', + } +endif + libvulkan_virtio = shared_library( 'vulkan_virtio', [libvn_files, vn_entrypoints, sha1_h], @@ -146,5 +158,6 @@ libvulkan_virtio = shared_library( link_args : vn_link_args, link_depends : vulkan_icd_link_depends, gnu_symbol_visibility : 'hidden', + kwargs: vn_kwargs, install : true, ) diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h index 
24b489f2cfa..dbee43f9811 100644 --- a/src/virtio/vulkan/vn_renderer.h +++ b/src/virtio/vulkan/vn_renderer.h @@ -266,6 +266,14 @@ vn_renderer_create_virtgpu(struct vn_instance *instance, struct vn_renderer **renderer); #endif +#ifdef VK_USE_PLATFORM_WIN32_KHR +VkResult +vn_renderer_create_virtgpu_win32(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + const VkInstanceCreateInfo *pInfo, + struct vn_renderer **renderer); +#endif + VkResult vn_renderer_create_vtest(struct vn_instance *instance, const VkAllocationCallbacks *alloc, @@ -285,6 +293,9 @@ vn_renderer_create(struct vn_instance *instance, } return vn_renderer_create_virtgpu(instance, alloc, renderer); + +#elif defined(VK_USE_PLATFORM_WIN32_KHR) + return vn_renderer_create_virtgpu_win32(instance, alloc, pCreateInfo, renderer); #else return vn_renderer_create_vtest(instance, alloc, renderer); #endif diff --git a/src/virtio/vulkan/vn_renderer_virtgpu_win32.c b/src/virtio/vulkan/vn_renderer_virtgpu_win32.c new file mode 100644 index 00000000000..dd3c4c29cec --- /dev/null +++ b/src/virtio/vulkan/vn_renderer_virtgpu_win32.c @@ -0,0 +1,2274 @@ +#include "vn_renderer_internal.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "util/os_file.h" +#include "util/sparse_array.h" + +#include +#include "virtio/virtio-gpu/venus_hw.h" +#include "virtio/virtio-gpu/wddm_hw.h" + +#define VIRTGPU_PCI_VENDOR_ID 0x1af4 +#define VIRTGPU_PCI_DEVICE_ID 0x10f7 // TODO: 1050 +#define VIRTGPU_WIN_DEVICE_ID "PCI\\VEN_1AF4&DEV_10F7" // TODO: 1050 + +struct virtgpu; + +struct virtgpu_shmem { + struct vn_renderer_shmem base; + D3DKMT_HANDLE alloc; + union { + D3DKMT_HANDLE kmt; + HANDLE h; + }; +}; + +struct virtgpu_bo { + struct vn_renderer_bo base; + D3DKMT_HANDLE alloc; + union { + struct { + D3DKMT_HANDLE local; + D3DKMT_HANDLE global; + } kmt; + + HANDLE h; + } /* resource */; + uint32_t blob_flags; +}; + +struct virtgpu_sync { + struct vn_renderer_sync base; + + /* + * drm_syncobj 
is in one of these states + * + * - value N: drm_syncobj has a signaled fence chain with seqno N + * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M + * (which may point to another unsignaled fence chain with + * seqno between N and M, and so on) + * + * TODO Do we want to use binary drm_syncobjs? They would be + * + * - value 0: drm_syncobj has no fence + * - value 1: drm_syncobj has a signaled fence with seqno 0 + * + * They are cheaper but require special care. + */ + uint32_t syncobj_handle; +}; + +struct virtgpu { + struct vn_renderer base; + + struct vn_instance *instance; + + VkD3DDDICallbacks *ddicb; + struct { + D3DKMT_HANDLE adapter; + D3DKMT_HANDLE device; + LUID luid; + HINSTANCE lib; + struct { + PFND3DKMT_QUERYADAPTERINFO queryAdapterInfo; + PFND3DKMT_ESCAPE escape; + PFND3DKMT_RENDER render; + PFND3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 signalSynchronizationObject2; + PFND3DKMT_CREATECONTEXT createContext; + PFND3DKMT_DESTROYCONTEXT destroyContext; + PFND3DKMT_CREATEALLOCATION createAllocation; + PFND3DKMT_DESTROYALLOCATION destroyAllocation; + PFND3DKMT_LOCK lock; + PFND3DKMT_UNLOCK unlock; + PFND3DKMT_QUERYRESOURCEINFO queryResourceInfo; + PFND3DKMT_OPENRESOURCE openResource; + PFND3DKMT_CREATEDEVICE createDevice; + PFND3DKMT_DESTROYDEVICE destroyDevice; + PFND3DKMT_OPENADAPTERFROMHDC openAdapterFromHdc; + PFND3DKMT_CLOSEADAPTER closeAdapter; + } cb; + } d3dkmt; + + struct { + mtx_t lock; + + union { + D3DKMT_HANDLE kmt; + HANDLE h; + }; + + void *cmd_buf; + size_t cmd_size; + + D3DDDI_ALLOCATIONLIST *alloc_list; + size_t alloc_size; + + D3DDDI_PATCHLOCATIONLIST *patch_list; + size_t patch_size; + } ctx; + + struct { + uint16_t domain; + uint8_t bus; + uint8_t dev; + uint8_t func; + } pci_bus_info; + + uint32_t max_timeline_count; + + struct { + uint32_t id; + uint32_t version; + struct virgl_renderer_capset_venus data; + } capset; + + uint32_t shmem_blob_mem; + uint32_t bo_blob_mem; + + struct util_sparse_array syncobj_array; + 
/* note that we use kmt_handle instead of res_id to index because + * res_id is monotonically increasing by default (see + * virtio_gpu_resource_id_get) + */ + struct util_sparse_array shmem_array; + struct util_sparse_array bo_array; + + mtx_t win32_handle_import_mutex; + + struct vn_renderer_shmem_cache shmem_cache; + + // bool supports_cross_device; +}; + +static inline NTSTATUS +hr_to_nt(struct virtgpu *gpu, HRESULT hr) +{ + switch (hr) { + case S_OK: + return STATUS_SUCCESS; + case E_OUTOFMEMORY: + return STATUS_NO_MEMORY; + case E_INVALIDARG: + return STATUS_INVALID_PARAMETER; + default: + vn_log(gpu->instance, "Unknown HRESULT: %lx", hr); + return STATUS_INVALID_PARAMETER; + } +} + +#include "util/hash_table.h" +#include "util/u_idalloc.h" + +static struct { + once_flag init; + mtx_t mutex; + struct hash_table *syncobjs; + struct util_idalloc ida; + + // int signaled_fd; + HANDLE signaled_fd; +} sim; + +struct sim_syncobj { + mtx_t mutex; + uint64_t point; + + HANDLE pending_fd; + uint64_t pending_point; + bool pending_cpu; +}; + +static uint32_t +sim_syncobj_create(struct virtgpu *gpu, bool signaled) +{ + struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj)); + if (!syncobj) + return 0; + + mtx_init(&syncobj->mutex, mtx_plain); + syncobj->pending_fd = NULL; + + mtx_lock(&sim.mutex); + + /* initialize lazily */ + if (!sim.syncobjs) { + sim.syncobjs = _mesa_pointer_hash_table_create(NULL); + if (!sim.syncobjs) { + mtx_unlock(&sim.mutex); + mtx_destroy(&syncobj->mutex); + free(syncobj); + return 0; + } + + util_idalloc_init(&sim.ida, 32); + + // TODO: is this actually needed? 
+ /* + struct drm_virtgpu_execbuffer args = { + .flags = VIRTGPU_EXECBUF_RING_IDX | VIRTGPU_EXECBUF_FENCE_FD_OUT, + .ring_idx = 0, / * CPU ring * / + }; + int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args); + if (ret || args.fence_fd < 0) { + _mesa_hash_table_destroy(sim.syncobjs, NULL); + sim.syncobjs = NULL; + mtx_unlock(&sim.mutex); + mtx_destroy(&syncobj->mutex); + free(syncobj); + return 0; + } + sim.signaled_fd = args.fence_fd; + */ + + sim.signaled_fd = CreateEventA(NULL, TRUE, TRUE, NULL); + if (sim.signaled_fd == NULL) { + _mesa_hash_table_destroy(sim.syncobjs, NULL); + sim.syncobjs = NULL; + mtx_unlock(&sim.mutex); + mtx_destroy(&syncobj->mutex); + free(syncobj); + return 0; + } + // vn_log(gpu->instance, "created handle %p", sim.signaled_fd); + } + + const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1; + _mesa_hash_table_insert(sim.syncobjs, + (const void *)(uintptr_t)syncobj_handle, syncobj); + + mtx_unlock(&sim.mutex); + + return syncobj_handle; +} + +static void +sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = NULL; + + mtx_lock(&sim.mutex); + + struct hash_entry *entry = _mesa_hash_table_search( + sim.syncobjs, (const void *)(uintptr_t)syncobj_handle); + if (entry) { + syncobj = entry->data; + _mesa_hash_table_remove(sim.syncobjs, entry); + util_idalloc_free(&sim.ida, syncobj_handle - 1); + } + + mtx_unlock(&sim.mutex); + + if (syncobj) { + if (syncobj->pending_fd != NULL) + CloseHandle(syncobj->pending_fd); + mtx_destroy(&syncobj->mutex); + free(syncobj); + } +} + +static VkResult +sim_syncobj_poll(HANDLE fd, int poll_timeout) +{ + DWORD ret = WaitForSingleObject(fd, poll_timeout); + + if (ret == WAIT_OBJECT_0) { + return VK_SUCCESS; + } else if (ret == WAIT_TIMEOUT) { + return VK_TIMEOUT; + } else { + return VK_ERROR_DEVICE_LOST; + } +} + +static void +sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point) +{ + syncobj->point = point; + + if 
(syncobj->pending_fd != NULL) { + CloseHandle(syncobj->pending_fd); + syncobj->pending_fd = NULL; + syncobj->pending_point = point; + } +} + +static void +sim_syncobj_update_point_locked(struct vn_instance *instance, + struct sim_syncobj *syncobj, + int poll_timeout) +{ + if (syncobj->pending_fd != NULL) { + VkResult result; + if (syncobj->pending_cpu) { + if (poll_timeout == -1) { + const int max_cpu_timeout = 2000; + poll_timeout = max_cpu_timeout; + // vn_log(instance, "waiting for handle %p", syncobj->pending_fd); + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + if (result == VK_TIMEOUT) { + vn_log(NULL, "cpu sync timed out after %dms; ignoring", + poll_timeout); + result = VK_SUCCESS; + } + } else { + // vn_log(instance, "waiting for handle %p", syncobj->pending_fd); + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + } + } else { + // vn_log(instance, "waiting for handle %p", syncobj->pending_fd); + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + } + if (result == VK_SUCCESS) { + CloseHandle(syncobj->pending_fd); + syncobj->pending_fd = NULL; + syncobj->point = syncobj->pending_point; + } + } +} + +static struct sim_syncobj * +sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = NULL; + + mtx_lock(&sim.mutex); + struct hash_entry *entry = _mesa_hash_table_search( + sim.syncobjs, (const void *)(uintptr_t)syncobj_handle); + if (entry) + syncobj = entry->data; + mtx_unlock(&sim.mutex); + + return syncobj; +} + +static bool +sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, 0); + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_query(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t *point) +{ + struct sim_syncobj *syncobj = 
sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_update_point_locked(gpu->instance, syncobj, 0); + *point = syncobj->point; + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_signal(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t point) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, point); + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_submit(struct virtgpu *gpu, + uint32_t syncobj_handle, + HANDLE sync_fd, + uint64_t point, + bool cpu) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + HANDLE pending_fd = NULL; + HANDLE proc = GetCurrentProcess(); + bool ret = DuplicateHandle(proc, sync_fd, proc, &pending_fd, 0, false, + DUPLICATE_SAME_ACCESS); + if (!ret) { + vn_log(gpu->instance, "failed to dup sync handle"); + return false; + } + + mtx_lock(&syncobj->mutex); + + if (syncobj->pending_fd != NULL) { + mtx_unlock(&syncobj->mutex); + + /* TODO */ + vn_log(gpu->instance, "sorry, no simulated timeline semaphore"); + CloseHandle(pending_fd); + return false; + } + if (syncobj->point >= point) + vn_log(gpu->instance, "non-monotonic signaling"); + + syncobj->pending_fd = pending_fd; + syncobj->pending_point = point; + syncobj->pending_cpu = cpu; + + mtx_unlock(&syncobj->mutex); + + return true; +} + +static int +timeout_to_poll_timeout(uint64_t timeout) +{ + const uint64_t ns_per_ms = 1000000; + const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms; + if (!ms && timeout) + return INFINITE; + return ms <= INT_MAX ? 
ms : INFINITE; +} + +static VkResult +sim_syncobj_wait(struct virtgpu *gpu, + const struct vn_renderer_wait *wait, + bool wait_avail) +{ + if (wait_avail) + return VK_ERROR_DEVICE_LOST; + + const int poll_timeout = timeout_to_poll_timeout(wait->timeout); + + /* TODO poll all fds at the same time */ + for (uint32_t i = 0; i < wait->sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i]; + const uint64_t point = wait->sync_values[i]; + + struct sim_syncobj *syncobj = + sim_syncobj_lookup(gpu, sync->syncobj_handle); + if (!syncobj) + return VK_ERROR_DEVICE_LOST; + + mtx_lock(&syncobj->mutex); + + if (syncobj->point < point) + sim_syncobj_update_point_locked(gpu->instance, syncobj, + poll_timeout); + + if (syncobj->point < point) { + if (wait->wait_any && i < wait->sync_count - 1 && + syncobj->pending_fd == NULL) { + mtx_unlock(&syncobj->mutex); + continue; + } + errno = ETIME; + mtx_unlock(&syncobj->mutex); + return VK_TIMEOUT; + } + + mtx_unlock(&syncobj->mutex); + + if (wait->wait_any) + break; + + /* TODO adjust poll_timeout */ + } + + return VK_SUCCESS; +} + +static HANDLE +sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return NULL; + + HANDLE fd = NULL; + HANDLE proc = GetCurrentProcess(); + mtx_lock(&syncobj->mutex); + HANDLE in = + syncobj->pending_fd != NULL ? 
syncobj->pending_fd : sim.signaled_fd; + if (!DuplicateHandle(proc, in, proc, &fd, 0, false, + DUPLICATE_SAME_ACCESS)) { + vn_log(gpu->instance, "failed to duplicate handle"); + } + mtx_unlock(&syncobj->mutex); + + return fd; +} + +static uint32_t +sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, HANDLE fd) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return 0; + + if (!sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false)) + return 0; + + return syncobj_handle; +} + +static VkResult +sim_submit_signal_syncs(struct virtgpu *gpu, + HANDLE sync_fd, + struct vn_renderer_sync *const *syncs, + const uint64_t *sync_values, + uint32_t sync_count, + bool cpu) +{ + for (uint32_t i = 0; i < sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i]; + const uint64_t pending_point = sync_values[i]; + + if (!sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd, + pending_point, cpu)) { + return VK_ERROR_DEVICE_LOST; + } + } + + return VK_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_create_context(struct virtgpu *gpu) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_CREATECONTEXT context = {}; + NTSTATUS status = + hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnCreateContextCb( + gpu->ddicb->hRTDevice, &context)); + if (!NT_SUCCESS(status)) { + return status; + } + + gpu->ddicb->hContext = context.hContext; + + gpu->ctx.h = context.hContext; + + gpu->ctx.cmd_buf = context.pCommandBuffer; + gpu->ctx.cmd_size = context.CommandBufferSize; + + gpu->ctx.alloc_list = context.pAllocationList; + gpu->ctx.alloc_size = context.AllocationListSize; + + gpu->ctx.patch_list = context.pPatchLocationList; + gpu->ctx.patch_size = context.PatchLocationListSize; + + return STATUS_SUCCESS; + } else { + D3DKMT_CREATECONTEXT context = { + .hDevice = gpu->d3dkmt.device, + .ClientHint = D3DKMT_CLIENTHINT_VULKAN, + }; + + NTSTATUS status = gpu->d3dkmt.cb.createContext(&context); + if (!NT_SUCCESS(status)) { + return status; + 
} + + gpu->ctx.kmt = context.hContext; + + gpu->ctx.cmd_buf = context.pCommandBuffer; + gpu->ctx.cmd_size = context.CommandBufferSize; + + gpu->ctx.alloc_list = context.pAllocationList; + gpu->ctx.alloc_size = context.AllocationListSize; + + gpu->ctx.patch_list = context.pPatchLocationList; + gpu->ctx.patch_size = context.PatchLocationListSize; + + return STATUS_SUCCESS; + } +} + +static NTSTATUS +virtgpu_ioctl_render(struct virtgpu *gpu, + unsigned cmd_offset, + unsigned cmd_length, + unsigned alloc_count, + void *priv, + unsigned priv_size) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_RENDER render = { + .hContext = gpu->ctx.h, + .CommandOffset = cmd_offset, + .CommandLength = cmd_length, + .NumAllocations = alloc_count, + .NumPatchLocations = alloc_count, + .pPrivateDriverData = priv, + .PrivateDriverDataSize = priv_size, + }; + + NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnRenderCb( + gpu->ddicb->hRTDevice, &render)); + + gpu->ctx.cmd_buf = render.pNewCommandBuffer; + gpu->ctx.cmd_size = render.NewCommandBufferSize; + + gpu->ctx.alloc_list = render.pNewAllocationList; + gpu->ctx.alloc_size = render.NewAllocationListSize; + + gpu->ctx.patch_list = render.pNewPatchLocationList; + gpu->ctx.patch_size = render.NewPatchLocationListSize; + + return status; + } else { + D3DKMT_RENDER render = { + .hContext = gpu->ctx.kmt, + .CommandOffset = cmd_offset, + .CommandLength = cmd_length, + .AllocationCount = alloc_count, + .PatchLocationCount = alloc_count, + .pPrivateDriverData = priv, + .PrivateDriverDataSize = priv_size, + }; + + NTSTATUS status = gpu->d3dkmt.cb.render(&render); + + gpu->ctx.cmd_buf = render.pNewCommandBuffer; + gpu->ctx.cmd_size = render.NewCommandBufferSize; + + gpu->ctx.alloc_list = render.pNewAllocationList; + gpu->ctx.alloc_size = render.NewAllocationListSize; + + gpu->ctx.patch_list = render.pNewPatchLocationList; + gpu->ctx.patch_size = render.NewPatchLocationListSize; + + return status; + } +} + +static NTSTATUS 
+virtgpu_ioctl_signal(struct virtgpu *gpu, HANDLE fence) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_SIGNALSYNCHRONIZATIONOBJECT2 signal = { + .hContext = gpu->ctx.h, + .ObjectCount = 0, + .BroadcastContextCount = 0, + .Flags = { + .EnqueueCpuEvent = TRUE, + }, + .CpuEventHandle = fence, + }; + return hr_to_nt( + gpu, gpu->ddicb->pKTCallbacks->pfnSignalSynchronizationObject2Cb( + gpu->ddicb->hRTDevice, &signal)); + } else { + D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 signal = { + .hContext = gpu->ctx.kmt, + .ObjectCount = 0, + .BroadcastContextCount = 0, + .Flags = { + .EnqueueCpuEvent = TRUE, + }, + .CpuEventHandle = fence, + }; + + return gpu->d3dkmt.cb.signalSynchronizationObject2(&signal); + } +} + +static VkResult +sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit) +{ + assert(submit->bo_count < gpu->ctx.alloc_size); + assert(submit->batch_count); + + VkResult ret = VK_SUCCESS; + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + mtx_lock(&gpu->ctx.lock); + + for (uint32_t i = 0; i < submit->bo_count; i++) { + struct virtgpu_bo *bo = (struct virtgpu_bo *)submit->bos[i]; + assert(bo->alloc != 0); + //if (bo->alloc == 0) return VK_ERROR_FEATURE_NOT_PRESENT; // TODO: we should not call render here, but rather save commands into present command buffer + gpu->ctx.alloc_list[i].hAllocation = bo->alloc; + gpu->ctx.patch_list[i].AllocationIndex = i; + } + + VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf; + hdr->type = VIOGPU_CMD_SUBMIT; + hdr->size = batch->cs_size; + hdr->flags = VIOGPU_EXECBUF_RING_IDX, hdr->ring_idx = batch->ring_idx; + + assert(batch->cs_size + sizeof(*hdr) <= gpu->ctx.cmd_size); + memcpy(gpu->ctx.cmd_buf + sizeof(*hdr), batch->cs_data, batch->cs_size); + NTSTATUS status = virtgpu_ioctl_render( + gpu, 0, sizeof(*hdr) + batch->cs_size, submit->bo_count, NULL, 0); + mtx_unlock(&gpu->ctx.lock); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "failed to render: 
0x%lx", status); + break; + } + + if (batch->sync_count > 0) { + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + NTSTATUS status = virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "failed to execbuffer: 0x%lx", status); + break; + } + + ret = sim_submit_signal_syncs(gpu, fence, batch->syncs, + batch->sync_values, batch->sync_count, + batch->ring_idx == 0); + CloseHandle(fence); + if (ret != VK_SUCCESS) + break; + } + } + + return ret; +} + +static NTSTATUS +virtgpu_ioctl_getparam(struct virtgpu *gpu, + KMTQUERYADAPTERINFOTYPE type, + void *priv, + unsigned priv_size) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_QUERYADAPTERINFO query = { + .pPrivateDriverData = priv, + .PrivateDriverDataSize = priv_size, + }; + return hr_to_nt(gpu, + gpu->ddicb->pAdapterCallbacks->pfnQueryAdapterInfoCb( + gpu->ddicb->hRTAdapter, &query)); + } else { + D3DKMT_QUERYADAPTERINFO query = { + .hAdapter = gpu->d3dkmt.adapter, + .Type = KMTQAITYPE_UMDRIVERPRIVATE, + .pPrivateDriverData = priv, + .PrivateDriverDataSize = priv_size, + }; + + return gpu->d3dkmt.cb.queryAdapterInfo(&query); + } +} + +static NTSTATUS +virtgpu_ioctl_escape(struct virtgpu *gpu, VIOGPU_ESCAPE *priv) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_ESCAPE escape = { + .hDevice = gpu->ddicb->hRTDevice, + .pPrivateDriverData = priv, + .PrivateDriverDataSize = sizeof(*priv), + .hContext = gpu->ctx.h, + }; + return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnEscapeCb( + gpu->ddicb->hRTAdapter, &escape)); + } else { + D3DKMT_ESCAPE escape = { + .hAdapter = gpu->d3dkmt.adapter, + .hDevice = gpu->d3dkmt.device, + .pPrivateDriverData = priv, + .PrivateDriverDataSize = sizeof(*priv), + }; + + return gpu->d3dkmt.cb.escape(&escape); + } +} + +static NTSTATUS +virtgpu_ioctl_get_caps(struct virtgpu *gpu, + uint32_t id, + uint32_t version, + void *capset, + size_t capset_size) +{ + VIOGPU_ESCAPE caps = { + .Type = VIOGPU_GET_CAPS, + 
.DataLength = sizeof(caps.Capset), + .Capset = { + .CapsetId = id, + .Version = version, + .Size = capset_size, + .Capset = capset, + }, + }; + + return virtgpu_ioctl_escape(gpu, &caps); +} + +static NTSTATUS +virtgpu_ioctl_init_map(struct virtgpu *gpu, D3DKMT_HANDLE handle) +{ + mtx_lock(&gpu->ctx.lock); + + gpu->ctx.alloc_list[0].hAllocation = handle; + gpu->ctx.patch_list[0].AllocationIndex = 0; + + VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf; + hdr->type = VIOGPU_CMD_MAP_BLOB; + hdr->size = sizeof(ULONG); + hdr->flags = 0; + hdr->ring_idx = 0; + + ULONG *index = (void *)(hdr + 1); + *index = 0; + memset(index + 1, 0, sizeof(*hdr)); + + NTSTATUS status = virtgpu_ioctl_render( + gpu, 0, 2 * sizeof(*hdr) + sizeof(ULONG), 1, NULL, 0); + mtx_unlock(&gpu->ctx.lock); + if (!NT_SUCCESS(status)) { + return status; + } + + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + status = virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + return status; + } + + // vn_log(gpu->instance, "waiting for handle %p", fence); + if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) { + return STATUS_ABANDONED_WAIT_0; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_destroy_map(struct virtgpu *gpu, D3DKMT_HANDLE handle) +{ + mtx_lock(&gpu->ctx.lock); + + gpu->ctx.alloc_list[0].hAllocation = handle; + gpu->ctx.patch_list[0].AllocationIndex = 0; + + VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf; + hdr->type = VIOGPU_CMD_UNMAP_BLOB; + hdr->size = sizeof(ULONG); + hdr->flags = 0; + hdr->ring_idx = 0; + + ULONG *index = (void *)(hdr + 1); + *index = 0; + memset(index + 1, 0, sizeof(*hdr)); + + NTSTATUS status = virtgpu_ioctl_render( + gpu, 0, 2 * sizeof(*hdr) + sizeof(ULONG), 1, NULL, 0); + mtx_unlock(&gpu->ctx.lock); + if (!NT_SUCCESS(status)) { + return status; + } + + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + status = 
virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + return status; + } + + // vn_log(gpu->instance, "waiting for handle %p", fence); + if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) { + return STATUS_ABANDONED_WAIT_0; + } + + return STATUS_SUCCESS; + // return virtgpu_ioctl_unlock(gpu, handle); +} + +static NTSTATUS +virtgpu_ioctl_wait(struct virtgpu *gpu) +{ + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + NTSTATUS status = virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + return status; + } + // vn_log(gpu->instance, "waiting for handle %p", fence); + if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) { + return STATUS_ABANDONED_WAIT_0; + } + + return STATUS_SUCCESS; +} + +#define VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu) \ + do { \ + NTSTATUS status = virtgpu_ioctl_wait(gpu); \ + if (!NT_SUCCESS(status)) { \ + return status; \ + } \ + } while (0) + +static NTSTATUS +virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t *res_id, + D3DKMT_HANDLE *alloc_handle, + D3DKMT_HANDLE *res_kmt_local, + D3DKMT_HANDLE *res_kmt_global, + HANDLE *res_h) +{ + blob_size = align64(blob_size, 4096); + + VIOGPU_CREATE_ALLOCATION_EXCHANGE alloc_priv = { + .Type = VIOGPU_RESOURCE_TYPE_BLOB, + .OptionsBlob = { + .blob_mem = blob_mem, + .blob_flags = blob_flags, + .blob_id = blob_id, + }, + .Size = blob_size, + }; + + VIOGPU_CREATE_RESOURCE_EXCHANGE res_priv = { 0 }; + + D3DDDI_ALLOCATIONINFO alloc_info = { + .pPrivateDriverData = &alloc_priv, + .PrivateDriverDataSize = sizeof(alloc_priv), + }; + + bool is_shareable = !!(blob_flags & VIOGPU_BLOB_FLAG_USE_SHAREABLE); + bool is_mappable = !!(blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE); + + // TODO: is this required? 
+ VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + if (gpu->ddicb != NULL) { + D3DDDICB_ALLOCATE alloc = { + .pPrivateDriverData = &res_priv, + .PrivateDriverDataSize = sizeof(res_priv), + .hResource = *res_h, + .NumAllocations = 1, + .pAllocationInfo = &alloc_info, + }; + + NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnAllocateCb( + gpu->ddicb->hRTDevice, &alloc)); + if (!NT_SUCCESS(status)) { + return status; + } + *res_kmt_local = alloc.hKMResource; + *alloc_handle = alloc_info.hAllocation; + } else { + D3DKMT_CREATEALLOCATION alloc = { + .hDevice = gpu->d3dkmt.device, + .pPrivateDriverData = &res_priv, + .PrivateDriverDataSize = sizeof(res_priv), + .NumAllocations = 1, + .pAllocationInfo = &alloc_info, + .Flags = { + .CreateResource = 1, + .CreateShared = is_shareable, + }, + }; + NTSTATUS status = gpu->d3dkmt.cb.createAllocation(&alloc); + if (!NT_SUCCESS(status)) { + return status; + } + *res_kmt_local = alloc.hResource; + if (res_kmt_global) { + *res_kmt_global = alloc.hGlobalShare; + } + *alloc_handle = alloc_info.hAllocation; + } + + // TODO: is this required? + VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + VIOGPU_ESCAPE res_info = { + .Type = VIOGPU_RES_INFO, + .DataLength = sizeof(res_info.ResourceInfo), + .ResourceInfo = { + .ResHandle = *alloc_handle, + }, + }; + + NTSTATUS status = virtgpu_ioctl_escape(gpu, &res_info); + if (!NT_SUCCESS(status)) { + return status; + } + + if (!res_info.ResourceInfo.IsBlob || !res_info.ResourceInfo.IsCreated) { + return STATUS_INVALID_PARAMETER; + } + + *res_id = res_info.ResourceInfo.Id; + + return is_mappable ? virtgpu_ioctl_init_map(gpu, *alloc_handle) + : STATUS_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_resource_destroy_blob(struct virtgpu *gpu, + D3DKMT_HANDLE alloc_handle, + D3DKMT_HANDLE res_kmt, + HANDLE res_h) +{ + // TODO: is this required? + VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + if (gpu->ddicb != NULL) { + D3DDDICB_DEALLOCATE destroy = { + .hResource = res_h, + .NumAllocations = res_h == NULL ? 
1 : 0, + .HandleList = res_h == NULL ? &alloc_handle : NULL, + }; + + NTSTATUS status = hr_to_nt(gpu, + gpu->ddicb->pKTCallbacks->pfnDeallocateCb(gpu->ddicb->hRTDevice, + &destroy)); + + if (!NT_SUCCESS(status)) { + return status; + } + } else { + D3DKMT_DESTROYALLOCATION destroy = { + .hDevice = gpu->d3dkmt.device, + .hResource = res_kmt, + .AllocationCount = res_kmt == 0 ? 1 : 0, + .phAllocationList = res_kmt == 0 ? &alloc_handle : NULL, + }; + + NTSTATUS status = gpu->d3dkmt.cb.destroyAllocation(&destroy); + if (!NT_SUCCESS(status)) { + return status; + } + } + + // TODO: is this required? + VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + return STATUS_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_lock(struct virtgpu *gpu, D3DKMT_HANDLE handle, void **ptr) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_LOCK lock = { + .hAllocation = handle, + .Flags = { + // .IgnoreSync = 1, + .LockEntire = 1, + }, + }; + NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnLockCb( + gpu->ddicb->hRTDevice, &lock)); + if (!NT_SUCCESS(status)) { + return status; + } + *ptr = lock.pData; + } else { + D3DKMT_LOCK lock = { + .hDevice = gpu->d3dkmt.device, + .Flags = { + // .IgnoreSync = 1, + .LockEntire = 1, + }, + .hAllocation = handle, + }; + NTSTATUS status = gpu->d3dkmt.cb.lock(&lock); + if (!NT_SUCCESS(status)) { + return status; + } + *ptr = lock.pData; + } + + return STATUS_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_unlock(struct virtgpu *gpu, D3DKMT_HANDLE handle) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_UNLOCK unlock = { + .NumAllocations = 1, + .phAllocations = &handle, + }; + return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnUnlockCb( + gpu->ddicb->hRTDevice, &unlock)); + } else { + D3DKMT_UNLOCK unlock = { + .hDevice = gpu->d3dkmt.device, + .NumAllocations = 1, + .phAllocations = &handle, + }; + return gpu->d3dkmt.cb.unlock(&unlock); + } +} + +static inline void +virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu) +{ + /* VIOGPU_BLOB_MEM_GUEST allocates from the 
guest system memory. They are + * logically contiguous in the guest but are sglists (iovecs) in the host. + * That makes them slower to process in the host. With host process + * isolation, it also becomes impossible for the host to access sglists + * directly. + * + * While there are ideas (and shipped code in some cases) such as creating + * udmabufs from sglists, or having a dedicated guest heap, it seems the + * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D. That is, when the + * renderer sees a request to export a blob where + * + * - blob_mem is VIOGPU_BLOB_MEM_HOST3D + * - blob_flags is VIOGPU_BLOB_FLAG_USE_MAPPABLE + * - blob_id is 0 + * + * it allocates a host shmem. + * + * supports_blob_id_0 has been enforced by mandated render server config. + */ + assert(gpu->capset.data.supports_blob_id_0); + gpu->shmem_blob_mem = VIOGPU_BLOB_MEM_HOST3D; +} + +static VkResult +virtgpu_init_context(struct virtgpu *gpu) +{ + assert(!gpu->capset.version); + + VIOGPU_ESCAPE ctx_init = { + .Type = VIOGPU_CTX_INIT, + .DataLength = sizeof(ctx_init.CtxInit), + .CtxInit = { + .CapsetID = gpu->capset.id, + .NumRings = 64, + .DebugName = "venus-win32", + }, + }; + + NTSTATUS status = virtgpu_ioctl_escape(gpu, &ctx_init); + if (!NT_SUCCESS(status)) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to create context: 0x%lx", status); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + status = virtgpu_ioctl_create_context(gpu); + if (!NT_SUCCESS(status)) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to create context: 0x%lx", status); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + return VK_SUCCESS; +} + +static VkResult +virtgpu_init_capset(struct virtgpu *gpu) +{ + gpu->capset.id = VIOGPU_CAPSET_VENUS; + gpu->capset.version = 0; + + NTSTATUS status = + virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, + &gpu->capset.data, sizeof(gpu->capset.data)); + if (!NT_SUCCESS(status)) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to 
get venus v%d capset: 0x%lx", + gpu->capset.version, status); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + if (gpu->capset.data.wire_format_version == 0) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "Unsupported wire format version %u", + gpu->capset.data.wire_format_version); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + return VK_SUCCESS; +} + +static VkResult +virtgpu_init_params(struct virtgpu *gpu) +{ + VIOGPU_ADAPTERINFO info = { 0 }; + + NTSTATUS status = virtgpu_ioctl_getparam(gpu, KMTQAITYPE_UMDRIVERPRIVATE, + &info, sizeof(info)); + + if (!NT_SUCCESS(status)) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, + "failed to get adapter info from kernel: 0x%lx", status); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + if (info.IamVioGPU != VIOGPU_IAM || !info.Flags.Supports3d) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "no venus support in this driver"); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + /* Don't care for VIRTGPU_BLOB_MEM_GUEST_VRAM since this driver is mainly + * developed for QEMU, but whoever needs it may feel free to implement this */ + if (info.Flags.HasShmem) { + gpu->bo_blob_mem = VIOGPU_BLOB_MEM_HOST3D; + } else { + if (VN_DEBUG(INIT)) { + vn_log( + gpu->instance, + "driver does not support the required host-visible shmem region"); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + /* Don't care about cross-device */ + // gpu->supports_cross_device = false; + + /* implied by CONTEXT_INIT uapi */ + gpu->max_timeline_count = 64; + + VIOGPU_ESCAPE pci_info = { + .Type = VIOGPU_GET_PCI_INFO, + .DataLength = sizeof(pci_info.PciInfo), + .PciInfo = {}, + }; + + status = virtgpu_ioctl_escape(gpu, &pci_info); + if (!NT_SUCCESS(status)) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to get device pci info from kernel"); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + gpu->pci_bus_info.domain = pci_info.PciInfo.Domain; + gpu->pci_bus_info.bus = pci_info.PciInfo.Bus; + gpu->pci_bus_info.dev = 
pci_info.PciInfo.Dev;
+   gpu->pci_bus_info.func = pci_info.PciInfo.Func;
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+virtgpu_find_adapter(struct virtgpu *gpu)
+{
+   DISPLAY_DEVICE adapter = {
+      .cb = sizeof(adapter),
+   };
+
+   for (int i = 0; EnumDisplayDevicesA(NULL, i, &adapter, 0); i++) {
+      if (strncasecmp(adapter.DeviceID, VIRTGPU_WIN_DEVICE_ID,
+                      strlen(VIRTGPU_WIN_DEVICE_ID)) == 0) {
+         HDC hdc = CreateDC(NULL, adapter.DeviceName, NULL, NULL);
+         D3DKMT_OPENADAPTERFROMHDC open_adapter = {
+            .hDc = hdc,
+         };
+
+         NTSTATUS status = gpu->d3dkmt.cb.openAdapterFromHdc(&open_adapter);
+         if (!NT_SUCCESS(status)) {
+            if (VN_DEBUG(INIT)) {
+               vn_log(gpu->instance, "failed to open adapter %s: 0x%lx",
+                      adapter.DeviceName, status);
+            }
+
+            DeleteDC(hdc); continue; /* free the DC on the failure path too */
+         }
+         DeleteDC(hdc); /* DC from CreateDC must be freed with DeleteDC (ReleaseDC is only for GetDC) */
+         gpu->d3dkmt.adapter = open_adapter.hAdapter;
+         gpu->d3dkmt.luid = open_adapter.AdapterLuid;
+
+         if (VN_DEBUG(INIT)) {
+            vn_log(gpu->instance, "using adapter %s (LUID %lx-%lx)",
+                   adapter.DeviceName, open_adapter.AdapterLuid.HighPart,
+                   open_adapter.AdapterLuid.LowPart);
+         }
+         return VK_SUCCESS;
+      }
+   }
+   return VK_ERROR_INCOMPATIBLE_DRIVER;
+}
+
+static NTSTATUS
+virtgpu_ioctl_create_device(struct virtgpu *gpu)
+{
+   if (gpu->ddicb != NULL) {
+      /* Nothing to do here, device was already created before */
+      return STATUS_SUCCESS;
+   } else {
+      D3DKMT_CREATEDEVICE create_device = {
+         .hAdapter = gpu->d3dkmt.adapter,
+      };
+      NTSTATUS status = gpu->d3dkmt.cb.createDevice(&create_device);
+      if (!NT_SUCCESS(status)) {
+         return status;
+      }
+
+      gpu->d3dkmt.device = create_device.hDevice;
+      return STATUS_SUCCESS;
+   }
+}
+
+static VkResult
+virtgpu_open(struct virtgpu *gpu, void *info)
+{
+   VkD3DDDICallbacks *callbacks = vk_find_struct(info, D3DDDI_CALLBACKS);
+   if (callbacks != NULL) {
+      /* D3D11 UMD */
+      gpu->ddicb = callbacks;
+   } else {
+      /* Standalone Vulkan ICD, using D3DKMT */
+      HINSTANCE gdi32lib = LoadLibraryA("GDI32.dll");
+      gpu->d3dkmt.lib = gdi32lib;
+
+      gpu->d3dkmt.cb.queryAdapterInfo =
+         
(void *)GetProcAddress(gdi32lib, "D3DKMTQueryAdapterInfo"); + gpu->d3dkmt.cb.escape = + (void *)GetProcAddress(gdi32lib, "D3DKMTEscape"); + gpu->d3dkmt.cb.render = + (void *)GetProcAddress(gdi32lib, "D3DKMTRender"); + gpu->d3dkmt.cb.signalSynchronizationObject2 = (void *)GetProcAddress( + gdi32lib, "D3DKMTSignalSynchronizationObject2"); + gpu->d3dkmt.cb.createContext = + (void *)GetProcAddress(gdi32lib, "D3DKMTCreateContext"); + gpu->d3dkmt.cb.destroyContext = + (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyContext"); + gpu->d3dkmt.cb.createAllocation = + (void *)GetProcAddress(gdi32lib, "D3DKMTCreateAllocation"); + gpu->d3dkmt.cb.destroyAllocation = + (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyAllocation"); + gpu->d3dkmt.cb.lock = (void *)GetProcAddress(gdi32lib, "D3DKMTLock"); + gpu->d3dkmt.cb.unlock = + (void *)GetProcAddress(gdi32lib, "D3DKMTUnlock"); + gpu->d3dkmt.cb.queryResourceInfo = + (void *)GetProcAddress(gdi32lib, "D3DKMTQueryResourceInfo"); + gpu->d3dkmt.cb.openResource = + (void *)GetProcAddress(gdi32lib, "D3DKMTOpenResource"); + gpu->d3dkmt.cb.createDevice = + (void *)GetProcAddress(gdi32lib, "D3DKMTCreateDevice"); + gpu->d3dkmt.cb.destroyDevice = + (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyDevice"); + gpu->d3dkmt.cb.openAdapterFromHdc = + (void *)GetProcAddress(gdi32lib, "D3DKMTOpenAdapterFromHdc"); + gpu->d3dkmt.cb.closeAdapter = + (void *)GetProcAddress(gdi32lib, "D3DKMTCloseAdapter"); + + NTSTATUS status = virtgpu_find_adapter(gpu); + if (!NT_SUCCESS(status)) { + return VK_ERROR_DEVICE_LOST; + } + } + + NTSTATUS status = virtgpu_ioctl_create_device(gpu); + if (!NT_SUCCESS(status)) { + return VK_ERROR_DEVICE_LOST; + } + + return VK_SUCCESS; +} + +static uint32_t +virtgpu_bo_blob_flags(struct virtgpu *gpu, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + uint32_t blob_flags = 0; + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + blob_flags |= VIOGPU_BLOB_FLAG_USE_MAPPABLE; + if (external_handles) + 
blob_flags |= VIOGPU_BLOB_FLAG_USE_SHAREABLE; + // if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) { + // if (gpu->supports_cross_device) + // blob_flags |= VIOGPU_BLOB_FLAG_USE_CROSS_DEVICE; + // } + + return blob_flags; +} + +static NTSTATUS +virtgpu_ioctl_open_resource(struct virtgpu *gpu, + D3DKMT_HANDLE res_kmt_global, + D3DKMT_HANDLE *alloc, + D3DKMT_HANDLE *res_kmt_local, + VIOGPU_RES_INFO_REQ *res_info, + const VkD3DDDIOpenResource *d3d_open) +{ + if (gpu->ddicb != NULL) { + assert(d3d_open != NULL); + + //const VIOGPU_CREATE_ALLOCATION_EXCHANGE *alloc_priv = + // d3d_open->pOpenResource->pOpenAllocationInfo[0].pPrivateDriverData; + //assert(alloc_priv->Type == VIOGPU_RESOURCE_TYPE_BLOB); + *alloc = + d3d_open->pOpenResource->pOpenAllocationInfo[0].hAllocation; + *res_kmt_local = + d3d_open->pOpenResource->hKMResource.handle; + + *res_info = *(VIOGPU_RES_INFO_REQ *) d3d_open->pResourceInfo; + + return STATUS_SUCCESS; + } else { + D3DKMT_QUERYRESOURCEINFO query = { + .hDevice = gpu->d3dkmt.device, + .hGlobalShare = res_kmt_global, + }; + + NTSTATUS status = gpu->d3dkmt.cb.queryResourceInfo(&query); + if (!NT_SUCCESS(status)) { + return status; + } + + assert(query.ResourcePrivateDriverDataSize >= sizeof(VIOGPU_CREATE_RESOURCE_EXCHANGE)); + assert(query.TotalPrivateDriverDataSize >= sizeof(VIOGPU_CREATE_ALLOCATION_EXCHANGE) * query.NumAllocations); + + size_t runtime_data_off = 0; + size_t res_priv_off = + runtime_data_off + align64(query.PrivateRuntimeDataSize, 8); + size_t alloc_priv_off = + res_priv_off + align64(query.ResourcePrivateDriverDataSize, 8); + size_t alloc_list_off = + alloc_priv_off + align64(query.TotalPrivateDriverDataSize, 8); + + size_t total_size = alloc_list_off + sizeof(D3DDDI_OPENALLOCATIONINFO) * + query.NumAllocations; + void *data = calloc(total_size, 1); + uintptr_t p = (uintptr_t)data; + + void *runtime = (void *)(p + runtime_data_off); + VIOGPU_CREATE_RESOURCE_EXCHANGE *resource_priv = + (void *)(p + 
res_priv_off); + + VIOGPU_CREATE_ALLOCATION_EXCHANGE *full_alloc_priv = + (void *)(p + alloc_priv_off); + + D3DDDI_OPENALLOCATIONINFO *alloc_list = (void *)(p + alloc_list_off); + + D3DKMT_OPENRESOURCE open = { + .hDevice = gpu->d3dkmt.device, + .hGlobalShare = res_kmt_global, + .NumAllocations = query.NumAllocations, + .pOpenAllocationInfo = alloc_list, + .pResourcePrivateDriverData = resource_priv, + .ResourcePrivateDriverDataSize = query.ResourcePrivateDriverDataSize, + .pPrivateRuntimeData = runtime, + .PrivateRuntimeDataSize = query.PrivateRuntimeDataSize, + .pTotalPrivateDriverDataBuffer = full_alloc_priv, + .TotalPrivateDriverDataBufferSize = query.TotalPrivateDriverDataSize, + }; + + status = gpu->d3dkmt.cb.openResource(&open); + if (!NT_SUCCESS(status)) { + goto end; + } + + const VIOGPU_CREATE_ALLOCATION_EXCHANGE *alloc_priv = + alloc_list[0].pPrivateDriverData; + //assert(alloc_priv->Type == VIOGPU_RESOURCE_TYPE_BLOB); + + *alloc = alloc_list[0].hAllocation; + *res_kmt_local = open.hResource; + + VIOGPU_ESCAPE res_esc = { + .Type = VIOGPU_RES_INFO, + .DataLength = sizeof(res_esc.ResourceInfo), + .ResourceInfo = { + .ResHandle = *alloc, + }, + }; + + status = virtgpu_ioctl_escape(gpu, &res_esc); + if (!NT_SUCCESS(status)) { + goto end; + } + *res_info = res_esc.ResourceInfo; + + end: + free(data); + return status; + } +} + +static VkResult +virtgpu_bo_create_from_handle(struct vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + bool is_kmt, + void *handle, + VkMemoryPropertyFlags flags, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = NULL; + + VIOGPU_RES_INFO_REQ res_info = {}; + + mtx_lock(&gpu->win32_handle_import_mutex); + + // TODO: virtgpu_ioctl_open_resource_from_nthandle + assert(is_kmt); + + D3DKMT_HANDLE alloc, kmt_local, + kmt_global = (D3DKMT_HANDLE)(uintptr_t)handle; + + const VkD3DDDIOpenResource *d3d_open = + 
vk_find_struct_const(alloc_info, D3DDDI_OPEN_RESOURCE);
+
+   NTSTATUS status =
+      virtgpu_ioctl_open_resource(gpu, kmt_global, &alloc, &kmt_local, &res_info, d3d_open);
+   if (!NT_SUCCESS(status)) {
+      vn_log(gpu->instance, "failed to open resource: 0x%lx", status);
+      mtx_unlock(&gpu->win32_handle_import_mutex); return VK_ERROR_DEVICE_LOST; /* lock was taken above; must not leak it */
+   }
+
+   if (!alloc)
+      goto fail;
+   bo = util_sparse_array_get(&gpu->bo_array, alloc);
+
+   /* Upon import, blob_flags is not passed to the kernel and is only for
+    * internal use. Set it to what works best for us.
+    * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags
+    * - classic 3d: SHAREABLE only for export and to fail the map
+    */
+   uint32_t blob_flags = VIOGPU_BLOB_FLAG_USE_SHAREABLE;
+   size_t mmap_size = 0;
+   if (res_info.BlobMem) {
+      /* must be VIOGPU_BLOB_MEM_HOST3D */
+      if (res_info.BlobMem != gpu->bo_blob_mem) {
+         vn_log(gpu->instance,
+                "NT/KMT handle import failed: info.blob_mem(%lu) != "
+                "gpu->bo_blob_mem(%u)",
+                res_info.BlobMem, gpu->bo_blob_mem);
+         goto fail;
+      }
+
+      blob_flags |= virtgpu_bo_blob_flags(gpu, flags, 0);
+
+      /* mmap_size is only used when mappable */
+      mmap_size = 0;
+      if (blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE) {
+         if (res_info.Size < size) {
+            /* If queried blob size is smaller than requested allocation size,
+             * we drop the mappable flag to defer the mapping failure till the
+             * app attempts to map the imported memory.
+             */
+            blob_flags &= ~VIOGPU_BLOB_FLAG_USE_MAPPABLE;
+         } else {
+            /* Similar to virtgpu_bo_create_from_device_memory, the app can
+             * do multiple imports with different sizes for suballocation. So
+             * on the initial import, the mapping size has to be initialized
+             * with the real size of the backing blob resource.
+ */ + mmap_size = res_info.Size; + } + } + } + + /* we check bo->alloc instead of bo->refcount because bo->refcount + * might only be memset to 0 and is not considered initialized in theory + */ + if (bo->alloc == alloc) { + if (bo->base.mmap_size < mmap_size) { + vn_log(gpu->instance, + "NT/KMT handle import failed: bo->base.mmap_size(%zu) < " + "mmap_size(%zu)", + bo->base.mmap_size, mmap_size); + goto fail; + } + if (blob_flags & ~bo->blob_flags) { + vn_log(gpu->instance, + "NT/KMT handle import failed: blob_flags(%u) & " + "~bo->blob_flags(%u)", + blob_flags, bo->blob_flags); + goto fail; + } + + /* we can't use vn_renderer_bo_ref as the refcount may drop to 0 + * temporarily before virtgpu_bo_destroy grabs the lock + */ + vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1); + } else { + *bo = (struct virtgpu_bo){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_info.Id, + .mmap_size = mmap_size, + }, + .alloc = alloc, + .blob_flags = blob_flags, + }; + } + if (gpu->ddicb != NULL) { + bo->h = handle; + } else { + bo->kmt.local = kmt_local; + bo->kmt.global = kmt_global; + } + + mtx_unlock(&gpu->win32_handle_import_mutex); + + *out_bo = &bo->base; + + return VK_SUCCESS; + +fail: + mtx_unlock(&gpu->win32_handle_import_mutex); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; +} + +static VkResult +virtgpu_bo_create_from_device_memory( + struct vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + const uint32_t blob_flags = + virtgpu_bo_blob_flags(gpu, flags, external_handles); + + uint32_t res_id; + + HANDLE h = NULL; + const VkD3DDDICreateResource *d3d_create = + vk_find_struct_const(alloc_info, D3DDDI_CREATE_RESOURCE); + if (gpu->ddicb != NULL && d3d_create != NULL) { + h = d3d_create->hRTResource; + } + + D3DKMT_HANDLE alloc, 
kmt_local, kmt_global; + NTSTATUS status = virtgpu_ioctl_resource_create_blob( + gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id, &alloc, + &kmt_local, &kmt_global, &h); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, + "RESOURCE_CREATE_BLOB failed: type=%u, flags=%u, size=%zu, " + "id=%" PRIu64 ", err=0x%lx", + gpu->bo_blob_mem, blob_flags, size, mem_id, status); + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + /* There's a single underlying bo mapping shared by the initial alloc here + * and the later import of the same. The mapping size has to be initialized + * with the real size of the created blob resource, since the app can query + * the exported native handle size for re-import. e.g. lseek dma-buf size + */ + const uint32_t mappable_and_shareable = + VIOGPU_BLOB_FLAG_USE_MAPPABLE | VIOGPU_BLOB_FLAG_USE_SHAREABLE; + if ((blob_flags & mappable_and_shareable) == mappable_and_shareable) { + VIOGPU_ESCAPE res_info = { + .Type = VIOGPU_RES_INFO, + .DataLength = sizeof(res_info.ResourceInfo), + .ResourceInfo = { + .ResHandle = alloc, + }, + }; + + NTSTATUS status = virtgpu_ioctl_escape(gpu, &res_info); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "RESOURCE_INFO failed: handle=%u, err=0x%lx", + alloc, status); + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt_local, h); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + assert(res_info.ResourceInfo.IsBlob); + assert(res_info.ResourceInfo.BlobMem); + if (res_info.ResourceInfo.Size < size) { + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt_local, h); + vn_log(gpu->instance, + "blob mem create failed: info.size(%llu) < size(%" PRIu64 ")", + res_info.ResourceInfo.Size, size); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + size = res_info.ResourceInfo.Size; + } + + struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, alloc); + *bo = (struct virtgpu_bo){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_id, + .mmap_size = size, + }, + .alloc = alloc, + .blob_flags = 
blob_flags, + }; + + if (gpu->ddicb != NULL) { + bo->h = h; + } else { + bo->kmt.local = kmt_local; + bo->kmt.global = kmt_global; + } + + *out_bo = &bo->base; + + return VK_SUCCESS; +} + +static void +virtgpu_bo_invalidate(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ + // TODO: check if this is true +} + +static void +virtgpu_bo_flush(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ + // TODO: check if this is true +} + +static void * +virtgpu_bo_map(struct vn_renderer *renderer, + struct vn_renderer_bo *_bo, + void *placed_addr) +{ + assert(placed_addr == NULL); + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + const bool mappable = bo->blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE; + + /* not thread-safe but is fine */ + if (!bo->base.mmap_ptr && mappable) { + NTSTATUS status = + virtgpu_ioctl_lock(gpu, bo->alloc, &bo->base.mmap_ptr); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "failed to map blob resource: 0x%lx", status); + } + } + + return bo->base.mmap_ptr; +} + +static void * +virtgpu_bo_export_handle(struct vn_renderer *renderer, + struct vn_renderer_bo *_bo, + bool is_kmt) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + const bool shareable = bo->blob_flags & VIOGPU_BLOB_FLAG_USE_SHAREABLE; + + if (is_kmt && gpu->ddicb != NULL) + /* Special hack for DXGI DDI */ + return (void *)(uintptr_t)bo->alloc; + else if (!shareable) + return NULL; + else if (is_kmt && gpu->ddicb == NULL) + return (void *)(uintptr_t)bo->kmt.global; + else + return NULL /* TODO */; +} + +static bool +virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct 
virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + + mtx_lock(&gpu->win32_handle_import_mutex); + + /* Check the refcount again after the import lock is grabbed. Yes, we use + * the double-checked locking anti-pattern. + */ + if (vn_refcount_is_valid(&bo->base.refcount)) { + mtx_unlock(&gpu->win32_handle_import_mutex); + return false; + } + + if (bo->base.mmap_ptr) { + virtgpu_ioctl_unlock(gpu, bo->alloc); + virtgpu_ioctl_destroy_map(gpu, bo->alloc); + } + + /* Set alloc and res to 0 to indicate that the bo is invalid. Must be set + * before closing the handles. Otherwise the same handles can be reused + * by another newly created bo and unexpectedly gotten zero'ed out the + * tracked handles. + */ + const D3DKMT_HANDLE alloc = bo->alloc, kmt = bo->kmt.local; + const HANDLE h = bo->h; + bo->alloc = 0; + bo->kmt.local = 0; + bo->h = NULL; + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt, h); + + mtx_unlock(&gpu->win32_handle_import_mutex); + + return true; +} + +static VkResult +virtgpu_sync_write(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + const bool ret = sim_syncobj_signal(gpu, sync->syncobj_handle, val); + + return ret ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static VkResult +virtgpu_sync_read(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t *val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + const bool ret = sim_syncobj_query(gpu, sync->syncobj_handle, val); + + return ret ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static VkResult +virtgpu_sync_reset(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t initial_val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + bool ret = sim_syncobj_reset(gpu, sync->syncobj_handle); + if (!ret) { + ret = sim_syncobj_signal(gpu, sync->syncobj_handle, initial_val); + } + + return ret ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static void * +virtgpu_sync_export_handle(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + return sim_syncobj_export(gpu, sync->syncobj_handle); +} + +static void +virtgpu_sync_destroy(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + sim_syncobj_destroy(gpu, sync->syncobj_handle); + + free(sync); +} + +static VkResult +virtgpu_sync_create_from_handle(struct vn_renderer *renderer, + void *handle, + struct vn_renderer_sync **out_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + uint32_t syncobj_handle = sim_syncobj_create(gpu, false); + if (!syncobj_handle) + return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!sim_syncobj_import(gpu, syncobj_handle, handle)) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + struct virtgpu_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + sync->syncobj_handle = syncobj_handle; + sync->base.sync_id = 0; /* TODO */ + + *out_sync = &sync->base; + + return VK_SUCCESS; +} + +static VkResult +virtgpu_sync_create(struct vn_renderer *renderer, + uint64_t initial_val, + uint32_t flags, + struct vn_renderer_sync **out_sync) +{ + struct virtgpu 
*gpu = (struct virtgpu *)renderer; + + /* TODO */ + if (flags & VN_RENDERER_SYNC_SHAREABLE) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* always false because we don't use binary drm_syncobjs */ + const bool signaled = false; + const uint32_t syncobj_handle = sim_syncobj_create(gpu, signaled); + if (!syncobj_handle) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* add a signaled fence chain with seqno initial_val */ + const bool ret = sim_syncobj_signal(gpu, syncobj_handle, initial_val); + if (!ret) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + struct virtgpu_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + sync->syncobj_handle = syncobj_handle; + /* we will have a sync_id when shareable is true and virtio-gpu associates + * a host sync object with guest drm_syncobj + */ + sync->base.sync_id = 0; + + *out_sync = &sync->base; + + return VK_SUCCESS; +} + +static void +virtgpu_shmem_destroy_now(struct vn_renderer *renderer, + struct vn_renderer_shmem *_shmem) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem; + + virtgpu_ioctl_unlock(gpu, shmem->alloc); + virtgpu_ioctl_destroy_map(gpu, shmem->alloc); + virtgpu_ioctl_resource_destroy_blob(gpu, shmem->alloc, shmem->kmt, + shmem->h); +} + +static void +virtgpu_shmem_destroy(struct vn_renderer *renderer, + struct vn_renderer_shmem *shmem) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem)) + return; + + virtgpu_shmem_destroy_now(&gpu->base, shmem); +} + +static struct vn_renderer_shmem * +virtgpu_shmem_create(struct vn_renderer *renderer, size_t size) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + struct vn_renderer_shmem *cached_shmem = + vn_renderer_shmem_cache_get(&gpu->shmem_cache, size); + if (cached_shmem) { + cached_shmem->refcount 
= VN_REFCOUNT_INIT(1); + return cached_shmem; + } + + uint32_t res_id; + HANDLE h = NULL; /* This is a device allocation */ + D3DKMT_HANDLE alloc, kmt; + + NTSTATUS status = virtgpu_ioctl_resource_create_blob( + gpu, gpu->shmem_blob_mem, VIOGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, + &res_id, &alloc, &kmt, NULL, &h); + if (!NT_SUCCESS(status)) + return NULL; + + void *ptr = NULL; + status = virtgpu_ioctl_lock(gpu, alloc, &ptr); + if (!NT_SUCCESS(status)) { + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt, h); + vn_log(gpu->instance, "failed to map blob resource: 0x%lx", status); + return NULL; + } + + struct virtgpu_shmem *shmem = + util_sparse_array_get(&gpu->shmem_array, alloc); + *shmem = (struct virtgpu_shmem){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_id, + .mmap_size = size, + .mmap_ptr = ptr, + }, + .alloc = alloc, + }; + + if (gpu->ddicb != NULL) { + shmem->h = h; + } else { + shmem->kmt = kmt; + } + + return &shmem->base; +} + +static VkResult +virtgpu_wait(struct vn_renderer *renderer, + const struct vn_renderer_wait *wait) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + return sim_syncobj_wait(gpu, wait, false); +} + +static VkResult +virtgpu_submit(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + return sim_submit(gpu, submit); +} + +static void +virtgpu_init_renderer_info(struct virtgpu *gpu) +{ + struct vn_renderer_info *info = &gpu->base.info; + + info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID; + info->pci.device_id = VIRTGPU_PCI_DEVICE_ID; + + info->pci.has_bus_info = true; + info->pci.props = (VkPhysicalDevicePCIBusInfoPropertiesEXT){ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, + .pciDomain = gpu->pci_bus_info.domain, + .pciBus = gpu->pci_bus_info.bus, + .pciDevice = gpu->pci_bus_info.dev, + .pciFunction = gpu->pci_bus_info.func, + }; + + info->has_dma_buf_import = true; + info->has_external_sync = true; + + 
info->has_implicit_fencing = false; + + const struct virgl_renderer_capset_venus *capset = &gpu->capset.data; + info->wire_format_version = capset->wire_format_version; + info->vk_xml_version = capset->vk_xml_version; + info->vk_ext_command_serialization_spec_version = + capset->vk_ext_command_serialization_spec_version; + info->vk_mesa_venus_protocol_spec_version = + capset->vk_mesa_venus_protocol_spec_version; + assert(capset->supports_blob_id_0); + + /* ensure vk_extension_mask is large enough to hold all capset masks */ + STATIC_ASSERT(sizeof(info->vk_extension_mask) >= + sizeof(capset->vk_extension_mask1)); + memcpy(info->vk_extension_mask, capset->vk_extension_mask1, + sizeof(capset->vk_extension_mask1)); + + assert(capset->allow_vk_wait_syncs); + + assert(capset->supports_multiple_timelines); + info->max_timeline_count = gpu->max_timeline_count; + + /* Use guest blob allocations from dedicated heap (Host visible memory) */ + //if (gpu->bo_blob_mem == VIOGPU_BLOB_MEM_HOST3D && capset->use_guest_vram) + // info->has_guest_vram = true; + info->has_guest_vram = false; + + if (gpu->ddicb != NULL) { + info->id.has_luid = true; + info->id.node_mask = 1; /* TODO D3D12 interop*/ + memcpy(info->id.luid, &gpu->ddicb->AdapterLuid, VK_LUID_SIZE); + } else { + info->id.has_luid = true; + info->id.node_mask = 1; /* TODO D3D12 interop*/ + static_assert(sizeof(gpu->d3dkmt.luid) == VK_LUID_SIZE); + memcpy(info->id.luid, &gpu->d3dkmt.luid, VK_LUID_SIZE); + } +} + +static NTSTATUS +virtgpu_ioctl_destroy_context(struct virtgpu *gpu) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_DESTROYCONTEXT destroy = { + .hContext = gpu->ctx.h, + }; + return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnDestroyContextCb( + gpu->ddicb->hRTDevice, &destroy)); + } else { + D3DKMT_DESTROYCONTEXT destroy = { + .hContext = gpu->ctx.kmt, + }; + return gpu->d3dkmt.cb.destroyContext(&destroy); + } +} + +static NTSTATUS +virtgpu_ioctl_destroy_device(struct virtgpu *gpu) +{ + D3DKMT_DESTROYDEVICE destroy = { + 
.hDevice = gpu->d3dkmt.device,
+   };
+   return gpu->d3dkmt.cb.destroyDevice(&destroy);
+}
+
+static NTSTATUS
+virtgpu_ioctl_close_adapter(struct virtgpu *gpu)
+{
+   D3DKMT_CLOSEADAPTER close = {
+      .hAdapter = gpu->d3dkmt.adapter,
+   };
+   return gpu->d3dkmt.cb.closeAdapter(&close);
+}
+
+static void
+virtgpu_destroy(struct vn_renderer *renderer,
+                const VkAllocationCallbacks *alloc)
+{
+   struct virtgpu *gpu = (struct virtgpu *)renderer;
+
+   vn_renderer_shmem_cache_fini(&gpu->shmem_cache);
+
+   if (gpu->ctx.h)
+      virtgpu_ioctl_destroy_context(gpu);
+   if (gpu->d3dkmt.device)
+      virtgpu_ioctl_destroy_device(gpu);
+   if (gpu->d3dkmt.adapter)
+      virtgpu_ioctl_close_adapter(gpu);
+   if (gpu->d3dkmt.lib)
+      FreeLibrary(gpu->d3dkmt.lib);
+
+   mtx_destroy(&gpu->win32_handle_import_mutex);
+   mtx_destroy(&gpu->ctx.lock);
+
+   util_sparse_array_finish(&gpu->shmem_array);
+   util_sparse_array_finish(&gpu->bo_array); util_sparse_array_finish(&gpu->syncobj_array); /* syncobj_array is init'ed in virtgpu_init and was never finished */
+
+   vk_free(alloc, gpu);
+}
+
+static VkResult
+virtgpu_init(struct virtgpu *gpu, void *info)
+{
+   util_sparse_array_init(&gpu->syncobj_array, sizeof(struct virtgpu_sync),
+                          1024);
+
+   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
+                          1024);
+   util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);
+
+   mtx_init(&gpu->win32_handle_import_mutex, mtx_plain);
+
+   mtx_init(&gpu->ctx.lock, mtx_plain);
+
+   VkResult result = virtgpu_open(gpu, info);
+   if (result == VK_SUCCESS)
+      result = virtgpu_init_params(gpu);
+   if (result == VK_SUCCESS)
+      result = virtgpu_init_capset(gpu);
+   if (result == VK_SUCCESS)
+      result = virtgpu_init_context(gpu);
+   if (result != VK_SUCCESS)
+      return result;
+
+   virtgpu_init_shmem_blob_mem(gpu);
+
+   vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
+                                virtgpu_shmem_destroy_now);
+
+   virtgpu_init_renderer_info(gpu);
+
+   gpu->base.ops.destroy = virtgpu_destroy;
+   gpu->base.ops.submit = virtgpu_submit;
+   gpu->base.ops.wait = virtgpu_wait;
+
+   gpu->base.shmem_ops.create = virtgpu_shmem_create;
+   gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;
+
+   gpu->base.bo_ops.create_from_device_memory =
+      virtgpu_bo_create_from_device_memory;
+   gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
+   gpu->base.bo_ops.create_from_handle = virtgpu_bo_create_from_handle;
+   gpu->base.bo_ops.export_handle = virtgpu_bo_export_handle;
+   gpu->base.bo_ops.map = virtgpu_bo_map;
+   gpu->base.bo_ops.flush = virtgpu_bo_flush;
+   gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;
+
+   gpu->base.sync_ops.create = virtgpu_sync_create;
+   gpu->base.sync_ops.create_from_handle = virtgpu_sync_create_from_handle;
+   gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
+   gpu->base.sync_ops.export_handle = virtgpu_sync_export_handle;
+   gpu->base.sync_ops.reset = virtgpu_sync_reset;
+   gpu->base.sync_ops.read = virtgpu_sync_read;
+   gpu->base.sync_ops.write = virtgpu_sync_write;
+   return VK_SUCCESS;
+}
+
+static void
+sim_init_mutex(void)
+{
+   mtx_init(&sim.mutex, mtx_plain);
+}
+
+VkResult
+vn_renderer_create_virtgpu_win32(struct vn_instance *instance,
+                                 const VkAllocationCallbacks *alloc,
+                                 const VkInstanceCreateInfo *pCreateInfo,
+                                 struct vn_renderer **renderer)
+{
+   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
+                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!gpu)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   call_once(&sim.init, sim_init_mutex);
+
+   gpu->instance = instance;
+
+   VkResult result = virtgpu_init(gpu, (void *) pCreateInfo->pNext);
+   if (result != VK_SUCCESS) {
+      virtgpu_destroy(&gpu->base, alloc);
+      return result;
+   }
+
+   *renderer = &gpu->base;
+
+   return VK_SUCCESS;
+}