diff --git a/include/vulkan/vulkan_d3dddi.h b/include/vulkan/vulkan_d3dddi.h
new file mode 100644
index 00000000000..cfae046a1af
--- /dev/null
+++ b/include/vulkan/vulkan_d3dddi.h
@@ -0,0 +1,53 @@
+#ifndef VULKAN_D3DDDI_H_
+#define VULKAN_D3DDDI_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VK_STRUCTURE_TYPE_D3DDDI_CALLBACKS ((VkStructureType)4281808695u)
+#define VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE ((VkStructureType)4281808696u)
+#define VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE ((VkStructureType)4281808697u)
+
+typedef struct {
+   VkStructureType sType;
+   void *pNext;
+
+   LUID AdapterLuid;
+
+   HANDLE hRTAdapter;                                 // in: Runtime handle
+   HANDLE hRTDevice;                                  // in: Runtime handle
+   const D3DDDI_ADAPTERCALLBACKS *pAdapterCallbacks;  // in: Pointer to runtime callbacks that invoke kernel
+   const D3DDDI_DEVICECALLBACKS *pKTCallbacks;        // in: Pointer to runtime callbacks that invoke kernel
+   const DXGI_DDI_BASE_CALLBACKS *pDXGIBaseCallbacks; // in: The driver should record this pointer for later use
+
+   D3D10DDI_HRTCORELAYER hRTCoreLayer;                // in: CoreLayer handle
+   const D3D11DDI_CORELAYER_DEVICECALLBACKS* p11UMCallbacks; // in: callbacks that stay in usermode
+
+   HANDLE hContext;                                   // out: Context handle
+} VkD3DDDICallbacks;
+
+typedef struct {
+   VkStructureType sType;
+   void *pNext;
+   HANDLE hRTResource;
+   const D3D10DDIARG_CREATERESOURCE *pCreateResource;
+} VkD3DDDICreateResource;
+
+typedef struct {
+   VkStructureType sType;
+   void *pNext;
+   HANDLE hRTResource;
+   const D3D10DDIARG_OPENRESOURCE *pOpenResource;
+   const void *pResourceInfo; /* VIOGPU_RES_INFO_REQ */
+} VkD3DDDIOpenResource;
+
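+/*
+ * Illustrative sketch (hypothetical usage, names are placeholders): a WDDM
+ * user-mode driver chains one of these structs into the pNext of
+ * VkMemoryAllocateInfo so that venus can create the kernel allocation
+ * eagerly (see vn_device_memory_alloc() below):
+ *
+ *   VkD3DDDICreateResource create_res = {
+ *      .sType = VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE,
+ *      .hRTResource = hRTResource,
+ *      .pCreateResource = pCreateResourceArgs,
+ *   };
+ *   VkMemoryAllocateInfo alloc_info = {
+ *      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ *      .pNext = &create_res,
+ *      .allocationSize = size,
+ *      .memoryTypeIndex = mem_type_index,
+ *   };
+ *   vkAllocateMemory(device, &alloc_info, NULL, &memory);
+ */
+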
+#define VK_STRUCTURE_TYPE_D3DDDI_CALLBACKS_cast VkD3DDDICallbacks
+#define VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE_cast VkD3DDDICreateResource
+#define VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE_cast VkD3DDDIOpenResource
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/winddk/.gitignore b/include/winddk/.gitignore
index 14e87aa12db..fcc125cfd7c 100644
--- a/include/winddk/.gitignore
+++ b/include/winddk/.gitignore
@@ -1,6 +1,8 @@
 d3d10tokenizedprogramformat.hpp
+d3d10TokenizedProgramFormat.hpp
 d3d10umddi.h
 d3d11tokenizedprogramformat.hpp
+d3d11TokenizedProgramFormat.hpp
 d3dkmddi.h
 d3dkmdt.h
 d3dkmthk.h
diff --git a/src/virtio/virtio-gpu/wddm_hw.h b/src/virtio/virtio-gpu/wddm_hw.h
new file mode 100644
index 00000000000..13bcf414bea
--- /dev/null
+++ b/src/virtio/virtio-gpu/wddm_hw.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2019-2020 Red Hat, Inc.
+ *
+ * Written By: Vadim Rozenfeld
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and / or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of their contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include
+
+#pragma pack(1)
+typedef struct _VIOGPU_BOX
+{
+    ULONG x;
+    ULONG y;
+    ULONG z;
+    ULONG width;
+    ULONG height;
+    ULONG depth;
+} VIOGPU_BOX;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_BLOB_INFO {
+    ULONG width;
+    ULONG height;
+    ULONG format;
+    ULONG bind; // Same as virgl
+    ULONG strides[4];
+    ULONG offsets[4];
+} VIOGPU_BLOB_INFO, *PVIOGPU_BLOB_INFO;
+#pragma pack()
+
+// ================= QueryAdapterInfo UMDRIVERPRIVATE
+#define VIOGPU_IAM 0x56696f475055 // Identifier for queryadapterinfo (VioGPU as hex)
+
+#define VIOGPU_CAPSET_GFXSTREAM_VULKAN 3
+#define VIOGPU_CAPSET_VENUS 4
+
+typedef struct _VIOGPU_ADAPTERINFO
+{
+    ULONGLONG IamVioGPU; // Should be set by driver to VIOGPU_IAM
+    struct
+    {
+        UINT Supports3d : 1;
+        UINT HasShmem : 1;
+        UINT Reserved : 30;
+    } Flags;
+    ULONGLONG SupportedCapsetIDs;
+    LUID AdapterLuid;
+} VIOGPU_ADAPTERINFO;
+
+// ================= ESCAPES
+#define VIOGPU_GET_DEVICE_ID 0x000
+#define VIOGPU_GET_CUSTOM_RESOLUTION 0x001
+#define VIOGPU_GET_CAPS 0x002
+#define VIOGPU_GET_PCI_INFO 0x003
+
+#define VIOGPU_RES_INFO 0x100
+#define VIOGPU_RES_BUSY 0x101
+#define VIOGPU_RES_BLOB_SET_INFO 0x102
+
+#define VIOGPU_CTX_INIT 0x200
+
+#define VIOGPU_BLIT_INIT 0x300
+
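+/*
+ * Illustrative sketch (hypothetical usage; `adapter` and `device` are
+ * placeholder handles): user mode reaches these escape codes through
+ * D3DKMTEscape with D3DKMT_ESCAPE_DRIVERPRIVATE, roughly:
+ *
+ *   VIOGPU_ESCAPE esc = {
+ *      .Type = VIOGPU_GET_DEVICE_ID,
+ *      .DataLength = sizeof(esc.Id),
+ *   };
+ *   D3DKMT_ESCAPE args = {
+ *      .hAdapter = adapter,
+ *      .hDevice = device,
+ *      .Type = D3DKMT_ESCAPE_DRIVERPRIVATE,
+ *      .pPrivateDriverData = &esc,
+ *      .PrivateDriverDataSize = sizeof(esc),
+ *   };
+ *   NTSTATUS status = D3DKMTEscape(&args); // esc.Id then holds the device id
+ *
+ * The VIOGPU_ESCAPE container itself is defined further below.
+ */
+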
+#pragma pack(1)
+typedef struct _VIOGPU_DISP_MODE
+{
+    USHORT XResolution;
+    USHORT YResolution;
+} VIOGPU_DISP_MODE, *PVIOGPU_DISP_MODE;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_PARAM_REQ
+{
+    ULONG ParamId;
+    UINT64 Value;
+} VIOGPU_PARAM_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_CAPSET_REQ
+{
+    ULONG CapsetId;
+    ULONG Version;
+    ULONG Size;
+    UCHAR *Capset;
+} VIOGPU_CAPSET_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_PCI_INFO_REQ
+{
+    ULONG Domain;
+    ULONG Bus;
+    ULONG Dev;
+    ULONG Func;
+} VIOGPU_PCI_INFO_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_RES_INFO_REQ
+{
+    D3DKMT_HANDLE ResHandle;
+    ULONG Id;
+
+    BOOL IsBlob;
+    BOOL IsCreated;
+    BOOL InfoValid;
+
+    VIOGPU_BLOB_INFO Info;
+
+    ULONG BlobMem;
+    ULONGLONG BlobId;
+    ULONGLONG Size;
+} VIOGPU_RES_INFO_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_RES_BUSY_REQ
+{
+    D3DKMT_HANDLE ResHandle;
+    BOOL Wait;
+    BOOL IsBusy;
+} VIOGPU_RES_BUSY_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct {
+    D3DKMT_HANDLE ResHandle;
+    VIOGPU_BLOB_INFO Info;
+} VIOGPU_RES_BLOB_SET_INFO_REQ, *PVIOGPU_RES_BLOB_SET_INFO_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_CTX_INIT_REQ
+{
+    UINT CapsetID;
+    UINT NumRings;
+    UCHAR DebugName[64];
+} VIOGPU_CTX_INIT_REQ;
+#pragma pack()
+
+typedef struct _VIOGPU_BLIT_PRESENT VIOGPU_BLIT_PRESENT, *PVIOGPU_BLIT_PRESENT;
+
+#pragma pack(1)
+typedef struct _VIOGPU_BLIT_INIT_REQ
+{
+    HANDLE EventUM;
+    HANDLE EventKM;
+    PVIOGPU_BLIT_PRESENT pBlitPresent;
+} VIOGPU_BLIT_INIT_REQ;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_ESCAPE
+{
+    USHORT Type;
+    USHORT DataLength;
+    union {
+        ULONG Id;
+        VIOGPU_DISP_MODE Resolution;
+        VIOGPU_PARAM_REQ Parameter;
+        VIOGPU_CAPSET_REQ Capset;
+        VIOGPU_PCI_INFO_REQ PciInfo;
+
+        VIOGPU_RES_INFO_REQ ResourceInfo;
+        VIOGPU_RES_BUSY_REQ ResourceBusy;
+        VIOGPU_RES_BLOB_SET_INFO_REQ BlobInfoSet;
+
+        VIOGPU_CTX_INIT_REQ CtxInit;
+
+        VIOGPU_BLIT_INIT_REQ BlitInit;
+    } DUMMYUNIONNAME;
+} VIOGPU_ESCAPE, *PVIOGPU_ESCAPE;
+#pragma pack()
+
+// ================= CreateResource
+#pragma pack(1)
+typedef struct _VIOGPU_RESOURCE_3D_OPTIONS
+{
+    ULONG target;
+    ULONG format;
+    ULONG bind;
+    ULONG width;
+    ULONG height;
+    ULONG depth;
+    ULONG array_size;
+    ULONG last_level;
+    ULONG nr_samples;
+    ULONG flags;
+} VIOGPU_RESOURCE_3D_OPTIONS;
+#pragma pack()
+
+#define VIOGPU_BLOB_MEM_GUEST 0x0001
+#define VIOGPU_BLOB_MEM_HOST3D 0x0002
+#define VIOGPU_BLOB_MEM_HOST3D_GUEST 0x0003
+
+#define VIOGPU_BLOB_FLAG_USE_MAPPABLE 0x0001
+#define VIOGPU_BLOB_FLAG_USE_SHAREABLE 0x0002
+//#define VIOGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004
+#pragma pack(1)
+typedef struct _VIOGPU_RESOURCE_BLOB_OPTIONS
+{
+    ULONG blob_mem;
+    ULONG blob_flags;
+    ULONGLONG blob_id;
+} VIOGPU_RESOURCE_BLOB_OPTIONS;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_CREATE_RESOURCE_EXCHANGE
+{
+    ULONG magic;
+} VIOGPU_CREATE_RESOURCE_EXCHANGE;
+#pragma pack()
+
+#define VIOGPU_RESOURCE_TYPE_3D 0
+#define VIOGPU_RESOURCE_TYPE_BLOB 1
+#pragma pack(1)
+typedef struct _VIOGPU_CREATE_ALLOCATION_EXCHANGE
+{
+    ULONG Type;
+    union {
+        VIOGPU_RESOURCE_3D_OPTIONS Options3D;
+        VIOGPU_RESOURCE_BLOB_OPTIONS OptionsBlob;
+    };
+    ULONGLONG Size;
+} VIOGPU_CREATE_ALLOCATION_EXCHANGE;
+#pragma pack()
+
+// ================= BLIT
+
+#pragma pack(1)
+struct _VIOGPU_BLIT_PRESENT
+{
+    struct {
+        void *resource;
+        RECT rect;
+    } src;
+    struct {
+        VIOGPU_CREATE_ALLOCATION_EXCHANGE alloc;
+        VIOGPU_RES_INFO_REQ res_info;
+        RECT rect;
+    } dst;
+};
+#pragma pack()
+
+// ================= COMMAND BUFFER
+#define VIOGPU_CMD_NOP 0x0
+#define VIOGPU_CMD_SUBMIT 0x1             // Submit command to virgl
+#define VIOGPU_CMD_TRANSFER_TO_HOST 0x2   // Transfer resource to host
+#define VIOGPU_CMD_TRANSFER_FROM_HOST 0x3 // Transfer resource from host
+#define VIOGPU_CMD_MAP_BLOB 0x4           // Map blob resource
+#define VIOGPU_CMD_UNMAP_BLOB 0x5         // Unmap blob resource
+
+//#define VIOGPU_CMD_SUBMIT_UM 0x6
+
+// #define VIOGPU_EXECBUF_FENCE_FD_IN 0x01
+// #define VIOGPU_EXECBUF_FENCE_FD_OUT 0x02
+#define VIOGPU_EXECBUF_RING_IDX 0x04
+#define VIOGPU_EXECBUF_VIRGL 0x08
+// #define VIOGPU_EXECBUF_FLAGS (VIOGPU_EXECBUF_FENCE_FD_IN | VIOGPU_EXECBUF_FENCE_FD_OUT | VIOGPU_EXECBUF_RING_IDX)
+
+#pragma pack(1)
+typedef struct _VIOGPU_COMMAND_HDR
+{
+    UINT type;
+    UINT size;
+    UINT flags;
+    UINT ring_idx;
+} VIOGPU_COMMAND_HDR;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_TRANSFER_CMD
+{
+    ULONG res_id;
+
+    VIOGPU_BOX box;
+
+    ULONGLONG offset;
+    ULONG level;
+    ULONG stride;
+    ULONG layer_stride;
+} VIOGPU_TRANSFER_CMD;
+#pragma pack()
+
+#pragma pack(1)
+typedef struct _VIOGPU_BEGIN_UM_BLIT_CMD
+{
+    RECT src, dst;
+} VIOGPU_BEGIN_UM_BLIT_CMD;
+#pragma pack()
+
+#define BASE_NAMED_OBJECTS L"\\BaseNamedObjects\\"
+#define GLOBAL_OBJECTS L"Global\\"
+#define RESOLUTION_EVENT_NAME L"VioGpuResolutionEvent"
diff --git a/src/virtio/vulkan/meson.build b/src/virtio/vulkan/meson.build
index e9e9dca83bb..74900f2e369 100644
--- a/src/virtio/vulkan/meson.build
+++ b/src/virtio/vulkan/meson.build
@@ -105,6 +105,8 @@ vn_link_args = [
   vulkan_icd_link_args,
 ]
 
+vn_kwargs = {}
+
 vn_libs = []
 
 if not with_platform_windows
@@ -136,6 +138,16 @@ if with_platform_android
   vn_deps += [dep_android, idep_u_gralloc]
 endif
 
+if with_platform_windows
+  libvn_files += files('vn_renderer_virtgpu_win32.c')
+  vn_incs += inc_winddk
+  vn_link_args += '-static'
+  vn_kwargs = {
+    'vs_module_defs': vulkan_api_def,
+    'name_prefix': '',
+  }
+endif
+
 libvulkan_virtio = shared_library(
   'vulkan_virtio',
   [libvn_files, vn_entrypoints, sha1_h],
@@ -146,5 +158,6 @@ libvulkan_virtio = shared_library(
   link_args : vn_link_args,
   link_depends : vulkan_icd_link_depends,
   gnu_symbol_visibility : 'hidden',
+  kwargs: vn_kwargs,
   install : true,
 )
diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c
index aa67cd34500..f36e9667808 100644
--- a/src/virtio/vulkan/vn_common.c
+++ b/src/virtio/vulkan/vn_common.c
@@ -103,9 +103,11 @@ vn_log(struct vn_instance *instance, const char *format, ...)
 VkResult
 vn_log_result(struct vn_instance *instance,
               VkResult result,
+              const char *file,
+              int line,
               const char *where)
 {
-   vn_log(instance, "%s: %s", where, vk_Result_to_str(result));
+   vn_log(instance, "%s:%d: %s: %s", file, line, where, vk_Result_to_str(result));
 
    return result;
 }
diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h
index 7836bc86463..b49f5ad09b4 100644
--- a/src/virtio/vulkan/vn_common.h
+++ b/src/virtio/vulkan/vn_common.h
@@ -66,7 +66,7 @@
 #define VN_PERF(category) (unlikely(vn_env.perf & VN_PERF_##category))
 
 #define vn_error(instance, error) \
-   (VN_DEBUG(RESULT) ? vn_log_result((instance), (error), __func__) : (error))
+   (VN_DEBUG(RESULT) ? vn_log_result((instance), (error), __FILE__, __LINE__, __func__) : (error))
 
 #define vn_result(instance, result) \
    ((result) >= VK_SUCCESS ? (result) : vn_error((instance), (result)))
@@ -326,6 +326,8 @@ vn_log(struct vn_instance *instance, const char *format, ...)
 VkResult
 vn_log_result(struct vn_instance *instance,
               VkResult result,
+              const char *file,
+              int line,
               const char *where);
 
 #define VN_REFCOUNT_INIT(val) \
diff --git a/src/virtio/vulkan/vn_device.c b/src/virtio/vulkan/vn_device.c
index 35796c6b398..acf5d10f882 100644
--- a/src/virtio/vulkan/vn_device.c
+++ b/src/virtio/vulkan/vn_device.c
@@ -196,6 +196,12 @@ find_extension_names(const char *const *exts,
                      uint32_t ext_count,
                      const char *name)
 {
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   /* Report win32 extensions as supported; they are implemented entirely in
+    * the driver. */
+   if (strstr(name, "win32"))
+      return true;
+#endif
+
    for (uint32_t i = 0; i < ext_count; i++) {
       if (!strcmp(exts[i], name))
          return true;
@@ -370,6 +376,21 @@ vn_device_fix_create_info(const struct vn_device *dev,
       block_exts[block_count++] = VK_EXT_PCI_BUS_INFO_EXTENSION_NAME;
    }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   if (app_exts->KHR_external_fence_win32) {
+      /* see vn_physical_device_get_native_extensions */
+      block_exts[block_count++] = VK_KHR_EXTERNAL_FENCE_WIN32_EXTENSION_NAME;
+   }
+   if (app_exts->KHR_external_semaphore_win32) {
+      /* see vn_physical_device_get_native_extensions */
+      block_exts[block_count++] = VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME;
+   }
+   if (app_exts->KHR_external_memory_win32) {
+      /* see vn_physical_device_get_native_extensions */
+      block_exts[block_count++] = VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME;
+   }
+#endif
+
    assert(extra_count <= ARRAY_SIZE(extra_exts));
    assert(block_count <= ARRAY_SIZE(block_exts));
diff --git a/src/virtio/vulkan/vn_device_memory.c b/src/virtio/vulkan/vn_device_memory.c
index 213830aa9be..116865b8db8 100644
--- a/src/virtio/vulkan/vn_device_memory.c
+++ b/src/virtio/vulkan/vn_device_memory.c
@@ -22,6 +22,14 @@
 #include "vn_renderer.h"
 #include "vn_renderer_util.h"
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+#define _D3D10_CONSTANTS
+#define _D3D10_1_CONSTANTS
+#include
+#include
+#include
+#endif
+
 /* device memory commands */
 
 static inline VkResult
@@ -83,7 +91,8 @@ vn_device_memory_wait_alloc(struct vn_device *dev,
 }
 
 static inline VkResult
-vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem)
+vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem,
+                         const VkMemoryAllocateInfo *alloc_info)
 {
    VkResult result = vn_device_memory_wait_alloc(dev, mem);
    if (result != VK_SUCCESS)
@@ -94,7 +103,7 @@ vn_device_memory_bo_init(struct vn_device *dev, struct vn_device_memory *mem)
       .memoryTypes[mem_vk->memory_type_index];
    return vn_renderer_bo_create_from_device_memory(
       dev->renderer, mem_vk->size, mem->base.id, mem_type->propertyFlags,
-      mem_vk->export_handle_types, &mem->base_bo);
+      mem_vk->export_handle_types, alloc_info, &mem->base_bo);
 }
 
 static inline void
@@ -106,6 +115,58 @@ vn_device_memory_bo_fini(struct vn_device *dev, struct vn_device_memory *mem)
    }
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static VkResult
+vn_device_memory_import_handle(struct vn_device *dev,
+                               struct vn_device_memory *mem,
+                               const VkMemoryAllocateInfo *alloc_info,
+                               bool is_kmt,
+                               void *handle)
+{
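+   /* Import flow: wrap the win32 handle in a renderer BO first, then
+    * allocate the host memory with VkImportMemoryResourceInfoMESA pointing
+    * at the BO's res_id so the renderer binds the allocation to the imported
+    * resource.  For dedicated allocations the BO is created with the
+    * requested size; otherwise size 0 is passed and the renderer derives it
+    * from the underlying resource. */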
+   const VkMemoryType *mem_type =
+      &dev->physical_device->memory_properties
+          .memoryTypes[alloc_info->memoryTypeIndex];
+   const VkMemoryDedicatedAllocateInfo *dedicated_info =
+      vk_find_struct_const(alloc_info->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+   const bool is_dedicated =
+      dedicated_info && (dedicated_info->image != VK_NULL_HANDLE ||
+                         dedicated_info->buffer != VK_NULL_HANDLE);
+
+   struct vn_renderer_bo *bo;
+   VkResult result = vn_renderer_bo_create_from_handle(
+      dev->renderer, is_dedicated ? alloc_info->allocationSize : 0,
+      mem->base.id, is_kmt, handle, mem_type->propertyFlags, alloc_info, &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   vn_ring_roundtrip(dev->primary_ring);
+
+   const VkImportMemoryResourceInfoMESA import_memory_resource_info = {
+      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_RESOURCE_INFO_MESA,
+      .pNext = alloc_info->pNext,
+      .resourceId = bo->res_id,
+   };
+   const VkMemoryAllocateInfo memory_allocate_info = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+      .pNext = &import_memory_resource_info,
+      .allocationSize = alloc_info->allocationSize,
+      .memoryTypeIndex = alloc_info->memoryTypeIndex,
+   };
+   result = vn_device_memory_alloc_simple(dev, mem, &memory_allocate_info);
+   if (result != VK_SUCCESS) {
+      vn_renderer_bo_unref(dev->renderer, bo);
+      return result;
+   }
+
+   if (!is_kmt) {
+      /* close the imported handle on success to avoid a handle leak */
+      CloseHandle((HANDLE)handle);
+   }
+   mem->base_bo = bo;
+
+   return VK_SUCCESS;
+}
+#else
 VkResult
 vn_device_memory_import_dma_buf(struct vn_device *dev,
                                 struct vn_device_memory *mem,
@@ -148,6 +209,7 @@ vn_device_memory_import_dma_buf(struct vn_device *dev,
 
    return VK_SUCCESS;
 }
+#endif
 
 static VkResult
 vn_device_memory_alloc_guest_vram(struct vn_device *dev,
@@ -169,7 +231,7 @@ vn_device_memory_alloc_guest_vram(struct vn_device *dev,
 
    VkResult result = vn_renderer_bo_create_from_device_memory(
       dev->renderer, mem_vk->size, mem->base.id, flags,
-      mem_vk->export_handle_types, &mem->base_bo);
+      mem_vk->export_handle_types, alloc_info, &mem->base_bo);
    if (result != VK_SUCCESS) {
       return result;
    }
@@ -207,7 +269,7 @@ vn_device_memory_alloc_export(struct vn_device *dev,
    if (result != VK_SUCCESS)
       return result;
 
-   result = vn_device_memory_bo_init(dev, mem);
+   result = vn_device_memory_bo_init(dev, mem, alloc_info);
    if (result != VK_SUCCESS) {
       vn_device_memory_free_simple(dev, mem);
       return result;
@@ -232,6 +294,10 @@ struct vn_device_memory_alloc_info {
    VkMemoryAllocateFlagsInfo flags;
    VkMemoryDedicatedAllocateInfo dedicated;
    VkMemoryOpaqueCaptureAddressAllocateInfo capture;
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   VkD3DDDICreateResource d3d_create;
+   VkD3DDDIOpenResource d3d_open;
+#endif
 };
 
 static const VkMemoryAllocateInfo *
@@ -267,10 +333,22 @@ vn_device_memory_fix_alloc_info(
          memcpy(&local_info->capture, src, sizeof(local_info->capture));
          next = &local_info->capture;
          break;
+      default: break;
       }
 
+      /* FIXME: -Werror=switch */
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+      if (src->sType == VK_STRUCTURE_TYPE_D3DDDI_CREATE_RESOURCE) {
+         memcpy(&local_info->d3d_create, src, sizeof(local_info->d3d_create));
+         next = &local_info->d3d_create;
+      } else if (src->sType == VK_STRUCTURE_TYPE_D3DDDI_OPEN_RESOURCE) {
+         memcpy(&local_info->d3d_open, src, sizeof(local_info->d3d_open));
+         next = &local_info->d3d_open;
+      }
+#endif
+
       if (next) {
         cur->pNext = next;
         cur = next;
@@ -304,10 +382,21 @@ vn_device_memory_alloc(struct vn_device *dev,
       alloc_info = vn_device_memory_fix_alloc_info(
          alloc_info, renderer_handle_type, has_guest_vram, &local_info);
 
+      // FIXME: this is slightly wrong for Windows
       /* ensure correct blob flags */
       mem_vk->export_handle_types = renderer_handle_type;
    }
 
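+   /* A chained D3DDDI create/open struct means the WDDM runtime needs the
+    * renderer BO (i.e. the kernel-mode allocation) to exist before
+    * vkAllocateMemory returns, hence the eager alloc_export path below. */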
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   const bool need_bo_now =
+      vk_find_struct_const(alloc_info, D3DDDI_CREATE_RESOURCE) != NULL ||
+      vk_find_struct_const(alloc_info, D3DDDI_OPEN_RESOURCE) != NULL;
+
+   if (need_bo_now) {
+      return vn_device_memory_alloc_export(dev, mem, alloc_info);
+   }
+#endif
+
    if (has_guest_vram && (host_visible || export_alloc)) {
       return vn_device_memory_alloc_guest_vram(dev, mem, alloc_info);
    } else if (export_alloc) {
@@ -367,10 +456,10 @@ vn_AllocateMemory(VkDevice device,
 
    vn_object_set_id(mem, vn_get_next_obj_id(), VK_OBJECT_TYPE_DEVICE_MEMORY);
 
+   VkResult result;
+#ifndef VK_USE_PLATFORM_WIN32_KHR
    const VkImportMemoryFdInfoKHR *import_fd_info =
       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
-
-   VkResult result;
    if (mem->base.vk.ahardware_buffer) {
       result = vn_android_device_import_ahb(dev, mem, pAllocateInfo);
    } else if (import_fd_info) {
@@ -381,6 +470,17 @@ vn_AllocateMemory(VkDevice device,
       if (result == VK_SUCCESS)
         vn_wsi_memory_info_init(mem, pAllocateInfo);
    }
+#else
+   const VkImportMemoryWin32HandleInfoKHR *import_win32_info =
+      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR);
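+   /* Only VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT is treated as an
+    * NT handle; every other handle type takes the KMT path.  NT handles are
+    * closed by the import on success, while KMT-style global handles are not
+    * owned by the caller and are left open. */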
+   if (import_win32_info) {
+      const bool is_kmt = !(import_win32_info->handleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT);
+      result = vn_device_memory_import_handle(dev, mem, pAllocateInfo,
+                                              is_kmt, import_win32_info->handle);
+   } else {
+      result = vn_device_memory_alloc(dev, mem, pAllocateInfo);
+   }
+#endif
 
    vn_device_memory_emit_report(dev, mem, /* is_alloc */ true, result);
 
@@ -455,7 +555,7 @@ vn_MapMemory2(VkDevice device,
     * the extension.
     */
    if (need_bo) {
-      result = vn_device_memory_bo_init(dev, mem);
+      result = vn_device_memory_bo_init(dev, mem, NULL);
       if (result != VK_SUCCESS)
          return vn_error(dev->instance, result);
    }
@@ -548,6 +648,92 @@ vn_GetDeviceMemoryCommitment(VkDevice device,
                                            pCommittedMemoryInBytes);
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VKAPI_ATTR VkResult VKAPI_CALL
+vn_GetMemoryWin32HandleKHR(VkDevice device,
+                           const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo,
+                           HANDLE *pHandle)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_device_memory *mem =
+      vn_device_memory_from_handle(pGetWin32HandleInfo->memory);
+
+   /* At the moment, we support only the handle types below.
+    */
+   assert(pGetWin32HandleInfo->handleType &
+          (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT |
+           VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT));
+   assert(mem->base_bo);
+   const bool is_kmt = !(pGetWin32HandleInfo->handleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT);
+   *pHandle = vn_renderer_bo_export_handle(dev->renderer, mem->base_bo, is_kmt);
+   if (*pHandle == NULL)
+      return vn_error(dev->instance, VK_ERROR_TOO_MANY_OBJECTS);
+
+   return VK_SUCCESS;
+}
+
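+/* Querying handle properties requires a renderer round trip: temporarily
+ * import the handle as a renderer BO to obtain a res_id, ask the renderer
+ * for the matching memoryTypeBits via vkGetMemoryResourcePropertiesMESA,
+ * then drop the BO again. */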
+static VkResult
+vn_get_memory_handle_properties(struct vn_device *dev,
+                                bool is_kmt,
+                                void *handle,
+                                void *alloc_info,
+                                uint32_t *out_mem_type_bits)
+{
+   VkDevice device = vn_device_to_handle(dev);
+
+   struct vn_renderer_bo *bo;
+   VkResult result = vn_renderer_bo_create_from_handle(
+      dev->renderer, 0 /* size */, 0 /* id */, is_kmt, handle, 0 /* flags */, alloc_info, &bo);
+   if (result != VK_SUCCESS) {
+      vn_log(dev->instance, "bo_create_from_handle failed");
+      return result;
+   }
+
+   vn_ring_roundtrip(dev->primary_ring);
+
+   VkMemoryResourcePropertiesMESA props = {
+      .sType = VK_STRUCTURE_TYPE_MEMORY_RESOURCE_PROPERTIES_MESA,
+   };
+   result = vn_call_vkGetMemoryResourcePropertiesMESA(
+      dev->primary_ring, device, bo->res_id, &props);
+   vn_renderer_bo_unref(dev->renderer, bo);
+   if (result != VK_SUCCESS) {
+      vn_log(dev->instance, "vkGetMemoryResourcePropertiesMESA failed");
+      return result;
+   }
+
+   *out_mem_type_bits = props.memoryTypeBits;
+
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+vn_GetMemoryWin32HandlePropertiesKHR(VkDevice device,
+                                     VkExternalMemoryHandleTypeFlagBits handleType,
+                                     HANDLE handle,
+                                     VkMemoryWin32HandlePropertiesKHR *pMemoryWin32HandleProperties)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   uint32_t mem_type_bits = 0;
+   VkResult result = VK_SUCCESS;
+
+   if (handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT &&
+       handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT)
+      return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+   const bool is_kmt = handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
+
+   result = vn_get_memory_handle_properties(
+      dev, is_kmt, handle, pMemoryWin32HandleProperties, &mem_type_bits);
+   if (result != VK_SUCCESS)
+      return vn_error(dev->instance, result);
+
+   pMemoryWin32HandleProperties->memoryTypeBits = mem_type_bits;
+
+   return VK_SUCCESS;
+}
+#else
 VKAPI_ATTR VkResult VKAPI_CALL
 vn_GetMemoryFdKHR(VkDevice device,
                   const VkMemoryGetFdInfoKHR *pGetFdInfo,
@@ -625,3 +811,4 @@ vn_GetMemoryFdPropertiesKHR(VkDevice device,
 
    return VK_SUCCESS;
 }
+#endif
diff --git a/src/virtio/vulkan/vn_device_memory.h b/src/virtio/vulkan/vn_device_memory.h
index d2b74b297fe..0374ebb0d3a 100644
--- a/src/virtio/vulkan/vn_device_memory.h
+++ b/src/virtio/vulkan/vn_device_memory.h
@@ -57,6 +57,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vn_device_memory,
                                VkDeviceMemory,
                                VK_OBJECT_TYPE_DEVICE_MEMORY)
 
+#ifndef VK_USE_PLATFORM_WIN32_KHR
 VkResult
 vn_device_memory_import_dma_buf(struct vn_device *dev,
                                 struct vn_device_memory *mem,
@@ -67,5 +68,6 @@ VkResult
 vn_get_memory_dma_buf_properties(struct vn_device *dev,
                                  int fd,
                                  uint32_t *out_mem_type_bits);
+#endif
 
 #endif /* VN_DEVICE_MEMORY_H */
diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c
index 8a3f9af873a..a02f1ec0417 100644
--- a/src/virtio/vulkan/vn_instance.c
+++ b/src/virtio/vulkan/vn_instance.c
@@ -56,7 +56,9 @@ static const struct vk_instance_extension_table
 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
    .EXT_acquire_xlib_display = true,
 #endif
-#ifndef VK_USE_PLATFORM_WIN32_KHR
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   .KHR_win32_surface = true,
+#else
    .EXT_headless_surface = true,
 #endif
 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
@@ -167,11 +169,11 @@ vn_instance_init_ring(struct vn_instance *instance)
 }
 
 static VkResult
-vn_instance_init_renderer(struct vn_instance *instance)
+vn_instance_init_renderer(struct vn_instance *instance, const VkInstanceCreateInfo *pCreateInfo)
 {
    const VkAllocationCallbacks *alloc = &instance->base.vk.alloc;
 
-   VkResult result = vn_renderer_create(instance, alloc, &instance->renderer);
+   VkResult result = vn_renderer_create(instance, alloc, pCreateInfo, &instance->renderer);
    if (result != VK_SUCCESS)
       return result;
@@ -314,7 +316,7 @@ vn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
       goto out_mtx_destroy;
    }
 
-   result = vn_instance_init_renderer(instance);
+   result = vn_instance_init_renderer(instance, pCreateInfo);
    if (result == VK_ERROR_INITIALIZATION_FAILED) {
       assert(!instance->renderer);
       *pInstance = instance_handle;
diff --git a/src/virtio/vulkan/vn_physical_device.c b/src/virtio/vulkan/vn_physical_device.c
index 28d05f5e66c..ba91a4aff3f 100644
--- a/src/virtio/vulkan/vn_physical_device.c
+++ b/src/virtio/vulkan/vn_physical_device.c
@@ -1171,19 +1171,27 @@ vn_physical_device_get_native_extensions(
 
    if (physical_dev->instance->renderer->info.has_external_sync &&
       physical_dev->renderer_sync_fd.fence_exportable) {
+#if DETECT_OS_WINDOWS
+      exts->KHR_external_fence_win32 = true;
+#else
      if (physical_dev->external_fence_handles ==
          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
         exts->KHR_external_fence_fd = true;
      }
+#endif
    }
 
    if (physical_dev->instance->renderer->info.has_external_sync &&
      physical_dev->renderer_sync_fd.semaphore_importable &&
      physical_dev->renderer_sync_fd.semaphore_exportable) {
+#if DETECT_OS_WINDOWS
+      exts->KHR_external_semaphore_win32 = true;
+#else
      if (physical_dev->external_binary_semaphore_handles ==
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
         exts->KHR_external_semaphore_fd = true;
      }
+#endif
    }
 
 #ifdef VK_USE_PLATFORM_ANDROID_KHR
@@ -1208,10 +1216,13 @@ vn_physical_device_get_native_extensions(
    }
 #else /* VK_USE_PLATFORM_ANDROID_KHR */
    if (physical_dev->external_memory.renderer_handle_type) {
-#if !DETECT_OS_WINDOWS
+#if DETECT_OS_WINDOWS
+      exts->KHR_external_memory_win32 = true;
+      exts->KHR_win32_keyed_mutex = true;
+#else
       exts->KHR_external_memory_fd = true;
       exts->EXT_external_memory_dma_buf = true;
-#endif /* !DETECT_OS_WINDOWS */
+#endif /* DETECT_OS_WINDOWS */
    }
 #endif /* VK_USE_PLATFORM_ANDROID_KHR */
 
@@ -2796,7 +2807,7 @@ vn_GetPhysicalDeviceImageFormatProperties2(
       pImageFormatInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    if (wsi_info &&
       !vn_wsi_validate_image_format_info(physical_dev, pImageFormatInfo)) {
-      return vn_error(physical_dev->instance, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      return VK_ERROR_FORMAT_NOT_SUPPORTED;
    }
 
    const VkPhysicalDeviceExternalImageFormatInfo *external_info =
@@ -2806,8 +2817,7 @@ vn_GetPhysicalDeviceImageFormatProperties2(
       if (!external_info->handleType) {
         external_info = NULL;
      } else if (!(external_info->handleType & supported_handle_types)) {
-         return vn_error(physical_dev->instance,
-                         VK_ERROR_FORMAT_NOT_SUPPORTED);
+         return VK_ERROR_FORMAT_NOT_SUPPORTED;
      }
 
      /* Fully resolve AHB image format query on the driver side.
      */
@@ -2830,16 +2840,14 @@ vn_GetPhysicalDeviceImageFormatProperties2(
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT &&
          pImageFormatInfo->tiling !=
             VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
-         return vn_error(physical_dev->instance,
-                         VK_ERROR_FORMAT_NOT_SUPPORTED);
+         return VK_ERROR_FORMAT_NOT_SUPPORTED;
      }
 
      if (external_info->handleType != renderer_handle_type) {
         pImageFormatInfo = vn_physical_device_fix_image_format_info(
            pImageFormatInfo, renderer_handle_type, &local_info);
        if (!pImageFormatInfo) {
-            return vn_error(physical_dev->instance,
-                            VK_ERROR_FORMAT_NOT_SUPPORTED);
+            return VK_ERROR_FORMAT_NOT_SUPPORTED;
        }
      }
    }
@@ -2884,6 +2892,10 @@ vn_GetPhysicalDeviceImageFormatProperties2(
      }
    }
 
+   /* Silence the log spam */
+   if (result == VK_ERROR_FORMAT_NOT_SUPPORTED)
+      return result;
+
    return vn_result(physical_dev->instance, result);
 }
diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c
index 9756142fd58..58b32e4ba71 100644
--- a/src/virtio/vulkan/vn_queue.c
+++ b/src/virtio/vulkan/vn_queue.c
@@ -25,6 +25,21 @@
 #include "vn_renderer.h"
 #include "vn_wsi.h"
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+#include
+static inline VkResult sync_wait_handle(void *handle, int timeout)
+{
+   switch (WaitForSingleObject(handle, timeout)) {
+   case WAIT_OBJECT_0:
+      return VK_SUCCESS;
+   case WAIT_TIMEOUT:
+      return VK_NOT_READY;
+   default:
+      return VK_ERROR_DEVICE_LOST;
+   }
+}
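+/* WAIT_ABANDONED and WAIT_FAILED both land in the default case above and are
+ * reported as VK_ERROR_DEVICE_LOST; the timeout is in milliseconds, with
+ * INFINITE requesting a blocking wait. */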
+#endif
+
 /* queue commands */
 
 struct vn_submit_info_pnext_fix {
@@ -398,7 +413,11 @@ vn_queue_submission_fix_batch_semaphores(struct vn_queue_submission *submit,
      struct vn_semaphore *sem = vn_semaphore_from_handle(sem_handle);
      const struct vn_sync_payload *payload = sem->payload;
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+      if (payload->type != VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE)
+#else
      if (payload->type != VN_SYNC_TYPE_IMPORTED_SYNC_FD)
+#endif
        continue;
 
      if (!vn_semaphore_wait_external(dev, sem))
@@ -1539,8 +1558,13 @@ static void
 vn_sync_payload_release(UNUSED struct vn_device *dev,
                         struct vn_sync_payload *payload)
 {
-   if (payload->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD && payload->fd >= 0)
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   if (payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE && payload->handle != NULL)
+      CloseHandle(payload->handle);
+#else
+   if (payload->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD && is_fd_valid(payload->fd))
       close(payload->fd);
+#endif
 
    payload->type = VN_SYNC_TYPE_INVALID;
 }
@@ -1763,12 +1787,19 @@ vn_GetFenceStatus(VkDevice device, VkFence _fence)
         result = vn_call_vkGetFenceStatus(dev->primary_ring, device, _fence);
      }
      break;
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   case VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE:
+      //vn_log(dev->instance, "waiting for handle %p", payload->handle);
+      result = payload->handle != NULL ? sync_wait_handle(payload->handle, 0) : VK_SUCCESS;
+      break;
+#else
   case VN_SYNC_TYPE_IMPORTED_SYNC_FD:
-      if (payload->fd < 0 || sync_wait(payload->fd, 0) == 0)
+      if (!is_fd_valid(payload->fd) || sync_wait(payload->fd, 0) == 0)
        result = VK_SUCCESS;
      else
        result = errno == ETIME ? VK_NOT_READY : VK_ERROR_DEVICE_LOST;
      break;
+#endif
   default:
      UNREACHABLE("unexpected fence payload type");
      break;
@@ -1869,6 +1900,128 @@ vn_WaitForFences(VkDevice device,
    return vn_result(dev->instance, result);
 }
 
+
+
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static VkResult
+vn_create_sync_handle(struct vn_device *dev,
+                      struct vn_sync_payload_external *external_payload,
+                      HANDLE *out_handle)
+{
+   struct vn_renderer_sync *sync;
+   VkResult result = vn_renderer_sync_create(dev->renderer, 0,
+                                             VN_RENDERER_SYNC_BINARY, &sync);
+   if (result != VK_SUCCESS)
+      return vn_error(dev->instance, result);
+
+   struct vn_renderer_submit_batch batch = {
+      .syncs = &sync,
+      .sync_values = &(const uint64_t){ 1 },
+      .sync_count = 1,
+      .ring_idx = external_payload->ring_idx,
+   };
+
+   uint32_t local_data[8];
+   struct vn_cs_encoder local_enc =
+      VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data));
+   if (external_payload->ring_seqno_valid) {
+      const uint64_t ring_id = vn_ring_get_id(dev->primary_ring);
+      vn_encode_vkWaitRingSeqnoMESA(&local_enc, 0, ring_id,
+                                    external_payload->ring_seqno);
+      batch.cs_data = local_data;
+      batch.cs_size = vn_cs_encoder_get_len(&local_enc);
+   }
+
+   const struct vn_renderer_submit submit = {
+      .batches = &batch,
+      .batch_count = 1,
+   };
+   result = vn_renderer_submit(dev->renderer, &submit);
+   if (result != VK_SUCCESS) {
+      vn_renderer_sync_destroy(dev->renderer, sync);
+      return vn_error(dev->instance, result);
+   }
+
+   *out_handle = vn_renderer_sync_export_handle(dev->renderer, sync);
+   vn_renderer_sync_destroy(dev->renderer, sync);
+
+   return *out_handle != NULL ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS;
+}
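+
+/* Exactly one of `handle` and `name` may be provided on import; the check
+ * below rejects both-NULL and both-set.  A named import opens (or creates)
+ * an auto-reset event via CreateEventW instead of adopting a handle. */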
+VkResult
+vn_ImportFenceWin32HandleKHR(VkDevice device,
+                             const VkImportFenceWin32HandleInfoKHR *pImportFenceWin32HandleInfo)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_fence *fence = vn_fence_from_handle(pImportFenceWin32HandleInfo->fence);
+   ASSERTED const bool is_handle = pImportFenceWin32HandleInfo->handleType ==
+      VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT;
+   void *handle = pImportFenceWin32HandleInfo->handle;
+   const LPCWSTR name = pImportFenceWin32HandleInfo->name;
+
+   assert(is_handle);
+
+   if ((handle == NULL && name == NULL) || (handle != NULL && name != NULL))
+      return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+   struct vn_sync_payload *temp = &fence->temporary;
+   vn_sync_payload_release(dev, temp);
+   temp->type = VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE;
+   temp->handle = name != NULL ? CreateEventW(NULL, FALSE, FALSE, name) : handle;
+   fence->payload = temp;
+   //vn_log(dev->instance, "created handle %p", temp->handle);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+vn_GetFenceWin32HandleKHR(VkDevice device,
+                          const VkFenceGetWin32HandleInfoKHR *pGetWin32HandleInfo,
+                          HANDLE *pHandle)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_fence *fence = vn_fence_from_handle(pGetWin32HandleInfo->fence);
+   const bool is_handle =
+      pGetWin32HandleInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT;
+   struct vn_sync_payload *payload = fence->payload;
+   VkResult result;
+
+   assert(is_handle);
+   assert(dev->physical_device->renderer_sync_fd.fence_exportable);
+
+   HANDLE handle = NULL;
+   if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY) {
+      result = vn_create_sync_handle(dev, &fence->external_payload, &handle);
+      if (result != VK_SUCCESS)
+         return vn_error(dev->instance, result);
+
+      vn_async_vkResetFenceResourceMESA(dev->primary_ring, device,
+                                        pGetWin32HandleInfo->fence);
+
+      vn_sync_payload_release(dev, &fence->temporary);
+      fence->payload = &fence->permanent;
+   } else {
+      assert(payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE);
+
+      /* transfer ownership of the imported sync handle to save a dup */
+      handle = payload->handle;
+      payload->handle = NULL;
+
+      /* reset the host fence in case it was in the signaled state before the
+       * import */
+      result = vn_ResetFences(device, 1, &pGetWin32HandleInfo->fence);
+      if (result != VK_SUCCESS) {
+         /* transfer sync handle ownership back on error */
+         payload->handle = handle;
+         return result;
+      }
+   }
+
+   *pHandle = handle;
+   return VK_SUCCESS;
+}
+#else
 static VkResult
 vn_create_sync_file(struct vn_device *dev,
                     struct vn_sync_payload_external *external_payload,
@@ -1911,7 +2064,7 @@ vn_create_sync_file(struct vn_device *dev,
    *out_fd = vn_renderer_sync_export_syncobj(dev->renderer, sync, true);
    vn_renderer_sync_destroy(dev->renderer, sync);
 
-   return *out_fd >= 0 ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS;
+   return is_fd_valid(*out_fd) ? VK_SUCCESS : VK_ERROR_TOO_MANY_OBJECTS;
 }
 
 static inline bool
 vn_sync_valid_fd(int fd)
@@ -1920,7 +2073,7 @@ vn_sync_valid_fd(int fd)
    /* the special value -1 for fd is treated like a valid sync file descriptor
     * referring to an object that has already signaled
     */
-   return (fd >= 0 && sync_valid_fd(fd)) || fd == -1;
+   return (is_fd_valid(fd) && sync_valid_fd(fd)) || fd == -1;
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
@@ -1994,6 +2147,7 @@ vn_GetFenceFdKHR(VkDevice device,
    *pFd = fd;
    return VK_SUCCESS;
 }
+#endif
 
 /* semaphore commands */
 
@@ -2015,12 +2169,23 @@ vn_semaphore_wait_external(struct vn_device *dev, struct vn_semaphore *sem)
 {
    struct vn_sync_payload *temp = &sem->temporary;
 
+
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   assert(temp->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE);
+
+   if (temp->handle != NULL) {
+      //vn_log(dev->instance, "waiting for handle %p", temp->handle);
+      if (sync_wait_handle(temp->handle, INFINITE) != VK_SUCCESS)
+         return false;
+   }
+#else
    assert(temp->type == VN_SYNC_TYPE_IMPORTED_SYNC_FD);
 
    if (temp->fd >= 0) {
      if (sync_wait(temp->fd, -1))
        return false;
    }
+#endif
 
    vn_sync_payload_release(dev, &sem->temporary);
    sem->payload = &sem->permanent;
@@ -2409,6 +2574,96 @@ vn_WaitSemaphores(VkDevice device,
    return vn_result(dev->instance, result);
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VKAPI_ATTR VkResult VKAPI_CALL
+vn_ImportSemaphoreWin32HandleKHR(VkDevice device,
+                                 const VkImportSemaphoreWin32HandleInfoKHR *pImportSemaphoreWin32HandleInfo)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_semaphore *sem =
+      vn_semaphore_from_handle(pImportSemaphoreWin32HandleInfo->semaphore);
+   ASSERTED const bool is_handle =
+      pImportSemaphoreWin32HandleInfo->handleType ==
+      VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT;
+   void *handle = pImportSemaphoreWin32HandleInfo->handle;
+   const LPCWSTR name = pImportSemaphoreWin32HandleInfo->name;
+
+   assert(is_handle);
+
+   if ((handle == NULL && name == NULL) || (handle != NULL && name != NULL))
+      return vn_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+   struct vn_sync_payload *temp = &sem->temporary;
+   vn_sync_payload_release(dev, temp);
+   temp->type = VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE;
+   temp->handle = name != NULL ? CreateEventW(NULL, FALSE, FALSE, name) : handle;
+   sem->payload = temp;
+   //vn_log(dev->instance, "created handle %p", temp->handle);
+
+   return VK_SUCCESS;
+}
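+
+/* Illustrative sketch (hypothetical usage of the export path below):
+ *
+ *   const VkSemaphoreGetWin32HandleInfoKHR get_info = {
+ *      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
+ *      .semaphore = sem,
+ *      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+ *   };
+ *   HANDLE handle;
+ *   vkGetSemaphoreWin32HandleKHR(device, &get_info, &handle);
+ */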
+
+VKAPI_ATTR VkResult VKAPI_CALL
+vn_GetSemaphoreWin32HandleKHR(VkDevice device,
+                              const VkSemaphoreGetWin32HandleInfoKHR *pGetWin32HandleInfo,
+                              HANDLE *pHandle)
+{
+   VN_TRACE_FUNC();
+   struct vn_device *dev = vn_device_from_handle(device);
+   struct vn_semaphore *sem = vn_semaphore_from_handle(pGetWin32HandleInfo->semaphore);
+   const bool is_handle =
+      pGetWin32HandleInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT;
+   struct vn_sync_payload *payload = sem->payload;
+
+   assert(is_handle);
+   assert(dev->physical_device->renderer_sync_fd.semaphore_exportable);
+   assert(dev->physical_device->renderer_sync_fd.semaphore_importable);
+
+   HANDLE handle = NULL;
+   if (payload->type == VN_SYNC_TYPE_DEVICE_ONLY) {
+      VkResult result = vn_create_sync_handle(dev, &sem->external_payload, &handle);
+      if (result != VK_SUCCESS)
+         return vn_error(dev->instance, result);
+
+      vn_wsi_sync_wait_handle(dev, handle);
+   } else {
+      assert(payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE);
+
+      /* transfer ownership of the imported sync handle to save a dup */
+      handle = payload->handle;
+      payload->handle = NULL;
+   }
+
+   /* When payload->type is VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE, the current
+    * payload is from a prior temporary sync handle import.  The permanent
+    * payload of the semaphore might be in signaled state.  So we do an
+    * import here to ensure a later wait operation is legit.  With resourceId
+    * 0, the renderer does a signaled sync_fd -1 payload import on the host
+    * semaphore.
+    */
+   if (payload->type == VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE) {
+      const VkImportSemaphoreResourceInfoMESA res_info = {
+         .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_RESOURCE_INFO_MESA,
+         .semaphore = pGetWin32HandleInfo->semaphore,
+         .resourceId = 0,
+      };
+      vn_async_vkImportSemaphoreResourceMESA(dev->primary_ring, device,
+                                             &res_info);
+   }
+
+   /* perform the wait operation on the host semaphore */
+   vn_async_vkWaitSemaphoreResourceMESA(dev->primary_ring, device,
+                                        pGetWin32HandleInfo->semaphore);
+
+   vn_sync_payload_release(dev, &sem->temporary);
+   sem->payload = &sem->permanent;
+
+   *pHandle = handle;
+   return VK_SUCCESS;
+}
+
+#else
 VKAPI_ATTR VkResult VKAPI_CALL
 vn_ImportSemaphoreFdKHR(
    VkDevice device, const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
@@ -2494,6 +2749,7 @@ vn_GetSemaphoreFdKHR(VkDevice device,
    *pFd = fd;
    return VK_SUCCESS;
 }
+#endif
 
 /* event commands */
 
diff --git a/src/virtio/vulkan/vn_queue.h b/src/virtio/vulkan/vn_queue.h
index d4baefba062..75682054fa4 100644
--- a/src/virtio/vulkan/vn_queue.h
+++ b/src/virtio/vulkan/vn_queue.h
@@ -72,15 +72,25 @@ enum vn_sync_type {
    /* device object */
    VN_SYNC_TYPE_DEVICE_ONLY,
 
+#ifndef VK_USE_PLATFORM_WIN32_KHR
    /* payload is an imported sync file */
    VN_SYNC_TYPE_IMPORTED_SYNC_FD,
+#else
+   /* payload is an imported Win32 event handle */
+   VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE,
+#endif
 };
 
 struct vn_sync_payload {
    enum vn_sync_type type;
 
+#ifndef VK_USE_PLATFORM_WIN32_KHR
    /* If type is VN_SYNC_TYPE_IMPORTED_SYNC_FD, fd is a sync file. */
    int fd;
+#else
+   /* If type is VN_SYNC_TYPE_IMPORTED_WIN32_HANDLE, handle is a Win32 event handle. */
+   void *handle;
+#endif
 };
 
 /* For external fences and external semaphores submitted to be signaled. The
diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h
index 08f34df0454..dbee43f9811 100644
--- a/src/virtio/vulkan/vn_renderer.h
+++ b/src/virtio/vulkan/vn_renderer.h
@@ -152,18 +152,37 @@ struct vn_renderer_bo_ops {
                                            vn_object_id mem_id,
                                            VkMemoryPropertyFlags flags,
                                            VkExternalMemoryHandleTypeFlags external_handles,
+                                           const VkMemoryAllocateInfo *alloc_info,
                                            struct vn_renderer_bo **out_bo);
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   VkResult (*create_from_handle)(struct vn_renderer *renderer,
+                                  VkDeviceSize size,
+                                  /* externally allocated handles might not have a valid id */
+                                  vn_object_id mem_id,
+                                  bool is_kmt,
+                                  void *handle,
+                                  VkMemoryPropertyFlags flags,
+                                  const VkMemoryAllocateInfo *alloc_info,
+                                  struct vn_renderer_bo **out_bo);
+#else
    VkResult (*create_from_dma_buf)(struct vn_renderer *renderer,
                                    VkDeviceSize size,
                                    int fd,
                                    VkMemoryPropertyFlags flags,
                                    struct vn_renderer_bo **out_bo);
+#endif
 
    bool (*destroy)(struct vn_renderer *renderer, struct vn_renderer_bo *bo);
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   void *(*export_handle)(struct vn_renderer *renderer,
+                          struct vn_renderer_bo *bo,
+                          bool is_kmt);
+#else
    int (*export_dma_buf)(struct vn_renderer *renderer,
                          struct vn_renderer_bo *bo);
+#endif
 
    int (*export_sync_file)(struct vn_renderer *renderer,
                            struct vn_renderer_bo *bo);
@@ -194,16 +213,27 @@ struct vn_renderer_sync_ops {
                      uint32_t flags,
                      struct vn_renderer_sync **out_sync);
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   VkResult (*create_from_handle)(struct vn_renderer *renderer,
+                                  void *handle,
+                                  struct vn_renderer_sync **out_sync);
+#else
    VkResult (*create_from_syncobj)(struct vn_renderer *renderer,
                                    int fd,
                                    bool sync_file,
                                    struct vn_renderer_sync **out_sync);
+#endif
 
    void (*destroy)(struct vn_renderer *renderer,
                    struct vn_renderer_sync *sync);
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   void *(*export_handle)(struct vn_renderer *renderer,
+                          struct vn_renderer_sync *sync);
+#else
    int (*export_syncobj)(struct vn_renderer *renderer,
                          struct vn_renderer_sync *sync,
                          bool sync_file);
+#endif
 
    /* reset the counter */
    VkResult (*reset)(struct vn_renderer *renderer,
@@ -236,6 +266,14 @@ vn_renderer_create_virtgpu(struct vn_instance *instance,
                            struct vn_renderer **renderer);
 #endif
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+VkResult
+vn_renderer_create_virtgpu_win32(struct vn_instance *instance,
+                                 const VkAllocationCallbacks *alloc,
+                                 const VkInstanceCreateInfo *pInfo,
+                                 struct vn_renderer **renderer);
+#endif
+
 VkResult
 vn_renderer_create_vtest(struct vn_instance *instance,
                          const VkAllocationCallbacks *alloc,
@@ -244,6 +282,7 @@ vn_renderer_create_vtest(struct vn_instance *instance,
 static inline VkResult
 vn_renderer_create(struct vn_instance *instance,
                    const VkAllocationCallbacks *alloc,
+                   const VkInstanceCreateInfo *pCreateInfo,
                    struct vn_renderer **renderer)
 {
 #ifdef HAVE_LIBDRM
@@ -254,6 +293,9 @@ vn_renderer_create(struct vn_instance *instance,
    }
 
    return vn_renderer_create_virtgpu(instance, alloc, renderer);
+
+#elif defined(VK_USE_PLATFORM_WIN32_KHR)
+   return vn_renderer_create_virtgpu_win32(instance, alloc, pCreateInfo, renderer);
 #else
    return vn_renderer_create_vtest(instance, alloc, renderer);
 #endif
@@ -319,11 +361,12 @@ vn_renderer_bo_create_from_device_memory(
    vn_object_id mem_id,
    VkMemoryPropertyFlags flags,
    VkExternalMemoryHandleTypeFlags external_handles,
+   const VkMemoryAllocateInfo *alloc_info,
    struct vn_renderer_bo **out_bo)
 {
    struct vn_renderer_bo *bo;
    VkResult result = renderer->bo_ops.create_from_device_memory(
-      renderer, size, mem_id, flags, external_handles, &bo);
+      renderer, size, mem_id, flags, external_handles, alloc_info, &bo);
    if (result != VK_SUCCESS)
       return result;
 
@@ -335,6 +378,31 @@ vn_renderer_bo_create_from_device_memory(
    return VK_SUCCESS;
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static inline VkResult
+vn_renderer_bo_create_from_handle(struct vn_renderer *renderer,
+                                  VkDeviceSize size,
+                                  vn_object_id mem_id,
+                                  bool is_kmt,
+                                  void *handle,
+                                  VkMemoryPropertyFlags flags,
+                                  const VkMemoryAllocateInfo *alloc_info,
+                                  struct vn_renderer_bo **out_bo)
+{
+   struct vn_renderer_bo *bo;
+   VkResult result =
+      renderer->bo_ops.create_from_handle(renderer, size, mem_id, is_kmt, handle, flags, alloc_info, &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   assert(vn_refcount_is_valid(&bo->refcount));
+   assert(bo->res_id);
+   assert(!bo->mmap_size || bo->mmap_size >= size);
+
+   *out_bo = bo;
+   return VK_SUCCESS;
+}
+#else
 static inline VkResult
 vn_renderer_bo_create_from_dma_buf(struct vn_renderer *renderer,
                                    VkDeviceSize size,
@@ -355,6 +423,7 @@ vn_renderer_bo_create_from_dma_buf(struct vn_renderer *renderer,
    *out_bo = bo;
    return VK_SUCCESS;
 }
+#endif
 
 static inline struct vn_renderer_bo *
 vn_renderer_bo_ref(struct vn_renderer *renderer, struct vn_renderer_bo *bo)
@@ -371,12 +440,22 @@ vn_renderer_bo_unref(struct vn_renderer *renderer, struct vn_renderer_bo *bo)
    return false;
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static inline void *
+vn_renderer_bo_export_handle(struct vn_renderer *renderer,
+                             struct vn_renderer_bo *bo,
+                             bool is_kmt)
+{
+   return renderer->bo_ops.export_handle(renderer, bo, is_kmt);
+}
+#else
 static inline int
 vn_renderer_bo_export_dma_buf(struct vn_renderer *renderer,
                               struct vn_renderer_bo *bo)
 {
    return renderer->bo_ops.export_dma_buf(renderer, bo);
 }
+#endif
 
 static inline int
 vn_renderer_bo_export_sync_file(struct vn_renderer *renderer,
@@ -420,6 +499,15 @@ vn_renderer_sync_create(struct vn_renderer *renderer,
    return renderer->sync_ops.create(renderer, initial_val, flags, out_sync);
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static inline VkResult
+vn_renderer_sync_create_from_handle(struct vn_renderer *renderer,
+                                    void *handle,
+                                    struct vn_renderer_sync **out_sync)
+{
+   return renderer->sync_ops.create_from_handle(renderer, handle, out_sync);
+}
+#else
 static inline VkResult
 vn_renderer_sync_create_from_syncobj(struct vn_renderer *renderer,
                                      int fd,
@@ -429,6 +517,7 @@ vn_renderer_sync_create_from_syncobj(struct vn_renderer *renderer,
    return renderer->sync_ops.create_from_syncobj(renderer, fd, sync_file,
                                                  out_sync);
 }
+#endif
 
 static inline void
 vn_renderer_sync_destroy(struct vn_renderer *renderer,
@@ -437,6 +526,14 @@ vn_renderer_sync_destroy(struct vn_renderer *renderer,
    renderer->sync_ops.destroy(renderer, sync);
 }
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+static inline void *
+vn_renderer_sync_export_handle(struct vn_renderer *renderer,
+                               struct vn_renderer_sync *sync)
+{
+   return renderer->sync_ops.export_handle(renderer, sync);
+}
+#else
 static inline int
 vn_renderer_sync_export_syncobj(struct vn_renderer *renderer,
                                 struct vn_renderer_sync *sync,
@@ -444,6 +541,7 @@ vn_renderer_sync_export_syncobj(struct vn_renderer *renderer,
 {
    return renderer->sync_ops.export_syncobj(renderer, sync, sync_file);
 }
+#endif
 
 static inline VkResult
 vn_renderer_sync_reset(struct vn_renderer *renderer,
diff --git a/src/virtio/vulkan/vn_renderer_virtgpu.c b/src/virtio/vulkan/vn_renderer_virtgpu.c
index 9e233b560d3..2bdbb63a48c 100644
--- a/src/virtio/vulkan/vn_renderer_virtgpu.c
+++ b/src/virtio/vulkan/vn_renderer_virtgpu.c
@@ -1302,6 +1302,7 @@
 virtgpu_bo_create_from_device_memory(
    vn_object_id mem_id,
    VkMemoryPropertyFlags flags,
    VkExternalMemoryHandleTypeFlags external_handles,
+   const VkMemoryAllocateInfo *alloc_info,
    struct vn_renderer_bo **out_bo)
 {
    struct virtgpu *gpu = (struct virtgpu *)renderer;
diff --git a/src/virtio/vulkan/vn_renderer_virtgpu_win32.c b/src/virtio/vulkan/vn_renderer_virtgpu_win32.c
new file mode 100644
index 00000000000..dd3c4c29cec
--- /dev/null
+++ b/src/virtio/vulkan/vn_renderer_virtgpu_win32.c
@@ -0,0 +1,2274 @@
+#include "vn_renderer_internal.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "util/os_file.h"
+#include "util/sparse_array.h"
+
+#include
+#include "virtio/virtio-gpu/venus_hw.h"
+#include "virtio/virtio-gpu/wddm_hw.h"
+
+#define VIRTGPU_PCI_VENDOR_ID 0x1af4
+#define VIRTGPU_PCI_DEVICE_ID 0x10f7 // TODO: 1050
+#define VIRTGPU_WIN_DEVICE_ID "PCI\\VEN_1AF4&DEV_10F7" // TODO: 1050
+
+struct virtgpu;
+
+struct virtgpu_shmem {
+   struct vn_renderer_shmem base;
+   D3DKMT_HANDLE alloc;
+   union {
+      D3DKMT_HANDLE kmt;
+      HANDLE h;
+   };
+};
+
+struct virtgpu_bo {
+   struct vn_renderer_bo base;
+   D3DKMT_HANDLE alloc;
+   union {
+      struct {
+         D3DKMT_HANDLE local;
+         D3DKMT_HANDLE global;
+      } kmt;
+
+      HANDLE h;
+   } /* resource */;
+   uint32_t blob_flags;
+};
+
+struct virtgpu_sync {
+   struct vn_renderer_sync base;
+
+   /*
+    * drm_syncobj is in one of these states
+    *
+    * - value N:      drm_syncobj has a signaled fence chain with seqno N
+    * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
+    *                 (which may point to another unsignaled fence chain with
+    *                 seqno between N and M, and so on)
+    *
+    * TODO Do we want to use binary drm_syncobjs?  They would be
+    *
+    * - value 0: drm_syncobj has no fence
+    * - value 1: drm_syncobj has a signaled fence with seqno 0
+    *
+    * They are cheaper but require special care.
+    */
+   uint32_t syncobj_handle;
+};
+
+struct virtgpu {
+   struct vn_renderer base;
+
+   struct vn_instance *instance;
+
+   VkD3DDDICallbacks *ddicb;
+   struct {
+      D3DKMT_HANDLE adapter;
+      D3DKMT_HANDLE device;
+      LUID luid;
+      HINSTANCE lib;
+      struct {
+         PFND3DKMT_QUERYADAPTERINFO queryAdapterInfo;
+         PFND3DKMT_ESCAPE escape;
+         PFND3DKMT_RENDER render;
+         PFND3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 signalSynchronizationObject2;
+         PFND3DKMT_CREATECONTEXT createContext;
+         PFND3DKMT_DESTROYCONTEXT destroyContext;
+         PFND3DKMT_CREATEALLOCATION createAllocation;
+         PFND3DKMT_DESTROYALLOCATION destroyAllocation;
+         PFND3DKMT_LOCK lock;
+         PFND3DKMT_UNLOCK unlock;
+         PFND3DKMT_QUERYRESOURCEINFO queryResourceInfo;
+         PFND3DKMT_OPENRESOURCE openResource;
+         PFND3DKMT_CREATEDEVICE createDevice;
+         PFND3DKMT_DESTROYDEVICE destroyDevice;
+         PFND3DKMT_OPENADAPTERFROMHDC openAdapterFromHdc;
+         PFND3DKMT_CLOSEADAPTER closeAdapter;
+      } cb;
+   } d3dkmt;
+
+   struct {
+      mtx_t lock;
+
+      union {
+         D3DKMT_HANDLE kmt;
+         HANDLE h;
+      };
+
+      void *cmd_buf;
+      size_t cmd_size;
+
+      D3DDDI_ALLOCATIONLIST *alloc_list;
+      size_t alloc_size;
+
+      D3DDDI_PATCHLOCATIONLIST *patch_list;
+      size_t patch_size;
+   } ctx;
+
+   struct {
+      uint16_t domain;
+      uint8_t bus;
+      uint8_t dev;
+      uint8_t func;
+   } pci_bus_info;
+
+   uint32_t max_timeline_count;
+
+   struct {
+      uint32_t id;
+      uint32_t version;
+      struct virgl_renderer_capset_venus data;
+   } capset;
+
+   uint32_t shmem_blob_mem;
+   uint32_t bo_blob_mem;
+
+   struct util_sparse_array syncobj_array;
+   /* note that we use kmt_handle instead of res_id to index because
+    * res_id is monotonically increasing by default (see
+    * virtio_gpu_resource_id_get)
+    */
+   struct util_sparse_array shmem_array;
+   struct util_sparse_array bo_array;
+
+   mtx_t win32_handle_import_mutex;
+
+   struct vn_renderer_shmem_cache shmem_cache;
+
+   // bool supports_cross_device;
+};
+
+static inline NTSTATUS
+hr_to_nt(struct virtgpu *gpu, HRESULT hr)
+{
+   switch (hr) {
+   case S_OK:
+      return STATUS_SUCCESS;
+   case E_OUTOFMEMORY:
+      return STATUS_NO_MEMORY;
+   case E_INVALIDARG:
+      return STATUS_INVALID_PARAMETER;
+   default:
+      vn_log(gpu->instance, "Unknown HRESULT: %lx", hr);
+      return STATUS_INVALID_PARAMETER;
+   }
+}
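+
+/* The mapping above is deliberately partial: only the HRESULTs the runtime
+ * callbacks are expected to return are translated; anything unrecognized is
+ * logged and degraded to STATUS_INVALID_PARAMETER. */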
+
+#include "util/hash_table.h"
+#include "util/u_idalloc.h"
+
+static struct {
+   once_flag init;
+   mtx_t mutex;
+   struct hash_table *syncobjs;
+   struct util_idalloc ida;
+
+   // int signaled_fd;
+   HANDLE signaled_fd;
+} sim;
+
+struct sim_syncobj {
+   mtx_t mutex;
+   uint64_t point;
+
+   HANDLE pending_fd;
+   uint64_t pending_point;
+   bool pending_cpu;
+};
+
+static uint32_t
+sim_syncobj_create(struct virtgpu *gpu, bool signaled)
+{
+   struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
+   if (!syncobj)
+      return 0;
+
+   mtx_init(&syncobj->mutex, mtx_plain);
+   syncobj->pending_fd = NULL;
+
+   mtx_lock(&sim.mutex);
+
+   /* initialize lazily */
+   if (!sim.syncobjs) {
+      sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
+      if (!sim.syncobjs) {
+         mtx_unlock(&sim.mutex);
+         mtx_destroy(&syncobj->mutex);
+         free(syncobj);
+         return 0;
+      }
+
+      util_idalloc_init(&sim.ida, 32);
+
+      // TODO: is this actually needed?
+      /*
+      struct drm_virtgpu_execbuffer args = {
+         .flags = VIRTGPU_EXECBUF_RING_IDX | VIRTGPU_EXECBUF_FENCE_FD_OUT,
+         .ring_idx = 0, / * CPU ring * /
+      };
+      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
+      if (ret || args.fence_fd < 0) {
+         _mesa_hash_table_destroy(sim.syncobjs, NULL);
+         sim.syncobjs = NULL;
+         mtx_unlock(&sim.mutex);
+         mtx_destroy(&syncobj->mutex);
+         free(syncobj);
+         return 0;
+      }
+      sim.signaled_fd = args.fence_fd;
+      */
+
+      sim.signaled_fd = CreateEventA(NULL, TRUE, TRUE, NULL);
+      if (sim.signaled_fd == NULL) {
+         _mesa_hash_table_destroy(sim.syncobjs, NULL);
+         sim.syncobjs = NULL;
+         mtx_unlock(&sim.mutex);
+         mtx_destroy(&syncobj->mutex);
+         free(syncobj);
+         return 0;
+      }
+      // vn_log(gpu->instance, "created handle %p", sim.signaled_fd);
+   }
+
+   const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
+   _mesa_hash_table_insert(sim.syncobjs,
+                           (const void *)(uintptr_t)syncobj_handle, syncobj);
+
+   mtx_unlock(&sim.mutex);
+
+   return syncobj_handle;
+}
+
+static void
+sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
+{
+   struct sim_syncobj *syncobj = NULL;
+
+   mtx_lock(&sim.mutex);
+
+   struct hash_entry *entry = _mesa_hash_table_search(
+      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
+   if (entry) {
+      syncobj = entry->data;
+      _mesa_hash_table_remove(sim.syncobjs, entry);
+      util_idalloc_free(&sim.ida, syncobj_handle - 1);
+   }
+
+   mtx_unlock(&sim.mutex);
+
+   if (syncobj) {
+      if (syncobj->pending_fd != NULL)
+         CloseHandle(syncobj->pending_fd);
+      mtx_destroy(&syncobj->mutex);
+      free(syncobj);
+   }
+}
+
+static VkResult
+sim_syncobj_poll(HANDLE fd, int poll_timeout)
+{
+   DWORD ret = WaitForSingleObject(fd, poll_timeout);
+
+   if (ret == WAIT_OBJECT_0) {
+      return VK_SUCCESS;
+   } else if (ret == WAIT_TIMEOUT) {
+      return VK_TIMEOUT;
+   } else {
+      return VK_ERROR_DEVICE_LOST;
+   }
+}
+
+static void
+sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
+{
+   syncobj->point = point;
+
+   if (syncobj->pending_fd != NULL) {
+      CloseHandle(syncobj->pending_fd);
+      syncobj->pending_fd = NULL;
+      syncobj->pending_point = point;
+   }
+}
+
+static void
+sim_syncobj_update_point_locked(struct vn_instance *instance,
+                                struct sim_syncobj *syncobj,
+                                int poll_timeout)
+{
+   if (syncobj->pending_fd != NULL) {
+      VkResult result;
+      if (syncobj->pending_cpu) {
+         if (poll_timeout == -1) {
+            const int max_cpu_timeout = 2000;
+            poll_timeout = max_cpu_timeout;
+            // vn_log(instance, "waiting for handle %p", syncobj->pending_fd);
+            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
+            if (result == VK_TIMEOUT) {
+               vn_log(NULL, "cpu sync timed out after %dms; ignoring",
+                      poll_timeout);
+               result = VK_SUCCESS;
+            }
+         } else {
+            // vn_log(instance, "waiting for handle %p", syncobj->pending_fd);
+            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
+         }
+      } else {
+         // vn_log(instance, "waiting for handle %p", syncobj->pending_fd);
+         result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
+      }
+      if (result == VK_SUCCESS) {
+         CloseHandle(syncobj->pending_fd);
+         syncobj->pending_fd = NULL;
+         syncobj->point = syncobj->pending_point;
+      }
+   }
+}
+
+static struct sim_syncobj *
+sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
+{
+   struct sim_syncobj *syncobj = NULL;
+
+   mtx_lock(&sim.mutex);
+   struct hash_entry *entry = _mesa_hash_table_search(
+      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
+   if (entry)
+      syncobj = entry->data;
+   mtx_unlock(&sim.mutex);
+
+   return syncobj;
+}
+
+static bool
+sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, 0); + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_query(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t *point) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_update_point_locked(gpu->instance, syncobj, 0); + *point = syncobj->point; + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_signal(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t point) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, point); + mtx_unlock(&syncobj->mutex); + + return true; +} + +static bool +sim_syncobj_submit(struct virtgpu *gpu, + uint32_t syncobj_handle, + HANDLE sync_fd, + uint64_t point, + bool cpu) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return false; + + HANDLE pending_fd = NULL; + HANDLE proc = GetCurrentProcess(); + bool ret = DuplicateHandle(proc, sync_fd, proc, &pending_fd, 0, false, + DUPLICATE_SAME_ACCESS); + if (!ret) { + vn_log(gpu->instance, "failed to dup sync handle"); + return false; + } + + mtx_lock(&syncobj->mutex); + + if (syncobj->pending_fd != NULL) { + mtx_unlock(&syncobj->mutex); + + /* TODO */ + vn_log(gpu->instance, "sorry, no simulated timeline semaphore"); + CloseHandle(pending_fd); + return false; + } + if (syncobj->point >= point) + vn_log(gpu->instance, "non-monotonic signaling"); + + syncobj->pending_fd = pending_fd; + syncobj->pending_point = point; + syncobj->pending_cpu = cpu; + + mtx_unlock(&syncobj->mutex); + + return true; +} + +static int +timeout_to_poll_timeout(uint64_t timeout) +{ + const uint64_t ns_per_ms = 1000000; + const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms; + if (!ms && timeout) + return INFINITE; + return ms <= INT_MAX ? 
ms : INFINITE; +} + +static VkResult +sim_syncobj_wait(struct virtgpu *gpu, + const struct vn_renderer_wait *wait, + bool wait_avail) +{ + if (wait_avail) + return VK_ERROR_DEVICE_LOST; + + const int poll_timeout = timeout_to_poll_timeout(wait->timeout); + + /* TODO poll all fds at the same time */ + for (uint32_t i = 0; i < wait->sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i]; + const uint64_t point = wait->sync_values[i]; + + struct sim_syncobj *syncobj = + sim_syncobj_lookup(gpu, sync->syncobj_handle); + if (!syncobj) + return VK_ERROR_DEVICE_LOST; + + mtx_lock(&syncobj->mutex); + + if (syncobj->point < point) + sim_syncobj_update_point_locked(gpu->instance, syncobj, + poll_timeout); + + if (syncobj->point < point) { + if (wait->wait_any && i < wait->sync_count - 1 && + syncobj->pending_fd == NULL) { + mtx_unlock(&syncobj->mutex); + continue; + } + errno = ETIME; + mtx_unlock(&syncobj->mutex); + return VK_TIMEOUT; + } + + mtx_unlock(&syncobj->mutex); + + if (wait->wait_any) + break; + + /* TODO adjust poll_timeout */ + } + + return VK_SUCCESS; +} + +static HANDLE +sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return NULL; + + HANDLE fd = NULL; + HANDLE proc = GetCurrentProcess(); + mtx_lock(&syncobj->mutex); + HANDLE in = + syncobj->pending_fd != NULL ? syncobj->pending_fd : sim.signaled_fd; + if (!DuplicateHandle(proc, in, proc, &fd, 0, false, + DUPLICATE_SAME_ACCESS)) { + vn_log(gpu->instance, "failed to duplicate handle"); + } + mtx_unlock(&syncobj->mutex); + + return fd; +} + +static uint32_t +sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, HANDLE fd) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return 0; + + if (!sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false)) + return 0; + + return syncobj_handle; +} + +static VkResult +sim_submit_signal_syncs(struct virtgpu *gpu, + HANDLE sync_fd, + struct vn_renderer_sync *const *syncs, + const uint64_t *sync_values, + uint32_t sync_count, + bool cpu) +{ + for (uint32_t i = 0; i < sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i]; + const uint64_t pending_point = sync_values[i]; + + if (!sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd, + pending_point, cpu)) { + return VK_ERROR_DEVICE_LOST; + } + } + + return VK_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_create_context(struct virtgpu *gpu) +{ + if (gpu->ddicb != NULL) { + D3DDDICB_CREATECONTEXT context = {}; + NTSTATUS status = + hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnCreateContextCb( + gpu->ddicb->hRTDevice, &context)); + if (!NT_SUCCESS(status)) { + return status; + } + + gpu->ddicb->hContext = context.hContext; + + gpu->ctx.h = context.hContext; + + gpu->ctx.cmd_buf = context.pCommandBuffer; + gpu->ctx.cmd_size = context.CommandBufferSize; + + gpu->ctx.alloc_list = context.pAllocationList; + gpu->ctx.alloc_size = context.AllocationListSize; + + gpu->ctx.patch_list = context.pPatchLocationList; + gpu->ctx.patch_size = context.PatchLocationListSize; + + return STATUS_SUCCESS; + } else { + D3DKMT_CREATECONTEXT context = { + .hDevice = gpu->d3dkmt.device, + .ClientHint = D3DKMT_CLIENTHINT_VULKAN, + }; + + NTSTATUS status = gpu->d3dkmt.cb.createContext(&context); + if (!NT_SUCCESS(status)) { + return status; + } + + gpu->ctx.kmt = context.hContext; + + gpu->ctx.cmd_buf = context.pCommandBuffer; + gpu->ctx.cmd_size = 
context.CommandBufferSize;
+
+      gpu->ctx.alloc_list = context.pAllocationList;
+      gpu->ctx.alloc_size = context.AllocationListSize;
+
+      gpu->ctx.patch_list = context.pPatchLocationList;
+      gpu->ctx.patch_size = context.PatchLocationListSize;
+
+      return STATUS_SUCCESS;
+   }
+}
+
+static NTSTATUS
+virtgpu_ioctl_render(struct virtgpu *gpu,
+                     unsigned cmd_offset,
+                     unsigned cmd_length,
+                     unsigned alloc_count,
+                     void *priv,
+                     unsigned priv_size)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_RENDER render = {
+         .hContext = gpu->ctx.h,
+         .CommandOffset = cmd_offset,
+         .CommandLength = cmd_length,
+         .NumAllocations = alloc_count,
+         .NumPatchLocations = alloc_count,
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = priv_size,
+      };
+
+      NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnRenderCb(
+                                         gpu->ddicb->hRTDevice, &render));
+
+      gpu->ctx.cmd_buf = render.pNewCommandBuffer;
+      gpu->ctx.cmd_size = render.NewCommandBufferSize;
+
+      gpu->ctx.alloc_list = render.pNewAllocationList;
+      gpu->ctx.alloc_size = render.NewAllocationListSize;
+
+      gpu->ctx.patch_list = render.pNewPatchLocationList;
+      gpu->ctx.patch_size = render.NewPatchLocationListSize;
+
+      return status;
+   } else {
+      D3DKMT_RENDER render = {
+         .hContext = gpu->ctx.kmt,
+         .CommandOffset = cmd_offset,
+         .CommandLength = cmd_length,
+         .AllocationCount = alloc_count,
+         .PatchLocationCount = alloc_count,
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = priv_size,
+      };
+
+      NTSTATUS status = gpu->d3dkmt.cb.render(&render);
+
+      gpu->ctx.cmd_buf = render.pNewCommandBuffer;
+      gpu->ctx.cmd_size = render.NewCommandBufferSize;
+
+      gpu->ctx.alloc_list = render.pNewAllocationList;
+      gpu->ctx.alloc_size = render.NewAllocationListSize;
+
+      gpu->ctx.patch_list = render.pNewPatchLocationList;
+      gpu->ctx.patch_size = render.NewPatchLocationListSize;
+
+      return status;
+   }
+}
+
+static NTSTATUS
+virtgpu_ioctl_signal(struct virtgpu *gpu, HANDLE fence)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_SIGNALSYNCHRONIZATIONOBJECT2 signal = {
+         .hContext = gpu->ctx.h,
+         .ObjectCount = 0,
+         .BroadcastContextCount = 0,
+         .Flags = {
+            .EnqueueCpuEvent = TRUE,
+         },
+         .CpuEventHandle = fence,
+      };
+      return hr_to_nt(
+         gpu, gpu->ddicb->pKTCallbacks->pfnSignalSynchronizationObject2Cb(
+                 gpu->ddicb->hRTDevice, &signal));
+   } else {
+      D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 signal = {
+         .hContext = gpu->ctx.kmt,
+         .ObjectCount = 0,
+         .BroadcastContextCount = 0,
+         .Flags = {
+            .EnqueueCpuEvent = TRUE,
+         },
+         .CpuEventHandle = fence,
+      };
+
+      return gpu->d3dkmt.cb.signalSynchronizationObject2(&signal);
+   }
+}
+
+static VkResult
+sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
+{
+   assert(submit->bo_count < gpu->ctx.alloc_size);
+   assert(submit->batch_count);
+
+   VkResult ret = VK_SUCCESS;
+   for (uint32_t i = 0; i < submit->batch_count; i++) {
+      const struct vn_renderer_submit_batch *batch = &submit->batches[i];
+      mtx_lock(&gpu->ctx.lock);
+
+      for (uint32_t j = 0; j < submit->bo_count; j++) {
+         struct virtgpu_bo *bo = (struct virtgpu_bo *)submit->bos[j];
+         assert(bo->alloc != 0);
+         //if (bo->alloc == 0) return VK_ERROR_FEATURE_NOT_PRESENT; // TODO: we should not call render here, but rather save commands into present command buffer
+         gpu->ctx.alloc_list[j].hAllocation = bo->alloc;
+         gpu->ctx.patch_list[j].AllocationIndex = j;
+      }
+
+      VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf;
+      hdr->type = VIOGPU_CMD_SUBMIT;
+      hdr->size = batch->cs_size;
+      hdr->flags = VIOGPU_EXECBUF_RING_IDX;
+      hdr->ring_idx = batch->ring_idx;
+
+      assert(batch->cs_size +
sizeof(*hdr) <= gpu->ctx.cmd_size);
+      memcpy(gpu->ctx.cmd_buf + sizeof(*hdr), batch->cs_data, batch->cs_size);
+      NTSTATUS status = virtgpu_ioctl_render(
+         gpu, 0, sizeof(*hdr) + batch->cs_size, submit->bo_count, NULL, 0);
+      mtx_unlock(&gpu->ctx.lock);
+      if (!NT_SUCCESS(status)) {
+         vn_log(gpu->instance, "failed to render: 0x%lx", status);
+         break;
+      }
+
+      if (batch->sync_count > 0) {
+         HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL);
+         // vn_log(gpu->instance, "created handle %p", fence);
+         status = virtgpu_ioctl_signal(gpu, fence);
+         if (!NT_SUCCESS(status)) {
+            vn_log(gpu->instance, "failed to signal fence: 0x%lx", status);
+            CloseHandle(fence);
+            break;
+         }
+
+         ret = sim_submit_signal_syncs(gpu, fence, batch->syncs,
+                                       batch->sync_values, batch->sync_count,
+                                       batch->ring_idx == 0);
+         CloseHandle(fence);
+         if (ret != VK_SUCCESS)
+            break;
+      }
+   }
+
+   return ret;
+}
+
+static NTSTATUS
+virtgpu_ioctl_getparam(struct virtgpu *gpu,
+                       KMTQUERYADAPTERINFOTYPE type,
+                       void *priv,
+                       unsigned priv_size)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_QUERYADAPTERINFO query = {
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = priv_size,
+      };
+      return hr_to_nt(gpu,
+                      gpu->ddicb->pAdapterCallbacks->pfnQueryAdapterInfoCb(
+                         gpu->ddicb->hRTAdapter, &query));
+   } else {
+      D3DKMT_QUERYADAPTERINFO query = {
+         .hAdapter = gpu->d3dkmt.adapter,
+         .Type = type,
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = priv_size,
+      };
+
+      return gpu->d3dkmt.cb.queryAdapterInfo(&query);
+   }
+}
+
+static NTSTATUS
+virtgpu_ioctl_escape(struct virtgpu *gpu, VIOGPU_ESCAPE *priv)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_ESCAPE escape = {
+         .hDevice = gpu->ddicb->hRTDevice,
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = sizeof(*priv),
+         .hContext = gpu->ctx.h,
+      };
+      return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnEscapeCb(
+                              gpu->ddicb->hRTAdapter, &escape));
+   } else {
+      D3DKMT_ESCAPE escape = {
+         .hAdapter = gpu->d3dkmt.adapter,
+         .hDevice = gpu->d3dkmt.device,
+         .pPrivateDriverData = priv,
+         .PrivateDriverDataSize = sizeof(*priv),
+      };
+
+      return gpu->d3dkmt.cb.escape(&escape);
+   }
+}
+
+static NTSTATUS
+virtgpu_ioctl_get_caps(struct virtgpu *gpu,
+                       uint32_t id,
+                       uint32_t version,
+                       void *capset,
+                       size_t capset_size)
+{
+   VIOGPU_ESCAPE caps = {
+      .Type = VIOGPU_GET_CAPS,
+      .DataLength = sizeof(caps.Capset),
+      .Capset = {
+         .CapsetId = id,
+         .Version = version,
+         .Size = capset_size,
+         .Capset = capset,
+      },
+   };
+
+   return virtgpu_ioctl_escape(gpu, &caps);
+}
+
+static NTSTATUS
+virtgpu_ioctl_init_map(struct virtgpu *gpu, D3DKMT_HANDLE handle)
+{
+   mtx_lock(&gpu->ctx.lock);
+
+   gpu->ctx.alloc_list[0].hAllocation = handle;
+   gpu->ctx.patch_list[0].AllocationIndex = 0;
+
+   VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf;
+   hdr->type = VIOGPU_CMD_MAP_BLOB;
+   hdr->size = sizeof(ULONG);
+   hdr->flags = 0;
+   hdr->ring_idx = 0;
+
+   ULONG *index = (void *)(hdr + 1);
+   *index = 0;
+   memset(index + 1, 0, sizeof(*hdr));
+
+   NTSTATUS status = virtgpu_ioctl_render(
+      gpu, 0, 2 * sizeof(*hdr) + sizeof(ULONG), 1, NULL, 0);
+   mtx_unlock(&gpu->ctx.lock);
+   if (!NT_SUCCESS(status)) {
+      return status;
+   }
+
+   HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL);
+   // vn_log(gpu->instance, "created handle %p", fence);
+   status = virtgpu_ioctl_signal(gpu, fence);
+   if (!NT_SUCCESS(status)) {
+      return status;
+   }
+
+   // vn_log(gpu->instance, "waiting for handle %p", fence);
+   if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) {
+      return STATUS_ABANDONED_WAIT_0;
+   }
+
+   return STATUS_SUCCESS;
+}
+
+static
NTSTATUS +virtgpu_ioctl_destroy_map(struct virtgpu *gpu, D3DKMT_HANDLE handle) +{ + mtx_lock(&gpu->ctx.lock); + + gpu->ctx.alloc_list[0].hAllocation = handle; + gpu->ctx.patch_list[0].AllocationIndex = 0; + + VIOGPU_COMMAND_HDR *hdr = gpu->ctx.cmd_buf; + hdr->type = VIOGPU_CMD_UNMAP_BLOB; + hdr->size = sizeof(ULONG); + hdr->flags = 0; + hdr->ring_idx = 0; + + ULONG *index = (void *)(hdr + 1); + *index = 0; + memset(index + 1, 0, sizeof(*hdr)); + + NTSTATUS status = virtgpu_ioctl_render( + gpu, 0, 2 * sizeof(*hdr) + sizeof(ULONG), 1, NULL, 0); + mtx_unlock(&gpu->ctx.lock); + if (!NT_SUCCESS(status)) { + return status; + } + + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + status = virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + return status; + } + + // vn_log(gpu->instance, "waiting for handle %p", fence); + if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) { + return STATUS_ABANDONED_WAIT_0; + } + + return STATUS_SUCCESS; + // return virtgpu_ioctl_unlock(gpu, handle); +} + +static NTSTATUS +virtgpu_ioctl_wait(struct virtgpu *gpu) +{ + HANDLE fence = CreateEventA(NULL, TRUE, FALSE, NULL); + // vn_log(gpu->instance, "created handle %p", fence); + NTSTATUS status = virtgpu_ioctl_signal(gpu, fence); + if (!NT_SUCCESS(status)) { + return status; + } + // vn_log(gpu->instance, "waiting for handle %p", fence); + if (WaitForSingleObject(fence, INFINITE) != WAIT_OBJECT_0) { + return STATUS_ABANDONED_WAIT_0; + } + + return STATUS_SUCCESS; +} + +#define VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu) \ + do { \ + NTSTATUS status = virtgpu_ioctl_wait(gpu); \ + if (!NT_SUCCESS(status)) { \ + return status; \ + } \ + } while (0) + +static NTSTATUS +virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t *res_id, + D3DKMT_HANDLE *alloc_handle, + D3DKMT_HANDLE *res_kmt_local, + D3DKMT_HANDLE *res_kmt_global, + HANDLE *res_h) +{ + blob_size = align64(blob_size, 4096); + + VIOGPU_CREATE_ALLOCATION_EXCHANGE alloc_priv = { + .Type = VIOGPU_RESOURCE_TYPE_BLOB, + .OptionsBlob = { + .blob_mem = blob_mem, + .blob_flags = blob_flags, + .blob_id = blob_id, + }, + .Size = blob_size, + }; + + VIOGPU_CREATE_RESOURCE_EXCHANGE res_priv = { 0 }; + + D3DDDI_ALLOCATIONINFO alloc_info = { + .pPrivateDriverData = &alloc_priv, + .PrivateDriverDataSize = sizeof(alloc_priv), + }; + + bool is_shareable = !!(blob_flags & VIOGPU_BLOB_FLAG_USE_SHAREABLE); + bool is_mappable = !!(blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE); + + // TODO: is this required? 
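+   /* Presumably this drains the context once before (and once after) the
+    * allocation so that the kernel driver never sees RESOURCE_CREATE_BLOB
+    * reordered against commands still sitting in the shared command
+    * buffer.  It costs a full CPU wait per blob, so it is a conservative
+    * default until the TODO above is answered.
+    */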
+ VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + if (gpu->ddicb != NULL) { + D3DDDICB_ALLOCATE alloc = { + .pPrivateDriverData = &res_priv, + .PrivateDriverDataSize = sizeof(res_priv), + .hResource = *res_h, + .NumAllocations = 1, + .pAllocationInfo = &alloc_info, + }; + + NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnAllocateCb( + gpu->ddicb->hRTDevice, &alloc)); + if (!NT_SUCCESS(status)) { + return status; + } + *res_kmt_local = alloc.hKMResource; + *alloc_handle = alloc_info.hAllocation; + } else { + D3DKMT_CREATEALLOCATION alloc = { + .hDevice = gpu->d3dkmt.device, + .pPrivateDriverData = &res_priv, + .PrivateDriverDataSize = sizeof(res_priv), + .NumAllocations = 1, + .pAllocationInfo = &alloc_info, + .Flags = { + .CreateResource = 1, + .CreateShared = is_shareable, + }, + }; + NTSTATUS status = gpu->d3dkmt.cb.createAllocation(&alloc); + if (!NT_SUCCESS(status)) { + return status; + } + *res_kmt_local = alloc.hResource; + if (res_kmt_global) { + *res_kmt_global = alloc.hGlobalShare; + } + *alloc_handle = alloc_info.hAllocation; + } + + // TODO: is this required? + VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + VIOGPU_ESCAPE res_info = { + .Type = VIOGPU_RES_INFO, + .DataLength = sizeof(res_info.ResourceInfo), + .ResourceInfo = { + .ResHandle = *alloc_handle, + }, + }; + + NTSTATUS status = virtgpu_ioctl_escape(gpu, &res_info); + if (!NT_SUCCESS(status)) { + return status; + } + + if (!res_info.ResourceInfo.IsBlob || !res_info.ResourceInfo.IsCreated) { + return STATUS_INVALID_PARAMETER; + } + + *res_id = res_info.ResourceInfo.Id; + + return is_mappable ? virtgpu_ioctl_init_map(gpu, *alloc_handle) + : STATUS_SUCCESS; +} + +static NTSTATUS +virtgpu_ioctl_resource_destroy_blob(struct virtgpu *gpu, + D3DKMT_HANDLE alloc_handle, + D3DKMT_HANDLE res_kmt, + HANDLE res_h) +{ + // TODO: is this required? + VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu); + + if (gpu->ddicb != NULL) { + D3DDDICB_DEALLOCATE destroy = { + .hResource = res_h, + .NumAllocations = res_h == NULL ? 1 : 0, + .HandleList = res_h == NULL ? &alloc_handle : NULL, + }; + + NTSTATUS status = hr_to_nt(gpu, + gpu->ddicb->pKTCallbacks->pfnDeallocateCb(gpu->ddicb->hRTDevice, + &destroy)); + + if (!NT_SUCCESS(status)) { + return status; + } + } else { + D3DKMT_DESTROYALLOCATION destroy = { + .hDevice = gpu->d3dkmt.device, + .hResource = res_kmt, + .AllocationCount = res_kmt == 0 ? 1 : 0, + .phAllocationList = res_kmt == 0 ? &alloc_handle : NULL, + }; + + NTSTATUS status = gpu->d3dkmt.cb.destroyAllocation(&destroy); + if (!NT_SUCCESS(status)) { + return status; + } + } + + // TODO: is this required? 
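+   /* As in the create path, this trailing flush is a guess: it waits for
+    * the deallocation to be consumed before the D3DKMT handle can be
+    * recycled, which matters because the bo_array/shmem_array entries are
+    * indexed by allocation handle.
+    */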
+   VIRTGPU_SYNC_OR_RETURN_NTSTATUS(gpu);
+
+   return STATUS_SUCCESS;
+}
+
+static NTSTATUS
+virtgpu_ioctl_lock(struct virtgpu *gpu, D3DKMT_HANDLE handle, void **ptr)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_LOCK lock = {
+         .hAllocation = handle,
+         .Flags = {
+            // .IgnoreSync = 1,
+            .LockEntire = 1,
+         },
+      };
+      NTSTATUS status = hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnLockCb(
+                                         gpu->ddicb->hRTDevice, &lock));
+      if (!NT_SUCCESS(status)) {
+         return status;
+      }
+      *ptr = lock.pData;
+   } else {
+      D3DKMT_LOCK lock = {
+         .hDevice = gpu->d3dkmt.device,
+         .Flags = {
+            // .IgnoreSync = 1,
+            .LockEntire = 1,
+         },
+         .hAllocation = handle,
+      };
+      NTSTATUS status = gpu->d3dkmt.cb.lock(&lock);
+      if (!NT_SUCCESS(status)) {
+         return status;
+      }
+      *ptr = lock.pData;
+   }
+
+   return STATUS_SUCCESS;
+}
+
+static NTSTATUS
+virtgpu_ioctl_unlock(struct virtgpu *gpu, D3DKMT_HANDLE handle)
+{
+   if (gpu->ddicb != NULL) {
+      D3DDDICB_UNLOCK unlock = {
+         .NumAllocations = 1,
+         .phAllocations = &handle,
+      };
+      return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnUnlockCb(
+                              gpu->ddicb->hRTDevice, &unlock));
+   } else {
+      D3DKMT_UNLOCK unlock = {
+         .hDevice = gpu->d3dkmt.device,
+         .NumAllocations = 1,
+         .phAllocations = &handle,
+      };
+      return gpu->d3dkmt.cb.unlock(&unlock);
+   }
+}
+
+static inline void
+virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu)
+{
+   /* VIOGPU_BLOB_MEM_GUEST blobs are allocated from guest system memory;
+    * they are logically contiguous in the guest but are sglists (iovecs)
+    * in the host.  That makes them slower to process in the host.  With
+    * host process isolation, it also becomes impossible for the host to
+    * access sglists directly.
+    *
+    * While there are ideas (and shipped code in some cases) such as
+    * creating udmabufs from sglists, or having a dedicated guest heap, it
+    * seems the easiest way is to reuse VIOGPU_BLOB_MEM_HOST3D.  That is,
+    * when the renderer sees a request to export a blob where
+    *
+    *  - blob_mem is VIOGPU_BLOB_MEM_HOST3D
+    *  - blob_flags is VIOGPU_BLOB_FLAG_USE_MAPPABLE
+    *  - blob_id is 0
+    *
+    * it allocates a host shmem.
+    *
+    * supports_blob_id_0 has been enforced by mandated render server config.
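+    *
+    * Concretely, virtgpu_shmem_create below issues RESOURCE_CREATE_BLOB
+    * with exactly that triple: blob_mem = VIOGPU_BLOB_MEM_HOST3D,
+    * blob_flags = VIOGPU_BLOB_FLAG_USE_MAPPABLE and blob_id = 0.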
+    */
+   assert(gpu->capset.data.supports_blob_id_0);
+   gpu->shmem_blob_mem = VIOGPU_BLOB_MEM_HOST3D;
+}
+
+static VkResult
+virtgpu_init_context(struct virtgpu *gpu)
+{
+   assert(!gpu->capset.version);
+
+   VIOGPU_ESCAPE ctx_init = {
+      .Type = VIOGPU_CTX_INIT,
+      .DataLength = sizeof(ctx_init.CtxInit),
+      .CtxInit = {
+         .CapsetID = gpu->capset.id,
+         .NumRings = 64,
+         .DebugName = "venus-win32",
+      },
+   };
+
+   NTSTATUS status = virtgpu_ioctl_escape(gpu, &ctx_init);
+   if (!NT_SUCCESS(status)) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "failed to initialize context: 0x%lx", status);
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   status = virtgpu_ioctl_create_context(gpu);
+   if (!NT_SUCCESS(status)) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "failed to create context: 0x%lx", status);
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+virtgpu_init_capset(struct virtgpu *gpu)
+{
+   gpu->capset.id = VIOGPU_CAPSET_VENUS;
+   gpu->capset.version = 0;
+
+   NTSTATUS status =
+      virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
+                             &gpu->capset.data, sizeof(gpu->capset.data));
+   if (!NT_SUCCESS(status)) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "failed to get venus v%d capset: 0x%lx",
+                gpu->capset.version, status);
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   if (gpu->capset.data.wire_format_version == 0) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "unsupported wire format version %u",
+                gpu->capset.data.wire_format_version);
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+virtgpu_init_params(struct virtgpu *gpu)
+{
+   VIOGPU_ADAPTERINFO info = { 0 };
+
+   NTSTATUS status = virtgpu_ioctl_getparam(gpu, KMTQAITYPE_UMDRIVERPRIVATE,
+                                            &info, sizeof(info));
+
+   if (!NT_SUCCESS(status)) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance,
+                "failed to get adapter info from kernel: 0x%lx", status);
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   if (info.IamVioGPU != VIOGPU_IAM || !info.Flags.Supports3d) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "no venus support in this driver");
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   /* VIRTGPU_BLOB_MEM_GUEST_VRAM is not supported since this driver is
+    * mainly developed for QEMU; whoever needs it may feel free to
+    * implement it. */
+   if (info.Flags.HasShmem) {
+      gpu->bo_blob_mem = VIOGPU_BLOB_MEM_HOST3D;
+   } else {
+      if (VN_DEBUG(INIT)) {
+         vn_log(
+            gpu->instance,
+            "driver does not support the required host-visible shmem region");
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   /* Don't care about cross-device */
+   // gpu->supports_cross_device = false;
+
+   /* implied by CONTEXT_INIT uapi */
+   gpu->max_timeline_count = 64;
+
+   VIOGPU_ESCAPE pci_info = {
+      .Type = VIOGPU_GET_PCI_INFO,
+      .DataLength = sizeof(pci_info.PciInfo),
+      .PciInfo = { 0 },
+   };
+
+   status = virtgpu_ioctl_escape(gpu, &pci_info);
+   if (!NT_SUCCESS(status)) {
+      if (VN_DEBUG(INIT)) {
+         vn_log(gpu->instance, "failed to get device pci info from kernel");
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+   }
+
+   gpu->pci_bus_info.domain = pci_info.PciInfo.Domain;
+   gpu->pci_bus_info.bus = pci_info.PciInfo.Bus;
+   gpu->pci_bus_info.dev = pci_info.PciInfo.Dev;
+   gpu->pci_bus_info.func = pci_info.PciInfo.Func;
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+virtgpu_find_adapter(struct virtgpu *gpu)
+{
+   DISPLAY_DEVICE adapter = {
+      .cb = sizeof(adapter),
+   };
+
+   for (int i = 0; EnumDisplayDevicesA(NULL, i, &adapter, 0); i++) {
+      if
(strncasecmp(adapter.DeviceID, VIRTGPU_WIN_DEVICE_ID,
+                      strlen(VIRTGPU_WIN_DEVICE_ID)) == 0) {
+         HDC hdc = CreateDC(NULL, adapter.DeviceName, NULL, NULL);
+         D3DKMT_OPENADAPTERFROMHDC open_adapter = {
+            .hDc = hdc,
+         };
+
+         NTSTATUS status = gpu->d3dkmt.cb.openAdapterFromHdc(&open_adapter);
+         if (!NT_SUCCESS(status)) {
+            if (VN_DEBUG(INIT)) {
+               vn_log(gpu->instance, "failed to open adapter %s: 0x%lx",
+                      adapter.DeviceName, status);
+            }
+
+            continue;
+         }
+         // TODO: ReleaseDC(NULL, hdc);
+         gpu->d3dkmt.adapter = open_adapter.hAdapter;
+         gpu->d3dkmt.luid = open_adapter.AdapterLuid;
+
+         if (VN_DEBUG(INIT)) {
+            vn_log(gpu->instance, "using adapter %s (LUID %lx-%lx)",
+                   adapter.DeviceName, open_adapter.AdapterLuid.HighPart,
+                   open_adapter.AdapterLuid.LowPart);
+         }
+         return VK_SUCCESS;
+      }
+   }
+   return VK_ERROR_INCOMPATIBLE_DRIVER;
+}
+
+static NTSTATUS
+virtgpu_ioctl_create_device(struct virtgpu *gpu)
+{
+   if (gpu->ddicb != NULL) {
+      /* Nothing to do here, device was already created before */
+      return STATUS_SUCCESS;
+   } else {
+      D3DKMT_CREATEDEVICE create_device = {
+         .hAdapter = gpu->d3dkmt.adapter,
+      };
+      NTSTATUS status = gpu->d3dkmt.cb.createDevice(&create_device);
+      if (!NT_SUCCESS(status)) {
+         return status;
+      }
+
+      gpu->d3dkmt.device = create_device.hDevice;
+      return STATUS_SUCCESS;
+   }
+}
+
+static VkResult
+virtgpu_open(struct virtgpu *gpu, void *info)
+{
+   VkD3DDDICallbacks *callbacks = vk_find_struct(info, D3DDDI_CALLBACKS);
+   if (callbacks != NULL) {
+      /* D3D11 UMD */
+      gpu->ddicb = callbacks;
+   } else {
+      /* Standalone Vulkan ICD, using D3DKMT */
+      HINSTANCE gdi32lib = LoadLibraryA("GDI32.dll");
+      if (gdi32lib == NULL)
+         return VK_ERROR_INITIALIZATION_FAILED;
+      gpu->d3dkmt.lib = gdi32lib;
+
+      gpu->d3dkmt.cb.queryAdapterInfo =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTQueryAdapterInfo");
+      gpu->d3dkmt.cb.escape =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTEscape");
+      gpu->d3dkmt.cb.render =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTRender");
+      gpu->d3dkmt.cb.signalSynchronizationObject2 = (void *)GetProcAddress(
+         gdi32lib, "D3DKMTSignalSynchronizationObject2");
+      gpu->d3dkmt.cb.createContext =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTCreateContext");
+      gpu->d3dkmt.cb.destroyContext =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyContext");
+      gpu->d3dkmt.cb.createAllocation =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTCreateAllocation");
+      gpu->d3dkmt.cb.destroyAllocation =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyAllocation");
+      gpu->d3dkmt.cb.lock = (void *)GetProcAddress(gdi32lib, "D3DKMTLock");
+      gpu->d3dkmt.cb.unlock =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTUnlock");
+      gpu->d3dkmt.cb.queryResourceInfo =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTQueryResourceInfo");
+      gpu->d3dkmt.cb.openResource =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTOpenResource");
+      gpu->d3dkmt.cb.createDevice =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTCreateDevice");
+      gpu->d3dkmt.cb.destroyDevice =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTDestroyDevice");
+      gpu->d3dkmt.cb.openAdapterFromHdc =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTOpenAdapterFromHdc");
+      gpu->d3dkmt.cb.closeAdapter =
+         (void *)GetProcAddress(gdi32lib, "D3DKMTCloseAdapter");
+
+      VkResult result = virtgpu_find_adapter(gpu);
+      if (result != VK_SUCCESS) {
+         return result;
+      }
+   }
+
+   NTSTATUS status = virtgpu_ioctl_create_device(gpu);
+   if (!NT_SUCCESS(status)) {
+      return VK_ERROR_DEVICE_LOST;
+   }
+
+   return VK_SUCCESS;
+}
+
+static uint32_t
+virtgpu_bo_blob_flags(struct virtgpu *gpu,
+                      VkMemoryPropertyFlags flags,
+                      VkExternalMemoryHandleTypeFlags external_handles)
+{
+
uint32_t blob_flags = 0;
+   if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+      blob_flags |= VIOGPU_BLOB_FLAG_USE_MAPPABLE;
+   if (external_handles)
+      blob_flags |= VIOGPU_BLOB_FLAG_USE_SHAREABLE;
+   // if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) {
+   //    if (gpu->supports_cross_device)
+   //       blob_flags |= VIOGPU_BLOB_FLAG_USE_CROSS_DEVICE;
+   // }
+
+   return blob_flags;
+}
+
+static NTSTATUS
+virtgpu_ioctl_open_resource(struct virtgpu *gpu,
+                            D3DKMT_HANDLE res_kmt_global,
+                            D3DKMT_HANDLE *alloc,
+                            D3DKMT_HANDLE *res_kmt_local,
+                            VIOGPU_RES_INFO_REQ *res_info,
+                            const VkD3DDDIOpenResource *d3d_open)
+{
+   if (gpu->ddicb != NULL) {
+      assert(d3d_open != NULL);
+
+      //const VIOGPU_CREATE_ALLOCATION_EXCHANGE *alloc_priv =
+      //   d3d_open->pOpenResource->pOpenAllocationInfo[0].pPrivateDriverData;
+      //assert(alloc_priv->Type == VIOGPU_RESOURCE_TYPE_BLOB);
+      *alloc =
+         d3d_open->pOpenResource->pOpenAllocationInfo[0].hAllocation;
+      *res_kmt_local =
+         d3d_open->pOpenResource->hKMResource.handle;
+
+      *res_info = *(VIOGPU_RES_INFO_REQ *) d3d_open->pResourceInfo;
+
+      return STATUS_SUCCESS;
+   } else {
+      D3DKMT_QUERYRESOURCEINFO query = {
+         .hDevice = gpu->d3dkmt.device,
+         .hGlobalShare = res_kmt_global,
+      };
+
+      NTSTATUS status = gpu->d3dkmt.cb.queryResourceInfo(&query);
+      if (!NT_SUCCESS(status)) {
+         return status;
+      }
+
+      assert(query.ResourcePrivateDriverDataSize >=
+             sizeof(VIOGPU_CREATE_RESOURCE_EXCHANGE));
+      assert(query.TotalPrivateDriverDataSize >=
+             sizeof(VIOGPU_CREATE_ALLOCATION_EXCHANGE) * query.NumAllocations);
+
+      size_t runtime_data_off = 0;
+      size_t res_priv_off =
+         runtime_data_off + align64(query.PrivateRuntimeDataSize, 8);
+      size_t alloc_priv_off =
+         res_priv_off + align64(query.ResourcePrivateDriverDataSize, 8);
+      size_t alloc_list_off =
+         alloc_priv_off + align64(query.TotalPrivateDriverDataSize, 8);
+
+      size_t total_size = alloc_list_off + sizeof(D3DDDI_OPENALLOCATIONINFO) *
+                                              query.NumAllocations;
+      void *data = calloc(total_size, 1);
+      if (!data)
+         return STATUS_NO_MEMORY;
+      uintptr_t p = (uintptr_t)data;
+
+      void *runtime = (void *)(p + runtime_data_off);
+      VIOGPU_CREATE_RESOURCE_EXCHANGE *resource_priv =
+         (void *)(p + res_priv_off);
+
+      VIOGPU_CREATE_ALLOCATION_EXCHANGE *full_alloc_priv =
+         (void *)(p + alloc_priv_off);
+
+      D3DDDI_OPENALLOCATIONINFO *alloc_list = (void *)(p + alloc_list_off);
+
+      D3DKMT_OPENRESOURCE open = {
+         .hDevice = gpu->d3dkmt.device,
+         .hGlobalShare = res_kmt_global,
+         .NumAllocations = query.NumAllocations,
+         .pOpenAllocationInfo = alloc_list,
+         .pResourcePrivateDriverData = resource_priv,
+         .ResourcePrivateDriverDataSize = query.ResourcePrivateDriverDataSize,
+         .pPrivateRuntimeData = runtime,
+         .PrivateRuntimeDataSize = query.PrivateRuntimeDataSize,
+         .pTotalPrivateDriverDataBuffer = full_alloc_priv,
+         .TotalPrivateDriverDataBufferSize = query.TotalPrivateDriverDataSize,
+      };
+
+      status = gpu->d3dkmt.cb.openResource(&open);
+      if (!NT_SUCCESS(status)) {
+         goto end;
+      }
+
+      //const VIOGPU_CREATE_ALLOCATION_EXCHANGE *alloc_priv =
+      //   alloc_list[0].pPrivateDriverData;
+      //assert(alloc_priv->Type == VIOGPU_RESOURCE_TYPE_BLOB);
+
+      *alloc = alloc_list[0].hAllocation;
+      *res_kmt_local = open.hResource;
+
+      VIOGPU_ESCAPE res_esc = {
+         .Type = VIOGPU_RES_INFO,
+         .DataLength = sizeof(res_esc.ResourceInfo),
+         .ResourceInfo = {
+            .ResHandle = *alloc,
+         },
+      };
+
+      status = virtgpu_ioctl_escape(gpu, &res_esc);
+      if (!NT_SUCCESS(status)) {
+         goto end;
+      }
+      *res_info = res_esc.ResourceInfo;
+
+   end:
+      free(data);
+      return status;
+   }
+}
+
+static VkResult
+virtgpu_bo_create_from_handle(struct
vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + bool is_kmt, + void *handle, + VkMemoryPropertyFlags flags, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = NULL; + + VIOGPU_RES_INFO_REQ res_info = {}; + + mtx_lock(&gpu->win32_handle_import_mutex); + + // TODO: virtgpu_ioctl_open_resource_from_nthandle + assert(is_kmt); + + D3DKMT_HANDLE alloc, kmt_local, + kmt_global = (D3DKMT_HANDLE)(uintptr_t)handle; + + const VkD3DDDIOpenResource *d3d_open = + vk_find_struct_const(alloc_info, D3DDDI_OPEN_RESOURCE); + + NTSTATUS status = + virtgpu_ioctl_open_resource(gpu, kmt_global, &alloc, &kmt_local, &res_info, d3d_open); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "failed to open resource: 0x%lx", status); + return VK_ERROR_DEVICE_LOST; + } + + if (!alloc) + goto fail; + bo = util_sparse_array_get(&gpu->bo_array, alloc); + + /* Upon import, blob_flags is not passed to the kernel and is only for + * internal use. Set it to what works best for us. + * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags + * - classic 3d: SHAREABLE only for export and to fail the map + */ + uint32_t blob_flags = VIOGPU_BLOB_FLAG_USE_SHAREABLE; + size_t mmap_size = 0; + if (res_info.BlobMem) { + /* must be VIOGPU_BLOB_MEM_HOST3D */ + if (res_info.BlobMem != gpu->bo_blob_mem) { + vn_log(gpu->instance, + "NT/KMT handle import failed: info.blob_mem(%lu) != " + "gpu->bo_blob_mem(%u)", + res_info.BlobMem, gpu->bo_blob_mem); + goto fail; + } + + blob_flags |= virtgpu_bo_blob_flags(gpu, flags, 0); + + /* mmap_size is only used when mappable */ + mmap_size = 0; + if (blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE) { + if (res_info.Size < size) { + /* If queried blob size is smaller than requested allocation size, + * we drop the mappable flag to defer the mapping failure till the + * app attempts to map the imported memory. + */ + blob_flags &= ~VIOGPU_BLOB_FLAG_USE_MAPPABLE; + } else { + /* Similar to virtgpu_bo_create_from_device_memory, the app can + * do multiple imports with different sizes for suballocation. So + * on the initial import, the mapping size has to be initialized + * with the real size of the backing blob resource. 
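+             *
+             * (virtgpu_bo_map later locks the entire allocation at once,
+             * so the size cached here appears to matter mostly for the
+             * mmap_size consistency checks on repeated imports below.)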
+ */ + mmap_size = res_info.Size; + } + } + } + + /* we check bo->alloc instead of bo->refcount because bo->refcount + * might only be memset to 0 and is not considered initialized in theory + */ + if (bo->alloc == alloc) { + if (bo->base.mmap_size < mmap_size) { + vn_log(gpu->instance, + "NT/KMT handle import failed: bo->base.mmap_size(%zu) < " + "mmap_size(%zu)", + bo->base.mmap_size, mmap_size); + goto fail; + } + if (blob_flags & ~bo->blob_flags) { + vn_log(gpu->instance, + "NT/KMT handle import failed: blob_flags(%u) & " + "~bo->blob_flags(%u)", + blob_flags, bo->blob_flags); + goto fail; + } + + /* we can't use vn_renderer_bo_ref as the refcount may drop to 0 + * temporarily before virtgpu_bo_destroy grabs the lock + */ + vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1); + } else { + *bo = (struct virtgpu_bo){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_info.Id, + .mmap_size = mmap_size, + }, + .alloc = alloc, + .blob_flags = blob_flags, + }; + } + if (gpu->ddicb != NULL) { + bo->h = handle; + } else { + bo->kmt.local = kmt_local; + bo->kmt.global = kmt_global; + } + + mtx_unlock(&gpu->win32_handle_import_mutex); + + *out_bo = &bo->base; + + return VK_SUCCESS; + +fail: + mtx_unlock(&gpu->win32_handle_import_mutex); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; +} + +static VkResult +virtgpu_bo_create_from_device_memory( + struct vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles, + const VkMemoryAllocateInfo *alloc_info, + struct vn_renderer_bo **out_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + const uint32_t blob_flags = + virtgpu_bo_blob_flags(gpu, flags, external_handles); + + uint32_t res_id; + + HANDLE h = NULL; + const VkD3DDDICreateResource *d3d_create = + vk_find_struct_const(alloc_info, D3DDDI_CREATE_RESOURCE); + if (gpu->ddicb != NULL && d3d_create != NULL) { + h = d3d_create->hRTResource; + } + + D3DKMT_HANDLE alloc, kmt_local, kmt_global; + NTSTATUS status = virtgpu_ioctl_resource_create_blob( + gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id, &alloc, + &kmt_local, &kmt_global, &h); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, + "RESOURCE_CREATE_BLOB failed: type=%u, flags=%u, size=%zu, " + "id=%" PRIu64 ", err=0x%lx", + gpu->bo_blob_mem, blob_flags, size, mem_id, status); + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + /* There's a single underlying bo mapping shared by the initial alloc here + * and the later import of the same. The mapping size has to be initialized + * with the real size of the created blob resource, since the app can query + * the exported native handle size for re-import. e.g. 
lseek dma-buf size + */ + const uint32_t mappable_and_shareable = + VIOGPU_BLOB_FLAG_USE_MAPPABLE | VIOGPU_BLOB_FLAG_USE_SHAREABLE; + if ((blob_flags & mappable_and_shareable) == mappable_and_shareable) { + VIOGPU_ESCAPE res_info = { + .Type = VIOGPU_RES_INFO, + .DataLength = sizeof(res_info.ResourceInfo), + .ResourceInfo = { + .ResHandle = alloc, + }, + }; + + NTSTATUS status = virtgpu_ioctl_escape(gpu, &res_info); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "RESOURCE_INFO failed: handle=%u, err=0x%lx", + alloc, status); + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt_local, h); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + assert(res_info.ResourceInfo.IsBlob); + assert(res_info.ResourceInfo.BlobMem); + if (res_info.ResourceInfo.Size < size) { + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt_local, h); + vn_log(gpu->instance, + "blob mem create failed: info.size(%llu) < size(%" PRIu64 ")", + res_info.ResourceInfo.Size, size); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + size = res_info.ResourceInfo.Size; + } + + struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, alloc); + *bo = (struct virtgpu_bo){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_id, + .mmap_size = size, + }, + .alloc = alloc, + .blob_flags = blob_flags, + }; + + if (gpu->ddicb != NULL) { + bo->h = h; + } else { + bo->kmt.local = kmt_local; + bo->kmt.global = kmt_global; + } + + *out_bo = &bo->base; + + return VK_SUCCESS; +} + +static void +virtgpu_bo_invalidate(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ + // TODO: check if this is true +} + +static void +virtgpu_bo_flush(struct vn_renderer *renderer, + struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ + // TODO: check if this is true +} + +static void * +virtgpu_bo_map(struct vn_renderer *renderer, + struct vn_renderer_bo *_bo, + void *placed_addr) +{ + assert(placed_addr == NULL); + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + const bool mappable = bo->blob_flags & VIOGPU_BLOB_FLAG_USE_MAPPABLE; + + /* not thread-safe but is fine */ + if (!bo->base.mmap_ptr && mappable) { + NTSTATUS status = + virtgpu_ioctl_lock(gpu, bo->alloc, &bo->base.mmap_ptr); + if (!NT_SUCCESS(status)) { + vn_log(gpu->instance, "failed to map blob resource: 0x%lx", status); + } + } + + return bo->base.mmap_ptr; +} + +static void * +virtgpu_bo_export_handle(struct vn_renderer *renderer, + struct vn_renderer_bo *_bo, + bool is_kmt) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + const bool shareable = bo->blob_flags & VIOGPU_BLOB_FLAG_USE_SHAREABLE; + + if (is_kmt && gpu->ddicb != NULL) + /* Special hack for DXGI DDI */ + return (void *)(uintptr_t)bo->alloc; + else if (!shareable) + return NULL; + else if (is_kmt && gpu->ddicb == NULL) + return (void *)(uintptr_t)bo->kmt.global; + else + return NULL /* TODO */; +} + +static bool +virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + + mtx_lock(&gpu->win32_handle_import_mutex); + + /* Check the refcount again after the import lock is grabbed. Yes, we use + * the double-checked locking anti-pattern. 
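+    * A concurrent import can resurrect the refcount between the caller's
+    * final decrement and this re-check; both paths serialize on
+    * win32_handle_import_mutex, so the race reliably resolves either to a
+    * successful revive or to this destroy, never to both.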
+ */ + if (vn_refcount_is_valid(&bo->base.refcount)) { + mtx_unlock(&gpu->win32_handle_import_mutex); + return false; + } + + if (bo->base.mmap_ptr) { + virtgpu_ioctl_unlock(gpu, bo->alloc); + virtgpu_ioctl_destroy_map(gpu, bo->alloc); + } + + /* Set alloc and res to 0 to indicate that the bo is invalid. Must be set + * before closing the handles. Otherwise the same handles can be reused + * by another newly created bo and unexpectedly gotten zero'ed out the + * tracked handles. + */ + const D3DKMT_HANDLE alloc = bo->alloc, kmt = bo->kmt.local; + const HANDLE h = bo->h; + bo->alloc = 0; + bo->kmt.local = 0; + bo->h = NULL; + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt, h); + + mtx_unlock(&gpu->win32_handle_import_mutex); + + return true; +} + +static VkResult +virtgpu_sync_write(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + const bool ret = sim_syncobj_signal(gpu, sync->syncobj_handle, val); + + return ret ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static VkResult +virtgpu_sync_read(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t *val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + const bool ret = sim_syncobj_query(gpu, sync->syncobj_handle, val); + + return ret ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static VkResult +virtgpu_sync_reset(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync, + uint64_t initial_val) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + bool ret = sim_syncobj_reset(gpu, sync->syncobj_handle); + if (!ret) { + ret = sim_syncobj_signal(gpu, sync->syncobj_handle, initial_val); + } + + return ret ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static void * +virtgpu_sync_export_handle(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + return sim_syncobj_export(gpu, sync->syncobj_handle); +} + +static void +virtgpu_sync_destroy(struct vn_renderer *renderer, + struct vn_renderer_sync *_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + sim_syncobj_destroy(gpu, sync->syncobj_handle); + + free(sync); +} + +static VkResult +virtgpu_sync_create_from_handle(struct vn_renderer *renderer, + void *handle, + struct vn_renderer_sync **out_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + uint32_t syncobj_handle = sim_syncobj_create(gpu, false); + if (!syncobj_handle) + return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!sim_syncobj_import(gpu, syncobj_handle, handle)) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + struct virtgpu_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + sync->syncobj_handle = syncobj_handle; + sync->base.sync_id = 0; /* TODO */ + + *out_sync = &sync->base; + + return VK_SUCCESS; +} + +static VkResult +virtgpu_sync_create(struct vn_renderer *renderer, + uint64_t initial_val, + uint32_t flags, + struct vn_renderer_sync **out_sync) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + /* TODO */ + if (flags & VN_RENDERER_SYNC_SHAREABLE) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* always false because we don't use binary drm_syncobjs */ + const bool signaled = false; + const uint32_t syncobj_handle = sim_syncobj_create(gpu, signaled); + if (!syncobj_handle) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* add a signaled fence chain with seqno initial_val */ + const bool ret = sim_syncobj_signal(gpu, syncobj_handle, initial_val); + if (!ret) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + struct virtgpu_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) { + sim_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + sync->syncobj_handle = syncobj_handle; + /* we will have a sync_id when shareable is true and virtio-gpu associates + * a host sync object with guest drm_syncobj + */ + sync->base.sync_id = 0; + + *out_sync = &sync->base; + + return VK_SUCCESS; +} + +static void +virtgpu_shmem_destroy_now(struct vn_renderer *renderer, + struct vn_renderer_shmem *_shmem) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem; + + virtgpu_ioctl_unlock(gpu, shmem->alloc); + virtgpu_ioctl_destroy_map(gpu, shmem->alloc); + virtgpu_ioctl_resource_destroy_blob(gpu, shmem->alloc, shmem->kmt, + shmem->h); +} + +static void +virtgpu_shmem_destroy(struct vn_renderer *renderer, + struct vn_renderer_shmem *shmem) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem)) + return; + + virtgpu_shmem_destroy_now(&gpu->base, shmem); +} + +static struct vn_renderer_shmem * +virtgpu_shmem_create(struct vn_renderer *renderer, size_t size) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + struct vn_renderer_shmem *cached_shmem = + vn_renderer_shmem_cache_get(&gpu->shmem_cache, size); + if (cached_shmem) { + cached_shmem->refcount = 
VN_REFCOUNT_INIT(1); + return cached_shmem; + } + + uint32_t res_id; + HANDLE h = NULL; /* This is a device allocation */ + D3DKMT_HANDLE alloc, kmt; + + NTSTATUS status = virtgpu_ioctl_resource_create_blob( + gpu, gpu->shmem_blob_mem, VIOGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, + &res_id, &alloc, &kmt, NULL, &h); + if (!NT_SUCCESS(status)) + return NULL; + + void *ptr = NULL; + status = virtgpu_ioctl_lock(gpu, alloc, &ptr); + if (!NT_SUCCESS(status)) { + virtgpu_ioctl_resource_destroy_blob(gpu, alloc, kmt, h); + vn_log(gpu->instance, "failed to map blob resource: 0x%lx", status); + return NULL; + } + + struct virtgpu_shmem *shmem = + util_sparse_array_get(&gpu->shmem_array, alloc); + *shmem = (struct virtgpu_shmem){ + .base = { + .refcount = VN_REFCOUNT_INIT(1), + .res_id = res_id, + .mmap_size = size, + .mmap_ptr = ptr, + }, + .alloc = alloc, + }; + + if (gpu->ddicb != NULL) { + shmem->h = h; + } else { + shmem->kmt = kmt; + } + + return &shmem->base; +} + +static VkResult +virtgpu_wait(struct vn_renderer *renderer, + const struct vn_renderer_wait *wait) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + return sim_syncobj_wait(gpu, wait, false); +} + +static VkResult +virtgpu_submit(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + return sim_submit(gpu, submit); +} + +static void +virtgpu_init_renderer_info(struct virtgpu *gpu) +{ + struct vn_renderer_info *info = &gpu->base.info; + + info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID; + info->pci.device_id = VIRTGPU_PCI_DEVICE_ID; + + info->pci.has_bus_info = true; + info->pci.props = (VkPhysicalDevicePCIBusInfoPropertiesEXT){ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, + .pciDomain = gpu->pci_bus_info.domain, + .pciBus = gpu->pci_bus_info.bus, + .pciDevice = gpu->pci_bus_info.dev, + .pciFunction = gpu->pci_bus_info.func, + }; + + info->has_dma_buf_import = true; + info->has_external_sync = true; + + info->has_implicit_fencing = false; + + const struct virgl_renderer_capset_venus *capset = &gpu->capset.data; + info->wire_format_version = capset->wire_format_version; + info->vk_xml_version = capset->vk_xml_version; + info->vk_ext_command_serialization_spec_version = + capset->vk_ext_command_serialization_spec_version; + info->vk_mesa_venus_protocol_spec_version = + capset->vk_mesa_venus_protocol_spec_version; + assert(capset->supports_blob_id_0); + + /* ensure vk_extension_mask is large enough to hold all capset masks */ + STATIC_ASSERT(sizeof(info->vk_extension_mask) >= + sizeof(capset->vk_extension_mask1)); + memcpy(info->vk_extension_mask, capset->vk_extension_mask1, + sizeof(capset->vk_extension_mask1)); + + assert(capset->allow_vk_wait_syncs); + + assert(capset->supports_multiple_timelines); + info->max_timeline_count = gpu->max_timeline_count; + + /* Use guest blob allocations from dedicated heap (Host visible memory) */ + //if (gpu->bo_blob_mem == VIOGPU_BLOB_MEM_HOST3D && capset->use_guest_vram) + // info->has_guest_vram = true; + info->has_guest_vram = false; + + if (gpu->ddicb != NULL) { + info->id.has_luid = true; + info->id.node_mask = 1; /* TODO D3D12 interop*/ + memcpy(info->id.luid, &gpu->ddicb->AdapterLuid, VK_LUID_SIZE); + } else { + info->id.has_luid = true; + info->id.node_mask = 1; /* TODO D3D12 interop*/ + static_assert(sizeof(gpu->d3dkmt.luid) == VK_LUID_SIZE); + memcpy(info->id.luid, &gpu->d3dkmt.luid, VK_LUID_SIZE); + } +} + +static NTSTATUS +virtgpu_ioctl_destroy_context(struct virtgpu *gpu) +{ + if 
(gpu->ddicb != NULL) { + D3DDDICB_DESTROYCONTEXT destroy = { + .hContext = gpu->ctx.h, + }; + return hr_to_nt(gpu, gpu->ddicb->pKTCallbacks->pfnDestroyContextCb( + gpu->ddicb->hRTDevice, &destroy)); + } else { + D3DKMT_DESTROYCONTEXT destroy = { + .hContext = gpu->ctx.kmt, + }; + return gpu->d3dkmt.cb.destroyContext(&destroy); + } +} + +static NTSTATUS +virtgpu_ioctl_destroy_device(struct virtgpu *gpu) +{ + D3DKMT_DESTROYDEVICE destroy = { + .hDevice = gpu->d3dkmt.device, + }; + return gpu->d3dkmt.cb.destroyDevice(&destroy); +} + +static NTSTATUS +virtgpu_ioctl_close_adapter(struct virtgpu *gpu) +{ + D3DKMT_CLOSEADAPTER close = { + .hAdapter = gpu->d3dkmt.adapter, + }; + return gpu->d3dkmt.cb.closeAdapter(&close); +} + +static void +virtgpu_destroy(struct vn_renderer *renderer, + const VkAllocationCallbacks *alloc) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + vn_renderer_shmem_cache_fini(&gpu->shmem_cache); + + if (gpu->ctx.h) + virtgpu_ioctl_destroy_context(gpu); + if (gpu->d3dkmt.device) + virtgpu_ioctl_destroy_device(gpu); + if (gpu->d3dkmt.adapter) + virtgpu_ioctl_close_adapter(gpu); + if (gpu->d3dkmt.lib) + FreeLibrary(gpu->d3dkmt.lib); + + mtx_destroy(&gpu->win32_handle_import_mutex); + mtx_destroy(&gpu->ctx.lock); + + util_sparse_array_finish(&gpu->shmem_array); + util_sparse_array_finish(&gpu->bo_array); + + vk_free(alloc, gpu); +} + +static VkResult +virtgpu_init(struct virtgpu *gpu, void *info) +{ + util_sparse_array_init(&gpu->syncobj_array, sizeof(struct virtgpu_sync), + 1024); + + util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem), + 1024); + util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024); + + mtx_init(&gpu->win32_handle_import_mutex, mtx_plain); + + mtx_init(&gpu->ctx.lock, mtx_plain); + + VkResult result = virtgpu_open(gpu, info); + if (result == VK_SUCCESS) + result = virtgpu_init_params(gpu); + if (result == VK_SUCCESS) + result = virtgpu_init_capset(gpu); + if (result == VK_SUCCESS) + result = virtgpu_init_context(gpu); + if (result != VK_SUCCESS) + return result; + + virtgpu_init_shmem_blob_mem(gpu); + + vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base, + virtgpu_shmem_destroy_now); + + virtgpu_init_renderer_info(gpu); + + gpu->base.ops.destroy = virtgpu_destroy; + gpu->base.ops.submit = virtgpu_submit; + gpu->base.ops.wait = virtgpu_wait; + + gpu->base.shmem_ops.create = virtgpu_shmem_create; + gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy; + + gpu->base.bo_ops.create_from_device_memory = + virtgpu_bo_create_from_device_memory; + gpu->base.bo_ops.destroy = virtgpu_bo_destroy; + gpu->base.bo_ops.create_from_handle = virtgpu_bo_create_from_handle; + gpu->base.bo_ops.export_handle = virtgpu_bo_export_handle; + gpu->base.bo_ops.map = virtgpu_bo_map; + gpu->base.bo_ops.flush = virtgpu_bo_flush; + gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate; + + gpu->base.sync_ops.create = virtgpu_sync_create; + gpu->base.sync_ops.create_from_handle = virtgpu_sync_create_from_handle; + gpu->base.sync_ops.destroy = virtgpu_sync_destroy; + gpu->base.sync_ops.export_handle = virtgpu_sync_export_handle; + gpu->base.sync_ops.reset = virtgpu_sync_reset; + gpu->base.sync_ops.read = virtgpu_sync_read; + gpu->base.sync_ops.write = virtgpu_sync_write; + return VK_SUCCESS; +} + +static void +sim_init_mutex(void) +{ + mtx_init(&sim.mutex, mtx_plain); +} + +VkResult +vn_renderer_create_virtgpu_win32(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + const VkInstanceCreateInfo *pCreateInfo, + struct 
vn_renderer **renderer)
+{
+   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
+                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!gpu)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   call_once(&sim.init, sim_init_mutex);
+
+   gpu->instance = instance;
+
+   VkResult result = virtgpu_init(gpu, (void *) pCreateInfo->pNext);
+   if (result != VK_SUCCESS) {
+      virtgpu_destroy(&gpu->base, alloc);
+      return result;
+   }
+
+   *renderer = &gpu->base;
+
+   return VK_SUCCESS;
+}
diff --git a/src/virtio/vulkan/vn_renderer_vtest.c b/src/virtio/vulkan/vn_renderer_vtest.c
index 5fad0b02f84..274ce5f5a8c 100644
--- a/src/virtio/vulkan/vn_renderer_vtest.c
+++ b/src/virtio/vulkan/vn_renderer_vtest.c
@@ -750,6 +750,7 @@ vtest_bo_create_from_device_memory(
    vn_object_id mem_id,
    VkMemoryPropertyFlags flags,
    VkExternalMemoryHandleTypeFlags external_handles,
+   const VkMemoryAllocateInfo *alloc_info,
    struct vn_renderer_bo **out_bo)
 {
    struct vtest *vtest = (struct vtest *)renderer;
diff --git a/src/virtio/vulkan/vn_wsi.c b/src/virtio/vulkan/vn_wsi.c
index 45b4cd1e515..ee2cde70056 100644
--- a/src/virtio/vulkan/vn_wsi.c
+++ b/src/virtio/vulkan/vn_wsi.c
@@ -19,6 +19,10 @@
 #include "vn_physical_device.h"
 #include "vn_queue.h"
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+#include <windows.h>
+#endif
+
 #ifndef DRM_FORMAT_MOD_LINEAR
 #define DRM_FORMAT_MOD_LINEAR 0
 #endif
@@ -251,6 +255,7 @@ vn_wsi_memory_info_init(struct vn_device_memory *mem,
    }
 }
 
+#ifndef VK_USE_PLATFORM_WIN32_KHR
 static uint32_t
 vn_modifier_plane_count(struct vn_physical_device *physical_dev,
                         VkFormat format,
@@ -292,11 +297,13 @@ vn_modifier_plane_count(struct vn_physical_device *physical_dev,
    STACK_ARRAY_FINISH(modifier_props);
    return plane_count;
 }
+#endif
 
 bool
 vn_wsi_validate_image_format_info(struct vn_physical_device *physical_dev,
                                   const VkPhysicalDeviceImageFormatInfo2 *info)
 {
+#ifndef VK_USE_PLATFORM_WIN32_KHR
    const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *modifier_info =
       vk_find_struct_const(
          info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
@@ -347,7 +354,7 @@ vn_wsi_validate_image_format_info(struct vn_physical_device *physical_dev,
          return false;
       }
    }
-
+#endif
    return true;
 }
 
@@ -407,6 +414,7 @@ vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue)
    return vn_ResetFences(dev_handle, 1, &queue->async_present.fence);
 }
 
+#ifndef VK_USE_PLATFORM_WIN32_KHR
 void
 vn_wsi_sync_wait(struct vn_device *dev, int fd)
 {
@@ -435,6 +443,36 @@ vn_wsi_sync_wait(struct vn_device *dev, int fd)
       simple_mtx_lock(&queue->async_present.queue_mutex);
    }
 }
+#else
+void
+vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle)
+{
+   if (dev->renderer->info.has_implicit_fencing)
+      return;
+
+   const pid_t tid = vn_gettid();
+   struct vn_queue *queue = NULL;
+   for (uint32_t i = 0; i < dev->queue_count; i++) {
+      if (dev->queues[i].async_present.initialized &&
+          dev->queues[i].async_present.tid == tid) {
+         queue = &dev->queues[i];
+         break;
+      }
+   }
+
+   if (queue) {
+      simple_mtx_unlock(&queue->async_present.queue_mutex);
+      vn_wsi_chains_unlock(dev, queue->async_present.info, /*all=*/false);
+   }
+
+   WaitForSingleObject(handle, INFINITE);
+
+   if (queue) {
+      vn_wsi_chains_lock(dev, queue->async_present.info, /*all=*/false);
+      simple_mtx_lock(&queue->async_present.queue_mutex);
+   }
+}
+#endif
 
 void
 vn_wsi_flush(struct vn_queue *queue)
@@ -853,6 +891,33 @@ vn_AcquireNextImage2KHR(VkDevice device,
    if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
       return vn_error(dev->instance, result);
 
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+   /* XXX this relies on renderer side doing
implicit fencing */ + if (pAcquireInfo->semaphore != VK_NULL_HANDLE) { + const VkImportSemaphoreWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR, + .semaphore = pAcquireInfo->semaphore, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + .handle = CreateEventA(NULL, TRUE, TRUE, NULL), + }; + //vn_log(dev->instance, "created handle %p", info.handle); + result = vn_ImportSemaphoreWin32HandleKHR(device, &info); + } + + if (result == VK_SUCCESS && pAcquireInfo->fence != VK_NULL_HANDLE) { + const VkImportFenceWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_WIN32_HANDLE_INFO_KHR, + .fence = pAcquireInfo->fence, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, + .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT, + .handle = CreateEventA(NULL, TRUE, TRUE, NULL), + }; + //vn_log(dev->instance, "created handle %p", info.handle); + result = vn_ImportFenceWin32HandleKHR(device, &info); + } + +#else int sync_fd = -1; if (!dev->renderer->info.has_implicit_fencing) { VkDeviceMemory mem_handle = @@ -932,6 +997,8 @@ out: close(sem_fd); if (fence_fd >= 0) close(fence_fd); +#endif + return vn_result(dev->instance, result); } diff --git a/src/virtio/vulkan/vn_wsi.h b/src/virtio/vulkan/vn_wsi.h index 7edf6affc7b..714bf033627 100644 --- a/src/virtio/vulkan/vn_wsi.h +++ b/src/virtio/vulkan/vn_wsi.h @@ -42,8 +42,13 @@ vn_wsi_validate_image_format_info( VkResult vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue); +#ifndef VK_USE_PLATFORM_WIN32_KHR void vn_wsi_sync_wait(struct vn_device *dev, int fd); +#else +void +vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle); +#endif void vn_wsi_flush(struct vn_queue *queue); @@ -91,7 +96,11 @@ vn_wsi_fence_wait(struct vn_device *dev, struct vn_queue *queue) } static inline void +#ifndef VK_USE_PLATFORM_WIN32_KHR vn_wsi_sync_wait(struct vn_device *dev, int fd) +#else +vn_wsi_sync_wait_handle(struct vn_device *dev, void *handle) +#endif { return; } diff --git a/src/vulkan/runtime/vk_device.c b/src/vulkan/runtime/vk_device.c index a2ffe734ff9..34092431fdd 100644 --- a/src/vulkan/runtime/vk_device.c +++ b/src/vulkan/runtime/vk_device.c @@ -248,6 +248,7 @@ vk_device_init(struct vk_device *device, device->enabled_extensions.EXT_calibrated_timestamps) { /* sorted by preference */ const VkTimeDomainKHR calibrate_domains[] = { + VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, }; @@ -840,6 +841,14 @@ vk_device_get_timestamp(struct vk_device *device, VkTimeDomainKHR domain, return VK_SUCCESS; fail: +#else + if (domain == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR) { + LARGE_INTEGER ts; + if (QueryPerformanceCounter(&ts)) { + *timestamp = ts.QuadPart; + return VK_SUCCESS; + } + } #endif /* _WIN32 */ return VK_ERROR_FEATURE_NOT_PRESENT; } diff --git a/src/vulkan/runtime/vk_physical_device.c b/src/vulkan/runtime/vk_physical_device.c index ad11a03810a..83f358e112c 100644 --- a/src/vulkan/runtime/vk_physical_device.c +++ b/src/vulkan/runtime/vk_physical_device.c @@ -297,6 +297,7 @@ vk_common_GetPhysicalDeviceCalibrateableTimeDomainsKHR( const VkTimeDomainKHR host_time_domains[] = { VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, + VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_KHR, }; for (uint32_t i = 0; i < ARRAY_SIZE(host_time_domains); i++) { const VkTimeDomainKHR domain = host_time_domains[i]; diff --git 
a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 44367b6b80b..10669f8dfba 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -3257,7 +3257,8 @@ wsi_WaitForPresentKHR(VkDevice device, VkSwapchainKHR _swapchain,
                       uint64_t presentId, uint64_t timeout)
 {
    VK_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
-   assert(swapchain->wait_for_present);
+   if (!swapchain->wait_for_present)
+      return VK_SUCCESS;
 
    return swapchain->wait_for_present(swapchain, presentId, timeout);
 }
@@ -3266,7 +3267,8 @@ wsi_WaitForPresent2KHR(VkDevice device, VkSwapchainKHR _swapchain,
                        const VkPresentWait2InfoKHR *info)
 {
    VK_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
-   assert(swapchain->wait_for_present2);
+   if (!swapchain->wait_for_present2)
+      return VK_SUCCESS;
 
    return swapchain->wait_for_present2(swapchain, info->presentId,
                                        info->timeout);
 }