From 33ce3040e613bde4a20c1bccd613c94a3d6ac1bb Mon Sep 17 00:00:00 2001 From: squidbus <1249084-squidbus@users.noreply.gitlab.freedesktop.org> Date: Thu, 14 May 2026 21:45:03 -0700 Subject: [PATCH] kk: Support VK_EXT_host_image_copy Metal provides straightforward ways to copy an image to/from memory, and image-to-image copies can be implemented by chaining them. Note that host copy of combined depth-stencil is not supported, as Metal does not allow CPU copy for these formats. Additionally, GPU optimized contents are not allowed with host image copy usage; CTS directly initializes the raw memory of optimized images to random invalid data, which appears to decompress differently on GPU vs CPU, causing the tests to fail. Reviewed-by: Aitor Camacho Part-of: --- docs/features.txt | 2 +- src/kosmickrisp/bridge/mtl_texture.h | 6 + src/kosmickrisp/bridge/mtl_texture.m | 33 +++ src/kosmickrisp/bridge/mtl_types.h | 9 + src/kosmickrisp/bridge/stubs/mtl_texture.c | 12 + src/kosmickrisp/vulkan/kk_cmd_copy.c | 7 - src/kosmickrisp/vulkan/kk_image.c | 231 +++++++++++++++++++- src/kosmickrisp/vulkan/kk_image.h | 7 + src/kosmickrisp/vulkan/kk_image_layout.c | 13 +- src/kosmickrisp/vulkan/kk_image_layout.h | 4 +- src/kosmickrisp/vulkan/kk_physical_device.c | 5 +- 11 files changed, 313 insertions(+), 16 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index de68f4ca7e6..f14bcbdac69 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -532,7 +532,7 @@ Vulkan 1.4 -- all DONE: anv, hk, lvp, nvk, panvk/v10+, radv/gfx8+, tu/a7xx+, vn VK_KHR_shader_float_controls2 DONE (anv, lvp, nvk, panvk/v10+, radv, tu, vn) VK_KHR_shader_subgroup_rotate DONE (anv, kk, lvp, nvk, panvk, radv, tu, vn) VK_KHR_vertex_attribute_divisor DONE (anv, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) - VK_EXT_host_image_copy DONE (anv, lvp, nvk/Turing+, panvk, radv/gfx10+, tu, vn) + VK_EXT_host_image_copy DONE (anv, kk, lvp, nvk/Turing+, panvk, radv/gfx10+, tu, vn) VK_EXT_pipeline_protected_access DONE (anv/gfx12+, 
radv, vn) VK_EXT_pipeline_robustness DONE (anv, kk, lvp, nvk, panvk, radv, v3dv, tu, vn) diff --git a/src/kosmickrisp/bridge/mtl_texture.h b/src/kosmickrisp/bridge/mtl_texture.h index ab1de341b01..b32170b226b 100644 --- a/src/kosmickrisp/bridge/mtl_texture.h +++ b/src/kosmickrisp/bridge/mtl_texture.h @@ -24,4 +24,10 @@ mtl_texture * mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, const struct kk_view_layout *layout); +void mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr, + struct mtl_texture_memory_copy *data); + +void mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr, + struct mtl_texture_memory_copy *data); + #endif /* MTL_TEXTURE_H */ diff --git a/src/kosmickrisp/bridge/mtl_texture.m b/src/kosmickrisp/bridge/mtl_texture.m index 5042e7f2926..496ce143324 100644 --- a/src/kosmickrisp/bridge/mtl_texture.m +++ b/src/kosmickrisp/bridge/mtl_texture.m @@ -92,3 +92,36 @@ mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, const struct kk_view_ } } +void +mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr, + struct mtl_texture_memory_copy *data) +{ + @autoreleasepool { + id tex = (id)texture; + MTLRegion region = MTLRegionMake3D(data->image_origin.x, data->image_origin.y, data->image_origin.z, + data->image_size.x, data->image_size.y, data->image_size.z); + return [tex getBytes:host_ptr + bytesPerRow:data->buffer_stride_B + bytesPerImage:data->buffer_2d_image_size_B + fromRegion:region + mipmapLevel:data->image_level + slice:data->image_slice]; + } +} + +void +mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr, + struct mtl_texture_memory_copy *data) +{ + @autoreleasepool { + id tex = (id)texture; + MTLRegion region = MTLRegionMake3D(data->image_origin.x, data->image_origin.y, data->image_origin.z, + data->image_size.x, data->image_size.y, data->image_size.z); + return [tex replaceRegion:region + mipmapLevel:data->image_level + slice:data->image_slice + withBytes:host_ptr + 
bytesPerRow:data->buffer_stride_B + bytesPerImage:data->buffer_2d_image_size_B]; + } +} diff --git a/src/kosmickrisp/bridge/mtl_types.h b/src/kosmickrisp/bridge/mtl_types.h index 600cb0db992..aa4eedf7760 100644 --- a/src/kosmickrisp/bridge/mtl_types.h +++ b/src/kosmickrisp/bridge/mtl_types.h @@ -275,4 +275,13 @@ struct mtl_buffer_image_copy { enum mtl_blit_options options; }; +struct mtl_texture_memory_copy { + struct mtl_size image_size; + struct mtl_origin image_origin; + size_t buffer_stride_B; + size_t buffer_2d_image_size_B; + size_t image_slice; + size_t image_level; +}; + #endif /* KK_MTL_TYPES_H */ diff --git a/src/kosmickrisp/bridge/stubs/mtl_texture.c b/src/kosmickrisp/bridge/stubs/mtl_texture.c index 8ed3b22ec95..f71db0c699e 100644 --- a/src/kosmickrisp/bridge/stubs/mtl_texture.c +++ b/src/kosmickrisp/bridge/stubs/mtl_texture.c @@ -27,3 +27,15 @@ mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, { return NULL; } + +void +mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr, + struct mtl_texture_memory_copy *data) +{ +} + +void +mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr, + struct mtl_texture_memory_copy *data) +{ +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_copy.c b/src/kosmickrisp/vulkan/kk_cmd_copy.c index 3ebb65c60bd..4a57d272a8a 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_copy.c +++ b/src/kosmickrisp/vulkan/kk_cmd_copy.c @@ -83,13 +83,6 @@ vk_buffer_image_copy_to_mtl_buffer_image_copy( return copy; } -#define kk_foreach_slice(ndx, image, subresource_member) \ - for (uint32_t ndx = region->subresource_member.baseArrayLayer; \ - ndx < (region->subresource_member.baseArrayLayer + \ - vk_image_subresource_layer_count(&image->vk, \ - &region->subresource_member)); \ - ++ndx) - VKAPI_ATTR void VKAPI_CALL kk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) diff --git a/src/kosmickrisp/vulkan/kk_image.c b/src/kosmickrisp/vulkan/kk_image.c index 
08bab315697..0ccf08dd470 100644 --- a/src/kosmickrisp/vulkan/kk_image.c +++ b/src/kosmickrisp/vulkan/kk_image.c @@ -14,6 +14,7 @@ #include "kk_physical_device.h" #include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" #include "vk_enum_defines.h" #include "vk_enum_to_str.h" @@ -91,6 +92,10 @@ kk_get_image_plane_format_features(struct kk_physical_device *pdev, if (features != 0) { features |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT; features |= VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + + /* Metal does not allow CPU access to combined depth-stencil */ + if (!util_format_is_depth_and_stencil(p_format)) + features |= VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT; } return features; @@ -230,8 +235,11 @@ kk_GetPhysicalDeviceImageFormatProperties2( pImageFormatInfo->type == VK_IMAGE_TYPE_3D) return VK_ERROR_FORMAT_NOT_SUPPORTED; + const enum pipe_format p_format = + vk_format_to_pipe_format(pImageFormatInfo->format); + /* Metal does not support EAC/ETC formats for 3D textures. 
*/ - if (util_format_is_etc(vk_format_to_pipe_format(pImageFormatInfo->format)) && + if (util_format_is_etc(p_format) && pImageFormatInfo->type == VK_IMAGE_TYPE_3D) return VK_ERROR_FORMAT_NOT_SUPPORTED; @@ -484,8 +492,16 @@ kk_GetPhysicalDeviceImageFormatProperties2( } case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: { VkHostImageCopyDevicePerformanceQueryEXT *host_props = (void *)s; - host_props->optimalDeviceAccess = true; - host_props->identicalMemoryLayout = true; + /* Optimal device access and identical memory layout if optimization + * is the same both with and without host transfer usage */ + bool with_host_transfer = kk_image_layout_can_optimize( + pImageFormatInfo->usage, pImageFormatInfo->tiling, p_format); + bool without_host_transfer = kk_image_layout_can_optimize( + pImageFormatInfo->usage & ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT, + pImageFormatInfo->tiling, p_format); + host_props->optimalDeviceAccess = + with_host_transfer == without_host_transfer; + host_props->identicalMemoryLayout = host_props->optimalDeviceAccess; break; } default: @@ -778,6 +794,12 @@ kk_get_image_subresource_layout(struct kk_device *dev, struct kk_image *image, .arrayPitch = plane->layout.layer_stride_B, .depthPitch = 1u, }; + + VkSubresourceHostMemcpySize *memcpy_size = + vk_find_struct(pLayout, SUBRESOURCE_HOST_MEMCPY_SIZE_EXT); + if (memcpy_size) { + memcpy_size->size = pLayout->subresourceLayout.size; + } } VKAPI_ATTR void VKAPI_CALL @@ -935,3 +957,206 @@ kk_GetImageOpaqueCaptureDescriptorDataEXT( { return VK_SUCCESS; } + +struct kk_host_copy_info { + struct mtl_texture_memory_copy mtl_data; + size_t buffer_slice_size_B; +}; + +static struct kk_host_copy_info +vk_image_to_memory_copy_to_mtl_texture_memory_copy( + const VkImageToMemoryCopy *region, const struct kk_image_plane *plane) +{ + struct kk_host_copy_info copy; + enum pipe_format p_format = plane->layout.format.pipe; + if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + p_format = 
util_format_get_depth_only(p_format); + else if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) + p_format = PIPE_FORMAT_S8_UINT; + + const uint32_t buffer_width = region->memoryRowLength + ? region->memoryRowLength + : region->imageExtent.width; + const uint32_t buffer_height = region->memoryImageHeight + ? region->memoryImageHeight + : region->imageExtent.height; + + const uint32_t buffer_stride_B = + util_format_get_stride(p_format, buffer_width); + const uint32_t buffer_size_2d_B = + util_format_get_2d_size(p_format, buffer_stride_B, buffer_height); + + /* Metal requires this value to be 0 for 2D images, otherwise the number of + * bytes between each 2D image of a 3D texture */ + copy.mtl_data.buffer_2d_image_size_B = + plane->layout.depth_px == 1u ? 0u : buffer_size_2d_B; + copy.mtl_data.buffer_stride_B = buffer_stride_B; + copy.mtl_data.image_size = vk_extent_3d_to_mtl_size(&region->imageExtent); + copy.mtl_data.image_origin = + vk_offset_3d_to_mtl_origin(&region->imageOffset); + copy.mtl_data.image_level = region->imageSubresource.mipLevel; + copy.buffer_slice_size_B = buffer_size_2d_B * region->imageExtent.depth; + + return copy; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CopyImageToMemory(UNUSED VkDevice device, + const VkCopyImageToMemoryInfo *pCopyImageToMemoryInfo) +{ + VK_FROM_HANDLE(kk_image, image, pCopyImageToMemoryInfo->srcImage); + + for (unsigned r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) { + const VkImageToMemoryCopy *region = &pCopyImageToMemoryInfo->pRegions[r]; + + const uint8_t plane_index = kk_image_memory_aspects_to_plane( + image, region->imageSubresource.aspectMask); + struct kk_image_plane *plane = &image->planes[plane_index]; + + struct kk_host_copy_info info = + vk_image_to_memory_copy_to_mtl_texture_memory_copy(region, plane); + + uint8_t *host_ptr = region->pHostPointer; + kk_foreach_slice(slice, image, imageSubresource) + { + info.mtl_data.image_slice = slice; + mtl_texture_get_bytes(plane->mtl_handle, 
host_ptr, &info.mtl_data); + host_ptr += info.buffer_slice_size_B; + } + } + + return VK_SUCCESS; +} + +static struct kk_host_copy_info +vk_memory_to_image_copy_to_mtl_texture_memory_copy( + const VkMemoryToImageCopy *region, const struct kk_image_plane *plane) +{ + /* Prevent code duplication by mapping between structures */ + const VkImageToMemoryCopy mapped = { + .memoryRowLength = region->memoryRowLength, + .memoryImageHeight = region->memoryImageHeight, + .imageSubresource = region->imageSubresource, + .imageOffset = region->imageOffset, + .imageExtent = region->imageExtent, + }; + return vk_image_to_memory_copy_to_mtl_texture_memory_copy(&mapped, plane); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CopyMemoryToImage(UNUSED VkDevice device, + const VkCopyMemoryToImageInfo *pCopyMemoryToImageInfo) +{ + VK_FROM_HANDLE(kk_image, image, pCopyMemoryToImageInfo->dstImage); + + for (int r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) { + const VkMemoryToImageCopy *region = &pCopyMemoryToImageInfo->pRegions[r]; + + const uint8_t plane_index = kk_image_memory_aspects_to_plane( + image, region->imageSubresource.aspectMask); + struct kk_image_plane *plane = &image->planes[plane_index]; + + struct kk_host_copy_info info = + vk_memory_to_image_copy_to_mtl_texture_memory_copy(region, plane); + + const uint8_t *host_ptr = region->pHostPointer; + kk_foreach_slice(slice, image, imageSubresource) + { + info.mtl_data.image_slice = slice; + mtl_texture_replace_region(plane->mtl_handle, host_ptr, + &info.mtl_data); + host_ptr += info.buffer_slice_size_B; + } + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CopyImageToImage(VkDevice device, + const VkCopyImageToImageInfo *pCopyImageToImageInfo) +{ + VK_FROM_HANDLE(kk_image, src, pCopyImageToImageInfo->srcImage); + + /* Determine the buffer size required to satisfy all copies */ + uint64_t buffer_size = 0; + for (uint32_t i = 0u; i < pCopyImageToImageInfo->regionCount; i++) { + const VkImageCopy2 *region = 
&pCopyImageToImageInfo->pRegions[i]; + + uint8_t src_index = + kk_image_aspects_to_plane(src, region->srcSubresource.aspectMask); + struct kk_image_plane *src_plane = &src->planes[src_index]; + + buffer_size = MAX2(buffer_size, src_plane->layout.size_B); + } + + /* Metal does not provide a direct image-to-image host copy, so we implement + * host image-to-image copy using CopyImageToMemory and CopyMemoryToImage */ + uint8_t *temp = ralloc_size(NULL, buffer_size); + if (!temp) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0u; i < pCopyImageToImageInfo->regionCount; ++i) { + const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[i]; + + VkImageToMemoryCopy src_copy = { + .sType = VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY, + .pNext = NULL, + .pHostPointer = temp, + .memoryRowLength = 0, + .memoryImageHeight = 0, + .imageSubresource = region->srcSubresource, + .imageOffset = region->srcOffset, + .imageExtent = region->extent, + }; + VkCopyImageToMemoryInfo src_copy_info = { + .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO, + .pNext = NULL, + .flags = pCopyImageToImageInfo->flags, + .srcImage = pCopyImageToImageInfo->srcImage, + .srcImageLayout = pCopyImageToImageInfo->srcImageLayout, + .regionCount = 1, + .pRegions = &src_copy, + }; + result = kk_CopyImageToMemory(device, &src_copy_info); + if (result != VK_SUCCESS) + break; + + VkMemoryToImageCopy dst_copy = { + .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY, + .pNext = NULL, + .pHostPointer = temp, + .memoryRowLength = 0, + .memoryImageHeight = 0, + .imageSubresource = region->dstSubresource, + .imageOffset = region->dstOffset, + .imageExtent = region->extent, + }; + VkCopyMemoryToImageInfo dst_copy_info = { + .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO, + .pNext = NULL, + .flags = pCopyImageToImageInfo->flags, + .dstImage = pCopyImageToImageInfo->dstImage, + .dstImageLayout = pCopyImageToImageInfo->dstImageLayout, + .regionCount = 1, + .pRegions = 
&dst_copy, + }; + result = kk_CopyMemoryToImage(device, &dst_copy_info); + if (result != VK_SUCCESS) + break; + } + + ralloc_free(temp); + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_TransitionImageLayoutEXT( + UNUSED VkDevice device, UNUSED uint32_t transitionCount, + UNUSED const VkHostImageLayoutTransitionInfoEXT *transitions) +{ + /* We don't do anything with layouts so this should be a no-op */ + return VK_SUCCESS; +} diff --git a/src/kosmickrisp/vulkan/kk_image.h b/src/kosmickrisp/vulkan/kk_image.h index f1980effc9f..83a8bb9c63d 100644 --- a/src/kosmickrisp/vulkan/kk_image.h +++ b/src/kosmickrisp/vulkan/kk_image.h @@ -51,6 +51,13 @@ struct kk_image { struct kk_image_plane planes[3]; }; +#define kk_foreach_slice(ndx, image, subresource_member) \ + for (uint32_t ndx = region->subresource_member.baseArrayLayer; \ + ndx < (region->subresource_member.baseArrayLayer + \ + vk_image_subresource_layer_count(&image->vk, \ + &region->subresource_member)); \ + ++ndx) + static inline mtl_resource * kk_image_to_mtl_resource(const struct kk_image *image, int plane) { diff --git a/src/kosmickrisp/vulkan/kk_image_layout.c b/src/kosmickrisp/vulkan/kk_image_layout.c index 2c71eb009bc..5b7b0a723f2 100644 --- a/src/kosmickrisp/vulkan/kk_image_layout.c +++ b/src/kosmickrisp/vulkan/kk_image_layout.c @@ -77,12 +77,21 @@ vk_image_usage_flags_to_mtl_texture_usage(VkImageUsageFlags usage_flags, } bool -kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling) +kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling, + enum pipe_format format) { /* Can only optimize if tiling is optimal */ if (tiling != VK_IMAGE_TILING_OPTIMAL) return false; + /* Cannot optimize if host transfer for a format that would use Apple's + * lossless compression. Otherwise, CTS tests which populate memory with + * random data fail due to differences in how invalid optimized data is + * decompressed by GPU vs CPU. 
*/ + if ((usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT) && + !util_format_is_compressed(format)) + return false; + return true; } @@ -101,7 +110,7 @@ kk_image_layout_init(const struct kk_device *dev, layout->levels = image->mip_levels; layout->linear = image->tiling != VK_IMAGE_TILING_OPTIMAL; layout->optimized_layout = kk_image_layout_can_optimize( - image->usage, image->tiling); + image->usage, image->tiling, format); layout->usage = vk_image_usage_flags_to_mtl_texture_usage( image->usage, image->create_flags, supported_format->atomic); layout->format.pipe = format; diff --git a/src/kosmickrisp/vulkan/kk_image_layout.h b/src/kosmickrisp/vulkan/kk_image_layout.h index c32fe791829..43bdd8043ee 100644 --- a/src/kosmickrisp/vulkan/kk_image_layout.h +++ b/src/kosmickrisp/vulkan/kk_image_layout.h @@ -137,8 +137,8 @@ struct kk_view_layout { uint16_t min_lod_clamp; }; -bool kk_image_layout_can_optimize(VkImageUsageFlags usage, - VkImageTiling tiling); +bool kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling, + enum pipe_format format); void kk_image_layout_init(const struct kk_device *dev, const struct vk_image *image, diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index d67b5034308..10f7d3ae284 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -129,6 +129,7 @@ kk_get_device_extensions(const struct kk_instance *instance, .KHR_vertex_attribute_divisor = true, .EXT_global_priority = true, .EXT_global_priority_query = true, + .EXT_host_image_copy = true, .EXT_index_type_uint8 = true, .EXT_line_rasterization = true, .EXT_pipeline_robustness = true, @@ -302,6 +303,7 @@ kk_get_device_features( /* Vulkan 1.4 */ .bresenhamLines = true, .globalPriorityQuery = true, + .hostImageCopy = true, .indexTypeUint8 = true, .maintenance5 = true, .maintenance6 = true, @@ -778,7 +780,8 @@ kk_get_device_properties(const struct kk_physical_device *pdev, 
memcpy(properties->optimalTilingLayoutUUID, instance->driver_build_sha, VK_UUID_SIZE); - properties->identicalMemoryTypeRequirements = false; + /* We're a UMA so we can always map every kind of memory */ + properties->identicalMemoryTypeRequirements = true; /* VK_EXT_shader_module_identifier */ STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==