kk: Support VK_EXT_host_image_copy

Metal provides straightforward ways to copy an image to/from memory,
and image-to-image copies can be implemented by chaining them.

Note that host copy of combined depth-stencil is not supported, as
Metal does not allow CPU copy for these formats. Additionally, GPU
optimized contents are not allowed with host image copy usage; CTS
directly initializes the raw memory of optimized images to random
invalid data, which appears to decompress differently on GPU vs CPU
and fail.

Reviewed-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41714>
This commit is contained in:
squidbus 2026-05-14 21:45:03 -07:00
parent 76125cb7af
commit 33ce3040e6
11 changed files with 313 additions and 16 deletions

View file

@ -532,7 +532,7 @@ Vulkan 1.4 -- all DONE: anv, hk, lvp, nvk, panvk/v10+, radv/gfx8+, tu/a7xx+, vn
VK_KHR_shader_float_controls2 DONE (anv, lvp, nvk, panvk/v10+, radv, tu, vn)
VK_KHR_shader_subgroup_rotate DONE (anv, kk, lvp, nvk, panvk, radv, tu, vn)
VK_KHR_vertex_attribute_divisor DONE (anv, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_EXT_host_image_copy DONE (anv, lvp, nvk/Turing+, panvk, radv/gfx10+, tu, vn)
VK_EXT_host_image_copy DONE (anv, kk, lvp, nvk/Turing+, panvk, radv/gfx10+, tu, vn)
VK_EXT_pipeline_protected_access DONE (anv/gfx12+, radv, vn)
VK_EXT_pipeline_robustness DONE (anv, kk, lvp, nvk, panvk, radv, v3dv, tu, vn)

View file

@ -24,4 +24,10 @@ mtl_texture *
mtl_new_texture_view_with_no_swizzle(mtl_texture *texture,
const struct kk_view_layout *layout);
void mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr,
struct mtl_texture_memory_copy *data);
void mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr,
struct mtl_texture_memory_copy *data);
#endif /* MTL_TEXTURE_H */

View file

@ -92,3 +92,36 @@ mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, const struct kk_view_
}
}
void
mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr,
struct mtl_texture_memory_copy *data)
{
@autoreleasepool {
id<MTLTexture> tex = (id<MTLTexture>)texture;
MTLRegion region = MTLRegionMake3D(data->image_origin.x, data->image_origin.y, data->image_origin.z,
data->image_size.x, data->image_size.y, data->image_size.z);
return [tex getBytes:host_ptr
bytesPerRow:data->buffer_stride_B
bytesPerImage:data->buffer_2d_image_size_B
fromRegion:region
mipmapLevel:data->image_level
slice:data->image_slice];
}
}
void
mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr,
struct mtl_texture_memory_copy *data)
{
@autoreleasepool {
id<MTLTexture> tex = (id<MTLTexture>)texture;
MTLRegion region = MTLRegionMake3D(data->image_origin.x, data->image_origin.y, data->image_origin.z,
data->image_size.x, data->image_size.y, data->image_size.z);
return [tex replaceRegion:region
mipmapLevel:data->image_level
slice:data->image_slice
withBytes:host_ptr
bytesPerRow:data->buffer_stride_B
bytesPerImage:data->buffer_2d_image_size_B];
}
}

View file

@ -275,4 +275,13 @@ struct mtl_buffer_image_copy {
enum mtl_blit_options options;
};
struct mtl_texture_memory_copy {
struct mtl_size image_size;
struct mtl_origin image_origin;
size_t buffer_stride_B;
size_t buffer_2d_image_size_B;
size_t image_slice;
size_t image_level;
};
#endif /* KK_MTL_TYPES_H */

View file

@ -27,3 +27,15 @@ mtl_new_texture_view_with_no_swizzle(mtl_texture *texture,
{
return NULL;
}
void
mtl_texture_get_bytes(mtl_texture *texture, void *host_ptr,
struct mtl_texture_memory_copy *data)
{
}
void
mtl_texture_replace_region(mtl_texture *texture, const void *host_ptr,
struct mtl_texture_memory_copy *data)
{
}

View file

@ -83,13 +83,6 @@ vk_buffer_image_copy_to_mtl_buffer_image_copy(
return copy;
}
#define kk_foreach_slice(ndx, image, subresource_member) \
for (uint32_t ndx = region->subresource_member.baseArrayLayer; \
ndx < (region->subresource_member.baseArrayLayer + \
vk_image_subresource_layer_count(&image->vk, \
&region->subresource_member)); \
++ndx)
VKAPI_ATTR void VKAPI_CALL
kk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)

View file

@ -14,6 +14,7 @@
#include "kk_physical_device.h"
#include "kosmickrisp/bridge/mtl_bridge.h"
#include "kosmickrisp/bridge/vk_to_mtl_map.h"
#include "vk_enum_defines.h"
#include "vk_enum_to_str.h"
@ -91,6 +92,10 @@ kk_get_image_plane_format_features(struct kk_physical_device *pdev,
if (features != 0) {
features |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT;
features |= VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
/* Metal does not allow CPU access to combined depth-stencil */
if (!util_format_is_depth_and_stencil(p_format))
features |= VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT;
}
return features;
@ -230,8 +235,11 @@ kk_GetPhysicalDeviceImageFormatProperties2(
pImageFormatInfo->type == VK_IMAGE_TYPE_3D)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
const enum pipe_format p_format =
vk_format_to_pipe_format(pImageFormatInfo->format);
/* Metal does not support EAC/ETC formats for 3D textures. */
if (util_format_is_etc(vk_format_to_pipe_format(pImageFormatInfo->format)) &&
if (util_format_is_etc(p_format) &&
pImageFormatInfo->type == VK_IMAGE_TYPE_3D)
return VK_ERROR_FORMAT_NOT_SUPPORTED;
@ -484,8 +492,16 @@ kk_GetPhysicalDeviceImageFormatProperties2(
}
case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: {
VkHostImageCopyDevicePerformanceQueryEXT *host_props = (void *)s;
host_props->optimalDeviceAccess = true;
host_props->identicalMemoryLayout = true;
/* Optimal device access and identical memory layout if optimization
* is the same both with and without host transfer usage */
bool with_host_transfer = kk_image_layout_can_optimize(
pImageFormatInfo->usage, pImageFormatInfo->tiling, p_format);
bool without_host_transfer = kk_image_layout_can_optimize(
pImageFormatInfo->usage & ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT,
pImageFormatInfo->tiling, p_format);
host_props->optimalDeviceAccess =
with_host_transfer == without_host_transfer;
host_props->identicalMemoryLayout = host_props->optimalDeviceAccess;
break;
}
default:
@ -778,6 +794,12 @@ kk_get_image_subresource_layout(struct kk_device *dev, struct kk_image *image,
.arrayPitch = plane->layout.layer_stride_B,
.depthPitch = 1u,
};
VkSubresourceHostMemcpySize *memcpy_size =
vk_find_struct(pLayout, SUBRESOURCE_HOST_MEMCPY_SIZE_EXT);
if (memcpy_size) {
memcpy_size->size = pLayout->subresourceLayout.size;
}
}
VKAPI_ATTR void VKAPI_CALL
@ -935,3 +957,206 @@ kk_GetImageOpaqueCaptureDescriptorDataEXT(
{
return VK_SUCCESS;
}
struct kk_host_copy_info {
struct mtl_texture_memory_copy mtl_data;
size_t buffer_slice_size_B;
};
static struct kk_host_copy_info
vk_image_to_memory_copy_to_mtl_texture_memory_copy(
const VkImageToMemoryCopy *region, const struct kk_image_plane *plane)
{
struct kk_host_copy_info copy;
enum pipe_format p_format = plane->layout.format.pipe;
if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
p_format = util_format_get_depth_only(p_format);
else if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
p_format = PIPE_FORMAT_S8_UINT;
const uint32_t buffer_width = region->memoryRowLength
? region->memoryRowLength
: region->imageExtent.width;
const uint32_t buffer_height = region->memoryImageHeight
? region->memoryImageHeight
: region->imageExtent.height;
const uint32_t buffer_stride_B =
util_format_get_stride(p_format, buffer_width);
const uint32_t buffer_size_2d_B =
util_format_get_2d_size(p_format, buffer_stride_B, buffer_height);
/* Metal requires this value to be 0 for 2D images, otherwise the number of
* bytes between each 2D image of a 3D texture */
copy.mtl_data.buffer_2d_image_size_B =
plane->layout.depth_px == 1u ? 0u : buffer_size_2d_B;
copy.mtl_data.buffer_stride_B = buffer_stride_B;
copy.mtl_data.image_size = vk_extent_3d_to_mtl_size(&region->imageExtent);
copy.mtl_data.image_origin =
vk_offset_3d_to_mtl_origin(&region->imageOffset);
copy.mtl_data.image_level = region->imageSubresource.mipLevel;
copy.buffer_slice_size_B = buffer_size_2d_B * region->imageExtent.depth;
return copy;
}
VKAPI_ATTR VkResult VKAPI_CALL
kk_CopyImageToMemory(UNUSED VkDevice device,
const VkCopyImageToMemoryInfo *pCopyImageToMemoryInfo)
{
VK_FROM_HANDLE(kk_image, image, pCopyImageToMemoryInfo->srcImage);
for (unsigned r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) {
const VkImageToMemoryCopy *region = &pCopyImageToMemoryInfo->pRegions[r];
const uint8_t plane_index = kk_image_memory_aspects_to_plane(
image, region->imageSubresource.aspectMask);
struct kk_image_plane *plane = &image->planes[plane_index];
struct kk_host_copy_info info =
vk_image_to_memory_copy_to_mtl_texture_memory_copy(region, plane);
uint8_t *host_ptr = region->pHostPointer;
kk_foreach_slice(slice, image, imageSubresource)
{
info.mtl_data.image_slice = slice;
mtl_texture_get_bytes(plane->mtl_handle, host_ptr, &info.mtl_data);
host_ptr += info.buffer_slice_size_B;
}
}
return VK_SUCCESS;
}
static struct kk_host_copy_info
vk_memory_to_image_copy_to_mtl_texture_memory_copy(
const VkMemoryToImageCopy *region, const struct kk_image_plane *plane)
{
/* Prevent code duplication by mapping between structures */
const VkImageToMemoryCopy mapped = {
.memoryRowLength = region->memoryRowLength,
.memoryImageHeight = region->memoryImageHeight,
.imageSubresource = region->imageSubresource,
.imageOffset = region->imageOffset,
.imageExtent = region->imageExtent,
};
return vk_image_to_memory_copy_to_mtl_texture_memory_copy(&mapped, plane);
}
VKAPI_ATTR VkResult VKAPI_CALL
kk_CopyMemoryToImage(UNUSED VkDevice device,
const VkCopyMemoryToImageInfo *pCopyMemoryToImageInfo)
{
VK_FROM_HANDLE(kk_image, image, pCopyMemoryToImageInfo->dstImage);
for (int r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) {
const VkMemoryToImageCopy *region = &pCopyMemoryToImageInfo->pRegions[r];
const uint8_t plane_index = kk_image_memory_aspects_to_plane(
image, region->imageSubresource.aspectMask);
struct kk_image_plane *plane = &image->planes[plane_index];
struct kk_host_copy_info info =
vk_memory_to_image_copy_to_mtl_texture_memory_copy(region, plane);
const uint8_t *host_ptr = region->pHostPointer;
kk_foreach_slice(slice, image, imageSubresource)
{
info.mtl_data.image_slice = slice;
mtl_texture_replace_region(plane->mtl_handle, host_ptr,
&info.mtl_data);
host_ptr += info.buffer_slice_size_B;
}
}
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
kk_CopyImageToImage(VkDevice device,
const VkCopyImageToImageInfo *pCopyImageToImageInfo)
{
VK_FROM_HANDLE(kk_image, src, pCopyImageToImageInfo->srcImage);
/* Determine the buffer size required to satisfy all copies */
uint64_t buffer_size = 0;
for (uint32_t i = 0u; i < pCopyImageToImageInfo->regionCount; i++) {
const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[i];
uint8_t src_index =
kk_image_aspects_to_plane(src, region->srcSubresource.aspectMask);
struct kk_image_plane *src_plane = &src->planes[src_index];
buffer_size = MAX2(buffer_size, src_plane->layout.size_B);
}
/* Metal does not provide a direct image-to-image host copy, so we implement
* host image-to-image copy using CopyImageToMemory and CopyMemoryToImage */
uint8_t *temp = ralloc_size(NULL, buffer_size);
if (!temp)
return VK_ERROR_OUT_OF_HOST_MEMORY;
VkResult result = VK_SUCCESS;
for (uint32_t i = 0u; i < pCopyImageToImageInfo->regionCount; ++i) {
const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[i];
VkImageToMemoryCopy src_copy = {
.sType = VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY,
.pNext = NULL,
.pHostPointer = temp,
.memoryRowLength = 0,
.memoryImageHeight = 0,
.imageSubresource = region->srcSubresource,
.imageOffset = region->srcOffset,
.imageExtent = region->extent,
};
VkCopyImageToMemoryInfo src_copy_info = {
.sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO,
.pNext = NULL,
.flags = pCopyImageToImageInfo->flags,
.srcImage = pCopyImageToImageInfo->srcImage,
.srcImageLayout = pCopyImageToImageInfo->srcImageLayout,
.regionCount = 1,
.pRegions = &src_copy,
};
result = kk_CopyImageToMemory(device, &src_copy_info);
if (result != VK_SUCCESS)
break;
VkMemoryToImageCopy dst_copy = {
.sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY,
.pNext = NULL,
.pHostPointer = temp,
.memoryRowLength = 0,
.memoryImageHeight = 0,
.imageSubresource = region->dstSubresource,
.imageOffset = region->dstOffset,
.imageExtent = region->extent,
};
VkCopyMemoryToImageInfo dst_copy_info = {
.sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
.pNext = NULL,
.flags = pCopyImageToImageInfo->flags,
.dstImage = pCopyImageToImageInfo->dstImage,
.dstImageLayout = pCopyImageToImageInfo->dstImageLayout,
.regionCount = 1,
.pRegions = &dst_copy,
};
result = kk_CopyMemoryToImage(device, &dst_copy_info);
if (result != VK_SUCCESS)
break;
}
ralloc_free(temp);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
kk_TransitionImageLayoutEXT(
UNUSED VkDevice device, UNUSED uint32_t transitionCount,
UNUSED const VkHostImageLayoutTransitionInfoEXT *transitions)
{
/* We don't do anything with layouts so this should be a no-op */
return VK_SUCCESS;
}

View file

@ -51,6 +51,13 @@ struct kk_image {
struct kk_image_plane planes[3];
};
#define kk_foreach_slice(ndx, image, subresource_member) \
for (uint32_t ndx = region->subresource_member.baseArrayLayer; \
ndx < (region->subresource_member.baseArrayLayer + \
vk_image_subresource_layer_count(&image->vk, \
&region->subresource_member)); \
++ndx)
static inline mtl_resource *
kk_image_to_mtl_resource(const struct kk_image *image, int plane)
{

View file

@ -77,12 +77,21 @@ vk_image_usage_flags_to_mtl_texture_usage(VkImageUsageFlags usage_flags,
}
bool
kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling)
kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling,
enum pipe_format format)
{
/* Can only optimize if tiling is optimal */
if (tiling != VK_IMAGE_TILING_OPTIMAL)
return false;
/* Cannot optimize if host transfer for a format that would use Apple's
* lossless compression. Otherwise, CTS tests which populate memory with
* random data fail due to differences in how invalid optimized data is
* decompressed by GPU vs CPU. */
if ((usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT) &&
!util_format_is_compressed(format))
return false;
return true;
}
@ -101,7 +110,7 @@ kk_image_layout_init(const struct kk_device *dev,
layout->levels = image->mip_levels;
layout->linear = image->tiling != VK_IMAGE_TILING_OPTIMAL;
layout->optimized_layout = kk_image_layout_can_optimize(
image->usage, image->tiling);
image->usage, image->tiling, format);
layout->usage = vk_image_usage_flags_to_mtl_texture_usage(
image->usage, image->create_flags, supported_format->atomic);
layout->format.pipe = format;

View file

@ -137,8 +137,8 @@ struct kk_view_layout {
uint16_t min_lod_clamp;
};
bool kk_image_layout_can_optimize(VkImageUsageFlags usage,
VkImageTiling tiling);
bool kk_image_layout_can_optimize(VkImageUsageFlags usage, VkImageTiling tiling,
enum pipe_format format);
void kk_image_layout_init(const struct kk_device *dev,
const struct vk_image *image,

View file

@ -129,6 +129,7 @@ kk_get_device_extensions(const struct kk_instance *instance,
.KHR_vertex_attribute_divisor = true,
.EXT_global_priority = true,
.EXT_global_priority_query = true,
.EXT_host_image_copy = true,
.EXT_index_type_uint8 = true,
.EXT_line_rasterization = true,
.EXT_pipeline_robustness = true,
@ -302,6 +303,7 @@ kk_get_device_features(
/* Vulkan 1.4 */
.bresenhamLines = true,
.globalPriorityQuery = true,
.hostImageCopy = true,
.indexTypeUint8 = true,
.maintenance5 = true,
.maintenance6 = true,
@ -778,7 +780,8 @@ kk_get_device_properties(const struct kk_physical_device *pdev,
memcpy(properties->optimalTilingLayoutUUID, instance->driver_build_sha,
VK_UUID_SIZE);
properties->identicalMemoryTypeRequirements = false;
/* We're a UMR so we can always map every kind of memory */
properties->identicalMemoryTypeRequirements = true;
/* VK_EXT_shader_module_identifier */
STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==