render/vulkan: Use VK_EXT_host_image_copy for shm

When uploading shm buffers to our internal backing texture, we first
allocate a suitable chunk from our staging buffer accessible from both CPU
and GPU, and then we queue a GPU-side copy from the staging buffer to
the texture. This is a lot of copying, especially on iGPUs where
everything is system memory anyway.

Instead, use VK_EXT_host_image_copy when available for formats that are
reported as having optimal device access. This allows us to copy
directly to the target texture from CPU, eliminating the queued GPU-side
copy. To keep things simple, we keep this texture in the GENERAL image
layout for its entire lifetime for now.
This commit is contained in:
Kenny Levinsen 2026-04-17 01:08:25 +02:00
parent e8c03e9ce9
commit 9a457a3f1d
4 changed files with 199 additions and 17 deletions

View file

@ -44,6 +44,7 @@ struct wlr_vk_device {
bool sync_file_import_export;
bool implicit_sync_interop;
bool sampler_ycbcr_conversion;
bool host_image_copy;
// we only ever need one queue for rendering and transfer commands
uint32_t queue_family;
@ -60,6 +61,8 @@ struct wlr_vk_device {
PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;
PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
PFN_vkCopyMemoryToImageEXT vkCopyMemoryToImageEXT;
PFN_vkTransitionImageLayoutEXT vkTransitionImageLayoutEXT;
} api;
uint32_t format_prop_count;
@ -115,6 +118,7 @@ struct wlr_vk_format_props {
VkExtent2D max_extent;
VkFormatFeatureFlags features;
bool has_mutable_srgb;
bool host_image_copy;
} shm;
struct {
@ -518,6 +522,7 @@ struct wlr_vk_texture {
bool transitioned; // if dma_imported: whether we transitioned it away from preinit
bool has_alpha; // whether the image has an alpha channel
bool using_mutable_srgb; // can be accessed through _SRGB format view
bool host_image_copy; // whether uploads skip the staging buffer
struct wl_list foreign_link; // wlr_vk_renderer.foreign_textures
struct wl_list destroy_link; // wlr_vk_command_buffer.destroy_textures
struct wl_list link; // wlr_vk_renderer.textures

View file

@ -349,7 +349,8 @@ static bool query_modifier_usage_support(struct wlr_vk_device *dev, VkFormat vk_
}
static bool query_shm_support(struct wlr_vk_device *dev, VkFormat vk_format,
VkFormat vk_format_variant, VkImageFormatProperties *out,
VkFormat vk_format_variant, VkImageUsageFlags usage,
VkImageFormatProperties *out, bool *out_host_copy_optimal,
const char **errmsg) {
VkResult res;
*errmsg = NULL;
@ -369,12 +370,16 @@ static bool query_shm_support(struct wlr_vk_device *dev, VkFormat vk_format,
.type = VK_IMAGE_TYPE_2D,
.format = vk_format,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = vulkan_shm_tex_usage,
.usage = usage,
.flags = vk_format_variant ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0,
.pNext = &listi,
};
VkHostImageCopyDevicePerformanceQueryEXT perf_query = {
.sType = VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT,
};
VkImageFormatProperties2 ifmtp = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
.pNext = out_host_copy_optimal ? &perf_query : NULL,
};
res = vkGetPhysicalDeviceImageFormatProperties2(dev->phdev, &fmti, &ifmtp);
@ -389,6 +394,9 @@ static bool query_shm_support(struct wlr_vk_device *dev, VkFormat vk_format,
}
*out = ifmtp.imageFormatProperties;
if (out_host_copy_optimal) {
*out_host_copy_optimal = perf_query.optimalDeviceAccess;
}
return true;
}
@ -522,9 +530,13 @@ void vulkan_format_props_query(struct wlr_vk_device *dev,
VkDrmFormatModifierPropertiesListEXT modp = {
.sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
};
VkFormatProperties3 fmtp3 = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
.pNext = &modp,
};
VkFormatProperties2 fmtp = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
.pNext = &modp,
.pNext = dev->host_image_copy ? (void *)&fmtp3 : (void *)&modp,
};
vkGetPhysicalDeviceFormatProperties2(dev->phdev, format->vk, &fmtp);
@ -542,12 +554,32 @@ void vulkan_format_props_query(struct wlr_vk_device *dev,
!vulkan_format_is_ycbcr(format) && format_info != NULL) {
VkImageFormatProperties ifmtp;
bool supported = false, has_mutable_srgb = false;
if (query_shm_support(dev, format->vk, format->vk_srgb, &ifmtp, &errmsg)) {
bool host_copy_optimal = false;
if (dev->host_image_copy &&
(fmtp3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT)) {
VkImageUsageFlags usage = vulkan_shm_tex_usage |
VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
if (query_shm_support(dev, format->vk, format->vk_srgb, usage, &ifmtp,
&host_copy_optimal, &errmsg)) {
has_mutable_srgb = format->vk_srgb != 0;
supported = true;
} else if (format->vk_srgb && query_shm_support(dev, format->vk, 0, usage, &ifmtp,
&host_copy_optimal, &errmsg)) {
supported = true;
}
if (!host_copy_optimal) {
has_mutable_srgb = false;
supported = false;
}
}
if (!supported && query_shm_support(dev, format->vk, format->vk_srgb,
vulkan_shm_tex_usage, &ifmtp, NULL, &errmsg)) {
supported = true;
has_mutable_srgb = format->vk_srgb != 0;
}
if (!supported && format->vk_srgb) {
supported = query_shm_support(dev, format->vk, 0, &ifmtp, &errmsg);
supported = query_shm_support(dev, format->vk, 0,
vulkan_shm_tex_usage, &ifmtp, NULL, &errmsg);
}
if (supported) {
@ -555,6 +587,7 @@ void vulkan_format_props_query(struct wlr_vk_device *dev,
props.shm.max_extent.height = ifmtp.maxExtent.height;
props.shm.features = fmtp.formatProperties.optimalTilingFeatures;
props.shm.has_mutable_srgb = has_mutable_srgb;
props.shm.host_image_copy = host_copy_optimal;
wlr_drm_format_set_add(&dev->shm_texture_formats,
format->drm, DRM_FORMAT_MOD_LINEAR);

View file

@ -35,9 +35,84 @@ static VkImageAspectFlagBits mem_plane_aspect(unsigned i) {
}
}
// Uploads shm pixel data for the damaged region directly into the texture's
// backing image via VK_EXT_host_image_copy, bypassing the staging buffer and
// the queued GPU-side copy. The image is assumed to already be in
// VK_IMAGE_LAYOUT_GENERAL (the layout this texture is kept in for its whole
// lifetime when host_image_copy is enabled).
//
// stride: byte stride between rows of the source buffer `vdata`.
// region: damage region in texel coordinates; one copy op is issued per rect.
// Returns true on success, false on wait/allocation/copy failure.
static bool write_pixels_host(struct wlr_vk_texture *texture,
uint32_t stride, const pixman_region32_t *region, const void *vdata) {
struct wlr_vk_renderer *renderer = texture->renderer;
const struct wlr_pixel_format_info *format_info = drm_get_pixel_format_info(texture->format->drm);
assert(format_info);
// If the buffer is in use, we unfortunately need to wait for that to finish
if (texture->last_used_cb != NULL &&
!vulkan_wait_command_buffer(texture->last_used_cb, renderer)) {
return false;
}
int rects_len = 0;
const pixman_box32_t *rects = pixman_region32_rectangles(region, &rects_len);
if (rects_len == 0) {
// Empty damage region: nothing to upload.
return true;
}
// One VkMemoryToImageCopyEXT per damage rectangle, all submitted in a
// single vkCopyMemoryToImageEXT call below.
VkMemoryToImageCopyEXT *copies = calloc((size_t)rects_len, sizeof(*copies));
if (!copies) {
wlr_log(WLR_ERROR, "Failed to allocate image copy parameters");
return false;
}
for (int i = 0; i < rects_len; i++) {
pixman_box32_t rect = rects[i];
uint32_t width = rect.x2 - rect.x1;
uint32_t height = rect.y2 - rect.y1;
uint32_t src_x = rect.x1;
uint32_t src_y = rect.y1;
// Damage rects must lie within the texture bounds.
assert((uint32_t)rect.x2 <= texture->wlr_texture.width);
assert((uint32_t)rect.y2 <= texture->wlr_texture.height);
// Advance to the first pixel of this rect in the source buffer.
const char *pdata = (const char *)vdata;
pdata += stride * src_y;
pdata += format_info->bytes_per_block * src_x;
copies[i] = (VkMemoryToImageCopyEXT) {
.sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
.pHostPointer = pdata,
// Row length is expressed in texels, not bytes; assumes stride is
// a whole multiple of bytes_per_block — TODO confirm for formats
// with sub-byte or multi-texel blocks.
.memoryRowLength = stride / format_info->bytes_per_block,
.memoryImageHeight = height,
.imageExtent.width = width,
.imageExtent.height = height,
.imageExtent.depth = 1,
.imageOffset.x = src_x,
.imageOffset.y = src_y,
.imageOffset.z = 0,
.imageSubresource.mipLevel = 0,
.imageSubresource.baseArrayLayer = 0,
.imageSubresource.layerCount = 1,
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
};
}
// Perform the CPU-side copy; the image stays in GENERAL layout throughout.
VkCopyMemoryToImageInfoEXT copy_info = {
.sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
.dstImage = texture->image,
.dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
.regionCount = (uint32_t)rects_len,
.pRegions = copies,
};
VkResult res = renderer->dev->api.vkCopyMemoryToImageEXT(renderer->dev->dev, &copy_info);
free(copies);
if (res != VK_SUCCESS) {
wlr_vk_error("vkCopyMemoryToImageEXT failed", res);
return false;
}
return true;
}
// Will transition the texture to shaderReadOnlyOptimal layout for reading
// from fragment shader later on
static bool write_pixels(struct wlr_vk_texture *texture,
static bool write_pixels_staging(struct wlr_vk_texture *texture,
uint32_t stride, const pixman_region32_t *region, const void *vdata,
VkImageLayout old_layout, VkPipelineStageFlags src_stage,
VkAccessFlags src_access) {
@ -172,8 +247,13 @@ static bool vulkan_texture_update_from_buffer(struct wlr_texture *wlr_texture,
goto out;
}
ok = write_pixels(texture, stride, damage, data, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT);
if (texture->host_image_copy) {
ok = write_pixels_host(texture, stride, damage, data);
} else {
ok = write_pixels_staging(texture, stride, damage, data,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT);
}
out:
wlr_buffer_end_data_ptr_access(buffer);
@ -337,7 +417,9 @@ struct wlr_vk_texture_view *vulkan_texture_get_or_create_view(struct wlr_vk_text
VkDescriptorImageInfo ds_img_info = {
.imageView = view->image_view,
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
.imageLayout = texture->host_image_copy
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
};
VkWriteDescriptorSet ds_write = {
@ -399,6 +481,12 @@ static struct wlr_texture *vulkan_texture_from_pixels(
}
texture_set_format(texture, &fmt->format, fmt->shm.has_mutable_srgb);
texture->host_image_copy = fmt->shm.host_image_copy;
VkImageUsageFlags usage = vulkan_shm_tex_usage;
if (texture->host_image_copy) {
usage |= VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
}
VkFormat view_formats[] = {
fmt->format.vk,
@ -420,7 +508,7 @@ static struct wlr_texture *vulkan_texture_from_pixels(
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.extent = (VkExtent3D) { width, height, 1 },
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = vulkan_shm_tex_usage,
.usage = usage,
.pNext = fmt->shm.has_mutable_srgb ? &list_info : NULL,
};
if (fmt->shm.has_mutable_srgb) {
@ -462,14 +550,40 @@ static struct wlr_texture *vulkan_texture_from_pixels(
goto error;
}
pixman_region32_t region;
pixman_region32_init_rect(&region, 0, 0, width, height);
if (!write_pixels(texture, stride, &region, data, VK_IMAGE_LAYOUT_UNDEFINED,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0)) {
goto error;
if (texture->host_image_copy) {
VkHostImageLayoutTransitionInfoEXT transition = {
.sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
.image = texture->image,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
res = renderer->dev->api.vkTransitionImageLayoutEXT(dev, 1, &transition);
if (res != VK_SUCCESS) {
wlr_vk_error("vkTransitionImageLayoutEXT failed", res);
goto error;
}
}
return &texture->wlr_texture;
pixman_region32_t region;
pixman_region32_init_rect(&region, 0, 0, width, height);
bool ok;
if (texture->host_image_copy) {
ok = write_pixels_host(texture, stride, &region, data);
} else {
ok = write_pixels_staging(texture, stride, &region, data,
VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0);
}
if (ok) {
return &texture->wlr_texture;
}
error:
vulkan_texture_destroy(texture);

View file

@ -540,8 +540,12 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
"falling back to blocking");
}
VkPhysicalDeviceHostImageCopyFeaturesEXT phdev_host_image_copy_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT,
};
VkPhysicalDeviceSamplerYcbcrConversionFeatures phdev_sampler_ycbcr_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES,
.pNext = &phdev_host_image_copy_features,
};
VkPhysicalDeviceFeatures2 phdev_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
@ -553,6 +557,12 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
wlr_log(WLR_DEBUG, "Sampler YCbCr conversion %s",
dev->sampler_ycbcr_conversion ? "supported" : "not supported");
dev->host_image_copy =
check_extension(avail_ext_props, avail_extc, VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME) &&
phdev_host_image_copy_features.hostImageCopy;
wlr_log(WLR_DEBUG, "Host image copy %s",
dev->host_image_copy ? "supported" : "not supported");
const float prio = 1.f;
VkDeviceQueueCreateInfo qinfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
@ -592,9 +602,22 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
.pNext = &sync2_features,
.timelineSemaphore = VK_TRUE,
};
VkPhysicalDeviceHostImageCopyFeaturesEXT host_image_copy_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT,
.pNext = &timeline_features,
.hostImageCopy = VK_TRUE,
};
const void *features_chain = &timeline_features;
if (dev->host_image_copy) {
extensions[extensions_len++] = VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME;
extensions[extensions_len++] = VK_KHR_COPY_COMMANDS_2_EXTENSION_NAME;
extensions[extensions_len++] = VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME;
features_chain = &host_image_copy_features;
}
VkDeviceCreateInfo dev_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = &timeline_features,
.pNext = features_chain,
.queueCreateInfoCount = 1u,
.pQueueCreateInfos = &qinfo,
.enabledExtensionCount = extensions_len,
@ -637,6 +660,13 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
load_device_proc(dev, "vkImportSemaphoreFdKHR", &dev->api.vkImportSemaphoreFdKHR);
}
if (dev->host_image_copy) {
load_device_proc(dev, "vkCopyMemoryToImageEXT",
&dev->api.vkCopyMemoryToImageEXT);
load_device_proc(dev, "vkTransitionImageLayoutEXT",
&dev->api.vkTransitionImageLayoutEXT);
}
size_t max_fmts;
const struct wlr_vk_format *fmts = vulkan_get_format_list(&max_fmts);
dev->format_props = calloc(max_fmts, sizeof(*dev->format_props));