diff --git a/src/vulkan/runtime/meson.build b/src/vulkan/runtime/meson.build
index 72080b37977..c19a5e64867 100644
--- a/src/vulkan/runtime/meson.build
+++ b/src/vulkan/runtime/meson.build
@@ -258,6 +258,7 @@ vulkan_runtime_files = files(
   'vk_meta.c',
   'vk_meta_blit_resolve.c',
   'vk_meta_clear.c',
+  'vk_meta_copy_fill_update.c',
   'vk_meta_draw_rects.c',
   'vk_nir.c',
   'vk_nir_convert_ycbcr.c',
diff --git a/src/vulkan/runtime/vk_meta.c b/src/vulkan/runtime/vk_meta.c
index b7f93b08b6f..89a2b6208b5 100644
--- a/src/vulkan/runtime/vk_meta.c
+++ b/src/vulkan/runtime/vk_meta.c
@@ -24,6 +24,7 @@
 #include "vk_meta_object_list.h"
 #include "vk_meta_private.h"
 
+#include "vk_buffer.h"
 #include "vk_command_buffer.h"
 #include "vk_device.h"
 #include "vk_pipeline.h"
@@ -554,3 +555,22 @@ vk_meta_create_buffer_view(struct vk_command_buffer *cmd,
                           (uint64_t)*buffer_view_out);
    return VK_SUCCESS;
 }
+
+VkDeviceAddress
+vk_meta_buffer_address(struct vk_device *device, VkBuffer buffer,
+                       uint64_t offset, uint64_t range)
+{
+   const VkBufferDeviceAddressInfo info = {
+      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+      .buffer = buffer,
+   };
+   VkDeviceAddress base = device->dispatch_table.GetBufferDeviceAddress(
+      vk_device_to_handle(device), &info);
+
+   /* vk_buffer_range() is only called for its assert()s; we don't care
+    * about the result.
+    */
+   vk_buffer_range(vk_buffer_from_handle(buffer), offset, range);
+
+   return base + offset;
+}
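Usage note (not part of the patch): the sketch below shows how a driver-side caller might combine the two helpers introduced by this series, vk_meta_buffer_address() above and vk_meta_buffer_access_wg_size() from vk_meta.h below, when preparing a single VkBufferCopy2 region. The example_* names are hypothetical, and the chunk-size selection is only an assumption about how a copy path could pick its per-invocation access size.

#include "vk_device.h"
#include "vk_meta.h"

struct example_buffer_copy_params {
   VkDeviceAddress src_addr;
   VkDeviceAddress dst_addr;
   uint32_t chunk_size;
   uint32_t wg_size;
};

static struct example_buffer_copy_params
example_prepare_buffer_copy(struct vk_device *dev, struct vk_meta_device *meta,
                            const VkCopyBufferInfo2 *info,
                            const VkBufferCopy2 *region)
{
   /* Largest power-of-two chunk (capped at 16 bytes) that divides the copy
    * size and both offsets, so each invocation can use one naturally aligned
    * load/store. VkBufferCopy2::size must be non-zero, so the loop
    * terminates.
    */
   uint64_t align = region->size | region->srcOffset | region->dstOffset;
   uint32_t chunk_size = 16;
   while (align % chunk_size)
      chunk_size /= 2;

   return (struct example_buffer_copy_params){
      /* Base device address plus offset; the range argument only feeds the
       * bounds assert()s in vk_buffer_range().
       */
      .src_addr = vk_meta_buffer_address(dev, info->srcBuffer,
                                         region->srcOffset, region->size),
      .dst_addr = vk_meta_buffer_address(dev, info->dstBuffer,
                                         region->dstOffset, region->size),
      .chunk_size = chunk_size,
      /* Per-chunk-size workgroup size the driver is expected to provide in
       * buffer_access.optimal_wg_size[] when it initializes vk_meta_device.
       */
      .wg_size = vk_meta_buffer_access_wg_size(meta, chunk_size),
   };
}

Whatever policy a driver ends up using, the chunk size is what indexes buffer_access.optimal_wg_size[], which is why that table is sized by vk_meta_buffer_chunk_size_id.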
diff --git a/src/vulkan/runtime/vk_meta.h b/src/vulkan/runtime/vk_meta.h
index 6ca9a4e85f7..3fd767dc7b5 100644
--- a/src/vulkan/runtime/vk_meta.h
+++ b/src/vulkan/runtime/vk_meta.h
@@ -28,6 +28,8 @@
 #include "util/simple_mtx.h"
 
+#include "compiler/nir/nir.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -47,6 +49,55 @@ struct vk_meta_rect {
 #define VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA (VkPrimitiveTopology)11
 #define VK_IMAGE_VIEW_CREATE_INTERNAL_MESA (VkImageViewCreateFlagBits)0x40000000
 
+struct vk_meta_copy_image_properties {
+   union {
+      struct {
+         /* Format to use for the image view of a color aspect. The format
+          * must not be compressed and must be in the RGB/sRGB colorspace.
+          */
+         VkFormat view_format;
+      } color;
+
+      struct {
+         struct {
+            /* Format to use for the image view of a depth aspect. The format
+             * must not be compressed and must be in the RGB/sRGB colorspace.
+             */
+            VkFormat view_format;
+
+            /* Describes the depth/stencil component layout. Bits in the mask
+             * must be consecutive and match the original depth bit size.
+             */
+            uint8_t component_mask;
+         } depth;
+
+         struct {
+            /* Format to use for the image view of a stencil aspect. The
+             * format must not be compressed and must be in the RGB/sRGB
+             * colorspace.
+             */
+            VkFormat view_format;
+
+            /* Describes the depth/stencil component layout. Bits in the mask
+             * must be consecutive and match the original stencil bit size.
+             */
+            uint8_t component_mask;
+         } stencil;
+      };
+   };
+
+   /* Size of the image tile. Used to select the optimal workgroup size. */
+   VkExtent3D tile_size;
+};
+
+enum vk_meta_buffer_chunk_size_id {
+   VK_META_BUFFER_1_BYTE_CHUNK = 0,
+   VK_META_BUFFER_2_BYTE_CHUNK,
+   VK_META_BUFFER_4_BYTE_CHUNK,
+   VK_META_BUFFER_8_BYTE_CHUNK,
+   VK_META_BUFFER_16_BYTE_CHUNK,
+   VK_META_BUFFER_CHUNK_SIZE_COUNT,
+};
+
 struct vk_meta_device {
    struct hash_table *cache;
    simple_mtx_t cache_mtx;
@@ -56,6 +107,16 @@ struct vk_meta_device {
    bool use_gs_for_layer;
    bool use_stencil_export;
 
+   struct {
+      /* Optimal workgroup size for each possible chunk size. This should be
+       * chosen to keep things cache-friendly (something big enough to
+       * maximize cache hits on executing threads, but small enough not to
+       * thrash the cache) while keeping GPU utilization high enough that
+       * copies stay fast.
+       */
+      uint32_t optimal_wg_size[VK_META_BUFFER_CHUNK_SIZE_COUNT];
+   } buffer_access;
+
    VkResult (*cmd_bind_map_buffer)(struct vk_command_buffer *cmd,
                                    struct vk_meta_device *meta,
                                    VkBuffer buffer,
@@ -72,6 +133,19 @@ struct vk_meta_device {
                            uint32_t layer_count);
 };
 
+static inline uint32_t
+vk_meta_buffer_access_wg_size(const struct vk_meta_device *meta,
+                              uint32_t chunk_size)
+{
+   assert(util_is_power_of_two_nonzero(chunk_size));
+   unsigned idx = ffs(chunk_size) - 1;
+
+   assert(idx < ARRAY_SIZE(meta->buffer_access.optimal_wg_size));
+   assert(meta->buffer_access.optimal_wg_size[idx] != 0);
+
+   return meta->buffer_access.optimal_wg_size[idx];
+}
+
 VkResult vk_meta_device_init(struct vk_device *device,
                              struct vk_meta_device *meta);
 void vk_meta_device_finish(struct vk_device *device,
@@ -83,6 +157,11 @@ enum vk_meta_object_key_type {
    VK_META_OBJECT_KEY_CLEAR_PIPELINE,
    VK_META_OBJECT_KEY_BLIT_PIPELINE,
    VK_META_OBJECT_KEY_BLIT_SAMPLER,
+   VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE,
+   VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
+   VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
+   VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
+   VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE,
 };
 
 uint64_t vk_meta_lookup_object(struct vk_meta_device *meta,
@@ -192,6 +271,9 @@ VkResult vk_meta_create_buffer_view(struct vk_command_buffer *cmd,
                                     struct vk_meta_device *meta,
                                     const VkBufferViewCreateInfo *info,
                                     VkBufferView *buffer_view_out);
+
+#define VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA 0x80000000
+
 VkResult vk_meta_create_image_view(struct vk_command_buffer *cmd,
                                    struct vk_meta_device *meta,
                                    const VkImageViewCreateInfo *info,
@@ -273,6 +355,41 @@ void vk_meta_resolve_rendering(struct vk_command_buffer *cmd,
                                struct vk_meta_device *meta,
                                const VkRenderingInfo *pRenderingInfo);
 
+VkDeviceAddress vk_meta_buffer_address(struct vk_device *device,
+                                       VkBuffer buffer, uint64_t offset,
+                                       uint64_t range);
+
+void vk_meta_copy_buffer(struct vk_command_buffer *cmd,
+                         struct vk_meta_device *meta,
+                         const VkCopyBufferInfo2 *info);
+
+void vk_meta_copy_image_to_buffer(
+   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
+   const VkCopyImageToBufferInfo2 *info,
+   const struct vk_meta_copy_image_properties *img_props);
+
+void vk_meta_copy_buffer_to_image(
+   struct vk_command_buffer *cmd, struct vk_meta_device *meta,
+   const VkCopyBufferToImageInfo2 *info,
+   const struct vk_meta_copy_image_properties *img_props,
+   VkPipelineBindPoint bind_point);
+
+void vk_meta_copy_image(struct vk_command_buffer *cmd,
+                        struct vk_meta_device *meta,
+                        const VkCopyImageInfo2 *info,
+                        const struct vk_meta_copy_image_properties *src_props,
+                        const struct vk_meta_copy_image_properties *dst_props,
+                        VkPipelineBindPoint bind_point);
+
+void vk_meta_update_buffer(struct vk_command_buffer *cmd,
+                           struct vk_meta_device *meta, VkBuffer buffer,
+                           VkDeviceSize offset, VkDeviceSize size,
+                           const void *data);
+
+void vk_meta_fill_buffer(struct vk_command_buffer *cmd,
+                         struct vk_meta_device *meta, VkBuffer buffer,
+                         VkDeviceSize offset, VkDeviceSize size, uint32_t data);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/vulkan/runtime/vk_meta_copy_fill_update.c b/src/vulkan/runtime/vk_meta_copy_fill_update.c
new file mode 100644
index 00000000000..efff4bd3c99
--- /dev/null
+++ 
b/src/vulkan/runtime/vk_meta_copy_fill_update.c @@ -0,0 +1,2502 @@ +/* + * Copyright © 2023 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir/nir_builder.h" +#include "nir/nir_format_convert.h" + +#include "vk_buffer.h" +#include "vk_command_buffer.h" +#include "vk_command_pool.h" +#include "vk_device.h" +#include "vk_format.h" +#include "vk_meta.h" +#include "vk_meta_private.h" +#include "vk_physical_device.h" +#include "vk_pipeline.h" + +#include "util/format/u_format.h" + +struct vk_meta_fill_buffer_key { + enum vk_meta_object_key_type key_type; +}; + +struct vk_meta_copy_buffer_key { + enum vk_meta_object_key_type key_type; + + uint32_t chunk_size; +}; + +struct vk_meta_copy_image_view { + VkImageViewType type; + + union { + struct { + VkFormat format; + } color; + struct { + struct { + VkFormat format; + nir_component_mask_t component_mask; + } depth, stencil; + }; + }; +}; + +struct vk_meta_copy_buffer_image_key { + enum vk_meta_object_key_type key_type; + + VkPipelineBindPoint bind_point; + + struct { + struct vk_meta_copy_image_view view; + + VkImageAspectFlagBits aspect; + } img; + + uint32_t wg_size[3]; +}; + +struct vk_meta_copy_image_key { + enum vk_meta_object_key_type key_type; + + VkPipelineBindPoint bind_point; + + /* One source per-aspect being copied. */ + struct { + struct vk_meta_copy_image_view view; + } src, dst; + + VkImageAspectFlagBits aspects; + VkSampleCountFlagBits samples; + + uint32_t wg_size[3]; +}; + +#define load_info(__b, __type, __field_name) \ + nir_load_push_constant((__b), 1, \ + sizeof(((__type *)NULL)->__field_name) * 8, \ + nir_imm_int(b, offsetof(__type, __field_name))) + +struct vk_meta_fill_buffer_info { + uint64_t buf_addr; + uint32_t data; + uint32_t size; +}; + +struct vk_meta_copy_buffer_info { + uint64_t src_addr; + uint64_t dst_addr; + uint32_t size; +}; + +struct vk_meta_copy_buffer_image_info { + struct { + uint64_t addr; + uint32_t row_stride; + uint32_t image_stride; + } buf; + + struct { + struct { + uint32_t x, y, z; + } offset; + } img; + + /* Workgroup size should be selected based on the image tile size. This + * means we can issue threads outside the image area we want to copy + * from/to. This field encodes the copy IDs that should be skipped, and + * also serve as an adjustment for the buffer/image coordinates. 
*/ + struct { + struct { + uint32_t x, y, z; + } start, end; + } copy_id_range; +}; + +struct vk_meta_copy_image_fs_info { + struct { + int32_t x, y, z; + } dst_to_src_offs; +}; + +struct vk_meta_copy_image_cs_info { + struct { + struct { + uint32_t x, y, z; + } offset; + } src_img, dst_img; + + /* Workgroup size should be selected based on the image tile size. This + * means we can issue threads outside the image area we want to copy + * from/to. This field encodes the copy IDs that should be skipped, and + * also serve as an adjustment for the buffer/image coordinates. */ + struct { + struct { + uint32_t x, y, z; + } start, end; + } copy_id_range; +}; + +static VkOffset3D +base_layer_as_offset(VkImageViewType view_type, VkOffset3D offset, + uint32_t base_layer) +{ + switch (view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + return (VkOffset3D){ + .x = offset.x, + }; + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return (VkOffset3D){ + .x = offset.x, + .y = base_layer, + }; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return (VkOffset3D){ + .x = offset.x, + .y = offset.y, + .z = base_layer, + }; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_3D: + return offset; + + default: + assert(!"Invalid view type"); + return (VkOffset3D){0}; + } +} + +static VkExtent3D +layer_count_as_extent(VkImageViewType view_type, VkExtent3D extent, + uint32_t layer_count) +{ + switch (view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + return (VkExtent3D){ + .width = extent.width, + .height = 1, + .depth = 1, + }; + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return (VkExtent3D){ + .width = extent.width, + .height = layer_count, + .depth = 1, + }; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return (VkExtent3D){ + .width = extent.width, + .height = extent.height, + .depth = layer_count, + }; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_3D: + return extent; + + default: + assert(!"Invalid view type"); + return (VkExtent3D){0}; + } +} + +#define COPY_SHADER_BINDING(__binding, __type, __stage) \ + { \ + .binding = __binding, \ + .descriptorCount = 1, \ + .descriptorType = VK_DESCRIPTOR_TYPE_##__type, \ + .stageFlags = VK_SHADER_STAGE_##__stage##_BIT, \ + } + +static VkResult +get_copy_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta, + const char *key, VkShaderStageFlagBits shader_stage, + size_t push_const_size, + const struct VkDescriptorSetLayoutBinding *bindings, + uint32_t binding_count, VkPipelineLayout *layout_out) +{ + const VkDescriptorSetLayoutCreateInfo set_layout = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = binding_count, + .pBindings = bindings, + }; + + const VkPushConstantRange push_range = { + .stageFlags = shader_stage, + .offset = 0, + .size = push_const_size, + }; + + return vk_meta_get_pipeline_layout(device, meta, &set_layout, &push_range, + key, strlen(key) + 1, layout_out); +} + +#define COPY_PUSH_SET_IMG_DESC(__binding, __type, __iview, __layout) \ + { \ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, \ + .dstBinding = __binding, \ + .descriptorType = VK_DESCRIPTOR_TYPE_##__type##_IMAGE, \ + .descriptorCount = 1, \ + .pImageInfo = &(VkDescriptorImageInfo){ \ + .imageView = __iview, \ + .imageLayout = __layout, \ + }, \ + } + +static VkFormat +copy_img_view_format_for_aspect(const struct vk_meta_copy_image_view *info, + 
VkImageAspectFlagBits aspect) +{ + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + return info->color.format; + + case VK_IMAGE_ASPECT_DEPTH_BIT: + return info->depth.format; + + case VK_IMAGE_ASPECT_STENCIL_BIT: + return info->stencil.format; + + default: + assert(!"Unsupported aspect"); + return VK_FORMAT_UNDEFINED; + } +} + +static bool +depth_stencil_interleaved(const struct vk_meta_copy_image_view *view) +{ + return view->stencil.format != VK_FORMAT_UNDEFINED && + view->depth.format != VK_FORMAT_UNDEFINED && + view->stencil.format == view->depth.format && + view->stencil.component_mask != 0 && + view->depth.component_mask != 0 && + (view->stencil.component_mask & view->depth.component_mask) == 0; +} + +static VkResult +get_gfx_copy_pipeline( + struct vk_device *device, struct vk_meta_device *meta, + VkPipelineLayout layout, VkSampleCountFlagBits samples, + nir_shader *(*build_nir)(const struct vk_meta_device *, const void *), + VkImageAspectFlagBits aspects, const struct vk_meta_copy_image_view *view, + const void *key_data, size_t key_size, VkPipeline *pipeline_out) +{ + VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size); + if (from_cache != VK_NULL_HANDLE) { + *pipeline_out = from_cache; + return VK_SUCCESS; + } + + const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = build_nir(meta, key_data), + }; + const VkPipelineShaderStageCreateInfo fs_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &fs_nir_info, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .pName = "main", + }; + + VkPipelineDepthStencilStateCreateInfo ds_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + }; + VkPipelineDynamicStateCreateInfo dyn_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + }; + struct vk_meta_rendering_info render = { + .samples = samples, + }; + + const VkGraphicsPipelineCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 1, + .pStages = &fs_info, + .pDepthStencilState = &ds_info, + .pDynamicState = &dyn_info, + .layout = layout, + }; + + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + VkFormat fmt = + copy_img_view_format_for_aspect(view, aspects); + + render.color_attachment_formats[render.color_attachment_count] = fmt; + render.color_attachment_write_masks[render.color_attachment_count] = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + render.color_attachment_count++; + } + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + VkFormat fmt = + copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_DEPTH_BIT); + + render.color_attachment_formats[render.color_attachment_count] = fmt; + render.color_attachment_write_masks[render.color_attachment_count] = + (VkColorComponentFlags)view->depth.component_mask; + render.color_attachment_count++; + } + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + VkFormat fmt = + copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_STENCIL_BIT); + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT && + depth_stencil_interleaved(view)) { + render.color_attachment_write_masks[0] |= view->stencil.component_mask; + } else { + render.color_attachment_formats[render.color_attachment_count] = fmt; + render.color_attachment_write_masks[render.color_attachment_count] = + (VkColorComponentFlags)view->stencil.component_mask; + render.color_attachment_count++; + } + } + + VkResult result 
= vk_meta_create_graphics_pipeline( + device, meta, &info, &render, key_data, key_size, pipeline_out); + + ralloc_free(fs_nir_info.nir); + + return result; +} + +static VkResult +get_compute_copy_pipeline( + struct vk_device *device, struct vk_meta_device *meta, + VkPipelineLayout layout, + nir_shader *(*build_nir)(const struct vk_meta_device *, const void *), + const void *key_data, size_t key_size, VkPipeline *pipeline_out) +{ + VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size); + if (from_cache != VK_NULL_HANDLE) { + *pipeline_out = from_cache; + return VK_SUCCESS; + } + + const VkPipelineShaderStageNirCreateInfoMESA cs_nir_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA, + .nir = build_nir(meta, key_data), + }; + + const VkComputePipelineCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &cs_nir_info, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .pName = "main", + }, + .layout = layout, + }; + + VkResult result = vk_meta_create_compute_pipeline( + device, meta, &info, key_data, key_size, pipeline_out); + + ralloc_free(cs_nir_info.nir); + + return result; +} + +static VkResult +copy_create_src_image_view(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, struct vk_image *img, + const struct vk_meta_copy_image_view *view_info, + VkImageAspectFlags aspect, + const VkImageSubresourceLayers *subres, + VkImageView *view_out) +{ + const VkImageViewUsageCreateInfo usage = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + }; + + VkFormat format = copy_img_view_format_for_aspect(view_info, aspect); + + VkImageViewCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &usage, + .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA, + .image = vk_image_to_handle(img), + .viewType = view_info->type, + .format = format, + .subresourceRange = { + .aspectMask = vk_format_aspects(format), + .baseMipLevel = subres->mipLevel, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = img->array_layers, + }, + }; + + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_STENCIL_BIT + ? view_info->stencil.component_mask + : view_info->depth.component_mask; + assert(comp_mask != 0); + + VkComponentSwizzle *swizzle = &info.components.r; + unsigned num_comps = util_bitcount(comp_mask); + unsigned first_comp = ffs(comp_mask) - 1; + + assert(first_comp + num_comps <= 4); + + for (unsigned i = 0; i < num_comps; i++) + swizzle[i] = first_comp + i + VK_COMPONENT_SWIZZLE_R; + } + + return vk_meta_create_image_view(cmd, meta, &info, view_out); +} + +static VkResult +copy_create_dst_image_view(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, struct vk_image *img, + const struct vk_meta_copy_image_view *view_info, + VkImageAspectFlags aspect, const VkOffset3D *offset, + const VkExtent3D *extent, + const VkImageSubresourceLayers *subres, + VkPipelineBindPoint bind_point, + VkImageView *view_out) +{ + uint32_t layer_count, base_layer; + VkFormat format = copy_img_view_format_for_aspect(view_info, aspect); + VkImageAspectFlags fmt_aspects = vk_format_aspects(format); + const VkImageViewUsageCreateInfo usage = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE + ? 
VK_IMAGE_USAGE_STORAGE_BIT + : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + }; + + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + layer_count = + MAX2(extent->depth, vk_image_subresource_layer_count(img, subres)); + base_layer = img->image_type == VK_IMAGE_TYPE_3D ? offset->z + : subres->baseArrayLayer; + } else { + /* Always create a view covering the whole image in case of compute. */ + layer_count = img->image_type == VK_IMAGE_TYPE_3D ? 1 : img->array_layers; + base_layer = 0; + } + + const VkImageViewCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &usage, + .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA, + .image = vk_image_to_handle(img), + .viewType = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS + ? vk_image_render_view_type(img, layer_count) + : vk_image_storage_view_type(img), + .format = format, + .subresourceRange = { + .aspectMask = fmt_aspects, + .baseMipLevel = subres->mipLevel, + .levelCount = 1, + .baseArrayLayer = base_layer, + .layerCount = layer_count, + }, + }; + + return vk_meta_create_image_view(cmd, meta, &info, view_out); +} + +static nir_def * +trim_img_coords(nir_builder *b, VkImageViewType view_type, nir_def *coords) +{ + switch (view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + return nir_channel(b, coords, 0); + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + case VK_IMAGE_VIEW_TYPE_2D: + return nir_trim_vector(b, coords, 2); + + default: + return nir_trim_vector(b, coords, 3); + } +} + +static nir_def * +copy_img_buf_addr(nir_builder *b, enum pipe_format pfmt, nir_def *coords) +{ + nir_def *buf_row_stride = + load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride); + nir_def *buf_img_stride = + load_info(b, struct vk_meta_copy_buffer_image_info, buf.image_stride); + nir_def *buf_addr = + load_info(b, struct vk_meta_copy_buffer_image_info, buf.addr); + nir_def *offset = nir_imul(b, nir_channel(b, coords, 2), buf_img_stride); + unsigned blk_sz = util_format_get_blocksize(pfmt); + + offset = nir_iadd(b, offset, + nir_imul(b, nir_channel(b, coords, 1), buf_row_stride)); + offset = nir_iadd(b, offset, + nir_imul_imm(b, nir_channel(b, coords, 0), blk_sz)); + + return nir_iadd(b, buf_addr, nir_u2u64(b, offset)); +} + +static VkFormat +copy_img_buf_format_for_aspect(const struct vk_meta_copy_image_view *info, + VkImageAspectFlagBits aspect) +{ + if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) { + enum pipe_format pfmt = vk_format_to_pipe_format(info->depth.format); + unsigned num_comps = util_format_get_nr_components(pfmt); + unsigned depth_comp_bits = 0; + + for (unsigned i = 0; i < num_comps; i++) { + if (info->depth.component_mask & BITFIELD_BIT(i)) + depth_comp_bits += util_format_get_component_bits( + pfmt, UTIL_FORMAT_COLORSPACE_RGB, i); + } + + switch (depth_comp_bits) { + case 16: + return VK_FORMAT_R16_UINT; + case 24: + case 32: + return VK_FORMAT_R32_UINT; + default: + assert(!"Unsupported format"); + return VK_FORMAT_UNDEFINED; + } + } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + return VK_FORMAT_R8_UINT; + } + + enum pipe_format pfmt = vk_format_to_pipe_format(info->color.format); + + switch (util_format_get_blocksize(pfmt)) { + case 1: + return VK_FORMAT_R8_UINT; + case 2: + return VK_FORMAT_R16_UINT; + case 3: + return VK_FORMAT_R8G8B8_UINT; + case 4: + return VK_FORMAT_R32_UINT; + case 6: + return VK_FORMAT_R16G16B16_UINT; + case 8: + return VK_FORMAT_R32G32_UINT; + case 12: + return VK_FORMAT_R32G32B32_UINT; + case 16: + return VK_FORMAT_R32G32B32A32_UINT; + default: + assert(!"Unsupported format"); + return 
VK_FORMAT_UNDEFINED; + } +} + +static nir_def * +convert_texel(nir_builder *b, VkFormat src_fmt, VkFormat dst_fmt, + nir_def *texel) +{ + enum pipe_format src_pfmt = vk_format_to_pipe_format(src_fmt); + enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_fmt); + + if (src_pfmt == dst_pfmt) + return texel; + + unsigned src_blksz = util_format_get_blocksize(src_pfmt); + unsigned dst_blksz = util_format_get_blocksize(dst_pfmt); + + nir_def *packed = nir_format_pack_rgba(b, src_pfmt, texel); + + /* Needed for depth/stencil copies where the source/dest formats might + * have a different size. */ + if (src_blksz < dst_blksz) + packed = nir_pad_vector_imm_int(b, packed, 0, 4); + + nir_def *unpacked = nir_format_unpack_rgba(b, packed, dst_pfmt); + + return unpacked; +} + +static nir_def * +place_ds_texel(nir_builder *b, VkFormat fmt, nir_component_mask_t comp_mask, + nir_def *texel) +{ + assert(comp_mask != 0); + + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + unsigned num_comps = util_format_get_nr_components(pfmt); + + if (comp_mask == nir_component_mask(num_comps)) + return texel; + + assert(num_comps <= 4); + + nir_def *comps[4]; + unsigned c = 0; + + for (unsigned i = 0; i < num_comps; i++) { + if (comp_mask & BITFIELD_BIT(i)) + comps[i] = nir_channel(b, texel, c++); + else + comps[i] = nir_imm_intN_t(b, 0, texel->bit_size); + } + + return nir_vec(b, comps, num_comps); +} + +static nir_deref_instr * +tex_deref(nir_builder *b, const struct vk_meta_copy_image_view *view, + VkImageAspectFlags aspect, VkSampleCountFlagBits samples, + unsigned binding) +{ + VkFormat fmt = copy_img_view_format_for_aspect(view, aspect); + bool is_array = vk_image_view_type_is_array(view->type); + enum glsl_sampler_dim sampler_dim = + samples != VK_SAMPLE_COUNT_1_BIT + ? GLSL_SAMPLER_DIM_MS + : vk_image_view_type_to_sampler_dim(view->type); + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + enum glsl_base_type base_type = + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT + : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT + : GLSL_TYPE_FLOAT; + const char *tex_name; + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + tex_name = "color_tex"; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + tex_name = "depth_tex"; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + tex_name = "stencil_tex"; + break; + default: + assert(!"Unsupported aspect"); + return NULL; + } + + const struct glsl_type *texture_type = + glsl_sampler_type(sampler_dim, false, is_array, base_type); + nir_variable *texture = + nir_variable_create(b->shader, nir_var_uniform, texture_type, tex_name); + texture->data.descriptor_set = 0; + texture->data.binding = binding; + + return nir_build_deref_var(b, texture); +} + +static nir_deref_instr * +img_deref(nir_builder *b, const struct vk_meta_copy_image_view *view, + VkImageAspectFlags aspect, VkSampleCountFlagBits samples, + unsigned binding) +{ + VkFormat fmt = copy_img_view_format_for_aspect(view, aspect); + bool is_array = vk_image_view_type_is_array(view->type); + enum glsl_sampler_dim sampler_dim = + samples != VK_SAMPLE_COUNT_1_BIT + ? GLSL_SAMPLER_DIM_MS + : vk_image_view_type_to_sampler_dim(view->type); + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + enum glsl_base_type base_type = + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT + : util_format_is_pure_uint(pfmt) ? 
GLSL_TYPE_UINT + : GLSL_TYPE_FLOAT; + const char *img_name; + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + img_name = "color_img"; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + img_name = "depth_img"; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + img_name = "stencil_img"; + break; + default: + assert(!"Unsupported aspect"); + return NULL; + } + const struct glsl_type *image_type = + glsl_image_type(sampler_dim, is_array, base_type); + nir_variable *image_var = + nir_variable_create(b->shader, nir_var_uniform, image_type, img_name); + image_var->data.descriptor_set = 0; + image_var->data.binding = binding; + + return nir_build_deref_var(b, image_var); +} + +static nir_def * +read_texel(nir_builder *b, nir_deref_instr *tex_deref, nir_def *coords, + nir_def *sample_id) +{ + return sample_id ? nir_txf_ms_deref(b, tex_deref, coords, sample_id) + : nir_txf_deref(b, tex_deref, coords, NULL); +} + +static nir_variable * +frag_var(nir_builder *b, const struct vk_meta_copy_image_view *view, + VkImageAspectFlags aspect, uint32_t rt) +{ + VkFormat fmt = copy_img_view_format_for_aspect(view, aspect); + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + enum glsl_base_type base_type = + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT + : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT + : GLSL_TYPE_FLOAT; + const struct glsl_type *var_type = glsl_vector_type(base_type, 4); + static const char *var_names[] = { + "gl_FragData[0]", + "gl_FragData[1]", + }; + + assert(rt < ARRAY_SIZE(var_names)); + + nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out, + var_type, var_names[rt]); + var->data.location = FRAG_RESULT_DATA0 + rt; + + return var; +} + +static void +write_frag(nir_builder *b, const struct vk_meta_copy_image_view *view, + VkImageAspectFlags aspect, nir_variable *frag_var, nir_def *frag_val) +{ + nir_component_mask_t comp_mask; + + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + VkFormat fmt = copy_img_view_format_for_aspect(view, aspect); + + comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT + ? view->depth.component_mask + : view->stencil.component_mask; + frag_val = place_ds_texel(b, fmt, comp_mask, frag_val); + } else { + comp_mask = nir_component_mask(4); + } + + if (frag_val->bit_size != 32) { + switch (glsl_get_base_type(frag_var->type)) { + case GLSL_TYPE_INT: + frag_val = nir_i2i32(b, frag_val); + break; + case GLSL_TYPE_UINT: + frag_val = nir_u2u32(b, frag_val); + break; + case GLSL_TYPE_FLOAT: + frag_val = nir_f2f32(b, frag_val); + break; + default: + assert(!"Invalid type"); + frag_val = NULL; + break; + } + } + + frag_val = nir_pad_vector_imm_int(b, frag_val, 0, 4); + + nir_store_var(b, frag_var, frag_val, comp_mask); +} + +static void +write_img(nir_builder *b, const struct vk_meta_copy_image_view *view, + VkImageAspectFlags aspect, VkSampleCountFlagBits samples, + nir_deref_instr *img_deref, nir_def *coords, nir_def *sample_id, + nir_def *val) +{ + VkFormat fmt = copy_img_view_format_for_aspect(view, aspect); + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + enum glsl_base_type base_type = + util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT + : util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT + : GLSL_TYPE_FLOAT; + enum glsl_sampler_dim sampler_dim = + samples != VK_SAMPLE_COUNT_1_BIT + ? 
GLSL_SAMPLER_DIM_MS + : vk_image_view_type_to_sampler_dim(view->type); + bool is_array = vk_image_view_type_is_array(view->type); + + if (!sample_id) { + assert(samples == VK_SAMPLE_COUNT_1_BIT); + sample_id = nir_imm_int(b, 0); + } + + unsigned access_flags = ACCESS_NON_READABLE; + nir_def *zero_lod = nir_imm_int(b, 0); + + if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT + ? view->depth.component_mask + : view->stencil.component_mask; + unsigned num_comps = util_format_get_nr_components(pfmt); + + val = place_ds_texel(b, fmt, comp_mask, val); + + if (comp_mask != nir_component_mask(num_comps)) { + nir_def *comps[4]; + access_flags = 0; + + nir_def *old_val = nir_image_deref_load(b, + val->num_components, val->bit_size, &img_deref->def, coords, + sample_id, zero_lod, .image_dim = sampler_dim, + .image_array = is_array, .format = pfmt, .access = access_flags, + .dest_type = nir_get_nir_type_for_glsl_base_type(base_type)); + + for (unsigned i = 0; i < val->num_components; i++) { + if (comp_mask & BITFIELD_BIT(i)) + comps[i] = nir_channel(b, val, i); + else + comps[i] = nir_channel(b, old_val, i); + } + + val = nir_vec(b, comps, val->num_components); + } + } + + nir_image_deref_store(b, + &img_deref->def, coords, sample_id, val, zero_lod, + .image_dim = sampler_dim, .image_array = is_array, .format = pfmt, + .access = access_flags, + .src_type = nir_get_nir_type_for_glsl_base_type(base_type)); +} + +static nir_shader * +build_image_to_buffer_shader(const struct vk_meta_device *meta, + const void *key_data) +{ + const struct vk_meta_copy_buffer_image_key *key = key_data; + + assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); + + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-to-buffer"); + nir_builder *b = &builder; + + b->shader->info.workgroup_size[0] = key->wg_size[0]; + b->shader->info.workgroup_size[1] = key->wg_size[1]; + b->shader->info.workgroup_size[2] = key->wg_size[2]; + + VkFormat buf_fmt = + copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect); + enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt); + + nir_def *copy_id = nir_load_global_invocation_id(b, 32); + nir_def *copy_id_start = + nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.x), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.y), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.z)); + nir_def *copy_id_end = nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x), + load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.end.z)); + + nir_def *in_bounds = + nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)), + nir_ball(b, nir_ult(b, copy_id, copy_id_end))); + + nir_push_if(b, in_bounds); + + copy_id = nir_isub(b, copy_id, copy_id_start); + + nir_def *img_offs = nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z)); + + nir_def *img_coords = + trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs)); + + VkFormat iview_fmt = + copy_img_view_format_for_aspect(&key->img.view, key->img.aspect); + nir_deref_instr *tex = + tex_deref(b, &key->img.view, 
key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0); + nir_def *texel = read_texel(b, tex, img_coords, NULL); + + texel = convert_texel(b, iview_fmt, buf_fmt, texel); + + unsigned blk_sz = util_format_get_blocksize(buf_pfmt); + unsigned comp_count = util_format_get_nr_components(buf_pfmt); + assert(blk_sz % comp_count == 0); + unsigned comp_sz = (blk_sz / comp_count) * 8; + + /* nir_format_unpack() (which is called in convert_texel()) always + * returns a 32-bit result, which we might have to downsize to match + * the component size we want, hence the u2uN(). + */ + texel = nir_u2uN(b, texel, comp_sz); + + /* nir_format_unpack_rgba() (which is called from convert_texel()) returns + * a vec4, which means we might have more components than we need, but + * that's fine because we pass a write_mask to store_global. + */ + assert(texel->num_components >= comp_count); + nir_store_global(b, copy_img_buf_addr(b, buf_pfmt, copy_id), + comp_sz / 8, texel, nir_component_mask(comp_count)); + + nir_pop_if(b, NULL); + + return b->shader; +} + +static VkResult +get_copy_image_to_buffer_pipeline( + struct vk_device *device, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_image_key *key, + VkPipelineLayout *layout_out, VkPipeline *pipeline_out) +{ + const VkDescriptorSetLayoutBinding bindings[] = { + COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE), + }; + + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-image-to-buffer-pipeline-layout", + VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(struct vk_meta_copy_buffer_image_info), bindings, + ARRAY_SIZE(bindings), layout_out); + + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_compute_copy_pipeline(device, meta, *layout_out, + build_image_to_buffer_shader, key, + sizeof(*key), pipeline_out); +} + +static nir_shader * +build_buffer_to_image_fs(const struct vk_meta_device *meta, + const void *key_data) +{ + const struct vk_meta_copy_buffer_image_key *key = key_data; + + assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-buffer-to-image-frag"); + nir_builder *b = &builder; + + VkFormat buf_fmt = + copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect); + + enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt); + nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b)); + nir_def *out_layer = nir_load_layer_id(b); + + nir_def *img_offs = nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z)); + + /* Move the layer ID to the second coordinate if we're dealing with a 1D + * array, as this is where the texture instruction expects it. */ + nir_def *coords = key->img.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY + ? nir_vec3(b, nir_channel(b, out_coord_xy, 0), + out_layer, nir_imm_int(b, 0)) + : nir_vec3(b, nir_channel(b, out_coord_xy, 0), + nir_channel(b, out_coord_xy, 1), out_layer); + + unsigned blk_sz = util_format_get_blocksize(buf_pfmt); + unsigned comp_count = util_format_get_nr_components(buf_pfmt); + assert(blk_sz % comp_count == 0); + unsigned comp_sz = (blk_sz / comp_count) * 8; + + coords = nir_isub(b, coords, img_offs); + + nir_def *texel = nir_build_load_global(b, + comp_count, comp_sz, copy_img_buf_addr(b, buf_pfmt, coords), + .align_mul = 1 << (ffs(blk_sz) - 1)); + + /* We don't do compressed formats. 
The driver should select a non-compressed + * format with the same block size. */ + assert(!util_format_is_compressed(buf_pfmt)); + + VkFormat iview_fmt = + copy_img_view_format_for_aspect(&key->img.view, key->img.aspect); + nir_variable *out_var = frag_var(b, &key->img.view, key->img.aspect, 0); + + texel = convert_texel(b, buf_fmt, iview_fmt, texel); + write_frag(b, &key->img.view, key->img.aspect, out_var, texel); + return b->shader; +} + +static VkResult +get_copy_buffer_to_image_gfx_pipeline( + struct vk_device *device, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_image_key *key, + VkPipelineLayout *layout_out, VkPipeline *pipeline_out) +{ + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-buffer-to-image-gfx-pipeline-layout", + VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(struct vk_meta_copy_buffer_image_info), NULL, 0, layout_out); + + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_gfx_copy_pipeline(device, meta, *layout_out, + VK_SAMPLE_COUNT_1_BIT, build_buffer_to_image_fs, + key->img.aspect, &key->img.view, key, + sizeof(*key), pipeline_out); +} + +static nir_shader * +build_buffer_to_image_cs(const struct vk_meta_device *meta, + const void *key_data) +{ + const struct vk_meta_copy_buffer_image_key *key = key_data; + + assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); + + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer-to-image-compute"); + nir_builder *b = &builder; + + b->shader->info.workgroup_size[0] = key->wg_size[0]; + b->shader->info.workgroup_size[1] = key->wg_size[1]; + b->shader->info.workgroup_size[2] = key->wg_size[2]; + + VkFormat buf_fmt = + copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect); + VkFormat img_fmt = + copy_img_view_format_for_aspect(&key->img.view, key->img.aspect); + enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt); + nir_deref_instr *image_deref = + img_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0); + + nir_def *copy_id = nir_load_global_invocation_id(b, 32); + nir_def *copy_id_start = + nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.x), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.y), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.start.z)); + nir_def *copy_id_end = nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x), + load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y), + load_info(b, struct vk_meta_copy_buffer_image_info, + copy_id_range.end.z)); + + nir_def *in_bounds = + nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)), + nir_ball(b, nir_ult(b, copy_id, copy_id_end))); + + nir_push_if(b, in_bounds); + + /* Adjust the copy ID such that we can directly deduce the image coords and + * buffer offset from it. */ + copy_id = nir_isub(b, copy_id, copy_id_start); + + nir_def *img_offs = nir_vec3(b, + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y), + load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z)); + + nir_def *img_coords = + trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs)); + + img_coords = nir_pad_vector_imm_int(b, img_coords, 0, 4); + + unsigned blk_sz = util_format_get_blocksize(buf_pfmt); + unsigned bit_sz = blk_sz & 1 ? 8 : blk_sz & 2 ? 
16 : 32; + unsigned comp_count = blk_sz * 8 / bit_sz; + + nir_def *texel = nir_build_load_global(b, + comp_count, bit_sz, copy_img_buf_addr(b, buf_pfmt, copy_id), + .align_mul = 1 << (ffs(blk_sz) - 1)); + + texel = convert_texel(b, buf_fmt, img_fmt, texel); + + write_img(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, + image_deref, img_coords, NULL, texel); + + nir_pop_if(b, NULL); + + return b->shader; +} + +static VkResult +get_copy_buffer_to_image_compute_pipeline( + struct vk_device *device, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_image_key *key, + VkPipelineLayout *layout_out, VkPipeline *pipeline_out) +{ + const VkDescriptorSetLayoutBinding bindings[] = { + COPY_SHADER_BINDING(0, STORAGE_IMAGE, COMPUTE), + }; + + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-buffer-to-image-compute-pipeline-layout", + VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(struct vk_meta_copy_buffer_image_info), bindings, + ARRAY_SIZE(bindings), layout_out); + + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_compute_copy_pipeline(device, meta, *layout_out, + build_buffer_to_image_cs, key, sizeof(*key), + pipeline_out); +} + +static VkResult +copy_buffer_image_prepare_gfx_push_const( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_image_key *key, + VkPipelineLayout pipeline_layout, VkBuffer buffer, + const struct vk_image_buffer_layout *buf_layout, struct vk_image *img, + const VkBufferImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + uint32_t depth_or_layer_count = + MAX2(region->imageExtent.depth, + vk_image_subresource_layer_count(img, ®ion->imageSubresource)); + VkImageViewType img_view_type = + vk_image_render_view_type(img, depth_or_layer_count); + VkOffset3D img_offs = + base_layer_as_offset(img_view_type, region->imageOffset, + region->imageSubresource.baseArrayLayer); + + /* vk_meta_copy_buffer_image_info::image_stride is 32-bit for now. + * We might want to make it a 64-bit integer (and patch the shader code + * accordingly) if that becomes a limiting factor for vk_meta_copy users. 
+ */ + assert(buf_layout->image_stride_B <= UINT32_MAX); + + struct vk_meta_copy_buffer_image_info info = { + .buf = { + .row_stride = buf_layout->row_stride_B, + .image_stride = buf_layout->image_stride_B, + .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset, + VK_WHOLE_SIZE), + }, + .img.offset = { + .x = img_offs.x, + .y = img_offs.y, + .z = img_offs.z, + }, + }; + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info); + return VK_SUCCESS; +} + +static VkResult +copy_buffer_image_prepare_compute_push_const( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_image_key *key, + VkPipelineLayout pipeline_layout, VkBuffer buffer, + const struct vk_image_buffer_layout *buf_layout, struct vk_image *img, + const VkBufferImageCopy2 *region, uint32_t *wg_count) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkImageViewType img_view_type = key->img.view.type; + VkOffset3D img_offs = + base_layer_as_offset(img_view_type, region->imageOffset, + region->imageSubresource.baseArrayLayer); + uint32_t layer_count = + vk_image_subresource_layer_count(img, ®ion->imageSubresource); + VkExtent3D img_extent = + layer_count_as_extent(img_view_type, region->imageExtent, layer_count); + + struct vk_meta_copy_buffer_image_info info = { + .buf = { + .row_stride = buf_layout->row_stride_B, + .image_stride = buf_layout->image_stride_B, + .addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset, + VK_WHOLE_SIZE), + }, + .img.offset = { + .x = img_offs.x, + .y = img_offs.y, + .z = img_offs.z, + }, + }; + + info.copy_id_range.start.x = img_offs.x % key->wg_size[0]; + info.copy_id_range.start.y = img_offs.y % key->wg_size[1]; + info.copy_id_range.start.z = img_offs.z % key->wg_size[2]; + info.copy_id_range.end.x = info.copy_id_range.start.x + img_extent.width; + info.copy_id_range.end.y = info.copy_id_range.start.y + img_extent.height; + info.copy_id_range.end.z = info.copy_id_range.start.z + img_extent.depth; + wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]); + wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]); + wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]); + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info); + return VK_SUCCESS; +} + +static bool +format_is_supported(VkFormat fmt) +{ + enum pipe_format pfmt = vk_format_to_pipe_format(fmt); + const struct util_format_description *fdesc = util_format_description(pfmt); + + /* We only support RGB formats in the copy path to keep things simple. */ + return fdesc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + fdesc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB; +} + +static struct vk_meta_copy_image_view +img_copy_view_info(VkImageViewType view_type, VkImageAspectFlags aspects, + const struct vk_image *img, + const struct vk_meta_copy_image_properties *img_props) +{ + struct vk_meta_copy_image_view view = { + .type = view_type, + }; + + /* We only support color/depth/stencil aspects. */ + assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + /* Color aspect can't be combined with other aspects. 
*/ + assert(!(aspects & ~VK_IMAGE_ASPECT_COLOR_BIT)); + view.color.format = img_props->color.view_format; + assert(format_is_supported(view.color.format)); + return view; + } + + + view.depth.format = img_props->depth.view_format; + view.depth.component_mask = img_props->depth.component_mask; + view.stencil.format = img_props->stencil.view_format; + view.stencil.component_mask = img_props->stencil.component_mask; + + assert(view.depth.format == VK_FORMAT_UNDEFINED || + format_is_supported(view.depth.format)); + assert(view.stencil.format == VK_FORMAT_UNDEFINED || + format_is_supported(view.stencil.format)); + return view; +} + +static void +copy_image_to_buffer_region( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + struct vk_image *img, VkImageLayout img_layout, + const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer, + const struct vk_image_buffer_layout *buf_layout, + const VkBufferImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + struct vk_meta_copy_buffer_image_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE, + .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE, + .img = { + .view = img_copy_view_info(vk_image_sampled_view_type(img), + region->imageSubresource.aspectMask, img, + img_props), + .aspect = region->imageSubresource.aspectMask, + }, + .wg_size = { + img_props->tile_size.width, + img_props->tile_size.height, + img_props->tile_size.depth, + }, + }; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + VkResult result = get_copy_image_to_buffer_pipeline( + dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + VkImageView iview; + result = copy_create_src_image_view(cmd, meta, img, &key.img.view, + region->imageSubresource.aspectMask, + ®ion->imageSubresource, &iview); + + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + const VkWriteDescriptorSet descs[] = { + COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, img_layout), + }; + + disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout, 0, ARRAY_SIZE(descs), descs); + + uint32_t wg_count[3] = {0}; + + result = copy_buffer_image_prepare_compute_push_const( + cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region, + wg_count); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1], + wg_count[2]); +} + +void +vk_meta_copy_image_to_buffer( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const VkCopyImageToBufferInfo2 *info, + const struct vk_meta_copy_image_properties *img_props) +{ + VK_FROM_HANDLE(vk_image, img, info->srcImage); + + for (uint32_t i = 0; i < info->regionCount; i++) { + VkBufferImageCopy2 region = info->pRegions[i]; + struct vk_image_buffer_layout buf_layout = + vk_image_buffer_copy_layout(img, ®ion); + + region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent); + region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset); + + copy_image_to_buffer_region(cmd, meta, img, info->srcImageLayout, + img_props, info->dstBuffer, &buf_layout, + ®ion); + } +} + +static void +copy_draw(struct 
vk_command_buffer *cmd, struct vk_meta_device *meta, + struct vk_image *dst_img, VkImageLayout dst_img_layout, + const VkImageSubresourceLayers *dst_img_subres, + const VkOffset3D *dst_img_offset, const VkExtent3D *copy_extent, + const struct vk_meta_copy_image_view *view_info) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + uint32_t depth_or_layer_count = + MAX2(copy_extent->depth, + vk_image_subresource_layer_count(dst_img, dst_img_subres)); + struct vk_meta_rect rect = { + .x0 = dst_img_offset->x, + .x1 = dst_img_offset->x + copy_extent->width, + .y0 = dst_img_offset->y, + .y1 = dst_img_offset->y + copy_extent->height, + }; + VkRenderingAttachmentInfo vk_atts[2]; + VkRenderingInfo vk_render = { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .renderArea = { + .offset = { + dst_img_offset->x, + dst_img_offset->y, + }, + .extent = { + copy_extent->width, + copy_extent->height, + }, + }, + .layerCount = depth_or_layer_count, + .pColorAttachments = vk_atts, + }; + VkImageView iview = VK_NULL_HANDLE; + + u_foreach_bit(a, dst_img_subres->aspectMask) { + VkImageAspectFlagBits aspect = 1 << a; + + if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && iview != VK_NULL_HANDLE && + depth_stencil_interleaved(view_info)) + continue; + + VkResult result = copy_create_dst_image_view( + cmd, meta, dst_img, view_info, aspect, dst_img_offset, copy_extent, + dst_img_subres, VK_PIPELINE_BIND_POINT_GRAPHICS, &iview); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + vk_atts[vk_render.colorAttachmentCount] = (VkRenderingAttachmentInfo){ + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = iview, + .imageLayout = dst_img_layout, + .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + + /* If we have interleaved depth/stencil and only one aspect is copied, we + * need to load the attachment to preserve the other component. */ + if (vk_format_has_depth(dst_img->format) && + vk_format_has_stencil(dst_img->format) && + depth_stencil_interleaved(view_info) && + (dst_img_subres->aspectMask != + (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { + vk_atts[vk_render.colorAttachmentCount].loadOp = + VK_ATTACHMENT_LOAD_OP_LOAD; + } + + vk_render.colorAttachmentCount++; + } + + disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render); + meta->cmd_draw_volume(cmd, meta, &rect, vk_render.layerCount); + disp->CmdEndRendering(vk_command_buffer_to_handle(cmd)); +} + +static void +copy_buffer_to_image_region_gfx( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + struct vk_image *img, VkImageLayout img_layout, + const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer, + const struct vk_image_buffer_layout *buf_layout, + const VkBufferImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + + /* We only special-case 1D_ARRAY to move the layer ID to the second + * component instead of the third. For all other view types, let's pick an + * invalid VkImageViewType value so we don't end up creating the same + * pipeline multiple times. */ + VkImageViewType view_type = + img->image_type == VK_IMAGE_TYPE_1D && img->array_layers > 1 + ? 
VK_IMAGE_VIEW_TYPE_1D_ARRAY + : (VkImageViewType)-1; + + struct vk_meta_copy_buffer_image_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE, + .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS, + .img = { + .view = img_copy_view_info(view_type, + region->imageSubresource.aspectMask, img, + img_props), + .aspect = region->imageSubresource.aspectMask, + }, + }; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + VkResult result = get_copy_buffer_to_image_gfx_pipeline( + dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + result = copy_buffer_image_prepare_gfx_push_const( + cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + copy_draw(cmd, meta, img, img_layout, ®ion->imageSubresource, + ®ion->imageOffset, ®ion->imageExtent, &key.img.view); +} + +static void +copy_buffer_to_image_region_compute( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + struct vk_image *img, VkImageLayout img_layout, + const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer, + const struct vk_image_buffer_layout *buf_layout, + const VkBufferImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkImageViewType view_type = vk_image_storage_view_type(img); + struct vk_meta_copy_buffer_image_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE, + .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE, + .img = { + .view = img_copy_view_info(view_type, + region->imageSubresource.aspectMask, img, + img_props), + .aspect = region->imageSubresource.aspectMask, + }, + .wg_size = { + img_props->tile_size.width, + img_props->tile_size.height, + img_props->tile_size.depth, + }, + }; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + VkResult result = get_copy_buffer_to_image_compute_pipeline( + dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + VkImageView iview; + result = copy_create_dst_image_view( + cmd, meta, img, &key.img.view, region->imageSubresource.aspectMask, + ®ion->imageOffset, ®ion->imageExtent, ®ion->imageSubresource, + VK_PIPELINE_BIND_POINT_COMPUTE, &iview); + + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + const VkWriteDescriptorSet descs[] = { + COPY_PUSH_SET_IMG_DESC(0, STORAGE, iview, img_layout), + }; + + disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout, 0, ARRAY_SIZE(descs), descs); + + uint32_t wg_count[3] = {0}; + + result = copy_buffer_image_prepare_compute_push_const( + cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region, + wg_count); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdDispatch(vk_command_buffer_to_handle(cmd), + wg_count[0], wg_count[1], wg_count[2]); +} + +void +vk_meta_copy_buffer_to_image( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const VkCopyBufferToImageInfo2 *info, + const struct 
vk_meta_copy_image_properties *img_props, + VkPipelineBindPoint bind_point) +{ + VK_FROM_HANDLE(vk_image, img, info->dstImage); + + for (uint32_t i = 0; i < info->regionCount; i++) { + VkBufferImageCopy2 region = info->pRegions[i]; + struct vk_image_buffer_layout buf_layout = + vk_image_buffer_copy_layout(img, ®ion); + + region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent); + region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset); + + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + copy_buffer_to_image_region_gfx(cmd, meta, img, info->dstImageLayout, + img_props, info->srcBuffer, + &buf_layout, ®ion); + } else { + copy_buffer_to_image_region_compute(cmd, meta, img, + info->dstImageLayout, img_props, + info->srcBuffer, &buf_layout, + ®ion); + } + } +} + +static nir_shader * +build_copy_image_fs(const struct vk_meta_device *meta, const void *key_data) +{ + const struct vk_meta_copy_image_key *key = key_data; + + assert(key->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); + + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-image-frag"); + nir_builder *b = &builder; + + b->shader->info.fs.uses_sample_shading = + key->samples != VK_SAMPLE_COUNT_1_BIT; + + nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b)); + nir_def *out_layer = nir_load_layer_id(b); + + nir_def *src_offset = nir_vec3(b, + load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.x), + load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.y), + load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.z)); + + /* Move the layer ID to the second coordinate if we're dealing with a 1D + * array, as this is where the texture instruction expects it. */ + nir_def *src_coords = + key->dst.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY + ? nir_vec3(b, nir_channel(b, out_coord_xy, 0), out_layer, + nir_imm_int(b, 0)) + : nir_vec3(b, nir_channel(b, out_coord_xy, 0), + nir_channel(b, out_coord_xy, 1), out_layer); + + src_coords = trim_img_coords(b, key->src.view.type, + nir_iadd(b, src_coords, src_offset)); + + nir_def *sample_id = + key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL; + nir_variable *color_var = NULL; + uint32_t tex_binding = 0; + + u_foreach_bit(a, key->aspects) { + VkImageAspectFlagBits aspect = 1 << a; + VkFormat src_fmt = + copy_img_view_format_for_aspect(&key->src.view, aspect); + VkFormat dst_fmt = + copy_img_view_format_for_aspect(&key->dst.view, aspect); + nir_deref_instr *tex = + tex_deref(b, &key->src.view, aspect, key->samples, tex_binding++); + nir_def *texel = read_texel(b, tex, src_coords, sample_id); + + if (!color_var || !depth_stencil_interleaved(&key->dst.view)) { + color_var = + frag_var(b, &key->dst.view, aspect, color_var != NULL ? 
1 : 0); + } + + texel = convert_texel(b, src_fmt, dst_fmt, texel); + write_frag(b, &key->dst.view, aspect, color_var, texel); + } + + return b->shader; +} + +static VkResult +get_copy_image_gfx_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, + VkPipelineLayout *layout_out, + VkPipeline *pipeline_out) +{ + const struct VkDescriptorSetLayoutBinding bindings[] = { + COPY_SHADER_BINDING(0, SAMPLED_IMAGE, FRAGMENT), + COPY_SHADER_BINDING(1, SAMPLED_IMAGE, FRAGMENT), + }; + + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-image-gfx-pipeline-layout", + VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(struct vk_meta_copy_image_fs_info), + bindings, ARRAY_SIZE(bindings), layout_out); + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_gfx_copy_pipeline( + device, meta, *layout_out, key->samples, build_copy_image_fs, + key->aspects, &key->dst.view, key, sizeof(*key), pipeline_out); +} + +static nir_shader * +build_copy_image_cs(const struct vk_meta_device *meta, const void *key_data) +{ + const struct vk_meta_copy_image_key *key = key_data; + + assert(key->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); + + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-compute"); + nir_builder *b = &builder; + + b->shader->info.workgroup_size[0] = key->wg_size[0]; + b->shader->info.workgroup_size[1] = key->wg_size[1]; + b->shader->info.workgroup_size[2] = key->wg_size[2]; + + nir_def *copy_id = nir_load_global_invocation_id(b, 32); + nir_def *copy_id_start = nir_vec3(b, + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.x), + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.y), + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.z)); + nir_def *copy_id_end = nir_vec3(b, + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.x), + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.y), + load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.z)); + + nir_def *in_bounds = + nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)), + nir_ball(b, nir_ult(b, copy_id, copy_id_end))); + + nir_push_if(b, in_bounds); + + nir_def *src_offset = nir_vec3(b, + load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.x), + load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.y), + load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.z)); + nir_def *dst_offset = nir_vec3(b, + load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.x), + load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.y), + load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.z)); + + nir_def *src_coords = trim_img_coords(b, key->src.view.type, + nir_iadd(b, copy_id, src_offset)); + nir_def *dst_coords = trim_img_coords(b, key->dst.view.type, + nir_iadd(b, copy_id, dst_offset)); + + dst_coords = nir_pad_vector_imm_int(b, dst_coords, 0, 4); + + uint32_t binding = 0; + u_foreach_bit(a, key->aspects) { + VkImageAspectFlagBits aspect = 1 << a; + VkFormat src_fmt = + copy_img_view_format_for_aspect(&key->src.view, aspect); + VkFormat dst_fmt = + copy_img_view_format_for_aspect(&key->dst.view, aspect); + nir_deref_instr *tex = + tex_deref(b, &key->src.view, aspect, key->samples, binding); + nir_deref_instr *img = + img_deref(b, &key->dst.view, aspect, key->samples, binding + 1); + + for (uint32_t s = 0; s < key->samples; s++) { + nir_def *sample_id = + key->samples == 
VK_SAMPLE_COUNT_1_BIT ? NULL : nir_imm_int(b, s); + nir_def *texel = read_texel(b, tex, src_coords, sample_id); + + texel = convert_texel(b, src_fmt, dst_fmt, texel); + write_img(b, &key->dst.view, aspect, key->samples, img, dst_coords, + sample_id, texel); + } + + binding += 2; + } + + nir_pop_if(b, NULL); + + return b->shader; +} + +static VkResult +get_copy_image_compute_pipeline(struct vk_device *device, + struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, + VkPipelineLayout *layout_out, + VkPipeline *pipeline_out) +{ + const VkDescriptorSetLayoutBinding bindings[] = { + COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE), + COPY_SHADER_BINDING(1, STORAGE_IMAGE, COMPUTE), + COPY_SHADER_BINDING(2, SAMPLED_IMAGE, COMPUTE), + COPY_SHADER_BINDING(3, STORAGE_IMAGE, COMPUTE), + }; + + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-image-compute-pipeline-layout", + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_image_cs_info), + bindings, ARRAY_SIZE(bindings), layout_out); + + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_compute_copy_pipeline(device, meta, *layout_out, + build_copy_image_cs, key, sizeof(*key), + pipeline_out); +} + +static VkResult +copy_image_prepare_gfx_desc_set( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout, + struct vk_image *src_img, VkImageLayout src_img_layout, + struct vk_image *dst_img, VkImageLayout dst_img_layout, + const VkImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkImageAspectFlags aspects = key->aspects; + VkImageView iviews[] = { + VK_NULL_HANDLE, + VK_NULL_HANDLE, + }; + uint32_t desc_count = 0; + + u_foreach_bit(a, aspects) { + assert(desc_count < ARRAY_SIZE(iviews)); + + VkResult result = copy_create_src_image_view( + cmd, meta, src_img, &key->src.view, 1 << a, ®ion->srcSubresource, + &iviews[desc_count++]); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + VkWriteDescriptorSet descs[2] = { + COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout), + COPY_PUSH_SET_IMG_DESC(1, SAMPLED, iviews[1], src_img_layout), + }; + + disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, desc_count, descs); + return VK_SUCCESS; +} + +static VkResult +copy_image_prepare_compute_desc_set( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout, + struct vk_image *src_img, VkImageLayout src_img_layout, + struct vk_image *dst_img, VkImageLayout dst_img_layout, + const VkImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkImageAspectFlags aspects = key->aspects; + VkImageView iviews[] = { + VK_NULL_HANDLE, + VK_NULL_HANDLE, + VK_NULL_HANDLE, + VK_NULL_HANDLE, + }; + unsigned desc_count = 0; + + u_foreach_bit(a, aspects) { + VkImageAspectFlagBits aspect = 1 << a; + + assert(desc_count + 2 <= ARRAY_SIZE(iviews)); + + VkResult result = copy_create_src_image_view( + cmd, meta, src_img, &key->src.view, aspect, ®ion->srcSubresource, + &iviews[desc_count++]); + if (unlikely(result != VK_SUCCESS)) + return result; + + result = copy_create_dst_image_view( + cmd, meta, dst_img, &key->dst.view, aspect, ®ion->dstOffset, + ®ion->extent, ®ion->dstSubresource, + 
VK_PIPELINE_BIND_POINT_COMPUTE, &iviews[desc_count++]); + if (unlikely(result != VK_SUCCESS)) + return result; + } + + VkWriteDescriptorSet descs[] = { + COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout), + COPY_PUSH_SET_IMG_DESC(1, STORAGE, iviews[1], dst_img_layout), + COPY_PUSH_SET_IMG_DESC(2, SAMPLED, iviews[2], src_img_layout), + COPY_PUSH_SET_IMG_DESC(3, STORAGE, iviews[3], dst_img_layout), + }; + + disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout, 0, desc_count, descs); + return VK_SUCCESS; +} + +enum vk_meta_copy_image_align_policy { + VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE, + VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE, +}; + +static VkResult +copy_image_prepare_compute_push_const( + struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout, + const struct vk_image *src, const struct vk_image *dst, + enum vk_meta_copy_image_align_policy align_policy, + const VkImageCopy2 *region, uint32_t *wg_count) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkOffset3D src_offs = + base_layer_as_offset(key->src.view.type, region->srcOffset, + region->srcSubresource.baseArrayLayer); + uint32_t layer_count = + vk_image_subresource_layer_count(src, ®ion->srcSubresource); + VkExtent3D src_extent = + layer_count_as_extent(key->src.view.type, region->extent, layer_count); + VkOffset3D dst_offs = + base_layer_as_offset(key->dst.view.type, region->dstOffset, + region->dstSubresource.baseArrayLayer); + + struct vk_meta_copy_image_cs_info info = {0}; + + /* We can't necessarily optimize the read+write path, so align things + * on the biggest tile size. 
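+ * When the source and destination tile sizes differ we cannot be aligned on both at once, so the workgroup is sized and aligned on the larger of the two tiles.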
*/ + if (align_policy == VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE) { + info.copy_id_range.start.x = src_offs.x % key->wg_size[0]; + info.copy_id_range.start.y = src_offs.y % key->wg_size[1]; + info.copy_id_range.start.z = src_offs.z % key->wg_size[2]; + } else { + info.copy_id_range.start.x = dst_offs.x % key->wg_size[0]; + info.copy_id_range.start.y = dst_offs.y % key->wg_size[1]; + info.copy_id_range.start.z = dst_offs.z % key->wg_size[2]; + } + + info.copy_id_range.end.x = info.copy_id_range.start.x + src_extent.width; + info.copy_id_range.end.y = info.copy_id_range.start.y + src_extent.height; + info.copy_id_range.end.z = info.copy_id_range.start.z + src_extent.depth; + + info.src_img.offset.x = src_offs.x - info.copy_id_range.start.x; + info.src_img.offset.y = src_offs.y - info.copy_id_range.start.y; + info.src_img.offset.z = src_offs.z - info.copy_id_range.start.z; + info.dst_img.offset.x = dst_offs.x - info.copy_id_range.start.x; + info.dst_img.offset.y = dst_offs.y - info.copy_id_range.start.y; + info.dst_img.offset.z = dst_offs.z - info.copy_id_range.start.z; + wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]); + wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]); + wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]); + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info); + + return VK_SUCCESS; +} + +static VkResult +copy_image_prepare_gfx_push_const(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, + const struct vk_meta_copy_image_key *key, + VkPipelineLayout pipeline_layout, + struct vk_image *src_img, + struct vk_image *dst_img, + const VkImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkOffset3D src_img_offs = + base_layer_as_offset(key->src.view.type, region->srcOffset, + region->srcSubresource.baseArrayLayer); + + struct vk_meta_copy_image_fs_info info = { + .dst_to_src_offs = { + /* The subtraction may lead to negative values, but that's fine + * because the shader does the mirror operation thus guaranteeing + * a src_coords >= 0. */ + .x = src_img_offs.x - region->dstOffset.x, + .y = src_img_offs.y - region->dstOffset.y, + /* Render image view only contains the layers needed for rendering, + * so we consider the coordinate containing the layer to always be + * zero. + */ + .z = src_img_offs.z, + }, + }; + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info); + + return VK_SUCCESS; +} + +static void +copy_image_region_gfx(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, struct vk_image *src_img, + VkImageLayout src_image_layout, + const struct vk_meta_copy_image_properties *src_props, + struct vk_image *dst_img, VkImageLayout dst_image_layout, + const struct vk_meta_copy_image_properties *dst_props, + const VkImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + + /* We only special-case 1D_ARRAY to move the layer ID to the second + * component instead of the third. For all other view types, let's pick an + * invalid VkImageViewType value so we don't end up creating the same + * pipeline multiple times. */ + VkImageViewType dst_view_type = + dst_img->image_type == VK_IMAGE_TYPE_1D && dst_img->array_layers > 1 + ? 
VK_IMAGE_VIEW_TYPE_1D_ARRAY + : (VkImageViewType)-1; + + assert(region->srcSubresource.aspectMask == + region->dstSubresource.aspectMask); + + struct vk_meta_copy_image_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE, + .bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS, + .samples = src_img->samples, + .aspects = region->srcSubresource.aspectMask, + .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img), + region->srcSubresource.aspectMask, src_img, + src_props), + .dst.view = img_copy_view_info(dst_view_type, + region->dstSubresource.aspectMask, dst_img, + dst_props), + }; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + VkResult result = + get_copy_image_gfx_pipeline(dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + result = copy_image_prepare_gfx_desc_set(cmd, meta, &key, pipeline_layout, + src_img, src_image_layout, dst_img, + dst_image_layout, region); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + result = copy_image_prepare_gfx_push_const(cmd, meta, &key, pipeline_layout, + src_img, dst_img, region); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + copy_draw(cmd, meta, dst_img, dst_image_layout, ®ion->dstSubresource, + ®ion->dstOffset, ®ion->extent, &key.dst.view); +} + +static void +copy_image_region_compute(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, struct vk_image *src_img, + VkImageLayout src_image_layout, + const struct vk_meta_copy_image_properties *src_props, + struct vk_image *dst_img, + VkImageLayout dst_image_layout, + const struct vk_meta_copy_image_properties *dst_props, + const VkImageCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkImageViewType dst_view_type = vk_image_storage_view_type(dst_img); + + assert(region->srcSubresource.aspectMask == + region->dstSubresource.aspectMask); + + struct vk_meta_copy_image_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE, + .bind_point = VK_PIPELINE_BIND_POINT_COMPUTE, + .samples = src_img->samples, + .aspects = region->srcSubresource.aspectMask, + .src.view = img_copy_view_info(vk_image_sampled_view_type(src_img), + region->srcSubresource.aspectMask, src_img, + src_props), + .dst.view = img_copy_view_info( + dst_view_type, region->dstSubresource.aspectMask, dst_img, dst_props), + }; + + uint32_t src_pix_per_tile = src_props->tile_size.width * + src_props->tile_size.height * + src_props->tile_size.depth; + uint32_t dst_pix_per_tile = dst_props->tile_size.width * + dst_props->tile_size.height * + dst_props->tile_size.depth; + enum vk_meta_copy_image_align_policy align_policy; + + if (src_pix_per_tile >= dst_pix_per_tile) { + key.wg_size[0] = src_props->tile_size.width; + key.wg_size[1] = src_props->tile_size.height; + key.wg_size[2] = src_props->tile_size.depth; + align_policy = VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE; + } else { + key.wg_size[0] = dst_props->tile_size.width; + key.wg_size[1] = dst_props->tile_size.height; + key.wg_size[2] = dst_props->tile_size.depth; + align_policy = VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE; + } + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + VkResult result = get_copy_image_compute_pipeline( + dev, meta, &key, 
&pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + result = copy_image_prepare_compute_desc_set( + cmd, meta, &key, pipeline_layout, src_img, src_image_layout, dst_img, + dst_image_layout, region); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + assert(key.wg_size[0] && key.wg_size[1] && key.wg_size[2]); + + uint32_t wg_count[3] = {0}; + + result = copy_image_prepare_compute_push_const( + cmd, meta, &key, pipeline_layout, src_img, dst_img, align_policy, region, + wg_count); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1], + wg_count[2]); +} + +void +vk_meta_copy_image(struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const VkCopyImageInfo2 *info, + const struct vk_meta_copy_image_properties *src_props, + const struct vk_meta_copy_image_properties *dst_props, + VkPipelineBindPoint bind_point) +{ + VK_FROM_HANDLE(vk_image, src_img, info->srcImage); + VK_FROM_HANDLE(vk_image, dst_img, info->dstImage); + + for (uint32_t i = 0; i < info->regionCount; i++) { + VkImageCopy2 region = info->pRegions[i]; + + region.extent = vk_image_extent_to_elements(src_img, region.extent); + region.srcOffset = vk_image_offset_to_elements(src_img, region.srcOffset); + region.dstOffset = vk_image_offset_to_elements(dst_img, region.dstOffset); + + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + copy_image_region_gfx(cmd, meta, src_img, info->srcImageLayout, + src_props, dst_img, info->dstImageLayout, + dst_props, ®ion); + } else { + copy_image_region_compute(cmd, meta, src_img, info->srcImageLayout, + src_props, dst_img, info->dstImageLayout, + dst_props, ®ion); + } + } +} + +static nir_shader * +build_copy_buffer_shader(const struct vk_meta_device *meta, + const void *key_data) +{ + const struct vk_meta_copy_buffer_key *key = key_data; + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer"); + nir_builder *b = &builder; + + b->shader->info.workgroup_size[0] = + vk_meta_buffer_access_wg_size(meta, key->chunk_size); + b->shader->info.workgroup_size[1] = 1; + b->shader->info.workgroup_size[2] = 1; + + uint32_t chunk_bit_size, chunk_comp_count; + + assert(util_is_power_of_two_nonzero(key->chunk_size)); + if (key->chunk_size <= 4) { + chunk_bit_size = key->chunk_size * 8; + chunk_comp_count = 1; + } else { + chunk_bit_size = 32; + chunk_comp_count = key->chunk_size / 4; + } + + assert(chunk_comp_count < NIR_MAX_VEC_COMPONENTS); + + nir_def *global_id = nir_load_global_invocation_id(b, 32); + nir_def *copy_id = nir_channel(b, global_id, 0); + nir_def *offset = nir_imul_imm(b, copy_id, key->chunk_size); + nir_def *size = load_info(b, struct vk_meta_copy_buffer_info, size); + + nir_push_if(b, nir_ult(b, offset, size)); + + offset = nir_u2u64(b, offset); + + nir_def *src_addr = load_info(b, struct vk_meta_copy_buffer_info, src_addr); + nir_def *dst_addr = nir_load_push_constant(b, 1, 64, nir_imm_int(b, 8)); + nir_def *data = nir_build_load_global(b, chunk_comp_count, chunk_bit_size, + nir_iadd(b, src_addr, offset), + .align_mul = chunk_bit_size / 8); + + nir_build_store_global(b, data, nir_iadd(b, dst_addr, offset), + .align_mul = key->chunk_size); + + nir_pop_if(b, NULL); + + return 
b->shader; +} + +static VkResult +get_copy_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta, + const struct vk_meta_copy_buffer_key *key, + VkPipelineLayout *layout_out, VkPipeline *pipeline_out) +{ + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-copy-buffer-pipeline-layout", + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_buffer_info), + NULL, 0, layout_out); + + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_compute_copy_pipeline(device, meta, *layout_out, + build_copy_buffer_shader, key, sizeof(*key), + pipeline_out); +} + +static void +copy_buffer_region(struct vk_command_buffer *cmd, struct vk_meta_device *meta, + VkBuffer src, VkBuffer dst, const VkBufferCopy2 *region) +{ + struct vk_device *dev = cmd->base.device; + const struct vk_physical_device *pdev = dev->physical; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkResult result; + + struct vk_meta_copy_buffer_key key = { + .key_type = VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE, + }; + + VkDeviceSize size = region->size; + VkDeviceAddress src_addr = + vk_meta_buffer_address(dev, src, region->srcOffset, size); + VkDeviceAddress dst_addr = + vk_meta_buffer_address(dev, dst, region->dstOffset, size); + + /* Combine the size and src/dst address to extract the alignment. */ + uint64_t align = src_addr | dst_addr | size; + + assert(align != 0); + + /* Pick the first power-of-two of the combined src/dst address and size as + * our alignment. We limit the chunk size to 16 bytes (a uvec4) for now. + */ + key.chunk_size = MIN2(16, 1 << (ffs(align) - 1)); + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + result = + get_copy_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + const uint32_t optimal_wg_size = + vk_meta_buffer_access_wg_size(meta, key.chunk_size); + const uint32_t per_wg_copy_size = optimal_wg_size * key.chunk_size; + uint32_t max_per_dispatch_size = + pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size; + + assert(optimal_wg_size <= pdev->properties.maxComputeWorkGroupSize[0]); + + while (size) { + struct vk_meta_copy_buffer_info args = { + .size = MIN2(size, max_per_dispatch_size), + .src_addr = src_addr, + .dst_addr = dst_addr, + }; + uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size); + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), + &args); + + disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1); + + src_addr += args.size; + dst_addr += args.size; + size -= args.size; + } +} + +void +vk_meta_copy_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta, + const VkCopyBufferInfo2 *info) +{ + for (unsigned i = 0; i < info->regionCount; i++) { + const VkBufferCopy2 *region = &info->pRegions[i]; + + copy_buffer_region(cmd, meta, info->srcBuffer, info->dstBuffer, region); + } +} + +void +vk_meta_update_buffer(struct vk_command_buffer *cmd, + struct vk_meta_device *meta, VkBuffer buffer, + VkDeviceSize offset, VkDeviceSize size, const void *data) +{ + VkResult result; + + const VkBufferCreateInfo tmp_buffer_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = 
&cmd->pool->queue_family_index, + }; + + VkBuffer tmp_buffer; + result = vk_meta_create_buffer(cmd, meta, &tmp_buffer_info, &tmp_buffer); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + void *tmp_buffer_map; + result = meta->cmd_bind_map_buffer(cmd, meta, tmp_buffer, &tmp_buffer_map); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + memcpy(tmp_buffer_map, data, size); + + const VkBufferCopy2 copy_region = { + .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, + .srcOffset = 0, + .dstOffset = offset, + .size = size, + }; + const VkCopyBufferInfo2 copy_info = { + .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2, + .srcBuffer = tmp_buffer, + .dstBuffer = buffer, + .regionCount = 1, + .pRegions = ©_region, + }; + + vk_meta_copy_buffer(cmd, meta, ©_info); +} + +static nir_shader * +build_fill_buffer_shader(const struct vk_meta_device *meta, + UNUSED const void *key_data) +{ + nir_builder builder = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "vk-meta-fill-buffer"); + nir_builder *b = &builder; + + b->shader->info.workgroup_size[0] = vk_meta_buffer_access_wg_size(meta, 4); + b->shader->info.workgroup_size[1] = 1; + b->shader->info.workgroup_size[2] = 1; + + nir_def *global_id = nir_load_global_invocation_id(b, 32); + nir_def *copy_id = nir_channel(b, global_id, 0); + nir_def *offset = nir_imul_imm(b, copy_id, 4); + nir_def *size = load_info(b, struct vk_meta_fill_buffer_info, size); + nir_def *data = load_info(b, struct vk_meta_fill_buffer_info, data); + + nir_push_if(b, nir_ult(b, offset, size)); + + offset = nir_u2u64(b, offset); + + nir_def *buf_addr = + load_info(b, struct vk_meta_fill_buffer_info, buf_addr); + + nir_build_store_global(b, data, nir_iadd(b, buf_addr, offset), + .align_mul = 4); + + nir_pop_if(b, NULL); + + return b->shader; +} + +static VkResult +get_fill_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta, + const struct vk_meta_fill_buffer_key *key, + VkPipelineLayout *layout_out, VkPipeline *pipeline_out) +{ + VkResult result = get_copy_pipeline_layout( + device, meta, "vk-meta-fill-buffer-pipeline-layout", + VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_fill_buffer_info), NULL, 0, + layout_out); + if (unlikely(result != VK_SUCCESS)) + return result; + + return get_compute_copy_pipeline(device, meta, *layout_out, + build_fill_buffer_shader, key, sizeof(*key), + pipeline_out); +} + +void +vk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta, + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + uint32_t data) +{ + VK_FROM_HANDLE(vk_buffer, buf, buffer); + struct vk_device *dev = cmd->base.device; + const struct vk_physical_device *pdev = dev->physical; + const struct vk_device_dispatch_table *disp = &dev->dispatch_table; + VkResult result; + + struct vk_meta_fill_buffer_key key = { + .key_type = VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE, + }; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + result = + get_fill_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline); + if (unlikely(result != VK_SUCCESS)) { + vk_command_buffer_set_error(cmd, result); + return; + } + + disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + /* From the Vulkan 1.3.290 spec: + * + * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not a + * multiple of 4, then the nearest smaller multiple is used." 
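+ * + * Note that vk_buffer_range() already resolves VK_WHOLE_SIZE to the size remaining past offset.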
+ * + * Hence the mask below, which aligns the size down to a multiple of 4. + */ + size = vk_buffer_range(buf, offset, size) & ~3ull; + + const uint32_t optimal_wg_size = vk_meta_buffer_access_wg_size(meta, 4); + const uint32_t per_wg_copy_size = optimal_wg_size * 4; + uint32_t max_per_dispatch_size = + pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size; + + while (size > 0) { + struct vk_meta_fill_buffer_info args = { + .size = MIN2(size, max_per_dispatch_size), + .buf_addr = vk_meta_buffer_address(dev, buffer, offset, size), + .data = data, + }; + uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size); + + disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), + &args); + + disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1); + + offset += args.size; + size -= args.size; + } +} diff --git a/src/vulkan/runtime/vk_meta_private.h b/src/vulkan/runtime/vk_meta_private.h index d10f9c7622f..2fe36b00f3f 100644 --- a/src/vulkan/runtime/vk_meta_private.h +++ b/src/vulkan/runtime/vk_meta_private.h @@ -26,6 +26,8 @@ #include "vk_image.h" #include "vk_meta.h" +#include "glsl_types.h" + #ifdef __cplusplus extern "C" { #endif @@ -83,6 +85,67 @@ vk_image_render_view_type(const struct vk_image *image, uint32_t layer_count) } } +static inline VkImageViewType +vk_image_storage_view_type(const struct vk_image *image) +{ + switch (image->image_type) { + case VK_IMAGE_TYPE_1D: + return image->array_layers == 1 ? VK_IMAGE_VIEW_TYPE_1D + : VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VK_IMAGE_TYPE_2D: + return image->array_layers == 1 ? VK_IMAGE_VIEW_TYPE_2D + : VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VK_IMAGE_TYPE_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("Invalid image type"); + } +} + +static inline enum glsl_sampler_dim +vk_image_view_type_to_sampler_dim(VkImageViewType view_type) +{ + switch (view_type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return GLSL_SAMPLER_DIM_1D; + + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + return GLSL_SAMPLER_DIM_2D; + + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return GLSL_SAMPLER_DIM_CUBE; + + case VK_IMAGE_VIEW_TYPE_3D: + return GLSL_SAMPLER_DIM_3D; + + default: + unreachable("Invalid view type"); + } +} + +static inline bool +vk_image_view_type_is_array(VkImageViewType view_type) +{ + switch (view_type) { + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return true; + + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_3D: + case VK_IMAGE_VIEW_TYPE_CUBE: + return false; + + default: + unreachable("Invalid view type"); + } +} + #ifdef __cplusplus } #endif
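
For context, a minimal sketch of how a driver might wire the new copy entrypoints up, assuming a hypothetical drv_device that embeds a struct vk_device (vk) and a struct vk_meta_device (meta), a 64x64x1 tile size, and a packed depth/stencil layout exposed through an RGBA8 view. The actual view formats, component masks and tile sizes are hardware-specific, so every drv_* name and constant below is an assumption rather than part of this series.

#include "vk_command_buffer.h"
#include "vk_format.h"
#include "vk_image.h"
#include "vk_meta.h"

static struct vk_meta_copy_image_properties
drv_get_copy_image_properties(const struct vk_image *img)
{
   struct vk_meta_copy_image_properties props = {
      /* The tile size only drives the workgroup size picked by vk_meta. */
      .tile_size = { .width = 64, .height = 64, .depth = 1 },
   };

   if (vk_format_has_depth(img->format) || vk_format_has_stencil(img->format)) {
      /* Hypothetical packed D24S8 layout: depth aliases the R/G/B components
       * of an RGBA8 view, stencil the A component. */
      props.depth.view_format = VK_FORMAT_R8G8B8A8_UINT;
      props.depth.component_mask = 0x7;
      props.stencil.view_format = VK_FORMAT_R8G8B8A8_UINT;
      props.stencil.component_mask = 0x8;
   } else {
      /* Uncompressed color formats can usually be used for the view as-is. */
      props.color.view_format = img->format;
   }

   return props;
}

VKAPI_ATTR void VKAPI_CALL
drv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pInfo)
{
   VK_FROM_HANDLE(vk_command_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_image, src, pInfo->srcImage);
   VK_FROM_HANDLE(vk_image, dst, pInfo->dstImage);
   /* A drv_device embedding a vk_device (vk) and a vk_meta_device (meta) is
    * an assumption about the driver, not something this series provides. */
   struct drv_device *dev =
      container_of(cmd->base.device, struct drv_device, vk);
   struct vk_meta_copy_image_properties src_props =
      drv_get_copy_image_properties(src);
   struct vk_meta_copy_image_properties dst_props =
      drv_get_copy_image_properties(dst);

   vk_meta_copy_image(cmd, &dev->meta, pInfo, &src_props, &dst_props,
                      VK_PIPELINE_BIND_POINT_COMPUTE);
}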