mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-23 10:48:08 +02:00
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37959>
2638 lines
94 KiB
C
2638 lines
94 KiB
C
/*
|
|
* Copyright © 2023 Collabora Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "nir/nir_builder.h"
|
|
#include "nir/nir_format_convert.h"
|
|
|
|
#include "vk_buffer.h"
|
|
#include "vk_command_buffer.h"
|
|
#include "vk_command_pool.h"
|
|
#include "vk_device.h"
|
|
#include "vk_format.h"
|
|
#include "vk_meta.h"
|
|
#include "vk_meta_private.h"
|
|
#include "vk_physical_device.h"
|
|
#include "vk_pipeline.h"
|
|
|
|
#include "util/format/u_format.h"
|
|
|
|
struct vk_meta_fill_buffer_key {
|
|
enum vk_meta_object_key_type key_type;
|
|
};
|
|
|
|
struct vk_meta_copy_buffer_key {
|
|
enum vk_meta_object_key_type key_type;
|
|
|
|
uint32_t chunk_size;
|
|
};
|
|
|
|
struct vk_meta_copy_image_view {
|
|
VkImageViewType type;
|
|
|
|
union {
|
|
struct {
|
|
VkFormat format;
|
|
} color;
|
|
struct {
|
|
VkFormat format;
|
|
} plane;
|
|
struct {
|
|
struct {
|
|
VkFormat format;
|
|
nir_component_mask_t component_mask;
|
|
} depth, stencil;
|
|
};
|
|
};
|
|
};
|
|
|
|
struct vk_meta_copy_buffer_image_key {
|
|
enum vk_meta_object_key_type key_type;
|
|
|
|
struct {
|
|
struct vk_meta_copy_image_view view;
|
|
|
|
VkImageAspectFlagBits aspect;
|
|
} img;
|
|
|
|
uint32_t wg_size[3];
|
|
};
|
|
|
|
struct vk_meta_copy_image_key {
|
|
enum vk_meta_object_key_type key_type;
|
|
|
|
/* One source per-aspect being copied. */
|
|
struct {
|
|
struct vk_meta_copy_image_view view;
|
|
VkImageAspectFlagBits aspects;
|
|
} src, dst;
|
|
|
|
VkSampleCountFlagBits samples;
|
|
|
|
uint32_t wg_size[3];
|
|
};
|
|
|
|
#define load_info(__b, __type, __field_name) \
|
|
nir_load_push_constant((__b), 1, \
|
|
sizeof(((__type *)NULL)->__field_name) * 8, \
|
|
nir_imm_int(b, offsetof(__type, __field_name)))
|
|
|
|
struct vk_meta_fill_buffer_info {
|
|
uint64_t buf_addr;
|
|
uint32_t data;
|
|
uint32_t size;
|
|
};
|
|
|
|
struct vk_meta_copy_buffer_info {
|
|
uint64_t src_addr;
|
|
uint64_t dst_addr;
|
|
uint32_t size;
|
|
};
|
|
|
|
struct vk_meta_copy_buffer_image_info {
|
|
struct {
|
|
uint64_t addr;
|
|
uint32_t row_stride;
|
|
uint32_t image_stride;
|
|
} buf;
|
|
|
|
struct {
|
|
struct {
|
|
uint32_t x, y, z;
|
|
} offset;
|
|
} img;
|
|
|
|
/* Workgroup size should be selected based on the image tile size. This
|
|
* means we can issue threads outside the image area we want to copy
|
|
* from/to. This field encodes the copy IDs that should be skipped, and
|
|
* also serve as an adjustment for the buffer/image coordinates. */
|
|
struct {
|
|
struct {
|
|
uint32_t x, y, z;
|
|
} start, end;
|
|
} copy_id_range;
|
|
};
|
|
|
|
struct vk_meta_copy_image_fs_info {
|
|
struct {
|
|
int32_t x, y, z;
|
|
} dst_to_src_offs;
|
|
};
|
|
|
|
struct vk_meta_copy_image_cs_info {
|
|
struct {
|
|
struct {
|
|
uint32_t x, y, z;
|
|
} offset;
|
|
} src_img, dst_img;
|
|
|
|
/* Workgroup size should be selected based on the image tile size. This
|
|
* means we can issue threads outside the image area we want to copy
|
|
* from/to. This field encodes the copy IDs that should be skipped, and
|
|
* also serve as an adjustment for the buffer/image coordinates. */
|
|
struct {
|
|
struct {
|
|
uint32_t x, y, z;
|
|
} start, end;
|
|
} copy_id_range;
|
|
};
|
|
|
|
static VkOffset3D
|
|
base_layer_as_offset(VkImageViewType view_type, VkOffset3D offset,
|
|
uint32_t base_layer)
|
|
{
|
|
switch (view_type) {
|
|
case VK_IMAGE_VIEW_TYPE_1D:
|
|
return (VkOffset3D){
|
|
.x = offset.x,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
|
|
return (VkOffset3D){
|
|
.x = offset.x,
|
|
.y = base_layer,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
|
|
case VK_IMAGE_VIEW_TYPE_CUBE:
|
|
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
|
|
return (VkOffset3D){
|
|
.x = offset.x,
|
|
.y = offset.y,
|
|
.z = base_layer,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_2D:
|
|
case VK_IMAGE_VIEW_TYPE_3D:
|
|
return offset;
|
|
|
|
default:
|
|
assert(!"Invalid view type");
|
|
return (VkOffset3D){0};
|
|
}
|
|
}
|
|
|
|
static VkExtent3D
|
|
layer_count_as_extent(VkImageViewType view_type, VkExtent3D extent,
|
|
uint32_t layer_count)
|
|
{
|
|
switch (view_type) {
|
|
case VK_IMAGE_VIEW_TYPE_1D:
|
|
return (VkExtent3D){
|
|
.width = extent.width,
|
|
.height = 1,
|
|
.depth = 1,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
|
|
return (VkExtent3D){
|
|
.width = extent.width,
|
|
.height = layer_count,
|
|
.depth = 1,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
|
|
case VK_IMAGE_VIEW_TYPE_CUBE:
|
|
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
|
|
return (VkExtent3D){
|
|
.width = extent.width,
|
|
.height = extent.height,
|
|
.depth = layer_count,
|
|
};
|
|
|
|
case VK_IMAGE_VIEW_TYPE_2D:
|
|
case VK_IMAGE_VIEW_TYPE_3D:
|
|
return extent;
|
|
|
|
default:
|
|
assert(!"Invalid view type");
|
|
return (VkExtent3D){0};
|
|
}
|
|
}
|
|
|
|
#define COPY_SHADER_BINDING(__binding, __type, __stage) \
|
|
{ \
|
|
.binding = __binding, \
|
|
.descriptorCount = 1, \
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_##__type, \
|
|
.stageFlags = VK_SHADER_STAGE_##__stage##_BIT, \
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_pipeline_layout(struct vk_device *device, struct vk_meta_device *meta,
|
|
enum vk_meta_object_key_type key, VkShaderStageFlagBits shader_stage,
|
|
size_t push_const_size,
|
|
const struct VkDescriptorSetLayoutBinding *bindings,
|
|
uint32_t binding_count, VkPipelineLayout *layout_out)
|
|
{
|
|
const VkDescriptorSetLayoutCreateInfo set_layout = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
|
|
.bindingCount = binding_count,
|
|
.pBindings = bindings,
|
|
};
|
|
|
|
const VkPushConstantRange push_range = {
|
|
.stageFlags = shader_stage,
|
|
.offset = 0,
|
|
.size = push_const_size,
|
|
};
|
|
|
|
return vk_meta_get_pipeline_layout(device, meta, &set_layout, &push_range,
|
|
&key, sizeof(key), layout_out);
|
|
}
|
|
|
|
#define COPY_PUSH_SET_IMG_DESC(__binding, __type, __iview, __layout) \
|
|
{ \
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, \
|
|
.dstBinding = __binding, \
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_##__type##_IMAGE, \
|
|
.descriptorCount = 1, \
|
|
.pImageInfo = &(VkDescriptorImageInfo){ \
|
|
.imageView = __iview, \
|
|
.imageLayout = __layout, \
|
|
}, \
|
|
}
|
|
|
|
static VkFormat
|
|
copy_img_view_format_for_aspect(const struct vk_meta_copy_image_view *info,
|
|
VkImageAspectFlagBits aspect)
|
|
{
|
|
switch (aspect) {
|
|
case VK_IMAGE_ASPECT_COLOR_BIT:
|
|
return info->color.format;
|
|
|
|
case VK_IMAGE_ASPECT_PLANE_0_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
|
return info->plane.format;
|
|
|
|
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
|
return info->depth.format;
|
|
|
|
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
|
return info->stencil.format;
|
|
|
|
default:
|
|
assert(!"Unsupported aspect");
|
|
return VK_FORMAT_UNDEFINED;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
depth_stencil_interleaved(const struct vk_meta_copy_image_view *view)
|
|
{
|
|
return view->stencil.format != VK_FORMAT_UNDEFINED &&
|
|
view->depth.format != VK_FORMAT_UNDEFINED &&
|
|
view->stencil.format == view->depth.format &&
|
|
view->stencil.component_mask != 0 &&
|
|
view->depth.component_mask != 0 &&
|
|
(view->stencil.component_mask & view->depth.component_mask) == 0;
|
|
}
|
|
|
|
static VkResult
|
|
get_gfx_copy_pipeline(
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
VkPipelineLayout layout, VkSampleCountFlagBits samples,
|
|
nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
|
|
VkImageAspectFlagBits aspects, const struct vk_meta_copy_image_view *view,
|
|
const void *key_data, size_t key_size, VkPipeline *pipeline_out)
|
|
{
|
|
VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
|
|
if (from_cache != VK_NULL_HANDLE) {
|
|
*pipeline_out = from_cache;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
const VkPipelineShaderStageNirCreateInfoMESA fs_nir_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
|
|
.nir = build_nir(meta, key_data),
|
|
};
|
|
const VkPipelineShaderStageCreateInfo fs_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.pNext = &fs_nir_info,
|
|
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
|
.pName = "main",
|
|
};
|
|
|
|
VkPipelineDepthStencilStateCreateInfo ds_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
|
};
|
|
VkPipelineDynamicStateCreateInfo dyn_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
|
};
|
|
struct vk_meta_rendering_info render = {
|
|
.samples = samples,
|
|
};
|
|
|
|
const VkGraphicsPipelineCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
|
.stageCount = 1,
|
|
.pStages = &fs_info,
|
|
.pDepthStencilState = &ds_info,
|
|
.pDynamicState = &dyn_info,
|
|
.layout = layout,
|
|
};
|
|
|
|
/* Since copies happen one plane at a time, multiplanar copies can be
|
|
* handled like color copies.
|
|
*/
|
|
if (aspects &
|
|
(VK_IMAGE_ASPECT_COLOR_BIT |
|
|
VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT |
|
|
VK_IMAGE_ASPECT_PLANE_2_BIT)) {
|
|
VkFormat fmt =
|
|
copy_img_view_format_for_aspect(view, aspects);
|
|
|
|
render.color_attachment_formats[render.color_attachment_count] = fmt;
|
|
render.color_attachment_write_masks[render.color_attachment_count] =
|
|
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
|
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
|
|
render.color_attachment_count++;
|
|
}
|
|
|
|
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
|
VkFormat fmt =
|
|
copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_DEPTH_BIT);
|
|
|
|
render.color_attachment_formats[render.color_attachment_count] = fmt;
|
|
render.color_attachment_write_masks[render.color_attachment_count] =
|
|
(VkColorComponentFlags)view->depth.component_mask;
|
|
render.color_attachment_count++;
|
|
}
|
|
|
|
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
|
VkFormat fmt =
|
|
copy_img_view_format_for_aspect(view, VK_IMAGE_ASPECT_STENCIL_BIT);
|
|
|
|
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT &&
|
|
depth_stencil_interleaved(view)) {
|
|
render.color_attachment_write_masks[0] |= view->stencil.component_mask;
|
|
} else {
|
|
render.color_attachment_formats[render.color_attachment_count] = fmt;
|
|
render.color_attachment_write_masks[render.color_attachment_count] =
|
|
(VkColorComponentFlags)view->stencil.component_mask;
|
|
render.color_attachment_count++;
|
|
}
|
|
}
|
|
|
|
VkResult result = vk_meta_create_graphics_pipeline(
|
|
device, meta, &info, &render, key_data, key_size, pipeline_out);
|
|
|
|
ralloc_free(fs_nir_info.nir);
|
|
|
|
return result;
|
|
}
|
|
|
|
static VkResult
|
|
get_compute_copy_pipeline(
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
VkPipelineLayout layout,
|
|
nir_shader *(*build_nir)(const struct vk_meta_device *, const void *),
|
|
const void *key_data, size_t key_size, VkPipeline *pipeline_out)
|
|
{
|
|
VkPipeline from_cache = vk_meta_lookup_pipeline(meta, key_data, key_size);
|
|
if (from_cache != VK_NULL_HANDLE) {
|
|
*pipeline_out = from_cache;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
const VkPipelineShaderStageNirCreateInfoMESA cs_nir_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
|
|
.nir = build_nir(meta, key_data),
|
|
};
|
|
|
|
const VkComputePipelineCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
|
.stage = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.pNext = &cs_nir_info,
|
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.pName = "main",
|
|
},
|
|
.layout = layout,
|
|
};
|
|
|
|
VkResult result = vk_meta_create_compute_pipeline(
|
|
device, meta, &info, key_data, key_size, pipeline_out);
|
|
|
|
ralloc_free(cs_nir_info.nir);
|
|
|
|
return result;
|
|
}
|
|
|
|
static VkResult
|
|
copy_create_src_image_view(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta, struct vk_image *img,
|
|
const struct vk_meta_copy_image_view *view_info,
|
|
VkImageAspectFlags aspect,
|
|
const VkImageSubresourceLayers *subres,
|
|
VkImageView *view_out)
|
|
{
|
|
const VkImageViewUsageCreateInfo usage = {
|
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
|
|
.usage = VK_IMAGE_USAGE_SAMPLED_BIT,
|
|
};
|
|
|
|
VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);
|
|
|
|
/* For multiplane, we only want the aspect for the plane rather than the
|
|
* set of aspects for the full image.
|
|
*/
|
|
VkImageAspectFlags view_aspects =
|
|
aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ?
|
|
vk_format_aspects(format) : aspect;
|
|
|
|
VkImageViewCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
|
.pNext = &usage,
|
|
.flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
|
|
.image = vk_image_to_handle(img),
|
|
.viewType = view_info->type,
|
|
.format = format,
|
|
.subresourceRange = {
|
|
.aspectMask = view_aspects,
|
|
.baseMipLevel = subres->mipLevel,
|
|
.levelCount = 1,
|
|
.baseArrayLayer = 0,
|
|
.layerCount = img->array_layers,
|
|
},
|
|
};
|
|
|
|
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
|
nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_STENCIL_BIT
|
|
? view_info->stencil.component_mask
|
|
: view_info->depth.component_mask;
|
|
assert(comp_mask != 0);
|
|
|
|
VkComponentSwizzle *swizzle = &info.components.r;
|
|
unsigned num_comps = util_bitcount(comp_mask);
|
|
unsigned first_comp = ffs(comp_mask) - 1;
|
|
|
|
assert(first_comp + num_comps <= 4);
|
|
|
|
for (unsigned i = 0; i < num_comps; i++)
|
|
swizzle[i] = first_comp + i + VK_COMPONENT_SWIZZLE_R;
|
|
}
|
|
|
|
return vk_meta_create_image_view(cmd, meta, &info, view_out);
|
|
}
|
|
|
|
static VkResult
|
|
copy_create_dst_image_view(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta, struct vk_image *img,
|
|
const struct vk_meta_copy_image_view *view_info,
|
|
VkImageAspectFlags aspect, const VkOffset3D *offset,
|
|
const VkExtent3D *extent,
|
|
const VkImageSubresourceLayers *subres,
|
|
VkPipelineBindPoint bind_point,
|
|
VkImageView *view_out)
|
|
{
|
|
uint32_t layer_count, base_layer;
|
|
VkFormat format = copy_img_view_format_for_aspect(view_info, aspect);
|
|
|
|
/* For multiplane, we only want the aspect for the plane rather than the
|
|
* set of aspects for the full image.
|
|
*/
|
|
VkImageAspectFlags view_aspects =
|
|
aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ?
|
|
vk_format_aspects(format) : aspect;
|
|
const VkImageViewUsageCreateInfo usage = {
|
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
|
|
.usage = bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
|
|
? VK_IMAGE_USAGE_STORAGE_BIT
|
|
: VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
|
};
|
|
|
|
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
|
|
layer_count =
|
|
MAX2(extent->depth, vk_image_subresource_layer_count(img, subres));
|
|
base_layer = img->image_type == VK_IMAGE_TYPE_3D ? offset->z
|
|
: subres->baseArrayLayer;
|
|
} else {
|
|
/* Always create a view covering the whole image in case of compute. */
|
|
layer_count = img->image_type == VK_IMAGE_TYPE_3D ? 1 : img->array_layers;
|
|
base_layer = 0;
|
|
}
|
|
|
|
const VkImageViewCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
|
.pNext = &usage,
|
|
.flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
|
|
.image = vk_image_to_handle(img),
|
|
.viewType = bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS
|
|
? vk_image_render_view_type(img, layer_count)
|
|
: vk_image_storage_view_type(img),
|
|
.format = format,
|
|
.subresourceRange = {
|
|
.aspectMask = view_aspects,
|
|
.baseMipLevel = subres->mipLevel,
|
|
.levelCount = 1,
|
|
.baseArrayLayer = base_layer,
|
|
.layerCount = layer_count,
|
|
},
|
|
};
|
|
|
|
return vk_meta_create_image_view(cmd, meta, &info, view_out);
|
|
}
|
|
|
|
static nir_def *
|
|
trim_img_coords(nir_builder *b, VkImageViewType view_type, nir_def *coords)
|
|
{
|
|
switch (view_type) {
|
|
case VK_IMAGE_VIEW_TYPE_1D:
|
|
return nir_channel(b, coords, 0);
|
|
|
|
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
|
|
case VK_IMAGE_VIEW_TYPE_2D:
|
|
return nir_trim_vector(b, coords, 2);
|
|
|
|
default:
|
|
return nir_trim_vector(b, coords, 3);
|
|
}
|
|
}
|
|
|
|
static nir_def *
|
|
copy_img_buf_addr(nir_builder *b, enum pipe_format pfmt, nir_def *coords)
|
|
{
|
|
nir_def *buf_row_stride =
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, buf.row_stride);
|
|
nir_def *buf_img_stride =
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, buf.image_stride);
|
|
nir_def *buf_addr =
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, buf.addr);
|
|
nir_def *offset = nir_imul(b, nir_channel(b, coords, 2), buf_img_stride);
|
|
unsigned blk_sz = util_format_get_blocksize(pfmt);
|
|
|
|
offset = nir_iadd(b, offset,
|
|
nir_imul(b, nir_channel(b, coords, 1), buf_row_stride));
|
|
offset = nir_iadd(b, offset,
|
|
nir_imul_imm(b, nir_channel(b, coords, 0), blk_sz));
|
|
|
|
return nir_iadd(b, buf_addr, nir_u2u64(b, offset));
|
|
}
|
|
|
|
static VkFormat
|
|
copy_img_buf_format_for_aspect(const struct vk_meta_copy_image_view *info,
|
|
VkImageAspectFlagBits aspect)
|
|
{
|
|
if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(info->depth.format);
|
|
unsigned num_comps = util_format_get_nr_components(pfmt);
|
|
unsigned depth_comp_bits = 0;
|
|
|
|
for (unsigned i = 0; i < num_comps; i++) {
|
|
if (info->depth.component_mask & BITFIELD_BIT(i))
|
|
depth_comp_bits += util_format_get_component_bits(
|
|
pfmt, UTIL_FORMAT_COLORSPACE_RGB, i);
|
|
}
|
|
|
|
switch (depth_comp_bits) {
|
|
case 16:
|
|
return VK_FORMAT_R16_UINT;
|
|
case 24:
|
|
case 32:
|
|
return VK_FORMAT_R32_UINT;
|
|
default:
|
|
assert(!"Unsupported format");
|
|
return VK_FORMAT_UNDEFINED;
|
|
}
|
|
} else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
|
return VK_FORMAT_R8_UINT;
|
|
}
|
|
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(info->color.format);
|
|
|
|
switch (util_format_get_blocksize(pfmt)) {
|
|
case 1:
|
|
return VK_FORMAT_R8_UINT;
|
|
case 2:
|
|
return VK_FORMAT_R16_UINT;
|
|
case 3:
|
|
return VK_FORMAT_R8G8B8_UINT;
|
|
case 4:
|
|
return VK_FORMAT_R32_UINT;
|
|
case 6:
|
|
return VK_FORMAT_R16G16B16_UINT;
|
|
case 8:
|
|
return VK_FORMAT_R32G32_UINT;
|
|
case 12:
|
|
return VK_FORMAT_R32G32B32_UINT;
|
|
case 16:
|
|
return VK_FORMAT_R32G32B32A32_UINT;
|
|
default:
|
|
assert(!"Unsupported format");
|
|
return VK_FORMAT_UNDEFINED;
|
|
}
|
|
}
|
|
|
|
static nir_def *
|
|
convert_texel(nir_builder *b, VkFormat src_fmt, VkFormat dst_fmt,
|
|
nir_def *texel)
|
|
{
|
|
enum pipe_format src_pfmt = vk_format_to_pipe_format(src_fmt);
|
|
enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_fmt);
|
|
|
|
if (src_pfmt == dst_pfmt)
|
|
return texel;
|
|
|
|
unsigned src_blksz = util_format_get_blocksize(src_pfmt);
|
|
unsigned dst_blksz = util_format_get_blocksize(dst_pfmt);
|
|
|
|
nir_def *packed = nir_format_pack_rgba(b, src_pfmt, texel);
|
|
|
|
/* Needed for depth/stencil copies where the source/dest formats might
|
|
* have a different size. */
|
|
if (src_blksz < dst_blksz)
|
|
packed = nir_pad_vector_imm_int(b, packed, 0, 4);
|
|
|
|
nir_def *unpacked = nir_format_unpack_rgba(b, packed, dst_pfmt);
|
|
|
|
return unpacked;
|
|
}
|
|
|
|
static nir_def *
|
|
place_ds_texel(nir_builder *b, VkFormat fmt, nir_component_mask_t comp_mask,
|
|
nir_def *texel)
|
|
{
|
|
assert(comp_mask != 0);
|
|
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
unsigned num_comps = util_format_get_nr_components(pfmt);
|
|
|
|
if (comp_mask == nir_component_mask(num_comps))
|
|
return texel;
|
|
|
|
assert(num_comps <= 4);
|
|
|
|
nir_def *comps[4];
|
|
unsigned c = 0;
|
|
|
|
for (unsigned i = 0; i < num_comps; i++) {
|
|
if (comp_mask & BITFIELD_BIT(i))
|
|
comps[i] = nir_channel(b, texel, c++);
|
|
else
|
|
comps[i] = nir_imm_intN_t(b, 0, texel->bit_size);
|
|
}
|
|
|
|
return nir_vec(b, comps, num_comps);
|
|
}
|
|
|
|
static nir_deref_instr *
|
|
tex_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
|
|
VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
|
|
unsigned binding)
|
|
{
|
|
VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
|
|
bool is_array = vk_image_view_type_is_array(view->type);
|
|
enum glsl_sampler_dim sampler_dim =
|
|
samples != VK_SAMPLE_COUNT_1_BIT
|
|
? GLSL_SAMPLER_DIM_MS
|
|
: vk_image_view_type_to_sampler_dim(view->type);
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
enum glsl_base_type base_type =
|
|
util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT
|
|
: util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
|
|
: GLSL_TYPE_FLOAT;
|
|
const char *tex_name;
|
|
switch (aspect) {
|
|
case VK_IMAGE_ASPECT_COLOR_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_0_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
|
tex_name = "color_tex";
|
|
break;
|
|
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
|
tex_name = "depth_tex";
|
|
break;
|
|
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
|
tex_name = "stencil_tex";
|
|
break;
|
|
default:
|
|
assert(!"Unsupported aspect");
|
|
return NULL;
|
|
}
|
|
|
|
const struct glsl_type *texture_type =
|
|
glsl_sampler_type(sampler_dim, false, is_array, base_type);
|
|
nir_variable *texture =
|
|
nir_variable_create(b->shader, nir_var_uniform, texture_type, tex_name);
|
|
texture->data.descriptor_set = 0;
|
|
texture->data.binding = binding;
|
|
|
|
return nir_build_deref_var(b, texture);
|
|
}
|
|
|
|
static nir_deref_instr *
|
|
img_deref(nir_builder *b, const struct vk_meta_copy_image_view *view,
|
|
VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
|
|
unsigned binding)
|
|
{
|
|
VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
|
|
bool is_array = vk_image_view_type_is_array(view->type);
|
|
enum glsl_sampler_dim sampler_dim =
|
|
samples != VK_SAMPLE_COUNT_1_BIT
|
|
? GLSL_SAMPLER_DIM_MS
|
|
: vk_image_view_type_to_sampler_dim(view->type);
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
enum glsl_base_type base_type =
|
|
util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT
|
|
: util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
|
|
: GLSL_TYPE_FLOAT;
|
|
const char *img_name;
|
|
switch (aspect) {
|
|
case VK_IMAGE_ASPECT_COLOR_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_0_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
|
img_name = "color_img";
|
|
break;
|
|
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
|
img_name = "depth_img";
|
|
break;
|
|
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
|
img_name = "stencil_img";
|
|
break;
|
|
default:
|
|
assert(!"Unsupported aspect");
|
|
return NULL;
|
|
}
|
|
const struct glsl_type *image_type =
|
|
glsl_image_type(sampler_dim, is_array, base_type);
|
|
nir_variable *image_var =
|
|
nir_variable_create(b->shader, nir_var_uniform, image_type, img_name);
|
|
image_var->data.descriptor_set = 0;
|
|
image_var->data.binding = binding;
|
|
|
|
return nir_build_deref_var(b, image_var);
|
|
}
|
|
|
|
static nir_def *
|
|
read_texel(nir_builder *b, nir_deref_instr *tex, nir_def *coords,
|
|
nir_def *sample_id)
|
|
{
|
|
return sample_id ? nir_txf_ms(b, coords, sample_id, .texture_deref = tex)
|
|
: nir_txf(b, coords, .texture_deref = tex);
|
|
}
|
|
|
|
static nir_variable *
|
|
frag_var(nir_builder *b, const struct vk_meta_copy_image_view *view,
|
|
VkImageAspectFlags aspect, uint32_t rt)
|
|
{
|
|
VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
enum glsl_base_type base_type =
|
|
util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT
|
|
: util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
|
|
: GLSL_TYPE_FLOAT;
|
|
const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
|
|
static const char *var_names[] = {
|
|
"gl_FragData[0]",
|
|
"gl_FragData[1]",
|
|
};
|
|
|
|
assert(rt < ARRAY_SIZE(var_names));
|
|
|
|
nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
|
|
var_type, var_names[rt]);
|
|
var->data.location = FRAG_RESULT_DATA0 + rt;
|
|
|
|
return var;
|
|
}
|
|
|
|
static void
|
|
write_frag(nir_builder *b, const struct vk_meta_copy_image_view *view,
|
|
VkImageAspectFlags aspect, nir_variable *frag_var, nir_def *frag_val)
|
|
{
|
|
nir_component_mask_t comp_mask;
|
|
|
|
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
|
VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
|
|
|
|
comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
|
|
? view->depth.component_mask
|
|
: view->stencil.component_mask;
|
|
frag_val = place_ds_texel(b, fmt, comp_mask, frag_val);
|
|
} else {
|
|
comp_mask = nir_component_mask(4);
|
|
}
|
|
|
|
if (frag_val->bit_size != 32) {
|
|
switch (glsl_get_base_type(frag_var->type)) {
|
|
case GLSL_TYPE_INT:
|
|
frag_val = nir_i2i32(b, frag_val);
|
|
break;
|
|
case GLSL_TYPE_UINT:
|
|
frag_val = nir_u2u32(b, frag_val);
|
|
break;
|
|
case GLSL_TYPE_FLOAT:
|
|
frag_val = nir_f2f32(b, frag_val);
|
|
break;
|
|
default:
|
|
assert(!"Invalid type");
|
|
frag_val = NULL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
frag_val = nir_pad_vector_imm_int(b, frag_val, 0, 4);
|
|
|
|
nir_store_var(b, frag_var, frag_val, comp_mask);
|
|
}
|
|
|
|
static void
|
|
write_img(nir_builder *b, const struct vk_meta_copy_image_view *view,
|
|
VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
|
|
nir_deref_instr *img_deref, nir_def *coords, nir_def *sample_id,
|
|
nir_def *val)
|
|
{
|
|
VkFormat fmt = copy_img_view_format_for_aspect(view, aspect);
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
enum glsl_base_type base_type =
|
|
util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT
|
|
: util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT
|
|
: GLSL_TYPE_FLOAT;
|
|
enum glsl_sampler_dim sampler_dim =
|
|
samples != VK_SAMPLE_COUNT_1_BIT
|
|
? GLSL_SAMPLER_DIM_MS
|
|
: vk_image_view_type_to_sampler_dim(view->type);
|
|
bool is_array = vk_image_view_type_is_array(view->type);
|
|
|
|
if (!sample_id) {
|
|
assert(samples == VK_SAMPLE_COUNT_1_BIT);
|
|
sample_id = nir_imm_int(b, 0);
|
|
}
|
|
|
|
unsigned access_flags = ACCESS_NON_READABLE;
|
|
nir_def *zero_lod = nir_imm_int(b, 0);
|
|
|
|
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
|
nir_component_mask_t comp_mask = aspect == VK_IMAGE_ASPECT_DEPTH_BIT
|
|
? view->depth.component_mask
|
|
: view->stencil.component_mask;
|
|
unsigned num_comps = util_format_get_nr_components(pfmt);
|
|
|
|
val = place_ds_texel(b, fmt, comp_mask, val);
|
|
|
|
if (comp_mask != nir_component_mask(num_comps)) {
|
|
nir_def *comps[4];
|
|
access_flags = 0;
|
|
|
|
nir_def *old_val = nir_image_deref_load(b,
|
|
val->num_components, val->bit_size, &img_deref->def, coords,
|
|
sample_id, zero_lod, .image_dim = sampler_dim,
|
|
.image_array = is_array, .format = pfmt, .access = access_flags,
|
|
.dest_type = nir_get_nir_type_for_glsl_base_type(base_type));
|
|
|
|
for (unsigned i = 0; i < val->num_components; i++) {
|
|
if (comp_mask & BITFIELD_BIT(i))
|
|
comps[i] = nir_channel(b, val, i);
|
|
else
|
|
comps[i] = nir_channel(b, old_val, i);
|
|
}
|
|
|
|
val = nir_vec(b, comps, val->num_components);
|
|
}
|
|
}
|
|
|
|
nir_image_deref_store(b,
|
|
&img_deref->def, coords, sample_id, val, zero_lod,
|
|
.image_dim = sampler_dim, .image_array = is_array, .format = pfmt,
|
|
.access = access_flags,
|
|
.src_type = nir_get_nir_type_for_glsl_base_type(base_type));
|
|
}
|
|
|
|
static nir_shader *
|
|
build_image_to_buffer_shader(const struct vk_meta_device *meta,
|
|
const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_buffer_image_key *key = key_data;
|
|
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-to-buffer");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.workgroup_size[0] = key->wg_size[0];
|
|
b->shader->info.workgroup_size[1] = key->wg_size[1];
|
|
b->shader->info.workgroup_size[2] = key->wg_size[2];
|
|
|
|
VkFormat buf_fmt =
|
|
copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
|
|
enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
|
|
|
|
nir_def *copy_id = nir_load_global_invocation_id(b, 32);
|
|
nir_def *copy_id_start =
|
|
nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.z));
|
|
nir_def *copy_id_end = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.end.z));
|
|
|
|
nir_def *in_bounds =
|
|
nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
|
|
nir_ball(b, nir_ult(b, copy_id, copy_id_end)));
|
|
|
|
nir_push_if(b, in_bounds);
|
|
|
|
copy_id = nir_isub(b, copy_id, copy_id_start);
|
|
|
|
nir_def *img_offs = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));
|
|
|
|
nir_def *img_coords =
|
|
trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));
|
|
|
|
VkFormat iview_fmt =
|
|
copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
|
|
nir_deref_instr *tex =
|
|
tex_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);
|
|
nir_def *texel = read_texel(b, tex, img_coords, NULL);
|
|
|
|
texel = convert_texel(b, iview_fmt, buf_fmt, texel);
|
|
|
|
unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
|
|
unsigned comp_count = util_format_get_nr_components(buf_pfmt);
|
|
assert(blk_sz % comp_count == 0);
|
|
unsigned comp_sz = (blk_sz / comp_count) * 8;
|
|
|
|
/* nir_format_unpack() (which is called in convert_texel()) always
|
|
* returns a 32-bit result, which we might have to downsize to match
|
|
* the component size we want, hence the u2uN().
|
|
*/
|
|
texel = nir_u2uN(b, texel, comp_sz);
|
|
|
|
/* nir_format_unpack_rgba() (which is called from convert_texel()) returns
|
|
* a vec4, which means we might have more components than we need, but
|
|
* that's fine because we pass a write_mask to store_global.
|
|
*/
|
|
assert(texel->num_components >= comp_count);
|
|
nir_store_global(b, texel, copy_img_buf_addr(b, buf_pfmt, copy_id),
|
|
.write_mask = nir_component_mask(comp_count));
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_image_to_buffer_pipeline(
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_image_key *key,
|
|
VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
|
|
{
|
|
const VkDescriptorSetLayoutBinding bindings[] = {
|
|
COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
|
|
};
|
|
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
sizeof(struct vk_meta_copy_buffer_image_info), bindings,
|
|
ARRAY_SIZE(bindings), layout_out);
|
|
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_compute_copy_pipeline(device, meta, *layout_out,
|
|
build_image_to_buffer_shader, key,
|
|
sizeof(*key), pipeline_out);
|
|
}
|
|
|
|
static nir_shader *
|
|
build_buffer_to_image_fs(const struct vk_meta_device *meta,
|
|
const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_buffer_image_key *key = key_data;
|
|
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-buffer-to-image-frag");
|
|
nir_builder *b = &builder;
|
|
|
|
VkFormat buf_fmt =
|
|
copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
|
|
|
|
enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
|
|
nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
|
|
nir_def *out_layer = nir_load_layer_id(b);
|
|
|
|
nir_def *img_offs = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));
|
|
|
|
/* Move the layer ID to the second coordinate if we're dealing with a 1D
|
|
* array, as this is where the texture instruction expects it. */
|
|
nir_def *coords = key->img.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
|
|
? nir_vec3(b, nir_channel(b, out_coord_xy, 0),
|
|
out_layer, nir_imm_int(b, 0))
|
|
: nir_vec3(b, nir_channel(b, out_coord_xy, 0),
|
|
nir_channel(b, out_coord_xy, 1), out_layer);
|
|
|
|
unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
|
|
unsigned comp_count = util_format_get_nr_components(buf_pfmt);
|
|
assert(blk_sz % comp_count == 0);
|
|
unsigned comp_sz = (blk_sz / comp_count) * 8;
|
|
|
|
coords = nir_isub(b, coords, img_offs);
|
|
|
|
nir_def *texel = nir_load_global(b,
|
|
comp_count, comp_sz, copy_img_buf_addr(b, buf_pfmt, coords),
|
|
.align_mul = 1 << (ffs(blk_sz) - 1));
|
|
|
|
/* We don't do compressed formats. The driver should select a non-compressed
|
|
* format with the same block size. */
|
|
assert(!util_format_is_compressed(buf_pfmt));
|
|
|
|
VkFormat iview_fmt =
|
|
copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
|
|
nir_variable *out_var = frag_var(b, &key->img.view, key->img.aspect, 0);
|
|
|
|
texel = convert_texel(b, buf_fmt, iview_fmt, texel);
|
|
write_frag(b, &key->img.view, key->img.aspect, out_var, texel);
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_buffer_to_image_gfx_pipeline(
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_image_key *key,
|
|
VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
|
|
{
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_GFX,
|
|
VK_SHADER_STAGE_FRAGMENT_BIT,
|
|
sizeof(struct vk_meta_copy_buffer_image_info), NULL, 0, layout_out);
|
|
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_gfx_copy_pipeline(device, meta, *layout_out,
|
|
VK_SAMPLE_COUNT_1_BIT, build_buffer_to_image_fs,
|
|
key->img.aspect, &key->img.view, key,
|
|
sizeof(*key), pipeline_out);
|
|
}
|
|
|
|
static nir_shader *
|
|
build_buffer_to_image_cs(const struct vk_meta_device *meta,
|
|
const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_buffer_image_key *key = key_data;
|
|
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer-to-image-compute");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.workgroup_size[0] = key->wg_size[0];
|
|
b->shader->info.workgroup_size[1] = key->wg_size[1];
|
|
b->shader->info.workgroup_size[2] = key->wg_size[2];
|
|
|
|
VkFormat buf_fmt =
|
|
copy_img_buf_format_for_aspect(&key->img.view, key->img.aspect);
|
|
VkFormat img_fmt =
|
|
copy_img_view_format_for_aspect(&key->img.view, key->img.aspect);
|
|
enum pipe_format buf_pfmt = vk_format_to_pipe_format(buf_fmt);
|
|
nir_deref_instr *image_deref =
|
|
img_deref(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT, 0);
|
|
|
|
nir_def *copy_id = nir_load_global_invocation_id(b, 32);
|
|
nir_def *copy_id_start =
|
|
nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.start.z));
|
|
nir_def *copy_id_end = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, copy_id_range.end.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info,
|
|
copy_id_range.end.z));
|
|
|
|
nir_def *in_bounds =
|
|
nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
|
|
nir_ball(b, nir_ult(b, copy_id, copy_id_end)));
|
|
|
|
nir_push_if(b, in_bounds);
|
|
|
|
/* Adjust the copy ID such that we can directly deduce the image coords and
|
|
* buffer offset from it. */
|
|
copy_id = nir_isub(b, copy_id, copy_id_start);
|
|
|
|
nir_def *img_offs = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.x),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.y),
|
|
load_info(b, struct vk_meta_copy_buffer_image_info, img.offset.z));
|
|
|
|
nir_def *img_coords =
|
|
trim_img_coords(b, key->img.view.type, nir_iadd(b, copy_id, img_offs));
|
|
|
|
img_coords = nir_pad_vector_imm_int(b, img_coords, 0, 4);
|
|
|
|
unsigned blk_sz = util_format_get_blocksize(buf_pfmt);
|
|
unsigned bit_sz = blk_sz & 1 ? 8 : blk_sz & 2 ? 16 : 32;
|
|
unsigned comp_count = blk_sz * 8 / bit_sz;
|
|
|
|
nir_def *texel = nir_load_global(b,
|
|
comp_count, bit_sz, copy_img_buf_addr(b, buf_pfmt, copy_id),
|
|
.align_mul = 1 << (ffs(blk_sz) - 1));
|
|
|
|
texel = convert_texel(b, buf_fmt, img_fmt, texel);
|
|
|
|
/* If the image view format matches buf_fmt, convert_texel() does nothing,
|
|
* but we still need to promote the texel to a 32-bit unsigned integer,
|
|
* because write_img() wants a 32-bit value.
|
|
*/
|
|
if (texel->bit_size < 32)
|
|
texel = nir_u2u32(b, texel);
|
|
|
|
write_img(b, &key->img.view, key->img.aspect, VK_SAMPLE_COUNT_1_BIT,
|
|
image_deref, img_coords, NULL, texel);
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_buffer_to_image_compute_pipeline(
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_image_key *key,
|
|
VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
|
|
{
|
|
const VkDescriptorSetLayoutBinding bindings[] = {
|
|
COPY_SHADER_BINDING(0, STORAGE_IMAGE, COMPUTE),
|
|
};
|
|
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_CS,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
sizeof(struct vk_meta_copy_buffer_image_info), bindings,
|
|
ARRAY_SIZE(bindings), layout_out);
|
|
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_compute_copy_pipeline(device, meta, *layout_out,
|
|
build_buffer_to_image_cs, key, sizeof(*key),
|
|
pipeline_out);
|
|
}
|
|
|
|
static VkResult
|
|
copy_buffer_image_prepare_gfx_push_const(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_image_key *key,
|
|
VkPipelineLayout pipeline_layout, VkBuffer buffer,
|
|
const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
|
|
const VkBufferImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
|
|
/* vk_meta_copy_buffer_image_info::image_stride is 32-bit for now.
|
|
* We might want to make it a 64-bit integer (and patch the shader code
|
|
* accordingly) if that becomes a limiting factor for vk_meta_copy users.
|
|
*/
|
|
assert(buf_layout->image_stride_B <= UINT32_MAX);
|
|
|
|
struct vk_meta_copy_buffer_image_info info = {
|
|
.buf = {
|
|
.row_stride = buf_layout->row_stride_B,
|
|
.image_stride = buf_layout->image_stride_B,
|
|
.addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
|
|
VK_WHOLE_SIZE),
|
|
},
|
|
.img.offset = {
|
|
.x = region->imageOffset.x,
|
|
.y = region->imageOffset.y,
|
|
.z = region->imageOffset.z,
|
|
},
|
|
};
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
copy_buffer_image_prepare_compute_push_const(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_image_key *key,
|
|
VkPipelineLayout pipeline_layout, VkBuffer buffer,
|
|
const struct vk_image_buffer_layout *buf_layout, struct vk_image *img,
|
|
const VkBufferImageCopy2 *region, uint32_t *wg_count)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkImageViewType img_view_type = key->img.view.type;
|
|
VkOffset3D img_offs =
|
|
base_layer_as_offset(img_view_type, region->imageOffset,
|
|
region->imageSubresource.baseArrayLayer);
|
|
uint32_t layer_count =
|
|
vk_image_subresource_layer_count(img, ®ion->imageSubresource);
|
|
VkExtent3D img_extent =
|
|
layer_count_as_extent(img_view_type, region->imageExtent, layer_count);
|
|
|
|
struct vk_meta_copy_buffer_image_info info = {
|
|
.buf = {
|
|
.row_stride = buf_layout->row_stride_B,
|
|
.image_stride = buf_layout->image_stride_B,
|
|
.addr = vk_meta_buffer_address(dev, buffer, region->bufferOffset,
|
|
VK_WHOLE_SIZE),
|
|
},
|
|
.img.offset = {
|
|
.x = img_offs.x,
|
|
.y = img_offs.y,
|
|
.z = img_offs.z,
|
|
},
|
|
};
|
|
|
|
info.copy_id_range.start.x = img_offs.x % key->wg_size[0];
|
|
info.copy_id_range.start.y = img_offs.y % key->wg_size[1];
|
|
info.copy_id_range.start.z = img_offs.z % key->wg_size[2];
|
|
info.copy_id_range.end.x = info.copy_id_range.start.x + img_extent.width;
|
|
info.copy_id_range.end.y = info.copy_id_range.start.y + img_extent.height;
|
|
info.copy_id_range.end.z = info.copy_id_range.start.z + img_extent.depth;
|
|
wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
|
|
wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
|
|
wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static bool
|
|
format_is_supported(VkFormat fmt)
|
|
{
|
|
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
|
const struct util_format_description *fdesc = util_format_description(pfmt);
|
|
|
|
/* We only support RGB formats in the copy path to keep things simple. */
|
|
return fdesc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
|
|
fdesc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
|
|
}
|
|
|
|
static struct vk_meta_copy_image_view
|
|
img_copy_view_info(VkImageViewType view_type, VkImageAspectFlags aspects,
|
|
const struct vk_image *img,
|
|
const struct vk_meta_copy_image_properties *img_props)
|
|
{
|
|
struct vk_meta_copy_image_view view = {
|
|
.type = view_type,
|
|
};
|
|
|
|
assert(aspects &
|
|
(VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT |
|
|
VK_IMAGE_ASPECT_STENCIL_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
|
|
VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
|
|
|
|
if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
|
|
/* Color aspect can't be combined with other aspects. */
|
|
assert(!(aspects & ~VK_IMAGE_ASPECT_COLOR_BIT));
|
|
view.color.format = img_props->color.view_format;
|
|
assert(format_is_supported(view.color.format));
|
|
return view;
|
|
}
|
|
|
|
if (aspects &
|
|
(VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT |
|
|
VK_IMAGE_ASPECT_PLANE_2_BIT)) {
|
|
switch (aspects) {
|
|
case VK_IMAGE_ASPECT_PLANE_0_BIT:
|
|
view.plane.format = img_props->plane[0].view_format;
|
|
break;
|
|
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
|
view.plane.format = img_props->plane[1].view_format;
|
|
break;
|
|
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
|
view.plane.format = img_props->plane[2].view_format;
|
|
break;
|
|
default:
|
|
UNREACHABLE("invalid ycbcr aspect");
|
|
}
|
|
|
|
assert(format_is_supported(view.color.format));
|
|
return view;
|
|
}
|
|
|
|
view.depth.format = img_props->depth.view_format;
|
|
view.depth.component_mask = img_props->depth.component_mask;
|
|
view.stencil.format = img_props->stencil.view_format;
|
|
view.stencil.component_mask = img_props->stencil.component_mask;
|
|
|
|
assert(view.depth.format == VK_FORMAT_UNDEFINED ||
|
|
format_is_supported(view.depth.format));
|
|
assert(view.stencil.format == VK_FORMAT_UNDEFINED ||
|
|
format_is_supported(view.stencil.format));
|
|
return view;
|
|
}
|
|
|
|
static void
|
|
copy_image_to_buffer_region(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
struct vk_image *img, VkImageLayout img_layout,
|
|
const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
|
|
const struct vk_image_buffer_layout *buf_layout,
|
|
const VkBufferImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
struct vk_meta_copy_buffer_image_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER,
|
|
.img = {
|
|
.view = img_copy_view_info(vk_image_sampled_view_type(img),
|
|
region->imageSubresource.aspectMask, img,
|
|
img_props),
|
|
.aspect = region->imageSubresource.aspectMask,
|
|
},
|
|
.wg_size = {
|
|
img_props->tile_size.width,
|
|
img_props->tile_size.height,
|
|
img_props->tile_size.depth,
|
|
},
|
|
};
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
VkResult result = get_copy_image_to_buffer_pipeline(
|
|
dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
VkImageView iview;
|
|
result = copy_create_src_image_view(cmd, meta, img, &key.img.view,
|
|
region->imageSubresource.aspectMask,
|
|
®ion->imageSubresource, &iview);
|
|
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
const VkWriteDescriptorSet descs[] = {
|
|
COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iview, img_layout),
|
|
};
|
|
|
|
disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
pipeline_layout, 0, ARRAY_SIZE(descs), descs);
|
|
|
|
uint32_t wg_count[3] = {0};
|
|
|
|
result = copy_buffer_image_prepare_compute_push_const(
|
|
cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
|
|
wg_count);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
|
|
wg_count[2]);
|
|
}
|
|
|
|
void
|
|
vk_meta_copy_image_to_buffer(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const VkCopyImageToBufferInfo2 *info,
|
|
const struct vk_meta_copy_image_properties *img_props)
|
|
{
|
|
VK_FROM_HANDLE(vk_image, img, info->srcImage);
|
|
|
|
for (uint32_t i = 0; i < info->regionCount; i++) {
|
|
VkBufferImageCopy2 region = info->pRegions[i];
|
|
struct vk_image_buffer_layout buf_layout =
|
|
vk_image_buffer_copy_layout(img, ®ion);
|
|
|
|
region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
|
|
region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
|
|
|
|
copy_image_to_buffer_region(cmd, meta, img, info->srcImageLayout,
|
|
img_props, info->dstBuffer, &buf_layout,
|
|
®ion);
|
|
}
|
|
}
|
|
|
|
static void
|
|
copy_draw(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
struct vk_image *dst_img, VkImageLayout dst_img_layout,
|
|
const VkImageSubresourceLayers *dst_img_subres,
|
|
const VkOffset3D *dst_img_offset, const VkExtent3D *copy_extent,
|
|
const struct vk_meta_copy_image_view *view_info)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
uint32_t depth_or_layer_count =
|
|
MAX2(copy_extent->depth,
|
|
vk_image_subresource_layer_count(dst_img, dst_img_subres));
|
|
struct vk_meta_rect rect = {
|
|
.x0 = dst_img_offset->x,
|
|
.x1 = dst_img_offset->x + copy_extent->width,
|
|
.y0 = dst_img_offset->y,
|
|
.y1 = dst_img_offset->y + copy_extent->height,
|
|
};
|
|
VkRenderingAttachmentInfo vk_atts[2];
|
|
VkRenderingInfo vk_render = {
|
|
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
|
|
.renderArea = {
|
|
.offset = {
|
|
dst_img_offset->x,
|
|
dst_img_offset->y,
|
|
},
|
|
.extent = {
|
|
copy_extent->width,
|
|
copy_extent->height,
|
|
},
|
|
},
|
|
.layerCount = depth_or_layer_count,
|
|
.pColorAttachments = vk_atts,
|
|
};
|
|
VkImageView iview = VK_NULL_HANDLE;
|
|
|
|
u_foreach_bit(a, dst_img_subres->aspectMask) {
|
|
VkImageAspectFlagBits aspect = 1 << a;
|
|
|
|
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && iview != VK_NULL_HANDLE &&
|
|
depth_stencil_interleaved(view_info))
|
|
continue;
|
|
|
|
VkResult result = copy_create_dst_image_view(
|
|
cmd, meta, dst_img, view_info, aspect, dst_img_offset, copy_extent,
|
|
dst_img_subres, VK_PIPELINE_BIND_POINT_GRAPHICS, &iview);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
vk_atts[vk_render.colorAttachmentCount] = (VkRenderingAttachmentInfo){
|
|
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
|
.imageView = iview,
|
|
.imageLayout = dst_img_layout,
|
|
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
|
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
|
};
|
|
|
|
/* If we have interleaved depth/stencil and only one aspect is copied, we
|
|
* need to load the attachment to preserve the other component. */
|
|
if (vk_format_has_depth(dst_img->format) &&
|
|
vk_format_has_stencil(dst_img->format) &&
|
|
depth_stencil_interleaved(view_info) &&
|
|
(dst_img_subres->aspectMask !=
|
|
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
|
|
vk_atts[vk_render.colorAttachmentCount].loadOp =
|
|
VK_ATTACHMENT_LOAD_OP_LOAD;
|
|
}
|
|
|
|
vk_render.colorAttachmentCount++;
|
|
}
|
|
|
|
disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd), &vk_render);
|
|
meta->cmd_draw_volume(cmd, meta, &rect, vk_render.layerCount);
|
|
disp->CmdEndRendering(vk_command_buffer_to_handle(cmd));
|
|
}
|
|
|
|
static void
|
|
copy_buffer_to_image_region_gfx(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
struct vk_image *img, VkImageLayout img_layout,
|
|
const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
|
|
const struct vk_image_buffer_layout *buf_layout,
|
|
const VkBufferImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
|
|
/* We only special-case 1D_ARRAY to move the layer ID to the second
|
|
* component instead of the third. For all other view types, let's pick an
|
|
* invalid VkImageViewType value so we don't end up creating the same
|
|
* pipeline multiple times. */
|
|
VkImageViewType view_type =
|
|
img->image_type == VK_IMAGE_TYPE_1D && img->array_layers > 1
|
|
? VK_IMAGE_VIEW_TYPE_1D_ARRAY
|
|
: (VkImageViewType)-1;
|
|
|
|
struct vk_meta_copy_buffer_image_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_GFX,
|
|
.img = {
|
|
.view = img_copy_view_info(view_type,
|
|
region->imageSubresource.aspectMask, img,
|
|
img_props),
|
|
.aspect = region->imageSubresource.aspectMask,
|
|
},
|
|
};
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
VkResult result = get_copy_buffer_to_image_gfx_pipeline(
|
|
dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
|
|
|
result = copy_buffer_image_prepare_gfx_push_const(
|
|
cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
copy_draw(cmd, meta, img, img_layout, ®ion->imageSubresource,
|
|
®ion->imageOffset, ®ion->imageExtent, &key.img.view);
|
|
}
|
|
|
|
static void
|
|
copy_buffer_to_image_region_compute(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
struct vk_image *img, VkImageLayout img_layout,
|
|
const struct vk_meta_copy_image_properties *img_props, VkBuffer buffer,
|
|
const struct vk_image_buffer_layout *buf_layout,
|
|
const VkBufferImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkImageViewType view_type = vk_image_storage_view_type(img);
|
|
struct vk_meta_copy_buffer_image_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_CS,
|
|
.img = {
|
|
.view = img_copy_view_info(view_type,
|
|
region->imageSubresource.aspectMask, img,
|
|
img_props),
|
|
.aspect = region->imageSubresource.aspectMask,
|
|
},
|
|
.wg_size = {
|
|
img_props->tile_size.width,
|
|
img_props->tile_size.height,
|
|
img_props->tile_size.depth,
|
|
},
|
|
};
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
VkResult result = get_copy_buffer_to_image_compute_pipeline(
|
|
dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
VkImageView iview;
|
|
result = copy_create_dst_image_view(
|
|
cmd, meta, img, &key.img.view, region->imageSubresource.aspectMask,
|
|
®ion->imageOffset, ®ion->imageExtent, ®ion->imageSubresource,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, &iview);
|
|
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
const VkWriteDescriptorSet descs[] = {
|
|
COPY_PUSH_SET_IMG_DESC(0, STORAGE, iview, img_layout),
|
|
};
|
|
|
|
disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
pipeline_layout, 0, ARRAY_SIZE(descs), descs);
|
|
|
|
uint32_t wg_count[3] = {0};
|
|
|
|
result = copy_buffer_image_prepare_compute_push_const(
|
|
cmd, meta, &key, pipeline_layout, buffer, buf_layout, img, region,
|
|
wg_count);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdDispatch(vk_command_buffer_to_handle(cmd),
|
|
wg_count[0], wg_count[1], wg_count[2]);
|
|
}
|
|
|
|
void
|
|
vk_meta_copy_buffer_to_image(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const VkCopyBufferToImageInfo2 *info,
|
|
const struct vk_meta_copy_image_properties *img_props,
|
|
VkPipelineBindPoint bind_point)
|
|
{
|
|
VK_FROM_HANDLE(vk_image, img, info->dstImage);
|
|
|
|
for (uint32_t i = 0; i < info->regionCount; i++) {
|
|
VkBufferImageCopy2 region = info->pRegions[i];
|
|
struct vk_image_buffer_layout buf_layout =
|
|
vk_image_buffer_copy_layout(img, ®ion);
|
|
|
|
region.imageExtent = vk_image_extent_to_elements(img, region.imageExtent);
|
|
region.imageOffset = vk_image_offset_to_elements(img, region.imageOffset);
|
|
|
|
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
|
|
copy_buffer_to_image_region_gfx(cmd, meta, img, info->dstImageLayout,
|
|
img_props, info->srcBuffer,
|
|
&buf_layout, ®ion);
|
|
} else {
|
|
copy_buffer_to_image_region_compute(cmd, meta, img,
|
|
info->dstImageLayout, img_props,
|
|
info->srcBuffer, &buf_layout,
|
|
®ion);
|
|
}
|
|
}
|
|
}
|
|
|
|
static nir_shader *
|
|
build_copy_image_fs(const struct vk_meta_device *meta, const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_image_key *key = key_data;
|
|
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_FRAGMENT, NULL, "vk-meta-copy-image-frag");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.fs.uses_sample_shading =
|
|
key->samples != VK_SAMPLE_COUNT_1_BIT;
|
|
|
|
nir_def *out_coord_xy = nir_f2u32(b, nir_load_frag_coord(b));
|
|
nir_def *out_layer = nir_load_layer_id(b);
|
|
|
|
nir_def *src_offset = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.x),
|
|
load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.y),
|
|
load_info(b, struct vk_meta_copy_image_fs_info, dst_to_src_offs.z));
|
|
|
|
/* Move the layer ID to the second coordinate if we're dealing with a 1D
|
|
* array, as this is where the texture instruction expects it. */
|
|
nir_def *src_coords =
|
|
key->dst.view.type == VK_IMAGE_VIEW_TYPE_1D_ARRAY
|
|
? nir_vec3(b, nir_channel(b, out_coord_xy, 0), out_layer,
|
|
nir_imm_int(b, 0))
|
|
: nir_vec3(b, nir_channel(b, out_coord_xy, 0),
|
|
nir_channel(b, out_coord_xy, 1), out_layer);
|
|
|
|
src_coords = trim_img_coords(b, key->src.view.type,
|
|
nir_iadd(b, src_coords, src_offset));
|
|
|
|
nir_def *sample_id =
|
|
key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
|
|
nir_variable *color_var = NULL;
|
|
uint32_t tex_binding = 0;
|
|
|
|
u_foreach_bit(a, key->src.aspects) {
|
|
VkImageAspectFlags src_aspect = 1 << a;
|
|
VkImageAspectFlags dst_aspect =
|
|
key->dst.aspects == key->src.aspects ? src_aspect : key->dst.aspects;
|
|
VkFormat src_fmt =
|
|
copy_img_view_format_for_aspect(&key->src.view, src_aspect);
|
|
VkFormat dst_fmt =
|
|
copy_img_view_format_for_aspect(&key->dst.view, dst_aspect);
|
|
nir_deref_instr *tex =
|
|
tex_deref(b, &key->src.view, src_aspect, key->samples, tex_binding++);
|
|
nir_def *texel = read_texel(b, tex, src_coords, sample_id);
|
|
|
|
if (!color_var || !depth_stencil_interleaved(&key->dst.view)) {
|
|
color_var =
|
|
frag_var(b, &key->dst.view, dst_aspect, color_var != NULL ? 1 : 0);
|
|
}
|
|
|
|
texel = convert_texel(b, src_fmt, dst_fmt, texel);
|
|
write_frag(b, &key->dst.view, dst_aspect, color_var, texel);
|
|
}
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_image_gfx_pipeline(struct vk_device *device,
|
|
struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key,
|
|
VkPipelineLayout *layout_out,
|
|
VkPipeline *pipeline_out)
|
|
{
|
|
const struct VkDescriptorSetLayoutBinding bindings[] = {
|
|
COPY_SHADER_BINDING(0, SAMPLED_IMAGE, FRAGMENT),
|
|
COPY_SHADER_BINDING(1, SAMPLED_IMAGE, FRAGMENT),
|
|
};
|
|
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_IMAGE_GFX,
|
|
VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(struct vk_meta_copy_image_fs_info),
|
|
bindings, ARRAY_SIZE(bindings), layout_out);
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_gfx_copy_pipeline(
|
|
device, meta, *layout_out, key->samples, build_copy_image_fs,
|
|
key->dst.aspects, &key->dst.view, key, sizeof(*key), pipeline_out);
|
|
}
|
|
|
|
static nir_shader *
|
|
build_copy_image_cs(const struct vk_meta_device *meta, const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_image_key *key = key_data;
|
|
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-image-compute");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.workgroup_size[0] = key->wg_size[0];
|
|
b->shader->info.workgroup_size[1] = key->wg_size[1];
|
|
b->shader->info.workgroup_size[2] = key->wg_size[2];
|
|
|
|
nir_def *copy_id = nir_load_global_invocation_id(b, 32);
|
|
nir_def *copy_id_start = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.x),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.y),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.start.z));
|
|
nir_def *copy_id_end = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.x),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.y),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, copy_id_range.end.z));
|
|
|
|
nir_def *in_bounds =
|
|
nir_iand(b, nir_ball(b, nir_uge(b, copy_id, copy_id_start)),
|
|
nir_ball(b, nir_ult(b, copy_id, copy_id_end)));
|
|
|
|
nir_push_if(b, in_bounds);
|
|
|
|
nir_def *src_offset = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.x),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.y),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, src_img.offset.z));
|
|
nir_def *dst_offset = nir_vec3(b,
|
|
load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.x),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.y),
|
|
load_info(b, struct vk_meta_copy_image_cs_info, dst_img.offset.z));
|
|
|
|
nir_def *src_coords = trim_img_coords(b, key->src.view.type,
|
|
nir_iadd(b, copy_id, src_offset));
|
|
nir_def *dst_coords = trim_img_coords(b, key->dst.view.type,
|
|
nir_iadd(b, copy_id, dst_offset));
|
|
|
|
dst_coords = nir_pad_vector_imm_int(b, dst_coords, 0, 4);
|
|
|
|
uint32_t binding = 0;
|
|
u_foreach_bit(a, key->src.aspects) {
|
|
VkImageAspectFlagBits src_aspect = 1 << a;
|
|
VkImageAspectFlags dst_aspect =
|
|
key->dst.aspects == key->src.aspects ? src_aspect : key->dst.aspects;
|
|
VkFormat src_fmt =
|
|
copy_img_view_format_for_aspect(&key->src.view, src_aspect);
|
|
VkFormat dst_fmt =
|
|
copy_img_view_format_for_aspect(&key->dst.view, dst_aspect);
|
|
nir_deref_instr *tex =
|
|
tex_deref(b, &key->src.view, src_aspect, key->samples, binding);
|
|
nir_deref_instr *img =
|
|
img_deref(b, &key->dst.view, dst_aspect, key->samples, binding + 1);
|
|
|
|
for (uint32_t s = 0; s < key->samples; s++) {
|
|
nir_def *sample_id =
|
|
key->samples == VK_SAMPLE_COUNT_1_BIT ? NULL : nir_imm_int(b, s);
|
|
nir_def *texel = read_texel(b, tex, src_coords, sample_id);
|
|
|
|
texel = convert_texel(b, src_fmt, dst_fmt, texel);
|
|
write_img(b, &key->dst.view, dst_aspect, key->samples, img, dst_coords,
|
|
sample_id, texel);
|
|
}
|
|
|
|
binding += 2;
|
|
}
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_image_compute_pipeline(struct vk_device *device,
|
|
struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key,
|
|
VkPipelineLayout *layout_out,
|
|
VkPipeline *pipeline_out)
|
|
{
|
|
const VkDescriptorSetLayoutBinding bindings[] = {
|
|
COPY_SHADER_BINDING(0, SAMPLED_IMAGE, COMPUTE),
|
|
COPY_SHADER_BINDING(1, STORAGE_IMAGE, COMPUTE),
|
|
COPY_SHADER_BINDING(2, SAMPLED_IMAGE, COMPUTE),
|
|
COPY_SHADER_BINDING(3, STORAGE_IMAGE, COMPUTE),
|
|
};
|
|
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_IMAGE_CS,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_image_cs_info),
|
|
bindings, ARRAY_SIZE(bindings), layout_out);
|
|
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_compute_copy_pipeline(device, meta, *layout_out,
|
|
build_copy_image_cs, key, sizeof(*key),
|
|
pipeline_out);
|
|
}
|
|
|
|
static VkResult
|
|
copy_image_prepare_gfx_desc_set(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
|
|
struct vk_image *src_img, VkImageLayout src_img_layout,
|
|
struct vk_image *dst_img, VkImageLayout dst_img_layout,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkImageAspectFlags src_aspects = key->src.aspects;
|
|
VkImageView iviews[] = {
|
|
VK_NULL_HANDLE,
|
|
VK_NULL_HANDLE,
|
|
};
|
|
uint32_t desc_count = 0;
|
|
|
|
u_foreach_bit(a, src_aspects) {
|
|
assert(desc_count < ARRAY_SIZE(iviews));
|
|
|
|
VkResult result = copy_create_src_image_view(
|
|
cmd, meta, src_img, &key->src.view, 1 << a, ®ion->srcSubresource,
|
|
&iviews[desc_count++]);
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
}
|
|
|
|
VkWriteDescriptorSet descs[2] = {
|
|
COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
|
|
COPY_PUSH_SET_IMG_DESC(1, SAMPLED, iviews[1], src_img_layout),
|
|
};
|
|
|
|
disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_GRAPHICS,
|
|
pipeline_layout, 0, desc_count, descs);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
copy_image_prepare_compute_desc_set(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
|
|
struct vk_image *src_img, VkImageLayout src_img_layout,
|
|
struct vk_image *dst_img, VkImageLayout dst_img_layout,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkImageAspectFlags src_aspects = key->src.aspects;
|
|
VkImageView iviews[] = {
|
|
VK_NULL_HANDLE,
|
|
VK_NULL_HANDLE,
|
|
VK_NULL_HANDLE,
|
|
VK_NULL_HANDLE,
|
|
};
|
|
unsigned desc_count = 0;
|
|
|
|
u_foreach_bit(a, src_aspects) {
|
|
VkImageAspectFlagBits src_aspect = 1 << a;
|
|
VkImageAspectFlags dst_aspect =
|
|
key->dst.aspects == key->src.aspects ? src_aspect : key->dst.aspects;
|
|
|
|
assert(desc_count + 2 <= ARRAY_SIZE(iviews));
|
|
|
|
VkResult result = copy_create_src_image_view(
|
|
cmd, meta, src_img, &key->src.view, src_aspect, ®ion->srcSubresource,
|
|
&iviews[desc_count++]);
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
result = copy_create_dst_image_view(
|
|
cmd, meta, dst_img, &key->dst.view, dst_aspect, ®ion->dstOffset,
|
|
®ion->extent, ®ion->dstSubresource,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, &iviews[desc_count++]);
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
}
|
|
|
|
VkWriteDescriptorSet descs[] = {
|
|
COPY_PUSH_SET_IMG_DESC(0, SAMPLED, iviews[0], src_img_layout),
|
|
COPY_PUSH_SET_IMG_DESC(1, STORAGE, iviews[1], dst_img_layout),
|
|
COPY_PUSH_SET_IMG_DESC(2, SAMPLED, iviews[2], src_img_layout),
|
|
COPY_PUSH_SET_IMG_DESC(3, STORAGE, iviews[3], dst_img_layout),
|
|
};
|
|
|
|
disp->CmdPushDescriptorSetKHR(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
pipeline_layout, 0, desc_count, descs);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
enum vk_meta_copy_image_align_policy {
|
|
VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE,
|
|
VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE,
|
|
};
|
|
|
|
static VkResult
|
|
copy_image_prepare_compute_push_const(
|
|
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key, VkPipelineLayout pipeline_layout,
|
|
const struct vk_image *src, const struct vk_image *dst,
|
|
enum vk_meta_copy_image_align_policy align_policy,
|
|
const VkImageCopy2 *region, uint32_t *wg_count)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkOffset3D src_offs =
|
|
base_layer_as_offset(key->src.view.type, region->srcOffset,
|
|
region->srcSubresource.baseArrayLayer);
|
|
uint32_t layer_count =
|
|
vk_image_subresource_layer_count(src, ®ion->srcSubresource);
|
|
VkExtent3D src_extent =
|
|
layer_count_as_extent(key->src.view.type, region->extent, layer_count);
|
|
VkOffset3D dst_offs =
|
|
base_layer_as_offset(key->dst.view.type, region->dstOffset,
|
|
region->dstSubresource.baseArrayLayer);
|
|
|
|
struct vk_meta_copy_image_cs_info info = {0};
|
|
|
|
/* We can't necessarily optimize the read+write path, so align things
|
|
* on the biggest tile size. */
|
|
if (align_policy == VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE) {
|
|
info.copy_id_range.start.x = src_offs.x % key->wg_size[0];
|
|
info.copy_id_range.start.y = src_offs.y % key->wg_size[1];
|
|
info.copy_id_range.start.z = src_offs.z % key->wg_size[2];
|
|
} else {
|
|
info.copy_id_range.start.x = dst_offs.x % key->wg_size[0];
|
|
info.copy_id_range.start.y = dst_offs.y % key->wg_size[1];
|
|
info.copy_id_range.start.z = dst_offs.z % key->wg_size[2];
|
|
}
|
|
|
|
info.copy_id_range.end.x = info.copy_id_range.start.x + src_extent.width;
|
|
info.copy_id_range.end.y = info.copy_id_range.start.y + src_extent.height;
|
|
info.copy_id_range.end.z = info.copy_id_range.start.z + src_extent.depth;
|
|
|
|
info.src_img.offset.x = src_offs.x - info.copy_id_range.start.x;
|
|
info.src_img.offset.y = src_offs.y - info.copy_id_range.start.y;
|
|
info.src_img.offset.z = src_offs.z - info.copy_id_range.start.z;
|
|
info.dst_img.offset.x = dst_offs.x - info.copy_id_range.start.x;
|
|
info.dst_img.offset.y = dst_offs.y - info.copy_id_range.start.y;
|
|
info.dst_img.offset.z = dst_offs.z - info.copy_id_range.start.z;
|
|
wg_count[0] = DIV_ROUND_UP(info.copy_id_range.end.x, key->wg_size[0]);
|
|
wg_count[1] = DIV_ROUND_UP(info.copy_id_range.end.y, key->wg_size[1]);
|
|
wg_count[2] = DIV_ROUND_UP(info.copy_id_range.end.z, key->wg_size[2]);
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(info), &info);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
copy_image_prepare_gfx_push_const(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_image_key *key,
|
|
VkPipelineLayout pipeline_layout,
|
|
struct vk_image *src_img,
|
|
struct vk_image *dst_img,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkOffset3D src_img_offs =
|
|
base_layer_as_offset(key->src.view.type, region->srcOffset,
|
|
region->srcSubresource.baseArrayLayer);
|
|
|
|
struct vk_meta_copy_image_fs_info info = {
|
|
.dst_to_src_offs = {
|
|
/* The subtraction may lead to negative values, but that's fine
|
|
* because the shader does the mirror operation thus guaranteeing
|
|
* a src_coords >= 0. */
|
|
.x = src_img_offs.x - region->dstOffset.x,
|
|
.y = src_img_offs.y - region->dstOffset.y,
|
|
/* Render image view only contains the layers needed for rendering,
|
|
* so we consider the coordinate containing the layer to always be
|
|
* zero.
|
|
*/
|
|
.z = src_img_offs.z,
|
|
},
|
|
};
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(info), &info);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static bool
|
|
valid_multiplane_aspect_mask(VkImageAspectFlags aspect)
|
|
{
|
|
switch (aspect) {
|
|
case VK_IMAGE_ASPECT_PLANE_0_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_1_BIT:
|
|
case VK_IMAGE_ASPECT_PLANE_2_BIT:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
aspect_masks_valid(struct vk_image *src_img, struct vk_image *dst_img,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
const struct vk_format_ycbcr_info *src_ycbcr_info =
|
|
vk_format_get_ycbcr_info(src_img->format);
|
|
const struct vk_format_ycbcr_info *dst_ycbcr_info =
|
|
vk_format_get_ycbcr_info(dst_img->format);
|
|
|
|
if (!src_ycbcr_info && !dst_ycbcr_info) {
|
|
ASSERTED const VkImageAspectFlags valid_aspects =
|
|
VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT |
|
|
VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
|
|
assert(!(region->srcSubresource.aspectMask & ~valid_aspects) &&
|
|
!(region->dstSubresource.aspectMask & ~valid_aspects));
|
|
|
|
/* From the Vulkan 1.4.303 spec, vkCmdCopyImage:
|
|
*
|
|
* VUID-vkCmdCopyImage-srcSubresource-10214
|
|
*
|
|
* "If srcSubresource.aspectMask is VK_IMAGE_ASPECT_COLOR_BIT, then
|
|
* dstSubresource.aspectMask must not contain both
|
|
* VK_IMAGE_ASPECT_DEPTH_BIT and VK_IMAGE_ASPECT_STENCIL_BIT"
|
|
*
|
|
* VUID-vkCmdCopyImage-dstSubresource-10215
|
|
*
|
|
* "If dstSubresource.aspectMask is VK_IMAGE_ASPECT_COLOR_BIT, then
|
|
* srSubresource.aspectMask must not contain both
|
|
* VK_IMAGE_ASPECT_DEPTH_BIT and VK_IMAGE_ASPECT_STENCIL_BIT"
|
|
*/
|
|
if ((region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ||
|
|
region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) &&
|
|
(util_bitcount(region->srcSubresource.aspectMask) != 1 ||
|
|
util_bitcount(region->dstSubresource.aspectMask) != 1))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* VUID-vkCmdCopyImage-srcImage-08713
|
|
*
|
|
* "If srcImage has a multi-planar image format, then for each element
|
|
* of pRegions, srcSubresource.aspectMask must be a single valid
|
|
* multi-planar aspect mask bit"
|
|
*/
|
|
if (src_ycbcr_info &&
|
|
!valid_multiplane_aspect_mask(region->srcSubresource.aspectMask))
|
|
return false;
|
|
|
|
/* VUID-vkCmdCopyImage-dstImage-08714
|
|
*
|
|
* "If dstImage has a multi-planar image format, then for each element
|
|
* of pRegions, dstSubresource.aspectMask must be a single valid
|
|
* multi-planar aspect mask bit"
|
|
*/
|
|
if (dst_ycbcr_info &&
|
|
!valid_multiplane_aspect_mask(region->dstSubresource.aspectMask))
|
|
return false;
|
|
|
|
/* VUID-vkCmdCopyImage-srcImage-01556
|
|
*
|
|
* "If srcImage has a multi-planar image format and the dstImage does
|
|
* not have a multi-planar image format, then for each element of
|
|
* pRegions, dstSubresource.aspectMask must be VK_IMAGE_ASPECT_COLOR_BIT"
|
|
*/
|
|
if (src_ycbcr_info && !dst_ycbcr_info &&
|
|
region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
|
|
return false;
|
|
|
|
/* VUID-vkCmdCopyImage-dstImage-01557
|
|
*
|
|
* "If dstImage has a multi-planar image format and the srcImage does
|
|
* not have a multi-planar image format, then for each element of
|
|
* pRegions, srcSubresource.aspectMask must be VK_IMAGE_ASPECT_COLOR_BIT"
|
|
*/
|
|
if (!src_ycbcr_info && dst_ycbcr_info &&
|
|
region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
copy_image_region_gfx(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta, struct vk_image *src_img,
|
|
VkImageLayout src_image_layout,
|
|
const struct vk_meta_copy_image_properties *src_props,
|
|
struct vk_image *dst_img, VkImageLayout dst_image_layout,
|
|
const struct vk_meta_copy_image_properties *dst_props,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
|
|
/* We only special-case 1D_ARRAY to move the layer ID to the second
|
|
* component instead of the third. For all other view types, let's pick an
|
|
* invalid VkImageViewType value so we don't end up creating the same
|
|
* pipeline multiple times. */
|
|
VkImageViewType dst_view_type =
|
|
dst_img->image_type == VK_IMAGE_TYPE_1D && dst_img->array_layers > 1
|
|
? VK_IMAGE_VIEW_TYPE_1D_ARRAY
|
|
: (VkImageViewType)-1;
|
|
|
|
assert(aspect_masks_valid(src_img, dst_img, region));
|
|
|
|
struct vk_meta_copy_image_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_IMAGE_GFX,
|
|
.samples = src_img->samples,
|
|
.src.aspects = region->srcSubresource.aspectMask,
|
|
.src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
|
|
region->srcSubresource.aspectMask, src_img,
|
|
src_props),
|
|
.dst.aspects = region->dstSubresource.aspectMask,
|
|
.dst.view = img_copy_view_info(dst_view_type,
|
|
region->dstSubresource.aspectMask, dst_img,
|
|
dst_props),
|
|
};
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
VkResult result =
|
|
get_copy_image_gfx_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
|
|
|
result = copy_image_prepare_gfx_desc_set(cmd, meta, &key, pipeline_layout,
|
|
src_img, src_image_layout, dst_img,
|
|
dst_image_layout, region);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
result = copy_image_prepare_gfx_push_const(cmd, meta, &key, pipeline_layout,
|
|
src_img, dst_img, region);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
copy_draw(cmd, meta, dst_img, dst_image_layout, ®ion->dstSubresource,
|
|
®ion->dstOffset, ®ion->extent, &key.dst.view);
|
|
}
|
|
|
|
static void
|
|
copy_image_region_compute(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta, struct vk_image *src_img,
|
|
VkImageLayout src_image_layout,
|
|
const struct vk_meta_copy_image_properties *src_props,
|
|
struct vk_image *dst_img,
|
|
VkImageLayout dst_image_layout,
|
|
const struct vk_meta_copy_image_properties *dst_props,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkImageViewType dst_view_type = vk_image_storage_view_type(dst_img);
|
|
|
|
assert(aspect_masks_valid(src_img, dst_img, region));
|
|
|
|
struct vk_meta_copy_image_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_IMAGE_CS,
|
|
.samples = src_img->samples,
|
|
.src.aspects = region->srcSubresource.aspectMask,
|
|
.src.view = img_copy_view_info(vk_image_sampled_view_type(src_img),
|
|
region->srcSubresource.aspectMask, src_img,
|
|
src_props),
|
|
.dst.aspects = region->dstSubresource.aspectMask,
|
|
.dst.view = img_copy_view_info(
|
|
dst_view_type, region->dstSubresource.aspectMask, dst_img, dst_props),
|
|
};
|
|
|
|
uint32_t src_pix_per_tile = src_props->tile_size.width *
|
|
src_props->tile_size.height *
|
|
src_props->tile_size.depth;
|
|
uint32_t dst_pix_per_tile = dst_props->tile_size.width *
|
|
dst_props->tile_size.height *
|
|
dst_props->tile_size.depth;
|
|
enum vk_meta_copy_image_align_policy align_policy;
|
|
|
|
if (src_pix_per_tile >= dst_pix_per_tile) {
|
|
key.wg_size[0] = src_props->tile_size.width;
|
|
key.wg_size[1] = src_props->tile_size.height;
|
|
key.wg_size[2] = src_props->tile_size.depth;
|
|
align_policy = VK_META_COPY_IMAGE_ALIGN_ON_SRC_TILE;
|
|
} else {
|
|
key.wg_size[0] = dst_props->tile_size.width;
|
|
key.wg_size[1] = dst_props->tile_size.height;
|
|
key.wg_size[2] = dst_props->tile_size.depth;
|
|
align_policy = VK_META_COPY_IMAGE_ALIGN_ON_DST_TILE;
|
|
}
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
VkResult result = get_copy_image_compute_pipeline(
|
|
dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
result = copy_image_prepare_compute_desc_set(
|
|
cmd, meta, &key, pipeline_layout, src_img, src_image_layout, dst_img,
|
|
dst_image_layout, region);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
assert(key.wg_size[0] && key.wg_size[1] && key.wg_size[2]);
|
|
|
|
uint32_t wg_count[3] = {0};
|
|
|
|
result = copy_image_prepare_compute_push_const(
|
|
cmd, meta, &key, pipeline_layout, src_img, dst_img, align_policy, region,
|
|
wg_count);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count[0], wg_count[1],
|
|
wg_count[2]);
|
|
}
|
|
|
|
void
|
|
vk_meta_copy_image(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const VkCopyImageInfo2 *info,
|
|
const struct vk_meta_copy_image_properties *src_props,
|
|
const struct vk_meta_copy_image_properties *dst_props,
|
|
VkPipelineBindPoint bind_point)
|
|
{
|
|
VK_FROM_HANDLE(vk_image, src_img, info->srcImage);
|
|
VK_FROM_HANDLE(vk_image, dst_img, info->dstImage);
|
|
|
|
for (uint32_t i = 0; i < info->regionCount; i++) {
|
|
VkImageCopy2 region = info->pRegions[i];
|
|
|
|
region.extent = vk_image_extent_to_elements(src_img, region.extent);
|
|
region.srcOffset = vk_image_offset_to_elements(src_img, region.srcOffset);
|
|
region.dstOffset = vk_image_offset_to_elements(dst_img, region.dstOffset);
|
|
|
|
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
|
|
copy_image_region_gfx(cmd, meta, src_img, info->srcImageLayout,
|
|
src_props, dst_img, info->dstImageLayout,
|
|
dst_props, ®ion);
|
|
} else {
|
|
copy_image_region_compute(cmd, meta, src_img, info->srcImageLayout,
|
|
src_props, dst_img, info->dstImageLayout,
|
|
dst_props, ®ion);
|
|
}
|
|
}
|
|
}
|
|
|
|
static nir_shader *
|
|
build_copy_buffer_shader(const struct vk_meta_device *meta,
|
|
const void *key_data)
|
|
{
|
|
const struct vk_meta_copy_buffer_key *key = key_data;
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_COMPUTE, NULL, "vk-meta-copy-buffer");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.workgroup_size[0] =
|
|
vk_meta_buffer_access_wg_size(meta, key->chunk_size);
|
|
b->shader->info.workgroup_size[1] = 1;
|
|
b->shader->info.workgroup_size[2] = 1;
|
|
|
|
uint32_t chunk_bit_size, chunk_comp_count;
|
|
|
|
assert(util_is_power_of_two_nonzero(key->chunk_size));
|
|
if (key->chunk_size <= 4) {
|
|
chunk_bit_size = key->chunk_size * 8;
|
|
chunk_comp_count = 1;
|
|
} else {
|
|
chunk_bit_size = 32;
|
|
chunk_comp_count = key->chunk_size / 4;
|
|
}
|
|
|
|
assert(chunk_comp_count < NIR_MAX_VEC_COMPONENTS);
|
|
|
|
nir_def *global_id = nir_load_global_invocation_id(b, 32);
|
|
nir_def *copy_id = nir_channel(b, global_id, 0);
|
|
nir_def *offset = nir_imul_imm(b, copy_id, key->chunk_size);
|
|
nir_def *size = load_info(b, struct vk_meta_copy_buffer_info, size);
|
|
|
|
nir_push_if(b, nir_ult(b, offset, size));
|
|
|
|
offset = nir_u2u64(b, offset);
|
|
|
|
nir_def *src_addr = load_info(b, struct vk_meta_copy_buffer_info, src_addr);
|
|
nir_def *dst_addr = nir_load_push_constant(b, 1, 64, nir_imm_int(b, 8));
|
|
nir_def *data = nir_load_global(b, chunk_comp_count, chunk_bit_size,
|
|
nir_iadd(b, src_addr, offset),
|
|
.align_mul = chunk_bit_size / 8);
|
|
|
|
nir_store_global(b, data, nir_iadd(b, dst_addr, offset),
|
|
.align_mul = key->chunk_size);
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_copy_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_meta_copy_buffer_key *key,
|
|
VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
|
|
{
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_COPY_BUFFER,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_copy_buffer_info),
|
|
NULL, 0, layout_out);
|
|
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_compute_copy_pipeline(device, meta, *layout_out,
|
|
build_copy_buffer_shader, key, sizeof(*key),
|
|
pipeline_out);
|
|
}
|
|
|
|
static void
|
|
copy_buffer_region(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
VkBuffer src, VkBuffer dst, const VkBufferCopy2 *region)
|
|
{
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_physical_device *pdev = dev->physical;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkResult result;
|
|
|
|
struct vk_meta_copy_buffer_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_COPY_BUFFER,
|
|
};
|
|
|
|
VkDeviceSize size = region->size;
|
|
VkDeviceAddress src_addr =
|
|
vk_meta_buffer_address(dev, src, region->srcOffset, size);
|
|
VkDeviceAddress dst_addr =
|
|
vk_meta_buffer_address(dev, dst, region->dstOffset, size);
|
|
|
|
/* Combine the size and src/dst address to extract the alignment. */
|
|
uint64_t align = src_addr | dst_addr | size;
|
|
|
|
assert(align != 0);
|
|
|
|
/* Pick the first power-of-two of the combined src/dst address and size as
|
|
* our alignment. We limit the chunk size to 16 bytes (a uvec4) for now.
|
|
*/
|
|
key.chunk_size = MIN2(16, 1 << (ffs(align) - 1));
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
result =
|
|
get_copy_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
const uint32_t optimal_wg_size =
|
|
vk_meta_buffer_access_wg_size(meta, key.chunk_size);
|
|
const uint32_t per_wg_copy_size = optimal_wg_size * key.chunk_size;
|
|
uint32_t max_per_dispatch_size =
|
|
pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
|
|
|
|
assert(optimal_wg_size <= pdev->properties.maxComputeWorkGroupSize[0]);
|
|
|
|
while (size) {
|
|
struct vk_meta_copy_buffer_info args = {
|
|
.size = MIN2(size, max_per_dispatch_size),
|
|
.src_addr = src_addr,
|
|
.dst_addr = dst_addr,
|
|
};
|
|
uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
|
|
&args);
|
|
|
|
disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
|
|
|
|
src_addr += args.size;
|
|
dst_addr += args.size;
|
|
size -= args.size;
|
|
}
|
|
}
|
|
|
|
void
|
|
vk_meta_copy_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
const VkCopyBufferInfo2 *info)
|
|
{
|
|
for (unsigned i = 0; i < info->regionCount; i++) {
|
|
const VkBufferCopy2 *region = &info->pRegions[i];
|
|
|
|
copy_buffer_region(cmd, meta, info->srcBuffer, info->dstBuffer, region);
|
|
}
|
|
}
|
|
|
|
void
|
|
vk_meta_update_buffer(struct vk_command_buffer *cmd,
|
|
struct vk_meta_device *meta, VkBuffer buffer,
|
|
VkDeviceSize offset, VkDeviceSize size, const void *data)
|
|
{
|
|
VkResult result;
|
|
|
|
const VkBufferCreateInfo tmp_buffer_info = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
|
.size = size,
|
|
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
|
.queueFamilyIndexCount = 1,
|
|
.pQueueFamilyIndices = &cmd->pool->queue_family_index,
|
|
};
|
|
|
|
VkBuffer tmp_buffer;
|
|
result = vk_meta_create_buffer(cmd, meta, &tmp_buffer_info, &tmp_buffer);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
void *tmp_buffer_map;
|
|
result = meta->cmd_bind_map_buffer(cmd, meta, tmp_buffer, &tmp_buffer_map);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
memcpy(tmp_buffer_map, data, size);
|
|
|
|
const VkBufferCopy2 copy_region = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
|
|
.srcOffset = 0,
|
|
.dstOffset = offset,
|
|
.size = size,
|
|
};
|
|
const VkCopyBufferInfo2 copy_info = {
|
|
.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
|
|
.srcBuffer = tmp_buffer,
|
|
.dstBuffer = buffer,
|
|
.regionCount = 1,
|
|
.pRegions = ©_region,
|
|
};
|
|
|
|
vk_meta_copy_buffer(cmd, meta, ©_info);
|
|
}
|
|
|
|
static nir_shader *
|
|
build_fill_buffer_shader(const struct vk_meta_device *meta,
|
|
UNUSED const void *key_data)
|
|
{
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_COMPUTE, NULL, "vk-meta-fill-buffer");
|
|
nir_builder *b = &builder;
|
|
|
|
b->shader->info.workgroup_size[0] = vk_meta_buffer_access_wg_size(meta, 4);
|
|
b->shader->info.workgroup_size[1] = 1;
|
|
b->shader->info.workgroup_size[2] = 1;
|
|
|
|
nir_def *global_id = nir_load_global_invocation_id(b, 32);
|
|
nir_def *copy_id = nir_channel(b, global_id, 0);
|
|
nir_def *offset = nir_imul_imm(b, copy_id, 4);
|
|
nir_def *size = load_info(b, struct vk_meta_fill_buffer_info, size);
|
|
nir_def *data = load_info(b, struct vk_meta_fill_buffer_info, data);
|
|
|
|
nir_push_if(b, nir_ult(b, offset, size));
|
|
|
|
offset = nir_u2u64(b, offset);
|
|
|
|
nir_def *buf_addr =
|
|
load_info(b, struct vk_meta_fill_buffer_info, buf_addr);
|
|
|
|
nir_store_global(b, data, nir_iadd(b, buf_addr, offset),
|
|
.align_mul = 4);
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
return b->shader;
|
|
}
|
|
|
|
static VkResult
|
|
get_fill_buffer_pipeline(struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_meta_fill_buffer_key *key,
|
|
VkPipelineLayout *layout_out, VkPipeline *pipeline_out)
|
|
{
|
|
VkResult result = get_copy_pipeline_layout(
|
|
device, meta, VK_META_OBJECT_KEY_FILL_BUFFER,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, sizeof(struct vk_meta_fill_buffer_info), NULL, 0,
|
|
layout_out);
|
|
if (unlikely(result != VK_SUCCESS))
|
|
return result;
|
|
|
|
return get_compute_copy_pipeline(device, meta, *layout_out,
|
|
build_fill_buffer_shader, key, sizeof(*key),
|
|
pipeline_out);
|
|
}
|
|
|
|
void
|
|
vk_meta_fill_buffer(struct vk_command_buffer *cmd, struct vk_meta_device *meta,
|
|
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
|
|
uint32_t data)
|
|
{
|
|
VK_FROM_HANDLE(vk_buffer, buf, buffer);
|
|
struct vk_device *dev = cmd->base.device;
|
|
const struct vk_physical_device *pdev = dev->physical;
|
|
const struct vk_device_dispatch_table *disp = &dev->dispatch_table;
|
|
VkResult result;
|
|
|
|
struct vk_meta_fill_buffer_key key = {
|
|
.key_type = VK_META_OBJECT_KEY_FILL_BUFFER,
|
|
};
|
|
|
|
VkPipelineLayout pipeline_layout;
|
|
VkPipeline pipeline;
|
|
result =
|
|
get_fill_buffer_pipeline(dev, meta, &key, &pipeline_layout, &pipeline);
|
|
if (unlikely(result != VK_SUCCESS)) {
|
|
vk_command_buffer_set_error(cmd, result);
|
|
return;
|
|
}
|
|
|
|
disp->CmdBindPipeline(vk_command_buffer_to_handle(cmd),
|
|
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
/* From the Vulkan 1.3.290 spec:
|
|
*
|
|
* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not a
|
|
* multiple of 4, then the nearest smaller multiple is used."
|
|
*
|
|
* hence the mask to align the size on 4 bytes here.
|
|
*/
|
|
size = vk_buffer_range(buf, offset, size) & ~3u;
|
|
|
|
const uint32_t optimal_wg_size = vk_meta_buffer_access_wg_size(meta, 4);
|
|
const uint32_t per_wg_copy_size = optimal_wg_size * 4;
|
|
uint32_t max_per_dispatch_size =
|
|
pdev->properties.maxComputeWorkGroupCount[0] * per_wg_copy_size;
|
|
|
|
while (size > 0) {
|
|
struct vk_meta_fill_buffer_info args = {
|
|
.size = MIN2(size, max_per_dispatch_size),
|
|
.buf_addr = vk_meta_buffer_address(dev, buffer, offset, size),
|
|
.data = data,
|
|
};
|
|
uint32_t wg_count = DIV_ROUND_UP(args.size, per_wg_copy_size);
|
|
|
|
disp->CmdPushConstants(vk_command_buffer_to_handle(cmd), pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
|
|
&args);
|
|
|
|
disp->CmdDispatch(vk_command_buffer_to_handle(cmd), wg_count, 1, 1);
|
|
|
|
offset += args.size;
|
|
size -= args.size;
|
|
}
|
|
}
|