vk/meta: Add copy/fill/update helpers

Add buffer copy/fill/update helpers using compute shaders. The driver
can select the optimal per-workgroup copy/fill/update size by specifying
a non-zero vk_meta_device::buffer_access::optimal_size_per_wg size.
If zero, the core will assume a 64-byte size (the usual cache-line size).

Buffer accesses will be done through SSBOs unless
vk_meta_device::buffer_access::use_global_address is true, in which
case the core will query the buffer address using GetBufferDeviceAddress()
and pass that address as a push constant to the compute shader.

Image to buffer copies are always done through a compute shader. The
optimal workgroup size will be chosen based on
vk_meta_copy_image_properties::tile_size: the copy logic picks a
workgroup size matching the tile size, and aligns accesses on a tile.
The view format is selected by the driver. To optimize things on the
shader side, pick UINT formats (usually less work to do to pack data).

Buffer to image copies can be done through the graphics pipeline
if needed (use_gfx_pipeline passed to vk_meta_copy_buffer_to_image()),
which is useful for vendor-specific compressed formats that can't be
written outside of the graphics pipeline. Drivers should normally prefer
compute-based copies when that's an option. Just like for image to buffer
copies, the workgroup size of compute shaders is picked based on the
image tile size, and the view format must be selected by the driver.

Image to image copies are just a mix of the above, with the driver being
able to select the pipeline type, as well as define the tile size and
view format to use. When using a compute pipeline, the workgroup size
will be MAX2(src_tile_sz, dst_tile_sz), and accesses will be aligned
on the selected reference image.

For compressed formats, the caller should pick an RGBA format matching
the compressed block size.

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29333>
This commit is contained in:
Constantine Shablia 2024-05-22 18:37:02 +02:00 committed by Marge Bot
parent dad5c1184f
commit 07c6459cd8
5 changed files with 2703 additions and 0 deletions

View file

@ -258,6 +258,7 @@ vulkan_runtime_files = files(
'vk_meta.c',
'vk_meta_blit_resolve.c',
'vk_meta_clear.c',
'vk_meta_copy_fill_update.c',
'vk_meta_draw_rects.c',
'vk_nir.c',
'vk_nir_convert_ycbcr.c',

View file

@ -24,6 +24,7 @@
#include "vk_meta_object_list.h"
#include "vk_meta_private.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_device.h"
#include "vk_pipeline.h"
@ -554,3 +555,22 @@ vk_meta_create_buffer_view(struct vk_command_buffer *cmd,
(uint64_t)*buffer_view_out);
return VK_SUCCESS;
}
VkDeviceAddress
vk_meta_buffer_address(struct vk_device *device, VkBuffer buffer,
                       uint64_t offset, uint64_t range)
{
   /* Purely for the assert()s inside vk_buffer_range(); the return value is
    * intentionally ignored.
    */
   vk_buffer_range(vk_buffer_from_handle(buffer), offset, range);

   const VkBufferDeviceAddressInfo addr_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = buffer,
   };
   VkDeviceAddress base_addr = device->dispatch_table.GetBufferDeviceAddress(
      vk_device_to_handle(device), &addr_info);

   return base_addr + offset;
}

View file

@ -28,6 +28,8 @@
#include "util/simple_mtx.h"
#include "compiler/nir/nir.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -47,6 +49,55 @@ struct vk_meta_rect {
#define VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA (VkPrimitiveTopology)11
#define VK_IMAGE_VIEW_CREATE_INTERNAL_MESA (VkImageViewCreateFlagBits)0x40000000
/* Per-image properties the driver passes to the vk_meta copy helpers so the
 * core can pick compatible view formats and an optimal workgroup size.
 */
struct vk_meta_copy_image_properties {
union {
struct {
/* Format to use for the image view of a color aspect.
 * Format must not be compressed and be in the RGB/sRGB colorspace.
 */
VkFormat view_format;
} color;
struct {
struct {
/* Format to use for the image view of a depth aspect.
 * Format must not be compressed and be in the RGB/sRGB colorspace.
 */
VkFormat view_format;
/* Describes the depth component layout. Bits in the mask
 * must be consecutive and match the original depth bit size.
 */
uint8_t component_mask;
} depth;
struct {
/* Format to use for the image view of a stencil aspect.
 * Format must not be compressed and be in the RGB/sRGB colorspace.
 */
VkFormat view_format;
/* Describes the stencil component layout. Bits in the mask
 * must be consecutive and match the original stencil bit size.
 */
uint8_t component_mask;
} stencil;
};
};

/* Size of the image tile. Used to select the optimal workgroup size. */
VkExtent3D tile_size;
};
/* Identifies the per-workgroup-thread chunk size (1 << id bytes) used by the
 * vk_meta buffer copy/fill/update paths. The id doubles as an index into
 * vk_meta_device::buffer_access::optimal_wg_size.
 */
enum vk_meta_buffer_chunk_size_id {
VK_META_BUFFER_1_BYTE_CHUNK = 0,
VK_META_BUFFER_2_BYTE_CHUNK,
VK_META_BUFFER_4_BYTE_CHUNK,
VK_META_BUFFER_8_BYTE_CHUNK,
VK_META_BUFFER_16_BYTE_CHUNK,
VK_META_BUFFER_CHUNK_SIZE_COUNT,
};
struct vk_meta_device {
struct hash_table *cache;
simple_mtx_t cache_mtx;
@ -56,6 +107,16 @@ struct vk_meta_device {
bool use_gs_for_layer;
bool use_stencil_export;
struct {
/* Optimal workgroup size for each possible chunk size. This should be
* chosen to keep things cache-friendly (something big enough to maximize
* cache hits on executing threads, but small enough to not trash the
* cache) while keeping GPU utilization high enough to not make copies
* too slow.
*/
uint32_t optimal_wg_size[VK_META_BUFFER_CHUNK_SIZE_COUNT];
} buffer_access;
VkResult (*cmd_bind_map_buffer)(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
VkBuffer buffer,
@ -72,6 +133,19 @@ struct vk_meta_device {
uint32_t layer_count);
};
/* Return the driver-chosen optimal workgroup size for buffer accesses done
 * with the given per-thread chunk size (which must be a power of two no
 * bigger than 16 bytes).
 */
static inline uint32_t
vk_meta_buffer_access_wg_size(const struct vk_meta_device *meta,
                              uint32_t chunk_size)
{
   assert(util_is_power_of_two_nonzero(chunk_size));

   /* Power-of-two chunk size, so log2(chunk_size) indexes the table. */
   const unsigned slot = ffs(chunk_size) - 1;
   assert(slot < ARRAY_SIZE(meta->buffer_access.optimal_wg_size));

   const uint32_t wg_size = meta->buffer_access.optimal_wg_size[slot];
   assert(wg_size != 0);
   return wg_size;
}
VkResult vk_meta_device_init(struct vk_device *device,
struct vk_meta_device *meta);
void vk_meta_device_finish(struct vk_device *device,
@ -83,6 +157,11 @@ enum vk_meta_object_key_type {
VK_META_OBJECT_KEY_CLEAR_PIPELINE,
VK_META_OBJECT_KEY_BLIT_PIPELINE,
VK_META_OBJECT_KEY_BLIT_SAMPLER,
VK_META_OBJECT_KEY_COPY_BUFFER_PIPELINE,
VK_META_OBJECT_KEY_COPY_IMAGE_TO_BUFFER_PIPELINE,
VK_META_OBJECT_KEY_COPY_BUFFER_TO_IMAGE_PIPELINE,
VK_META_OBJECT_KEY_COPY_IMAGE_PIPELINE,
VK_META_OBJECT_KEY_FILL_BUFFER_PIPELINE,
};
uint64_t vk_meta_lookup_object(struct vk_meta_device *meta,
@ -192,6 +271,9 @@ VkResult vk_meta_create_buffer_view(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
const VkBufferViewCreateInfo *info,
VkBufferView *buffer_view_out);
#define VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA 0x80000000
VkResult vk_meta_create_image_view(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
const VkImageViewCreateInfo *info,
@ -273,6 +355,41 @@ void vk_meta_resolve_rendering(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
const VkRenderingInfo *pRenderingInfo);
VkDeviceAddress vk_meta_buffer_address(struct vk_device *device,
VkBuffer buffer, uint64_t offset,
uint64_t range);
void vk_meta_copy_buffer(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
const VkCopyBufferInfo2 *info);
void vk_meta_copy_image_to_buffer(
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
const VkCopyImageToBufferInfo2 *info,
const struct vk_meta_copy_image_properties *img_props);
void vk_meta_copy_buffer_to_image(
struct vk_command_buffer *cmd, struct vk_meta_device *meta,
const VkCopyBufferToImageInfo2 *info,
const struct vk_meta_copy_image_properties *img_props,
VkPipelineBindPoint bind_point);
void vk_meta_copy_image(struct vk_command_buffer *cmd,
struct vk_meta_device *meta,
const VkCopyImageInfo2 *info,
const struct vk_meta_copy_image_properties *src_props,
const struct vk_meta_copy_image_properties *dst_props,
VkPipelineBindPoint bind_point);
void vk_meta_update_buffer(struct vk_command_buffer *cmd,
struct vk_meta_device *meta, VkBuffer buffer,
VkDeviceSize offset, VkDeviceSize size,
const void *data);
void vk_meta_fill_buffer(struct vk_command_buffer *cmd,
struct vk_meta_device *meta, VkBuffer buffer,
VkDeviceSize offset, VkDeviceSize size, uint32_t data);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -26,6 +26,8 @@
#include "vk_image.h"
#include "vk_meta.h"
#include "glsl_types.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -83,6 +85,67 @@ vk_image_render_view_type(const struct vk_image *image, uint32_t layer_count)
}
}
static inline VkImageViewType
vk_image_storage_view_type(const struct vk_image *image)
{
switch (image->image_type) {
case VK_IMAGE_TYPE_1D:
return image->array_layers == 1 ? VK_IMAGE_VIEW_TYPE_1D
: VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case VK_IMAGE_TYPE_2D:
return image->array_layers == 1 ? VK_IMAGE_VIEW_TYPE_2D
: VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case VK_IMAGE_TYPE_3D:
return VK_IMAGE_VIEW_TYPE_3D;
default:
unreachable("Invalid image type");
}
}
/* Map a Vulkan image view type to the NIR/GLSL sampler dimension used when
 * building meta shaders for that view.
 */
static inline enum glsl_sampler_dim
vk_image_view_type_to_sampler_dim(VkImageViewType view_type)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D:
   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
      return GLSL_SAMPLER_DIM_1D;
   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
      return GLSL_SAMPLER_DIM_2D;
   case VK_IMAGE_VIEW_TYPE_CUBE:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return GLSL_SAMPLER_DIM_CUBE;
   case VK_IMAGE_VIEW_TYPE_3D:
      return GLSL_SAMPLER_DIM_3D;
   default:
      /* Mesa's unreachable() macro takes a message (it asserts on it in
       * debug builds); match the style used by vk_image_storage_view_type().
       */
      unreachable("Invalid view type");
   }
}
/* Return true if the given view type is one of the arrayed variants. */
static inline bool
vk_image_view_type_is_array(VkImageViewType view_type)
{
   switch (view_type) {
   case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
   case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
      return true;
   case VK_IMAGE_VIEW_TYPE_1D:
   case VK_IMAGE_VIEW_TYPE_2D:
   case VK_IMAGE_VIEW_TYPE_3D:
   case VK_IMAGE_VIEW_TYPE_CUBE:
      return false;
   default:
      /* Mesa's unreachable() macro takes a message (it asserts on it in
       * debug builds); match the style used by vk_image_storage_view_type().
       */
      unreachable("Invalid view type");
   }
}
#ifdef __cplusplus
}
#endif