mesa/src/broadcom/vulkan/v3dv_image.c
Iago Toral Quiroga 7f3e79ef9f v3dv: don't swap R/B channels for VK_FORMAT_R5B6G5_UNORM_PACK16
This corresponds to PIPE_FORMAT_B5G6R5_UNORM, which is the format that
is natively supported. Also, we can't swap R/B on 3-channel images!

Also, we should rely on the v3dv format table for this rather than
pipe format descriptions since we specify the expected correct swizzles
there for all supported formats. This, for example, gets us correct
behavior for things like VK_FORMAT_B4G4R4A4_UNORM_PACK16 without
needing to special case it.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
2020-10-13 21:21:27 +00:00

545 lines
19 KiB
C

/*
* Copyright © 2019 Raspberry Pi
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3dv_private.h"
#include "broadcom/cle/v3dx_pack.h"
#include "drm-uapi/drm_fourcc.h"
#include "util/format/u_format.h"
#include "util/u_math.h"
#include "vk_format_info.h"
#include "vk_util.h"
#include "vulkan/wsi/wsi_common.h"
/* Constants used by the UIF padding and slice layout computations in
 * v3d_get_ub_pad() and v3d_setup_slices().
 *
 * These are tunable parameters in the HW design, but all the V3D
 * implementations agree.
 */
#define VC5_UIFCFG_BANKS 8
#define VC5_UIFCFG_PAGE_SIZE 4096
#define VC5_UIFCFG_XOR_VALUE (1 << 4)
/* Page size times the number of banks (4096 * 8 = 32768). */
#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS)
/* Sizes below are presumably in bytes, like the page size -- TODO confirm. */
#define VC5_UBLOCK_SIZE 64
/* A UIF block is four ublocks (256). */
#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE)
/* A row of UIF blocks is four UIF blocks (1024). */
#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE)
/* Number of UIF-block rows in one page (4096 / 1024 = 4). */
#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
/* One and a half pages, in UIF-block rows (6). */
#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1)
/* Number of UIF-block rows in the page cache (32768 / 1024 = 32). */
#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
/* Page cache size minus one and a half pages, in UIF-block rows (26). */
#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5)
/**
 * Returns the number of rows of UIF blocks the HW pads onto a level of the
 * given height for the given cpp.
 *
 * The padding exists so that pages of the same color (bank number) end up at
 * least half a page apart vertically when moving from one column of UIF
 * blocks to the next.
 *
 * \param cpp     value passed to v3d_utile_height() to get the utile height
 * \param height  height of the level; a UIF block is two utiles tall
 * \return        padding, in rows of UIF blocks (0 when none is required)
 */
static uint32_t
v3d_get_ub_pad(uint32_t cpp, uint32_t height)
{
   const uint32_t uif_block_rows = height / (2 * v3d_utile_height(cpp));
   const uint32_t rows_into_page_cache = uif_block_rows % PAGE_CACHE_UB_ROWS;

   /* Exactly a multiple of the page cache size: this is the case UIF XOR
    * handles, so nothing needs to be added.
    */
   if (rows_into_page_cache == 0)
      return 0;

   if (rows_into_page_cache < PAGE_UB_ROWS_TIMES_1_5) {
      /* A level that fits entirely inside the page cache needs no padding;
       * otherwise pad until we are offset by a page and a half.
       */
      return uif_block_rows < PAGE_CACHE_UB_ROWS ?
             0 : PAGE_UB_ROWS_TIMES_1_5 - rows_into_page_cache;
   }

   /* Nearly aligned to the page cache size: round up to it and let XOR do
    * the rest.
    */
   if (rows_into_page_cache > PAGE_CACHE_MINUS_1_5_UB_ROWS)
      return PAGE_CACHE_UB_ROWS - rows_into_page_cache;

   /* Far enough from both the top and the bottom of the page cache. */
   return 0;
}
/**
 * Computes the memory layout of every mip level of an image.
 *
 * For each level this fills in image->slices[level] (tiling mode, offset,
 * stride, padded height and size, plus the UIF padding for UIF levels), and
 * then sets the image's total size, alignment and cube_map_stride.
 *
 * Reads image->cpp, extent, levels, array_size, samples, vk_format, type and
 * tiled, so the caller must have set those beforehand.
 */
static void
v3d_setup_slices(struct v3dv_image *image)
{
assert(image->cpp > 0);
uint32_t width = image->extent.width;
uint32_t height = image->extent.height;
uint32_t depth = image->extent.depth;
/* Note that power-of-two padding is based on level 1. These are not
 * equivalent to just util_next_power_of_two(dimension), because at a
 * level 0 dimension of 9, the level 1 power-of-two padded value is 4,
 * not 8.
 */
uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1));
uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1));
uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1));
uint32_t utile_w = v3d_utile_width(image->cpp);
uint32_t utile_h = v3d_utile_height(image->cpp);
/* A UIF block is two utiles wide and two utiles tall. */
uint32_t uif_block_w = utile_w * 2;
uint32_t uif_block_h = utile_h * 2;
uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
uint32_t block_height = vk_format_get_blockheight(image->vk_format);
bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
/* When set, level 0 never uses the LINEARTILE/UBLINEAR layouts and always
 * takes the UIF path in the tiling selection below. Enabled for
 * multisampled images.
 */
bool uif_top = msaa;
assert(image->array_size > 0);
assert(depth > 0);
assert(image->levels >= 1);
/* Running offset of the level being laid out. Levels are walked from the
 * last (smallest) one down to level 0, so smaller levels get lower offsets.
 */
uint32_t offset = 0;
for (int32_t i = image->levels - 1; i >= 0; i--) {
struct v3d_resource_slice *slice = &image->slices[i];
uint32_t level_width, level_height, level_depth;
/* Levels 0 and 1 are minified from the real dimensions, levels 2 and up
 * from the power-of-two padded ones.
 */
if (i < 2) {
level_width = u_minify(width, i);
level_height = u_minify(height, i);
} else {
level_width = u_minify(pot_width, i);
level_height = u_minify(pot_height, i);
}
/* For depth only level 0 uses the real dimension. */
if (i < 1)
level_depth = u_minify(depth, i);
else
level_depth = u_minify(pot_depth, i);
/* Multisampled levels are laid out with twice the width and height. */
if (msaa) {
level_width *= 2;
level_height *= 2;
}
/* Convert the dimensions to units of format blocks. */
level_width = DIV_ROUND_UP(level_width, block_width);
level_height = DIV_ROUND_UP(level_height, block_height);
if (!image->tiled) {
slice->tiling = VC5_TILING_RASTER;
/* 1D raster images get their width padded to a multiple of 64 / cpp. */
if (image->type == VK_IMAGE_TYPE_1D)
level_width = align(level_width, 64 / image->cpp);
} else {
/* Pick the smallest tiled layout that fits the level, padding the
 * dimensions to that layout's granularity. With uif_top, level 0 skips
 * the first three options.
 */
if ((i != 0 || !uif_top) &&
(level_width <= utile_w || level_height <= utile_h)) {
slice->tiling = VC5_TILING_LINEARTILE;
level_width = align(level_width, utile_w);
level_height = align(level_height, utile_h);
} else if ((i != 0 || !uif_top) && level_width <= uif_block_w) {
slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
level_width = align(level_width, uif_block_w);
level_height = align(level_height, uif_block_h);
} else if ((i != 0 || !uif_top) && level_width <= 2 * uif_block_w) {
slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
level_width = align(level_width, 2 * uif_block_w);
level_height = align(level_height, uif_block_h);
} else {
/* We align the width to a 4-block column of UIF blocks, but we
 * only align height to UIF blocks.
 */
level_width = align(level_width, 4 * uif_block_w);
level_height = align(level_height, uif_block_h);
/* ub_pad is only written on this path; it is in rows of UIF blocks. */
slice->ub_pad = v3d_get_ub_pad(image->cpp, level_height);
level_height += slice->ub_pad * uif_block_h;
/* If the padding set us to be aligned to the page cache size,
 * then the HW will use the XOR bit on odd columns to get us
 * perfectly misaligned.
 */
if ((level_height / uif_block_h) %
(VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) == 0) {
slice->tiling = VC5_TILING_UIF_XOR;
} else {
slice->tiling = VC5_TILING_UIF_NO_XOR;
}
}
}
slice->offset = offset;
slice->stride = level_width * image->cpp;
slice->padded_height = level_height;
/* Only meaningful (and only written) for the two UIF tiling modes. */
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
slice->tiling == VC5_TILING_UIF_XOR) {
slice->padded_height_of_output_image_in_uif_blocks =
slice->padded_height / (2 * v3d_utile_height(image->cpp));
}
/* slice->size covers a single 2D slice of the level; the space reserved
 * for the level is that times the level's depth.
 */
slice->size = level_height * slice->stride;
uint32_t slice_total_size = slice->size * level_depth;
/* The HW aligns level 1's base to a page if any of level 1 or
 * below could be UIF XOR. The lower levels then inherit the
 * alignment for as long as necessary, thanks to being power of
 * two aligned.
 */
if (i == 1 &&
level_width > 4 * uif_block_w &&
level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) {
slice_total_size = align(slice_total_size, VC5_UIFCFG_PAGE_SIZE);
}
offset += slice_total_size;
}
image->size = offset;
/* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
 * needs to be aligned to utile boundaries. Since tiles are laid out
 * from small to big in memory, we need to align the later UIF slices
 * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
 * slices.
 *
 * We additionally align to 4k, which improves UIF XOR performance.
 */
image->alignment = 4096;
/* Shift every level by the same amount so that level 0 starts on an
 * image->alignment boundary, growing the image accordingly.
 */
uint32_t page_align_offset =
align(image->slices[0].offset, image->alignment) - image->slices[0].offset;
if (page_align_offset) {
image->size += page_align_offset;
for (int i = 0; i < image->levels; i++)
image->slices[i].offset += page_align_offset;
}
/* Arrays and cube textures have a stride which is the distance from
 * one full mipmap tree to the next (64b aligned). For 3D textures,
 * we need to program the stride between slices of miplevel 0.
 */
if (image->type != VK_IMAGE_TYPE_3D) {
image->cube_map_stride =
align(image->slices[0].offset + image->slices[0].size, 64);
/* image->size already accounts for the first layer. */
image->size += image->cube_map_stride * (image->array_size - 1);
} else {
image->cube_map_stride = image->slices[0].size;
}
}
/**
 * Returns the offset of a given layer of a given mip level of an image.
 *
 * The result is the level's slice offset plus 'layer' times the per-layer
 * stride: the size of one slice of that level for 3D images, and the image's
 * cube_map_stride for every other image type.
 */
uint32_t
v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer)
{
   const uint32_t layer_stride = image->type == VK_IMAGE_TYPE_3D ?
      image->slices[level].size : image->cube_map_stride;

   return image->slices[level].offset + layer * layer_stride;
}
/**
 * Implements vkCreateImage().
 *
 * Chooses the DRM format modifier for the image (linear or Broadcom UIF),
 * allocates a zero-initialized v3dv_image, copies the creation parameters
 * into it and computes its memory layout with v3d_setup_slices().
 *
 * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the image object
 *         could not be allocated.
 */
VkResult
v3dv_CreateImage(VkDevice _device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_image *image = NULL;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   v3dv_assert(pCreateInfo->mipLevels > 0);
   v3dv_assert(pCreateInfo->arrayLayers > 0);
   v3dv_assert(pCreateInfo->samples > 0);
   v3dv_assert(pCreateInfo->extent.width > 0);
   v3dv_assert(pCreateInfo->extent.height > 0);
   v3dv_assert(pCreateInfo->extent.depth > 0);

   /* When using the simulator the WSI common code will see that our
    * driver wsi device doesn't match the display device and because of that
    * it will not attempt to present directly from the swapchain images,
    * instead it will use the prime blit path (use_prime_blit flag in
    * struct wsi_swapchain), where it copies the contents of the swapchain
    * images to a linear buffer with appropriate row stride for presentation.
    * As a result, on that path, swapchain images do not have any special
    * requirements and are not created with the pNext structs below.
    */
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
         vk_find_struct_const(pCreateInfo->pNext,
                              IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
      assert(mod_info);

      /* UIF is preferred whenever the list offers it, regardless of its
       * position; linear is only taken while nothing else has been chosen.
       * Modifiers we do not know about are ignored.
       */
      for (uint32_t i = 0; i < mod_info->drmFormatModifierCount; i++) {
         switch (mod_info->pDrmFormatModifiers[i]) {
         case DRM_FORMAT_MOD_LINEAR:
            if (modifier == DRM_FORMAT_MOD_INVALID)
               modifier = DRM_FORMAT_MOD_LINEAR;
            break;
         case DRM_FORMAT_MOD_BROADCOM_UIF:
            modifier = DRM_FORMAT_MOD_BROADCOM_UIF;
            break;
         default:
            break;
         }
      }
   } else {
      const struct wsi_image_create_info *wsi_info =
         vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);

      /* WSI images are linear, and so is anything the application
       * explicitly asked to be VK_IMAGE_TILING_LINEAR: that tiling requires
       * a row-major layout, so such images must not be UIF-tiled. Only
       * VK_IMAGE_TILING_OPTIMAL images are free to use UIF.
       */
      if (wsi_info || pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
         modifier = DRM_FORMAT_MOD_LINEAR;
      else
         modifier = DRM_FORMAT_MOD_BROADCOM_UIF;
   }

   /* 1D and 1D_ARRAY textures are always raster-order */
   if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D)
      modifier = DRM_FORMAT_MOD_LINEAR;

   assert(modifier != DRM_FORMAT_MOD_INVALID);

   const struct v3dv_format *format = v3dv_get_format(pCreateInfo->format);
   v3dv_assert(format != NULL && format->supported);

   image = vk_zalloc2(&device->alloc, pAllocator, sizeof(*image), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   image->type = pCreateInfo->imageType;
   image->extent = pCreateInfo->extent;
   image->vk_format = pCreateInfo->format;
   image->format = format;
   image->aspects = vk_format_aspects(image->vk_format);
   image->levels = pCreateInfo->mipLevels;
   image->array_size = pCreateInfo->arrayLayers;
   image->samples = pCreateInfo->samples;
   image->usage = pCreateInfo->usage;
   image->create_flags = pCreateInfo->flags;
   image->tiling = pCreateInfo->tiling;

   /* Anything that is not explicitly linear is laid out tiled. */
   image->drm_format_mod = modifier;
   image->tiled = image->drm_format_mod != DRM_FORMAT_MOD_LINEAR;

   image->cpp = vk_format_get_blocksize(image->vk_format);

   /* Needs cpp, tiled and the dimensions above to be set. */
   v3d_setup_slices(image);

   *pImage = v3dv_image_to_handle(image);

   return VK_SUCCESS;
}
/**
 * Implements vkGetImageSubresourceLayout().
 *
 * Reports where the requested subresource (mip level + array layer) lives
 * inside the image and the pitches needed to address it.
 *
 * \param device       unused
 * \param _image       image being queried
 * \param subresource  selects the mip level and array layer
 * \param layout       filled in with offset, size and pitches
 */
void
v3dv_GetImageSubresourceLayout(VkDevice device,
                               VkImage _image,
                               const VkImageSubresource *subresource,
                               VkSubresourceLayout *layout)
{
   V3DV_FROM_HANDLE(v3dv_image, image, _image);

   const struct v3d_resource_slice *slice =
      &image->slices[subresource->mipLevel];

   /* The offset must point at the requested array layer, not just at the
    * start of the mip level, so account for the layer here.
    */
   layout->offset =
      v3dv_layer_offset(image, subresource->mipLevel, subresource->arrayLayer);
   layout->rowPitch = slice->stride;
   layout->depthPitch = image->cube_map_stride;
   layout->arrayPitch = image->cube_map_stride;
   /* NOTE(review): slice->size is the size of a single 2D slice of the
    * level; confirm this is what should be reported for 3D images.
    */
   layout->size = slice->size;
}
/**
 * Implements vkGetImageDrmFormatModifierPropertiesEXT().
 *
 * Returns the DRM format modifier that was selected for the image when it
 * was created.
 *
 * \param device       unused
 * \param _image       image being queried
 * \param pProperties  output; drmFormatModifier is filled in
 * \return always VK_SUCCESS
 */
VkResult
v3dv_GetImageDrmFormatModifierPropertiesEXT(
   VkDevice device,
   VkImage _image,
   VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
   V3DV_FROM_HANDLE(v3dv_image, image, _image);

   /* Validate the caller's struct type. This has to be a comparison: an
    * assignment would always pass and would overwrite the caller's struct.
    */
   assert(pProperties->sType ==
          VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT);

   pProperties->drmFormatModifier = image->drm_format_mod;

   return VK_SUCCESS;
}
/**
 * Implements vkDestroyImage(): releases the image object through the
 * device allocator / pAllocator pair it is freed with.
 */
void
v3dv_DestroyImage(VkDevice _device,
                  VkImage _image,
                  const VkAllocationCallbacks* pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_image, img, _image);
   V3DV_FROM_HANDLE(v3dv_device, dev, _device);

   vk_free2(&dev->alloc, pAllocator, img);
}
/**
 * Implements vkCreateImageView().
 *
 * Allocates a zero-initialized v3dv_image_view and fills it from the
 * subresource range and format in pCreateInfo: base level, extent of that
 * level, first/last layer, offset of the first layer, format information
 * and the internal render target type.
 *
 * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the view object
 *         could not be allocated.
 */
VkResult
v3dv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_image, image, pCreateInfo->image);
   struct v3dv_image_view *iview;

   iview = vk_zalloc2(&device->alloc, pAllocator, sizeof(*iview), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (iview == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

   assert(range->layerCount > 0);
   assert(range->baseMipLevel < image->levels);

#ifdef DEBUG
   /* The selected layers must lie entirely inside the image: the number of
    * layers for 1D/2D images, or the depth of the base mip level for 3D
    * images. The last selected layer is base + count - 1, which has to be
    * strictly below the limit, i.e. base + count <= limit.
    */
   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + v3dv_layer_count(image, range) <=
             image->array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + v3dv_layer_count(image, range) <=
             u_minify(image->extent.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
#endif

   iview->image = image;
   iview->aspects = range->aspectMask;

   /* The view's extent is that of its base mip level. */
   iview->base_level = range->baseMipLevel;
   iview->extent = (VkExtent3D) {
      .width  = u_minify(image->extent.width , iview->base_level),
      .height = u_minify(image->extent.height, iview->base_level),
      .depth  = u_minify(image->extent.depth , iview->base_level),
   };

   iview->first_layer = range->baseArrayLayer;
   iview->last_layer = range->baseArrayLayer +
                       v3dv_layer_count(image, range) - 1;
   iview->offset =
      v3dv_layer_offset(image, iview->base_level, iview->first_layer);

   /* NOTE(review): this is the tiling of level 0 even when base_level is
    * not 0 -- confirm that is what users of iview->tiling expect.
    */
   iview->tiling = image->slices[0].tiling;

   iview->vk_format = pCreateInfo->format;
   iview->format = v3dv_get_format(pCreateInfo->format);
   assert(iview->format && iview->format->supported);

   /* R/B are swapped when the format's swizzle sources its first channel
    * from Z.
    */
   iview->swap_rb = iview->format->swizzle[0] == PIPE_SWIZZLE_Z;

   /* FIXME: should we just move this to
    * v3dv_get_internal_type_bpp_for_output_format instead?
    */
   if (vk_format_is_depth_or_stencil(iview->vk_format)) {
      /* internal_bpp is not written on this path and keeps the zero it got
       * from vk_zalloc2().
       */
      switch (iview->vk_format) {
      case VK_FORMAT_D16_UNORM:
         iview->internal_type = V3D_INTERNAL_TYPE_DEPTH_16;
         break;
      case VK_FORMAT_D32_SFLOAT:
         iview->internal_type = V3D_INTERNAL_TYPE_DEPTH_32F;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         iview->internal_type = V3D_INTERNAL_TYPE_DEPTH_24;
         break;
      default:
         assert(!"unsupported format");
         break;
      }
   } else {
      v3dv_get_internal_type_bpp_for_output_format(iview->format->rt_type,
                                                   &iview->internal_type,
                                                   &iview->internal_bpp);
   }

   *pView = v3dv_image_view_to_handle(iview);

   return VK_SUCCESS;
}
/**
 * Implements vkDestroyImageView(): releases the image view object through
 * the device allocator / pAllocator pair it is freed with.
 */
void
v3dv_DestroyImageView(VkDevice _device,
                      VkImageView imageView,
                      const VkAllocationCallbacks* pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_image_view, iview, imageView);
   V3DV_FROM_HANDLE(v3dv_device, dev, _device);

   vk_free2(&dev->alloc, pAllocator, iview);
}
/**
 * Implements vkCreateBufferView().
 *
 * Allocates a v3dv_buffer_view and fills every field from pCreateInfo: the
 * buffer, the offset, the end of the viewed range (offset + range), the
 * number of format elements in the range, the format and its internal
 * type/bpp.
 *
 * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the view object
 *         could not be allocated.
 */
VkResult
v3dv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   const struct v3dv_buffer *buffer =
      v3dv_buffer_from_handle(pCreateInfo->buffer);

   struct v3dv_buffer_view *bview =
      vk_alloc2(&device->alloc, pAllocator, sizeof(*bview), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bview == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* VK_WHOLE_SIZE selects everything from the offset to the end of the
    * buffer.
    */
   const uint32_t range = pCreateInfo->range == VK_WHOLE_SIZE ?
      buffer->size - pCreateInfo->offset : pCreateInfo->range;

   const enum pipe_format pformat =
      vk_format_to_pipe_format(pCreateInfo->format);

   bview->buffer = buffer;
   bview->offset = pCreateInfo->offset;
   bview->size = bview->offset + range;
   bview->num_elements = range / util_format_get_blocksize(pformat);

   bview->vk_format = pCreateInfo->format;
   bview->format = v3dv_get_format(bview->vk_format);

   v3dv_get_internal_type_bpp_for_output_format(bview->format->rt_type,
                                                &bview->internal_type,
                                                &bview->internal_bpp);

   *pView = v3dv_buffer_view_to_handle(bview);

   return VK_SUCCESS;
}
/**
 * Implements vkDestroyBufferView(): releases the buffer view object through
 * the device allocator / pAllocator pair it is freed with.
 */
void
v3dv_DestroyBufferView(VkDevice _device,
                       VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_buffer_view, bview, bufferView);
   V3DV_FROM_HANDLE(v3dv_device, dev, _device);

   vk_free2(&dev->alloc, pAllocator, bview);
}