mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-21 17:38:08 +02:00
In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2306 lines
87 KiB
C
2306 lines
87 KiB
C
/*
|
|
* Copyright © 2022 Imagination Technologies Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <vulkan/vulkan.h>
|
|
|
|
#include "pvr_blit.h"
|
|
#include "pvr_clear.h"
|
|
#include "pvr_csb.h"
|
|
#include "pvr_formats.h"
|
|
#include "pvr_job_transfer.h"
|
|
#include "pvr_private.h"
|
|
#include "usc/programs/pvr_shader_factory.h"
|
|
#include "usc/programs/pvr_static_shaders.h"
|
|
#include "pvr_types.h"
|
|
#include "util/bitscan.h"
|
|
#include "util/list.h"
|
|
#include "util/macros.h"
|
|
#include "util/u_math.h"
|
|
#include "vk_alloc.h"
|
|
#include "vk_command_buffer.h"
|
|
#include "vk_command_pool.h"
|
|
#include "vk_format.h"
|
|
#include "vk_log.h"
|
|
|
|
/* TODO: Investigate where this limit comes from. */
|
|
#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
|
|
|
|
static struct pvr_transfer_cmd *
|
|
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
|
|
{
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
|
|
transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
|
|
sizeof(*transfer_cmd),
|
|
8U,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
|
if (!transfer_cmd) {
|
|
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
return NULL;
|
|
}
|
|
|
|
/* transfer_cmd->mapping_count is already set to zero. */
|
|
transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
|
|
transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
|
|
transfer_cmd->sources[0].addr_mode = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
|
|
transfer_cmd->cmd_buffer = cmd_buffer;
|
|
|
|
return transfer_cmd;
|
|
}
|
|
|
|
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
|
|
VkRect2D *rect,
|
|
pvr_dev_addr_t dev_addr,
|
|
VkDeviceSize offset,
|
|
VkFormat vk_format,
|
|
VkFormat image_format,
|
|
uint32_t width,
|
|
uint32_t height,
|
|
uint32_t stride)
|
|
{
|
|
enum pipe_format pformat = vk_format_to_pipe_format(image_format);
|
|
|
|
surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
|
|
surface->width = width;
|
|
surface->height = height;
|
|
surface->stride = stride;
|
|
surface->vk_format = vk_format;
|
|
surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
|
|
surface->sample_count = 1;
|
|
|
|
/* Initialize rectangle extent. Also, rectangle.offset should be set to
|
|
* zero, as the offset is already adjusted in the device address above. We
|
|
* don't explicitly set offset to zero as transfer_cmd is zero allocated.
|
|
*/
|
|
rect->extent.width = width;
|
|
rect->extent.height = height;
|
|
|
|
if (util_format_is_compressed(pformat)) {
|
|
uint32_t block_width = util_format_get_blockwidth(pformat);
|
|
uint32_t block_height = util_format_get_blockheight(pformat);
|
|
|
|
surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
|
|
surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
|
|
surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));
|
|
|
|
rect->offset.x /= block_width;
|
|
rect->offset.y /= block_height;
|
|
rect->extent.width =
|
|
MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
|
|
rect->extent.height =
|
|
MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
|
|
}
|
|
}
|
|
|
|
VkFormat pvr_get_raw_copy_format(VkFormat format)
|
|
{
|
|
switch (vk_format_get_blocksize(format)) {
|
|
case 1:
|
|
return VK_FORMAT_R8_UINT;
|
|
case 2:
|
|
return VK_FORMAT_R8G8_UINT;
|
|
case 3:
|
|
return VK_FORMAT_R8G8B8_UINT;
|
|
case 4:
|
|
return VK_FORMAT_R32_UINT;
|
|
case 6:
|
|
return VK_FORMAT_R16G16B16_UINT;
|
|
case 8:
|
|
return VK_FORMAT_R32G32_UINT;
|
|
case 12:
|
|
return VK_FORMAT_R32G32B32_UINT;
|
|
case 16:
|
|
return VK_FORMAT_R32G32B32A32_UINT;
|
|
default:
|
|
UNREACHABLE("Unhandled copy block size.");
|
|
}
|
|
}
|
|
|
|
static void pvr_setup_transfer_surface(struct pvr_device *device,
|
|
struct pvr_transfer_cmd_surface *surface,
|
|
VkRect2D *rect,
|
|
const struct pvr_image *image,
|
|
uint32_t array_layer,
|
|
uint32_t mip_level,
|
|
const VkOffset3D *offset,
|
|
const VkExtent3D *extent,
|
|
float fdepth,
|
|
VkFormat format,
|
|
VkImageAspectFlags aspect_mask)
|
|
{
|
|
const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
|
|
const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
|
|
enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format);
|
|
enum pipe_format pformat = vk_format_to_pipe_format(format);
|
|
const VkImageSubresource sub_resource = {
|
|
.aspectMask = aspect_mask,
|
|
.mipLevel = mip_level,
|
|
.arrayLayer = array_layer,
|
|
};
|
|
VkSubresourceLayout info;
|
|
uint32_t depth;
|
|
|
|
if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
|
|
depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
|
|
else
|
|
depth = 1U;
|
|
|
|
pvr_get_image_subresource_layout(image, &sub_resource, &info);
|
|
|
|
surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
|
|
surface->width = width;
|
|
surface->height = height;
|
|
surface->depth = depth;
|
|
|
|
assert(info.rowPitch % vk_format_get_blocksize(format) == 0);
|
|
surface->stride = info.rowPitch / vk_format_get_blocksize(format);
|
|
|
|
surface->vk_format = format;
|
|
surface->mem_layout = image->memlayout;
|
|
surface->sample_count = image->vk.samples;
|
|
|
|
if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
|
|
surface->z_position = fdepth;
|
|
else
|
|
surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);
|
|
|
|
rect->offset.x = offset->x;
|
|
rect->offset.y = offset->y;
|
|
rect->extent.width = extent->width;
|
|
rect->extent.height = extent->height;
|
|
|
|
if (util_format_is_compressed(image_pformat) &&
|
|
!util_format_is_compressed(pformat)) {
|
|
uint32_t block_width = util_format_get_blockwidth(image_pformat);
|
|
uint32_t block_height = util_format_get_blockheight(image_pformat);
|
|
|
|
surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
|
|
surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
|
|
surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));
|
|
|
|
rect->offset.x /= block_width;
|
|
rect->offset.y /= block_height;
|
|
rect->extent.width =
|
|
MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
|
|
rect->extent.height =
|
|
MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
|
|
}
|
|
}
|
|
|
|
void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer,
|
|
const VkBlitImageInfo2 *pBlitImageInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
|
|
PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
|
|
struct pvr_device *device = cmd_buffer->device;
|
|
enum pvr_filter filter = PVR_FILTER_DONTCARE;
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
|
|
filter = PVR_FILTER_LINEAR;
|
|
|
|
for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
|
|
const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];
|
|
|
|
assert(region->srcSubresource.layerCount ==
|
|
region->dstSubresource.layerCount);
|
|
const bool inverted_dst_z =
|
|
(region->dstOffsets[1].z < region->dstOffsets[0].z);
|
|
const bool inverted_src_z =
|
|
(region->srcOffsets[1].z < region->srcOffsets[0].z);
|
|
const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
|
|
: region->srcOffsets[0].z;
|
|
const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
|
|
: region->srcOffsets[1].z;
|
|
const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
|
|
: region->dstOffsets[0].z;
|
|
const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
|
|
: region->dstOffsets[1].z;
|
|
|
|
const uint32_t src_width =
|
|
region->srcOffsets[1].x - region->srcOffsets[0].x;
|
|
const uint32_t src_height =
|
|
region->srcOffsets[1].y - region->srcOffsets[0].y;
|
|
uint32_t dst_width;
|
|
uint32_t dst_height;
|
|
|
|
float initial_depth_offset;
|
|
VkExtent3D src_extent;
|
|
VkExtent3D dst_extent;
|
|
VkOffset3D dst_offset = region->dstOffsets[0];
|
|
float z_slice_stride;
|
|
bool flip_x;
|
|
bool flip_y;
|
|
|
|
if (region->dstOffsets[1].x > region->dstOffsets[0].x) {
|
|
dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x;
|
|
flip_x = false;
|
|
} else {
|
|
dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x;
|
|
flip_x = true;
|
|
dst_offset.x = region->dstOffsets[1].x;
|
|
}
|
|
|
|
if (region->dstOffsets[1].y > region->dstOffsets[0].y) {
|
|
dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y;
|
|
flip_y = false;
|
|
} else {
|
|
dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y;
|
|
flip_y = true;
|
|
dst_offset.y = region->dstOffsets[1].y;
|
|
}
|
|
|
|
/* If any of the extent regions is zero, then reject the blit and
|
|
* continue.
|
|
*/
|
|
if (!src_width || !src_height || !dst_width || !dst_height ||
|
|
!(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
|
|
mesa_loge("BlitImage: Region %i has an area of zero", i);
|
|
continue;
|
|
}
|
|
|
|
src_extent = (VkExtent3D){
|
|
.width = src_width,
|
|
.height = src_height,
|
|
.depth = 0U,
|
|
};
|
|
|
|
dst_extent = (VkExtent3D){
|
|
.width = dst_width,
|
|
.height = dst_height,
|
|
.depth = 0U,
|
|
};
|
|
|
|
/* The z_position of a transfer surface is intended to be in the range
|
|
* of 0.0f <= z_position <= depth. It will be used as a texture coordinate
|
|
* in the source surface for cases where linear filtering is enabled, so
|
|
* the fractional part will need to represent the exact midpoint of a z
|
|
* slice range in the source texture, as it maps to each destination
|
|
* slice.
|
|
*
|
|
* For destination surfaces, the fractional part is discarded, so
|
|
* we can safely pass the slice index.
|
|
*/
|
|
|
|
/* Calculate the ratio of z slices in our source region to that of our
|
|
* destination region, to get the number of z slices in our source region
|
|
* to iterate over for each destination slice.
|
|
*
|
|
* If our destination region is inverted, we iterate backwards.
|
|
*/
|
|
z_slice_stride =
|
|
(inverted_dst_z ? -1.0f : 1.0f) *
|
|
((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));
|
|
|
|
/* Offset the initial depth offset by half of the z slice stride, into the
|
|
* blit region's z range.
|
|
*/
|
|
initial_depth_offset =
|
|
(inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);
|
|
|
|
for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
|
|
struct pvr_transfer_cmd_surface src_surface = { 0 };
|
|
struct pvr_transfer_cmd_surface dst_surface = { 0 };
|
|
VkRect2D src_rect;
|
|
VkRect2D dst_rect;
|
|
|
|
/* Get the subresource info for the src and dst images, this is
|
|
* required when incrementing the address of the depth slice used by
|
|
* the transfer surface.
|
|
*/
|
|
VkSubresourceLayout src_info, dst_info;
|
|
const VkImageSubresource src_sub_resource = {
|
|
.aspectMask = region->srcSubresource.aspectMask,
|
|
.mipLevel = region->srcSubresource.mipLevel,
|
|
.arrayLayer = region->srcSubresource.baseArrayLayer + j,
|
|
};
|
|
const VkImageSubresource dst_sub_resource = {
|
|
.aspectMask = region->dstSubresource.aspectMask,
|
|
.mipLevel = region->dstSubresource.mipLevel,
|
|
.arrayLayer = region->dstSubresource.baseArrayLayer + j,
|
|
};
|
|
|
|
pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
|
|
pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);
|
|
|
|
/* Setup the transfer surfaces once per image layer, which saves us
|
|
* from repeating subresource queries by manually incrementing the
|
|
* depth slices.
|
|
*/
|
|
pvr_setup_transfer_surface(device,
|
|
&src_surface,
|
|
&src_rect,
|
|
src,
|
|
region->srcSubresource.baseArrayLayer + j,
|
|
region->srcSubresource.mipLevel,
|
|
®ion->srcOffsets[0],
|
|
&src_extent,
|
|
initial_depth_offset,
|
|
src->vk.format,
|
|
region->srcSubresource.aspectMask);
|
|
|
|
pvr_setup_transfer_surface(device,
|
|
&dst_surface,
|
|
&dst_rect,
|
|
dst,
|
|
region->dstSubresource.baseArrayLayer + j,
|
|
region->dstSubresource.mipLevel,
|
|
&dst_offset,
|
|
&dst_extent,
|
|
min_dst_z,
|
|
dst->vk.format,
|
|
region->dstSubresource.aspectMask);
|
|
|
|
for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
VkResult result;
|
|
|
|
/* TODO: See if we can allocate all the transfer cmds in one go. */
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return;
|
|
|
|
transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
|
|
transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
|
|
transfer_cmd->sources[0].mappings[0].flip_x = flip_x;
|
|
transfer_cmd->sources[0].mappings[0].flip_y = flip_y;
|
|
transfer_cmd->sources[0].mapping_count++;
|
|
|
|
transfer_cmd->sources[0].surface = src_surface;
|
|
transfer_cmd->sources[0].filter = filter;
|
|
transfer_cmd->source_count = 1;
|
|
|
|
transfer_cmd->dst = dst_surface;
|
|
transfer_cmd->scissor = dst_rect;
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return;
|
|
}
|
|
|
|
if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
|
|
src_surface.z_position += z_slice_stride;
|
|
} else {
|
|
src_surface.dev_addr.addr +=
|
|
src_info.depthPitch * ((uint32_t)z_slice_stride);
|
|
}
|
|
|
|
if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
|
|
dst_surface.z_position += 1.0f;
|
|
else
|
|
dst_surface.dev_addr.addr += dst_info.depthPitch;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static VkFormat pvr_get_copy_format(VkFormat format)
|
|
{
|
|
switch (format) {
|
|
case VK_FORMAT_R8_SNORM:
|
|
return VK_FORMAT_R8_SINT;
|
|
case VK_FORMAT_R8G8_SNORM:
|
|
return VK_FORMAT_R8G8_SINT;
|
|
case VK_FORMAT_R8G8B8_SNORM:
|
|
return VK_FORMAT_R8G8B8_SINT;
|
|
case VK_FORMAT_R8G8B8A8_SNORM:
|
|
return VK_FORMAT_R8G8B8A8_SINT;
|
|
case VK_FORMAT_B8G8R8A8_SNORM:
|
|
return VK_FORMAT_B8G8R8A8_SINT;
|
|
default:
|
|
return format;
|
|
}
|
|
}
|
|
|
|
static void
|
|
pvr_setup_surface_for_image(struct pvr_device *device,
|
|
struct pvr_transfer_cmd_surface *surface,
|
|
VkRect2D *rect,
|
|
const struct pvr_image *image,
|
|
uint32_t array_layer,
|
|
uint32_t array_offset,
|
|
uint32_t mip_level,
|
|
const VkOffset3D *offset,
|
|
const VkExtent3D *extent,
|
|
uint32_t depth,
|
|
VkFormat format,
|
|
const VkImageAspectFlags aspect_mask)
|
|
{
|
|
if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
|
|
pvr_setup_transfer_surface(device,
|
|
surface,
|
|
rect,
|
|
image,
|
|
array_layer + array_offset,
|
|
mip_level,
|
|
offset,
|
|
extent,
|
|
0.0f,
|
|
format,
|
|
aspect_mask);
|
|
} else {
|
|
pvr_setup_transfer_surface(device,
|
|
surface,
|
|
rect,
|
|
image,
|
|
array_layer,
|
|
mip_level,
|
|
offset,
|
|
extent,
|
|
(float)depth,
|
|
format,
|
|
aspect_mask);
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
|
|
enum pvr_resolve_op resolve_op,
|
|
const struct pvr_image *src,
|
|
const struct pvr_image *dst,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format);
|
|
enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format);
|
|
bool src_block_compressed = util_format_is_compressed(src_pformat);
|
|
bool dst_block_compressed = util_format_is_compressed(dst_pformat);
|
|
VkExtent3D src_extent;
|
|
VkExtent3D dst_extent;
|
|
VkFormat dst_format;
|
|
VkFormat src_format;
|
|
uint32_t dst_layers;
|
|
uint32_t src_layers;
|
|
uint32_t max_slices;
|
|
uint32_t flags = 0U;
|
|
|
|
if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
|
region->srcSubresource.aspectMask !=
|
|
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
|
/* Takes the stencil of the source and the depth of the destination and
|
|
* combines the two interleaved.
|
|
*/
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
|
|
|
|
if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
|
/* Takes the depth of the source and the stencil of the destination and
|
|
* combines the two interleaved.
|
|
*/
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
|
|
}
|
|
}
|
|
|
|
src_extent = region->extent;
|
|
dst_extent = region->extent;
|
|
|
|
if (src_block_compressed && !dst_block_compressed) {
|
|
uint32_t block_width = util_format_get_blockwidth(src_pformat);
|
|
uint32_t block_height = util_format_get_blockheight(src_pformat);
|
|
|
|
dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width));
|
|
dst_extent.height =
|
|
MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height));
|
|
} else if (!src_block_compressed && dst_block_compressed) {
|
|
uint32_t block_width = util_format_get_blockwidth(dst_pformat);
|
|
uint32_t block_height = util_format_get_blockheight(dst_pformat);
|
|
|
|
dst_extent.width = MAX2(1U, src_extent.width * block_width);
|
|
dst_extent.height = MAX2(1U, src_extent.height * block_height);
|
|
}
|
|
|
|
if (src->vk.samples > dst->vk.samples) {
|
|
/* Resolve op needs to know the actual format. */
|
|
dst_format = dst->vk.format;
|
|
} else {
|
|
/* We don't care what format dst is as it's guaranteed to be size
|
|
* compatible with src.
|
|
*/
|
|
dst_format = pvr_get_raw_copy_format(src->vk.format);
|
|
}
|
|
src_format = dst_format;
|
|
|
|
src_layers =
|
|
vk_image_subresource_layer_count(&src->vk, ®ion->srcSubresource);
|
|
dst_layers =
|
|
vk_image_subresource_layer_count(&dst->vk, ®ion->dstSubresource);
|
|
|
|
/* srcSubresource.layerCount must match layerCount of dstSubresource in
|
|
* copies not involving 3D images. In copies involving 3D images, if there is
|
|
* a 2D image it's layerCount.
|
|
*/
|
|
max_slices = MAX3(src_layers, dst_layers, region->extent.depth);
|
|
|
|
for (uint32_t i = 0U; i < max_slices; i++) {
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
VkResult result;
|
|
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
transfer_cmd->flags |= flags;
|
|
transfer_cmd->sources[0].resolve_op = resolve_op;
|
|
|
|
pvr_setup_surface_for_image(
|
|
cmd_buffer->device,
|
|
&transfer_cmd->sources[0].surface,
|
|
&transfer_cmd->sources[0].mappings[0U].src_rect,
|
|
src,
|
|
region->srcSubresource.baseArrayLayer,
|
|
i,
|
|
region->srcSubresource.mipLevel,
|
|
®ion->srcOffset,
|
|
&src_extent,
|
|
region->srcOffset.z + i,
|
|
src_format,
|
|
region->srcSubresource.aspectMask);
|
|
|
|
pvr_setup_surface_for_image(cmd_buffer->device,
|
|
&transfer_cmd->dst,
|
|
&transfer_cmd->scissor,
|
|
dst,
|
|
region->dstSubresource.baseArrayLayer,
|
|
i,
|
|
region->dstSubresource.mipLevel,
|
|
®ion->dstOffset,
|
|
&dst_extent,
|
|
region->dstOffset.z + i,
|
|
dst_format,
|
|
region->dstSubresource.aspectMask);
|
|
|
|
transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
|
|
transfer_cmd->sources[0].mapping_count++;
|
|
transfer_cmd->source_count = 1;
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return result;
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult
|
|
pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
|
|
const struct pvr_image *src,
|
|
const struct pvr_image *dst,
|
|
const VkImageCopy2 *region)
|
|
{
|
|
enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;
|
|
|
|
if (src->vk.samples > 1U && dst->vk.samples < 2U) {
|
|
/* Integer resolve picks a single sample. */
|
|
if (vk_format_is_int(src->vk.format))
|
|
resolve_op = PVR_RESOLVE_SAMPLE0;
|
|
}
|
|
|
|
return pvr_copy_or_resolve_image_region(cmd_buffer,
|
|
resolve_op,
|
|
src,
|
|
dst,
|
|
region);
|
|
}
|
|
|
|
static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
|
|
const VkImageCopy2 *pRegionB)
|
|
{
|
|
assert(pRegionA->srcSubresource.aspectMask != 0U);
|
|
assert(pRegionB->srcSubresource.aspectMask != 0U);
|
|
|
|
if (!((pRegionA->srcSubresource.aspectMask ^
|
|
pRegionB->srcSubresource.aspectMask) &
|
|
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
|
|
return false;
|
|
}
|
|
|
|
/* Assert if aspectMask mismatch between src and dst, given it's a depth and
|
|
* stencil image so not multi-planar and from the Vulkan 1.0.223 spec:
|
|
*
|
|
* If neither srcImage nor dstImage has a multi-planar image format then
|
|
* for each element of pRegions, srcSubresource.aspectMask and
|
|
* dstSubresource.aspectMask must match.
|
|
*/
|
|
assert(pRegionA->srcSubresource.aspectMask ==
|
|
pRegionA->dstSubresource.aspectMask);
|
|
assert(pRegionB->srcSubresource.aspectMask ==
|
|
pRegionB->dstSubresource.aspectMask);
|
|
|
|
if (!(pRegionA->srcSubresource.mipLevel ==
|
|
pRegionB->srcSubresource.mipLevel &&
|
|
pRegionA->srcSubresource.baseArrayLayer ==
|
|
pRegionB->srcSubresource.baseArrayLayer &&
|
|
pRegionA->srcSubresource.layerCount ==
|
|
pRegionB->srcSubresource.layerCount)) {
|
|
return false;
|
|
}
|
|
|
|
if (!(pRegionA->dstSubresource.mipLevel ==
|
|
pRegionB->dstSubresource.mipLevel &&
|
|
pRegionA->dstSubresource.baseArrayLayer ==
|
|
pRegionB->dstSubresource.baseArrayLayer &&
|
|
pRegionA->dstSubresource.layerCount ==
|
|
pRegionB->dstSubresource.layerCount)) {
|
|
return false;
|
|
}
|
|
|
|
if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
|
|
pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
|
|
pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
|
|
return false;
|
|
}
|
|
|
|
if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
|
|
pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
|
|
pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
|
|
return false;
|
|
}
|
|
|
|
if (!(pRegionA->extent.width == pRegionB->extent.width &&
|
|
pRegionA->extent.height == pRegionB->extent.height &&
|
|
pRegionA->extent.depth == pRegionB->extent.depth)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer,
|
|
const VkCopyImageInfo2 *pCopyImageInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
|
|
PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);
|
|
|
|
const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
|
dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
|
|
VkResult result;
|
|
|
|
/* If an application has split a copy between D24S8 images into two
|
|
* separate copy regions (one for the depth aspect and one for the
|
|
* stencil aspect) attempt to merge the two regions back into one blit.
|
|
*
|
|
* This can only be merged if both regions are identical apart from the
|
|
* aspectMask, one of which has to be depth and the other has to be
|
|
* stencil.
|
|
*
|
|
* Only attempt to merge consecutive regions, ignore the case of merging
|
|
* non-consecutive regions.
|
|
*/
|
|
if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) {
|
|
const bool ret =
|
|
pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i],
|
|
&pCopyImageInfo->pRegions[i + 1]);
|
|
if (ret) {
|
|
VkImageCopy2 region = pCopyImageInfo->pRegions[i];
|
|
|
|
region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
|
|
VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
|
|
VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
|
|
result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
|
|
src,
|
|
dst,
|
|
®ion);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
/* Skip the next region as it has been processed with the last
|
|
* region.
|
|
*/
|
|
i++;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
result =
|
|
pvr_copy_or_resolve_color_image_region(cmd_buffer,
|
|
src,
|
|
dst,
|
|
&pCopyImageInfo->pRegions[i]);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
VkResult
|
|
pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer,
|
|
const pvr_dev_addr_t buffer_dev_addr,
|
|
const struct pvr_image *const image,
|
|
const VkBufferImageCopy2 *const region,
|
|
const VkFormat src_format,
|
|
const VkFormat dst_format,
|
|
const uint32_t flags)
|
|
{
|
|
enum pipe_format pformat = vk_format_to_pipe_format(dst_format);
|
|
uint32_t row_length_in_texels;
|
|
uint32_t buffer_slice_size;
|
|
uint32_t buffer_layer_size;
|
|
uint32_t height_in_blks;
|
|
uint32_t row_length;
|
|
|
|
if (region->bufferRowLength == 0)
|
|
row_length_in_texels = region->imageExtent.width;
|
|
else
|
|
row_length_in_texels = region->bufferRowLength;
|
|
|
|
if (region->bufferImageHeight == 0)
|
|
height_in_blks = region->imageExtent.height;
|
|
else
|
|
height_in_blks = region->bufferImageHeight;
|
|
|
|
if (util_format_is_compressed(pformat)) {
|
|
uint32_t block_width = util_format_get_blockwidth(pformat);
|
|
uint32_t block_height = util_format_get_blockheight(pformat);
|
|
uint32_t block_size = util_format_get_blocksize(pformat);
|
|
|
|
height_in_blks = DIV_ROUND_UP(height_in_blks, block_height);
|
|
row_length_in_texels =
|
|
DIV_ROUND_UP(row_length_in_texels, block_width) * block_size;
|
|
}
|
|
|
|
row_length = row_length_in_texels * vk_format_get_blocksize(src_format);
|
|
|
|
buffer_slice_size = height_in_blks * row_length;
|
|
buffer_layer_size = buffer_slice_size * region->imageExtent.depth;
|
|
|
|
for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
|
|
const uint32_t depth = i + (uint32_t)region->imageOffset.z;
|
|
|
|
for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
|
|
const VkDeviceSize buffer_offset = region->bufferOffset +
|
|
(j * buffer_layer_size) +
|
|
(i * buffer_slice_size);
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
VkResult result;
|
|
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
transfer_cmd->flags = flags;
|
|
|
|
pvr_setup_buffer_surface(
|
|
&transfer_cmd->sources[0].surface,
|
|
&transfer_cmd->sources[0].mappings[0].src_rect,
|
|
buffer_dev_addr,
|
|
buffer_offset,
|
|
src_format,
|
|
image->vk.format,
|
|
region->imageExtent.width,
|
|
region->imageExtent.height,
|
|
row_length_in_texels);
|
|
|
|
transfer_cmd->sources[0].surface.depth = 1;
|
|
transfer_cmd->source_count = 1;
|
|
|
|
pvr_setup_transfer_surface(cmd_buffer->device,
|
|
&transfer_cmd->dst,
|
|
&transfer_cmd->scissor,
|
|
image,
|
|
region->imageSubresource.baseArrayLayer + j,
|
|
region->imageSubresource.mipLevel,
|
|
®ion->imageOffset,
|
|
®ion->imageExtent,
|
|
depth,
|
|
dst_format,
|
|
region->imageSubresource.aspectMask);
|
|
|
|
transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
|
|
transfer_cmd->sources[0].mapping_count++;
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult
|
|
pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer,
|
|
const pvr_dev_addr_t buffer_dev_addr,
|
|
const struct pvr_image *const image,
|
|
const VkBufferImageCopy2 *const region)
|
|
{
|
|
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
|
|
VkFormat src_format;
|
|
VkFormat dst_format;
|
|
uint32_t flags = 0;
|
|
|
|
if (vk_format_has_depth(image->vk.format) &&
|
|
vk_format_has_stencil(image->vk.format)) {
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
|
|
|
|
if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
|
|
src_format = vk_format_stencil_only(image->vk.format);
|
|
} else {
|
|
src_format = vk_format_depth_only(image->vk.format);
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
|
|
}
|
|
|
|
dst_format = image->vk.format;
|
|
} else {
|
|
src_format = pvr_get_raw_copy_format(image->vk.format);
|
|
dst_format = src_format;
|
|
}
|
|
|
|
return pvr_copy_buffer_to_image_region_format(cmd_buffer,
|
|
buffer_dev_addr,
|
|
image,
|
|
region,
|
|
src_format,
|
|
dst_format,
|
|
flags);
|
|
}
|
|
|
|
void pvr_CmdCopyBufferToImage2(
|
|
VkCommandBuffer commandBuffer,
|
|
const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
|
|
const VkResult result =
|
|
pvr_copy_buffer_to_image_region(cmd_buffer,
|
|
src->dev_addr,
|
|
dst,
|
|
&pCopyBufferToImageInfo->pRegions[i]);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
VkResult
|
|
pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer,
|
|
const struct pvr_image *const image,
|
|
const pvr_dev_addr_t buffer_dev_addr,
|
|
const VkBufferImageCopy2 *const region,
|
|
const VkFormat src_format,
|
|
const VkFormat dst_format)
|
|
{
|
|
enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format);
|
|
struct pvr_transfer_cmd_surface dst_surface = { 0 };
|
|
VkImageSubresource sub_resource;
|
|
uint32_t buffer_image_height;
|
|
uint32_t buffer_row_length;
|
|
uint32_t buffer_slice_size;
|
|
uint32_t max_array_layers;
|
|
VkRect2D dst_rect = { 0 };
|
|
uint32_t max_depth_slice;
|
|
VkSubresourceLayout info;
|
|
|
|
/* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to buffer. */
|
|
assert(image->vk.samples == 1);
|
|
|
|
if (region->bufferRowLength == 0)
|
|
buffer_row_length = region->imageExtent.width;
|
|
else
|
|
buffer_row_length = region->bufferRowLength;
|
|
|
|
if (region->bufferImageHeight == 0)
|
|
buffer_image_height = region->imageExtent.height;
|
|
else
|
|
buffer_image_height = region->bufferImageHeight;
|
|
|
|
max_array_layers =
|
|
region->imageSubresource.baseArrayLayer +
|
|
vk_image_subresource_layer_count(&image->vk, ®ion->imageSubresource);
|
|
|
|
buffer_slice_size = buffer_image_height * buffer_row_length *
|
|
vk_format_get_blocksize(dst_format);
|
|
|
|
max_depth_slice = region->imageExtent.depth + region->imageOffset.z;
|
|
|
|
pvr_setup_buffer_surface(&dst_surface,
|
|
&dst_rect,
|
|
buffer_dev_addr,
|
|
region->bufferOffset,
|
|
dst_format,
|
|
image->vk.format,
|
|
buffer_row_length,
|
|
buffer_image_height,
|
|
buffer_row_length);
|
|
|
|
dst_rect.extent.width = region->imageExtent.width;
|
|
dst_rect.extent.height = region->imageExtent.height;
|
|
|
|
if (util_format_is_compressed(pformat)) {
|
|
uint32_t block_width = util_format_get_blockwidth(pformat);
|
|
uint32_t block_height = util_format_get_blockheight(pformat);
|
|
|
|
dst_rect.extent.width =
|
|
MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width));
|
|
dst_rect.extent.height =
|
|
MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height));
|
|
}
|
|
|
|
sub_resource = (VkImageSubresource){
|
|
.aspectMask = region->imageSubresource.aspectMask,
|
|
.mipLevel = region->imageSubresource.mipLevel,
|
|
.arrayLayer = region->imageSubresource.baseArrayLayer,
|
|
};
|
|
|
|
pvr_get_image_subresource_layout(image, &sub_resource, &info);
|
|
|
|
for (uint32_t i = region->imageSubresource.baseArrayLayer;
|
|
i < max_array_layers;
|
|
i++) {
|
|
struct pvr_transfer_cmd_surface src_surface = { 0 };
|
|
VkRect2D src_rect = { 0 };
|
|
|
|
/* Note: Set the depth to the initial depth offset, the memory address (or
|
|
* the z_position) for the depth slice will be incremented manually in the
|
|
* loop below.
|
|
*/
|
|
pvr_setup_transfer_surface(cmd_buffer->device,
|
|
&src_surface,
|
|
&src_rect,
|
|
image,
|
|
i,
|
|
region->imageSubresource.mipLevel,
|
|
®ion->imageOffset,
|
|
®ion->imageExtent,
|
|
region->imageOffset.z,
|
|
src_format,
|
|
region->imageSubresource.aspectMask);
|
|
|
|
for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
VkResult result;
|
|
|
|
/* TODO: See if we can allocate all the transfer cmds in one go. */
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
|
|
transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
|
|
transfer_cmd->sources[0].mapping_count++;
|
|
|
|
transfer_cmd->sources[0].surface = src_surface;
|
|
transfer_cmd->source_count = 1;
|
|
|
|
transfer_cmd->dst = dst_surface;
|
|
transfer_cmd->scissor = dst_rect;
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return result;
|
|
}
|
|
|
|
dst_surface.dev_addr.addr += buffer_slice_size;
|
|
|
|
if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
|
|
src_surface.z_position += 1.0f;
|
|
else
|
|
src_surface.dev_addr.addr += info.depthPitch;
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult
|
|
pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer,
|
|
const struct pvr_image *const image,
|
|
const pvr_dev_addr_t buffer_dev_addr,
|
|
const VkBufferImageCopy2 *const region)
|
|
{
|
|
const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
|
|
|
|
VkFormat src_format = pvr_get_copy_format(image->vk.format);
|
|
VkFormat dst_format;
|
|
|
|
/* Color and depth aspect copies can be done using an appropriate raw format.
|
|
*/
|
|
if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
|
|
src_format = pvr_get_raw_copy_format(src_format);
|
|
dst_format = src_format;
|
|
} else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
|
/* From the Vulkan spec:
|
|
*
|
|
* Data copied to or from the stencil aspect of any depth/stencil
|
|
* format is tightly packed with one VK_FORMAT_S8_UINT value per texel.
|
|
*/
|
|
dst_format = VK_FORMAT_S8_UINT;
|
|
} else {
|
|
/* YUV Planes require specific formats. */
|
|
dst_format = src_format;
|
|
}
|
|
|
|
return pvr_copy_image_to_buffer_region_format(cmd_buffer,
|
|
image,
|
|
buffer_dev_addr,
|
|
region,
|
|
src_format,
|
|
dst_format);
|
|
}
|
|
|
|
void pvr_CmdCopyImageToBuffer2(
|
|
VkCommandBuffer commandBuffer,
|
|
const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
|
|
const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[i];
|
|
|
|
const VkResult result = pvr_copy_image_to_buffer_region(cmd_buffer,
|
|
src,
|
|
dst->dev_addr,
|
|
region);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
static void pvr_calc_mip_level_extents(const struct pvr_image *image,
|
|
uint16_t mip_level,
|
|
VkExtent3D *extent_out)
|
|
{
|
|
/* 3D textures are clamped to 4x4x4. */
|
|
const uint32_t clamp = (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4 : 1;
|
|
const VkExtent3D *extent = &image->vk.extent;
|
|
|
|
extent_out->width = MAX2(extent->width >> mip_level, clamp);
|
|
extent_out->height = MAX2(extent->height >> mip_level, clamp);
|
|
extent_out->depth = MAX2(extent->depth >> mip_level, clamp);
|
|
}
|
|
|
|
static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
|
|
const struct pvr_image *image,
|
|
const VkClearColorValue *pColor,
|
|
const VkImageSubresourceRange *psRange,
|
|
uint32_t flags)
|
|
{
|
|
const uint32_t layer_count =
|
|
vk_image_subresource_layer_count(&image->vk, psRange);
|
|
const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
|
|
VkFormat format = image->vk.format;
|
|
const VkOffset3D offset = { 0 };
|
|
VkExtent3D mip_extent;
|
|
|
|
assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);
|
|
|
|
for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
|
|
const uint32_t level_count =
|
|
vk_image_subresource_level_count(&image->vk, psRange);
|
|
const uint32_t max_level = psRange->baseMipLevel + level_count;
|
|
|
|
assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);
|
|
|
|
for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
|
|
pvr_calc_mip_level_extents(image, level, &mip_extent);
|
|
|
|
for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
VkResult result;
|
|
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
transfer_cmd->flags |= flags;
|
|
transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;
|
|
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
|
|
transfer_cmd->clear_color[i].ui = pColor->uint32[i];
|
|
|
|
pvr_setup_transfer_surface(cmd_buffer->device,
|
|
&transfer_cmd->dst,
|
|
&transfer_cmd->scissor,
|
|
image,
|
|
layer,
|
|
level,
|
|
&offset,
|
|
&mip_extent,
|
|
depth,
|
|
format,
|
|
psRange->aspectMask);
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
|
|
VkImage _image,
|
|
VkImageLayout imageLayout,
|
|
const VkClearColorValue *pColor,
|
|
uint32_t rangeCount,
|
|
const VkImageSubresourceRange *pRanges)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, image, _image);
|
|
|
|
for (uint32_t i = 0; i < rangeCount; i++) {
|
|
const VkResult result =
|
|
pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
|
VkImage _image,
|
|
VkImageLayout imageLayout,
|
|
const VkClearDepthStencilValue *pDepthStencil,
|
|
uint32_t rangeCount,
|
|
const VkImageSubresourceRange *pRanges)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_image, image, _image);
|
|
|
|
for (uint32_t i = 0; i < rangeCount; i++) {
|
|
const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
|
|
VK_IMAGE_ASPECT_STENCIL_BIT;
|
|
VkClearColorValue clear_ds = { 0 };
|
|
uint32_t flags = 0U;
|
|
VkResult result;
|
|
|
|
if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
|
pRanges[i].aspectMask != ds_aspect) {
|
|
/* A depth or stencil blit to a packed_depth_stencil requires a merge
|
|
* operation.
|
|
*/
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
|
|
|
|
if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
|
flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
|
|
}
|
|
|
|
clear_ds.float32[0] = pDepthStencil->depth;
|
|
clear_ds.uint32[1] = pDepthStencil->stencil;
|
|
|
|
result =
|
|
pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
|
|
pvr_dev_addr_t src_addr,
|
|
VkDeviceSize src_offset,
|
|
pvr_dev_addr_t dst_addr,
|
|
VkDeviceSize dst_offset,
|
|
VkDeviceSize size,
|
|
uint32_t fill_data,
|
|
bool is_fill)
|
|
{
|
|
VkDeviceSize offset = 0;
|
|
|
|
while (offset < size) {
|
|
const VkDeviceSize remaining_size = size - offset;
|
|
struct pvr_transfer_cmd *transfer_cmd;
|
|
uint32_t src_align = (src_addr.addr + offset + src_offset) & 0xF;
|
|
uint32_t dst_align = (dst_addr.addr + offset + src_offset) & 0xF;
|
|
uint32_t texel_width;
|
|
VkDeviceSize texels;
|
|
VkFormat vk_format;
|
|
VkResult result;
|
|
uint32_t height;
|
|
uint32_t width;
|
|
|
|
if (is_fill) {
|
|
vk_format = VK_FORMAT_R32_UINT;
|
|
texel_width = 4U;
|
|
} else if (remaining_size >= 16U && (src_align % 16U) == 0 &&
|
|
(dst_align % 16U) == 0) {
|
|
/* Only if address is 128bpp aligned */
|
|
vk_format = VK_FORMAT_R32G32B32A32_UINT;
|
|
texel_width = 16U;
|
|
} else if (remaining_size >= 4U) {
|
|
vk_format = VK_FORMAT_R32_UINT;
|
|
texel_width = 4U;
|
|
} else {
|
|
vk_format = VK_FORMAT_R8_UINT;
|
|
texel_width = 1U;
|
|
}
|
|
|
|
texels = remaining_size / texel_width;
|
|
|
|
/* Try to do max-width rects, fall back to a 1-height rect for the
|
|
* remainder.
|
|
*/
|
|
if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
|
|
width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
|
|
height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
|
|
height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
|
|
} else {
|
|
width = texels;
|
|
height = 1;
|
|
}
|
|
|
|
transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
|
|
if (!transfer_cmd)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
if (!is_fill) {
|
|
pvr_setup_buffer_surface(
|
|
&transfer_cmd->sources[0].surface,
|
|
&transfer_cmd->sources[0].mappings[0].src_rect,
|
|
src_addr,
|
|
offset + src_offset,
|
|
vk_format,
|
|
vk_format,
|
|
width,
|
|
height,
|
|
width);
|
|
transfer_cmd->source_count = 1;
|
|
} else {
|
|
transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;
|
|
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
|
|
transfer_cmd->clear_color[i].ui = fill_data;
|
|
}
|
|
|
|
pvr_setup_buffer_surface(&transfer_cmd->dst,
|
|
&transfer_cmd->scissor,
|
|
dst_addr,
|
|
offset + dst_offset,
|
|
vk_format,
|
|
vk_format,
|
|
width,
|
|
height,
|
|
width);
|
|
|
|
if (transfer_cmd->source_count > 0) {
|
|
transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
|
|
|
|
transfer_cmd->sources[0].mapping_count++;
|
|
}
|
|
|
|
result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
|
|
if (result != VK_SUCCESS) {
|
|
vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
|
|
return result;
|
|
}
|
|
|
|
offset += width * height * texel_width;
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
|
VkBuffer dstBuffer,
|
|
VkDeviceSize dstOffset,
|
|
VkDeviceSize dataSize,
|
|
const void *pData)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
|
|
struct pvr_suballoc_bo *pvr_bo;
|
|
VkResult result;
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
result = pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &pvr_bo);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
pvr_cmd_copy_buffer_region(cmd_buffer,
|
|
pvr_bo->dev_addr,
|
|
0,
|
|
dst->dev_addr,
|
|
dstOffset,
|
|
dataSize,
|
|
0U,
|
|
false);
|
|
}
|
|
|
|
void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
|
|
const VkCopyBufferInfo2 *pCopyBufferInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
|
|
PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
|
|
const VkResult result =
|
|
pvr_cmd_copy_buffer_region(cmd_buffer,
|
|
src->dev_addr,
|
|
pCopyBufferInfo->pRegions[i].srcOffset,
|
|
dst->dev_addr,
|
|
pCopyBufferInfo->pRegions[i].dstOffset,
|
|
pCopyBufferInfo->pRegions[i].size,
|
|
0U,
|
|
false);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
|
VkBuffer dstBuffer,
|
|
VkDeviceSize dstOffset,
|
|
VkDeviceSize fillSize,
|
|
uint32_t data)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize);
|
|
|
|
/* From the Vulkan spec:
|
|
*
|
|
* "size is the number of bytes to fill, and must be either a multiple
|
|
* of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
|
|
* the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
|
|
* buffer is not a multiple of 4, then the nearest smaller multiple is
|
|
* used."
|
|
*/
|
|
fillSize &= ~3ULL;
|
|
|
|
pvr_cmd_copy_buffer_region(cmd_buffer,
|
|
PVR_DEV_ADDR_INVALID,
|
|
0,
|
|
dst->dev_addr,
|
|
dstOffset,
|
|
fillSize,
|
|
data,
|
|
true);
|
|
}
|
|
|
|
/**
|
|
* \brief Returns the maximum number of layers to clear starting from base_layer
|
|
* that contain or match the target rectangle.
|
|
*
|
|
* \param[in] target_rect The region which the clear should contain or
|
|
* match.
|
|
* \param[in] base_layer The layer index to start at.
|
|
* \param[in] clear_rect_count Amount of clear_rects
|
|
* \param[in] clear_rects Array of clear rects.
|
|
*
|
|
* \return Max number of layers that cover or match the target region.
|
|
*/
|
|
static uint32_t
|
|
pvr_get_max_layers_covering_target(VkRect2D target_rect,
|
|
uint32_t base_layer,
|
|
uint32_t clear_rect_count,
|
|
const VkClearRect *clear_rects)
|
|
{
|
|
const int32_t target_x0 = target_rect.offset.x;
|
|
const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
|
|
const int32_t target_y0 = target_rect.offset.y;
|
|
const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
|
|
|
|
uint32_t layer_count = 0;
|
|
|
|
assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
|
|
assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
|
|
|
|
for (uint32_t i = 0; i < clear_rect_count; i++) {
|
|
const VkClearRect *clear_rect = &clear_rects[i];
|
|
const uint32_t max_layer =
|
|
clear_rect->baseArrayLayer + clear_rect->layerCount;
|
|
bool target_is_covered;
|
|
int32_t x0, x1;
|
|
int32_t y0, y1;
|
|
|
|
if (clear_rect->baseArrayLayer == 0)
|
|
continue;
|
|
|
|
assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
|
|
UINT32_MAX);
|
|
|
|
/* Check for layer intersection. */
|
|
if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
|
|
continue;
|
|
|
|
x0 = clear_rect->rect.offset.x;
|
|
x1 = x0 + (int32_t)clear_rect->rect.extent.width;
|
|
y0 = clear_rect->rect.offset.y;
|
|
y1 = y0 + (int32_t)clear_rect->rect.extent.height;
|
|
|
|
assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
|
|
assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
|
|
INT32_MAX);
|
|
|
|
target_is_covered = x0 <= target_x0 && x1 >= target_x1;
|
|
target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
|
|
|
|
if (target_is_covered)
|
|
layer_count = MAX2(layer_count, max_layer - base_layer);
|
|
}
|
|
|
|
return layer_count;
|
|
}
|
|
|
|
/* Return true if vertex shader is required to output render target id to pick
|
|
* the texture array layer.
|
|
*/
|
|
static inline bool
|
|
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
|
|
uint32_t rect_count,
|
|
const VkClearRect *rects)
|
|
{
|
|
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
|
|
return false;
|
|
|
|
for (uint32_t i = 0; i < rect_count; i++) {
|
|
if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
|
|
struct pvr_cmd_buffer *cmd_buffer,
|
|
const struct pvr_shader_factory_info *shader_info,
|
|
const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
|
|
ASSERTED bool uses_tile_buffer,
|
|
uint32_t tile_buffer_idx,
|
|
struct pvr_suballoc_bo **const const_shareds_buffer_out)
|
|
{
|
|
struct pvr_device *device = cmd_buffer->device;
|
|
struct pvr_suballoc_bo *const_shareds_buffer;
|
|
struct pvr_bo *tile_buffer;
|
|
uint64_t tile_dev_addr;
|
|
uint32_t *buffer;
|
|
VkResult result;
|
|
|
|
/* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
|
|
* Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
|
|
*/
|
|
result =
|
|
pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
|
device->heaps.general_heap,
|
|
PVR_DW_TO_BYTES(shader_info->const_shared_regs),
|
|
&const_shareds_buffer);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);
|
|
|
|
for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
|
|
uint32_t dest_idx = shader_info->driver_const_location_map[i];
|
|
|
|
if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
|
|
continue;
|
|
|
|
assert(dest_idx < shader_info->const_shared_regs);
|
|
|
|
switch (i) {
|
|
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
|
|
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
|
|
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
|
|
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
|
|
buffer[dest_idx] = clear_color[i];
|
|
break;
|
|
|
|
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
|
|
assert(uses_tile_buffer);
|
|
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
|
|
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
|
|
buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
|
|
break;
|
|
|
|
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
|
|
assert(uses_tile_buffer);
|
|
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
|
|
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
|
|
buffer[dest_idx] = (uint32_t)tile_dev_addr;
|
|
break;
|
|
|
|
default:
|
|
UNREACHABLE("Unsupported clear attachment const type.");
|
|
}
|
|
}
|
|
|
|
for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
|
|
const struct pvr_static_buffer *static_buff =
|
|
&shader_info->static_const_buffer[i];
|
|
|
|
assert(static_buff->dst_idx < shader_info->const_shared_regs);
|
|
|
|
buffer[static_buff->dst_idx] = static_buff->value;
|
|
}
|
|
|
|
*const_shareds_buffer_out = const_shareds_buffer;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult pvr_clear_color_attachment_static(
|
|
struct pvr_cmd_buffer *cmd_buffer,
|
|
const struct usc_mrt_resource *mrt_resource,
|
|
VkFormat format,
|
|
uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
|
|
uint32_t template_idx,
|
|
uint32_t stencil,
|
|
bool vs_has_rt_id_output)
|
|
{
|
|
struct pvr_device *device = cmd_buffer->device;
|
|
ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
ASSERTED const bool has_eight_output_registers =
|
|
PVR_HAS_FEATURE(dev_info, eight_output_registers);
|
|
const struct pvr_device_static_clear_state *dev_clear_state =
|
|
&device->static_clear_state;
|
|
const bool uses_tile_buffer = mrt_resource->type ==
|
|
USC_MRT_RESOURCE_TYPE_MEMORY;
|
|
const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
|
|
struct pvr_pds_pixel_shader_sa_program texture_program;
|
|
uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
|
|
const struct pvr_shader_factory_info *shader_info;
|
|
struct pvr_suballoc_bo *pds_texture_program_bo;
|
|
struct pvr_static_clear_ppp_template template;
|
|
struct pvr_suballoc_bo *const_shareds_buffer;
|
|
uint64_t pds_texture_program_addr;
|
|
struct pvr_suballoc_bo *pvr_bo;
|
|
uint32_t tile_buffer_idx = 0;
|
|
uint32_t out_reg_count;
|
|
uint32_t output_offset;
|
|
uint32_t program_idx;
|
|
uint32_t *buffer;
|
|
VkResult result;
|
|
|
|
out_reg_count =
|
|
DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);
|
|
|
|
if (uses_tile_buffer) {
|
|
tile_buffer_idx = mrt_resource->mem.tile_buffer;
|
|
output_offset = mrt_resource->mem.offset_dw;
|
|
} else {
|
|
output_offset = mrt_resource->reg.output_reg;
|
|
}
|
|
|
|
assert(has_eight_output_registers || out_reg_count + output_offset <= 4);
|
|
|
|
program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
|
|
output_offset,
|
|
uses_tile_buffer);
|
|
|
|
shader_info = clear_attachment_collection[program_idx].info;
|
|
|
|
result = pvr_clear_color_attachment_static_create_consts_buffer(
|
|
cmd_buffer,
|
|
shader_info,
|
|
clear_color,
|
|
uses_tile_buffer,
|
|
tile_buffer_idx,
|
|
&const_shareds_buffer);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
/* clang-format off */
|
|
texture_program = (struct pvr_pds_pixel_shader_sa_program){
|
|
.num_texture_dma_kicks = 1,
|
|
.texture_dma_address = {
|
|
[0] = const_shareds_buffer->dev_addr.addr,
|
|
}
|
|
};
|
|
/* clang-format on */
|
|
|
|
pvr_csb_pack (&texture_program.texture_dma_control[0],
|
|
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
|
|
doutd_src1) {
|
|
doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE;
|
|
doutd_src1.bsize = shader_info->const_shared_regs;
|
|
}
|
|
|
|
clear_attachment_program =
|
|
&dev_clear_state->pds_clear_attachment_program_info[program_idx];
|
|
|
|
/* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
|
|
* Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
|
|
*/
|
|
result = pvr_cmd_buffer_alloc_mem(
|
|
cmd_buffer,
|
|
device->heaps.pds_heap,
|
|
clear_attachment_program->texture_program_data_size,
|
|
&pds_texture_program_bo);
|
|
if (result != VK_SUCCESS) {
|
|
list_del(&const_shareds_buffer->link);
|
|
pvr_bo_suballoc_free(const_shareds_buffer);
|
|
|
|
return result;
|
|
}
|
|
|
|
buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo);
|
|
pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr -
|
|
device->heaps.pds_heap->base_addr.addr;
|
|
|
|
pvr_pds_generate_pixel_shader_sa_texture_state_data(
|
|
&texture_program,
|
|
buffer,
|
|
&device->pdevice->dev_info);
|
|
|
|
pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
|
|
TA_STATE_PDS_SHADERBASE,
|
|
shaderbase) {
|
|
shaderbase.addr = clear_attachment_program->pixel_program_offset;
|
|
}
|
|
|
|
pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
|
|
TA_STATE_PDS_TEXUNICODEBASE,
|
|
texunicodebase) {
|
|
texunicodebase.addr = clear_attachment_program->texture_program_offset;
|
|
}
|
|
|
|
pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
|
|
TA_STATE_PDS_SIZEINFO1,
|
|
sizeinfo1) {
|
|
sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
|
|
clear_attachment_program->texture_program_data_size,
|
|
ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);
|
|
|
|
sizeinfo1.pds_tempsize =
|
|
DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
|
|
ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
|
|
}
|
|
|
|
pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
|
|
TA_STATE_PDS_SIZEINFO2,
|
|
sizeinfo2) {
|
|
sizeinfo2.usc_sharedsize =
|
|
DIV_ROUND_UP(shader_info->const_shared_regs,
|
|
ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
|
|
}
|
|
|
|
/* Dummy coefficient loading program. */
|
|
pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;
|
|
|
|
pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
|
|
TA_STATE_PDS_TEXTUREDATABASE,
|
|
texturedatabase) {
|
|
texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
|
|
}
|
|
|
|
assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
|
|
template =
|
|
cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
|
|
|
|
template.config.pds_state = &pds_state;
|
|
|
|
template.config.ispctl.upass =
|
|
cmd_buffer->state.render_pass_info.isp_userpass;
|
|
|
|
if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
|
/* clang-format off */
|
|
template.config.ispa.sref = stencil & ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
|
|
/* clang-format on */
|
|
}
|
|
|
|
if (vs_has_rt_id_output) {
|
|
template.config.output_sel.rhw_pres = true;
|
|
template.config.output_sel.render_tgt_pres = true;
|
|
template.config.output_sel.vtxsize = 4 + 1;
|
|
}
|
|
|
|
result = pvr_emit_ppp_from_template(
|
|
&cmd_buffer->state.current_sub_cmd->gfx.control_stream,
|
|
&template,
|
|
&pvr_bo);
|
|
if (result != VK_SUCCESS) {
|
|
list_del(&pds_texture_program_bo->link);
|
|
pvr_bo_suballoc_free(pds_texture_program_bo);
|
|
|
|
list_del(&const_shareds_buffer->link);
|
|
pvr_bo_suballoc_free(const_shareds_buffer);
|
|
|
|
return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
}
|
|
|
|
list_add(&pvr_bo->link, &cmd_buffer->bo_list);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
* \brief Record a deferred clear operation into the command buffer.
|
|
*
|
|
* Devices which don't have gs_rta_support require extra handling for RTA
|
|
* clears. We setup a list of deferred clear transfer commands which will be
|
|
* processed at the end of the graphics sub command to account for the missing
|
|
* feature.
|
|
*/
|
|
static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
|
|
const VkClearAttachment *attachment,
|
|
const VkClearRect *rect,
|
|
bool is_render_init)
|
|
{
|
|
struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
|
|
struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
|
|
const struct pvr_renderpass_hwsetup_render *hw_render =
|
|
&pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
|
|
struct pvr_transfer_cmd *transfer_cmd_list;
|
|
const struct pvr_image_view *image_view;
|
|
const struct pvr_image *image;
|
|
uint32_t base_layer;
|
|
|
|
const VkOffset3D offset = {
|
|
.x = rect->rect.offset.x,
|
|
.y = rect->rect.offset.y,
|
|
.z = 1,
|
|
};
|
|
const VkExtent3D extent = {
|
|
.width = rect->rect.extent.width,
|
|
.height = rect->rect.extent.height,
|
|
.depth = 1,
|
|
};
|
|
|
|
assert(
|
|
!PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));
|
|
|
|
transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
|
|
struct pvr_transfer_cmd,
|
|
rect->layerCount);
|
|
if (!transfer_cmd_list) {
|
|
return vk_command_buffer_set_error(&cmd_buffer->vk,
|
|
VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
}
|
|
|
|
/* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
|
|
*
|
|
* "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
|
|
* include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
|
|
*
|
|
*/
|
|
if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
|
|
assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
|
|
attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
|
|
attachment->aspectMask ==
|
|
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
|
|
|
|
image_view = pass_info->attachments[hw_render->ds_attach_idx];
|
|
} else if (is_render_init) {
|
|
uint32_t index;
|
|
|
|
assert(attachment->colorAttachment < hw_render->color_init_count);
|
|
index = hw_render->color_init[attachment->colorAttachment].index;
|
|
|
|
image_view = pass_info->attachments[index];
|
|
} else {
|
|
const struct pvr_renderpass_hwsetup_subpass *hw_pass =
|
|
pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
|
|
const struct pvr_render_subpass *sub_pass =
|
|
&pass_info->pass->subpasses[hw_pass->index];
|
|
const uint32_t attachment_idx =
|
|
sub_pass->color_attachments[attachment->colorAttachment];
|
|
|
|
assert(attachment->colorAttachment < sub_pass->color_count);
|
|
|
|
image_view = pass_info->attachments[attachment_idx];
|
|
}
|
|
|
|
base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
|
|
image = vk_to_pvr_image(image_view->vk.image);
|
|
|
|
for (uint32_t i = 0; i < rect->layerCount; i++) {
|
|
struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];
|
|
|
|
/* TODO: Add an init function for when we don't want to use
|
|
* pvr_transfer_cmd_alloc()? And use it here.
|
|
*/
|
|
*transfer_cmd = (struct pvr_transfer_cmd){
|
|
.flags = PVR_TRANSFER_CMD_FLAGS_FILL,
|
|
.cmd_buffer = cmd_buffer,
|
|
.is_deferred_clear = true,
|
|
};
|
|
|
|
if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
|
for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
|
|
transfer_cmd->clear_color[j].ui =
|
|
attachment->clearValue.color.uint32[j];
|
|
}
|
|
} else {
|
|
transfer_cmd->clear_color[0].f =
|
|
attachment->clearValue.depthStencil.depth;
|
|
transfer_cmd->clear_color[1].ui =
|
|
attachment->clearValue.depthStencil.stencil;
|
|
}
|
|
|
|
pvr_setup_transfer_surface(cmd_buffer->device,
|
|
&transfer_cmd->dst,
|
|
&transfer_cmd->scissor,
|
|
image,
|
|
base_layer + i,
|
|
0,
|
|
&offset,
|
|
&extent,
|
|
0.0f,
|
|
image->vk.format,
|
|
attachment->aspectMask);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
|
|
uint32_t attachment_count,
|
|
const VkClearAttachment *attachments,
|
|
uint32_t rect_count,
|
|
const VkClearRect *rects,
|
|
bool is_render_init)
|
|
{
|
|
const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
|
|
struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
|
|
const struct pvr_renderpass_hwsetup_subpass *hw_pass =
|
|
pvr_get_hw_subpass(pass, pass_info->subpass_idx);
|
|
struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
|
|
struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
|
|
struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
|
|
uint32_t vs_output_size_in_bytes;
|
|
bool vs_has_rt_id_output;
|
|
|
|
/* TODO: This function can be optimized so that most of the device memory
|
|
* gets allocated together in one go and then filled as needed. There might
|
|
* also be opportunities to reuse pds code and data segments.
|
|
*/
|
|
|
|
assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
|
|
|
|
pvr_reset_graphics_dirty_state(cmd_buffer, false);
|
|
|
|
/* We'll be emitting to the control stream. */
|
|
sub_cmd->empty_cmd = false;
|
|
|
|
vs_has_rt_id_output =
|
|
pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
|
|
|
|
/* 4 because we're expecting the USC to output X, Y, Z, and W. */
|
|
vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);
|
|
if (vs_has_rt_id_output)
|
|
vs_output_size_in_bytes += PVR_DW_TO_BYTES(1);
|
|
|
|
for (uint32_t i = 0; i < attachment_count; i++) {
|
|
const VkClearAttachment *attachment = &attachments[i];
|
|
struct pvr_pds_vertex_shader_program pds_program;
|
|
struct pvr_pds_upload pds_program_upload = { 0 };
|
|
uint64_t current_base_array_layer = ~0;
|
|
VkResult result;
|
|
float depth;
|
|
|
|
if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
|
uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
|
|
const struct usc_mrt_resource *mrt_resource;
|
|
uint32_t global_attachment_idx;
|
|
uint32_t local_attachment_idx;
|
|
VkFormat format;
|
|
|
|
local_attachment_idx = attachment->colorAttachment;
|
|
|
|
if (is_render_init) {
|
|
struct pvr_renderpass_hwsetup_render *hw_render;
|
|
|
|
assert(pass->hw_setup->render_count > 0);
|
|
hw_render = &pass->hw_setup->renders[0];
|
|
|
|
mrt_resource =
|
|
&hw_render->init_setup.mrt_resources[local_attachment_idx];
|
|
|
|
assert(local_attachment_idx < hw_render->color_init_count);
|
|
global_attachment_idx =
|
|
hw_render->color_init[local_attachment_idx].index;
|
|
} else {
|
|
mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];
|
|
|
|
assert(local_attachment_idx < sub_pass->color_count);
|
|
global_attachment_idx =
|
|
sub_pass->color_attachments[local_attachment_idx];
|
|
}
|
|
|
|
if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
|
|
continue;
|
|
|
|
assert(global_attachment_idx < pass->attachment_count);
|
|
format = pass->attachments[global_attachment_idx].vk_format;
|
|
|
|
assert(format != VK_FORMAT_UNDEFINED);
|
|
|
|
pvr_get_hw_clear_color(format,
|
|
attachment->clearValue.color,
|
|
packed_clear_color);
|
|
|
|
result = pvr_clear_color_attachment_static(cmd_buffer,
|
|
mrt_resource,
|
|
format,
|
|
packed_clear_color,
|
|
VK_IMAGE_ASPECT_COLOR_BIT,
|
|
0,
|
|
vs_has_rt_id_output);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
} else if (hw_pass->z_replicate != -1 &&
|
|
attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
|
const VkClearColorValue clear_color = {
|
|
.float32 = { [0] = attachment->clearValue.depthStencil.depth, },
|
|
};
|
|
const uint32_t template_idx = attachment->aspectMask |
|
|
VK_IMAGE_ASPECT_COLOR_BIT;
|
|
const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
|
|
uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
|
|
const struct usc_mrt_resource *mrt_resource;
|
|
|
|
mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];
|
|
|
|
pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
|
|
clear_color,
|
|
packed_clear_color);
|
|
|
|
result = pvr_clear_color_attachment_static(cmd_buffer,
|
|
mrt_resource,
|
|
VK_FORMAT_R32_SFLOAT,
|
|
packed_clear_color,
|
|
template_idx,
|
|
stencil,
|
|
vs_has_rt_id_output);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
} else {
|
|
const uint32_t template_idx = attachment->aspectMask;
|
|
struct pvr_static_clear_ppp_template template;
|
|
struct pvr_suballoc_bo *pvr_bo;
|
|
|
|
assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
|
|
template =
|
|
cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
|
|
|
|
if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
|
/* clang-format off */
|
|
template.config.ispa.sref =
|
|
attachment->clearValue.depthStencil.stencil &
|
|
ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
|
|
/* clang-format on */
|
|
}
|
|
|
|
if (vs_has_rt_id_output) {
|
|
template.config.output_sel.rhw_pres = true;
|
|
template.config.output_sel.render_tgt_pres = true;
|
|
template.config.output_sel.vtxsize = 4 + 1;
|
|
}
|
|
|
|
result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
|
|
&template,
|
|
&pvr_bo);
|
|
if (result != VK_SUCCESS) {
|
|
pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
return;
|
|
}
|
|
|
|
list_add(&pvr_bo->link, &cmd_buffer->bo_list);
|
|
}
|
|
|
|
if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
|
depth = attachment->clearValue.depthStencil.depth;
|
|
else
|
|
depth = 1.0f;
|
|
|
|
if (vs_has_rt_id_output) {
|
|
const struct pvr_device_static_clear_state *dev_clear_state =
|
|
&cmd_buffer->device->static_clear_state;
|
|
const struct pvr_suballoc_bo *multi_layer_vert_bo =
|
|
dev_clear_state->usc_multi_layer_vertex_shader_bo;
|
|
|
|
/* We can't use the device's passthrough pds program since it doesn't
|
|
* have iterate_instance_id enabled. We'll be uploading code sections
|
|
* per each clear rect.
|
|
*/
|
|
|
|
/* TODO: See if we can allocate all the code section memory in one go.
|
|
* We'd need to make sure that changing instance_id_modifier doesn't
|
|
* change the code section size.
|
|
* Also check if we can reuse the same code segment for each rect.
|
|
* Seems like the instance_id_modifier is written into the data section
|
|
* and used by the pds ADD instruction that way instead of it being
|
|
* embedded into the code section.
|
|
*/
|
|
|
|
pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
|
|
multi_layer_vert_bo);
|
|
} else {
|
|
/* We can reuse the device's code section but we'll need to upload data
|
|
* sections so initialize the program.
|
|
*/
|
|
pvr_pds_clear_vertex_shader_program_init_base(
|
|
&pds_program,
|
|
cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
|
|
|
|
pds_program_upload.code_offset =
|
|
cmd_buffer->device->static_clear_state.pds.code_offset;
|
|
/* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
|
|
* let's change its interface to make that clear and not set this?
|
|
*/
|
|
pds_program_upload.code_size =
|
|
cmd_buffer->device->static_clear_state.pds.code_size;
|
|
}
|
|
|
|
for (uint32_t j = 0; j < rect_count; j++) {
|
|
struct pvr_pds_upload pds_program_data_upload;
|
|
const VkClearRect *clear_rect = &rects[j];
|
|
struct pvr_suballoc_bo *vertices_bo;
|
|
uint32_t vdm_cs_size_in_dw;
|
|
uint32_t *vdm_cs_buffer;
|
|
VkResult result;
|
|
|
|
if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
|
|
(clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
|
|
result = pvr_add_deferred_rta_clear(cmd_buffer,
|
|
attachment,
|
|
clear_rect,
|
|
is_render_init);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
if (clear_rect->baseArrayLayer != 0)
|
|
continue;
|
|
}
|
|
|
|
/* TODO: Allocate all the buffers in one go before the loop, and add
|
|
* support to multi-alloc bo.
|
|
*/
|
|
result = pvr_clear_vertices_upload(cmd_buffer->device,
|
|
&clear_rect->rect,
|
|
depth,
|
|
&vertices_bo);
|
|
if (result != VK_SUCCESS) {
|
|
pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
return;
|
|
}
|
|
|
|
list_add(&vertices_bo->link, &cmd_buffer->bo_list);
|
|
|
|
if (vs_has_rt_id_output) {
|
|
if (current_base_array_layer != clear_rect->baseArrayLayer) {
|
|
const uint32_t base_array_layer = clear_rect->baseArrayLayer;
|
|
struct pvr_pds_upload pds_program_code_upload;
|
|
|
|
result =
|
|
pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
|
|
&pds_program,
|
|
cmd_buffer,
|
|
base_array_layer,
|
|
&pds_program_code_upload);
|
|
if (result != VK_SUCCESS) {
|
|
pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
|
|
return;
|
|
}
|
|
|
|
pds_program_upload.code_offset =
|
|
pds_program_code_upload.code_offset;
|
|
/* TODO: The code size doesn't get used by pvr_clear_vdm_state()
|
|
* maybe let's change its interface to make that clear and not
|
|
* set this?
|
|
*/
|
|
pds_program_upload.code_size = pds_program_code_upload.code_size;
|
|
|
|
current_base_array_layer = base_array_layer;
|
|
}
|
|
|
|
result =
|
|
pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
|
|
&pds_program,
|
|
cmd_buffer,
|
|
vertices_bo,
|
|
&pds_program_data_upload);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
} else {
|
|
result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
|
|
&pds_program,
|
|
cmd_buffer,
|
|
vertices_bo,
|
|
&pds_program_data_upload);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
|
|
pds_program_upload.data_offset = pds_program_data_upload.data_offset;
|
|
pds_program_upload.data_size = pds_program_data_upload.data_size;
|
|
|
|
vdm_cs_size_in_dw =
|
|
pvr_clear_vdm_state_get_size_in_dw(dev_info,
|
|
clear_rect->layerCount);
|
|
|
|
pvr_csb_set_relocation_mark(&sub_cmd->control_stream);
|
|
|
|
vdm_cs_buffer =
|
|
pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
|
|
if (!vdm_cs_buffer) {
|
|
pvr_cmd_buffer_set_error_unwarned(cmd_buffer,
|
|
sub_cmd->control_stream.status);
|
|
return;
|
|
}
|
|
|
|
pvr_pack_clear_vdm_state(dev_info,
|
|
&pds_program_upload,
|
|
pds_program.temps_used,
|
|
4,
|
|
vs_output_size_in_bytes,
|
|
clear_rect->layerCount,
|
|
vdm_cs_buffer);
|
|
|
|
pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
|
|
}
|
|
}
|
|
}
|
|
|
|
void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer,
|
|
const VkClearAttachment *attachment,
|
|
const VkClearRect *rect)
|
|
{
|
|
pvr_clear_attachments(cmd_buffer, 1, attachment, 1, rect, true);
|
|
}
|
|
|
|
void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
|
|
uint32_t attachmentCount,
|
|
const VkClearAttachment *pAttachments,
|
|
uint32_t rectCount,
|
|
const VkClearRect *pRects)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
|
struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
|
|
|
|
/* TODO: There are some optimizations that can be made here:
|
|
* - For a full screen clear, update the clear values for the corresponding
|
|
* attachment index.
|
|
* - For a full screen color attachment clear, add its index to a load op
|
|
* override to add it to the background shader. This will elide any load
|
|
* op loads currently in the background shader as well as the usual
|
|
* frag kick for geometry clear.
|
|
*/
|
|
|
|
/* If we have any depth/stencil clears, update the sub command depth/stencil
|
|
* modification and usage flags.
|
|
*/
|
|
if (state->depth_format != VK_FORMAT_UNDEFINED) {
|
|
uint32_t full_screen_clear_count;
|
|
bool has_stencil_clear = false;
|
|
bool has_depth_clear = false;
|
|
|
|
for (uint32_t i = 0; i < attachmentCount; i++) {
|
|
const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;
|
|
|
|
if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
|
has_stencil_clear = true;
|
|
|
|
if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
|
has_depth_clear = true;
|
|
|
|
if (has_stencil_clear && has_depth_clear)
|
|
break;
|
|
}
|
|
|
|
sub_cmd->modifies_stencil |= has_stencil_clear;
|
|
sub_cmd->modifies_depth |= has_depth_clear;
|
|
|
|
/* We only care about clears that have a baseArrayLayer of 0 as any
|
|
* attachment clears we move to the background shader must apply to all of
|
|
* the attachment's sub resources.
|
|
*/
|
|
full_screen_clear_count =
|
|
pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
|
|
0,
|
|
rectCount,
|
|
pRects);
|
|
|
|
if (full_screen_clear_count > 0) {
|
|
if (has_stencil_clear &&
|
|
sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
|
|
sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
|
|
}
|
|
|
|
if (has_depth_clear &&
|
|
sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
|
|
sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
|
|
}
|
|
}
|
|
}
|
|
|
|
pvr_clear_attachments(cmd_buffer,
|
|
attachmentCount,
|
|
pAttachments,
|
|
rectCount,
|
|
pRects,
|
|
false);
|
|
}
|
|
|
|
void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer,
|
|
const VkResolveImageInfo2 *pResolveImageInfo)
|
|
{
|
|
PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
|
|
PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
|
|
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
|
|
|
for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) {
|
|
VkImageCopy2 region = {
|
|
.sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
|
|
.srcSubresource = pResolveImageInfo->pRegions[i].srcSubresource,
|
|
.srcOffset = pResolveImageInfo->pRegions[i].srcOffset,
|
|
.dstSubresource = pResolveImageInfo->pRegions[i].dstSubresource,
|
|
.dstOffset = pResolveImageInfo->pRegions[i].dstOffset,
|
|
.extent = pResolveImageInfo->pRegions[i].extent,
|
|
};
|
|
|
|
VkResult result =
|
|
pvr_copy_or_resolve_color_image_region(cmd_buffer, src, dst, ®ion);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|