/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_formats.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "pvr_shader_factory.h"
#include "pvr_static_shaders.h"
#include "pvr_types.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_format.h"
#include "vk_log.h"

/* TODO: Investigate where this limit comes from. */
#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
static struct pvr_transfer_cmd *
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_transfer_cmd *transfer_cmd;

   transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
                            sizeof(*transfer_cmd),
                            8U,
                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!transfer_cmd) {
      cmd_buffer->state.status =
         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   /* transfer_cmd->mapping_count is already set to zero. */
   transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
   transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
   transfer_cmd->sources[0].addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   transfer_cmd->cmd_buffer = cmd_buffer;

   return transfer_cmd;
}
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
                                     VkRect2D *rect,
                                     pvr_dev_addr_t dev_addr,
                                     VkDeviceSize offset,
                                     VkFormat vk_format,
                                     uint32_t width,
                                     uint32_t height,
                                     uint32_t stride)
{
   surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
   surface->width = width;
   surface->height = height;
   surface->stride = stride;
   surface->vk_format = vk_format;
   surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
   surface->sample_count = 1;

   /* Initialize the rectangle extent. rect->offset should be zero, as the
    * offset is already folded into the device address above; we don't set it
    * explicitly since transfer_cmd is zero-allocated.
    */
   rect->extent.width = width;
   rect->extent.height = height;
}
static VkFormat pvr_get_raw_copy_format(VkFormat format)
{
   switch (vk_format_get_blocksize(format)) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R8G8_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unhandled copy block size.");
   }
}
static void pvr_setup_transfer_surface(struct pvr_device *device,
                                       struct pvr_transfer_cmd_surface *surface,
                                       VkRect2D *rect,
                                       const struct pvr_image *image,
                                       uint32_t array_layer,
                                       uint32_t mip_level,
                                       const VkOffset3D *offset,
                                       const VkExtent3D *extent,
                                       float fdepth,
                                       VkFormat format,
                                       VkImageAspectFlags aspect_mask)
{
   const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
   const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
   const VkImageSubresource sub_resource = {
      .aspectMask = aspect_mask,
      .mipLevel = mip_level,
      .arrayLayer = array_layer,
   };
   VkSubresourceLayout info;
   uint32_t depth;

   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
   else
      depth = 1U;

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
   surface->width = width;
   surface->height = height;
   surface->depth = depth;
   surface->stride = info.rowPitch / vk_format_get_blocksize(format);
   surface->vk_format = format;
   surface->mem_layout = image->memlayout;
   surface->sample_count = image->vk.samples;

   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      surface->z_position = fdepth;
   else
      surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);

   rect->offset.x = offset->x;
   rect->offset.y = offset->y;
   rect->extent.width = extent->width;
   rect->extent.height = extent->height;
}
void pvr_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                          const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
   struct pvr_device *device = cmd_buffer->device;
   enum pvr_filter filter = PVR_FILTER_DONTCARE;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
      filter = PVR_FILTER_LINEAR;

   for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];

      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);
      const bool inverted_dst_z =
         (region->dstOffsets[1].z < region->dstOffsets[0].z);
      const bool inverted_src_z =
         (region->srcOffsets[1].z < region->srcOffsets[0].z);
      const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
                                                : region->srcOffsets[0].z;
      const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
                                                : region->srcOffsets[1].z;
      const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
                                                : region->dstOffsets[0].z;
      const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
                                                : region->dstOffsets[1].z;

      const uint32_t src_width =
         region->srcOffsets[1].x - region->srcOffsets[0].x;
      const uint32_t src_height =
         region->srcOffsets[1].y - region->srcOffsets[0].y;
      const uint32_t dst_width =
         region->dstOffsets[1].x - region->dstOffsets[0].x;
      const uint32_t dst_height =
         region->dstOffsets[1].y - region->dstOffsets[0].y;

      float initial_depth_offset;
      VkExtent3D src_extent;
      VkExtent3D dst_extent;
      float z_slice_stride;

      /* If any of the extent regions is zero, reject the blit and continue. */
      if (!src_width || !src_height || !dst_width || !dst_height ||
          !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
         mesa_loge("BlitImage: Region %i has an area of zero", i);
         continue;
      }

      src_extent = (VkExtent3D){
         .width = src_width,
         .height = src_height,
         .depth = 0U,
      };

      dst_extent = (VkExtent3D){
         .width = dst_width,
         .height = dst_height,
         .depth = 0U,
      };

      /* The z_position of a transfer surface is intended to be in the range
       * 0.0f <= z_position <= depth. It will be used as a texture coordinate
       * in the source surface when linear filtering is enabled, so the
       * fractional part needs to represent the exact midpoint of a z slice
       * range in the source texture, as it maps to each destination slice.
       *
       * For destination surfaces the fractional part is discarded, so we can
       * safely pass the slice index.
       */

      /* Calculate the ratio of z slices in our source region to that of our
       * destination region, to get the number of z slices in our source
       * region to iterate over for each destination slice.
       *
       * If our destination region is inverted, we iterate backwards.
       */
      z_slice_stride =
         (inverted_dst_z ? -1.0f : 1.0f) *
         ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));

      /* Offset the initial depth offset by half of the z slice stride, into
       * the blit region's z range.
       */
      initial_depth_offset =
         (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);
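
      /* Worked example (illustrative numbers, not from the spec): blitting a
       * source z range [0, 4) onto a destination z range [0, 8) gives
       * z_slice_stride = 4 / 8 = 0.5 and initial_depth_offset = 0 + 0.25, so
       * destination slices 0, 1, 2, ... sample source depths 0.25, 0.75,
       * 1.25, ... i.e. the midpoint of each half-slice span.
       */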

      for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
         struct pvr_transfer_cmd_surface src_surface = { 0 };
         struct pvr_transfer_cmd_surface dst_surface = { 0 };
         VkRect2D src_rect;
         VkRect2D dst_rect;

         /* Get the subresource info for the src and dst images; this is
          * required when incrementing the address of the depth slice used by
          * the transfer surface.
          */
         VkSubresourceLayout src_info, dst_info;
         const VkImageSubresource src_sub_resource = {
            .aspectMask = region->srcSubresource.aspectMask,
            .mipLevel = region->srcSubresource.mipLevel,
            .arrayLayer = region->srcSubresource.baseArrayLayer + j,
         };
         const VkImageSubresource dst_sub_resource = {
            .aspectMask = region->dstSubresource.aspectMask,
            .mipLevel = region->dstSubresource.mipLevel,
            .arrayLayer = region->dstSubresource.baseArrayLayer + j,
         };

         pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
         pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);

         /* Set up the transfer surfaces once per image layer, which saves us
          * from repeating subresource queries by manually incrementing the
          * depth slices.
          */
         pvr_setup_transfer_surface(device,
                                    &src_surface,
                                    &src_rect,
                                    src,
                                    region->srcSubresource.baseArrayLayer + j,
                                    region->srcSubresource.mipLevel,
                                    &region->srcOffsets[0],
                                    &src_extent,
                                    initial_depth_offset,
                                    src->vk.format,
                                    region->srcSubresource.aspectMask);

         pvr_setup_transfer_surface(device,
                                    &dst_surface,
                                    &dst_rect,
                                    dst,
                                    region->dstSubresource.baseArrayLayer + j,
                                    region->dstSubresource.mipLevel,
                                    &region->dstOffsets[0],
                                    &dst_extent,
                                    min_dst_z,
                                    dst->vk.format,
                                    region->dstSubresource.aspectMask);

         for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            /* TODO: See if we can allocate all the transfer cmds in one go. */
            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return;

            transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
            transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
            transfer_cmd->sources[0].mapping_count++;

            transfer_cmd->sources[0].surface = src_surface;
            transfer_cmd->sources[0].filter = filter;
            transfer_cmd->source_count = 1;

            transfer_cmd->dst = dst_surface;
            transfer_cmd->scissor = dst_rect;

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return;
            }

            if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
               src_surface.z_position += z_slice_stride;
            } else {
               src_surface.dev_addr.addr +=
                  src_info.depthPitch * ((uint32_t)z_slice_stride);
            }

            if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
               dst_surface.z_position += 1.0f;
            else
               dst_surface.dev_addr.addr += dst_info.depthPitch;
         }
      }
   }
}
static VkFormat pvr_get_copy_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_SINT;
   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_SINT;
   case VK_FORMAT_R8G8B8_SNORM:
      return VK_FORMAT_R8G8B8_SINT;
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_SINT;
   case VK_FORMAT_B8G8R8A8_SNORM:
      return VK_FORMAT_B8G8R8A8_SINT;
   default:
      return format;
   }
}
static void
pvr_setup_surface_for_image(struct pvr_device *device,
                            struct pvr_transfer_cmd_surface *surface,
                            VkRect2D *rect,
                            const struct pvr_image *image,
                            uint32_t array_layer,
                            uint32_t array_offset,
                            uint32_t mip_level,
                            const VkOffset3D *offset,
                            const VkExtent3D *extent,
                            uint32_t depth,
                            VkFormat format,
                            const VkImageAspectFlags aspect_mask)
{
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      pvr_setup_transfer_surface(device,
                                 surface,
                                 rect,
                                 image,
                                 array_layer + array_offset,
                                 mip_level,
                                 offset,
                                 extent,
                                 0.0f,
                                 format,
                                 aspect_mask);
   } else {
      pvr_setup_transfer_surface(device,
                                 surface,
                                 rect,
                                 image,
                                 array_layer,
                                 mip_level,
                                 offset,
                                 extent,
                                 (float)depth,
                                 format,
                                 aspect_mask);
   }
}
static VkResult
pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                 enum pvr_resolve_op resolve_op,
                                 const struct pvr_image *src,
                                 const struct pvr_image *dst,
                                 const VkImageCopy2 *region)
{
   VkExtent3D src_extent;
   VkExtent3D dst_extent;
   VkFormat dst_format;
   VkFormat src_format;
   uint32_t dst_layers;
   uint32_t src_layers;
   uint32_t max_slices;
   uint32_t flags = 0U;

   if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
       region->srcSubresource.aspectMask !=
          (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      /* Takes the stencil of the source and the depth of the destination and
       * combines the two interleaved.
       */
      flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

      if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         /* Takes the depth of the source and the stencil of the destination
          * and combines the two interleaved.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }
   }
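
   /* Summarizing the two cases above: a single-aspect copy into a packed
    * D24S8 destination is a read-modify-write. DSMERGE alone writes the
    * source's stencil and keeps the destination's depth, while
    * DSMERGE | PICKD writes the source's depth and keeps the destination's
    * stencil.
    */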

   src_extent = region->extent;
   dst_extent = region->extent;

   /* We don't care what format dst is as it's guaranteed to be size
    * compatible with src.
    */
   dst_format = pvr_get_copy_format(src->vk.format);
   src_format = dst_format;

   src_layers =
      vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
   dst_layers =
      vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);

   /* srcSubresource.layerCount must match the layerCount of dstSubresource
    * in copies not involving 3D images. In copies involving 3D images, if
    * there is a 2D image, the layer count is taken from it.
    */
   max_slices = MAX3(src_layers, dst_layers, region->extent.depth);

   for (uint32_t i = 0U; i < max_slices; i++) {
      struct pvr_transfer_cmd *transfer_cmd;
      VkResult result;

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      transfer_cmd->flags |= flags;
      transfer_cmd->sources[0].resolve_op = resolve_op;

      pvr_setup_surface_for_image(
         cmd_buffer->device,
         &transfer_cmd->sources[0].surface,
         &transfer_cmd->sources[0].mappings[0U].src_rect,
         src,
         region->srcSubresource.baseArrayLayer,
         i,
         region->srcSubresource.mipLevel,
         &region->srcOffset,
         &src_extent,
         region->srcOffset.z + i,
         src_format,
         region->srcSubresource.aspectMask);

      pvr_setup_surface_for_image(cmd_buffer->device,
                                  &transfer_cmd->dst,
                                  &transfer_cmd->scissor,
                                  dst,
                                  region->dstSubresource.baseArrayLayer,
                                  i,
                                  region->dstSubresource.mipLevel,
                                  &region->dstOffset,
                                  &dst_extent,
                                  region->dstOffset.z + i,
                                  dst_format,
                                  region->dstSubresource.aspectMask);

      transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
      transfer_cmd->sources[0].mapping_count++;
      transfer_cmd->source_count = 1;

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }
   }

   return VK_SUCCESS;
}
VkResult
pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                       const struct pvr_image *src,
                                       const struct pvr_image *dst,
                                       const VkImageCopy2 *region)
{
   enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;

   if (src->vk.samples > 1U && dst->vk.samples < 2U) {
      /* Integer resolve picks a single sample. */
      if (vk_format_is_int(src->vk.format))
         resolve_op = PVR_RESOLVE_SAMPLE0;
   }

   return pvr_copy_or_resolve_image_region(cmd_buffer,
                                           resolve_op,
                                           src,
                                           dst,
                                           region);
}
static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
                                     const VkImageCopy2 *pRegionB)
{
   assert(pRegionA->srcSubresource.aspectMask != 0U);
   assert(pRegionB->srcSubresource.aspectMask != 0U);
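
   /* Merging only makes sense when one region covers the depth aspect and
    * the other the stencil aspect; the XOR below requires the two
    * aspectMasks to differ in their depth/stencil bits.
    */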
   if (!((pRegionA->srcSubresource.aspectMask ^
          pRegionB->srcSubresource.aspectMask) &
         (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      return false;
   }

   /* Assert on an aspectMask mismatch between src and dst, given that this
    * is a depth/stencil image (so not multi-planar) and, from the Vulkan
    * 1.0.223 spec:
    *
    *    If neither srcImage nor dstImage has a multi-planar image format
    *    then for each element of pRegions, srcSubresource.aspectMask and
    *    dstSubresource.aspectMask must match.
    */
   assert(pRegionA->srcSubresource.aspectMask ==
          pRegionA->dstSubresource.aspectMask);
   assert(pRegionB->srcSubresource.aspectMask ==
          pRegionB->dstSubresource.aspectMask);

   if (!(pRegionA->srcSubresource.mipLevel ==
            pRegionB->srcSubresource.mipLevel &&
         pRegionA->srcSubresource.baseArrayLayer ==
            pRegionB->srcSubresource.baseArrayLayer &&
         pRegionA->srcSubresource.layerCount ==
            pRegionB->srcSubresource.layerCount)) {
      return false;
   }

   if (!(pRegionA->dstSubresource.mipLevel ==
            pRegionB->dstSubresource.mipLevel &&
         pRegionA->dstSubresource.baseArrayLayer ==
            pRegionB->dstSubresource.baseArrayLayer &&
         pRegionA->dstSubresource.layerCount ==
            pRegionB->dstSubresource.layerCount)) {
      return false;
   }

   if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
         pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
         pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
      return false;
   }

   if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
         pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
         pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
      return false;
   }

   if (!(pRegionA->extent.width == pRegionB->extent.width &&
         pRegionA->extent.height == pRegionB->extent.height &&
         pRegionA->extent.depth == pRegionB->extent.depth)) {
      return false;
   }

   return true;
}
void pvr_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                          const VkCopyImageInfo2KHR *pCopyImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);

   const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
                             dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
      VkResult result;

      /* If an application has split a copy between D24S8 images into two
       * separate copy regions (one for the depth aspect and one for the
       * stencil aspect), attempt to merge the two regions back into one
       * blit.
       *
       * The regions can only be merged if they are identical apart from the
       * aspectMask, where one has to be depth and the other stencil.
       *
       * Only consecutive regions are considered for merging; non-consecutive
       * regions are ignored.
       */
      if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) {
         const bool ret =
            pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i],
                                     &pCopyImageInfo->pRegions[i + 1]);
         if (ret) {
            VkImageCopy2 region = pCopyImageInfo->pRegions[i];

            region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;
            region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;

            result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                            src,
                                                            dst,
                                                            &region);
            if (result != VK_SUCCESS)
               return;

            /* Skip the next region as it has been processed with the last
             * region.
             */
            i++;

            continue;
         }
      }

      result =
         pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                src,
                                                dst,
                                                &pCopyImageInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}
static VkResult
pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                const struct pvr_buffer *buffer,
                                const struct pvr_image *image,
                                const VkBufferImageCopy2 *region)
{
   const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
   uint32_t row_length_in_texels;
   uint32_t buffer_slice_size;
   uint32_t buffer_layer_size;
   uint32_t height_in_blks;
   uint32_t row_length;
   VkFormat src_format;
   VkFormat dst_format;
   uint32_t flags = 0;

   if (vk_format_has_depth(image->vk.format) &&
       vk_format_has_stencil(image->vk.format)) {
      flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

      if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
         src_format = vk_format_stencil_only(image->vk.format);
      } else {
         src_format = vk_format_depth_only(image->vk.format);
         flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      dst_format = image->vk.format;
   } else {
      src_format = pvr_get_raw_copy_format(image->vk.format);
      dst_format = src_format;
   }

   if (region->bufferRowLength == 0)
      row_length_in_texels = region->imageExtent.width;
   else
      row_length_in_texels = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height_in_blks = region->imageExtent.height;
   else
      height_in_blks = region->bufferImageHeight;

   row_length = row_length_in_texels * vk_format_get_blocksize(src_format);

   buffer_slice_size = height_in_blks * row_length;
   buffer_layer_size = buffer_slice_size * region->imageExtent.depth;
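
   /* Worked example (hypothetical numbers): copying a 128x64x4 region of an
    * R8G8B8A8 image (4-byte blocks) with bufferRowLength and
    * bufferImageHeight left at zero gives row_length = 128 * 4 = 512 bytes,
    * buffer_slice_size = 64 * 512 = 32768 bytes, and buffer_layer_size =
    * 32768 * 4 = 131072 bytes; the loop below indexes into the buffer with
    * these strides per layer and per depth slice.
    */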

   for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
      const uint32_t depth = i + (uint32_t)region->imageOffset.z;

      for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
         const VkDeviceSize buffer_offset = region->bufferOffset +
                                            (j * buffer_layer_size) +
                                            (i * buffer_slice_size);
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return VK_ERROR_OUT_OF_HOST_MEMORY;

         transfer_cmd->flags = flags;

         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            buffer->dev_addr,
            buffer_offset,
            src_format,
            region->imageExtent.width,
            region->imageExtent.height,
            row_length_in_texels);

         transfer_cmd->sources[0].surface.depth = 1;
         transfer_cmd->source_count = 1;

         pvr_setup_transfer_surface(cmd_buffer->device,
                                    &transfer_cmd->dst,
                                    &transfer_cmd->scissor,
                                    image,
                                    region->imageSubresource.baseArrayLayer + j,
                                    region->imageSubresource.mipLevel,
                                    &region->imageOffset,
                                    &region->imageExtent,
                                    depth,
                                    dst_format,
                                    region->imageSubresource.aspectMask);

         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
         transfer_cmd->sources[0].mapping_count++;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }
      }
   }

   return VK_SUCCESS;
}
void pvr_CmdCopyBufferToImage2KHR(
   VkCommandBuffer commandBuffer,
   const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
      const VkResult result =
         pvr_copy_buffer_to_image_region(cmd_buffer,
                                         src,
                                         dst,
                                         &pCopyBufferToImageInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}
static VkResult
pvr_copy_image_to_buffer_region(struct pvr_device *device,
                                struct pvr_cmd_buffer *cmd_buffer,
                                const struct pvr_image *image,
                                const struct pvr_buffer *buffer,
                                const VkBufferImageCopy2 *region)
{
   const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
   VkFormat image_format = pvr_get_copy_format(image->vk.format);
   struct pvr_transfer_cmd_surface dst_surface = { 0 };
   VkImageSubresource sub_resource;
   uint32_t buffer_image_height;
   uint32_t buffer_row_length;
   uint32_t buffer_slice_size;
   uint32_t max_array_layers;
   VkRect2D dst_rect = { 0 };
   uint32_t max_depth_slice;
   VkSubresourceLayout info;
   VkFormat dst_format;

   /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to a buffer. */
   assert(image->vk.samples == 1);

   /* Only one aspect bit should be set per region. */
   assert(util_is_power_of_two_nonzero(aspect_mask));

   /* Color and depth aspect copies can be done using an appropriate raw
    * format.
    */
   if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
      image_format = pvr_get_raw_copy_format(image_format);
      dst_format = image_format;
   } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* From the Vulkan spec:
       *
       *    Data copied to or from the stencil aspect of any depth/stencil
       *    format is tightly packed with one VK_FORMAT_S8_UINT value per
       *    texel.
       */
      dst_format = VK_FORMAT_S8_UINT;
   } else {
      /* YUV planes require specific formats. */
      dst_format = image_format;
   }

   if (region->bufferRowLength == 0)
      buffer_row_length = region->imageExtent.width;
   else
      buffer_row_length = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buffer_image_height = region->imageExtent.height;
   else
      buffer_image_height = region->bufferImageHeight;

   max_array_layers =
      region->imageSubresource.baseArrayLayer +
      vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);

   buffer_slice_size = buffer_image_height * buffer_row_length *
                       vk_format_get_blocksize(dst_format);

   max_depth_slice = region->imageExtent.depth + region->imageOffset.z;

   pvr_setup_buffer_surface(&dst_surface,
                            &dst_rect,
                            buffer->dev_addr,
                            region->bufferOffset,
                            dst_format,
                            buffer_row_length,
                            buffer_image_height,
                            buffer_row_length);

   dst_rect.extent.width = region->imageExtent.width;
   dst_rect.extent.height = region->imageExtent.height;
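
   /* Note: pvr_setup_buffer_surface() sized dst_rect to the whole
    * buffer_row_length x buffer_image_height surface; the two lines above
    * shrink it back to the image extent so only the copied region is
    * written, while the larger stride still provides the row padding
    * requested by bufferRowLength.
    */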

   sub_resource = (VkImageSubresource){
      .aspectMask = region->imageSubresource.aspectMask,
      .mipLevel = region->imageSubresource.mipLevel,
      .arrayLayer = region->imageSubresource.baseArrayLayer,
   };

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   for (uint32_t i = region->imageSubresource.baseArrayLayer;
        i < max_array_layers;
        i++) {
      struct pvr_transfer_cmd_surface src_surface = { 0 };
      VkRect2D src_rect = { 0 };

      /* Note: Set the depth to the initial depth offset; the memory address
       * (or the z_position) for the depth slice will be incremented manually
       * in the loop below.
       */
      pvr_setup_transfer_surface(device,
                                 &src_surface,
                                 &src_rect,
                                 image,
                                 i,
                                 region->imageSubresource.mipLevel,
                                 &region->imageOffset,
                                 &region->imageExtent,
                                 region->imageOffset.z,
                                 image_format,
                                 region->imageSubresource.aspectMask);

      for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         /* TODO: See if we can allocate all the transfer cmds in one go. */
         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
         transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
         transfer_cmd->sources[0].mapping_count++;

         transfer_cmd->sources[0].surface = src_surface;
         transfer_cmd->source_count = 1;

         transfer_cmd->dst = dst_surface;
         transfer_cmd->scissor = dst_rect;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }

         dst_surface.dev_addr.addr += buffer_slice_size;

         if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
            src_surface.z_position += 1.0f;
         else
            src_surface.dev_addr.addr += info.depthPitch;
      }
   }

   return VK_SUCCESS;
}
void pvr_CmdCopyImageToBuffer2(
   VkCommandBuffer commandBuffer,
   const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[i];
      const VkResult result =
         pvr_copy_image_to_buffer_region(cmd_buffer->device,
                                         cmd_buffer,
                                         src,
                                         dst,
                                         region);
      if (result != VK_SUCCESS)
         return;
   }
}
static void pvr_calc_mip_level_extents(const struct pvr_image *image,
                                       uint16_t mip_level,
                                       VkExtent3D *extent_out)
{
   /* 3D textures are clamped to 4x4x4. */
   const uint32_t clamp = (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4 : 1;
   const VkExtent3D *extent = &image->vk.extent;

   extent_out->width = MAX2(extent->width >> mip_level, clamp);
   extent_out->height = MAX2(extent->height >> mip_level, clamp);
   extent_out->depth = MAX2(extent->depth >> mip_level, clamp);
}
static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
                                      const struct pvr_image *image,
                                      const VkClearColorValue *pColor,
                                      const VkImageSubresourceRange *psRange,
                                      uint32_t flags)
{
   const uint32_t layer_count =
      vk_image_subresource_layer_count(&image->vk, psRange);
   const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
   VkFormat format = image->vk.format;
   const VkOffset3D offset = { 0 };
   VkExtent3D mip_extent;

   assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);

   for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
      const uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, psRange);
      const uint32_t max_level = psRange->baseMipLevel + level_count;

      assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);

      for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
         pvr_calc_mip_level_extents(image, level, &mip_extent);

         for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return VK_ERROR_OUT_OF_HOST_MEMORY;

            transfer_cmd->flags |= flags;
            transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

            for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
               transfer_cmd->clear_color[i].ui = pColor->uint32[i];

            pvr_setup_transfer_surface(cmd_buffer->device,
                                       &transfer_cmd->dst,
                                       &transfer_cmd->scissor,
                                       image,
                                       layer,
                                       level,
                                       &offset,
                                       &mip_extent,
                                       depth,
                                       format,
                                       psRange->aspectMask);

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return result;
            }
         }
      }
   }

   return VK_SUCCESS;
}
void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
                            VkImage _image,
                            VkImageLayout imageLayout,
                            const VkClearColorValue *pColor,
                            uint32_t rangeCount,
                            const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkResult result =
         pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
      if (result != VK_SUCCESS)
         return;
   }
}
void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                                   VkImage _image,
                                   VkImageLayout imageLayout,
                                   const VkClearDepthStencilValue *pDepthStencil,
                                   uint32_t rangeCount,
                                   const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
                                           VK_IMAGE_ASPECT_STENCIL_BIT;
      VkClearColorValue clear_ds = { 0 };
      uint32_t flags = 0U;
      VkResult result;

      if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pRanges[i].aspectMask != ds_aspect) {
         /* A depth or stencil blit to a packed depth/stencil format requires
          * a merge operation.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

         if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
            flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      clear_ds.float32[0] = pDepthStencil->depth;
      clear_ds.uint32[1] = pDepthStencil->stencil;
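
      /* Pack depth into the first clear-color channel as a float and stencil
       * into the second as an integer; this matches how depth/stencil fills
       * read clear_color[0].f and clear_color[1].ui elsewhere in this file
       * (see pvr_add_deferred_rta_clear()).
       */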

      result =
         pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
      if (result != VK_SUCCESS)
         return;
   }
}
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
                                           pvr_dev_addr_t src_addr,
                                           VkDeviceSize src_offset,
                                           pvr_dev_addr_t dst_addr,
                                           VkDeviceSize dst_offset,
                                           VkDeviceSize size,
                                           uint32_t fill_data,
                                           bool is_fill)
{
   VkDeviceSize offset = 0;

   while (offset < size) {
      const VkDeviceSize remaining_size = size - offset;
      struct pvr_transfer_cmd *transfer_cmd;
      uint32_t texel_width;
      VkDeviceSize texels;
      VkFormat vk_format;
      VkResult result;
      uint32_t height;
      uint32_t width;

      if (is_fill) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else if (remaining_size >= 16U) {
         vk_format = VK_FORMAT_R32G32B32A32_UINT;
         texel_width = 16U;
      } else if (remaining_size >= 4U) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else {
         vk_format = VK_FORMAT_R8_UINT;
         texel_width = 1U;
      }

      texels = remaining_size / texel_width;

      /* Try to do max-width rects, fall back to a 1-height rect for the
       * remainder.
       */
      if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
         width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
      } else {
         width = texels;
         height = 1;
      }
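
      /* Worked example (hypothetical numbers): for a 1 MiB copy, the first
       * iteration uses 16-byte texels, so texels = 65536, width = 2048 and
       * height = 32, transferring 2048 * 32 * 16 bytes = 1 MiB in one rect.
       * A 100-byte copy takes two iterations: a 6x1 rect of 16-byte texels
       * (96 bytes), then a 1x1 rect of a 4-byte texel, since offset advances
       * by width * height * texel_width each time around the loop.
       */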

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      if (!is_fill) {
         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            src_addr,
            offset + src_offset,
            vk_format,
            width,
            height,
            width);
         transfer_cmd->source_count = 1;
      } else {
         transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

         for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
            transfer_cmd->clear_color[i].ui = fill_data;
      }

      pvr_setup_buffer_surface(&transfer_cmd->dst,
                               &transfer_cmd->scissor,
                               dst_addr,
                               offset + dst_offset,
                               vk_format,
                               width,
                               height,
                               width);

      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;

         transfer_cmd->sources[0].mapping_count++;
      }

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }

      offset += width * height * texel_width;
   }

   return VK_SUCCESS;
}
void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                         VkBuffer dstBuffer,
                         VkDeviceSize dstOffset,
                         VkDeviceSize dataSize,
                         const void *pData)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   struct pvr_bo *pvr_bo;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   result = pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &pvr_bo);
   if (result != VK_SUCCESS)
      return;

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              pvr_bo->vma->dev_addr,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              dataSize,
                              0U,
                              false);
}
void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                           const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      const VkResult result =
         pvr_cmd_copy_buffer_region(cmd_buffer,
                                    src->dev_addr,
                                    pCopyBufferInfo->pRegions[i].srcOffset,
                                    dst->dev_addr,
                                    pCopyBufferInfo->pRegions[i].dstOffset,
                                    pCopyBufferInfo->pRegions[i].size,
                                    0U,
                                    false);
      if (result != VK_SUCCESS)
         return;
   }
}
void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
                       VkBuffer dstBuffer,
                       VkDeviceSize dstOffset,
                       VkDeviceSize fillSize,
                       uint32_t data)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ULL;
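
   /* E.g. a VK_WHOLE_SIZE fill on a 10-byte remainder is rounded down to
    * 8 bytes by the mask above.
    */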

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              PVR_DEV_ADDR_INVALID,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              fillSize,
                              data,
                              true);
}
/**
 * \brief Returns the maximum number of layers to clear starting from
 * base_layer that contain or match the target rectangle.
 *
 * \param[in] target_rect      The region which the clear should contain or
 *                             match.
 * \param[in] base_layer       The layer index to start at.
 * \param[in] clear_rect_count Number of clear_rects.
 * \param[in] clear_rects      Array of clear rects.
 *
 * \return Max number of layers that cover or match the target region.
 */
static uint32_t
pvr_get_max_layers_covering_target(VkRect2D target_rect,
                                   uint32_t base_layer,
                                   uint32_t clear_rect_count,
                                   const VkClearRect *clear_rects)
{
   const int32_t target_x0 = target_rect.offset.x;
   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
   const int32_t target_y0 = target_rect.offset.y;
   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;

   uint32_t layer_count = 0;

   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);

   for (uint32_t i = 0; i < clear_rect_count; i++) {
      const VkClearRect *clear_rect = &clear_rects[i];
      const uint32_t max_layer =
         clear_rect->baseArrayLayer + clear_rect->layerCount;
      bool target_is_covered;
      int32_t x0, x1;
      int32_t y0, y1;

      if (clear_rect->baseArrayLayer == 0)
         continue;

      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
             UINT32_MAX);

      /* Check for layer intersection. */
      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
         continue;

      x0 = clear_rect->rect.offset.x;
      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
      y0 = clear_rect->rect.offset.y;
      y1 = y0 + (int32_t)clear_rect->rect.extent.height;

      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
             INT32_MAX);

      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;

      if (target_is_covered)
         layer_count = MAX2(layer_count, max_layer - base_layer);
   }

   return layer_count;
}
/* Return true if the vertex shader is required to output the render target
 * ID in order to pick the texture array layer.
 */
static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
                             uint32_t rect_count,
                             const VkClearRect *rects)
{
   if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
      return false;

   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
         return true;
   }

   return false;
}
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_shader_factory_info *shader_info,
   const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   ASSERTED bool uses_tile_buffer,
   uint32_t tile_buffer_idx,
   struct pvr_bo **const const_shareds_buffer_out)
{
   struct pvr_device *device = cmd_buffer->device;
   struct pvr_bo *const_shareds_buffer;
   struct pvr_bo *tile_buffer;
   uint64_t tile_dev_addr;
   uint32_t *buffer;
   VkResult result;

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is
    * fine. Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     device->heaps.general_heap,
                                     shader_info->const_shared_regs,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   buffer = const_shareds_buffer->bo->map;

   for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
      uint32_t dest_idx = shader_info->driver_const_location_map[i];

      if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
         continue;

      assert(dest_idx < shader_info->const_shared_regs);

      switch (i) {
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
         buffer[dest_idx] = clear_color[i];
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)tile_dev_addr;
         break;

      default:
         unreachable("Unsupported clear attachment const type.");
      }
   }
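
   /* At this point the driver-known constants have been routed through
    * driver_const_location_map into shader-specific locations: the four
    * packed clear-color words and, for tile-buffer variants, the 64-bit tile
    * buffer address split into its upper and lower 32-bit halves.
    */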

   for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
      const struct pvr_static_buffer *static_buff =
         &shader_info->static_const_buffer[i];

      assert(static_buff->dst_idx < shader_info->const_shared_regs);

      buffer[static_buff->dst_idx] = static_buff->value;
   }

   pvr_bo_cpu_unmap(device, const_shareds_buffer);

   *const_shareds_buffer_out = const_shareds_buffer;

   return VK_SUCCESS;
}
static VkResult pvr_clear_color_attachment_static(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct usc_mrt_resource *mrt_resource,
   VkFormat format,
   uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   uint32_t template_idx,
   uint32_t stencil,
   bool vs_has_rt_id_output)
{
   struct pvr_device *device = cmd_buffer->device;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   ASSERTED const bool has_eight_output_registers =
      PVR_HAS_FEATURE(dev_info, eight_output_registers);
   const struct pvr_device_static_clear_state *dev_clear_state =
      &device->static_clear_state;
   const bool uses_tile_buffer = mrt_resource->type ==
                                 USC_MRT_RESOURCE_TYPE_MEMORY;
   const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
   struct pvr_pds_pixel_shader_sa_program texture_program;
   uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
   const struct pvr_shader_factory_info *shader_info;
   struct pvr_static_clear_ppp_template template;
   struct pvr_bo *pds_texture_program_bo;
   struct pvr_bo *const_shareds_buffer;
   uint64_t pds_texture_program_addr;
   uint32_t tile_buffer_idx = 0;
   uint32_t out_reg_count;
   uint32_t output_offset;
   struct pvr_bo *pvr_bo;
   uint32_t program_idx;
   uint32_t *buffer;
   VkResult result;

   out_reg_count =
      DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);

   if (uses_tile_buffer) {
      tile_buffer_idx = mrt_resource->mem.tile_buffer;
      output_offset = mrt_resource->mem.offset_dw;
   } else {
      output_offset = mrt_resource->reg.offset;
   }

   assert(has_eight_output_registers || out_reg_count + output_offset <= 4);

   program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
                                                        output_offset,
                                                        uses_tile_buffer);

   shader_info = clear_attachment_collection[program_idx].info;

   result = pvr_clear_color_attachment_static_create_consts_buffer(
      cmd_buffer,
      shader_info,
      clear_color,
      uses_tile_buffer,
      tile_buffer_idx,
      &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   /* clang-format off */
   texture_program = (struct pvr_pds_pixel_shader_sa_program){
      .num_texture_dma_kicks = 1,
      .texture_dma_address = {
         [0] = const_shareds_buffer->vma->dev_addr.addr,
      }
   };
   /* clang-format on */

   pvr_csb_pack (&texture_program.texture_dma_control[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
                 doutd_src1) {
      doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
      doutd_src1.bsize = shader_info->const_shared_regs;
   }
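
   /* The DOUTD packed above DMAs the constants buffer into the USC common
    * store (bsize = const_shared_regs), making the packed clear color and,
    * for tile-buffer variants, the tile buffer address available to the
    * clear-attachment shader.
    */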

   clear_attachment_program =
      &dev_clear_state->pds_clear_attachment_program_info[program_idx];

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is
    * fine. Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result = pvr_cmd_buffer_alloc_mem(
      cmd_buffer,
      device->heaps.pds_heap,
      clear_attachment_program->texture_program_data_size,
      PVR_BO_ALLOC_FLAG_CPU_MAPPED,
      &pds_texture_program_bo);
   if (result != VK_SUCCESS) {
      list_del(&const_shareds_buffer->link);
      pvr_bo_free(device, const_shareds_buffer);

      return result;
   }

   buffer = pds_texture_program_bo->bo->map;
   pds_texture_program_addr = pds_texture_program_bo->vma->dev_addr.addr -
                              device->heaps.pds_heap->base_addr.addr;

   pvr_pds_generate_pixel_shader_sa_texture_state_data(
      &texture_program,
      buffer,
      &device->pdevice->dev_info);

   pvr_bo_cpu_unmap(device, pds_texture_program_bo);

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
                 TA_STATE_PDS_SHADERBASE,
                 shaderbase) {
      shaderbase.addr = clear_attachment_program->pixel_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
                 TA_STATE_PDS_TEXUNICODEBASE,
                 texunicodebase) {
      texunicodebase.addr = clear_attachment_program->texture_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
                 TA_STATE_PDS_SIZEINFO1,
                 sizeinfo1) {
      sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
         clear_attachment_program->texture_program_data_size,
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));

      sizeinfo1.pds_tempsize =
         DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
                      PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
                 TA_STATE_PDS_SIZEINFO2,
                 sizeinfo2) {
      sizeinfo2.usc_sharedsize =
         DIV_ROUND_UP(shader_info->const_shared_regs,
                      PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
   }

   /* Dummy coefficient loading program. */
   pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
                 TA_STATE_PDS_TEXTUREDATABASE,
                 texturedatabase) {
      texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
   }

   assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
   template =
      cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

   template.config.pds_state = &pds_state;

   template.config.ispctl.upass =
      cmd_buffer->state.render_pass_info.isp_userpass;

   if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT)
      template.config.ispa.sref = stencil;

   if (vs_has_rt_id_output) {
      template.config.output_sel.rhw_pres = true;
      template.config.output_sel.render_tgt_pres = true;
      template.config.output_sel.vtxsize = 4 + 1;
   }

   result = pvr_emit_ppp_from_template(
      &cmd_buffer->state.current_sub_cmd->gfx.control_stream,
      &template,
      &pvr_bo);
   if (result != VK_SUCCESS) {
      list_del(&pds_texture_program_bo->link);
      pvr_bo_free(device, pds_texture_program_bo);

      list_del(&const_shareds_buffer->link);
      pvr_bo_free(device, const_shareds_buffer);

      cmd_buffer->state.status = result;
      return result;
   }

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   return VK_SUCCESS;
}
/**
 * \brief Record a deferred clear operation into the command buffer.
 *
 * Devices which don't have gs_rta_support require extra handling for RTA
 * clears. We set up a list of deferred clear transfer commands which will be
 * processed at the end of the graphics sub command to account for the
 * missing feature.
 */
static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
                                           const VkClearAttachment *attachment,
                                           const VkClearRect *rect)
{
   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
   struct pvr_transfer_cmd *transfer_cmd_list;
   const struct pvr_image_view *image_view;
   const struct pvr_image *image;
   uint32_t base_layer;

   const VkOffset3D offset = {
      .x = rect->rect.offset.x,
      .y = rect->rect.offset.y,
      .z = 1,
   };
   const VkExtent3D extent = {
      .width = rect->rect.extent.width,
      .height = rect->rect.extent.height,
      .depth = 1,
   };

   assert(
      !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));

   transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
                                          struct pvr_transfer_cmd,
                                          rect->layerCount);
   if (!transfer_cmd_list) {
      cmd_buffer->state.status =
         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
      return cmd_buffer->state.status;
   }

   /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
    *
    *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
    *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
    */
   if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
      assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
             attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
             attachment->aspectMask ==
                (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));

      image_view = pass_info->attachments[hw_render->ds_attach_idx];
   } else {
      const struct pvr_renderpass_hwsetup_subpass *hw_pass =
         pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
      const struct pvr_render_subpass *sub_pass =
         &pass_info->pass->subpasses[hw_pass->index];
      const uint32_t attachment_idx =
         sub_pass->color_attachments[attachment->colorAttachment];

      assert(attachment->colorAttachment < sub_pass->color_count);

      image_view = pass_info->attachments[attachment_idx];
   }

   base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
   image = vk_to_pvr_image(image_view->vk.image);

   for (uint32_t i = 0; i < rect->layerCount; i++) {
      struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];

      /* TODO: Add an init function for when we don't want to use
       * pvr_transfer_cmd_alloc()? And use it here.
       */
      *transfer_cmd = (struct pvr_transfer_cmd){
         .flags = PVR_TRANSFER_CMD_FLAGS_FILL,
         .source_count = 1,
         .sources = {
            [0] = {
               .filter = PVR_FILTER_POINT,
               .resolve_op = PVR_RESOLVE_BLEND,
               .addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE),
            },
         },
         .cmd_buffer = cmd_buffer,
      };

      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
            transfer_cmd->clear_color[j].ui =
               attachment->clearValue.color.uint32[j];
         }
      } else {
         transfer_cmd->clear_color[0].f =
            attachment->clearValue.depthStencil.depth;
         transfer_cmd->clear_color[1].ui =
            attachment->clearValue.depthStencil.stencil;
      }

      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &transfer_cmd->sources[0].surface,
                                 &transfer_cmd->scissor,
                                 image,
                                 base_layer + i,
                                 0,
                                 &offset,
                                 &extent,
                                 0.0f,
                                 image->vk.format,
                                 attachment->aspectMask);
   }

   return VK_SUCCESS;
}
static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
|
|
uint32_t attachment_count,
|
|
const VkClearAttachment *attachments,
|
|
uint32_t rect_count,
|
|
const VkClearRect *rects)
|
|
{
|
|
const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
|
|
struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
|
|
const struct pvr_renderpass_hwsetup_subpass *hw_pass =
|
|
pvr_get_hw_subpass(pass, pass_info->subpass_idx);
|
|
struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
|
|
struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
|
|
struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
|
|
bool z_replicate = hw_pass->z_replicate != -1;
|
|
uint32_t vs_output_size_in_bytes;
|
|
bool vs_has_rt_id_output;

   /* TODO: This function can be optimized so that most of the device memory
    * gets allocated together in one go and then filled as needed. There might
    * also be opportunities to reuse pds code and data segments.
    */

   assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   pvr_reset_graphics_dirty_state(cmd_buffer, false);

   /* We'll be emitting to the control stream. */
   sub_cmd->empty_cmd = false;
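
   /* Layered clears need the vertex shader to emit a render target array
    * index per rect; check whether any rect requires that here. This is an
    * inferred reading of pvr_clear_needs_rt_id_output() based on how the
    * result is used below.
    */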
   vs_has_rt_id_output =
      pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);

   /* 4 because we're expecting the USC to output X, Y, Z, and W. */
   vs_output_size_in_bytes = 4 * sizeof(uint32_t);
   if (vs_has_rt_id_output)
      vs_output_size_in_bytes += sizeof(uint32_t);

   for (uint32_t i = 0; i < attachment_count; i++) {
      const VkClearAttachment *attachment = &attachments[i];
      struct pvr_pds_vertex_shader_program pds_program;
      struct pvr_pds_upload pds_program_upload = { 0 };
      uint64_t current_base_array_layer = ~0;
      VkResult result;
      float depth;
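
      /* Color clears: pack the clear value for the attachment's format and
       * emit the static color-clear state targeting the subpass' MRT
       * resource.
       */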
      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
         const struct usc_mrt_resource *mrt_resource;
         uint32_t global_attachment_idx;
         uint32_t local_attachment_idx;
         VkFormat format;

         local_attachment_idx = attachment->colorAttachment;
         mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];

         assert(local_attachment_idx < sub_pass->color_count);
         global_attachment_idx =
            sub_pass->color_attachments[local_attachment_idx];

         if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         assert(global_attachment_idx < pass->attachment_count);
         format = pass->attachments[global_attachment_idx].vk_format;

         assert(format != VK_FORMAT_UNDEFINED);

         pvr_get_hw_clear_color(format,
                                attachment->clearValue.color,
                                packed_clear_color);

         result = pvr_clear_color_attachment_static(cmd_buffer,
                                                    mrt_resource,
                                                    format,
                                                    packed_clear_color,
                                                    VK_IMAGE_ASPECT_COLOR_BIT,
                                                    0,
                                                    vs_has_rt_id_output);
         if (result != VK_SUCCESS)
            return;
      } else if (z_replicate &&
                 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
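         /* With z replication the depth value is also written to a color
          * MRT resource so later passes can read it; clear that replicated
          * copy as an R32_SFLOAT color clear alongside the real
          * depth/stencil clear. This description is inferred from the
          * z_replicate plumbing below.
          */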
         const VkClearColorValue clear_color = {
            .float32 = { [0] = attachment->clearValue.depthStencil.depth, },
         };
         const uint32_t template_idx = attachment->aspectMask |
                                       VK_IMAGE_ASPECT_COLOR_BIT;
         const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
         uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
         const struct usc_mrt_resource *mrt_resource;

         assert(hw_pass->z_replicate > 0);
         mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];

         pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
                                clear_color,
                                packed_clear_color);

         result = pvr_clear_color_attachment_static(cmd_buffer,
                                                    mrt_resource,
                                                    VK_FORMAT_R32_SFLOAT,
                                                    packed_clear_color,
                                                    template_idx,
                                                    stencil,
                                                    vs_has_rt_id_output);
         if (result != VK_SUCCESS)
            return;
      } else {
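         /* Depth/stencil-only clears without z replication reuse a
          * pre-packed PPP state template keyed by the aspect mask, patching
          * in the stencil reference value as needed.
          */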
         const uint32_t template_idx = attachment->aspectMask;
         struct pvr_static_clear_ppp_template template;
         struct pvr_bo *pvr_bo;

         assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
         template =
            cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

         if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            template.config.ispa.sref =
               attachment->clearValue.depthStencil.stencil;
         }

         if (vs_has_rt_id_output) {
            template.config.output_sel.rhw_pres = true;
            template.config.output_sel.render_tgt_pres = true;
            template.config.output_sel.vtxsize = 4 + 1;
         }

         result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
                                             &template,
                                             &pvr_bo);
         if (result != VK_SUCCESS) {
            cmd_buffer->state.status = result;
            return;
         }

         list_add(&pvr_bo->link, &cmd_buffer->bo_list);
      }
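
      /* Vertices are uploaded at the clear depth; when depth isn't being
       * cleared, default to 1.0, presumably so stencil-only clear geometry
       * still rasterizes at a well-defined depth.
       */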
      if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         depth = attachment->clearValue.depthStencil.depth;
      else
         depth = 1.0f;

      if (vs_has_rt_id_output) {
         const struct pvr_device_static_clear_state *dev_clear_state =
            &cmd_buffer->device->static_clear_state;
         const struct pvr_bo *multi_layer_vert_bo =
            dev_clear_state->usc_multi_layer_vertex_shader_bo;

         /* We can't use the device's passthrough pds program since it doesn't
          * have iterate_instance_id enabled. We'll be uploading code sections
          * for each clear rect.
          */

         /* TODO: See if we can allocate all the code section memory in one go.
          * We'd need to make sure that changing instance_id_modifier doesn't
          * change the code section size.
          * Also check if we can reuse the same code segment for each rect.
          * It seems the instance_id_modifier is written into the data section
          * and used by the pds ADD instruction that way, instead of being
          * embedded into the code section.
          */

         pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
                                                           multi_layer_vert_bo);
      } else {
         /* We can reuse the device's code section, but we'll need to upload
          * data sections, so initialize the program.
          */
         pvr_pds_clear_vertex_shader_program_init_base(
            &pds_program,
            cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);

         pds_program_upload.code_offset =
            cmd_buffer->device->static_clear_state.pds.code_offset;
         /* TODO: The code size doesn't get used by pvr_pack_clear_vdm_state(),
          * so maybe change its interface to make that clear and not set this?
          */
         pds_program_upload.code_size =
            cmd_buffer->device->static_clear_state.pds.code_size;
      }
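
      /* For each clear rect: upload the rect's vertices, emit or reuse the
       * PDS program that feeds them to the vertex shader, then pack VDM
       * control stream words to kick the clear primitive.
       */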
      for (uint32_t j = 0; j < rect_count; j++) {
         struct pvr_pds_upload pds_program_data_upload;
         const VkClearRect *clear_rect = &rects[j];
         struct pvr_bo *vertices_bo;
         uint32_t *vdm_cs_buffer;
         VkResult result;
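
         /* Without gs_rta_support, layered rects are recorded as deferred
          * transfer-path clears instead; only the first layer can still be
          * cleared inline here. This reading is inferred from the
          * baseArrayLayer checks below.
          */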
         if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
             (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
            result =
               pvr_add_deferred_rta_clear(cmd_buffer, attachment, clear_rect);
            if (result != VK_SUCCESS)
               return;

            if (clear_rect->baseArrayLayer != 0)
               continue;
         }

         /* TODO: Allocate all the buffers in one go before the loop, and add
          * support to multi-alloc bo.
          */
         result = pvr_clear_vertices_upload(cmd_buffer->device,
                                            &clear_rect->rect,
                                            depth,
                                            &vertices_bo);
         if (result != VK_SUCCESS) {
            cmd_buffer->state.status = result;
            return;
         }

         list_add(&vertices_bo->link, &cmd_buffer->bo_list);
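
         /* The RTA path re-uploads the PDS code section only when the base
          * array layer changes (tracked via current_base_array_layer); data
          * sections are uploaded for every rect in either path.
          */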
         if (vs_has_rt_id_output) {
            if (current_base_array_layer != clear_rect->baseArrayLayer) {
               const uint32_t base_array_layer = clear_rect->baseArrayLayer;
               struct pvr_pds_upload pds_program_code_upload;

               result =
                  pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
                     &pds_program,
                     cmd_buffer,
                     base_array_layer,
                     &pds_program_code_upload);
               if (result != VK_SUCCESS) {
                  cmd_buffer->state.status = result;
                  return;
               }

               pds_program_upload.code_offset =
                  pds_program_code_upload.code_offset;
               /* TODO: The code size doesn't get used by
                * pvr_pack_clear_vdm_state(), so maybe change its interface to
                * make that clear and not set this?
                */
               pds_program_upload.code_size = pds_program_code_upload.code_size;

               current_base_array_layer = base_array_layer;
            }

            result =
               pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
                  &pds_program,
                  cmd_buffer,
                  vertices_bo,
                  &pds_program_data_upload);
            if (result != VK_SUCCESS)
               return;
         } else {
            result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
               &pds_program,
               cmd_buffer,
               vertices_bo,
               &pds_program_data_upload);
            if (result != VK_SUCCESS)
               return;
         }

         pds_program_upload.data_offset = pds_program_data_upload.data_offset;
         pds_program_upload.data_size = pds_program_data_upload.data_size;

         vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream,
                                              PVR_CLEAR_VDM_STATE_DWORD_COUNT);
         if (!vdm_cs_buffer) {
            result = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
            cmd_buffer->state.status = result;
            return;
         }
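
         /* Pack the VDM control stream words that bind the PDS program and
          * launch the clear primitive. clear_rect->layerCount is passed so
          * layered clears can cover every slice; it appears to drive the
          * instance count when the RT id output is enabled.
          */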
         pvr_pack_clear_vdm_state(dev_info,
                                  &pds_program_upload,
                                  pds_program.temps_used,
                                  4,
                                  vs_output_size_in_bytes,
                                  clear_rect->layerCount,
                                  vdm_cs_buffer);
      }
   }
}

void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                             uint32_t attachmentCount,
                             const VkClearAttachment *pAttachments,
                             uint32_t rectCount,
                             const VkClearRect *pRects)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   /* TODO: There are some optimizations that can be made here:
    * - For a full screen clear, update the clear values for the corresponding
    *   attachment index.
    * - For a full screen color attachment clear, add its index to a load op
    *   override to add it to the background shader. This will elide any load
    *   op loads currently in the background shader, as well as the usual
    *   frag kick for the geometry clear.
    */

   /* If we have any depth/stencil clears, update the sub command depth/stencil
    * modification and usage flags.
    */
   if (state->depth_format != VK_FORMAT_UNDEFINED) {
      uint32_t full_screen_clear_count;
      bool has_stencil_clear = false;
      bool has_depth_clear = false;

      for (uint32_t i = 0; i < attachmentCount; i++) {
         const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;

         if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
            has_stencil_clear = true;

         if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
            has_depth_clear = true;

         if (has_stencil_clear && has_depth_clear)
            break;
      }

      sub_cmd->modifies_stencil |= has_stencil_clear;
      sub_cmd->modifies_depth |= has_depth_clear;

      /* We only care about clears that have a baseArrayLayer of 0, as any
       * attachment clears we move to the background shader must apply to all
       * of the attachment's subresources.
       */
      full_screen_clear_count =
         pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
                                            0,
                                            rectCount,
                                            pRects);
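
      /* A full-screen clear overwrites the whole attachment, so if depth or
       * stencil had no recorded usage yet, mark it as never used to let the
       * load be skipped. This reading of PVR_DEPTH_STENCIL_USAGE_NEVER is
       * inferred from its use here.
       */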
      if (full_screen_clear_count > 0) {
         if (has_stencil_clear &&
             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }

         if (has_depth_clear &&
             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }
      }
   }

   pvr_clear_attachments(cmd_buffer,
                         attachmentCount,
                         pAttachments,
                         rectCount,
                         pRects);
}

void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
                             const VkResolveImageInfo2 *pResolveImageInfo)
{
   PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
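
   /* Each resolve region is re-expressed as a VkImageCopy2 so the shared
    * copy/resolve path can handle it; pvr_copy_or_resolve_color_image_region
    * selects between a plain copy and a resolve, presumably based on the
    * source sample count.
    */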
   for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) {
      VkImageCopy2 region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
         .srcSubresource = pResolveImageInfo->pRegions[i].srcSubresource,
         .srcOffset = pResolveImageInfo->pRegions[i].srcOffset,
         .dstSubresource = pResolveImageInfo->pRegions[i].dstSubresource,
         .dstOffset = pResolveImageInfo->pRegions[i].dstOffset,
         .extent = pResolveImageInfo->pRegions[i].extent,
      };

      VkResult result =
         pvr_copy_or_resolve_color_image_region(cmd_buffer, src, dst, &region);
      if (result != VK_SUCCESS)
         return;
   }
}