mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 13:38:19 +02:00
This is trivial because this path relies on our blit_shader interface, which supports this already, so it just needs to pass it along. I don't think this is ever triggered in practice, since we should be able to handle any case that could require this with the texel buffer path, but at least it allows us to simplify the code a bit. Tested by manually disabling the priority paths to ensure we exercise component swizzles with this path. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8875>
5730 lines
204 KiB
C
5730 lines
204 KiB
C
/*
|
||
* Copyright © 2019 Raspberry Pi
|
||
*
|
||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||
* copy of this software and associated documentation files (the "Software"),
|
||
* to deal in the Software without restriction, including without limitation
|
||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
* and/or sell copies of the Software, and to permit persons to whom the
|
||
* Software is furnished to do so, subject to the following conditions:
|
||
*
|
||
* The above copyright notice and this permission notice (including the next
|
||
* paragraph) shall be included in all copies or substantial portions of the
|
||
* Software.
|
||
*
|
||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||
* IN THE SOFTWARE.
|
||
*/
|
||
|
||
#include "v3dv_private.h"
|
||
|
||
#include "compiler/nir/nir_builder.h"
|
||
#include "broadcom/cle/v3dx_pack.h"
|
||
#include "vk_format_info.h"
|
||
#include "util/u_pack_color.h"
|
||
|
||
static uint32_t
|
||
meta_blit_key_hash(const void *key)
|
||
{
|
||
return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
|
||
}
|
||
|
||
static bool
|
||
meta_blit_key_compare(const void *key1, const void *key2)
|
||
{
|
||
return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
|
||
}
|
||
|
||
static bool
|
||
create_blit_pipeline_layout(struct v3dv_device *device,
|
||
VkDescriptorSetLayout *descriptor_set_layout,
|
||
VkPipelineLayout *pipeline_layout)
|
||
{
|
||
VkResult result;
|
||
|
||
if (*descriptor_set_layout == 0) {
|
||
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
|
||
.binding = 0,
|
||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||
.descriptorCount = 1,
|
||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||
};
|
||
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||
.bindingCount = 1,
|
||
.pBindings = &descriptor_set_layout_binding,
|
||
};
|
||
result =
|
||
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
|
||
&descriptor_set_layout_info,
|
||
&device->vk.alloc,
|
||
descriptor_set_layout);
|
||
if (result != VK_SUCCESS)
|
||
return false;
|
||
}
|
||
|
||
assert(*pipeline_layout == 0);
|
||
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||
.setLayoutCount = 1,
|
||
.pSetLayouts = descriptor_set_layout,
|
||
.pushConstantRangeCount = 1,
|
||
.pPushConstantRanges =
|
||
&(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
|
||
};
|
||
|
||
result =
|
||
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
||
&pipeline_layout_info,
|
||
&device->vk.alloc,
|
||
pipeline_layout);
|
||
return result == VK_SUCCESS;
|
||
}
|
||
|
||
void
|
||
v3dv_meta_blit_init(struct v3dv_device *device)
|
||
{
|
||
for (uint32_t i = 0; i < 3; i++) {
|
||
device->meta.blit.cache[i] =
|
||
_mesa_hash_table_create(NULL,
|
||
meta_blit_key_hash,
|
||
meta_blit_key_compare);
|
||
}
|
||
|
||
create_blit_pipeline_layout(device,
|
||
&device->meta.blit.ds_layout,
|
||
&device->meta.blit.p_layout);
|
||
}
|
||
|
||
void
|
||
v3dv_meta_blit_finish(struct v3dv_device *device)
|
||
{
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
|
||
for (uint32_t i = 0; i < 3; i++) {
|
||
hash_table_foreach(device->meta.blit.cache[i], entry) {
|
||
struct v3dv_meta_blit_pipeline *item = entry->data;
|
||
v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
|
||
v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
|
||
v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
|
||
vk_free(&device->vk.alloc, item);
|
||
}
|
||
_mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
|
||
}
|
||
|
||
if (device->meta.blit.p_layout) {
|
||
v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
|
||
&device->vk.alloc);
|
||
}
|
||
|
||
if (device->meta.blit.ds_layout) {
|
||
v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
|
||
&device->vk.alloc);
|
||
}
|
||
}
|
||
|
||
static uint32_t
|
||
meta_texel_buffer_copy_key_hash(const void *key)
|
||
{
|
||
return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
|
||
}
|
||
|
||
static bool
|
||
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
|
||
{
|
||
return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
|
||
}
|
||
|
||
static bool
|
||
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
|
||
VkDescriptorSetLayout *ds_layout,
|
||
VkPipelineLayout *p_layout)
|
||
{
|
||
VkResult result;
|
||
|
||
if (*ds_layout == 0) {
|
||
VkDescriptorSetLayoutBinding ds_layout_binding = {
|
||
.binding = 0,
|
||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
|
||
.descriptorCount = 1,
|
||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||
};
|
||
VkDescriptorSetLayoutCreateInfo ds_layout_info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||
.bindingCount = 1,
|
||
.pBindings = &ds_layout_binding,
|
||
};
|
||
result =
|
||
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
|
||
&ds_layout_info,
|
||
&device->vk.alloc,
|
||
ds_layout);
|
||
if (result != VK_SUCCESS)
|
||
return false;
|
||
}
|
||
|
||
assert(*p_layout == 0);
|
||
VkPipelineLayoutCreateInfo p_layout_info = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||
.setLayoutCount = 1,
|
||
.pSetLayouts = ds_layout,
|
||
.pushConstantRangeCount = 1,
|
||
.pPushConstantRanges =
|
||
&(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 20 },
|
||
};
|
||
|
||
result =
|
||
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
||
&p_layout_info,
|
||
&device->vk.alloc,
|
||
p_layout);
|
||
return result == VK_SUCCESS;
|
||
}
|
||
|
||
void
|
||
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
|
||
{
|
||
for (uint32_t i = 0; i < 3; i++) {
|
||
device->meta.texel_buffer_copy.cache[i] =
|
||
_mesa_hash_table_create(NULL,
|
||
meta_texel_buffer_copy_key_hash,
|
||
meta_texel_buffer_copy_key_compare);
|
||
}
|
||
|
||
create_texel_buffer_copy_pipeline_layout(
|
||
device,
|
||
&device->meta.texel_buffer_copy.ds_layout,
|
||
&device->meta.texel_buffer_copy.p_layout);
|
||
}
|
||
|
||
void
|
||
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
|
||
{
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
|
||
for (uint32_t i = 0; i < 3; i++) {
|
||
hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
|
||
struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
|
||
v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
|
||
v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
|
||
v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
|
||
vk_free(&device->vk.alloc, item);
|
||
}
|
||
_mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
|
||
}
|
||
|
||
if (device->meta.texel_buffer_copy.p_layout) {
|
||
v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
|
||
&device->vk.alloc);
|
||
}
|
||
|
||
if (device->meta.texel_buffer_copy.ds_layout) {
|
||
v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
|
||
&device->vk.alloc);
|
||
}
|
||
}
|
||
|
||
static inline bool
|
||
can_use_tlb(struct v3dv_image *image,
|
||
const VkOffset3D *offset,
|
||
VkFormat *compat_format);
|
||
|
||
/**
|
||
* Copy operations implemented in this file don't operate on a framebuffer
|
||
* object provided by the user, however, since most use the TLB for this,
|
||
* we still need to have some representation of the framebuffer. For the most
|
||
* part, the job's frame tiling information is enough for this, however we
|
||
* still need additional information such us the internal type of our single
|
||
* render target, so we use this auxiliary struct to pass that information
|
||
* around.
|
||
*/
|
||
struct framebuffer_data {
|
||
/* The internal type of the single render target */
|
||
uint32_t internal_type;
|
||
|
||
/* Supertile coverage */
|
||
uint32_t min_x_supertile;
|
||
uint32_t min_y_supertile;
|
||
uint32_t max_x_supertile;
|
||
uint32_t max_y_supertile;
|
||
|
||
/* Format info */
|
||
VkFormat vk_format;
|
||
const struct v3dv_format *format;
|
||
uint8_t internal_depth_type;
|
||
};
|
||
|
||
static void
|
||
setup_framebuffer_data(struct framebuffer_data *fb,
|
||
VkFormat vk_format,
|
||
uint32_t internal_type,
|
||
const struct v3dv_frame_tiling *tiling)
|
||
{
|
||
fb->internal_type = internal_type;
|
||
|
||
/* Supertile coverage always starts at 0,0 */
|
||
uint32_t supertile_w_in_pixels =
|
||
tiling->tile_width * tiling->supertile_width;
|
||
uint32_t supertile_h_in_pixels =
|
||
tiling->tile_height * tiling->supertile_height;
|
||
|
||
fb->min_x_supertile = 0;
|
||
fb->min_y_supertile = 0;
|
||
fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
|
||
fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
|
||
|
||
fb->vk_format = vk_format;
|
||
fb->format = v3dv_get_format(vk_format);
|
||
|
||
fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
|
||
if (vk_format_is_depth_or_stencil(vk_format))
|
||
fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format);
|
||
}
|
||
|
||
/* This chooses a tile buffer format that is appropriate for the copy operation.
|
||
* Typically, this is the image render target type, however, if we are copying
|
||
* depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
|
||
* we need to load and store to/from a tile color buffer using a compatible
|
||
* color format.
|
||
*/
|
||
static uint32_t
|
||
choose_tlb_format(struct framebuffer_data *framebuffer,
|
||
VkImageAspectFlags aspect,
|
||
bool for_store,
|
||
bool is_copy_to_buffer,
|
||
bool is_copy_from_buffer)
|
||
{
|
||
if (is_copy_to_buffer || is_copy_from_buffer) {
|
||
switch (framebuffer->vk_format) {
|
||
case VK_FORMAT_D16_UNORM:
|
||
return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
|
||
case VK_FORMAT_D32_SFLOAT:
|
||
return V3D_OUTPUT_IMAGE_FORMAT_R32F;
|
||
case VK_FORMAT_X8_D24_UNORM_PACK32:
|
||
return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
|
||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||
/* When storing the stencil aspect of a combined depth/stencil image
|
||
* to a buffer, the Vulkan spec states that the output buffer must
|
||
* have packed stencil values, so we choose an R8UI format for our
|
||
* store outputs. For the load input we still want RGBA8UI since the
|
||
* source image contains 4 channels (including the 3 channels
|
||
* containing the 24-bit depth value).
|
||
*
|
||
* When loading the stencil aspect of a combined depth/stencil image
|
||
* from a buffer, we read packed 8-bit stencil values from the buffer
|
||
* that we need to put into the LSB of the 32-bit format (the R
|
||
* channel), so we use R8UI. For the store, if we used R8UI then we
|
||
* would write 8-bit stencil values consecutively over depth channels,
|
||
* so we need to use RGBA8UI. This will write each stencil value in
|
||
* its correct position, but will overwrite depth values (channels G
|
||
* B,A) with undefined values. To fix this, we will have to restore
|
||
* the depth aspect from the Z tile buffer, which we should pre-load
|
||
* from the image before the store).
|
||
*/
|
||
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||
return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
|
||
} else {
|
||
assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||
if (is_copy_to_buffer) {
|
||
return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
|
||
V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
|
||
} else {
|
||
assert(is_copy_from_buffer);
|
||
return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
|
||
V3D_OUTPUT_IMAGE_FORMAT_R8UI;
|
||
}
|
||
}
|
||
default: /* Color formats */
|
||
return framebuffer->format->rt_type;
|
||
break;
|
||
}
|
||
} else {
|
||
return framebuffer->format->rt_type;
|
||
}
|
||
}
|
||
|
||
static inline bool
|
||
format_needs_rb_swap(VkFormat format)
|
||
{
|
||
const uint8_t *swizzle = v3dv_get_format_swizzle(format);
|
||
return swizzle[0] == PIPE_SWIZZLE_Z;
|
||
}
|
||
|
||
static void
|
||
get_internal_type_bpp_for_image_aspects(VkFormat vk_format,
|
||
VkImageAspectFlags aspect_mask,
|
||
uint32_t *internal_type,
|
||
uint32_t *internal_bpp)
|
||
{
|
||
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
|
||
VK_IMAGE_ASPECT_STENCIL_BIT;
|
||
|
||
/* We can't store depth/stencil pixel formats to a raster format, so
|
||
* so instead we load our depth/stencil aspects to a compatible color
|
||
* format.
|
||
*/
|
||
/* FIXME: pre-compute this at image creation time? */
|
||
if (aspect_mask & ds_aspects) {
|
||
switch (vk_format) {
|
||
case VK_FORMAT_D16_UNORM:
|
||
*internal_type = V3D_INTERNAL_TYPE_16UI;
|
||
*internal_bpp = V3D_INTERNAL_BPP_64;
|
||
break;
|
||
case VK_FORMAT_D32_SFLOAT:
|
||
*internal_type = V3D_INTERNAL_TYPE_32F;
|
||
*internal_bpp = V3D_INTERNAL_BPP_128;
|
||
break;
|
||
case VK_FORMAT_X8_D24_UNORM_PACK32:
|
||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||
/* Use RGBA8 format so we can relocate the X/S bits in the appropriate
|
||
* place to match Vulkan expectations. See the comment on the tile
|
||
* load command for more details.
|
||
*/
|
||
*internal_type = V3D_INTERNAL_TYPE_8UI;
|
||
*internal_bpp = V3D_INTERNAL_BPP_32;
|
||
break;
|
||
default:
|
||
assert(!"unsupported format");
|
||
break;
|
||
}
|
||
} else {
|
||
const struct v3dv_format *format = v3dv_get_format(vk_format);
|
||
v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
|
||
internal_type,
|
||
internal_bpp);
|
||
}
|
||
}
|
||
|
||
struct rcl_clear_info {
|
||
const union v3dv_clear_value *clear_value;
|
||
struct v3dv_image *image;
|
||
VkImageAspectFlags aspects;
|
||
uint32_t layer;
|
||
uint32_t level;
|
||
};
|
||
|
||
static struct v3dv_cl *
|
||
emit_rcl_prologue(struct v3dv_job *job,
|
||
struct framebuffer_data *fb,
|
||
const struct rcl_clear_info *clear_info)
|
||
{
|
||
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
|
||
|
||
struct v3dv_cl *rcl = &job->rcl;
|
||
v3dv_cl_ensure_space_with_branch(rcl, 200 +
|
||
tiling->layers * 256 *
|
||
cl_packet_length(SUPERTILE_COORDINATES));
|
||
if (job->cmd_buffer->state.oom)
|
||
return NULL;
|
||
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
|
||
config.early_z_disable = true;
|
||
config.image_width_pixels = tiling->width;
|
||
config.image_height_pixels = tiling->height;
|
||
config.number_of_render_targets = 1;
|
||
config.multisample_mode_4x = tiling->msaa;
|
||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||
config.internal_depth_type = fb->internal_depth_type;
|
||
}
|
||
|
||
if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
|
||
uint32_t clear_pad = 0;
|
||
if (clear_info->image) {
|
||
const struct v3dv_image *image = clear_info->image;
|
||
const struct v3d_resource_slice *slice =
|
||
&image->slices[clear_info->level];
|
||
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
|
||
slice->tiling == VC5_TILING_UIF_XOR) {
|
||
int uif_block_height = v3d_utile_height(image->cpp) * 2;
|
||
|
||
uint32_t implicit_padded_height =
|
||
align(tiling->height, uif_block_height) / uif_block_height;
|
||
|
||
if (slice->padded_height_of_output_image_in_uif_blocks -
|
||
implicit_padded_height >= 15) {
|
||
clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
|
||
}
|
||
}
|
||
}
|
||
|
||
const uint32_t *color = &clear_info->clear_value->color[0];
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
|
||
clear.clear_color_low_32_bits = color[0];
|
||
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
|
||
clear.render_target_number = 0;
|
||
};
|
||
|
||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
|
||
clear.clear_color_mid_low_32_bits =
|
||
((color[1] >> 24) | (color[2] << 8));
|
||
clear.clear_color_mid_high_24_bits =
|
||
((color[2] >> 24) | ((color[3] & 0xffff) << 8));
|
||
clear.render_target_number = 0;
|
||
};
|
||
}
|
||
|
||
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
|
||
clear.uif_padded_height_in_uif_blocks = clear_pad;
|
||
clear.clear_color_high_16_bits = color[3] >> 16;
|
||
clear.render_target_number = 0;
|
||
};
|
||
}
|
||
}
|
||
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||
rt.render_target_0_internal_bpp = tiling->internal_bpp;
|
||
rt.render_target_0_internal_type = fb->internal_type;
|
||
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||
}
|
||
|
||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||
clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
|
||
clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
|
||
};
|
||
|
||
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
|
||
init.use_auto_chained_tile_lists = true;
|
||
init.size_of_first_block_in_chained_tile_lists =
|
||
TILE_ALLOCATION_BLOCK_SIZE_64B;
|
||
}
|
||
|
||
return rcl;
|
||
}
|
||
|
||
static void
|
||
emit_frame_setup(struct v3dv_job *job,
|
||
uint32_t layer,
|
||
const union v3dv_clear_value *clear_value)
|
||
{
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
|
||
|
||
struct v3dv_cl *rcl = &job->rcl;
|
||
|
||
const uint32_t tile_alloc_offset =
|
||
64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
|
||
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
|
||
list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
|
||
}
|
||
|
||
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
|
||
config.number_of_bin_tile_lists = 1;
|
||
config.total_frame_width_in_tiles = tiling->draw_tiles_x;
|
||
config.total_frame_height_in_tiles = tiling->draw_tiles_y;
|
||
|
||
config.supertile_width_in_tiles = tiling->supertile_width;
|
||
config.supertile_height_in_tiles = tiling->supertile_height;
|
||
|
||
config.total_frame_width_in_supertiles =
|
||
tiling->frame_width_in_supertiles;
|
||
config.total_frame_height_in_supertiles =
|
||
tiling->frame_height_in_supertiles;
|
||
}
|
||
|
||
/* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
|
||
* it here.
|
||
*/
|
||
for (int i = 0; i < 2; i++) {
|
||
cl_emit(rcl, TILE_COORDINATES, coords);
|
||
cl_emit(rcl, END_OF_LOADS, end);
|
||
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
|
||
store.buffer_to_store = NONE;
|
||
}
|
||
if (clear_value && i == 0) {
|
||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||
clear.clear_z_stencil_buffer = true;
|
||
clear.clear_all_render_targets = true;
|
||
}
|
||
}
|
||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||
}
|
||
|
||
cl_emit(rcl, FLUSH_VCD_CACHE, flush);
|
||
}
|
||
|
||
static void
|
||
emit_supertile_coordinates(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer)
|
||
{
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl *rcl = &job->rcl;
|
||
|
||
const uint32_t min_y = framebuffer->min_y_supertile;
|
||
const uint32_t max_y = framebuffer->max_y_supertile;
|
||
const uint32_t min_x = framebuffer->min_x_supertile;
|
||
const uint32_t max_x = framebuffer->max_x_supertile;
|
||
|
||
for (int y = min_y; y <= max_y; y++) {
|
||
for (int x = min_x; x <= max_x; x++) {
|
||
cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
|
||
coords.column_number_in_supertiles = x;
|
||
coords.row_number_in_supertiles = y;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_linear_load(struct v3dv_cl *cl,
|
||
uint32_t buffer,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
uint32_t stride,
|
||
uint32_t format)
|
||
{
|
||
cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
|
||
load.buffer_to_load = buffer;
|
||
load.address = v3dv_cl_address(bo, offset);
|
||
load.input_image_format = format;
|
||
load.memory_format = VC5_TILING_RASTER;
|
||
load.height_in_ub_or_stride = stride;
|
||
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_linear_store(struct v3dv_cl *cl,
|
||
uint32_t buffer,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
uint32_t stride,
|
||
bool msaa,
|
||
uint32_t format)
|
||
{
|
||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||
store.buffer_to_store = RENDER_TARGET_0;
|
||
store.address = v3dv_cl_address(bo, offset);
|
||
store.clear_buffer_being_stored = false;
|
||
store.output_image_format = format;
|
||
store.memory_format = VC5_TILING_RASTER;
|
||
store.height_in_ub_or_stride = stride;
|
||
store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
|
||
V3D_DECIMATE_MODE_SAMPLE_0;
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_image_load(struct v3dv_cl *cl,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *image,
|
||
VkImageAspectFlags aspect,
|
||
uint32_t layer,
|
||
uint32_t mip_level,
|
||
bool is_copy_to_buffer,
|
||
bool is_copy_from_buffer)
|
||
{
|
||
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
|
||
|
||
/* For image to/from buffer copies we always load to and store from RT0,
|
||
* even for depth/stencil aspects, because the hardware can't do raster
|
||
* stores or loads from/to the depth/stencil tile buffers.
|
||
*/
|
||
bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
|
||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
|
||
|
||
const struct v3d_resource_slice *slice = &image->slices[mip_level];
|
||
cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
|
||
load.buffer_to_load = load_to_color_tlb ?
|
||
RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
|
||
|
||
load.address = v3dv_cl_address(image->mem->bo, layer_offset);
|
||
|
||
load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
|
||
is_copy_to_buffer,
|
||
is_copy_from_buffer);
|
||
load.memory_format = slice->tiling;
|
||
|
||
/* When copying depth/stencil images to a buffer, for D24 formats Vulkan
|
||
* expects the depth value in the LSB bits of each 32-bit pixel.
|
||
* Unfortunately, the hardware seems to put the S8/X8 bits there and the
|
||
* depth bits on the MSB. To work around that we can reverse the channel
|
||
* order and then swap the R/B channels to get what we want.
|
||
*
|
||
* NOTE: reversing and swapping only gets us the behavior we want if the
|
||
* operations happen in that exact order, which seems to be the case when
|
||
* done on the tile buffer load operations. On the store, it seems the
|
||
* order is not the same. The order on the store is probably reversed so
|
||
* that reversing and swapping on both the load and the store preserves
|
||
* the original order of the channels in memory.
|
||
*
|
||
* Notice that we only need to do this when copying to a buffer, where
|
||
* depth and stencil aspects are copied as separate regions and
|
||
* the spec expects them to be tightly packed.
|
||
*/
|
||
bool needs_rb_swap = false;
|
||
bool needs_chan_reverse = false;
|
||
if (is_copy_to_buffer &&
|
||
(framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
|
||
(framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
||
(aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
|
||
needs_rb_swap = true;
|
||
needs_chan_reverse = true;
|
||
} else if (!is_copy_from_buffer && !is_copy_to_buffer &&
|
||
(aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
|
||
/* This is not a raw data copy (i.e. we are clearing the image),
|
||
* so we need to make sure we respect the format swizzle.
|
||
*/
|
||
needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
|
||
}
|
||
|
||
load.r_b_swap = needs_rb_swap;
|
||
load.channel_reverse = needs_chan_reverse;
|
||
|
||
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
|
||
slice->tiling == VC5_TILING_UIF_XOR) {
|
||
load.height_in_ub_or_stride =
|
||
slice->padded_height_of_output_image_in_uif_blocks;
|
||
} else if (slice->tiling == VC5_TILING_RASTER) {
|
||
load.height_in_ub_or_stride = slice->stride;
|
||
}
|
||
|
||
if (image->samples > VK_SAMPLE_COUNT_1_BIT)
|
||
load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
|
||
else
|
||
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_image_store(struct v3dv_cl *cl,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *image,
|
||
VkImageAspectFlags aspect,
|
||
uint32_t layer,
|
||
uint32_t mip_level,
|
||
bool is_copy_to_buffer,
|
||
bool is_copy_from_buffer,
|
||
bool is_multisample_resolve)
|
||
{
|
||
uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
|
||
|
||
bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
|
||
aspect == VK_IMAGE_ASPECT_COLOR_BIT;
|
||
|
||
const struct v3d_resource_slice *slice = &image->slices[mip_level];
|
||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||
store.buffer_to_store = store_from_color_tlb ?
|
||
RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
|
||
|
||
store.address = v3dv_cl_address(image->mem->bo, layer_offset);
|
||
store.clear_buffer_being_stored = false;
|
||
|
||
/* See rationale in emit_image_load() */
|
||
bool needs_rb_swap = false;
|
||
bool needs_chan_reverse = false;
|
||
if (is_copy_from_buffer &&
|
||
(framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
|
||
(framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
||
(aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
|
||
needs_rb_swap = true;
|
||
needs_chan_reverse = true;
|
||
} else if (!is_copy_from_buffer && !is_copy_to_buffer &&
|
||
(aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
|
||
needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
|
||
}
|
||
|
||
store.r_b_swap = needs_rb_swap;
|
||
store.channel_reverse = needs_chan_reverse;
|
||
|
||
store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
|
||
is_copy_to_buffer,
|
||
is_copy_from_buffer);
|
||
store.memory_format = slice->tiling;
|
||
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
|
||
slice->tiling == VC5_TILING_UIF_XOR) {
|
||
store.height_in_ub_or_stride =
|
||
slice->padded_height_of_output_image_in_uif_blocks;
|
||
} else if (slice->tiling == VC5_TILING_RASTER) {
|
||
store.height_in_ub_or_stride = slice->stride;
|
||
}
|
||
|
||
if (image->samples > VK_SAMPLE_COUNT_1_BIT)
|
||
store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
|
||
else if (is_multisample_resolve)
|
||
store.decimate_mode = V3D_DECIMATE_MODE_4X;
|
||
else
|
||
store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_buffer *buffer,
|
||
struct v3dv_image *image,
|
||
uint32_t layer_offset,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
/* Load image to TLB */
|
||
assert((image->type != VK_IMAGE_TYPE_3D &&
|
||
layer_offset < region->imageSubresource.layerCount) ||
|
||
layer_offset < image->extent.depth);
|
||
|
||
const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
|
||
region->imageSubresource.baseArrayLayer + layer_offset :
|
||
region->imageOffset.z + layer_offset;
|
||
|
||
emit_image_load(cl, framebuffer, image,
|
||
region->imageSubresource.aspectMask,
|
||
image_layer,
|
||
region->imageSubresource.mipLevel,
|
||
true, false);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
/* Store TLB to buffer */
|
||
uint32_t width, height;
|
||
if (region->bufferRowLength == 0)
|
||
width = region->imageExtent.width;
|
||
else
|
||
width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
height = region->imageExtent.height;
|
||
else
|
||
height = region->bufferImageHeight;
|
||
|
||
/* Handle copy from compressed format */
|
||
width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
|
||
height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
|
||
|
||
/* If we are storing stencil from a combined depth/stencil format the
|
||
* Vulkan spec states that the output buffer must have packed stencil
|
||
* values, where each stencil value is 1 byte.
|
||
*/
|
||
uint32_t cpp =
|
||
region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
|
||
1 : image->cpp;
|
||
uint32_t buffer_stride = width * cpp;
|
||
uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
|
||
height * buffer_stride * layer_offset;
|
||
|
||
uint32_t format = choose_tlb_format(framebuffer,
|
||
region->imageSubresource.aspectMask,
|
||
true, true, false);
|
||
bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
|
||
|
||
emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
|
||
buffer_offset, buffer_stride, msaa, format);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_layer_to_buffer(struct v3dv_job *job,
|
||
struct v3dv_buffer *buffer,
|
||
struct v3dv_image *image,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t layer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
emit_frame_setup(job, layer, NULL);
|
||
emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
|
||
image, layer, region);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
|
||
struct v3dv_buffer *buffer,
|
||
struct v3dv_image *image,
|
||
struct framebuffer_data *framebuffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
|
||
emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
/* Implements a copy using the TLB.
|
||
*
|
||
* This only works if we are copying from offset (0,0), since a TLB store for
|
||
* tile (x,y) will be written at the same tile offset into the destination.
|
||
* When this requirement is not met, we need to use a blit instead.
|
||
*
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*
|
||
*/
|
||
static bool
|
||
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_buffer *buffer,
|
||
struct v3dv_image *image,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
VkFormat fb_format;
|
||
if (!can_use_tlb(image, ®ion->imageOffset, &fb_format))
|
||
return false;
|
||
|
||
uint32_t internal_type, internal_bpp;
|
||
get_internal_type_bpp_for_image_aspects(fb_format,
|
||
region->imageSubresource.aspectMask,
|
||
&internal_type, &internal_bpp);
|
||
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->imageSubresource.layerCount;
|
||
else
|
||
num_layers = region->imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return true;
|
||
|
||
/* Handle copy from compressed format using a compatible format */
|
||
const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
|
||
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
|
||
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
|
||
|
||
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
|
||
return true;
|
||
}
|
||
|
||
static bool
|
||
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
VkFormat dst_format,
|
||
struct v3dv_image *src,
|
||
VkFormat src_format,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
const VkImageBlit *region,
|
||
VkFilter filter,
|
||
bool dst_is_padded_image);
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_buffer *buffer,
|
||
struct v3dv_image *image,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
bool handled = false;
|
||
|
||
/* Generally, the bpp of the data in the buffer matches that of the
|
||
* source image. The exception is the case where we are copying
|
||
* stencil (8bpp) to a combined d24s8 image (32bpp).
|
||
*/
|
||
uint32_t buffer_bpp = image->cpp;
|
||
|
||
VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
|
||
|
||
/* Because we are going to implement the copy as a blit, we need to create
|
||
* a linear image from the destination buffer and we also want our blit
|
||
* source and destination formats to be the same (to avoid any format
|
||
* conversions), so we choose a canonical format that matches the
|
||
* source image bpp.
|
||
*
|
||
* The exception to the above is copying from combined depth/stencil images
|
||
* because we are copying only one aspect of the image, so we need to setup
|
||
* our formats, color write mask and source swizzle mask to match that.
|
||
*/
|
||
VkFormat dst_format;
|
||
VkFormat src_format;
|
||
VkColorComponentFlags cmask = 0; /* All components */
|
||
VkComponentMapping cswizzle = {
|
||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
};
|
||
switch (buffer_bpp) {
|
||
case 16:
|
||
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
dst_format = VK_FORMAT_R32G32B32A32_UINT;
|
||
src_format = dst_format;
|
||
break;
|
||
case 8:
|
||
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
dst_format = VK_FORMAT_R16G16B16A16_UINT;
|
||
src_format = dst_format;
|
||
break;
|
||
case 4:
|
||
switch (copy_aspect) {
|
||
case VK_IMAGE_ASPECT_COLOR_BIT:
|
||
src_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
dst_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
break;
|
||
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
||
assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
|
||
image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
|
||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
|
||
if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
|
||
src_format = VK_FORMAT_R32_UINT;
|
||
dst_format = VK_FORMAT_R32_UINT;
|
||
} else {
|
||
/* We want to write depth in the buffer in the first 24-bits,
|
||
* however, the hardware has depth in bits 8-31, so swizzle the
|
||
* the source components to match what we want. Also, we don't
|
||
* want to write bits 24-31 in the destination.
|
||
*/
|
||
src_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
dst_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
cmask = VK_COLOR_COMPONENT_R_BIT |
|
||
VK_COLOR_COMPONENT_G_BIT |
|
||
VK_COLOR_COMPONENT_B_BIT;
|
||
cswizzle.r = VK_COMPONENT_SWIZZLE_G;
|
||
cswizzle.g = VK_COMPONENT_SWIZZLE_B;
|
||
cswizzle.b = VK_COMPONENT_SWIZZLE_A;
|
||
cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
|
||
}
|
||
break;
|
||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||
assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
|
||
assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
|
||
/* Copying from S8D24. We want to write 8-bit stencil values only,
|
||
* so adjust the buffer bpp for that. Since the hardware stores stencil
|
||
* in the LSB, we can just do a RGBA8UI to R8UI blit.
|
||
*/
|
||
src_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
dst_format = VK_FORMAT_R8_UINT;
|
||
buffer_bpp = 1;
|
||
break;
|
||
default:
|
||
unreachable("unsupported aspect");
|
||
return handled;
|
||
};
|
||
break;
|
||
case 2:
|
||
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
|
||
copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
|
||
dst_format = VK_FORMAT_R16_UINT;
|
||
src_format = dst_format;
|
||
break;
|
||
case 1:
|
||
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
dst_format = VK_FORMAT_R8_UINT;
|
||
src_format = dst_format;
|
||
break;
|
||
default:
|
||
unreachable("unsupported bit-size");
|
||
return handled;
|
||
};
|
||
|
||
/* The hardware doesn't support linear depth/stencil stores, so we
|
||
* implement copies of depth/stencil aspect as color copies using a
|
||
* compatible color format.
|
||
*/
|
||
assert(vk_format_is_color(src_format));
|
||
assert(vk_format_is_color(dst_format));
|
||
copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
|
||
/* We should be able to handle the blit if we got this far */
|
||
handled = true;
|
||
|
||
/* Obtain the 2D buffer region spec */
|
||
uint32_t buf_width, buf_height;
|
||
if (region->bufferRowLength == 0)
|
||
buf_width = region->imageExtent.width;
|
||
else
|
||
buf_width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
buf_height = region->imageExtent.height;
|
||
else
|
||
buf_height = region->bufferImageHeight;
|
||
|
||
/* If the image is compressed, the bpp refers to blocks, not pixels */
|
||
uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
|
||
uint32_t block_height = vk_format_get_blockheight(image->vk_format);
|
||
buf_width = buf_width / block_width;
|
||
buf_height = buf_height / block_height;
|
||
|
||
/* Compute layers to copy */
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->imageSubresource.layerCount;
|
||
else
|
||
num_layers = region->imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
/* Our blit interface can see the real format of the images to detect
|
||
* copies between compressed and uncompressed images and adapt the
|
||
* blit region accordingly. Here we are just doing a raw copy of
|
||
* compressed data, but we are passing an uncompressed view of the
|
||
* buffer for the blit destination image (since compressed formats are
|
||
* not renderable), so we also want to provide an uncompressed view of
|
||
* the source image.
|
||
*/
|
||
VkResult result;
|
||
struct v3dv_device *device = cmd_buffer->device;
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
if (vk_format_is_compressed(image->vk_format)) {
|
||
VkImage uiview;
|
||
VkImageCreateInfo uiview_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||
.imageType = VK_IMAGE_TYPE_3D,
|
||
.format = dst_format,
|
||
.extent = { buf_width, buf_height, image->extent.depth },
|
||
.mipLevels = image->levels,
|
||
.arrayLayers = image->array_size,
|
||
.samples = image->samples,
|
||
.tiling = image->tiling,
|
||
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
|
||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||
.queueFamilyIndexCount = 0,
|
||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)uiview,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
|
||
|
||
result = v3dv_BindImageMemory(_device, uiview,
|
||
v3dv_device_memory_to_handle(image->mem),
|
||
image->mem_offset);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
image = v3dv_image_from_handle(uiview);
|
||
}
|
||
|
||
/* Copy requested layers */
|
||
for (uint32_t i = 0; i < num_layers; i++) {
|
||
/* Create the destination blit image from the destination buffer */
|
||
VkImageCreateInfo image_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||
.imageType = VK_IMAGE_TYPE_2D,
|
||
.format = dst_format,
|
||
.extent = { buf_width, buf_height, 1 },
|
||
.mipLevels = 1,
|
||
.arrayLayers = 1,
|
||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||
.tiling = VK_IMAGE_TILING_LINEAR,
|
||
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
|
||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||
.queueFamilyIndexCount = 0,
|
||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
|
||
VkImage buffer_image;
|
||
result =
|
||
v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)buffer_image,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
|
||
|
||
/* Bind the buffer memory to the image */
|
||
VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
|
||
i * buf_width * buf_height * buffer_bpp;
|
||
result = v3dv_BindImageMemory(_device, buffer_image,
|
||
v3dv_device_memory_to_handle(buffer->mem),
|
||
buffer_offset);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
/* Blit-copy the requested image extent.
|
||
*
|
||
* Since we are copying, the blit must use the same format on the
|
||
* destination and source images to avoid format conversions. The
|
||
* only exception is copying stencil, which we upload to a R8UI source
|
||
* image, but that we need to blit to a S8D24 destination (the only
|
||
* stencil format we support).
|
||
*/
|
||
const VkImageBlit blit_region = {
|
||
.srcSubresource = {
|
||
.aspectMask = copy_aspect,
|
||
.mipLevel = region->imageSubresource.mipLevel,
|
||
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
|
||
.layerCount = 1,
|
||
},
|
||
.srcOffsets = {
|
||
{
|
||
DIV_ROUND_UP(region->imageOffset.x, block_width),
|
||
DIV_ROUND_UP(region->imageOffset.y, block_height),
|
||
region->imageOffset.z + i,
|
||
},
|
||
{
|
||
DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
|
||
block_width),
|
||
DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
|
||
block_height),
|
||
region->imageOffset.z + i + 1,
|
||
},
|
||
},
|
||
.dstSubresource = {
|
||
.aspectMask = copy_aspect,
|
||
.mipLevel = 0,
|
||
.baseArrayLayer = 0,
|
||
.layerCount = 1,
|
||
},
|
||
.dstOffsets = {
|
||
{ 0, 0, 0 },
|
||
{
|
||
DIV_ROUND_UP(region->imageExtent.width, block_width),
|
||
DIV_ROUND_UP(region->imageExtent.height, block_height),
|
||
1
|
||
},
|
||
},
|
||
};
|
||
|
||
handled = blit_shader(cmd_buffer,
|
||
v3dv_image_from_handle(buffer_image), dst_format,
|
||
image, src_format,
|
||
cmask, &cswizzle,
|
||
&blit_region, VK_FILTER_NEAREST, false);
|
||
if (!handled) {
|
||
/* This is unexpected, we should have a supported blit spec */
|
||
unreachable("Unable to blit buffer to destination image");
|
||
return false;
|
||
}
|
||
}
|
||
|
||
assert(handled);
|
||
return true;
|
||
}
|
||
|
||
static VkFormat
|
||
get_compatible_tlb_format(VkFormat format)
|
||
{
|
||
switch (format) {
|
||
case VK_FORMAT_R8G8B8A8_SNORM:
|
||
return VK_FORMAT_R8G8B8A8_UINT;
|
||
|
||
case VK_FORMAT_R8G8_SNORM:
|
||
return VK_FORMAT_R8G8_UINT;
|
||
|
||
case VK_FORMAT_R8_SNORM:
|
||
return VK_FORMAT_R8_UINT;
|
||
|
||
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
|
||
return VK_FORMAT_A8B8G8R8_UINT_PACK32;
|
||
|
||
case VK_FORMAT_R16_UNORM:
|
||
case VK_FORMAT_R16_SNORM:
|
||
return VK_FORMAT_R16_UINT;
|
||
|
||
case VK_FORMAT_R16G16_UNORM:
|
||
case VK_FORMAT_R16G16_SNORM:
|
||
return VK_FORMAT_R16G16_UINT;
|
||
|
||
case VK_FORMAT_R16G16B16A16_UNORM:
|
||
case VK_FORMAT_R16G16B16A16_SNORM:
|
||
return VK_FORMAT_R16G16B16A16_UINT;
|
||
|
||
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
|
||
return VK_FORMAT_R32_SFLOAT;
|
||
|
||
/* We can't render to compressed formats using the TLB so instead we use
|
||
* a compatible format with the same bpp as the compressed format. Because
|
||
* the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
|
||
* case of ETC), when we implement copies with the compatible format we
|
||
* will have to divide offsets and dimensions on the compressed image by
|
||
* the compressed block size.
|
||
*/
|
||
case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
|
||
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
|
||
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
|
||
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
|
||
return VK_FORMAT_R32G32B32A32_UINT;
|
||
|
||
case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
|
||
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
|
||
case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
|
||
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
|
||
case VK_FORMAT_EAC_R11_UNORM_BLOCK:
|
||
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
|
||
return VK_FORMAT_R16G16B16A16_UINT;
|
||
|
||
default:
|
||
return VK_FORMAT_UNDEFINED;
|
||
}
|
||
}
|
||
|
||
static inline bool
|
||
can_use_tlb(struct v3dv_image *image,
|
||
const VkOffset3D *offset,
|
||
VkFormat *compat_format)
|
||
{
|
||
if (offset->x != 0 || offset->y != 0)
|
||
return false;
|
||
|
||
if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
|
||
if (compat_format)
|
||
*compat_format = image->vk_format;
|
||
return true;
|
||
}
|
||
|
||
/* If the image format is not TLB-supported, then check if we can use
|
||
* a compatible format instead.
|
||
*/
|
||
if (compat_format) {
|
||
*compat_format = get_compatible_tlb_format(image->vk_format);
|
||
if (*compat_format != VK_FORMAT_UNDEFINED)
|
||
return true;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
|
||
VkImage srcImage,
|
||
VkImageLayout srcImageLayout,
|
||
VkBuffer destBuffer,
|
||
uint32_t regionCount,
|
||
const VkBufferImageCopy *pRegions)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, image, srcImage);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer);
|
||
|
||
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
for (uint32_t i = 0; i < regionCount; i++) {
|
||
if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i]))
|
||
continue;
|
||
if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i]))
|
||
continue;
|
||
unreachable("Unsupported image to buffer copy.");
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
uint32_t layer_offset,
|
||
const VkImageCopy *region)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
assert((src->type != VK_IMAGE_TYPE_3D &&
|
||
layer_offset < region->srcSubresource.layerCount) ||
|
||
layer_offset < src->extent.depth);
|
||
|
||
const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
|
||
region->srcSubresource.baseArrayLayer + layer_offset :
|
||
region->srcOffset.z + layer_offset;
|
||
|
||
emit_image_load(cl, framebuffer, src,
|
||
region->srcSubresource.aspectMask,
|
||
src_layer,
|
||
region->srcSubresource.mipLevel,
|
||
false, false);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
assert((dst->type != VK_IMAGE_TYPE_3D &&
|
||
layer_offset < region->dstSubresource.layerCount) ||
|
||
layer_offset < dst->extent.depth);
|
||
|
||
const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
|
||
region->dstSubresource.baseArrayLayer + layer_offset :
|
||
region->dstOffset.z + layer_offset;
|
||
|
||
emit_image_store(cl, framebuffer, dst,
|
||
region->dstSubresource.aspectMask,
|
||
dst_layer,
|
||
region->dstSubresource.mipLevel,
|
||
false, false, false);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_image_layer(struct v3dv_job *job,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t layer,
|
||
const VkImageCopy *region)
|
||
{
|
||
emit_frame_setup(job, layer, NULL);
|
||
emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_copy_image_rcl(struct v3dv_job *job,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
struct framebuffer_data *framebuffer,
|
||
const VkImageCopy *region)
|
||
{
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
|
||
emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
/* Bits and fields of the 'ioa' word of a TFU job. emit_tfu_job() ORs the
 * destination address and the destination format into this word.
 */

/* Disable level 0 write, just write following mipmaps */
#define V3D_TFU_IOA_DIMTW (1 << 0)
#define V3D_TFU_IOA_FORMAT_SHIFT 3
#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
#define V3D_TFU_IOA_FORMAT_UIF_XOR 7

/* Field positions within the 'icfg' word of a TFU job */
#define V3D_TFU_ICFG_NUMMM_SHIFT 5
#define V3D_TFU_ICFG_TTYPE_SHIFT 9

#define V3D_TFU_ICFG_OPAD_SHIFT 22

/* Values for the format field of 'icfg'. emit_tfu_job() derives the value
 * from the tiling of the source slice.
 */
#define V3D_TFU_ICFG_FORMAT_SHIFT 18
#define V3D_TFU_ICFG_FORMAT_RASTER 0
#define V3D_TFU_ICFG_FORMAT_SAND_128 1
#define V3D_TFU_ICFG_FORMAT_SAND_256 2
#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
|
||
|
||
static void
|
||
emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
uint32_t dst_mip_level,
|
||
uint32_t dst_layer,
|
||
struct v3dv_image *src,
|
||
uint32_t src_mip_level,
|
||
uint32_t src_layer,
|
||
uint32_t width,
|
||
uint32_t height,
|
||
const struct v3dv_format *format)
|
||
{
|
||
const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
|
||
const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
|
||
|
||
assert(dst->mem && dst->mem->bo);
|
||
const struct v3dv_bo *dst_bo = dst->mem->bo;
|
||
|
||
assert(src->mem && src->mem->bo);
|
||
const struct v3dv_bo *src_bo = src->mem->bo;
|
||
|
||
struct drm_v3d_submit_tfu tfu = {
|
||
.ios = (height << 16) | width,
|
||
.bo_handles = {
|
||
dst_bo->handle,
|
||
src_bo->handle != dst_bo->handle ? src_bo->handle : 0
|
||
},
|
||
};
|
||
|
||
const uint32_t src_offset =
|
||
src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
|
||
tfu.iia |= src_offset;
|
||
|
||
uint32_t icfg;
|
||
if (src_slice->tiling == VC5_TILING_RASTER) {
|
||
icfg = V3D_TFU_ICFG_FORMAT_RASTER;
|
||
} else {
|
||
icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
|
||
(src_slice->tiling - VC5_TILING_LINEARTILE);
|
||
}
|
||
tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
|
||
|
||
const uint32_t dst_offset =
|
||
dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
|
||
tfu.ioa |= dst_offset;
|
||
|
||
tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
|
||
(dst_slice->tiling - VC5_TILING_LINEARTILE)) <<
|
||
V3D_TFU_IOA_FORMAT_SHIFT;
|
||
tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
|
||
|
||
switch (src_slice->tiling) {
|
||
case VC5_TILING_UIF_NO_XOR:
|
||
case VC5_TILING_UIF_XOR:
|
||
tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
|
||
break;
|
||
case VC5_TILING_RASTER:
|
||
tfu.iis |= src_slice->stride / src->cpp;
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
|
||
/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
|
||
* OPAD field for the destination (how many extra UIF blocks beyond
|
||
* those necessary to cover the height).
|
||
*/
|
||
if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR ||
|
||
dst_slice->tiling == VC5_TILING_UIF_XOR) {
|
||
uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
|
||
uint32_t implicit_padded_height = align(height, uif_block_h);
|
||
uint32_t icfg =
|
||
(dst_slice->padded_height - implicit_padded_height) / uif_block_h;
|
||
tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
|
||
}
|
||
|
||
v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageCopy *region)
|
||
{
|
||
/* Destination can't be raster format */
|
||
if (dst->tiling == VK_IMAGE_TILING_LINEAR)
|
||
return false;
|
||
|
||
/* We can only do full copies, so if the format is D24S8 both aspects need
|
||
* to be copied. We only need to check the dst format because the spec
|
||
* states that depth/stencil formats must match exactly.
|
||
*/
|
||
if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
|
||
VK_IMAGE_ASPECT_STENCIL_BIT;
|
||
if (region->dstSubresource.aspectMask != ds_aspects)
|
||
return false;
|
||
}
|
||
|
||
/* Don't handle copies between uncompressed and compressed formats for now.
|
||
*
|
||
* FIXME: we should be able to handle these easily but there is no coverage
|
||
* in CTS at the moment that make such copies with full images (which we
|
||
* require here), only partial copies. Also, in that case the code below that
|
||
* checks for "dst image complete" requires some changes, since it is
|
||
* checking against the region dimensions, which are in units of the source
|
||
* image format.
|
||
*/
|
||
if (vk_format_is_compressed(dst->vk_format) !=
|
||
vk_format_is_compressed(src->vk_format)) {
|
||
return false;
|
||
}
|
||
|
||
/* Source region must start at (0,0) */
|
||
if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
|
||
return false;
|
||
|
||
/* Destination image must be complete */
|
||
if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
|
||
return false;
|
||
|
||
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
|
||
uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
|
||
uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
|
||
if (region->extent.width != dst_width || region->extent.height != dst_height)
|
||
return false;
|
||
|
||
/* From vkCmdCopyImage:
|
||
*
|
||
* "When copying between compressed and uncompressed formats the extent
|
||
* members represent the texel dimensions of the source image and not
|
||
* the destination."
|
||
*/
|
||
const uint32_t block_w = vk_format_get_blockwidth(src->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(src->vk_format);
|
||
uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
|
||
uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
|
||
|
||
/* Account for sample count */
|
||
assert(dst->samples == src->samples);
|
||
if (dst->samples > VK_SAMPLE_COUNT_1_BIT) {
|
||
assert(dst->samples == VK_SAMPLE_COUNT_4_BIT);
|
||
width *= 2;
|
||
height *= 2;
|
||
}
|
||
|
||
/* The TFU unit doesn't handle format conversions so we need the formats to
|
||
* match. On the other hand, vkCmdCopyImage allows different color formats
|
||
* on the source and destination images, but only if they are texel
|
||
* compatible. For us, this means that we can effectively ignore different
|
||
* formats and just make the copy using either of them, since we are just
|
||
* moving raw data and not making any conversions.
|
||
*
|
||
* Also, the formats supported by the TFU unit are limited, but again, since
|
||
* we are only doing raw copies here without interpreting or converting
|
||
* the underlying pixel data according to its format, we can always choose
|
||
* to use compatible formats that are supported with the TFU unit.
|
||
*/
|
||
assert(dst->cpp == src->cpp);
|
||
const struct v3dv_format *format =
|
||
v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
|
||
dst->cpp, NULL);
|
||
|
||
/* Emit a TFU job for each layer to blit */
|
||
const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?
|
||
region->dstSubresource.layerCount :
|
||
region->extent.depth;
|
||
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
|
||
|
||
const uint32_t base_src_layer = src->type != VK_IMAGE_TYPE_3D ?
|
||
region->srcSubresource.baseArrayLayer : region->srcOffset.z;
|
||
const uint32_t base_dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
|
||
region->dstSubresource.baseArrayLayer : region->dstOffset.z;
|
||
for (uint32_t i = 0; i < layer_count; i++) {
|
||
emit_tfu_job(cmd_buffer,
|
||
dst, dst_mip_level, base_dst_layer + i,
|
||
src, src_mip_level, base_src_layer + i,
|
||
width, height, format);
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageCopy *region)
|
||
{
|
||
VkFormat fb_format;
|
||
if (!can_use_tlb(src, ®ion->srcOffset, &fb_format) ||
|
||
!can_use_tlb(dst, ®ion->dstOffset, &fb_format)) {
|
||
return false;
|
||
}
|
||
|
||
/* From the Vulkan spec, VkImageCopy valid usage:
|
||
*
|
||
* "If neither the calling command’s srcImage nor the calling command’s
|
||
* dstImage has a multi-planar image format then the aspectMask member
|
||
* of srcSubresource and dstSubresource must match."
|
||
*/
|
||
assert(region->dstSubresource.aspectMask ==
|
||
region->srcSubresource.aspectMask);
|
||
uint32_t internal_type, internal_bpp;
|
||
get_internal_type_bpp_for_image_aspects(fb_format,
|
||
region->dstSubresource.aspectMask,
|
||
&internal_type, &internal_bpp);
|
||
|
||
/* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
|
||
*
|
||
* "The number of slices of the extent (for 3D) or layers of the
|
||
* srcSubresource (for non-3D) must match the number of slices of the
|
||
* extent (for 3D) or layers of the dstSubresource (for non-3D)."
|
||
*/
|
||
assert((src->type != VK_IMAGE_TYPE_3D ?
|
||
region->srcSubresource.layerCount : region->extent.depth) ==
|
||
(dst->type != VK_IMAGE_TYPE_3D ?
|
||
region->dstSubresource.layerCount : region->extent.depth));
|
||
uint32_t num_layers;
|
||
if (dst->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->dstSubresource.layerCount;
|
||
else
|
||
num_layers = region->extent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return true;
|
||
|
||
/* Handle copy to compressed image using compatible format */
|
||
const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
|
||
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
|
||
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
|
||
|
||
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,
|
||
src->samples > VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
emit_copy_image_rcl(job, dst, src, &framebuffer, region);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
|
||
return true;
|
||
}
|
||
|
||
/**
|
||
* Takes the image provided as argument and creates a new image that has
|
||
* the same specification and aliases the same memory storage, except that:
|
||
*
|
||
* - It has the uncompressed format passed in.
|
||
* - Its original width/height are scaled by the factors passed in.
|
||
*
|
||
* This is useful to implement copies from compressed images using the blit
|
||
* path. The idea is that we create uncompressed "image views" of both the
|
||
* source and destination images using the uncompressed format and then we
|
||
* define the copy blit in terms of that format.
|
||
*/
|
||
static struct v3dv_image *
|
||
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *src,
|
||
float width_scale,
|
||
float height_scale,
|
||
VkFormat format)
|
||
{
|
||
assert(!vk_format_is_compressed(format));
|
||
|
||
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
|
||
|
||
VkImageCreateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||
.imageType = src->type,
|
||
.format = format,
|
||
.extent = {
|
||
.width = src->extent.width * width_scale,
|
||
.height = src->extent.height * height_scale,
|
||
.depth = src->extent.depth,
|
||
},
|
||
.mipLevels = src->levels,
|
||
.arrayLayers = src->array_size,
|
||
.samples = src->samples,
|
||
.tiling = src->tiling,
|
||
.usage = src->usage,
|
||
};
|
||
|
||
VkImage _image;
|
||
VkResult result =
|
||
v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
|
||
if (result != VK_SUCCESS) {
|
||
v3dv_flag_oom(cmd_buffer, NULL);
|
||
return NULL;
|
||
}
|
||
|
||
struct v3dv_image *image = v3dv_image_from_handle(_image);
|
||
image->mem = src->mem;
|
||
image->mem_offset = src->mem_offset;
|
||
return image;
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageCopy *region)
|
||
{
|
||
const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
|
||
const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
|
||
const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
|
||
const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
|
||
const float block_scale_w = (float)src_block_w / (float)dst_block_w;
|
||
const float block_scale_h = (float)src_block_h / (float)dst_block_h;
|
||
|
||
/* We need to choose a single format for the blit to ensure that this is
|
||
* really a copy and there are not format conversions going on. Since we
|
||
* going to blit, we need to make sure that the selected format can be
|
||
* both rendered to and textured from.
|
||
*/
|
||
VkFormat format;
|
||
float src_scale_w = 1.0f;
|
||
float src_scale_h = 1.0f;
|
||
float dst_scale_w = block_scale_w;
|
||
float dst_scale_h = block_scale_h;
|
||
if (vk_format_is_compressed(src->vk_format)) {
|
||
/* If we are copying from a compressed format we should be aware that we
|
||
* are going to texture from the source image, and the texture setup
|
||
* knows the actual size of the image, so we need to choose a format
|
||
* that has a per-texel (not per-block) bpp that is compatible for that
|
||
* image size. For example, for a source image with size Bw*WxBh*H
|
||
* and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
|
||
* each of the Bw*WxBh*H texels in the compressed source image is 8-bit
|
||
* (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
|
||
* so we could specify a blit with size Bw*WxBh*H and a format with
|
||
* a bpp of 8-bit per texel (R8_UINT).
|
||
*
|
||
* Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
|
||
* which is 64-bit per texel, then we would need a 4-bit format, which
|
||
* we don't have, so instead we still choose an 8-bit format, but we
|
||
* apply a divisor to the row dimensions of the blit, since we are
|
||
* copying two texels per item.
|
||
*
|
||
* Generally, we can choose any format so long as we compute appropriate
|
||
* divisors for the width and height depending on the source image's
|
||
* bpp.
|
||
*/
|
||
assert(src->cpp == dst->cpp);
|
||
|
||
uint32_t divisor_w, divisor_h;
|
||
format = VK_FORMAT_R32G32_UINT;
|
||
switch (src->cpp) {
|
||
case 16:
|
||
format = VK_FORMAT_R32G32B32A32_UINT;
|
||
divisor_w = 4;
|
||
divisor_h = 4;
|
||
break;
|
||
case 8:
|
||
format = VK_FORMAT_R16G16B16A16_UINT;
|
||
divisor_w = 4;
|
||
divisor_h = 4;
|
||
break;
|
||
default:
|
||
unreachable("Unsupported compressed format");
|
||
}
|
||
|
||
/* Create image views of the src/dst images that we can interpret in
|
||
* terms of the canonical format.
|
||
*/
|
||
src_scale_w /= divisor_w;
|
||
src_scale_h /= divisor_h;
|
||
dst_scale_w /= divisor_w;
|
||
dst_scale_h /= divisor_h;
|
||
|
||
src = create_image_alias(cmd_buffer, src,
|
||
src_scale_w, src_scale_h, format);
|
||
|
||
dst = create_image_alias(cmd_buffer, dst,
|
||
dst_scale_w, dst_scale_h, format);
|
||
} else {
|
||
format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
|
||
src->vk_format : get_compatible_tlb_format(src->vk_format);
|
||
if (format == VK_FORMAT_UNDEFINED)
|
||
return false;
|
||
|
||
const struct v3dv_format *f = v3dv_get_format(format);
|
||
if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
|
||
return false;
|
||
}
|
||
|
||
/* Given an uncompressed image with size WxH, if we copy it to a compressed
|
||
* image, it will result in an image with size W*bWxH*bH, where bW and bH
|
||
* are the compressed format's block width and height. This means that
|
||
* copies between compressed and uncompressed images involve different
|
||
* image sizes, and therefore, we need to take that into account when
|
||
* setting up the source and destination blit regions below, so they are
|
||
* consistent from the point of view of the single compatible format
|
||
* selected for the copy.
|
||
*
|
||
* We should take into account that the dimensions of the region provided
|
||
* to the copy command are specified in terms of the source image. With that
|
||
* in mind, below we adjust the blit destination region to be consistent with
|
||
* the source region for the compatible format, so basically, we apply
|
||
* the block scale factor to the destination offset provided by the copy
|
||
* command (because it is specified in terms of the destination image, not
|
||
* the source), and then we just add the region copy dimensions to that
|
||
* (since the region dimensions are already specified in terms of the source
|
||
* image).
|
||
*/
|
||
const VkOffset3D src_start = {
|
||
region->srcOffset.x * src_scale_w,
|
||
region->srcOffset.y * src_scale_h,
|
||
region->srcOffset.z,
|
||
};
|
||
const VkOffset3D src_end = {
|
||
src_start.x + region->extent.width * src_scale_w,
|
||
src_start.y + region->extent.height * src_scale_h,
|
||
src_start.z + region->extent.depth,
|
||
};
|
||
|
||
const VkOffset3D dst_start = {
|
||
region->dstOffset.x * dst_scale_w,
|
||
region->dstOffset.y * dst_scale_h,
|
||
region->dstOffset.z,
|
||
};
|
||
const VkOffset3D dst_end = {
|
||
dst_start.x + region->extent.width * src_scale_w,
|
||
dst_start.y + region->extent.height * src_scale_h,
|
||
dst_start.z + region->extent.depth,
|
||
};
|
||
|
||
const VkImageBlit blit_region = {
|
||
.srcSubresource = region->srcSubresource,
|
||
.srcOffsets = { src_start, src_end },
|
||
.dstSubresource = region->dstSubresource,
|
||
.dstOffsets = { dst_start, dst_end },
|
||
};
|
||
bool handled = blit_shader(cmd_buffer,
|
||
dst, format,
|
||
src, format,
|
||
0, NULL,
|
||
&blit_region, VK_FILTER_NEAREST, true);
|
||
|
||
/* We should have selected formats that we can blit */
|
||
assert(handled);
|
||
return handled;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdCopyImage(VkCommandBuffer commandBuffer,
|
||
VkImage srcImage,
|
||
VkImageLayout srcImageLayout,
|
||
VkImage dstImage,
|
||
VkImageLayout dstImageLayout,
|
||
uint32_t regionCount,
|
||
const VkImageCopy *pRegions)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
|
||
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
|
||
|
||
assert(src->samples == dst->samples);
|
||
|
||
for (uint32_t i = 0; i < regionCount; i++) {
|
||
if (copy_image_tfu(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
unreachable("Image copy not supported");
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_clear_image_per_tile_list(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *image,
|
||
VkImageAspectFlags aspects,
|
||
uint32_t layer,
|
||
uint32_t level)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
emit_image_store(cl, framebuffer, image, aspects, layer, level,
|
||
false, false, false);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_clear_image(struct v3dv_job *job,
|
||
struct v3dv_image *image,
|
||
struct framebuffer_data *framebuffer,
|
||
VkImageAspectFlags aspects,
|
||
uint32_t layer,
|
||
uint32_t level)
|
||
{
|
||
emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_clear_image_rcl(struct v3dv_job *job,
|
||
struct v3dv_image *image,
|
||
struct framebuffer_data *framebuffer,
|
||
const union v3dv_clear_value *clear_value,
|
||
VkImageAspectFlags aspects,
|
||
uint32_t layer,
|
||
uint32_t level)
|
||
{
|
||
const struct rcl_clear_info clear_info = {
|
||
.clear_value = clear_value,
|
||
.image = image,
|
||
.aspects = aspects,
|
||
.layer = layer,
|
||
.level = level,
|
||
};
|
||
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
emit_frame_setup(job, 0, clear_value);
|
||
emit_clear_image(job, image, framebuffer, aspects, layer, level);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
static void
|
||
get_hw_clear_color(const VkClearColorValue *color,
|
||
VkFormat fb_format,
|
||
VkFormat image_format,
|
||
uint32_t internal_type,
|
||
uint32_t internal_bpp,
|
||
uint32_t *hw_color)
|
||
{
|
||
const uint32_t internal_size = 4 << internal_bpp;
|
||
|
||
/* If the image format doesn't match the framebuffer format, then we are
|
||
* trying to clear an unsupported tlb format using a compatible
|
||
* format for the framebuffer. In this case, we want to make sure that
|
||
* we pack the clear value according to the original format semantics,
|
||
* not the compatible format.
|
||
*/
|
||
if (fb_format == image_format) {
|
||
v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color);
|
||
} else {
|
||
union util_color uc;
|
||
enum pipe_format pipe_image_format =
|
||
vk_format_to_pipe_format(image_format);
|
||
util_pack_color(color->float32, pipe_image_format, &uc);
|
||
memcpy(hw_color, uc.ui, internal_size);
|
||
}
|
||
}
|
||
|
||
/* Returns true if the implementation is able to handle the case, false
|
||
* otherwise.
|
||
*/
|
||
static bool
|
||
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
const VkClearValue *clear_value,
|
||
const VkImageSubresourceRange *range)
|
||
{
|
||
const VkOffset3D origin = { 0, 0, 0 };
|
||
VkFormat fb_format;
|
||
if (!can_use_tlb(image, &origin, &fb_format))
|
||
return false;
|
||
|
||
uint32_t internal_type, internal_bpp;
|
||
get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
|
||
&internal_type, &internal_bpp);
|
||
|
||
union v3dv_clear_value hw_clear_value = { 0 };
|
||
if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
|
||
get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
|
||
internal_type, internal_bpp, &hw_clear_value.color[0]);
|
||
} else {
|
||
assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
|
||
(range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
|
||
hw_clear_value.z = clear_value->depthStencil.depth;
|
||
hw_clear_value.s = clear_value->depthStencil.stencil;
|
||
}
|
||
|
||
uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
|
||
image->levels - range->baseMipLevel :
|
||
range->levelCount;
|
||
uint32_t min_level = range->baseMipLevel;
|
||
uint32_t max_level = range->baseMipLevel + level_count;
|
||
|
||
/* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
|
||
* Instead, we need to consider the full depth dimension of the image, which
|
||
* goes from 0 up to the level's depth extent.
|
||
*/
|
||
uint32_t min_layer;
|
||
uint32_t max_layer;
|
||
if (image->type != VK_IMAGE_TYPE_3D) {
|
||
uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
|
||
image->array_size - range->baseArrayLayer :
|
||
range->layerCount;
|
||
min_layer = range->baseArrayLayer;
|
||
max_layer = range->baseArrayLayer + layer_count;
|
||
} else {
|
||
min_layer = 0;
|
||
max_layer = 0;
|
||
}
|
||
|
||
for (uint32_t level = min_level; level < max_level; level++) {
|
||
if (image->type == VK_IMAGE_TYPE_3D)
|
||
max_layer = u_minify(image->extent.depth, level);
|
||
for (uint32_t layer = min_layer; layer < max_layer; layer++) {
|
||
uint32_t width = u_minify(image->extent.width, level);
|
||
uint32_t height = u_minify(image->extent.height, level);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
|
||
if (!job)
|
||
return true;
|
||
|
||
/* We start a a new job for each layer so the frame "depth" is 1 */
|
||
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
|
||
image->samples > VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
|
||
/* If this triggers it is an application bug: the spec requires
|
||
* that any aspects to clear are present in the image.
|
||
*/
|
||
assert(range->aspectMask & image->aspects);
|
||
|
||
emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value,
|
||
range->aspectMask, layer, level);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
|
||
VkImage _image,
|
||
VkImageLayout imageLayout,
|
||
const VkClearColorValue *pColor,
|
||
uint32_t rangeCount,
|
||
const VkImageSubresourceRange *pRanges)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, image, _image);
|
||
|
||
const VkClearValue clear_value = {
|
||
.color = *pColor,
|
||
};
|
||
|
||
for (uint32_t i = 0; i < rangeCount; i++) {
|
||
if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
|
||
continue;
|
||
unreachable("Unsupported color clear.");
|
||
}
|
||
}
|
||
|
||
void
|
||
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
||
VkImage _image,
|
||
VkImageLayout imageLayout,
|
||
const VkClearDepthStencilValue *pDepthStencil,
|
||
uint32_t rangeCount,
|
||
const VkImageSubresourceRange *pRanges)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, image, _image);
|
||
|
||
const VkClearValue clear_value = {
|
||
.depthStencil = *pDepthStencil,
|
||
};
|
||
|
||
for (uint32_t i = 0; i < rangeCount; i++) {
|
||
if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
|
||
continue;
|
||
unreachable("Unsupported depth/stencil clear.");
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
|
||
struct v3dv_bo *dst,
|
||
struct v3dv_bo *src,
|
||
uint32_t dst_offset,
|
||
uint32_t src_offset,
|
||
uint32_t stride,
|
||
uint32_t format)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
emit_linear_store(cl, RENDER_TARGET_0,
|
||
dst, dst_offset, stride, false, format);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer(struct v3dv_job *job,
|
||
struct v3dv_bo *dst,
|
||
struct v3dv_bo *src,
|
||
uint32_t dst_offset,
|
||
uint32_t src_offset,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t format)
|
||
{
|
||
const uint32_t stride = job->frame_tiling.width * 4;
|
||
emit_copy_buffer_per_tile_list(job, dst, src,
|
||
dst_offset, src_offset,
|
||
stride, format);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer_rcl(struct v3dv_job *job,
|
||
struct v3dv_bo *dst,
|
||
struct v3dv_bo *src,
|
||
uint32_t dst_offset,
|
||
uint32_t src_offset,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t format)
|
||
{
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
emit_frame_setup(job, 0, NULL);
|
||
emit_copy_buffer(job, dst, src, dst_offset, src_offset, framebuffer, format);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
/* Figure out a TLB size configuration for a number of pixels to process.
 *
 * The result never covers more than num_pixels pixels, so it may cover fewer
 * than requested. Beware that we can't "render" more than 4096x4096 pixels in
 * a single job; if the pixel count is larger than this, the caller might need
 * to split the job and call this function multiple times.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t max_dim_pixels = 4096;
   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;

   /* Start from the largest frame we support and only refine it when the
    * request fits in a single job.
    */
   uint32_t w = max_dim_pixels;
   uint32_t h = max_dim_pixels;
   if (num_pixels <= max_pixels) {
      /* Begin with a single row and keep folding it in half for as long as
       * it is too wide, or it is even and still more than twice as wide as
       * it is tall.
       */
      w = num_pixels;
      h = 1;
      for (;;) {
         const bool too_wide = w > max_dim_pixels;
         const bool can_fold = (w & 1) == 0 && w > 2 * h;
         if (!too_wide && !can_fold)
            break;
         w /= 2;
         h *= 2;
      }
   }

   assert(w <= max_dim_pixels && h <= max_dim_pixels);
   assert(w * h <= num_pixels);
   assert(w > 0 && h > 0);

   *width = w;
   *height = h;
}
|
||
|
||
static struct v3dv_job *
|
||
copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_bo *dst,
|
||
uint32_t dst_offset,
|
||
struct v3dv_bo *src,
|
||
uint32_t src_offset,
|
||
const VkBufferCopy *region)
|
||
{
|
||
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
|
||
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
|
||
|
||
/* Select appropriate pixel format for the copy operation based on the
|
||
* size to copy and the alignment of the source and destination offsets.
|
||
*/
|
||
src_offset += region->srcOffset;
|
||
dst_offset += region->dstOffset;
|
||
uint32_t item_size = 4;
|
||
while (item_size > 1 &&
|
||
(src_offset % item_size != 0 || dst_offset % item_size != 0)) {
|
||
item_size /= 2;
|
||
}
|
||
|
||
while (item_size > 1 && region->size % item_size != 0)
|
||
item_size /= 2;
|
||
|
||
assert(region->size % item_size == 0);
|
||
uint32_t num_items = region->size / item_size;
|
||
assert(num_items > 0);
|
||
|
||
uint32_t format;
|
||
VkFormat vk_format;
|
||
switch (item_size) {
|
||
case 4:
|
||
format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
|
||
vk_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
break;
|
||
case 2:
|
||
format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
|
||
vk_format = VK_FORMAT_R8G8_UINT;
|
||
break;
|
||
default:
|
||
format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
|
||
vk_format = VK_FORMAT_R8_UINT;
|
||
break;
|
||
}
|
||
|
||
struct v3dv_job *job = NULL;
|
||
while (num_items > 0) {
|
||
job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return NULL;
|
||
|
||
uint32_t width, height;
|
||
framebuffer_size_for_pixel_count(num_items, &width, &height);
|
||
|
||
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, vk_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
|
||
emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset,
|
||
&framebuffer, format);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
|
||
const uint32_t items_copied = width * height;
|
||
const uint32_t bytes_copied = items_copied * item_size;
|
||
num_items -= items_copied;
|
||
src_offset += bytes_copied;
|
||
dst_offset += bytes_copied;
|
||
}
|
||
|
||
return job;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer,
|
||
VkBuffer srcBuffer,
|
||
VkBuffer dstBuffer,
|
||
uint32_t regionCount,
|
||
const VkBufferCopy *pRegions)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
|
||
|
||
for (uint32_t i = 0; i < regionCount; i++) {
|
||
copy_buffer(cmd_buffer,
|
||
dst_buffer->mem->bo, dst_buffer->mem_offset,
|
||
src_buffer->mem->bo, src_buffer->mem_offset,
|
||
&pRegions[i]);
|
||
}
|
||
}
|
||
|
||
static void
|
||
destroy_update_buffer_cb(VkDevice _device,
|
||
uint64_t pobj,
|
||
VkAllocationCallbacks *alloc)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||
struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
|
||
v3dv_bo_free(device, bo);
|
||
}
|
||
|
||
void
|
||
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
||
VkBuffer dstBuffer,
|
||
VkDeviceSize dstOffset,
|
||
VkDeviceSize dataSize,
|
||
const void *pData)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
|
||
|
||
struct v3dv_bo *src_bo =
|
||
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
|
||
if (!src_bo) {
|
||
fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
|
||
return;
|
||
}
|
||
|
||
bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
|
||
if (!ok) {
|
||
fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
|
||
return;
|
||
}
|
||
|
||
memcpy(src_bo->map, pData, dataSize);
|
||
|
||
v3dv_bo_unmap(cmd_buffer->device, src_bo);
|
||
|
||
VkBufferCopy region = {
|
||
.srcOffset = 0,
|
||
.dstOffset = dstOffset,
|
||
.size = dataSize,
|
||
};
|
||
struct v3dv_job *copy_job =
|
||
copy_buffer(cmd_buffer,
|
||
dst_buffer->mem->bo, dst_buffer->mem_offset,
|
||
src_bo, 0,
|
||
®ion);
|
||
if (!copy_job)
|
||
return;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
|
||
}
|
||
|
||
static void
|
||
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
uint32_t stride)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
|
||
V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_fill_buffer(struct v3dv_job *job,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
struct framebuffer_data *framebuffer)
|
||
{
|
||
const uint32_t stride = job->frame_tiling.width * 4;
|
||
emit_fill_buffer_per_tile_list(job, bo, offset, stride);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_fill_buffer_rcl(struct v3dv_job *job,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t data)
|
||
{
|
||
const union v3dv_clear_value clear_value = {
|
||
.color = { data, 0, 0, 0 },
|
||
};
|
||
|
||
const struct rcl_clear_info clear_info = {
|
||
.clear_value = &clear_value,
|
||
.image = NULL,
|
||
.aspects = VK_IMAGE_ASPECT_COLOR_BIT,
|
||
.layer = 0,
|
||
.level = 0,
|
||
};
|
||
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
emit_frame_setup(job, 0, &clear_value);
|
||
emit_fill_buffer(job, bo, offset, framebuffer);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
static void
|
||
fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_bo *bo,
|
||
uint32_t offset,
|
||
uint32_t size,
|
||
uint32_t data)
|
||
{
|
||
assert(size > 0 && size % 4 == 0);
|
||
assert(offset + size <= bo->size);
|
||
|
||
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
|
||
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
|
||
uint32_t num_items = size / 4;
|
||
|
||
while (num_items > 0) {
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return;
|
||
|
||
uint32_t width, height;
|
||
framebuffer_size_for_pixel_count(num_items, &width, &height);
|
||
|
||
v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
|
||
internal_type, &job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
|
||
emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
|
||
const uint32_t items_copied = width * height;
|
||
const uint32_t bytes_copied = items_copied * 4;
|
||
num_items -= items_copied;
|
||
offset += bytes_copied;
|
||
}
|
||
}
|
||
|
||
void
|
||
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
||
VkBuffer dstBuffer,
|
||
VkDeviceSize dstOffset,
|
||
VkDeviceSize size,
|
||
uint32_t data)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
|
||
|
||
struct v3dv_bo *bo = dst_buffer->mem->bo;
|
||
|
||
/* From the Vulkan spec:
|
||
*
|
||
* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
|
||
* a multiple of 4, then the nearest smaller multiple is used."
|
||
*/
|
||
if (size == VK_WHOLE_SIZE) {
|
||
size = dst_buffer->size - dstOffset;
|
||
size -= size % 4;
|
||
}
|
||
|
||
fill_buffer(cmd_buffer, bo, dstOffset, size, data);
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
/* Destination can't be raster format */
|
||
if (image->tiling == VK_IMAGE_TILING_LINEAR)
|
||
return false;
|
||
|
||
/* We can't copy D24S8 because buffer to image copies only copy one aspect
|
||
* at a time, and the TFU copies full images. Also, V3D depth bits for
|
||
* both D24S8 and D24X8 stored in the 24-bit MSB of each 32-bit word, but
|
||
* the Vulkan spec has the buffer data specified the other way around, so it
|
||
* is not a straight copy, we would havew to swizzle the channels, which the
|
||
* TFU can't do.
|
||
*/
|
||
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
|
||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
|
||
return false;
|
||
}
|
||
|
||
/* Region must include full slice */
|
||
const uint32_t offset_x = region->imageOffset.x;
|
||
const uint32_t offset_y = region->imageOffset.y;
|
||
if (offset_x != 0 || offset_y != 0)
|
||
return false;
|
||
|
||
uint32_t width, height;
|
||
if (region->bufferRowLength == 0)
|
||
width = region->imageExtent.width;
|
||
else
|
||
width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
height = region->imageExtent.height;
|
||
else
|
||
height = region->bufferImageHeight;
|
||
|
||
if (width != image->extent.width || height != image->extent.height)
|
||
return false;
|
||
|
||
/* Handle region semantics for compressed images */
|
||
const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
|
||
width = DIV_ROUND_UP(width, block_w);
|
||
height = DIV_ROUND_UP(height, block_h);
|
||
|
||
/* Format must be supported for texturing via the TFU. Since we are just
|
||
* copying raw data and not converting between pixel formats, we can ignore
|
||
* the image's format and choose a compatible TFU format for the image
|
||
* texel size instead, which expands the list of formats we can handle here.
|
||
*/
|
||
const struct v3dv_format *format =
|
||
v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
|
||
image->cpp, NULL);
|
||
|
||
const uint32_t mip_level = region->imageSubresource.mipLevel;
|
||
const struct v3d_resource_slice *slice = &image->slices[mip_level];
|
||
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->imageSubresource.layerCount;
|
||
else
|
||
num_layers = region->imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
assert(image->mem && image->mem->bo);
|
||
const struct v3dv_bo *dst_bo = image->mem->bo;
|
||
|
||
assert(buffer->mem && buffer->mem->bo);
|
||
const struct v3dv_bo *src_bo = buffer->mem->bo;
|
||
|
||
/* Emit a TFU job per layer to copy */
|
||
const uint32_t buffer_stride = width * image->cpp;
|
||
for (int i = 0; i < num_layers; i++) {
|
||
uint32_t layer = region->imageSubresource.baseArrayLayer + i;
|
||
|
||
struct drm_v3d_submit_tfu tfu = {
|
||
.ios = (height << 16) | width,
|
||
.bo_handles = {
|
||
dst_bo->handle,
|
||
src_bo->handle != dst_bo->handle ? src_bo->handle : 0
|
||
},
|
||
};
|
||
|
||
const uint32_t buffer_offset =
|
||
buffer->mem_offset + region->bufferOffset +
|
||
height * buffer_stride * i;
|
||
|
||
const uint32_t src_offset = src_bo->offset + buffer_offset;
|
||
tfu.iia |= src_offset;
|
||
tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
|
||
tfu.iis |= width;
|
||
|
||
const uint32_t dst_offset =
|
||
dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
|
||
tfu.ioa |= dst_offset;
|
||
|
||
tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
|
||
(slice->tiling - VC5_TILING_LINEARTILE)) <<
|
||
V3D_TFU_IOA_FORMAT_SHIFT;
|
||
tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
|
||
|
||
/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
|
||
* OPAD field for the destination (how many extra UIF blocks beyond
|
||
* those necessary to cover the height).
|
||
*/
|
||
if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
|
||
slice->tiling == VC5_TILING_UIF_XOR) {
|
||
uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
|
||
uint32_t implicit_padded_height = align(height, uif_block_h);
|
||
uint32_t icfg =
|
||
(slice->padded_height - implicit_padded_height) / uif_block_h;
|
||
tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
|
||
}
|
||
|
||
v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
uint32_t layer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
const VkImageSubresourceLayers *imgrsc = ®ion->imageSubresource;
|
||
assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
|
||
layer < image->extent.depth);
|
||
|
||
/* Load TLB from buffer */
|
||
uint32_t width, height;
|
||
if (region->bufferRowLength == 0)
|
||
width = region->imageExtent.width;
|
||
else
|
||
width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
height = region->imageExtent.height;
|
||
else
|
||
height = region->bufferImageHeight;
|
||
|
||
/* Handle copy to compressed format using a compatible format */
|
||
width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
|
||
height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
|
||
|
||
uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
|
||
1 : image->cpp;
|
||
uint32_t buffer_stride = width * cpp;
|
||
uint32_t buffer_offset =
|
||
buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
|
||
|
||
uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
|
||
false, false, true);
|
||
|
||
emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
|
||
buffer_offset, buffer_stride, format);
|
||
|
||
/* Because we can't do raster loads/stores of Z/S formats we need to
|
||
* use a color tile buffer with a compatible RGBA color format instead.
|
||
* However, when we are uploading a single aspect to a combined
|
||
* depth/stencil image we have the problem that our tile buffer stores don't
|
||
* allow us to mask out the other aspect, so we always write all four RGBA
|
||
* channels to the image and we end up overwriting that other aspect with
|
||
* undefined values. To work around that, we first load the aspect we are
|
||
* not copying from the image memory into a proper Z/S tile buffer. Then we
|
||
* do our store from the color buffer for the aspect we are copying, and
|
||
* after that, we do another store from the Z/S tile buffer to restore the
|
||
* other aspect to its original value.
|
||
*/
|
||
if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||
if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||
emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
|
||
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
|
||
false, false);
|
||
} else {
|
||
assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||
emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
|
||
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
|
||
false, false);
|
||
}
|
||
}
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
/* Store TLB to image */
|
||
emit_image_store(cl, framebuffer, image, imgrsc->aspectMask,
|
||
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
|
||
false, true, false);
|
||
|
||
if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||
if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||
emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
|
||
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
|
||
false, false, false);
|
||
} else {
|
||
assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||
emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
|
||
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
|
||
false, false, false);
|
||
}
|
||
}
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer_to_layer(struct v3dv_job *job,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t layer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
emit_frame_setup(job, layer, NULL);
|
||
emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
|
||
layer, region);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_copy_buffer_to_image_rcl(struct v3dv_job *job,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
struct framebuffer_data *framebuffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
|
||
emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
VkFormat fb_format;
|
||
if (!can_use_tlb(image, ®ion->imageOffset, &fb_format))
|
||
return false;
|
||
|
||
uint32_t internal_type, internal_bpp;
|
||
get_internal_type_bpp_for_image_aspects(fb_format,
|
||
region->imageSubresource.aspectMask,
|
||
&internal_type, &internal_bpp);
|
||
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->imageSubresource.layerCount;
|
||
else
|
||
num_layers = region->imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return true;
|
||
|
||
/* Handle copy to compressed format using a compatible format */
|
||
const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
|
||
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
|
||
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
|
||
|
||
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
|
||
return true;
|
||
}
|
||
|
||
static bool
|
||
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
|
||
return true;
|
||
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
|
||
return true;
|
||
return false;
|
||
}
|
||
|
||
static VkResult
|
||
create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
|
||
{
|
||
/* If this is not the first pool we create for this command buffer
|
||
* size it based on the size of the currently exhausted pool.
|
||
*/
|
||
uint32_t descriptor_count = 64;
|
||
if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
|
||
struct v3dv_descriptor_pool *exhausted_pool =
|
||
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
|
||
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
|
||
}
|
||
|
||
/* Create the descriptor pool */
|
||
cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
|
||
VkDescriptorPoolSize pool_size = {
|
||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
|
||
.descriptorCount = descriptor_count,
|
||
};
|
||
VkDescriptorPoolCreateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||
.maxSets = descriptor_count,
|
||
.poolSizeCount = 1,
|
||
.pPoolSizes = &pool_size,
|
||
.flags = 0,
|
||
};
|
||
VkResult result =
|
||
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
|
||
&info,
|
||
&cmd_buffer->device->vk.alloc,
|
||
&cmd_buffer->meta.texel_buffer_copy.dspool);
|
||
|
||
if (result == VK_SUCCESS) {
|
||
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
|
||
const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t) _pool,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
|
||
|
||
struct v3dv_descriptor_pool *pool =
|
||
v3dv_descriptor_pool_from_handle(_pool);
|
||
pool->is_driver_internal = true;
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
static VkResult
|
||
allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
|
||
VkDescriptorSet *set)
|
||
{
|
||
/* Make sure we have a descriptor pool */
|
||
VkResult result;
|
||
if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
|
||
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
|
||
if (result != VK_SUCCESS)
|
||
return result;
|
||
}
|
||
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
|
||
|
||
/* Allocate descriptor set */
|
||
struct v3dv_device *device = cmd_buffer->device;
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
VkDescriptorSetAllocateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||
.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
|
||
.descriptorSetCount = 1,
|
||
.pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
|
||
};
|
||
result = v3dv_AllocateDescriptorSets(_device, &info, set);
|
||
|
||
/* If we ran out of pool space, grow the pool and try again */
|
||
if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
|
||
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
|
||
if (result == VK_SUCCESS) {
|
||
info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
|
||
result = v3dv_AllocateDescriptorSets(_device, &info, set);
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
static void
|
||
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
uint8_t *key)
|
||
{
|
||
memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
|
||
|
||
uint32_t *p = (uint32_t *) key;
|
||
|
||
*p = format;
|
||
p++;
|
||
|
||
*p = cmask;
|
||
p++;
|
||
|
||
memcpy(p, cswizzle, sizeof(VkComponentMapping));
|
||
p += sizeof(VkComponentMapping) / sizeof(uint32_t);
|
||
|
||
assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
|
||
}
|
||
|
||
static bool
|
||
create_blit_render_pass(struct v3dv_device *device,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkRenderPass *pass_load,
|
||
VkRenderPass *pass_no_load);
|
||
|
||
static nir_ssa_def *gen_rect_vertices(nir_builder *b);
|
||
|
||
static bool
|
||
create_pipeline(struct v3dv_device *device,
|
||
struct v3dv_render_pass *pass,
|
||
struct nir_shader *vs_nir,
|
||
struct nir_shader *fs_nir,
|
||
const VkPipelineVertexInputStateCreateInfo *vi_state,
|
||
const VkPipelineDepthStencilStateCreateInfo *ds_state,
|
||
const VkPipelineColorBlendStateCreateInfo *cb_state,
|
||
const VkPipelineMultisampleStateCreateInfo *ms_state,
|
||
const VkPipelineLayout layout,
|
||
VkPipeline *pipeline);
|
||
|
||
static nir_shader *
|
||
get_texel_buffer_copy_vs()
|
||
{
|
||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
|
||
"meta texel buffer copy vs");
|
||
nir_variable *vs_out_pos =
|
||
nir_variable_create(b.shader, nir_var_shader_out,
|
||
glsl_vec4_type(), "gl_Position");
|
||
vs_out_pos->data.location = VARYING_SLOT_POS;
|
||
|
||
nir_ssa_def *pos = gen_rect_vertices(&b);
|
||
nir_store_var(&b, vs_out_pos, pos, 0xf);
|
||
|
||
return b.shader;
|
||
}
|
||
|
||
static nir_ssa_def *
|
||
load_frag_coord(nir_builder *b)
|
||
{
|
||
nir_foreach_shader_in_variable(var, b->shader) {
|
||
if (var->data.location == VARYING_SLOT_POS)
|
||
return nir_load_var(b, var);
|
||
}
|
||
nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
|
||
glsl_vec4_type(), NULL);
|
||
pos->data.location = VARYING_SLOT_POS;
|
||
return nir_load_var(b, pos);
|
||
}
|
||
|
||
static uint32_t
|
||
component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
|
||
{
|
||
if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
|
||
swz = comp;
|
||
|
||
switch (swz) {
|
||
case VK_COMPONENT_SWIZZLE_R:
|
||
return 0;
|
||
case VK_COMPONENT_SWIZZLE_G:
|
||
return 1;
|
||
case VK_COMPONENT_SWIZZLE_B:
|
||
return 2;
|
||
case VK_COMPONENT_SWIZZLE_A:
|
||
return 3;
|
||
default:
|
||
unreachable("Invalid swizzle");
|
||
};
|
||
}
|
||
|
||
static nir_shader *
|
||
get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
|
||
VkComponentMapping *cswizzle)
|
||
{
|
||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
|
||
"meta texel buffer copy fs");
|
||
|
||
/* We only use the copy from texel buffer shader to implement
|
||
* copy_buffer_to_image_shader, which always selects a compatible integer
|
||
* format for the copy.
|
||
*/
|
||
assert(vk_format_is_int(format));
|
||
|
||
/* Fragment shader output color */
|
||
nir_variable *fs_out_color =
|
||
nir_variable_create(b.shader, nir_var_shader_out,
|
||
glsl_uvec4_type(), "out_color");
|
||
fs_out_color->data.location = FRAG_RESULT_DATA0;
|
||
|
||
/* Texel buffer input */
|
||
const struct glsl_type *sampler_type =
|
||
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
|
||
nir_variable *sampler =
|
||
nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
|
||
sampler->data.descriptor_set = 0;
|
||
sampler->data.binding = 0;
|
||
|
||
/* Load the box describing the pixel region we want to copy from the
|
||
* texel buffer.
|
||
*/
|
||
nir_ssa_def *box =
|
||
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
|
||
|
||
/* Load the buffer stride (this comes in texel units) */
|
||
nir_ssa_def *stride =
|
||
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
|
||
|
||
/* Load the buffer offset (this comes in texel units) */
|
||
nir_ssa_def *offset =
|
||
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 20, .range = 4);
|
||
|
||
nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
|
||
|
||
/* Load pixel data from texel buffer based on the x,y offset of the pixel
|
||
* within the box. Texel buffers are 1D arrays of texels.
|
||
*
|
||
* Notice that we already make sure that we only generate fragments that are
|
||
* inside the box through the scissor/viewport state, so our offset into the
|
||
* texel buffer should always be within its bounds and we we don't need
|
||
* to add a check for that here.
|
||
*/
|
||
nir_ssa_def *x_offset =
|
||
nir_isub(&b, nir_channel(&b, coord, 0),
|
||
nir_channel(&b, box, 0));
|
||
nir_ssa_def *y_offset =
|
||
nir_isub(&b, nir_channel(&b, coord, 1),
|
||
nir_channel(&b, box, 1));
|
||
nir_ssa_def *texel_offset =
|
||
nir_iadd(&b, nir_iadd(&b, offset, x_offset),
|
||
nir_imul(&b, y_offset, stride));
|
||
|
||
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
|
||
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
|
||
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
|
||
tex->op = nir_texop_txf;
|
||
tex->src[0].src_type = nir_tex_src_coord;
|
||
tex->src[0].src = nir_src_for_ssa(texel_offset);
|
||
tex->src[1].src_type = nir_tex_src_texture_deref;
|
||
tex->src[1].src = nir_src_for_ssa(tex_deref);
|
||
tex->dest_type = nir_type_uint32;
|
||
tex->is_array = false;
|
||
tex->coord_components = 1;
|
||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
|
||
nir_builder_instr_insert(&b, &tex->instr);
|
||
|
||
uint32_t swiz[4];
|
||
swiz[0] =
|
||
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
|
||
swiz[1] =
|
||
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
|
||
swiz[2] =
|
||
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
|
||
swiz[3] =
|
||
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
|
||
nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
|
||
nir_store_var(&b, fs_out_color, s, 0xf);
|
||
|
||
return b.shader;
|
||
}
|
||
|
||
static bool
|
||
create_texel_buffer_copy_pipeline(struct v3dv_device *device,
|
||
VkFormat format,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
VkRenderPass _pass,
|
||
VkPipelineLayout pipeline_layout,
|
||
VkPipeline *pipeline)
|
||
{
|
||
struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
|
||
|
||
assert(vk_format_is_color(format));
|
||
|
||
nir_shader *vs_nir = get_texel_buffer_copy_vs();
|
||
nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
|
||
|
||
const VkPipelineVertexInputStateCreateInfo vi_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||
.vertexBindingDescriptionCount = 0,
|
||
.vertexAttributeDescriptionCount = 0,
|
||
};
|
||
|
||
VkPipelineDepthStencilStateCreateInfo ds_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||
};
|
||
|
||
VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
|
||
blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
|
||
.blendEnable = false,
|
||
.colorWriteMask = cmask,
|
||
};
|
||
|
||
const VkPipelineColorBlendStateCreateInfo cb_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||
.logicOpEnable = false,
|
||
.attachmentCount = 1,
|
||
.pAttachments = blend_att_state
|
||
};
|
||
|
||
const VkPipelineMultisampleStateCreateInfo ms_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||
.sampleShadingEnable = false,
|
||
.pSampleMask = NULL,
|
||
.alphaToCoverageEnable = false,
|
||
.alphaToOneEnable = false,
|
||
};
|
||
|
||
return create_pipeline(device,
|
||
pass,
|
||
vs_nir, fs_nir,
|
||
&vi_state,
|
||
&ds_state,
|
||
&cb_state,
|
||
&ms_state,
|
||
pipeline_layout,
|
||
pipeline);
|
||
}
|
||
|
||
static bool
|
||
get_copy_texel_buffer_pipeline(
|
||
struct v3dv_device *device,
|
||
VkFormat format,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
VkImageType image_type,
|
||
struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
|
||
{
|
||
bool ok = true;
|
||
|
||
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
|
||
get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, key);
|
||
|
||
mtx_lock(&device->meta.mtx);
|
||
struct hash_entry *entry =
|
||
_mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
|
||
&key);
|
||
if (entry) {
|
||
mtx_unlock(&device->meta.mtx);
|
||
*pipeline = entry->data;
|
||
return true;
|
||
}
|
||
|
||
*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
|
||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||
|
||
if (*pipeline == NULL)
|
||
goto fail;
|
||
|
||
/* The blit render pass is compatible */
|
||
ok = create_blit_render_pass(device, format, format,
|
||
&(*pipeline)->pass,
|
||
&(*pipeline)->pass_no_load);
|
||
if (!ok)
|
||
goto fail;
|
||
|
||
ok =
|
||
create_texel_buffer_copy_pipeline(device, format, cmask, cswizzle,
|
||
(*pipeline)->pass,
|
||
device->meta.texel_buffer_copy.p_layout,
|
||
&(*pipeline)->pipeline);
|
||
if (!ok)
|
||
goto fail;
|
||
|
||
_mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
|
||
&key, *pipeline);
|
||
|
||
mtx_unlock(&device->meta.mtx);
|
||
return true;
|
||
|
||
fail:
|
||
mtx_unlock(&device->meta.mtx);
|
||
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
if (*pipeline) {
|
||
if ((*pipeline)->pass)
|
||
v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
|
||
if ((*pipeline)->pipeline)
|
||
v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
|
||
vk_free(&device->vk.alloc, *pipeline);
|
||
*pipeline = NULL;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
static bool
|
||
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
|
||
VkImageAspectFlags aspect,
|
||
struct v3dv_image *image,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
struct v3dv_buffer *buffer,
|
||
uint32_t buffer_bpp,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
uint32_t region_count,
|
||
const VkBufferImageCopy *regions)
|
||
{
|
||
VkResult result;
|
||
bool handled = false;
|
||
|
||
assert(cswizzle);
|
||
|
||
/* This is a copy path, so we don't handle format conversions. The only
|
||
* exception are stencil to D24S8 copies, which are handled as a color
|
||
* masked R8->RGBA8 copy.
|
||
*/
|
||
assert(src_format == dst_format ||
|
||
(dst_format == VK_FORMAT_R8G8B8A8_UINT &&
|
||
src_format == VK_FORMAT_R8_UINT &&
|
||
cmask == VK_COLOR_COMPONENT_R_BIT));
|
||
|
||
/* We only handle color copies. Callers can copy D/S aspects by using
|
||
* a compatible color format and maybe a cmask/cswizzle for D24 formats.
|
||
*/
|
||
if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)
|
||
return handled;
|
||
|
||
/* FIXME: we only handle uncompressed images for now. */
|
||
if (vk_format_is_compressed(image->vk_format))
|
||
return handled;
|
||
|
||
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
|
||
VK_COLOR_COMPONENT_G_BIT |
|
||
VK_COLOR_COMPONENT_B_BIT |
|
||
VK_COLOR_COMPONENT_A_BIT;
|
||
if (cmask == 0)
|
||
cmask = full_cmask;
|
||
|
||
/* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
|
||
* so we can bind it as a texel buffer. Otherwise, the buffer view
|
||
* we create below won't setup the texture state that we need for this.
|
||
*/
|
||
if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
|
||
if (v3dv_buffer_format_supports_features(
|
||
src_format, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
|
||
buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
|
||
} else {
|
||
return handled;
|
||
}
|
||
}
|
||
|
||
/* At this point we should be able to handle the copy unless an unexpected
|
||
* error occurs, such as an OOM.
|
||
*/
|
||
handled = true;
|
||
|
||
/* Get the texel buffer copy pipeline */
|
||
struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
|
||
bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
|
||
dst_format, cmask, cswizzle,
|
||
image->type, &pipeline);
|
||
if (!ok)
|
||
return handled;
|
||
assert(pipeline && pipeline->pipeline && pipeline->pass);
|
||
|
||
/* Setup descriptor set for the source texel buffer. We don't have to
|
||
* register the descriptor as a private command buffer object since
|
||
* all descriptors will be freed automatically with the descriptor
|
||
* pool.
|
||
*/
|
||
VkDescriptorSet set;
|
||
result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
/* FIXME: for some reason passing region->bufferOffset here for the
|
||
* offset field doesn't work, making the following CTS tests fail:
|
||
*
|
||
* dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*
|
||
*
|
||
* So instead we pass 0 here and we pass the offset in texels as a push
|
||
* constant to the shader, which seems to work correctly.
|
||
*/
|
||
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
|
||
VkBufferViewCreateInfo buffer_view_info = {
|
||
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
|
||
.buffer = v3dv_buffer_to_handle(buffer),
|
||
.format = src_format,
|
||
.offset = 0,
|
||
.range = VK_WHOLE_SIZE,
|
||
};
|
||
|
||
VkBufferView texel_buffer_view;
|
||
result = v3dv_CreateBufferView(_device, &buffer_view_info,
|
||
&cmd_buffer->device->vk.alloc,
|
||
&texel_buffer_view);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)texel_buffer_view,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
|
||
|
||
VkWriteDescriptorSet write = {
|
||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||
.dstSet = set,
|
||
.dstBinding = 0,
|
||
.dstArrayElement = 0,
|
||
.descriptorCount = 1,
|
||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
|
||
.pTexelBufferView = &texel_buffer_view,
|
||
};
|
||
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
|
||
|
||
/* Push command buffer state before starting meta operation */
|
||
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
||
uint32_t dirty_dynamic_state = 0;
|
||
|
||
/* Bind common state for all layers and regions */
|
||
VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
||
v3dv_CmdBindPipeline(_cmd_buffer,
|
||
VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||
pipeline->pipeline);
|
||
|
||
v3dv_CmdBindDescriptorSets(_cmd_buffer,
|
||
VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
|
||
0, 1, &set,
|
||
0, NULL);
|
||
|
||
/* Compute the number of layers to copy.
|
||
*
|
||
* If we are batching (region_count > 1) all our regions have the same
|
||
* image subresource so we can take this from the first region.
|
||
*/
|
||
const VkImageSubresourceLayers *resource = ®ions[0].imageSubresource;
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D) {
|
||
num_layers = resource->layerCount;
|
||
} else {
|
||
assert(region_count == 1);
|
||
num_layers = regions[0].imageExtent.depth;
|
||
}
|
||
assert(num_layers > 0);
|
||
|
||
/* Sanity check: we can only batch multiple regions together if they have
|
||
* the same framebuffer (so the same layer).
|
||
*/
|
||
assert(num_layers == 1 || region_count == 1);
|
||
|
||
/* For each layer */
|
||
for (uint32_t l = 0; l < num_layers; l++) {
|
||
/* Setup framebuffer for this layer.
|
||
*
|
||
* FIXME: once we support geometry shaders, we should be able to have
|
||
* one layered framebuffer and emit just one draw call for
|
||
* all layers using layered rendering. At that point, we should
|
||
* also be able to batch multi-layered regions as well.
|
||
*/
|
||
VkImageViewCreateInfo image_view_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||
.image = v3dv_image_to_handle(image),
|
||
.viewType = v3dv_image_type_to_view_type(image->type),
|
||
.format = dst_format,
|
||
.subresourceRange = {
|
||
.aspectMask = aspect,
|
||
.baseMipLevel = resource->mipLevel,
|
||
.levelCount = 1,
|
||
.baseArrayLayer = resource->baseArrayLayer + l,
|
||
.layerCount = 1
|
||
},
|
||
};
|
||
VkImageView image_view;
|
||
result = v3dv_CreateImageView(_device, &image_view_info,
|
||
&cmd_buffer->device->vk.alloc, &image_view);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)image_view,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
|
||
|
||
VkFramebufferCreateInfo fb_info = {
|
||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
||
.renderPass = pipeline->pass,
|
||
.attachmentCount = 1,
|
||
.pAttachments = &image_view,
|
||
.width = u_minify(image->extent.width, resource->mipLevel),
|
||
.height = u_minify(image->extent.height, resource->mipLevel),
|
||
.layers = 1,
|
||
};
|
||
|
||
VkFramebuffer fb;
|
||
result = v3dv_CreateFramebuffer(_device, &fb_info,
|
||
&cmd_buffer->device->vk.alloc, &fb);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)fb,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
|
||
|
||
/* Start render pass for this layer.
|
||
*
|
||
* If the we only have one region to copy, then we might be able to
|
||
* skip the TLB load if it is aligned to tile boundaries. All layers
|
||
* copy the same area, so we only need to check this once.
|
||
*/
|
||
bool can_skip_tlb_load = false;
|
||
VkRect2D render_area;
|
||
if (region_count == 1) {
|
||
render_area.offset.x = regions[0].imageOffset.x;
|
||
render_area.offset.y = regions[0].imageOffset.y;
|
||
render_area.extent.width = regions[0].imageExtent.width;
|
||
render_area.extent.height = regions[0].imageExtent.height;
|
||
|
||
if (l == 0) {
|
||
struct v3dv_render_pass *pipeline_pass =
|
||
v3dv_render_pass_from_handle(pipeline->pass);
|
||
can_skip_tlb_load =
|
||
cmask == full_cmask &&
|
||
v3dv_subpass_area_is_tile_aligned(&render_area,
|
||
v3dv_framebuffer_from_handle(fb),
|
||
pipeline_pass, 0);
|
||
}
|
||
} else {
|
||
render_area.offset.x = 0;
|
||
render_area.offset.y = 0;
|
||
render_area.extent.width = fb_info.width;
|
||
render_area.extent.height = fb_info.height;
|
||
}
|
||
|
||
VkRenderPassBeginInfo rp_info = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||
.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
|
||
pipeline->pass,
|
||
.framebuffer = fb,
|
||
.renderArea = render_area,
|
||
.clearValueCount = 0,
|
||
};
|
||
|
||
v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
|
||
struct v3dv_job *job = cmd_buffer->state.job;
|
||
if (!job)
|
||
goto fail;
|
||
|
||
/* For each region */
|
||
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
|
||
for (uint32_t r = 0; r < region_count; r++) {
|
||
const VkBufferImageCopy *region = ®ions[r];
|
||
|
||
/* Obtain the 2D buffer region spec */
|
||
uint32_t buf_width, buf_height;
|
||
if (region->bufferRowLength == 0)
|
||
buf_width = region->imageExtent.width;
|
||
else
|
||
buf_width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
buf_height = region->imageExtent.height;
|
||
else
|
||
buf_height = region->bufferImageHeight;
|
||
|
||
const VkViewport viewport = {
|
||
.x = region->imageOffset.x,
|
||
.y = region->imageOffset.y,
|
||
.width = region->imageExtent.width,
|
||
.height = region->imageExtent.height,
|
||
.minDepth = 0.0f,
|
||
.maxDepth = 1.0f
|
||
};
|
||
v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
|
||
const VkRect2D scissor = {
|
||
.offset = { region->imageOffset.x, region->imageOffset.y },
|
||
.extent = { region->imageExtent.width, region->imageExtent.height }
|
||
};
|
||
v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
|
||
|
||
const VkDeviceSize buf_offset =
|
||
region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
|
||
uint32_t push_data[6] = {
|
||
region->imageOffset.x,
|
||
region->imageOffset.y,
|
||
region->imageOffset.x + region->imageExtent.width - 1,
|
||
region->imageOffset.y + region->imageExtent.height - 1,
|
||
buf_width,
|
||
buf_offset,
|
||
};
|
||
|
||
v3dv_CmdPushConstants(_cmd_buffer,
|
||
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
|
||
VK_SHADER_STAGE_FRAGMENT_BIT,
|
||
0, sizeof(push_data), &push_data);
|
||
|
||
v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
|
||
} /* For each region */
|
||
|
||
v3dv_CmdEndRenderPass(_cmd_buffer);
|
||
} /* For each layer */
|
||
|
||
fail:
|
||
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
|
||
return handled;
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
|
||
VkImageAspectFlags aspect,
|
||
struct v3dv_image *image,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
struct v3dv_buffer *buffer,
|
||
uint32_t buffer_bpp,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
uint32_t region_count,
|
||
const VkBufferImageCopy *regions)
|
||
{
|
||
/* Since we can't sample linear images we need to upload the linear
|
||
* buffer to a tiled image that we can use as a blit source, which
|
||
* is slow.
|
||
*/
|
||
perf_debug("Falling back to blit path for buffer to image copy.\n");
|
||
|
||
struct v3dv_device *device = cmd_buffer->device;
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
bool handled = true;
|
||
|
||
/* Allocate memory for the tiled image. Since we copy layer by layer
|
||
* we allocate memory to hold a full layer, which is the worse case.
|
||
* For that we create a dummy image with that spec, get memory requirements
|
||
* for it and use that information to create the memory allocation.
|
||
* We will then reuse this memory store for all the regions we want to
|
||
* copy.
|
||
*/
|
||
VkImage dummy_image;
|
||
VkImageCreateInfo dummy_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||
.imageType = VK_IMAGE_TYPE_2D,
|
||
.format = src_format,
|
||
.extent = { image->extent.width, image->extent.height, 1 },
|
||
.mipLevels = 1,
|
||
.arrayLayers = 1,
|
||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
|
||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||
.queueFamilyIndexCount = 0,
|
||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
VkResult result =
|
||
v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
VkMemoryRequirements reqs;
|
||
v3dv_GetImageMemoryRequirements(_device, dummy_image, &reqs);
|
||
v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
|
||
|
||
VkDeviceMemory mem;
|
||
VkMemoryAllocateInfo alloc_info = {
|
||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||
.allocationSize = reqs.size,
|
||
.memoryTypeIndex = 0,
|
||
};
|
||
result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)mem,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
|
||
|
||
/* Obtain the layer count.
|
||
*
|
||
* If we are batching (region_count > 1) all our regions have the same
|
||
* image subresource so we can take this from the first region.
|
||
*/
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = regions[0].imageSubresource.layerCount;
|
||
else
|
||
num_layers = regions[0].imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
/* Sanity check: we can only batch multiple regions together if they have
|
||
* the same framebuffer (so the same layer).
|
||
*/
|
||
assert(num_layers == 1 || region_count == 1);
|
||
|
||
const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
|
||
const uint32_t block_height = vk_format_get_blockheight(image->vk_format);
|
||
|
||
/* Copy regions by uploading each region to a temporary tiled image using
|
||
* the memory we have just allocated as storage.
|
||
*/
|
||
for (uint32_t r = 0; r < region_count; r++) {
|
||
const VkBufferImageCopy *region = ®ions[r];
|
||
|
||
/* Obtain the 2D buffer region spec */
|
||
uint32_t buf_width, buf_height;
|
||
if (region->bufferRowLength == 0)
|
||
buf_width = region->imageExtent.width;
|
||
else
|
||
buf_width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
buf_height = region->imageExtent.height;
|
||
else
|
||
buf_height = region->bufferImageHeight;
|
||
|
||
/* If the image is compressed, the bpp refers to blocks, not pixels */
|
||
buf_width = buf_width / block_width;
|
||
buf_height = buf_height / block_height;
|
||
|
||
for (uint32_t i = 0; i < num_layers; i++) {
|
||
/* Create the tiled image */
|
||
VkImageCreateInfo image_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||
.imageType = VK_IMAGE_TYPE_2D,
|
||
.format = src_format,
|
||
.extent = { buf_width, buf_height, 1 },
|
||
.mipLevels = 1,
|
||
.arrayLayers = 1,
|
||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
|
||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||
.queueFamilyIndexCount = 0,
|
||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
|
||
VkImage buffer_image;
|
||
VkResult result =
|
||
v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
|
||
&buffer_image);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)buffer_image,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
|
||
|
||
result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
|
||
if (result != VK_SUCCESS)
|
||
return handled;
|
||
|
||
/* Upload buffer contents for the selected layer */
|
||
const VkDeviceSize buf_offset_bytes =
|
||
region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
|
||
const VkBufferImageCopy buffer_image_copy = {
|
||
.bufferOffset = buf_offset_bytes,
|
||
.bufferRowLength = region->bufferRowLength / block_width,
|
||
.bufferImageHeight = region->bufferImageHeight / block_height,
|
||
.imageSubresource = {
|
||
.aspectMask = aspect,
|
||
.mipLevel = 0,
|
||
.baseArrayLayer = 0,
|
||
.layerCount = 1,
|
||
},
|
||
.imageOffset = { 0, 0, 0 },
|
||
.imageExtent = { buf_width, buf_height, 1 }
|
||
};
|
||
handled =
|
||
create_tiled_image_from_buffer(cmd_buffer,
|
||
v3dv_image_from_handle(buffer_image),
|
||
buffer, &buffer_image_copy);
|
||
if (!handled) {
|
||
/* This is unexpected, we should have setup the upload to be
|
||
* conformant to a TFU or TLB copy.
|
||
*/
|
||
unreachable("Unable to copy buffer to image through TLB");
|
||
return false;
|
||
}
|
||
|
||
/* Blit-copy the requested image extent from the buffer image to the
|
||
* destination image.
|
||
*
|
||
* Since we are copying, the blit must use the same format on the
|
||
* destination and source images to avoid format conversions. The
|
||
* only exception is copying stencil, which we upload to a R8UI source
|
||
* image, but that we need to blit to a S8D24 destination (the only
|
||
* stencil format we support).
|
||
*/
|
||
const VkImageBlit blit_region = {
|
||
.srcSubresource = {
|
||
.aspectMask = aspect,
|
||
.mipLevel = 0,
|
||
.baseArrayLayer = 0,
|
||
.layerCount = 1,
|
||
},
|
||
.srcOffsets = {
|
||
{ 0, 0, 0 },
|
||
{ region->imageExtent.width, region->imageExtent.height, 1 },
|
||
},
|
||
.dstSubresource = {
|
||
.aspectMask = aspect,
|
||
.mipLevel = region->imageSubresource.mipLevel,
|
||
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
|
||
.layerCount = 1,
|
||
},
|
||
.dstOffsets = {
|
||
{
|
||
DIV_ROUND_UP(region->imageOffset.x, block_width),
|
||
DIV_ROUND_UP(region->imageOffset.y, block_height),
|
||
region->imageOffset.z + i,
|
||
},
|
||
{
|
||
DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
|
||
block_width),
|
||
DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
|
||
block_height),
|
||
region->imageOffset.z + i + 1,
|
||
},
|
||
},
|
||
};
|
||
|
||
handled = blit_shader(cmd_buffer,
|
||
image, dst_format,
|
||
v3dv_image_from_handle(buffer_image), src_format,
|
||
cmask, cswizzle,
|
||
&blit_region, VK_FILTER_NEAREST, true);
|
||
if (!handled) {
|
||
/* This is unexpected, we should have a supported blit spec */
|
||
unreachable("Unable to blit buffer to destination image");
|
||
return false;
|
||
}
|
||
}
|
||
}
|
||
|
||
return handled;
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
uint32_t region_count,
|
||
const VkBufferImageCopy *regions,
|
||
bool use_texel_buffer)
|
||
{
|
||
/* We can only call this with region_count > 1 if we can batch the regions
|
||
* together, in which case they share the same image subresource, and so
|
||
* the same aspect.
|
||
*/
|
||
VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
|
||
|
||
/* Generally, the bpp of the data in the buffer matches that of the
|
||
* destination image. The exception is the case where we are uploading
|
||
* stencil (8bpp) to a combined d24s8 image (32bpp).
|
||
*/
|
||
uint32_t buf_bpp = image->cpp;
|
||
|
||
/* We are about to upload the buffer data to an image so we can then
|
||
* blit that to our destination region. Because we are going to implement
|
||
* the copy as a blit, we want our blit source and destination formats to be
|
||
* the same (to avoid any format conversions), so we choose a canonical
|
||
* format that matches the destination image bpp.
|
||
*/
|
||
VkComponentMapping ident_swizzle = {
|
||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
};
|
||
|
||
VkComponentMapping cswizzle = ident_swizzle;
|
||
VkColorComponentFlags cmask = 0; /* Write all components */
|
||
VkFormat src_format;
|
||
VkFormat dst_format;
|
||
switch (buf_bpp) {
|
||
case 16:
|
||
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
src_format = VK_FORMAT_R32G32B32A32_UINT;
|
||
dst_format = src_format;
|
||
break;
|
||
case 8:
|
||
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
src_format = VK_FORMAT_R16G16B16A16_UINT;
|
||
dst_format = src_format;
|
||
break;
|
||
case 4:
|
||
switch (aspect) {
|
||
case VK_IMAGE_ASPECT_COLOR_BIT:
|
||
src_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
dst_format = src_format;
|
||
break;
|
||
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
||
assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
|
||
image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
|
||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
|
||
src_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
dst_format = src_format;
|
||
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
|
||
/* For D24 formats, the Vulkan spec states that the depth component
|
||
* in the buffer is stored in the 24-LSB, but V3D wants it in the
|
||
* 24-MSB.
|
||
*/
|
||
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
|
||
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
|
||
cmask = VK_COLOR_COMPONENT_G_BIT |
|
||
VK_COLOR_COMPONENT_B_BIT |
|
||
VK_COLOR_COMPONENT_A_BIT;
|
||
cswizzle.r = VK_COMPONENT_SWIZZLE_R;
|
||
cswizzle.g = VK_COMPONENT_SWIZZLE_R;
|
||
cswizzle.b = VK_COMPONENT_SWIZZLE_G;
|
||
cswizzle.a = VK_COMPONENT_SWIZZLE_B;
|
||
}
|
||
break;
|
||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||
/* Since we don't support separate stencil this is always a stencil
|
||
* copy to a combined depth/stencil image. Because we don't support
|
||
* separate stencil images, we interpret the buffer data as a
|
||
* color R8UI image, and implement the blit as a compatible color
|
||
* blit to an RGBA8UI destination masking out writes to components
|
||
* GBA (which map to the D24 component of a S8D24 image).
|
||
*/
|
||
assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
|
||
buf_bpp = 1;
|
||
src_format = VK_FORMAT_R8_UINT;
|
||
dst_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
cmask = VK_COLOR_COMPONENT_R_BIT;
|
||
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
break;
|
||
default:
|
||
unreachable("unsupported aspect");
|
||
return false;
|
||
};
|
||
break;
|
||
case 2:
|
||
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
src_format = VK_FORMAT_R16_UINT;
|
||
dst_format = src_format;
|
||
break;
|
||
case 1:
|
||
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||
src_format = VK_FORMAT_R8_UINT;
|
||
dst_format = src_format;
|
||
break;
|
||
default:
|
||
unreachable("unsupported bit-size");
|
||
return false;
|
||
}
|
||
|
||
if (use_texel_buffer) {
|
||
return texel_buffer_shader_copy(cmd_buffer, aspect, image,
|
||
dst_format, src_format,
|
||
buffer, buf_bpp,
|
||
cmask, &cswizzle,
|
||
region_count, regions);
|
||
} else {
|
||
return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
|
||
dst_format, src_format,
|
||
buffer, buf_bpp,
|
||
cmask, &cswizzle,
|
||
region_count, regions);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*/
|
||
static bool
|
||
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *image,
|
||
struct v3dv_buffer *buffer,
|
||
const VkBufferImageCopy *region)
|
||
{
|
||
/* FIXME */
|
||
if (vk_format_is_depth_or_stencil(image->vk_format))
|
||
return false;
|
||
|
||
if (vk_format_is_compressed(image->vk_format))
|
||
return false;
|
||
|
||
if (image->tiling == VK_IMAGE_TILING_LINEAR)
|
||
return false;
|
||
|
||
uint32_t buffer_width, buffer_height;
|
||
if (region->bufferRowLength == 0)
|
||
buffer_width = region->imageExtent.width;
|
||
else
|
||
buffer_width = region->bufferRowLength;
|
||
|
||
if (region->bufferImageHeight == 0)
|
||
buffer_height = region->imageExtent.height;
|
||
else
|
||
buffer_height = region->bufferImageHeight;
|
||
|
||
uint32_t buffer_stride = buffer_width * image->cpp;
|
||
uint32_t buffer_layer_stride = buffer_stride * buffer_height;
|
||
|
||
uint32_t num_layers;
|
||
if (image->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->imageSubresource.layerCount;
|
||
else
|
||
num_layers = region->imageExtent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
||
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
|
||
cmd_buffer, -1);
|
||
if (!job)
|
||
return true;
|
||
|
||
job->cpu.copy_buffer_to_image.image = image;
|
||
job->cpu.copy_buffer_to_image.buffer = buffer;
|
||
job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
|
||
job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
|
||
job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
|
||
job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
|
||
job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
|
||
job->cpu.copy_buffer_to_image.mip_level =
|
||
region->imageSubresource.mipLevel;
|
||
job->cpu.copy_buffer_to_image.base_layer =
|
||
region->imageSubresource.baseArrayLayer;
|
||
job->cpu.copy_buffer_to_image.layer_count = num_layers;
|
||
|
||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||
|
||
return true;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
|
||
VkBuffer srcBuffer,
|
||
VkImage dstImage,
|
||
VkImageLayout dstImageLayout,
|
||
uint32_t regionCount,
|
||
const VkBufferImageCopy *pRegions)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, image, dstImage);
|
||
|
||
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
uint32_t r = 0;
|
||
while (r < regionCount) {
|
||
/* The TFU and TLB paths can only copy one region at a time and the region
|
||
* needs to start at the origin. We try these first for the common case
|
||
* where we are copying full images, since they should be the fastest.
|
||
*/
|
||
uint32_t batch_size = 1;
|
||
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[r]))
|
||
goto handled;
|
||
|
||
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[r]))
|
||
goto handled;
|
||
|
||
/* Otherwise, we are copying subrects, so we fallback to copying
|
||
* via shader and texel buffers and we try to batch the regions
|
||
* if possible. We can only batch copies if they target the same
|
||
* image subresource (so they have the same framebuffer spec).
|
||
*/
|
||
const VkImageSubresourceLayers *rsc = &pRegions[r].imageSubresource;
|
||
if (image->type != VK_IMAGE_TYPE_3D) {
|
||
for (uint32_t s = r + 1; s < regionCount; s++) {
|
||
const VkImageSubresourceLayers *rsc_s = &pRegions[s].imageSubresource;
|
||
if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
|
||
break;
|
||
batch_size++;
|
||
}
|
||
}
|
||
|
||
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
|
||
batch_size, &pRegions[r], true)) {
|
||
goto handled;
|
||
}
|
||
|
||
/* If we still could not copy, fallback to slower paths.
|
||
*
|
||
* FIXME: we could try to batch these too, but since they are bound to be
|
||
* slow it might not be worth it and we should instead put more effort
|
||
* in handling more cases with the other paths.
|
||
*/
|
||
if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[r])) {
|
||
batch_size = 1;
|
||
goto handled;
|
||
}
|
||
|
||
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
|
||
batch_size, &pRegions[r], false)) {
|
||
goto handled;
|
||
}
|
||
|
||
unreachable("Unsupported buffer to image copy.");
|
||
|
||
handled:
|
||
r += batch_size;
|
||
}
|
||
}
|
||
|
||
static void
|
||
compute_blit_3d_layers(const VkOffset3D *offsets,
|
||
uint32_t *min_layer, uint32_t *max_layer,
|
||
bool *mirror_z);
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*
|
||
* The TFU blit path doesn't handle scaling so the blit filter parameter can
|
||
* be ignored.
|
||
*/
|
||
static bool
|
||
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageBlit *region)
|
||
{
|
||
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
assert(src->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
/* Format must match */
|
||
if (src->vk_format != dst->vk_format)
|
||
return false;
|
||
|
||
/* Destination can't be raster format */
|
||
if (dst->tiling == VK_IMAGE_TILING_LINEAR)
|
||
return false;
|
||
|
||
/* Source region must start at (0,0) */
|
||
if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
|
||
return false;
|
||
|
||
/* Destination image must be complete */
|
||
if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
|
||
return false;
|
||
|
||
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
|
||
const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
|
||
const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
|
||
if (region->dstOffsets[1].x < dst_width - 1||
|
||
region->dstOffsets[1].y < dst_height - 1) {
|
||
return false;
|
||
}
|
||
|
||
/* No XY scaling */
|
||
if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
|
||
region->srcOffsets[1].y != region->dstOffsets[1].y) {
|
||
return false;
|
||
}
|
||
|
||
/* If the format is D24S8 both aspects need to be copied, since the TFU
|
||
* can't be programmed to copy only one aspect of the image.
|
||
*/
|
||
if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
|
||
VK_IMAGE_ASPECT_STENCIL_BIT;
|
||
if (region->dstSubresource.aspectMask != ds_aspects)
|
||
return false;
|
||
}
|
||
|
||
/* Our TFU blits only handle exact copies (it requires same formats
|
||
* on input and output, no scaling, etc), so there is no pixel format
|
||
* conversions and we can rewrite the format to use one that is TFU
|
||
* compatible based on its texel size.
|
||
*/
|
||
const struct v3dv_format *format =
|
||
v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
|
||
dst->cpp, NULL);
|
||
|
||
/* Emit a TFU job for each layer to blit */
|
||
assert(region->dstSubresource.layerCount ==
|
||
region->srcSubresource.layerCount);
|
||
|
||
uint32_t min_dst_layer;
|
||
uint32_t max_dst_layer;
|
||
bool dst_mirror_z = false;
|
||
if (dst->type == VK_IMAGE_TYPE_3D) {
|
||
compute_blit_3d_layers(region->dstOffsets,
|
||
&min_dst_layer, &max_dst_layer,
|
||
&dst_mirror_z);
|
||
} else {
|
||
min_dst_layer = region->dstSubresource.baseArrayLayer;
|
||
max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
|
||
}
|
||
|
||
uint32_t min_src_layer;
|
||
uint32_t max_src_layer;
|
||
bool src_mirror_z = false;
|
||
if (src->type == VK_IMAGE_TYPE_3D) {
|
||
compute_blit_3d_layers(region->srcOffsets,
|
||
&min_src_layer, &max_src_layer,
|
||
&src_mirror_z);
|
||
} else {
|
||
min_src_layer = region->srcSubresource.baseArrayLayer;
|
||
max_src_layer = min_src_layer + region->srcSubresource.layerCount;
|
||
}
|
||
|
||
/* No Z scaling for 3D images (for non-3D images both src and dst must
|
||
* have the same layerCount).
|
||
*/
|
||
if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
|
||
return false;
|
||
|
||
const uint32_t layer_count = max_dst_layer - min_dst_layer;
|
||
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
|
||
for (uint32_t i = 0; i < layer_count; i++) {
|
||
/* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
|
||
* only involves reversing the order of the slices.
|
||
*/
|
||
const uint32_t dst_layer =
|
||
dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i;
|
||
const uint32_t src_layer =
|
||
src_mirror_z ? max_src_layer - i - 1: min_src_layer + i;
|
||
emit_tfu_job(cmd_buffer,
|
||
dst, dst_mip_level, dst_layer,
|
||
src, src_mip_level, src_layer,
|
||
dst_width, dst_height, format);
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
static bool
|
||
format_needs_software_int_clamp(VkFormat format)
|
||
{
|
||
switch (format) {
|
||
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
|
||
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
|
||
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
|
||
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
|
||
return true;
|
||
default:
|
||
return false;
|
||
};
|
||
}
|
||
|
||
static void
|
||
get_blit_pipeline_cache_key(VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkColorComponentFlags cmask,
|
||
VkSampleCountFlagBits dst_samples,
|
||
VkSampleCountFlagBits src_samples,
|
||
uint8_t *key)
|
||
{
|
||
memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
|
||
|
||
uint32_t *p = (uint32_t *) key;
|
||
|
||
*p = dst_format;
|
||
p++;
|
||
|
||
/* Generally, when blitting from a larger format to a smaller format
|
||
* the hardware takes care of clamping the source to the RT range.
|
||
* Specifically, for integer formats, this is done by using
|
||
* V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
|
||
* clamps to the bit-size of the render type, and some formats, such as
|
||
* rgb10a2_uint have a 16-bit type, so it won't do what we need and we
|
||
* require to clamp in software. In these cases, we need to amend the blit
|
||
* shader with clamp code that depends on both the src and dst formats, so
|
||
* we need the src format to be part of the key.
|
||
*/
|
||
*p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
|
||
p++;
|
||
|
||
*p = cmask;
|
||
p++;
|
||
|
||
*p = (dst_samples << 8) | src_samples;
|
||
p++;
|
||
|
||
assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
|
||
}
|
||
|
||
static bool
|
||
create_blit_render_pass(struct v3dv_device *device,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkRenderPass *pass_load,
|
||
VkRenderPass *pass_no_load)
|
||
{
|
||
const bool is_color_blit = vk_format_is_color(dst_format);
|
||
|
||
/* Attachment load operation is specified below */
|
||
VkAttachmentDescription att = {
|
||
.format = dst_format,
|
||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
|
||
VkAttachmentReference att_ref = {
|
||
.attachment = 0,
|
||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||
};
|
||
|
||
VkSubpassDescription subpass = {
|
||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||
.inputAttachmentCount = 0,
|
||
.colorAttachmentCount = is_color_blit ? 1 : 0,
|
||
.pColorAttachments = is_color_blit ? &att_ref : NULL,
|
||
.pResolveAttachments = NULL,
|
||
.pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
|
||
.preserveAttachmentCount = 0,
|
||
.pPreserveAttachments = NULL,
|
||
};
|
||
|
||
VkRenderPassCreateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||
.attachmentCount = 1,
|
||
.pAttachments = &att,
|
||
.subpassCount = 1,
|
||
.pSubpasses = &subpass,
|
||
.dependencyCount = 0,
|
||
.pDependencies = NULL,
|
||
};
|
||
|
||
VkResult result;
|
||
att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
|
||
&info, &device->vk.alloc, pass_load);
|
||
if (result != VK_SUCCESS)
|
||
return false;
|
||
|
||
att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
|
||
&info, &device->vk.alloc, pass_no_load);
|
||
return result == VK_SUCCESS;
|
||
}
|
||
|
||
static nir_ssa_def *
|
||
gen_rect_vertices(nir_builder *b)
|
||
{
|
||
nir_ssa_def *vertex_id = nir_load_vertex_id(b);
|
||
|
||
/* vertex 0: -1.0, -1.0
|
||
* vertex 1: -1.0, 1.0
|
||
* vertex 2: 1.0, -1.0
|
||
* vertex 3: 1.0, 1.0
|
||
*
|
||
* so:
|
||
*
|
||
* channel 0 is vertex_id < 2 ? -1.0 : 1.0
|
||
* channel 1 is vertex id & 1 ? 1.0 : -1.0
|
||
*/
|
||
|
||
nir_ssa_def *one = nir_imm_int(b, 1);
|
||
nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
|
||
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
|
||
|
||
nir_ssa_def *comp[4];
|
||
comp[0] = nir_bcsel(b, c0cmp,
|
||
nir_imm_float(b, -1.0f),
|
||
nir_imm_float(b, 1.0f));
|
||
|
||
comp[1] = nir_bcsel(b, c1cmp,
|
||
nir_imm_float(b, 1.0f),
|
||
nir_imm_float(b, -1.0f));
|
||
comp[2] = nir_imm_float(b, 0.0f);
|
||
comp[3] = nir_imm_float(b, 1.0f);
|
||
return nir_vec(b, comp, 4);
|
||
}
|
||
|
||
static nir_ssa_def *
|
||
gen_tex_coords(nir_builder *b)
|
||
{
|
||
nir_ssa_def *tex_box =
|
||
nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
|
||
|
||
nir_ssa_def *tex_z =
|
||
nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
|
||
|
||
nir_ssa_def *vertex_id = nir_load_vertex_id(b);
|
||
|
||
/* vertex 0: src0_x, src0_y
|
||
* vertex 1: src0_x, src1_y
|
||
* vertex 2: src1_x, src0_y
|
||
* vertex 3: src1_x, src1_y
|
||
*
|
||
* So:
|
||
*
|
||
* channel 0 is vertex_id < 2 ? src0_x : src1_x
|
||
* channel 1 is vertex id & 1 ? src1_y : src0_y
|
||
*/
|
||
|
||
nir_ssa_def *one = nir_imm_int(b, 1);
|
||
nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
|
||
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
|
||
|
||
nir_ssa_def *comp[4];
|
||
comp[0] = nir_bcsel(b, c0cmp,
|
||
nir_channel(b, tex_box, 0),
|
||
nir_channel(b, tex_box, 2));
|
||
|
||
comp[1] = nir_bcsel(b, c1cmp,
|
||
nir_channel(b, tex_box, 3),
|
||
nir_channel(b, tex_box, 1));
|
||
comp[2] = tex_z;
|
||
comp[3] = nir_imm_float(b, 1.0f);
|
||
return nir_vec(b, comp, 4);
|
||
}
|
||
|
||
/**
 * Emits a nir_texop_tex instruction that samples the "s_tex" uniform
 * sampler (descriptor set 0, binding 0) at tex_pos for a non-multisampled
 * sampler dimension, and returns its 4-component 32-bit result.
 */
static nir_ssa_def *
build_nir_tex_op_read(struct nir_builder *b,
                      nir_ssa_def *tex_pos,
                      enum glsl_base_type tex_type,
                      enum glsl_sampler_dim dim)
{
   assert(dim != GLSL_SAMPLER_DIM_MS);

   const struct glsl_type *type =
      glsl_sampler_type(dim, false, false, tex_type);

   nir_variable *var =
      nir_variable_create(b->shader, nir_var_uniform, type, "s_tex");
   var->data.descriptor_set = 0;
   var->data.binding = 0;

   nir_ssa_def *deref = &nir_build_deref_var(b, var)->dest.ssa;

   nir_tex_instr *instr = nir_tex_instr_create(b->shader, 3);
   instr->op = nir_texop_tex;
   instr->sampler_dim = dim;
   instr->is_array = glsl_sampler_type_is_array(type);
   instr->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
   instr->coord_components = tex_pos->num_components;

   /* Sources: coordinate, then the same deref as texture and as sampler */
   instr->src[0].src_type = nir_tex_src_coord;
   instr->src[0].src = nir_src_for_ssa(tex_pos);
   instr->src[1].src_type = nir_tex_src_texture_deref;
   instr->src[1].src = nir_src_for_ssa(deref);
   instr->src[2].src_type = nir_tex_src_sampler_deref;
   instr->src[2].src = nir_src_for_ssa(deref);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &instr->instr);

   return &instr->dest.ssa;
}
|
||
|
||
static nir_ssa_def *
|
||
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
|
||
nir_variable *sampler,
|
||
nir_ssa_def *tex_deref,
|
||
enum glsl_base_type tex_type,
|
||
nir_ssa_def *tex_pos,
|
||
nir_ssa_def *sample_idx)
|
||
{
|
||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
|
||
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
|
||
tex->op = nir_texop_txf_ms;
|
||
tex->src[0].src_type = nir_tex_src_coord;
|
||
tex->src[0].src = nir_src_for_ssa(tex_pos);
|
||
tex->src[1].src_type = nir_tex_src_texture_deref;
|
||
tex->src[1].src = nir_src_for_ssa(tex_deref);
|
||
tex->src[2].src_type = nir_tex_src_sampler_deref;
|
||
tex->src[2].src = nir_src_for_ssa(tex_deref);
|
||
tex->src[3].src_type = nir_tex_src_ms_index;
|
||
tex->src[3].src = nir_src_for_ssa(sample_idx);
|
||
tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
|
||
tex->is_array = false;
|
||
tex->coord_components = tex_pos->num_components;
|
||
|
||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
|
||
nir_builder_instr_insert(b, &tex->instr);
|
||
return &tex->dest.ssa;
|
||
}
|
||
|
||
/* Fetches all samples at the given position and averages them */
|
||
static nir_ssa_def *
|
||
build_nir_tex_op_ms_resolve(struct nir_builder *b,
|
||
nir_ssa_def *tex_pos,
|
||
enum glsl_base_type tex_type,
|
||
VkSampleCountFlagBits src_samples)
|
||
{
|
||
assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
|
||
const struct glsl_type *sampler_type =
|
||
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
|
||
nir_variable *sampler =
|
||
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
|
||
sampler->data.descriptor_set = 0;
|
||
sampler->data.binding = 0;
|
||
|
||
const bool is_int = glsl_base_type_is_integer(tex_type);
|
||
|
||
nir_ssa_def *tmp;
|
||
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
|
||
for (uint32_t i = 0; i < src_samples; i++) {
|
||
nir_ssa_def *s =
|
||
build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
|
||
tex_type, tex_pos,
|
||
nir_imm_int(b, i));
|
||
|
||
/* For integer formats, the multisample resolve operation is expected to
|
||
* return one of the samples, we just return the first one.
|
||
*/
|
||
if (is_int)
|
||
return s;
|
||
|
||
tmp = i == 0 ? s : nir_fadd(b, tmp, s);
|
||
}
|
||
|
||
assert(!is_int);
|
||
return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
|
||
}
|
||
|
||
/* Fetches the current sample (gl_SampleID) at the given position */
|
||
static nir_ssa_def *
|
||
build_nir_tex_op_ms_read(struct nir_builder *b,
|
||
nir_ssa_def *tex_pos,
|
||
enum glsl_base_type tex_type)
|
||
{
|
||
const struct glsl_type *sampler_type =
|
||
glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
|
||
nir_variable *sampler =
|
||
nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
|
||
sampler->data.descriptor_set = 0;
|
||
sampler->data.binding = 0;
|
||
|
||
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
|
||
|
||
return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
|
||
tex_type, tex_pos,
|
||
nir_load_sample_id(b));
|
||
}
|
||
|
||
static nir_ssa_def *
|
||
build_nir_tex_op(struct nir_builder *b,
|
||
struct v3dv_device *device,
|
||
nir_ssa_def *tex_pos,
|
||
enum glsl_base_type tex_type,
|
||
VkSampleCountFlagBits dst_samples,
|
||
VkSampleCountFlagBits src_samples,
|
||
enum glsl_sampler_dim dim)
|
||
{
|
||
switch (dim) {
|
||
case GLSL_SAMPLER_DIM_MS:
|
||
assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
|
||
/* For multisampled texture sources we need to use fetching instead of
|
||
* normalized texture coordinates. We already configured our blit
|
||
* coordinates to be in texel units, but here we still need to convert
|
||
* them from floating point to integer.
|
||
*/
|
||
tex_pos = nir_f2i32(b, tex_pos);
|
||
|
||
if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
|
||
return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
|
||
else
|
||
return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
|
||
default:
|
||
assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
|
||
return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
|
||
}
|
||
}
|
||
|
||
static nir_shader *
|
||
get_blit_vs()
|
||
{
|
||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
|
||
"meta blit vs");
|
||
|
||
const struct glsl_type *vec4 = glsl_vec4_type();
|
||
|
||
nir_variable *vs_out_pos =
|
||
nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
|
||
vs_out_pos->data.location = VARYING_SLOT_POS;
|
||
|
||
nir_variable *vs_out_tex_coord =
|
||
nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
|
||
vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
|
||
vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
|
||
|
||
nir_ssa_def *pos = gen_rect_vertices(&b);
|
||
nir_store_var(&b, vs_out_pos, pos, 0xf);
|
||
|
||
nir_ssa_def *tex_coord = gen_tex_coords(&b);
|
||
nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
|
||
|
||
return b.shader;
|
||
}
|
||
|
||
static uint32_t
|
||
get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
|
||
{
|
||
switch (sampler_dim) {
|
||
case GLSL_SAMPLER_DIM_1D: return 0x1;
|
||
case GLSL_SAMPLER_DIM_2D: return 0x3;
|
||
case GLSL_SAMPLER_DIM_MS: return 0x3;
|
||
case GLSL_SAMPLER_DIM_3D: return 0x7;
|
||
default:
|
||
unreachable("invalid sampler dim");
|
||
};
|
||
}
|
||
|
||
static nir_shader *
|
||
get_color_blit_fs(struct v3dv_device *device,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkSampleCountFlagBits dst_samples,
|
||
VkSampleCountFlagBits src_samples,
|
||
enum glsl_sampler_dim sampler_dim)
|
||
{
|
||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
|
||
"meta blit fs");
|
||
|
||
const struct glsl_type *vec4 = glsl_vec4_type();
|
||
|
||
nir_variable *fs_in_tex_coord =
|
||
nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
|
||
fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
|
||
|
||
const struct glsl_type *fs_out_type =
|
||
vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
|
||
vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
|
||
glsl_vec4_type();
|
||
|
||
enum glsl_base_type src_base_type =
|
||
vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
|
||
vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
|
||
GLSL_TYPE_FLOAT;
|
||
|
||
nir_variable *fs_out_color =
|
||
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
|
||
fs_out_color->data.location = FRAG_RESULT_DATA0;
|
||
|
||
nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
|
||
const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
|
||
tex_coord = nir_channels(&b, tex_coord, channel_mask);
|
||
|
||
nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
|
||
dst_samples, src_samples, sampler_dim);
|
||
|
||
/* For integer textures, if the bit-size of the destination is too small to
|
||
* hold source value, Vulkan (CTS) expects the implementation to clamp to the
|
||
* maximum value the destination can hold. The hardware can clamp to the
|
||
* render target type, which usually matches the component bit-size, but
|
||
* there are some cases that won't match, such as rgb10a2, which has a 16-bit
|
||
* render target type, so in these cases we need to clamp manually.
|
||
*/
|
||
if (format_needs_software_int_clamp(dst_format)) {
|
||
assert(vk_format_is_int(dst_format));
|
||
enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
|
||
enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
|
||
|
||
nir_ssa_def *c[4];
|
||
for (uint32_t i = 0; i < 4; i++) {
|
||
c[i] = nir_channel(&b, color, i);
|
||
|
||
const uint32_t src_bit_size =
|
||
util_format_get_component_bits(src_pformat,
|
||
UTIL_FORMAT_COLORSPACE_RGB,
|
||
i);
|
||
const uint32_t dst_bit_size =
|
||
util_format_get_component_bits(dst_pformat,
|
||
UTIL_FORMAT_COLORSPACE_RGB,
|
||
i);
|
||
|
||
if (dst_bit_size >= src_bit_size)
|
||
continue;
|
||
|
||
if (util_format_is_pure_uint(dst_pformat)) {
|
||
nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
|
||
c[i] = nir_umin(&b, c[i], max);
|
||
} else {
|
||
nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
|
||
nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
|
||
c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
|
||
}
|
||
}
|
||
|
||
color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
|
||
}
|
||
|
||
nir_store_var(&b, fs_out_color, color, 0xf);
|
||
|
||
return b.shader;
|
||
}
|
||
|
||
static bool
|
||
create_pipeline(struct v3dv_device *device,
|
||
struct v3dv_render_pass *pass,
|
||
struct nir_shader *vs_nir,
|
||
struct nir_shader *fs_nir,
|
||
const VkPipelineVertexInputStateCreateInfo *vi_state,
|
||
const VkPipelineDepthStencilStateCreateInfo *ds_state,
|
||
const VkPipelineColorBlendStateCreateInfo *cb_state,
|
||
const VkPipelineMultisampleStateCreateInfo *ms_state,
|
||
const VkPipelineLayout layout,
|
||
VkPipeline *pipeline)
|
||
{
|
||
struct v3dv_shader_module vs_m;
|
||
struct v3dv_shader_module fs_m;
|
||
|
||
v3dv_shader_module_internal_init(&vs_m, vs_nir);
|
||
v3dv_shader_module_internal_init(&fs_m, fs_nir);
|
||
|
||
VkPipelineShaderStageCreateInfo stages[2] = {
|
||
{
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||
.module = v3dv_shader_module_to_handle(&vs_m),
|
||
.pName = "main",
|
||
},
|
||
{
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||
.module = v3dv_shader_module_to_handle(&fs_m),
|
||
.pName = "main",
|
||
},
|
||
};
|
||
|
||
VkGraphicsPipelineCreateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||
|
||
.stageCount = 2,
|
||
.pStages = stages,
|
||
|
||
.pVertexInputState = vi_state,
|
||
|
||
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
|
||
.primitiveRestartEnable = false,
|
||
},
|
||
|
||
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||
.viewportCount = 1,
|
||
.scissorCount = 1,
|
||
},
|
||
|
||
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||
.rasterizerDiscardEnable = false,
|
||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||
.cullMode = VK_CULL_MODE_NONE,
|
||
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
|
||
.depthBiasEnable = false,
|
||
},
|
||
|
||
.pMultisampleState = ms_state,
|
||
|
||
.pDepthStencilState = ds_state,
|
||
|
||
.pColorBlendState = cb_state,
|
||
|
||
/* The meta clear pipeline declares all state as dynamic.
|
||
* As a consequence, vkCmdBindPipeline writes no dynamic state
|
||
* to the cmd buffer. Therefore, at the end of the meta clear,
|
||
* we need only restore dynamic state that was vkCmdSet.
|
||
*/
|
||
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||
.dynamicStateCount = 6,
|
||
.pDynamicStates = (VkDynamicState[]) {
|
||
VK_DYNAMIC_STATE_VIEWPORT,
|
||
VK_DYNAMIC_STATE_SCISSOR,
|
||
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
|
||
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
|
||
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||
VK_DYNAMIC_STATE_DEPTH_BIAS,
|
||
VK_DYNAMIC_STATE_LINE_WIDTH,
|
||
},
|
||
},
|
||
|
||
.flags = 0,
|
||
.layout = layout,
|
||
.renderPass = v3dv_render_pass_to_handle(pass),
|
||
.subpass = 0,
|
||
};
|
||
|
||
VkResult result =
|
||
v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
|
||
VK_NULL_HANDLE,
|
||
1, &info,
|
||
&device->vk.alloc,
|
||
pipeline);
|
||
|
||
ralloc_free(vs_nir);
|
||
ralloc_free(fs_nir);
|
||
|
||
return result == VK_SUCCESS;
|
||
}
|
||
|
||
static enum glsl_sampler_dim
|
||
get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
|
||
{
|
||
/* From the Vulkan 1.0 spec, VkImageCreateInfo Validu Usage:
|
||
*
|
||
* "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
|
||
* VK_IMAGE_TYPE_2D, ..."
|
||
*/
|
||
assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
|
||
|
||
switch (type) {
|
||
case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
|
||
case VK_IMAGE_TYPE_2D:
|
||
return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
|
||
GLSL_SAMPLER_DIM_MS;
|
||
case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
|
||
default:
|
||
unreachable("Invalid image type");
|
||
}
|
||
}
|
||
|
||
static bool
|
||
create_blit_pipeline(struct v3dv_device *device,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkColorComponentFlags cmask,
|
||
VkImageType src_type,
|
||
VkSampleCountFlagBits dst_samples,
|
||
VkSampleCountFlagBits src_samples,
|
||
VkRenderPass _pass,
|
||
VkPipelineLayout pipeline_layout,
|
||
VkPipeline *pipeline)
|
||
{
|
||
struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
|
||
|
||
/* We always rewrite depth/stencil blits to compatible color blits */
|
||
assert(vk_format_is_color(dst_format));
|
||
assert(vk_format_is_color(src_format));
|
||
|
||
const enum glsl_sampler_dim sampler_dim =
|
||
get_sampler_dim(src_type, src_samples);
|
||
|
||
nir_shader *vs_nir = get_blit_vs();
|
||
nir_shader *fs_nir =
|
||
get_color_blit_fs(device, dst_format, src_format,
|
||
dst_samples, src_samples, sampler_dim);
|
||
|
||
const VkPipelineVertexInputStateCreateInfo vi_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||
.vertexBindingDescriptionCount = 0,
|
||
.vertexAttributeDescriptionCount = 0,
|
||
};
|
||
|
||
VkPipelineDepthStencilStateCreateInfo ds_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||
};
|
||
|
||
VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
|
||
blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
|
||
.blendEnable = false,
|
||
.colorWriteMask = cmask,
|
||
};
|
||
|
||
const VkPipelineColorBlendStateCreateInfo cb_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||
.logicOpEnable = false,
|
||
.attachmentCount = 1,
|
||
.pAttachments = blend_att_state
|
||
};
|
||
|
||
const VkPipelineMultisampleStateCreateInfo ms_state = {
|
||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||
.rasterizationSamples = dst_samples,
|
||
.sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
|
||
.pSampleMask = NULL,
|
||
.alphaToCoverageEnable = false,
|
||
.alphaToOneEnable = false,
|
||
};
|
||
|
||
return create_pipeline(device,
|
||
pass,
|
||
vs_nir, fs_nir,
|
||
&vi_state,
|
||
&ds_state,
|
||
&cb_state,
|
||
&ms_state,
|
||
pipeline_layout,
|
||
pipeline);
|
||
}
|
||
|
||
/**
|
||
* Return a pipeline suitable for blitting the requested aspect given the
|
||
* destination and source formats.
|
||
*/
|
||
static bool
|
||
get_blit_pipeline(struct v3dv_device *device,
|
||
VkFormat dst_format,
|
||
VkFormat src_format,
|
||
VkColorComponentFlags cmask,
|
||
VkImageType src_type,
|
||
VkSampleCountFlagBits dst_samples,
|
||
VkSampleCountFlagBits src_samples,
|
||
struct v3dv_meta_blit_pipeline **pipeline)
|
||
{
|
||
bool ok = true;
|
||
|
||
uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
|
||
get_blit_pipeline_cache_key(dst_format, src_format, cmask,
|
||
dst_samples, src_samples, key);
|
||
mtx_lock(&device->meta.mtx);
|
||
struct hash_entry *entry =
|
||
_mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
|
||
if (entry) {
|
||
mtx_unlock(&device->meta.mtx);
|
||
*pipeline = entry->data;
|
||
return true;
|
||
}
|
||
|
||
*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
|
||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||
|
||
if (*pipeline == NULL)
|
||
goto fail;
|
||
|
||
ok = create_blit_render_pass(device, dst_format, src_format,
|
||
&(*pipeline)->pass,
|
||
&(*pipeline)->pass_no_load);
|
||
if (!ok)
|
||
goto fail;
|
||
|
||
/* Create the pipeline using one of the render passes, they are both
|
||
* compatible, so we don't care which one we use here.
|
||
*/
|
||
ok = create_blit_pipeline(device,
|
||
dst_format,
|
||
src_format,
|
||
cmask,
|
||
src_type,
|
||
dst_samples,
|
||
src_samples,
|
||
(*pipeline)->pass,
|
||
device->meta.blit.p_layout,
|
||
&(*pipeline)->pipeline);
|
||
if (!ok)
|
||
goto fail;
|
||
|
||
memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
|
||
_mesa_hash_table_insert(device->meta.blit.cache[src_type],
|
||
&(*pipeline)->key, *pipeline);
|
||
|
||
mtx_unlock(&device->meta.mtx);
|
||
return true;
|
||
|
||
fail:
|
||
mtx_unlock(&device->meta.mtx);
|
||
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
if (*pipeline) {
|
||
if ((*pipeline)->pass)
|
||
v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
|
||
if ((*pipeline)->pass_no_load)
|
||
v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
|
||
if ((*pipeline)->pipeline)
|
||
v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
|
||
vk_free(&device->vk.alloc, *pipeline);
|
||
*pipeline = NULL;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
static void
|
||
compute_blit_box(const VkOffset3D *offsets,
|
||
uint32_t image_w, uint32_t image_h,
|
||
uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
|
||
bool *mirror_x, bool *mirror_y)
|
||
{
|
||
if (offsets[1].x >= offsets[0].x) {
|
||
*mirror_x = false;
|
||
*x = MIN2(offsets[0].x, image_w - 1);
|
||
*w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
|
||
} else {
|
||
*mirror_x = true;
|
||
*x = MIN2(offsets[1].x, image_w - 1);
|
||
*w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
|
||
}
|
||
if (offsets[1].y >= offsets[0].y) {
|
||
*mirror_y = false;
|
||
*y = MIN2(offsets[0].y, image_h - 1);
|
||
*h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
|
||
} else {
|
||
*mirror_y = true;
|
||
*y = MIN2(offsets[1].y, image_h - 1);
|
||
*h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
|
||
}
|
||
}
|
||
|
||
static void
|
||
compute_blit_3d_layers(const VkOffset3D *offsets,
|
||
uint32_t *min_layer, uint32_t *max_layer,
|
||
bool *mirror_z)
|
||
{
|
||
if (offsets[1].z >= offsets[0].z) {
|
||
*mirror_z = false;
|
||
*min_layer = offsets[0].z;
|
||
*max_layer = offsets[1].z;
|
||
} else {
|
||
*mirror_z = true;
|
||
*min_layer = offsets[1].z;
|
||
*max_layer = offsets[0].z;
|
||
}
|
||
}
|
||
|
||
static VkResult
|
||
create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
|
||
{
|
||
/* If this is not the first pool we create for this command buffer
|
||
* size it based on the size of the currently exhausted pool.
|
||
*/
|
||
uint32_t descriptor_count = 64;
|
||
if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
|
||
struct v3dv_descriptor_pool *exhausted_pool =
|
||
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
|
||
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
|
||
}
|
||
|
||
/* Create the descriptor pool */
|
||
cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
|
||
VkDescriptorPoolSize pool_size = {
|
||
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||
.descriptorCount = descriptor_count,
|
||
};
|
||
VkDescriptorPoolCreateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||
.maxSets = descriptor_count,
|
||
.poolSizeCount = 1,
|
||
.pPoolSizes = &pool_size,
|
||
.flags = 0,
|
||
};
|
||
VkResult result =
|
||
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
|
||
&info,
|
||
&cmd_buffer->device->vk.alloc,
|
||
&cmd_buffer->meta.blit.dspool);
|
||
|
||
if (result == VK_SUCCESS) {
|
||
assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
|
||
const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t) _pool,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
|
||
|
||
struct v3dv_descriptor_pool *pool =
|
||
v3dv_descriptor_pool_from_handle(_pool);
|
||
pool->is_driver_internal = true;
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
static VkResult
|
||
allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
|
||
VkDescriptorSet *set)
|
||
{
|
||
/* Make sure we have a descriptor pool */
|
||
VkResult result;
|
||
if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
|
||
result = create_blit_descriptor_pool(cmd_buffer);
|
||
if (result != VK_SUCCESS)
|
||
return result;
|
||
}
|
||
assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
|
||
|
||
/* Allocate descriptor set */
|
||
struct v3dv_device *device = cmd_buffer->device;
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
VkDescriptorSetAllocateInfo info = {
|
||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||
.descriptorPool = cmd_buffer->meta.blit.dspool,
|
||
.descriptorSetCount = 1,
|
||
.pSetLayouts = &device->meta.blit.ds_layout,
|
||
};
|
||
result = v3dv_AllocateDescriptorSets(_device, &info, set);
|
||
|
||
/* If we ran out of pool space, grow the pool and try again */
|
||
if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
|
||
result = create_blit_descriptor_pool(cmd_buffer);
|
||
if (result == VK_SUCCESS) {
|
||
info.descriptorPool = cmd_buffer->meta.blit.dspool;
|
||
result = v3dv_AllocateDescriptorSets(_device, &info, set);
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* Returns true if the implementation supports the requested operation (even if
|
||
* it failed to process it, for example, due to an out-of-memory error).
|
||
*
|
||
* The caller can specify the channels on the destination to be written via the
|
||
* cmask parameter (which can be 0 to default to all channels), as well as a
|
||
* swizzle to apply to the source via the cswizzle parameter (which can be NULL
|
||
* to use the default identity swizzle).
|
||
*/
|
||
static bool
|
||
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
VkFormat dst_format,
|
||
struct v3dv_image *src,
|
||
VkFormat src_format,
|
||
VkColorComponentFlags cmask,
|
||
VkComponentMapping *cswizzle,
|
||
const VkImageBlit *_region,
|
||
VkFilter filter,
|
||
bool dst_is_padded_image)
|
||
{
|
||
bool handled = true;
|
||
VkResult result;
|
||
uint32_t dirty_dynamic_state = 0;
|
||
|
||
/* We don't support rendering to linear depth/stencil, this should have
|
||
* been rewritten to a compatible color blit by the caller.
|
||
*/
|
||
assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||
|
||
!vk_format_is_depth_or_stencil(dst_format));
|
||
|
||
/* Can't sample from linear images */
|
||
if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)
|
||
return false;
|
||
|
||
VkImageBlit region = *_region;
|
||
/* Rewrite combined D/S blits to compatible color blits */
|
||
if (vk_format_is_depth_or_stencil(dst_format)) {
|
||
assert(src_format == dst_format);
|
||
assert(cmask == 0);
|
||
switch(dst_format) {
|
||
case VK_FORMAT_D16_UNORM:
|
||
dst_format = VK_FORMAT_R16_UINT;
|
||
break;
|
||
case VK_FORMAT_D32_SFLOAT:
|
||
dst_format = VK_FORMAT_R32_UINT;
|
||
break;
|
||
case VK_FORMAT_X8_D24_UNORM_PACK32:
|
||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||
if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||
cmask |= VK_COLOR_COMPONENT_G_BIT |
|
||
VK_COLOR_COMPONENT_B_BIT |
|
||
VK_COLOR_COMPONENT_A_BIT;
|
||
}
|
||
if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||
assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
|
||
cmask |= VK_COLOR_COMPONENT_R_BIT;
|
||
}
|
||
dst_format = VK_FORMAT_R8G8B8A8_UINT;
|
||
break;
|
||
default:
|
||
unreachable("Unsupported depth/stencil format");
|
||
};
|
||
src_format = dst_format;
|
||
region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||
}
|
||
|
||
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
|
||
VK_COLOR_COMPONENT_G_BIT |
|
||
VK_COLOR_COMPONENT_B_BIT |
|
||
VK_COLOR_COMPONENT_A_BIT;
|
||
if (cmask == 0)
|
||
cmask = full_cmask;
|
||
|
||
VkComponentMapping ident_swizzle = {
|
||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||
};
|
||
if (!cswizzle)
|
||
cswizzle = &ident_swizzle;
|
||
|
||
/* When we get here from a copy between compressed / uncompressed images
|
||
* we choose to specify the destination blit region based on the size
|
||
* semantics of the source image of the copy (see copy_image_blit), so we
|
||
* need to apply those same semantics here when we compute the size of the
|
||
* destination image level.
|
||
*/
|
||
const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
|
||
const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
|
||
const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
|
||
const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
|
||
const uint32_t dst_level_w =
|
||
u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),
|
||
region.dstSubresource.mipLevel);
|
||
const uint32_t dst_level_h =
|
||
u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),
|
||
region.dstSubresource.mipLevel);
|
||
|
||
const uint32_t src_level_w =
|
||
u_minify(src->extent.width, region.srcSubresource.mipLevel);
|
||
const uint32_t src_level_h =
|
||
u_minify(src->extent.height, region.srcSubresource.mipLevel);
|
||
const uint32_t src_level_d =
|
||
u_minify(src->extent.depth, region.srcSubresource.mipLevel);
|
||
|
||
uint32_t dst_x, dst_y, dst_w, dst_h;
|
||
bool dst_mirror_x, dst_mirror_y;
|
||
compute_blit_box(region.dstOffsets,
|
||
dst_level_w, dst_level_h,
|
||
&dst_x, &dst_y, &dst_w, &dst_h,
|
||
&dst_mirror_x, &dst_mirror_y);
|
||
|
||
uint32_t src_x, src_y, src_w, src_h;
|
||
bool src_mirror_x, src_mirror_y;
|
||
compute_blit_box(region.srcOffsets,
|
||
src_level_w, src_level_h,
|
||
&src_x, &src_y, &src_w, &src_h,
|
||
&src_mirror_x, &src_mirror_y);
|
||
|
||
uint32_t min_dst_layer;
|
||
uint32_t max_dst_layer;
|
||
bool dst_mirror_z = false;
|
||
if (dst->type != VK_IMAGE_TYPE_3D) {
|
||
min_dst_layer = region.dstSubresource.baseArrayLayer;
|
||
max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
|
||
} else {
|
||
compute_blit_3d_layers(region.dstOffsets,
|
||
&min_dst_layer, &max_dst_layer,
|
||
&dst_mirror_z);
|
||
}
|
||
|
||
uint32_t min_src_layer;
|
||
uint32_t max_src_layer;
|
||
bool src_mirror_z = false;
|
||
if (src->type != VK_IMAGE_TYPE_3D) {
|
||
min_src_layer = region.srcSubresource.baseArrayLayer;
|
||
max_src_layer = min_src_layer + region.srcSubresource.layerCount;
|
||
} else {
|
||
compute_blit_3d_layers(region.srcOffsets,
|
||
&min_src_layer, &max_src_layer,
|
||
&src_mirror_z);
|
||
}
|
||
|
||
uint32_t layer_count = max_dst_layer - min_dst_layer;
|
||
|
||
/* Translate source blit coordinates to normalized texture coordinates for
|
||
* single sampled textures. For multisampled textures we require
|
||
* unnormalized coordinates, since we can only do texelFetch on them.
|
||
*/
|
||
float coords[4] = {
|
||
(float)src_x,
|
||
(float)src_y,
|
||
(float)(src_x + src_w),
|
||
(float)(src_y + src_h),
|
||
};
|
||
|
||
if (src->samples == VK_SAMPLE_COUNT_1_BIT) {
|
||
coords[0] /= (float)src_level_w;
|
||
coords[1] /= (float)src_level_h;
|
||
coords[2] /= (float)src_level_w;
|
||
coords[3] /= (float)src_level_h;
|
||
}
|
||
|
||
/* Handle mirroring */
|
||
const bool mirror_x = dst_mirror_x != src_mirror_x;
|
||
const bool mirror_y = dst_mirror_y != src_mirror_y;
|
||
const bool mirror_z = dst_mirror_z != src_mirror_z;
|
||
float tex_coords[5] = {
|
||
!mirror_x ? coords[0] : coords[2],
|
||
!mirror_y ? coords[1] : coords[3],
|
||
!mirror_x ? coords[2] : coords[0],
|
||
!mirror_y ? coords[3] : coords[1],
|
||
/* Z coordinate for 3D blit sources, to be filled for each
|
||
* destination layer
|
||
*/
|
||
0.0f
|
||
};
|
||
|
||
/* For blits from 3D images we also need to compute the slice coordinate to
|
||
* sample from, which will change for each layer in the destination.
|
||
* Compute the step we should increase for each iteration.
|
||
*/
|
||
const float src_z_step =
|
||
(float)(max_src_layer - min_src_layer) / (float)layer_count;
|
||
|
||
/* Get the blit pipeline */
|
||
struct v3dv_meta_blit_pipeline *pipeline = NULL;
|
||
bool ok = get_blit_pipeline(cmd_buffer->device,
|
||
dst_format, src_format, cmask, src->type,
|
||
dst->samples, src->samples,
|
||
&pipeline);
|
||
if (!ok)
|
||
return handled;
|
||
assert(pipeline && pipeline->pipeline &&
|
||
pipeline->pass && pipeline->pass_no_load);
|
||
|
||
struct v3dv_device *device = cmd_buffer->device;
|
||
assert(device->meta.blit.ds_layout);
|
||
|
||
VkDevice _device = v3dv_device_to_handle(device);
|
||
VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
||
|
||
/* Create sampler for blit source image */
|
||
VkSamplerCreateInfo sampler_info = {
|
||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||
.magFilter = filter,
|
||
.minFilter = filter,
|
||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||
};
|
||
VkSampler sampler;
|
||
result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
|
||
&sampler);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)sampler,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
|
||
|
||
/* Push command buffer state before starting meta operation */
|
||
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
||
|
||
/* Push state that is common for all layers */
|
||
v3dv_CmdBindPipeline(_cmd_buffer,
|
||
VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||
pipeline->pipeline);
|
||
|
||
const VkViewport viewport = {
|
||
.x = dst_x,
|
||
.y = dst_y,
|
||
.width = dst_w,
|
||
.height = dst_h,
|
||
.minDepth = 0.0f,
|
||
.maxDepth = 1.0f
|
||
};
|
||
v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
|
||
|
||
const VkRect2D scissor = {
|
||
.offset = { dst_x, dst_y },
|
||
.extent = { dst_w, dst_h }
|
||
};
|
||
v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
|
||
|
||
bool can_skip_tlb_load = false;
|
||
const VkRect2D render_area = {
|
||
.offset = { dst_x, dst_y },
|
||
.extent = { dst_w, dst_h },
|
||
};
|
||
|
||
/* Record per-layer commands */
|
||
VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
|
||
for (uint32_t i = 0; i < layer_count; i++) {
|
||
/* Setup framebuffer */
|
||
VkImageViewCreateInfo dst_image_view_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||
.image = v3dv_image_to_handle(dst),
|
||
.viewType = v3dv_image_type_to_view_type(dst->type),
|
||
.format = dst_format,
|
||
.subresourceRange = {
|
||
.aspectMask = aspects,
|
||
.baseMipLevel = region.dstSubresource.mipLevel,
|
||
.levelCount = 1,
|
||
.baseArrayLayer = min_dst_layer + i,
|
||
.layerCount = 1
|
||
},
|
||
};
|
||
VkImageView dst_image_view;
|
||
result = v3dv_CreateImageView(_device, &dst_image_view_info,
|
||
&device->vk.alloc, &dst_image_view);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)dst_image_view,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
|
||
|
||
VkFramebufferCreateInfo fb_info = {
|
||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
||
.renderPass = pipeline->pass,
|
||
.attachmentCount = 1,
|
||
.pAttachments = &dst_image_view,
|
||
.width = dst_x + dst_w,
|
||
.height = dst_y + dst_h,
|
||
.layers = 1,
|
||
};
|
||
|
||
VkFramebuffer fb;
|
||
result = v3dv_CreateFramebuffer(_device, &fb_info,
|
||
&cmd_buffer->device->vk.alloc, &fb);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
|
||
framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
|
||
fb_info.height == dst_level_h &&
|
||
dst_is_padded_image;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)fb,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
|
||
|
||
/* Setup descriptor set for blit source texture. We don't have to
|
||
* register the descriptor as a private command buffer object since
|
||
* all descriptors will be freed automatically with the descriptor
|
||
* pool.
|
||
*/
|
||
VkDescriptorSet set;
|
||
result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
VkImageViewCreateInfo src_image_view_info = {
|
||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||
.image = v3dv_image_to_handle(src),
|
||
.viewType = v3dv_image_type_to_view_type(src->type),
|
||
.format = src_format,
|
||
.components = *cswizzle,
|
||
.subresourceRange = {
|
||
.aspectMask = aspects,
|
||
.baseMipLevel = region.srcSubresource.mipLevel,
|
||
.levelCount = 1,
|
||
.baseArrayLayer =
|
||
src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
|
||
.layerCount = 1
|
||
},
|
||
};
|
||
VkImageView src_image_view;
|
||
result = v3dv_CreateImageView(_device, &src_image_view_info,
|
||
&device->vk.alloc, &src_image_view);
|
||
if (result != VK_SUCCESS)
|
||
goto fail;
|
||
|
||
v3dv_cmd_buffer_add_private_obj(
|
||
cmd_buffer, (uintptr_t)src_image_view,
|
||
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
|
||
|
||
VkDescriptorImageInfo image_info = {
|
||
.sampler = sampler,
|
||
.imageView = src_image_view,
|
||
.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||
};
|
||
VkWriteDescriptorSet write = {
|
||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||
.dstSet = set,
|
||
.dstBinding = 0,
|
||
.dstArrayElement = 0,
|
||
.descriptorCount = 1,
|
||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||
.pImageInfo = &image_info,
|
||
};
|
||
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
|
||
|
||
v3dv_CmdBindDescriptorSets(_cmd_buffer,
|
||
VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||
device->meta.blit.p_layout,
|
||
0, 1, &set,
|
||
0, NULL);
|
||
|
||
/* If the region we are about to blit is tile-aligned, then we can
|
||
* use the render pass version that won't pre-load the tile buffer
|
||
* with the dst image contents before the blit. The exception is when we
|
||
* don't have a full color mask, since in that case we need to preserve
|
||
* the original value of some of the color components.
|
||
*
|
||
* Since all layers have the same area, we only need to compute this for
|
||
* the first.
|
||
*/
|
||
if (i == 0) {
|
||
struct v3dv_render_pass *pipeline_pass =
|
||
v3dv_render_pass_from_handle(pipeline->pass);
|
||
can_skip_tlb_load =
|
||
cmask == full_cmask &&
|
||
v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer,
|
||
pipeline_pass, 0);
|
||
}
|
||
|
||
/* Record blit */
|
||
VkRenderPassBeginInfo rp_info = {
|
||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||
.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
|
||
pipeline->pass,
|
||
.framebuffer = fb,
|
||
.renderArea = render_area,
|
||
.clearValueCount = 0,
|
||
};
|
||
|
||
v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
|
||
struct v3dv_job *job = cmd_buffer->state.job;
|
||
if (!job)
|
||
goto fail;
|
||
|
||
/* For 3D blits we need to compute the source slice to blit from (the Z
|
||
* coordinate of the source sample operation). We want to choose this
|
||
* based on the ratio of the depth of the source and the destination
|
||
* images, picking the coordinate in the middle of each step.
|
||
*/
|
||
if (src->type == VK_IMAGE_TYPE_3D) {
|
||
tex_coords[4] =
|
||
!mirror_z ?
|
||
(min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
|
||
(max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
|
||
}
|
||
|
||
v3dv_CmdPushConstants(_cmd_buffer,
|
||
device->meta.blit.p_layout,
|
||
VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
|
||
&tex_coords);
|
||
|
||
v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
|
||
|
||
v3dv_CmdEndRenderPass(_cmd_buffer);
|
||
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
|
||
}
|
||
|
||
fail:
|
||
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
|
||
|
||
return handled;
|
||
}
|
||
|
||
void
|
||
v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
|
||
VkImage srcImage,
|
||
VkImageLayout srcImageLayout,
|
||
VkImage dstImage,
|
||
VkImageLayout dstImageLayout,
|
||
uint32_t regionCount,
|
||
const VkImageBlit* pRegions,
|
||
VkFilter filter)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
|
||
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
|
||
|
||
/* This command can only happen outside a render pass */
|
||
assert(cmd_buffer->state.pass == NULL);
|
||
assert(cmd_buffer->state.job == NULL);
|
||
|
||
/* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
|
||
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
|
||
src->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
/* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
|
||
assert(!vk_format_is_compressed(dst->vk_format));
|
||
|
||
for (uint32_t i = 0; i < regionCount; i++) {
|
||
if (blit_tfu(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
if (blit_shader(cmd_buffer,
|
||
dst, dst->vk_format,
|
||
src, src->vk_format,
|
||
0, NULL,
|
||
&pRegions[i], filter, true)) {
|
||
continue;
|
||
}
|
||
unreachable("Unsupported blit operation");
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
|
||
struct framebuffer_data *framebuffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
uint32_t layer_offset,
|
||
const VkImageResolve *region)
|
||
{
|
||
struct v3dv_cl *cl = &job->indirect;
|
||
v3dv_cl_ensure_space(cl, 200, 1);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||
|
||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||
|
||
assert((src->type != VK_IMAGE_TYPE_3D &&
|
||
layer_offset < region->srcSubresource.layerCount) ||
|
||
layer_offset < src->extent.depth);
|
||
|
||
const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
|
||
region->srcSubresource.baseArrayLayer + layer_offset :
|
||
region->srcOffset.z + layer_offset;
|
||
|
||
emit_image_load(cl, framebuffer, src,
|
||
region->srcSubresource.aspectMask,
|
||
src_layer,
|
||
region->srcSubresource.mipLevel,
|
||
false, false);
|
||
|
||
cl_emit(cl, END_OF_LOADS, end);
|
||
|
||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||
|
||
assert((dst->type != VK_IMAGE_TYPE_3D &&
|
||
layer_offset < region->dstSubresource.layerCount) ||
|
||
layer_offset < dst->extent.depth);
|
||
|
||
const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
|
||
region->dstSubresource.baseArrayLayer + layer_offset :
|
||
region->dstOffset.z + layer_offset;
|
||
|
||
emit_image_store(cl, framebuffer, dst,
|
||
region->dstSubresource.aspectMask,
|
||
dst_layer,
|
||
region->dstSubresource.mipLevel,
|
||
false, false, true);
|
||
|
||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||
|
||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||
|
||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||
branch.start = tile_list_start;
|
||
branch.end = v3dv_cl_get_address(cl);
|
||
}
|
||
}
|
||
|
||
static void
|
||
emit_resolve_image_layer(struct v3dv_job *job,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
struct framebuffer_data *framebuffer,
|
||
uint32_t layer,
|
||
const VkImageResolve *region)
|
||
{
|
||
emit_frame_setup(job, layer, NULL);
|
||
emit_resolve_image_layer_per_tile_list(job, framebuffer,
|
||
dst, src, layer, region);
|
||
emit_supertile_coordinates(job, framebuffer);
|
||
}
|
||
|
||
static void
|
||
emit_resolve_image_rcl(struct v3dv_job *job,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
struct framebuffer_data *framebuffer,
|
||
const VkImageResolve *region)
|
||
{
|
||
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
|
||
v3dv_return_if_oom(NULL, job);
|
||
|
||
for (int layer = 0; layer < job->frame_tiling.layers; layer++)
|
||
emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
|
||
cl_emit(rcl, END_OF_RENDERING, end);
|
||
}
|
||
|
||
static bool
|
||
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageResolve *region)
|
||
{
|
||
if (!can_use_tlb(src, ®ion->srcOffset, NULL) ||
|
||
!can_use_tlb(dst, ®ion->dstOffset, NULL)) {
|
||
return false;
|
||
}
|
||
|
||
if (!v3dv_format_supports_tlb_resolve(src->format))
|
||
return false;
|
||
|
||
const VkFormat fb_format = src->vk_format;
|
||
|
||
uint32_t num_layers;
|
||
if (dst->type != VK_IMAGE_TYPE_3D)
|
||
num_layers = region->dstSubresource.layerCount;
|
||
else
|
||
num_layers = region->extent.depth;
|
||
assert(num_layers > 0);
|
||
|
||
struct v3dv_job *job =
|
||
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
|
||
if (!job)
|
||
return true;
|
||
|
||
const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
|
||
const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
|
||
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
|
||
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
|
||
|
||
uint32_t internal_type, internal_bpp;
|
||
get_internal_type_bpp_for_image_aspects(fb_format,
|
||
region->srcSubresource.aspectMask,
|
||
&internal_type, &internal_bpp);
|
||
|
||
v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);
|
||
|
||
struct framebuffer_data framebuffer;
|
||
setup_framebuffer_data(&framebuffer, fb_format, internal_type,
|
||
&job->frame_tiling);
|
||
|
||
v3dv_job_emit_binning_flush(job);
|
||
emit_resolve_image_rcl(job, dst, src, &framebuffer, region);
|
||
|
||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||
return true;
|
||
}
|
||
|
||
static bool
|
||
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
|
||
struct v3dv_image *dst,
|
||
struct v3dv_image *src,
|
||
const VkImageResolve *region)
|
||
{
|
||
const VkImageBlit blit_region = {
|
||
.srcSubresource = region->srcSubresource,
|
||
.srcOffsets = {
|
||
region->srcOffset,
|
||
{
|
||
region->srcOffset.x + region->extent.width,
|
||
region->srcOffset.y + region->extent.height,
|
||
}
|
||
},
|
||
.dstSubresource = region->dstSubresource,
|
||
.dstOffsets = {
|
||
region->dstOffset,
|
||
{
|
||
region->dstOffset.x + region->extent.width,
|
||
region->dstOffset.y + region->extent.height,
|
||
}
|
||
},
|
||
};
|
||
return blit_shader(cmd_buffer,
|
||
dst, dst->vk_format,
|
||
src, src->vk_format,
|
||
0, NULL,
|
||
&blit_region, VK_FILTER_NEAREST, true);
|
||
}
|
||
|
||
void
|
||
v3dv_CmdResolveImage(VkCommandBuffer commandBuffer,
|
||
VkImage srcImage,
|
||
VkImageLayout srcImageLayout,
|
||
VkImage dstImage,
|
||
VkImageLayout dstImageLayout,
|
||
uint32_t regionCount,
|
||
const VkImageResolve *pRegions)
|
||
{
|
||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||
V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
|
||
V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
|
||
|
||
/* This command can only happen outside a render pass */
|
||
assert(cmd_buffer->state.pass == NULL);
|
||
assert(cmd_buffer->state.job == NULL);
|
||
|
||
assert(src->samples == VK_SAMPLE_COUNT_4_BIT);
|
||
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
|
||
|
||
for (uint32_t i = 0; i < regionCount; i++) {
|
||
if (resolve_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
if (resolve_image_blit(cmd_buffer, dst, src, &pRegions[i]))
|
||
continue;
|
||
unreachable("Unsupported multismaple resolve operation");
|
||
}
|
||
}
|