mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-21 14:10:37 +02:00
turnip: implement UBWC
This enables UBWC for everything except 3D textures. It breaks many image_to_image copies but those aren't important and it can be worked around later (image_to_image copy needs to be done in two steps, decode from the source format and then encode to the destination format). Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
91fd83d142
commit
773d640efa
9 changed files with 324 additions and 124 deletions
|
|
@ -31,6 +31,8 @@
|
|||
#include <vulkan/vk_android_native_buffer.h>
|
||||
#include <vulkan/vk_icd.h>
|
||||
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
static int
|
||||
tu_hal_open(const struct hw_module_t *mod,
|
||||
const char *id,
|
||||
|
|
@ -120,12 +122,8 @@ tu_image_from_gralloc(VkDevice device_h,
|
|||
struct tu_bo *bo = NULL;
|
||||
VkResult result;
|
||||
|
||||
result = tu_image_create(
|
||||
device_h,
|
||||
&(struct tu_image_create_info) {
|
||||
.vk_info = base_info, .scanout = true, .no_metadata_planes = true },
|
||||
alloc, &image_h);
|
||||
|
||||
result = tu_image_create(device_h, base_info, alloc, &image_h,
|
||||
DRM_FORMAT_MOD_LINEAR);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ blit_copy_format(VkFormat format)
|
|||
switch (vk_format_get_blocksizebits(format)) {
|
||||
case 8: return VK_FORMAT_R8_UINT;
|
||||
case 16: return VK_FORMAT_R16_UINT;
|
||||
case 32: return VK_FORMAT_R8G8B8A8_UINT;
|
||||
case 32: return VK_FORMAT_R32_UINT;
|
||||
case 64: return VK_FORMAT_R32G32_UINT;
|
||||
case 96: return VK_FORMAT_R32G32B32_UINT;
|
||||
case 128:return VK_FORMAT_R32G32B32A32_UINT;
|
||||
|
|
@ -74,7 +74,8 @@ blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read)
|
|||
return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) |
|
||||
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
|
||||
A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) |
|
||||
COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB);
|
||||
COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
|
||||
COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -82,7 +83,7 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
|
|||
{
|
||||
struct tu_cs *cs = &cmdbuf->cs;
|
||||
|
||||
tu_cs_reserve_space(cmdbuf->device, cs, 52);
|
||||
tu_cs_reserve_space(cmdbuf->device, cs, 66);
|
||||
|
||||
enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb;
|
||||
if (fmt == RB6_Z24_UNORM_S8_UINT)
|
||||
|
|
@ -135,6 +136,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
|
|||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
|
||||
if (blt->src.ubwc_size) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
|
||||
tu_cs_emit_qw(cs, blt->src.ubwc_va);
|
||||
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
|
||||
A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -150,6 +161,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
|
|||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
|
||||
if (blt->dst.ubwc_size) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
|
||||
tu_cs_emit_qw(cs, blt->dst.ubwc_va);
|
||||
tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
|
||||
A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
|
||||
|
|
@ -196,7 +217,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
|
|||
switch (blt->type) {
|
||||
case TU_BLIT_COPY:
|
||||
blt->stencil_read =
|
||||
blt->dst.fmt == VK_FORMAT_R8_UINT &&
|
||||
blt->dst.fmt == VK_FORMAT_R8_UNORM &&
|
||||
blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
|
||||
|
||||
assert(vk_format_get_blocksize(blt->dst.fmt) ==
|
||||
|
|
@ -210,6 +231,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
|
|||
blt->src.pitch /= block_width;
|
||||
blt->src.x /= block_width;
|
||||
blt->src.y /= block_height;
|
||||
blt->src.fmt = blit_copy_format(blt->src.fmt);
|
||||
|
||||
/* for image_to_image copy, width/height is on the src format */
|
||||
blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
|
||||
|
|
@ -223,12 +245,16 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
|
|||
blt->dst.pitch /= block_width;
|
||||
blt->dst.x /= block_width;
|
||||
blt->dst.y /= block_height;
|
||||
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
|
||||
}
|
||||
|
||||
blt->src.fmt = blit_copy_format(blt->src.fmt);
|
||||
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
|
||||
if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
|
||||
|
||||
/* TODO: does this work correctly with tiling/etc ? */
|
||||
if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
blt->src.fmt = blit_copy_format(blt->src.fmt);
|
||||
|
||||
/* TODO: multisample image copy does not work correctly with tiling/UBWC */
|
||||
blt->src.x *= blt->src.samples;
|
||||
blt->dst.x *= blt->dst.samples;
|
||||
blt->src.width *= blt->src.samples;
|
||||
|
|
@ -304,6 +330,8 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
|
|||
}
|
||||
blt->dst.va += blt->dst.layer_size;
|
||||
blt->src.va += blt->src.layer_size;
|
||||
blt->dst.ubwc_va += blt->dst.ubwc_size;
|
||||
blt->src.ubwc_va += blt->src.ubwc_size;
|
||||
}
|
||||
|
||||
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17);
|
||||
|
|
|
|||
|
|
@ -41,27 +41,37 @@ struct tu_blit_surf {
|
|||
uint32_t x, y;
|
||||
uint32_t width, height;
|
||||
unsigned samples;
|
||||
uint64_t ubwc_va;
|
||||
uint32_t ubwc_pitch;
|
||||
uint32_t ubwc_size;
|
||||
};
|
||||
|
||||
static inline struct tu_blit_surf
|
||||
tu_blit_surf(struct tu_image *img,
|
||||
tu_blit_surf(struct tu_image *image,
|
||||
VkImageSubresourceLayers subres,
|
||||
const VkOffset3D *offsets)
|
||||
{
|
||||
unsigned layer = subres.baseArrayLayer;
|
||||
if (image->type == VK_IMAGE_TYPE_3D) {
|
||||
assert(layer == 0);
|
||||
layer = MIN2(offsets[0].z, offsets[1].z);
|
||||
}
|
||||
|
||||
return (struct tu_blit_surf) {
|
||||
.fmt = img->vk_format,
|
||||
.tile_mode = tu6_get_image_tile_mode(img, subres.mipLevel),
|
||||
.tiled = img->tile_mode != TILE6_LINEAR,
|
||||
.va = img->bo->iova + img->bo_offset + img->levels[subres.mipLevel].offset +
|
||||
subres.baseArrayLayer * img->layer_size +
|
||||
MIN2(offsets[0].z, offsets[1].z) * img->levels[subres.mipLevel].size,
|
||||
.pitch = img->levels[subres.mipLevel].pitch * vk_format_get_blocksize(img->vk_format) * img->samples,
|
||||
.layer_size = img->type == VK_IMAGE_TYPE_3D ? img->levels[subres.mipLevel].size : img->layer_size,
|
||||
.fmt = image->vk_format,
|
||||
.tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel),
|
||||
.tiled = image->tile_mode != TILE6_LINEAR,
|
||||
.va = tu_image_base(image, subres.mipLevel, layer),
|
||||
.pitch = tu_image_stride(image, subres.mipLevel),
|
||||
.layer_size = tu_layer_size(image, subres.mipLevel),
|
||||
.x = MIN2(offsets[0].x, offsets[1].x),
|
||||
.y = MIN2(offsets[0].y, offsets[1].y),
|
||||
.width = abs(offsets[1].x - offsets[0].x),
|
||||
.height = abs(offsets[1].y - offsets[0].y),
|
||||
.samples = img->samples,
|
||||
.samples = image->samples,
|
||||
.ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer),
|
||||
.ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel),
|
||||
.ubwc_size = tu_image_ubwc_size(image, subres.mipLevel),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -388,6 +388,22 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview)
|
||||
{
|
||||
uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer);
|
||||
uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip);
|
||||
uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip);
|
||||
if (iview->image->ubwc_size) {
|
||||
tu_cs_emit_qw(cs, va);
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) |
|
||||
A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2));
|
||||
} else {
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
|
|
@ -430,22 +446,21 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
}
|
||||
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_image_level *slice = &iview->image->levels[iview->base_mip];
|
||||
enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format);
|
||||
|
||||
uint32_t offset = slice->offset + slice->size * iview->base_layer;
|
||||
uint32_t stride = slice->pitch * iview->image->cpp;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(stride));
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(slice->size));
|
||||
tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + offset);
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layer_size));
|
||||
tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
|
||||
tu_cs_emit(cs, tiling->gmem_offsets[gmem_index]);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
|
||||
tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
|
||||
tu6_emit_flag_buffer(cs, iview);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
|
||||
tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
|
||||
tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
|
||||
|
|
@ -475,12 +490,8 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
continue;
|
||||
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_image_level *slice =
|
||||
&iview->image->levels[iview->base_mip];
|
||||
const enum a6xx_tile_mode tile_mode =
|
||||
tu6_get_image_tile_mode(iview->image, iview->base_mip);
|
||||
uint32_t stride = 0;
|
||||
uint32_t offset = 0;
|
||||
|
||||
mrt_comp[i] = 0xf;
|
||||
|
||||
|
|
@ -491,33 +502,21 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu6_get_native_format(iview->vk_format);
|
||||
assert(format && format->rb >= 0);
|
||||
|
||||
offset = slice->offset + slice->size * iview->base_layer;
|
||||
stride = slice->pitch * iview->image->cpp;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
|
||||
A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
|
||||
A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride));
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size));
|
||||
tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset +
|
||||
offset); /* BASE_LO/HI */
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layer_size));
|
||||
tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
|
||||
tu_cs_emit(
|
||||
cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
|
||||
tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb));
|
||||
|
||||
#if 0
|
||||
/* when we support UBWC, these would be the system memory
|
||||
* addr/pitch/etc:
|
||||
*/
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4);
|
||||
tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
|
||||
tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0));
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
|
||||
#endif
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
|
||||
tu6_emit_flag_buffer(cs, iview);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
|
||||
|
|
@ -633,11 +632,6 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
|
|||
uint32_t gmem_offset,
|
||||
uint32_t blit_info)
|
||||
{
|
||||
const struct tu_image_level *slice =
|
||||
&iview->image->levels[iview->base_mip];
|
||||
const uint32_t offset = slice->offset + slice->size * iview->base_layer;
|
||||
const uint32_t stride = slice->pitch * iview->image->cpp;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
|
||||
tu_cs_emit(cs, blit_info);
|
||||
|
||||
|
|
@ -651,11 +645,16 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
|
||||
A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) |
|
||||
A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
|
||||
A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap));
|
||||
tu_cs_emit_qw(cs,
|
||||
iview->image->bo->iova + iview->image->bo_offset + offset);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride));
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size));
|
||||
A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) |
|
||||
COND(iview->image->ubwc_size, A6XX_RB_BLIT_DST_INFO_FLAGS));
|
||||
tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layer_size));
|
||||
|
||||
if (iview->image->ubwc_size) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
|
||||
tu6_emit_flag_buffer(cs, iview);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
|
||||
tu_cs_emit(cs, gmem_offset);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#include "util/u_half.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
/**
|
||||
* Declare a format table. A format table is an array of tu_native_format.
|
||||
|
|
@ -784,6 +785,23 @@ tu_GetPhysicalDeviceFormatProperties2(
|
|||
|
||||
tu_physical_device_get_format_properties(
|
||||
physical_device, format, &pFormatProperties->formatProperties);
|
||||
|
||||
struct wsi_format_modifier_properties_list *list =
|
||||
vk_find_struct(pFormatProperties->pNext, WSI_FORMAT_MODIFIER_PROPERTIES_LIST_MESA);
|
||||
if (list) {
|
||||
VK_OUTARRAY_MAKE(out, list->modifier_properties, &list->modifier_count);
|
||||
|
||||
vk_outarray_append(&out, mod_props) {
|
||||
mod_props->modifier = DRM_FORMAT_MOD_LINEAR;
|
||||
mod_props->modifier_plane_count = 1;
|
||||
}
|
||||
|
||||
/* TODO: any cases where this should be disabled? */
|
||||
vk_outarray_append(&out, mod_props) {
|
||||
mod_props->modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
|
||||
mod_props->modifier_plane_count = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
|
|||
|
|
@ -31,18 +31,20 @@
|
|||
#include "util/u_atomic.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
static inline bool
|
||||
image_level_linear(struct tu_image *image, int level)
|
||||
image_level_linear(struct tu_image *image, int level, bool ubwc)
|
||||
{
|
||||
unsigned w = u_minify(image->extent.width, level);
|
||||
return w < 16;
|
||||
/* all levels are tiled/compressed with UBWC */
|
||||
return ubwc ? false : (w < 16);
|
||||
}
|
||||
|
||||
enum a6xx_tile_mode
|
||||
tu6_get_image_tile_mode(struct tu_image *image, int level)
|
||||
{
|
||||
if (image_level_linear(image, level))
|
||||
if (image_level_linear(image, level, !!image->ubwc_size))
|
||||
return TILE6_LINEAR;
|
||||
else
|
||||
return image->tile_mode;
|
||||
|
|
@ -50,32 +52,44 @@ tu6_get_image_tile_mode(struct tu_image *image, int level)
|
|||
|
||||
/* indexed by cpp, including msaa 2x and 4x: */
|
||||
static const struct {
|
||||
unsigned pitchalign;
|
||||
unsigned heightalign;
|
||||
uint8_t pitchalign;
|
||||
uint8_t heightalign;
|
||||
uint8_t ubwc_blockwidth;
|
||||
uint8_t ubwc_blockheight;
|
||||
} tile_alignment[] = {
|
||||
[1] = { 128, 32 },
|
||||
[2] = { 128, 16 },
|
||||
/* TODO:
|
||||
* cpp=1 UBWC needs testing at larger texture sizes
|
||||
* missing UBWC blockwidth/blockheight for npot+64 cpp
|
||||
* missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
|
||||
*/
|
||||
[1] = { 128, 32, 16, 4 },
|
||||
[2] = { 128, 16, 16, 4 },
|
||||
[3] = { 64, 32 },
|
||||
[4] = { 64, 16 },
|
||||
[4] = { 64, 16, 16, 4 },
|
||||
[6] = { 64, 16 },
|
||||
[8] = { 64, 16 },
|
||||
[8] = { 64, 16, 8, 4, },
|
||||
[12] = { 64, 16 },
|
||||
[16] = { 64, 16 },
|
||||
[16] = { 64, 16, 4, 4, },
|
||||
[24] = { 64, 16 },
|
||||
[32] = { 64, 16 },
|
||||
[32] = { 64, 16, 4, 2 },
|
||||
[48] = { 64, 16 },
|
||||
[64] = { 64, 16 },
|
||||
|
||||
/* special case for r8g8: */
|
||||
[0] = { 64, 32 },
|
||||
[0] = { 64, 32, 16, 4 },
|
||||
};
|
||||
|
||||
static void
|
||||
setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
|
||||
setup_slices(struct tu_image *image,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
bool ubwc_enabled)
|
||||
{
|
||||
#define RGB_TILE_WIDTH_ALIGNMENT 64
|
||||
#define RGB_TILE_HEIGHT_ALIGNMENT 16
|
||||
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
|
||||
VkFormat format = pCreateInfo->format;
|
||||
enum util_format_layout layout = vk_format_description(format)->layout;
|
||||
uint32_t layer_size = 0;
|
||||
uint32_t ubwc_size = 0;
|
||||
int ta = image->cpp;
|
||||
|
||||
/* The r8g8 format seems to not play by the normal tiling rules: */
|
||||
|
|
@ -84,6 +98,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
|
|||
|
||||
for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) {
|
||||
struct tu_image_level *slice = &image->levels[level];
|
||||
struct tu_image_level *ubwc_slice = &image->ubwc_levels[level];
|
||||
uint32_t width = u_minify(pCreateInfo->extent.width, level);
|
||||
uint32_t height = u_minify(pCreateInfo->extent.height, level);
|
||||
uint32_t depth = u_minify(pCreateInfo->extent.depth, level);
|
||||
|
|
@ -91,7 +106,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
|
|||
uint32_t blocks;
|
||||
uint32_t pitchalign;
|
||||
|
||||
if (image->tile_mode && !image_level_linear(image, level)) {
|
||||
if (image->tile_mode && !image_level_linear(image, level, ubwc_enabled)) {
|
||||
/* tiled levels of 3D textures are rounded up to PoT dimensions: */
|
||||
if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
|
||||
width = util_next_power_of_two(width);
|
||||
|
|
@ -139,19 +154,47 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
|
|||
}
|
||||
|
||||
layer_size += slice->size * depth;
|
||||
}
|
||||
if (ubwc_enabled) {
|
||||
/* with UBWC every level is aligned to 4K */
|
||||
layer_size = align(layer_size, 4096);
|
||||
|
||||
uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
|
||||
uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
|
||||
uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width), RGB_TILE_WIDTH_ALIGNMENT);
|
||||
uint32_t meta_height = align(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
|
||||
|
||||
/* it looks like mipmaps need alignment to power of two
|
||||
* TODO: needs testing with large npot textures
|
||||
* (needed for the first level?)
|
||||
*/
|
||||
if (pCreateInfo->mipLevels > 1) {
|
||||
meta_pitch = util_next_power_of_two(meta_pitch);
|
||||
meta_height = util_next_power_of_two(meta_height);
|
||||
}
|
||||
|
||||
ubwc_slice->pitch = meta_pitch;
|
||||
ubwc_slice->offset = ubwc_size;
|
||||
ubwc_size += align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
|
||||
}
|
||||
}
|
||||
image->layer_size = align(layer_size, 4096);
|
||||
|
||||
VkDeviceSize offset = ubwc_size * pCreateInfo->arrayLayers;
|
||||
for (unsigned level = 0; level < pCreateInfo->mipLevels; level++)
|
||||
image->levels[level].offset += offset;
|
||||
|
||||
image->size = offset + image->layer_size * pCreateInfo->arrayLayers;
|
||||
image->ubwc_size = ubwc_size;
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_image_create(VkDevice _device,
|
||||
const struct tu_image_create_info *create_info,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
VkImage *pImage)
|
||||
VkImage *pImage,
|
||||
uint64_t modifier)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_device, device, _device);
|
||||
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
|
||||
struct tu_image *image = NULL;
|
||||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
|
||||
|
||||
|
|
@ -195,21 +238,42 @@ tu_image_create(VkDevice _device,
|
|||
EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
|
||||
|
||||
image->tile_mode = TILE6_3;
|
||||
bool ubwc_enabled = true;
|
||||
|
||||
/* disable tiling when linear is requested and for compressed formats */
|
||||
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ||
|
||||
/* compressed textures can't use tiling? */
|
||||
vk_format_is_compressed(image->vk_format) ||
|
||||
/* scanout needs to be linear (what about tiling modifiers?) */
|
||||
create_info->scanout ||
|
||||
/* image_to_image copy doesn't deal with tiling+swap */
|
||||
tu6_get_native_format(image->vk_format)->swap ||
|
||||
/* r8g8 formats are tiled different and could break image_to_image copy */
|
||||
(image->cpp == 2 && vk_format_get_nr_components(image->vk_format) == 2))
|
||||
modifier == DRM_FORMAT_MOD_LINEAR ||
|
||||
vk_format_is_compressed(image->vk_format)) {
|
||||
image->tile_mode = TILE6_LINEAR;
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
|
||||
setup_slices(image, pCreateInfo);
|
||||
/* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
|
||||
* (causes any deqp tests that need to check stencil to fail)
|
||||
* disable UBWC for this format until we properly support copy aspect masks
|
||||
*/
|
||||
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
ubwc_enabled = false;
|
||||
|
||||
/* UBWC can't be used with E5B9G9R9 */
|
||||
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
ubwc_enabled = false;
|
||||
|
||||
if (image->extent.depth > 1) {
|
||||
tu_finishme("UBWC with 3D textures");
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
|
||||
if (!tile_alignment[image->cpp].ubwc_blockwidth) {
|
||||
tu_finishme("UBWC for cpp=%d", image->cpp);
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
|
||||
/* expect UBWC enabled if we asked for it */
|
||||
assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
|
||||
|
||||
setup_slices(image, pCreateInfo, ubwc_enabled);
|
||||
|
||||
image->size = image->layer_size * pCreateInfo->arrayLayers;
|
||||
*pImage = tu_image_to_handle(image);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -324,12 +388,13 @@ tu_image_view_init(struct tu_image_view *iview,
|
|||
memset(iview->descriptor, 0, sizeof(iview->descriptor));
|
||||
|
||||
const struct tu_native_format *fmt = tu6_get_native_format(iview->vk_format);
|
||||
struct tu_image_level *slice0 = &image->levels[iview->base_mip];
|
||||
uint64_t base_addr = image->bo->iova + iview->base_layer * image->layer_size + slice0->offset;
|
||||
uint32_t pitch = (slice0->pitch / vk_format_get_blockwidth(iview->vk_format)) *
|
||||
vk_format_get_blocksize(iview->vk_format);
|
||||
enum a6xx_tile_mode tile_mode =
|
||||
image_level_linear(image, iview->base_mip) ? TILE6_LINEAR : image->tile_mode;
|
||||
uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
|
||||
uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
|
||||
|
||||
uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
|
||||
enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
|
||||
uint32_t width = u_minify(image->extent.width, iview->base_mip);
|
||||
uint32_t height = u_minify(image->extent.height, iview->base_mip);
|
||||
|
||||
iview->descriptor[0] =
|
||||
A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
|
||||
|
|
@ -339,24 +404,34 @@ tu_image_view_init(struct tu_image_view *iview,
|
|||
A6XX_TEX_CONST_0_SWAP(image->tile_mode ? WZYX : fmt->swap) |
|
||||
tu6_texswiz(&pCreateInfo->components, vk_format_description(iview->vk_format)->swizzle) |
|
||||
A6XX_TEX_CONST_0_MIPLVLS(iview->level_count - 1);
|
||||
iview->descriptor[1] =
|
||||
A6XX_TEX_CONST_1_WIDTH(u_minify(image->extent.width, iview->base_mip)) |
|
||||
A6XX_TEX_CONST_1_HEIGHT(u_minify(image->extent.height, iview->base_mip));
|
||||
iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
|
||||
iview->descriptor[2] =
|
||||
A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(iview->vk_format)) |
|
||||
A6XX_TEX_CONST_2_PITCH(pitch) |
|
||||
A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType));
|
||||
iview->descriptor[3] = 0;
|
||||
iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(tu_layer_size(image, iview->base_mip));
|
||||
iview->descriptor[4] = base_addr;
|
||||
iview->descriptor[5] = base_addr >> 32;
|
||||
|
||||
if (image->ubwc_size) {
|
||||
uint32_t block_width = tile_alignment[image->cpp].ubwc_blockwidth;
|
||||
uint32_t block_height = tile_alignment[image->cpp].ubwc_blockheight;
|
||||
|
||||
iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
|
||||
iview->descriptor[7] = ubwc_addr;
|
||||
iview->descriptor[8] = ubwc_addr >> 32;
|
||||
iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(tu_image_ubwc_size(image, iview->base_mip) >> 2);
|
||||
iview->descriptor[10] |=
|
||||
A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(tu_image_ubwc_pitch(image, iview->base_mip)) |
|
||||
A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) |
|
||||
A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height)));
|
||||
}
|
||||
|
||||
if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D) {
|
||||
iview->descriptor[3] |= A6XX_TEX_CONST_3_ARRAY_PITCH(image->layer_size);
|
||||
iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(iview->layer_count);
|
||||
} else {
|
||||
iview->descriptor[3] |=
|
||||
A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size) |
|
||||
A6XX_TEX_CONST_3_ARRAY_PITCH(slice0->size);
|
||||
A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size);
|
||||
iview->descriptor[5] |=
|
||||
A6XX_TEX_CONST_5_DEPTH(u_minify(image->extent.depth, iview->base_mip));
|
||||
}
|
||||
|
|
@ -393,14 +468,17 @@ tu_CreateImage(VkDevice device,
|
|||
|
||||
const struct wsi_image_create_info *wsi_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
|
||||
bool scanout = wsi_info && wsi_info->scanout;
|
||||
uint64_t modifier = DRM_FORMAT_MOD_INVALID;
|
||||
|
||||
return tu_image_create(device,
|
||||
&(struct tu_image_create_info) {
|
||||
.vk_info = pCreateInfo,
|
||||
.scanout = scanout,
|
||||
},
|
||||
pAllocator, pImage);
|
||||
if (wsi_info) {
|
||||
modifier = DRM_FORMAT_MOD_LINEAR;
|
||||
for (unsigned i = 0; i < wsi_info->modifier_count; i++) {
|
||||
if (wsi_info->modifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
|
||||
modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
|
||||
}
|
||||
}
|
||||
|
||||
return tu_image_create(device, pCreateInfo, pAllocator, pImage, modifier);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -438,6 +516,13 @@ tu_GetImageSubresourceLayout(VkDevice _device,
|
|||
level->pitch * vk_format_get_blocksize(image->vk_format);
|
||||
pLayout->arrayPitch = image->layer_size;
|
||||
pLayout->depthPitch = level->size;
|
||||
|
||||
if (image->ubwc_size) {
|
||||
/* UBWC starts at offset 0 */
|
||||
pLayout->offset = 0;
|
||||
/* UBWC scanout won't match what the kernel wants if we have levels/layers */
|
||||
assert(image->level_count == 1 && image->layer_count == 1);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ tu_blit_buffer(struct tu_buffer *buffer,
|
|||
const VkBufferImageCopy *info)
|
||||
{
|
||||
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
format = VK_FORMAT_R8_UINT;
|
||||
format = VK_FORMAT_R8_UNORM;
|
||||
|
||||
unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
|
||||
vk_format_get_blocksize(format);
|
||||
|
|
|
|||
|
|
@ -1271,6 +1271,8 @@ struct tu_image
|
|||
struct tu_image_level levels[15];
|
||||
unsigned tile_mode;
|
||||
unsigned cpp;
|
||||
struct tu_image_level ubwc_levels[15];
|
||||
uint32_t ubwc_size;
|
||||
|
||||
unsigned queue_family_mask;
|
||||
bool exclusive;
|
||||
|
|
@ -1307,6 +1309,46 @@ tu_get_levelCount(const struct tu_image *image,
|
|||
: range->levelCount;
|
||||
}
|
||||
|
||||
static inline VkDeviceSize
|
||||
tu_layer_size(struct tu_image *image, int level)
|
||||
{
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
return image->levels[level].size;
|
||||
return image->layer_size;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
tu_image_stride(struct tu_image *image, int level)
|
||||
{
|
||||
return image->levels[level].pitch * image->cpp;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
tu_image_base(struct tu_image *image, int level, int layer)
|
||||
{
|
||||
return image->bo->iova + image->bo_offset + image->levels[level].offset +
|
||||
layer * tu_layer_size(image, level);
|
||||
}
|
||||
|
||||
static inline VkDeviceSize
|
||||
tu_image_ubwc_size(struct tu_image *image, int level)
|
||||
{
|
||||
return image->ubwc_size;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
tu_image_ubwc_pitch(struct tu_image *image, int level)
|
||||
{
|
||||
return image->ubwc_levels[level].pitch;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
tu_image_ubwc_base(struct tu_image *image, int level, int layer)
|
||||
{
|
||||
return image->bo->iova + image->bo_offset + image->ubwc_levels[level].offset +
|
||||
layer * tu_image_ubwc_size(image, level);
|
||||
}
|
||||
|
||||
enum a6xx_tile_mode
|
||||
tu6_get_image_tile_mode(struct tu_image *image, int level);
|
||||
enum a3xx_msaa_samples
|
||||
|
|
@ -1340,18 +1382,12 @@ struct tu_sampler
|
|||
bool needs_border;
|
||||
};
|
||||
|
||||
struct tu_image_create_info
|
||||
{
|
||||
const VkImageCreateInfo *vk_info;
|
||||
bool scanout;
|
||||
bool no_metadata_planes;
|
||||
};
|
||||
|
||||
VkResult
|
||||
tu_image_create(VkDevice _device,
|
||||
const struct tu_image_create_info *info,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
VkImage *pImage);
|
||||
VkImage *pImage,
|
||||
uint64_t modifier);
|
||||
|
||||
VkResult
|
||||
tu_image_from_gralloc(VkDevice device_h,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "vk_util.h"
|
||||
#include "wsi_common.h"
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
static PFN_vkVoidFunction
|
||||
tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
|
||||
|
|
@ -34,13 +35,38 @@ tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
|
|||
return tu_lookup_entrypoint_unchecked(pName);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
tu_wsi_image_get_modifier(VkImage _image)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_image, image, _image);
|
||||
|
||||
if (!image->tile_mode)
|
||||
return DRM_FORMAT_MOD_LINEAR;
|
||||
|
||||
if (image->ubwc_size)
|
||||
return DRM_FORMAT_MOD_QCOM_COMPRESSED;
|
||||
|
||||
/* TODO invent a modifier for tiled but not UBWC buffers: */
|
||||
return DRM_FORMAT_MOD_INVALID;
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_wsi_init(struct tu_physical_device *physical_device)
|
||||
{
|
||||
return wsi_device_init(&physical_device->wsi_device,
|
||||
tu_physical_device_to_handle(physical_device),
|
||||
tu_wsi_proc_addr, &physical_device->instance->alloc,
|
||||
physical_device->master_fd, NULL);
|
||||
VkResult result;
|
||||
|
||||
result = wsi_device_init(&physical_device->wsi_device,
|
||||
tu_physical_device_to_handle(physical_device),
|
||||
tu_wsi_proc_addr,
|
||||
&physical_device->instance->alloc,
|
||||
physical_device->master_fd, NULL);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
physical_device->wsi_device.supports_modifiers = true;
|
||||
physical_device->wsi_device.image_get_modifier = tu_wsi_image_get_modifier;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue