freedreno/a6xx: UBWC fixes

A few fixes that get UBWC working for the games/benchmarks where I
noticed problems before (in particular manhattan and stk, modulo
image support for UBWC when compute shaders are used for post-process
effects):

  + fix the size of the UBWC meta buffer (ie, the offset to color
    pixel data) that is returned by ->fill_ubwc_buffer_sizes()
  + correct size/layout for 8 and 16 byte per pixel formats
  + limit the supported formats.. Not all formats that can be
    tiled can be compressed.

Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
Rob Clark 2019-05-03 13:10:22 -07:00
parent 6ffb58726b
commit 857d9f3b02
2 changed files with 79 additions and 12 deletions

View file

@ -26,6 +26,7 @@
*/
#include "fd6_resource.h"
#include "fd6_format.h"
#include "a6xx.xml.h"
@ -161,12 +162,51 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
return size;
}
/* Whitelist of color formats for which UBWC compression may be used.
 *
 * Only a subset of the formats that can be tiled can also be compressed.
 * Tiling is already a prerequisite for compression, but being tileable
 * is not sufficient on its own, hence the explicit list below.
 *
 * Returns true if 'fmt' is one of the listed formats, false for
 * anything else.
 */
static bool
ok_ubwc_format(enum a6xx_color_fmt fmt)
{
	bool compressible = false;

	switch (fmt) {
	/* R8* family: */
	case RB6_R8G8_UNORM:
	case RB6_R8G8_UINT:
	case RB6_R8G8_SINT:
	case RB6_R8G8B8_UNORM:
	case RB6_R8G8B8A8_UNORM:
	case RB6_R8G8B8A8_UINT:
	case RB6_R8G8B8A8_SINT:
	/* R16* family: */
	case RB6_R16_UINT:
	case RB6_R16_SINT:
	case RB6_R16_FLOAT:
	case RB6_R16G16_UINT:
	case RB6_R16G16_SINT:
	case RB6_R16G16_FLOAT:
	case RB6_R16G16B16A16_UINT:
	case RB6_R16G16B16A16_SINT:
	case RB6_R16G16B16A16_FLOAT:
	/* R32* family: */
	case RB6_R32_UINT:
	case RB6_R32_SINT:
	case RB6_R32G32_UINT:
	case RB6_R32G32_SINT:
	case RB6_R32G32B32A32_UINT:
	case RB6_R32G32B32A32_SINT:
	/* remaining packed formats: */
	case RB6_R5G6B5_UNORM:
	case RB6_R10G10B10A2_UNORM:
	case RB6_R10G10B10A2_UINT:
	case RB6_R11G11B10_FLOAT:
	case RB6_X8Z24_UNORM:
		compressible = true;
		break;
	default:
		/* everything else stays uncompressed */
		break;
	}

	return compressible;
}
uint32_t
fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
{
#define RGB_TILE_WIDTH 16
#define RBG_TILE_WIDTH_ALIGNMENT 64
#define RGB_TILE_HEIGHT 4
#define RGB_TILE_HEIGHT_ALIGNMENT 16
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
@ -174,23 +214,50 @@ fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
if (!ok_ubwc_format(fd6_pipe2color(prsc->format)))
return 0;
/* limit things to simple single level 2d for now: */
if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
return 0;
uint32_t meta_stride =
ALIGN_POT(DIV_ROUND_UP(width, RGB_TILE_WIDTH), RBG_TILE_WIDTH_ALIGNMENT);
uint32_t meta_scanlines =
ALIGN_POT(DIV_ROUND_UP(height, RGB_TILE_HEIGHT), RGB_TILE_HEIGHT_ALIGNMENT);
uint32_t meta_plane =
ALIGN_POT(meta_stride * meta_scanlines, UBWC_PLANE_SIZE_ALIGNMENT);
uint32_t block_width, block_height;
switch (rsc->cpp) {
case 2:
case 4:
block_width = 16;
block_height = 4;
break;
case 8:
block_width = 8;
block_height = 4;
break;
case 16:
block_width = 4;
block_height = 4;
break;
default:
return 0;
}
rsc->offset = meta_plane;
uint32_t meta_stride =
ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
uint32_t meta_height =
ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
uint32_t meta_size =
ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
/* UBWC goes first, then color data.. this constraint is mainly only
* because it is what the kernel expects for scanout. For non-2D we
* could just use a separate UBWC buffer..
*/
rsc->ubwc_offset = 0;
rsc->offset = meta_size;
rsc->ubwc_pitch = meta_stride;
rsc->ubwc_size = meta_plane >> 2;
rsc->ubwc_size = meta_size >> 2; /* in dwords??? */
rsc->tile_mode = TILE6_3;
return rsc->ubwc_size;
return meta_size;
}
uint32_t

View file

@ -266,7 +266,7 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
format, rsc->slices[lvl].pitch) * rsc->cpp);
so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
so->ubwc_enabled = rsc->ubwc_size && u_minify(prsc->width0, lvl) >= 16;
so->ubwc_enabled = rsc->ubwc_size && !fd_resource_level_linear(prsc, lvl);
}
so->texconst0 |= fd6_tex_const_0(prsc, lvl, cso->format,