panfrost: XML-ify the multi-target framebuffer descriptors

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
This commit is contained in:
Boris Brezillon 2020-09-08 10:17:40 +02:00 committed by Alyssa Rosenzweig
parent efce73d99d
commit 5d5f7552a5
7 changed files with 782 additions and 948 deletions

View file

@ -744,13 +744,13 @@ panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch)
if (!batch->framebuffer.gpu) { if (!batch->framebuffer.gpu) {
unsigned size = (dev->quirks & MIDGARD_SFBD) ? unsigned size = (dev->quirks & MIDGARD_SFBD) ?
MALI_SINGLE_TARGET_FRAMEBUFFER_LENGTH : MALI_SINGLE_TARGET_FRAMEBUFFER_LENGTH :
sizeof(struct mali_framebuffer); MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64); batch->framebuffer = panfrost_pool_alloc_aligned(&batch->pool, size, 64);
/* Tag the pointer */ /* Tag the pointer */
if (!(dev->quirks & MIDGARD_SFBD)) if (!(dev->quirks & MIDGARD_SFBD))
batch->framebuffer.gpu |= MALI_MFBD; batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
} }
return batch->framebuffer.gpu; return batch->framebuffer.gpu;

View file

@ -28,8 +28,38 @@
#include "pan_util.h" #include "pan_util.h"
#include "panfrost-quirks.h" #include "panfrost-quirks.h"
static struct mali_rt_format
panfrost_mfbd_format(struct pipe_surface *surf) static bool
panfrost_mfbd_has_zs_crc_ext(struct panfrost_batch *batch)
{
if (batch->key.nr_cbufs == 1) {
struct pipe_surface *surf = batch->key.cbufs[0];
struct panfrost_resource *rsrc = pan_resource(surf->texture);
if (rsrc->checksummed)
return true;
}
if (batch->key.zsbuf &&
((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL))
return true;
return false;
}
/* Returns the total number of bytes to reserve for a multi-target framebuffer
 * descriptor for this batch: the fixed MULTI_TARGET_FRAMEBUFFER part, the
 * ZS/CRC extension when panfrost_mfbd_has_zs_crc_ext() reports one, and one
 * RENDER_TARGET entry per colour buffer (never fewer than one). */
static unsigned
panfrost_mfbd_size(struct panfrost_batch *batch)
{
        /* Even with no colour buffer bound, one render target is counted */
        unsigned nr_rts = MAX2(batch->key.nr_cbufs, 1);
        unsigned size = MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;

        if (panfrost_mfbd_has_zs_crc_ext(batch))
                size += MALI_ZS_CRC_EXTENSION_LENGTH;

        size += nr_rts * MALI_RENDER_TARGET_LENGTH;

        return size;
}
static void
panfrost_mfbd_rt_init_format(struct pipe_surface *surf,
struct MALI_RENDER_TARGET *rt)
{ {
/* Explode details on the format */ /* Explode details on the format */
@ -41,68 +71,74 @@ panfrost_mfbd_format(struct pipe_surface *surf)
unsigned char swizzle[4]; unsigned char swizzle[4];
panfrost_invert_swizzle(desc->swizzle, swizzle); panfrost_invert_swizzle(desc->swizzle, swizzle);
rt->swizzle = panfrost_translate_swizzle_4(swizzle);
/* Fill in accordingly, defaulting to 8-bit UNORM */ /* Fill in accordingly, defaulting to 8-bit UNORM */
struct mali_rt_format fmt = {
.unk1 = 0x4000000,
.unk2 = 0x1,
.nr_channels = MALI_POSITIVE(desc->nr_channels),
.unk3 = 0x4,
.flags = 0x2,
.swizzle = panfrost_translate_swizzle_4(swizzle),
.no_preload = true
};
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
fmt.flags |= MALI_MFBD_FORMAT_SRGB; rt->srgb = true;
/* sRGB handled as a dedicated flag */ /* sRGB handled as a dedicated flag */
enum pipe_format linearized = util_format_linear(surf->format); enum pipe_format linearized = util_format_linear(surf->format);
if (util_format_is_unorm8(desc)) {
rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
switch (desc->nr_channels) {
case 1:
rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8;
break;
case 2:
rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8;
break;
case 3:
rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8B8;
break;
case 4:
rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R8G8B8A8;
break;
default:
unreachable("Invalid number of channels");
}
/* If RGB, we're good to go */ /* If RGB, we're good to go */
if (util_format_is_unorm8(desc)) return;
return fmt; }
/* Set flags for alternative formats */ /* Set flags for alternative formats */
switch (linearized) { switch (linearized) {
case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_B5G6R5_UNORM:
fmt.unk1 = 0x14000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G6B5A0;
fmt.nr_channels = MALI_POSITIVE(2); rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R5G6B5;
fmt.unk3 |= 0x1;
break; break;
case PIPE_FORMAT_A4B4G4R4_UNORM: case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_R4G4B4A4_UNORM: case PIPE_FORMAT_R4G4B4A4_UNORM:
fmt.unk1 = 0x10000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R4G4B4A4;
fmt.unk3 = 0x5; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R4G4B4A4;
fmt.nr_channels = MALI_POSITIVE(1);
break; break;
case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_UNORM: case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM: case PIPE_FORMAT_B10G10R10X2_UNORM:
fmt.unk1 = 0x08000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R10G10B10A2;
fmt.unk3 = 0x6; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R10G10B10A2;
fmt.nr_channels = MALI_POSITIVE(1);
break; break;
case PIPE_FORMAT_B5G5R5A1_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_R5G5B5A1_UNORM: case PIPE_FORMAT_R5G5B5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM: case PIPE_FORMAT_B5G5R5X1_UNORM:
fmt.unk1 = 0x18000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R5G5B5A1;
fmt.unk3 = 0x7; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_R5G5B5A1;
fmt.nr_channels = MALI_POSITIVE(2);
break; break;
/* Generic 8-bit */ /* Generic 8-bit */
case PIPE_FORMAT_R8_UINT: case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT: case PIPE_FORMAT_R8_SINT:
fmt.unk1 = 0x80000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8;
fmt.unk3 = 0x0; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW8;
fmt.nr_channels = MALI_POSITIVE(1);
break; break;
/* Generic 32-bit */ /* Generic 32-bit */
@ -116,9 +152,8 @@ panfrost_mfbd_format(struct pipe_surface *surf)
case PIPE_FORMAT_R32_UINT: case PIPE_FORMAT_R32_UINT:
case PIPE_FORMAT_R32_SINT: case PIPE_FORMAT_R32_SINT:
case PIPE_FORMAT_R10G10B10A2_UINT: case PIPE_FORMAT_R10G10B10A2_UINT:
fmt.unk1 = 0x88000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW32;
fmt.unk3 = 0x0; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW32;
fmt.nr_channels = MALI_POSITIVE(4);
break; break;
/* Generic 16-bit */ /* Generic 16-bit */
@ -127,9 +162,8 @@ panfrost_mfbd_format(struct pipe_surface *surf)
case PIPE_FORMAT_R16_FLOAT: case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16_UINT: case PIPE_FORMAT_R16_UINT:
case PIPE_FORMAT_R16_SINT: case PIPE_FORMAT_R16_SINT:
fmt.unk1 = 0x84000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW16;
fmt.unk3 = 0x0; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW16;
fmt.nr_channels = MALI_POSITIVE(2);
break; break;
/* Generic 64-bit */ /* Generic 64-bit */
@ -139,73 +173,30 @@ panfrost_mfbd_format(struct pipe_surface *surf)
case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R16G16B16A16_FLOAT:
case PIPE_FORMAT_R16G16B16A16_SINT: case PIPE_FORMAT_R16G16B16A16_SINT:
case PIPE_FORMAT_R16G16B16A16_UINT: case PIPE_FORMAT_R16G16B16A16_UINT:
fmt.unk1 = 0x8c000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW64;
fmt.unk3 = 0x1; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW64;
fmt.nr_channels = MALI_POSITIVE(2);
break; break;
/* Generic 128-bit */ /* Generic 128-bit */
case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32A32_SINT: case PIPE_FORMAT_R32G32B32A32_SINT:
case PIPE_FORMAT_R32G32B32A32_UINT: case PIPE_FORMAT_R32G32B32A32_UINT:
fmt.unk1 = 0x90000000; rt->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW128;
fmt.unk3 = 0x1; rt->writeback_format = MALI_MFBD_COLOR_FORMAT_RAW128;
fmt.nr_channels = MALI_POSITIVE(4);
break; break;
default: default:
unreachable("Invalid format rendering"); unreachable("Invalid format rendering");
} }
return fmt;
}
static void
panfrost_mfbd_clear(
struct panfrost_batch *batch,
struct mali_framebuffer *fb,
struct mali_framebuffer_extra *fbx,
struct mali_render_target *rts,
unsigned rt_count)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_device *dev = pan_device(gallium->screen);
for (unsigned i = 0; i < rt_count; ++i) {
if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
continue;
rts[i].clear_color_1 = batch->clear_color[i][0];
rts[i].clear_color_2 = batch->clear_color[i][1];
rts[i].clear_color_3 = batch->clear_color[i][2];
rts[i].clear_color_4 = batch->clear_color[i][3];
}
if (batch->clear & PIPE_CLEAR_DEPTH) {
fb->clear_depth = batch->clear_depth;
}
if (batch->clear & PIPE_CLEAR_STENCIL) {
fb->clear_stencil = batch->clear_stencil;
}
if (dev->quirks & IS_BIFROST) {
fbx->clear_color_1 = batch->clear_color[0][0];
fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
}
} }
static void static void
panfrost_mfbd_set_cbuf( panfrost_mfbd_rt_set_buf(struct pipe_surface *surf,
struct mali_render_target *rt, struct MALI_RENDER_TARGET *rt)
struct pipe_surface *surf)
{ {
struct panfrost_resource *rsrc = pan_resource(surf->texture);
struct panfrost_device *dev = pan_device(surf->context->screen); struct panfrost_device *dev = pan_device(surf->context->screen);
bool is_bifrost = dev->quirks & IS_BIFROST; unsigned version = dev->gpu_id >> 12;
struct panfrost_resource *rsrc = pan_resource(surf->texture);
unsigned level = surf->u.tex.level; unsigned level = surf->u.tex.level;
unsigned first_layer = surf->u.tex.first_layer; unsigned first_layer = surf->u.tex.first_layer;
assert(surf->u.tex.last_layer == first_layer); assert(surf->u.tex.last_layer == first_layer);
@ -215,210 +206,239 @@ panfrost_mfbd_set_cbuf(
unsigned nr_samples = surf->texture->nr_samples; unsigned nr_samples = surf->texture->nr_samples;
unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0; unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0;
mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0); mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0);
rt->format = panfrost_mfbd_format(surf);
if (layer_stride) if (layer_stride)
rt->format.msaa = MALI_MSAA_LAYERED; rt->writeback_msaa = MALI_MSAA_LAYERED;
else if (surf->nr_samples) else if (surf->nr_samples)
rt->format.msaa = MALI_MSAA_AVERAGE; rt->writeback_msaa = MALI_MSAA_AVERAGE;
else else
rt->format.msaa = MALI_MSAA_SINGLE; rt->writeback_msaa = MALI_MSAA_SINGLE;
/* Now, we set the modifier specific pieces */ panfrost_mfbd_rt_init_format(surf, rt);
if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) { if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) {
if (is_bifrost) { if (version >= 7)
rt->format.unk4 = 0x1; rt->writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_LINEAR;
} else { else
rt->format.block = MALI_BLOCK_FORMAT_LINEAR; rt->writeback_block_format = MALI_BLOCK_FORMAT_LINEAR;
}
rt->framebuffer = base; rt->writeback_base = base;
rt->framebuffer_stride = stride / 16; rt->writeback_row_stride = stride;
rt->layer_stride = layer_stride; rt->writeback_surface_stride = layer_stride;
} else if (rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { } else if (rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) {
if (is_bifrost) { if (version >= 7)
rt->format.unk3 |= 0x8; rt->writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
} else { else
rt->format.block = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; rt->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
}
rt->framebuffer = base; rt->writeback_base = base;
rt->framebuffer_stride = stride; rt->writeback_row_stride = stride * 16;
rt->layer_stride = layer_stride; rt->writeback_surface_stride = layer_stride;
} else if (drm_is_afbc(rsrc->modifier)) { } else if (drm_is_afbc(rsrc->modifier)) {
rt->format.block = MALI_BLOCK_FORMAT_AFBC; if (version >= 7)
rt->writeback_block_format = MALI_BLOCK_FORMAT_V7_AFBC;
else
rt->writeback_block_format = MALI_BLOCK_FORMAT_AFBC;
unsigned header_size = rsrc->slices[level].header_size; unsigned header_size = rsrc->slices[level].header_size;
rt->framebuffer = base + header_size; rt->afbc_header = base;
rt->layer_stride = layer_stride; rt->afbc_chunk_size = 9;
rt->afbc.metadata = base; rt->afbc_sparse = true;
rt->afbc.stride = 0; rt->afbc_body = base + header_size;
rt->afbc.flags = MALI_AFBC_FLAGS; rt->writeback_surface_stride = layer_stride;
if (rsrc->modifier & AFBC_FORMAT_MOD_YTR) if (rsrc->modifier & AFBC_FORMAT_MOD_YTR)
rt->afbc.flags |= MALI_AFBC_YTR; rt->afbc_yuv_transform_enable = true;
/* TODO: The blob sets this to something nonzero, but it's not /* TODO: The blob sets this to something nonzero, but it's not
* clear what/how to calculate/if it matters */ * clear what/how to calculate/if it matters */
rt->framebuffer_stride = 0; rt->afbc_body_size = 0;
} else { } else {
unreachable("Invalid mod"); unreachable("Invalid mod");
} }
} }
static void static void
panfrost_mfbd_set_zsbuf( panfrost_mfbd_emit_rt(struct panfrost_batch *batch,
struct mali_framebuffer *fb, void *rtp, struct pipe_surface *surf,
struct mali_framebuffer_extra *fbx, unsigned rt_offset, unsigned rt_idx)
struct pipe_surface *surf)
{ {
struct panfrost_device *dev = pan_device(surf->context->screen); struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
bool is_bifrost = dev->quirks & IS_BIFROST; unsigned version = dev->gpu_id >> 12;
struct panfrost_resource *rsrc = pan_resource(surf->texture);
unsigned nr_samples = surf->texture->nr_samples; pan_pack(rtp, RENDER_TARGET, rt) {
nr_samples = MAX2(nr_samples, 1); rt.clean_pixel_write_enable = true;
if (surf) {
rt.write_enable = true;
rt.dithering_enable = true;
rt.internal_buffer_offset = rt_offset;
panfrost_mfbd_rt_set_buf(surf, &rt);
} else {
rt.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
rt.internal_buffer_offset = rt_offset;
if (version >= 7) {
rt.writeback_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
rt.dithering_enable = true;
}
}
fbx->zs_samples = MALI_POSITIVE(nr_samples); if (batch->clear & (PIPE_CLEAR_COLOR0 << rt_idx)) {
rt.clear_color_0 = batch->clear_color[rt_idx][0];
rt.clear_color_1 = batch->clear_color[rt_idx][1];
rt.clear_color_2 = batch->clear_color[rt_idx][2];
rt.clear_color_3 = batch->clear_color[rt_idx][3];
}
}
}
unsigned level = surf->u.tex.level; static enum mali_z_internal_format
unsigned first_layer = surf->u.tex.first_layer; get_z_internal_format(struct panfrost_batch *batch)
assert(surf->u.tex.last_layer == first_layer); {
struct pipe_surface *zs_surf = batch->key.zsbuf;
/* Default to 24 bit depth if there's no surface. */
if (!zs_surf || !((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL))
return MALI_Z_INTERNAL_FORMAT_D24;
return panfrost_get_z_internal_format(zs_surf->format);
}
static void
panfrost_mfbd_zs_crc_ext_set_bufs(struct panfrost_batch *batch,
struct MALI_ZS_CRC_EXTENSION *ext)
{
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
unsigned version = dev->gpu_id >> 12;
/* Checksumming only works with a single render target */
if (batch->key.nr_cbufs == 1) {
struct pipe_surface *c_surf = batch->key.cbufs[0];
struct panfrost_resource *rsrc = pan_resource(c_surf->texture);
if (rsrc->checksummed) {
unsigned level = c_surf->u.tex.level;
struct panfrost_slice *slice = &rsrc->slices[level];
ext->crc_row_stride = slice->checksum_stride;
if (slice->checksum_bo)
ext->crc_base = slice->checksum_bo->gpu;
else
ext->crc_base = rsrc->bo->gpu + slice->checksum_offset;
if ((batch->clear & PIPE_CLEAR_COLOR0) && version >= 7) {
ext->crc_clear_color = batch->clear_color[0][0] |
0xc000000000000000 |
((uint64_t)batch->clear_color[0][0] & 0xffff) << 32;
}
}
}
struct pipe_surface *zs_surf = batch->key.zsbuf;
if (!((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL))
zs_surf = NULL;
if (!zs_surf)
return;
struct panfrost_resource *rsrc = pan_resource(zs_surf->texture);
unsigned nr_samples = MAX2(zs_surf->texture->nr_samples, 1);
unsigned level = zs_surf->u.tex.level;
unsigned first_layer = zs_surf->u.tex.first_layer;
assert(zs_surf->u.tex.last_layer == first_layer);
mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0); mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0);
ext->zs_msaa = nr_samples > 1 ? MALI_MSAA_LAYERED : MALI_MSAA_SINGLE;
if (drm_is_afbc(rsrc->modifier)) { if (drm_is_afbc(rsrc->modifier)) {
/* The only Z/S format we can compress is Z24S8 or variants /* The only Z/S format we can compress is Z24S8 or variants
* thereof (handled by the gallium frontend) */ * thereof (handled by the gallium frontend) */
assert(panfrost_is_z24s8_variant(surf->format)); assert(panfrost_is_z24s8_variant(zs_surf->format));
unsigned header_size = rsrc->slices[level].header_size; unsigned header_size = rsrc->slices[level].header_size;
fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE; ext->zs_write_format = MALI_ZS_FORMAT_D24S8;
if (version >= 7)
ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_AFBC;
else
ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
fbx->flags_hi |= MALI_EXTRA_PRESENT; ext->zs_afbc_header = base;
fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */ ext->zs_afbc_body = base + header_size;
fbx->zs_block = MALI_BLOCK_FORMAT_AFBC; ext->zs_afbc_body_size = 0x1000;
ext->zs_afbc_chunk_size = 9;
fbx->ds_afbc.depth_stencil = base + header_size; ext->zs_afbc_sparse = true;
fbx->ds_afbc.depth_stencil_afbc_metadata = base;
fbx->ds_afbc.depth_stencil_afbc_stride = 0;
fbx->ds_afbc.flags = MALI_AFBC_FLAGS;
fbx->ds_afbc.padding = 0x1000;
} else { } else {
assert(rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || rsrc->modifier == DRM_FORMAT_MOD_LINEAR); assert(rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
/* TODO: Z32F(S8) support, which is always linear */ /* TODO: Z32F(S8) support, which is always linear */
int stride = rsrc->slices[level].stride; int stride = rsrc->slices[level].stride;
unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0; unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0;
fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE; ext->zs_writeback_base = base;
fbx->flags_hi |= MALI_EXTRA_PRESENT; ext->zs_writeback_row_stride = stride;
fbx->flags_lo |= MALI_EXTRA_ZS; ext->zs_writeback_surface_stride = layer_stride;
fbx->ds_linear.depth = base;
if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) { if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) {
fbx->zs_block = MALI_BLOCK_FORMAT_LINEAR; if (version >= 7)
fbx->ds_linear.depth_stride = stride / 16; ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_LINEAR;
fbx->ds_linear.depth_layer_stride = layer_stride; else
ext->zs_block_format = MALI_BLOCK_FORMAT_LINEAR;
} else { } else {
if (is_bifrost) { ext->zs_writeback_row_stride *= 16;
/* XXX: Bifrost fields are different here */ if (version >= 7)
fbx->zs_block = 1; ext->zs_block_format_v7 = MALI_BLOCK_FORMAT_V7_TILED_U_INTERLEAVED;
fbx->flags_hi |= 0x440; else
fbx->flags_lo |= 0x1; ext->zs_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
} else {
fbx->zs_block = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
} }
fbx->ds_linear.depth_stride = stride; switch (zs_surf->format) {
fbx->ds_linear.depth_layer_stride = layer_stride; case PIPE_FORMAT_Z24_UNORM_S8_UINT:
} ext->zs_write_format = MALI_ZS_FORMAT_D24S8;
break;
if (panfrost_is_z24s8_variant(surf->format)) { case PIPE_FORMAT_Z24X8_UNORM:
fbx->flags_lo |= 0x1; ext->zs_write_format = MALI_ZS_FORMAT_D24X8;
} else if (surf->format == PIPE_FORMAT_Z32_FLOAT) { break;
fbx->flags_lo |= 0xA; case PIPE_FORMAT_Z32_FLOAT:
fb->mfbd_flags ^= 0x100; ext->zs_write_format = MALI_ZS_FORMAT_D32;
fb->mfbd_flags |= 0x200; break;
} else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
fbx->flags_hi |= 0x40; /* Midgard/Bifrost support interleaved depth/stencil
fbx->flags_lo |= 0xA; * buffers, but we always treat them as multi-planar.
fb->mfbd_flags ^= 0x100; */
fb->mfbd_flags |= 0x201; ext->zs_write_format = MALI_ZS_FORMAT_D32;
ext->s_write_format = MALI_S_FORMAT_S8;
struct panfrost_resource *stencil = rsrc->separate_stencil; struct panfrost_resource *stencil = rsrc->separate_stencil;
struct panfrost_slice stencil_slice = stencil->slices[level]; struct panfrost_slice stencil_slice = stencil->slices[level];
unsigned stencil_layer_stride = (nr_samples > 1) ? stencil_slice.size0 : 0; unsigned stencil_layer_stride = (nr_samples > 1) ? stencil_slice.size0 : 0;
fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer, 0); ext->s_writeback_base = panfrost_get_texture_address(stencil, level, first_layer, 0);
fbx->ds_linear.stencil_stride = stencil_slice.stride; ext->s_writeback_row_stride = stencil_slice.stride;
fbx->ds_linear.stencil_layer_stride = stencil_layer_stride; if (rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
ext->s_writeback_row_stride *= 16;
ext->s_writeback_surface_stride = stencil_layer_stride;
break;
default:
unreachable("Unsupported depth/stencil format.");
} }
} }
} }
/* Helper for sequential uploads used for MFBD */ static void
panfrost_mfbd_emit_zs_crc_ext(struct panfrost_batch *batch, void *extp)
#define UPLOAD(dest, offset, src, max) { \
size_t sz = sizeof(*src); \
memcpy(dest.cpu + offset, src, sz); \
assert((offset + sz) <= max); \
offset += sz; \
}
static mali_ptr
panfrost_mfbd_upload(struct panfrost_batch *batch,
struct mali_framebuffer *fb,
struct mali_framebuffer_extra *fbx,
struct mali_render_target *rts,
unsigned rt_count)
{ {
off_t offset = 0; pan_pack(extp, ZS_CRC_EXTENSION, ext) {
ext.zs_clean_pixel_write_enable = true;
/* There may be extra data stuck in the middle */ panfrost_mfbd_zs_crc_ext_set_bufs(batch, &ext);
bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
/* Compute total size for transfer */
size_t total_sz =
sizeof(struct mali_framebuffer) +
(has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
sizeof(struct mali_render_target) * 8;
struct panfrost_transfer m_f_trans =
panfrost_pool_alloc_aligned(&batch->pool, total_sz, 64);
/* Do the transfer */
UPLOAD(m_f_trans, offset, fb, total_sz);
if (has_extra)
UPLOAD(m_f_trans, offset, fbx, total_sz);
for (unsigned c = 0; c < 8; ++c) {
UPLOAD(m_f_trans, offset, &rts[c], total_sz);
} }
/* Return pointer suitable for the fragment section */
unsigned tag =
MALI_MFBD |
(has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
(MALI_POSITIVE(rt_count) << 2);
return m_f_trans.gpu | tag;
} }
#undef UPLOAD
/* Determines the # of bytes per pixel we need to reserve for a given format in /* Determines the # of bytes per pixel we need to reserve for a given format in
* the tilebuffer (compared to 128-bit budget, etc). Usually the same as the * the tilebuffer (compared to 128-bit budget, etc). Usually the same as the
* bytes per pixel of the format itself, but there are some special cases I * bytes per pixel of the format itself, but there are some special cases I
@ -436,71 +456,46 @@ pan_bytes_per_pixel_tib(enum pipe_format format)
return desc->block.bits / 8; return desc->block.bits / 8;
} }
/* Determines whether a framebuffer uses too much tilebuffer space (requiring /* Calculates the internal color buffer size and tile size based on the number
* us to scale up the tile at a performance penalty). This is conservative but * of RT, the format and the number of pixels. If things do not fit in 4KB, we
* afaict you get 128-bits per pixel normally */ * shrink the tile size to make it fit.
*/
static unsigned static unsigned
pan_tib_size(struct panfrost_batch *batch) pan_internal_cbuf_size(struct panfrost_batch *batch, unsigned *tile_size)
{ {
unsigned size = 0; unsigned total_size = 0;
*tile_size = 16 * 16;
for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) { for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb]; struct pipe_surface *surf = batch->key.cbufs[cb];
assert(surf); assert(surf);
size += pan_bytes_per_pixel_tib(surf->format);
unsigned nr_samples = MAX3(surf->nr_samples, surf->texture->nr_samples, 1);
total_size += pan_bytes_per_pixel_tib(surf->format) *
nr_samples * (*tile_size);
} }
return size; /* We have a 4KB budget, let's reduce the tile size until it fits. */
while (total_size > 4096) {
total_size >>= 1;
*tile_size >>= 1;
}
/* Align on 1k. */
total_size = ALIGN_POT(total_size, 1024);
/* Minimum tile size is 4x4. */
assert(*tile_size > 4 * 4);
return total_size;
} }
static unsigned static void
pan_tib_shift(struct panfrost_batch *batch) panfrost_mfbd_emit_local_storage(struct panfrost_batch *batch, void *fb)
{ {
unsigned size = pan_tib_size(batch); struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
if (size > 128) pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, LOCAL_STORAGE, ls) {
return 4;
else if (size > 64)
return 5;
else if (size > 32)
return 6;
else if (size > 16)
return 7;
else
return 8;
}
static struct mali_framebuffer
panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_device *dev = pan_device(gallium->screen);
unsigned width = batch->key.width;
unsigned height = batch->key.height;
struct mali_framebuffer mfbd = {
.width1 = MALI_POSITIVE(width),
.height1 = MALI_POSITIVE(height),
.width2 = MALI_POSITIVE(width),
.height2 = MALI_POSITIVE(height),
/* Configures tib size */
.unk1 = (pan_tib_shift(batch) << 9) | 0x80,
.rt_count_1 = MALI_POSITIVE(MAX2(batch->key.nr_cbufs, 1)),
.rt_count_2 = 4,
};
if (dev->quirks & IS_BIFROST) {
mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
mfbd.tiler_meta = panfrost_batch_get_bifrost_tiler(batch, vertex_count);
} else {
struct mali_local_storage_packed lsp;
pan_pack(&lsp, LOCAL_STORAGE, ls) {
if (batch->stack_size) { if (batch->stack_size) {
unsigned shift = unsigned shift =
panfrost_get_stack_shift(batch->stack_size); panfrost_get_stack_shift(batch->stack_size);
@ -515,23 +510,64 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
} }
mfbd.shared_memory = lsp; }
struct mali_midgard_tiler_packed t; static void
panfrost_emit_midg_tiler(batch, &t, vertex_count); panfrost_mfbd_emit_midgard_tiler(struct panfrost_batch *batch, void *fb,
mfbd.tiler = t; unsigned vertex_count)
{
void *t = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER);
panfrost_emit_midg_tiler(batch, t, vertex_count);
/* All weights set to 0, nothing to do here */
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w);
}
/* Packs the BIFROST_PARAMETERS section of the MULTI_TARGET_FRAMEBUFFER
 * descriptor located at fb.
 *
 * batch: batch whose sample locations are emitted
 * fb:    CPU pointer to the start of the framebuffer descriptor
 *
 * The only field assigned here is sample_locations; no other field of the
 * section is touched by this function. */
static void
panfrost_mfbd_emit_bifrost_parameters(struct panfrost_batch *batch, void *fb)
{
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, params) {
/* Value comes straight from panfrost_emit_sample_locations() */
params.sample_locations = panfrost_emit_sample_locations(batch);
}
}
return mfbd; static void
panfrost_mfbd_emit_bifrost_tiler(struct panfrost_batch *batch, void *fb,
unsigned vertex_count)
{
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_TILER_POINTER, tiler) {
tiler.address = panfrost_batch_get_bifrost_tiler(batch, vertex_count);
}
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PADDING, padding);
} }
void void
panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count) panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{ {
struct mali_framebuffer mfbd = struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
panfrost_emit_mfbd(batch, vertex_count); void *fb = batch->framebuffer.cpu;
memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd)); if (dev->quirks & IS_BIFROST)
panfrost_mfbd_emit_bifrost_parameters(batch, fb);
else
panfrost_mfbd_emit_local_storage(batch, fb);
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params) {
params.width = batch->key.width;
params.height = batch->key.height;
params.bound_max_x = batch->key.width - 1;
params.bound_max_y = batch->key.height - 1;
params.color_buffer_allocation =
pan_internal_cbuf_size(batch, &params.effective_tile_size);
params.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
params.render_target_count = MAX2(batch->key.nr_cbufs, 1);
}
if (dev->quirks & IS_BIFROST)
panfrost_mfbd_emit_bifrost_tiler(batch, fb, vertex_count);
else
panfrost_mfbd_emit_midgard_tiler(batch, fb, vertex_count);
} }
/* Creates an MFBD for the FRAGMENT section of the bound framebuffer */ /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
@ -540,64 +576,18 @@ mali_ptr
panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws) panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
{ {
struct panfrost_device *dev = pan_device(batch->ctx->base.screen); struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
bool is_bifrost = dev->quirks & IS_BIFROST; unsigned vertex_count = has_draws;
struct panfrost_transfer t =
panfrost_pool_alloc_aligned(&batch->pool,
panfrost_mfbd_size(batch), 64);
void *fb = t.cpu, *zs_crc_ext, *rts;
struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws); if (panfrost_mfbd_has_zs_crc_ext(batch)) {
struct mali_framebuffer_extra fbx = {0}; zs_crc_ext = fb + MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
struct mali_render_target rts[8] = {0}; rts = zs_crc_ext + MALI_ZS_CRC_EXTENSION_LENGTH;
/* We always upload at least one dummy GL_NONE render target */
unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
fb.mfbd_flags = 0x100;
panfrost_mfbd_clear(batch, &fb, &fbx, rts, rt_descriptors);
/* Upload either the render target or a dummy GL_NONE target */
unsigned offset = 0;
unsigned tib_shift = pan_tib_shift(batch);
for (int cb = 0; cb < rt_descriptors; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb];
unsigned rt_offset = offset << tib_shift;
if (surf && ((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb))) {
if (MAX2(surf->nr_samples, surf->texture->nr_samples) > 1)
batch->requirements |= PAN_REQ_MSAA;
panfrost_mfbd_set_cbuf(&rts[cb], surf);
offset += pan_bytes_per_pixel_tib(surf->format);
} else { } else {
struct mali_rt_format null_rt = { zs_crc_ext = NULL;
.unk1 = 0x4000000, rts = fb + MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
.no_preload = true
};
if (is_bifrost) {
null_rt.flags = 0x2;
null_rt.unk3 = 0x8;
}
rts[cb].format = null_rt;
rts[cb].framebuffer = 0;
rts[cb].framebuffer_stride = 0;
}
/* TODO: Break out the field */
rts[cb].format.unk1 |= rt_offset;
}
fb.rt_count_2 = MAX2(DIV_ROUND_UP(offset, 1 << (10 - tib_shift)), 1);
if (batch->key.zsbuf && ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) {
if (MAX2(batch->key.zsbuf->nr_samples, batch->key.zsbuf->nr_samples) > 1)
batch->requirements |= PAN_REQ_MSAA;
panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
} }
/* When scanning out, the depth buffer is immediately invalidated, so /* When scanning out, the depth buffer is immediately invalidated, so
@ -611,36 +601,88 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
if (panfrost_batch_is_scanout(batch)) if (panfrost_batch_is_scanout(batch))
batch->requirements &= ~PAN_REQ_DEPTH_WRITE; batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
/* Actualize the requirements */ if (zs_crc_ext) {
if (batch->key.zsbuf &&
MAX2(batch->key.zsbuf->nr_samples, batch->key.zsbuf->nr_samples) > 1)
batch->requirements |= PAN_REQ_MSAA;
panfrost_mfbd_emit_zs_crc_ext(batch, zs_crc_ext);
}
/* We always upload at least one dummy GL_NONE render target */
unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
/* Upload either the render target or a dummy GL_NONE target */
unsigned rt_offset = 0, tib_size;
unsigned internal_cbuf_size = pan_internal_cbuf_size(batch, &tib_size);
for (int cb = 0; cb < rt_descriptors; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb];
void *rt = rts + (cb * MALI_RENDER_TARGET_LENGTH);
if (!((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb)))
surf = NULL;
panfrost_mfbd_emit_rt(batch, rt, surf, rt_offset, cb);
if (surf) {
if (MAX2(surf->nr_samples, surf->texture->nr_samples) > 1)
batch->requirements |= PAN_REQ_MSAA;
rt_offset += pan_bytes_per_pixel_tib(surf->format) * tib_size;
}
}
if (dev->quirks & IS_BIFROST)
panfrost_mfbd_emit_bifrost_parameters(batch, fb);
else
panfrost_mfbd_emit_local_storage(batch, fb);
pan_section_pack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params) {
params.width = batch->key.width;
params.height = batch->key.height;
params.bound_max_x = batch->key.width - 1;
params.bound_max_y = batch->key.height - 1;
params.effective_tile_size = tib_size;
params.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
params.render_target_count = rt_descriptors;
params.z_internal_format = get_z_internal_format(batch);
if (batch->clear & PIPE_CLEAR_DEPTH)
params.z_clear = batch->clear_depth;
if (batch->clear & PIPE_CLEAR_STENCIL)
params.s_clear = batch->clear_stencil & 0xff;
params.color_buffer_allocation = internal_cbuf_size;
if (batch->requirements & PAN_REQ_MSAA) { if (batch->requirements & PAN_REQ_MSAA) {
/* XXX */ /* MSAA 4x */
fb.unk1 |= (1 << 4) | (1 << 1); params.sample_count = 4;
fb.rt_count_2 = 4; params.sample_pattern = MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
} }
if (batch->requirements & PAN_REQ_DEPTH_WRITE) if (batch->key.zsbuf &&
fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE; ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) {
params.z_write_enable = true;
if (batch->key.zsbuf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
params.s_write_enable = true;
}
/* Checksumming only works with a single render target */ params.has_zs_crc_extension = !!zs_crc_ext;
}
if (batch->key.nr_cbufs == 1) { if (dev->quirks & IS_BIFROST)
struct pipe_surface *surf = batch->key.cbufs[0]; panfrost_mfbd_emit_bifrost_tiler(batch, fb, vertex_count);
struct panfrost_resource *rsrc = pan_resource(surf->texture);
if (rsrc->checksummed) {
unsigned level = surf->u.tex.level;
struct panfrost_slice *slice = &rsrc->slices[level];
fb.mfbd_flags |= MALI_MFBD_EXTRA;
fbx.flags_hi |= MALI_EXTRA_PRESENT;
fbx.checksum_stride = slice->checksum_stride;
if (slice->checksum_bo)
fbx.checksum = slice->checksum_bo->gpu;
else else
fbx.checksum = rsrc->bo->gpu + slice->checksum_offset; panfrost_mfbd_emit_midgard_tiler(batch, fb, vertex_count);
}
}
return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors); /* Return pointer suitable for the fragment section */
unsigned tag =
MALI_FBD_TAG_IS_MFBD |
(zs_crc_ext ? MALI_FBD_TAG_HAS_ZS_RT : 0) |
(MALI_POSITIVE(rt_descriptors) << 2);
return t.gpu | tag;
} }

View file

@ -392,14 +392,6 @@ struct mali_payload_write_value {
* 4. Otherwise, set magic_divisor = m and extra_flags = 0. * 4. Otherwise, set magic_divisor = m and extra_flags = 0.
*/ */
#define FBD_MASK (~0x3f)
/* MFBD, rather than SFBD */
#define MALI_MFBD (0x1)
/* ORed into an MFBD address to specify the fbx section is included */
#define MALI_MFBD_TAG_EXTRA (0x2)
/* On Bifrost, these fields are the same between the vertex and tiler payloads. /* On Bifrost, these fields are the same between the vertex and tiler payloads.
* They also seem to be the same between Bifrost and Midgard. They're shared in * They also seem to be the same between Bifrost and Midgard. They're shared in
* fused payloads. * fused payloads.
@ -518,187 +510,4 @@ struct mali_payload_fragment {
mali_ptr framebuffer; mali_ptr framebuffer;
} __attribute__((packed)); } __attribute__((packed));
/* Configures multisampling on Bifrost fragment jobs.
 *
 * Overlays the start of struct mali_framebuffer (see the first union there).
 * Only sample_locations carries data; the decoder reports an error when any
 * of the zero* words is non-zero. */
struct bifrost_multisampling {
u64 zero1;
u64 zero2;
/* GPU address of the sample location table; the decoder reads it as an
 * array of u16 pairs */
mali_ptr sample_locations;
u64 zero4;
} __attribute__((packed));
#define MALI_MFBD_FORMAT_SRGB (1 << 0)
/* Format word of an MFBD render target; embedded as the `format` member of
 * struct mali_render_target. The bitfield widths add up to 64 bits
 * (32 + 3 + 2 + 4 + 1 + 2 + 2 + 2 + 12 + 3 + 1). Fields named unk* are not
 * understood yet. */
struct mali_rt_format {
/* The driver ORs a per-render-target offset (rt_offset) into this word;
 * the field has not been broken out further */
unsigned unk1 : 32;
unsigned unk2 : 3;
unsigned nr_channels : 2; /* MALI_POSITIVE */
unsigned unk3 : 4;
unsigned unk4 : 1;
enum mali_block_format block : 2;
enum mali_msaa msaa : 2;
/* MALI_MFBD_FORMAT_* bits */
unsigned flags : 2;
unsigned swizzle : 12;
/* The decoder only prints this field when it is non-zero */
unsigned zero : 3;
/* Disables MFBD preload. When this bit is set, the render target will
 * be cleared every frame. When this bit is clear, the hardware will
 * automatically wallpaper the render target back from main memory.
 * Unfortunately, MFBD preload is very broken on Midgard, so in
 * practice, this is a chicken bit that should always be set.
 * Discovered by accident, as all good chicken bits are. */
unsigned no_preload : 1;
} __attribute__((packed));
/* Flags for afbc.flags and ds_afbc.flags */
#define MALI_AFBC_FLAGS 0x10009
/* Lossless RGB and RGBA colorspace transform */
#define MALI_AFBC_YTR (1 << 17)
/* One render target descriptor of a multi-target framebuffer. An array of
 * these follows struct mali_framebuffer (after the optional
 * struct mali_framebuffer_extra section). */
struct mali_render_target {
struct mali_rt_format format;
/* The decoder reports an error if this (or zero2 below) is non-zero */
u64 zero1;
struct {
/* Stuff related to ARM Framebuffer Compression. When AFBC is enabled,
 * there is an extra metadata buffer that contains 16 bytes per tile.
 * The framebuffer needs to be the same size as before, since we don't
 * know ahead of time how much space it will take up. The
 * framebuffer_stride is set to 0, since the data isn't stored linearly
 * anymore.
 *
 * When AFBC is disabled, these fields are zero.
 */
mali_ptr metadata;
u32 stride; // stride in units of tiles
u32 flags; // = 0x20000
} afbc;
/* GPU address of the color buffer; the driver writes 0 here for a dummy
 * (unbound) render target */
mali_ptr framebuffer;
u32 zero2 : 4;
u32 framebuffer_stride : 28; // in units of bytes, row to next
u32 layer_stride; /* For multisample rendering */
u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
u32 clear_color_2; // always equal, but unclear function?
u32 clear_color_3; // always equal, but unclear function?
u32 clear_color_4; // always equal, but unclear function?
} __attribute__((packed));
/* An optional part of mali_framebuffer. It comes between the main structure
* and the array of render targets. It must be included if any of these are
* enabled:
*
* - Transaction Elimination
* - Depth/stencil
* - TODO: Anything else?
*/
/* flags_hi */
#define MALI_EXTRA_PRESENT (0x1)
/* flags_lo */
#define MALI_EXTRA_ZS (0x4)
/* Optional section placed between struct mali_framebuffer and the render
 * target array. It carries the checksum buffer and the depth/stencil
 * attachment. flags_lo, zs_block, zs_samples and flags_hi share one 32-bit
 * word (4 + 2 + 4 + 22 bits). */
struct mali_framebuffer_extra {
/* GPU address of the per-tile checksum buffer */
mali_ptr checksum;
/* Each tile has an 8 byte checksum, so the stride is "width in tiles * 8" */
u32 checksum_stride;
/* MALI_EXTRA_ZS lives here */
unsigned flags_lo : 4;
/* Selects which member of the union below applies: ds_afbc for
 * MALI_BLOCK_FORMAT_AFBC, ds_linear otherwise (this is how the decoder
 * interprets it) */
enum mali_block_format zs_block : 2;
/* Number of samples in Z/S attachment, MALI_POSITIVE. So zero for
 * 1-sample (non-MSAA), 0x3 for MSAA 4x, etc */
unsigned zs_samples : 4;
/* MALI_EXTRA_PRESENT lives here */
unsigned flags_hi : 22;
union {
/* Note: AFBC is only allowed for 24/8 combined depth/stencil. */
struct {
mali_ptr depth_stencil_afbc_metadata;
u32 depth_stencil_afbc_stride; // in units of tiles
u32 flags;
mali_ptr depth_stencil;
/* The decoder reports an error if this is non-zero */
u64 padding;
} ds_afbc;
struct {
/* Depth becomes depth/stencil in case of combined D/S */
mali_ptr depth;
/* The *_stride_zero fields are reported by the decoder when non-zero */
u32 depth_stride_zero : 4;
u32 depth_stride : 28;
u32 depth_layer_stride;
mali_ptr stencil;
u32 stencil_stride_zero : 4;
u32 stencil_stride : 28;
u32 stencil_layer_stride;
} ds_linear;
};
u32 clear_color_1;
u32 clear_color_2;
/* The decoder reports an error if this is non-zero */
u64 zero3;
} __attribute__((packed));
/* Flags for mfbd_flags */
/* Enables writing depth results back to main memory (rather than keeping them
* on-chip in the tile buffer and then discarding) */
#define MALI_MFBD_DEPTH_WRITE (1 << 10)
/* The MFBD contains the extra mali_framebuffer_extra section */
#define MALI_MFBD_EXTRA (1 << 13)
/* Header of a multi-target framebuffer descriptor (MFBD). It is optionally
 * followed by struct mali_framebuffer_extra and then by the render target
 * array (see the trailing comments inside the struct). */
struct mali_framebuffer {
/* The decoder reads msaa on Bifrost and shared_memory otherwise */
union {
struct mali_local_storage_packed shared_memory;
struct bifrost_multisampling msaa;
};
/* 0x20 */
/* Dimensions are stored minus one: the decoder adds 1 when reading them */
u16 width1, height1;
/* The decoder reports an error if zero3 or zero4 is non-zero */
u32 zero3;
u16 width2, height2;
u32 unk1 : 19; // = 0x01000
u32 rt_count_1 : 3; // off-by-one (use MALI_POSITIVE)
u32 unk2 : 2; // = 0
u32 rt_count_2 : 3; // no off-by-one
u32 zero4 : 5;
/* 0x30 */
u32 clear_stencil : 8;
/* MALI_MFBD_* bits (MALI_MFBD_DEPTH_WRITE, MALI_MFBD_EXTRA) */
u32 mfbd_flags : 24; // = 0x100
float clear_depth;
/* Tiler state: the first anonymous struct is what the decoder reads on
 * Midgard, the second (tiler_meta + zeros) on Bifrost */
union {
struct {
struct mali_midgard_tiler_packed tiler;
struct mali_midgard_tiler_weights_packed tiler_weights;
};
struct {
mali_ptr tiler_meta;
/* The decoder reports any non-zero entry */
u32 zeros[16];
};
};
/* optional: struct mali_framebuffer_extra extra */
/* struct mali_render_target rts[] */
} __attribute__((packed));
#endif /* __PANFROST_JOB_H__ */ #endif /* __PANFROST_JOB_H__ */

View file

@ -183,76 +183,6 @@ pandecode_validate_buffer(mali_ptr addr, size_t sz)
} }
} }
/* One entry of a flag-name table: a bit pattern and the text printed when all
 * of its bits are set. Tables are arrays of these terminated by an entry whose
 * name is NULL. */
struct pandecode_flag_info {
u64 flag;
const char *name;
};
/* Prints `flags` symbolically using a flag-name table.
 *
 * flag_info: table terminated by an entry whose name is NULL.
 * flags:     value to decode.
 *
 * Every table entry whose bits are all present in `flags` is printed by name,
 * with " | " between names, and its bits are removed from the value. Bits
 * left over afterwards are appended in hexadecimal. When no entry matches,
 * the whole value is printed in hexadecimal (including a plain "0x0"). */
static void
pandecode_log_decoded_flags(const struct pandecode_flag_info *flag_info,
                            u64 flags)
{
        bool printed_any = false;

        for (const struct pandecode_flag_info *entry = flag_info;
             entry->name; ++entry) {
                bool fully_set = (flags & entry->flag) == entry->flag;

                if (fully_set) {
                        /* Separator goes before every name except the first */
                        if (printed_any)
                                pandecode_log_cont(" | ");

                        printed_any = true;
                        pandecode_log_cont("%s", entry->name);

                        /* Consume the bits so they are not reported again */
                        flags &= ~entry->flag;
                }
        }

        if (!printed_any) {
                pandecode_log_cont("0x%" PRIx64, flags);
                return;
        }

        if (flags)
                pandecode_log_cont(" | 0x%" PRIx64, flags);
}
/* Flag-name tables for pandecode_log_decoded_flags(). Each table ends with a
 * zeroed entry (NULL name), which is the terminator that function looks for.
 * The printed name is always the spelled-out macro name of the flag. */

/* mali_rt_format.flags */
static const struct pandecode_flag_info mfbd_fmt_flag_info[] = {
        { MALI_MFBD_FORMAT_SRGB, "MALI_MFBD_FORMAT_SRGB" },
        { 0 }
};

/* AFBC flags words (render target and depth/stencil) */
static const struct pandecode_flag_info afbc_fmt_flag_info[] = {
        { MALI_AFBC_YTR, "MALI_AFBC_YTR" },
        { 0 }
};

/* mali_framebuffer_extra.flags_hi */
static const struct pandecode_flag_info mfbd_extra_flag_hi_info[] = {
        { MALI_EXTRA_PRESENT, "MALI_EXTRA_PRESENT" },
        { 0 }
};

/* mali_framebuffer_extra.flags_lo */
static const struct pandecode_flag_info mfbd_extra_flag_lo_info[] = {
        { MALI_EXTRA_ZS, "MALI_EXTRA_ZS" },
        { 0 }
};

/* mali_framebuffer.mfbd_flags */
static const struct pandecode_flag_info mfbd_flag_info [] = {
        { MALI_MFBD_DEPTH_WRITE, "MALI_MFBD_DEPTH_WRITE" },
        { MALI_MFBD_EXTRA, "MALI_MFBD_EXTRA" },
        { 0 }
};
/* Midgard's tiler descriptor is embedded within the /* Midgard's tiler descriptor is embedded within the
* larger FBD */ * larger FBD */
@ -350,30 +280,6 @@ pandecode_midgard_tiler_descriptor(
DUMP_UNPACKED(MIDGARD_TILER_WEIGHTS, w, "Tiler Weights:\n"); DUMP_UNPACKED(MIDGARD_TILER_WEIGHTS, w, "Tiler Weights:\n");
} }
/* TODO: The Bifrost tiler is not understood at all yet */
/* Dumps the Bifrost tiler part of an MFBD as a `.tiler = { ... },` field.
 *
 * fb: framebuffer descriptor to read tiler_meta and zeros[] from.
 *
 * Logs the tiler_meta pointer and reports every entry of zeros[] that is not
 * zero. The closing brace is emitted exactly once, after the indentation level
 * has been restored, so the dump stays balanced and the closer lines up with
 * the opening `.tiler = {` line. */
static void
pandecode_bifrost_tiler_descriptor(const struct mali_framebuffer *fb)
{
        pandecode_log(".tiler = {\n");
        pandecode_indent++;

        MEMORY_PROP(fb, tiler_meta);

        for (int i = 0; i < 16; i++) {
                if (fb->zeros[i] != 0) {
                        pandecode_msg("XXX: tiler descriptor zero %d tripped, value %x\n",
                                      i, fb->zeros[i]);
                }
        }

        /* Restore the indent first: one opening brace, one closing brace */
        pandecode_indent--;
        pandecode_log("},\n");
}
/* Information about the framebuffer passed back for /* Information about the framebuffer passed back for
* additional analysis */ * additional analysis */
@ -524,126 +430,29 @@ pandecode_swizzle(unsigned swizzle, enum mali_format format)
} }
static void static void
pandecode_rt_format(struct mali_rt_format format) pandecode_render_target(uint64_t gpu_va, unsigned job_no, bool is_bifrost, unsigned gpu_id,
const struct MALI_MULTI_TARGET_FRAMEBUFFER_PARAMETERS *fb)
{ {
pandecode_log(".format = {\n"); pandecode_log("Color Render Targets:\n");
pandecode_indent++; pandecode_indent++;
pandecode_prop("unk1 = 0x%" PRIx32, format.unk1); for (int i = 0; i < (fb->render_target_count); i++) {
pandecode_prop("unk2 = 0x%" PRIx32, format.unk2); mali_ptr rt_va = gpu_va + i * MALI_RENDER_TARGET_LENGTH;
pandecode_prop("unk3 = 0x%" PRIx32, format.unk3); struct pandecode_mapped_memory *mem =
pandecode_prop("unk4 = 0x%" PRIx32, format.unk4); pandecode_find_mapped_gpu_mem_containing(rt_va);
const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, mem, (mali_ptr) rt_va);
pandecode_prop("block = %s", mali_block_format_as_str(format.block)); DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
/* TODO: Map formats so we can check swizzles and print nicely */
pandecode_log("swizzle");
pandecode_swizzle(format.swizzle, MALI_RGBA8_UNORM);
pandecode_log_cont(",\n");
pandecode_prop("nr_channels = MALI_POSITIVE(%d)",
(format.nr_channels + 1));
pandecode_log(".flags = ");
pandecode_log_decoded_flags(mfbd_fmt_flag_info, format.flags);
pandecode_log_cont(",\n");
pandecode_prop("msaa = %s", mali_msaa_as_str(format.msaa));
/* In theory, the no_preload bit can be cleared to enable MFBD preload,
* which is a faster hardware-based alternative to the wallpaper method
* to preserve framebuffer contents across frames. In practice, MFBD
* preload is buggy on Midgard, and so this is a chicken bit. If this
* bit isn't set, most likely something broke unrelated to preload */
if (!format.no_preload) {
pandecode_msg("XXX: buggy MFBD preload enabled - chicken bit should be clear\n");
pandecode_prop("no_preload = 0x%" PRIx32, format.no_preload);
} }
if (format.zero)
pandecode_prop("zero = 0x%" PRIx32, format.zero);
pandecode_indent--; pandecode_indent--;
pandecode_log("},\n"); pandecode_log("\n");
} }
static void static void
pandecode_render_target(uint64_t gpu_va, unsigned job_no, const struct mali_framebuffer *fb) pandecode_mfbd_bifrost_deps(const void *fb, int job_no)
{ {
pandecode_log("struct mali_render_target rts_list_%"PRIx64"_%d[] = {\n", gpu_va, job_no); pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, params);
pandecode_indent++;
for (int i = 0; i < (fb->rt_count_1 + 1); i++) {
mali_ptr rt_va = gpu_va + i * sizeof(struct mali_render_target);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(rt_va);
const struct mali_render_target *PANDECODE_PTR_VAR(rt, mem, (mali_ptr) rt_va);
pandecode_log("{\n");
pandecode_indent++;
pandecode_rt_format(rt->format);
if (rt->format.block == MALI_BLOCK_FORMAT_AFBC) {
pandecode_log(".afbc = {\n");
pandecode_indent++;
char *a = pointer_as_memory_reference(rt->afbc.metadata);
pandecode_prop("metadata = %s", a);
free(a);
pandecode_prop("stride = %d", rt->afbc.stride);
pandecode_log(".flags = ");
pandecode_log_decoded_flags(afbc_fmt_flag_info, rt->afbc.flags);
pandecode_log_cont(",\n");
pandecode_indent--;
pandecode_log("},\n");
} else if (rt->afbc.metadata || rt->afbc.stride || rt->afbc.flags) {
pandecode_msg("XXX: AFBC disabled but AFBC field set (0x%lX, 0x%x, 0x%x)\n",
rt->afbc.metadata,
rt->afbc.stride,
rt->afbc.flags);
}
MEMORY_PROP(rt, framebuffer);
pandecode_prop("framebuffer_stride = %d", rt->framebuffer_stride);
if (rt->layer_stride)
pandecode_prop("layer_stride = %d", rt->layer_stride);
if (rt->clear_color_1 | rt->clear_color_2 | rt->clear_color_3 | rt->clear_color_4) {
pandecode_prop("clear_color_1 = 0x%" PRIx32, rt->clear_color_1);
pandecode_prop("clear_color_2 = 0x%" PRIx32, rt->clear_color_2);
pandecode_prop("clear_color_3 = 0x%" PRIx32, rt->clear_color_3);
pandecode_prop("clear_color_4 = 0x%" PRIx32, rt->clear_color_4);
}
if (rt->zero1 || rt->zero2) {
pandecode_msg("XXX: render target zeros tripped\n");
pandecode_prop("zero1 = 0x%" PRIx64, rt->zero1);
pandecode_prop("zero2 = 0x%" PRIx32, rt->zero2);
}
pandecode_indent--;
pandecode_log("},\n");
}
pandecode_indent--;
pandecode_log("};\n");
}
static struct pandecode_fbd
pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_compute, bool is_bifrost)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct mali_framebuffer *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
struct pandecode_fbd info;
if (is_bifrost && fb->msaa.sample_locations) {
/* The blob stores all possible sample locations in a single buffer /* The blob stores all possible sample locations in a single buffer
* allocated on startup, and just switches the pointer when switching * allocated on startup, and just switches the pointer when switching
* MSAA state. For now, we just put the data into the cmdstream, but we * MSAA state. For now, we just put the data into the cmdstream, but we
@ -655,193 +464,82 @@ pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_comput
* samples?). * samples?).
*/ */
struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(fb->msaa.sample_locations); struct pandecode_mapped_memory *smem =
pandecode_find_mapped_gpu_mem_containing(params.sample_locations);
const u16 *PANDECODE_PTR_VAR(samples, smem, fb->msaa.sample_locations); const u16 *PANDECODE_PTR_VAR(samples, smem, params.sample_locations);
pandecode_log("uint16_t sample_locations_%d[] = {\n", job_no); pandecode_log("uint16_t sample_locations_%d[] = {\n", job_no);
pandecode_indent++; pandecode_indent++;
for (int i = 0; i < 32 + 16; i++) { for (int i = 0; i < 32 + 16; i++) {
pandecode_log("%d, %d,\n", samples[2 * i], samples[2 * i + 1]); pandecode_log("%d, %d,\n", samples[2 * i], samples[2 * i + 1]);
} }
pandecode_indent--; pandecode_indent--;
pandecode_log("};\n"); pandecode_log("};\n");
} }
pandecode_log("struct mali_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no); static struct pandecode_fbd
pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, bool is_compute, bool is_bifrost, unsigned gpu_id)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
const void *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, PARAMETERS, params);
struct pandecode_fbd info;
if (is_bifrost)
pandecode_mfbd_bifrost_deps(fb, job_no);
pandecode_log("Multi-Target Framebuffer:\n");
pandecode_indent++; pandecode_indent++;
if (is_bifrost) { if (is_bifrost) {
pandecode_log(".msaa = {\n"); DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, BIFROST_PARAMETERS, fb, "Bifrost Params:\n");
pandecode_indent++;
if (fb->msaa.sample_locations)
pandecode_prop("sample_locations = sample_locations_%d", job_no);
else
pandecode_msg("XXX: sample_locations missing\n");
if (fb->msaa.zero1 || fb->msaa.zero2 || fb->msaa.zero4) {
pandecode_msg("XXX: multisampling zero tripped\n");
pandecode_prop("zero1 = %" PRIx64, fb->msaa.zero1);
pandecode_prop("zero2 = %" PRIx64, fb->msaa.zero2);
pandecode_prop("zero4 = %" PRIx64, fb->msaa.zero4);
}
pandecode_indent--;
pandecode_log("},\n");
} else { } else {
struct mali_local_storage_packed ls = fb->shared_memory; DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n");
} }
info.width = fb->width1 + 1; info.width = params.width;
info.height = fb->height1 + 1; info.height = params.height;
info.rt_count = fb->rt_count_1 + 1; info.rt_count = params.render_target_count;
DUMP_UNPACKED(MULTI_TARGET_FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1); if (!is_compute) {
pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1); if (is_bifrost) {
pandecode_prop("width2 = MALI_POSITIVE(%d)", fb->width2 + 1); DUMP_SECTION(MULTI_TARGET_FRAMEBUFFER, BIFROST_TILER_POINTER, fb, "Tiler Pointer");
pandecode_prop("height2 = MALI_POSITIVE(%d)", fb->height2 + 1); } else {
const void *t = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER);
pandecode_prop("unk1 = 0x%x", fb->unk1); const void *w = pan_section_ptr(fb, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS);
pandecode_prop("unk2 = 0x%x", fb->unk2); pandecode_midgard_tiler_descriptor(t, w, params.width, params.height, is_fragment, true);
pandecode_prop("rt_count_1 = MALI_POSITIVE(%d)", fb->rt_count_1 + 1);
pandecode_prop("rt_count_2 = %d", fb->rt_count_2);
pandecode_log(".mfbd_flags = ");
pandecode_log_decoded_flags(mfbd_flag_info, fb->mfbd_flags);
pandecode_log_cont(",\n");
if (fb->clear_stencil)
pandecode_prop("clear_stencil = 0x%x", fb->clear_stencil);
if (fb->clear_depth)
pandecode_prop("clear_depth = %f", fb->clear_depth);
if (!is_compute)
if (is_bifrost)
pandecode_bifrost_tiler_descriptor(fb);
else {
const struct mali_midgard_tiler_packed t = fb->tiler;
const struct mali_midgard_tiler_weights_packed w = fb->tiler_weights;
pandecode_midgard_tiler_descriptor(&t, &w, fb->width1 + 1, fb->height1 + 1, is_fragment, true);
} }
else } else {
pandecode_msg("XXX: skipping compute MFBD, fixme\n"); pandecode_msg("XXX: skipping compute MFBD, fixme\n");
}
if (fb->zero3 || fb->zero4) { if (is_bifrost) {
pandecode_msg("XXX: framebuffer zeros tripped\n"); pan_section_unpack(fb, MULTI_TARGET_FRAMEBUFFER, BIFROST_PADDING, padding);
pandecode_prop("zero3 = 0x%" PRIx32, fb->zero3);
pandecode_prop("zero4 = 0x%" PRIx32, fb->zero4);
} }
pandecode_indent--; pandecode_indent--;
pandecode_log("};\n"); pandecode_log("\n");
gpu_va += sizeof(struct mali_framebuffer); gpu_va += MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH;
info.has_extra = (fb->mfbd_flags & MALI_MFBD_EXTRA) && is_fragment; info.has_extra = params.has_zs_crc_extension;
if (info.has_extra) { if (info.has_extra) {
mem = pandecode_find_mapped_gpu_mem_containing(gpu_va); struct pandecode_mapped_memory *mem =
const struct mali_framebuffer_extra *PANDECODE_PTR_VAR(fbx, mem, (mali_ptr) gpu_va); pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, mem, (mali_ptr)gpu_va);
DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
pandecode_log("\n");
pandecode_log("struct mali_framebuffer_extra fb_extra_%"PRIx64"_%d = {\n", gpu_va, job_no); gpu_va += MALI_ZS_CRC_EXTENSION_LENGTH;
pandecode_indent++;
MEMORY_PROP(fbx, checksum);
if (fbx->checksum_stride)
pandecode_prop("checksum_stride = %d", fbx->checksum_stride);
pandecode_log(".flags_hi = ");
pandecode_log_decoded_flags(mfbd_extra_flag_hi_info, fbx->flags_hi);
pandecode_log_cont(",\n");
pandecode_log(".flags_lo = ");
pandecode_log_decoded_flags(mfbd_extra_flag_lo_info, fbx->flags_lo);
pandecode_log_cont(",\n");
pandecode_prop("zs_block = %s", mali_block_format_as_str(fbx->zs_block));
pandecode_prop("zs_samples = MALI_POSITIVE(%u)", fbx->zs_samples + 1);
if (fbx->zs_block == MALI_BLOCK_FORMAT_AFBC) {
pandecode_log(".ds_afbc = {\n");
pandecode_indent++;
MEMORY_PROP_DIR(fbx->ds_afbc, depth_stencil_afbc_metadata);
pandecode_prop("depth_stencil_afbc_stride = %d",
fbx->ds_afbc.depth_stencil_afbc_stride);
MEMORY_PROP_DIR(fbx->ds_afbc, depth_stencil);
pandecode_log(".flags = ");
pandecode_log_decoded_flags(afbc_fmt_flag_info, fbx->ds_afbc.flags);
pandecode_log_cont(",\n");
if (fbx->ds_afbc.padding) {
pandecode_msg("XXX: Depth/stencil AFBC zeros tripped\n");
pandecode_prop("padding = 0x%" PRIx64, fbx->ds_afbc.padding);
}
pandecode_indent--;
pandecode_log("},\n");
} else {
pandecode_log(".ds_linear = {\n");
pandecode_indent++;
if (fbx->ds_linear.depth) {
MEMORY_PROP_DIR(fbx->ds_linear, depth);
pandecode_prop("depth_stride = %d",
fbx->ds_linear.depth_stride);
pandecode_prop("depth_layer_stride = %d",
fbx->ds_linear.depth_layer_stride);
} else if (fbx->ds_linear.depth_stride || fbx->ds_linear.depth_layer_stride) {
pandecode_msg("XXX: depth stride zero tripped %d %d\n", fbx->ds_linear.depth_stride, fbx->ds_linear.depth_layer_stride);
}
if (fbx->ds_linear.stencil) {
MEMORY_PROP_DIR(fbx->ds_linear, stencil);
pandecode_prop("stencil_stride = %d",
fbx->ds_linear.stencil_stride);
pandecode_prop("stencil_layer_stride = %d",
fbx->ds_linear.stencil_layer_stride);
} else if (fbx->ds_linear.stencil_stride || fbx->ds_linear.stencil_layer_stride) {
pandecode_msg("XXX: stencil stride zero tripped %d %d\n", fbx->ds_linear.stencil_stride, fbx->ds_linear.stencil_layer_stride);
}
if (fbx->ds_linear.depth_stride_zero ||
fbx->ds_linear.stencil_stride_zero) {
pandecode_msg("XXX: Depth/stencil zeros tripped\n");
pandecode_prop("depth_stride_zero = 0x%x",
fbx->ds_linear.depth_stride_zero);
pandecode_prop("stencil_stride_zero = 0x%x",
fbx->ds_linear.stencil_stride_zero);
}
pandecode_indent--;
pandecode_log("},\n");
}
if (fbx->clear_color_1 | fbx->clear_color_2) {
pandecode_prop("clear_color_1 = 0x%" PRIx32, fbx->clear_color_1);
pandecode_prop("clear_color_2 = 0x%" PRIx32, fbx->clear_color_2);
}
if (fbx->zero3) {
pandecode_msg("XXX: fb_extra zeros tripped\n");
pandecode_prop("zero3 = 0x%" PRIx64, fbx->zero3);
}
pandecode_indent--;
pandecode_log("};\n");
gpu_va += sizeof(struct mali_framebuffer_extra);
} }
if (is_fragment) if (is_fragment)
pandecode_render_target(gpu_va, job_no, fb); pandecode_render_target(gpu_va, job_no, is_bifrost, gpu_id, &params);
return info; return info;
} }
@ -1412,8 +1110,9 @@ pandecode_vertex_tiler_postfix_pre(
if (is_bifrost) if (is_bifrost)
pandecode_compute_fbd(p->shared & ~1, job_no); pandecode_compute_fbd(p->shared & ~1, job_no);
else if (p->shared & MALI_MFBD) else if (p->shared & MALI_FBD_TAG_IS_MFBD)
fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->shared) & FBD_MASK, job_no, false, job_type == MALI_JOB_TYPE_COMPUTE, false); fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->shared) & ~MALI_FBD_TAG_MASK,
job_no, false, job_type == MALI_JOB_TYPE_COMPUTE, is_bifrost, gpu_id);
else if (job_type == MALI_JOB_TYPE_COMPUTE) else if (job_type == MALI_JOB_TYPE_COMPUTE)
pandecode_compute_fbd((u64) (uintptr_t) p->shared, job_no); pandecode_compute_fbd((u64) (uintptr_t) p->shared, job_no);
else else
@ -1509,7 +1208,8 @@ pandecode_vertex_tiler_postfix_pre(
/* MRT blend fields are used whenever MFBD is used, with /* MRT blend fields are used whenever MFBD is used, with
* per-RT descriptors */ * per-RT descriptors */
if (job_type == MALI_JOB_TYPE_TILER && (is_bifrost || p->shared & MALI_MFBD)) { if (job_type == MALI_JOB_TYPE_TILER &&
(is_bifrost || p->shared & MALI_FBD_TAG_IS_MFBD)) {
void* blend_base = ((void *) cl) + MALI_STATE_LENGTH; void* blend_base = ((void *) cl) + MALI_STATE_LENGTH;
for (unsigned i = 0; i < fbd_info.rt_count; i++) { for (unsigned i = 0; i < fbd_info.rt_count; i++) {
@ -1706,7 +1406,7 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
{ {
const struct mali_payload_fragment *PANDECODE_PTR_VAR(s, mem, payload); const struct mali_payload_fragment *PANDECODE_PTR_VAR(s, mem, payload);
bool is_mfbd = s->framebuffer & MALI_MFBD; bool is_mfbd = s->framebuffer & MALI_FBD_TAG_IS_MFBD;
if (!is_mfbd && is_bifrost) if (!is_mfbd && is_bifrost)
pandecode_msg("XXX: Bifrost fragment must use MFBD\n"); pandecode_msg("XXX: Bifrost fragment must use MFBD\n");
@ -1714,20 +1414,22 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
struct pandecode_fbd info; struct pandecode_fbd info;
if (is_mfbd) if (is_mfbd)
info = pandecode_mfbd_bfr(s->framebuffer & FBD_MASK, job_no, true, false, is_bifrost); info = pandecode_mfbd_bfr(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no,
true, false, is_bifrost, gpu_id);
else else
info = pandecode_sfbd(s->framebuffer & FBD_MASK, job_no, true, gpu_id); info = pandecode_sfbd(s->framebuffer & ~MALI_FBD_TAG_MASK, job_no,
true, gpu_id);
/* Compute the tag for the tagged pointer. This contains the type of /* Compute the tag for the tagged pointer. This contains the type of
* FBD (MFBD/SFBD), and in the case of an MFBD, information about which * FBD (MFBD/SFBD), and in the case of an MFBD, information about which
* additional structures follow the MFBD header (an extra payload or * additional structures follow the MFBD header (an extra payload or
* not, as well as a count of render targets) */ * not, as well as a count of render targets) */
unsigned expected_tag = is_mfbd ? MALI_MFBD : 0; unsigned expected_tag = is_mfbd ? MALI_FBD_TAG_IS_MFBD : 0;
if (is_mfbd) { if (is_mfbd) {
if (info.has_extra) if (info.has_extra)
expected_tag |= MALI_MFBD_TAG_EXTRA; expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;
expected_tag |= (MALI_POSITIVE(info.rt_count) << 2); expected_tag |= (MALI_POSITIVE(info.rt_count) << 2);
} }
@ -1780,7 +1482,7 @@ pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
/* The FBD is a tagged pointer */ /* The FBD is a tagged pointer */
unsigned tag = (s->framebuffer & ~FBD_MASK); unsigned tag = (s->framebuffer & MALI_FBD_TAG_MASK);
if (tag != expected_tag) if (tag != expected_tag)
pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag); pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag);

View file

@ -186,6 +186,14 @@
<value name="AFBC" value="3"/> <value name="AFBC" value="3"/>
</enum> </enum>
<!-- Block format encoding, "v7" variant. Unlike the enum that precedes it
     (where AFBC is 3), AFBC is encoded as 12 here and an "AFBC Tiled" value
     (13) exists. NOTE(review): presumably for v7 hardware; confirm which
     descriptors use it. -->
<enum name="Block Format v7">
<value name="No Write" value="0"/>
<value name="Tiled U-Interleaved" value="1"/>
<value name="Linear" value="2"/>
<value name="AFBC" value="12"/>
<value name="AFBC Tiled" value="13"/>
</enum>
<enum name="Mipmap Mode"> <enum name="Mipmap Mode">
<value name="Nearest" value="0"/> <value name="Nearest" value="0"/>
<value name="None" value="1"/> <value name="None" value="1"/>
@ -637,6 +645,37 @@
<value name="A1B5G5R5" value="29"/> <value name="A1B5G5R5" value="29"/>
</enum> </enum>
<!-- Color format codes; judging by the name, for MFBD render targets.
     Values 0 to 15 are RAW formats named by bit width (RAW8 up to RAW2048);
     16 and above are named component layouts. Values 23, 26, 27 and 30 are
     not assigned in this list. -->
<enum name="MFBD Color Format">
<value name="RAW8" value="0"/>
<value name="RAW16" value="1"/>
<value name="RAW24" value="2"/>
<value name="RAW32" value="3"/>
<value name="RAW48" value="4"/>
<value name="RAW64" value="5"/>
<value name="RAW96" value="6"/>
<value name="RAW128" value="7"/>
<value name="RAW192" value="8"/>
<value name="RAW256" value="9"/>
<value name="RAW384" value="10"/>
<value name="RAW512" value="11"/>
<value name="RAW768" value="12"/>
<value name="RAW1024" value="13"/>
<value name="RAW1536" value="14"/>
<value name="RAW2048" value="15"/>
<value name="R8" value="16"/>
<value name="R8G8" value="17"/>
<value name="R8G8B8" value="18"/>
<value name="R8G8B8A8" value="19"/>
<value name="R4G4B4A4" value="20"/>
<value name="R5G6B5" value="21"/>
<value name="R8G8B8_FROM_R8G8B8A2" value="22"/>
<value name="R10G10B10A2" value="24"/>
<value name="A2B10G10R10" value="25"/>
<value name="R5G5B5A1" value="28"/>
<value name="A1B5G5R5" value="29"/>
<value name="NATIVE" value="31"/>
</enum>
<enum name="Downsampling Accumulation Mode"> <enum name="Downsampling Accumulation Mode">
<value name="Unsigned normalized integer" value="0"/> <value name="Unsigned normalized integer" value="0"/>
<value name="Signed normalized integer" value="1"/> <value name="Signed normalized integer" value="1"/>
@ -661,6 +700,10 @@
<value name="D32_S8X24" value="15"/> <value name="D32_S8X24" value="15"/>
</enum> </enum>
<!-- Depth/stencil preload format. Only one value is defined so far.
     NOTE(review): D32_S8X24 is 4 here but 15 in the enum just above;
     confirm the two encodings are meant to differ. -->
<enum name="ZS Preload Format">
<value name="D32_S8X24" value="4"/>
</enum>
<enum name="S Format"> <enum name="S Format">
<value name="S8" value="1"/> <value name="S8" value="1"/>
<value name="S8X8" value="2"/> <value name="S8X8" value="2"/>
@ -760,6 +803,206 @@
<value name="D3D 16x Grid" value="4"/> <value name="D3D 16x Grid" value="4"/>
</enum> </enum>
<!-- Depth precision selector for the 2-bit "Z Internal Format" field of
     "Multi-Target Framebuffer Parameters". The C helper
     panfrost_get_z_internal_format() maps PIPE_FORMAT_* depth and
     depth/stencil formats onto these three values. -->
<enum name="Z Internal Format">
<value name="D16" value="0"/>
<value name="D24" value="1"/>
<value name="D32" value="2"/>
</enum>
<!-- Tag bits OR-ed into the low bits of a framebuffer descriptor GPU
     address. panfrost_batch_reserve_framebuffer() allocates the descriptor
     with 64-byte alignment and sets IS_MFBD when the MIDGARD_SFBD quirk is
     absent. MASK (63) covers the six low address bits left free by that
     alignment. The users of HAS_ZS_RT are not visible in this part of the
     change. -->
<enum name="FBD Tag">
<value name="IS_MFBD" value="1"/>
<value name="HAS_ZS_RT" value="2"/>
<value name="MASK" value="63"/>
</enum>
<!-- Global parameters of the multi-target framebuffer descriptor. This
     struct is placed at offset 32 of the "Multi-Target Framebuffer"
     aggregate. Field positions are written as word:bit; the layout below
     is consistent with 32-bit words (six words, 0 to 5). Fields carrying a
     "modifier" attribute are transformed by the pack generator before
     being stored; check the generator for the exact encoding. -->
<struct name="Multi-Target Framebuffer Parameters">
<!-- Words 0 to 2: framebuffer dimensions and bounding box. -->
<field name="Width" size="16" start="0:0" type="uint" modifier="minus(1)"/>
<field name="Height" size="16" start="0:16" type="uint" modifier="minus(1)"/>
<field name="Bound Min X" size="16" start="1:0" type="uint"/>
<field name="Bound Min Y" size="16" start="1:16" type="uint"/>
<field name="Bound Max X" size="16" start="2:0" type="uint"/>
<field name="Bound Max Y" size="16" start="2:16" type="uint"/>
<!-- Word 3: sampling, tiling and render target count. Bit 23 has no
     field assigned. -->
<field name="Sample Count" size="3" start="3:0" type="uint" default="1" modifier="log2"/>
<field name="Sample Pattern" size="3" start="3:3" type="Sample Pattern"/>
<field name="Tie-Break Rule" size="3" start="3:6" type="Tie-Break Rule"/>
<field name="Effective Tile Size" size="4" start="3:9" type="uint" modifier="log2"/>
<field name="X Downsampling Scale" size="3" start="3:13" type="uint"/>
<field name="Y Downsampling Scale" size="3" start="3:16" type="uint"/>
<field name="Render Target Count" size="4" start="3:19" type="uint" modifier="minus(1)"/>
<field name="Color Buffer Allocation" size="8" start="3:24" type="uint" modifier="shr(10)"/>
<!-- Word 4: stencil, depth and CRC control. Bits 11 to 15 and 22 to 29
     have no field assigned. -->
<field name="S Clear" size="8" start="4:0" type="uint"/>
<field name="S Write Enable" size="1" start="4:8" type="bool"/>
<field name="S Preload Enable" size="1" start="4:9" type="bool"/>
<field name="S Unload Enable" size="1" start="4:10" type="bool"/>
<field name="Z Internal Format" size="2" start="4:16" type="Z Internal Format"/>
<field name="Z Write Enable" size="1" start="4:18" type="bool"/>
<field name="Z Preload Enable" size="1" start="4:19" type="bool"/>
<field name="Z Unload Enable" size="1" start="4:20" type="bool"/>
<field name="Has ZS CRC Extension" size="1" start="4:21" type="bool"/>
<field name="CRC Read Enable" size="1" start="4:30" type="bool"/>
<field name="CRC Write Enable" size="1" start="4:31" type="bool"/>
<!-- Word 5: depth clear value, stored as a 32-bit float. -->
<field name="Z Clear" size="32" start="5:0" type="float"/>
</struct>
<!-- Depth/stencil and CRC extension descriptor, spanning words 0 to 15.
     Several field groups below describe the same words in different ways
     (they overlap on purpose): words 4 to 11 are laid out either as linear
     writeback surfaces or as an AFBC surface, and words 12 to 15 hold
     either the ZS preload surface or the CRC clear color. Which layout
     applies is decided by the code filling the descriptor, not by this
     file. -->
<struct name="ZS CRC Extension">
<field name="CRC Base" size="64" start="0:0" type="address"/>
<field name="CRC Row Stride" size="32" start="2:0" type="uint"/>
<!-- Word 3: format and control bits. "ZS Block Format" and
     "ZS Block Format v7" occupy the same two bits and differ only in the
     enum used to interpret them. -->
<field name="ZS Write Format" size="4" start="3:0" type="ZS Format"/>
<field name="ZS Block Format" size="2" start="3:4" type="Block Format"/>
<field name="ZS Block Format v7" size="2" start="3:4" type="Block Format v7"/>
<field name="ZS MSAA" size="2" start="3:6" default="Single" type="MSAA"/>
<field name="ZS Big Endian" size="1" start="3:8" type="bool"/>
<field name="ZS Clean Pixel Write Enable" size="1" start="3:10" type="bool"/>
<field name="CRC Render Target" size="4" start="3:11" type="uint"/>
<field name="S Write Format" size="4" start="3:16" type="S Format"/>
<field name="S Block Format" size="2" start="3:20" type="Block Format"/>
<field name="S MSAA" size="2" start="3:22" default="Single" type="MSAA"/>
<field name="ZS Preload Format" size="4" start="3:28" type="ZS Preload Format"/>
<!-- Words 4 to 11, writeback layout: separate ZS and S surfaces. -->
<field name="ZS Writeback Base" size="64" start="4:0" type="address"/>
<field name="ZS Writeback Row Stride" size="32" start="6:0" type="uint"/>
<field name="ZS Writeback Surface Stride" size="32" start="7:0" type="uint"/>
<field name="S Writeback Base" size="64" start="8:0" type="address"/>
<field name="S Writeback Row Stride" size="32" start="10:0" type="uint"/>
<field name="S Writeback Surface Stride" size="32" start="11:0" type="uint"/>
<!-- Words 4 to 10, AFBC layout: overlays the writeback fields above. -->
<field name="ZS AFBC Header" size="64" start="4:0" type="address"/>
<field name="ZS AFBC Row Stride" size="13" start="6:0" type="uint"/>
<field name="ZS AFBC Chunk Size" size="12" start="7:0" type="uint"/>
<field name="ZS AFBC Sparse" size="1" start="7:16" type="bool"/>
<field name="ZS AFBC Body" size="64" start="8:0" type="address"/>
<field name="ZS AFBC Body Size" size="32" start="10:0" type="uint"/>
<!-- Words 12 to 15: ZS preload surface, overlaid by the 64-bit CRC clear
     color in words 12 and 13. -->
<field name="ZS Preload Base" size="64" start="12:0" type="address"/>
<field name="ZS Preload Row Stride" size="32" start="14:0" type="uint"/>
<field name="ZS Preload Surface Stride" size="32" start="15:0" type="uint"/>
<field name="CRC Clear Color" size="64" start="12:0" type="uint"/>
</struct>
<!-- Byte-order selector for the 2-bit "Writeback Endianness" and
     "Preload Endianness" fields of the "Render Target" struct. -->
<enum name="RT Endianness">
<value name="Little Endian" value="0"/>
<value name="Big Endian 2B" value="1"/>
<value name="Big Endian 4B" value="2"/>
<value name="Big Endian 8B" value="3"/>
</enum>
<!-- One-bit encoding for the "YUV Conv K6" field of the "Render Target"
     struct; the value names are the two selectable constants (0 or 16). -->
<enum name="YUV Conv K6">
<value name="0" value="0"/>
<value name="16" value="1"/>
</enum>
<!-- Two-bit encoding for the "YUV Conv K7 Clamp" field of the
     "Render Target" struct; each value names a clamp range. -->
<enum name="YUV Conv K7 Clamp">
<value name="MINUS_128_TO_127" value="0"/>
<value name="MINUS_112_TO_111" value="1"/>
<value name="0_TO_255" value="2"/>
<value name="16_TO_239" value="3"/>
</enum>
<!-- One-bit encoding for the "YUV Conv K8" field of the "Render Target"
     struct; the value names are the two selectable constants (220 or
     256). -->
<enum name="YUV Conv K8">
<value name="220" value="0"/>
<value name="256" value="1"/>
</enum>
<!-- Component ordering codes for the 3-bit "YUV Swizzle" field of the
     "Render Target" struct. All eight encodings are assigned. -->
<enum name="YUV Swizzle">
<value name="YUVA" value="0"/>
<value name="YVUA" value="1"/>
<value name="UYVA" value="2"/>
<value name="UVYA" value="3"/>
<value name="VUYA" value="4"/>
<value name="VYUA" value="5"/>
<value name="Y00A" value="6"/>
<value name="YXXA" value="7"/>
</enum>
<!-- Codes for the 4-bit "YUV Conversion Mode" field of the "Render Target"
     struct. Only 0, 3, 4 and 6 are assigned in this enum. -->
<enum name="YUV Conversion Mode">
<value name="No Conversion" value="0"/>
<value name="BT 601" value="3"/>
<value name="BT 709" value="4"/>
<value name="BT 2020" value="6"/>
</enum>
<!-- Chroma siting codes for the 3-bit "YUV Cr Siting" field of the
     "Render Target" struct. Codes 6 and 7 are not assigned here. -->
<enum name="YUV Cr Siting">
<value name="Co-Sited" value="0"/>
<value name="Center Y" value="1"/>
<value name="Center X" value="2"/>
<value name="Center" value="3"/>
<value name="One Quarter" value="4"/>
<value name="Three Quarters" value="5"/>
</enum>
<!-- Per render target descriptor, spanning words 0 to 15. Several field
     groups below describe the same bits in different ways (they overlap on
     purpose): word 1 bits 8 to 11 have a v7 variant, the upper half of
     word 2 has two alternative YUV encodings, words 4 to 11 are laid out as
     YUV planes, an AFBC surface or a linear writeback surface, and words
     12 to 15 hold either the preload surface or the clear color. Which
     layout applies is decided by the code filling the descriptor, not by
     this file. -->
<struct name="Render Target">
<!-- Word 0: internal (tile buffer) configuration. -->
<field name="Internal Buffer Offset" size="12" start="0:4" type="uint" modifier="shr(4)"/>
<field name="YUV Enable" size="1" start="0:24" type="bool"/>
<field name="Dithered Clear" size="1" start="0:25" type="bool"/>
<field name="Internal Format" size="6" start="0:26" type="Color Buffer Internal Format"/>
<!-- Word 1: writeback configuration. "Writeback Block Format v7" is a
     4-bit field at bit 8 and therefore covers the bits used by
     "Writeback Endianness" and "Writeback Block Format". -->
<field name="Write Enable" size="1" start="1:0" type="bool"/>
<field name="Writeback Format" size="5" start="1:3" type="MFBD Color Format"/>
<field name="Writeback Endianness" size="2" start="1:8" type="RT Endianness"/>
<field name="Writeback Block Format" size="2" start="1:10" type="Block Format"/>
<field name="Writeback Block Format v7" size="4" start="1:8" type="Block Format v7"/>
<field name="Writeback MSAA" size="2" start="1:12" type="MSAA"/>
<field name="sRGB" size="1" start="1:14" type="bool"/>
<field name="Dithering Enable" size="1" start="1:15" type="bool"/>
<field name="Swizzle" size="12" start="1:16" type="uint"/>
<field name="Writeback Sampling Mode" size="2" start="1:29" type="Downsampling Accumulation Mode"/>
<field name="Clean Pixel Write Enable" size="1" start="1:31" type="bool"/>
<!-- Word 2, low half: preload configuration. -->
<field name="Preload Enable" size="1" start="2:0" type="bool"/>
<field name="Unload Enable" size="1" start="2:1" type="bool"/>
<field name="Preload Format" size="5" start="2:3" type="MFBD Color Format"/>
<field name="Preload Endianness" size="2" start="2:8" type="RT Endianness"/>
<field name="Preload Block Format" size="4" start="2:10" type="Block Format"/>
<field name="Preload MSAA" size="2" start="2:14" type="MSAA"/>
<!-- Word 2, high half: YUV conversion controls. The K5/K6/K7/K8 fields
     overlap the Swizzle/Full Range/Conversion Mode/Cr Siting fields, so
     the two sets are alternative encodings of the same bits. -->
<field name="YUV Conv K5" size="8" start="2:16" type="uint"/>
<field name="YUV Swizzle" size="3" start="2:16" type="YUV Swizzle"/>
<field name="YUV Full Range" size="1" start="2:20" type="bool"/>
<field name="YUV Conversion Mode" size="4" start="2:21" type="YUV Conversion Mode"/>
<field name="YUV Cr Siting" size="3" start="2:25" type="YUV Cr Siting"/>
<field name="YUV Unsigned Cr Range" size="1" start="2:28" type="bool"/>
<field name="YUV Conv K6" size="1" start="2:24" type="YUV Conv K6"/>
<field name="YUV Conv K7 Clamp" size="2" start="2:25" type="YUV Conv K7 Clamp"/>
<field name="YUV Conv K8" size="1" start="2:27" type="YUV Conv K8"/>
<field name="YUV Conv Disable" size="1" start="2:31" type="bool"/>
<!-- Word 3: YUV conversion coefficients K1 to K4, one byte each. -->
<field name="YUV Conv K1" size="8" start="3:0" type="uint"/>
<field name="YUV Conv K2" size="8" start="3:8" type="uint"/>
<field name="YUV Conv K3" size="8" start="3:16" type="uint"/>
<field name="YUV Conv K4" size="8" start="3:24" type="uint"/>
<!-- Words 4 to 11, YUV layout: three plane addresses and two strides. -->
<field name="YUV Plane 0 Base" size="64" start="4:0" type="address"/>
<field name="YUV Plane 1 Base" size="64" start="6:0" type="address"/>
<field name="YUV Plane 2 Base" size="64" start="8:0" type="address"/>
<field name="YUV Plane 0 Stride" size="32" start="10:0" type="uint"/>
<field name="YUV Plane 1 2 Stride" size="32" start="11:0" type="uint"/>
<!-- Words 4 to 10, AFBC layout: overlays the YUV fields above. -->
<field name="AFBC Header" size="64" start="4:0" type="address"/>
<field name="AFBC Row Stride" size="13" start="6:0" type="uint"/>
<field name="AFBC Chunk Size" size="12" start="7:0" type="uint"/>
<field name="AFBC Sparse" size="1" start="7:16" type="bool"/>
<field name="AFBC YUV Transform Enable" size="1" start="7:17" type="bool"/>
<field name="AFBC Split Block Enable" size="1" start="7:18" type="bool"/>
<field name="AFBC Wide Block Enable" size="1" start="7:19" type="bool"/>
<field name="AFBC Body" size="64" start="8:0" type="address"/>
<field name="AFBC Body Size" size="32" start="10:0" type="uint"/>
<!-- Words 8 to 11, linear writeback layout: overlays the fields above. -->
<field name="Writeback Base" size="64" start="8:0" type="address"/>
<field name="Writeback Row Stride" size="32" start="10:0" type="uint"/>
<field name="Writeback Surface Stride" size="32" start="11:0" type="uint"/>
<!-- Words 12 to 15: preload surface, overlaid by four 32-bit clear color
     words. -->
<field name="Preload Base" size="64" start="12:0" type="address"/>
<field name="Preload Row Stride" size="32" start="14:0" type="uint"/>
<field name="Preload Surface Stride" size="32" start="15:0" type="uint"/>
<field name="Clear Color 0" size="32" start="12:0" type="uint"/>
<field name="Clear Color 1" size="32" start="13:0" type="uint"/>
<field name="Clear Color 2" size="32" start="14:0" type="uint"/>
<field name="Clear Color 3" size="32" start="15:0" type="uint"/>
</struct>
<!-- Mode selector for the 3-bit "Pre Frame 0", "Pre Frame 1" and
     "Post Frame" fields of "Bifrost Framebuffer Parameters". Only three of
     the eight possible encodings are assigned here. -->
<enum name="Pre Post Frame Shader Mode">
<value name="Never" value="0"/>
<value name="Always" value="1"/>
<value name="Intersect" value="2"/>
</enum>
<!-- Bifrost-specific framebuffer parameters. In the "Multi-Target
     Framebuffer" aggregate this struct sits at offset 0, the same offset
     as the "Local Storage" section. Words 1 to 3 and the upper bits of
     word 0 have no field assigned. -->
<struct name="Bifrost Framebuffer Parameters">
<field name="Pre Frame 0" size="3" start="0:0" type="Pre Post Frame Shader Mode"/>
<field name="Pre Frame 1" size="3" start="0:3" type="Pre Post Frame Shader Mode"/>
<field name="Post Frame" size="3" start="0:6" type="Pre Post Frame Shader Mode"/>
<field name="Sample Locations" size="64" start="4:0" type="address"/>
<field name="Frame Shader DCDs" size="64" start="6:0" type="address"/>
</struct>
<struct name="Bifrost Tiler Heap"> <struct name="Bifrost Tiler Heap">
<field name="Size" size="32" start="1:0" type="uint" modifier="align(4096)"/> <field name="Size" size="32" start="1:0" type="uint" modifier="align(4096)"/>
<field name="Base" size="64" start="2:0" type="address"/> <field name="Base" size="64" start="2:0" type="address"/>
@ -781,4 +1024,21 @@
<field name="Heap" size="64" start="6:0" type="address"/> <field name="Heap" size="64" start="6:0" type="address"/>
<field name="Weights" size="32" start="8:0" type="Bifrost Tiler Weights" elements="8"/> <field name="Weights" size="32" start="8:0" type="Bifrost Tiler Weights" elements="8"/>
</struct> </struct>
<!-- A single 64-bit GPU address. The "Multi-Target Framebuffer" aggregate
     places it at offset 56, the same offset as the "Midgard Tiler"
     section. Presumably it points at a Bifrost tiler descriptor; confirm
     against the code that fills it. -->
<struct name="Bifrost Tiler Pointer">
<field name="Address" size="64" start="0:0" type="address"/>
</struct>
<!-- Field-less struct with an explicit size, used as the "Bifrost Padding"
     section at offset 64 of the "Multi-Target Framebuffer" aggregate.
     NOTE(review): the unit of size="16" is defined by the pack generator
     (presumably 32-bit words); confirm there. -->
<struct name="Bifrost Framebuffer Padding" size="16">
</struct>
<!-- Layout of the multi-target framebuffer descriptor as a set of sections
     at fixed offsets. Some sections overlap on purpose: "Local Storage"
     and "Bifrost Parameters" both start at offset 0, "Tiler" and
     "Bifrost Tiler Pointer" both start at offset 56, and "Bifrost Padding"
     (offset 64) covers the region where "Tiler Weights" (offset 96) lives.
     Judging by the section and type names, the overlapping entries are the
     Midgard and Bifrost variants of the same region; a given descriptor is
     expected to use one set or the other. -->
<aggregate name="Multi-Target Framebuffer">
<section name="Local Storage" offset="0" type="Local Storage"/>
<section name="Bifrost Parameters" offset="0" type="Bifrost Framebuffer Parameters"/>
<section name="Parameters" offset="32" type="Multi-Target Framebuffer Parameters"/>
<section name="Tiler" offset="56" type="Midgard Tiler"/>
<section name="Tiler Weights" offset="96" type="Midgard Tiler Weights"/>
<section name="Bifrost Tiler Pointer" offset="56" type="Bifrost Tiler Pointer"/>
<section name="Bifrost Padding" offset="64" type="Bifrost Framebuffer Padding"/>
</aggregate>
</panxml> </panxml>

View file

@ -408,3 +408,21 @@ panfrost_format_to_bifrost_blend(const struct util_format_description *desc)
return format; return format;
} }
} }
/* Translate a depth or depth/stencil pipe format into the Z internal format
 * enum. Only the depth component width selects the result; the presence of
 * stencil or padding bits does not change it. Any format not listed below
 * is a caller error and reaches unreachable(). */
enum mali_z_internal_format
panfrost_get_z_internal_format(enum pipe_format fmt)
{
        /* 16-bit depth, with or without stencil */
        if (fmt == PIPE_FORMAT_Z16_UNORM ||
            fmt == PIPE_FORMAT_Z16_UNORM_S8_UINT)
                return MALI_Z_INTERNAL_FORMAT_D16;

        /* 24-bit depth, with stencil or padding in the remaining byte */
        if (fmt == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
            fmt == PIPE_FORMAT_Z24X8_UNORM)
                return MALI_Z_INTERNAL_FORMAT_D24;

        /* 32-bit float depth, with or without stencil */
        if (fmt == PIPE_FORMAT_Z32_FLOAT ||
            fmt == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
                return MALI_Z_INTERNAL_FORMAT_D32;

        unreachable("Unsupported depth/stencil format.");
}

View file

@ -147,6 +147,9 @@ extern struct panfrost_format panfrost_pipe_format_table[PIPE_FORMAT_COUNT];
bool bool
panfrost_is_z24s8_variant(enum pipe_format fmt); panfrost_is_z24s8_variant(enum pipe_format fmt);
/* Maps a depth or depth/stencil pipe format to the matching
 * MALI_Z_INTERNAL_FORMAT_* value (D16, D24 or D32). Formats other than the
 * Z16, Z24 and Z32 variants handled by the definition hit unreachable(). */
enum mali_z_internal_format
panfrost_get_z_internal_format(enum pipe_format fmt);
unsigned unsigned
panfrost_translate_swizzle_4(const unsigned char swizzle[4]); panfrost_translate_swizzle_4(const unsigned char swizzle[4]);