pan/layout: Interleave header/body in AFBC(3D)

This allows us to get rid of an annoying limitation on AFBC(3D), and
makes things a lot easier to reason about.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Tested-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35731>
This commit is contained in:
Boris Brezillon 2025-06-24 10:14:22 +02:00 committed by Marge Bot
parent 13e6d2ba00
commit 01d325ba63
9 changed files with 98 additions and 138 deletions

View file

@ -3640,6 +3640,7 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src,
{
MESA_TRACE_FUNC();
struct panfrost_device *dev = pan_device(src->base.screen);
struct pan_image_slice_layout *src_slice = &src->plane.layout.slices[level];
unsigned src_stride_sb = pan_afbc_stride_blocks(
src->image.props.modifier, src_slice->afbc.header.row_stride_B);
@ -3653,7 +3654,9 @@ panfrost_afbc_pack(struct panfrost_batch *batch, struct panfrost_resource *src,
.src = src->plane.base + src_slice->offset_B,
.dst = dst->ptr.gpu + dst_slice->offset_B,
.metadata = metadata->ptr.gpu + metadata_offset_B,
.header_size = dst_slice->afbc.header.size_B,
.header_size =
pan_afbc_body_offset(dev->arch, src->image.props.modifier,
src_slice->afbc.header.surface_size_B),
.src_stride = src_stride_sb,
.dst_stride = dst_stride_sb,
};

View file

@ -834,22 +834,21 @@ panfrost_resource_init_afbc_headers(struct panfrost_resource *pres)
if (panfrost_bo_mmap(pres->bo))
return -1;
unsigned nr_samples = MAX2(pres->base.nr_samples, 1);
for (unsigned i = 0; i < pres->base.array_size; ++i) {
for (unsigned l = 0; l <= pres->base.last_level; ++l) {
struct pan_image_slice_layout *slice = &pres->plane.layout.slices[l];
unsigned z_slice_count = u_minify(pres->base.depth0, l);
for (unsigned s = 0; s < nr_samples; ++s) {
void *ptr =
pres->bo->ptr.cpu + (i * pres->plane.layout.array_stride_B) +
slice->offset_B + (s * slice->afbc.header.surface_stride_B);
for (unsigned z = 0; z < z_slice_count; ++z) {
void *ptr = pres->bo->ptr.cpu +
(i * pres->plane.layout.array_stride_B) +
slice->offset_B + (z * slice->afbc.surface_stride_B);
/* Zero-ed AFBC headers seem to encode a plain
* black. Let's use this pattern to keep the
* initialization simple.
*/
memset(ptr, 0, slice->afbc.header.size_B);
memset(ptr, 0, slice->afbc.header.surface_size_B);
}
}
}
@ -2038,6 +2037,8 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
src_stride *
pan_afbc_height_blocks(
src_modifier, u_minify(prsrc->image.props.extent_px.height, level));
uint32_t body_offset_B = pan_afbc_body_offset(
dev->arch, dst_modifier, src_slice->afbc.header.surface_size_B);
uint32_t offset = 0;
struct pan_afbc_block_info *meta =
metadata_bo->ptr.cpu + metadata_offsets[level];
@ -2073,25 +2074,11 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
total_size =
ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, dst_modifier));
{
unsigned width = u_minify(prsrc->base.width0, level);
unsigned height = u_minify(prsrc->base.height0, level);
unsigned dst_stride =
DIV_ROUND_UP(width, pan_afbc_superblock_width(dst_modifier));
unsigned dst_height =
DIV_ROUND_UP(height, pan_afbc_superblock_height(dst_modifier));
dst_slice->afbc.header.surface_stride_B =
dst_stride * dst_height * AFBC_HEADER_BYTES_PER_TILE;
dst_slice->afbc.header.size_B =
ALIGN_POT(dst_slice->afbc.header.surface_stride_B,
pan_afbc_body_align(dev->arch, dst_modifier));
dst_slice->afbc.body.surface_stride_B = offset;
dst_slice->afbc.body.size_B = offset;
/* Header layout is exactly the same, only the body is shrunk. */
dst_slice->afbc.header = src_slice->afbc.header;
dst_slice->afbc.surface_stride_B = body_offset_B + offset;
dst_slice->size_B = dst_slice->afbc.surface_stride_B;
dst_slice->offset_B = total_size;
dst_slice->afbc.header.row_stride_B = dst_stride * AFBC_HEADER_BYTES_PER_TILE;
dst_slice->afbc.header.surface_stride_B = dst_slice->afbc.header.surface_stride_B;
dst_slice->size_B = dst_slice->afbc.header.size_B + dst_slice->afbc.body.size_B;
/* We can't write to AFBC-packed resource, so there is no reason to
* keep CRC data around */

View file

@ -251,6 +251,13 @@ pan_afbc_body_align(unsigned arch, uint64_t modifier)
return pan_afbc_header_align(arch, modifier);
}
/* Return the offset of the AFBC body relative to the start of the headers.
 *
 * header_size is the unpadded header size in bytes; it is rounded up to the
 * body alignment reported by pan_afbc_body_align() for this arch/modifier
 * pair, so the result includes any padding inserted after the headers. */
static inline uint32_t
pan_afbc_body_offset(unsigned arch, uint64_t modifier, uint32_t header_size)
{
   const uint32_t body_align = pan_afbc_body_align(arch, modifier);

   return ALIGN_POT(header_size, body_align);
}
/*
* Determine the tile size used by AFBC. This tiles superblocks themselves.
* Current GPUs support either 8x8 tiling or no tiling (1x1)

View file

@ -219,18 +219,14 @@ get_afbc_att_mem_props(struct pan_image_plane_ref pref, unsigned mip_level,
const struct pan_image_plane *plane = image->planes[pref.plane_idx];
const struct pan_image_slice_layout *slayout =
&plane->layout.slices[mip_level];
const uint64_t stride_B = image->props.dim == MALI_TEXTURE_DIMENSION_3D
? slayout->afbc.surface_stride_B
: plane->layout.array_stride_B;
*row_stride = slayout->afbc.header.row_stride_B;
*body_offset = slayout->afbc.header.size_B;
*header = plane->base + slayout->offset_B;
if (image->props.dim == MALI_TEXTURE_DIMENSION_3D) {
*header += (layer_or_z_slice * slayout->afbc.header.surface_stride_B);
*body_offset +=
(layer_or_z_slice * slayout->afbc.body.surface_stride_B) -
(layer_or_z_slice * slayout->afbc.header.surface_stride_B);
} else {
*header += (layer_or_z_slice * plane->layout.array_stride_B);
}
*body_offset = pan_afbc_body_offset(PAN_ARCH, image->props.modifier,
slayout->afbc.header.surface_size_B);
*header = plane->base + slayout->offset_B + (stride_B * layer_or_z_slice);
}
void
@ -700,7 +696,10 @@ GENX(pan_emit_afbc_color_attachment)(const struct pan_fb_info *fb,
const struct pan_image_slice_layout *slayout =
&plane->layout.slices[mip_level];
cfg.afbc.body_size = slayout->afbc.body.surface_stride_B;
cfg.afbc.body_size =
slayout->afbc.surface_stride_B -
pan_afbc_body_offset(PAN_ARCH, image->props.modifier,
slayout->afbc.header.surface_size_B);
cfg.afbc.chunk_size = 9;
cfg.afbc.sparse = true;
#endif

View file

@ -30,28 +30,19 @@ struct pan_afbc_image_slice_layout {
/* Number of bytes between two rows of AFBC headers. */
uint32_t row_stride_B;
/* For 3D textures, this is the stride in bytes between AFBC headers of two
* consecutive Z slices. For 2D textures, the utile AFBC header size
* (header_size_B without the padding).
/* For 3D textures, this is the size in bytes of AFBC headers covering
* a single Z slice. For 2D this is the total header size. This size is
* the utile header size, it doesn't count the padding needed to meet the
* body alignment constraints. Pass this to pan_afbc_body_offset() to get
* the body offset.
*/
uint32_t surface_stride_B;
/* AFBC header size. This contains padding to meet AFBC header alignment
* constraints, meaning it can't be used to determine the number of AFBC
* tiles in the image slice.
*/
uint32_t size_B;
uint32_t surface_size_B;
} header;
struct {
/* For 3D textures, this is the stride in bytes between AFBC data of two
* consecutive Z slices. For 2D textures, this is the same as body_size_B.
*/
uint32_t surface_stride_B;
/* Size of the AFBC body. */
uint32_t size_B;
} body;
/* For 3D textures, this is the stride in bytes between AFBC headers of two
* consecutive Z slices. For 2D, this is the total size of the 2D level.
*/
uint32_t surface_stride_B;
};
struct pan_tiled_or_linear_image_slice_layout {

View file

@ -150,37 +150,21 @@ pan_mod_afbc_init_slice_layout(
const unsigned surface_stride_sb =
row_stride_sb * (aligned_extent_px.height / afbc_tile_extent_px.height);
uint64_t hdr_surface_stride_B = (uint64_t)surface_stride_sb *
AFBC_HEADER_BYTES_PER_TILE;
hdr_surface_stride_B =
ALIGN_POT(hdr_surface_stride_B, (uint64_t)(offset_align_mask + 1));
uint64_t hdr_surf_size_B =
(uint64_t)surface_stride_sb * AFBC_HEADER_BYTES_PER_TILE;
uint64_t body_offset_B =
pan_afbc_body_offset(PAN_ARCH, props->modifier, hdr_surf_size_B);
uint64_t surf_stride_B =
body_offset_B + ((uint64_t)surface_stride_sb * afbc_tile_payload_size_B);
slayout->afbc.header.surface_stride_B = hdr_surface_stride_B;
slayout->afbc.header.surface_size_B = hdr_surf_size_B;
slayout->afbc.surface_stride_B = surf_stride_B;
slayout->size_B = surf_stride_B * mip_extent_px.depth;
uint64_t header_size_B = hdr_surface_stride_B * aligned_extent_px.depth;
header_size_B = ALIGN_POT(
header_size_B, (uint64_t)pan_afbc_body_align(PAN_ARCH, props->modifier));
slayout->afbc.header.size_B = header_size_B;
uint64_t body_surf_stride_B =
(uint64_t)surface_stride_sb * afbc_tile_payload_size_B;
uint64_t body_size_B = body_surf_stride_B * aligned_extent_px.depth;
/* Each AFBC header encodes the offset to its AFBC data in a 32-bit field.
* AFBC headers of all 3D slices are placed at the beginning, meaning the
* maximum offset that exists is between the last header, and the last
* tile. */
ASSERTED uint64_t max_body_offset = body_size_B - afbc_tile_payload_size_B +
header_size_B -
AFBC_HEADER_BYTES_PER_TILE;
if (max_body_offset > UINT32_MAX)
if (hdr_surf_size_B > UINT32_MAX || surf_stride_B > UINT32_MAX ||
slayout->size_B > UINT32_MAX)
return false;
slayout->afbc.body.surface_stride_B = body_surf_stride_B;
slayout->afbc.body.size_B = body_size_B;
slayout->size_B = header_size_B + body_size_B;
return true;
}

View file

@ -613,9 +613,7 @@ get_afbc_plane_props(const struct pan_image_view *iview, int plane_idx,
*header_pointer = plane->base + slayout->offset_B;
*header_row_stride = slayout->afbc.header.row_stride_B;
/* Header/body of 3D resources are not interleaved, so the header slice
* size and header slice stride are the same thing. */
*header_slice_size = slayout->afbc.header.surface_stride_B;
*header_slice_size = slayout->afbc.header.surface_size_B;
*header_slice_stride = 0;
*size = slayout->size_B;
@ -623,20 +621,15 @@ get_afbc_plane_props(const struct pan_image_view *iview, int plane_idx,
assert(pref.image->props.dim == MALI_TEXTURE_DIMENSION_3D);
assert(layer_or_z_slice == 0);
*header_slice_stride = slayout->afbc.header.surface_stride_B;
*header_slice_stride = slayout->afbc.surface_stride_B;
} else if (pref.image->props.dim == MALI_TEXTURE_DIMENSION_3D) {
assert(iview->dim == MALI_TEXTURE_DIMENSION_2D);
/* When viewing 3D image as 2D-array, each plane describes a single Z
* slice. The header pointer is moved to the right slice, and the size is
* set to a single slice. */
*header_pointer +=
layer_or_z_slice * slayout->afbc.header.surface_stride_B;
*header_slice_stride = slayout->afbc.header.surface_stride_B;
/* Skip headers and bodies that fall outside the Z slice being
* addressed. */
*size = (slayout->afbc.header.size_B -
(layer_or_z_slice * slayout->afbc.header.surface_stride_B)) +
(slayout->afbc.body.surface_stride_B * (layer_or_z_slice + 1));
*header_pointer += layer_or_z_slice * slayout->afbc.surface_stride_B;
*header_slice_stride = slayout->afbc.surface_stride_B;
*size = slayout->afbc.surface_stride_B;
} else {
*header_pointer += layer_or_z_slice * plane->layout.array_stride_B;
}
@ -866,7 +859,11 @@ get_afbc_surface_props(const struct pan_image_view *iview,
const struct pan_image_plane *plane = pref.image->planes[pref.plane_idx];
const struct pan_image_slice_layout *slayout =
&plane->layout.slices[mip_level];
uint64_t plane_header_addr = plane->base + slayout->offset_B;
uint64_t stride_B = pref.image->props.dim == MALI_TEXTURE_DIMENSION_3D
? slayout->afbc.surface_stride_B
: plane->layout.array_stride_B;
uint64_t plane_header_addr =
plane->base + slayout->offset_B + (stride_B * layer_or_z_slice);
unsigned tag = 0;
#if PAN_ARCH >= 5
@ -876,17 +873,10 @@ get_afbc_surface_props(const struct pan_image_view *iview,
assert(sample == 0);
if (pref.image->props.dim == MALI_TEXTURE_DIMENSION_3D) {
plane_header_addr +=
layer_or_z_slice * slayout->afbc.header.surface_stride_B;
*surf_stride = slayout->afbc.header.surface_stride_B;
} else {
plane_header_addr += layer_or_z_slice * plane->layout.array_stride_B;
/* Surface stride is used to do a bound check, and must cover the header
* and payload sections. */
*surf_stride = slayout->afbc.header.size_B + slayout->afbc.body.size_B;
}
/* On 2D views, surface stride is used to do a bound check, so we can't set
* it to zero.
*/
*surf_stride = slayout->afbc.surface_stride_B;
*pointer = plane_header_addr | tag;
*row_stride = slayout->afbc.header.row_stride_B;
}

View file

@ -381,10 +381,12 @@ TEST(AFBCLayout, Linear3D)
ASSERT_TRUE(layout_init(0, &p, 0, NULL, &l));
/* AFBC Surface stride is bytes between consecutive surface headers, which is
* the header size since this is a 3D texture. At superblock size 16x16, the
* 8x32 layer has 1x2 superblocks, so the header size is 2 * 16 = 32 bytes,
* rounded up to cache line 64.
/* AFBC Surface size is the size of headers for a single surface. At superblock
* size 16x16, the 8x32 layer has 1x2 superblocks, so the header size is 2 *
* 16 = 32 bytes. Body offset needs to be aligned on 64 bytes on v6-.
* Header/body sections of a 3D image are interleaved, so the surface stride
* is the header size, aligned to meet body offset alignment constraints, plus
* the body of a single surface.
*
* There is only 1 superblock per row, so the row stride is the bytes per 1
* header block = 16.
@ -396,11 +398,9 @@ TEST(AFBCLayout, Linear3D)
*/
EXPECT_EQ(l.slices[0].offset_B, 0);
EXPECT_EQ(l.slices[0].afbc.header.row_stride_B, 16);
EXPECT_EQ(l.slices[0].afbc.header.surface_stride_B, 64);
EXPECT_EQ(l.slices[0].afbc.header.size_B, 64 * 16);
EXPECT_EQ(l.slices[0].afbc.body.surface_stride_B, 2048);
EXPECT_EQ(l.slices[0].afbc.body.size_B, 2048 * 16);
EXPECT_EQ(l.slices[0].size_B, 2048 * 16 + 64 * 16);
EXPECT_EQ(l.slices[0].afbc.header.surface_size_B, 32);
EXPECT_EQ(l.slices[0].afbc.surface_stride_B, 64 + 2048);
EXPECT_EQ(l.slices[0].size_B, (64 + 2048) * 16);
}
TEST(AFBCLayout, Tiled16x16)
@ -439,10 +439,8 @@ TEST(AFBCLayout, Tiled16x16)
*/
EXPECT_EQ(l.slices[0].offset_B, 0);
EXPECT_EQ(l.slices[0].afbc.header.row_stride_B, 8192);
EXPECT_EQ(l.slices[0].afbc.header.surface_stride_B, 32768);
EXPECT_EQ(l.slices[0].afbc.header.size_B, 32768);
EXPECT_EQ(l.slices[0].afbc.body.surface_stride_B, 2097152);
EXPECT_EQ(l.slices[0].afbc.body.size_B, 2097152);
EXPECT_EQ(l.slices[0].afbc.header.surface_size_B, 32768);
EXPECT_EQ(l.slices[0].afbc.surface_stride_B, 2129920);
EXPECT_EQ(l.slices[0].size_B, 2129920);
}
@ -470,10 +468,8 @@ TEST(AFBCLayout, Linear16x16Minimal)
/* Image is 1x1 to test for correct alignment everywhere. */
EXPECT_EQ(l.slices[0].offset_B, 0);
EXPECT_EQ(l.slices[0].afbc.header.row_stride_B, 16);
EXPECT_EQ(l.slices[0].afbc.header.surface_stride_B, 64);
EXPECT_EQ(l.slices[0].afbc.header.size_B, 64);
EXPECT_EQ(l.slices[0].afbc.body.surface_stride_B, 32 * 8);
EXPECT_EQ(l.slices[0].afbc.body.size_B, 32 * 8);
EXPECT_EQ(l.slices[0].afbc.header.surface_size_B, 16);
EXPECT_EQ(l.slices[0].afbc.surface_stride_B, 64 + (32 * 8));
EXPECT_EQ(l.slices[0].size_B, 64 + (32 * 8));
}
@ -501,10 +497,8 @@ TEST(AFBCLayout, Linear16x16Minimalv6)
/* Image is 1x1 to test for correct alignment everywhere. */
EXPECT_EQ(l.slices[0].offset_B, 0);
EXPECT_EQ(l.slices[0].afbc.header.row_stride_B, 16);
EXPECT_EQ(l.slices[0].afbc.header.surface_stride_B, 128);
EXPECT_EQ(l.slices[0].afbc.header.size_B, 128);
EXPECT_EQ(l.slices[0].afbc.body.surface_stride_B, 32 * 8);
EXPECT_EQ(l.slices[0].afbc.body.size_B, 32 * 8);
EXPECT_EQ(l.slices[0].afbc.header.surface_size_B, 16);
EXPECT_EQ(l.slices[0].afbc.surface_stride_B, 128 + (32 * 8));
EXPECT_EQ(l.slices[0].size_B, 128 + (32 * 8));
}
@ -533,10 +527,8 @@ TEST(AFBCLayout, Tiled16x16Minimal)
/* Image is 1x1 to test for correct alignment everywhere. */
EXPECT_EQ(l.slices[0].offset_B, 0);
EXPECT_EQ(l.slices[0].afbc.header.row_stride_B, 16 * 8 * 8);
EXPECT_EQ(l.slices[0].afbc.header.surface_stride_B, 4096);
EXPECT_EQ(l.slices[0].afbc.header.size_B, 4096);
EXPECT_EQ(l.slices[0].afbc.body.surface_stride_B, 32 * 8 * 8 * 8);
EXPECT_EQ(l.slices[0].afbc.body.size_B, 32 * 8 * 8 * 8);
EXPECT_EQ(l.slices[0].afbc.header.surface_size_B, 16 * 8 * 8);
EXPECT_EQ(l.slices[0].afbc.surface_stride_B, 4096 + (32 * 8 * 8 * 8));
EXPECT_EQ(l.slices[0].size_B, 4096 + (32 * 8 * 8 * 8));
}

View file

@ -407,11 +407,18 @@ panvk_image_plane_bind(struct panvk_device *dev,
layer++) {
for (unsigned level = 0; level < plane->image.props.nr_slices;
level++) {
void *header = bo_base + offset +
(layer * plane->plane.layout.array_stride_B) +
plane->plane.layout.slices[level].offset_B;
memset(header, 0,
plane->plane.layout.slices[level].afbc.header.size_B);
const struct pan_image_slice_layout *slayout =
&plane->plane.layout.slices[level];
uint32_t z_slice_count =
u_minify(plane->image.props.extent_px.depth, level);
for (unsigned z = 0; z < z_slice_count; z++) {
void *header = bo_base + offset +
((uint64_t)slayout->afbc.surface_stride_B * z) +
(layer * plane->plane.layout.array_stride_B) +
plane->plane.layout.slices[level].offset_B;
memset(header, 0, slayout->afbc.header.surface_size_B);
}
}
}
@ -502,7 +509,7 @@ get_image_subresource_layout(const struct panvk_image *image,
layout->rowPitch = pan_afbc_stride_blocks(
image->vk.drm_format_mod, slice_layout->afbc.header.row_stride_B);
layout->depthPitch = pan_afbc_stride_blocks(
image->vk.drm_format_mod, slice_layout->afbc.header.surface_stride_B);
image->vk.drm_format_mod, slice_layout->afbc.header.surface_size_B);
} else {
layout->rowPitch = slice_layout->tiled_or_linear.row_stride_B;
layout->depthPitch = slice_layout->tiled_or_linear.surface_stride_B;