panfrost: Fix AFBC packing

Layout refactoring commits broke AFBC packing while removing several
fields to simplify the logic. The stride and height are now derived
when necessary at packing time based on the resource modifier. The
problem is that the code assumes that the source and destination
headers are the same although the source and destination modifiers
might differ and create size mismatches when passed to the AFBC
utilities in pan_afbc.h. The destination modifier is set as the source
modifier without the AFBC_FORMAT_MOD_SPARSE and AFBC_FORMAT_MOD_TILED
flags. While the AFBC_FORMAT_MOD_SPARSE flag doesn't have any impact
on these utilities, the AFBC_FORMAT_MOD_TILED flag does.

This commit fixes the issue by keeping the same header block layout
(linear or tiled header layout) when packing a resource. This allows
to simply parse header blocks linearly without having to bother with
the internal layout (Morton order). The tiled packed resource might
also benefit from better cache accesses.

Fixes: a2e9ce39e9 pan/layout: Drop pan_image_slice_layout::afbc::{stride_sb,nr_sblocks}
Fixes: 01d325ba63 pan/layout: Interleave header/body in AFBC(3D)
Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35001>
This commit is contained in:
Loïc Molinari 2025-07-01 15:37:25 +02:00 committed by Marge Bot
parent 34b693914a
commit 98c8c95755
2 changed files with 25 additions and 71 deletions

View file

@ -75,27 +75,6 @@ write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
}
static nir_def *
get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
nir_def *dst_stride)
{
nir_def *x = nir_umod(b, idx, dst_stride);
nir_def *y = nir_udiv(b, idx, dst_stride);
nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
x = nir_iand_imm(b, x, 0x7);
x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
y = nir_iand_imm(b, y, 0x7);
y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
return nir_iadd(b, offset, tile_idx);
}
static nir_def *
get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
nir_def *uncompressed_size)
@ -167,16 +146,15 @@ get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
#define MAX_LINE_SIZE 16
static void
copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
nir_def *src, nir_def *src_idx, nir_def *metadata,
nir_def *meta_idx, unsigned align)
copy_superblock(nir_builder *b, nir_def *dst, nir_def *hdr_sz, nir_def *src,
nir_def *metadata, nir_def *idx, unsigned align)
{
nir_def *hdr = read_afbc_header(b, src, src_idx);
nir_def *hdr = read_afbc_header(b, src, idx);
nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);
nir_def *size;
nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
nir_def *dst_offset = get_packed_offset(b, metadata, idx, &size);
nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);
@ -184,7 +162,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
nir_def *hdr2 =
nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
write_afbc_header(b, dst, dst_idx, hdr);
write_afbc_header(b, dst, idx, hdr);
nir_variable *offset_var =
nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
@ -256,7 +234,6 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
const struct pan_mod_convert_shader_key *key)
{
unsigned align = key->afbc.align;
bool tiled = key->mod & AFBC_FORMAT_MOD_TILED;
struct panfrost_device *dev = pan_device(&screen->base);
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, pan_shader_get_compiler_options(dev->arch),
@ -265,19 +242,14 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
panfrost_afbc_add_info_ubo(pack, b);
nir_def *coord = nir_load_global_invocation_id(&b, 32);
nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
nir_def *dst_idx = nir_channel(&b, coord, 0);
nir_def *src_idx =
tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
nir_def *idx = nir_channel(&b, coord, 0);
nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
nir_def *header_size =
nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);
copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
src_idx, align);
copy_superblock(&b, dst, header_size, src, metadata, idx, align);
return b.shader;
}
@ -460,7 +432,7 @@ panfrost_get_afbc_pack_shaders(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, unsigned align)
{
struct pan_mod_convert_shader_key key = {
.mod = DRM_FORMAT_MOD_ARM_AFBC(rsrc->modifier & AFBC_FORMAT_MOD_TILED),
.mod = DRM_FORMAT_MOD_ARM_AFBC(0),
.afbc = {
.bpp = util_format_get_blocksizebits(rsrc->base.format),
.align = align,

View file

@ -1464,16 +1464,6 @@ pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc)
#endif
/* Get scan-order index from (x, y) position when blocks are
* arranged in z-order in 8x8 tiles */
static unsigned
get_morton_index(unsigned x, unsigned y, unsigned stride)
{
unsigned i = ((x << 0) & 1) | ((y << 1) & 2) | ((x << 1) & 4) |
((y << 2) & 8) | ((x << 2) & 16) | ((y << 3) & 32);
return (((y & ~7) * stride) + ((x & ~7) << 3)) + i;
}
static void
panfrost_store_tiled_images(struct panfrost_transfer *transfer,
struct panfrost_resource *rsrc)
@ -2020,10 +2010,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
assert(prsrc->base.array_size == 1);
uint64_t src_modifier = prsrc->modifier;
uint64_t dst_modifier =
src_modifier & ~(AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE);
bool is_tiled = src_modifier & AFBC_FORMAT_MOD_TILED;
uint64_t modifier = prsrc->modifier & ~AFBC_FORMAT_MOD_SPARSE;
unsigned last_level = prsrc->base.last_level;
struct pan_image_slice_layout slice_infos[PIPE_MAX_TEXTURE_LEVELS] = {0};
unsigned total_size = 0;
@ -2049,14 +2036,13 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
struct pan_image_slice_layout *src_slice =
&prsrc->plane.layout.slices[level];
struct pan_image_slice_layout *dst_slice = &slice_infos[level];
unsigned src_stride = pan_afbc_stride_blocks(
src_modifier, src_slice->afbc.header.row_stride_B);
unsigned src_nr_sblocks =
src_stride *
unsigned nr_sblocks_total =
pan_afbc_stride_blocks(
modifier, src_slice->afbc.header.row_stride_B) *
pan_afbc_height_blocks(
src_modifier, u_minify(prsrc->image.props.extent_px.height, level));
modifier, u_minify(prsrc->image.props.extent_px.height, level));
uint32_t body_offset_B = pan_afbc_body_offset(
dev->arch, dst_modifier, src_slice->afbc.header.surface_size_B);
dev->arch, modifier, src_slice->afbc.header.surface_size_B);
uint32_t offset = 0;
struct pan_afbc_block_info *meta =
metadata_bo->ptr.cpu + metadata_offsets[level];
@ -2069,28 +2055,24 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
* at most 8 kB can be copied at each iteration (smaller values tend to
* increase latency). */
alignas(16) struct pan_afbc_block_info meta_chunk[64 * 16];
unsigned nr_blocks_per_chunk = ARRAY_SIZE(meta_chunk);
unsigned nr_sblocks_per_chunk = ARRAY_SIZE(meta_chunk);
for (unsigned i = 0; i < src_nr_sblocks; i += nr_blocks_per_chunk) {
unsigned nr_sblocks = MIN2(nr_blocks_per_chunk, src_nr_sblocks - i);
for (unsigned i = 0; i < nr_sblocks_total; i += nr_sblocks_per_chunk) {
unsigned nr_sblocks =
MIN2(nr_sblocks_per_chunk, nr_sblocks_total - i);
util_streaming_load_memcpy(
meta_chunk, &meta[i],
nr_sblocks * sizeof(struct pan_afbc_block_info));
for (unsigned j = 0; j < nr_sblocks; j++) {
unsigned idx = j;
if (is_tiled) {
idx &= ~63;
idx += get_morton_index(j & 7, (j & 63) >> 3, src_stride);
}
meta[i + idx].offset = offset;
offset += meta_chunk[idx].size;
meta[i + j].offset = offset;
offset += meta_chunk[j].size;
}
}
total_size =
ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, dst_modifier));
ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, modifier));
{
/* Header layout is exactly the same, only the body is shrunk. */
dst_slice->afbc.header = src_slice->afbc.header;
@ -2125,7 +2107,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
}
char *new_label =
panfrost_resource_new_label(prsrc, dst_modifier, old_user_label);
panfrost_resource_new_label(prsrc, modifier, old_user_label);
struct panfrost_bo *dst =
panfrost_bo_create(dev, new_size, 0, new_label);
@ -2156,9 +2138,9 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
free(old_label);
}
assert(!panfrost_is_emulated_mod(dst_modifier));
prsrc->image.props.modifier = dst_modifier;
prsrc->modifier = dst_modifier;
assert(!panfrost_is_emulated_mod(modifier));
prsrc->image.props.modifier = modifier;
prsrc->modifier = modifier;
panfrost_bo_unreference(prsrc->bo);
prsrc->bo = dst;
prsrc->plane.base = dst->ptr.gpu;