From 98c8c957559942c77f404e1b6a8ba41c65b6619c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Tue, 1 Jul 2025 15:37:25 +0200 Subject: [PATCH] panfrost: Fix AFBC packing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layout refactoring commits broke AFBC packing while removing several fields to simplify the logic. The stride and height are now derived when necessary at packing time based on the resource modifier. The problem is that the code assumes that the source and destination headers are the same although the source and destination modifiers might differ and create size mismatches when passed to the AFBC utilities in pan_afbc.h. The destination modifier is set as the source modifier without the AFBC_FORMAT_MOD_SPARSE and AFBC_FORMAT_MOD_TILED flags. While the AFBC_FORMAT_MOD_SPARSE flag doesn't have any impact on these utilities, the AFBC_FORMAT_MOD_TILED flag does. This commit fixes the issue by keeping the same header block layout (linear or tiled header layout) when packing a resource. This allows to simply parse header blocks linearly without having to bother with the internal layout (Morton order). The tiled packed resource might also benefit from better cache accesses. Fixes: a2e9ce39e9c pan/layout: Drop pan_image_slice_layout::afbc::{stride_sb,nr_sblocks} Fixes: 01d325ba633 pan/layout: Interleave header/body in AFBC(3D) Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Reviewed-by: Mary Guillemard Acked-by: Eric R. Smith Part-of: --- .../drivers/panfrost/pan_mod_conv_cso.c | 44 +++------------- src/gallium/drivers/panfrost/pan_resource.c | 52 ++++++------------- 2 files changed, 25 insertions(+), 71 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c index e996ccaec66..68ce77120f9 100644 --- a/src/gallium/drivers/panfrost/pan_mod_conv_cso.c +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.c @@ -75,27 +75,6 @@ write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr) nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF); } -static nir_def * -get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride, - nir_def *dst_stride) -{ - nir_def *x = nir_umod(b, idx, dst_stride); - nir_def *y = nir_udiv(b, idx, dst_stride); - - nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride); - offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6)); - - x = nir_iand_imm(b, x, 0x7); - x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13); - x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15); - y = nir_iand_imm(b, y, 0x7); - y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13); - y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15); - nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1)); - - return nir_iadd(b, offset, tile_idx); -} - static nir_def * get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr, nir_def *uncompressed_size) @@ -167,16 +146,15 @@ get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx, #define MAX_LINE_SIZE 16 static void -copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz, - nir_def *src, nir_def *src_idx, nir_def *metadata, - nir_def *meta_idx, unsigned align) +copy_superblock(nir_builder *b, nir_def *dst, nir_def *hdr_sz, nir_def *src, + nir_def *metadata, nir_def *idx, unsigned align) { - nir_def *hdr = read_afbc_header(b, src, src_idx); + nir_def *hdr = read_afbc_header(b, src, idx); nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0)); nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr); nir_def *size; - nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size); + nir_def *dst_offset = get_packed_offset(b, metadata, idx, &size); nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz); nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr); @@ -184,7 +162,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz, nir_def *hdr2 = nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0); hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2); - write_afbc_header(b, dst, dst_idx, hdr); + write_afbc_header(b, dst, idx, hdr); nir_variable *offset_var = nir_local_variable_create(b->impl, glsl_uint_type(), "offset"); @@ -256,7 +234,6 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, const struct pan_mod_convert_shader_key *key) { unsigned align = key->afbc.align; - bool tiled = key->mod & AFBC_FORMAT_MOD_TILED; struct panfrost_device *dev = pan_device(&screen->base); nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, pan_shader_get_compiler_options(dev->arch), @@ -265,19 +242,14 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen, panfrost_afbc_add_info_ubo(pack, b); nir_def *coord = nir_load_global_invocation_id(&b, 32); - nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride); - nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride); - nir_def *dst_idx = nir_channel(&b, coord, 0); - nir_def *src_idx = - tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx; + nir_def *idx = nir_channel(&b, coord, 0); nir_def *src = panfrost_afbc_pack_get_info_field(&b, src); nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst); nir_def *header_size = nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size)); nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata); - copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata, - src_idx, align); + copy_superblock(&b, dst, header_size, src, metadata, idx, align); return b.shader; } @@ -460,7 +432,7 @@ panfrost_get_afbc_pack_shaders(struct panfrost_context *ctx, struct panfrost_resource *rsrc, unsigned align) { struct pan_mod_convert_shader_key key = { - .mod = DRM_FORMAT_MOD_ARM_AFBC(rsrc->modifier & AFBC_FORMAT_MOD_TILED), + .mod = DRM_FORMAT_MOD_ARM_AFBC(0), .afbc = { .bpp = util_format_get_blocksizebits(rsrc->base.format), .align = align, diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 792bec7c625..1562850269c 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -1464,16 +1464,6 @@ pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc) #endif -/* Get scan-order index from (x, y) position when blocks are - * arranged in z-order in 8x8 tiles */ -static unsigned -get_morton_index(unsigned x, unsigned y, unsigned stride) -{ - unsigned i = ((x << 0) & 1) | ((y << 1) & 2) | ((x << 1) & 4) | - ((y << 2) & 8) | ((x << 2) & 16) | ((y << 3) & 32); - return (((y & ~7) * stride) + ((x & ~7) << 3)) + i; -} - static void panfrost_store_tiled_images(struct panfrost_transfer *transfer, struct panfrost_resource *rsrc) @@ -2020,10 +2010,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx, assert(prsrc->base.array_size == 1); - uint64_t src_modifier = prsrc->modifier; - uint64_t dst_modifier = - src_modifier & ~(AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE); - bool is_tiled = src_modifier & AFBC_FORMAT_MOD_TILED; + uint64_t modifier = prsrc->modifier & ~AFBC_FORMAT_MOD_SPARSE; unsigned last_level = prsrc->base.last_level; struct pan_image_slice_layout slice_infos[PIPE_MAX_TEXTURE_LEVELS] = {0}; unsigned total_size = 0; @@ -2049,14 +2036,13 @@ panfrost_pack_afbc(struct panfrost_context *ctx, struct pan_image_slice_layout *src_slice = &prsrc->plane.layout.slices[level]; struct pan_image_slice_layout *dst_slice = &slice_infos[level]; - unsigned src_stride = pan_afbc_stride_blocks( - src_modifier, src_slice->afbc.header.row_stride_B); - unsigned src_nr_sblocks = - src_stride * + unsigned nr_sblocks_total = + pan_afbc_stride_blocks( + modifier, src_slice->afbc.header.row_stride_B) * pan_afbc_height_blocks( - src_modifier, u_minify(prsrc->image.props.extent_px.height, level)); + modifier, u_minify(prsrc->image.props.extent_px.height, level)); uint32_t body_offset_B = pan_afbc_body_offset( - dev->arch, dst_modifier, src_slice->afbc.header.surface_size_B); + dev->arch, modifier, src_slice->afbc.header.surface_size_B); uint32_t offset = 0; struct pan_afbc_block_info *meta = metadata_bo->ptr.cpu + metadata_offsets[level]; @@ -2069,28 +2055,24 @@ panfrost_pack_afbc(struct panfrost_context *ctx, * at most 8 kB can be copied at each iteration (smaller values tend to * increase latency). */ alignas(16) struct pan_afbc_block_info meta_chunk[64 * 16]; - unsigned nr_blocks_per_chunk = ARRAY_SIZE(meta_chunk); + unsigned nr_sblocks_per_chunk = ARRAY_SIZE(meta_chunk); - for (unsigned i = 0; i < src_nr_sblocks; i += nr_blocks_per_chunk) { - unsigned nr_sblocks = MIN2(nr_blocks_per_chunk, src_nr_sblocks - i); + for (unsigned i = 0; i < nr_sblocks_total; i += nr_sblocks_per_chunk) { + unsigned nr_sblocks = + MIN2(nr_sblocks_per_chunk, nr_sblocks_total - i); util_streaming_load_memcpy( meta_chunk, &meta[i], nr_sblocks * sizeof(struct pan_afbc_block_info)); for (unsigned j = 0; j < nr_sblocks; j++) { - unsigned idx = j; - if (is_tiled) { - idx &= ~63; - idx += get_morton_index(j & 7, (j & 63) >> 3, src_stride); - } - meta[i + idx].offset = offset; - offset += meta_chunk[idx].size; + meta[i + j].offset = offset; + offset += meta_chunk[j].size; } } total_size = - ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, dst_modifier)); + ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, modifier)); { /* Header layout is exactly the same, only the body is shrunk. */ dst_slice->afbc.header = src_slice->afbc.header; @@ -2125,7 +2107,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx, } char *new_label = - panfrost_resource_new_label(prsrc, dst_modifier, old_user_label); + panfrost_resource_new_label(prsrc, modifier, old_user_label); struct panfrost_bo *dst = panfrost_bo_create(dev, new_size, 0, new_label); @@ -2156,9 +2138,9 @@ panfrost_pack_afbc(struct panfrost_context *ctx, free(old_label); } - assert(!panfrost_is_emulated_mod(dst_modifier)); - prsrc->image.props.modifier = dst_modifier; - prsrc->modifier = dst_modifier; + assert(!panfrost_is_emulated_mod(modifier)); + prsrc->image.props.modifier = modifier; + prsrc->modifier = modifier; panfrost_bo_unreference(prsrc->bo); prsrc->bo = dst; prsrc->plane.base = dst->ptr.gpu;