mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
panfrost: Fix AFBC packing
Layout refactoring commits broke AFBC packing while removing several fields to simplify the logic. The stride and height are now derived when necessary at packing time based on the resource modifier. The problem is that the code assumes that the source and destination headers are the same although the source and destination modifiers might differ and create size mismatches when passed to the AFBC utilities in pan_afbc.h. The destination modifier is set as the source modifier without the AFBC_FORMAT_MOD_SPARSE and AFBC_FORMAT_MOD_TILED flags. While the AFBC_FORMAT_MOD_SPARSE flag doesn't have any impact on these utilities, the AFBC_FORMAT_MOD_TILED flag does. This commit fixes the issue by keeping the same header block layout (linear or tiled header layout) when packing a resource. This allows to simply parse header blocks linearly without having to bother with the internal layout (Morton order). The tiled packed resource might also benefit from better cache accesses. Fixes:a2e9ce39e9pan/layout: Drop pan_image_slice_layout::afbc::{stride_sb,nr_sblocks} Fixes:01d325ba63pan/layout: Interleave header/body in AFBC(3D) Signed-off-by: Loïc Molinari <loic.molinari@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35001>
This commit is contained in:
parent
34b693914a
commit
98c8c95755
2 changed files with 25 additions and 71 deletions
|
|
@ -75,27 +75,6 @@ write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
|
|||
nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
|
||||
nir_def *dst_stride)
|
||||
{
|
||||
nir_def *x = nir_umod(b, idx, dst_stride);
|
||||
nir_def *y = nir_udiv(b, idx, dst_stride);
|
||||
|
||||
nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
|
||||
offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
|
||||
|
||||
x = nir_iand_imm(b, x, 0x7);
|
||||
x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
|
||||
x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
|
||||
y = nir_iand_imm(b, y, 0x7);
|
||||
y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
|
||||
y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
|
||||
nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
|
||||
|
||||
return nir_iadd(b, offset, tile_idx);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
|
||||
nir_def *uncompressed_size)
|
||||
|
|
@ -167,16 +146,15 @@ get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
|
|||
#define MAX_LINE_SIZE 16
|
||||
|
||||
static void
|
||||
copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
|
||||
nir_def *src, nir_def *src_idx, nir_def *metadata,
|
||||
nir_def *meta_idx, unsigned align)
|
||||
copy_superblock(nir_builder *b, nir_def *dst, nir_def *hdr_sz, nir_def *src,
|
||||
nir_def *metadata, nir_def *idx, unsigned align)
|
||||
{
|
||||
nir_def *hdr = read_afbc_header(b, src, src_idx);
|
||||
nir_def *hdr = read_afbc_header(b, src, idx);
|
||||
nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
|
||||
nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);
|
||||
|
||||
nir_def *size;
|
||||
nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
|
||||
nir_def *dst_offset = get_packed_offset(b, metadata, idx, &size);
|
||||
nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
|
||||
nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);
|
||||
|
||||
|
|
@ -184,7 +162,7 @@ copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
|
|||
nir_def *hdr2 =
|
||||
nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
|
||||
hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
|
||||
write_afbc_header(b, dst, dst_idx, hdr);
|
||||
write_afbc_header(b, dst, idx, hdr);
|
||||
|
||||
nir_variable *offset_var =
|
||||
nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
|
||||
|
|
@ -256,7 +234,6 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
|
|||
const struct pan_mod_convert_shader_key *key)
|
||||
{
|
||||
unsigned align = key->afbc.align;
|
||||
bool tiled = key->mod & AFBC_FORMAT_MOD_TILED;
|
||||
struct panfrost_device *dev = pan_device(&screen->base);
|
||||
nir_builder b = nir_builder_init_simple_shader(
|
||||
MESA_SHADER_COMPUTE, pan_shader_get_compiler_options(dev->arch),
|
||||
|
|
@ -265,19 +242,14 @@ panfrost_create_afbc_pack_shader(struct panfrost_screen *screen,
|
|||
panfrost_afbc_add_info_ubo(pack, b);
|
||||
|
||||
nir_def *coord = nir_load_global_invocation_id(&b, 32);
|
||||
nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
|
||||
nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
|
||||
nir_def *dst_idx = nir_channel(&b, coord, 0);
|
||||
nir_def *src_idx =
|
||||
tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
|
||||
nir_def *idx = nir_channel(&b, coord, 0);
|
||||
nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
|
||||
nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
|
||||
nir_def *header_size =
|
||||
nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
|
||||
nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);
|
||||
|
||||
copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
|
||||
src_idx, align);
|
||||
copy_superblock(&b, dst, header_size, src, metadata, idx, align);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
|
@ -460,7 +432,7 @@ panfrost_get_afbc_pack_shaders(struct panfrost_context *ctx,
|
|||
struct panfrost_resource *rsrc, unsigned align)
|
||||
{
|
||||
struct pan_mod_convert_shader_key key = {
|
||||
.mod = DRM_FORMAT_MOD_ARM_AFBC(rsrc->modifier & AFBC_FORMAT_MOD_TILED),
|
||||
.mod = DRM_FORMAT_MOD_ARM_AFBC(0),
|
||||
.afbc = {
|
||||
.bpp = util_format_get_blocksizebits(rsrc->base.format),
|
||||
.align = align,
|
||||
|
|
|
|||
|
|
@ -1464,16 +1464,6 @@ pan_dump_resource(struct panfrost_context *ctx, struct panfrost_resource *rsc)
|
|||
|
||||
#endif
|
||||
|
||||
/* Get scan-order index from (x, y) position when blocks are
|
||||
* arranged in z-order in 8x8 tiles */
|
||||
static unsigned
|
||||
get_morton_index(unsigned x, unsigned y, unsigned stride)
|
||||
{
|
||||
unsigned i = ((x << 0) & 1) | ((y << 1) & 2) | ((x << 1) & 4) |
|
||||
((y << 2) & 8) | ((x << 2) & 16) | ((y << 3) & 32);
|
||||
return (((y & ~7) * stride) + ((x & ~7) << 3)) + i;
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_store_tiled_images(struct panfrost_transfer *transfer,
|
||||
struct panfrost_resource *rsrc)
|
||||
|
|
@ -2020,10 +2010,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
|
|||
|
||||
assert(prsrc->base.array_size == 1);
|
||||
|
||||
uint64_t src_modifier = prsrc->modifier;
|
||||
uint64_t dst_modifier =
|
||||
src_modifier & ~(AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SPARSE);
|
||||
bool is_tiled = src_modifier & AFBC_FORMAT_MOD_TILED;
|
||||
uint64_t modifier = prsrc->modifier & ~AFBC_FORMAT_MOD_SPARSE;
|
||||
unsigned last_level = prsrc->base.last_level;
|
||||
struct pan_image_slice_layout slice_infos[PIPE_MAX_TEXTURE_LEVELS] = {0};
|
||||
unsigned total_size = 0;
|
||||
|
|
@ -2049,14 +2036,13 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
|
|||
struct pan_image_slice_layout *src_slice =
|
||||
&prsrc->plane.layout.slices[level];
|
||||
struct pan_image_slice_layout *dst_slice = &slice_infos[level];
|
||||
unsigned src_stride = pan_afbc_stride_blocks(
|
||||
src_modifier, src_slice->afbc.header.row_stride_B);
|
||||
unsigned src_nr_sblocks =
|
||||
src_stride *
|
||||
unsigned nr_sblocks_total =
|
||||
pan_afbc_stride_blocks(
|
||||
modifier, src_slice->afbc.header.row_stride_B) *
|
||||
pan_afbc_height_blocks(
|
||||
src_modifier, u_minify(prsrc->image.props.extent_px.height, level));
|
||||
modifier, u_minify(prsrc->image.props.extent_px.height, level));
|
||||
uint32_t body_offset_B = pan_afbc_body_offset(
|
||||
dev->arch, dst_modifier, src_slice->afbc.header.surface_size_B);
|
||||
dev->arch, modifier, src_slice->afbc.header.surface_size_B);
|
||||
uint32_t offset = 0;
|
||||
struct pan_afbc_block_info *meta =
|
||||
metadata_bo->ptr.cpu + metadata_offsets[level];
|
||||
|
|
@ -2069,28 +2055,24 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
|
|||
* at most 8 kB can be copied at each iteration (smaller values tend to
|
||||
* increase latency). */
|
||||
alignas(16) struct pan_afbc_block_info meta_chunk[64 * 16];
|
||||
unsigned nr_blocks_per_chunk = ARRAY_SIZE(meta_chunk);
|
||||
unsigned nr_sblocks_per_chunk = ARRAY_SIZE(meta_chunk);
|
||||
|
||||
for (unsigned i = 0; i < src_nr_sblocks; i += nr_blocks_per_chunk) {
|
||||
unsigned nr_sblocks = MIN2(nr_blocks_per_chunk, src_nr_sblocks - i);
|
||||
for (unsigned i = 0; i < nr_sblocks_total; i += nr_sblocks_per_chunk) {
|
||||
unsigned nr_sblocks =
|
||||
MIN2(nr_sblocks_per_chunk, nr_sblocks_total - i);
|
||||
|
||||
util_streaming_load_memcpy(
|
||||
meta_chunk, &meta[i],
|
||||
nr_sblocks * sizeof(struct pan_afbc_block_info));
|
||||
|
||||
for (unsigned j = 0; j < nr_sblocks; j++) {
|
||||
unsigned idx = j;
|
||||
if (is_tiled) {
|
||||
idx &= ~63;
|
||||
idx += get_morton_index(j & 7, (j & 63) >> 3, src_stride);
|
||||
}
|
||||
meta[i + idx].offset = offset;
|
||||
offset += meta_chunk[idx].size;
|
||||
meta[i + j].offset = offset;
|
||||
offset += meta_chunk[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
total_size =
|
||||
ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, dst_modifier));
|
||||
ALIGN_POT(total_size, pan_afbc_header_align(dev->arch, modifier));
|
||||
{
|
||||
/* Header layout is exactly the same, only the body is shrunk. */
|
||||
dst_slice->afbc.header = src_slice->afbc.header;
|
||||
|
|
@ -2125,7 +2107,7 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
|
|||
}
|
||||
|
||||
char *new_label =
|
||||
panfrost_resource_new_label(prsrc, dst_modifier, old_user_label);
|
||||
panfrost_resource_new_label(prsrc, modifier, old_user_label);
|
||||
|
||||
struct panfrost_bo *dst =
|
||||
panfrost_bo_create(dev, new_size, 0, new_label);
|
||||
|
|
@ -2156,9 +2138,9 @@ panfrost_pack_afbc(struct panfrost_context *ctx,
|
|||
free(old_label);
|
||||
}
|
||||
|
||||
assert(!panfrost_is_emulated_mod(dst_modifier));
|
||||
prsrc->image.props.modifier = dst_modifier;
|
||||
prsrc->modifier = dst_modifier;
|
||||
assert(!panfrost_is_emulated_mod(modifier));
|
||||
prsrc->image.props.modifier = modifier;
|
||||
prsrc->modifier = modifier;
|
||||
panfrost_bo_unreference(prsrc->bo);
|
||||
prsrc->bo = dst;
|
||||
prsrc->plane.base = dst->ptr.gpu;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue