From 16627035ef897be294ebfd5598ba8a676f8d8c8e Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 29 Dec 2025 22:12:29 -0500 Subject: [PATCH] asahi: tightly pack tilebuffer order render targets by alignment, eliminating gaps. this is the same trick we use in RA. dEQP-GLES3.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.3 now allocates as 32x32 instead of 32x16. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Mary Guillemard Part-of: --- src/asahi/lib/agx_tilebuffer.c | 68 ++++++++++++------------- src/asahi/lib/tests/test-tilebuffer.cpp | 10 ++-- 2 files changed, 38 insertions(+), 40 deletions(-) diff --git a/src/asahi/lib/agx_tilebuffer.c b/src/asahi/lib/agx_tilebuffer.c index f4a982ffdfe..535cab35984 100644 --- a/src/asahi/lib/agx_tilebuffer.c +++ b/src/asahi/lib/agx_tilebuffer.c @@ -58,6 +58,14 @@ agx_shared_layout_from_tile_size(struct agx_tile_size t) UNREACHABLE("Invalid tile size"); } +static inline unsigned +format_align_B(enum pipe_format format) +{ + /* For some reason util_format_get_blocksize(NONE) = 1 */ + enum pipe_format phys = ail_pixel_format[format].renderable; + return (format != PIPE_FORMAT_NONE) ? util_format_get_blocksize(phys) : 0; +} + struct agx_tilebuffer_layout agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs, uint8_t nr_samples, bool layered) @@ -68,51 +76,41 @@ agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs, }; uint32_t offset_B = 0; + uint8_t order[] = {0, 1, 2, 3, 4, 5, 6, 7}; - for (unsigned rt = 0; rt < nr_cbufs; ++rt) { - tib.logical_format[rt] = formats[rt]; + /* Sort render targets in descending order of alignment, eliminating padding + * and giving the optimal order of render targets. We use insertion sort + * because it is simple, stable, fast for small n, and free for n=1. + */ + for (int i = 1; i < nr_cbufs; ++i) { + for (int j = i; j > 0 && format_align_B(formats[order[j - 1]]) < + format_align_B(formats[order[j]]); + --j) { + SWAP(order[j], order[j - 1]); + } + } - /* If there are gaps in the layout, don't allocate holes. Obscure, - * PIPE_FORMAT_NONE has a size of 1, not 0. - */ - if (formats[rt] == PIPE_FORMAT_NONE) - continue; + for (unsigned i = 0; i < nr_cbufs; ++i) { + unsigned rt = order[i]; + enum pipe_format format = formats[rt]; + tib.logical_format[rt] = format; - /* Require natural alignment for tilebuffer allocations. This could be - * optimized, but this shouldn't be a problem in practice. - */ - enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt); - unsigned align_B = util_format_get_blocksize(physical_fmt); - assert(util_is_power_of_two_nonzero(align_B) && - util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) && - align_B < MAX_BYTES_PER_SAMPLE && - "max bytes per sample divisible by alignment"); + assert(util_is_aligned(offset_B, MAX2(format_align_B(formats[rt]), 1)) && + "loop invariant ensured by the sort"); - offset_B = ALIGN_POT(offset_B, align_B); - assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above"); - - /* Determine the size, if we were to allocate this render target to the - * tilebuffer as desired. - */ - unsigned nr = util_format_get_nr_components(physical_fmt) == 1 - ? util_format_get_nr_components(formats[rt]) - : 1; - - unsigned size_B = align_B * nr; - unsigned new_offset_B = offset_B + size_B; + unsigned size_B = format_align_B(format); + enum pipe_format phys = ail_pixel_format[format].renderable; + if (util_format_get_nr_components(phys) == 1) { + size_B *= util_format_get_nr_components(format); + } /* If allocating this render target would exceed any tilebuffer limits, we - * need to spill it to memory. We continue processing in case there are - * smaller render targets after that would still fit. Otherwise, we - * allocate it to the tilebuffer. - * - * TODO: Suboptimal, we might be able to reorder render targets to - * avoid fragmentation causing spilling. + * need to spill it to memory. Otherwise, allocate it to the tilebuffer. */ + unsigned new_offset_B = offset_B + size_B; bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) && (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX * nr_samples) <= MAX_BYTES_PER_TILE; - if (fits) { tib._offset_B[rt] = offset_B; offset_B = new_offset_B; diff --git a/src/asahi/lib/tests/test-tilebuffer.cpp b/src/asahi/lib/tests/test-tilebuffer.cpp index 7f4f9fb920e..609058854f5 100644 --- a/src/asahi/lib/tests/test-tilebuffer.cpp +++ b/src/asahi/lib/tests/test-tilebuffer.cpp @@ -64,7 +64,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - ._offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 16, 0, 18, 8 }, .sample_size_B = 24, .nr_samples = 1, .tile_size = { 32, 32 }, @@ -81,7 +81,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - ._offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 16, 0, 18, 8 }, .sample_size_B = 24, .nr_samples = 2, .tile_size = { 32, 16 }, @@ -98,7 +98,7 @@ struct test tests[] = { PIPE_FORMAT_R32G32_SINT, }, { - ._offset_B = { 0, 4, 12, 16 }, + ._offset_B = { 16, 0, 18, 8 }, .sample_size_B = 24, .nr_samples = 4, .tile_size = { 16, 16 }, @@ -110,7 +110,7 @@ struct test tests[] = { 1, { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R16G16_SNORM }, { - ._offset_B = { 0, 2 }, + ._offset_B = { 4, 0 }, .sample_size_B = 8, .nr_samples = 1, .tile_size = { 32, 32 }, @@ -122,7 +122,7 @@ struct test tests[] = { 1, { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM }, { - ._offset_B = { 0, 4 }, + ._offset_B = { 4, 0 }, .sample_size_B = 8, .nr_samples = 1, .tile_size = { 32, 32 },