broadcom/common: add tile alloc block size macros and sizing helper

Add V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE = 128 and
V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE = 64 to v3d_limits.h.

Corresponding _ENUM macros provide the 2-bit hardware encoding for the
TILE_BINNING_MODE_CFG packets.

The previous implicit 64B initial blocks were too small: a single draw
call emits ~88 bytes of per-tile BCL state, immediately overflowing
into continuation blocks. 128B initial blocks avoid the first
continuation allocation for simple single-draw passes.

Add v3d_tile_alloc_sizes() to v3d_util with the full tile alloc BO and
TSDA sizing logic. It uses the 128B initial blocks, and the tile alloc
headroom becomes proportional to the number of draws and to the size of
the initial block allocation, capped at the previous fixed allocation.
As a result, jobs with 0 or 1 draw calls (blits/fills) reduce their
headroom dramatically.

The draw-proportional formula replaces a flat 512 KB continuation pool:

  headroom = MIN2((tiles_size * draw_count) / 2, 512 KB)

Benchmarked on RPi5 (V3D 7.1) against GfxBench GL tests and
apitrace replays at 1080p. Tile-alloc memory reduction versus the
flat 512 KB headroom (taking into account the 256 KB kernel allocation per OOM):

  GfxBench (5 benchmarks): -45% to -70% reduction, OOM at or below baseline
  Apitrace (19 traces): -4% to -77%  reduction on 20/24 traces

No FPS regressions observed on any workload.

Reviewed-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40554>
This commit is contained in:
Jose Maria Casanova Crespo 2026-03-19 10:37:40 +01:00 committed by Marge Bot
parent 4542982062
commit 47fa229605
3 changed files with 71 additions and 0 deletions

View file

@ -71,4 +71,20 @@
#define V3D_MAX_VERTEX_ATTRIB_DIVISOR 0xffff
/* Block sizes, in bytes, that the PTB uses for tile allocation: one
 * size for the initial block of each tile and one for the continuation
 * (overflow) blocks.
 *
 * The *_ENUM variants are the encodings expected by the
 * TILE_BINNING_MODE_CFG / TILE_LIST_INITIAL_BLOCK_SIZE packets, where
 * bytes == 64 << enum_value (0 = 64B, 1 = 128B, 2 = 256B).
 *
 * A single draw emits ~88 bytes of state per tile, so 128B initial
 * blocks keep simple draws from overflowing, while 64B continuation
 * blocks reduce internal fragmentation in the tile allocation pool.
 */
#define V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE 128
#define V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE 64
#define V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM \
        (V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE / 128)
#define V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE_ENUM \
        (V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE / 128)
#endif /* V3D_LIMITS_H */

View file

@ -22,7 +22,9 @@
*/
#include "v3d_util.h"
#include "v3d_limits.h"
#include "util/macros.h"
#include "util/u_math.h"
/* Choose a number of workgroups per supergroup that maximizes
* lane occupancy. We can pack up to 16 workgroups into a supergroup.
@ -266,6 +268,50 @@ v3d_internal_bpp_words(uint32_t internal_bpp)
}
}
/**
 * Computes the sizes of the tile allocation BO and of the tile state
 * data array (TSDA) for a binning job.
 *
 * @param layers          Number of layers being binned; must be > 0.
 * @param tiles_x         Number of tiles in the X direction.
 * @param tiles_y         Number of tiles in the Y direction.
 * @param draws           Number of draw calls in the job; scales the
 *                        pre-allocated continuation pool.
 * @param page_size       Alignment applied to the final tile alloc size.
 * @param tile_alloc_size Out: size in bytes of the tile allocation BO.
 * @param tile_state_size Out: size in bytes of the TSDA (256 bytes per
 *                        tile and layer).
 */
void
v3d_tile_alloc_sizes(uint32_t layers,
                     uint32_t tiles_x,
                     uint32_t tiles_y,
                     uint32_t draws,
                     uint32_t page_size,
                     uint32_t *tile_alloc_size,
                     uint32_t *tile_state_size)
{
        assert(layers > 0);

        /* The PTB will request the tile alloc initial size per tile at
         * start of tile binning. The size must match the initial block
         * size configured in the TILE_BINNING_MODE_CFG packet.
         */
        uint32_t tiles_size =
                layers * tiles_x * tiles_y * V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE;

        /* The PTB allocates in aligned 4k chunks after the initial setup. */
        uint32_t alloc_size = align(tiles_size, 4096);

        /* Include the first two chunk allocations that the PTB does so
         * that we definitely clear the OOM condition before triggering
         * one (the HW won't trigger OOM during the first allocations).
         */
        alloc_size += 8192;

        /* Pre-allocate a continuation pool so the GPU rarely has to stall
         * waiting for the kernel OOM handler. Each draw call writes
         * per-tile BCL state (primitives, uniforms, shader records) whose
         * size scales with both the number of tiles and the number of
         * draws. Use (tiles_size * draws) / 2 as an estimate, capped at
         * 512 KB to avoid over-allocating on high draw-count scenes.
         *
         * The formula assumes the initial block size of 128B, so if it is
         * changed it needs to be adjusted.
         */
        STATIC_ASSERT(V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE == 128);

        /* Do the product in 64 bits: with many draws it exceeds 32 bits,
         * and a wrapped value could slip under the cap and leave the job
         * with far less headroom than intended.
         */
        const uint64_t max_headroom = 512 * 1024;
        const uint64_t headroom = ((uint64_t)tiles_size * draws) / 2;
        alloc_size += (uint32_t)MIN2(headroom, max_headroom);

        /* Align the total to page_size. */
        alloc_size = align(alloc_size, page_size);

        *tile_alloc_size = alloc_size;
        *tile_state_size = layers * tiles_x * tiles_y * 256;
}
uint32_t
v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
uint32_t bpp)

View file

@ -98,6 +98,15 @@ log2_tile_size(uint32_t size)
}
}
/**
 * Computes the tile allocation BO size and the tile state data array
 * size for a binning job.
 *
 * layers (must be > 0), tiles_x and tiles_y give the number of tiles to
 * bin. draws scales the pre-allocated continuation pool, which is capped
 * at 512 KB. The tile alloc total is aligned to page_size.
 *
 * Results are written to *tile_alloc_size and *tile_state_size, both in
 * bytes; the tile state size is 256 bytes per tile and layer.
 */
void
v3d_tile_alloc_sizes(uint32_t layers,
uint32_t tiles_x,
uint32_t tiles_y,
uint32_t draws,
uint32_t page_size,
uint32_t *tile_alloc_size,
uint32_t *tile_state_size);
uint32_t
v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
uint32_t bpp);