v3dv: use shared v3d_tile_alloc_sizes() and 128B initial blocks

Replace the inline tile_alloc/TSDA sizing in v3dv_job_allocate_tile_state()
with a call to the new v3d_tile_alloc_sizes() helper. This switches from
64B to 128B initial tile alloc blocks (avoiding overflow for simple draws)
and from a flat 512KB headroom to a draw-proportional formula.

Set tile_allocation_initial_block_size and tile_allocation_block_size
in all TILE_BINNING_MODE_CFG emissions and update the
TILE_LIST_INITIAL_BLOCK_SIZE packets to match.

Benchmarked on RPi5 (V3D 7.1) with GfxBench Vulkan Aztec Ruins at
1920x1040. Average tile_alloc BO size dropped 75% (535 KB to 132 KB)
with 20% fewer OOM events (521 to 417) and no FPS regression.

This avoids exhausting GPU memory when multiple blit or fill jobs
are batched in the same command buffer: each batched job no longer
reserves the flat 512 KB of tile_alloc headroom, which greatly
reduces the memory footprint.

Reviewed-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40554>
This commit is contained in:
Jose Maria Casanova Crespo 2026-03-19 10:41:13 +01:00 committed by Marge Bot
parent 35efce437e
commit 69880047d8
4 changed files with 23 additions and 31 deletions

View file

@ -456,27 +456,14 @@ v3dv_job_allocate_tile_state(struct v3dv_job *job)
const uint32_t layers =
job->allocate_tile_state_for_all_layers ? tiling->layers : 1;
/* The PTB will request the tile alloc initial size per tile at start
* of tile binning.
*/
uint32_t tile_alloc_size = 64 * layers *
tiling->draw_tiles_x *
tiling->draw_tiles_y;
/* The PTB allocates in aligned 4k chunks after the initial setup. */
tile_alloc_size = align(tile_alloc_size, 4096);
/* Include the first two chunk allocations that the PTB does so that
* we definitely clear the OOM condition before triggering one (the HW
* won't trigger OOM during the first allocations).
*/
tile_alloc_size += 8192;
/* For performance, allocate some extra initial memory after the PTB's
* minimal allocations, so that we hopefully don't have to block the
* GPU on the kernel handling an OOM signal.
*/
tile_alloc_size += 512 * 1024;
uint32_t tile_alloc_size, tile_state_size;
v3d_tile_alloc_sizes(layers,
tiling->draw_tiles_x,
tiling->draw_tiles_y,
job->draw_count,
job->device->devinfo.page_size,
&tile_alloc_size,
&tile_state_size);
job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size,
"tile_alloc", true);
@ -487,11 +474,6 @@ v3dv_job_allocate_tile_state(struct v3dv_job *job)
v3dv_job_add_bo_unchecked(job, job->tile_alloc);
const uint32_t tsda_per_tile_size = 256;
const uint32_t tile_state_size = layers *
tiling->draw_tiles_x *
tiling->draw_tiles_y *
tsda_per_tile_size;
job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true);
if (!job->tile_state) {
v3dv_flag_oom(NULL, job);

View file

@ -59,6 +59,10 @@ v3dX(job_emit_enable_double_buffer)(struct v3dv_job *job)
};
config.width_in_pixels = tiling->width;
config.height_in_pixels = tiling->height;
config.tile_allocation_initial_block_size =
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM;
config.tile_allocation_block_size =
V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE_ENUM;
#if V3D_VERSION == 42
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
config.multisample_mode_4x = tiling->msaa;
@ -91,6 +95,10 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
config.width_in_pixels = tiling->width;
config.height_in_pixels = tiling->height;
config.tile_allocation_initial_block_size =
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM;
config.tile_allocation_block_size =
V3D_TILE_ALLOC_OVERFLOW_BLOCK_SIZE_ENUM;
#if V3D_VERSION == 42
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
config.multisample_mode_4x = tiling->msaa;
@ -752,7 +760,8 @@ cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
*/
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
const uint32_t tile_alloc_offset =
64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE *
layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}
@ -1227,7 +1236,7 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM;
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {

View file

@ -178,7 +178,7 @@ emit_rcl_prologue(struct v3dv_job *job,
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM;
}
return rcl;
@ -196,7 +196,8 @@ emit_frame_setup(struct v3dv_job *job,
struct v3dv_cl *rcl = &job->rcl;
const uint32_t tile_alloc_offset =
64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE *
min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}

View file

@ -76,7 +76,7 @@ v3dX(job_emit_noop)(struct v3dv_job *job)
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
V3D_TILE_ALLOC_INITIAL_BLOCK_SIZE_ENUM;
}
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {