tu: Move FDM tile configuration to a new file

This is a well-isolated part of tu_cmd_buffer.cc. Split it out before
expanding it even further for subsampled images.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39868>
This commit is contained in:
Connor Abbott 2026-01-28 11:06:13 -05:00 committed by Marge Bot
parent 5a8ee1a067
commit 2ddb70444e
5 changed files with 512 additions and 471 deletions

View file

@ -48,6 +48,7 @@ libtu_files = files(
'tu_rmv.cc',
'tu_shader.cc',
'tu_suballoc.cc',
'tu_tile_config.cc',
'tu_util.cc',
)

View file

@ -20,6 +20,7 @@
#include "tu_event.h"
#include "tu_image.h"
#include "tu_knl.h"
#include "tu_tile_config.h"
#include "tu_tracepoints.h"
#include "common/freedreno_gpu_event.h"
@ -1310,13 +1311,6 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
return vsc->binning;
}
/* Number of FDM layers to process: the pass's multiview count wins;
 * otherwise fall back to the framebuffer layer count when per-layer FDM
 * is in use.
 */
static uint32_t
tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
{
   uint32_t num_views = cmd->state.pass->num_views;
   if (num_views)
      return num_views;
   if (cmd->state.fdm_per_layer)
      return cmd->state.framebuffer->layers;
   return 1;
}
static bool
use_sysmem_rendering(struct tu_cmd_buffer *cmd,
struct tu_renderpass_result **autotune_result)
@ -1424,59 +1418,6 @@ tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
}
}
/* Per-tile (bin) state used for FDM tiling: where the tile is, which VSC
 * pipe/slots it occupies, which views it is visible in, and the fragment
 * area chosen for each view.
 */
struct tu_tile_config {
   /* Position of the tile, in tile (bin) units. */
   VkOffset2D pos;
   /* Index of the VSC pipe this tile belongs to. */
   uint32_t pipe;
   /* Bitmask of the slots this tile covers within its pipe. */
   uint32_t slot_mask;
   /* Bitmask of views in which this tile intersects the render area. */
   uint32_t visible_views;
   /* The tile this tile was merged with. */
   struct tu_tile_config *merged_tile;
   /* For merged tiles, the extent in tiles when resolved to system memory.
    */
   VkExtent2D sysmem_extent;
   /* For merged tiles, the extent in tiles in GMEM. This can only be more
    * than 1 if there is extra free space from an unused view.
    */
   VkExtent2D gmem_extent;
   /* Per-view fragment area selected for this tile. */
   VkExtent2D frag_areas[MAX_VIEWS];
};
/* For bin offsetting we want to do "Euclidean division," where the remainder
 * (i.e. the offset of the bin) is always positive. Unfortunately C/C++
 * remainder and division don't do this, so we have to implement it ourselves.
 *
 * For example, we should have:
 *
 *    euclid_rem(-3, 4) = 1
 *    euclid_rem(-4, 4) = 0
 *    euclid_rem(-5, 4) = 3
 */
static int32_t
euclid_rem(int32_t dividend, int32_t divisor)
{
   if (dividend >= 0)
      return dividend % divisor;
   /* For a negative dividend, C's % yields a result in (-divisor, 0];
    * mirror it into [0, divisor), mapping an exact multiple to 0.
    */
   int32_t tmp = divisor - (-dividend % divisor);
   return tmp == divisor ? 0 : tmp;
}
/* Calculate how much the bins for a given view should be shifted to the left
 * and upwards, given the application-provided FDM offset.
 */
static VkOffset2D
tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
{
   VkOffset2D shift;
   shift.x = euclid_rem(-fdm_offset.x, tiling->tile0.width);
   shift.y = euclid_rem(-fdm_offset.y, tiling->tile0.height);
   return shift;
}
template <chip CHIP>
static void
tu6_emit_bin_size_gmem(struct tu_cmd_buffer *cmd,
@ -3673,184 +3614,6 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_sanity_check(cs);
}
static void
tu_calc_frag_area(struct tu_cmd_buffer *cmd,
struct tu_tile_config *tile,
const struct tu_image_view *fdm,
const VkOffset2D *fdm_offsets)
{
const struct tu_tiling_config *tiling = cmd->state.tiling;
const uint32_t x1 = tiling->tile0.width * tile->pos.x;
const uint32_t y1 = tiling->tile0.height * tile->pos.y;
const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
unsigned views = tu_fdm_num_layers(cmd);
const struct tu_framebuffer *fb = cmd->state.framebuffer;
struct tu_frag_area raw_areas[views];
if (fdm) {
for (unsigned i = 0; i < views; i++) {
VkOffset2D sample_pos = { 0, 0 };
/* Offsets less than a tile size are accomplished by sliding the
* tiles. However once we shift a whole tile size then we reset the
* tiles back to where they were at the beginning and we need to
* adjust where each bin is sampling from:
*
* x offset = 0:
*
* ------------------------------------
* | * | * | * | (unused) |
* ------------------------------------
*
* x offset = 4:
*
* -------------------------
* | * | * | * | * |
* -------------------------
*
* x offset = 8:
*
* ------------------------------------
* | * | * | * | (unused) |
* ------------------------------------
*
* As the user's offset increases we slide the tiles to the right,
* until we reach the whole tile size and reset the tile positions.
* tu_bin_offset() returns an amount to shift to the left, negating
* the offset.
*
* If we were forced to use a shared viewport, then we must not shift
* over the tiles and instead must only shift when sampling because
* we cannot shift the tiles differently per view. This disables
* smooth transitions of the fragment density map and effectively
* negates the extension.
*
* Note that we cannot clamp x2/y2 to the framebuffer size, as we
* normally would do, because then tiles along the edge would
* incorrectly nudge the sample_pos towards the center of the
* framebuffer. If we shift one complete tile over towards the
* center and reset the tiles as above, the sample_pos would
* then shift back towards the edge and we could get a "pop" from
* suddenly changing density due to the slight shift.
*/
if (fdm_offsets) {
VkOffset2D offset = fdm_offsets[i];
if (!cmd->state.rp.shared_viewport) {
VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
offset.x += bin_offset.x;
offset.y += bin_offset.y;
}
sample_pos.x = (x1 + x2) / 2 - offset.x;
sample_pos.y = (y1 + y2) / 2 - offset.y;
} else {
sample_pos.x = (x1 + MIN2(x2, fb->width)) / 2;
sample_pos.y = (y1 + MIN2(y2, fb->height)) / 2;
}
tu_fragment_density_map_sample(fdm,
sample_pos.x,
sample_pos.y,
fb->width, fb->height, i,
&raw_areas[i]);
}
} else {
for (unsigned i = 0; i < views; i++)
raw_areas[i].width = raw_areas[i].height = 1.0f;
}
for (unsigned i = 0; i < views; i++) {
float floor_x, floor_y;
float area = raw_areas[i].width * raw_areas[i].height;
float frac_x = modff(raw_areas[i].width, &floor_x);
float frac_y = modff(raw_areas[i].height, &floor_y);
/* The Vulkan spec says that a density of 0 results in an undefined
* fragment area. However the blob driver skips rendering tiles with 0
* density, and apps rely on that behavior. Replicate that here.
*/
if (!isfinite(area)) {
tile->frag_areas[i].width = UINT32_MAX;
tile->frag_areas[i].height = UINT32_MAX;
tile->visible_views &= ~(1u << i);
continue;
}
/* The spec allows rounding up one of the axes as long as the total
* area is less than or equal to the original area. Take advantage of
* this to try rounding up the number with the largest fraction.
*/
if ((frac_x > frac_y ? (floor_x + 1.f) * floor_y :
floor_x * (floor_y + 1.f)) <= area) {
if (frac_x > frac_y)
floor_x += 1.f;
else
floor_y += 1.f;
}
uint32_t width = floor_x;
uint32_t height = floor_y;
/* Areas that aren't a power of two, especially large areas, can create
* in floating-point rounding errors when dividing by the area in the
* viewport that result in under-rendering. Round down to a power of two
* to make sure all operations are exact.
*/
width = 1u << util_logbase2(width);
height = 1u << util_logbase2(height);
/* When FDM offset is enabled, the fragment area has to divide the
* offset to make sure that we don't have tiles with partial fragments.
* It would be bad to have the fragment area change as a function of the
* offset, because we'd get "popping" as the resolution changes with the
* offset, so just make sure it divides the offset granularity. This
* should mean it always divides the offset for any possible offset.
*/
if (fdm_offsets) {
width = MIN2(width, TU_FDM_OFFSET_GRANULARITY);
height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
}
/* HW viewport scaling supports a maximum fragment width/height of 4.
*/
if (views <= MAX_HW_SCALED_VIEWS) {
width = MIN2(width, 4);
height = MIN2(height, 4);
}
/* Make sure that the width/height divides the tile width/height so
* we don't have to do extra awkward clamping of the edges of each
* bin when resolving. It also has to divide the fdm offset, if any.
* Note that because the tile width is rounded to a multiple of 32 any
* power of two 32 or less will work, and if there is an offset then it
* must be a multiple of 4 so 2 or 4 will definitely work.
*
* TODO: Try to take advantage of the total area allowance here, too.
*/
while (tiling->tile0.width % width != 0)
width /= 2;
while (tiling->tile0.height % height != 0)
height /= 2;
tile->frag_areas[i].width = width;
tile->frag_areas[i].height = height;
}
/* If at any point we were forced to use the same scaling for all
* viewports, we need to make sure that any users *not* using shared
* scaling, including loads/stores, also consistently share the scaling.
*/
if (cmd->state.rp.shared_viewport) {
VkExtent2D frag_area = { UINT32_MAX, UINT32_MAX };
for (unsigned i = 0; i < views; i++) {
frag_area.width = MIN2(frag_area.width, tile->frag_areas[i].width);
frag_area.height = MIN2(frag_area.height, tile->frag_areas[i].height);
}
for (unsigned i = 0; i < views; i++)
tile->frag_areas[i] = frag_area;
}
}
static void
tu_identity_frag_area(struct tu_cmd_buffer *cmd,
struct tu_tile_config *tile)
@ -3859,239 +3622,6 @@ tu_identity_frag_area(struct tu_cmd_buffer *cmd,
tile->frag_areas[i] = (VkExtent2D) { 1, 1 };
}
static bool
rects_intersect(VkRect2D a, VkRect2D b)
{
return a.offset.x < b.offset.x + (int32_t)b.extent.width &&
b.offset.x < a.offset.x + (int32_t)a.extent.width &&
a.offset.y < b.offset.y + (int32_t)b.extent.height &&
b.offset.y < a.offset.y + (int32_t)a.extent.height;
}
/* Use the render area(s) to figure out which views of the bin are visible.
*/
static void
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
struct tu_tile_config *tile,
const VkOffset2D *offsets)
{
const struct tu_tiling_config *tiling = cmd->state.tiling;
uint32_t views = tu_fdm_num_layers(cmd);
VkRect2D bin = {
{
tile->pos.x * tiling->tile0.width,
tile->pos.y * tiling->tile0.height
},
tiling->tile0
};
tile->visible_views = 0;
for (unsigned i = 0; i < views; i++) {
VkRect2D offsetted_bin = bin;
if (offsets && !cmd->state.rp.shared_viewport) {
VkOffset2D bin_offset = tu_bin_offset(offsets[i], tiling);
offsetted_bin.offset.x -= bin_offset.x;
offsetted_bin.offset.y -= bin_offset.y;
}
if (rects_intersect(offsetted_bin,
cmd->state.per_layer_render_area ?
cmd->state.render_areas[i] :
cmd->state.render_areas[0])) {
tile->visible_views |= (1u << i);
}
}
}
static bool
try_merge_tiles(struct tu_tile_config *dst, struct tu_tile_config *src,
unsigned views, bool has_abs_bin_mask, bool shared_viewport)
{
uint32_t slot_mask = dst->slot_mask | src->slot_mask;
uint32_t visible_views = dst->visible_views | src->visible_views;
/* The fragment areas must be the same for views where both bins are
* visible.
*/
for (unsigned i = 0; i < views; i++) {
if ((dst->visible_views & src->visible_views & (1u << i)) &&
(dst->frag_areas[i].width != src->frag_areas[i].width ||
dst->frag_areas[i].height != src->frag_areas[i].height))
return false;
}
/* The tiles must be vertically or horizontally adjacent and have the
* compatible width/height.
*/
if (dst->pos.x == src->pos.x) {
if (dst->sysmem_extent.height != src->sysmem_extent.height)
return false;
} else if (dst->pos.y == src->pos.y) {
if (dst->sysmem_extent.width != src->sysmem_extent.width)
return false;
} else {
return false;
}
if (dst->gmem_extent.width != src->gmem_extent.width ||
dst->gmem_extent.height != src->gmem_extent.height)
return false;
if (!has_abs_bin_mask) {
/* The mask of the combined tile has to fit in 16 bits */
uint32_t hw_mask = slot_mask >> (ffs(slot_mask) - 1);
if ((hw_mask & 0xffff) != hw_mask)
return false;
}
/* Note, this assumes that dst is below or to the right of src, which is
* how we call this function below.
*/
VkExtent2D extent = {
dst->sysmem_extent.width + (dst->pos.x - src->pos.x),
dst->sysmem_extent.height + (dst->pos.y - src->pos.y),
};
assert(dst->sysmem_extent.height > 0);
/* If only the first view is visible in both tiles, we can reuse the GMEM
* space meant for the rest of the views to multiply the height of the
* tile. We can't do this if we can't override the scissor for different
* views though.
*/
unsigned height_multiplier = 1;
if (visible_views == 1 && views > 1 && dst->gmem_extent.height == 1 &&
!shared_viewport)
height_multiplier = views;
else
height_multiplier = dst->gmem_extent.height;
/* The combined fragment areas must not be smaller than the combined bin
* extent, so that the combined bin is not larger than the original
* unscaled bin.
*/
for (unsigned i = 0; i < views; i++) {
if ((dst->visible_views & (1u << i)) &&
(dst->frag_areas[i].width < extent.width ||
dst->frag_areas[i].height * height_multiplier < extent.height))
return false;
if ((src->visible_views & (1u << i)) &&
(src->frag_areas[i].width < extent.width ||
src->frag_areas[i].height * height_multiplier < extent.height))
return false;
}
/* Ok, let's combine them. dst is below or to the right of src, so it takes
* src's position.
*/
for (unsigned i = 0; i < views; i++) {
if (src->visible_views & ~dst->visible_views & (1u << i))
dst->frag_areas[i] = src->frag_areas[i];
if (((src->visible_views | dst->visible_views) & (1u << i)) &&
dst->frag_areas[i].height < extent.height)
dst->gmem_extent.height = height_multiplier;
}
dst->sysmem_extent = extent;
dst->visible_views = visible_views;
dst->pos = src->pos;
dst->slot_mask = slot_mask;
src->merged_tile = dst;
return true;
}
static void
tu_merge_tiles(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
struct tu_tile_config *tiles,
uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2)
{
bool has_abs_mask =
cmd->device->physical_device->info->props.has_abs_bin_mask;
unsigned views = tu_fdm_num_layers(cmd);
bool shared_viewport = cmd->state.rp.shared_viewport;
uint32_t width = vsc->tile_count.width;
for (uint32_t y = ty1; y < ty2; y++) {
for (uint32_t x = tx1; x < tx2; x++) {
struct tu_tile_config *tile =
&tiles[width * y + x];
if (tile->visible_views == 0)
continue;
if (x > tx1) {
struct tu_tile_config *prev_x_tile = &tiles[width * y + x - 1];
try_merge_tiles(tile, prev_x_tile, views, has_abs_mask,
shared_viewport);
}
if (y > ty1) {
unsigned prev_y_idx = width * (y - 1) + x;
struct tu_tile_config *prev_y_tile = &tiles[prev_y_idx];
/* We can't merge prev_y_tile into tile if it's already been
* merged horizontally into its neighbor in the previous row.
*/
if (!prev_y_tile->merged_tile) {
try_merge_tiles(tile, prev_y_tile, views, has_abs_mask,
shared_viewport);
}
}
}
}
}
static struct tu_tile_config *
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets)
{
struct tu_tile_config *tiles = (struct tu_tile_config *)
calloc(vsc->tile_count.width * vsc->tile_count.height,
sizeof(struct tu_tile_config));
for (uint32_t py = 0; py < vsc->pipe_count.height; py++) {
uint32_t ty1 = py * vsc->pipe0.height;
uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height);
for (uint32_t px = 0; px < vsc->pipe_count.width; px++) {
uint32_t tx1 = px * vsc->pipe0.width;
uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width);
uint32_t pipe_width = tx2 - tx1;
uint32_t pipe = py * vsc->pipe_count.width + px;
/* Initialize tiles and sample fragment density map */
for (uint32_t y = ty1; y < ty2; y++) {
for (uint32_t x = tx1; x < tx2; x++) {
uint32_t tx = x - tx1;
uint32_t ty = y - ty1;
struct tu_tile_config *tile = &tiles[vsc->tile_count.width * y + x];
tile->pos = { x, y };
tile->sysmem_extent = { 1, 1 };
tile->gmem_extent = { 1, 1 };
tile->pipe = pipe;
tile->slot_mask = 1u << (pipe_width * ty + tx);
tile->merged_tile = NULL;
tu_calc_bin_visibility(cmd, tile, fdm_offsets);
tu_calc_frag_area(cmd, tile, fdm, fdm_offsets);
}
}
/* Merge tiles */
/* TODO: we should also be able to merge tiles when only
* per_view_render_areas is used without FDM. That requires using
* another method to force disable draws since we don't want to force
* the viewport to be re-emitted, like overriding the view mask. It
* would also require disabling stores, and adding patchpoints for
* CmdClearAttachments in secondaries or making it use the view mask.
*/
if (!TU_DEBUG(NO_BIN_MERGING) &&
cmd->device->physical_device->info->props.has_bin_mask) {
tu_merge_tiles(cmd, vsc, tiles, tx1, ty1, tx2, ty2);
}
}
}
return tiles;
}
static VkResult
tu_allocate_transient_attachments(struct tu_cmd_buffer *cmd, bool sysmem)
{

View file

@ -954,4 +954,43 @@ tu7_set_thread_br_patchpoint(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
bool force_disable_cb);
/* For bin offsetting we want to do "Euclidean division," where the remainder
 * (i.e. the offset of the bin) is always positive. Unfortunately C/C++
 * remainder and division don't do this, so we have to implement it ourselves.
 *
 * For example, we should have:
 *
 *    euclid_rem(-3, 4) = 1
 *    euclid_rem(-4, 4) = 0
 *    euclid_rem(-5, 4) = 3
 */
static inline int32_t
euclid_rem(int32_t dividend, int32_t divisor)
{
   if (dividend >= 0)
      return dividend % divisor;
   /* For a negative dividend, C's % yields a result in (-divisor, 0];
    * mirror it into [0, divisor), mapping an exact multiple to 0.
    */
   int32_t tmp = divisor - (-dividend % divisor);
   return tmp == divisor ? 0 : tmp;
}
/* Calculate how much the bins for a given view should be shifted to the left
 * and upwards, given the application-provided FDM offset.
 */
static inline VkOffset2D
tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
{
   VkOffset2D shift;
   shift.x = euclid_rem(-fdm_offset.x, tiling->tile0.width);
   shift.y = euclid_rem(-fdm_offset.y, tiling->tile0.height);
   return shift;
}
/* Number of FDM layers to process: the pass's multiview count wins;
 * otherwise fall back to the framebuffer layer count when per-layer FDM
 * is in use.
 */
static inline uint32_t
tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
{
   uint32_t num_views = cmd->state.pass->num_views;
   if (num_views)
      return num_views;
   if (cmd->state.fdm_per_layer)
      return cmd->state.framebuffer->layers;
   return 1;
}
#endif /* TU_CMD_BUFFER_H */

View file

@ -0,0 +1,425 @@
/*
* Copyright © 2026 Valve Corporation.
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* SPDX-License-Identifier: MIT
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*/
#include "tu_cmd_buffer.h"
#include "tu_tile_config.h"
/* Compute the per-view fragment area for one tile: sample the fragment
 * density map (if present) at the tile's center, then quantize the raw
 * area to a power of two that divides the bin size and (if used) the FDM
 * offset granularity. Views with infinite area (zero density) are removed
 * from tile->visible_views.
 */
static void
tu_calc_frag_area(struct tu_cmd_buffer *cmd,
                  struct tu_tile_config *tile,
                  const struct tu_image_view *fdm,
                  const VkOffset2D *fdm_offsets)
{
   const struct tu_tiling_config *tiling = cmd->state.tiling;
   const uint32_t x1 = tiling->tile0.width * tile->pos.x;
   const uint32_t y1 = tiling->tile0.height * tile->pos.y;
   const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
   const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);
   unsigned views = tu_fdm_num_layers(cmd);
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   /* Unquantized per-view areas as sampled from the FDM. */
   struct tu_frag_area raw_areas[views];

   if (fdm) {
      for (unsigned i = 0; i < views; i++) {
         VkOffset2D sample_pos = { 0, 0 };

         /* Offsets less than a tile size are accomplished by sliding the
          * tiles. However once we shift a whole tile size then we reset the
          * tiles back to where they were at the beginning and we need to
          * adjust where each bin is sampling from:
          *
          * x offset = 0:
          *
          * ------------------------------------
          * |   *   |   *   |   *   | (unused) |
          * ------------------------------------
          *
          * x offset = 4:
          *
          *   -------------------------
          *   |   *   |   *   |   *   |   *   |
          *   -------------------------
          *
          * x offset = 8:
          *
          * ------------------------------------
          * |   *   |   *   |   *   | (unused) |
          * ------------------------------------
          *
          * As the user's offset increases we slide the tiles to the right,
          * until we reach the whole tile size and reset the tile positions.
          * tu_bin_offset() returns an amount to shift to the left, negating
          * the offset.
          *
          * If we were forced to use a shared viewport, then we must not shift
          * over the tiles and instead must only shift when sampling because
          * we cannot shift the tiles differently per view. This disables
          * smooth transitions of the fragment density map and effectively
          * negates the extension.
          *
          * Note that we cannot clamp x2/y2 to the framebuffer size, as we
          * normally would do, because then tiles along the edge would
          * incorrectly nudge the sample_pos towards the center of the
          * framebuffer. If we shift one complete tile over towards the
          * center and reset the tiles as above, the sample_pos would
          * then shift back towards the edge and we could get a "pop" from
          * suddenly changing density due to the slight shift.
          */
         if (fdm_offsets) {
            VkOffset2D offset = fdm_offsets[i];
            if (!cmd->state.rp.shared_viewport) {
               VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
               offset.x += bin_offset.x;
               offset.y += bin_offset.y;
            }
            sample_pos.x = (x1 + x2) / 2 - offset.x;
            sample_pos.y = (y1 + y2) / 2 - offset.y;
         } else {
            sample_pos.x = (x1 + MIN2(x2, fb->width)) / 2;
            sample_pos.y = (y1 + MIN2(y2, fb->height)) / 2;
         }

         tu_fragment_density_map_sample(fdm,
                                        sample_pos.x,
                                        sample_pos.y,
                                        fb->width, fb->height, i,
                                        &raw_areas[i]);
      }
   } else {
      /* No density map: identity fragment area everywhere. */
      for (unsigned i = 0; i < views; i++)
         raw_areas[i].width = raw_areas[i].height = 1.0f;
   }

   for (unsigned i = 0; i < views; i++) {
      float floor_x, floor_y;
      float area = raw_areas[i].width * raw_areas[i].height;
      float frac_x = modff(raw_areas[i].width, &floor_x);
      float frac_y = modff(raw_areas[i].height, &floor_y);

      /* The Vulkan spec says that a density of 0 results in an undefined
       * fragment area. However the blob driver skips rendering tiles with 0
       * density, and apps rely on that behavior. Replicate that here.
       */
      if (!isfinite(area)) {
         tile->frag_areas[i].width = UINT32_MAX;
         tile->frag_areas[i].height = UINT32_MAX;
         tile->visible_views &= ~(1u << i);
         continue;
      }

      /* The spec allows rounding up one of the axes as long as the total
       * area is less than or equal to the original area. Take advantage of
       * this to try rounding up the number with the largest fraction.
       */
      if ((frac_x > frac_y ? (floor_x + 1.f) * floor_y :
           floor_x * (floor_y + 1.f)) <= area) {
         if (frac_x > frac_y)
            floor_x += 1.f;
         else
            floor_y += 1.f;
      }

      uint32_t width = floor_x;
      uint32_t height = floor_y;

      /* Areas that aren't a power of two, especially large areas, can result
       * in floating-point rounding errors when dividing by the area in the
       * viewport that result in under-rendering. Round down to a power of two
       * to make sure all operations are exact.
       */
      width = 1u << util_logbase2(width);
      height = 1u << util_logbase2(height);

      /* When FDM offset is enabled, the fragment area has to divide the
       * offset to make sure that we don't have tiles with partial fragments.
       * It would be bad to have the fragment area change as a function of the
       * offset, because we'd get "popping" as the resolution changes with the
       * offset, so just make sure it divides the offset granularity. This
       * should mean it always divides the offset for any possible offset.
       */
      if (fdm_offsets) {
         width = MIN2(width, TU_FDM_OFFSET_GRANULARITY);
         height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
      }

      /* HW viewport scaling supports a maximum fragment width/height of 4.
       */
      if (views <= MAX_HW_SCALED_VIEWS) {
         width = MIN2(width, 4);
         height = MIN2(height, 4);
      }

      /* Make sure that the width/height divides the tile width/height so
       * we don't have to do extra awkward clamping of the edges of each
       * bin when resolving. It also has to divide the fdm offset, if any.
       * Note that because the tile width is rounded to a multiple of 32 any
       * power of two 32 or less will work, and if there is an offset then it
       * must be a multiple of 4 so 2 or 4 will definitely work.
       *
       * TODO: Try to take advantage of the total area allowance here, too.
       */
      while (tiling->tile0.width % width != 0)
         width /= 2;
      while (tiling->tile0.height % height != 0)
         height /= 2;

      tile->frag_areas[i].width = width;
      tile->frag_areas[i].height = height;
   }

   /* If at any point we were forced to use the same scaling for all
    * viewports, we need to make sure that any users *not* using shared
    * scaling, including loads/stores, also consistently share the scaling.
    */
   if (cmd->state.rp.shared_viewport) {
      VkExtent2D frag_area = { UINT32_MAX, UINT32_MAX };
      for (unsigned i = 0; i < views; i++) {
         frag_area.width = MIN2(frag_area.width, tile->frag_areas[i].width);
         frag_area.height = MIN2(frag_area.height, tile->frag_areas[i].height);
      }
      for (unsigned i = 0; i < views; i++)
         tile->frag_areas[i] = frag_area;
   }
}
/* Half-open rectangle overlap test: true iff a and b share any area. */
static bool
rects_intersect(VkRect2D a, VkRect2D b)
{
   const int32_t a_x2 = a.offset.x + (int32_t)a.extent.width;
   const int32_t a_y2 = a.offset.y + (int32_t)a.extent.height;
   const int32_t b_x2 = b.offset.x + (int32_t)b.extent.width;
   const int32_t b_y2 = b.offset.y + (int32_t)b.extent.height;
   /* They overlap iff each rect starts before the other ends on both axes. */
   return a.offset.x < b_x2 && b.offset.x < a_x2 &&
          a.offset.y < b_y2 && b.offset.y < a_y2;
}
/* Use the render area(s) to figure out which views of the bin are visible.
*/
void
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
struct tu_tile_config *tile,
const VkOffset2D *offsets)
{
const struct tu_tiling_config *tiling = cmd->state.tiling;
uint32_t views = tu_fdm_num_layers(cmd);
VkRect2D bin = {
{
tile->pos.x * tiling->tile0.width,
tile->pos.y * tiling->tile0.height
},
tiling->tile0
};
tile->visible_views = 0;
for (unsigned i = 0; i < views; i++) {
VkRect2D offsetted_bin = bin;
if (offsets && !cmd->state.rp.shared_viewport) {
VkOffset2D bin_offset = tu_bin_offset(offsets[i], tiling);
offsetted_bin.offset.x -= bin_offset.x;
offsetted_bin.offset.y -= bin_offset.y;
}
if (rects_intersect(offsetted_bin,
cmd->state.per_layer_render_area ?
cmd->state.render_areas[i] :
cmd->state.render_areas[0])) {
tile->visible_views |= (1u << i);
}
}
}
/* Try to merge src into dst, where dst is assumed to be immediately below or
 * to the right of src. On success, dst absorbs src's slots/views and takes
 * over src's position, src->merged_tile is pointed at dst, and true is
 * returned; on any incompatibility nothing is modified and false is
 * returned.
 */
static bool
try_merge_tiles(struct tu_tile_config *dst, struct tu_tile_config *src,
                unsigned views, bool has_abs_bin_mask, bool shared_viewport)
{
   uint32_t slot_mask = dst->slot_mask | src->slot_mask;
   uint32_t visible_views = dst->visible_views | src->visible_views;

   /* The fragment areas must be the same for views where both bins are
    * visible.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & src->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width != src->frag_areas[i].width ||
           dst->frag_areas[i].height != src->frag_areas[i].height))
         return false;
   }

   /* The tiles must be vertically or horizontally adjacent and have the
    * compatible width/height.
    */
   if (dst->pos.x == src->pos.x) {
      if (dst->sysmem_extent.height != src->sysmem_extent.height)
         return false;
   } else if (dst->pos.y == src->pos.y) {
      if (dst->sysmem_extent.width != src->sysmem_extent.width)
         return false;
   } else {
      return false;
   }

   if (dst->gmem_extent.width != src->gmem_extent.width ||
       dst->gmem_extent.height != src->gmem_extent.height)
      return false;

   if (!has_abs_bin_mask) {
      /* The mask of the combined tile has to fit in 16 bits */
      uint32_t hw_mask = slot_mask >> (ffs(slot_mask) - 1);
      if ((hw_mask & 0xffff) != hw_mask)
         return false;
   }

   /* Note, this assumes that dst is below or to the right of src, which is
    * how we call this function below.
    */
   VkExtent2D extent = {
      dst->sysmem_extent.width + (dst->pos.x - src->pos.x),
      dst->sysmem_extent.height + (dst->pos.y - src->pos.y),
   };

   assert(dst->sysmem_extent.height > 0);

   /* If only the first view is visible in both tiles, we can reuse the GMEM
    * space meant for the rest of the views to multiply the height of the
    * tile. We can't do this if we can't override the scissor for different
    * views though.
    */
   unsigned height_multiplier = 1;
   if (visible_views == 1 && views > 1 && dst->gmem_extent.height == 1 &&
       !shared_viewport)
      height_multiplier = views;
   else
      height_multiplier = dst->gmem_extent.height;

   /* The combined fragment areas must not be smaller than the combined bin
    * extent, so that the combined bin is not larger than the original
    * unscaled bin.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width < extent.width ||
           dst->frag_areas[i].height * height_multiplier < extent.height))
         return false;
      if ((src->visible_views & (1u << i)) &&
          (src->frag_areas[i].width < extent.width ||
           src->frag_areas[i].height * height_multiplier < extent.height))
         return false;
   }

   /* Ok, let's combine them. dst is below or to the right of src, so it takes
    * src's position.
    */
   for (unsigned i = 0; i < views; i++) {
      if (src->visible_views & ~dst->visible_views & (1u << i))
         dst->frag_areas[i] = src->frag_areas[i];
      /* NOTE(review): when any visible view's fragment area height doesn't
       * cover the combined extent, the GMEM height is raised to the
       * multiplier computed above — confirm this is meant to be an
       * assignment rather than an accumulation across views.
       */
      if (((src->visible_views | dst->visible_views) & (1u << i)) &&
          dst->frag_areas[i].height < extent.height)
         dst->gmem_extent.height = height_multiplier;
   }

   dst->sysmem_extent = extent;
   dst->visible_views = visible_views;
   dst->pos = src->pos;
   dst->slot_mask = slot_mask;
   src->merged_tile = dst;
   return true;
}
static void
tu_merge_tiles(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
struct tu_tile_config *tiles,
uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2)
{
bool has_abs_mask =
cmd->device->physical_device->info->props.has_abs_bin_mask;
unsigned views = tu_fdm_num_layers(cmd);
bool shared_viewport = cmd->state.rp.shared_viewport;
uint32_t width = vsc->tile_count.width;
for (uint32_t y = ty1; y < ty2; y++) {
for (uint32_t x = tx1; x < tx2; x++) {
struct tu_tile_config *tile =
&tiles[width * y + x];
if (tile->visible_views == 0)
continue;
if (x > tx1) {
struct tu_tile_config *prev_x_tile = &tiles[width * y + x - 1];
try_merge_tiles(tile, prev_x_tile, views, has_abs_mask,
shared_viewport);
}
if (y > ty1) {
unsigned prev_y_idx = width * (y - 1) + x;
struct tu_tile_config *prev_y_tile = &tiles[prev_y_idx];
/* We can't merge prev_y_tile into tile if it's already been
* merged horizontally into its neighbor in the previous row.
*/
if (!prev_y_tile->merged_tile) {
try_merge_tiles(tile, prev_y_tile, views, has_abs_mask,
shared_viewport);
}
}
}
}
}
/* Build the per-tile configuration array for the whole framebuffer: walk
 * every VSC pipe, initialize each tile (position, pipe, slot mask), compute
 * its visibility and per-view fragment areas, then optionally merge
 * compatible tiles within the pipe. The caller owns the returned
 * heap-allocated array of tile_count.width * tile_count.height entries and
 * must free() it.
 *
 * NOTE(review): the calloc() result is not checked, so an allocation failure
 * crashes on the first tile write below — consider propagating NULL.
 */
struct tu_tile_config *
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
                    const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets)
{
   struct tu_tile_config *tiles = (struct tu_tile_config *)
      calloc(vsc->tile_count.width * vsc->tile_count.height,
             sizeof(struct tu_tile_config));

   for (uint32_t py = 0; py < vsc->pipe_count.height; py++) {
      uint32_t ty1 = py * vsc->pipe0.height;
      uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height);
      for (uint32_t px = 0; px < vsc->pipe_count.width; px++) {
         uint32_t tx1 = px * vsc->pipe0.width;
         uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width);
         /* Edge pipes may be narrower than pipe0. */
         uint32_t pipe_width = tx2 - tx1;
         uint32_t pipe = py * vsc->pipe_count.width + px;

         /* Initialize tiles and sample fragment density map */
         for (uint32_t y = ty1; y < ty2; y++) {
            for (uint32_t x = tx1; x < tx2; x++) {
               /* Tile coordinates relative to the pipe, used for the slot. */
               uint32_t tx = x - tx1;
               uint32_t ty = y - ty1;
               struct tu_tile_config *tile = &tiles[vsc->tile_count.width * y + x];
               tile->pos = { x, y };
               tile->sysmem_extent = { 1, 1 };
               tile->gmem_extent = { 1, 1 };
               tile->pipe = pipe;
               tile->slot_mask = 1u << (pipe_width * ty + tx);
               tile->merged_tile = NULL;
               tu_calc_bin_visibility(cmd, tile, fdm_offsets);
               tu_calc_frag_area(cmd, tile, fdm, fdm_offsets);
            }
         }

         /* Merge tiles */
         /* TODO: we should also be able to merge tiles when only
          * per_view_render_areas is used without FDM. That requires using
          * another method to force disable draws since we don't want to force
          * the viewport to be re-emitted, like overriding the view mask. It
          * would also require disabling stores, and adding patchpoints for
          * CmdClearAttachments in secondaries or making it use the view mask.
          */
         if (!TU_DEBUG(NO_BIN_MERGING) &&
             cmd->device->physical_device->info->props.has_bin_mask) {
            tu_merge_tiles(cmd, vsc, tiles, tx1, ty1, tx2, ty2);
         }
      }
   }

   return tiles;
}

View file

@ -0,0 +1,46 @@
/*
* Copyright © 2026 Valve Corporation.
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* SPDX-License-Identifier: MIT
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*/
#include "tu_common.h"
#ifndef TU_TILE_CONFIG_H
#define TU_TILE_CONFIG_H
/* Per-tile (bin) state used for FDM tiling: where the tile is, which VSC
 * pipe/slots it occupies, which views it is visible in, and the fragment
 * area chosen for each view.
 */
struct tu_tile_config {
   /* Position of the tile, in tile (bin) units. */
   VkOffset2D pos;
   /* Index of the VSC pipe this tile belongs to. */
   uint32_t pipe;
   /* Bitmask of the slots this tile covers within its pipe. */
   uint32_t slot_mask;
   /* Bitmask of views in which this tile intersects the render area. */
   uint32_t visible_views;
   /* The tile this tile was merged with. */
   struct tu_tile_config *merged_tile;
   /* For merged tiles, the extent in tiles when resolved to system memory.
    */
   VkExtent2D sysmem_extent;
   /* For merged tiles, the extent in tiles in GMEM. This can only be more
    * than 1 if there is extra free space from an unused view.
    */
   VkExtent2D gmem_extent;
   /* Per-view fragment area selected for this tile. */
   VkExtent2D frag_areas[MAX_VIEWS];
};
struct tu_tile_config *
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets);
void
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
struct tu_tile_config *tile,
const VkOffset2D *offsets);
#endif