mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-11 13:30:31 +01:00
tu: Move FDM tile configuration to a new file
This is a well-isolated part of tu_cmd_buffer.cc. Split it out before expanding it even further for subsampled images. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39868>
This commit is contained in:
parent
5a8ee1a067
commit
2ddb70444e
5 changed files with 512 additions and 471 deletions
|
|
@ -48,6 +48,7 @@ libtu_files = files(
|
|||
'tu_rmv.cc',
|
||||
'tu_shader.cc',
|
||||
'tu_suballoc.cc',
|
||||
'tu_tile_config.cc',
|
||||
'tu_util.cc',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@
|
|||
#include "tu_event.h"
|
||||
#include "tu_image.h"
|
||||
#include "tu_knl.h"
|
||||
#include "tu_tile_config.h"
|
||||
#include "tu_tracepoints.h"
|
||||
|
||||
#include "common/freedreno_gpu_event.h"
|
||||
|
|
@ -1310,13 +1311,6 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
|
|||
return vsc->binning;
|
||||
}
|
||||
|
||||
/* Number of layers/views the FDM state applies to: the render pass's
 * multiview count when non-zero, otherwise the framebuffer layer count when
 * per-layer FDM is in use, otherwise a single layer.
 */
static uint32_t
tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
{
   uint32_t num_views = cmd->state.pass->num_views;
   if (num_views != 0)
      return num_views;

   return cmd->state.fdm_per_layer ? cmd->state.framebuffer->layers : 1;
}
|
||||
|
||||
static bool
|
||||
use_sysmem_rendering(struct tu_cmd_buffer *cmd,
|
||||
struct tu_renderpass_result **autotune_result)
|
||||
|
|
@ -1424,59 +1418,6 @@ tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
}
|
||||
}
|
||||
|
||||
/* Per-bin state computed before a tiled (GMEM) render: position and VSC
 * pipe/slot assignment, per-view visibility and fragment density, plus the
 * bookkeeping needed to merge adjacent low-density bins into one larger bin.
 */
struct tu_tile_config {
   /* Position of the tile, in units of whole tiles. */
   VkOffset2D pos;
   /* Index of the VSC pipe this tile belongs to. */
   uint32_t pipe;
   /* Mask of slots within the pipe covered by this (possibly merged) tile. */
   uint32_t slot_mask;
   /* Bitmask of views for which this tile intersects the render area. */
   uint32_t visible_views;

   /* The tile this tile was merged with. */
   struct tu_tile_config *merged_tile;

   /* For merged tiles, the extent in tiles when resolved to system memory.
    */
   VkExtent2D sysmem_extent;

   /* For merged tiles, the extent in tiles in GMEM. This can only be more
    * than 1 if there is extra free space from an unused view.
    */
   VkExtent2D gmem_extent;

   /* Per-view fragment area (density scale) chosen for this tile. */
   VkExtent2D frag_areas[MAX_VIEWS];
};
|
||||
|
||||
/* For bin offsetting we want to do "Euclidean division," where the remainder
 * (i.e. the offset of the bin) is always positive. Unfortunately C/C++
 * remainder and division don't do this, so we have to implement it ourselves.
 *
 * For example, we should have:
 *
 * euclid_rem(-3, 4) = 1
 * euclid_rem(-4, 4) = 0
 * euclid_rem(-5, 4) = 3
 *
 * Note: despite the parameter names, "divisor" is the dividend and
 * "divisend" is the divisor; divisend must be positive and non-zero.
 */

static int32_t
euclid_rem(int32_t divisor, int32_t divisend)
{
   /* A non-negative dividend already yields a non-negative C remainder. */
   if (divisor >= 0)
      return divisor % divisend;
   /* For negative dividends, fold the C remainder back into [0, divisend). */
   int32_t tmp = divisend - (-divisor % divisend);
   return tmp == divisend ? 0 : tmp;
}
|
||||
|
||||
/* Calculate how much the bins for a given view should be shifted to the left
|
||||
* and upwards, given the application-provided FDM offset.
|
||||
*/
|
||||
static VkOffset2D
|
||||
tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
|
||||
{
|
||||
return (VkOffset2D) {
|
||||
euclid_rem(-fdm_offset.x, tiling->tile0.width),
|
||||
euclid_rem(-fdm_offset.y, tiling->tile0.height),
|
||||
};
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_bin_size_gmem(struct tu_cmd_buffer *cmd,
|
||||
|
|
@ -3673,184 +3614,6 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
/* Compute tile->frag_areas[] for every view of this bin: sample the fragment
 * density map (if present) at the bin center, then round the raw area to a
 * power of two that divides the bin dimensions (and the FDM offset
 * granularity, when offsets are used). Views whose sampled area is not
 * finite (density 0) are dropped from tile->visible_views so the bin is
 * skipped entirely for them, matching the blob driver's behavior.
 */
static void
tu_calc_frag_area(struct tu_cmd_buffer *cmd,
                  struct tu_tile_config *tile,
                  const struct tu_image_view *fdm,
                  const VkOffset2D *fdm_offsets)
{
   const struct tu_tiling_config *tiling = cmd->state.tiling;
   /* Bin bounds in pixels, clamped to the maximum viewport size. */
   const uint32_t x1 = tiling->tile0.width * tile->pos.x;
   const uint32_t y1 = tiling->tile0.height * tile->pos.y;
   const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
   const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);

   unsigned views = tu_fdm_num_layers(cmd);
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   struct tu_frag_area raw_areas[views];
   if (fdm) {
      for (unsigned i = 0; i < views; i++) {
         VkOffset2D sample_pos = { 0, 0 };

         /* Offsets less than a tile size are accomplished by sliding the
          * tiles. However once we shift a whole tile size then we reset the
          * tiles back to where they were at the beginning and we need to
          * adjust where each bin is sampling from:
          *
          * x offset = 0:
          *
          * ------------------------------------
          * | * | * | * | (unused) |
          * ------------------------------------
          *
          * x offset = 4:
          *
          * -------------------------
          * | * | * | * | * |
          * -------------------------
          *
          * x offset = 8:
          *
          * ------------------------------------
          * | * | * | * | (unused) |
          * ------------------------------------
          *
          * As the user's offset increases we slide the tiles to the right,
          * until we reach the whole tile size and reset the tile positions.
          * tu_bin_offset() returns an amount to shift to the left, negating
          * the offset.
          *
          * If we were forced to use a shared viewport, then we must not shift
          * over the tiles and instead must only shift when sampling because
          * we cannot shift the tiles differently per view. This disables
          * smooth transitions of the fragment density map and effectively
          * negates the extension.
          *
          * Note that we cannot clamp x2/y2 to the framebuffer size, as we
          * normally would do, because then tiles along the edge would
          * incorrectly nudge the sample_pos towards the center of the
          * framebuffer. If we shift one complete tile over towards the
          * center and reset the tiles as above, the sample_pos would
          * then shift back towards the edge and we could get a "pop" from
          * suddenly changing density due to the slight shift.
          */
         if (fdm_offsets) {
            VkOffset2D offset = fdm_offsets[i];
            if (!cmd->state.rp.shared_viewport) {
               VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
               offset.x += bin_offset.x;
               offset.y += bin_offset.y;
            }
            sample_pos.x = (x1 + x2) / 2 - offset.x;
            sample_pos.y = (y1 + y2) / 2 - offset.y;
         } else {
            sample_pos.x = (x1 + MIN2(x2, fb->width)) / 2;
            sample_pos.y = (y1 + MIN2(y2, fb->height)) / 2;
         }

         tu_fragment_density_map_sample(fdm,
                                        sample_pos.x,
                                        sample_pos.y,
                                        fb->width, fb->height, i,
                                        &raw_areas[i]);
      }
   } else {
      /* No density map: every view renders at full density. */
      for (unsigned i = 0; i < views; i++)
         raw_areas[i].width = raw_areas[i].height = 1.0f;
   }

   for (unsigned i = 0; i < views; i++) {
      float floor_x, floor_y;
      float area = raw_areas[i].width * raw_areas[i].height;
      float frac_x = modff(raw_areas[i].width, &floor_x);
      float frac_y = modff(raw_areas[i].height, &floor_y);

      /* The Vulkan spec says that a density of 0 results in an undefined
       * fragment area. However the blob driver skips rendering tiles with 0
       * density, and apps rely on that behavior. Replicate that here.
       */
      if (!isfinite(area)) {
         tile->frag_areas[i].width = UINT32_MAX;
         tile->frag_areas[i].height = UINT32_MAX;
         tile->visible_views &= ~(1u << i);
         continue;
      }

      /* The spec allows rounding up one of the axes as long as the total
       * area is less than or equal to the original area. Take advantage of
       * this to try rounding up the number with the largest fraction.
       */
      if ((frac_x > frac_y ? (floor_x + 1.f) * floor_y :
           floor_x * (floor_y + 1.f)) <= area) {
         if (frac_x > frac_y)
            floor_x += 1.f;
         else
            floor_y += 1.f;
      }
      uint32_t width = floor_x;
      uint32_t height = floor_y;

      /* Areas that aren't a power of two, especially large areas, can result
       * in floating-point rounding errors when dividing by the area in the
       * viewport that result in under-rendering. Round down to a power of two
       * to make sure all operations are exact.
       */
      width = 1u << util_logbase2(width);
      height = 1u << util_logbase2(height);

      /* When FDM offset is enabled, the fragment area has to divide the
       * offset to make sure that we don't have tiles with partial fragments.
       * It would be bad to have the fragment area change as a function of the
       * offset, because we'd get "popping" as the resolution changes with the
       * offset, so just make sure it divides the offset granularity. This
       * should mean it always divides the offset for any possible offset.
       */
      if (fdm_offsets) {
         width = MIN2(width, TU_FDM_OFFSET_GRANULARITY);
         height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
      }

      /* HW viewport scaling supports a maximum fragment width/height of 4.
       */
      if (views <= MAX_HW_SCALED_VIEWS) {
         width = MIN2(width, 4);
         height = MIN2(height, 4);
      }

      /* Make sure that the width/height divides the tile width/height so
       * we don't have to do extra awkward clamping of the edges of each
       * bin when resolving. It also has to divide the fdm offset, if any.
       * Note that because the tile width is rounded to a multiple of 32 any
       * power of two 32 or less will work, and if there is an offset then it
       * must be a multiple of 4 so 2 or 4 will definitely work.
       *
       * TODO: Try to take advantage of the total area allowance here, too.
       */
      while (tiling->tile0.width % width != 0)
         width /= 2;
      while (tiling->tile0.height % height != 0)
         height /= 2;

      tile->frag_areas[i].width = width;
      tile->frag_areas[i].height = height;
   }

   /* If at any point we were forced to use the same scaling for all
    * viewports, we need to make sure that any users *not* using shared
    * scaling, including loads/stores, also consistently share the scaling.
    */
   if (cmd->state.rp.shared_viewport) {
      VkExtent2D frag_area = { UINT32_MAX, UINT32_MAX };
      for (unsigned i = 0; i < views; i++) {
         frag_area.width = MIN2(frag_area.width, tile->frag_areas[i].width);
         frag_area.height = MIN2(frag_area.height, tile->frag_areas[i].height);
      }

      for (unsigned i = 0; i < views; i++)
         tile->frag_areas[i] = frag_area;
   }
}
|
||||
|
||||
static void
|
||||
tu_identity_frag_area(struct tu_cmd_buffer *cmd,
|
||||
struct tu_tile_config *tile)
|
||||
|
|
@ -3859,239 +3622,6 @@ tu_identity_frag_area(struct tu_cmd_buffer *cmd,
|
|||
tile->frag_areas[i] = (VkExtent2D) { 1, 1 };
|
||||
}
|
||||
|
||||
static bool
|
||||
rects_intersect(VkRect2D a, VkRect2D b)
|
||||
{
|
||||
return a.offset.x < b.offset.x + (int32_t)b.extent.width &&
|
||||
b.offset.x < a.offset.x + (int32_t)a.extent.width &&
|
||||
a.offset.y < b.offset.y + (int32_t)b.extent.height &&
|
||||
b.offset.y < a.offset.y + (int32_t)a.extent.height;
|
||||
}
|
||||
|
||||
/* Use the render area(s) to figure out which views of the bin are visible.
|
||||
*/
|
||||
static void
|
||||
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
|
||||
struct tu_tile_config *tile,
|
||||
const VkOffset2D *offsets)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
uint32_t views = tu_fdm_num_layers(cmd);
|
||||
VkRect2D bin = {
|
||||
{
|
||||
tile->pos.x * tiling->tile0.width,
|
||||
tile->pos.y * tiling->tile0.height
|
||||
},
|
||||
tiling->tile0
|
||||
};
|
||||
|
||||
tile->visible_views = 0;
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
VkRect2D offsetted_bin = bin;
|
||||
if (offsets && !cmd->state.rp.shared_viewport) {
|
||||
VkOffset2D bin_offset = tu_bin_offset(offsets[i], tiling);
|
||||
offsetted_bin.offset.x -= bin_offset.x;
|
||||
offsetted_bin.offset.y -= bin_offset.y;
|
||||
}
|
||||
|
||||
if (rects_intersect(offsetted_bin,
|
||||
cmd->state.per_layer_render_area ?
|
||||
cmd->state.render_areas[i] :
|
||||
cmd->state.render_areas[0])) {
|
||||
tile->visible_views |= (1u << i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to merge src into dst, forming one larger bin that is rendered once
 * and resolved over the combined area. dst must be the tile directly below
 * or to the right of src. On success dst absorbs src (position, slot mask,
 * visibility, extents), src->merged_tile is pointed at dst, and true is
 * returned; on failure both tiles are left untouched.
 */
static bool
try_merge_tiles(struct tu_tile_config *dst, struct tu_tile_config *src,
                unsigned views, bool has_abs_bin_mask, bool shared_viewport)
{
   uint32_t slot_mask = dst->slot_mask | src->slot_mask;
   uint32_t visible_views = dst->visible_views | src->visible_views;

   /* The fragment areas must be the same for views where both bins are
    * visible.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & src->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width != src->frag_areas[i].width ||
           dst->frag_areas[i].height != src->frag_areas[i].height))
         return false;
   }

   /* The tiles must be vertically or horizontally adjacent and have the
    * compatible width/height.
    * NOTE(review): the x-equal (vertically adjacent) case compares heights
    * and the y-equal case compares widths — confirm this pairing is
    * intended, since a rectangular vertical merge would seem to require
    * equal widths.
    */
   if (dst->pos.x == src->pos.x) {
      if (dst->sysmem_extent.height != src->sysmem_extent.height)
         return false;
   } else if (dst->pos.y == src->pos.y) {
      if (dst->sysmem_extent.width != src->sysmem_extent.width)
         return false;
   } else {
      return false;
   }

   if (dst->gmem_extent.width != src->gmem_extent.width ||
       dst->gmem_extent.height != src->gmem_extent.height)
      return false;

   if (!has_abs_bin_mask) {
      /* The mask of the combined tile has to fit in 16 bits */
      uint32_t hw_mask = slot_mask >> (ffs(slot_mask) - 1);
      if ((hw_mask & 0xffff) != hw_mask)
         return false;
   }

   /* Note, this assumes that dst is below or to the right of src, which is
    * how we call this function below.
    */
   VkExtent2D extent = {
      dst->sysmem_extent.width + (dst->pos.x - src->pos.x),
      dst->sysmem_extent.height + (dst->pos.y - src->pos.y),
   };

   assert(dst->sysmem_extent.height > 0);

   /* If only the first view is visible in both tiles, we can reuse the GMEM
    * space meant for the rest of the views to multiply the height of the
    * tile. We can't do this if we can't override the scissor for different
    * views though.
    */
   unsigned height_multiplier = 1;
   if (visible_views == 1 && views > 1 && dst->gmem_extent.height == 1 &&
       !shared_viewport)
      height_multiplier = views;
   else
      height_multiplier = dst->gmem_extent.height;

   /* The combined fragment areas must not be smaller than the combined bin
    * extent, so that the combined bin is not larger than the original
    * unscaled bin.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width < extent.width ||
           dst->frag_areas[i].height * height_multiplier < extent.height))
         return false;
      if ((src->visible_views & (1u << i)) &&
          (src->frag_areas[i].width < extent.width ||
           src->frag_areas[i].height * height_multiplier < extent.height))
         return false;
   }

   /* Ok, let's combine them. dst is below or to the right of src, so it takes
    * src's position.
    */
   for (unsigned i = 0; i < views; i++) {
      if (src->visible_views & ~dst->visible_views & (1u << i))
         dst->frag_areas[i] = src->frag_areas[i];
      if (((src->visible_views | dst->visible_views) & (1u << i)) &&
          dst->frag_areas[i].height < extent.height)
         dst->gmem_extent.height = height_multiplier;
   }
   dst->sysmem_extent = extent;
   dst->visible_views = visible_views;
   dst->pos = src->pos;
   dst->slot_mask = slot_mask;

   src->merged_tile = dst;

   return true;
}
|
||||
|
||||
/* Walk the tiles of one VSC pipe (the [tx1,tx2) x [ty1,ty2) sub-range of the
 * tile grid) in row-major order and greedily merge each visible tile with
 * its left and upper neighbor where try_merge_tiles() allows it.
 */
static void
tu_merge_tiles(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
               struct tu_tile_config *tiles,
               uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2)
{
   bool has_abs_mask =
      cmd->device->physical_device->info->props.has_abs_bin_mask;
   unsigned views = tu_fdm_num_layers(cmd);
   bool shared_viewport = cmd->state.rp.shared_viewport;
   /* Row stride of the full tile grid (tiles covers the whole framebuffer,
    * not just this pipe).
    */
   uint32_t width = vsc->tile_count.width;

   for (uint32_t y = ty1; y < ty2; y++) {
      for (uint32_t x = tx1; x < tx2; x++) {
         struct tu_tile_config *tile =
            &tiles[width * y + x];
         /* Invisible tiles are skipped entirely; nothing to merge. */
         if (tile->visible_views == 0)
            continue;
         if (x > tx1) {
            struct tu_tile_config *prev_x_tile = &tiles[width * y + x - 1];
            try_merge_tiles(tile, prev_x_tile, views, has_abs_mask,
                            shared_viewport);
         }
         if (y > ty1) {
            unsigned prev_y_idx = width * (y - 1) + x;
            struct tu_tile_config *prev_y_tile = &tiles[prev_y_idx];

            /* We can't merge prev_y_tile into tile if it's already been
             * merged horizontally into its neighbor in the previous row.
             */
            if (!prev_y_tile->merged_tile) {
               try_merge_tiles(tile, prev_y_tile, views, has_abs_mask,
                               shared_viewport);
            }
         }
      }
   }
}
|
||||
|
||||
/* Build the per-tile configuration array for the whole framebuffer: one
 * tu_tile_config per tile (zero-initialized via calloc; the returned array
 * is heap-allocated — presumably freed by the caller). For each VSC pipe,
 * every tile is initialized with its position/pipe/slot, visibility and
 * fragment areas are computed, and adjacent compatible bins are merged
 * unless bin merging is disabled or unsupported.
 * NOTE(review): the calloc() result is dereferenced unchecked — confirm the
 * driver's OOM policy here.
 */
static struct tu_tile_config *
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
                    const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets)
{
   struct tu_tile_config *tiles = (struct tu_tile_config *)
      calloc(vsc->tile_count.width * vsc->tile_count.height,
             sizeof(struct tu_tile_config));

   for (uint32_t py = 0; py < vsc->pipe_count.height; py++) {
      /* Tile range covered by this row of pipes, clamped to the grid. */
      uint32_t ty1 = py * vsc->pipe0.height;
      uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height);
      for (uint32_t px = 0; px < vsc->pipe_count.width; px++) {
         uint32_t tx1 = px * vsc->pipe0.width;
         uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width);
         uint32_t pipe_width = tx2 - tx1;
         uint32_t pipe = py * vsc->pipe_count.width + px;

         /* Initialize tiles and sample fragment density map */
         for (uint32_t y = ty1; y < ty2; y++) {
            for (uint32_t x = tx1; x < tx2; x++) {
               /* (tx, ty) is the tile's position within its pipe. */
               uint32_t tx = x - tx1;
               uint32_t ty = y - ty1;
               struct tu_tile_config *tile = &tiles[vsc->tile_count.width * y + x];

               tile->pos = { x, y };
               tile->sysmem_extent = { 1, 1 };
               tile->gmem_extent = { 1, 1 };
               tile->pipe = pipe;
               tile->slot_mask = 1u << (pipe_width * ty + tx);
               tile->merged_tile = NULL;
               tu_calc_bin_visibility(cmd, tile, fdm_offsets);
               tu_calc_frag_area(cmd, tile, fdm, fdm_offsets);
            }
         }

         /* Merge tiles */
         /* TODO: we should also be able to merge tiles when only
          * per_view_render_areas is used without FDM. That requires using
          * another method to force disable draws since we don't want to force
          * the viewport to be re-emitted, like overriding the view mask. It
          * would also require disabling stores, and adding patchpoints for
          * CmdClearAttachments in secondaries or making it use the view mask.
          */
         if (!TU_DEBUG(NO_BIN_MERGING) &&
             cmd->device->physical_device->info->props.has_bin_mask) {
            tu_merge_tiles(cmd, vsc, tiles, tx1, ty1, tx2, ty2);
         }
      }
   }

   return tiles;
}
|
||||
|
||||
static VkResult
|
||||
tu_allocate_transient_attachments(struct tu_cmd_buffer *cmd, bool sysmem)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -954,4 +954,43 @@ tu7_set_thread_br_patchpoint(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
bool force_disable_cb);
|
||||
|
||||
/* For bin offsetting we want to do "Euclidean division," where the remainder
 * (i.e. the offset of the bin) is always positive. Unfortunately C/C++
 * remainder and division don't do this, so we have to implement it ourselves.
 *
 * For example, we should have:
 *
 * euclid_rem(-3, 4) = 1
 * euclid_rem(-4, 4) = 0
 * euclid_rem(-5, 4) = 3
 *
 * Note: despite the parameter names, "divisor" is the dividend and
 * "divisend" is the divisor; divisend must be positive and non-zero.
 */

static inline int32_t
euclid_rem(int32_t divisor, int32_t divisend)
{
   /* A non-negative dividend already yields a non-negative C remainder. */
   if (divisor >= 0)
      return divisor % divisend;
   /* For negative dividends, fold the C remainder back into [0, divisend). */
   int32_t tmp = divisend - (-divisor % divisend);
   return tmp == divisend ? 0 : tmp;
}
|
||||
|
||||
/* Calculate how much the bins for a given view should be shifted to the left
|
||||
* and upwards, given the application-provided FDM offset.
|
||||
*/
|
||||
static inline VkOffset2D
|
||||
tu_bin_offset(VkOffset2D fdm_offset, const struct tu_tiling_config *tiling)
|
||||
{
|
||||
return (VkOffset2D) {
|
||||
euclid_rem(-fdm_offset.x, tiling->tile0.width),
|
||||
euclid_rem(-fdm_offset.y, tiling->tile0.height),
|
||||
};
|
||||
}
|
||||
|
||||
/* Number of layers/views the FDM state applies to: the render pass's
 * multiview count when non-zero, otherwise the framebuffer layer count when
 * per-layer FDM is in use, otherwise a single layer.
 */
static inline uint32_t
tu_fdm_num_layers(const struct tu_cmd_buffer *cmd)
{
   uint32_t num_views = cmd->state.pass->num_views;
   if (num_views != 0)
      return num_views;

   return cmd->state.fdm_per_layer ? cmd->state.framebuffer->layers : 1;
}
|
||||
|
||||
#endif /* TU_CMD_BUFFER_H */
|
||||
|
|
|
|||
425
src/freedreno/vulkan/tu_tile_config.cc
Normal file
425
src/freedreno/vulkan/tu_tile_config.cc
Normal file
|
|
@ -0,0 +1,425 @@
|
|||
/*
|
||||
* Copyright © 2026 Valve Corporation.
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* based in part on anv driver which is:
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "tu_cmd_buffer.h"
|
||||
#include "tu_tile_config.h"
|
||||
|
||||
/* Compute tile->frag_areas[] for every view of this bin: sample the fragment
 * density map (if present) at the bin center, then round the raw area to a
 * power of two that divides the bin dimensions (and the FDM offset
 * granularity, when offsets are used). Views whose sampled area is not
 * finite (density 0) are dropped from tile->visible_views so the bin is
 * skipped entirely for them, matching the blob driver's behavior.
 */
static void
tu_calc_frag_area(struct tu_cmd_buffer *cmd,
                  struct tu_tile_config *tile,
                  const struct tu_image_view *fdm,
                  const VkOffset2D *fdm_offsets)
{
   const struct tu_tiling_config *tiling = cmd->state.tiling;
   /* Bin bounds in pixels, clamped to the maximum viewport size. */
   const uint32_t x1 = tiling->tile0.width * tile->pos.x;
   const uint32_t y1 = tiling->tile0.height * tile->pos.y;
   const uint32_t x2 = MIN2(x1 + tiling->tile0.width, MAX_VIEWPORT_SIZE);
   const uint32_t y2 = MIN2(y1 + tiling->tile0.height, MAX_VIEWPORT_SIZE);

   unsigned views = tu_fdm_num_layers(cmd);
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   struct tu_frag_area raw_areas[views];
   if (fdm) {
      for (unsigned i = 0; i < views; i++) {
         VkOffset2D sample_pos = { 0, 0 };

         /* Offsets less than a tile size are accomplished by sliding the
          * tiles. However once we shift a whole tile size then we reset the
          * tiles back to where they were at the beginning and we need to
          * adjust where each bin is sampling from:
          *
          * x offset = 0:
          *
          * ------------------------------------
          * | * | * | * | (unused) |
          * ------------------------------------
          *
          * x offset = 4:
          *
          * -------------------------
          * | * | * | * | * |
          * -------------------------
          *
          * x offset = 8:
          *
          * ------------------------------------
          * | * | * | * | (unused) |
          * ------------------------------------
          *
          * As the user's offset increases we slide the tiles to the right,
          * until we reach the whole tile size and reset the tile positions.
          * tu_bin_offset() returns an amount to shift to the left, negating
          * the offset.
          *
          * If we were forced to use a shared viewport, then we must not shift
          * over the tiles and instead must only shift when sampling because
          * we cannot shift the tiles differently per view. This disables
          * smooth transitions of the fragment density map and effectively
          * negates the extension.
          *
          * Note that we cannot clamp x2/y2 to the framebuffer size, as we
          * normally would do, because then tiles along the edge would
          * incorrectly nudge the sample_pos towards the center of the
          * framebuffer. If we shift one complete tile over towards the
          * center and reset the tiles as above, the sample_pos would
          * then shift back towards the edge and we could get a "pop" from
          * suddenly changing density due to the slight shift.
          */
         if (fdm_offsets) {
            VkOffset2D offset = fdm_offsets[i];
            if (!cmd->state.rp.shared_viewport) {
               VkOffset2D bin_offset = tu_bin_offset(fdm_offsets[i], tiling);
               offset.x += bin_offset.x;
               offset.y += bin_offset.y;
            }
            sample_pos.x = (x1 + x2) / 2 - offset.x;
            sample_pos.y = (y1 + y2) / 2 - offset.y;
         } else {
            sample_pos.x = (x1 + MIN2(x2, fb->width)) / 2;
            sample_pos.y = (y1 + MIN2(y2, fb->height)) / 2;
         }

         tu_fragment_density_map_sample(fdm,
                                        sample_pos.x,
                                        sample_pos.y,
                                        fb->width, fb->height, i,
                                        &raw_areas[i]);
      }
   } else {
      /* No density map: every view renders at full density. */
      for (unsigned i = 0; i < views; i++)
         raw_areas[i].width = raw_areas[i].height = 1.0f;
   }

   for (unsigned i = 0; i < views; i++) {
      float floor_x, floor_y;
      float area = raw_areas[i].width * raw_areas[i].height;
      float frac_x = modff(raw_areas[i].width, &floor_x);
      float frac_y = modff(raw_areas[i].height, &floor_y);

      /* The Vulkan spec says that a density of 0 results in an undefined
       * fragment area. However the blob driver skips rendering tiles with 0
       * density, and apps rely on that behavior. Replicate that here.
       */
      if (!isfinite(area)) {
         tile->frag_areas[i].width = UINT32_MAX;
         tile->frag_areas[i].height = UINT32_MAX;
         tile->visible_views &= ~(1u << i);
         continue;
      }

      /* The spec allows rounding up one of the axes as long as the total
       * area is less than or equal to the original area. Take advantage of
       * this to try rounding up the number with the largest fraction.
       */
      if ((frac_x > frac_y ? (floor_x + 1.f) * floor_y :
           floor_x * (floor_y + 1.f)) <= area) {
         if (frac_x > frac_y)
            floor_x += 1.f;
         else
            floor_y += 1.f;
      }
      uint32_t width = floor_x;
      uint32_t height = floor_y;

      /* Areas that aren't a power of two, especially large areas, can result
       * in floating-point rounding errors when dividing by the area in the
       * viewport that result in under-rendering. Round down to a power of two
       * to make sure all operations are exact.
       */
      width = 1u << util_logbase2(width);
      height = 1u << util_logbase2(height);

      /* When FDM offset is enabled, the fragment area has to divide the
       * offset to make sure that we don't have tiles with partial fragments.
       * It would be bad to have the fragment area change as a function of the
       * offset, because we'd get "popping" as the resolution changes with the
       * offset, so just make sure it divides the offset granularity. This
       * should mean it always divides the offset for any possible offset.
       */
      if (fdm_offsets) {
         width = MIN2(width, TU_FDM_OFFSET_GRANULARITY);
         height = MIN2(height, TU_FDM_OFFSET_GRANULARITY);
      }

      /* HW viewport scaling supports a maximum fragment width/height of 4.
       */
      if (views <= MAX_HW_SCALED_VIEWS) {
         width = MIN2(width, 4);
         height = MIN2(height, 4);
      }

      /* Make sure that the width/height divides the tile width/height so
       * we don't have to do extra awkward clamping of the edges of each
       * bin when resolving. It also has to divide the fdm offset, if any.
       * Note that because the tile width is rounded to a multiple of 32 any
       * power of two 32 or less will work, and if there is an offset then it
       * must be a multiple of 4 so 2 or 4 will definitely work.
       *
       * TODO: Try to take advantage of the total area allowance here, too.
       */
      while (tiling->tile0.width % width != 0)
         width /= 2;
      while (tiling->tile0.height % height != 0)
         height /= 2;

      tile->frag_areas[i].width = width;
      tile->frag_areas[i].height = height;
   }

   /* If at any point we were forced to use the same scaling for all
    * viewports, we need to make sure that any users *not* using shared
    * scaling, including loads/stores, also consistently share the scaling.
    */
   if (cmd->state.rp.shared_viewport) {
      VkExtent2D frag_area = { UINT32_MAX, UINT32_MAX };
      for (unsigned i = 0; i < views; i++) {
         frag_area.width = MIN2(frag_area.width, tile->frag_areas[i].width);
         frag_area.height = MIN2(frag_area.height, tile->frag_areas[i].height);
      }

      for (unsigned i = 0; i < views; i++)
         tile->frag_areas[i] = frag_area;
   }
}
|
||||
|
||||
static bool
|
||||
rects_intersect(VkRect2D a, VkRect2D b)
|
||||
{
|
||||
return a.offset.x < b.offset.x + (int32_t)b.extent.width &&
|
||||
b.offset.x < a.offset.x + (int32_t)a.extent.width &&
|
||||
a.offset.y < b.offset.y + (int32_t)b.extent.height &&
|
||||
b.offset.y < a.offset.y + (int32_t)a.extent.height;
|
||||
}
|
||||
|
||||
/* Use the render area(s) to figure out which views of the bin are visible.
|
||||
*/
|
||||
void
|
||||
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
|
||||
struct tu_tile_config *tile,
|
||||
const VkOffset2D *offsets)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = cmd->state.tiling;
|
||||
uint32_t views = tu_fdm_num_layers(cmd);
|
||||
VkRect2D bin = {
|
||||
{
|
||||
tile->pos.x * tiling->tile0.width,
|
||||
tile->pos.y * tiling->tile0.height
|
||||
},
|
||||
tiling->tile0
|
||||
};
|
||||
|
||||
tile->visible_views = 0;
|
||||
for (unsigned i = 0; i < views; i++) {
|
||||
VkRect2D offsetted_bin = bin;
|
||||
if (offsets && !cmd->state.rp.shared_viewport) {
|
||||
VkOffset2D bin_offset = tu_bin_offset(offsets[i], tiling);
|
||||
offsetted_bin.offset.x -= bin_offset.x;
|
||||
offsetted_bin.offset.y -= bin_offset.y;
|
||||
}
|
||||
|
||||
if (rects_intersect(offsetted_bin,
|
||||
cmd->state.per_layer_render_area ?
|
||||
cmd->state.render_areas[i] :
|
||||
cmd->state.render_areas[0])) {
|
||||
tile->visible_views |= (1u << i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to merge src into dst, forming one larger bin that is rendered once
 * and resolved over the combined area. dst must be the tile directly below
 * or to the right of src. On success dst absorbs src (position, slot mask,
 * visibility, extents), src->merged_tile is pointed at dst, and true is
 * returned; on failure both tiles are left untouched.
 */
static bool
try_merge_tiles(struct tu_tile_config *dst, struct tu_tile_config *src,
                unsigned views, bool has_abs_bin_mask, bool shared_viewport)
{
   uint32_t slot_mask = dst->slot_mask | src->slot_mask;
   uint32_t visible_views = dst->visible_views | src->visible_views;

   /* The fragment areas must be the same for views where both bins are
    * visible.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & src->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width != src->frag_areas[i].width ||
           dst->frag_areas[i].height != src->frag_areas[i].height))
         return false;
   }

   /* The tiles must be vertically or horizontally adjacent and have the
    * compatible width/height.
    * NOTE(review): the x-equal (vertically adjacent) case compares heights
    * and the y-equal case compares widths — confirm this pairing is
    * intended, since a rectangular vertical merge would seem to require
    * equal widths.
    */
   if (dst->pos.x == src->pos.x) {
      if (dst->sysmem_extent.height != src->sysmem_extent.height)
         return false;
   } else if (dst->pos.y == src->pos.y) {
      if (dst->sysmem_extent.width != src->sysmem_extent.width)
         return false;
   } else {
      return false;
   }

   if (dst->gmem_extent.width != src->gmem_extent.width ||
       dst->gmem_extent.height != src->gmem_extent.height)
      return false;

   if (!has_abs_bin_mask) {
      /* The mask of the combined tile has to fit in 16 bits */
      uint32_t hw_mask = slot_mask >> (ffs(slot_mask) - 1);
      if ((hw_mask & 0xffff) != hw_mask)
         return false;
   }

   /* Note, this assumes that dst is below or to the right of src, which is
    * how we call this function below.
    */
   VkExtent2D extent = {
      dst->sysmem_extent.width + (dst->pos.x - src->pos.x),
      dst->sysmem_extent.height + (dst->pos.y - src->pos.y),
   };

   assert(dst->sysmem_extent.height > 0);

   /* If only the first view is visible in both tiles, we can reuse the GMEM
    * space meant for the rest of the views to multiply the height of the
    * tile. We can't do this if we can't override the scissor for different
    * views though.
    */
   unsigned height_multiplier = 1;
   if (visible_views == 1 && views > 1 && dst->gmem_extent.height == 1 &&
       !shared_viewport)
      height_multiplier = views;
   else
      height_multiplier = dst->gmem_extent.height;

   /* The combined fragment areas must not be smaller than the combined bin
    * extent, so that the combined bin is not larger than the original
    * unscaled bin.
    */
   for (unsigned i = 0; i < views; i++) {
      if ((dst->visible_views & (1u << i)) &&
          (dst->frag_areas[i].width < extent.width ||
           dst->frag_areas[i].height * height_multiplier < extent.height))
         return false;
      if ((src->visible_views & (1u << i)) &&
          (src->frag_areas[i].width < extent.width ||
           src->frag_areas[i].height * height_multiplier < extent.height))
         return false;
   }

   /* Ok, let's combine them. dst is below or to the right of src, so it takes
    * src's position.
    */
   for (unsigned i = 0; i < views; i++) {
      if (src->visible_views & ~dst->visible_views & (1u << i))
         dst->frag_areas[i] = src->frag_areas[i];
      if (((src->visible_views | dst->visible_views) & (1u << i)) &&
          dst->frag_areas[i].height < extent.height)
         dst->gmem_extent.height = height_multiplier;
   }
   dst->sysmem_extent = extent;
   dst->visible_views = visible_views;
   dst->pos = src->pos;
   dst->slot_mask = slot_mask;

   src->merged_tile = dst;

   return true;
}
|
||||
|
||||
static void
|
||||
tu_merge_tiles(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
|
||||
struct tu_tile_config *tiles,
|
||||
uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2)
|
||||
{
|
||||
bool has_abs_mask =
|
||||
cmd->device->physical_device->info->props.has_abs_bin_mask;
|
||||
unsigned views = tu_fdm_num_layers(cmd);
|
||||
bool shared_viewport = cmd->state.rp.shared_viewport;
|
||||
uint32_t width = vsc->tile_count.width;
|
||||
|
||||
for (uint32_t y = ty1; y < ty2; y++) {
|
||||
for (uint32_t x = tx1; x < tx2; x++) {
|
||||
struct tu_tile_config *tile =
|
||||
&tiles[width * y + x];
|
||||
if (tile->visible_views == 0)
|
||||
continue;
|
||||
if (x > tx1) {
|
||||
struct tu_tile_config *prev_x_tile = &tiles[width * y + x - 1];
|
||||
try_merge_tiles(tile, prev_x_tile, views, has_abs_mask,
|
||||
shared_viewport);
|
||||
}
|
||||
if (y > ty1) {
|
||||
unsigned prev_y_idx = width * (y - 1) + x;
|
||||
struct tu_tile_config *prev_y_tile = &tiles[prev_y_idx];
|
||||
|
||||
/* We can't merge prev_y_tile into tile if it's already been
|
||||
* merged horizontally into its neighbor in the previous row.
|
||||
*/
|
||||
if (!prev_y_tile->merged_tile) {
|
||||
try_merge_tiles(tile, prev_y_tile, views, has_abs_mask,
|
||||
shared_viewport);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct tu_tile_config *
|
||||
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
|
||||
const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets)
|
||||
{
|
||||
struct tu_tile_config *tiles = (struct tu_tile_config *)
|
||||
calloc(vsc->tile_count.width * vsc->tile_count.height,
|
||||
sizeof(struct tu_tile_config));
|
||||
|
||||
for (uint32_t py = 0; py < vsc->pipe_count.height; py++) {
|
||||
uint32_t ty1 = py * vsc->pipe0.height;
|
||||
uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height);
|
||||
for (uint32_t px = 0; px < vsc->pipe_count.width; px++) {
|
||||
uint32_t tx1 = px * vsc->pipe0.width;
|
||||
uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width);
|
||||
uint32_t pipe_width = tx2 - tx1;
|
||||
uint32_t pipe = py * vsc->pipe_count.width + px;
|
||||
|
||||
/* Initialize tiles and sample fragment density map */
|
||||
for (uint32_t y = ty1; y < ty2; y++) {
|
||||
for (uint32_t x = tx1; x < tx2; x++) {
|
||||
uint32_t tx = x - tx1;
|
||||
uint32_t ty = y - ty1;
|
||||
struct tu_tile_config *tile = &tiles[vsc->tile_count.width * y + x];
|
||||
|
||||
tile->pos = { x, y };
|
||||
tile->sysmem_extent = { 1, 1 };
|
||||
tile->gmem_extent = { 1, 1 };
|
||||
tile->pipe = pipe;
|
||||
tile->slot_mask = 1u << (pipe_width * ty + tx);
|
||||
tile->merged_tile = NULL;
|
||||
tu_calc_bin_visibility(cmd, tile, fdm_offsets);
|
||||
tu_calc_frag_area(cmd, tile, fdm, fdm_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge tiles */
|
||||
/* TODO: we should also be able to merge tiles when only
|
||||
* per_view_render_areas is used without FDM. That requires using
|
||||
* another method to force disable draws since we don't want to force
|
||||
* the viewport to be re-emitted, like overriding the view mask. It
|
||||
* would also require disabling stores, and adding patchpoints for
|
||||
* CmdClearAttachments in secondaries or making it use the view mask.
|
||||
*/
|
||||
if (!TU_DEBUG(NO_BIN_MERGING) &&
|
||||
cmd->device->physical_device->info->props.has_bin_mask) {
|
||||
tu_merge_tiles(cmd, vsc, tiles, tx1, ty1, tx2, ty2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tiles;
|
||||
}
|
||||
|
||||
46
src/freedreno/vulkan/tu_tile_config.h
Normal file
46
src/freedreno/vulkan/tu_tile_config.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright © 2026 Valve Corporation.
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* based in part on anv driver which is:
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "tu_common.h"
|
||||
|
||||
#ifndef TU_TILE_CONFIG_H
|
||||
#define TU_TILE_CONFIG_H
|
||||
|
||||
/* Per-tile state used for fragment density map (FDM) rendering: the tile's
 * location, per-view visibility and fragment areas, and how it has been
 * merged with neighboring tiles.
 */
struct tu_tile_config {
   /* Tile coordinates within the framebuffer, in tile units. */
   VkOffset2D pos;
   /* Index of the VSC pipe this tile belongs to. */
   uint32_t pipe;
   /* Bitmask of slots within the pipe covered by this (possibly merged)
    * tile.
    */
   uint32_t slot_mask;
   /* Bitmask of views in which this tile is visible. */
   uint32_t visible_views;

   /* The tile this tile was merged with. */
   struct tu_tile_config *merged_tile;

   /* For merged tiles, the extent in tiles when resolved to system memory.
    */
   VkExtent2D sysmem_extent;

   /* For merged tiles, the extent in tiles in GMEM. This can only be more
    * than 1 if there is extra free space from an unused view.
    */
   VkExtent2D gmem_extent;

   /* Fragment area sampled from the density map, one entry per view. */
   VkExtent2D frag_areas[MAX_VIEWS];
};
|
||||
|
||||
/* Build the per-tile FDM configuration for the current render pass.
 * Returns a calloc'd array of tile_count.width * tile_count.height entries
 * that the caller must free().
 */
struct tu_tile_config *
tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc,
                    const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets);

/* Compute which views can see the given tile, filling tile->visible_views.
 * NOTE(review): implementation not visible in this chunk — confirm exact
 * semantics of `offsets` against tu_tile_config.cc.
 */
void
tu_calc_bin_visibility(struct tu_cmd_buffer *cmd,
                       struct tu_tile_config *tile,
                       const VkOffset2D *offsets);
|
||||
|
||||
#endif
|
||||
Loading…
Add table
Reference in a new issue