From 72e4b256c72e5014b5e34e6fd27f16828a982522 Mon Sep 17 00:00:00 2001
From: Mohamed Ahmed <mohamedahmedegypt2001@gmail.com>
Date: Thu, 19 Mar 2026 22:24:08 +0400
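Subject: [PATCH] nvk: Defer tiled shadow plane memory allocation to draw time

The tiled shadow memory for linear images used to be allocated in
nvk_CreateImage() for every plane whose shadow has a non-zero size, whether
or not the shadow was ever rendered to.  Allocate it lazily instead, the
first time nvk_CmdBeginRendering() has to redirect a linear color attachment
to its tiled shadow, and protect the allocation with a per-image mutex.
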
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Tested-by: Mary Guillemard <mary@mary.zone>

Backport-to: 26.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40473>
---
 src/nouveau/vulkan/nvk_cmd_draw.c | 51 ++++++++++++++++++++++++++++---
 src/nouveau/vulkan/nvk_image.c    | 19 +++---------
 src/nouveau/vulkan/nvk_image.h    |  7 +++++
 3 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 8c1bc6162a5..ea72ba1ec8d 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -1035,6 +1035,44 @@ nvk_rendering_linear(const struct nvk_rendering_state *render)
    return true;
 }
 
+static VkResult
+ensure_linear_tiled_shadow_mem_locked(struct nvk_device *dev,
+                                      struct nvk_image *image,
+                                      uint8_t plane_idx)
+{
+   struct nvk_image_plane *shadow = &image->linear_tiled_shadows[plane_idx];
+   struct nvkmd_mem **mem = &image->linear_tiled_shadow_mem[plane_idx];
+
+   /* Already allocated by an earlier call; the address was set then. */
+   if (*mem != NULL) {
+      assert(shadow->addr != 0);
+      return VK_SUCCESS;
+   }
+
+   assert(shadow->nil.size_B > 0);
+   VkResult result =
+      nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
+                                shadow->nil.size_B, shadow->nil.align_B,
+                                shadow->nil.pte_kind, shadow->nil.tile_mode,
+                                NVKMD_MEM_LOCAL, mem);
+   if (result == VK_SUCCESS)
+      shadow->addr = (*mem)->va->addr;
+
+   return result;
+}
+
+static VkResult
+nvk_image_ensure_linear_tiled_shadow_mem(struct nvk_device *dev,
+                                         struct nvk_image *image,
+                                         uint8_t plane_idx)
+{
+   /* Hold the per-image lock while checking for/creating the shadow memory. */
+   simple_mtx_lock(&image->tiled_shadow_mutex);
+   VkResult res = ensure_linear_tiled_shadow_mem_locked(dev, image, plane_idx);
+   simple_mtx_unlock(&image->tiled_shadow_mutex);
+   return res;
+}
+
 static void
 get_depth_stencil_plane_params(struct nvk_image_view *iview,
                                uint32_t plane,
@@ -1104,7 +1142,7 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
                       const VkRenderingInfo *pRenderingInfo)
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
-   const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
+   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    const struct nvk_physical_device *pdev = nvk_device_physical(dev);
    struct nvk_rendering_state *render = &cmd->state.gfx.render;
 
@@ -1182,18 +1220,23 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
    for (uint32_t i = 0; i < NVK_MAX_RTS; i++) {
       if (render->color_att[i].iview) {
          const struct nvk_image_view *iview = render->color_att[i].iview;
-         const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
+         struct nvk_image *image = (struct nvk_image *)iview->vk.image;
          /* Rendering to multi-planar images is valid for a specific single
           * plane only, so assert that what we have is a single-plane, obtain
           * its index, and begin rendering
           */
          assert(iview->plane_count == 1);
          const uint8_t ip = iview->planes[0].image_plane;
-         const struct nvk_image_plane *plane = &image->planes[ip];
+         struct nvk_image_plane *plane = &image->planes[ip];
 
          if (!render->linear &&
-             plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR)
+             plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
+            VkResult result;
+            result = nvk_image_ensure_linear_tiled_shadow_mem(dev, image, ip);
+            if (result != VK_SUCCESS)
+               vk_command_buffer_set_error(&cmd->vk, result);
             plane = &image->linear_tiled_shadows[ip];
+         }
 
          const struct nil_image *nil_image = &plane->nil;
          const struct nil_image_level *level =
diff --git a/src/nouveau/vulkan/nvk_image.c b/src/nouveau/vulkan/nvk_image.c
index fe44fe9c93c..15ab7c23486 100644
--- a/src/nouveau/vulkan/nvk_image.c
+++ b/src/nouveau/vulkan/nvk_image.c
@@ -985,6 +985,8 @@ nvk_image_init(struct nvk_device *dev,
       }
    }
 
+   simple_mtx_init(&image->tiled_shadow_mutex, mtx_plain);
+
    /* The video decode engine needs the block size to be the same across chroma
     * and luma planes, so in order to work around this limitation we gather all
     * the info for NIL early, which would give it enough information to get and
@@ -1223,6 +1225,8 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image,
          nvkmd_mem_unref(image->linear_tiled_shadow_mem[plane]);
    }
 
+   simple_mtx_destroy(&image->tiled_shadow_mutex);
+
    vk_image_finish(&image->vk);
 }
 
@@ -1269,21 +1273,6 @@ nvk_CreateImage(VkDevice _device,
       }
    }
 
-   for (uint8_t plane = 0; plane < image->plane_count; plane++) {
-      if (image->linear_tiled_shadows[plane].nil.size_B > 0) {
-         struct nvk_image_plane *shadow = &image->linear_tiled_shadows[plane];
-         result = nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
-                                            shadow->nil.size_B, shadow->nil.align_B,
-                                            shadow->nil.pte_kind, shadow->nil.tile_mode,
-                                            NVKMD_MEM_LOCAL,
-                                            &image->linear_tiled_shadow_mem[plane]);
-         if (result != VK_SUCCESS)
-            goto fail;
-
-         shadow->addr = image->linear_tiled_shadow_mem[plane]->va->addr;
-      }
-   }
-
    /* This section is removed by the optimizer for non-ANDROID builds */
    if (vk_image_is_android_native_buffer(&image->vk)) {
       result = vk_android_import_anb(&dev->vk, pCreateInfo, pAllocator,
diff --git a/src/nouveau/vulkan/nvk_image.h b/src/nouveau/vulkan/nvk_image.h
index 6162c400431..429acbae0a7 100644
--- a/src/nouveau/vulkan/nvk_image.h
+++ b/src/nouveau/vulkan/nvk_image.h
@@ -125,7 +125,14 @@ struct nvk_image {
     * under certain conditions, so to support DRM_FORMAT_MOD_LINEAR
     * rendering in the general case, we need to keep a tiled copy, which would
     * be used to fake support if the conditions aren't satisfied.
+    *
+    * To avoid paying the memory cost of the tiled shadows when they aren't
+    * needed (imports for texturing, or rendering with the conditions
+    * satisfied), tiled_shadow_mutex is initialized at image create time and
+    * the allocation is deferred until vkCmdBeginRendering, which allocates
+    * each plane's shadow memory on first use while holding the mutex.
     */
+   simple_mtx_t tiled_shadow_mutex;
    struct nvk_image_plane linear_tiled_shadows[NVK_MAX_IMAGE_PLANES];
    struct nvkmd_mem *linear_tiled_shadow_mem[NVK_MAX_IMAGE_PLANES];