nvk: Defer tiled shadow plane memory allocation to draw time

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Tested-by: Mary Guillemard <mary@mary.zone>

Backport-to: 26.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40473>
This commit is contained in:
Mohamed Ahmed 2026-03-19 22:24:08 +04:00 committed by Marge Bot
parent 1564995b0f
commit 72e4b256c7
3 changed files with 58 additions and 19 deletions

View file

@@ -1035,6 +1035,44 @@ nvk_rendering_linear(const struct nvk_rendering_state *render)
return true;
}
static VkResult
ensure_linear_tiled_shadow_mem_locked(struct nvk_device *dev,
struct nvk_image *image,
uint8_t plane_idx)
{
if (image->linear_tiled_shadow_mem[plane_idx] != NULL) {
assert(image->linear_tiled_shadows[plane_idx].addr != 0);
return VK_SUCCESS;
}
struct nvk_image_plane *plane = &image->linear_tiled_shadows[plane_idx];
assert(plane->nil.size_B > 0);
VkResult result =
nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
plane->nil.size_B, plane->nil.align_B,
plane->nil.pte_kind, plane->nil.tile_mode,
NVKMD_MEM_LOCAL,
&image->linear_tiled_shadow_mem[plane_idx]);
if (result != VK_SUCCESS)
return result;
plane->addr = image->linear_tiled_shadow_mem[plane_idx]->va->addr;
return VK_SUCCESS;
}
/* Thread-safe wrapper around ensure_linear_tiled_shadow_mem_locked():
 * takes the image's tiled_shadow_mutex so that concurrent command-buffer
 * recording against the same image races safely on the lazy allocation.
 */
static VkResult
nvk_image_ensure_linear_tiled_shadow_mem(struct nvk_device *dev,
                                         struct nvk_image *image,
                                         uint8_t plane_idx)
{
   VkResult result;

   simple_mtx_lock(&image->tiled_shadow_mutex);
   result = ensure_linear_tiled_shadow_mem_locked(dev, image, plane_idx);
   simple_mtx_unlock(&image->tiled_shadow_mutex);

   return result;
}
static void
get_depth_stencil_plane_params(struct nvk_image_view *iview,
uint32_t plane,
@@ -1104,7 +1142,7 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
const VkRenderingInfo *pRenderingInfo)
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
struct nvk_rendering_state *render = &cmd->state.gfx.render;
@@ -1182,18 +1220,23 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
for (uint32_t i = 0; i < NVK_MAX_RTS; i++) {
if (render->color_att[i].iview) {
const struct nvk_image_view *iview = render->color_att[i].iview;
const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
struct nvk_image *image = (struct nvk_image *)iview->vk.image;
/* Rendering to multi-planar images is valid for a specific single
* plane only, so assert that what we have is a single-plane, obtain
* its index, and begin rendering
*/
assert(iview->plane_count == 1);
const uint8_t ip = iview->planes[0].image_plane;
const struct nvk_image_plane *plane = &image->planes[ip];
struct nvk_image_plane *plane = &image->planes[ip];
if (!render->linear &&
plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR)
plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
VkResult result;
result = nvk_image_ensure_linear_tiled_shadow_mem(dev, image, ip);
if (result != VK_SUCCESS)
vk_command_buffer_set_error(&cmd->vk, result);
plane = &image->linear_tiled_shadows[ip];
}
const struct nil_image *nil_image = &plane->nil;
const struct nil_image_level *level =

View file

@@ -985,6 +985,8 @@ nvk_image_init(struct nvk_device *dev,
}
}
simple_mtx_init(&image->tiled_shadow_mutex, mtx_plain);
/* The video decode engine needs the block size to be the same across chroma
* and luma planes, so in order to work around this limitation we gather all
* the info for NIL early, which would give it enough information to get and
@@ -1223,6 +1225,8 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image,
nvkmd_mem_unref(image->linear_tiled_shadow_mem[plane]);
}
simple_mtx_destroy(&image->tiled_shadow_mutex);
vk_image_finish(&image->vk);
}
@@ -1269,21 +1273,6 @@ nvk_CreateImage(VkDevice _device,
}
}
for (uint8_t plane = 0; plane < image->plane_count; plane++) {
if (image->linear_tiled_shadows[plane].nil.size_B > 0) {
struct nvk_image_plane *shadow = &image->linear_tiled_shadows[plane];
result = nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
shadow->nil.size_B, shadow->nil.align_B,
shadow->nil.pte_kind, shadow->nil.tile_mode,
NVKMD_MEM_LOCAL,
&image->linear_tiled_shadow_mem[plane]);
if (result != VK_SUCCESS)
goto fail;
shadow->addr = image->linear_tiled_shadow_mem[plane]->va->addr;
}
}
/* This section is removed by the optimizer for non-ANDROID builds */
if (vk_image_is_android_native_buffer(&image->vk)) {
result = vk_android_import_anb(&dev->vk, pCreateInfo, pAllocator,

View file

@@ -125,7 +125,14 @@ struct nvk_image {
* under certain conditions, so to support DRM_FORMAT_MOD_LINEAR
* rendering in the general case, we need to keep a tiled copy, which would
* be used to fake support if the conditions aren't satisfied.
*
* In order to avoid needlessly paying the memory cost of the tiled shadows
* when they aren't needed (e.g. imports used only for texturing, or
* rendering when the conditions are satisfied), we initialize a mutex at
* image-create time to guard the memory objects, and defer the actual
* allocation until draw time, where we check per-plane whether a shadow is
* needed and selectively allocate memory for it.
*/
simple_mtx_t tiled_shadow_mutex;
struct nvk_image_plane linear_tiled_shadows[NVK_MAX_IMAGE_PLANES];
struct nvkmd_mem *linear_tiled_shadow_mem[NVK_MAX_IMAGE_PLANES];