From 72e4b256c72e5014b5e34e6fd27f16828a982522 Mon Sep 17 00:00:00 2001 From: Mohamed Ahmed Date: Thu, 19 Mar 2026 22:24:08 +0400 Subject: [PATCH] nvk: Defer tiled shadow plane memory allocation to draw time Reviewed-by: Faith Ekstrand Reviewed-by: Mel Henning Tested-by: Mary Guillemard Backport-to: 26.1 Part-of: --- src/nouveau/vulkan/nvk_cmd_draw.c | 51 ++++++++++++++++++++++++++++--- src/nouveau/vulkan/nvk_image.c | 19 +++--------- src/nouveau/vulkan/nvk_image.h | 7 +++++ 3 files changed, 58 insertions(+), 19 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 8c1bc6162a5..ea72ba1ec8d 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -1035,6 +1035,44 @@ nvk_rendering_linear(const struct nvk_rendering_state *render) return true; } +static VkResult +ensure_linear_tiled_shadow_mem_locked(struct nvk_device *dev, + struct nvk_image *image, + uint8_t plane_idx) +{ + if (image->linear_tiled_shadow_mem[plane_idx] != NULL) { + assert(image->linear_tiled_shadows[plane_idx].addr != 0); + return VK_SUCCESS; + } + + struct nvk_image_plane *plane = &image->linear_tiled_shadows[plane_idx]; + assert(plane->nil.size_B > 0); + VkResult result = + nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base, + plane->nil.size_B, plane->nil.align_B, + plane->nil.pte_kind, plane->nil.tile_mode, + NVKMD_MEM_LOCAL, + &image->linear_tiled_shadow_mem[plane_idx]); + if (result != VK_SUCCESS) + return result; + + plane->addr = image->linear_tiled_shadow_mem[plane_idx]->va->addr; + + return VK_SUCCESS; +} + +static VkResult +nvk_image_ensure_linear_tiled_shadow_mem(struct nvk_device *dev, + struct nvk_image *image, + uint8_t plane_idx) +{ + simple_mtx_lock(&image->tiled_shadow_mutex); + VkResult result = ensure_linear_tiled_shadow_mem_locked(dev, image, + plane_idx); + simple_mtx_unlock(&image->tiled_shadow_mutex); + return result; +} + static void get_depth_stencil_plane_params(struct nvk_image_view *iview, 
uint32_t plane, @@ -1104,7 +1142,7 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo) { VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); - const struct nvk_device *dev = nvk_cmd_buffer_device(cmd); + struct nvk_device *dev = nvk_cmd_buffer_device(cmd); const struct nvk_physical_device *pdev = nvk_device_physical(dev); struct nvk_rendering_state *render = &cmd->state.gfx.render; @@ -1182,18 +1220,23 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, for (uint32_t i = 0; i < NVK_MAX_RTS; i++) { if (render->color_att[i].iview) { const struct nvk_image_view *iview = render->color_att[i].iview; - const struct nvk_image *image = (struct nvk_image *)iview->vk.image; + struct nvk_image *image = (struct nvk_image *)iview->vk.image; /* Rendering to multi-planar images is valid for a specific single * plane only, so assert that what we have is a single-plane, obtain * its index, and begin rendering */ assert(iview->plane_count == 1); const uint8_t ip = iview->planes[0].image_plane; - const struct nvk_image_plane *plane = &image->planes[ip]; + struct nvk_image_plane *plane = &image->planes[ip]; if (!render->linear && - plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR) + plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR) { + VkResult result; + result = nvk_image_ensure_linear_tiled_shadow_mem(dev, image, ip); + if (result != VK_SUCCESS) + vk_command_buffer_set_error(&cmd->vk, result); plane = &image->linear_tiled_shadows[ip]; + } const struct nil_image *nil_image = &plane->nil; const struct nil_image_level *level = diff --git a/src/nouveau/vulkan/nvk_image.c b/src/nouveau/vulkan/nvk_image.c index fe44fe9c93c..15ab7c23486 100644 --- a/src/nouveau/vulkan/nvk_image.c +++ b/src/nouveau/vulkan/nvk_image.c @@ -985,6 +985,8 @@ nvk_image_init(struct nvk_device *dev, } } + simple_mtx_init(&image->tiled_shadow_mutex, mtx_plain); + /* The video decode engine needs the block size to be the same across chroma * and 
luma planes, so in order to work around this limitation we gather all * the info for NIL early, which would give it enough information to get and @@ -1223,6 +1225,8 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image, nvkmd_mem_unref(image->linear_tiled_shadow_mem[plane]); } + simple_mtx_destroy(&image->tiled_shadow_mutex); + vk_image_finish(&image->vk); } @@ -1269,21 +1273,6 @@ nvk_CreateImage(VkDevice _device, } } - for (uint8_t plane = 0; plane < image->plane_count; plane++) { - if (image->linear_tiled_shadows[plane].nil.size_B > 0) { - struct nvk_image_plane *shadow = &image->linear_tiled_shadows[plane]; - result = nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base, - shadow->nil.size_B, shadow->nil.align_B, - shadow->nil.pte_kind, shadow->nil.tile_mode, - NVKMD_MEM_LOCAL, - &image->linear_tiled_shadow_mem[plane]); - if (result != VK_SUCCESS) - goto fail; - - shadow->addr = image->linear_tiled_shadow_mem[plane]->va->addr; - } - } - /* This section is removed by the optimizer for non-ANDROID builds */ if (vk_image_is_android_native_buffer(&image->vk)) { result = vk_android_import_anb(&dev->vk, pCreateInfo, pAllocator, diff --git a/src/nouveau/vulkan/nvk_image.h b/src/nouveau/vulkan/nvk_image.h index 6162c400431..429acbae0a7 100644 --- a/src/nouveau/vulkan/nvk_image.h +++ b/src/nouveau/vulkan/nvk_image.h @@ -125,7 +125,14 @@ struct nvk_image { * under certain conditions, so to support DRM_FORMAT_MOD_LINEAR * rendering in the general case, we need to keep a tiled copy, which would * be used to fake support if the conditions aren't satisfied. + * + * In order to avoid needlessly paying the memory cost of the tiled shadows + * even if they aren't needed (imports for texturing or rendering with the + * conditions satisfied), we init a mutex at image create time to guard the + * memory object and defer the memory allocation till draw time, where we + * check if it's needed per-plane and selectively allocate memory for it. 
*/ + simple_mtx_t tiled_shadow_mutex; struct nvk_image_plane linear_tiled_shadows[NVK_MAX_IMAGE_PLANES]; struct nvkmd_mem *linear_tiled_shadow_mem[NVK_MAX_IMAGE_PLANES];