nvk: Defer tiled shadow plane memory allocation to draw time

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Tested-by: Mary Guillemard <mary@mary.zone>

Backport-to: 26.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40473>
This commit is contained in:
Mohamed Ahmed 2026-03-19 22:24:08 +04:00 committed by Marge Bot
parent 1564995b0f
commit 72e4b256c7
3 changed files with 58 additions and 19 deletions

View file

@@ -1035,6 +1035,44 @@ nvk_rendering_linear(const struct nvk_rendering_state *render)
return true;
}
static VkResult
ensure_linear_tiled_shadow_mem_locked(struct nvk_device *dev,
struct nvk_image *image,
uint8_t plane_idx)
{
if (image->linear_tiled_shadow_mem[plane_idx] != NULL) {
assert(image->linear_tiled_shadows[plane_idx].addr != 0);
return VK_SUCCESS;
}
struct nvk_image_plane *plane = &image->linear_tiled_shadows[plane_idx];
assert(plane->nil.size_B > 0);
VkResult result =
nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
plane->nil.size_B, plane->nil.align_B,
plane->nil.pte_kind, plane->nil.tile_mode,
NVKMD_MEM_LOCAL,
&image->linear_tiled_shadow_mem[plane_idx]);
if (result != VK_SUCCESS)
return result;
plane->addr = image->linear_tiled_shadow_mem[plane_idx]->va->addr;
return VK_SUCCESS;
}
/* Thread-safe wrapper around ensure_linear_tiled_shadow_mem_locked():
 * takes the image's tiled_shadow_mutex so that concurrent command-buffer
 * recording against the same image races safely on the lazy allocation.
 */
static VkResult
nvk_image_ensure_linear_tiled_shadow_mem(struct nvk_device *dev,
                                         struct nvk_image *image,
                                         uint8_t plane_idx)
{
   VkResult result;

   simple_mtx_lock(&image->tiled_shadow_mutex);
   result = ensure_linear_tiled_shadow_mem_locked(dev, image, plane_idx);
   simple_mtx_unlock(&image->tiled_shadow_mutex);

   return result;
}
static void
get_depth_stencil_plane_params(struct nvk_image_view *iview,
uint32_t plane,
@@ -1104,7 +1142,7 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
const VkRenderingInfo *pRenderingInfo)
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
struct nvk_rendering_state *render = &cmd->state.gfx.render;
@@ -1182,18 +1220,23 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer,
for (uint32_t i = 0; i < NVK_MAX_RTS; i++) {
if (render->color_att[i].iview) {
const struct nvk_image_view *iview = render->color_att[i].iview;
const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
struct nvk_image *image = (struct nvk_image *)iview->vk.image;
/* Rendering to multi-planar images is valid for a specific single
* plane only, so assert that what we have is a single-plane, obtain
* its index, and begin rendering
*/
assert(iview->plane_count == 1);
const uint8_t ip = iview->planes[0].image_plane;
const struct nvk_image_plane *plane = &image->planes[ip];
struct nvk_image_plane *plane = &image->planes[ip];
if (!render->linear &&
plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR)
plane->nil.levels[0].tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
VkResult result;
result = nvk_image_ensure_linear_tiled_shadow_mem(dev, image, ip);
if (result != VK_SUCCESS)
vk_command_buffer_set_error(&cmd->vk, result);
plane = &image->linear_tiled_shadows[ip];
}
const struct nil_image *nil_image = &plane->nil;
const struct nil_image_level *level =

View file

@@ -985,6 +985,8 @@ nvk_image_init(struct nvk_device *dev,
}
}
simple_mtx_init(&image->tiled_shadow_mutex, mtx_plain);
/* The video decode engine needs the block size to be the same across chroma
* and luma planes, so in order to work around this limitation we gather all
* the info for NIL early, which would give it enough information to get and
@@ -1223,6 +1225,8 @@ nvk_image_finish(struct nvk_device *dev, struct nvk_image *image,
nvkmd_mem_unref(image->linear_tiled_shadow_mem[plane]);
}
simple_mtx_destroy(&image->tiled_shadow_mutex);
vk_image_finish(&image->vk);
}
@@ -1269,21 +1273,6 @@ nvk_CreateImage(VkDevice _device,
}
}
for (uint8_t plane = 0; plane < image->plane_count; plane++) {
if (image->linear_tiled_shadows[plane].nil.size_B > 0) {
struct nvk_image_plane *shadow = &image->linear_tiled_shadows[plane];
result = nvkmd_dev_alloc_tiled_mem(dev->nvkmd, &dev->vk.base,
shadow->nil.size_B, shadow->nil.align_B,
shadow->nil.pte_kind, shadow->nil.tile_mode,
NVKMD_MEM_LOCAL,
&image->linear_tiled_shadow_mem[plane]);
if (result != VK_SUCCESS)
goto fail;
shadow->addr = image->linear_tiled_shadow_mem[plane]->va->addr;
}
}
/* This section is removed by the optimizer for non-ANDROID builds */
if (vk_image_is_android_native_buffer(&image->vk)) {
result = vk_android_import_anb(&dev->vk, pCreateInfo, pAllocator,

View file

@@ -125,7 +125,14 @@ struct nvk_image {
* under certain conditions, so to support DRM_FORMAT_MOD_LINEAR
* rendering in the general case, we need to keep a tiled copy, which would
* be used to fake support if the conditions aren't satisfied.
*
* In order to avoid needlessly paying the memory cost of the tiled shadows
* when they aren't needed (e.g. imports used only for texturing, or
* rendering when the conditions are satisfied), we initialize a mutex at
* image-create time to guard the memory objects, and defer the actual
* allocation until draw time, where we check per-plane whether a shadow is
* needed and selectively allocate memory for it.
*/
simple_mtx_t tiled_shadow_mutex;
struct nvk_image_plane linear_tiled_shadows[NVK_MAX_IMAGE_PLANES];
struct nvkmd_mem *linear_tiled_shadow_mem[NVK_MAX_IMAGE_PLANES];