nvk: Enable compression

Enables compression for select images. Additionally, we get large (64K) and
huge (2M) pages as a bonus, as the hardware can only do compression on these
page sizes. However, due to nouveau limitations, this means that we are limited
to enabling it on things pinned to VRAM. Fortunately, this works out for us as
we can enable it for color, Z/S, and storage images, which are the main types
to benefit from compression as they're write-heavy.

Unfortunately, this means that we need to handle the memory allocation in a
delicate way, as the Vulkan API is a bit restrictive in this regard, so we have
to use dedicated allocations for compression/larger pages.

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36450>
This commit is contained in:
Mohamed Ahmed 2025-07-22 21:00:20 +03:00 committed by Mel Henning
parent d656960596
commit cabfdb4404
4 changed files with 127 additions and 16 deletions

View file

@ -41,10 +41,14 @@ const VkExternalMemoryProperties nvk_dma_buf_mem_props = {
static enum nvkmd_mem_flags static enum nvkmd_mem_flags
nvk_memory_type_flags(const VkMemoryType *type, nvk_memory_type_flags(const VkMemoryType *type,
VkExternalMemoryHandleTypeFlagBits handle_types) VkExternalMemoryHandleTypeFlagBits handle_types,
bool pinned_to_vram)
{ {
enum nvkmd_mem_flags flags = 0; enum nvkmd_mem_flags flags = 0;
if (type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) if (type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
if (pinned_to_vram)
flags = NVKMD_MEM_VRAM;
else
flags = NVKMD_MEM_LOCAL; flags = NVKMD_MEM_LOCAL;
else else
flags = NVKMD_MEM_GART; flags = NVKMD_MEM_GART;
@ -95,7 +99,7 @@ nvk_GetMemoryFdPropertiesKHR(VkDevice device,
for (unsigned t = 0; t < ARRAY_SIZE(pdev->mem_types); t++) { for (unsigned t = 0; t < ARRAY_SIZE(pdev->mem_types); t++) {
const VkMemoryType *type = &pdev->mem_types[t]; const VkMemoryType *type = &pdev->mem_types[t];
const enum nvkmd_mem_flags type_flags = const enum nvkmd_mem_flags type_flags =
nvk_memory_type_flags(type, handleType); nvk_memory_type_flags(type, handleType, false);
/* Flags required to be set on mem to be imported as type /* Flags required to be set on mem to be imported as type
* *
@ -153,8 +157,12 @@ nvk_AllocateMemory(VkDevice device,
if (fd_info != NULL) if (fd_info != NULL)
handle_types |= fd_info->handleType; handle_types |= fd_info->handleType;
const enum nvkmd_mem_flags flags = nvk_memory_type_flags(type, handle_types); bool pinned_to_vram = false;
/* Align to os page size (typically 4K) as a start as this works for
* everything, and then depending on placement and size, we either keep
* it as is or increase it to 64K or 2M.
*/
uint32_t alignment = pdev->nvkmd->bind_align_B; uint32_t alignment = pdev->nvkmd->bind_align_B;
uint8_t pte_kind = 0, tile_mode = 0; uint8_t pte_kind = 0, tile_mode = 0;
@ -171,9 +179,25 @@ nvk_AllocateMemory(VkDevice device,
alignment = MAX2(alignment, image->planes[0].nil.align_B); alignment = MAX2(alignment, image->planes[0].nil.align_B);
pte_kind = image->planes[0].nil.pte_kind; pte_kind = image->planes[0].nil.pte_kind;
tile_mode = image->planes[0].nil.tile_mode; tile_mode = image->planes[0].nil.tile_mode;
} else if (image->can_compress) {
/* If it's a dedicated alloc and it's not modifiers, then it's marked
* for compression and larger pages, so we set the pinned bit and up
* the alignment.
*/
pinned_to_vram = true;
pte_kind = image->planes[0].nil.compressed_pte_kind;
tile_mode = image->planes[0].nil.tile_mode;
/* Align to 2MiB if size is >= 2MiB, otherwise align to 64KiB. */
if (pAllocateInfo->allocationSize >= (1ULL << 21))
alignment = (1ULL << 21);
else
alignment = (1ULL << 16);
} }
} }
const enum nvkmd_mem_flags flags =
nvk_memory_type_flags(type, handle_types, pinned_to_vram);
const uint64_t aligned_size = const uint64_t aligned_size =
align64(pAllocateInfo->allocationSize, alignment); align64(pAllocateInfo->allocationSize, alignment);

View file

@ -769,6 +769,63 @@ nvk_GetPhysicalDeviceSparseImageFormatProperties2(
} }
} }
/* To use compression and larger page sizes, we need to signal to the kernel
* that the memory requested is going to be VRAM resident. However, this
* comes with an issue where said memory can't be evicted to host RAM under
* pressure, so we work around this by going with a dedicated allocation for
* color, Z/S, and storage image targets which are the main types that would
* benefit from compression as they're heavy on writes. Additionally, they
* also aren't the majority of memory used, so they can be safely pinned in
* VRAM without worrying about eviction under high pressure.
*
* There are some additional restrictions we need to keep in mind, however:
* 1. We can only enable this for Turing onwards because prior architectures
* relied on firmware to manage the compression tags, and it's impossible to
* do this on nouveau. Additionally, since compression needs kernel changes,
* we can only enable it if the detected kernel supports it.
*
* 2. Given our approach depends on dedicated allocations, we can't enable
* compression for sparse images as dedicated allocations are not compatible
* with sparse.
*
* 3. In a similar vein, we currently don't do multiplanar dedicated
* allocations, so we can't do compression for multi-plane YCbCr images.
*
* 4. Host copies are a complete no-go for compression as the host doesn't know
* about the modified data layout nor the compression tags.
*
* 5. The API for VK_EXT_image_drm_format_modifier requires that we report the
* supported modifiers in GetPhysicalDeviceFormatProperties2(). However,
* since we can only know whether an image is compressed or not at bind time,
* we can't actually expose any of the compressed modifiers in case the app
* chooses a compressed modifier for a non-compressed image. So for now, we
* have to disable compression for TILING_DRM_FORMAT_MODIFIER_EXT images.
*
* This helper enforces these restrictions and also makes sure to enable
* compression for storage, color, and Z/S targets only so as to avoid pinning
* too many things to VRAM.
*/
static bool
nvk_image_can_compress(const struct nvkmd_pdev *nvkmd_pdev,
                       const struct nvk_image *image)
{
   /* Nothing to do unless the KMD reports compression support. */
   if (!nvkmd_pdev->kmd_info.has_compression)
      return false;

   /* Multi-plane images are never compressed. */
   if (image->plane_count > 1)
      return false;

   /* Images usable for host transfers are never compressed. */
   if (image->vk.usage & (VK_IMAGE_USAGE_HOST_TRANSFER_BIT))
      return false;

   /* Sparse images (binding or residency) are never compressed. */
   if (image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
                                 VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
      return false;

   /* Only optimally tiled images qualify; this rejects linear and
    * DRM-format-modifier tiling.
    */
   if (image->vk.tiling != VK_IMAGE_TILING_OPTIMAL)
      return false;

   /* Finally, require at least one of storage, color attachment, or
    * depth/stencil attachment usage.
    */
   if (image->vk.usage & (VK_IMAGE_USAGE_STORAGE_BIT |
                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                          VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
      return true;

   return false;
}
static VkResult static VkResult
nvk_image_init(struct nvk_device *dev, nvk_image_init(struct nvk_device *dev,
struct nvk_image *image, struct nvk_image *image,
@ -817,6 +874,13 @@ nvk_image_init(struct nvk_device *dev,
VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR))
usage |= NIL_IMAGE_USAGE_VIDEO_BIT; usage |= NIL_IMAGE_USAGE_VIDEO_BIT;
/* We set compression on VkImage creation in order to be able to signal to
* NIL that the image will be compressed which would let NIL choose the
* appropriate PTE kinds, and also to mark the VkImage as compressed so that
* in GetImageMemoryRequirements() we are able to detect it and specify that
* we prefer a dedicated allocation for it.
*/
image->can_compress = nvk_image_can_compress(dev->nvkmd->pdev, image);
if (!image->can_compress) if (!image->can_compress)
usage |= NIL_IMAGE_USAGE_UNCOMPRESSED_BIT; usage |= NIL_IMAGE_USAGE_UNCOMPRESSED_BIT;
@ -1227,10 +1291,23 @@ nvk_get_image_memory_requirements(struct nvk_device *dev,
switch (ext->sType) { switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
VkMemoryDedicatedRequirements *dedicated = (void *)ext; VkMemoryDedicatedRequirements *dedicated = (void *)ext;
dedicated->prefersDedicatedAllocation = if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; dedicated->prefersDedicatedAllocation = true;
dedicated->requiresDedicatedAllocation = dedicated->requiresDedicatedAllocation = true;
image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; } else if (image->can_compress) {
/* We need dedicated allocations as compressed images have to be
* pinned to VRAM due to nouveau, and we can't have a separate
* memory type that's pinned and non evictable due to the Vulkan API
* disallowing equivalent image properties returning different
* memory types. We aren't allowed to require dedicated allocations
* but we can signal that we prefer them.
*/
dedicated->prefersDedicatedAllocation = true;
dedicated->requiresDedicatedAllocation = false;
} else {
dedicated->prefersDedicatedAllocation = false;
dedicated->requiresDedicatedAllocation = false;
}
break; break;
} }
default: default:
@ -1471,6 +1548,10 @@ nvk_image_plane_bind(struct nvk_device *dev,
*offset_B = align64(*offset_B, plane_align_B); *offset_B = align64(*offset_B, plane_align_B);
if (plane->nil.pte_kind != 0) { if (plane->nil.pte_kind != 0) {
if (mem->dedicated_image == image && image->can_compress) {
image->is_compressed = true;
plane->addr = mem->mem->va->addr + *offset_B;
} else {
VkResult result = nvk_image_plane_alloc_va(dev, image, plane); VkResult result = nvk_image_plane_alloc_va(dev, image, plane);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
@ -1479,6 +1560,7 @@ nvk_image_plane_bind(struct nvk_device *dev,
plane->va->size_B); plane->va->size_B);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
}
} else { } else {
plane->addr = mem->mem->va->addr + *offset_B; plane->addr = mem->mem->va->addr + *offset_B;
} }

View file

@ -16,6 +16,8 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <xf86drm.h> #include <xf86drm.h>
#include "clc597.h"
static bool static bool
drm_device_is_nouveau(const char *path) drm_device_is_nouveau(const char *path)
{ {
@ -99,6 +101,8 @@ nvkmd_nouveau_try_create_pdev(struct _drmDevice *drm_device,
.has_alloc_tiled = nouveau_ws_device_has_tiled_bo(ws_dev), .has_alloc_tiled = nouveau_ws_device_has_tiled_bo(ws_dev),
.has_map_fixed = true, .has_map_fixed = true,
.has_overmap = true, .has_overmap = true,
.has_compression = ws_dev->nouveau_version >= 0x01000401 &&
ws_dev->info.cls_eng3d >= TURING_A,
}; };
/* We get this ourselves */ /* We get this ourselves */

View file

@ -120,6 +120,7 @@ struct nvkmd_info {
bool has_alloc_tiled; bool has_alloc_tiled;
bool has_map_fixed; bool has_map_fixed;
bool has_overmap; bool has_overmap;
bool has_compression;
}; };
struct nvkmd_pdev_ops { struct nvkmd_pdev_ops {