hk: implement sparse

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>

This commit is contained in:
parent 3e7297a297
commit 678134add5

10 changed files with 570 additions and 61 deletions
@@ -77,6 +77,23 @@ hk_get_bda_replay_addr(const VkBufferCreateInfo *pCreateInfo)
    return addr;
 }
 
+VkResult
+hk_bind_scratch(struct hk_device *dev, struct agx_va *va, unsigned offset_B,
+                size_t size_B)
+{
+   VkResult result = VK_SUCCESS;
+
+   for (unsigned i = 0; i < size_B; i += AIL_PAGESIZE) {
+      result = dev->dev.ops.bo_bind(&dev->dev, dev->sparse.write,
+                                    va->addr + offset_B + i, AIL_PAGESIZE, 0,
+                                    ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return result;
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 hk_CreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
@@ -47,3 +47,6 @@ hk_buffer_addr_range(const struct hk_buffer *buffer, uint64_t offset,
       .range = vk_buffer_range(&buffer->vk, offset, range),
    };
 }
+
+VkResult hk_bind_scratch(struct hk_device *dev, struct agx_va *va,
+                         unsigned offs_B, size_t size_B);
@@ -25,6 +25,7 @@
 #include "util/simple_mtx.h"
 #include "vulkan/vulkan_core.h"
 #include "vulkan/wsi/wsi_common.h"
+#include "layout.h"
 #include "vk_cmd_enqueue_entrypoints.h"
 #include "vk_common_entrypoints.h"
 #include "vk_debug_utils.h"

@@ -57,7 +58,10 @@ hk_upload_rodata(struct hk_device *dev)
    dev->rodata.bo =
       agx_bo_create(&dev->dev, AGX_SAMPLER_LENGTH, 0, 0, "Read only data");
 
-   if (!dev->rodata.bo)
+   dev->sparse.write =
+      agx_bo_create(&dev->dev, AIL_PAGESIZE, 0, 0, "Sparse write page");
+
+   if (!dev->rodata.bo || !dev->sparse.write)
       return VK_ERROR_OUT_OF_HOST_MEMORY;
 
    uint8_t *map = agx_bo_map(dev->rodata.bo);

@@ -481,6 +485,7 @@ fail_queue:
    hk_queue_finish(dev, &dev->queue);
 fail_rodata:
    agx_bo_unreference(&dev->dev, dev->rodata.bo);
+   agx_bo_unreference(&dev->dev, dev->sparse.write);
 fail_bg_eot:
    agx_bg_eot_cleanup(&dev->bg_eot);
 fail_internal_shaders_2:

@@ -533,6 +538,7 @@ hk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    hk_descriptor_table_finish(dev, &dev->images);
    hk_descriptor_table_finish(dev, &dev->occlusion_queries);
    agx_bo_unreference(&dev->dev, dev->rodata.bo);
+   agx_bo_unreference(&dev->dev, dev->sparse.write);
    agx_bo_unreference(&dev->dev, dev->heap);
    agx_bg_eot_cleanup(&dev->bg_eot);
    agx_close_device(&dev->dev);
@@ -88,6 +88,14 @@ struct hk_device {
       uint64_t geometry_state;
    } rodata;
 
+   /* Pages for backing sparse resources */
+   struct {
+      /* Undefined content, should not be read (except for atomics where the
+       * result is already undefined).
+       */
+      struct agx_bo *write;
+   } sparse;
+
    struct hk_internal_shaders prolog_epilog;
    struct hk_internal_shaders kernels;
    struct hk_api_shader *write_shader;
@@ -14,6 +14,8 @@
 #include "util/u_math.h"
 #include "vulkan/vulkan_core.h"
 
+#include "agx_bo.h"
+#include "hk_buffer.h"
 #include "hk_device.h"
 #include "hk_device_memory.h"
 #include "hk_entrypoints.h"

@@ -27,6 +29,11 @@
  */
 #define HK_PLANE_ALIGN_B 128
 
+/* However, exposing the standard sparse block sizes requires using the standard
+ * alignment 65k.
+ */
+#define HK_SPARSE_ALIGN_B 65536
+
 static VkFormatFeatureFlags2
 hk_get_image_plane_format_features(struct hk_physical_device *pdev,
                                    VkFormat vk_format, VkImageTiling tiling)

@@ -241,6 +248,16 @@ hk_can_compress(const struct agx_device *dev, VkFormat format, unsigned plane,
    if (dev->debug & AGX_DBG_NOCOMPRESS)
       return false;
 
+   /* TODO: Handle compression with sparse. This should be doable but it's a bit
+    * subtle. Correctness first.
+    */
+   if (flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
+                VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+                VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) {
+      perf_debug_dev(dev, "No compression: sparse");
+      return false;
+   }
+
    /* Image compression is not (yet?) supported with host image copies,
     * although the vendor driver does support something similar if I recall.
     * Compression is not supported in hardware for storage images or mutable
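The 65536 value lines up with the Vulkan standard 2D sparse block for 32-bit texels, which is 128 x 128 x 1 texels: 128 * 128 * 4 B = 65536 B. A single AGX hardware tile is one 16 KiB page (16384 B), which for a 4-byte texel would be a 64 x 64 tile, so one standard block corresponds to a 2x2 gang of hardware tiles; the later image hunks rely on exactly that correspondence. (The 64 x 64 figure is an illustrative assumption for 4-byte texels, not something stated in this hunk.)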
@@ -404,11 +421,19 @@ hk_GetPhysicalDeviceImageFormatProperties2(
                                   VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)))
       return VK_ERROR_FORMAT_NOT_SUPPORTED;
 
-   /* We don't yet support sparse, but it shouldn't be too hard */
-   if (pImageFormatInfo->flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
-                                  VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
-                                  VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
+   /* Multiplane formats are not supported with sparse residency. This has no
+    * known use cases and is forbidden in other APIs.
+    *
+    * Neither is depth/stencil: this is a hardware limitation on G13. Hardware
+    * support is added with G14, but that's not implemented yet. We could
+    * emulate on G13 but it'd be fiddly. Fortunately, vkd3d-proton doesn't need
+    * sparse depth, as RADV has the same limitation!
+    */
+   if ((ycbcr_info ||
+        vk_format_is_depth_or_stencil(pImageFormatInfo->format)) &&
+       (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) {
       return VK_ERROR_FORMAT_NOT_SUPPORTED;
+   }
 
    const uint32_t max_dim = 16384;
    VkExtent3D maxExtent;
@@ -610,18 +635,28 @@ hk_GetPhysicalDeviceImageFormatProperties2(
 }
 
 static VkSparseImageFormatProperties
-hk_fill_sparse_image_fmt_props(VkImageAspectFlags aspects)
+hk_fill_sparse_image_fmt_props(enum pipe_format format, unsigned samples,
+                               VkImageAspectFlags aspects)
 {
-   /* TODO */
+   /* Apple tile sizes are exactly 16KiB. The Vulkan standard block sizes are
+    * sized to be exactly 64KiB. Fortunately, they correspond directly to the
+    * Apple sizes (except for MSAA 2x), just doubled in each dimension. Our
+    * sparse binding code gangs together 4 hardware tiles into an API tile. We
+    * just need to derive the correct size here.
+    */
+   unsigned blocksize_B = util_format_get_blocksize(format) * samples;
+   struct ail_tile ail_size = ail_get_max_tile_size(blocksize_B);
+
+   VkExtent3D granularity = {
+      ail_size.width_el * 2 * util_format_get_blockwidth(format),
+      ail_size.height_el * 2 * util_format_get_blockheight(format),
+      1,
+   };
+
    return (VkSparseImageFormatProperties){
       .aspectMask = aspects,
       .flags = VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT,
-      .imageGranularity =
-         {
-            .width = 1,
-            .height = 1,
-            .depth = 1,
-         },
+      .imageGranularity = granularity,
    };
 }
 
@@ -672,7 +707,9 @@ hk_GetPhysicalDeviceSparseImageFormatProperties2(
 
    vk_outarray_append_typed(VkSparseImageFormatProperties2, &out, props)
    {
-      props->properties = hk_fill_sparse_image_fmt_props(aspects);
+      props->properties = hk_fill_sparse_image_fmt_props(
+         vk_format_to_pipe_format(pFormatInfo->format), pFormatInfo->samples,
+         aspects);
    }
 }
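To make the granularity derivation concrete, here is a small standalone sketch for a hypothetical single-sampled 4-byte format. max_tile_size() is a stand-in for ail_get_max_tile_size(), and the 64x64 tile it returns is an assumption used only for this illustration; the only part that mirrors the hunk above is the doubling of each dimension.

#include <stdio.h>

struct tile {
   unsigned width_el, height_el;
};

/* Hypothetical stand-in for ail_get_max_tile_size(): a 4-byte texel is
 * assumed to use a 64x64 tile, i.e. 64 * 64 * 4 B = 16 KiB, one page. */
static struct tile
max_tile_size(unsigned blocksize_B)
{
   if (blocksize_B == 4)
      return (struct tile){64, 64};

   /* Other block sizes are omitted in this sketch. */
   return (struct tile){0, 0};
}

int main(void)
{
   unsigned blocksize_B = 4 * 1; /* bytes per texel times sample count */
   struct tile t = max_tile_size(blocksize_B);

   /* Double each dimension: an API block gangs 2x2 hardware tiles. */
   printf("imageGranularity = %ux%ux1\n", t.width_el * 2, t.height_el * 2);
   return 0; /* prints 128x128x1 for this assumed format */
}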
@@ -881,16 +918,35 @@ hk_image_plane_alloc_vma(struct hk_device *dev, struct hk_image_plane *plane,
    assert(sparse_bound || !sparse_resident);
 
    if (sparse_bound) {
-      plane->vma_size_B = plane->layout.size_B;
-#if 0
-      plane->addr = nouveau_ws_alloc_vma(dev->ws_dev, 0, plane->vma_size_B,
-                                         plane->layout.align_B,
-                                         false, sparse_resident);
-#endif
+      plane->va =
+         agx_va_alloc(&dev->dev, align(plane->layout.size_B, HK_SPARSE_ALIGN_B),
+                      AIL_PAGESIZE, 0, 0);
+      plane->addr = plane->va->addr;
+
       if (plane->addr == 0) {
          return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                           "Sparse VMA allocation failed");
       }
+
+      /* Bind scratch pages to discard writes, including from lowered software
+       * texture atomics. Reads will use the hardware texture unit sparse
+       * handling to properly handle residency queries.
+       *
+       * In the future we could optimize this out using the PBE sparse support
+       * but that needs more reverse-engineering.
+       */
+      hk_bind_scratch(dev, plane->va, 0, plane->layout.size_B);
+   }
+
+   if (sparse_resident) {
+      plane->sparse_map =
+         agx_bo_create(&dev->dev, plane->layout.sparse_table_size_B,
+                       AIL_PAGESIZE, 0, "Sparse map");
+
+      /* Zero-initialize the sparse map. This ensures all tiles are disabled,
+       * which provides correct behaviour for unmapped tiles.
+       */
+      memset(agx_bo_map(plane->sparse_map), 0,
+             plane->layout.sparse_table_size_B);
    }
 
    return VK_SUCCESS;
@@ -901,16 +957,11 @@ hk_image_plane_finish(struct hk_device *dev, struct hk_image_plane *plane,
                       VkImageCreateFlags create_flags,
                       const VkAllocationCallbacks *pAllocator)
 {
-   if (plane->vma_size_B) {
-#if 0
-      const bool sparse_resident =
-         create_flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT;
-
-      agx_bo_unbind_vma(dev->ws_dev, plane->addr, plane->vma_size_B);
-      nouveau_ws_free_vma(dev->ws_dev, plane->addr, plane->vma_size_B,
-                          false, sparse_resident);
-#endif
+   if (plane->va) {
+      agx_va_free(&dev->dev, plane->va, true);
    }
+
+   agx_bo_unreference(&dev->dev, plane->sparse_map);
 }
 
 static void
@@ -988,14 +1039,15 @@ hk_DestroyImage(VkDevice device, VkImage _image,
 }
 
 static void
-hk_image_plane_add_req(struct hk_image_plane *plane, uint64_t *size_B,
-                       uint32_t *align_B)
+hk_image_plane_add_req(struct hk_image_plane *plane, bool sparse,
+                       uint64_t *size_B, uint32_t *align_B)
 {
+   unsigned plane_align_B = sparse ? HK_SPARSE_ALIGN_B : HK_PLANE_ALIGN_B;
    assert(util_is_power_of_two_or_zero64(*align_B));
-   assert(util_is_power_of_two_or_zero64(HK_PLANE_ALIGN_B));
+   assert(util_is_power_of_two_or_zero64(plane_align_B));
 
-   *align_B = MAX2(*align_B, HK_PLANE_ALIGN_B);
-   *size_B = align64(*size_B, HK_PLANE_ALIGN_B);
+   *align_B = MAX2(*align_B, plane_align_B);
+   *size_B = align64(*size_B, plane_align_B);
    *size_B += plane->layout.size_B;
 }
@@ -1006,17 +1058,26 @@ hk_get_image_memory_requirements(struct hk_device *dev, struct hk_image *image,
 {
    struct hk_physical_device *pdev = hk_device_physical(dev);
    uint32_t memory_types = (1 << pdev->mem_type_count) - 1;
+   bool sparse =
+      image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+                                VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT);
 
-   // TODO hope for the best?
    uint64_t size_B = 0;
    uint32_t align_B = 0;
    if (image->disjoint) {
       uint8_t plane = hk_image_aspects_to_plane(image, aspects);
-      hk_image_plane_add_req(&image->planes[plane], &size_B, &align_B);
+      hk_image_plane_add_req(&image->planes[plane], sparse, &size_B, &align_B);
    } else {
       for (unsigned plane = 0; plane < image->plane_count; plane++)
-         hk_image_plane_add_req(&image->planes[plane], &size_B, &align_B);
+         hk_image_plane_add_req(&image->planes[plane], sparse, &size_B,
+                                &align_B);
+   }
+
+   /* For sparse binding, we need to pad to the standard alignment so we don't
+    * clobber over things when we bind memory.
+    */
+   if (sparse) {
+      size_B = align64(size_B, align_B);
    }
 
    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
@@ -1079,17 +1140,38 @@ hk_fill_sparse_image_memory_reqs(const struct ail_layout *layout,
                                  VkImageAspectFlags aspects)
 {
    VkSparseImageFormatProperties sparse_format_props =
-      hk_fill_sparse_image_fmt_props(aspects);
+      hk_fill_sparse_image_fmt_props(layout->format, layout->sample_count_sa,
+                                     aspects);
 
-   // assert(layout->mip_tail_first_lod <= layout->num_levels);
+   unsigned tail_level = layout->mip_tail_first_lod;
+   assert(tail_level <= layout->levels);
    VkSparseImageMemoryRequirements sparse_memory_reqs = {
       .formatProperties = sparse_format_props,
-      .imageMipTailFirstLod = 0, // layout->mip_tail_first_lod,
+      .imageMipTailFirstLod = layout->mip_tail_first_lod,
       .imageMipTailStride = 0,
    };
 
-   sparse_memory_reqs.imageMipTailSize = layout->size_B;
-   sparse_memory_reqs.imageMipTailOffset = 0;
+   /* imageMipTailSize must be aligned to the sparse block size (65k). This
+    * requires us to manage the miptail manually, because 16k is the actual
+    * hardware alignment here so we need to give the illusion of extra
+    * padding. Annoying!
+    */
+   if (tail_level == 0) {
+      sparse_memory_reqs.imageMipTailSize =
+         align(layout->size_B, HK_SPARSE_ALIGN_B);
+
+      sparse_memory_reqs.imageMipTailOffset = 0;
+   } else if (tail_level < layout->levels) {
+      sparse_memory_reqs.imageMipTailSize =
+         align(layout->mip_tail_stride * layout->depth_px, HK_SPARSE_ALIGN_B);
+
+      /* TODO: sparse metadata */
+      sparse_memory_reqs.imageMipTailOffset = HK_MIP_TAIL_START_OFFSET;
+   } else {
+      sparse_memory_reqs.imageMipTailSize = 0;
+      sparse_memory_reqs.imageMipTailOffset = HK_MIP_TAIL_START_OFFSET;
+   }
 
    return sparse_memory_reqs;
 }
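As a worked illustration of that padding, with made-up numbers: if the per-layer mip tail occupies two 16 KiB pages (32768 B) and the image has 3 layers, the reported imageMipTailSize is align(32768 * 3, 65536) = align(98304, 65536) = 131072 B, even though only 98304 B of payload exist; the extra space is pure padding to satisfy the 64 KiB standard block granularity.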
@@ -1176,8 +1258,10 @@ hk_get_image_subresource_layout(UNUSED struct hk_device *dev,
    uint64_t offset_B = 0;
    if (!image->disjoint) {
       uint32_t align_B = 0;
+      /* TODO: sparse? */
       for (unsigned plane = 0; plane < p; plane++)
-         hk_image_plane_add_req(&image->planes[plane], &offset_B, &align_B);
+         hk_image_plane_add_req(&image->planes[plane], false, &offset_B,
+                                &align_B);
    }
    offset_B +=
       ail_get_layer_level_B(&plane->layout, isr->arrayLayer, isr->mipLevel);

@@ -1245,12 +1329,12 @@ hk_image_plane_bind(struct hk_device *dev, struct hk_image_plane *plane,
 {
    *offset_B = align64(*offset_B, HK_PLANE_ALIGN_B);
 
-   if (plane->vma_size_B) {
+   if (plane->va) {
 #if 0
       agx_bo_bind_vma(dev->ws_dev,
                       mem->bo,
                       plane->addr,
-                      plane->vma_size_B,
+                      plane->va,
                       *offset_B,
                       plane->nil.pte_kind);
 #endif
@@ -52,13 +52,16 @@ hk_get_image_format_features(struct hk_physical_device *pdevice,
 struct hk_image_plane {
    struct ail_layout layout;
    uint64_t addr;
+   struct agx_va *va;
 
-   /** Size of the reserved VMA range for sparse images, zero otherwise. */
-   uint64_t vma_size_B;
-
    /* For host image copy */
    void *map;
    uint32_t rem;
+
+   /* If the image has sparse residency, its residency is tracked in this
+    * secondary page table. Otherwise, this map is NULL.
+    */
+   struct agx_bo *sparse_map;
 };
 
 struct hk_image {
@@ -198,7 +198,8 @@ pack_texture(struct hk_image_view *view, unsigned view_plane,
 {
    struct hk_image *image = container_of(view->vk.image, struct hk_image, vk);
    const uint8_t image_plane = view->planes[view_plane].image_plane;
-   struct ail_layout *layout = &image->planes[image_plane].layout;
+   struct hk_image_plane *plane = &image->planes[image_plane];
+   struct ail_layout *layout = &plane->layout;
    uint64_t base_addr = hk_image_base_address(image, image_plane);
 
    bool cubes_to_2d = usage != HK_DESC_USAGE_SAMPLED;

@@ -282,6 +283,42 @@ pack_texture(struct hk_image_view *view, unsigned view_plane,
          cfg.last_level = level + view->vk.level_count - 1;
       }
 
+      /* To implement sparse resident textures, the hardware texture descriptor
+       * can instead point to a secondary page table controlled in userspace.
+       * This allows remapping pages and - crucially - disabling unmapped pages
+       * to read zero and report non-resident with shader residency queries.
+       * When we have a sparse map, we need to point to it here.
+       *
+       * However, there's a wrinkle: when handling uncompressed views of
+       * compressed images in the above code, we need to offset the image
+       * address to point to the specific mip level rather than use the hardware
+       * "first level" field. This ensures the layouts are consistent despite us
+       * munging the image dimensions. In that case, we need to also offset the
+       * sparse page table accordingly. Of course, the sparse page table is in
+       * terms of pages, so this trick only works when the mip level is
+       * page-aligned.
+       *
+       * However, if the mip level is NOT page-aligned, it is in the mip tail by
+       * definition. As the mip tail is always resident, there is no need for a
+       * sparse page table. So either:
+       *
+       * 1. We are in the mip tail and don't need a sparse map, or
+       * 2. We are not but the level is page-aligned in the sparse map.
+       *
+       * Either way we're okay.
+       */
+      if (plane->sparse_map && level < layout->mip_tail_first_lod) {
+         unsigned page = 0;
+         if (denom.x > 1) {
+            page = ail_bytes_to_pages(layout->level_offsets_B[level]);
+         }
+
+         cfg.mode = AGX_IMAGE_MODE_SPARSE;
+         cfg.address = plane->sparse_map->va->addr +
+                       ail_page_to_sparse_index_el(layout, layer, page) *
+                          AIL_SPARSE_ELSIZE_B;
+      }
+
       cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
       cfg.unk_mipmapped = layout->levels > 1;
       cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3;
@@ -337,6 +337,7 @@ lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr,
    /* Reads and queries use the texture descriptor; writes and atomics PBE. */
    unsigned offs;
    if (intr->intrinsic != nir_intrinsic_image_deref_load &&
+       intr->intrinsic != nir_intrinsic_image_deref_sparse_load &&
        intr->intrinsic != nir_intrinsic_image_deref_size &&
        intr->intrinsic != nir_intrinsic_image_deref_samples) {
@@ -212,6 +212,7 @@ hk_get_device_extensions(const struct hk_instance *instance,
 
 static void
 hk_get_device_features(
+   const struct agx_device *dev,
    const struct vk_device_extension_table *supported_extensions,
    struct vk_features *features)
 {

@@ -260,15 +261,28 @@ hk_get_device_features(
       .shaderFloat64 = false,
       .shaderInt64 = true,
       .shaderInt16 = true,
-      .shaderResourceResidency = false,
+      .shaderResourceResidency = true,
       .shaderResourceMinLod = true,
-      .sparseBinding = false,
+      .sparseBinding = true,
+
+      /* We probably could advertise multisampled sparse but we don't have a use
+       * case yet and it isn't trivial.
+       */
       .sparseResidency2Samples = false,
       .sparseResidency4Samples = false,
       .sparseResidency8Samples = false,
-      .sparseResidencyAliased = false,
-      .sparseResidencyBuffer = false,
-      .sparseResidencyImage2D = false,
+      .sparseResidencyAliased = true,
+      .sparseResidencyImage2D = true,
+
+      /* We depend on soft fault to implement sparse residency on buffers with
+       * the appropriate semantics. Lifting this requirement would be possible
+       * but challenging, given the requirements imposed by
+       * sparseResidencyNonResidentStrict.
+       */
+      .sparseResidencyBuffer =
+         (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS),
+
+      /* This needs investigation. */
      .sparseResidencyImage3D = false,
      .variableMultisampleRate = false,
      .inheritedQueries = true,
@@ -736,10 +750,18 @@ hk_get_device_properties(const struct agx_device *dev,
       .nonCoherentAtomSize = 64,
 
       /* Vulkan 1.0 sparse properties */
-      .sparseResidencyNonResidentStrict = false,
+      .sparseResidencyNonResidentStrict = true,
       .sparseResidencyAlignedMipSize = false,
-      .sparseResidencyStandard2DBlockShape = false,
+      .sparseResidencyStandard2DBlockShape = true,
+
+      /* We can implement the standard block size for MSAA 4x but maybe not MSAA
+       * 2x?
+       */
       .sparseResidencyStandard2DMultisampleBlockShape = false,
+
+      /* As far as I can tell, there is no way to implement this on G13. This
+       * is a shame because D3D12 requires it for FL12.2.
+       */
       .sparseResidencyStandard3DBlockShape = false,
 
       /* Vulkan 1.1 properties */
@@ -1166,7 +1188,8 @@ hk_create_drm_physical_device(struct vk_instance *_instance,
    hk_get_device_extensions(instance, &supported_extensions);
 
    struct vk_features supported_features;
-   hk_get_device_features(&supported_extensions, &supported_features);
+   hk_get_device_features(&pdev->dev, &supported_extensions,
+                          &supported_features);
 
    struct vk_properties properties;
    hk_get_device_properties(&pdev->dev, instance, &properties);

@@ -1216,10 +1239,9 @@ hk_create_drm_physical_device(struct vk_instance *_instance,
    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
 
-   /* TODO: VK_QUEUE_SPARSE_BINDING_BIT*/
    pdev->queue_families[pdev->queue_family_count++] = (struct hk_queue_family){
-      .queue_flags =
-         VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
+      .queue_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
+                     VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
+
       .queue_count = 1,
    };
@@ -5,9 +5,15 @@
  * Copyright 2024 Valve Corporation
  * Copyright 2024 Alyssa Rosenzweig
  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
  * SPDX-License-Identifier: MIT
  */
 #include "hk_queue.h"
+#include "hk_buffer.h"
 
 #include "agx_bg_eot.h"
 #include "agx_bo.h"

@@ -16,13 +22,17 @@
 #include "decode.h"
 #include "hk_cmd_buffer.h"
 #include "hk_device.h"
+#include "hk_image.h"
 #include "hk_physical_device.h"
 
 #include <xf86drm.h>
 #include "asahi/lib/unstable_asahi_drm.h"
 #include "util/list.h"
+#include "util/macros.h"
 #include "vulkan/vulkan_core.h"
 
+#include "hk_private.h"
+#include "layout.h"
 #include "vk_drm_syncobj.h"
 #include "vk_sync.h"
@@ -426,10 +436,328 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
    return VK_SUCCESS;
 }
 
+struct hk_bind_builder {
+   /* Initialized */
+   struct hk_device *dev;
+   struct vk_object_base *obj_base;
+   struct agx_va *va;
+   struct hk_image *image;
+
+   /* State */
+   struct hk_device_memory *mem;
+   VkDeviceSize resourceOffset;
+   VkDeviceSize size;
+   VkDeviceSize memoryOffset;
+   VkResult result;
+};
+
+static inline struct hk_bind_builder
+hk_bind_builder(struct hk_device *dev, struct vk_object_base *obj_base,
+                struct agx_va *va, struct hk_image *image)
+{
+   return (struct hk_bind_builder){
+      .dev = dev,
+      .obj_base = obj_base,
+      .va = va,
+      .image = image,
+   };
+}
+
+static VkResult
+hk_flush_bind(struct hk_bind_builder *b)
+{
+   if (b->result != VK_SUCCESS || b->size == 0) {
+      return b->result;
+   }
+
+   uint64_t va_addr = b->va->addr + b->resourceOffset;
+
+   /* If we have an image with sparse residency, we have a userspace-managed
+    * sparse page table map, which we need to keep in sync with the real
+    * kernel-managed page table. This ensures textures get strict residency
+    * semantics, using the hardware sparse support.
+    */
+   if (b->image && b->image->planes[0].sparse_map != NULL) {
+      assert(b->image->plane_count == 1 && "multiplane sparse not supported");
+
+      uint32_t *map = agx_bo_map(b->image->planes[0].sparse_map);
+      uint64_t size_page = ail_bytes_to_pages(b->size);
+
+      struct ail_layout *layout = &b->image->planes[0].layout;
+      uint64_t layer_stride_page = ail_bytes_to_pages(layout->layer_stride_B);
+
+      for (unsigned offs_page = 0; offs_page < size_page; offs_page++) {
+         /* Determine the target page to bind */
+         uint64_t target_page =
+            ail_bytes_to_pages(b->resourceOffset) + offs_page;
+
+         /* The page table is per-layer. Fortunately, layers are page-aligned,
+          * so we can divide to find the layer & the page relative to the start
+          * of the layer, which give us the index into the sparse map.
+          *
+          * Note that we can end up out-of-bounds since the hardware page size
+          * (16k) is smaller than the Vulkan standard sparse block size (65k).
+          * Just clamp out-of-bounds maps - there is sufficient VA space for
+          * them but not sufficient sparse map space for them.
+          */
+         uint64_t z = target_page / layer_stride_page;
+         if (z >= layout->depth_px)
+            break;
+
+         uint64_t page_in_layer = target_page % layer_stride_page;
+         unsigned idx = ail_page_to_sparse_index_el(layout, z, page_in_layer);
+
+         agx_pack(map + idx, SPARSE_BLOCK, cfg) {
+            cfg.enabled = b->mem != NULL;
+            cfg.unknown = cfg.enabled;
+
+            if (cfg.enabled) {
+               cfg.address = va_addr + (offs_page * AIL_PAGESIZE);
+            }
+         }
+      }
+   }
+
+   /* When the app wants to unbind, replace the bound pages with scratch pages
+    * so we don't leave a gap.
+    */
+   if (!b->mem) {
+      return hk_bind_scratch(b->dev, b->va, b->resourceOffset, b->size);
+   } else {
+      return b->dev->dev.ops.bo_bind(&b->dev->dev, b->mem->bo, va_addr, b->size,
+                                     b->memoryOffset,
+                                     ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+   }
+}
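A quick numeric walk-through of the index math above, using hypothetical values: with a layer stride of 4 MiB, layer_stride_page = 4 MiB / 16 KiB = 256; a bind that reaches target page 600 therefore lands in layer z = 600 / 256 = 2 at page_in_layer = 600 % 256 = 88, and that (layer, page) pair is what ail_page_to_sparse_index_el() turns into an element index in the sparse map.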
+
+static void
+hk_add_bind(struct hk_bind_builder *b, struct hk_device_memory *mem,
+            VkDeviceSize resourceOffset, VkDeviceSize size,
+            VkDeviceSize memoryOffset)
+{
+   /* Discard trivial binds to simplify the below logic. */
+   if (size == 0)
+      return;
+
+   /* Try to merge with the previous bind */
+   if (b->size && b->mem == mem &&
+       resourceOffset == b->resourceOffset + b->size &&
+       (!mem || memoryOffset == b->memoryOffset + b->size)) {
+
+      b->size += size;
+      return;
+   }
+
+   /* Otherwise, flush the previous bind and replace with the new one */
+   hk_flush_bind(b);
+   b->mem = mem;
+   b->resourceOffset = resourceOffset;
+   b->size = size;
+   b->memoryOffset = memoryOffset;
+}
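The merge rule above can also be read in isolation. The following self-contained sketch models just that condition; struct bind and can_merge() are hypothetical stand-ins (memory is reduced to an integer id, 0 meaning unbind), not driver code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct bind {
   uint64_t mem; /* 0 = unbind */
   uint64_t res_off, size, mem_off;
};

/* Mirrors the condition in hk_add_bind(): same memory object, resource range
 * contiguous, and (for real binds) memory range contiguous as well. */
static bool
can_merge(const struct bind *prev, const struct bind *next)
{
   return prev->size && prev->mem == next->mem &&
          next->res_off == prev->res_off + prev->size &&
          (!next->mem || next->mem_off == prev->mem_off + prev->size);
}

int main(void)
{
   struct bind a = {1, 0, 65536, 0};
   struct bind b = {1, 65536, 65536, 65536};   /* contiguous: coalesces */
   struct bind c = {1, 262144, 65536, 131072}; /* gap in the resource range */

   assert(can_merge(&a, &b));  /* one 128 KiB bo_bind instead of two */
   assert(!can_merge(&a, &c)); /* must flush and start a new bind */
   return 0;
}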
+
+static VkResult
+hk_sparse_buffer_bind_memory(struct hk_device *device,
+                             const VkSparseBufferMemoryBindInfo *bind)
+{
+   VK_FROM_HANDLE(hk_buffer, buffer, bind->buffer);
+
+   struct hk_bind_builder b =
+      hk_bind_builder(device, &buffer->vk.base, buffer->va, NULL);
+
+   for (uint32_t i = 0; i < bind->bindCount; ++i) {
+      struct hk_device_memory *cur_mem = NULL;
+
+      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+         cur_mem = hk_device_memory_from_handle(bind->pBinds[i].memory);
+
+      hk_add_bind(&b, cur_mem, bind->pBinds[i].resourceOffset,
+                  bind->pBinds[i].size, bind->pBinds[i].memoryOffset);
+   }
+
+   return hk_flush_bind(&b);
+}
+
+static VkResult
+hk_sparse_image_opaque_bind_memory(
+   struct hk_device *device, const VkSparseImageOpaqueMemoryBindInfo *bind)
+{
+   VK_FROM_HANDLE(hk_image, image, bind->image);
+
+   struct hk_bind_builder b =
+      hk_bind_builder(device, &image->vk.base, image->planes[0].va, image);
+
+   for (uint32_t i = 0; i < bind->bindCount; ++i) {
+      struct hk_device_memory *mem = NULL;
+      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+         mem = hk_device_memory_from_handle(bind->pBinds[i].memory);
+
+      VkDeviceSize resourceOffset = bind->pBinds[i].resourceOffset;
+
+      /* Conceptually, the miptail is a single region at the end of the image,
+       * possibly layered. However, due to alignment requirements we need to
+       * use a non-layered miptail and internally fan out to each of the layers.
+       * This is facilitated by the HK_MIP_TAIL_START_OFFSET magic offset, see
+       * the comment where that is defined for more detail.
+       */
+      if (resourceOffset >= HK_MIP_TAIL_START_OFFSET) {
+         assert(resourceOffset == HK_MIP_TAIL_START_OFFSET &&
+                "must bind whole miptail... maybe...");
+
+         const struct ail_layout *layout = &image->planes[0].layout;
+         unsigned tail_offset_B =
+            layout->level_offsets_B[layout->mip_tail_first_lod];
+
+         for (unsigned z = 0; z < layout->depth_px; ++z) {
+            uint64_t image_offs = tail_offset_B + (z * layout->layer_stride_B);
+            uint64_t mem_offs =
+               bind->pBinds[i].memoryOffset + (z * layout->mip_tail_stride);
+
+            hk_add_bind(&b, mem, image_offs, layout->mip_tail_stride, mem_offs);
+         }
+      } else {
+         hk_add_bind(&b, mem, bind->pBinds[i].resourceOffset,
+                     bind->pBinds[i].size, bind->pBinds[i].memoryOffset);
+      }
+   }
+
+   return hk_flush_bind(&b);
+}
+
+static void
+bind_hw_tile(struct hk_bind_builder *b, struct hk_device_memory *mem,
+             struct ail_layout *layout, unsigned layer, unsigned level,
+             VkOffset3D offset, VkExtent3D extent, struct ail_tile std_size_el,
+             unsigned mem_offset, unsigned x, unsigned y, unsigned z)
+{
+   uint64_t bo_offset_B = ail_get_twiddled_block_B(
+      layout, level, offset.x + x, offset.y + y, layer + offset.z + z);
+
+   /* Consider the standard tiles in the bound memory to be in raster order, and
+    * address accordingly in standard tiles.
+    */
+   unsigned mem_x_stl = x / std_size_el.width_el;
+   unsigned mem_y_stl = y / std_size_el.height_el;
+
+   unsigned extent_w_stl = DIV_ROUND_UP(extent.width, std_size_el.width_el);
+   unsigned extent_y_stl = DIV_ROUND_UP(extent.height, std_size_el.height_el);
+   unsigned mem_offs_stl = (extent_y_stl * extent_w_stl * z) +
+                           (extent_w_stl * mem_y_stl) + mem_x_stl;
+
+   /* There are 4 hardware tiles per standard tile, so offset
+    * accordingly for each hardware tile.
+    */
+   unsigned mem_offset_B = mem_offset + (mem_offs_stl * 4 * AIL_PAGESIZE);
+
+   if (x % std_size_el.width_el)
+      mem_offset_B += AIL_PAGESIZE;
+
+   if (y % std_size_el.height_el)
+      mem_offset_B += (2 * AIL_PAGESIZE);
+
+   hk_add_bind(b, mem, bo_offset_B, AIL_PAGESIZE, mem_offset_B);
+}
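A worked example of the memory-side addressing in bind_hw_tile(), restated as standalone code. The 16 KiB page and the 64x64-element hardware tile (so a 128x128 standard tile) are assumptions used only for this illustration.

#include <assert.h>

#define PAGE_B 16384 /* assumed hardware tile / page size */

/* Restates bind_hw_tile()'s memory offset: standard tiles in raster order,
 * four pages per standard tile, plus one page for the right half and two
 * pages for the bottom half of the 2x2 gang. */
static unsigned
mem_offset_for_tile(unsigned x, unsigned y, unsigned z,
                    unsigned extent_w_el, unsigned extent_h_el,
                    unsigned std_w_el, unsigned std_h_el)
{
   unsigned mem_x_stl = x / std_w_el;
   unsigned mem_y_stl = y / std_h_el;
   unsigned extent_w_stl = (extent_w_el + std_w_el - 1) / std_w_el;
   unsigned extent_h_stl = (extent_h_el + std_h_el - 1) / std_h_el;
   unsigned stl = (extent_h_stl * extent_w_stl * z) +
                  (extent_w_stl * mem_y_stl) + mem_x_stl;

   unsigned off = stl * 4 * PAGE_B;
   if (x % std_w_el)
      off += PAGE_B;
   if (y % std_h_el)
      off += 2 * PAGE_B;
   return off;
}

int main(void)
{
   /* 256x256-element bind region, 64x64 hardware tiles, 128x128 standard
    * tiles: the hardware tile at (192, 64) is the bottom-right quarter of
    * standard tile #1, so it sits 65536 + 16384 + 32768 = 114688 bytes into
    * the bound memory. */
   assert(mem_offset_for_tile(192, 64, 0, 256, 256, 128, 128) == 114688);
   return 0;
}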
+
+static VkResult
+hk_sparse_image_bind_memory(struct hk_device *device,
+                            const VkSparseImageMemoryBindInfo *bind)
+{
+   VK_FROM_HANDLE(hk_image, image, bind->image);
+   struct ail_layout *layout = &image->planes[0].layout;
+
+   struct hk_bind_builder b =
+      hk_bind_builder(device, &image->vk.base, image->planes[0].va, image);
+
+   for (uint32_t i = 0; i < bind->bindCount; ++i) {
+      struct hk_device_memory *mem = NULL;
+      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+         mem = hk_device_memory_from_handle(bind->pBinds[i].memory);
+
+      uint64_t mem_offset = bind->pBinds[i].memoryOffset;
+      const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
+      const uint32_t level = bind->pBinds[i].subresource.mipLevel;
+
+      VkExtent3D bind_extent = bind->pBinds[i].extent;
+      bind_extent.width = DIV_ROUND_UP(
+         bind_extent.width, vk_format_get_blockwidth(image->vk.format));
+      bind_extent.height = DIV_ROUND_UP(
+         bind_extent.height, vk_format_get_blockheight(image->vk.format));
+
+      VkOffset3D bind_offset = bind->pBinds[i].offset;
+      bind_offset.x /= vk_format_get_blockwidth(image->vk.format);
+      bind_offset.y /= vk_format_get_blockheight(image->vk.format);
+
+      /* Hardware tiles are exactly one page (16K) */
+      struct ail_tile tilesize_el = layout->tilesize_el[level];
+      unsigned size_B = tilesize_el.width_el * tilesize_el.height_el *
+                        ail_get_blocksize_B(layout);
+
+      assert(size_B == AIL_PAGESIZE && "fundamental to AGX");
+
+      /* Standard tiles are exactly 4 pages (65K), consisting of a 2x2 grid of
+       * hardware tiles.
+       */
+      struct ail_tile std_size_el = tilesize_el;
+      std_size_el.width_el *= 2;
+      std_size_el.height_el *= 2;
+
+      for (unsigned z = 0; z < bind_extent.depth; z += 1) {
+         for (unsigned y = 0; y < bind_extent.height;
+              y += tilesize_el.height_el) {
+            for (unsigned x = 0; x < bind_extent.width;
+                 x += tilesize_el.width_el) {
+               bind_hw_tile(&b, mem, layout, layer, level, bind_offset,
+                            bind_extent, std_size_el, mem_offset, x, y, z);
+            }
+         }
+      }
+   }
+
+   return hk_flush_bind(&b);
+}
+
+static VkResult
+hk_queue_submit_bind_sparse_memory(struct hk_device *device,
+                                   struct vk_queue_submit *submission)
+{
+   assert(submission->command_buffer_count == 0);
+
+   for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
+      VkResult result =
+         hk_sparse_buffer_bind_memory(device, submission->buffer_binds + i);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
+      VkResult result = hk_sparse_image_opaque_bind_memory(
+         device, submission->image_opaque_binds + i);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
+      VkResult result =
+         hk_sparse_image_bind_memory(device, submission->image_binds + i);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 queue_submit(struct hk_device *dev, struct hk_queue *queue,
              struct vk_queue_submit *submit)
 {
+   /* TODO: Support asynchronous sparse queue? */
+   if (submit->buffer_bind_count || submit->image_bind_count ||
+       submit->image_opaque_bind_count) {
+
+      VkResult result = hk_queue_submit_bind_sparse_memory(dev, submit);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
    unsigned command_count = 0;
 
    /* Gather the number of individual commands to submit up front */