dzn: Add initial bindless infrastructure

When operating in "bindless" mode, the device will own 2 descriptor
heaps, one for views, and one for samplers. Every time a view is
created (image view, buffer view), a slot is allocated for it out
of the device view heap for each usage type (sampled vs storage).

Then, in a future change, descriptor sets will just contain view/
sampler indices instead of actual descriptors. Instead of copying
these to a cmdbuf-owned descriptor heap, we can directly bind the
descriptor set as a buffer. We'll also modify shaders to perform
an indirection and index into the device heap.

Buffers also get views set up on creation. In a perfect world, we
could just put addresses/sizes in the descriptor set, but DXIL
doesn't support loading from raw addresses, so we need descriptors. When
robust buffer access is disabled *or* descriptor set buffer views
reference the remainder of the buffer, we can just re-use a view
from the buffer and use an offset.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21913>
This commit is contained in:
Jesse Natalie 2023-03-02 10:06:13 -08:00 committed by Marge Bot
parent f977c10f3c
commit 4c4431b674
4 changed files with 240 additions and 13 deletions

View file

@ -815,14 +815,14 @@ desc_type_to_heap_type(VkDescriptorType in)
}
}
static void
void
dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap)
{
if (heap->heap)
ID3D12DescriptorHeap_Release(heap->heap);
}
static VkResult
VkResult
dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap,
struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type,
@ -874,7 +874,7 @@ dzn_descriptor_heap_get_gpu_handle(const struct dzn_descriptor_heap *heap, uint3
};
}
static void
void
dzn_descriptor_heap_write_sampler_desc(struct dzn_device *device,
struct dzn_descriptor_heap *heap,
uint32_t desc_offset,
@ -933,7 +933,7 @@ dzn_descriptor_heap_write_image_view_desc(struct dzn_device *device,
}
}
static void
void
dzn_descriptor_heap_write_buffer_view_desc(struct dzn_device *device,
struct dzn_descriptor_heap *heap,
uint32_t desc_offset,
@ -969,7 +969,7 @@ dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
assert(!writeable);
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {
.BufferLocation = info->buffer->gpuva + info->offset,
.SizeInBytes = ALIGN_POT(size, 256),
.SizeInBytes = MIN2(ALIGN_POT(size, 256), D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4 * sizeof(float)),
};
ID3D12Device1_CreateConstantBufferView(device->dev, &cbv_desc, view_handle);
} else if (writeable) {

View file

@ -2168,6 +2168,12 @@ dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllo
dzn_device_query_finish(device);
dzn_meta_finish(device);
dzn_foreach_pool_type(type) {
dzn_descriptor_heap_finish(&device->device_heaps[type].heap);
util_dynarray_fini(&device->device_heaps[type].slot_freelist);
mtx_destroy(&device->device_heaps[type].lock);
}
if (device->dev_config)
ID3D12DeviceConfiguration_Release(device->dev_config);
@ -2357,6 +2363,23 @@ dzn_device_create(struct dzn_physical_device *pdev,
device->need_swapchain_blits = true;
}
if (device->bindless) {
dzn_foreach_pool_type(type) {
uint32_t descriptor_count = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ?
D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE :
D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
result = dzn_descriptor_heap_init(&device->device_heaps[type].heap, device, type, descriptor_count, true);
if (result != VK_SUCCESS) {
dzn_device_destroy(device, pAllocator);
return result;
}
mtx_init(&device->device_heaps[type].lock, mtx_plain);
util_dynarray_init(&device->device_heaps[type].slot_freelist, NULL);
device->device_heaps[type].next_alloc_slot = 0;
}
}
assert(queue_count == qindex);
*out = dzn_device_to_handle(device);
return VK_SUCCESS;
@ -2705,6 +2728,9 @@ dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocat
if (buf->res)
ID3D12Resource_Release(buf->res);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->cbv_bindless_slot);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->uav_bindless_slot);
vk_object_base_finish(&buf->base);
vk_free2(&device->vk.alloc, pAllocator, buf);
}
@ -2762,6 +2788,24 @@ dzn_buffer_create(struct dzn_device *device,
buf->valid_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
}
buf->cbv_bindless_slot = buf->uav_bindless_slot = -1;
if (device->bindless) {
if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
buf->cbv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (buf->cbv_bindless_slot < 0) {
dzn_buffer_destroy(buf, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
}
if (buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
buf->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (buf->uav_bindless_slot < 0) {
dzn_buffer_destroy(buf, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
}
}
*out = dzn_buffer_to_handle(buf);
return VK_SUCCESS;
}
@ -2958,6 +3002,30 @@ dzn_BindBufferMemory2(VkDevice _device,
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
buffer->gpuva = ID3D12Resource_GetGPUVirtualAddress(buffer->res);
if (device->bindless) {
struct dzn_buffer_desc buf_desc = {
.buffer = buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
if (buffer->cbv_bindless_slot >= 0) {
buf_desc.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dzn_descriptor_heap_write_buffer_desc(device,
&device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
buffer->cbv_bindless_slot,
false,
&buf_desc);
}
if (buffer->uav_bindless_slot >= 0) {
buf_desc.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
dzn_descriptor_heap_write_buffer_desc(device,
&device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
buffer->uav_bindless_slot,
true,
&buf_desc);
}
}
}
return VK_SUCCESS;
@ -3103,6 +3171,8 @@ dzn_sampler_destroy(struct dzn_sampler *sampler,
struct dzn_device *device =
container_of(sampler->base.device, struct dzn_device, vk);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler->bindless_slot);
vk_object_base_finish(&sampler->base);
vk_free2(&device->vk.alloc, pAllocator, sampler);
}
@ -3203,6 +3273,20 @@ dzn_sampler_create(struct dzn_device *device,
sampler->desc.Flags |= D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES;
#endif
sampler->bindless_slot = -1;
if (device->bindless) {
sampler->bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
if (sampler->bindless_slot < 0) {
dzn_sampler_destroy(sampler, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
dzn_descriptor_heap_write_sampler_desc(device,
&device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap,
sampler->bindless_slot,
sampler);
}
*out = dzn_sampler_to_handle(sampler);
return VK_SUCCESS;
}
@ -3225,6 +3309,39 @@ dzn_DestroySampler(VkDevice device,
dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator);
}
/* Allocate one descriptor slot from the device-wide bindless heap of the
 * given type. A previously freed slot is recycled from the freelist when
 * available; otherwise the linear bump counter is advanced. Returns the
 * slot index, or -1 if the heap is exhausted.
 * Thread-safe: freelist and bump counter are guarded by the per-heap lock.
 */
int
dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type)
{
struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
mtx_lock(&heap->lock);
int ret = -1;
/* Prefer recycled slots so the linear high-water mark grows slowly. */
if (heap->slot_freelist.size)
ret = util_dynarray_pop(&heap->slot_freelist, int);
else if (heap->next_alloc_slot < heap->heap.desc_count)
ret = heap->next_alloc_slot++;
mtx_unlock(&heap->lock);
return ret;
}
/* Return a slot previously handed out by
 * dzn_device_descriptor_heap_alloc_slot() to the per-type freelist.
 * Negative slots (the "never allocated" sentinel, -1) are ignored, so
 * callers may pass their cached slot unconditionally on teardown.
 * Thread-safe: freelist access is guarded by the per-heap lock.
 */
void
dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type,
int slot)
{
struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
/* Any non-sentinel slot must have come from this heap's range. */
assert(slot < 0 || slot < heap->heap.desc_count);
if (slot < 0)
return;
mtx_lock(&heap->lock);
/* NOTE(review): util_dynarray_append can fail silently on OOM, which
 * would leak this slot until device destruction — presumably acceptable
 * here; confirm against driver OOM policy. */
util_dynarray_append(&heap->slot_freelist, int, slot);
mtx_unlock(&heap->lock);
}
VKAPI_ATTR void VKAPI_CALL
dzn_GetDeviceGroupPeerMemoryFeatures(VkDevice device,
uint32_t heapIndex,

View file

@ -1362,6 +1362,9 @@ dzn_image_view_destroy(struct dzn_image_view *iview,
struct dzn_device *device = container_of(iview->vk.base.device, struct dzn_device, vk);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, iview->srv_bindless_slot);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, iview->uav_bindless_slot);
vk_image_view_finish(&iview->vk);
vk_free2(&device->vk.alloc, pAllocator, iview);
}
@ -1372,6 +1375,7 @@ dzn_image_view_create(struct dzn_device *device,
const VkAllocationCallbacks *pAllocator,
VkImageView *out)
{
VK_FROM_HANDLE(dzn_image, image, pCreateInfo->image);
struct dzn_image_view *iview =
vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@ -1380,6 +1384,36 @@ dzn_image_view_create(struct dzn_device *device,
dzn_image_view_init(device, iview, pCreateInfo);
iview->srv_bindless_slot = iview->uav_bindless_slot = -1;
if (device->bindless) {
if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) {
iview->srv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (iview->srv_bindless_slot < 0) {
dzn_image_view_destroy(iview, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
dzn_descriptor_heap_write_image_view_desc(device,
&device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
iview->srv_bindless_slot,
false, false,
iview);
}
if (iview->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
iview->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (iview->uav_bindless_slot < 0) {
dzn_image_view_destroy(iview, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
dzn_descriptor_heap_write_image_view_desc(device,
&device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
iview->uav_bindless_slot,
true, true,
iview);
}
}
*out = dzn_image_view_to_handle(iview);
return VK_SUCCESS;
}
@ -1411,6 +1445,9 @@ dzn_buffer_view_destroy(struct dzn_buffer_view *bview,
struct dzn_device *device = container_of(bview->base.device, struct dzn_device, vk);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, bview->srv_bindless_slot);
dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, bview->uav_bindless_slot);
vk_object_base_finish(&bview->base);
vk_free2(&device->vk.alloc, pAllocator, bview);
}
@ -1438,6 +1475,7 @@ dzn_buffer_view_create(struct dzn_device *device,
buf->size - pCreateInfo->offset : pCreateInfo->range;
bview->buffer = buf;
bview->srv_bindless_slot = bview->uav_bindless_slot = -1;
if (buf->usage &
(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) {
@ -1452,6 +1490,16 @@ dzn_buffer_view_create(struct dzn_device *device,
.Flags = D3D12_BUFFER_SRV_FLAG_NONE,
},
};
if (device->bindless) {
bview->srv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (bview->srv_bindless_slot < 0) {
dzn_buffer_view_destroy(bview, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
dzn_descriptor_heap_write_buffer_view_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
bview->srv_bindless_slot, false, bview);
}
}
if (buf->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
@ -1464,6 +1512,16 @@ dzn_buffer_view_create(struct dzn_device *device,
.Flags = D3D12_BUFFER_UAV_FLAG_NONE,
},
};
if (device->bindless) {
bview->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
if (bview->uav_bindless_slot < 0) {
dzn_buffer_view_destroy(bview, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
dzn_descriptor_heap_write_buffer_view_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
bview->uav_bindless_slot, true, bview);
}
}
*out = dzn_buffer_view_to_handle(bview);

View file

@ -256,6 +256,23 @@ struct dzn_queue {
uint64_t fence_point;
};
/* Wraps an ID3D12DescriptorHeap together with cached properties so that
 * per-slot CPU/GPU handles can be derived without further API calls. */
struct dzn_descriptor_heap {
ID3D12DescriptorHeap *heap;
SIZE_T cpu_base; /* base CPU descriptor handle (slot 0) */
uint64_t gpu_base; /* base GPU descriptor handle — presumably 0 for non-shader-visible heaps; confirm in dzn_descriptor_heap_init */
uint32_t desc_count; /* number of slots in the heap */
uint32_t desc_sz; /* per-slot handle increment, in bytes */
};
/* Device-owned shader-visible heap used in bindless mode. Slots are
 * handed out by dzn_device_descriptor_heap_alloc_slot() and recycled by
 * dzn_device_descriptor_heap_free_slot(). */
struct dzn_device_descriptor_heap {
struct dzn_descriptor_heap heap;
mtx_t lock; /* guards slot_freelist and next_alloc_slot */
struct util_dynarray slot_freelist; /* recycled int slot indices */
uint32_t next_alloc_slot; /* linear-allocation high-water mark */
};
#define NUM_POOL_TYPES D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1
struct dzn_device {
struct vk_device vk;
struct vk_device_extension_table enabled_extensions;
@ -283,6 +300,9 @@ struct dzn_device {
*/
bool need_swapchain_blits;
struct dzn_queue *swapchain_queue;
bool bindless;
struct dzn_device_descriptor_heap device_heaps[NUM_POOL_TYPES];
};
void dzn_meta_finish(struct dzn_device *device);
@ -345,7 +365,6 @@ enum dzn_cmd_dirty {
#define MAX_PUSH_CONSTANT_DWORDS 32
#define NUM_BIND_POINT VK_PIPELINE_BIND_POINT_COMPUTE + 1
#define NUM_POOL_TYPES D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1
#define dzn_foreach_pool_type(type) \
for (D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; \
@ -369,6 +388,7 @@ struct dzn_descriptor_state {
struct dzn_sampler;
struct dzn_image_view;
struct dzn_buffer_view;
struct dzn_buffer_desc {
VkDescriptorType type;
@ -380,13 +400,15 @@ struct dzn_buffer_desc {
#define MAX_DESCS_PER_SAMPLER_HEAP 2048u
#define MAX_DESCS_PER_CBV_SRV_UAV_HEAP 1000000u
struct dzn_descriptor_heap {
ID3D12DescriptorHeap *heap;
SIZE_T cpu_base;
uint64_t gpu_base;
uint32_t desc_count;
uint32_t desc_sz;
};
VkResult
dzn_descriptor_heap_init(struct dzn_descriptor_heap *heap,
struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type,
uint32_t desc_count,
bool shader_visible);
void
dzn_descriptor_heap_finish(struct dzn_descriptor_heap *heap);
D3D12_CPU_DESCRIPTOR_HANDLE
dzn_descriptor_heap_get_cpu_handle(const struct dzn_descriptor_heap *heap, uint32_t slot);
@ -402,6 +424,13 @@ dzn_descriptor_heap_write_image_view_desc(struct dzn_device *device,
bool cube_as_2darray,
const struct dzn_image_view *iview);
void
dzn_descriptor_heap_write_buffer_view_desc(struct dzn_device *device,
struct dzn_descriptor_heap *heap,
uint32_t heap_offset,
bool writeable,
const struct dzn_buffer_view *bview);
void
dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
struct dzn_descriptor_heap *heap,
@ -409,6 +438,12 @@ dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device,
bool writeable,
const struct dzn_buffer_desc *bdesc);
void
dzn_descriptor_heap_write_sampler_desc(struct dzn_device *device,
struct dzn_descriptor_heap *heap,
uint32_t desc_offset,
const struct dzn_sampler *sampler);
void
dzn_descriptor_heap_copy(struct dzn_device *device,
struct dzn_descriptor_heap *dst_heap, uint32_t dst_heap_offset,
@ -449,6 +484,15 @@ dzn_descriptor_heap_pool_alloc_slots(struct dzn_descriptor_heap_pool *pool,
struct dzn_descriptor_heap **heap,
uint32_t *first_slot);
int
dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type);
void
dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
D3D12_DESCRIPTOR_HEAP_TYPE type,
int slot);
struct dzn_cmd_buffer_query_range {
struct dzn_query_pool *qpool;
uint32_t start, count;
@ -1025,6 +1069,8 @@ struct dzn_image_view {
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
int srv_bindless_slot;
int uav_bindless_slot;
};
void
@ -1048,6 +1094,9 @@ struct dzn_buffer {
D3D12_BARRIER_ACCESS valid_access;
D3D12_GPU_VIRTUAL_ADDRESS gpuva;
int cbv_bindless_slot;
int uav_bindless_slot;
};
DXGI_FORMAT
@ -1075,12 +1124,15 @@ struct dzn_buffer_view {
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
int srv_bindless_slot;
int uav_bindless_slot;
};
struct dzn_sampler {
struct vk_object_base base;
D3D12_SAMPLER_DESC2 desc;
D3D12_STATIC_BORDER_COLOR static_border_color;
int bindless_slot;
};
/* This is defined as a macro so that it works for both