tu: Rewrite dynamic descriptor handling

We need to prepare for storage buffer descriptors having a different size
from uniform buffer descriptors. This switches dynamic_offset_offset to
have units of bytes, the same as offset, and as a nice bonus we can more
easily combine the dynamic and non-dynamic paths in various places.
This also entails rewriting the code that patches dynamic descriptors,
since we can no longer assume a linear mapping between indices in
pDynamicOffsets and descriptor locations, which the previous approach
heavily relied on.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15288>
This commit is contained in:
Connor Abbott 2022-02-23 16:16:03 +01:00 committed by Marge Bot
parent 6f7f6df287
commit 5ba3ea1eb3
5 changed files with 104 additions and 119 deletions

View file

@ -1839,34 +1839,41 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
descriptors_state->sets[idx] = set;
for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
/* update the contents of the dynamic descriptor set */
unsigned src_idx = j;
unsigned dst_idx = j + layout->set[idx].dynamic_offset_start;
assert(dyn_idx < dynamicOffsetCount);
if (!set->layout->dynamic_offset_size)
continue;
uint32_t *dst =
&descriptors_state->dynamic_descriptors[dst_idx * A6XX_TEX_CONST_DWORDS];
uint32_t *src =
&set->dynamic_descriptors[src_idx * A6XX_TEX_CONST_DWORDS];
uint32_t offset = pDynamicOffsets[dyn_idx];
uint32_t *src = set->dynamic_descriptors;
uint32_t *dst = descriptors_state->dynamic_descriptors +
layout->set[idx].dynamic_offset_start / 4;
for (unsigned j = 0; j < set->layout->binding_count; j++) {
struct tu_descriptor_set_binding_layout *binding =
&set->layout->binding[j];
if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
for (unsigned k = 0; k < binding->array_size; k++, dyn_idx++) {
assert(dyn_idx < dynamicOffsetCount);
uint32_t offset = pDynamicOffsets[dyn_idx];
memcpy(dst, src, binding->size);
/* Patch the storage/uniform descriptors right away. */
if (layout->set[idx].layout->dynamic_ubo & (1 << j)) {
/* Note: we can assume here that the addition won't roll over and
* change the SIZE field.
*/
uint64_t va = src[0] | ((uint64_t)src[1] << 32);
va += offset;
dst[0] = va;
dst[1] = va >> 32;
} else {
memcpy(dst, src, A6XX_TEX_CONST_DWORDS * 4);
/* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
uint64_t va = dst[4] | ((uint64_t)dst[5] << 32);
va += offset;
dst[4] = va;
dst[5] = va >> 32;
if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
/* Note: we can assume here that the addition won't roll
* over and change the SIZE field.
*/
uint64_t va = src[0] | ((uint64_t)src[1] << 32);
va += offset;
dst[0] = va;
dst[1] = va >> 32;
} else {
/* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
uint64_t va = dst[4] | ((uint64_t)dst[5] << 32);
va += offset;
dst[4] = va;
dst[5] = va >> 32;
}
dst += binding->size / 4;
src += binding->size / 4;
}
}
}
}
@ -1882,10 +1889,11 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
addr[i] = set->va | 3;
}
if (layout->dynamic_offset_count) {
if (layout->dynamic_offset_size) {
/* allocate and fill out dynamic descriptor set */
struct tu_cs_memory dynamic_desc_set;
VkResult result = tu_cs_alloc(&cmd->sub_cs, layout->dynamic_offset_count,
VkResult result = tu_cs_alloc(&cmd->sub_cs,
layout->dynamic_offset_size / (4 * A6XX_TEX_CONST_DWORDS),
A6XX_TEX_CONST_DWORDS, &dynamic_desc_set);
if (result != VK_SUCCESS) {
cmd->record_result = result;
@ -1893,7 +1901,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
}
memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
layout->dynamic_offset_count * A6XX_TEX_CONST_DWORDS * 4);
layout->dynamic_offset_size);
addr[MAX_SETS] = dynamic_desc_set.iova | 3;
}

View file

@ -57,13 +57,8 @@ static uint32_t
descriptor_size(VkDescriptorType type)
{
switch (type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
/* These are remapped to the special driver-managed descriptor set,
* hence they don't take up any space in the original descriptor set:
* Input attachment doesn't use descriptor sets at all
*/
/* Input attachment doesn't use descriptor sets at all */
return 0;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
/* We make offsets and sizes all 16 dwords, to match how the hardware
@ -79,6 +74,13 @@ descriptor_size(VkDescriptorType type)
}
}
/* Returns true when the descriptor type is one of the two dynamic buffer
 * types (dynamic uniform buffer or dynamic storage buffer), and false for
 * every other descriptor type.
 */
static bool
is_dynamic(VkDescriptorType type)
{
   switch (type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return true;
   default:
      return false;
   }
}
static uint32_t
mutable_descriptor_size(const VkMutableDescriptorTypeListVALVE *list)
{
@ -174,9 +176,8 @@ tu_CreateDescriptorSetLayout(
set_layout->shader_stages = 0;
set_layout->has_immutable_samplers = false;
set_layout->size = 0;
set_layout->dynamic_ubo = 0;
uint32_t dynamic_offset_count = 0;
uint32_t dynamic_offset_size = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
const VkDescriptorSetLayoutBinding *binding = bindings + j;
@ -185,7 +186,7 @@ tu_CreateDescriptorSetLayout(
set_layout->binding[b].type = binding->descriptorType;
set_layout->binding[b].array_size = binding->descriptorCount;
set_layout->binding[b].offset = set_layout->size;
set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
set_layout->binding[b].dynamic_offset_offset = dynamic_offset_size;
set_layout->binding[b].shader_stages = binding->stageFlags;
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_VALVE) {
@ -242,17 +243,12 @@ tu_CreateDescriptorSetLayout(
}
}
set_layout->size +=
binding->descriptorCount * set_layout->binding[b].size;
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
binding->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
STATIC_ASSERT(MAX_DYNAMIC_BUFFERS <= 8 * sizeof(set_layout->dynamic_ubo));
set_layout->dynamic_ubo |=
((1u << binding->descriptorCount) - 1) << dynamic_offset_count;
}
dynamic_offset_count += binding->descriptorCount;
if (is_dynamic(binding->descriptorType)) {
dynamic_offset_size +=
binding->descriptorCount * set_layout->binding[b].size;
} else {
set_layout->size +=
binding->descriptorCount * set_layout->binding[b].size;
}
set_layout->shader_stages |= binding->stageFlags;
@ -260,7 +256,7 @@ tu_CreateDescriptorSetLayout(
free(bindings);
set_layout->dynamic_offset_count = dynamic_offset_count;
set_layout->dynamic_offset_size = dynamic_offset_size;
*pSetLayout = tu_descriptor_set_layout_to_handle(set_layout);
@ -327,7 +323,9 @@ tu_GetDescriptorSetLayoutSupport(
uint64_t descriptor_sz;
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_VALVE) {
if (is_dynamic(binding->descriptorType)) {
descriptor_sz = 0;
} else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_VALVE) {
const VkMutableDescriptorTypeListVALVE *list =
&mutable_info->pMutableDescriptorTypeLists[i];
@ -400,21 +398,21 @@ tu_CreatePipelineLayout(VkDevice _device,
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
layout->num_sets = pCreateInfo->setLayoutCount;
layout->dynamic_offset_count = 0;
layout->dynamic_offset_size = 0;
unsigned dynamic_offset_count = 0;
unsigned dynamic_offset_size = 0;
for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout,
pCreateInfo->pSetLayouts[set]);
layout->set[set].layout = set_layout;
layout->set[set].dynamic_offset_start = dynamic_offset_size;
tu_descriptor_set_layout_ref(set_layout);
layout->set[set].dynamic_offset_start = dynamic_offset_count;
dynamic_offset_count += set_layout->dynamic_offset_count;
dynamic_offset_size += set_layout->dynamic_offset_size;
}
layout->dynamic_offset_count = dynamic_offset_count;
layout->dynamic_offset_size = dynamic_offset_size;
layout->push_constant_size = 0;
for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
@ -457,8 +455,7 @@ tu_descriptor_set_create(struct tu_device *device,
{
struct tu_descriptor_set *set;
unsigned dynamic_offset = sizeof(struct tu_descriptor_set);
unsigned mem_size = dynamic_offset +
A6XX_TEX_CONST_DWORDS * 4 * layout->dynamic_offset_count;
unsigned mem_size = dynamic_offset + layout->dynamic_offset_size;
if (pool->host_memory_base) {
if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
@ -477,7 +474,7 @@ tu_descriptor_set_create(struct tu_device *device,
memset(set, 0, mem_size);
vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (layout->dynamic_offset_count) {
if (layout->dynamic_offset_size) {
set->dynamic_descriptors = (uint32_t *)((uint8_t*)set + dynamic_offset);
}
@ -601,7 +598,7 @@ tu_CreateDescriptorPool(VkDevice _device,
TU_FROM_HANDLE(tu_device, device, _device);
struct tu_descriptor_pool *pool;
uint64_t size = sizeof(struct tu_descriptor_pool);
uint64_t bo_size = 0, bo_count = 0, dynamic_count = 0;
uint64_t bo_size = 0, bo_count = 0, dynamic_size = 0;
VkResult ret;
const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
@ -615,7 +612,8 @@ tu_CreateDescriptorPool(VkDevice _device,
switch(pCreateInfo->pPoolSizes[i].type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
dynamic_count += pCreateInfo->pPoolSizes[i].descriptorCount;
dynamic_size += descriptor_size(device, pCreateInfo->pPoolSizes[i].type) *
pCreateInfo->pPoolSizes[i].descriptorCount;
break;
case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount &&
@ -643,7 +641,7 @@ tu_CreateDescriptorPool(VkDevice _device,
if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
uint64_t host_size = pCreateInfo->maxSets * sizeof(struct tu_descriptor_set);
host_size += sizeof(struct tu_bo*) * bo_count;
host_size += A6XX_TEX_CONST_DWORDS * 4 * dynamic_count;
host_size += dynamic_size;
size += host_size;
} else {
size += sizeof(struct tu_descriptor_pool_entry) * pCreateInfo->maxSets;
@ -960,36 +958,29 @@ tu_update_descriptor_sets(const struct tu_device *device,
const struct tu_descriptor_set_binding_layout *binding_layout =
set->layout->binding + writeset->dstBinding;
uint32_t *ptr = set->mapped_ptr;
if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
writeset->descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
ptr = set->dynamic_descriptors;
ptr += binding_layout->dynamic_offset_offset / 4;
} else {
ptr = set->mapped_ptr;
ptr += binding_layout->offset / 4;
}
/* for immutable samplers with push descriptors: */
const bool copy_immutable_samplers =
dstSetOverride && binding_layout->immutable_samplers_offset;
const struct tu_sampler *samplers =
tu_immutable_samplers(set->layout, binding_layout);
ptr += binding_layout->offset / 4;
ptr += (binding_layout->size / 4) * writeset->dstArrayElement;
for (j = 0; j < writeset->descriptorCount; ++j) {
switch(writeset->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
unsigned idx = writeset->dstArrayElement + j;
idx += binding_layout->dynamic_offset_offset;
write_ubo_descriptor(set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
writeset->pBufferInfo + j);
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
write_ubo_descriptor(ptr, writeset->pBufferInfo + j);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
unsigned idx = writeset->dstArrayElement + j;
idx += binding_layout->dynamic_offset_offset;
write_buffer_descriptor(device, set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
writeset->pBufferInfo + j);
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
break;
@ -1039,9 +1030,18 @@ tu_update_descriptor_sets(const struct tu_device *device,
dst_set->layout->binding + copyset->dstBinding;
uint32_t *src_ptr = src_set->mapped_ptr;
uint32_t *dst_ptr = dst_set->mapped_ptr;
src_ptr += src_binding_layout->offset / 4;
dst_ptr += dst_binding_layout->offset / 4;
if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
src_binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
src_ptr = src_set->dynamic_descriptors;
dst_ptr = dst_set->dynamic_descriptors;
src_ptr += src_binding_layout->dynamic_offset_offset / 4;
dst_ptr += dst_binding_layout->dynamic_offset_offset / 4;
} else {
src_ptr = src_set->mapped_ptr;
dst_ptr = dst_set->mapped_ptr;
src_ptr += src_binding_layout->offset / 4;
dst_ptr += dst_binding_layout->offset / 4;
}
src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
@ -1052,23 +1052,7 @@ tu_update_descriptor_sets(const struct tu_device *device,
uint32_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
for (j = 0; j < copyset->descriptorCount; ++j) {
switch (src_binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
unsigned src_idx = copyset->srcArrayElement + j;
unsigned dst_idx = copyset->dstArrayElement + j;
src_idx += src_binding_layout->dynamic_offset_offset;
dst_idx += dst_binding_layout->dynamic_offset_offset;
uint32_t *src_dynamic, *dst_dynamic;
src_dynamic = src_set->dynamic_descriptors + src_idx * A6XX_TEX_CONST_DWORDS;
dst_dynamic = dst_set->dynamic_descriptors + dst_idx * A6XX_TEX_CONST_DWORDS;
memcpy(dst_dynamic, src_dynamic, A6XX_TEX_CONST_DWORDS * 4);
break;
}
default:
memcpy(dst_ptr, src_ptr, copy_size);
}
memcpy(dst_ptr, src_ptr, copy_size);
src_ptr += src_binding_layout->size / 4;
dst_ptr += dst_binding_layout->size / 4;
@ -1138,9 +1122,7 @@ tu_CreateDescriptorUpdateTemplate(
switch (entry->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
dst_offset = (binding_layout->dynamic_offset_offset +
entry->dstArrayElement) * A6XX_TEX_CONST_DWORDS;
dst_stride = A6XX_TEX_CONST_DWORDS;
dst_offset = binding_layout->dynamic_offset_offset / 4;
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLER:
@ -1152,10 +1134,11 @@ tu_CreateDescriptorUpdateTemplate(
FALLTHROUGH;
default:
dst_offset = binding_layout->offset / 4;
dst_offset += (binding_layout->size * entry->dstArrayElement) / 4;
dst_stride = binding_layout->size / 4;
}
dst_offset += (binding_layout->size * entry->dstArrayElement) / 4;
dst_stride = binding_layout->size / 4;
templ->entry[i] = (struct tu_descriptor_update_template_entry) {
.descriptor_type = entry->descriptorType,
.descriptor_count = entry->descriptorCount,

View file

@ -43,8 +43,7 @@ struct tu_descriptor_set_binding_layout
uint32_t offset;
/* Index into the pDynamicOffsets array for dynamic descriptors, as well as
* the array of dynamic descriptors (offsetted by
/* Byte offset in the array of dynamic descriptors (offsetted by
* tu_pipeline_layout::set::dynamic_offset_start).
*/
uint32_t dynamic_offset_offset;
@ -80,13 +79,8 @@ struct tu_descriptor_set_layout
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
/* A bitfield of which dynamic buffers are ubo's, to make the
* descriptor-binding-time patching easier.
*/
uint32_t dynamic_ubo;
/* Size of dynamic offset descriptors used by this descriptor set */
uint16_t dynamic_offset_size;
bool has_immutable_samplers;
bool has_variable_descriptors;
@ -129,7 +123,7 @@ struct tu_pipeline_layout
uint32_t num_sets;
uint32_t push_constant_size;
uint32_t dynamic_offset_count;
uint32_t dynamic_offset_size;
};
static inline const struct tu_sampler *

View file

@ -174,7 +174,7 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
base = MAX_SETS;
offset = (layout->set[i].dynamic_offset_start +
binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
binding->dynamic_offset_offset) / 4;
FALLTHROUGH;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@ -207,7 +207,7 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
base = MAX_SETS;
offset = (layout->set[i].dynamic_offset_start +
binding->dynamic_offset_offset) * A6XX_TEX_CONST_DWORDS;
binding->dynamic_offset_offset) / 4;
FALLTHROUGH;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
tu_foreach_stage(stage, stages) {

View file

@ -203,8 +203,8 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
base = layout->set[set].dynamic_offset_start +
binding_layout->dynamic_offset_offset;
base = (layout->set[set].dynamic_offset_start +
binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS);
set = MAX_SETS;
break;
default: