diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 86350dd6258..300de3dc906 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -673,10 +673,11 @@ void anv_CmdBindPipeline(
          assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
          if (layout->set[s].layout->dynamic_offset_count > 0 &&
-             (push->desc_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) != layout->set[s].dynamic_offset_start) {
-            push->desc_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
-            push->desc_offsets[s] |= (layout->set[s].dynamic_offset_start &
-                                      ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
+             (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
+             layout->set[s].dynamic_offset_start) {
+            push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
+            push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
+                                              ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
             modified = true;
          }
       }
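For context, the two masks carve each 32-bit push-constant entry in two: descriptor buffers are allocated with at least 64-byte alignment, so the low bits of the offset are free to carry the set's first dynamic-offset index. A minimal sketch of that packing; the EX_* names and the exact bit split are assumptions for illustration, not the driver's definitions:

#include <assert.h>
#include <stdint.h>

/* Assumed split: offsets are 64B-aligned, so bits [5:0] can be reused. */
#define EX_DYNAMIC_INDEX_MASK 0x0000003fu
#define EX_OFFSET_MASK        (~EX_DYNAMIC_INDEX_MASK)

static uint32_t
ex_pack_desc_offset(uint32_t buffer_offset, uint32_t dynamic_offset_start)
{
   assert((buffer_offset & ~EX_OFFSET_MASK) == 0);   /* 64B-aligned */
   assert((dynamic_offset_start & ~EX_DYNAMIC_INDEX_MASK) == 0);
   return buffer_offset | dynamic_offset_start;
}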
@@ -788,16 +789,16 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
    /* When using indirect descriptors, stages that have access to the HW
     * binding tables, never need to access the
-    * anv_push_constants::desc_offsets fields, because any data they need
-    * from the descriptor buffer is accessible through a binding table
-    * entry. For stages that are "bindless" (Mesh/Task/RT), we need to
-    * provide anv_push_constants::desc_offsets matching the bound
+    * anv_push_constants::desc_surface_offsets fields, because any data
+    * they need from the descriptor buffer is accessible through a binding
+    * table entry. For stages that are "bindless" (Mesh/Task/RT), we need
+    * to provide anv_push_constants::desc_surface_offsets matching the bound
     * descriptor so that shaders can access the descriptor buffer through
     * A64 messages.
     *
     * With direct descriptors, the shaders can use the
-    * anv_push_constants::desc_offsets to build bindless offsets. So it's
-    * we always need to update the push constant data.
+    * anv_push_constants::desc_surface_offsets to build bindless offsets.
+    * So we always need to update the push constant data.
     */
    bool update_desc_sets =
       !cmd_buffer->device->physical->indirect_descriptors ||
@@ -813,18 +814,20 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
    if (update_desc_sets) {
       struct anv_push_constants *push = &pipe_state->push_constants;
 
-      struct anv_address set_addr = anv_descriptor_set_address(set);
       uint64_t offset =
-         anv_address_physical(set_addr) -
-         cmd_buffer->device->physical->va.binding_table_pool.addr;
+         anv_address_physical(set->desc_surface_addr) -
+         cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
       assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
-      push->desc_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
-      push->desc_offsets[set_index] |= offset;
+      push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
+      push->desc_surface_offsets[set_index] |= offset;
+      push->desc_sampler_offsets[set_index] |=
+         anv_address_physical(set->desc_sampler_addr) -
+         cmd_buffer->device->physical->va.dynamic_state_pool.addr;
 
-      if (set_addr.bo) {
-         anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
-                               set_addr.bo);
-      }
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                            set->desc_surface_addr.bo);
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                            set->desc_sampler_addr.bo);
    }
 
    dirty_stages |= stages;
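The hunk above is the crux of the binding-side change: with direct descriptors, a set's surface states stay relative to the internal surface state pool while its sampler states now live relative to the dynamic state pool, so a single packed offset can no longer serve both. A sketch of the pair of relative offsets a bound set publishes; the struct and helper are hypothetical, only the field roles mirror the patch:

struct ex_set_offsets {
   uint32_t surface; /* relative to va.internal_surface_state_pool.addr */
   uint32_t sampler; /* relative to va.dynamic_state_pool.addr */
};

static struct ex_set_offsets
ex_publish_offsets(uint64_t surface_pool_base, uint64_t sampler_pool_base,
                   uint64_t set_surface_addr, uint64_t set_sampler_addr)
{
   /* Each offset only makes sense against its own base address. */
   return (struct ex_set_offsets) {
      .surface = (uint32_t)(set_surface_addr - surface_pool_base),
      .sampler = (uint32_t)(set_sampler_addr - sampler_pool_base),
   };
}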
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 902989d7273..017bc306c94 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -36,27 +36,40 @@
  * Descriptor set layouts.
  */
 
-static unsigned
-anv_descriptor_data_alignment(enum anv_descriptor_data data)
+static void
+anv_descriptor_data_alignment(enum anv_descriptor_data data,
+                              enum anv_descriptor_set_layout_type layout_type,
+                              unsigned *out_surface_align,
+                              unsigned *out_sampler_align)
 {
-   unsigned alignment = 1;
+   unsigned surface_align = 1, sampler_align = 1;
 
    if (data & (ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE |
                ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE |
                ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE))
-      alignment = MAX2(alignment, 8);
+      surface_align = MAX2(surface_align, 8);
 
-   if (data & (ANV_DESCRIPTOR_SURFACE |
-               ANV_DESCRIPTOR_SURFACE_SAMPLER))
-      alignment = MAX2(alignment, ANV_SURFACE_STATE_SIZE);
+   if (data & ANV_DESCRIPTOR_SURFACE)
+      surface_align = MAX2(surface_align, ANV_SURFACE_STATE_SIZE);
 
-   if (data & ANV_DESCRIPTOR_SAMPLER)
-      alignment = MAX2(alignment, ANV_SAMPLER_STATE_SIZE);
+   if (data & ANV_DESCRIPTOR_SURFACE_SAMPLER) {
+      surface_align = MAX2(surface_align, ANV_SURFACE_STATE_SIZE);
+      if (layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
+         sampler_align = MAX2(sampler_align, ANV_SAMPLER_STATE_SIZE);
+   }
+
+   if (data & ANV_DESCRIPTOR_SAMPLER) {
+      if (layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
+         sampler_align = MAX2(sampler_align, ANV_SAMPLER_STATE_SIZE);
+      else
+         surface_align = MAX2(surface_align, ANV_SAMPLER_STATE_SIZE);
+   }
 
    if (data & ANV_DESCRIPTOR_INLINE_UNIFORM)
-      alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
+      surface_align = MAX2(surface_align, ANV_UBO_ALIGNMENT);
 
-   return alignment;
+   *out_surface_align = surface_align;
+   *out_sampler_align = sampler_align;
 }
 
 static enum anv_descriptor_data
@@ -125,7 +138,8 @@ anv_indirect_descriptor_data_for_type(VkDescriptorType type)
 }
 
 static enum anv_descriptor_data
-anv_direct_descriptor_data_for_type(VkDescriptorType type)
+anv_direct_descriptor_data_for_type(enum anv_descriptor_set_layout_type layout_type,
+                                    VkDescriptorType type)
 {
    enum anv_descriptor_data data = 0;
 
@@ -136,9 +150,16 @@ anv_direct_descriptor_data_for_type(VkDescriptorType type)
       break;
 
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      data = ANV_DESCRIPTOR_BTI_SURFACE_STATE |
-             ANV_DESCRIPTOR_BTI_SAMPLER_STATE |
-             ANV_DESCRIPTOR_SURFACE_SAMPLER;
+      if (layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
+         data = ANV_DESCRIPTOR_BTI_SURFACE_STATE |
+                ANV_DESCRIPTOR_BTI_SAMPLER_STATE |
+                ANV_DESCRIPTOR_SURFACE |
+                ANV_DESCRIPTOR_SAMPLER;
+      } else {
+         data = ANV_DESCRIPTOR_BTI_SURFACE_STATE |
+                ANV_DESCRIPTOR_BTI_SAMPLER_STATE |
+                ANV_DESCRIPTOR_SURFACE_SAMPLER;
+      }
       break;
 
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
@@ -171,16 +192,18 @@ anv_direct_descriptor_data_for_type(VkDescriptorType type)
 
 static enum anv_descriptor_data
 anv_descriptor_data_for_type(const struct anv_physical_device *device,
+                             enum anv_descriptor_set_layout_type layout_type,
                              VkDescriptorType type)
 {
    if (device->indirect_descriptors)
       return anv_indirect_descriptor_data_for_type(type);
    else
-      return anv_direct_descriptor_data_for_type(type);
+      return anv_direct_descriptor_data_for_type(layout_type, type);
 }
 
 static enum anv_descriptor_data
 anv_descriptor_data_for_mutable_type(const struct anv_physical_device *device,
+                                     enum anv_descriptor_set_layout_type layout_type,
                                      const VkMutableDescriptorTypeCreateInfoEXT *mutable_info,
                                      int binding)
 {
@@ -193,11 +216,11 @@ anv_descriptor_data_for_mutable_type(const struct anv_physical_device *device,
             i == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
            continue;
 
-         desc_data |= anv_descriptor_data_for_type(device, i);
+         desc_data |= anv_descriptor_data_for_type(device, layout_type, i);
       }
 
       desc_data |= anv_descriptor_data_for_type(
-         device, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
+         device, layout_type, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
 
       return desc_data;
    }
@@ -206,79 +229,102 @@ anv_descriptor_data_for_mutable_type(const struct anv_physical_device *device,
       &mutable_info->pMutableDescriptorTypeLists[binding];
    for (uint32_t i = 0; i < type_list->descriptorTypeCount; i++) {
       desc_data |=
-         anv_descriptor_data_for_type(device, type_list->pDescriptorTypes[i]);
+         anv_descriptor_data_for_type(device, layout_type,
+                                      type_list->pDescriptorTypes[i]);
    }
 
    return desc_data;
 }
 
-static unsigned
-anv_descriptor_data_size(enum anv_descriptor_data data)
+static void
+anv_descriptor_data_size(enum anv_descriptor_data data,
+                         enum anv_descriptor_set_layout_type layout_type,
+                         uint16_t *out_surface_size,
+                         uint16_t *out_sampler_size)
 {
-   unsigned size = 0;
+   unsigned surface_size = 0;
+   unsigned sampler_size = 0;
 
    if (data & ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE)
-      size += sizeof(struct anv_sampled_image_descriptor);
+      surface_size += sizeof(struct anv_sampled_image_descriptor);
 
    if (data & ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE)
-      size += sizeof(struct anv_storage_image_descriptor);
+      surface_size += sizeof(struct anv_storage_image_descriptor);
 
    if (data & ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE)
-      size += sizeof(struct anv_address_range_descriptor);
+      surface_size += sizeof(struct anv_address_range_descriptor);
 
    if (data & ANV_DESCRIPTOR_SURFACE)
-      size += ANV_SURFACE_STATE_SIZE;
+      surface_size += ANV_SURFACE_STATE_SIZE;
 
-   if (data & ANV_DESCRIPTOR_SAMPLER)
-      size += ANV_SAMPLER_STATE_SIZE;
+   /* Direct descriptors have sampler states stored separately */
+   if (layout_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
+      if (data & ANV_DESCRIPTOR_SAMPLER)
+         sampler_size += ANV_SAMPLER_STATE_SIZE;
 
-   if (data & ANV_DESCRIPTOR_SURFACE_SAMPLER) {
-      size += ALIGN(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
-                    ANV_SURFACE_STATE_SIZE);
+      if (data & ANV_DESCRIPTOR_SURFACE_SAMPLER) {
+         surface_size += ANV_SURFACE_STATE_SIZE;
+         sampler_size += ANV_SAMPLER_STATE_SIZE;
+      }
+   } else {
+      if (data & ANV_DESCRIPTOR_SAMPLER)
+         surface_size += ANV_SAMPLER_STATE_SIZE;
+
+      if (data & ANV_DESCRIPTOR_SURFACE_SAMPLER) {
+         surface_size += ALIGN(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
+                               ANV_SURFACE_STATE_SIZE);
+      }
    }
 
-   return size;
+   *out_surface_size = surface_size;
+   *out_sampler_size = sampler_size;
 }
 
 static bool
 anv_needs_descriptor_buffer(VkDescriptorType desc_type,
+                            enum anv_descriptor_set_layout_type layout_type,
                             enum anv_descriptor_data desc_data)
 {
-   if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
-       anv_descriptor_data_size(desc_data) > 0)
+   if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
       return true;
-   return false;
+
+   uint16_t surface_size, sampler_size;
+   anv_descriptor_data_size(desc_data, layout_type,
+                            &surface_size, &sampler_size);
+   return surface_size > 0 || sampler_size > 0;
 }
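The effect of the split is easiest to see for VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER. A condensed model of the three layout types; the EX_* names are stand-ins, and the byte values (64B surface state, 32B sampler state, 8B indirect handle) are assumptions for illustration only:

#define EX_SURFACE_SZ 64  /* stand-in for ANV_SURFACE_STATE_SIZE */
#define EX_SAMPLER_SZ 32  /* stand-in for ANV_SAMPLER_STATE_SIZE */
#define EX_ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

enum ex_layout { EX_INDIRECT, EX_DIRECT, EX_BUFFER };

static void
ex_combined_image_sampler_size(enum ex_layout t,
                               unsigned *surface, unsigned *sampler)
{
   switch (t) {
   case EX_INDIRECT: /* one small anv_sampled_image_descriptor handle */
      *surface = 8;
      *sampler = 0;
      break;
   case EX_DIRECT:   /* split: surface heap + the new sampler heap */
      *surface = EX_SURFACE_SZ;
      *sampler = EX_SAMPLER_SZ;
      break;
   case EX_BUFFER:   /* interleaved pair, padded to surface alignment */
      *surface = EX_ALIGN(EX_SURFACE_SZ + EX_SAMPLER_SZ, EX_SURFACE_SZ);
      *sampler = 0;
      break;
   }
}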
 
 /** Returns the size in bytes of each descriptor with the given layout */
-static unsigned
-anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout)
+static void
+anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout,
+                    enum anv_descriptor_set_layout_type layout_type,
+                    uint16_t *out_surface_stride,
+                    uint16_t *out_sampler_stride)
 {
    if (layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
       assert(layout->data == ANV_DESCRIPTOR_INLINE_UNIFORM);
-      return layout->array_size;
+      assert(layout->array_size <= UINT16_MAX);
+      *out_surface_stride = layout->array_size;
+      *out_sampler_stride = 0;
+      return;
    }
 
-   unsigned size = anv_descriptor_data_size(layout->data);
-
-   /* For multi-planar bindings, we make every descriptor consume the maximum
-    * number of planes so we don't have to bother with walking arrays and
-    * adding things up every time. Fortunately, YCbCr samplers aren't all
-    * that common and likely won't be in the middle of big arrays.
-    */
-   if (layout->max_plane_count > 1)
-      size *= layout->max_plane_count;
-
-   return size;
+   anv_descriptor_data_size(layout->data, layout_type,
+                            out_surface_stride,
+                            out_sampler_stride);
 }
 
 /** Returns size in bytes of the biggest descriptor in the given layout */
-static unsigned
+static void
 anv_descriptor_size_for_mutable_type(const struct anv_physical_device *device,
+                                     enum anv_descriptor_set_layout_type layout_type,
                                      const VkMutableDescriptorTypeCreateInfoEXT *mutable_info,
-                                     int binding)
+                                     int binding,
+                                     uint16_t *out_surface_stride,
+                                     uint16_t *out_sampler_stride)
 {
-   unsigned size = 0;
+   *out_surface_stride = 0;
+   *out_sampler_stride = 0;
 
    if (!mutable_info ||
        mutable_info->mutableDescriptorTypeListCount == 0 ||
@@ -291,26 +337,41 @@ anv_descriptor_size_for_mutable_type(const struct anv_physical_device *device,
            continue;
 
         enum anv_descriptor_data desc_data =
-            anv_descriptor_data_for_type(device, i);
-         size = MAX2(size, anv_descriptor_data_size(desc_data));
+            anv_descriptor_data_for_type(device, layout_type, i);
+         uint16_t surface_stride, sampler_stride;
+         anv_descriptor_data_size(desc_data, layout_type,
+                                  &surface_stride, &sampler_stride);
+
+         *out_surface_stride = MAX2(*out_surface_stride, surface_stride);
+         *out_sampler_stride = MAX2(*out_sampler_stride, sampler_stride);
      }
 
      enum anv_descriptor_data desc_data = anv_descriptor_data_for_type(
-         device, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
-      size = MAX2(size, anv_descriptor_data_size(desc_data));
+         device, layout_type, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
+      uint16_t surface_stride, sampler_stride;
+      anv_descriptor_data_size(desc_data, layout_type,
+                               &surface_stride, &sampler_stride);
 
-      return size;
+      *out_surface_stride = MAX2(*out_surface_stride, surface_stride);
+      *out_sampler_stride = MAX2(*out_sampler_stride, sampler_stride);
+
+      return;
   }
 
   const VkMutableDescriptorTypeListEXT *type_list =
      &mutable_info->pMutableDescriptorTypeLists[binding];
   for (uint32_t i = 0; i < type_list->descriptorTypeCount; i++) {
      enum anv_descriptor_data desc_data =
-         anv_descriptor_data_for_type(device, type_list->pDescriptorTypes[i]);
-      size = MAX2(size, anv_descriptor_data_size(desc_data));
-   }
+         anv_descriptor_data_for_type(device, layout_type,
+                                      type_list->pDescriptorTypes[i]);
 
-   return size;
+      uint16_t surface_stride, sampler_stride;
+      anv_descriptor_data_size(desc_data, layout_type,
+                               &surface_stride, &sampler_stride);
+
+      *out_surface_stride = MAX2(*out_surface_stride, surface_stride);
+      *out_sampler_stride = MAX2(*out_sampler_stride, sampler_stride);
+   }
 }
 
 static bool
@@ -353,6 +414,18 @@ anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
    return (binding->flags & flags_requiring_bindless) != 0;
 }
 
+static enum anv_descriptor_set_layout_type
+anv_descriptor_set_layout_type_for_flags(const struct anv_physical_device *device,
+                                         const VkDescriptorSetLayoutCreateInfo *pCreateInfo)
+{
+   if (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT)
+      return ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER;
+   else if (device->indirect_descriptors)
+      return ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT;
+   else
+      return ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT;
+}
+
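The new helper centralizes what anv_CreateDescriptorSetLayout previously open-coded. A hypothetical sanity check spelling out the mapping, using only names from the patch:

static void
ex_check_layout_type(const struct anv_physical_device *pdevice,
                     const VkDescriptorSetLayoutCreateInfo *info)
{
   enum anv_descriptor_set_layout_type t =
      anv_descriptor_set_layout_type_for_flags(pdevice, info);

   if (info->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT)
      assert(t == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER);
   else if (pdevice->indirect_descriptors)
      assert(t == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT);
   else
      assert(t == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT);
}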
 void anv_GetDescriptorSetLayoutSupport(
     VkDevice                                    _device,
     const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
@@ -372,6 +445,9 @@ void anv_GetDescriptorSetLayoutSupport(
       vk_find_struct_const(pCreateInfo->pNext,
                            MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
 
+   enum anv_descriptor_set_layout_type layout_type =
+      anv_descriptor_set_layout_type_for_flags(pdevice, pCreateInfo);
+
    for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
       const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[b];
 
@@ -383,10 +459,11 @@ void anv_GetDescriptorSetLayoutSupport(
 
       enum anv_descriptor_data desc_data =
          binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-         anv_descriptor_data_for_mutable_type(pdevice, mutable_info, b) :
-         anv_descriptor_data_for_type(pdevice, binding->descriptorType);
+         anv_descriptor_data_for_mutable_type(pdevice, layout_type, mutable_info, b) :
+         anv_descriptor_data_for_type(pdevice, layout_type, binding->descriptorType);
 
-      if (anv_needs_descriptor_buffer(binding->descriptorType, desc_data))
+      if (anv_needs_descriptor_buffer(binding->descriptorType,
+                                      layout_type, desc_data))
          needs_descriptor_buffer = true;
 
       if (flags & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)
@@ -509,13 +586,8 @@ VkResult anv_CreateDescriptorSetLayout(
    set_layout->ref_cnt = 1;
    set_layout->binding_count = num_bindings;
    set_layout->flags = pCreateInfo->flags;
-
-   if (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT)
-      set_layout->type = ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER;
-   else if (device->physical->indirect_descriptors)
-      set_layout->type = ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT;
-   else
-      set_layout->type = ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT;
+   set_layout->type = anv_descriptor_set_layout_type_for_flags(device->physical,
+                                                               pCreateInfo);
 
    for (uint32_t b = 0; b < num_bindings; b++) {
       /* Initialize all binding_layout entries to -1 */
@@ -533,7 +605,8 @@ VkResult anv_CreateDescriptorSetLayout(
    uint32_t buffer_view_count = 0;
    uint32_t dynamic_offset_count = 0;
-   uint32_t descriptor_buffer_size = 0;
+   uint32_t descriptor_buffer_surface_size = 0;
+   uint32_t descriptor_buffer_sampler_size = 0;
 
    for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
       const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
@@ -597,8 +670,11 @@ VkResult anv_CreateDescriptorSetLayout(
 
       set_layout->binding[b].data =
          binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-         anv_descriptor_data_for_mutable_type(device->physical, mutable_info, b) :
-         anv_descriptor_data_for_type(device->physical, binding->descriptorType);
+         anv_descriptor_data_for_mutable_type(device->physical,
+                                              set_layout->type,
+                                              mutable_info, b) :
+         anv_descriptor_data_for_type(device->physical, set_layout->type,
+                                      binding->descriptorType);
 
       set_layout->binding[b].array_size = binding->descriptorCount;
       set_layout->binding[b].descriptor_index = set_layout->descriptor_count;
@@ -650,32 +726,64 @@ VkResult anv_CreateDescriptorSetLayout(
          break;
       }
 
-      set_layout->binding[b].descriptor_data_size =
-         anv_descriptor_data_size(set_layout->binding[b].data);
-      set_layout->binding[b].descriptor_stride =
-         binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-         anv_descriptor_size_for_mutable_type(device->physical, mutable_info, b) :
-         anv_descriptor_size(&set_layout->binding[b]);
+      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
+         anv_descriptor_size_for_mutable_type(
+            device->physical, set_layout->type, mutable_info, b,
+            &set_layout->binding[b].descriptor_data_surface_size,
+            &set_layout->binding[b].descriptor_data_sampler_size);
+      } else {
+         anv_descriptor_size(&set_layout->binding[b],
+                             set_layout->type,
+                             &set_layout->binding[b].descriptor_data_surface_size,
+                             &set_layout->binding[b].descriptor_data_sampler_size);
+      }
 
-      descriptor_buffer_size =
-         align(descriptor_buffer_size,
-               anv_descriptor_data_alignment(set_layout->binding[b].data));
+      /* For multi-planar bindings, we make every descriptor consume the maximum
+       * number of planes so we don't have to bother with walking arrays and
+       * adding things up every time. Fortunately, YCbCr samplers aren't all
+       * that common and likely won't be in the middle of big arrays.
+       */
+      set_layout->binding[b].descriptor_surface_stride =
+         MAX2(set_layout->binding[b].max_plane_count, 1) *
+         set_layout->binding[b].descriptor_data_surface_size;
+      set_layout->binding[b].descriptor_sampler_stride =
+         MAX2(set_layout->binding[b].max_plane_count, 1) *
+         set_layout->binding[b].descriptor_data_sampler_size;
+
+      unsigned surface_align, sampler_align;
+      anv_descriptor_data_alignment(set_layout->binding[b].data,
+                                    set_layout->type,
+                                    &surface_align,
+                                    &sampler_align);
+      descriptor_buffer_surface_size =
+         align(descriptor_buffer_surface_size, surface_align);
+      descriptor_buffer_sampler_size =
+         align(descriptor_buffer_sampler_size, sampler_align);
 
       if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
-         set_layout->binding[b].descriptor_offset = descriptor_buffer_size;
-         descriptor_buffer_size += binding->descriptorCount;
+         set_layout->binding[b].descriptor_surface_offset = descriptor_buffer_surface_size;
+         descriptor_buffer_surface_size += binding->descriptorCount;
       } else {
-         set_layout->binding[b].descriptor_offset = descriptor_buffer_size;
-         descriptor_buffer_size +=
-            set_layout->binding[b].descriptor_stride * binding->descriptorCount;
+         set_layout->binding[b].descriptor_surface_offset = descriptor_buffer_surface_size;
+         descriptor_buffer_surface_size +=
+            set_layout->binding[b].descriptor_surface_stride * binding->descriptorCount;
       }
 
+      set_layout->binding[b].descriptor_sampler_offset = descriptor_buffer_sampler_size;
+      descriptor_buffer_sampler_size +=
+         set_layout->binding[b].descriptor_sampler_stride * binding->descriptorCount;
+
       set_layout->shader_stages |= binding->stageFlags;
    }
 
+   /* Sanity checks */
+   assert(descriptor_buffer_sampler_size == 0 ||
+          set_layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT);
+
    set_layout->buffer_view_count = buffer_view_count;
    set_layout->dynamic_offset_count = dynamic_offset_count;
-   set_layout->descriptor_buffer_size = descriptor_buffer_size;
+   set_layout->descriptor_buffer_surface_size = descriptor_buffer_surface_size;
+   set_layout->descriptor_buffer_sampler_size = descriptor_buffer_sampler_size;
 
    *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
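The loop above now maintains two running totals instead of one. Stripped of the anv specifics, the accounting reduces to the following sketch; EX_ALIGN and the struct are illustrative stand-ins:

#define EX_ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

struct ex_binding {
   unsigned surface_stride, surface_align;
   unsigned sampler_stride, sampler_align;
   unsigned count;
};

static void
ex_accumulate(const struct ex_binding *b, unsigned n,
              unsigned *surface_size, unsigned *sampler_size)
{
   *surface_size = *sampler_size = 0;
   for (unsigned i = 0; i < n; i++) {
      /* Align each heap independently, then advance by stride * count. */
      *surface_size = EX_ALIGN(*surface_size, b[i].surface_align);
      *sampler_size = EX_ALIGN(*sampler_size, b[i].sampler_align);
      *surface_size += b[i].surface_stride * b[i].count;
      *sampler_size += b[i].sampler_stride * b[i].count;
   }
}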
@@ -746,30 +854,44 @@ anv_descriptor_set_layout_empty(const struct anv_descriptor_set_layout *set_layo
    return set_layout->binding_count == 0;
 }
 
-static uint32_t
+static void
 anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
-                                                 uint32_t var_desc_count)
+                                                 uint32_t var_desc_count,
+                                                 uint32_t *out_surface_size,
+                                                 uint32_t *out_sampler_size)
 {
    const struct anv_descriptor_set_binding_layout *dynamic_binding =
       set_layout_dynamic_binding(set_layout);
-   if (dynamic_binding == NULL)
-      return ALIGN(set_layout->descriptor_buffer_size, ANV_UBO_ALIGNMENT);
+   if (dynamic_binding == NULL) {
+      *out_surface_size = ALIGN(set_layout->descriptor_buffer_surface_size,
+                                ANV_UBO_ALIGNMENT);
+      *out_sampler_size = set_layout->descriptor_buffer_sampler_size;
+      return;
+   }
 
    assert(var_desc_count <= dynamic_binding->array_size);
    uint32_t shrink = dynamic_binding->array_size - var_desc_count;
-   uint32_t set_size;
+   uint32_t set_surface_size, set_sampler_size;
 
    if (dynamic_binding->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
       /* Inline uniform blocks are specified to use the descriptor array
        * size as the size in bytes of the block.
        */
-      set_size = set_layout->descriptor_buffer_size - shrink;
+      set_surface_size = set_layout->descriptor_buffer_surface_size - shrink;
+      set_sampler_size = 0;
    } else {
-      set_size = set_layout->descriptor_buffer_size -
-                 shrink * dynamic_binding->descriptor_stride;
+      set_surface_size =
+         set_layout->descriptor_buffer_surface_size > 0 ?
+         (set_layout->descriptor_buffer_surface_size -
+          shrink * dynamic_binding->descriptor_surface_stride) : 0;
+      set_sampler_size =
+         set_layout->descriptor_buffer_sampler_size > 0 ?
+         (set_layout->descriptor_buffer_sampler_size -
+          shrink * dynamic_binding->descriptor_sampler_stride) : 0;
    }
 
-   return ALIGN(set_size, ANV_UBO_ALIGNMENT);
+   *out_surface_size = ALIGN(set_surface_size, ANV_UBO_ALIGNMENT);
+   *out_sampler_size = set_sampler_size;
 }
 
 void anv_DestroyDescriptorSetLayout(
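Worked example for the variable-count path above: a binding declared with array_size = 1024 but bound with var_desc_count = 100 shrinks both totals by the 924 unused slots, each at its own stride. A sketch with hypothetical inputs; the real helper additionally rounds the surface total up to ANV_UBO_ALIGNMENT and guards against empty heaps:

static void
ex_variable_count_size(unsigned total_surface, unsigned total_sampler,
                       unsigned array_size, unsigned var_desc_count,
                       unsigned surface_stride, unsigned sampler_stride,
                       unsigned *out_surface, unsigned *out_sampler)
{
   unsigned shrink = array_size - var_desc_count; /* e.g. 1024 - 100 = 924 */
   *out_surface = total_surface - shrink * surface_stride;
   *out_sampler = total_sampler - shrink * sampler_stride;
}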
@@ -791,11 +913,15 @@ anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout)
 {
    fprintf(stderr, "set layout:\n");
    for (uint32_t b = 0; b < layout->binding_count; b++) {
-      fprintf(stderr, "  binding%03u: offset=0x%08x stride=%03u size=%03u count=%03u\n",
+      fprintf(stderr, "  binding%03u: offsets=0x%08x/0x%08x sizes=%04u/%04u strides=%03u/%03u planes=%hhu count=%03u\n",
               b,
-              layout->binding[b].descriptor_offset,
-              layout->binding[b].descriptor_data_size,
-              layout->binding[b].descriptor_stride,
+              layout->binding[b].descriptor_surface_offset,
+              layout->binding[b].descriptor_sampler_offset,
+              layout->binding[b].descriptor_data_surface_size,
+              layout->binding[b].descriptor_data_sampler_size,
+              layout->binding[b].descriptor_surface_stride,
+              layout->binding[b].descriptor_sampler_stride,
+              layout->binding[b].max_plane_count,
               layout->binding[b].array_size);
    }
 }
@@ -824,7 +950,8 @@ sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx,
    SHA1_UPDATE_VALUE(ctx, layout->descriptor_index);
    SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index);
    SHA1_UPDATE_VALUE(ctx, layout->buffer_view_index);
-   SHA1_UPDATE_VALUE(ctx, layout->descriptor_offset);
+   SHA1_UPDATE_VALUE(ctx, layout->descriptor_surface_offset);
+   SHA1_UPDATE_VALUE(ctx, layout->descriptor_sampler_offset);
 
    if (layout->immutable_samplers) {
       for (uint16_t i = 0; i < layout->array_size; i++)
@@ -842,7 +969,8 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
    SHA1_UPDATE_VALUE(ctx, layout->shader_stages);
    SHA1_UPDATE_VALUE(ctx, layout->buffer_view_count);
    SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count);
-   SHA1_UPDATE_VALUE(ctx, layout->descriptor_buffer_size);
+   SHA1_UPDATE_VALUE(ctx, layout->descriptor_buffer_surface_size);
+   SHA1_UPDATE_VALUE(ctx, layout->descriptor_buffer_sampler_size);
 
    for (uint16_t i = 0; i < layout->binding_count; i++)
       sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i]);
@@ -1022,6 +1150,108 @@ void anv_DestroyPipelineLayout(
 
 #define EMPTY 1
 
+static VkResult
+anv_descriptor_pool_heap_init(struct anv_device *device,
+                              struct anv_descriptor_pool_heap *heap,
+                              uint32_t size,
+                              bool host_only,
+                              bool samplers)
+{
+   if (size == 0)
+      return VK_SUCCESS;
+
+   if (host_only) {
+      heap->size = size;
+      heap->host_mem = vk_zalloc(&device->vk.alloc, size, 8,
+                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (heap->host_mem == NULL)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   } else {
+      const char *bo_name =
+         device->physical->indirect_descriptors ? "indirect descriptors" :
+         samplers ? "direct sampler" : "direct surfaces";
+
+      heap->size = align(size, 4096);
+
+      VkResult result = anv_device_alloc_bo(device,
+                                            bo_name, heap->size,
+                                            ANV_BO_ALLOC_CAPTURE |
+                                            ANV_BO_ALLOC_MAPPED |
+                                            ANV_BO_ALLOC_HOST_CACHED_COHERENT |
+                                            (samplers ?
+                                             ANV_BO_ALLOC_SAMPLER_POOL :
+                                             ANV_BO_ALLOC_DESCRIPTOR_POOL),
+                                            0 /* explicit_address */,
+                                            &heap->bo);
+      if (result != VK_SUCCESS)
+         return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   util_vma_heap_init(&heap->heap, POOL_HEAP_OFFSET, heap->size);
+
+   return VK_SUCCESS;
+}
+
+static void
+anv_descriptor_pool_heap_fini(struct anv_device *device,
+                              struct anv_descriptor_pool_heap *heap)
+{
+   if (heap->size == 0)
+      return;
+
+   util_vma_heap_finish(&heap->heap);
+
+   if (heap->bo)
+      anv_device_release_bo(device, heap->bo);
+
+   if (heap->host_mem)
+      vk_free(&device->vk.alloc, heap->host_mem);
+}
+
+static void
+anv_descriptor_pool_heap_reset(struct anv_device *device,
+                               struct anv_descriptor_pool_heap *heap)
+{
+   if (heap->size == 0)
+      return;
+
+   util_vma_heap_finish(&heap->heap);
+   util_vma_heap_init(&heap->heap, POOL_HEAP_OFFSET, heap->size);
+}
+
+static VkResult
+anv_descriptor_pool_heap_alloc(struct anv_descriptor_pool *pool,
+                               struct anv_descriptor_pool_heap *heap,
+                               uint32_t size, uint32_t alignment,
+                               struct anv_state *state)
+{
+   uint64_t pool_vma_offset =
+      util_vma_heap_alloc(&heap->heap, size, alignment);
+   if (pool_vma_offset == 0)
+      return vk_error(pool, VK_ERROR_FRAGMENTED_POOL);
+
+   assert(pool_vma_offset >= POOL_HEAP_OFFSET &&
+          pool_vma_offset - POOL_HEAP_OFFSET <= INT32_MAX);
+
+   state->offset = pool_vma_offset - POOL_HEAP_OFFSET;
+   state->alloc_size = size;
+   if (heap->host_mem)
+      state->map = heap->host_mem + state->offset;
+   else
+      state->map = heap->bo->map + state->offset;
+
+   return VK_SUCCESS;
+}
+
+static void
+anv_descriptor_pool_heap_free(struct anv_descriptor_pool_heap *heap,
+                              struct anv_state state)
+{
+   util_vma_heap_free(&heap->heap,
+                      (uint64_t)state.offset + POOL_HEAP_OFFSET,
+                      state.alloc_size);
+}
+
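Taken together, the helpers above give each pool two independently managed sub-heaps. A sketch of the expected lifecycle, calling only functions introduced by the patch; the sizes, flags and the wrapper function itself are arbitrary:

static void
ex_heap_roundtrip(struct anv_device *device, struct anv_descriptor_pool *pool)
{
   struct anv_descriptor_pool_heap heap = { .size = 0 };

   if (anv_descriptor_pool_heap_init(device, &heap, 64 * 1024,
                                     false /* host_only */,
                                     true /* samplers */) != VK_SUCCESS)
      return;

   struct anv_state state;
   if (anv_descriptor_pool_heap_alloc(pool, &heap, 256, 64,
                                      &state) == VK_SUCCESS)
      anv_descriptor_pool_heap_free(&heap, state);

   anv_descriptor_pool_heap_fini(device, &heap);
}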
 VkResult anv_CreateDescriptorPool(
     VkDevice                                    _device,
     const VkDescriptorPoolCreateInfo*           pCreateInfo,
@@ -1040,30 +1270,47 @@ VkResult anv_CreateDescriptorPool(
 
    uint32_t descriptor_count = 0;
    uint32_t buffer_view_count = 0;
-   uint32_t descriptor_bo_size = 0;
+   uint32_t descriptor_bo_surface_size = 0;
+   uint32_t descriptor_bo_sampler_size = 0;
+
+   const enum anv_descriptor_set_layout_type layout_type =
+      device->physical->indirect_descriptors ?
+      ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT :
+      ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT;
 
    for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
       enum anv_descriptor_data desc_data =
          pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-         anv_descriptor_data_for_mutable_type(device->physical, mutable_info, i) :
-         anv_descriptor_data_for_type(device->physical, pCreateInfo->pPoolSizes[i].type);
+         anv_descriptor_data_for_mutable_type(device->physical, layout_type,
+                                              mutable_info, i) :
+         anv_descriptor_data_for_type(device->physical, layout_type,
+                                      pCreateInfo->pPoolSizes[i].type);
 
       if (desc_data & ANV_DESCRIPTOR_BUFFER_VIEW)
          buffer_view_count += pCreateInfo->pPoolSizes[i].descriptorCount;
 
-      unsigned desc_data_size =
-         pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-         anv_descriptor_size_for_mutable_type(device->physical, mutable_info, i) :
-         anv_descriptor_data_size(desc_data);
+      uint16_t desc_surface_size, desc_sampler_size;
+      if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
+         anv_descriptor_size_for_mutable_type(device->physical, layout_type, mutable_info, i,
+                                              &desc_surface_size, &desc_sampler_size);
+      } else {
+         anv_descriptor_data_size(desc_data, layout_type,
+                                  &desc_surface_size, &desc_sampler_size);
+      }
 
-      desc_data_size *= pCreateInfo->pPoolSizes[i].descriptorCount;
+      uint32_t desc_data_surface_size =
+         desc_surface_size * pCreateInfo->pPoolSizes[i].descriptorCount;
+      uint32_t desc_data_sampler_size =
+         desc_sampler_size * pCreateInfo->pPoolSizes[i].descriptorCount;
 
       /* Combined image sampler descriptors can take up to 3 slots if they
        * hold a YCbCr image.
        */
       if (pCreateInfo->pPoolSizes[i].type ==
-          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
-         desc_data_size *= 3;
+          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+         desc_data_surface_size *= 3;
+         desc_data_sampler_size *= 3;
+      }
 
       if (pCreateInfo->pPoolSizes[i].type ==
           VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
@@ -1071,10 +1318,11 @@ VkResult anv_CreateDescriptorPool(
          * size as the size in bytes of the block.
          */
         assert(inline_info);
-        desc_data_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+        desc_data_surface_size += pCreateInfo->pPoolSizes[i].descriptorCount;
      }
 
-      descriptor_bo_size += desc_data_size;
+      descriptor_bo_surface_size += desc_data_surface_size;
+      descriptor_bo_sampler_size += desc_data_sampler_size;
 
       descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
    }
@@ -1086,13 +1334,12 @@ VkResult anv_CreateDescriptorPool(
     * extra space that we can chop it into maxSets pieces and align each one
     * of them to 32B.
     */
-   descriptor_bo_size += ANV_UBO_ALIGNMENT * pCreateInfo->maxSets;
+   descriptor_bo_surface_size += ANV_UBO_ALIGNMENT * pCreateInfo->maxSets;
 
    /* We align inline uniform blocks to ANV_UBO_ALIGNMENT */
    if (inline_info) {
-      descriptor_bo_size +=
+      descriptor_bo_surface_size +=
          ANV_UBO_ALIGNMENT * inline_info->maxInlineUniformBlockBindings;
    }
-   descriptor_bo_size = ALIGN(descriptor_bo_size, 4096);
 
    const bool host_only =
       pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT;
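To make the sizing concrete: a direct-descriptor pool created for 100 COMBINED_IMAGE_SAMPLER descriptors and maxSets = 8 would reserve roughly the following. The 64B/32B state sizes are the same assumed stand-in values as earlier, and the 3x factor is the YCbCr worst case from the comment above:

unsigned count = 100, max_sets = 8;
unsigned surface_bytes = 64 * count * 3;   /* 19200B of surface states */
unsigned sampler_bytes = 32 * count * 3;   /*  9600B of sampler states */
surface_bytes += 64 * max_sets;            /* per-set alignment slack  */
/* Each non-empty heap is later rounded up to a 4096B BO by
 * anv_descriptor_pool_heap_init(). */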
- "indirect descriptors" : - "direct descriptors", - descriptor_bo_size, - ANV_BO_ALLOC_CAPTURE | - ANV_BO_ALLOC_MAPPED | - ANV_BO_ALLOC_HOST_CACHED_COHERENT | - ANV_BO_ALLOC_DESCRIPTOR_POOL, - 0 /* explicit_address */, - &pool->bo); - if (result != VK_SUCCESS) { - vk_object_free(&device->vk, pAllocator, pool); - return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - } - util_vma_heap_init(&pool->bo_heap, POOL_HEAP_OFFSET, pool->bo_mem_size); + VkResult result = anv_descriptor_pool_heap_init(device, + &pool->surfaces, + descriptor_bo_surface_size, + pool->host_only, + false /* samplers */); + if (result != VK_SUCCESS) { + vk_object_free(&device->vk, pAllocator, pool); + return result; + } + + result = anv_descriptor_pool_heap_init(device, + &pool->samplers, + descriptor_bo_sampler_size, + pool->host_only, + true /* samplers */); + if (result != VK_SUCCESS) { + anv_descriptor_pool_heap_fini(device, &pool->surfaces); + vk_object_free(&device->vk, pAllocator, pool); + return result; } /* All the surface states allocated by the descriptor pool are internal. We @@ -1180,15 +1419,11 @@ void anv_DestroyDescriptorPool( util_vma_heap_finish(&pool->host_heap); - if (pool->bo_mem_size) { - if (pool->host_bo) - vk_free(&device->vk.alloc, pool->host_bo); - if (pool->bo) - anv_device_release_bo(device, pool->bo); - util_vma_heap_finish(&pool->bo_heap); - } anv_state_stream_finish(&pool->surface_state_stream); + anv_descriptor_pool_heap_fini(device, &pool->surfaces); + anv_descriptor_pool_heap_fini(device, &pool->samplers); + vk_object_free(&device->vk, pAllocator, pool); } @@ -1209,10 +1444,8 @@ VkResult anv_ResetDescriptorPool( util_vma_heap_finish(&pool->host_heap); util_vma_heap_init(&pool->host_heap, POOL_HEAP_OFFSET, pool->host_mem_size); - if (pool->bo_mem_size) { - util_vma_heap_finish(&pool->bo_heap); - util_vma_heap_init(&pool->bo_heap, POOL_HEAP_OFFSET, pool->bo_mem_size); - } + anv_descriptor_pool_heap_reset(device, &pool->surfaces); + anv_descriptor_pool_heap_reset(device, &pool->samplers); anv_state_stream_finish(&pool->surface_state_stream); anv_state_stream_init(&pool->surface_state_stream, @@ -1324,35 +1557,29 @@ anv_descriptor_set_create(struct anv_device *device, if (result != VK_SUCCESS) return result; - uint32_t descriptor_buffer_size = - anv_descriptor_set_layout_descriptor_buffer_size(layout, var_desc_count); + uint32_t descriptor_buffer_surface_size, descriptor_buffer_sampler_size; + anv_descriptor_set_layout_descriptor_buffer_size(layout, var_desc_count, + &descriptor_buffer_surface_size, + &descriptor_buffer_sampler_size); set->desc_surface_state = ANV_STATE_NULL; set->is_push = false; - if (descriptor_buffer_size) { - uint64_t pool_vma_offset = - util_vma_heap_alloc(&pool->bo_heap, descriptor_buffer_size, - ANV_UBO_ALIGNMENT); - if (pool_vma_offset == 0) { + if (descriptor_buffer_surface_size) { + result = anv_descriptor_pool_heap_alloc(pool, &pool->surfaces, + descriptor_buffer_surface_size, + ANV_UBO_ALIGNMENT, + &set->desc_surface_mem); + if (result != VK_SUCCESS) { anv_descriptor_pool_free_set(pool, set); - return vk_error(pool, VK_ERROR_FRAGMENTED_POOL); + return result; } - assert(pool_vma_offset >= POOL_HEAP_OFFSET && - pool_vma_offset - POOL_HEAP_OFFSET <= INT32_MAX); - set->desc_mem.offset = pool_vma_offset - POOL_HEAP_OFFSET; - set->desc_mem.alloc_size = descriptor_buffer_size; - if (pool->host_only) - set->desc_mem.map = pool->host_bo + set->desc_mem.offset; - else - set->desc_mem.map = pool->bo->map + set->desc_mem.offset; - - set->desc_addr = (struct 
anv_address) { - .bo = pool->bo, - .offset = set->desc_mem.offset, + set->desc_surface_addr = (struct anv_address) { + .bo = pool->surfaces.bo, + .offset = set->desc_surface_mem.offset, }; - set->desc_offset = anv_address_physical(set->desc_addr) - + set->desc_offset = anv_address_physical(set->desc_surface_addr) - device->physical->va.internal_surface_state_pool.addr; enum isl_format format = @@ -1369,12 +1596,31 @@ anv_descriptor_set_create(struct anv_device *device, anv_fill_buffer_surface_state(device, set->desc_surface_state.map, format, ISL_SWIZZLE_IDENTITY, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT, - set->desc_addr, - descriptor_buffer_size, 1); + set->desc_surface_addr, + descriptor_buffer_surface_size, 1); } } else { - set->desc_mem = ANV_STATE_NULL; - set->desc_addr = (struct anv_address) { .bo = NULL, .offset = 0 }; + set->desc_surface_mem = ANV_STATE_NULL; + set->desc_surface_addr = ANV_NULL_ADDRESS; + } + + if (descriptor_buffer_sampler_size) { + result = anv_descriptor_pool_heap_alloc(pool, &pool->samplers, + descriptor_buffer_sampler_size, + ANV_SAMPLER_STATE_SIZE, + &set->desc_sampler_mem); + if (result != VK_SUCCESS) { + anv_descriptor_pool_free_set(pool, set); + return result; + } + + set->desc_sampler_addr = (struct anv_address) { + .bo = pool->samplers.bo, + .offset = set->desc_sampler_mem.offset, + }; + } else { + set->desc_sampler_mem = ANV_STATE_NULL; + set->desc_sampler_addr = ANV_NULL_ADDRESS; } vk_object_base_init(&device->vk, &set->base, @@ -1456,14 +1702,15 @@ anv_descriptor_set_destroy(struct anv_device *device, { anv_descriptor_set_layout_unref(device, set->layout); - if (set->desc_mem.alloc_size) { - util_vma_heap_free(&pool->bo_heap, - (uint64_t)set->desc_mem.offset + POOL_HEAP_OFFSET, - set->desc_mem.alloc_size); + if (set->desc_surface_mem.alloc_size) { + anv_descriptor_pool_heap_free(&pool->surfaces, set->desc_surface_mem); if (set->desc_surface_state.alloc_size) anv_descriptor_pool_free_state(pool, set->desc_surface_state); } + if (set->desc_sampler_mem.alloc_size) + anv_descriptor_pool_heap_free(&pool->samplers, set->desc_sampler_mem); + if (device->physical->indirect_descriptors) { if (!pool->host_only) { for (uint32_t b = 0; b < set->buffer_view_count; b++) { @@ -1582,9 +1829,9 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer, set->buffer_view_count = layout->buffer_view_count; set->descriptor_count = layout->descriptor_count; - if (layout->descriptor_buffer_size && + if (layout->descriptor_buffer_surface_size && (push_set->set_used_on_gpu || - set->desc_mem.alloc_size < layout->descriptor_buffer_size)) { + set->desc_surface_mem.alloc_size < layout->descriptor_buffer_surface_size)) { struct anv_physical_device *pdevice = cmd_buffer->device->physical; struct anv_state_stream *push_stream = pdevice->indirect_descriptors ? @@ -1594,30 +1841,67 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer, pdevice->va.indirect_push_descriptor_pool.addr : pdevice->va.internal_surface_state_pool.addr; + uint32_t surface_size, sampler_size; + anv_descriptor_set_layout_descriptor_buffer_size(layout, 0, + &surface_size, + &sampler_size); + /* The previous buffer is either actively used by some GPU command (so * we can't modify it) or is too small. Allocate a new one. 
@@ -1582,9 +1829,9 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
    set->buffer_view_count = layout->buffer_view_count;
    set->descriptor_count = layout->descriptor_count;
 
-   if (layout->descriptor_buffer_size &&
+   if (layout->descriptor_buffer_surface_size &&
        (push_set->set_used_on_gpu ||
-        set->desc_mem.alloc_size < layout->descriptor_buffer_size)) {
+        set->desc_surface_mem.alloc_size < layout->descriptor_buffer_surface_size)) {
       struct anv_physical_device *pdevice = cmd_buffer->device->physical;
       struct anv_state_stream *push_stream =
          pdevice->indirect_descriptors ?
@@ -1594,30 +1841,67 @@ anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
          pdevice->va.indirect_push_descriptor_pool.addr :
          pdevice->va.internal_surface_state_pool.addr;
 
+      uint32_t surface_size, sampler_size;
+      anv_descriptor_set_layout_descriptor_buffer_size(layout, 0,
+                                                       &surface_size,
+                                                       &sampler_size);
+
       /* The previous buffer is either actively used by some GPU command (so
        * we can't modify it) or is too small. Allocate a new one.
        */
-      struct anv_state desc_mem =
-         anv_state_stream_alloc(push_stream,
-                                anv_descriptor_set_layout_descriptor_buffer_size(layout, 0),
-                                ANV_UBO_ALIGNMENT);
-      if (desc_mem.map == NULL)
+      struct anv_state desc_surface_mem =
+         anv_state_stream_alloc(push_stream, surface_size, ANV_UBO_ALIGNMENT);
+      if (desc_surface_mem.map == NULL)
         return false;
 
-      if (set->desc_mem.alloc_size) {
+      if (set->desc_surface_mem.alloc_size) {
         /* TODO: Do we really need to copy all the time? */
-         memcpy(desc_mem.map, set->desc_mem.map,
-                MIN2(desc_mem.alloc_size, set->desc_mem.alloc_size));
+         memcpy(desc_surface_mem.map, set->desc_surface_mem.map,
+                MIN2(desc_surface_mem.alloc_size,
+                     set->desc_surface_mem.alloc_size));
      }
-      set->desc_mem = desc_mem;
+      set->desc_surface_mem = desc_surface_mem;
 
-      set->desc_addr = anv_state_pool_state_address(
+      set->desc_surface_addr = anv_state_pool_state_address(
         push_stream->state_pool,
-         set->desc_mem);
-      set->desc_offset = anv_address_physical(set->desc_addr) -
+         set->desc_surface_mem);
+      set->desc_offset = anv_address_physical(set->desc_surface_addr) -
                          push_base_address;
    }
 
+   if (layout->descriptor_buffer_sampler_size &&
+       (push_set->set_used_on_gpu ||
+        set->desc_sampler_mem.alloc_size < layout->descriptor_buffer_sampler_size)) {
+      struct anv_physical_device *pdevice = cmd_buffer->device->physical;
+      assert(!pdevice->indirect_descriptors);
+      struct anv_state_stream *push_stream = &cmd_buffer->dynamic_state_stream;
+
+      uint32_t surface_size, sampler_size;
+      anv_descriptor_set_layout_descriptor_buffer_size(layout, 0,
+                                                       &surface_size,
+                                                       &sampler_size);
+
+      /* The previous buffer is either actively used by some GPU command (so
+       * we can't modify it) or is too small. Allocate a new one.
+       */
+      struct anv_state desc_sampler_mem =
+         anv_state_stream_alloc(push_stream, sampler_size, ANV_SAMPLER_STATE_SIZE);
+      if (desc_sampler_mem.map == NULL)
+         return false;
+
+      if (set->desc_sampler_mem.alloc_size) {
+         /* TODO: Do we really need to copy all the time? */
+         memcpy(desc_sampler_mem.map, set->desc_sampler_mem.map,
+                MIN2(desc_sampler_mem.alloc_size,
+                     set->desc_sampler_mem.alloc_size));
+      }
+      set->desc_sampler_mem = desc_sampler_mem;
+
+      set->desc_sampler_addr = anv_state_pool_state_address(
+         push_stream->state_pool,
+         set->desc_sampler_mem);
+   }
+
    return true;
 }
 
@@ -1723,12 +2007,16 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
       .sampler = sampler,
    };
 
-   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride;
+   void *desc_surface_map = set->desc_surface_mem.map +
+                            bind_layout->descriptor_surface_offset +
+                            element * bind_layout->descriptor_surface_stride;
+   void *desc_sampler_map = set->desc_sampler_mem.map +
+                            bind_layout->descriptor_sampler_offset +
+                            element * bind_layout->descriptor_sampler_stride;
 
    enum anv_descriptor_data data =
       bind_layout->type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-      anv_descriptor_data_for_type(device->physical, type) :
+      anv_descriptor_data_for_type(device->physical, set->layout->type, type) :
       bind_layout->data;
 
    if (data & ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE) {
@@ -1755,7 +2043,7 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
        * can be no more than the size of our array of handles.
        */
       assert(bind_layout->max_plane_count <= ARRAY_SIZE(desc_data));
-      memcpy(desc_map, desc_data,
+      memcpy(desc_surface_map, desc_data,
             MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
    }
 
@@ -1768,20 +2056,20 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
                                 image_view->planes[0].storage.state),
           .image_depth = image_view->vk.storage.z_slice_count,
        };
-         memcpy(desc_map, &desc_data, sizeof(desc_data));
+         memcpy(desc_surface_map, &desc_data, sizeof(desc_data));
      } else {
-         memset(desc_map, 0, bind_layout->descriptor_stride);
+         memset(desc_surface_map, 0, bind_layout->descriptor_surface_stride);
      }
   }
 
    if (data & ANV_DESCRIPTOR_SAMPLER) {
      if (sampler) {
        for (unsigned p = 0; p < sampler->n_planes; p++) {
-            memcpy(desc_map + p * ANV_SAMPLER_STATE_SIZE,
+            memcpy(desc_sampler_map + p * ANV_SAMPLER_STATE_SIZE,
                   sampler->state[p], ANV_SAMPLER_STATE_SIZE);
        }
      } else {
-         memset(desc_map, 0, bind_layout->descriptor_stride);
+         memset(desc_sampler_map, 0, bind_layout->descriptor_sampler_stride);
      }
   }
 
@@ -1789,7 +2077,7 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
      unsigned max_plane_count = image_view ? image_view->n_planes : 1;
 
      for (unsigned p = 0; p < max_plane_count; p++) {
-         void *plane_map = desc_map + p * ANV_SURFACE_STATE_SIZE;
+         void *plane_map = desc_surface_map + p * ANV_SURFACE_STATE_SIZE;
 
        if (image_view) {
           memcpy(plane_map,
@@ -1808,7 +2096,7 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
                                     sampler ? sampler->n_planes : 1);
 
      for (unsigned p = 0; p < max_plane_count; p++) {
-         void *plane_map = desc_map + p * 2 * ANV_SURFACE_STATE_SIZE;
+         void *plane_map = desc_surface_map + p * 2 * ANV_SURFACE_STATE_SIZE;
 
        if (image_view) {
          memcpy(plane_map,
@@ -1866,17 +2154,18 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
 
    enum anv_descriptor_data data =
       bind_layout->type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-      anv_descriptor_data_for_type(device->physical, type) :
+      anv_descriptor_data_for_type(device->physical, set->layout->type, type) :
       bind_layout->data;
 
-   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride;
+   void *desc_map = set->desc_surface_mem.map +
+                    bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride;
 
    if (buffer_view == NULL) {
       if (data & ANV_DESCRIPTOR_SURFACE)
         memcpy(desc_map, device->null_surface_state.map, ANV_SURFACE_STATE_SIZE);
       else
-         memset(desc_map, 0, bind_layout->descriptor_stride);
+         memset(desc_map, 0, bind_layout->descriptor_surface_stride);
       return;
    }
 
@@ -1954,17 +2243,18 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
 
    enum anv_descriptor_data data =
       bind_layout->type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-      anv_descriptor_data_for_type(device->physical, type) :
+      anv_descriptor_data_for_type(device->physical, set->layout->type, type) :
      bind_layout->data;
 
-   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride;
+   void *desc_map = set->desc_surface_mem.map +
+                    bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride;
 
    if (buffer == NULL) {
      if (data & ANV_DESCRIPTOR_SURFACE)
        memcpy(desc_map, device->null_surface_state.map, ANV_SURFACE_STATE_SIZE);
      else
-         memset(desc_map, 0, bind_layout->descriptor_stride);
+         memset(desc_map, 0, bind_layout->descriptor_surface_stride);
      return;
    }
 
@@ -2038,7 +2328,8 @@ anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
 
    assert(bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM);
 
-   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset;
+   void *desc_map = set->desc_surface_mem.map +
+                    bind_layout->descriptor_surface_offset;
 
    memcpy(desc_map + offset, data, size);
 }
@@ -2066,10 +2357,11 @@ anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
       desc_data.address = vk_acceleration_structure_get_va(accel);
       desc_data.range = accel->size;
    }
-   assert(sizeof(desc_data) <= bind_layout->descriptor_stride);
+   assert(sizeof(desc_data) <= bind_layout->descriptor_surface_stride);
 
-   void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride;
+   void *desc_map = set->desc_surface_mem.map +
+                    bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride;
 
    memcpy(desc_map, &desc_data, sizeof(desc_data));
 }
@@ -2190,14 +2482,19 @@ void anv_UpdateDescriptorSets(
         if (src_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
           anv_descriptor_set_write_inline_uniform_data(device, dst,
                                                        copy->dstBinding,
-                                                         src->desc_mem.map + src_layout->descriptor_offset + copy->srcArrayElement,
+                                                         src->desc_surface_mem.map +
+                                                         src_layout->descriptor_surface_offset + copy->srcArrayElement,
                                                        copy->dstArrayElement,
                                                        copy->descriptorCount);
           continue;
        }
 
-         uint32_t copy_element_size = MIN2(src_layout->descriptor_stride,
-                                           dst_layout->descriptor_stride);
+         uint32_t copy_surface_element_size =
+            MIN2(src_layout->descriptor_surface_stride,
+                 dst_layout->descriptor_surface_stride);
+         uint32_t copy_sampler_element_size =
+            MIN2(src_layout->descriptor_sampler_stride,
+                 dst_layout->descriptor_sampler_stride);
        for (uint32_t j = 0; j < copy->descriptorCount; j++) {
          struct anv_descriptor *src_desc =
            &src->descriptors[src_layout->descriptor_index +
@@ -2214,13 +2511,20 @@ void anv_UpdateDescriptorSets(
             *    - RENDER_SURFACE_STATE
             *    - SAMPLER_STATE
             */
-            memcpy(dst->desc_mem.map +
-                   dst_layout->descriptor_offset +
-                   (copy->dstArrayElement + j) * dst_layout->descriptor_stride,
-                   src->desc_mem.map +
-                   src_layout->descriptor_offset +
-                   (copy->srcArrayElement + j) * src_layout->descriptor_stride,
-                   copy_element_size);
+            memcpy(dst->desc_surface_mem.map +
+                   dst_layout->descriptor_surface_offset +
+                   (copy->dstArrayElement + j) * dst_layout->descriptor_surface_stride,
+                   src->desc_surface_mem.map +
+                   src_layout->descriptor_surface_offset +
+                   (copy->srcArrayElement + j) * src_layout->descriptor_surface_stride,
+                   copy_surface_element_size);
+            memcpy(dst->desc_sampler_mem.map +
+                   dst_layout->descriptor_sampler_offset +
+                   (copy->dstArrayElement + j) * dst_layout->descriptor_sampler_stride,
+                   src->desc_sampler_mem.map +
+                   src_layout->descriptor_sampler_offset +
+                   (copy->srcArrayElement + j) * src_layout->descriptor_sampler_stride,
+                   copy_sampler_element_size);
 
             /* Copy the CPU side data anv_descriptor */
             *dst_desc = *src_desc;
@@ -2230,7 +2534,9 @@ void anv_UpdateDescriptorSets(
             */
           const enum anv_descriptor_data data =
             src_layout->type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT ?
-               anv_descriptor_data_for_type(device->physical, src_desc->type) :
+               anv_descriptor_data_for_type(device->physical,
+                                            src->layout->type,
+                                            src_desc->type) :
             src_layout->data;
           if (data & ANV_DESCRIPTOR_BUFFER_VIEW) {
             struct anv_buffer_view *src_bview =
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 358c283f1a6..c878d76c45e 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1479,7 +1479,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
    device->compiler->supports_shader_constants = true;
    device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
-   device->compiler->use_bindless_sampler_offset = !device->indirect_descriptors;
+   device->compiler->use_bindless_sampler_offset = false;
    device->compiler->spilling_rate =
       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
 
@@ -3324,6 +3324,9 @@ VkResult anv_CreateDevice(
                          device->physical->va.bindless_surface_state_pool.size);
    }
 
+   util_vma_heap_init(&device->vma_samplers,
+                      device->physical->va.sampler_state_pool.addr,
+                      device->physical->va.sampler_state_pool.size);
    util_vma_heap_init(&device->vma_trtt, device->physical->va.trtt.addr,
                       device->physical->va.trtt.size);
 
@@ -3789,6 +3792,8 @@ VkResult anv_CreateDevice(
    pthread_mutex_destroy(&device->mutex);
 fail_vmas:
    util_vma_heap_finish(&device->vma_trtt);
+   if (!device->physical->indirect_descriptors)
+      util_vma_heap_finish(&device->vma_samplers);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3903,6 +3908,8 @@ void anv_DestroyDevice(
    anv_bo_cache_finish(&device->bo_cache);
 
    util_vma_heap_finish(&device->vma_trtt);
+   if (!device->physical->indirect_descriptors)
+      util_vma_heap_finish(&device->vma_samplers);
    util_vma_heap_finish(&device->vma_desc);
    util_vma_heap_finish(&device->vma_hi);
    util_vma_heap_finish(&device->vma_lo);
@@ -3970,6 +3977,9 @@ anv_vma_heap_for_flags(struct anv_device *device,
    if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
       return &device->vma_desc;
 
+   if (alloc_flags & ANV_BO_ALLOC_SAMPLER_POOL)
+      return &device->vma_samplers;
+
    return &device->vma_hi;
 }
 
@@ -4022,6 +4032,7 @@ anv_vma_free(struct anv_device *device,
    assert(vma_heap == &device->vma_lo ||
           vma_heap == &device->vma_hi ||
           vma_heap == &device->vma_desc ||
+          vma_heap == &device->vma_samplers ||
          vma_heap == &device->vma_trtt);
 
    const uint64_t addr_48b = intel_48b_address(address);
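On the device side, the new VA heap exists so that BOs carrying SAMPLER_STATE land in the dedicated sampler-state range. A hypothetical allocation that exercises the new routing in anv_vma_heap_for_flags(); the wrapper is illustrative, the flags mirror what anv_descriptor_pool_heap_init passes for sampler heaps:

static VkResult
ex_alloc_sampler_pool_bo(struct anv_device *device, struct anv_bo **bo)
{
   /* ANV_BO_ALLOC_SAMPLER_POOL steers the VA allocation into
    * device->vma_samplers (see anv_vma_heap_for_flags above). */
   return anv_device_alloc_bo(device, "direct sampler", 4096,
                              ANV_BO_ALLOC_CAPTURE |
                              ANV_BO_ALLOC_MAPPED |
                              ANV_BO_ALLOC_HOST_CACHED_COHERENT |
                              ANV_BO_ALLOC_SAMPLER_POOL,
                              0 /* explicit_address */, bo);
}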
*/ - if (bind_layout->descriptor_stride) + if (bind_layout->descriptor_surface_stride) state->set[set].desc_buffer_used = true; if (bind_layout->dynamic_offset_index >= 0) @@ -556,7 +556,8 @@ build_res_index(nir_builder *b, case nir_address_format_64bit_global_32bit_offset: /* Descriptor set buffer accesses will go through A64 messages, so the * index to get the descriptor set buffer address is located in the - * anv_push_constants::desc_offsets and it's indexed by the set number. + * anv_push_constants::desc_surface_offsets and it's indexed by the set + * number. */ set_idx = set; break; @@ -593,8 +594,8 @@ build_res_index(nir_builder *b, } const uint32_t desc_bti = state->set[set].binding[binding].surface_offset; - assert(bind_layout->descriptor_stride % 8 == 0); - const uint32_t desc_stride = bind_layout->descriptor_stride / 8; + assert(bind_layout->descriptor_surface_stride % 8 == 0); + const uint32_t desc_stride = bind_layout->descriptor_surface_stride / 8; nir_def *packed = nir_ior_imm(b, @@ -605,7 +606,7 @@ build_res_index(nir_builder *b, return nir_vec4(b, packed, - nir_imm_int(b, bind_layout->descriptor_offset), + nir_imm_int(b, bind_layout->descriptor_surface_offset), nir_imm_int(b, array_size - 1), array_index); } @@ -748,8 +749,8 @@ build_desc_addr_for_binding(nir_builder *b, nir_iadd_imm(b, nir_imul_imm(b, array_index, - bind_layout->descriptor_stride), - bind_layout->descriptor_offset); + bind_layout->descriptor_surface_stride), + bind_layout->descriptor_surface_offset); return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr), nir_unpack_64_2x32_split_y(b, set_addr), @@ -763,14 +764,38 @@ build_desc_addr_for_binding(nir_builder *b, nir_iadd_imm(b, nir_imul_imm(b, array_index, - bind_layout->descriptor_stride), - bind_layout->descriptor_offset)); + bind_layout->descriptor_surface_stride), + bind_layout->descriptor_surface_offset)); default: unreachable("Unhandled address format"); } } +static unsigned +binding_descriptor_offset(const struct apply_pipeline_layout_state *state, + const struct anv_descriptor_set_binding_layout *bind_layout, + bool sampler) +{ + if (sampler && + state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) + return bind_layout->descriptor_sampler_offset; + + return bind_layout->descriptor_surface_offset; +} + +static unsigned +binding_descriptor_stride(const struct apply_pipeline_layout_state *state, + const struct anv_descriptor_set_binding_layout *bind_layout, + bool sampler) +{ + if (sampler && + state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) + return bind_layout->descriptor_sampler_stride; + + return bind_layout->descriptor_surface_stride; +} + static nir_def * build_surface_index_for_binding(nir_builder *b, unsigned set, unsigned binding, @@ -781,6 +806,10 @@ build_surface_index_for_binding(nir_builder *b, { const struct anv_descriptor_set_binding_layout *bind_layout = &state->layout->set[set].layout->binding[binding]; + const unsigned descriptor_offset = + binding_descriptor_offset(state, bind_layout, false /* sampler */); + const unsigned descriptor_stride = + binding_descriptor_stride(state, bind_layout, false /* sampler */); const bool is_bindless = is_binding_bindless(set, binding, false /* sampler */, state); @@ -797,23 +826,25 @@ build_surface_index_for_binding(nir_builder *b, } else { set_offset = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), - .base = offsetof(struct anv_push_constants, desc_offsets[set]), - .range = sizeof_field(struct anv_push_constants, desc_offsets[set])); + .base = 
offsetof(struct anv_push_constants, + desc_surface_offsets[set]), + .range = sizeof_field(struct anv_push_constants, + desc_surface_offsets[set])); /* With bindless indexes are offsets in the descriptor buffer */ surface_index = nir_iadd_imm(b, - nir_imul_imm(b, array_index, bind_layout->descriptor_stride), - bind_layout->descriptor_offset); + nir_imul_imm(b, array_index, descriptor_stride), + descriptor_offset); if (plane != 0) { assert(plane < bind_layout->max_plane_count); surface_index = nir_iadd_imm(b, surface_index, - plane * (bind_layout->descriptor_stride / + plane * (descriptor_stride / bind_layout->max_plane_count)); } - assert(bind_layout->descriptor_offset % 64 == 0); - assert(bind_layout->descriptor_stride % 64 == 0); + assert(descriptor_offset % 64 == 0); + assert(descriptor_stride % 64 == 0); } } else { /* Unused */ @@ -854,14 +885,17 @@ build_sampler_handle_for_binding(nir_builder *b, bool non_uniform, const struct apply_pipeline_layout_state *state) { + const struct anv_descriptor_set_binding_layout *bind_layout = + &state->layout->set[set].layout->binding[binding]; + const unsigned descriptor_offset = + binding_descriptor_offset(state, bind_layout, true /* sampler */); + const unsigned descriptor_stride = + binding_descriptor_stride(state, bind_layout, true /* sampler */); const bool is_bindless = is_binding_bindless(set, binding, true /* sampler */, state); nir_def *set_offset, *sampler_index; if (is_bindless) { - const struct anv_descriptor_set_binding_layout *bind_layout = - &state->layout->set[set].layout->binding[binding]; - if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) { set_offset = nir_imm_int(b, 0xdeaddead); @@ -878,10 +912,12 @@ build_sampler_handle_for_binding(nir_builder *b, } else { set_offset = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), - .base = offsetof(struct anv_push_constants, desc_offsets[set]), - .range = sizeof_field(struct anv_push_constants, desc_offsets[set])); + .base = offsetof(struct anv_push_constants, + desc_sampler_offsets[set]), + .range = sizeof_field(struct anv_push_constants, + desc_sampler_offsets[set])); - uint32_t base_offset = bind_layout->descriptor_offset; + uint32_t base_offset = descriptor_offset; /* The SAMPLER_STATE can only be located at a 64 byte in the combined * image/sampler case. 
@@ -878,10 +912,12 @@ build_sampler_handle_for_binding(nir_builder *b,
       } else {
          set_offset =
             nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
-                                   .base = offsetof(struct anv_push_constants, desc_offsets[set]),
-                                   .range = sizeof_field(struct anv_push_constants, desc_offsets[set]));
+                                   .base = offsetof(struct anv_push_constants,
+                                                    desc_sampler_offsets[set]),
+                                   .range = sizeof_field(struct anv_push_constants,
+                                                         desc_sampler_offsets[set]));
 
-         uint32_t base_offset = bind_layout->descriptor_offset;
+         uint32_t base_offset = descriptor_offset;
 
          /* The SAMPLER_STATE can only be located at a 64 byte aligned offset
          * in the combined image/sampler case. Combined image/sampler is not
          * supported to be
@@ -892,13 +928,13 @@ build_sampler_handle_for_binding(nir_builder *b,
 
          if (plane != 0) {
             assert(plane < bind_layout->max_plane_count);
-            base_offset += plane * (bind_layout->descriptor_stride /
+            base_offset += plane * (descriptor_stride /
                                     bind_layout->max_plane_count);
          }
 
          sampler_index =
             nir_iadd_imm(b,
-                         nir_imul_imm(b, array_index, bind_layout->descriptor_stride),
+                         nir_imul_imm(b, array_index, descriptor_stride),
                          base_offset);
       }
    } else {
@@ -1095,7 +1131,7 @@ build_buffer_addr_for_binding(nir_builder *b,
          &state->layout->set[set].layout->binding[binding];
       return nir_vec2(b,
                       nir_imm_int(b, state->set[set].desc_offset),
-                      nir_imm_int(b, bind_layout->descriptor_offset));
+                      nir_imm_int(b, bind_layout->descriptor_surface_offset));
    }
 
    struct res_index_defs res = unpack_res_index(b, res_index);
@@ -1875,9 +1911,9 @@ add_bti_entry(struct anv_pipeline_bind_map *map,
       .set = set,
       .binding = binding,
       .index = bind_layout->descriptor_index + element,
-      .set_offset = bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride +
-                    plane * bind_layout->descriptor_data_size,
+      .set_offset = bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride +
+                    plane * bind_layout->descriptor_data_surface_size,
       .plane = plane,
    };
    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
@@ -1896,8 +1932,8 @@ add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
       .set = set,
       .binding = binding,
       .index = bind_layout->descriptor_index + element,
-      .set_offset = bind_layout->descriptor_offset +
-                    element * bind_layout->descriptor_stride,
+      .set_offset = bind_layout->descriptor_surface_offset +
+                    element * bind_layout->descriptor_surface_stride,
       .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
    };
    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c
index ef9f7634a36..4035ba2f9cf 100644
--- a/src/intel/vulkan/anv_nir_compute_push_layout.c
+++ b/src/intel/vulkan/anv_nir_compute_push_layout.c
@@ -68,10 +68,12 @@ anv_nir_compute_push_layout(nir_shader *nir,
 
          case nir_intrinsic_load_desc_set_address_intel:
          case nir_intrinsic_load_desc_set_dynamic_index_intel: {
-            unsigned base = offsetof(struct anv_push_constants, desc_offsets);
+            unsigned base = offsetof(struct anv_push_constants,
+                                     desc_surface_offsets);
             push_start = MIN2(push_start, base);
             push_end = MAX2(push_end, base +
-                            sizeof_field(struct anv_push_constants, desc_offsets));
+                            sizeof_field(struct anv_push_constants,
+                                         desc_surface_offsets));
             break;
          }
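The hunk above widens the pushed range so the whole desc_surface_offsets array reaches the shader whenever it asks for a set address or dynamic index. A standalone sketch of that range growth, with MIN2/MAX2 spelled out and a trimmed stand-in for anv_push_constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SETS 8                    /* stand-in sizes */
#define MAX_PUSH_CONSTANTS_SIZE 128

struct push_constants {               /* trimmed stand-in */
   uint8_t  client_data[MAX_PUSH_CONSTANTS_SIZE];
   uint32_t desc_surface_offsets[MAX_SETS];
   uint32_t desc_sampler_offsets[MAX_SETS];
};

int main(void)
{
   unsigned push_start = sizeof(struct push_constants), push_end = 0;

   /* A load_desc_set_address_intel intrinsic pulls the whole
    * desc_surface_offsets array into the pushed range. */
   unsigned base = offsetof(struct push_constants, desc_surface_offsets);
   unsigned size = sizeof(((struct push_constants *)0)->desc_surface_offsets);
   push_start = base < push_start ? base : push_start;
   push_end   = base + size > push_end ? base + size : push_end;

   printf("pushed range: [%u, %u)\n", push_start, push_end); /* [128, 160) */
   return 0;
}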
@@ -175,8 +177,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
             b->cursor = nir_before_instr(&intrin->instr);
             nir_def *pc_load = nir_load_uniform(b, 1, 32,
                nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
-               .base = offsetof(struct anv_push_constants, desc_offsets),
-               .range = sizeof_field(struct anv_push_constants, desc_offsets),
+               .base = offsetof(struct anv_push_constants,
+                                desc_surface_offsets),
+               .range = sizeof_field(struct anv_push_constants,
+                                     desc_surface_offsets),
                .dest_type = nir_type_uint32);
             pc_load = nir_iand_imm(b, pc_load, ANV_DESCRIPTOR_SET_OFFSET_MASK);
             nir_def *desc_addr =
@@ -192,8 +196,10 @@ anv_nir_compute_push_layout(nir_shader *nir,
             b->cursor = nir_before_instr(&intrin->instr);
             nir_def *pc_load = nir_load_uniform(b, 1, 32,
                nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint32_t)),
-               .base = offsetof(struct anv_push_constants, desc_offsets),
-               .range = sizeof_field(struct anv_push_constants, desc_offsets),
+               .base = offsetof(struct anv_push_constants,
+                                desc_surface_offsets),
+               .range = sizeof_field(struct anv_push_constants,
+                                     desc_surface_offsets),
                .dest_type = nir_type_uint32);
             pc_load = nir_iand_imm(
                b, pc_load, ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
diff --git a/src/intel/vulkan/anv_nir_lower_resource_intel.c b/src/intel/vulkan/anv_nir_lower_resource_intel.c
index fffe4a89611..603831b8b11 100644
--- a/src/intel/vulkan/anv_nir_lower_resource_intel.c
+++ b/src/intel/vulkan/anv_nir_lower_resource_intel.c
@@ -135,9 +135,7 @@ lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
        * set, resource_intel::src[0] has to be shifted right by 6 (bringing
        * it back in bytes).
        */
-      if (is_sampler)
-         set_offset = nir_ushr_imm(b, set_offset, 6);
-      else
+      if (!is_sampler)
          binding_offset = nir_ishl_imm(b, binding_offset, 6);
    }
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index 880b297858b..2ead8ecd3ba 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -123,13 +123,15 @@ anv_shader_bin_create(struct anv_device *device,
    int rv_count = 0;
    struct brw_shader_reloc_value reloc_values[6];
 
-   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
+   assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
+   assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
       .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
       .value = device->physical->indirect_descriptors ?
                (device->physical->va.indirect_descriptor_pool.addr >> 32) :
-               (device->physical->va.binding_table_pool.addr >> 32),
+               (device->physical->va.internal_surface_state_pool.addr >> 32),
    };
+   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
       .value = shader_data_addr,
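The asserts above encode the contract behind BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH: the descriptor heap base must have zero low 32 bits so a shader can rebuild a full 64-bit address from the relocated high dword plus a pushed 32-bit offset. A sketch with hypothetical addresses:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t pool_base = 0x0000a00000000000ull;  /* hypothetical, 4Gb aligned */
   assert((pool_base & 0xffffffffull) == 0);    /* what the asserts enforce */

   uint32_t addr_high = (uint32_t)(pool_base >> 32); /* baked in by the reloc */
   uint32_t set_offset = 0x1240;                     /* pushed per set        */

   /* The shader only ever does a 32/32 concatenation, no 64-bit add. */
   uint64_t desc_addr = ((uint64_t)addr_high << 32) | set_offset;
   printf("descriptor address: 0x%llx\n", (unsigned long long)desc_addr);
   assert(desc_addr == pool_base + set_offset);
   return 0;
}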
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 9c108e78308..b1c558c7713 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -432,6 +432,9 @@ enum anv_bo_alloc_flags {
     * set it will allocate a coherent BO.
     */
    ANV_BO_ALLOC_HOST_CACHED = (1 << 16),
+
+   /** For sampler pools */
+   ANV_BO_ALLOC_SAMPLER_POOL = (1 << 17),
 };
 
 struct anv_bo {
@@ -1634,6 +1637,7 @@ struct anv_device {
     struct util_vma_heap vma_lo;
     struct util_vma_heap vma_hi;
     struct util_vma_heap vma_desc;
+    struct util_vma_heap vma_samplers;
     struct util_vma_heap vma_trtt;
 
     /** List of all anv_device_memory objects */
@@ -2370,18 +2374,30 @@ struct anv_descriptor_set_binding_layout {
     */
    int16_t dynamic_offset_index;
 
-   /* Computed size from data */
-   uint16_t descriptor_data_size;
+   /* Computed surface size from data (for one plane) */
+   uint16_t descriptor_data_surface_size;
+
+   /* Computed sampler size from data (for one plane) */
+   uint16_t descriptor_data_sampler_size;
 
    /* Index into the descriptor set buffer views */
    int32_t buffer_view_index;
 
-   /* Offset into the descriptor buffer where this descriptor lives */
-   uint32_t descriptor_offset;
+   /* Offset into the descriptor buffer where the surface descriptor lives */
+   uint32_t descriptor_surface_offset;
 
-   /* Pre computed stride (with multiplane descriptor, the descriptor includes
-    * all the planes) */
-   unsigned descriptor_stride;
+   /* Offset into the descriptor buffer where the sampler descriptor lives */
+   uint16_t descriptor_sampler_offset;
+
+   /* Precomputed surface stride (with a multiplane descriptor, the
+    * descriptor includes all the planes)
+    */
+   uint16_t descriptor_surface_stride;
+
+   /* Precomputed sampler stride (with a multiplane descriptor, the
+    * descriptor includes all the planes)
+    */
+   uint16_t descriptor_sampler_stride;
 
    /* Immutable samplers (or NULL if no immutable samplers) */
    struct anv_sampler **immutable_samplers;
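For illustration, these split fields feed the binding table entries built by add_bti_entry() earlier in the patch. A sketch with a trimmed stand-in struct and hypothetical values:

#include <stdint.h>
#include <stdio.h>

struct binding_layout {        /* trimmed stand-in for the struct above */
   uint32_t descriptor_surface_offset;
   uint16_t descriptor_surface_stride;
   uint16_t descriptor_data_surface_size;
};

int main(void)
{
   /* Hypothetical binding: surfaces start at 512, one 64B surface state
    * per plane, two planes, hence a 128B stride. */
   struct binding_layout bl = {
      .descriptor_surface_offset = 512,
      .descriptor_surface_stride = 128,
      .descriptor_data_surface_size = 64,
   };
   uint32_t element = 2, plane = 1;

   /* Mirrors add_bti_entry()'s set_offset computation. */
   uint32_t set_offset = bl.descriptor_surface_offset +
                         element * bl.descriptor_surface_stride +
                         plane * bl.descriptor_data_surface_size;
   printf("set_offset = %u\n", set_offset); /* 512 + 256 + 64 = 832 */
   return 0;
}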
@@ -2433,8 +2449,15 @@ struct anv_descriptor_set_layout {
     */
    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
 
-   /* Size of the descriptor buffer for this descriptor set */
-   uint32_t descriptor_buffer_size;
+   /* Size of the descriptor buffer dedicated to surface states for this
+    * descriptor set
+    */
+   uint32_t descriptor_buffer_surface_size;
+
+   /* Size of the descriptor buffer dedicated to sampler states for this
+    * descriptor set
+    */
+   uint32_t descriptor_buffer_sampler_size;
 
    /* Bindings in this descriptor set */
    struct anv_descriptor_set_binding_layout binding[0];
@@ -2506,13 +2529,20 @@ struct anv_descriptor_set {
     */
    uint32_t generate_surface_states;
 
-   /* State relative to anv_descriptor_pool::bo */
-   struct anv_state desc_mem;
+   /* State relative to anv_descriptor_pool::surface_bo */
+   struct anv_state desc_surface_mem;
+   /* State relative to anv_descriptor_pool::sampler_bo */
+   struct anv_state desc_sampler_mem;
    /* Surface state for the descriptor buffer */
    struct anv_state desc_surface_state;
 
-   /* Descriptor set address. */
-   struct anv_address desc_addr;
+   /* Descriptor set address pointing to desc_surface_mem (we don't need one
+    * for samplers because they're never accessed other than by the HW
+    * through the shader sampler handle).
+    */
+   struct anv_address desc_surface_addr;
+
+   struct anv_address desc_sampler_addr;
 
    /* Descriptor offset from the
    * device->va.internal_surface_state_pool.addr
@@ -2592,15 +2622,28 @@ anv_descriptor_set_address(struct anv_descriptor_set *set)
       push_set->set_used_on_gpu = true;
    }
 
-   return set->desc_addr;
+   return set->desc_surface_addr;
 }
 
+struct anv_descriptor_pool_heap {
+   /* BO allocated to back the pool (unused for host pools) */
+   struct anv_bo *bo;
+
+   /* Host memory allocated to back a host pool */
+   void *host_mem;
+
+   /* Heap tracking allocations in bo/host_mem */
+   struct util_vma_heap heap;
+
+   /* Size of the heap */
+   uint32_t size;
+};
+
 struct anv_descriptor_pool {
    struct vk_object_base base;
 
-   struct anv_bo *bo;
-   void *host_bo;
-   struct util_vma_heap bo_heap;
+   struct anv_descriptor_pool_heap surfaces;
+   struct anv_descriptor_pool_heap samplers;
 
    struct anv_state_stream surface_state_stream;
    void *surface_state_free_list;
@@ -2614,9 +2657,6 @@ struct anv_descriptor_pool {
    /** Allocated size of host_mem */
    uint32_t host_mem_size;
 
-   /** Allocated size of descriptor bo (should be equal to bo->size) */
-   uint32_t bo_mem_size;
-
    /**
     * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
     * surface_state_stream is unused.
@@ -3265,15 +3305,6 @@ struct anv_push_constants {
    /** Push constant data provided by the client through vkPushConstants */
    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
 
-   /** Dynamic offsets for dynamic UBOs and SSBOs */
-   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
-
-   /* Robust access pushed registers. */
-   uint64_t push_reg_mask[MESA_SHADER_STAGES];
-
-   /** Ray query globals (RT_DISPATCH_GLOBALS) */
-   uint64_t ray_query_globals;
-
 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
 #define ANV_DESCRIPTOR_SET_OFFSET_MASK        (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
 
@@ -3285,7 +3316,15 @@ struct anv_push_constants {
     *
     * In bits [6:63] : descriptor set address
     */
-   uint32_t desc_offsets[MAX_SETS];
+   uint32_t desc_surface_offsets[MAX_SETS];
+
+   /**
+    * Base offsets for descriptor sets' sampler states, relative to
+    * device->physical->va.dynamic_state_pool.addr
+    */
+   uint32_t desc_sampler_offsets[MAX_SETS];
+
+   /** Dynamic offsets for dynamic UBOs and SSBOs */
+   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
 
    union {
       struct {
@@ -3311,6 +3350,12 @@ struct anv_push_constants {
          uint32_t subgroup_id;
       } cs;
    };
+
+   /* Robust access pushed registers. */
+   uint64_t push_reg_mask[MESA_SHADER_STAGES];
+
+   /** Ray query globals (RT_DISPATCH_GLOBALS) */
+   uint64_t ray_query_globals;
 };
 
 struct anv_surface_state {
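The desc_surface_offsets slots keep the packing described by the two masks above: the dynamic offset index lives in the low 6 bits and the 64-byte aligned set offset in the remaining bits (assuming ANV_UBO_ALIGNMENT == 64, which is what makes the masks line up). A sketch of the pack and unpack:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define UBO_ALIGNMENT      64u                          /* assumed value      */
#define DYNAMIC_INDEX_MASK (UBO_ALIGNMENT - 1)          /* bits [0:5]         */
#define OFFSET_MASK        (~(uint32_t)(UBO_ALIGNMENT - 1)) /* bits [6:31]    */

int main(void)
{
   uint32_t slot = 0;
   uint32_t dynamic_offset_start = 3; /* hypothetical dynamic offset index */
   uint32_t set_offset = 0x4000;      /* must be 64-byte aligned           */

   assert((set_offset & ~OFFSET_MASK) == 0);
   /* Pack the two fields, exactly as anv_CmdBindPipeline and
    * anv_cmd_buffer_bind_descriptor_set do with the real masks. */
   slot = (slot & ~DYNAMIC_INDEX_MASK) |
          (dynamic_offset_start & DYNAMIC_INDEX_MASK);
   slot = (slot & ~OFFSET_MASK) | set_offset;

   printf("packed 0x%08x -> offset 0x%x, dynamic index %u\n",
          slot, slot & OFFSET_MASK, slot & DYNAMIC_INDEX_MASK);
   return 0;
}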
diff --git a/src/intel/vulkan/anv_va.c b/src/intel/vulkan/anv_va.c
index b3bea2ae120..f7caa822c69 100644
--- a/src/intel/vulkan/anv_va.c
+++ b/src/intel/vulkan/anv_va.c
@@ -102,30 +102,29 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device)
                     _1Gb - address);
    address = va_add(&device->va.low_heap, address, _1Gb);
 
-   /* PRMs & simulation disagrees on the actual size of this heap. Take the
-    * smallest (simulation) so that it works everywhere.
-    */
-   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);
-   address = va_add(&device->va.sampler_state_pool, address, 2 * _1Gb);
-
-   /* The following addresses have to be located in a 4Gb range so that the
-    * binding tables can address internal surface states & bindless surface
-    * states.
+   /* The binding table pool has to be located directly in front of the
+    * surface states.
     */
-   address = align64(address, _4Gb);
+   address += _1Gb;
    address = va_add(&device->va.binding_table_pool, address, _1Gb);
    address = va_add(&device->va.internal_surface_state_pool, address, 1 * _1Gb);
+   assert(device->va.internal_surface_state_pool.addr ==
+          align64(device->va.internal_surface_state_pool.addr, 2 * _1Gb));
    /* Scratch surface state overlaps with the internal surface state */
    va_at(&device->va.scratch_surface_state_pool,
          device->va.internal_surface_state_pool.addr,
          8 * _1Mb);
-
-   /* The bindless surface state heap has be in the same 4Gb range from the
-    * binding table pool start so they can be addressed from binding table
-    * entries.
-    */
    address = va_add(&device->va.bindless_surface_state_pool, address, 2 * _1Gb);
+
+   /* PRMs & simulation disagree on the actual size of this heap. Take the
+    * smallest (simulation) so that it works everywhere.
+    */
+   address = align64(address, _4Gb);
+   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);
+   address = va_add(&device->va.sampler_state_pool, address, 2 * _1Gb);
+
    if (device->indirect_descriptors) {
       /* With indirect descriptors, descriptor buffers can go anywhere, they
        * just need to be in a 4Gb aligned range, so all shader accesses can
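A toy model of the carving above, showing why internal_surface_state_pool lands on a 2Gb boundary: the cursor is bumped by 1Gb so the 1Gb binding table pool sits directly in front of it. The starting cursor value is hypothetical; the real code asserts the alignment rather than deriving it:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define _1Mb (1ull << 20)
#define _1Gb (1ull << 30)

struct range { uint64_t addr, size; };

/* Reserve [addr, addr + size) and advance the cursor, like va_add(). */
static uint64_t va_add(struct range *r, uint64_t addr, uint64_t size)
{
   r->addr = addr;
   r->size = size;
   return addr + size;
}

int main(void)
{
   uint64_t address = 2 * _1Gb;  /* hypothetical cursor after the low heaps */
   struct range binding_table, internal_surfaces, bindless_surfaces;

   address += _1Gb;  /* keep binding tables directly in front of surfaces */
   address = va_add(&binding_table, address, _1Gb);
   address = va_add(&internal_surfaces, address, _1Gb);
   assert(internal_surfaces.addr % (2 * _1Gb) == 0);
   address = va_add(&bindless_surfaces, address, 2 * _1Gb);

   printf("binding tables @ 0x%llx, surfaces @ 0x%llx\n",
          (unsigned long long)binding_table.addr,
          (unsigned long long)internal_surfaces.addr);
   return 0;
}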
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index fd8bfb2e7a8..1266a7c80ec 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -171,33 +171,34 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
       sba.GeneralStateBufferSize = 0xfffff;
       sba.IndirectObjectBufferSize = 0xfffff;
-      sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
+      sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size +
+                                    device->physical->va.sampler_state_pool.size) / 4096;
       sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
       sba.GeneralStateBufferSizeModifyEnable = true;
       sba.IndirectObjectBufferSizeModifyEnable = true;
       sba.DynamicStateBufferSizeModifyEnable = true;
       sba.InstructionBuffersizeModifyEnable = true;
 
+#if GFX_VER >= 11
+      sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS;
+      sba.BindlessSamplerStateBufferSize = 0;
+      sba.BindlessSamplerStateMOCS = mocs;
+      sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+#endif
+
       if (!device->physical->indirect_descriptors) {
 #if GFX_VERx10 >= 125
          /* Bindless Surface State & Bindless Sampler State are aligned to the
          * same heap
          */
         sba.BindlessSurfaceStateBaseAddress =
-           sba.BindlessSamplerStateBaseAddress =
            (struct anv_address) {
               .offset = device->physical->va.binding_table_pool.addr,
            };
         sba.BindlessSurfaceStateSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
-            device->physical->va.descriptor_pool.size) - 1;
-        sba.BindlessSamplerStateBufferSize =
-           (device->physical->va.binding_table_pool.size +
-            device->physical->va.internal_surface_state_pool.size +
-            device->physical->va.descriptor_pool.size) / 4096 - 1;
-        sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs;
-        sba.BindlessSurfaceStateBaseAddressModifyEnable =
-           sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+           (device->physical->va.internal_surface_state_pool.size +
+            device->physical->va.bindless_surface_state_pool.size) - 1;
+        sba.BindlessSurfaceStateMOCS = mocs;
+        sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
 #else
          unreachable("Direct descriptor not supported");
 #endif
@@ -210,12 +211,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
             anv_physical_device_bindless_heap_size(device->physical) /
             ANV_SURFACE_STATE_SIZE - 1;
          sba.BindlessSurfaceStateMOCS = mocs;
          sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
-#if GFX_VER >= 11
-         sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
-         sba.BindlessSamplerStateMOCS = mocs;
-         sba.BindlessSamplerStateBaseAddressModifyEnable = true;
-         sba.BindlessSamplerStateBufferSize = 0;
-#endif
       }
 
 #if GFX_VERx10 >= 125
@@ -2115,7 +2110,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          /* This is a descriptor set buffer so the set index is actually
          * given by binding->binding. (Yes, that's confusing.)
          */
-         assert(set->desc_mem.alloc_size);
+         assert(set->desc_surface_mem.alloc_size);
          assert(set->desc_surface_state.alloc_size);
          bt_map[s] = set->desc_surface_state.offset + state_offset;
          add_surface_reloc(cmd_buffer, anv_descriptor_set_address(set));
@@ -2349,8 +2344,8 @@ flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
                                set->desc_surface_state.map,
                                format, ISL_SWIZZLE_IDENTITY,
                                ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
-                               set->desc_addr,
-                               layout->descriptor_buffer_size, 1);
+                               set->desc_surface_addr,
+                               layout->descriptor_buffer_surface_size, 1);
    }
 
    state->push_descriptor.set_used_on_gpu = true;
@@ -2480,9 +2475,10 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
    case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
       struct anv_descriptor_set *set =
          gfx_state->base.descriptors[range->index];
-      assert(range->start * 32 < set->desc_mem.alloc_size);
-      assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size);
-      return set->desc_mem.alloc_size;
+      struct anv_state state = set->desc_surface_mem;
+      assert(range->start * 32 < state.alloc_size);
+      assert((range->start + range->length) * 32 <= state.alloc_size);
+      return state.alloc_size;
    }
 
    case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
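Both SBA emitters now size DynamicStateBuffer to span the dynamic state pool plus the sampler pool that anv_va.c places right behind it, so sampler handles relative to the dynamic state base stay in bounds. A sketch of the arithmetic with the 1Gb and 2Gb sizes from anv_va.c:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t dynamic_state_pool_size = 1ull << 30;  /* 1Gb, per anv_va.c */
   uint64_t sampler_state_pool_size = 2ull << 30;  /* 2Gb, per anv_va.c */

   /* SBA buffer sizes are programmed in 4KiB pages. */
   uint64_t pages = (dynamic_state_pool_size + sampler_state_pool_size) / 4096;
   printf("DynamicStateBufferSize = %llu pages\n", (unsigned long long)pages);
   return 0;
}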
diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c
index 431e43ba868..87d8c94b442 100644
--- a/src/intel/vulkan/genX_init_state.c
+++ b/src/intel/vulkan/genX_init_state.c
@@ -250,7 +250,8 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
       (struct anv_address) {
          .offset = device->physical->va.dynamic_state_pool.addr,
       };
-   sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
+   sba.DynamicStateBufferSize = (device->physical->va.dynamic_state_pool.size +
+                                 device->physical->va.sampler_state_pool.size) / 4096;
    sba.DynamicStateMOCS = mocs;
    sba.DynamicStateBaseAddressModifyEnable = true;
    sba.DynamicStateBufferSizeModifyEnable = true;
@@ -270,6 +271,13 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
    sba.InstructionBaseAddressModifyEnable = true;
    sba.InstructionBuffersizeModifyEnable = true;
 
+#if GFX_VER >= 11
+   sba.BindlessSamplerStateBaseAddress = ANV_NULL_ADDRESS;
+   sba.BindlessSamplerStateBufferSize = 0;
+   sba.BindlessSamplerStateMOCS = mocs;
+   sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+#endif
+
    if (device->physical->indirect_descriptors) {
       sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
          .offset =
@@ -279,29 +287,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
          anv_physical_device_bindless_heap_size(device->physical) /
          ANV_SURFACE_STATE_SIZE - 1;
       sba.BindlessSurfaceStateMOCS = mocs;
       sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
-
-      sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
-      sba.BindlessSamplerStateMOCS = mocs;
-      sba.BindlessSamplerStateBaseAddressModifyEnable = true;
-      sba.BindlessSamplerStateBufferSize = 0;
    } else {
       /* Bindless Surface State & Bindless Sampler State are aligned to the
       * same heap
       */
-      sba.BindlessSurfaceStateBaseAddress =
-         sba.BindlessSamplerStateBaseAddress =
-         (struct anv_address) { .offset = device->physical->va.binding_table_pool.addr, };
+      sba.BindlessSurfaceStateBaseAddress = (struct anv_address) {
+         .offset = device->physical->va.internal_surface_state_pool.addr,
+      };
       sba.BindlessSurfaceStateSize =
-         (device->physical->va.binding_table_pool.size +
-          device->physical->va.internal_surface_state_pool.size +
+         (device->physical->va.internal_surface_state_pool.size +
           device->physical->va.bindless_surface_state_pool.size) - 1;
-      sba.BindlessSamplerStateBufferSize =
-         (device->physical->va.binding_table_pool.size +
-          device->physical->va.internal_surface_state_pool.size +
-          device->physical->va.bindless_surface_state_pool.size) / 4096 - 1;
-      sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs;
-      sba.BindlessSurfaceStateBaseAddressModifyEnable =
-         sba.BindlessSamplerStateBaseAddressModifyEnable = true;
+      sba.BindlessSurfaceStateMOCS = mocs;
+      sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
    }
 
 #if GFX_VERx10 >= 125