diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f6478f39665..8f8e4ad4051 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -905,6 +905,25 @@ radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs,
    radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false);
 }
 
+/* Return the address to use for descriptor set slot set_idx: the VA (header.va) of the
+ * descriptor set bound to that slot if there is one, otherwise the descriptor buffer address
+ * stored for the slot.
+ */
+static uint64_t
+radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx)
+{
+   struct radv_descriptor_set *set = descriptors_state->sets[set_idx];
+   uint64_t va;
+
+   if (set) {
+      va = set->header.va;
+   } else {
+      va = descriptors_state->descriptor_buffers[set_idx];
+   }
+
+   return va;
+}
+
 static void
 radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
                               struct radv_pipeline *pipeline,
@@ -927,9 +946,9 @@ radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *
 
          radv_emit_shader_pointer_head(cs, sh_offset, count, true);
          for (int i = 0; i < count; i++) {
-            struct radv_descriptor_set *set = descriptors_state->sets[start + i];
+            uint64_t va = radv_descriptor_get_va(descriptors_state, start + i);
 
-            radv_emit_shader_pointer_body(device, cs, set->header.va, true);
+            radv_emit_shader_pointer_body(device, cs, va, true);
          }
       }
    }
@@ -4105,9 +4124,9 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
    for (unsigned i = 0; i < MAX_SETS; i++) {
       uint32_t *uptr = ((uint32_t *)ptr) + i;
       uint64_t set_va = 0;
-      struct radv_descriptor_set *set = descriptors_state->sets[i];
       if (descriptors_state->valid & (1u << i))
-         set_va = set->header.va;
+         set_va = radv_descriptor_get_va(descriptors_state, i);
+
       uptr[0] = set_va & 0xffffffff;
    }
 
@@ -10796,3 +10815,53 @@ radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer,
    fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n");
    abort();
 }
+
+/* VK_EXT_descriptor_buffer */
+
+/* Record the address of each of the bufferCount descriptor buffer bindings in the command
+ * buffer. bufferCount is not range-checked here; cmd_buffer->descriptor_buffers holds MAX_SETS
+ * entries, so callers must not pass more bindings than that.
+ */
+VKAPI_ATTR void VKAPI_CALL
+radv_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer, uint32_t bufferCount,
+                                 const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   for (uint32_t i = 0; i < bufferCount; i++) {
+      cmd_buffer->descriptor_buffers[i] = pBindingInfos[i].address;
+   }
+}
+
+/* For each of the setCount slots starting at firstSet, store the address of the descriptor
+ * buffer selected by pBufferIndices[i] plus pOffsets[i] as that slot's descriptor address. Each
+ * slot is also passed to radv_set_descriptor_set() with a NULL set, presumably so that no
+ * regular descriptor set stays bound there (confirm against that helper).
+ */
+VKAPI_ATTR void VKAPI_CALL
+radv_CmdSetDescriptorBufferOffsetsEXT(VkCommandBuffer commandBuffer,
+                                      VkPipelineBindPoint pipelineBindPoint,
+                                      VkPipelineLayout _layout, uint32_t firstSet, uint32_t setCount,
+                                      const uint32_t *pBufferIndices, const VkDeviceSize *pOffsets)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_descriptor_state *descriptors_state =
+      radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+
+   for (unsigned i = 0; i < setCount; i++) {
+      unsigned idx = i + firstSet;
+
+      descriptors_state->descriptor_buffers[idx] =
+         cmd_buffer->descriptor_buffers[pBufferIndices[i]] + pOffsets[i];
+
+      radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, NULL, idx);
+   }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+radv_CmdBindDescriptorBufferEmbeddedSamplersEXT(VkCommandBuffer commandBuffer,
+                                                VkPipelineBindPoint pipelineBindPoint,
+                                                VkPipelineLayout _layout, uint32_t set)
+{
+   /* This is a no-op because embedded samplers are inlined at compile time. */
+}
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index a150cceb1a9..d6ade45d17b 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -305,10 +305,17 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea
            binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
           binding->pImmutableSamplers) {
          set_layout->binding[b].immutable_samplers_offset = samplers_offset;
-         set_layout->binding[b].immutable_samplers_equal =
-            has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
          set_layout->has_immutable_samplers = true;
 
+         /* Do not optimize space for descriptor buffers and embedded samplers, otherwise the set
+          * layout size/offset are incorrect.
+          */
+         if (!(pCreateInfo->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
+                                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))) {
+            set_layout->binding[b].immutable_samplers_equal =
+               has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
+         }
+
          for (uint32_t i = 0; i < binding->descriptorCount; i++)
             memcpy(samplers + 4 * i,
                    &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
@@ -1721,3 +1728,147 @@ radv_DestroySamplerYcbcrConversion(VkDevice _device, VkSamplerYcbcrConversion yc
    vk_object_base_finish(&ycbcr_conversion->base);
    vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
 }
+
+/* VK_EXT_descriptor_buffer */
+
+/* Report the size in bytes of a descriptor set layout, as stored in the layout object. */
+VKAPI_ATTR void VKAPI_CALL
+radv_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout layout,
+                                   VkDeviceSize *pLayoutSizeInBytes)
+{
+   RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout);
+   *pLayoutSizeInBytes = set_layout->size;
+}
+
+/* Report the offset of a binding within its descriptor set layout. The binding number is used
+ * directly as an index into set_layout->binding[] without a range check.
+ */
+VKAPI_ATTR void VKAPI_CALL
+radv_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, VkDescriptorSetLayout layout,
+                                            uint32_t binding, VkDeviceSize *pOffset)
+{
+   RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, layout);
+   *pOffset = set_layout->binding[binding].offset;
+}
+
+/* Write the descriptor described by pDescriptorInfo into pDescriptor, dispatching on the
+ * descriptor type. A NULL buffer address info yields a zero address and range; a NULL or
+ * zero-address texel buffer info yields 16 zeroed bytes. dataSize is not consulted.
+ */
+VKAPI_ATTR void VKAPI_CALL
+radv_GetDescriptorEXT(VkDevice _device, const VkDescriptorGetInfoEXT *pDescriptorInfo,
+                      size_t dataSize, void *pDescriptor)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+
+   switch (pDescriptorInfo->type) {
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+      write_sampler_descriptor(pDescriptor, *pDescriptorInfo->data.pSampler);
+      break;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
+                             pDescriptorInfo->data.pCombinedImageSampler);
+      if (pDescriptorInfo->data.pCombinedImageSampler) {
+         /* The sampler state is written 20 dwords (80 bytes) into the descriptor. */
+         write_sampler_descriptor((uint32_t *)pDescriptor + 20,
+                                  pDescriptorInfo->data.pCombinedImageSampler->sampler);
+      }
+      break;
+   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+      write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
+                             pDescriptorInfo->data.pInputAttachmentImage);
+      break;
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      write_image_descriptor(pDescriptor, 64, pDescriptorInfo->type,
+                             pDescriptorInfo->data.pSampledImage);
+      break;
+   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      write_image_descriptor(pDescriptor, 32, pDescriptorInfo->type,
+                             pDescriptorInfo->data.pStorageImage);
+      break;
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
+      const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformBuffer;
+
+      write_buffer_descriptor(device, pDescriptor, addr_info ? addr_info->address : 0,
+                              addr_info ? addr_info->range : 0);
+      break;
+   }
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
+      const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pStorageBuffer;
+
+      write_buffer_descriptor(device, pDescriptor, addr_info ? addr_info->address : 0,
+                              addr_info ? addr_info->range : 0);
+      break;
+   }
+   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
+      const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pUniformTexelBuffer;
+
+      if (addr_info && addr_info->address) {
+         radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0,
+                                           addr_info->range, pDescriptor);
+      } else {
+         memset(pDescriptor, 0, 4 * 4);
+      }
+      break;
+   }
+   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
+      const VkDescriptorAddressInfoEXT *addr_info = pDescriptorInfo->data.pStorageTexelBuffer;
+
+      if (addr_info && addr_info->address) {
+         radv_make_texel_buffer_descriptor(device, addr_info->address, addr_info->format, 0,
+                                           addr_info->range, pDescriptor);
+      } else {
+         memset(pDescriptor, 0, 4 * 4);
+      }
+      break;
+   }
+   case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
+      write_accel_struct(pDescriptor, pDescriptorInfo->data.accelerationStructure);
+      break;
+   default:
+      unreachable("invalid descriptor type");
+   }
+}
+
+/* The opaque capture descriptor data queries below write nothing to pData and always return
+ * VK_SUCCESS.
+ */
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_GetBufferOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                             const VkBufferCaptureDescriptorDataInfoEXT *pInfo,
+                                             void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_GetImageOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                            const VkImageCaptureDescriptorDataInfoEXT *pInfo,
+                                            void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_GetImageViewOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                                const VkImageViewCaptureDescriptorDataInfoEXT *pInfo,
+                                                void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_GetSamplerOpaqueCaptureDescriptorDataEXT(VkDevice _device,
+                                              const VkSamplerCaptureDescriptorDataInfoEXT *pInfo,
+                                              void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+radv_GetAccelerationStructureOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                                            const VkAccelerationStructureCaptureDescriptorDataInfoEXT *pInfo,
+                                                            void *pData)
+{
+   return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
index 62b884e2598..3e85c93f20b 100644
--- a/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
+++ b/src/amd/vulkan/radv_nir_apply_pipeline_layout.c
@@ -239,8 +239,10 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
    struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
    struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];
 
-   /* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers)
-    * We can only do this for constant array index or if all samplers in the array are the same.
+   /* Handle immutable and embedded (compile-time) samplers
+    * (VkDescriptorSetLayoutBinding::pImmutableSamplers). We can only do this for constant array
+    * index or if all samplers in the array are the same. Note that indexing is forbidden with
+    * embedded samplers.
     */
    if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
        (!indirect || binding->immutable_samplers_equal)) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1dbbe64c8fd..15834eb5b1a 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1501,6 +1501,8 @@ struct radv_descriptor_state {
    struct radv_push_descriptor_set push_set;
    bool push_dirty;
    uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
+   /* Per-set descriptor buffer address (buffer address + offset), used when no set is bound. */
+   uint64_t descriptor_buffers[MAX_SETS];
 };
 
 enum rgp_flush_bits {
@@ -1666,6 +1668,9 @@ struct radv_cmd_buffer {
 
    struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
 
+   /* Addresses of the descriptor buffers bound with vkCmdBindDescriptorBuffersEXT. */
+   uint64_t descriptor_buffers[MAX_SETS];
+
    struct radv_cmd_buffer_upload upload;
 
    uint32_t scratch_size_per_wave_needed;