From 7fe469d6c809eaacbdb5a04bdcd8a5b0d7fc557b Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi Date: Sun, 2 Apr 2023 15:41:25 +0900 Subject: [PATCH] radv: Pre-compute descriptor set layout hash. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While analyzing cache loading performance, hashing the pipeline layout was surprisingly consuming around 4% of the time, sometimes close to the cost of hashing shader modules. Turns out we were hashing the pipeline layout on every pipeline creation. Considering that pipeline layouts are usually deduplicated by the application, this was amplifying the hashing cost by a big margin. With Graphics Pipeline Library, we do need to rebuild the pipeline layout by combining those from each library, but we can memoize the hash of the descriptor set layout. Re-hashing these memoized hashes is negligible compared to hashing the layouts themselves, since each descriptor set layout can amount to 1–2KB in size while its SHA-1 hash is only 20 bytes. Part-of: --- src/amd/vulkan/radv_descriptor_set.c | 17 +++++++++-------- src/amd/vulkan/radv_descriptor_set.h | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 082ae473ad9..c35cec10438 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -183,7 +183,6 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); set_layout->flags = pCreateInfo->flags; - set_layout->layout_size = size; /* We just allocate all the samplers at the end of the struct */ uint32_t *samplers = (uint32_t *)&set_layout->binding[num_bindings]; @@ -390,6 +389,14 @@ radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCrea set_layout->buffer_count = buffer_count; set_layout->dynamic_offset_count = dynamic_offset_count; + /* Hash the entire set layout except vk_descriptor_set_layout. 
The rest of the set layout is + * carefully constructed to not have pointers so a full hash instead of a per-field hash + * should be ok. + */ + uint32_t hash_offset = + offsetof(struct radv_descriptor_set_layout, hash) + sizeof(set_layout->hash); + _mesa_sha1_compute((const char *)set_layout + hash_offset, size - hash_offset, set_layout->hash); + *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout); return VK_SUCCESS; @@ -567,13 +574,7 @@ radv_pipeline_layout_hash(struct radv_pipeline_layout *layout) if (!set_layout) continue; - /* Hash the entire set layout except vk_descriptor_set_layout. The rest of the set layout is - * carefully constructed to not have pointers so a full hash instead of a per-field hash - * should be ok. - */ - uint32_t hash_offset = sizeof(struct vk_descriptor_set_layout); - _mesa_sha1_update(&ctx, (const char *)set_layout + hash_offset, - set_layout->layout_size - hash_offset); + _mesa_sha1_update(&ctx, set_layout->hash, sizeof(set_layout->hash)); } _mesa_sha1_update(&ctx, &layout->push_constant_size, sizeof(layout->push_constant_size)); _mesa_sha1_final(&ctx, layout->sha1); diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h index 9302ec421fe..77a3b3c099b 100644 --- a/src/amd/vulkan/radv_descriptor_set.h +++ b/src/amd/vulkan/radv_descriptor_set.h @@ -54,6 +54,9 @@ struct radv_descriptor_set_binding_layout { struct radv_descriptor_set_layout { struct vk_descriptor_set_layout vk; + /* Hash of all fields below */ + uint8_t hash[SHA1_DIGEST_LENGTH]; + /* Everything below is hashed and shouldn't contain any pointers. Be careful when modifying this * structure. */ @@ -67,9 +70,6 @@ struct radv_descriptor_set_layout { /* Total size of the descriptor set with room for all array entries */ uint32_t size; - /* CPU size of this struct + all associated data, for hashing. */ - uint32_t layout_size; - /* Shader stages affected by this descriptor set */ uint16_t shader_stages; uint16_t dynamic_shader_stages;