diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index f1ce1549e82..f4948a4c67a 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -212,8 +212,8 @@ radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
    radv_pipeline_destroy(device, pipeline, pAllocator);
 }
 
-static uint32_t
-get_hash_flags(const struct radv_device *device, bool stats)
+uint32_t
+radv_get_hash_flags(const struct radv_device *device, bool stats)
 {
    uint32_t hash_flags = 0;
 
@@ -3348,7 +3348,7 @@ VkResult
 radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
                     struct radv_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key,
                     const VkPipelineShaderStageCreateInfo **pStages,
-                    const VkPipelineCreateFlags flags,
+                    const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
                     VkPipelineCreationFeedbackEXT *pipeline_feedback,
                     VkPipelineCreationFeedbackEXT **stage_feedbacks)
 {
@@ -3368,6 +3368,9 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
                               (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
                               device->keep_shader_info;
    bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+   struct radv_pipeline_shader_stack_size **stack_sizes =
+      pipeline->type == RADV_PIPELINE_COMPUTE ? &pipeline->compute.rt_stack_sizes : NULL;
+   uint32_t *num_stack_sizes = stack_sizes ? &pipeline->compute.group_count : NULL;
 
    radv_start_feedback(pipeline_feedback);
 
@@ -3384,8 +3387,12 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
       }
    }
 
-   radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
-                     get_hash_flags(device, keep_statistic_info));
+   if (custom_hash)
+      memcpy(hash, custom_hash, 20);
+   else {
+      radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
+                        radv_get_hash_flags(device, keep_statistic_info));
+   }
 
    memcpy(gs_copy_hash, hash, 20);
    gs_copy_hash[0] ^= 1;
@@ -3394,13 +3401,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
    bool found_in_application_cache = true;
    if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
       struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
-      radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
-                                                      &found_in_application_cache);
+      radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants, NULL,
+                                                      NULL, &found_in_application_cache);
       pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
    }
 
    if (!keep_executable_info &&
       radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
+                                                      stack_sizes, num_stack_sizes,
                                                       &found_in_application_cache) &&
       (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
       radv_stop_feedback(pipeline_feedback, found_in_application_cache);
@@ -3629,7 +3637,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
       gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
       gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
 
-      radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries);
+      radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries,
+                                         NULL, 0);
 
      pipeline->gs_copy_shader = gs_variants[MESA_SHADER_GEOMETRY];
    }
@@ -3698,7 +3707,9 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
    }
 
    if (!keep_executable_info) {
-      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries);
+      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries,
+                                         stack_sizes ? *stack_sizes : NULL,
+                                         num_stack_sizes ? *num_stack_sizes : 0);
    }
 
    for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
@@ -5519,7 +5530,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
    struct radv_pipeline_key key =
       radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
 
-   result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
+   result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, NULL,
                                 pipeline_feedback, stage_feedbacks);
    if (result != VK_SUCCESS)
       return result;
@@ -5746,7 +5757,9 @@ radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
 VkResult
 radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                              const VkComputePipelineCreateInfo *pCreateInfo,
-                             const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
+                             const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
+                             struct radv_pipeline_shader_stack_size *rt_stack_sizes,
+                             uint32_t rt_group_count, VkPipeline *pPipeline)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
@@ -5759,8 +5772,10 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
 
    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (pipeline == NULL)
+   if (pipeline == NULL) {
+      free(rt_stack_sizes);
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
 
    vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
    pipeline->type = RADV_PIPELINE_COMPUTE;
@@ -5768,6 +5783,8 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    pipeline->device = device;
    pipeline->graphics.last_vgt_api_stage = MESA_SHADER_NONE;
    pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
+   pipeline->compute.rt_stack_sizes = rt_stack_sizes;
+   pipeline->compute.group_count = rt_group_count;
    assert(pipeline->layout);
 
    const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
@@ -5784,7 +5801,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
    result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
-                                pipeline_feedback, stage_feedbacks);
+                                custom_hash, pipeline_feedback, stage_feedbacks);
    if (result != VK_SUCCESS) {
       radv_pipeline_destroy(device, pipeline, pAllocator);
       return result;
    }
@@ -5813,8 +5830,8 @@ radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uin
    unsigned i = 0;
    for (; i < count; i++) {
       VkResult r;
-      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
-                                       &pPipelines[i]);
+      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL,
+                                       NULL, 0, &pPipelines[i]);
       if (r != VK_SUCCESS) {
          result = r;
         pPipelines[i] = VK_NULL_HANDLE;
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index f0fb1428d55..b386ab89b41 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -37,6 +37,7 @@ struct cache_entry {
       uint32_t sha1_dw[5];
    };
    uint32_t binary_sizes[MESA_SHADER_STAGES];
+   uint32_t num_stack_sizes;
    struct radv_shader_variant *variants[MESA_SHADER_STAGES];
    char code[0];
 };
@@ -139,6 +140,39 @@ radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **s
    _mesa_sha1_final(&ctx, hash);
 }
 
+void
+radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                     uint32_t flags)
+{
+   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
+   struct mesa_sha1 ctx;
+
+   _mesa_sha1_init(&ctx);
+   if (layout)
+      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+
+   for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) {
+      RADV_FROM_HANDLE(vk_shader_module, module, pCreateInfo->pStages[i].module);
+      const VkSpecializationInfo *spec_info = pCreateInfo->pStages[i].pSpecializationInfo;
+
+      _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+      _mesa_sha1_update(&ctx, pCreateInfo->pStages[i].pName, strlen(pCreateInfo->pStages[i].pName));
+      if (spec_info && spec_info->mapEntryCount) {
+         _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+                           spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
+         _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+      }
+   }
+
+   _mesa_sha1_update(&ctx, pCreateInfo->pGroups,
+                     pCreateInfo->groupCount * sizeof(*pCreateInfo->pGroups));
+
+   if (!radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
+      _mesa_sha1_update(&ctx, &pCreateInfo->maxPipelineRayRecursionDepth, 4);
+   _mesa_sha1_update(&ctx, &flags, 4);
+   _mesa_sha1_final(&ctx, hash);
+}
+
 static struct cache_entry *
 radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
 {
@@ -253,11 +287,10 @@ radv_is_cache_disabled(struct radv_device *device)
 }
 
 bool
-radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
-                                                struct radv_pipeline_cache *cache,
-                                                const unsigned char *sha1,
-                                                struct radv_shader_variant **variants,
-                                                bool *found_in_application_cache)
+radv_create_shader_variants_from_pipeline_cache(
+   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
+   struct radv_shader_variant **variants, struct radv_pipeline_shader_stack_size **stack_sizes,
+   uint32_t *num_stack_sizes, bool *found_in_application_cache)
 {
    struct cache_entry *entry;
 
@@ -325,6 +358,14 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 
    memcpy(variants, entry->variants, sizeof(entry->variants));
 
+   if (num_stack_sizes) {
+      *num_stack_sizes = entry->num_stack_sizes;
+      if (entry->num_stack_sizes) {
+         *stack_sizes = malloc(entry->num_stack_sizes * sizeof(**stack_sizes));
+         memcpy(*stack_sizes, p, entry->num_stack_sizes * sizeof(**stack_sizes));
+      }
+   }
+
    if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
       vk_free(&cache->alloc, entry);
    else {
@@ -340,7 +381,9 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 void
 radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
                                    const unsigned char *sha1, struct radv_shader_variant **variants,
-                                   struct radv_shader_binary *const *binaries)
+                                   struct radv_shader_binary *const *binaries,
+                                   const struct radv_pipeline_shader_stack_size *stack_sizes,
+                                   uint32_t num_stack_sizes)
 {
    if (!cache)
       cache = device->mem_cache;
@@ -370,7 +413,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       return;
    }
 
-   size_t size = sizeof(*entry);
+   size_t size = sizeof(*entry) + sizeof(*stack_sizes) * num_stack_sizes;
    for (int i = 0; i < MESA_SHADER_STAGES; ++i)
       if (variants[i])
         size += binaries[i]->total_size;
@@ -398,6 +441,12 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
       p += binaries[i]->total_size;
    }
 
+   if (num_stack_sizes) {
+      memcpy(p, stack_sizes, sizeof(*stack_sizes) * num_stack_sizes);
+      p += sizeof(*stack_sizes) * num_stack_sizes;
+   }
+   entry->num_stack_sizes = num_stack_sizes;
+
    // Make valgrind happy by filling the alignment hole at the end.
    assert(p == (char *)entry + size_without_align);
    assert(sizeof(*entry) + (p - entry->code) == size_without_align);
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 7f4b360c2cf..ea4cc7820fa 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -22,6 +22,7 @@
  */
 
 #include "radv_acceleration_structure.h"
+#include "radv_debug.h"
 #include "radv_private.h"
 #include "radv_shader.h"
 
@@ -1899,6 +1900,11 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
    VkResult result;
    struct radv_pipeline *pipeline = NULL;
    struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
+   uint8_t hash[20];
+   nir_shader *shader = NULL;
+   bool keep_statistic_info =
+      (pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
+      (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
 
    if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
       return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);
@@ -1910,30 +1916,44 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
       goto fail;
    }
 
-   stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
-   if (!stack_sizes) {
-      result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      goto fail;
-   }
+   radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
+   struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
 
-   nir_shader *shader = create_rt_shader(device, &local_create_info, stack_sizes);
    VkComputePipelineCreateInfo compute_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .pNext = NULL,
-     .flags = pCreateInfo->flags,
+     .flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT,
      .stage = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
-        .module = vk_shader_module_handle_from_nir(shader),
+        .module = vk_shader_module_to_handle(&module),
        .pName = "main",
      },
      .layout = pCreateInfo->layout,
   };
 
-   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, pPipeline);
-   if (result != VK_SUCCESS)
-      goto shader_fail;
+   /* First check if we can get things from the cache before we take the expensive step of
+    * generating the nir. */
+   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
+                                         stack_sizes, local_create_info.groupCount, pPipeline);
+   if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) {
+      stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
+      if (!stack_sizes) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail;
+      }
+
+      shader = create_rt_shader(device, &local_create_info, stack_sizes);
+      module.nir = shader;
+      compute_info.flags = pCreateInfo->flags;
+      result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
+                                            stack_sizes, local_create_info.groupCount, pPipeline);
+      stack_sizes = NULL;
+
+      if (result != VK_SUCCESS)
+         goto shader_fail;
+   }
 
    pipeline = radv_pipeline_from_handle(*pPipeline);
    pipeline->compute.rt_group_handles =
@@ -1943,10 +1963,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
       goto shader_fail;
    }
 
-   pipeline->compute.rt_stack_sizes = stack_sizes;
-   stack_sizes = NULL;
-
-   pipeline->compute.dynamic_stack_size = has_dynamic_stack_size(pCreateInfo);
+   pipeline->compute.dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
 
    for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
       const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 27abcf08d09..0430e4ef6d6 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -375,22 +375,21 @@ struct radv_pipeline_key {
 
 struct radv_shader_binary;
 struct radv_shader_variant;
+struct radv_pipeline_shader_stack_size;
 
 void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
 void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
 bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
 
-bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
-                                                     struct radv_pipeline_cache *cache,
-                                                     const unsigned char *sha1,
-                                                     struct radv_shader_variant **variants,
-                                                     bool *found_in_application_cache);
+bool radv_create_shader_variants_from_pipeline_cache(
+   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
+   struct radv_shader_variant **variants, struct radv_pipeline_shader_stack_size **stack_sizes,
+   uint32_t *num_stack_sizes, bool *found_in_application_cache);
 
-void radv_pipeline_cache_insert_shaders(struct radv_device *device,
-                                        struct radv_pipeline_cache *cache,
-                                        const unsigned char *sha1,
-                                        struct radv_shader_variant **variants,
-                                        struct radv_shader_binary *const *binaries);
+void radv_pipeline_cache_insert_shaders(
+   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
+   struct radv_shader_variant **variants, struct radv_shader_binary *const *binaries,
+   const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
 
 enum radv_blit_ds_layout {
    RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
@@ -1690,6 +1689,11 @@ void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInf
                        const struct radv_pipeline_layout *layout,
                        const struct radv_pipeline_key *key, uint32_t flags);
 
+void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
+                          uint32_t flags);
+
+uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
+
 bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
 
 #define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
@@ -1819,6 +1823,7 @@ struct radv_pipeline {
          struct radv_pipeline_group_handle *rt_group_handles;
         struct radv_pipeline_shader_stack_size *rt_stack_sizes;
         bool dynamic_stack_size;
+        uint32_t group_count;
      } compute;
      struct {
         unsigned stage_count;
@@ -1878,7 +1883,9 @@ VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                                       const VkComputePipelineCreateInfo *pCreateInfo,
                                       const VkAllocationCallbacks *pAllocator,
-                                      VkPipeline *pPipeline);
+                                      const uint8_t *custom_hash,
+                                      struct radv_pipeline_shader_stack_size *rt_stack_sizes,
+                                      uint32_t rt_group_count, VkPipeline *pPipeline);
 
 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                            const VkAllocationCallbacks *allocator);
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 8971f654ef9..f27843e64d1 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -449,7 +449,7 @@ void radv_destroy_shader_slabs(struct radv_device *device);
 
 VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
                              struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
                              const VkPipelineShaderStageCreateInfo **pStages,
-                             const VkPipelineCreateFlags flags,
+                             const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
                              VkPipelineCreationFeedbackEXT *pipeline_feedback,
                              VkPipelineCreationFeedbackEXT **stage_feedbacks);
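Reviewer sketch (not part of the patch): where the appended stack sizes end up inside a cache entry. This assumes, as in the existing code, that entry->binary_sizes[i] holds each serialized binary's total_size (0 for absent stages); the helper name below is hypothetical and only illustrates the layout that radv_pipeline_cache_insert_shaders() now writes and radv_create_shader_variants_from_pipeline_cache() reads back.

/* Trailing payload of a cache_entry after this change:
 *   [struct cache_entry][binary 0]...[binary N-1][stack_size 0]...[stack_size num_stack_sizes-1]
 */
static const struct radv_pipeline_shader_stack_size *
example_entry_stack_sizes(const struct cache_entry *entry)
{
   const char *p = entry->code; /* start of the variable-length payload */

   /* Skip the serialized shader binaries; sizes are 0 for missing stages. */
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      p += entry->binary_sizes[i];

   /* This is the offset the new memcpy()s in the patch write to and read from. */
   return (const struct radv_pipeline_shader_stack_size *)p;
}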