From 69a04151db48e4d06072dab38e33129b6681230a Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 10 Apr 2025 21:30:41 +0300 Subject: [PATCH] vulkan/runtime: add ray tracing pipeline support Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- src/vulkan/runtime/vk_pipeline.c | 1060 ++++++++++++++++++++++++++++++ src/vulkan/runtime/vk_shader.c | 18 + src/vulkan/runtime/vk_shader.h | 65 ++ 3 files changed, 1143 insertions(+) diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index b7dbacecb85..99d2b0a1eca 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -2314,3 +2314,1063 @@ vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer, if (stages & VK_SHADER_STAGE_COMPUTE_BIT) vk_compute_pipeline_cmd_bind(cmd_buffer, NULL); } + +struct vk_rt_stage { + bool linked : 1; + struct vk_shader *shader; +}; + +struct vk_rt_shader_group { + VkRayTracingShaderGroupTypeKHR type; + + struct vk_rt_stage stages[3]; + uint32_t stage_count; +}; + +struct vk_rt_pipeline { + struct vk_pipeline base; + + uint32_t group_count; + struct vk_rt_shader_group *groups; + + uint32_t stage_count; + struct vk_rt_stage *stages; + + VkDeviceSize stack_size; + VkDeviceSize scratch_size; + uint32_t ray_queries; +}; + +static struct vk_rt_stage +vk_rt_stage_ref(struct vk_rt_stage *stage) +{ + if (stage->shader) + vk_shader_ref(stage->shader); + return *stage; +} + +static void +vk_rt_shader_group_destroy(struct vk_device *device, + struct vk_rt_shader_group *group) +{ + for (uint32_t i = 0; i < group->stage_count; i++) + vk_shader_unref(device, group->stages[i].shader); +} + +static struct vk_rt_shader_group +vk_rt_shader_group_clone(struct vk_rt_shader_group *other) +{ + struct vk_rt_shader_group group = *other; + + for (uint32_t i = 0; i < ARRAY_SIZE(other->stages); i++) + group.stages[i] = vk_rt_stage_ref(&other->stages[i]); + + return group; +} + +static void +vk_rt_pipeline_destroy(struct vk_device *device, + struct vk_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator) +{ + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + + for (uint32_t i = 0; i < rt_pipeline->group_count; i++) + vk_rt_shader_group_destroy(device, &rt_pipeline->groups[i]); + for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) + vk_shader_unref(device, rt_pipeline->stages[i].shader); + vk_pipeline_free(device, pAllocator, pipeline); +} + +static void +vk_rt_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer, + struct vk_pipeline *pipeline) +{ + if (pipeline != NULL) { + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + + ops->cmd_set_rt_state(cmd_buffer, + rt_pipeline->scratch_size, + rt_pipeline->ray_queries); + + if (rt_pipeline->stack_size > 0) + ops->cmd_set_stack_size(cmd_buffer, rt_pipeline->stack_size); + + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + cmd_buffer->pipeline_shader_stages |= pipeline->stages; + } else { + cmd_buffer->pipeline_shader_stages &= ~(VK_SHADER_STAGE_RAYGEN_BIT_KHR | + VK_SHADER_STAGE_ANY_HIT_BIT_KHR | + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | + VK_SHADER_STAGE_MISS_BIT_KHR | + VK_SHADER_STAGE_INTERSECTION_BIT_KHR | + VK_SHADER_STAGE_CALLABLE_BIT_KHR); + } +} + +static uint32_t +stages_mask(uint32_t stage_count, struct vk_pipeline_stage *stages) +{ + uint32_t 
stage_mask = 0; + for (uint32_t i = 0; i < stage_count; i++) + stage_mask |= BITFIELD_BIT(stages[i].stage); + + return stage_mask; +} + +static void +hash_rt_parameters(struct mesa_blake3 *blake3_ctx, + VkShaderCreateFlagsEXT shader_flags, + VkPipelineCreateFlags2KHR _rt_flags, + const VkPushConstantRange *push_range, + struct vk_pipeline_layout *pipeline_layout) +{ + /* We don't want all the flags to be part of the hash (things like + * VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT in + * particular) + */ + const VkPipelineCreateFlags2KHR rt_flags = + _rt_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS; + + _mesa_blake3_update(blake3_ctx, &shader_flags, sizeof(shader_flags)); + _mesa_blake3_update(blake3_ctx, &rt_flags, sizeof(rt_flags)); + + for (uint32_t i = 0; i < pipeline_layout->set_count; i++) { + if (pipeline_layout->set_layouts[i] != NULL) { + _mesa_blake3_update(blake3_ctx, + pipeline_layout->set_layouts[i]->blake3, + sizeof(pipeline_layout->set_layouts[i]->blake3)); + } + } + if (push_range != NULL) + _mesa_blake3_update(blake3_ctx, push_range, sizeof(*push_range)); +} + +static VkResult +vk_pipeline_compile_rt_shader(struct vk_device *device, + struct vk_pipeline_cache *cache, + VkPipelineCreateFlags2KHR pipeline_flags, + struct vk_pipeline_layout *pipeline_layout, + struct vk_pipeline_stage *stage, + VkPipelineCreationFeedback *stage_feedback) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + + int64_t stage_start = os_time_get_nano(); + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + mesa_to_vk_shader_stage(stage->stage)) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline_flags, stage->stage); + + hash_rt_parameters(&blake3_ctx, shader_flags, + pipeline_flags, + push_range, pipeline_layout); + + _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, + sizeof(stage->precomp->blake3)); + + struct vk_shader_pipeline_cache_key shader_key = { + .stage = stage->stage, + }; + _mesa_blake3_final(&blake3_ctx, shader_key.blake3); + + if (cache != NULL) { + bool cache_hit = false; + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_key, + sizeof(shader_key), + &pipeline_shader_cache_ops, + &cache_hit); + if (cache_obj != NULL) { + stage->shader = vk_shader_from_cache_obj(cache_obj); + + if (stage_feedback != NULL) { + const int64_t stage_end = os_time_get_nano(); + stage_feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + if (cache_hit) { + stage_feedback->flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + stage_feedback->duration = stage_end - stage_start; + } + + return VK_SUCCESS; + } + } + + if (pipeline_flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stage->stage, &stage->precomp->rs); + nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp, + nir_options); + if (nir == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* vk_device_shader_ops::compile() consumes the NIR regardless of + * whether or not it succeeds and only generates shaders 
on success. + * Once compile() returns, we own the shaders but not the NIR in + * infos. + */ + struct vk_shader_compile_info compile_info = { + .stage = stage->stage, + .flags = shader_flags, + .rt_flags = pipeline_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS, + .next_stage_mask = 0, + .nir = nir, + .robustness = &stage->precomp->rs, + .set_layout_count = pipeline_layout->set_count, + .set_layouts = pipeline_layout->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + + struct vk_shader *shader; + VkResult result = ops->compile(device, 1, &compile_info, + NULL, &device->enabled_features, + &device->alloc, &shader); + if (result != VK_SUCCESS) + return result; + + vk_shader_init_cache_obj(device, shader, &shader_key, sizeof(shader_key)); + + struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stage->shader = vk_shader_from_cache_obj(cache_obj); + + if (stage_feedback != NULL) { + const int64_t stage_end = os_time_get_nano(); + stage_feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; + stage_feedback->duration = stage_end - stage_start; + } + + return VK_SUCCESS; +} + +static VkResult +vk_pipeline_compile_rt_shader_group(struct vk_device *device, + struct vk_pipeline_cache *cache, + VkPipelineCreateFlags2KHR pipeline_flags, + struct vk_pipeline_layout *pipeline_layout, + uint32_t stage_count, + struct vk_pipeline_stage *stages, + bool *all_cache_hit) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + + assert(stage_count > 1 && stage_count <= 3); + + *all_cache_hit = true; + + struct vk_shader_pipeline_cache_key shader_keys[3]; + bool found_all_shaders = true; + for (uint32_t i = 0; i < stage_count; i++) { + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + + const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + mesa_to_vk_shader_stage(stages[i].stage)) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + VkShaderCreateFlagsEXT shader_flags = + vk_pipeline_to_shader_flags(pipeline_flags, stages[i].stage); + + hash_rt_parameters(&blake3_ctx, shader_flags, pipeline_flags, + push_range, pipeline_layout); + + for (uint32_t j = 0; j < stage_count; j++) { + _mesa_blake3_update(&blake3_ctx, stages[j].precomp->blake3, + sizeof(stages[j].precomp->blake3)); + } + + shader_keys[i] = (struct vk_shader_pipeline_cache_key) { + .stage = stages[i].stage, + }; + _mesa_blake3_final(&blake3_ctx, shader_keys[i].blake3); + + bool cache_hit = false; + if (cache != NULL) { + struct vk_pipeline_cache_object *cache_obj = + vk_pipeline_cache_lookup_object(cache, &shader_keys[i], + sizeof(shader_keys[i]), + &pipeline_shader_cache_ops, + &cache_hit); + if (cache_obj != NULL) { + stages[i].shader = vk_shader_from_cache_obj(cache_obj); + continue; + } + } + found_all_shaders = false; + *all_cache_hit &= cache_hit; + } + + if (found_all_shaders) + return VK_SUCCESS; + + if (pipeline_flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) + return VK_PIPELINE_COMPILE_REQUIRED; + + struct vk_shader_compile_info compile_info[3] = { 0 }; + for (uint32_t i = 0; i < stage_count; i++) { + if (stages[i].shader) { + vk_shader_unref(device, stages[i].shader); + stages[i].shader = NULL; + } + + 
const VkPushConstantRange *push_range = NULL; + if (pipeline_layout != NULL) { + for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) { + if (pipeline_layout->push_ranges[r].stageFlags & + mesa_to_vk_shader_stage(stages[i].stage)) { + assert(push_range == NULL); + push_range = &pipeline_layout->push_ranges[r]; + } + } + } + + const struct nir_shader_compiler_options *nir_options = + ops->get_nir_options(device->physical, stages[i].stage, + &stages[i].precomp->rs); + + compile_info[i] = (struct vk_shader_compile_info) { + .stage = stages[i].stage, + .flags = vk_pipeline_to_shader_flags(pipeline_flags, + stages[i].stage), + .rt_flags = pipeline_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS, + .next_stage_mask = 0, + .nir = vk_pipeline_precomp_shader_get_nir(stages[i].precomp, + nir_options), + .robustness = &stages[i].precomp->rs, + .set_layout_count = pipeline_layout->set_count, + .set_layouts = pipeline_layout->set_layouts, + .push_constant_range_count = push_range != NULL, + .push_constant_ranges = push_range != NULL ? push_range : NULL, + }; + + if (compile_info[i].nir == NULL) { + for (uint32_t j = 0; j < i; j++) + ralloc_free(compile_info[j].nir); + + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + + struct vk_shader *shaders[3]; + VkResult result = ops->compile(device, stage_count, compile_info, + NULL, &device->enabled_features, + &device->alloc, shaders); + if (result != VK_SUCCESS) + return result; + + for (uint32_t i = 0; i < stage_count; i++) { + vk_shader_init_cache_obj(device, shaders[i], + &shader_keys[i], sizeof(shader_keys[i])); + + struct vk_pipeline_cache_object *cache_obj = + &shaders[i]->pipeline.cache_obj; + if (cache != NULL) + cache_obj = vk_pipeline_cache_add_object(cache, cache_obj); + + stages[i].shader = vk_shader_from_cache_obj(cache_obj); + } + + return VK_SUCCESS; +} + +static VkResult +vk_rt_pipeline_get_executable_properties( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t *executable_count, + VkPipelineExecutablePropertiesKHR *properties) +{ + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + VkResult result; + + if (properties == NULL) { + *executable_count = 0; + for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) { + struct vk_shader *shader = rt_pipeline->stages[i].shader; + + uint32_t shader_exec_count = 0; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + NULL); + assert(shader_exec_count == 1); + assert(result == VK_SUCCESS); + *executable_count += shader_exec_count; + } + } else { + uint32_t arr_len = *executable_count; + *executable_count = 0; + for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) { + struct vk_shader *shader = rt_pipeline->stages[i].shader; + + uint32_t shader_exec_count = arr_len - *executable_count; + result = shader->ops->get_executable_properties(device, shader, + &shader_exec_count, + &properties[*executable_count]); + if (result != VK_SUCCESS) + return result; + + assert(shader_exec_count == 1); + *executable_count += shader_exec_count; + } + } + + return VK_SUCCESS; +} + +static VkResult +vk_rt_pipeline_get_executable_statistics( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + assert(executable_index < rt_pipeline->stage_count); + struct vk_shader *shader = 
rt_pipeline->stages[executable_index].shader; + + return shader->ops->get_executable_statistics(device, shader, 0, + statistic_count, + statistics); +} + +static VkResult +vk_rt_pipeline_get_internal_representations( + struct vk_device *device, + struct vk_pipeline *pipeline, + uint32_t executable_index, + uint32_t *internal_representation_count, + VkPipelineExecutableInternalRepresentationKHR* internal_representations) +{ + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + assert(executable_index < rt_pipeline->stage_count); + struct vk_shader *shader = rt_pipeline->stages[executable_index].shader; + + return shader->ops->get_executable_internal_representations( + device, shader, 0, + internal_representation_count, internal_representations); +} + +static struct vk_shader * +vk_rt_pipeline_get_shader(struct vk_pipeline *pipeline, + mesa_shader_stage stage) +{ + UNREACHABLE("Invalid operation"); +} + +static const struct vk_pipeline_ops vk_rt_pipeline_ops = { + .destroy = vk_rt_pipeline_destroy, + .get_executable_statistics = vk_rt_pipeline_get_executable_statistics, + .get_executable_properties = vk_rt_pipeline_get_executable_properties, + .get_internal_representations = vk_rt_pipeline_get_internal_representations, + .cmd_bind = vk_rt_pipeline_cmd_bind, + .get_shader = vk_rt_pipeline_get_shader, +}; + +static bool +is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info) +{ + if (info->pDynamicState == NULL) + return false; + + for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) { + if (info->pDynamicState->pDynamicStates[i] == + VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR) + return true; + } + + return false; +} + +static int +cmp_vk_rt_pipeline_stages(const void *_a, const void *_b) +{ + const struct vk_rt_stage *a = _a, *b = _b; + return vk_shader_cmp_rt_stages(a->shader->stage, b->shader->stage); +} + +static VkResult +vk_create_rt_pipeline(struct vk_device *device, + struct vk_pipeline_cache *cache, + const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + const struct vk_device_shader_ops *ops = device->shader_ops; + + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout); + int64_t pipeline_start = os_time_get_nano(); + VkResult result; + + const VkPipelineCreateFlags2KHR pipeline_flags = + vk_rt_pipeline_create_flags(pCreateInfo); + + const VkPipelineCreationFeedbackCreateInfo *feedback_info = + vk_find_struct_const(pCreateInfo->pNext, + PIPELINE_CREATION_FEEDBACK_CREATE_INFO); + + struct vk_pipeline_stage *stages = NULL; + if (pCreateInfo->stageCount > 0) { + stages = vk_zalloc2(&device->alloc, pAllocator, + pCreateInfo->stageCount * sizeof(*stages), 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (stages == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t libraries_stage_count = 0; + uint32_t libraries_group_count = 0; + const VkPipelineLibraryCreateInfoKHR *libs_info = pCreateInfo->pLibraryInfo; + if (libs_info != NULL) { + for (uint32_t i = 0; i < libs_info->libraryCount; i++) { + VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]); + assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR); + struct vk_rt_pipeline *lib_rt_pipeline = + container_of(lib_pipeline, struct vk_rt_pipeline, base); + + libraries_stage_count += lib_rt_pipeline->stage_count; + libraries_group_count 
+= lib_rt_pipeline->group_count; + } + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct vk_rt_pipeline, _pipeline, 1); + VK_MULTIALLOC_DECL(&ma, struct vk_rt_stage, pipeline_stages, + libraries_stage_count + pCreateInfo->stageCount); + VK_MULTIALLOC_DECL(&ma, struct vk_rt_shader_group, pipeline_groups, + libraries_group_count + pCreateInfo->groupCount); + + struct vk_rt_pipeline *pipeline = + vk_pipeline_multizalloc(device, &ma, &vk_rt_pipeline_ops, + VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, + pipeline_flags, pAllocator); + if (pipeline == NULL) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_stages; + } + + pipeline->stages = pipeline_stages; + pipeline->groups = pipeline_groups; + + bool all_cache_hit = true; + + uint32_t stack_max[MESA_SHADER_KERNEL] = { 0 }; + + /* Load/Compile individual shaders */ + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + const VkPipelineShaderStageCreateInfo *stage_info = + &pCreateInfo->pStages[i]; + + pipeline->base.stages |= pCreateInfo->pStages[i].stage; + stages[i] = (struct vk_pipeline_stage) { + .stage = vk_to_mesa_shader_stage(stage_info->stage), + }; + + result = vk_pipeline_precompile_shader(device, cache, pipeline_flags, + pCreateInfo->pNext, + stage_info, + &stages[i].precomp); + if (result != VK_SUCCESS) + goto fail_stages_compile; + + VkPipelineCreationFeedback feedback = { 0 }; + result = vk_pipeline_compile_rt_shader(device, cache, + pipeline_flags, + pipeline_layout, + &stages[i], + &feedback); + + if ((feedback.flags & + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT) == 0 && + (pipeline->base.flags & + VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)) { + result = VK_PIPELINE_COMPILE_REQUIRED; + goto fail_stages_compile; + } + + if (result != VK_SUCCESS) + goto fail_stages_compile; + + + /* No need to take a reference: either the pipeline creation succeeds + * and ownership is transferred from stages[] to the pipeline, or + * it fails and all stages[] elements are unreferenced. 
+ */ + pipeline->stages[pipeline->stage_count++] = (struct vk_rt_stage) { + .shader = stages[i].shader, + }; + + stack_max[stages[i].stage] = MAX2(stages[i].shader->stack_size, + stack_max[stages[i].stage]); + pipeline->scratch_size = MAX2(stages[i].shader->scratch_size, + pipeline->scratch_size); + pipeline->ray_queries = MAX2(stages[i].shader->ray_queries, + pipeline->ray_queries); + + if (feedback_info && + feedback_info->pipelineStageCreationFeedbackCount > 0) { + feedback_info->pPipelineStageCreationFeedbacks[i] = feedback; + all_cache_hit &= !!(feedback.flags & + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT); + } + } + + /* Create/Compile groups */ + for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) { + const VkRayTracingShaderGroupCreateInfoKHR *group_info = + &pCreateInfo->pGroups[i]; + struct vk_rt_shader_group *group = &pipeline->groups[i]; + + group->type = group_info->type; + + struct vk_pipeline_stage group_stages[3]; + uint32_t group_stage_count = 0; + switch (group_info->type) { + case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: + assert(group_info->generalShader < pCreateInfo->stageCount); + group_stages[group_stage_count++] = stages[group_info->generalShader]; + break; + + case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: + if (group_info->anyHitShader < pCreateInfo->stageCount) + group_stages[group_stage_count++] = stages[group_info->anyHitShader]; + if (group_info->closestHitShader < pCreateInfo->stageCount) + group_stages[group_stage_count++] = stages[group_info->closestHitShader]; + break; + + case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: + if (group_info->closestHitShader < pCreateInfo->stageCount) + group_stages[group_stage_count++] = stages[group_info->closestHitShader]; + if (group_info->anyHitShader < pCreateInfo->stageCount) + group_stages[group_stage_count++] = stages[group_info->anyHitShader]; + assert(group_info->intersectionShader < pCreateInfo->stageCount); + group_stages[group_stage_count++] = stages[group_info->intersectionShader]; + break; + + default: + UNREACHABLE("Invalid shader group"); + } + assert(group_stage_count <= ARRAY_SIZE(group_stages)); + + VkShaderStageFlags group_stages_flags = 0; + for (uint32_t s = 0; s < group_stage_count; s++) + group_stages_flags |= mesa_to_vk_shader_stage(group_stages[s].stage); + + VkShaderStageFlags group_linked_stages = ops->get_rt_group_linking != NULL ? 
+ ops->get_rt_group_linking(device->physical, group_stages_flags) : 0; + + struct vk_pipeline_stage linked_stages[3]; + uint32_t linked_stage_count = 0; + if (group_linked_stages) { + assert(util_bitcount(group_linked_stages) > 1); + + /* Build the list of shaders to link */ + for (uint32_t s = 0; s < group_stage_count; s++) { + if (mesa_to_vk_shader_stage(group_stages[s].stage) & + group_linked_stages) { + linked_stages[linked_stage_count] = group_stages[s]; + linked_stages[linked_stage_count].shader = NULL; + linked_stage_count++; + } + } + } + + if (linked_stage_count > 0) { + assert(linked_stage_count > 1); + + bool cache_hit; + result = vk_pipeline_compile_rt_shader_group(device, cache, pipeline_flags, + pipeline_layout, + linked_stage_count, + linked_stages, + &cache_hit); + if (result != VK_SUCCESS) + goto fail_stages_compile; + + all_cache_hit &= cache_hit; + } + + for (uint32_t s = 0; s < linked_stage_count; s++) { + group->stages[group->stage_count++] = (struct vk_rt_stage) { + .shader = linked_stages[s].shader, + .linked = true, + }; + } + for (uint32_t s = 0; s < group_stage_count; s++) { + if (mesa_to_vk_shader_stage( + group_stages[s].stage) & group_linked_stages) + continue; + + group->stages[group->stage_count++] = (struct vk_rt_stage) { + .shader = vk_shader_ref(group_stages[s].shader), + }; + } + assert(group->stage_count > 0); + + qsort(group->stages, group->stage_count, sizeof(*group->stages), + cmp_vk_rt_pipeline_stages); + + for (uint32_t s = 0; s < group->stage_count; s++) { + pipeline->ray_queries = + MAX2(group->stages[s].shader->ray_queries, pipeline->ray_queries); + pipeline->scratch_size = + MAX2(group->stages[s].shader->scratch_size, pipeline->scratch_size); + } + + pipeline->group_count++; + } + + /* Throw away the precompiled shaders; unlike GPL, we never do linking with + * shaders coming from libraries. 
+ */ + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) + vk_pipeline_precomp_shader_unref(device, stages[i].precomp); + + /* Import libraries */ + if (libs_info) { + for (uint32_t i = 0; i < libs_info->libraryCount; i++) { + VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]); + struct vk_rt_pipeline *lib_rt_pipeline = + container_of(lib_pipeline, struct vk_rt_pipeline, base); + + /* Import library shaders */ + for (uint32_t s = 0; s < lib_rt_pipeline->stage_count; s++) { + pipeline->stages[pipeline->stage_count++] = + vk_rt_stage_ref(&lib_rt_pipeline->stages[s]); + } + + /* Import library groups */ + for (uint32_t g = 0; g < lib_rt_pipeline->group_count; g++) { + pipeline->groups[pipeline->group_count++] = + vk_rt_shader_group_clone(&lib_rt_pipeline->groups[g]); + } + } + } + + /* Compute final stats */ + for (uint32_t i = 0; i < pipeline->stage_count; i++) { + struct vk_shader *shader = pipeline->stages[i].shader; + + stack_max[shader->stage] = + MAX2(shader->stack_size, stack_max[shader->stage]); + + pipeline->base.stages |= mesa_to_vk_shader_stage(shader->stage); + pipeline->scratch_size = MAX2(shader->scratch_size, pipeline->scratch_size); + pipeline->ray_queries = MAX2(shader->ray_queries, pipeline->ray_queries); + pipeline->stack_size = MAX2(shader->stack_size, pipeline->stack_size); + } + for (uint32_t g = 0; g < pipeline->group_count; g++) { + const struct vk_rt_shader_group *group = &pipeline->groups[g]; + for (uint32_t s = 0; s < group->stage_count; s++) { + struct vk_shader *shader = group->stages[s].shader; + + stack_max[shader->stage] = + MAX2(shader->stack_size, stack_max[shader->stage]); + + pipeline->base.stages |= + mesa_to_vk_shader_stage(group->stages[s].shader->stage); + pipeline->scratch_size = + MAX2(shader->scratch_size, pipeline->scratch_size); + pipeline->ray_queries = MAX2(shader->ray_queries, pipeline->ray_queries); + pipeline->stack_size = MAX2(shader->stack_size, pipeline->stack_size); + } + } + + if (is_rt_stack_size_dynamic(pCreateInfo)) { + pipeline->stack_size = 0; /* 0 means dynamic */ + } else { + /* From the Vulkan spec: + * + * "If the stack size is not set explicitly, the stack size for a + * pipeline is: + * + * rayGenStackMax + + * min(1, maxPipelineRayRecursionDepth) × + * max(closestHitStackMax, missStackMax, + * intersectionStackMax + anyHitStackMax) + + * max(0, maxPipelineRayRecursionDepth-1) × + * max(closestHitStackMax, missStackMax) + + * 2 × callableStackMax" + */ + pipeline->stack_size = MAX2( + pipeline->stack_size, + stack_max[MESA_SHADER_RAYGEN] + + MIN2(1, pCreateInfo->maxPipelineRayRecursionDepth) * + MAX4(stack_max[MESA_SHADER_CLOSEST_HIT], + stack_max[MESA_SHADER_MISS], + stack_max[MESA_SHADER_INTERSECTION], + stack_max[MESA_SHADER_ANY_HIT]) + + MAX2(0, (int)pCreateInfo->maxPipelineRayRecursionDepth - 1) * + MAX2(stack_max[MESA_SHADER_CLOSEST_HIT], + stack_max[MESA_SHADER_MISS]) + + 2 * stack_max[MESA_SHADER_CALLABLE]); + + /* This is an extremely unlikely case but we need to set it to some + * non-zero value so that we don't accidentally think it's dynamic. 
+ */ + if (pipeline->stack_size == 0) + pipeline->stack_size = 1; + } + + const int64_t pipeline_end = os_time_get_nano(); + if (feedback_info != NULL) { + VkPipelineCreationFeedback pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, + .duration = pipeline_end - pipeline_start, + }; + if (all_cache_hit && cache != device->mem_cache) { + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; + } + + *feedback_info->pPipelineCreationFeedback = pipeline_feedback; + } + + *pPipeline = vk_pipeline_to_handle(&pipeline->base); + + return VK_SUCCESS; + + fail_stages_compile: + for (uint32_t i = 0; i < pipeline->group_count; i++) + vk_rt_shader_group_destroy(device, &pipeline->groups[i]); + for (uint32_t i = 0; i < pipeline->stage_count; i++) + vk_shader_unref(device, pipeline->stages[i].shader); + for (uint32_t i = 0; i < pCreateInfo->stageCount && stages[i].precomp != NULL; i++) + vk_pipeline_precomp_shader_unref(device, stages[i].precomp); + vk_pipeline_free(device, pAllocator, &pipeline->base); + fail_stages: + vk_free(&device->alloc, stages); + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateRayTracingPipelinesKHR( + VkDevice _device, + VkDeferredOperationKHR deferredOperation, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); + VkResult first_error_or_success = VK_SUCCESS; + + /* Use implicit pipeline cache if there's no cache set */ + if (!cache && device->mem_cache) + cache = device->mem_cache; + + /* From the Vulkan 1.3.274 spec: + * + * "When attempting to create many pipelines in a single command, it is + * possible that creation may fail for a subset of them. In this case, + * the corresponding elements of pPipelines will be set to + * VK_NULL_HANDLE." + */ + memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines)); + + unsigned i = 0; + for (; i < createInfoCount; i++) { + VkResult result = vk_create_rt_pipeline(device, cache, + &pCreateInfos[i], + pAllocator, + &pPipelines[i]); + if (result == VK_SUCCESS) + continue; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + + /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it + * is not obvious what error should be reported upon 2 different failures. 
+ */ + if (result != VK_PIPELINE_COMPILE_REQUIRED) + return result; + + const VkPipelineCreateFlags2KHR flags = + vk_rt_pipeline_create_flags(&pCreateInfos[i]); + if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR) + return result; + } + + return first_error_or_success; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetRayTracingShaderGroupHandlesKHR( + VkDevice _device, + VkPipeline _pipeline, + uint32_t firstGroup, + uint32_t groupCount, + size_t dataSize, + void* pData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline); + const struct vk_device_shader_ops *ops = device->shader_ops; + + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + + assert(dataSize >= device->physical->properties.shaderGroupHandleSize * groupCount); + assert(firstGroup + groupCount <= rt_pipeline->group_count); + + for (uint32_t i = 0; i < groupCount; i++) { + struct vk_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; + struct vk_shader *shaders[3]; + for (uint32_t s = 0; s < group->stage_count; s++) + shaders[s] = group->stages[s].shader; + + ops->write_rt_shader_group(device, group->type, + (const struct vk_shader **)shaders, + group->stage_count, pData); + + pData = (uint8_t *)pData + device->physical->properties.shaderGroupHandleSize; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_GetRayTracingCaptureReplayShaderGroupHandlesKHR( + VkDevice _device, + VkPipeline _pipeline, + uint32_t firstGroup, + uint32_t groupCount, + size_t dataSize, + void* pData) +{ + VK_FROM_HANDLE(vk_device, device, _device); + VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline); + const struct vk_device_shader_ops *ops = device->shader_ops; + + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + + assert(dataSize >= device->physical->properties.shaderGroupHandleSize * groupCount); + assert(firstGroup + groupCount <= rt_pipeline->group_count); + + for (uint32_t i = 0; i < groupCount; i++) { + struct vk_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i]; + struct vk_shader *shaders[3] = { 0 }; + for (uint32_t s = 0; s < group->stage_count; s++) + shaders[s] = group->stages[s].shader; + + ops->write_rt_shader_group_replay_handle(device, + (const struct vk_shader **)shaders, + group->stage_count, pData); + + pData = (uint8_t *)pData + device->physical->properties.shaderGroupHandleCaptureReplaySize; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkDeviceSize VKAPI_CALL +vk_common_GetRayTracingShaderGroupStackSizeKHR( + VkDevice device, + VkPipeline _pipeline, + uint32_t _group, + VkShaderGroupShaderKHR groupShader) +{ + VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline); + assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + + struct vk_rt_pipeline *rt_pipeline = + container_of(pipeline, struct vk_rt_pipeline, base); + + assert(_group < rt_pipeline->group_count); + + struct vk_rt_shader_group *group = &rt_pipeline->groups[_group]; + + struct vk_shader *shader = NULL; + for (uint32_t i = 0; i < group->stage_count; i++) { + switch (groupShader) { + case VK_SHADER_GROUP_SHADER_GENERAL_KHR: + shader = (group->stages[i].shader->stage == MESA_SHADER_RAYGEN || + group->stages[i].shader->stage == MESA_SHADER_CALLABLE || + group->stages[i].shader->stage == MESA_SHADER_MISS) ? 
+ group->stages[i].shader : NULL; + break; + + case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: + shader = group->stages[i].shader->stage == MESA_SHADER_CLOSEST_HIT ? + group->stages[i].shader : NULL; + break; + + case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: + shader = group->stages[i].shader->stage == MESA_SHADER_ANY_HIT ? + group->stages[i].shader : NULL; + break; + + case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: + shader = group->stages[i].shader->stage == MESA_SHADER_INTERSECTION ? + group->stages[i].shader : NULL; + break; + + default: + UNREACHABLE("Invalid VkShaderGroupShader enum"); + } + + if (shader != NULL) + break; + } + + return shader ? shader->stack_size : 0; +} + +VKAPI_ATTR void VKAPI_CALL +vk_common_CmdSetRayTracingPipelineStackSizeKHR( + VkCommandBuffer commandBuffer, + uint32_t pipelineStackSize) +{ + VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer); + struct vk_device *device = cmd_buffer->base.device; + const struct vk_device_shader_ops *ops = device->shader_ops; + + ops->cmd_set_stack_size(cmd_buffer, pipelineStackSize); +} diff --git a/src/vulkan/runtime/vk_shader.c b/src/vulkan/runtime/vk_shader.c index 2045bb9e013..0078693791a 100644 --- a/src/vulkan/runtime/vk_shader.c +++ b/src/vulkan/runtime/vk_shader.c @@ -121,6 +121,24 @@ vk_shader_cmp_graphics_stages(mesa_shader_stage a, mesa_shader_stage b) return stage_order[a] - stage_order[b]; } +int +vk_shader_cmp_rt_stages(mesa_shader_stage a, mesa_shader_stage b) +{ + static const int stage_order[MESA_SHADER_CALLABLE + 1] = { + [MESA_SHADER_RAYGEN] = 1, + [MESA_SHADER_ANY_HIT] = 2, + [MESA_SHADER_CLOSEST_HIT] = 3, + [MESA_SHADER_MISS] = 4, + [MESA_SHADER_INTERSECTION] = 5, + [MESA_SHADER_CALLABLE] = 6, + }; + + assert(a < ARRAY_SIZE(stage_order) && stage_order[a] > 0); + assert(b < ARRAY_SIZE(stage_order) && stage_order[b] > 0); + + return stage_order[a] - stage_order[b]; +} + struct stage_idx { mesa_shader_stage stage; uint32_t idx; diff --git a/src/vulkan/runtime/vk_shader.h b/src/vulkan/runtime/vk_shader.h index b56398d0fb1..93ef35d8c63 100644 --- a/src/vulkan/runtime/vk_shader.h +++ b/src/vulkan/runtime/vk_shader.h @@ -47,13 +47,45 @@ struct vk_pipeline; struct vk_pipeline_robustness_state; int vk_shader_cmp_graphics_stages(mesa_shader_stage a, mesa_shader_stage b); +int vk_shader_cmp_rt_stages(mesa_shader_stage a, mesa_shader_stage b); #define VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA 0x1000 #define VK_SHADER_CREATE_UNALIGNED_DISPATCH_BIT_MESA 0x2000 +#define MESA_VK_PIPELINE_RAY_TRACING_FLAGS ( \ + VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_BUILT_IN_PRIMITIVES_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_ALLOW_SPHERES_AND_LINEAR_SWEPT_SPHERES_BIT_NV | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_ALLOW_MOTION_BIT_NV | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_OPACITY_MICROMAP_BIT_EXT | \ + VK_PIPELINE_CREATE_2_RAY_TRACING_DISPLACEMENT_MICROMAP_BIT_NV | \ + VK_PIPELINE_CREATE_2_DISALLOW_OPACITY_MICROMAP_BIT_ARM) + struct vk_shader_compile_info { mesa_shader_stage stage; VkShaderCreateFlagsEXT flags; + /* RT flags only includes 
: + * - VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_BUILT_IN_PRIMITIVES_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_ALLOW_SPHERES_AND_LINEAR_SWEPT_SPHERES_BIT_NV + * - VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR + * - VK_PIPELINE_CREATE_2_RAY_TRACING_ALLOW_MOTION_BIT_NV + * - VK_PIPELINE_CREATE_2_RAY_TRACING_OPACITY_MICROMAP_BIT_EXT + * - VK_PIPELINE_CREATE_2_RAY_TRACING_DISPLACEMENT_MICROMAP_BIT_NV + * - VK_PIPELINE_CREATE_2_DISALLOW_OPACITY_MICROMAP_BIT_ARM + */ + VkPipelineCreateFlags2KHR rt_flags; VkShaderStageFlags next_stage_mask; struct nir_shader *nir; @@ -219,6 +251,16 @@ struct vk_device_shader_ops { nir_shader *nir, const struct vk_pipeline_robustness_state *rs); + /** Return shader stages that should be linked together in a group + * + * The driver should return 0 if it does not require any linking, otherwise + * it should return the stages that need to be linked together. The return + * value should have more than one shader stage and be included in the + * stages given as parameter. + */ + VkShaderStageFlags (*get_rt_group_linking)(struct vk_physical_device *device, + VkShaderStageFlags stages); + /** Hash a vk_graphics_state object and a vk_features object. * * This callback hashes whatever bits of vk_graphics_pipeline_state might @@ -261,6 +303,20 @@ struct vk_device_shader_ops { const VkAllocationCallbacks* pAllocator, struct vk_shader **shader_out); + + /** Writes a HW shader record from a shader group */ + void (*write_rt_shader_group)(struct vk_device *device, + VkRayTracingShaderGroupTypeKHR type, + const struct vk_shader **shaders, + uint32_t shader_count, + void *output); + + /** Writes a group replay handle for a shader group */ + void (*write_rt_shader_group_replay_handle)(struct vk_device *device, + const struct vk_shader **shaders, + uint32_t shader_count, + void *output); + /** Bind a set of shaders * * This is roughly equivalent to vkCmdBindShadersEXT() @@ -273,6 +329,15 @@ struct vk_device_shader_ops { /** Sets dynamic state */ void (*cmd_set_dynamic_graphics_state)(struct vk_command_buffer *cmd_buffer, const struct vk_dynamic_graphics_state *state); + + /** Sets scratch size & ray query count for RT pipelines */ + void (*cmd_set_rt_state)(struct vk_command_buffer *cmd_buffer, + VkDeviceSize scratch_size, + uint32_t ray_queries); + + /** Sets stack size for RT pipelines */ + void (*cmd_set_stack_size)(struct vk_command_buffer *cmd_buffer, + VkDeviceSize stack_size); }; extern const struct vk_pipeline_robustness_state vk_robustness_disabled;
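For drivers adopting these hooks, the following is a minimal, hypothetical sketch of the driver side of the new vk_device_shader_ops entries (get_rt_group_linking, cmd_set_rt_state, cmd_set_stack_size). It is not part of this patch; the drv_* names and the command-buffer fields are invented for illustration, and a real backend would consume these values at dispatch time in whatever way its hardware requires.

#include "util/bitscan.h"
#include "util/macros.h"
#include "vk_command_buffer.h"
#include "vk_physical_device.h"
#include "vk_shader.h"

/* Hypothetical driver command buffer wrapping the common vk_command_buffer. */
struct drv_cmd_buffer {
   struct vk_command_buffer vk;
   VkDeviceSize rt_scratch_size;
   uint32_t rt_ray_queries;
   VkDeviceSize rt_stack_size;
};

/* Ask the runtime to link the any-hit/closest-hit/intersection shaders of a
 * hit group into a single vk_shader and leave general groups alone.  Per the
 * contract documented in vk_shader.h above, the return value is either 0 or
 * a subset of `stages` containing more than one stage.
 */
static VkShaderStageFlags
drv_get_rt_group_linking(struct vk_physical_device *pdev,
                         VkShaderStageFlags stages)
{
   const VkShaderStageFlags hit_stages =
      stages & (VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                VK_SHADER_STAGE_INTERSECTION_BIT_KHR);

   return util_bitcount(hit_stages) > 1 ? hit_stages : 0;
}

/* Called when an RT pipeline is bound, with the maxima accumulated across
 * all shaders of that pipeline (see vk_rt_pipeline_cmd_bind above).
 */
static void
drv_cmd_set_rt_state(struct vk_command_buffer *cmd_buffer,
                     VkDeviceSize scratch_size,
                     uint32_t ray_queries)
{
   struct drv_cmd_buffer *cmd =
      container_of(cmd_buffer, struct drv_cmd_buffer, vk);

   cmd->rt_scratch_size = MAX2(cmd->rt_scratch_size, scratch_size);
   cmd->rt_ray_queries = MAX2(cmd->rt_ray_queries, ray_queries);
}

/* Reached both from pipeline bind (static stack size) and from
 * vkCmdSetRayTracingPipelineStackSizeKHR() (dynamic stack size).
 */
static void
drv_cmd_set_stack_size(struct vk_command_buffer *cmd_buffer,
                       VkDeviceSize stack_size)
{
   struct drv_cmd_buffer *cmd =
      container_of(cmd_buffer, struct drv_cmd_buffer, vk);

   cmd->rt_stack_size = stack_size;
}

/* Wired into the driver's existing ops table alongside its graphics and
 * compute hooks.
 */
const struct vk_device_shader_ops drv_shader_ops = {
   /* ...existing hooks... */
   .get_rt_group_linking = drv_get_rt_group_linking,
   .cmd_set_rt_state = drv_cmd_set_rt_state,
   .cmd_set_stack_size = drv_cmd_set_stack_size,
};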