diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c
index 6662aed672d..7e1b4496e43 100644
--- a/src/amd/vulkan/radv_pipeline_compute.c
+++ b/src/amd/vulkan/radv_pipeline_compute.c
@@ -20,6 +20,7 @@
 #include "util/u_atomic.h"
 #include "radv_cs.h"
 #include "radv_debug.h"
+#include "radv_pipeline_binary.h"
 #include "radv_pipeline_cache.h"
 #include "radv_rmv.h"
 #include "radv_shader.h"
@@ -252,6 +253,34 @@ done:
    return result;
 }
 
+/* Import the compute shader from a pipeline binary instead of compiling it.
+ *
+ * Exactly one binary is expected. The pipeline hash is the first 64 bits of the binary key.
+ * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when the shader can't be deserialized.
+ */
+static VkResult
+radv_compute_pipeline_import_binary(struct radv_device *device, struct radv_compute_pipeline *pipeline,
+                                    const VkPipelineBinaryInfoKHR *binary_info)
+{
+   VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[0]);
+   struct radv_shader *shader;
+   struct blob_reader blob;
+
+   assert(binary_info->binaryCount == 1);
+
+   blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
+
+   shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
+   if (!shader)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   pipeline->base.shaders[MESA_SHADER_COMPUTE] = shader;
+
+   pipeline->base.pipeline_hash = *(uint64_t *)pipeline_binary->key;
+
+   return VK_SUCCESS;
+}
+
 VkResult
 radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
@@ -274,8 +303,15 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
 
-   result = radv_compute_pipeline_compile(pCreateInfo, pipeline, pipeline_layout, device, cache, &pCreateInfo->stage,
-                                          creation_feedback);
+   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
+
+   if (binary_info && binary_info->binaryCount > 0) {
+      result = radv_compute_pipeline_import_binary(device, pipeline, binary_info);
+   } else {
+      result = radv_compute_pipeline_compile(pCreateInfo, pipeline, pipeline_layout, device, cache, &pCreateInfo->stage,
+                                             creation_feedback);
+   }
+
    if (result != VK_SUCCESS) {
       radv_pipeline_destroy(device, &pipeline->base, pAllocator);
       return result;
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index a27e7fcfbfc..e17ad9f3fb2 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -24,6 +24,7 @@
 #include "radv_entrypoints.h"
 #include "radv_formats.h"
 #include "radv_physical_device.h"
+#include "radv_pipeline_binary.h"
 #include "radv_pipeline_cache.h"
 #include "radv_rmv.h"
 #include "radv_shader.h"
@@ -651,7 +652,7 @@ radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_
    pipeline->active_stages |= lib->base.active_stages;
 
    /* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */
-   if (radv_should_import_lib_binaries(pipeline->base.create_flags)) {
+   if (lib->base.has_pipeline_binaries || radv_should_import_lib_binaries(pipeline->base.create_flags)) {
       import_binaries = true;
    }
 
@@ -2512,7 +2513,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGr
    VkShaderStageFlags active_stages = 0;
 
    /* No compilation when pipeline binaries are imported. */
-   if (binary_info)
+   if (binary_info && binary_info->binaryCount > 0)
       return true;
 
    /* Do not skip for libraries. */
@@ -3293,6 +3294,51 @@ radv_needs_null_export_workaround(const struct radv_device *device, const struct
           !ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask;
 }
 
+/* Import the shaders of a graphics pipeline (or library) from pipeline binaries instead of compiling them.
+ *
+ * Each binary holds one serialized shader which is stored in the slot of its stage. The pipeline hash is
+ * the first 64 bits of the BLAKE3 hash of all binary keys, in order.
+ * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when a shader can't be deserialized.
+ */
+static VkResult
+radv_graphics_pipeline_import_binaries(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
+                                       const VkPipelineBinaryInfoKHR *binary_info)
+{
+   blake3_hash pipeline_hash;
+   struct mesa_blake3 ctx;
+
+   _mesa_blake3_init(&ctx);
+
+   for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
+      VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
+      struct radv_shader *shader;
+      struct blob_reader blob;
+
+      blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
+
+      shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
+      if (!shader)
+         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+      if (shader->info.stage == MESA_SHADER_VERTEX && i > 0) {
+         /* The GS copy-shader is a VS placed after all other stages. */
+         pipeline->base.gs_copy_shader = shader;
+      } else {
+         pipeline->base.shaders[shader->info.stage] = shader;
+      }
+
+      _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
+   }
+
+   _mesa_blake3_final(&ctx, pipeline_hash);
+
+   pipeline->base.pipeline_hash = *(uint64_t *)pipeline_hash;
+
+   pipeline->has_pipeline_binaries = true;
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
                             struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
@@ -3325,14 +3371,22 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
    if (result != VK_SUCCESS)
       return result;
 
-   if (gfx_state.compilation_required) {
-      result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
-      if (result != VK_SUCCESS) {
-         radv_graphics_pipeline_state_finish(device, &gfx_state);
-         return result;
+   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
+
+   if (binary_info && binary_info->binaryCount > 0) {
+      result = radv_graphics_pipeline_import_binaries(device, pipeline, binary_info);
+   } else {
+      if (gfx_state.compilation_required) {
+         result =
+            radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
       }
    }
 
+   if (result != VK_SUCCESS) {
+      radv_graphics_pipeline_state_finish(device, &gfx_state);
+      return result;
+   }
+
    uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk);
 
    radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk);
@@ -3424,7 +3478,6 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, str
                                 struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
-   struct radv_graphics_pipeline_state gfx_state;
    VkResult result;
 
    const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
@@ -3466,14 +3519,23 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, str
    if (pipeline_layout)
       radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout);
 
-   result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
-   if (result != VK_SUCCESS)
-      return result;
+   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
 
-   result =
-      radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
+   if (binary_info && binary_info->binaryCount > 0) {
+      result = radv_graphics_pipeline_import_binaries(device, &pipeline->base, binary_info);
+   } else {
+      struct radv_graphics_pipeline_state gfx_state;
+
+      result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result =
+         radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
+
+      radv_graphics_pipeline_state_finish(device, &gfx_state);
+   }
 
-   radv_graphics_pipeline_state_finish(device, &gfx_state);
    return result;
 }
 
diff --git a/src/amd/vulkan/radv_pipeline_graphics.h b/src/amd/vulkan/radv_pipeline_graphics.h
index ba87a5513ee..e67e0f60732 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.h
+++ b/src/amd/vulkan/radv_pipeline_graphics.h
@@ -121,6 +121,9 @@ struct radv_graphics_pipeline {
 
    /* For relocation of shaders with RGP. */
    struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc;
+
+   /* Whether the pipeline imported binaries. */
+   bool has_pipeline_binaries;
 };
 
 RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 06102148f88..d7eb61a7339 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -6,6 +6,7 @@
 
 #include "nir/nir.h"
 #include "nir/nir_builder.h"
+#include "nir/nir_serialize.h"
 
 #include "vk_shader_module.h"
 
@@ -13,6 +14,7 @@
 #include "radv_debug.h"
 #include "radv_descriptor_set.h"
 #include "radv_entrypoints.h"
+#include "radv_pipeline_binary.h"
 #include "radv_pipeline_cache.h"
 #include "radv_pipeline_rt.h"
 #include "radv_rmv.h"
@@ -281,14 +283,34 @@ radv_init_rt_stage_hashes(const struct radv_device *device,
                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage *stages,
                           const struct radv_shader_stage_key *stage_keys)
 {
-   for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[idx];
-      gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
-      struct mesa_sha1 ctx;
+   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
+   if (binary_info && binary_info->binaryCount > 0) {
+      /* Imported binaries carry the stage hash in their header. */
+      for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
+         VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
+         struct blob_reader blob;
 
-      _mesa_sha1_init(&ctx);
-      radv_pipeline_hash_shader_stage(pipeline_flags, sinfo, &stage_keys[s], &ctx);
-      _mesa_sha1_final(&ctx, stages[idx].sha1);
+         blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
+
+         const struct radv_ray_tracing_binary_header *header =
+            (const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
+
+         /* No stage hash is copied for the traversal shader binary. */
+         if (header->is_traversal_shader)
+            continue;
+
+         memcpy(stages[i].sha1, header->stage_sha1, SHA1_DIGEST_LENGTH);
+      }
+   } else {
+      for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
+         const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[idx];
+         gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
+         struct mesa_sha1 ctx;
+
+         _mesa_sha1_init(&ctx);
+         radv_pipeline_hash_shader_stage(pipeline_flags, sinfo, &stage_keys[s], &ctx);
+         _mesa_sha1_final(&ctx, stages[idx].sha1);
+      }
    }
 }
 
@@ -984,6 +1006,74 @@ fail:
    return result;
 }
 
+/* Import the stages of a ray tracing pipeline from pipeline binaries instead of compiling them.
+ *
+ * Every binary starts with a radv_ray_tracing_binary_header. The traversal shader binary is stored as the
+ * intersection shader of the pipeline; any other binary fills the stage with the same index (info, stack
+ * size and, when present, the shader and the NIR). The pipeline hash is the first 64 bits of the BLAKE3
+ * hash of the keys of all binaries that contain a shader.
+ */
+static VkResult
+radv_ray_tracing_pipeline_import_binary(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
+                                        const VkPipelineBinaryInfoKHR *binary_info)
+{
+   blake3_hash pipeline_hash;
+   struct mesa_blake3 ctx;
+
+   _mesa_blake3_init(&ctx);
+
+   for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
+      VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
+      struct radv_shader *shader;
+      struct blob_reader blob;
+
+      blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
+
+      const struct radv_ray_tracing_binary_header *header =
+         (const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
+
+      if (header->is_traversal_shader) {
+         shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
+         if (!shader)
+            return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+         pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = shader;
+
+         _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
+         continue;
+      }
+
+      memcpy(&pipeline->stages[i].info, &header->stage_info, sizeof(pipeline->stages[i].info));
+      pipeline->stages[i].stack_size = header->stack_size;
+
+      if (header->has_shader) {
+         shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
+         if (!shader)
+            return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+         pipeline->stages[i].shader = shader;
+
+         _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
+      }
+
+      if (header->has_nir) {
+         nir_shader *nir = nir_deserialize(NULL, NULL, &blob);
+
+         pipeline->stages[i].nir = radv_pipeline_cache_nir_to_handle(device, NULL, nir, header->stage_sha1, false);
+         ralloc_free(nir);
+
+         if (!pipeline->stages[i].nir)
+            return VK_ERROR_OUT_OF_HOST_MEMORY;
+      }
+   }
+
+   _mesa_blake3_final(&ctx, pipeline_hash);
+
+   pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline_hash;
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
@@ -1032,10 +1122,17 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
    if (result != VK_SUCCESS)
       goto fail;
 
-   result = radv_rt_pipeline_compile(device, pCreateInfo, pipeline, cache, &rt_state, capture_replay_blocks,
-                                     creation_feedback);
-   if (result != VK_SUCCESS)
-      goto fail;
+   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
+
+   if (binary_info && binary_info->binaryCount > 0) {
+      result = radv_ray_tracing_pipeline_import_binary(device, pipeline, binary_info);
+   } else {
+      result = radv_rt_pipeline_compile(device, pCreateInfo, pipeline, cache, &rt_state, capture_replay_blocks,
+                                        creation_feedback);
+   }
+
+   if (result != VK_SUCCESS)
+      goto fail;
 
    if (!(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
       compute_rt_stack_size(pCreateInfo, pipeline);