radv: add support for importing pipeline binaries

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30812>
This commit is contained in:
Samuel Pitoiset 2024-07-08 16:43:18 +02:00 committed by Marge Bot
parent 96a300a3f0
commit b4d6d88c6b
4 changed files with 204 additions and 27 deletions

View file

@ -20,6 +20,7 @@
#include "util/u_atomic.h" #include "util/u_atomic.h"
#include "radv_cs.h" #include "radv_cs.h"
#include "radv_debug.h" #include "radv_debug.h"
#include "radv_pipeline_binary.h"
#include "radv_pipeline_cache.h" #include "radv_pipeline_cache.h"
#include "radv_rmv.h" #include "radv_rmv.h"
#include "radv_shader.h" #include "radv_shader.h"
@ -252,6 +253,29 @@ done:
return result; return result;
} }
static VkResult
radv_compute_pipeline_import_binary(struct radv_device *device, struct radv_compute_pipeline *pipeline,
const VkPipelineBinaryInfoKHR *binary_info)
{
VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[0]);
struct radv_shader *shader;
struct blob_reader blob;
assert(binary_info->binaryCount == 1);
blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
if (!shader)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pipeline->base.shaders[MESA_SHADER_COMPUTE] = shader;
pipeline->base.pipeline_hash = *(uint64_t *)pipeline_binary->key;
return VK_SUCCESS;
}
VkResult VkResult
radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo, radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
@ -274,8 +298,15 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
const VkPipelineCreationFeedbackCreateInfo *creation_feedback = const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
result = radv_compute_pipeline_compile(pCreateInfo, pipeline, pipeline_layout, device, cache, &pCreateInfo->stage, const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
creation_feedback);
if (binary_info && binary_info->binaryCount > 0) {
result = radv_compute_pipeline_import_binary(device, pipeline, binary_info);
} else {
result = radv_compute_pipeline_compile(pCreateInfo, pipeline, pipeline_layout, device, cache, &pCreateInfo->stage,
creation_feedback);
}
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
radv_pipeline_destroy(device, &pipeline->base, pAllocator); radv_pipeline_destroy(device, &pipeline->base, pAllocator);
return result; return result;

View file

@ -24,6 +24,7 @@
#include "radv_entrypoints.h" #include "radv_entrypoints.h"
#include "radv_formats.h" #include "radv_formats.h"
#include "radv_physical_device.h" #include "radv_physical_device.h"
#include "radv_pipeline_binary.h"
#include "radv_pipeline_cache.h" #include "radv_pipeline_cache.h"
#include "radv_rmv.h" #include "radv_rmv.h"
#include "radv_shader.h" #include "radv_shader.h"
@ -651,7 +652,7 @@ radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_
pipeline->active_stages |= lib->base.active_stages; pipeline->active_stages |= lib->base.active_stages;
/* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */ /* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */
if (radv_should_import_lib_binaries(pipeline->base.create_flags)) { if (lib->base.has_pipeline_binaries || radv_should_import_lib_binaries(pipeline->base.create_flags)) {
import_binaries = true; import_binaries = true;
} }
@ -2512,7 +2513,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGr
VkShaderStageFlags active_stages = 0; VkShaderStageFlags active_stages = 0;
/* No compilation when pipeline binaries are imported. */ /* No compilation when pipeline binaries are imported. */
if (binary_info) if (binary_info && binary_info->binaryCount > 0)
return true; return true;
/* Do not skip for libraries. */ /* Do not skip for libraries. */
@ -3293,6 +3294,45 @@ radv_needs_null_export_workaround(const struct radv_device *device, const struct
!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask; !ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask;
} }
static VkResult
radv_graphics_pipeline_import_binaries(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const VkPipelineBinaryInfoKHR *binary_info)
{
blake3_hash pipeline_hash;
struct mesa_blake3 ctx;
_mesa_blake3_init(&ctx);
for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
struct radv_shader *shader;
struct blob_reader blob;
blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
if (!shader)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
if (shader->info.stage == MESA_SHADER_VERTEX && i > 0) {
/* The GS copy-shader is a VS placed after all other stages. */
pipeline->base.gs_copy_shader = shader;
} else {
pipeline->base.shaders[shader->info.stage] = shader;
}
_mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
}
_mesa_blake3_final(&ctx, pipeline_hash);
pipeline->base.pipeline_hash = *(uint64_t *)pipeline_hash;
pipeline->has_pipeline_binaries = true;
return VK_SUCCESS;
}
static VkResult static VkResult
radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device, radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
@ -3325,14 +3365,22 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
if (gfx_state.compilation_required) { const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
result = radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
if (result != VK_SUCCESS) { if (binary_info && binary_info->binaryCount > 0) {
radv_graphics_pipeline_state_finish(device, &gfx_state); result = radv_graphics_pipeline_import_binaries(device, pipeline, binary_info);
return result; } else {
if (gfx_state.compilation_required) {
result =
radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
} }
} }
if (result != VK_SUCCESS) {
radv_graphics_pipeline_state_finish(device, &gfx_state);
return result;
}
uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk); uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk);
radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk); radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk);
@ -3424,7 +3472,6 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, str
struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo) struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
{ {
VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
struct radv_graphics_pipeline_state gfx_state;
VkResult result; VkResult result;
const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info = const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
@ -3466,14 +3513,23 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, str
if (pipeline_layout) if (pipeline_layout)
radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout); radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout);
result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state); const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
if (result != VK_SUCCESS)
return result;
result = if (binary_info && binary_info->binaryCount > 0) {
radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled); result = radv_graphics_pipeline_import_binaries(device, &pipeline->base, binary_info);
} else {
struct radv_graphics_pipeline_state gfx_state;
result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
if (result != VK_SUCCESS)
return result;
result =
radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
radv_graphics_pipeline_state_finish(device, &gfx_state);
}
radv_graphics_pipeline_state_finish(device, &gfx_state);
return result; return result;
} }

View file

@ -121,6 +121,9 @@ struct radv_graphics_pipeline {
/* For relocation of shaders with RGP. */ /* For relocation of shaders with RGP. */
struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc; struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc;
/* Whether the pipeline imported binaries. */
bool has_pipeline_binaries;
}; };
RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS) RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)

View file

@ -6,6 +6,7 @@
#include "nir/nir.h" #include "nir/nir.h"
#include "nir/nir_builder.h" #include "nir/nir_builder.h"
#include "nir/nir_serialize.h"
#include "vk_shader_module.h" #include "vk_shader_module.h"
@ -13,6 +14,7 @@
#include "radv_debug.h" #include "radv_debug.h"
#include "radv_descriptor_set.h" #include "radv_descriptor_set.h"
#include "radv_entrypoints.h" #include "radv_entrypoints.h"
#include "radv_pipeline_binary.h"
#include "radv_pipeline_cache.h" #include "radv_pipeline_cache.h"
#include "radv_pipeline_rt.h" #include "radv_pipeline_rt.h"
#include "radv_rmv.h" #include "radv_rmv.h"
@ -281,14 +283,32 @@ radv_init_rt_stage_hashes(const struct radv_device *device,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
struct radv_ray_tracing_stage *stages, const struct radv_shader_stage_key *stage_keys) struct radv_ray_tracing_stage *stages, const struct radv_shader_stage_key *stage_keys)
{ {
for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) { const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[idx]; if (binary_info && binary_info->binaryCount > 0) {
gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage); for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
struct mesa_sha1 ctx; VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
struct blob_reader blob;
_mesa_sha1_init(&ctx); blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
radv_pipeline_hash_shader_stage(pipeline_flags, sinfo, &stage_keys[s], &ctx);
_mesa_sha1_final(&ctx, stages[idx].sha1); const struct radv_ray_tracing_binary_header *header =
(const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
if (header->is_traversal_shader)
continue;
memcpy(stages[i].sha1, header->stage_sha1, SHA1_DIGEST_LENGTH);
}
} else {
for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[idx];
gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
radv_pipeline_hash_shader_stage(pipeline_flags, sinfo, &stage_keys[s], &ctx);
_mesa_sha1_final(&ctx, stages[idx].sha1);
}
} }
} }
@ -984,6 +1004,67 @@ fail:
return result; return result;
} }
static VkResult
radv_ray_tracing_pipeline_import_binary(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
const VkPipelineBinaryInfoKHR *binary_info)
{
blake3_hash pipeline_hash;
struct mesa_blake3 ctx;
_mesa_blake3_init(&ctx);
for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
struct radv_shader *shader;
struct blob_reader blob;
blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
const struct radv_ray_tracing_binary_header *header =
(const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
if (header->is_traversal_shader) {
shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
if (!shader)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = shader;
_mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
continue;
}
memcpy(&pipeline->stages[i].info, &header->stage_info, sizeof(pipeline->stages[i].info));
pipeline->stages[i].stack_size = header->stack_size;
if (header->has_shader) {
shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
if (!shader)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
pipeline->stages[i].shader = shader;
_mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
}
if (header->has_nir) {
nir_shader *nir = nir_deserialize(NULL, NULL, &blob);
pipeline->stages[i].nir = radv_pipeline_cache_nir_to_handle(device, NULL, nir, header->stage_sha1, false);
ralloc_free(nir);
if (!pipeline->stages[i].nir)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
}
_mesa_blake3_final(&ctx, pipeline_hash);
pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline_hash;
return VK_SUCCESS;
}
static VkResult static VkResult
radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
@ -1032,10 +1113,16 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
result = radv_rt_pipeline_compile(device, pCreateInfo, pipeline, cache, &rt_state, capture_replay_blocks, const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
creation_feedback);
if (result != VK_SUCCESS) if (binary_info && binary_info->binaryCount > 0) {
goto fail; result = radv_ray_tracing_pipeline_import_binary(device, pipeline, binary_info);
} else {
result = radv_rt_pipeline_compile(device, pCreateInfo, pipeline, cache, &rt_state, capture_replay_blocks,
creation_feedback);
if (result != VK_SUCCESS)
goto fail;
}
if (!(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) { if (!(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
compute_rt_stack_size(pCreateInfo, pipeline); compute_rt_stack_size(pCreateInfo, pipeline);