radv/rt: separate shader compilation

With this patch, we compile separately
 - general shaders (raygen, miss, callable)
 - closest-hit shaders
 - traversal shader (incl. all intersection / any-hit shaders)

Each shader uses the following scheme:

  if (shader_pc == shader_va) {
     <shader code>
  }
  next = select_next_shader(shader_va)
  jump next

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22096>
This commit is contained in:
Daniel Schürmann 2023-03-10 18:40:58 +01:00 committed by Marge Bot
parent defdcd2058
commit 163c97e6a7
5 changed files with 94 additions and 126 deletions

View file

@ -27,6 +27,7 @@
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_pipeline.h"
struct rt_handle_hash_entry {
uint32_t key;
@ -362,34 +363,42 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
}
/* Compile (or look up in the pipeline cache) every ray-tracing stage of the
 * pipeline, storing results into stages[idx].shader / stack_size.
 *
 * NOTE(review): this span is a unified diff with its +/- markers stripped:
 * the pre-patch function (radv_rt_precompile_shaders, taking a bare `stages`
 * array) and its post-patch replacement (radv_rt_compile_shaders, taking the
 * whole `pipeline`) are interleaved below.  It cannot compile as-is; restore
 * the diff markers or keep only one version.
 */
static VkResult
radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
const struct radv_pipeline_key *key,
struct radv_ray_tracing_stage *stages)
/* NOTE(review): second signature — the post-patch replacement of the one above. */
radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
const struct radv_pipeline_key *key,
struct radv_ray_tracing_pipeline *pipeline)
{
/* Compilation is unavoidable here, so honor FAIL_ON_PIPELINE_COMPILE_REQUIRED
 * before doing any work. */
if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
return VK_PIPELINE_COMPILE_REQUIRED;
struct radv_ray_tracing_stage *stages = pipeline->stages;
for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
/* Timestamp for per-stage creation feedback; the consumer of `stage_start`
 * is elided by the hunk cut below. */
int64_t stage_start = os_time_get_nano();
struct radv_pipeline_stage stage;
radv_pipeline_stage_init(&pCreateInfo->pStages[idx], &stage, stages[idx].stage);
/* NOTE(review): pre-patch path — hash the stage and probe the NIR cache,
 * skipping compilation on a hit. */
uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
/* lookup the stage in cache */
stages[idx].shader = radv_pipeline_cache_search_nir(device, cache, shader_sha1);
if (stages[idx].shader)
goto feedback;
if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
return VK_PIPELINE_COMPILE_REQUIRED;
/* precompile the shader */
struct nir_shader *nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
stages[idx].shader = radv_pipeline_cache_nir_to_handle(device, cache, nir, shader_sha1,
!key->optimisations_disabled);
ralloc_free(nir);
/* NOTE(review): post-patch path — parse SPIR-V to NIR, then either compile
 * the stage to a standalone shader (raygen/miss/callable/closest-hit, per
 * radv_ray_tracing_stage_is_compiled) or cache the NIR handle so it can be
 * inlined into the traversal shader later. */
stage.nir = radv_parse_rt_stage(device, &pCreateInfo->pStages[idx], key);
if (radv_ray_tracing_stage_is_compiled(&stages[idx])) {
uint32_t stack_size = 0;
struct radv_shader *shader =
radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &stage, &stack_size);
stages[idx].stack_size = stack_size;
/* shader may be NULL on compile failure; mapped to OOM below. */
stages[idx].shader = shader ? &shader->base : NULL;
} else {
uint8_t shader_sha1[SHA1_DIGEST_LENGTH];
radv_hash_shaders(shader_sha1, &stage, 1, NULL, key, radv_get_hash_flags(device, false));
/* The NIR's declared scratch usage stands in for the stage's stack size. */
stages[idx].stack_size = stage.nir->scratch_size;
stages[idx].shader = radv_pipeline_cache_nir_to_handle(
device, cache, stage.nir, shader_sha1, !key->optimisations_disabled);
}
ralloc_free(stage.nir);
if (!stages[idx].shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
/* NOTE(review): hunk boundary — the per-stage feedback recording that used
 * `stage_start` (and the `feedback:` label targeted above) is elided here. */
@ -402,93 +411,27 @@ radv_rt_precompile_shaders(struct radv_device *device, struct vk_pipeline_cache
}
}
return VK_SUCCESS;
}
/* Monolithic ray-tracing pipeline compile path: precompiles the per-stage
 * shaders, builds and lowers the combined RT megashader plus the traversal
 * shader, and emits final ISA into pipeline->base.base.shaders[].
 *
 * NOTE(review): this function is code the commit DELETES, shown here as a
 * stripped diff.  Two residue problems to confirm before reuse:
 *  - `key` (used when building the traversal shader below) is never declared;
 *    the parameter is named `pipeline_key`.
 *  - the cache-hit / LIBRARY / COMPILE_REQUIRED checks below appear to be two
 *    versions' conditionals collapsed together (unbraced nested ifs).
 */
static VkResult
radv_rt_pipeline_compile(struct radv_ray_tracing_pipeline *pipeline,
struct radv_pipeline_layout *pipeline_layout, struct radv_device *device,
struct vk_pipeline_cache *cache,
const struct radv_pipeline_key *pipeline_key,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
{
struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
struct radv_pipeline_stage rt_stage = {0};
/* First check if we can get things from the cache before we take the expensive step of
 * generating the nir. */
/* Synthetic module/stage description: the combined RT shader is modeled as an
 * intersection-stage entry point named "main". */
struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
VkPipelineShaderStageCreateInfo stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = NULL,
.stage = VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
.module = vk_shader_module_to_handle(&module),
.pName = "main",
};
radv_pipeline_stage_init(&stage, &rt_stage, vk_to_mesa_shader_stage(stage.stage));
/* NOTE(review): unbraced nested ifs — as written, a cache hit only returns
 * early for library pipelines; verify against the pre-patch tree. */
if (!keep_executable_info &&
radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo))
if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
return VK_SUCCESS;
if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
return VK_PIPELINE_COMPILE_REQUIRED;
VkResult result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback,
pipeline_key, pipeline->stages);
if (result != VK_SUCCESS)
return result;
/* Stitch the precompiled stages into one combined RT shader (NIR). */
rt_stage.internal_nir = create_rt_shader(device, pipeline, pCreateInfo, pipeline_key);
/* Compile SPIR-V shader to NIR. */
rt_stage.nir =
radv_shader_spirv_to_nir(device, &rt_stage, pipeline_key, pipeline->base.base.is_internal);
radv_optimize_nir(rt_stage.nir, pipeline_key->optimisations_disabled);
/* Gather info again, information such as outputs_read can be out-of-date. */
nir_shader_gather_info(rt_stage.nir, nir_shader_get_entrypoint(rt_stage.nir));
/* Run the shader info pass. */
radv_nir_shader_info_init(&rt_stage.info);
radv_nir_shader_info_pass(device, rt_stage.nir, MESA_SHADER_NONE, pipeline_layout, pipeline_key,
pipeline->base.base.type, false, &rt_stage.info);
/* Lay out user SGPRs / push constants for the combined shader. */
radv_declare_shader_args(device, pipeline_key, &rt_stage.info, rt_stage.stage, MESA_SHADER_NONE,
RADV_SHADER_TYPE_DEFAULT, &rt_stage.args);
rt_stage.info.user_sgprs_locs = rt_stage.args.user_sgprs_locs;
rt_stage.info.inline_push_constant_mask = rt_stage.args.ac.inline_push_const_mask;
/* Postprocess NIR. */
radv_postprocess_nir(device, pipeline_layout, pipeline_key, MESA_SHADER_NONE, &rt_stage);
if (radv_can_dump_shader(device, rt_stage.nir, false))
nir_print_shader(rt_stage.nir, stderr);
/* Compile NIR shader to AMD assembly. */
pipeline->base.base.shaders[rt_stage.stage] =
radv_shader_nir_to_asm(device, cache, &rt_stage, &rt_stage.nir, 1, pipeline_key,
keep_executable_info, keep_statistic_info, &binaries[rt_stage.stage]);
if (!keep_executable_info) {
radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
pipeline->sha1);
}
/* The binary was only needed for the cache insert above. */
free(binaries[rt_stage.stage]);
if (radv_can_dump_shader_stats(device, rt_stage.nir)) {
radv_dump_shader_stats(device, &pipeline->base.base,
pipeline->base.base.shaders[rt_stage.stage], rt_stage.stage, stderr);
}
ralloc_free(rt_stage.internal_nir);
ralloc_free(rt_stage.nir);
/* create traversal shader */
struct vk_shader_module traversal_module = {
.base.type = VK_OBJECT_TYPE_SHADER_MODULE,
/* NOTE(review): `key` is not declared in this function (parameter is
 * `pipeline_key`) — diff residue; confirm against the original tree. */
.nir = radv_build_traversal_shader(device, pipeline, pCreateInfo, key),
};
const VkPipelineShaderStageCreateInfo pStage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
.module = vk_shader_module_to_handle(&traversal_module),
.pName = "main",
};
struct radv_pipeline_stage traversal_stage = {
.stage = MESA_SHADER_INTERSECTION,
.nir = traversal_module.nir,
};
vk_pipeline_hash_shader_stage(&pStage, NULL, traversal_stage.shader_sha1);
/* Traversal shader gets its own ISA slot (MESA_SHADER_INTERSECTION). */
pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] =
radv_rt_nir_to_asm(device, cache, pCreateInfo, key, &traversal_stage, NULL);
return VK_SUCCESS;
}
@ -601,11 +544,16 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
pipeline->base.base.shaders[MESA_SHADER_COMPUTE] = radv_create_rt_prolog(device);
/* create combined config */
combine_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
&pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
postprocess_rt_config(&pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config,
device->physical_device->rad_info.gfx_level,
struct ac_shader_config *config = &pipeline->base.base.shaders[MESA_SHADER_COMPUTE]->config;
for (unsigned i = 0; i < pipeline->stage_count; i++) {
if (radv_ray_tracing_stage_is_compiled(&pipeline->stages[i])) {
struct radv_shader *shader =
container_of(pipeline->stages[i].shader, struct radv_shader, base);
combine_config(config, &shader->config);
}
}
combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
postprocess_rt_config(config, device->physical_device->rad_info.gfx_level,
device->physical_device->rt_wave_size);
}
@ -619,6 +567,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
VkResult result;
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pCreateInfo->flags);
bool keep_executable_info = radv_pipeline_capture_shaders(device, pCreateInfo->flags);
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
if (creation_feedback)
@ -646,7 +595,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
radv_rt_fill_stage_info(pCreateInfo, stages);
result = radv_rt_fill_group_info(device, pCreateInfo, stages, pipeline->groups);
if (result != VK_SUCCESS)
goto done;
goto fail;
struct radv_pipeline_key key =
radv_generate_rt_pipeline_key(device, pipeline, pCreateInfo->flags);
@ -655,26 +604,41 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
radv_get_hash_flags(device, keep_statistic_info));
pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->sha1;
if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
result = radv_rt_precompile_shaders(device, cache, pCreateInfo, creation_feedback, &key,
pipeline->stages);
goto done;
bool cache_hit = false;
if (!keep_executable_info)
cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);
if (!cache_hit) {
result =
radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, &key, pipeline);
if (result != VK_SUCCESS)
goto fail;
}
result = radv_rt_pipeline_compile(pipeline, pipeline_layout, device, cache, &key, pCreateInfo,
creation_feedback);
if (!(pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
compute_rt_stack_size(pCreateInfo, pipeline);
compile_rt_prolog(device, pipeline);
if (result != VK_SUCCESS)
goto done;
radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
}
compute_rt_stack_size(pCreateInfo, pipeline);
compile_rt_prolog(device, pipeline);
if (!cache_hit)
radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount,
pipeline->sha1);
radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout);
/* write shader VAs into group handles */
for (unsigned i = 0; i < pipeline->group_count; i++) {
if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
struct radv_shader *shader =
container_of(pipeline->stages[pipeline->groups[i].recursive_shader].shader,
struct radv_shader, base);
pipeline->groups[i].handle.recursive_shader_ptr = shader->va;
}
}
radv_rmv_log_compute_pipeline_create(device, pCreateInfo->flags, &pipeline->base.base, false);
done:
fail:
if (creation_feedback)
creation_feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - pipeline_start;

View file

@ -2350,7 +2350,8 @@ struct radv_ray_tracing_stage {
/* Returns true for stages that are compiled to standalone shaders
 * (raygen, callable, closest-hit, miss).  Other RT stages — intersection
 * and any-hit — are instead inlined into the traversal shader.
 *
 * Fix: the stripped diff left the deleted `return false;` ahead of the real
 * return, making everything after it unreachable and the predicate always
 * false; the dead statement is removed.
 */
static inline bool
radv_ray_tracing_stage_is_compiled(struct radv_ray_tracing_stage *stage)
{
   return stage->stage == MESA_SHADER_RAYGEN || stage->stage == MESA_SHADER_CALLABLE ||
          stage->stage == MESA_SHADER_CLOSEST_HIT || stage->stage == MESA_SHADER_MISS;
}
struct radv_ray_tracing_pipeline {

View file

@ -39,7 +39,6 @@ radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
return false;
if (rt_pipelines) {
return false;
if (pdevice->use_llvm)
return false;

View file

@ -1432,7 +1432,7 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave
return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
}
static nir_shader *
nir_shader *
radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_pipeline_key *key)

View file

@ -790,4 +790,8 @@ nir_shader *create_rt_shader(struct radv_device *device, struct radv_ray_tracing
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_pipeline_key *key);
nir_shader *radv_build_traversal_shader(struct radv_device *device,
struct radv_ray_tracing_pipeline *pipeline,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_pipeline_key *key);
#endif