/*
 * Copyright © 2021 Google
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_acceleration_structure.h"
#include "radv_private.h"
#include "radv_shader.h"

#include "nir/nir.h"
#include "nir/nir_builder.h"

static VkRayTracingPipelineCreateInfoKHR
radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
{
   VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo;
   uint32_t total_stages = pCreateInfo->stageCount;
   uint32_t total_groups = pCreateInfo->groupCount;

   if (pCreateInfo->pLibraryInfo) {
      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
         RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]);
         total_stages += library->library.stage_count;
         total_groups += library->library.group_count;
      }
   }

   VkPipelineShaderStageCreateInfo *stages = NULL;
   VkRayTracingShaderGroupCreateInfoKHR *groups = NULL;
   local_create_info.stageCount = total_stages;
   local_create_info.groupCount = total_groups;
   local_create_info.pStages = stages =
      malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages);
   local_create_info.pGroups = groups =
      malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups);
   if (!local_create_info.pStages || !local_create_info.pGroups)
      return local_create_info;

   total_stages = pCreateInfo->stageCount;
   total_groups = pCreateInfo->groupCount;
   for (unsigned j = 0; j < pCreateInfo->stageCount; ++j)
      stages[j] = pCreateInfo->pStages[j];
   for (unsigned j = 0; j < pCreateInfo->groupCount; ++j)
      groups[j] = pCreateInfo->pGroups[j];

   if (pCreateInfo->pLibraryInfo) {
      for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
         RADV_FROM_HANDLE(radv_pipeline, library, pCreateInfo->pLibraryInfo->pLibraries[i]);
         for (unsigned j = 0; j < library->library.stage_count; ++j)
            stages[total_stages + j] = library->library.stages[j];
         for (unsigned j = 0; j < library->library.group_count; ++j) {
            VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j];
            *dst = library->library.groups[j];
            if (dst->generalShader != VK_SHADER_UNUSED_KHR)
               dst->generalShader += total_stages;
            if (dst->closestHitShader != VK_SHADER_UNUSED_KHR)
               dst->closestHitShader += total_stages;
            if (dst->anyHitShader != VK_SHADER_UNUSED_KHR)
               dst->anyHitShader += total_stages;
            if (dst->intersectionShader != VK_SHADER_UNUSED_KHR)
               dst->intersectionShader += total_stages;
         }
         total_stages += library->library.stage_count;
         total_groups += library->library.group_count;
      }
   }

   return local_create_info;
}
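/*
 * Illustration (example values, not part of the code above): if the parent pipeline provides
 * 3 stages and a library group references its own stage 1 as closestHitShader, the merged
 * create info rebases that reference to 3 + 1 = 4, i.e. the stage's position in the
 * concatenated pStages array. VK_SHADER_UNUSED_KHR entries are left untouched.
 */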
static VkResult
radv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache,
                                const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                                const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_pipeline *pipeline;

   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
   pipeline->type = RADV_PIPELINE_LIBRARY;

   VkRayTracingPipelineCreateInfoKHR local_create_info =
      radv_create_merged_rt_create_info(pCreateInfo);
   if (!local_create_info.pStages || !local_create_info.pGroups)
      goto fail;

   if (local_create_info.stageCount) {
      size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount;
      pipeline->library.stage_count = local_create_info.stageCount;
      pipeline->library.stages = malloc(size);
      if (!pipeline->library.stages)
         goto fail;
      memcpy(pipeline->library.stages, local_create_info.pStages, size);
   }

   if (local_create_info.groupCount) {
      size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount;
      pipeline->library.group_count = local_create_info.groupCount;
      pipeline->library.groups = malloc(size);
      if (!pipeline->library.groups)
         goto fail;
      memcpy(pipeline->library.groups, local_create_info.pGroups, size);
   }

   *pPipeline = radv_pipeline_to_handle(pipeline);

   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   return VK_SUCCESS;
fail:
   free(pipeline->library.groups);
   free(pipeline->library.stages);
   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   /* The partially initialized pipeline object is not returned on failure, so release it too. */
   vk_object_base_finish(&pipeline->base);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
   return VK_ERROR_OUT_OF_HOST_MEMORY;
}

/*
 * Global variables for an RT pipeline
 */
struct rt_variables {
   /* idx of the next shader to run in the next iteration of the main loop */
   nir_variable *idx;

   /* scratch offset of the argument area relative to stack_ptr */
   nir_variable *arg;

   nir_variable *stack_ptr;

   /* global address of the SBT entry used for the shader */
   nir_variable *shader_record_ptr;

   /* trace_ray arguments */
   nir_variable *accel_struct;
   nir_variable *flags;
   nir_variable *cull_mask;
   nir_variable *sbt_offset;
   nir_variable *sbt_stride;
   nir_variable *miss_index;
   nir_variable *origin;
   nir_variable *tmin;
   nir_variable *direction;
   nir_variable *tmax;

   /* from the TLAS instance currently being visited */
   nir_variable *custom_instance_and_mask;

   /* Properties of the primitive currently being visited. */
   nir_variable *primitive_id;
   nir_variable *geometry_id_and_flags;
   nir_variable *instance_id;
   nir_variable *instance_addr;
   nir_variable *hit_kind;
   nir_variable *opaque;

   /* Safeguard to ensure we don't end up in an infinite loop on a non-existing case. Should not
    * be needed but is extra anti-hang safety during bring-up. */
   nir_variable *main_loop_case_visited;

   /* Output variable for intersection & anyhit shaders. */
   nir_variable *ahit_status;

   /* Array of stack size structs recording the max stack size for each group. */
   struct radv_pipeline_shader_stack_size *stack_sizes;
   unsigned group_idx;
};
static struct rt_variables
create_rt_variables(nir_shader *shader, struct radv_pipeline_shader_stack_size *stack_sizes)
{
   struct rt_variables vars = {
      NULL,
   };
   vars.idx = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "idx");
   vars.arg = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "arg");
   vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
   vars.shader_record_ptr =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");

   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   vars.accel_struct =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "accel_struct");
   vars.flags = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ray_flags");
   vars.cull_mask = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "cull_mask");
   vars.sbt_offset =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_offset");
   vars.sbt_stride =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "sbt_stride");
   vars.miss_index =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "miss_index");
   vars.origin = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_origin");
   vars.tmin = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmin");
   vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction");
   vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax");

   vars.custom_instance_and_mask = nir_variable_create(
      shader, nir_var_shader_temp, glsl_uint_type(), "custom_instance_and_mask");
   vars.primitive_id =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
   vars.geometry_id_and_flags =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags");
   vars.instance_id =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "instance_id");
   vars.instance_addr =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "instance_addr");
   vars.hit_kind = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "hit_kind");
   vars.opaque = nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "opaque");

   vars.main_loop_case_visited =
      nir_variable_create(shader, nir_var_shader_temp, glsl_bool_type(), "main_loop_case_visited");
   vars.ahit_status =
      nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "ahit_status");

   vars.stack_sizes = stack_sizes;
   return vars;
}
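/*
 * Note (descriptive, not from the original comments): the combined shader owns one set of these
 * shader_temp globals. Each stage that gets inlined (see insert_rt_case below) first creates its
 * own rt_variables and then remaps them onto the combined shader's set via map_rt_variables, so
 * loads/stores in the inlined code end up touching the shared state.
 */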
/*
 * Remap all the variables between the two rt_variables structs for inlining.
 */
static void
map_rt_variables(struct hash_table *var_remap, struct rt_variables *src,
                 const struct rt_variables *dst)
{
   _mesa_hash_table_insert(var_remap, src->idx, dst->idx);
   _mesa_hash_table_insert(var_remap, src->arg, dst->arg);
   _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr);
   _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr);

   _mesa_hash_table_insert(var_remap, src->accel_struct, dst->accel_struct);
   _mesa_hash_table_insert(var_remap, src->flags, dst->flags);
   _mesa_hash_table_insert(var_remap, src->cull_mask, dst->cull_mask);
   _mesa_hash_table_insert(var_remap, src->sbt_offset, dst->sbt_offset);
   _mesa_hash_table_insert(var_remap, src->sbt_stride, dst->sbt_stride);
   _mesa_hash_table_insert(var_remap, src->miss_index, dst->miss_index);
   _mesa_hash_table_insert(var_remap, src->origin, dst->origin);
   _mesa_hash_table_insert(var_remap, src->tmin, dst->tmin);
   _mesa_hash_table_insert(var_remap, src->direction, dst->direction);
   _mesa_hash_table_insert(var_remap, src->tmax, dst->tmax);

   _mesa_hash_table_insert(var_remap, src->custom_instance_and_mask, dst->custom_instance_and_mask);
   _mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id);
   _mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags);
   _mesa_hash_table_insert(var_remap, src->instance_id, dst->instance_id);
   _mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr);
   _mesa_hash_table_insert(var_remap, src->hit_kind, dst->hit_kind);
   _mesa_hash_table_insert(var_remap, src->opaque, dst->opaque);
   _mesa_hash_table_insert(var_remap, src->ahit_status, dst->ahit_status);

   src->stack_sizes = dst->stack_sizes;
   src->group_idx = dst->group_idx;
}

/*
 * Create a copy of the global rt variables where the primitive/instance related variables are
 * independent. This is needed as we need to keep the old values of the global variables around
 * in case e.g. an anyhit shader rejects the intersection. So there are inner variables that get
 * copied to the outer variables once we commit to a better hit.
 */
static struct rt_variables
create_inner_vars(nir_builder *b, const struct rt_variables *vars)
{
   struct rt_variables inner_vars = *vars;
   inner_vars.idx =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
   inner_vars.shader_record_ptr = nir_variable_create(
      b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
   inner_vars.primitive_id =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id");
   inner_vars.geometry_id_and_flags = nir_variable_create(
      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_geometry_id_and_flags");
   inner_vars.tmax =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_float_type(), "inner_tmax");
   inner_vars.instance_id =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_instance_id");
   inner_vars.instance_addr = nir_variable_create(b->shader, nir_var_shader_temp,
                                                  glsl_uint64_t_type(), "inner_instance_addr");
   inner_vars.hit_kind =
      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_hit_kind");
   inner_vars.custom_instance_and_mask = nir_variable_create(
      b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_custom_instance_and_mask");

   return inner_vars;
}

/* The hit attributes are stored on the stack. This is the offset compared to the current stack
 * pointer of where the hit attrib is stored. */
const uint32_t RADV_HIT_ATTRIB_OFFSET = -(16 + RADV_MAX_HIT_ATTRIB_SIZE);
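/*
 * Rough sketch of the scratch frame the call lowering below maintains, seen from the callee's
 * stack_ptr (offsets follow from the constants used in lower_rt_instructions; the layout comment
 * itself is an illustration, not from the original code):
 *
 *    stack_ptr - 16 - RADV_MAX_HIT_ATTRIB_SIZE ... : hit attributes (RADV_HIT_ATTRIB_OFFSET)
 *    stack_ptr - 16                                : return idx, popped by insert_rt_return
 *    stack_ptr + arg                               : caller's argument/payload area
 *
 * arg is negative: it points back into the caller's frame, so the callee can address the payload
 * through the ordinary scratch load/store rewrites.
 */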
static void
insert_rt_return(nir_builder *b, const struct rt_variables *vars)
{
   nir_store_var(b, vars->stack_ptr,
                 nir_iadd(b, nir_load_var(b, vars->stack_ptr), nir_imm_int(b, -16)), 1);
   nir_store_var(b, vars->idx,
                 nir_load_scratch(b, 1, 32, nir_load_var(b, vars->stack_ptr), .align_mul = 16), 1);
}

enum sbt_type {
   SBT_RAYGEN,
   SBT_MISS,
   SBT_HIT,
   SBT_CALLABLE,
};

static nir_ssa_def *
get_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding)
{
   nir_ssa_def *desc = nir_load_sbt_amd(b, 4, .binding = binding);
   nir_ssa_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
   nir_ssa_def *stride = nir_channel(b, desc, 2);

   nir_ssa_def *ret = nir_imul(b, idx, stride);
   ret = nir_iadd(b, base_addr, nir_u2u64(b, ret));

   return ret;
}

static void
load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx,
               enum sbt_type binding, unsigned offset)
{
   nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);

   nir_ssa_def *load_addr = addr;
   if (offset)
      load_addr = nir_iadd(b, load_addr, nir_imm_int64(b, offset));
   nir_ssa_def *v_idx =
      nir_build_load_global(b, 1, 32, load_addr, .align_mul = 4, .align_offset = 0);

   nir_store_var(b, vars->idx, v_idx, 1);

   nir_ssa_def *record_addr = nir_iadd(b, addr, nir_imm_int64(b, RADV_RT_HANDLE_SIZE));
   nir_store_var(b, vars->shader_record_ptr, record_addr, 1);
}
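/*
 * For illustration (example numbers, not part of the code): with an SBT base of 0x1000 and a
 * stride of 64, load_sbt_entry(idx = 2) reads the 32-bit shader index from 0x1000 + 2 * 64
 * (plus the optional extra offset) and points shader_record_ptr at
 * 0x1000 + 2 * 64 + RADV_RT_HANDLE_SIZE, i.e. the application-visible shader record data that
 * follows the handle.
 */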
static nir_ssa_def *
nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation)
{
   nir_ssa_def *result_components[3] = {
      nir_channel(b, matrix[0], 3),
      nir_channel(b, matrix[1], 3),
      nir_channel(b, matrix[2], 3),
   };
   for (unsigned i = 0; i < 3; ++i) {
      for (unsigned j = 0; j < 3; ++j) {
         nir_ssa_def *v =
            nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
         result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
      }
   }
   return nir_vec(b, result_components, 3);
}

static nir_ssa_def *
nir_build_vec3_mat_mult_pre(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[])
{
   nir_ssa_def *result_components[3] = {
      nir_channel(b, matrix[0], 3),
      nir_channel(b, matrix[1], 3),
      nir_channel(b, matrix[2], 3),
   };
   return nir_build_vec3_mat_mult(b, nir_fsub(b, vec, nir_vec(b, result_components, 3)), matrix,
                                  false);
}

static void
nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
{
   unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
   for (unsigned i = 0; i < 3; ++i) {
      out[i] = nir_build_load_global(b, 4, 32,
                                     nir_iadd(b, instance_addr, nir_imm_int64(b, offset + i * 16)),
                                     .align_mul = 64, .align_offset = offset + i * 16);
   }
}

/* This lowers all the RT instructions that we do not want to pass on to the combined shader and
 * that we can implement using the variables from the shader we are going to inline into. */
static void
lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned call_idx_base)
{
   nir_builder b_shader;
   nir_builder_init(&b_shader, nir_shader_get_entrypoint(shader));

   nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
      nir_foreach_instr_safe (instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_rt_execute_callable: {
               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
               uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
               b_shader.cursor = nir_instr_remove(instr);

               nir_store_var(&b_shader, vars->stack_ptr,
                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
                                      nir_imm_int(&b_shader, size)),
                             1);
               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret),
                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16,
                                 .write_mask = 1);

               nir_store_var(&b_shader, vars->stack_ptr,
                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
                                      nir_imm_int(&b_shader, 16)),
                             1);
               load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_CALLABLE, 0);

               nir_store_var(
                  &b_shader, vars->arg,
                  nir_isub(&b_shader, intr->src[1].ssa, nir_imm_int(&b_shader, size + 16)), 1);

               vars->stack_sizes[vars->group_idx].recursive_size =
                  MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16);
               break;
            }
            case nir_intrinsic_rt_trace_ray: {
               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;
               uint32_t ret = call_idx_base + nir_intrinsic_call_idx(intr) + 1;
               b_shader.cursor = nir_instr_remove(instr);

               nir_store_var(&b_shader, vars->stack_ptr,
                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
                                      nir_imm_int(&b_shader, size)),
                             1);
               nir_store_scratch(&b_shader, nir_imm_int(&b_shader, ret),
                                 nir_load_var(&b_shader, vars->stack_ptr), .align_mul = 16,
                                 .write_mask = 1);

               nir_store_var(&b_shader, vars->stack_ptr,
                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
                                      nir_imm_int(&b_shader, 16)),
                             1);

               nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 1), 1);
               nir_store_var(
                  &b_shader, vars->arg,
                  nir_isub(&b_shader, intr->src[10].ssa, nir_imm_int(&b_shader, size + 16)), 1);

               vars->stack_sizes[vars->group_idx].recursive_size =
                  MAX2(vars->stack_sizes[vars->group_idx].recursive_size, size + 16);

               /* Per the SPIR-V extension spec we have to ignore some bits for some arguments. */
               nir_store_var(&b_shader, vars->accel_struct, intr->src[0].ssa, 0x1);
               nir_store_var(&b_shader, vars->flags, intr->src[1].ssa, 0x1);
               nir_store_var(&b_shader, vars->cull_mask,
                             nir_iand(&b_shader, intr->src[2].ssa, nir_imm_int(&b_shader, 0xff)),
                             0x1);
               nir_store_var(&b_shader, vars->sbt_offset,
                             nir_iand(&b_shader, intr->src[3].ssa, nir_imm_int(&b_shader, 0xf)),
                             0x1);
               nir_store_var(&b_shader, vars->sbt_stride,
                             nir_iand(&b_shader, intr->src[4].ssa, nir_imm_int(&b_shader, 0xf)),
                             0x1);
               nir_store_var(&b_shader, vars->miss_index,
                             nir_iand(&b_shader, intr->src[5].ssa, nir_imm_int(&b_shader, 0xffff)),
                             0x1);
               nir_store_var(&b_shader, vars->origin, intr->src[6].ssa, 0x7);
               nir_store_var(&b_shader, vars->tmin, intr->src[7].ssa, 0x1);
               nir_store_var(&b_shader, vars->direction, intr->src[8].ssa, 0x7);
               nir_store_var(&b_shader, vars->tmax, intr->src[9].ssa, 0x1);
               break;
            }
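            /* A resume point: rt_resume pops the portion of the caller's frame that the matching
             * rt_execute_callable/rt_trace_ray lowering pushed above. The 16-byte return-idx
             * slot has already been popped by insert_rt_return on the callee side. */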
            case nir_intrinsic_rt_resume: {
               uint32_t size = align(nir_intrinsic_stack_size(intr), 16) + RADV_MAX_HIT_ATTRIB_SIZE;

               b_shader.cursor = nir_instr_remove(instr);
               nir_store_var(&b_shader, vars->stack_ptr,
                             nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr),
                                      nir_imm_int(&b_shader, -size)),
                             1);
               break;
            }
            case nir_intrinsic_rt_return_amd: {
               b_shader.cursor = nir_instr_remove(instr);

               if (shader->info.stage == MESA_SHADER_RAYGEN) {
                  nir_store_var(&b_shader, vars->idx, nir_imm_int(&b_shader, 0), 1);
                  break;
               }
               insert_rt_return(&b_shader, vars);
               break;
            }
            case nir_intrinsic_load_scratch: {
               b_shader.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src_ssa(
                  instr, &intr->src[0],
                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[0].ssa));
               break;
            }
            case nir_intrinsic_store_scratch: {
               b_shader.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src_ssa(
                  instr, &intr->src[1],
                  nir_iadd(&b_shader, nir_load_var(&b_shader, vars->stack_ptr), intr->src[1].ssa));
               break;
            }
            case nir_intrinsic_load_rt_arg_scratch_offset_amd: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->arg);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_shader_record_ptr: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->shader_record_ptr);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_launch_id: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_global_invocation_id(&b_shader, 32);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_t_min: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmin);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_t_max: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmax);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_world_origin: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->origin);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_world_direction: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->direction);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_instance_custom_index: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->custom_instance_and_mask);
               ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFF));
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_primitive_id: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->primitive_id);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_geometry_index: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->geometry_id_and_flags);
               ret = nir_iand(&b_shader, ret, nir_imm_int(&b_shader, 0xFFFFFFF));
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_instance_id: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->instance_id);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_flags: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->flags);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_load_ray_hit_kind: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->hit_kind);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
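            /* The object<->world transforms are not tracked in rt_variables; only instance_addr
             * is. The matrices used below are (re)loaded from the radv_bvh_instance_node in the
             * acceleration structure each time one of these intrinsics is lowered. */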
            case nir_intrinsic_load_ray_world_to_object: {
               unsigned c = nir_intrinsic_column(intr);
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
               nir_ssa_def *wto_matrix[3];
               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);

               nir_ssa_def *vals[3];
               for (unsigned i = 0; i < 3; ++i)
                  vals[i] = nir_channel(&b_shader, wto_matrix[i], c);

               nir_ssa_def *val = nir_vec(&b_shader, vals, 3);
               if (c == 3)
                  val = nir_fneg(&b_shader,
                                 nir_build_vec3_mat_mult(&b_shader, val, wto_matrix, false));

               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
               break;
            }
            case nir_intrinsic_load_ray_object_to_world: {
               unsigned c = nir_intrinsic_column(intr);
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
               nir_ssa_def *val;
               if (c == 3) {
                  nir_ssa_def *wto_matrix[3];
                  nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);

                  nir_ssa_def *vals[3];
                  for (unsigned i = 0; i < 3; ++i)
                     vals[i] = nir_channel(&b_shader, wto_matrix[i], c);

                  val = nir_vec(&b_shader, vals, 3);
               } else {
                  val = nir_build_load_global(
                     &b_shader, 3, 32,
                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 92 + c * 12)),
                     .align_mul = 4, .align_offset = 0);
               }
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
               break;
            }
            case nir_intrinsic_load_ray_object_origin: {
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
               nir_ssa_def *wto_matrix[] = {
                  nir_build_load_global(
                     &b_shader, 4, 32,
                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 16)),
                     .align_mul = 64, .align_offset = 16),
                  nir_build_load_global(
                     &b_shader, 4, 32,
                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 32)),
                     .align_mul = 64, .align_offset = 32),
                  nir_build_load_global(
                     &b_shader, 4, 32,
                     nir_iadd(&b_shader, instance_node_addr, nir_imm_int64(&b_shader, 48)),
                     .align_mul = 64, .align_offset = 48)};
               nir_ssa_def *val = nir_build_vec3_mat_mult_pre(
                  &b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix);
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
               break;
            }
            case nir_intrinsic_load_ray_object_direction: {
               nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
               nir_ssa_def *wto_matrix[3];
               nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
               nir_ssa_def *val = nir_build_vec3_mat_mult(
                  &b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
               break;
            }
            case nir_intrinsic_load_intersection_opaque_amd: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_ssa_def *ret = nir_load_var(&b_shader, vars->opaque);
               nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
               break;
            }
            case nir_intrinsic_ignore_ray_intersection: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 1), 1);

               /* The if is a workaround to avoid having to fix up control flow manually */
               nir_push_if(&b_shader, nir_imm_true(&b_shader));
               nir_jump(&b_shader, nir_jump_return);
               nir_pop_if(&b_shader, NULL);
               break;
            }
            case nir_intrinsic_terminate_ray: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 2), 1);

               /* The if is a workaround to avoid having to fix up control flow manually */
               nir_push_if(&b_shader, nir_imm_true(&b_shader));
               nir_jump(&b_shader, nir_jump_return);
               nir_pop_if(&b_shader, NULL);
               break;
            }
            case nir_intrinsic_report_ray_intersection: {
               b_shader.cursor = nir_instr_remove(instr);
               nir_push_if(
                  &b_shader,
                  nir_iand(
                     &b_shader,
                     nir_flt(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmax)),
                     nir_fge(&b_shader, intr->src[0].ssa, nir_load_var(&b_shader, vars->tmin))));
               {
                  nir_store_var(&b_shader, vars->ahit_status, nir_imm_int(&b_shader, 0), 1);
                  nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 1);
                  nir_store_var(&b_shader, vars->hit_kind, intr->src[1].ssa, 1);
               }
               nir_pop_if(&b_shader, NULL);
               break;
            }
            default:
               break;
            }
            break;
         }
         case nir_instr_type_jump: {
            nir_jump_instr *jump = nir_instr_as_jump(instr);
            if (jump->type == nir_jump_halt) {
               b_shader.cursor = nir_instr_remove(instr);
               nir_jump(&b_shader, nir_jump_return);
            }
            break;
         }
         default:
            break;
         }
      }
   }

   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
}

static void
insert_rt_case(nir_builder *b, nir_shader *shader, const struct rt_variables *vars,
               nir_ssa_def *idx, uint32_t call_idx_base, uint32_t call_idx)
{
   struct hash_table *var_remap = _mesa_pointer_hash_table_create(NULL);

   nir_opt_dead_cf(shader);

   struct rt_variables src_vars = create_rt_variables(shader, vars->stack_sizes);
   map_rt_variables(var_remap, &src_vars, vars);

   NIR_PASS_V(shader, lower_rt_instructions, &src_vars, call_idx_base);

   NIR_PASS_V(shader, nir_opt_remove_phis);
   NIR_PASS_V(shader, nir_lower_returns);
   NIR_PASS_V(shader, nir_opt_dce);

   if (b->shader->info.stage == MESA_SHADER_ANY_HIT ||
       b->shader->info.stage == MESA_SHADER_INTERSECTION) {
      src_vars.stack_sizes[src_vars.group_idx].non_recursive_size =
         MAX2(src_vars.stack_sizes[src_vars.group_idx].non_recursive_size, shader->scratch_size);
   } else {
      src_vars.stack_sizes[src_vars.group_idx].recursive_size =
         MAX2(src_vars.stack_sizes[src_vars.group_idx].recursive_size, shader->scratch_size);
   }

   nir_push_if(b, nir_ieq(b, idx, nir_imm_int(b, call_idx)));
   nir_store_var(b, vars->main_loop_case_visited, nir_imm_bool(b, true), 1);
   nir_inline_function_impl(b, nir_shader_get_entrypoint(shader), NULL, var_remap);
   nir_pop_if(b, NULL);

   /* Adopt the instructions from the source shader, since they are merely moved, not cloned. */
   ralloc_adopt(ralloc_context(b->shader), ralloc_context(shader));

   ralloc_free(var_remap);
}
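/*
 * Outline only (the implementation below is still a stub): the helpers above are building
 * towards a single combined compute shader, roughly a loop that switches on vars.idx with one
 * insert_rt_case() per stage, where each case either loads the next SBT entry or returns to its
 * caller via the scratch stack. Indices 0 ("done", stored by rt_return_amd in raygen) and 1
 * (traversal, stored by rt_trace_ray) are handled outside the per-stage cases. This description
 * is inferred from the code in this file, not a finished design.
 */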
static nir_shader *
create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                 struct radv_pipeline_shader_stack_size *stack_sizes)
{
   /* TODO */
   return NULL;
}

static VkResult
radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                        const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;
   struct radv_pipeline *pipeline = NULL;
   struct radv_pipeline_shader_stack_size *stack_sizes = NULL;

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);

   VkRayTracingPipelineCreateInfoKHR local_create_info =
      radv_create_merged_rt_create_info(pCreateInfo);
   if (!local_create_info.pStages || !local_create_info.pGroups) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
   if (!stack_sizes) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   nir_shader *shader = create_rt_shader(device, &local_create_info, stack_sizes);
   VkComputePipelineCreateInfo compute_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .pNext = NULL,
      .flags = pCreateInfo->flags,
      .stage =
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
            .module = vk_shader_module_handle_from_nir(shader),
            .pName = "main",
         },
      .layout = pCreateInfo->layout,
   };
   result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, pPipeline);
   if (result != VK_SUCCESS)
      goto shader_fail;

   pipeline = radv_pipeline_from_handle(*pPipeline);
   pipeline->compute.rt_group_handles =
      calloc(sizeof(*pipeline->compute.rt_group_handles), local_create_info.groupCount);
   if (!pipeline->compute.rt_group_handles) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto shader_fail;
   }

   pipeline->compute.rt_stack_sizes = stack_sizes;
   stack_sizes = NULL;
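   /* Group handle contents: handles[0] holds the index used for general/closest-hit shaders and
    * handles[1] the one used for intersection/any-hit shaders. The +2 below keeps group indices
    * clear of 0 and 1, which the lowering above already uses (rt_return_amd in raygen stores 0,
    * rt_trace_ray stores 1). */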
   for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
      const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
      switch (group_info->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
         if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         FALLTHROUGH;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[0] = i + 2;
         if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
            pipeline->compute.rt_group_handles[i].handles[1] = i + 2;
         break;
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR:
         unreachable("VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR");
      }
   }

shader_fail:
   if (result != VK_SUCCESS && pipeline)
      radv_pipeline_destroy(device, pipeline, pAllocator);
   ralloc_free(shader);
fail:
   free((void *)local_create_info.pGroups);
   free((void *)local_create_info.pStages);
   free(stack_sizes);
   return result;
}

VkResult
radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   unsigned i = 0;
   for (; i < count; i++) {
      VkResult r;
      r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
                                  &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   for (; i < count; ++i)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}