diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c index 9de26a054af..efb66864d77 100644 --- a/src/amd/vulkan/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c @@ -391,34 +391,26 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins nir_ssa_def *accel_struct = instr->src[1].ssa; - nir_push_if(b, nir_ine_imm(b, accel_struct, 0)); - { - nir_ssa_def *bvh_offset = nir_build_load_global( - b, 1, 32, - nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), - .access = ACCESS_NON_WRITEABLE); - nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset)); - bvh_base = build_addr_to_node(b, bvh_base); + nir_ssa_def *bvh_offset = nir_build_load_global( + b, 1, 32, + nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), + .access = ACCESS_NON_WRITEABLE); + nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset)); + bvh_base = build_addr_to_node(b, bvh_base); - rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1); - rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1); + rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1); + rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1); - if (vars->stack) { - rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1); - rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1); - } else { - nir_ssa_def *base_offset = - nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t)); - base_offset = nir_iadd_imm(b, base_offset, vars->shared_base); - rq_store_var(b, index, vars->trav.stack, base_offset, 0x1); - rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1); - } + if (vars->stack) { + rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1); + rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1); + } else { + nir_ssa_def *base_offset = + nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t)); + base_offset = nir_iadd_imm(b, base_offset, vars->shared_base); + rq_store_var(b, index, vars->trav.stack, base_offset, 0x1); + rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1); } - nir_push_else(b, NULL); - { - rq_store_var(b, index, vars->root_bvh_base, nir_imm_int64(b, 0), 0x1); - } - nir_pop_if(b, NULL); rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1); rq_store_var(b, index, vars->trav.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 103ae5e1763..a9217f35ee9 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -519,230 +519,169 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) { nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); - nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1); + nir_store_var(b, incomplete, nir_imm_true(b), 0x1); - nir_push_if(b, nir_load_var(b, incomplete)); + nir_ssa_def *desc = create_bvh_descriptor(b); + nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); + + struct radv_ray_flags ray_flags = { + .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask), + .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask), + .terminate_on_first_hit = + nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask), + .no_cull_front = nir_ieq_imm( + b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0), + .no_cull_back = + nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0), + .no_cull_opaque = + nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0), + .no_cull_no_opaque = + nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0), + .no_skip_triangles = + nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0), + .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0), + }; + nir_push_loop(b); { - nir_ssa_def *desc = create_bvh_descriptor(b); - nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); - - struct radv_ray_flags ray_flags = { - .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask), - .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask), - .terminate_on_first_hit = - nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask), - .no_cull_front = nir_ieq_imm( - b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0), - .no_cull_back = nir_ieq_imm( - b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0), - .no_cull_opaque = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0), - .no_cull_no_opaque = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0), - .no_skip_triangles = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0), - .no_skip_aabbs = - nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0), - }; - nir_push_loop(b); + nir_push_if( + b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE)); { - nir_push_if( - b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE)); + /* Early exit if we never overflowed the stack, to avoid having to backtrack to + * the root for no reason. */ + nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack), + nir_imm_int(b, args->stack_base + args->stack_stride))); { - /* Early exit if we never overflowed the stack, to avoid having to backtrack to - * the root for no reason. */ - nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack), - nir_imm_int(b, args->stack_base + args->stack_stride))); + nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1); + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); + + nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack), + nir_load_deref(b, args->vars.stack)); + nir_ssa_def *root_instance_exit = + nir_ieq(b, nir_load_deref(b, args->vars.previous_node), + nir_load_deref(b, args->vars.instance_bottom_node)); + nir_if *instance_exit = + nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit)); + instance_exit->control = nir_selection_control_dont_flatten; + { + nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1); + nir_store_deref(b, args->vars.previous_node, + nir_load_deref(b, args->vars.instance_top_node), 1); + nir_store_deref(b, args->vars.instance_bottom_node, + nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1); + + nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1); + nir_store_deref(b, args->vars.origin, args->origin, 7); + nir_store_deref(b, args->vars.dir, args->dir, 7); + nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7); + } + nir_pop_if(b, NULL); + + nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), + nir_load_deref(b, args->vars.stack))); + { + nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node); + nir_ssa_def *bvh_addr = + build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); + + nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev); + nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, RADV_BVH_INVALID_NODE))); { nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1); nir_jump(b, nir_jump_break); } nir_pop_if(b, NULL); - - nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack), - nir_load_deref(b, args->vars.stack)); - nir_ssa_def *root_instance_exit = - nir_ieq(b, nir_load_deref(b, args->vars.previous_node), - nir_load_deref(b, args->vars.instance_bottom_node)); - nir_if *instance_exit = - nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit)); - instance_exit->control = nir_selection_control_dont_flatten; - { - nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1); - nir_store_deref(b, args->vars.previous_node, - nir_load_deref(b, args->vars.instance_top_node), 1); - nir_store_deref(b, args->vars.instance_bottom_node, - nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1); - - nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1); - nir_store_deref(b, args->vars.origin, args->origin, 7); - nir_store_deref(b, args->vars.dir, args->dir, 7); - nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7); - } - nir_pop_if(b, NULL); - - nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), - nir_load_deref(b, args->vars.stack))); - { - nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node); - nir_ssa_def *bvh_addr = - build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); - - nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev); - nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, RADV_BVH_INVALID_NODE))); - { - nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1); - nir_jump(b, nir_jump_break); - } - nir_pop_if(b, NULL); - nir_store_deref(b, args->vars.current_node, parent, 0x1); - } - nir_push_else(b, NULL); - { - nir_store_deref( - b, args->vars.stack, - nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1); - - nir_ssa_def *stack_ptr = - nir_umod(b, nir_load_deref(b, args->vars.stack), - nir_imm_int(b, args->stack_stride * args->stack_entries)); - nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args); - nir_store_deref(b, args->vars.current_node, bvh_node, 0x1); - nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), - 0x1); - } - nir_pop_if(b, NULL); + nir_store_deref(b, args->vars.current_node, parent, 0x1); } nir_push_else(b, NULL); { + nir_store_deref( + b, args->vars.stack, + nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1); + + nir_ssa_def *stack_ptr = + nir_umod(b, nir_load_deref(b, args->vars.stack), + nir_imm_int(b, args->stack_stride * args->stack_entries)); + nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args); + nir_store_deref(b, args->vars.current_node, bvh_node, 0x1); nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); } nir_pop_if(b, NULL); + } + nir_push_else(b, NULL); + { + nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); + } + nir_pop_if(b, NULL); - nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node); + nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node); - nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node); - nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1); - nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); + nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node); + nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1); + nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); - nir_ssa_def *global_bvh_node = - nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); + nir_ssa_def *global_bvh_node = + nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); - nir_ssa_def *intrinsic_result = NULL; - if (!radv_emulate_rt(device->physical_device)) { - intrinsic_result = nir_bvh64_intersect_ray_amd( - b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), - nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), - nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); - } + nir_ssa_def *intrinsic_result = NULL; + if (!radv_emulate_rt(device->physical_device)) { + intrinsic_result = nir_bvh64_intersect_ray_amd( + b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax), + nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), + nir_load_deref(b, args->vars.inv_dir)); + } - nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7); - nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_box16))); + nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7); + nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_box16))); + { + nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_instance))); { - nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_instance))); + nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb)); { - nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb)); - { - insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node); - } - nir_push_else(b, NULL); - { - /* instance */ - nir_ssa_def *instance_node_addr = - build_node_to_addr(device, b, global_bvh_node, false); - nir_ssa_def *instance_data = nir_build_load_global( - b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); - nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); - nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24); - - nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, args->cull_mask), 0)); - { - nir_jump(b, nir_jump_continue); - } - nir_pop_if(b, NULL); - - nir_ssa_def *wto_matrix[3]; - nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); - - nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); - nir_store_deref(b, args->vars.bvh_base, - nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1); - - /* Push the instance root node onto the stack */ - nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), - 0x1); - nir_store_deref(b, args->vars.instance_bottom_node, - nir_imm_int(b, RADV_BVH_ROOT_NODE), 1); - nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1); - - /* Transform the ray into object space */ - nir_store_deref(b, args->vars.origin, - nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7); - nir_store_deref(b, args->vars.dir, - nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7); - nir_store_deref(b, args->vars.inv_dir, - nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7); - - nir_store_deref(b, args->vars.sbt_offset_and_flags, - nir_channel(b, instance_data, 3), 1); - nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); - } - nir_pop_if(b, NULL); + insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node); } nir_push_else(b, NULL); { - nir_ssa_def *result = intrinsic_result; - if (!result) { - /* If we didn't run the intrinsic cause the hardware didn't support it, - * emulate ray/box intersection here */ - result = intersect_ray_amd_software_box( - device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), - nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), - nir_load_deref(b, args->vars.inv_dir)); - } + /* instance */ + nir_ssa_def *instance_node_addr = + build_node_to_addr(device, b, global_bvh_node, false); + nir_ssa_def *instance_data = nir_build_load_global( + b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); + nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); + nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24); - /* box */ - nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE)); + nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, args->cull_mask), 0)); { - nir_ssa_def *new_nodes[4]; - for (unsigned i = 0; i < 4; ++i) - new_nodes[i] = nir_channel(b, result, i); - - for (unsigned i = 1; i < 4; ++i) - nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE)); - - for (unsigned i = 4; i-- > 1;) { - nir_ssa_def *stack = nir_load_deref(b, args->vars.stack); - nir_ssa_def *stack_ptr = nir_umod( - b, stack, nir_imm_int(b, args->stack_entries * args->stack_stride)); - args->stack_store_cb(b, stack_ptr, new_nodes[i], args); - nir_store_deref(b, args->vars.stack, - nir_iadd_imm(b, stack, args->stack_stride), 1); - - if (i == 1) { - nir_ssa_def *new_watermark = - nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), - -args->stack_entries * args->stack_stride); - new_watermark = nir_imax( - b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark); - nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1); - } - - nir_pop_if(b, NULL); - } - nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1); - } - nir_push_else(b, NULL); - { - nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE); - for (unsigned i = 0; i < 3; ++i) { - next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), - nir_channel(b, result, i + 1), next); - } - nir_store_deref(b, args->vars.current_node, next, 0x1); + nir_jump(b, nir_jump_continue); } nir_pop_if(b, NULL); + + nir_ssa_def *wto_matrix[3]; + nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); + + nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); + nir_store_deref(b, args->vars.bvh_base, + nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1); + + /* Push the instance root node onto the stack */ + nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1); + nir_store_deref(b, args->vars.instance_bottom_node, + nir_imm_int(b, RADV_BVH_ROOT_NODE), 1); + nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1); + + /* Transform the ray into object space */ + nir_store_deref(b, args->vars.origin, + nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7); + nir_store_deref(b, args->vars.dir, + nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7); + nir_store_deref(b, args->vars.inv_dir, + nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7); + + nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), + 1); + nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); } nir_pop_if(b, NULL); } @@ -751,19 +690,73 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, nir_ssa_def *result = intrinsic_result; if (!result) { /* If we didn't run the intrinsic cause the hardware didn't support it, - * emulate ray/tri intersection here */ - result = intersect_ray_amd_software_tri( + * emulate ray/box intersection here */ + result = intersect_ray_amd_software_box( device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } - insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node); + + /* box */ + nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE)); + { + nir_ssa_def *new_nodes[4]; + for (unsigned i = 0; i < 4; ++i) + new_nodes[i] = nir_channel(b, result, i); + + for (unsigned i = 1; i < 4; ++i) + nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE)); + + for (unsigned i = 4; i-- > 1;) { + nir_ssa_def *stack = nir_load_deref(b, args->vars.stack); + nir_ssa_def *stack_ptr = + nir_umod(b, stack, nir_imm_int(b, args->stack_entries * args->stack_stride)); + args->stack_store_cb(b, stack_ptr, new_nodes[i], args); + nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), + 1); + + if (i == 1) { + nir_ssa_def *new_watermark = + nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), + -args->stack_entries * args->stack_stride); + new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), + new_watermark); + nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1); + } + + nir_pop_if(b, NULL); + } + nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1); + } + nir_push_else(b, NULL); + { + nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE); + for (unsigned i = 0; i < 3; ++i) { + next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), + nir_channel(b, result, i + 1), next); + } + nir_store_deref(b, args->vars.current_node, next, 0x1); + } + nir_pop_if(b, NULL); } nir_pop_if(b, NULL); } - nir_pop_loop(b, NULL); + nir_push_else(b, NULL); + { + nir_ssa_def *result = intrinsic_result; + if (!result) { + /* If we didn't run the intrinsic cause the hardware didn't support it, + * emulate ray/tri intersection here */ + result = intersect_ray_amd_software_tri( + device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), + nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), + nir_load_deref(b, args->vars.inv_dir)); + } + insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node); + } + nir_pop_if(b, NULL); } - nir_pop_if(b, NULL); + nir_pop_loop(b, NULL); return nir_load_var(b, incomplete); } diff --git a/src/amd/vulkan/radv_rt_shader.c b/src/amd/vulkan/radv_rt_shader.c index 084869e2f10..90efab76a22 100644 --- a/src/amd/vulkan/radv_rt_shader.c +++ b/src/amd/vulkan/radv_rt_shader.c @@ -1333,87 +1333,82 @@ build_traversal_shader(struct radv_device *device, nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1); - nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0)); - { - nir_ssa_def *bvh_offset = nir_build_load_global( - &b, 1, 32, - nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), - .access = ACCESS_NON_WRITEABLE); - nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset)); - root_bvh_base = build_addr_to_node(&b, root_bvh_base); + nir_ssa_def *bvh_offset = nir_build_load_global( + &b, 1, 32, + nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)), + .access = ACCESS_NON_WRITEABLE); + nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset)); + root_bvh_base = build_addr_to_node(&b, root_bvh_base); - nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1); + nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1); - nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7); + nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7); - nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7); - nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7); - nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), - 7); - nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1); - nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); + nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7); + nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7); + nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7); + nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1); + nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1); - nir_store_var(&b, trav_vars.stack, - nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1); - nir_store_var(&b, trav_vars.stack_low_watermark, nir_load_var(&b, trav_vars.stack), 1); - nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1); - nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); - nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); - nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), - 0x1); + nir_store_var(&b, trav_vars.stack, + nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1); + nir_store_var(&b, trav_vars.stack_low_watermark, nir_load_var(&b, trav_vars.stack), 1); + nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1); + nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); + nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1); + nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT), + 0x1); - nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1); + nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1); - struct radv_ray_traversal_vars trav_vars_args = { - .tmax = nir_build_deref_var(&b, vars.tmax), - .origin = nir_build_deref_var(&b, trav_vars.origin), - .dir = nir_build_deref_var(&b, trav_vars.dir), - .inv_dir = nir_build_deref_var(&b, trav_vars.inv_dir), - .bvh_base = nir_build_deref_var(&b, trav_vars.bvh_base), - .stack = nir_build_deref_var(&b, trav_vars.stack), - .top_stack = nir_build_deref_var(&b, trav_vars.top_stack), - .stack_low_watermark = nir_build_deref_var(&b, trav_vars.stack_low_watermark), - .current_node = nir_build_deref_var(&b, trav_vars.current_node), - .previous_node = nir_build_deref_var(&b, trav_vars.previous_node), - .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node), - .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node), - .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr), - .sbt_offset_and_flags = nir_build_deref_var(&b, trav_vars.sbt_offset_and_flags), - }; + struct radv_ray_traversal_vars trav_vars_args = { + .tmax = nir_build_deref_var(&b, vars.tmax), + .origin = nir_build_deref_var(&b, trav_vars.origin), + .dir = nir_build_deref_var(&b, trav_vars.dir), + .inv_dir = nir_build_deref_var(&b, trav_vars.inv_dir), + .bvh_base = nir_build_deref_var(&b, trav_vars.bvh_base), + .stack = nir_build_deref_var(&b, trav_vars.stack), + .top_stack = nir_build_deref_var(&b, trav_vars.top_stack), + .stack_low_watermark = nir_build_deref_var(&b, trav_vars.stack_low_watermark), + .current_node = nir_build_deref_var(&b, trav_vars.current_node), + .previous_node = nir_build_deref_var(&b, trav_vars.previous_node), + .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node), + .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node), + .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr), + .sbt_offset_and_flags = nir_build_deref_var(&b, trav_vars.sbt_offset_and_flags), + }; - struct traversal_data data = { - .device = device, - .createInfo = pCreateInfo, - .vars = &vars, - .trav_vars = &trav_vars, - .barycentrics = barycentrics, - }; + struct traversal_data data = { + .device = device, + .createInfo = pCreateInfo, + .vars = &vars, + .trav_vars = &trav_vars, + .barycentrics = barycentrics, + }; - struct radv_ray_traversal_args args = { - .root_bvh_base = root_bvh_base, - .flags = nir_load_var(&b, vars.flags), - .cull_mask = nir_load_var(&b, vars.cull_mask), - .origin = nir_load_var(&b, vars.origin), - .tmin = nir_load_var(&b, vars.tmin), - .dir = nir_load_var(&b, vars.direction), - .vars = trav_vars_args, - .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t), - .stack_entries = MAX_STACK_ENTRY_COUNT, - .stack_base = 0, - .stack_store_cb = store_stack_entry, - .stack_load_cb = load_stack_entry, - .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) - ? NULL - : handle_candidate_aabb, - .triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR) - ? NULL - : handle_candidate_triangle, - .data = &data, - }; + struct radv_ray_traversal_args args = { + .root_bvh_base = root_bvh_base, + .flags = nir_load_var(&b, vars.flags), + .cull_mask = nir_load_var(&b, vars.cull_mask), + .origin = nir_load_var(&b, vars.origin), + .tmin = nir_load_var(&b, vars.tmin), + .dir = nir_load_var(&b, vars.direction), + .vars = trav_vars_args, + .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t), + .stack_entries = MAX_STACK_ENTRY_COUNT, + .stack_base = 0, + .stack_store_cb = store_stack_entry, + .stack_load_cb = load_stack_entry, + .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR) + ? NULL + : handle_candidate_aabb, + .triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR) + ? NULL + : handle_candidate_triangle, + .data = &data, + }; - radv_build_ray_traversal(device, &b, &args); - } - nir_pop_if(&b, NULL); + radv_build_ray_traversal(device, &b, &args); /* Initialize follow-up shader. */ nir_push_if(&b, nir_load_var(&b, trav_vars.hit));