diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index 8dbb2e5f0d8..54e4d424a91 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -159,7 +159,7 @@ typedef struct radv_gfx12_box_child radv_gfx12_box_child; struct radv_gfx12_box_node { uint32_t internal_base_id; uint32_t primitive_base_id; - uint32_t unused; + uint32_t parent_id; vec3 origin; uint32_t child_count_exponents; uint32_t obb_matrix_index; @@ -169,7 +169,7 @@ struct radv_gfx12_box_node { struct radv_gfx12_instance_node { mat3x4 wto_matrix; uint64_t pointer_flags_bvh_addr; - uint32_t unused; + uint32_t parent_id; uint32_t cull_mask_user_data; vec3 origin; uint32_t child_count_exponents; diff --git a/src/amd/vulkan/bvh/encode.h b/src/amd/vulkan/bvh/encode.h index c234950762f..f72a1e63e3b 100644 --- a/src/amd/vulkan/bvh/encode.h +++ b/src/amd/vulkan/bvh/encode.h @@ -300,7 +300,7 @@ radv_encode_aabb_gfx12(VOID_REF dst, vk_ir_aabb_node src) /* Writes both the HW node and user data. */ void -radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src) +radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src, uint32_t parent_id) { bit_writer child_writer; bit_writer_init(child_writer, dst); @@ -330,7 +330,7 @@ radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src) bvh_addr |= radv_encode_blas_pointer_flags(flags, blas_header.geometry_type); bit_writer_write(child_writer, uint32_t(bvh_addr & 0xffffffff), 32); bit_writer_write(child_writer, uint32_t(bvh_addr >> 32), 32); - bit_writer_write(child_writer, src.custom_instance_and_mask & 0xffffff, 32); + bit_writer_write(child_writer, parent_id, 32); bit_writer_write(child_writer, src.sbt_offset_and_flags & 0xffffff, 24); bit_writer_write(child_writer, src.custom_instance_and_mask >> 24, 8); diff --git a/src/amd/vulkan/bvh/encode_gfx12.comp b/src/amd/vulkan/bvh/encode_gfx12.comp index ce9d108c2dd..d16294b3fe8 100644 --- a/src/amd/vulkan/bvh/encode_gfx12.comp +++ b/src/amd/vulkan/bvh/encode_gfx12.comp @@ -38,13 +38,6 @@ layout(push_constant) uniform CONSTS encode_gfx12_args args; }; -void -set_parent(uint32_t child, uint32_t parent) -{ - uint64_t addr = args.output_base + args.output_bvh_offset - child / 16 * 4 - 4; - DEREF(REF(uint32_t)(addr)) = parent; -} - void encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_internal_nodes, uint32_t node_index) { @@ -197,6 +190,8 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern if (cluster.invocation_index == 0) { DEREF(dst).internal_base_id = pack_node_id(dst_internal_offset, 0); DEREF(dst).primitive_base_id = pack_node_id(dst_leaf_offset, 0); + if (is_root_node) + DEREF(dst).parent_id = RADV_BVH_INVALID_NODE; DEREF(dst).origin = origin; DEREF(dst).child_count_exponents = extent_exponents.x | (extent_exponents.y << 8) | (extent_exponents.z << 16) | ((valid_child_count - 1) << 28); @@ -207,6 +202,8 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern uint32_t type = ir_id_to_type(child); uint32_t offset = ir_id_to_offset(child); + VOID_REF dst_child_addr = args.output_base + args.output_bvh_offset + dst_offset; + uint32_t child_node_size_128b = 1; uint32_t encoded_type = 0; uint32_t cull_mask = 0xff; @@ -215,6 +212,9 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern encoded_type = 5; REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset); cull_flags = DEREF(child_node).flags & 0x3; + + REF(radv_gfx12_box_node) child_box = REF(radv_gfx12_box_node)(dst_child_addr); + DEREF(child_box).parent_id = node_id; } else { if (VK_BUILD_FLAG(RADV_BUILD_FLAG_WRITE_LEAF_NODE_OFFSETS)) { /* Write leaf node offset. */ @@ -233,8 +233,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern } } - VOID_REF dst_leaf_addr = args.output_base + args.output_bvh_offset + dst_offset; - switch (args.geometry_type) { case VK_GEOMETRY_TYPE_TRIANGLES_KHR: { vk_ir_triangle_node src_node0 = DEREF(REF(vk_ir_triangle_node)(OFFSET(args.intermediate_bvh, offset))); @@ -249,15 +247,15 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern opaque = (src_node1.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0; cull_flags &= opaque ? VK_BVH_BOX_FLAG_ONLY_OPAQUE : VK_BVH_BOX_FLAG_NO_OPAQUE; - radv_encode_triangle_gfx12(dst_leaf_addr, src_node0, src_node1); + radv_encode_triangle_gfx12(dst_child_addr, src_node0, src_node1); } else { - radv_encode_triangle_gfx12(dst_leaf_addr, src_node0); + radv_encode_triangle_gfx12(dst_child_addr, src_node0); } break; } case VK_GEOMETRY_TYPE_AABBS_KHR: { vk_ir_aabb_node src_node = DEREF(REF(vk_ir_aabb_node)(OFFSET(args.intermediate_bvh, offset))); - radv_encode_aabb_gfx12(dst_leaf_addr, src_node); + radv_encode_aabb_gfx12(dst_child_addr, src_node); bool opaque = (src_node.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0; cull_flags = opaque ? VK_BVH_BOX_FLAG_ONLY_OPAQUE : VK_BVH_BOX_FLAG_NO_OPAQUE; @@ -269,7 +267,7 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern child_node_size_128b = 2; vk_ir_instance_node src_node = DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, offset))); - radv_encode_instance_gfx12(dst_leaf_addr, src_node); + radv_encode_instance_gfx12(dst_child_addr, src_node, node_id); cull_mask = src_node.custom_instance_and_mask >> 24; cull_flags = src_node.root_flags & 0x3; @@ -300,8 +298,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern (min(uint32_t(ceil((child_aabb.max.z - origin.z) / extent.z * float(0x1000))) - 1, 0xfff) << 12) | (encoded_type << 24) | (child_node_size_128b << 28); DEREF(dst).children[child_index] = box_child; - - set_parent(pack_node_id(dst_offset, encoded_type), node_id); } else { child_index = bitCount(radv_ballot(cluster, true) & ((1u << cluster.invocation_index) - 1)) + valid_child_count; @@ -322,8 +318,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_base); DEREF(header).aabb = src.base.aabb; DEREF(header).bvh_offset = args.output_bvh_offset; - - set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE); } } diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index d75cdb8c78b..2b00d269c8c 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -79,6 +79,7 @@ enum radv_ray_query_field { radv_ray_query_trav_stack_low_watermark, radv_ray_query_trav_current_node, radv_ray_query_trav_previous_node, + radv_ray_query_trav_parent_node, radv_ray_query_trav_instance_top_node, radv_ray_query_trav_instance_bottom_node, radv_ray_query_trav_second_iteration, @@ -117,6 +118,7 @@ radv_get_ray_query_type() FIELD(trav_stack_low_watermark, glsl_uint_type()); FIELD(trav_current_node, glsl_uint_type()); FIELD(trav_previous_node, glsl_uint_type()); + FIELD(trav_parent_node, glsl_uint_type()); FIELD(trav_instance_top_node, glsl_uint_type()); FIELD(trav_instance_bottom_node, glsl_uint_type()); FIELD(trav_second_iteration, glsl_bool_type()); @@ -319,6 +321,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query rq_store(b, rq, trav_current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE)); rq_store(b, rq, trav_previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE)); + rq_store(b, rq, trav_parent_node, nir_imm_int(b, RADV_BVH_INVALID_NODE)); rq_store(b, rq, trav_instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE)); rq_store(b, rq, trav_instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT)); rq_store(b, rq, trav_second_iteration, nir_imm_false(b)); @@ -525,6 +528,7 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va .stack_low_watermark = rq_deref(b, rq, trav_stack_low_watermark), .current_node = rq_deref(b, rq, trav_current_node), .previous_node = rq_deref(b, rq, trav_previous_node), + .parent_node = rq_deref(b, rq, trav_parent_node), .instance_top_node = rq_deref(b, rq, trav_instance_top_node), .instance_bottom_node = rq_deref(b, rq, trav_instance_bottom_node), .second_iteration = rq_deref(b, rq, trav_second_iteration), diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index 549a6d8b2e9..ddd8caa5509 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -1196,9 +1196,29 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_push_if(b, overflow_cond); { nir_def *prev = nir_load_deref(b, args->vars.previous_node); - nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); - nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev); + nir_def *loaded_parent_id; + nir_def *primitive_parent_id; + nir_push_if(b, nir_test_mask(b, prev, BITFIELD64_BIT(ffs(radv_bvh_node_box16) - 1))); + { + nir_def *is_instance = nir_test_mask(b, prev, BITFIELD64_BIT(ffs(radv_bvh_node_instance) - 1)); + nir_def *field_offset = nir_bcsel( + b, is_instance, + nir_imm_int( + b, (int32_t)offsetof(struct radv_gfx12_instance_node, parent_id) - (radv_bvh_node_instance << 3)), + nir_imm_int(b, + (int32_t)offsetof(struct radv_gfx12_box_node, parent_id) - (radv_bvh_node_box32 << 3))); + nir_def *offset = nir_iadd(b, nir_ishl_imm(b, prev, 3), field_offset); + nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); + loaded_parent_id = nir_build_load_global(b, 1, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset))); + } + nir_push_else(b, NULL); + { + primitive_parent_id = nir_load_deref(b, args->vars.parent_node); + } + nir_pop_if(b, NULL); + nir_def *parent = nir_if_phi(b, loaded_parent_id, primitive_parent_id); + nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE)); { nir_store_var(b, incomplete, nir_imm_false(b), 0x1); @@ -1299,6 +1319,8 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const } nir_push_else(b, NULL); { + nir_store_deref(b, args->vars.parent_node, bvh_node, 0x1); + /* box */ if (args->use_bvh_stack_rtn) { nir_store_var(b, last_visited_node, prev_node, 0x1); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.h b/src/amd/vulkan/nir/radv_nir_rt_common.h index 08c4a4272fc..fa1cbf76480 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.h +++ b/src/amd/vulkan/nir/radv_nir_rt_common.h @@ -104,6 +104,8 @@ struct radv_ray_traversal_vars { */ nir_deref_instr *previous_node; + nir_deref_instr *parent_node; + /* When entering an instance these are the instance node and the root node of the BLAS */ nir_deref_instr *instance_top_node; nir_deref_instr *instance_bottom_node; diff --git a/src/amd/vulkan/nir/radv_nir_rt_shader.c b/src/amd/vulkan/nir/radv_nir_rt_shader.c index 7d68408544f..255df4e3f5d 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_shader.c @@ -1224,6 +1224,7 @@ struct rt_traversal_vars { nir_variable *stack_low_watermark; nir_variable *current_node; nir_variable *previous_node; + nir_variable *parent_node; nir_variable *instance_top_node; nir_variable *instance_bottom_node; nir_variable *second_iteration; @@ -1249,6 +1250,7 @@ init_traversal_vars(nir_builder *b) nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_low_watermark"); ret.current_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node;"); ret.previous_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node"); + ret.parent_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "parent_node"); ret.instance_top_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node"); ret.instance_bottom_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node"); @@ -1578,6 +1580,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin nir_store_var(b, trav_vars.stack, stack_idx, 1); nir_store_var(b, trav_vars.stack_low_watermark, nir_load_var(b, trav_vars.stack), 1); nir_store_var(b, trav_vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); + nir_store_var(b, trav_vars.parent_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); nir_store_var(b, trav_vars.instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1); nir_store_var(b, trav_vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1); nir_store_var(b, trav_vars.second_iteration, nir_imm_false(b), 0x1); @@ -1595,6 +1598,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin .stack_low_watermark = nir_build_deref_var(b, trav_vars.stack_low_watermark), .current_node = nir_build_deref_var(b, trav_vars.current_node), .previous_node = nir_build_deref_var(b, trav_vars.previous_node), + .parent_node = nir_build_deref_var(b, trav_vars.parent_node), .instance_top_node = nir_build_deref_var(b, trav_vars.instance_top_node), .instance_bottom_node = nir_build_deref_var(b, trav_vars.instance_bottom_node), .second_iteration = nir_build_deref_var(b, trav_vars.second_iteration), diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index 7c178670876..e498d1c9c02 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -84,7 +84,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device, VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); uint32_t bvh_leaf_size; - uint32_t bvh_node_size_gcd; if (radv_use_bvh8(pdev)) { switch (geometry_type) { case VK_GEOMETRY_TYPE_TRIANGLES_KHR: @@ -99,7 +98,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device, default: UNREACHABLE("Unknown VkGeometryTypeKHR"); } - bvh_node_size_gcd = RADV_GFX12_BVH_NODE_SIZE; } else { switch (geometry_type) { case VK_GEOMETRY_TYPE_TRIANGLES_KHR: @@ -114,7 +112,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device, default: UNREACHABLE("Unknown VkGeometryTypeKHR"); } - bvh_node_size_gcd = 64; } uint32_t internal_node_size = @@ -140,7 +137,8 @@ radv_get_acceleration_structure_layout(struct radv_device *device, /* Parent links, which have to go directly before bvh_offset as we index them using negative * offsets from there. */ - offset += bvh_size / bvh_node_size_gcd * 4; + if (!radv_use_bvh8(pdev)) + offset += bvh_size / 64 * 4; /* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */ offset = ALIGN(offset, 64); diff --git a/src/amd/vulkan/radv_rra_gfx12.c b/src/amd/vulkan/radv_rra_gfx12.c index 2daaa867f0c..2349ae382e7 100644 --- a/src/amd/vulkan/radv_rra_gfx12.c +++ b/src/amd/vulkan/radv_rra_gfx12.c @@ -202,7 +202,7 @@ rra_transcode_box8_node(struct rra_transcoding_context *ctx, const struct radv_g memcpy(dst, src, sizeof(struct radv_gfx12_box_node)); dst->internal_base_id = ctx->dst_internal_offset >> 3; dst->primitive_base_id = ctx->dst_leaf_offset >> 3; - dst->unused = parent_id; + dst->parent_id = parent_id; uint32_t valid_child_count_minus_one = dst->child_count_exponents >> 28; if (valid_child_count_minus_one == 0xf) @@ -279,7 +279,7 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id dst->pointer_flags_bvh_addr = dst->pointer_flags_bvh_addr - (user_data->bvh_offset >> 3) + (sizeof(struct rra_accel_struct_metadata) >> 3); - dst->unused = parent_id; + dst->parent_id = parent_id; sideband_data->instance_index = user_data->instance_index; sideband_data->custom_instance_and_flags = user_data->custom_instance;