diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index d98ea35ddc3..f9d2770f4c6 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -108,6 +108,8 @@ static const struct debug_control_bitset debug_control[] = { OPT1("bvh_tlas_ir_hdr", DEBUG_BVH_TLAS_IR_HDR), OPT1("bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS), OPT1("bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS), + OPT1("bvh_pcrel_map", DEBUG_BVH_PCREL_MAP), + OPT1("bvh_update_as", DEBUG_BVH_UPDATE_AS), OPT1("bvh_no_build", DEBUG_BVH_NO_BUILD), OPT1("task", DEBUG_TASK), OPT1("mesh", DEBUG_MESH), diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 355208dd710..36c8b2b7fae 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -94,6 +94,8 @@ enum intel_debug_flag { DEBUG_BVH_TLAS_IR_HDR, DEBUG_BVH_BLAS_IR_AS, DEBUG_BVH_TLAS_IR_AS, + DEBUG_BVH_PCREL_MAP, + DEBUG_BVH_UPDATE_AS, DEBUG_BVH_NO_BUILD, DEBUG_NO_SEND_GATHER, DEBUG_NO_VRT, @@ -134,7 +136,9 @@ extern BITSET_WORD intel_debug[BITSET_WORDS(INTEL_DEBUG_MAX)]; INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) || \ INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) || \ INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) || \ - INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS))) + INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) || \ + INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) || \ + INTEL_DEBUG(DEBUG_BVH_PCREL_MAP))) extern uint64_t intel_simd; extern uint32_t intel_debug_bkp_before_draw_count; diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index 75438d4d2f4..3b211042b94 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -502,6 +502,7 @@ CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS) +CREATE_DUAL_EVENT_CALLBACK(as_update, INTEL_DS_QUEUE_STAGE_AS) 
CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS) void diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index 1b285db030a..c6bc7958f13 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -266,6 +266,11 @@ def define_tracepoints(args): Arg(type='uint32_t', var='key', c_format='%x'), Arg(type='uint32_t', var='n_leaves', c_format='%u'), Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')]) + begin_end_tp('as_update', repeat_last=True, + tp_args=[Arg(type='uint32_t', var='pass', c_format='%u'), + Arg(type='uint32_t', var='key', c_format='%x'), + Arg(type='uint32_t', var='n_leaves', c_format='%u'), + Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')]) begin_end_tp('as_copy', repeat_last=True) begin_end_tp('rays', diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5bbc06bbdf8..5e448949869 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2517,12 +2517,15 @@ enum anv_object_key_bvh_type { ANV_OBJECT_KEY_BVH_ENCODE = VK_META_OBJECT_KEY_DRIVER_OFFSET, ANV_OBJECT_KEY_BVH_HEADER, ANV_OBJECT_KEY_BVH_COPY, + ANV_OBJECT_KEY_BVH_UPDATE, }; enum bvh_dump_type { BVH_ANV, BVH_IR_HDR, - BVH_IR_AS + BVH_IR_AS, + BVH_ANV_PCREL, + BVH_ANV_UPDATE }; struct anv_bvh_dump { diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index e7d83a17795..6122aa29c0c 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -293,6 +293,12 @@ create_bvh_dump_file(struct anv_bvh_dump *bvh) case BVH_IR_AS: dump_sub_directory = "BVH_IR_AS"; break; + case BVH_ANV_PCREL: + dump_sub_directory = "BVH_ANV_PCREL"; + break; + case BVH_ANV_UPDATE: + dump_sub_directory = "BVH_ANV_UPDATE"; + break; default: UNREACHABLE("invalid dump type"); } diff --git a/src/intel/vulkan/bvh/anv_build_helpers.h b/src/intel/vulkan/bvh/anv_build_helpers.h index f7793179900..dd047f3e0c3 100644 --- a/src/intel/vulkan/bvh/anv_build_helpers.h 
+++ b/src/intel/vulkan/bvh/anv_build_helpers.h @@ -18,4 +18,45 @@ TYPE(child_data, 1); TYPE(instance_leaf_part0, 8); TYPE(instance_leaf_part1, 8); +#define ULP 1.1920928955078125e-7f + +/* An offset in 64B blocks from args.output_bvh that points to output of + * encoded nodes. Can be a leaf or internal node. + */ +#define BLOCK uint32_t +#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) + +uint32_t +get_instance_flag(uint32_t src) +{ + return ((src >> 24) & 0xff); +} + +vk_aabb +conservative_aabb(vk_aabb input_aabb) +{ + vk_aabb out_aabb; + + vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); + float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); + + out_aabb.min = input_aabb.min - vec3(err); + out_aabb.max = input_aabb.max + vec3(err); + + return out_aabb; +} + +void +aabb_extend(inout vk_aabb v1, vk_aabb v2) +{ + v1.min = min(v1.min, v2.min); + v1.max = max(v1.max, v2.max); +} + +vec3 +aabb_size(vk_aabb input_aabb) +{ + return input_aabb.max - input_aabb.min; +} + #endif diff --git a/src/intel/vulkan/bvh/anv_build_interface.h b/src/intel/vulkan/bvh/anv_build_interface.h index ebca43959f1..850fa942105 100644 --- a/src/intel/vulkan/bvh/anv_build_interface.h +++ b/src/intel/vulkan/bvh/anv_build_interface.h @@ -6,6 +6,8 @@ #ifndef ANV_BVH_BUILD_INTERFACE_H #define ANV_BVH_BUILD_INTERFACE_H +#include "vk_build_interface.h" + #ifdef VULKAN #include "anv_build_helpers.h" #else @@ -15,6 +17,21 @@ #define VOID_REF uint64_t #endif +#define ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE (1u << (VK_BUILD_FLAG_COUNT + 0)) + +struct update_args { + VOID_REF output_bvh; + REF(uint32_t) internal_ready_count; + REF(vk_aabb) aabb_scratch; + uint32_t leaf_node_count; + uint32_t primitive_count; + uint32_t output_bvh_offset; + VOID_REF parent_child_map; + VOID_REF leaf_block_offset_map; + + vk_bvh_geometry_data geom_data; +}; + struct encode_args { /* Address within the IR BVH, marking the start of leaves/internal 
nodes. */ VOID_REF intermediate_bvh; @@ -32,6 +49,9 @@ struct encode_args { uint32_t leaf_node_count; uint32_t geometry_type; + + VOID_REF parent_child_map; + VOID_REF leaf_block_offset_map; }; struct header_args { @@ -47,6 +67,11 @@ struct header_args { uint64_t bvh_size; uint8_t is_compacted; + + uint64_t parent_child_map_offset; + uint64_t leaf_block_map_offset; + uint32_t leaf_count; + uint32_t total_nodes; }; #define ANV_COPY_MODE_COPY 0 diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index 011b449dfd9..264c5d6eb73 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -76,7 +76,12 @@ struct anv_accel_struct_header { uint32_t instance_leaves_offset; - uint32_t padding[40]; + uint64_t parent_child_map_offset; + uint64_t leaf_block_map_offset; + uint32_t leaf_count; + uint32_t total_nodes; + + uint32_t padding[34]; }; /* Mixed internal node with type per child */ @@ -132,7 +137,9 @@ struct anv_quad_leaf_node { * Reserved (9-bits) */ uint32_t prim_index1_delta; - float v[4][3]; + float v[3][3]; + /* Second triangle coords */ + float v1[3]; }; struct anv_procedural_leaf_node { @@ -170,6 +177,12 @@ struct anv_internal_node { */ uint8_t node_type; + /* Note: We are going to use this field to track number of children this + * internal node has. + * + * XXX: Keep an eye out on this field for future platforms in case if anything + * changes. 
+ */ uint8_t reserved; /* 2^exp_x is the size of the grid in x dimension */ @@ -325,13 +338,14 @@ struct anv_instance_leaf { | start with root node, | | followed by interleaving | | internal nodes and leaves | -|-------------------------------| -| padding to align to | -| 64 bytes boundary | |-------------------------------| bvh_layout.instance_leaves_offset | For a TLAS, the pointers | | to all anv_instance_leaves | | For a BLAS, nothing here | +|-------------------------------| bvh_layout.parent_child_map_offset +| Parent - child map | +|-------------------------------| bvh_layout.leaf_block_map_offset +| Leaf block offset map | |-------------------------------| | padding to align to | | 64 bytes boundary | bvh_layout.size @@ -344,13 +358,23 @@ struct bvh_layout { */ uint64_t bvh_offset; + /* This tracks pointers to all anv_instance_leaves of a TLAS (unused for a BLAS). */ + uint64_t instance_leaves_offset; + + /* This map stores parent BVH offset for each child + * + * Lower 26bits - parent block index + * upper 6bits - parent child slot index + * */ + uint64_t parent_child_map_offset; + + /* This map stores BVH block index for each leaf id (IR ID) */ + uint64_t leaf_block_map_offset; + /* Total size = bvh_offset + leaves + internal_nodes (assuming there's no * internal node collpased) */ uint64_t size; - - /* This tracks pointers to all anv_instance_leaves for BLAS. 
*/ - uint64_t instance_leaves_offset; }; #endif diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index a13b97e44f0..a9450468a48 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -9,8 +9,8 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #include "anv_build_helpers.h" #include "anv_build_interface.h" +#include "encode.h" -#define ULP 1.1920928955078125e-7f #define READY_TO_WRITE(offset) ((offset) < VK_NULL_BVH_OFFSET) #define ASSIGNED_NODE_TO_ENCODE (gl_GlobalInvocationID.x < DEREF(args.header).ir_internal_node_count) @@ -24,12 +24,6 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #define IR_NODE uint32_t #define NODE_OFFSET(node) (OFFSET(args.intermediate_bvh, ir_id_to_offset(node))) -/* An offset in 64B blocks from args.output_bvh that points to output of - * encoded nodes. Can be a leaf or internal node. - */ -#define BLOCK uint32_t -#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) - layout(push_constant) uniform CONSTS { encode_args args; }; @@ -41,12 +35,6 @@ debug_dump(uint32_t offset, uint32_t value) DEREF(msg) = value; } -uint32_t -get_instance_flag(uint32_t src) -{ - return ((src >> 24) & 0xff); -} - struct anv_cluster { /* simd lane inside cluster: 0 .. 
7 */ uint32_t idx; @@ -77,153 +65,21 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ if (DEBUG_EXIT_EARLY(type)) return; + switch (type) { case vk_ir_node_triangle: { - REF(anv_quad_leaf_node) quad_leaf = REF(anv_quad_leaf_node)(dst_node); - vk_ir_triangle_node src = DEREF(REF(vk_ir_triangle_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_QUAD & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - /* Geometry opqaue (1-bit) is encoded on 30-bit index */ - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - /* Disable the second triangle */ - uint32_t prim_index1_delta = 0; - /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. */ - prim_index1_delta |= (1 << 22); - - DEREF(quad_leaf).prim_index1_delta = prim_index1_delta; - DEREF(quad_leaf).prim_index0 = src.triangle_id; - DEREF(quad_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(quad_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* Setup single triangle */ - for (uint32_t i = 0; i < 3; i++) { - for (uint32_t j = 0; j < 3; j++) { - DEREF(quad_leaf).v[i][j] = src.coords[i][j]; - } - } + anv_encode_triangle(dst_node, src); break; } case vk_ir_node_aabb: { - REF(anv_procedural_leaf_node) aabb_leaf = REF(anv_procedural_leaf_node)(dst_node); - vk_ir_aabb_node src = DEREF(REF(vk_ir_aabb_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - 
DEREF(aabb_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(aabb_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* num primitives = 1 */ - uint32_t dw1 = 1; - /* "last" has only 1 bit, and it is set. */ - dw1 |= (1 << 31); - - DEREF(aabb_leaf).DW1 = dw1; - DEREF(aabb_leaf).primIndex[0] = src.primitive_id; + anv_encode_aabb(dst_node, src); break; } case vk_ir_node_instance: { vk_ir_instance_node src = DEREF(REF(vk_ir_instance_node)(src_node)); - - REF(anv_instance_leaf) dst_instance = REF(anv_instance_leaf)(dst_node); - REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); - uint64_t start_node_ptr = uint64_t(src.base_ptr) + args.output_bvh_offset; - -#if GFX_VERx10 >= 300 - DEREF(dst_instance).part0.QW_startNodePtr = start_node_ptr; - uint32_t instance_contribution_and_geom_mask = 0; - instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = instance_contribution_and_geom_mask; - - uint32_t inst_flags_and_the_rest = 0; - inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); - inst_flags_and_the_rest |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? 
- ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - - DEREF(dst_instance).part0.DW1 = inst_flags_and_the_rest; - -#else - uint32_t shader_index_and_geom_mask = 0; - shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = shader_index_and_geom_mask; - - uint32_t instance_contribution_and_geom_flags = 0; - instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_flags |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? - ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - DEREF(dst_instance).part0.DW1 = instance_contribution_and_geom_flags; - - DEREF(dst_instance).part0.QW_startNodePtr = - (start_node_ptr & ((1ul << 48) - 1)) | - (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); -#endif - - mat4 transform = mat4(src.otw_matrix); - - mat4 inv_transform = transpose(inverse(transpose(transform))); - mat3x4 wto_matrix = mat3x4(inv_transform); - mat3x4 otw_matrix = mat3x4(transform); - - /* Arrange WTO transformation matrix in column-major order */ - DEREF(dst_instance).part0.world2obj_vx_x = wto_matrix[0][0]; - DEREF(dst_instance).part0.world2obj_vx_y = wto_matrix[1][0]; - DEREF(dst_instance).part0.world2obj_vx_z = wto_matrix[2][0]; - DEREF(dst_instance).part0.obj2world_p_x = otw_matrix[0][3]; - - DEREF(dst_instance).part0.world2obj_vy_x = wto_matrix[0][1]; - DEREF(dst_instance).part0.world2obj_vy_y = wto_matrix[1][1]; - DEREF(dst_instance).part0.world2obj_vy_z = wto_matrix[2][1]; - DEREF(dst_instance).part0.obj2world_p_y = otw_matrix[1][3]; - - DEREF(dst_instance).part0.world2obj_vz_x = wto_matrix[0][2]; - DEREF(dst_instance).part0.world2obj_vz_y = wto_matrix[1][2]; - DEREF(dst_instance).part0.world2obj_vz_z = wto_matrix[2][2]; - DEREF(dst_instance).part0.obj2world_p_z = otw_matrix[2][3]; - - /* Arrange OTW transformation matrix in column-major order */ - DEREF(dst_instance).part1.obj2world_vx_x = otw_matrix[0][0]; - 
DEREF(dst_instance).part1.obj2world_vx_y = otw_matrix[1][0]; - DEREF(dst_instance).part1.obj2world_vx_z = otw_matrix[2][0]; - DEREF(dst_instance).part1.world2obj_p_x = wto_matrix[0][3]; - - DEREF(dst_instance).part1.obj2world_vy_x = otw_matrix[0][1]; - DEREF(dst_instance).part1.obj2world_vy_y = otw_matrix[1][1]; - DEREF(dst_instance).part1.obj2world_vy_z = otw_matrix[2][1]; - DEREF(dst_instance).part1.world2obj_p_y = wto_matrix[1][3]; - - DEREF(dst_instance).part1.obj2world_vz_x = otw_matrix[0][2]; - DEREF(dst_instance).part1.obj2world_vz_y = otw_matrix[1][2]; - DEREF(dst_instance).part1.obj2world_vz_z = otw_matrix[2][2]; - DEREF(dst_instance).part1.world2obj_p_z = wto_matrix[2][3]; - - DEREF(dst_instance).part1.bvh_ptr = src.base_ptr; - DEREF(dst_instance).part1.instance_index = src.instance_id; - DEREF(dst_instance).part1.instance_id = src.custom_instance_and_mask & 0xffffff; - + anv_encode_instance(dst_node, src); uint64_t instance_leaves_addr_base = args.instance_leaves_addr; uint64_t slot = ir_id_to_offset(child) / SIZEOF(vk_ir_instance_node); DEREF(INDEX(uint64_t, instance_leaves_addr_base, slot)) = dst_node; @@ -232,33 +88,6 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ } } -vk_aabb -conservative_aabb(vk_aabb input_aabb) -{ - vk_aabb out_aabb; - - vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); - float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); - - out_aabb.min = input_aabb.min - vec3(err); - out_aabb.max = input_aabb.max + vec3(err); - - return out_aabb; -} - -void -aabb_extend(inout vk_aabb v1, vk_aabb v2) -{ - v1.min = min(v1.min, v2.min); - v1.max = max(v1.max, v2.max); -} - -vec3 -aabb_size(vk_aabb input_aabb) -{ - return input_aabb.max - input_aabb.min; -} - /* Determine the node_type based on type of its children. * If children are all the same leaves, this internal node is a fat leaf; * Otherwise, it's a mixed node. 
@@ -353,6 +182,9 @@ encode_internal_node(uint32_t child, uint32_t child_block_offset_from_internal_n DEREF(dst_node).exp_z = exp_i8[2]; DEREF(dst_node).node_mask = uint8_t(0xff); DEREF(dst_node).node_type = node_type; + + /* Using reserved field to track number of children. */ + DEREF(dst_node).reserved = uint8_t(child_count); } child_aabb = conservative_aabb(child_aabb); @@ -530,6 +362,9 @@ main() /* Tracks BLOCK where the next children should be encoded. */ DEREF(args.header).dst_node_offset = 1; DEREF(header).instance_count = 0; + if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) { + DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET; + } } IR_NODE children[6] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE, @@ -612,7 +447,6 @@ main() REF(vk_ir_box_node)NODE_OFFSET(children[i]); DEREF(child_node).bvh_offset = child_offset; } - child_offset += (type == vk_ir_node_instance) ? 2 : 1; } @@ -661,9 +495,30 @@ main() child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb; uint32_t type = ir_id_to_type(child); - if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) - encode_leaf_node(type, child, BLOCK_OFFSET(child_block), - header); + if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) { + if (child != VK_BVH_INVALID_NODE && + (type == vk_ir_node_triangle || type == vk_ir_node_aabb)) { + uint32_t ir_offset = ir_id_to_offset(child); + uint32_t leaf_id = ir_offset / intermediate_leaf_node_size; + /* Block offset 0 is assigned to root, so avoid accidental + * assignment. + */ + DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) = + (child_block != 0) ? child_block : VK_NULL_BVH_OFFSET; + } + + /* Track each children's parent in the map. 
*/ + if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) { + uint32_t pcm = 0; + pcm = internal_node_block | (cluster.idx << 26); + DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm; + } + } + + if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) { + encode_leaf_node(type, child, + BLOCK_OFFSET(child_block), header); + } } BLOCK child_block_offset = diff --git a/src/intel/vulkan/bvh/encode.h b/src/intel/vulkan/bvh/encode.h new file mode 100644 index 00000000000..cdf2c7ac10d --- /dev/null +++ b/src/intel/vulkan/bvh/encode.h @@ -0,0 +1,148 @@ +/* Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef ANV_BVH_ENCODE_H +#define ANV_BVH_ENCODE_H + +#include "anv_build_helpers.h" +#include "anv_build_interface.h" + +void +anv_encode_triangle(VOID_REF dst_addr, vk_ir_triangle_node src) +{ + REF(anv_quad_leaf_node) dst = REF(anv_quad_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= (src.geometry_id_and_flags & 0xffffff); + /* The geometry opaque flag (1 bit) is encoded at bit 30 */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + /* Disable the second triangle */ + uint32_t prim_index1_delta = 0; + /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. 
+ */ + prim_index1_delta |= (1 << 22); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex. Geom mask defaults to + * 0xFF + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + DEREF(dst).prim_index1_delta = prim_index1_delta; + DEREF(dst).prim_index0 = src.triangle_id; + DEREF(dst).leaf_desc = desc; + /* Setup single triangle */ + DEREF(dst).v = src.coords; +} + +void +anv_encode_aabb(VOID_REF dst_addr, vk_ir_aabb_node src) +{ + REF(anv_procedural_leaf_node) dst = REF(anv_procedural_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= src.geometry_id_and_flags & 0xffffff; + /* The geometry opaque flag (1 bit) is encoded at bit 30 */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex. Geom mask defaults to + * 0xFF + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + /* num primitives = 1 */ + uint32_t dw1 = 1; + /* "last" has only 1 bit, and it is set. 
*/ + dw1 |= (1 << 31); + + DEREF(dst).leaf_desc = desc; + DEREF(dst).DW1 = dw1; + DEREF(dst).primIndex[0] = src.primitive_id; +} + +void +anv_encode_instance(VOID_REF dst_addr, vk_ir_instance_node src) +{ + REF(anv_instance_leaf) dst = REF(anv_instance_leaf)(dst_addr); + REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); + uint64_t start_node_ptr = uint64_t(src.base_ptr) + DEREF(blas_header).rootNodeOffset; + +#if GFX_VERx10 >= 300 + DEREF(dst).part0.QW_startNodePtr = start_node_ptr; + uint32_t instance_contribution_and_geom_mask = 0; + instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = instance_contribution_and_geom_mask; + + uint32_t inst_flags_and_the_rest = 0; + inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); + inst_flags_and_the_rest |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? + ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + + DEREF(dst).part0.DW1 = inst_flags_and_the_rest; +#else + uint32_t shader_index_and_geom_mask = 0; + shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = shader_index_and_geom_mask; + + uint32_t instance_contribution_and_geom_flags = 0; + instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_flags |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? 
+ ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + DEREF(dst).part0.DW1 = instance_contribution_and_geom_flags; + + DEREF(dst).part0.QW_startNodePtr = + (start_node_ptr & ((1ul << 48) - 1)) | + (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); +#endif + + mat4 transform = mat4(src.otw_matrix); + + mat4 inv_transform = transpose(inverse(transpose(transform))); + mat3x4 wto_matrix = mat3x4(inv_transform); + mat3x4 otw_matrix = mat3x4(transform); + + /* Arrange WTO transformation matrix in column-major order */ + DEREF(dst).part0.world2obj_vx_x = wto_matrix[0][0]; + DEREF(dst).part0.world2obj_vx_y = wto_matrix[1][0]; + DEREF(dst).part0.world2obj_vx_z = wto_matrix[2][0]; + DEREF(dst).part0.obj2world_p_x = otw_matrix[0][3]; + + DEREF(dst).part0.world2obj_vy_x = wto_matrix[0][1]; + DEREF(dst).part0.world2obj_vy_y = wto_matrix[1][1]; + DEREF(dst).part0.world2obj_vy_z = wto_matrix[2][1]; + DEREF(dst).part0.obj2world_p_y = otw_matrix[1][3]; + + DEREF(dst).part0.world2obj_vz_x = wto_matrix[0][2]; + DEREF(dst).part0.world2obj_vz_y = wto_matrix[1][2]; + DEREF(dst).part0.world2obj_vz_z = wto_matrix[2][2]; + DEREF(dst).part0.obj2world_p_z = otw_matrix[2][3]; + + /* Arrange OTW transformation matrix in column-major order */ + DEREF(dst).part1.obj2world_vx_x = otw_matrix[0][0]; + DEREF(dst).part1.obj2world_vx_y = otw_matrix[1][0]; + DEREF(dst).part1.obj2world_vx_z = otw_matrix[2][0]; + DEREF(dst).part1.world2obj_p_x = wto_matrix[0][3]; + + DEREF(dst).part1.obj2world_vy_x = otw_matrix[0][1]; + DEREF(dst).part1.obj2world_vy_y = otw_matrix[1][1]; + DEREF(dst).part1.obj2world_vy_z = otw_matrix[2][1]; + DEREF(dst).part1.world2obj_p_y = wto_matrix[1][3]; + + DEREF(dst).part1.obj2world_vz_x = otw_matrix[0][2]; + DEREF(dst).part1.obj2world_vz_y = otw_matrix[1][2]; + DEREF(dst).part1.obj2world_vz_z = otw_matrix[2][2]; + DEREF(dst).part1.world2obj_p_z = wto_matrix[2][3]; + + DEREF(dst).part1.bvh_ptr = src.base_ptr; + DEREF(dst).part1.instance_index = src.instance_id; + 
DEREF(dst).part1.instance_id = src.custom_instance_and_mask & 0xffffff; +} + +#endif diff --git a/src/intel/vulkan/bvh/header.comp b/src/intel/vulkan/bvh/header.comp index 4235a1025c0..0069c0d6641 100644 --- a/src/intel/vulkan/bvh/header.comp +++ b/src/intel/vulkan/bvh/header.comp @@ -34,6 +34,11 @@ main(void) DEREF(args.dst).instance_count = args.instance_count; DEREF(args.dst).instance_leaves_offset = args.instance_leaves_offset; + DEREF(args.dst).parent_child_map_offset = args.parent_child_map_offset; + DEREF(args.dst).leaf_block_map_offset = args.leaf_block_map_offset; + DEREF(args.dst).leaf_count = args.leaf_count; + DEREF(args.dst).total_nodes = args.total_nodes; + /* 128 is local_size_x in copy.comp shader, 16bytes is the amount of data * copied by each iteration of that shader's loop */ diff --git a/src/intel/vulkan/bvh/meson.build b/src/intel/vulkan/bvh/meson.build index fcfbfc908be..0ec67a5d328 100644 --- a/src/intel/vulkan/bvh/meson.build +++ b/src/intel/vulkan/bvh/meson.build @@ -7,6 +7,7 @@ bvh_shaders = [ 'encode.comp', 'header.comp', 'copy.comp', + 'update.comp', ] # A mapping: [filename version, GFX_VERx10 define version] @@ -23,6 +24,7 @@ anv_bvh_includes = files( 'anv_build_helpers.h', 'anv_build_interface.h', 'anv_bvh.h', + 'encode.h', ) foreach shader : bvh_shaders diff --git a/src/intel/vulkan/bvh/update.comp b/src/intel/vulkan/bvh/update.comp new file mode 100644 index 00000000000..311ece6fe51 --- /dev/null +++ b/src/intel/vulkan/bvh/update.comp @@ -0,0 +1,207 @@ +/* + * Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#version 460 +#include "anv_build_helpers.h" +#include "anv_build_interface.h" +#include "update.h" + +layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout(push_constant) uniform CONSTS { + update_args args; +}; + +vk_aabb +build_and_encode_leaf(uint32_t leaf_local_idx, BLOCK leaf_block) +{ + VOID_REF dst = BLOCK_OFFSET(leaf_block); + vk_aabb bounds = vk_aabb(vec3(0.0f), 
vec3(0.0f)); + + switch (args.geom_data.geometry_type) { + case VK_GEOMETRY_TYPE_TRIANGLES_KHR: + anv_build_triangle(bounds, dst, args.geom_data, leaf_local_idx); + break; + case VK_GEOMETRY_TYPE_AABBS_KHR: { + VOID_REF src_aabb_ptr = OFFSET(args.geom_data.data, + leaf_local_idx * args.geom_data.stride); + anv_build_aabb(bounds, src_aabb_ptr, dst, args.geom_data.geometry_id, + leaf_local_idx); + break; + } + default: + /* instances typically not updated */ + break; + } + + return bounds; +} + +vk_aabb +recompute_parent(BLOCK parent_block, uint32_t updated_idx, + uint32_t child_count, vk_aabb updated_child_bounds) +{ + REF(anv_internal_node) parent = REF(anv_internal_node)(BLOCK_OFFSET(parent_block)); + + vk_aabb box; + box.min = vec3(INFINITY); + box.max = vec3(-INFINITY); + + BLOCK current_child_block = parent_block + DEREF(parent).child_block_offset; + + vk_aabb cache_aabb[6]; + for (uint32_t i = 0; i < child_count; ++i) { + cache_aabb[i] = (i == updated_idx) ? updated_child_bounds : + DEREF(INDEX(vk_aabb, args.aabb_scratch, current_child_block + i)); + aabb_extend(box, cache_aabb[i]); + } + + vk_aabb conservative_child_aabb = conservative_aabb(box); + + float up = 1.0 + ULP; + ivec3 exp; + + vec3 len = aabb_size(conservative_child_aabb) * up; + vec3 mant = frexp(len, exp); + + exp.x += int((mant.x > (255.0f / 256.0f))); + exp.y += int((mant.y > (255.0f / 256.0f))); + exp.z += int((mant.z > (255.0f / 256.0f))); + + i8vec3 exponent_i8 = i8vec3(exp); + i8vec3 exp_i8 = {max(int8_t(-128), exponent_i8.x), + max(int8_t(-128), exponent_i8.y), + max(int8_t(-128), exponent_i8.z)}; + + DEREF(parent).lower[0] = conservative_child_aabb.min.x; + DEREF(parent).lower[1] = conservative_child_aabb.min.y; + DEREF(parent).lower[2] = conservative_child_aabb.min.z; + DEREF(parent).exp_x = exp_i8[0]; + DEREF(parent).exp_y = exp_i8[1]; + DEREF(parent).exp_z = exp_i8[2]; + + vec3 base = conservative_child_aabb.min; + vec3 scale = ldexp(vec3(1.0), exp_i8 - 8); + + for (uint32_t i = 0; i 
< child_count; ++i) { + vk_aabb child_bounds = cache_aabb[i]; + + vec3 lower = (child_bounds.min - base) / scale; + vec3 upper = (child_bounds.max - base) / scale; + lower = clamp(floor(lower), vec3(0.0), vec3(255.0)); + upper = clamp(ceil(upper), vec3(0.0), vec3(255.0)); + + DEREF(parent).lower_x[i] = uint8_t(lower.x); + DEREF(parent).lower_y[i] = uint8_t(lower.y); + DEREF(parent).lower_z[i] = uint8_t(lower.z); + DEREF(parent).upper_x[i] = uint8_t(upper.x); + DEREF(parent).upper_y[i] = uint8_t(upper.y); + DEREF(parent).upper_z[i] = uint8_t(upper.z); + } + + return box; +} + +void main() +{ + uint32_t leaf_local = gl_GlobalInvocationID.x; + if (leaf_local >= args.leaf_node_count || leaf_local >= args.primitive_count) + return; + + uint32_t leaf_id = args.geom_data.first_id + leaf_local; + BLOCK leaf_block = DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)); + + if (leaf_block == VK_NULL_BVH_OFFSET) + return; + + vk_aabb leaf_bounds = build_and_encode_leaf(leaf_local, leaf_block); + DEREF(INDEX(vk_aabb, args.aabb_scratch, leaf_block)) = leaf_bounds; + + /* Ensure scratch update and leaf encoding is visible before atomic in + * following loop. 
+ */ + memoryBarrierBuffer(); + + BLOCK current_block = leaf_block; + vk_aabb current_bounds = leaf_bounds; + + while (true) { + BLOCK parent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, current_block)); + + /* No parent at all */ + if (parent_raw == VK_NULL_BVH_OFFSET && current_block == leaf_block) { + REF(anv_accel_struct_header) hdr = + REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); + DEREF(hdr).aabb = current_bounds; + break; + } + + BLOCK parent = parent_raw & 0x03FFFFFF; + REF(anv_internal_node) internal_node = REF(anv_internal_node)(BLOCK_OFFSET(parent)); + /* Internal node's reserved field is tracking number of children count.*/ + uint32_t valid_child_count = uint32_t(DEREF(internal_node).reserved); + if (valid_child_count == 0) + break; + + /* Last-child-wins atomic */ + uint32_t ready = atomicAdd( + DEREF(INDEX(uint32_t, args.internal_ready_count, parent)), 1, + gl_ScopeDevice, gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); + + /* Not the last child */ + if (ready != valid_child_count - 1) + break; + + uint32_t child_idx = parent_raw >> 26; + /* Encode and quantize parent bounds */ + vk_aabb parent_bounds = recompute_parent(parent, child_idx, valid_child_count, current_bounds); + + /* Store parent bounds for next level */ + DEREF(INDEX(vk_aabb, args.aabb_scratch, parent)) = parent_bounds; + + /* Ensure scratch write and parent encoding is visible in the next + * iteration of this loop. 
+ */ + memoryBarrierBuffer(); + + /* Check whether this parent is the root internal node */ + uint32_t grandparent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, parent)); + + if (grandparent_raw == VK_NULL_BVH_OFFSET) { + REF(anv_internal_node) root = REF(anv_internal_node)(BLOCK_OFFSET(parent)); + + vec3 base = vec3(DEREF(root).lower[0], DEREF(root).lower[1], DEREF(root).lower[2]); + ivec3 exp = ivec3( int(DEREF(root).exp_x), int(DEREF(root).exp_y), int(DEREF(root).exp_z)); + vec3 scale = ldexp(vec3(1.0), exp - 8); + + vk_aabb root_bounds = vk_aabb(vec3(INFINITY), vec3(-INFINITY)); + + for (uint32_t i = 0; i < valid_child_count; ++i) { + vec3 lower = vec3(DEREF(root).lower_x[i], + DEREF(root).lower_y[i], + DEREF(root).lower_z[i]); + + vec3 upper = vec3(DEREF(root).upper_x[i], + DEREF(root).upper_y[i], + DEREF(root).upper_z[i]); + + vk_aabb child; + child.min = base + lower * scale; + child.max = base + upper * scale; + + aabb_extend(root_bounds, child); + } + + REF(anv_accel_struct_header) hdr = + REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); + DEREF(hdr).aabb = root_bounds; + break; + } + + current_block = parent; + current_bounds = parent_bounds; + } +} diff --git a/src/intel/vulkan/bvh/update.h b/src/intel/vulkan/bvh/update.h new file mode 100644 index 00000000000..16c46bed77c --- /dev/null +++ b/src/intel/vulkan/bvh/update.h @@ -0,0 +1,73 @@ +/* + * Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef ANV_BVH_UPDATE_H +#define ANV_BVH_UPDATE_H + +#include "encode.h" + +void +anv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id) +{ + triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id); + + triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride); + + if (geom_data.transform != NULL) { + mat4 transform = mat4(1.0); + + for (uint32_t col = 0; col < 4; 
col++) { + for (uint32_t row = 0; row < 3; row++) { + transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4)); + } + } + + for (uint32_t i = 0; i < 3; i++) { + vertices.vertex[i] = transform * vertices.vertex[i]; + } + } + + vk_ir_triangle_node node; + + bounds.min = vec3(INFINITY); + bounds.max = vec3(-INFINITY); + + for (uint32_t coord = 0; coord < 3; coord++) { + for (uint32_t comp = 0; comp < 3; comp++) { + node.coords[coord][comp] = vertices.vertex[coord][comp]; + bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]); + bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]); + } + } + + node.triangle_id = global_id; + node.geometry_id_and_flags = geom_data.geometry_id; + + anv_encode_triangle(dst_ptr, node); +} + +void +anv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id) +{ + for (uint32_t vec = 0; vec < 2; vec++) { + for (uint32_t comp = 0; comp < 3; comp++) { + float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3)); + + if (vec == 0) + bounds.min[comp] = coord; + else + bounds.max[comp] = coord; + } + } + + vk_ir_aabb_node node; + node.base.aabb = bounds; + node.primitive_id = global_id; + node.geometry_id_and_flags = geometry_id; + + anv_encode_aabb(dst_ptr, node); +} + +#endif diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index a0d3dee49a6..2fa36c4c773 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -30,6 +30,16 @@ static uint32_t blas_id = 0; static uint32_t tlas_id = 0; +struct update_scratch_layout { + uint32_t internal_ready_count_offset; + uint32_t aabb_offset; + uint32_t size; +}; + +enum anv_encode_key { + ANV_ENCODE_KEY_ALLOW_UPDATE_BVH = (1 << 0), +}; + static void begin_debug_marker(VkCommandBuffer commandBuffer, struct vk_acceleration_structure_build_marker *marker) @@ -58,6 +68,9 @@ 
begin_debug_marker(VkCommandBuffer commandBuffer, case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: trace_intel_begin_as_encode(&cmd_buffer->trace); break; + case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE: + trace_intel_begin_as_update(&cmd_buffer->trace); + break; default: UNREACHABLE("Invalid build step"); } @@ -91,6 +104,7 @@ end_debug_marker(VkCommandBuffer commandBuffer, trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: + case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE: trace_intel_end_as_encode(&cmd_buffer->trace, marker->encode.pass, marker->encode.key, @@ -150,21 +164,36 @@ add_bvh_dump(struct anv_cmd_buffer *cmd_buffer, static void debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, VkDeviceAddress header_addr, - uint64_t bvh_anv_size, + struct bvh_layout bvh_layout, VkDeviceAddress intermediate_header_addr, VkDeviceAddress intermediate_as_addr, uint32_t leaf_count, - VkGeometryTypeKHR geometry_type) + VkGeometryTypeKHR geometry_type, + bool after_update) { + if (INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) && after_update && + geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { + add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type, + BVH_ANV_UPDATE); + } + + if (INTEL_DEBUG(DEBUG_BVH_PCREL_MAP) && + geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { + add_bvh_dump(cmd_buffer, header_addr + bvh_layout.parent_child_map_offset, + bvh_layout.leaf_block_map_offset - bvh_layout.parent_child_map_offset, + geometry_type, + BVH_ANV_PCREL); + } + if (INTEL_DEBUG(DEBUG_BVH_BLAS) && geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { - add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, + add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type, BVH_ANV); } if (INTEL_DEBUG(DEBUG_BVH_TLAS) && geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { - add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, + add_bvh_dump(cmd_buffer, header_addr, 
bvh_layout.size, geometry_type, BVH_ANV); } @@ -225,6 +254,7 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, #define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h) #define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h) #define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h) +#define UPDATE_SPV_PATH STRINGIFY(bvh/genX(update).spv.h) static const uint32_t encode_spv[] = { #include ENCODE_SPV_PATH @@ -238,10 +268,17 @@ static const uint32_t copy_spv[] = { #include COPY_SPV_PATH }; +static const uint32_t update_spv[] = { +#include UPDATE_SPV_PATH +}; + static void -get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count, +get_bvh_layout(const struct vk_acceleration_structure_build_state *state, struct bvh_layout *layout) { + VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); + uint32_t leaf_count = state->leaf_node_count; + uint32_t internal_count = MAX2(leaf_count, 2) - 1; uint64_t offset = ANV_RT_BVH_HEADER_SIZE; @@ -270,7 +307,6 @@ get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count, UNREACHABLE("Unknown VkGeometryTypeKHR"); } - offset = align64(offset, 64); layout->instance_leaves_offset = offset; /* For a TLAS, we store the address of anv_instance_leaf after header @@ -280,6 +316,16 @@ get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count, offset += leaf_count * sizeof(uint64_t); } + if (state->config.encode_key[1] & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) { + uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t); + layout->parent_child_map_offset = offset; + offset += parent_child_map_size; + + uint64_t leaf_block_offset_size = leaf_count * sizeof(uint32_t); + layout->leaf_block_map_offset = offset; + offset += leaf_block_offset_size; + } + layout->size = align64(offset, 64); } @@ -287,7 +333,7 @@ static VkDeviceSize anv_get_as_size(VkDevice device, const struct vk_acceleration_structure_build_state *state) { struct bvh_layout layout; - 
get_bvh_layout(vk_get_as_geometry_type(state->build_info), state->leaf_node_count, &layout); + get_bvh_layout(state, &layout); return layout.size; } @@ -305,9 +351,13 @@ anv_get_build_config(VkDevice device, struct vk_acceleration_structure_build_sta * the compacted size of an updatable AS as the maximum possible size for * any AS that could also be built from the same number of leaf nodes. */ - state->config.encode_key[1] = + state->config.encode_key[0] = ((flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) && !(flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) ? 1 : 0; + + if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) && + state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) + state->config.encode_key[1] = ANV_ENCODE_KEY_ALLOW_UPDATE_BVH; } static void @@ -357,13 +407,32 @@ anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args, anv_CmdPushConstants2(commandBuffer, &push_info); } +static uint32_t +anv_build_flags(VkCommandBuffer commandBuffer, uint32_t key) +{ + uint32_t flags = 0; + + /* This will write following required maps for update BVH pass: + * 1) Parent-Child offset map + * 2) Leaf block offset map + * 3) Parent slot offset map + * 4) Parent child count map + */ + if (key & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) { + flags |= ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE; + } + + return flags; +} + static VkResult anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_ENCODE, encode_spv, sizeof(encode_spv), - sizeof(struct encode_args), 0); + sizeof(struct encode_args), + anv_build_flags(commandBuffer, state->config.encode_key[1])); return VK_SUCCESS; } @@ -390,7 +459,7 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct struct bvh_layout bvh_layout; VkGeometryTypeKHR geometry_type = 
vk_get_as_geometry_type(state->build_info); - get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout); + get_bvh_layout(state, &bvh_layout); if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD)) { /* Zero out the whole BVH when we run with BVH_NO_BUILD debug option. */ @@ -419,6 +488,10 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct .geometry_type = geometry_type, .instance_leaves_addr = vk_acceleration_structure_get_va(dst) + bvh_layout.instance_leaves_offset, + .parent_child_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.parent_child_map_offset, + .leaf_block_offset_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.leaf_block_map_offset, }; anv_bvh_build_set_args(commandBuffer, &args, sizeof(args)); @@ -454,7 +527,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); struct bvh_layout bvh_layout; - get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout); + get_bvh_layout(state, &bvh_layout); VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst); @@ -467,17 +540,158 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru .bvh_offset = bvh_layout.bvh_offset, .instance_count = instance_count, .instance_leaves_offset = bvh_layout.instance_leaves_offset, - .is_compacted = (state->config.encode_key[1] == 1), + .is_compacted = (state->config.encode_key[0] == 1), .bvh_size = bvh_layout.size, + + .parent_child_map_offset = bvh_layout.parent_child_map_offset, + .leaf_block_map_offset = bvh_layout.leaf_block_map_offset, + .leaf_count = state->leaf_node_count, + .total_nodes = state->leaf_node_count + (MAX2(state->leaf_node_count, 2) - 1), }; anv_bvh_build_set_args(commandBuffer, &args, sizeof(args)); vk_common_CmdDispatch(commandBuffer, 1, 1, 1); if (INTEL_DEBUG_BVH_ANY) { - debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, + 
debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout, intermediate_header_addr, intermediate_bvh_addr, - state->leaf_node_count, geometry_type); + state->leaf_node_count, geometry_type, + false /* after update */); + } +} + +static void +anv_get_update_scratch_layout(struct anv_device *device, + const struct vk_acceleration_structure_build_state *state, + struct update_scratch_layout *scratch) +{ + uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1; + uint32_t offset = 0; + + scratch->internal_ready_count_offset = offset; + offset += sizeof(uint32_t) * (internal_count + state->leaf_node_count); + + scratch->aabb_offset = offset; + offset += sizeof(vk_aabb) * (internal_count + state->leaf_node_count); + + scratch->size = offset; +} + +static VkDeviceSize +anv_get_update_scratch_size(VkDevice _device, + const struct vk_acceleration_structure_build_state *state) +{ + VK_FROM_HANDLE(anv_device, device, _device); + + struct update_scratch_layout scratch; + anv_get_update_scratch_layout(device, state, &scratch); + + return scratch.size; +} + +static void +anv_init_update_scratch(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *states, + uint32_t build_count) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + for (uint32_t i = 0; i < build_count; i++) { + const struct vk_acceleration_structure_build_state *state = &states[i]; + if (state->config.internal_type != VK_INTERNAL_BUILD_TYPE_UPDATE) + continue; + + uint64_t scratch = state->build_info->scratchData.deviceAddress; + + struct update_scratch_layout layout; + anv_get_update_scratch_layout(device, state, &layout); + + anv_cmd_fill_buffer_addr(commandBuffer, scratch, layout.size, 0x0); + } +} + +static void +anv_update_prepare(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *state, + bool flushed_cp_after_init_update_scratch, + bool 
flushed_compute_after_init_update_scratch) +{ + if (!flushed_compute_after_init_update_scratch || + !flushed_cp_after_init_update_scratch) + vk_barrier_compute_w_to_compute_r(commandBuffer); + + anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_UPDATE, + update_spv, sizeof(update_spv), + sizeof(struct update_args), 0); +} + +static void +anv_update_as(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *state) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure); + VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); + + struct anv_device *device = cmd_buffer->device; + + struct bvh_layout bvh_layout; + get_bvh_layout(state, &bvh_layout); + + /* Just copy over data from src to dst if mismatch. */ + if (src != dst) { + assert(src->offset == 0 && dst->offset == 0); + struct anv_address src_addr = + anv_address_from_u64(vk_acceleration_structure_get_va(src)); + struct anv_address dst_addr = + anv_address_from_u64(vk_acceleration_structure_get_va(dst)); + + assert(src->size == dst->size); + anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, src->size); + vk_barrier_compute_w_to_compute_r(commandBuffer); + } + + struct update_scratch_layout update_layout; + anv_get_update_scratch_layout(device, state, &update_layout); + + struct update_args update_consts = { + .internal_ready_count = state->build_info->scratchData.deviceAddress + + update_layout.internal_ready_count_offset, + .aabb_scratch = state->build_info->scratchData.deviceAddress + + update_layout.aabb_offset, + .leaf_node_count = state->leaf_node_count, + .parent_child_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.parent_child_map_offset, + .leaf_block_offset_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.leaf_block_map_offset, + .output_bvh = vk_acceleration_structure_get_va(dst) + 
bvh_layout.bvh_offset, + .output_bvh_offset = bvh_layout.bvh_offset, + }; + + uint32_t first_id = 0; + for (uint32_t i = 0; i < state->build_info->geometryCount; i++) { + const VkAccelerationStructureGeometryKHR *geom = + state->build_info->pGeometries ? &state->build_info->pGeometries[i] :state->build_info->ppGeometries[i]; + const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = + &state->build_range_infos[i]; + + update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info); + update_consts.primitive_count = build_range_info->primitiveCount; + + anv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts)); + anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned) + (commandBuffer, build_range_info->primitiveCount, 1, 1); + + first_id += build_range_info->primitiveCount; + } + + if (INTEL_DEBUG_BVH_ANY) { + debug_record_as_to_bvh_dump(cmd_buffer, vk_acceleration_structure_get_va(dst), + bvh_layout, 0, 0, + state->leaf_node_count, + vk_get_as_geometry_type(state->build_info), + true /* after update */); } } @@ -488,6 +702,10 @@ static const struct vk_acceleration_structure_build_ops anv_build_ops = { .get_build_config = anv_get_build_config, .encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline }, .encode_as = { anv_encode_as, anv_init_header }, + .get_update_scratch_size = anv_get_update_scratch_size, + .init_update_scratch = anv_init_update_scratch, + .update_prepare[0] = anv_update_prepare, + .update_as[0] = anv_update_as, }; static VkResult