From ab992f89e71839e43d9e5ce16655074437e51c77 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Thu, 29 Jan 2026 13:42:58 -0800 Subject: [PATCH 1/8] anv: Pass vk_acceleration_structure_build_state as param Pass vk_acceleration_structure_build_state as parameter to get_bvh_layout. Signed-off-by: Sagar Ghuge --- src/intel/vulkan/genX_acceleration_structure.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index 5b2dccb7063..7adbf9f835c 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -239,9 +239,12 @@ static const uint32_t copy_spv[] = { }; static void -get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count, +get_bvh_layout(const struct vk_acceleration_structure_build_state *state, struct bvh_layout *layout) { + VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); + uint32_t leaf_count = state->leaf_node_count; + uint32_t internal_count = MAX2(leaf_count, 2) - 1; uint64_t offset = ANV_RT_BVH_HEADER_SIZE; @@ -287,7 +290,7 @@ static VkDeviceSize anv_get_as_size(VkDevice device, const struct vk_acceleration_structure_build_state *state) { struct bvh_layout layout; - get_bvh_layout(vk_get_as_geometry_type(state->build_info), state->leaf_node_count, &layout); + get_bvh_layout(state, &layout); return layout.size; } @@ -390,7 +393,7 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct struct bvh_layout bvh_layout; VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); - get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout); + get_bvh_layout(state, &bvh_layout); if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD)) { /* Zero out the whole BVH when we run with BVH_NO_BUILD debug option. 
*/ @@ -454,7 +457,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); struct bvh_layout bvh_layout; - get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout); + get_bvh_layout(state, &bvh_layout); VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst); From c7bcadc0df401f6ac5b89ddd5ed2ac579b86c218 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Sun, 21 Dec 2025 13:08:17 -0800 Subject: [PATCH 2/8] anv/rt: Extract common code in separate header Extract leaf encoding into encode.h and move some of the helpers into anv_build_helpers.h Signed-off-by: Sagar Ghuge --- src/intel/vulkan/bvh/anv_build_helpers.h | 41 +++++ src/intel/vulkan/bvh/anv_bvh.h | 4 +- src/intel/vulkan/bvh/encode.comp | 181 +---------------------- src/intel/vulkan/bvh/encode.h | 148 ++++++++++++++++++ src/intel/vulkan/bvh/meson.build | 1 + 5 files changed, 198 insertions(+), 177 deletions(-) create mode 100644 src/intel/vulkan/bvh/encode.h diff --git a/src/intel/vulkan/bvh/anv_build_helpers.h b/src/intel/vulkan/bvh/anv_build_helpers.h index f7793179900..dd047f3e0c3 100644 --- a/src/intel/vulkan/bvh/anv_build_helpers.h +++ b/src/intel/vulkan/bvh/anv_build_helpers.h @@ -18,4 +18,45 @@ TYPE(child_data, 1); TYPE(instance_leaf_part0, 8); TYPE(instance_leaf_part1, 8); +#define ULP 1.1920928955078125e-7f + +/* An offset in 64B blocks from args.output_bvh that points to output of + * encoded nodes. Can be a leaf or internal node. 
+ */ +#define BLOCK uint32_t +#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) + +uint32_t +get_instance_flag(uint32_t src) +{ + return ((src >> 24) & 0xff); +} + +vk_aabb +conservative_aabb(vk_aabb input_aabb) +{ + vk_aabb out_aabb; + + vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); + float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); + + out_aabb.min = input_aabb.min - vec3(err); + out_aabb.max = input_aabb.max + vec3(err); + + return out_aabb; +} + +void +aabb_extend(inout vk_aabb v1, vk_aabb v2) +{ + v1.min = min(v1.min, v2.min); + v1.max = max(v1.max, v2.max); +} + +vec3 +aabb_size(vk_aabb input_aabb) +{ + return input_aabb.max - input_aabb.min; +} + #endif diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index 011b449dfd9..b8f67635d99 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -132,7 +132,9 @@ struct anv_quad_leaf_node { * Reserved (9-bits) */ uint32_t prim_index1_delta; - float v[4][3]; + float v[3][3]; + /* Second triangle coords */ + float v1[3]; }; struct anv_procedural_leaf_node { diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index a13b97e44f0..8e41099d646 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -9,8 +9,8 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #include "anv_build_helpers.h" #include "anv_build_interface.h" +#include "encode.h" -#define ULP 1.1920928955078125e-7f #define READY_TO_WRITE(offset) ((offset) < VK_NULL_BVH_OFFSET) #define ASSIGNED_NODE_TO_ENCODE (gl_GlobalInvocationID.x < DEREF(args.header).ir_internal_node_count) @@ -24,12 +24,6 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #define IR_NODE uint32_t #define NODE_OFFSET(node) (OFFSET(args.intermediate_bvh, ir_id_to_offset(node))) -/* An offset in 64B blocks from args.output_bvh that points to output of - * encoded 
nodes. Can be a leaf or internal node. - */ -#define BLOCK uint32_t -#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) - layout(push_constant) uniform CONSTS { encode_args args; }; @@ -41,12 +35,6 @@ debug_dump(uint32_t offset, uint32_t value) DEREF(msg) = value; } -uint32_t -get_instance_flag(uint32_t src) -{ - return ((src >> 24) & 0xff); -} - struct anv_cluster { /* simd lane inside cluster: 0 .. 7 */ uint32_t idx; @@ -77,153 +65,21 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ if (DEBUG_EXIT_EARLY(type)) return; + switch (type) { case vk_ir_node_triangle: { - REF(anv_quad_leaf_node) quad_leaf = REF(anv_quad_leaf_node)(dst_node); - vk_ir_triangle_node src = DEREF(REF(vk_ir_triangle_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_QUAD & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - /* Geometry opqaue (1-bit) is encoded on 30-bit index */ - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - /* Disable the second triangle */ - uint32_t prim_index1_delta = 0; - /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. 
*/ - prim_index1_delta |= (1 << 22); - - DEREF(quad_leaf).prim_index1_delta = prim_index1_delta; - DEREF(quad_leaf).prim_index0 = src.triangle_id; - DEREF(quad_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(quad_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* Setup single triangle */ - for (uint32_t i = 0; i < 3; i++) { - for (uint32_t j = 0; j < 3; j++) { - DEREF(quad_leaf).v[i][j] = src.coords[i][j]; - } - } + anv_encode_triangle(dst_node, src); break; } case vk_ir_node_aabb: { - REF(anv_procedural_leaf_node) aabb_leaf = REF(anv_procedural_leaf_node)(dst_node); - vk_ir_aabb_node src = DEREF(REF(vk_ir_aabb_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - DEREF(aabb_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(aabb_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* num primitives = 1 */ - uint32_t dw1 = 1; - /* "last" has only 1 bit, and it is set. 
*/ - dw1 |= (1 << 31); - - DEREF(aabb_leaf).DW1 = dw1; - DEREF(aabb_leaf).primIndex[0] = src.primitive_id; + anv_encode_aabb(dst_node, src); break; } case vk_ir_node_instance: { vk_ir_instance_node src = DEREF(REF(vk_ir_instance_node)(src_node)); - - REF(anv_instance_leaf) dst_instance = REF(anv_instance_leaf)(dst_node); - REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); - uint64_t start_node_ptr = uint64_t(src.base_ptr) + args.output_bvh_offset; - -#if GFX_VERx10 >= 300 - DEREF(dst_instance).part0.QW_startNodePtr = start_node_ptr; - uint32_t instance_contribution_and_geom_mask = 0; - instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = instance_contribution_and_geom_mask; - - uint32_t inst_flags_and_the_rest = 0; - inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); - inst_flags_and_the_rest |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? - ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - - DEREF(dst_instance).part0.DW1 = inst_flags_and_the_rest; - -#else - uint32_t shader_index_and_geom_mask = 0; - shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = shader_index_and_geom_mask; - - uint32_t instance_contribution_and_geom_flags = 0; - instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_flags |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? 
- ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - DEREF(dst_instance).part0.DW1 = instance_contribution_and_geom_flags; - - DEREF(dst_instance).part0.QW_startNodePtr = - (start_node_ptr & ((1ul << 48) - 1)) | - (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); -#endif - - mat4 transform = mat4(src.otw_matrix); - - mat4 inv_transform = transpose(inverse(transpose(transform))); - mat3x4 wto_matrix = mat3x4(inv_transform); - mat3x4 otw_matrix = mat3x4(transform); - - /* Arrange WTO transformation matrix in column-major order */ - DEREF(dst_instance).part0.world2obj_vx_x = wto_matrix[0][0]; - DEREF(dst_instance).part0.world2obj_vx_y = wto_matrix[1][0]; - DEREF(dst_instance).part0.world2obj_vx_z = wto_matrix[2][0]; - DEREF(dst_instance).part0.obj2world_p_x = otw_matrix[0][3]; - - DEREF(dst_instance).part0.world2obj_vy_x = wto_matrix[0][1]; - DEREF(dst_instance).part0.world2obj_vy_y = wto_matrix[1][1]; - DEREF(dst_instance).part0.world2obj_vy_z = wto_matrix[2][1]; - DEREF(dst_instance).part0.obj2world_p_y = otw_matrix[1][3]; - - DEREF(dst_instance).part0.world2obj_vz_x = wto_matrix[0][2]; - DEREF(dst_instance).part0.world2obj_vz_y = wto_matrix[1][2]; - DEREF(dst_instance).part0.world2obj_vz_z = wto_matrix[2][2]; - DEREF(dst_instance).part0.obj2world_p_z = otw_matrix[2][3]; - - /* Arrange OTW transformation matrix in column-major order */ - DEREF(dst_instance).part1.obj2world_vx_x = otw_matrix[0][0]; - DEREF(dst_instance).part1.obj2world_vx_y = otw_matrix[1][0]; - DEREF(dst_instance).part1.obj2world_vx_z = otw_matrix[2][0]; - DEREF(dst_instance).part1.world2obj_p_x = wto_matrix[0][3]; - - DEREF(dst_instance).part1.obj2world_vy_x = otw_matrix[0][1]; - DEREF(dst_instance).part1.obj2world_vy_y = otw_matrix[1][1]; - DEREF(dst_instance).part1.obj2world_vy_z = otw_matrix[2][1]; - DEREF(dst_instance).part1.world2obj_p_y = wto_matrix[1][3]; - - DEREF(dst_instance).part1.obj2world_vz_x = otw_matrix[0][2]; - DEREF(dst_instance).part1.obj2world_vz_y = otw_matrix[1][2]; - 
DEREF(dst_instance).part1.obj2world_vz_z = otw_matrix[2][2]; - DEREF(dst_instance).part1.world2obj_p_z = wto_matrix[2][3]; - - DEREF(dst_instance).part1.bvh_ptr = src.base_ptr; - DEREF(dst_instance).part1.instance_index = src.instance_id; - DEREF(dst_instance).part1.instance_id = src.custom_instance_and_mask & 0xffffff; - + anv_encode_instance(dst_node, src); uint64_t instance_leaves_addr_base = args.instance_leaves_addr; uint64_t slot = ir_id_to_offset(child) / SIZEOF(vk_ir_instance_node); DEREF(INDEX(uint64_t, instance_leaves_addr_base, slot)) = dst_node; @@ -232,33 +88,6 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ } } -vk_aabb -conservative_aabb(vk_aabb input_aabb) -{ - vk_aabb out_aabb; - - vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); - float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); - - out_aabb.min = input_aabb.min - vec3(err); - out_aabb.max = input_aabb.max + vec3(err); - - return out_aabb; -} - -void -aabb_extend(inout vk_aabb v1, vk_aabb v2) -{ - v1.min = min(v1.min, v2.min); - v1.max = max(v1.max, v2.max); -} - -vec3 -aabb_size(vk_aabb input_aabb) -{ - return input_aabb.max - input_aabb.min; -} - /* Determine the node_type based on type of its children. * If children are all the same leaves, this internal node is a fat leaf; * Otherwise, it's a mixed node. 
diff --git a/src/intel/vulkan/bvh/encode.h b/src/intel/vulkan/bvh/encode.h new file mode 100644 index 00000000000..cdf2c7ac10d --- /dev/null +++ b/src/intel/vulkan/bvh/encode.h @@ -0,0 +1,148 @@ +/* Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef ANV_BVH_ENCODE_H +#define ANV_BVH_ENCODE_H + +#include "anv_build_helpers.h" +#include "anv_build_interface.h" + +void +anv_encode_triangle(VOID_REF dst_addr, vk_ir_triangle_node src) +{ + REF(anv_quad_leaf_node) dst = REF(anv_quad_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= (src.geometry_id_and_flags & 0xffffff); + /* Geometry opaque flag (1 bit) is encoded at bit 30 */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + /* Disable the second triangle */ + uint32_t prim_index1_delta = 0; + /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. */ + prim_index1_delta |= (1 << 22); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex. Geom mask defaults to + * 0xFF + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + DEREF(dst).prim_index1_delta = prim_index1_delta; + DEREF(dst).prim_index0 = src.triangle_id; + DEREF(dst).leaf_desc = desc; + /* Setup single triangle */ + DEREF(dst).v = src.coords; +} + +void +anv_encode_aabb(VOID_REF dst_addr, vk_ir_aabb_node src) +{ + REF(anv_procedural_leaf_node) dst = REF(anv_procedural_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= src.geometry_id_and_flags & 0xffffff; + /* Geometry opaque flag (1 bit) is encoded at bit 30 */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex. Geom mask is 
default to + * 0xFF + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + /* num primitives = 1 */ + uint32_t dw1 = 1; + /* "last" has only 1 bit, and it is set. */ + dw1 |= (1 << 31); + + DEREF(dst).leaf_desc = desc; + DEREF(dst).DW1 = dw1; + DEREF(dst).primIndex[0] = src.primitive_id; +} + +void +anv_encode_instance(VOID_REF dst_addr, vk_ir_instance_node src) +{ + REF(anv_instance_leaf) dst = REF(anv_instance_leaf)(dst_addr); + REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); + uint64_t start_node_ptr = uint64_t(src.base_ptr) + DEREF(blas_header).rootNodeOffset; + +#if GFX_VERx10 >= 300 + DEREF(dst).part0.QW_startNodePtr = start_node_ptr; + uint32_t instance_contribution_and_geom_mask = 0; + instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = instance_contribution_and_geom_mask; + + uint32_t inst_flags_and_the_rest = 0; + inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); + inst_flags_and_the_rest |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? + ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + + DEREF(dst).part0.DW1 = inst_flags_and_the_rest; +#else + uint32_t shader_index_and_geom_mask = 0; + shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = shader_index_and_geom_mask; + + uint32_t instance_contribution_and_geom_flags = 0; + instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_flags |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? 
+ ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + DEREF(dst).part0.DW1 = instance_contribution_and_geom_flags; + + DEREF(dst).part0.QW_startNodePtr = + (start_node_ptr & ((1ul << 48) - 1)) | + (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); +#endif + + mat4 transform = mat4(src.otw_matrix); + + mat4 inv_transform = transpose(inverse(transpose(transform))); + mat3x4 wto_matrix = mat3x4(inv_transform); + mat3x4 otw_matrix = mat3x4(transform); + + /* Arrange WTO transformation matrix in column-major order */ + DEREF(dst).part0.world2obj_vx_x = wto_matrix[0][0]; + DEREF(dst).part0.world2obj_vx_y = wto_matrix[1][0]; + DEREF(dst).part0.world2obj_vx_z = wto_matrix[2][0]; + DEREF(dst).part0.obj2world_p_x = otw_matrix[0][3]; + + DEREF(dst).part0.world2obj_vy_x = wto_matrix[0][1]; + DEREF(dst).part0.world2obj_vy_y = wto_matrix[1][1]; + DEREF(dst).part0.world2obj_vy_z = wto_matrix[2][1]; + DEREF(dst).part0.obj2world_p_y = otw_matrix[1][3]; + + DEREF(dst).part0.world2obj_vz_x = wto_matrix[0][2]; + DEREF(dst).part0.world2obj_vz_y = wto_matrix[1][2]; + DEREF(dst).part0.world2obj_vz_z = wto_matrix[2][2]; + DEREF(dst).part0.obj2world_p_z = otw_matrix[2][3]; + + /* Arrange OTW transformation matrix in column-major order */ + DEREF(dst).part1.obj2world_vx_x = otw_matrix[0][0]; + DEREF(dst).part1.obj2world_vx_y = otw_matrix[1][0]; + DEREF(dst).part1.obj2world_vx_z = otw_matrix[2][0]; + DEREF(dst).part1.world2obj_p_x = wto_matrix[0][3]; + + DEREF(dst).part1.obj2world_vy_x = otw_matrix[0][1]; + DEREF(dst).part1.obj2world_vy_y = otw_matrix[1][1]; + DEREF(dst).part1.obj2world_vy_z = otw_matrix[2][1]; + DEREF(dst).part1.world2obj_p_y = wto_matrix[1][3]; + + DEREF(dst).part1.obj2world_vz_x = otw_matrix[0][2]; + DEREF(dst).part1.obj2world_vz_y = otw_matrix[1][2]; + DEREF(dst).part1.obj2world_vz_z = otw_matrix[2][2]; + DEREF(dst).part1.world2obj_p_z = wto_matrix[2][3]; + + DEREF(dst).part1.bvh_ptr = src.base_ptr; + DEREF(dst).part1.instance_index = src.instance_id; + 
DEREF(dst).part1.instance_id = src.custom_instance_and_mask & 0xffffff; +} + +#endif diff --git a/src/intel/vulkan/bvh/meson.build b/src/intel/vulkan/bvh/meson.build index fcfbfc908be..81637140d3e 100644 --- a/src/intel/vulkan/bvh/meson.build +++ b/src/intel/vulkan/bvh/meson.build @@ -23,6 +23,7 @@ anv_bvh_includes = files( 'anv_build_helpers.h', 'anv_build_interface.h', 'anv_bvh.h', + 'encode.h', ) foreach shader : bvh_shaders From eefb5aa8223d86d69ea0680e169c43240d85dbbe Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Sat, 27 Dec 2025 11:37:00 -0800 Subject: [PATCH 3/8] anv: Track parent-child map for BVH update This map stores parent BVH offset for each of their children. This will help us to walk the BVH layout later in the update pass. Since we are tracking block indexes, even with 2^32 large BVH size, we can have 2^26 max indices (each block 64B wide) that leaves us 6 bits in which we can track child slot index occupancies in parent. Signed-off-by: Sagar Ghuge --- src/intel/vulkan/bvh/anv_build_interface.h | 2 ++ src/intel/vulkan/bvh/anv_bvh.h | 18 ++++++++++++------ src/intel/vulkan/bvh/encode.comp | 15 ++++++++++++--- src/intel/vulkan/genX_acceleration_structure.c | 7 ++++++- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/intel/vulkan/bvh/anv_build_interface.h b/src/intel/vulkan/bvh/anv_build_interface.h index ebca43959f1..8c2a9b45c5d 100644 --- a/src/intel/vulkan/bvh/anv_build_interface.h +++ b/src/intel/vulkan/bvh/anv_build_interface.h @@ -32,6 +32,8 @@ struct encode_args { uint32_t leaf_node_count; uint32_t geometry_type; + + VOID_REF parent_child_map; }; struct header_args { diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index b8f67635d99..ac5155c8451 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -327,13 +327,12 @@ struct anv_instance_leaf { | start with root node, | | followed by interleaving | | internal nodes and leaves | -|-------------------------------| -| 
padding to align to | -| 64 bytes boundary | |-------------------------------| bvh_layout.instance_leaves_offset | For a TLAS, the pointers | | to all anv_instance_leaves | | For a BLAS, nothing here | +|-------------------------------| bvh_layout.parent_child_map_offset +| Parent - child map | |-------------------------------| | padding to align to | | 64 bytes boundary | bvh_layout.size @@ -346,13 +345,20 @@ struct bvh_layout { */ uint64_t bvh_offset; + /* This tracks pointers to all anv_instance_leaves for a TLAS. */ + uint64_t instance_leaves_offset; + + /* This map stores parent BVH offset for each child + * + * Lower 26bits - parent block index + * upper 6bits - parent child slot index + * */ + uint64_t parent_child_map_offset; + /* Total size = bvh_offset + leaves + internal_nodes (assuming there's no * internal node collpased) */ uint64_t size; - - /* This tracks pointers to all anv_instance_leaves for BLAS. */ - uint64_t instance_leaves_offset; }; #endif diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index 8e41099d646..f7b45368723 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -359,6 +359,7 @@ main() /* Tracks BLOCK where the next children should be encoded. */ DEREF(args.header).dst_node_offset = 1; DEREF(header).instance_count = 0; + DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET; } IR_NODE children[6] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE, @@ -490,9 +491,17 @@ main() child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb; uint32_t type = ir_id_to_type(child); - if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) - encode_leaf_node(type, child, BLOCK_OFFSET(child_block), - header); + /* Track each child's parent in the map. 
*/ + if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) { + uint32_t pcm_val = 0; + pcm_val = (cluster.idx << 26) | internal_node_block; + DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm_val; + } + + if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) { + encode_leaf_node(type, child, + BLOCK_OFFSET(child_block), header); + } } BLOCK child_block_offset = diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index 7adbf9f835c..e82e1afaf87 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -273,7 +273,6 @@ get_bvh_layout(const struct vk_acceleration_structure_build_state *state, UNREACHABLE("Unknown VkGeometryTypeKHR"); } - offset = align64(offset, 64); layout->instance_leaves_offset = offset; /* For a TLAS, we store the address of anv_instance_leaf after header @@ -283,6 +282,10 @@ get_bvh_layout(const struct vk_acceleration_structure_build_state *state, offset += leaf_count * sizeof(uint64_t); } + uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t); + layout->parent_child_map_offset = offset; + offset += parent_child_map_size; + layout->size = align64(offset, 64); } @@ -422,6 +425,8 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct .geometry_type = geometry_type, .instance_leaves_addr = vk_acceleration_structure_get_va(dst) + bvh_layout.instance_leaves_offset, + .parent_child_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.parent_child_map_offset, }; anv_bvh_build_set_args(commandBuffer, &args, sizeof(args)); From 6a65616aaae553f219777c7827fc74080f7747d0 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Wed, 14 Jan 2026 14:10:27 -0800 Subject: [PATCH 4/8] anv: Track leaf block offset map Track where is each leaf_id encoded in final BVH. It's a map of leaf_id == final_bvh_offset. 
This will help us navigate the BVH layout in the update pass. The leaf block offset map gives us: leaf id -> BVH block, and the parent-child map gives us: BVH block -> parent offset. Signed-off-by: Sagar Ghuge --- src/intel/vulkan/bvh/anv_build_interface.h | 1 + src/intel/vulkan/bvh/anv_bvh.h | 5 +++++ src/intel/vulkan/bvh/encode.comp | 7 +++++++ src/intel/vulkan/genX_acceleration_structure.c | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/src/intel/vulkan/bvh/anv_build_interface.h b/src/intel/vulkan/bvh/anv_build_interface.h index 8c2a9b45c5d..d9fd9620ef1 100644 --- a/src/intel/vulkan/bvh/anv_build_interface.h +++ b/src/intel/vulkan/bvh/anv_build_interface.h @@ -34,6 +34,7 @@ struct encode_args { uint32_t geometry_type; VOID_REF parent_child_map; + VOID_REF leaf_block_offset_map; }; struct header_args { diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index ac5155c8451..fc6c0cb1beb 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -333,6 +333,8 @@ struct anv_instance_leaf { | For a BLAS, nothing here | |-------------------------------| bvh_layout.parent_child_map_offset | Parent - child map | +|-------------------------------| bvh_layout.leaf_block_map_offset +| Leaf block offset map | |-------------------------------| | padding to align to | | 64 bytes boundary | bvh_layout.size @@ -355,6 +357,9 @@ struct bvh_layout { * */ uint64_t parent_child_map_offset; + /* This map stores the BVH block index for each leaf id (IR ID) */ + uint64_t leaf_block_map_offset; + /* Total size = bvh_offset + leaves + internal_nodes (assuming there's no * internal node collpased) */ diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index f7b45368723..62348a218e2 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -491,6 +491,13 @@ main() child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb; uint32_t type = ir_id_to_type(child); + if (child != 
VK_BVH_INVALID_NODE && + (type == vk_ir_node_triangle || type == vk_ir_node_aabb)) { + uint32_t ir_offset = ir_id_to_offset(child); + uint32_t leaf_id = ir_offset / intermediate_leaf_node_size; + DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) = child_block; + } + /* Track each children's parent in the map. */ if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) { uint32_t pcm_val = 0; diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index e82e1afaf87..ae1ae3c435c 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -286,6 +286,10 @@ get_bvh_layout(const struct vk_acceleration_structure_build_state *state, layout->parent_child_map_offset = offset; offset += parent_child_map_size; + uint64_t leaf_block_offset_size = (internal_count + leaf_count) * sizeof(uint32_t); + layout->leaf_block_map_offset = offset; + offset += leaf_block_offset_size; + layout->size = align64(offset, 64); } @@ -427,6 +431,8 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct bvh_layout.instance_leaves_offset, .parent_child_map = vk_acceleration_structure_get_va(dst) + bvh_layout.parent_child_map_offset, + .leaf_block_offset_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.leaf_block_map_offset, }; anv_bvh_build_set_args(commandBuffer, &args, sizeof(args)); From 26af065e605ae580f1145c507a54edcf81d7f04f Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Thu, 15 Jan 2026 14:36:28 -0800 Subject: [PATCH 5/8] intel: Add debug option to dump out maps This commit adds new debug options to dump out parent-child relationship map using INTEL_DEBUG=bvh_pcrel_map and leaf block offset map using INTEL_DEBUG=bvh_lbo_map. 
Signed-off-by: Sagar Ghuge --- src/intel/dev/intel_debug.c | 1 + src/intel/dev/intel_debug.h | 4 +++- src/intel/vulkan/anv_private.h | 3 ++- src/intel/vulkan/anv_util.c | 3 +++ src/intel/vulkan/genX_acceleration_structure.c | 16 ++++++++++++---- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index d98ea35ddc3..d6b02960248 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -108,6 +108,7 @@ static const struct debug_control_bitset debug_control[] = { OPT1("bvh_tlas_ir_hdr", DEBUG_BVH_TLAS_IR_HDR), OPT1("bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS), OPT1("bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS), + OPT1("bvh_pcrel_map", DEBUG_BVH_PCREL_MAP), OPT1("bvh_no_build", DEBUG_BVH_NO_BUILD), OPT1("task", DEBUG_TASK), OPT1("mesh", DEBUG_MESH), diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 345749fd5f6..79d76dc6e88 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -94,6 +94,7 @@ enum intel_debug_flag { DEBUG_BVH_TLAS_IR_HDR, DEBUG_BVH_BLAS_IR_AS, DEBUG_BVH_TLAS_IR_AS, + DEBUG_BVH_PCREL_MAP, DEBUG_BVH_NO_BUILD, DEBUG_NO_SEND_GATHER, DEBUG_NO_VRT, @@ -134,7 +135,8 @@ extern BITSET_WORD intel_debug[BITSET_WORDS(INTEL_DEBUG_MAX)]; INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) || \ INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) || \ INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) || \ - INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS))) + INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) || \ + INTEL_DEBUG(DEBUG_BVH_PCREL_MAP))) extern uint64_t intel_simd; extern uint32_t intel_debug_bkp_before_draw_count; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 9128207936c..5f458e22340 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2564,7 +2564,8 @@ enum anv_object_key_bvh_type { enum bvh_dump_type { BVH_ANV, BVH_IR_HDR, - BVH_IR_AS + BVH_IR_AS, + BVH_ANV_PCREL, }; struct anv_bvh_dump { diff --git a/src/intel/vulkan/anv_util.c 
b/src/intel/vulkan/anv_util.c index 583baf4c6d3..f02a4f09875 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -292,6 +292,9 @@ create_bvh_dump_file(struct anv_bvh_dump *bvh) case BVH_IR_AS: dump_sub_directory = "BVH_IR_AS"; break; + case BVH_ANV_PCREL: + dump_sub_directory = "BVH_ANV_PCREL"; + break; default: UNREACHABLE("invalid dump type"); } diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index ae1ae3c435c..b7ab269cb79 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -150,21 +150,29 @@ add_bvh_dump(struct anv_cmd_buffer *cmd_buffer, static void debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, VkDeviceAddress header_addr, - uint64_t bvh_anv_size, + struct bvh_layout bvh_layout, VkDeviceAddress intermediate_header_addr, VkDeviceAddress intermediate_as_addr, uint32_t leaf_count, VkGeometryTypeKHR geometry_type) { + if (INTEL_DEBUG(DEBUG_BVH_PCREL_MAP) && + geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { + add_bvh_dump(cmd_buffer, header_addr + bvh_layout.parent_child_map_offset, + bvh_layout.leaf_block_map_offset - bvh_layout.parent_child_map_offset, + geometry_type, + BVH_ANV_PCREL); + } + if (INTEL_DEBUG(DEBUG_BVH_BLAS) && geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { - add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, + add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type, BVH_ANV); } if (INTEL_DEBUG(DEBUG_BVH_TLAS) && geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) { - add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type, + add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type, BVH_ANV); } @@ -489,7 +497,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru vk_common_CmdDispatch(commandBuffer, 1, 1, 1); if (INTEL_DEBUG_BVH_ANY) { - debug_record_as_to_bvh_dump(cmd_buffer, header_addr, 
bvh_layout.size, + debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout, intermediate_header_addr, intermediate_bvh_addr, state->leaf_node_count, geometry_type); } From ac3c99edc5f8be574ca741cdfff3ed20950973aa Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Fri, 19 Dec 2025 21:50:34 -0800 Subject: [PATCH 6/8] anv: Implement update BVH Signed-off-by: Sagar Ghuge --- src/intel/ds/intel_driver_ds.cc | 1 + src/intel/ds/intel_tracepoints.py | 5 + src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/bvh/anv_build_interface.h | 17 ++ src/intel/vulkan/bvh/anv_bvh.h | 6 + src/intel/vulkan/bvh/encode.comp | 36 +-- src/intel/vulkan/bvh/meson.build | 1 + src/intel/vulkan/bvh/update.comp | 207 ++++++++++++++++++ src/intel/vulkan/bvh/update.h | 73 ++++++ .../vulkan/genX_acceleration_structure.c | 193 +++++++++++++++- 10 files changed, 518 insertions(+), 22 deletions(-) create mode 100644 src/intel/vulkan/bvh/update.comp create mode 100644 src/intel/vulkan/bvh/update.h diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index dccb30c8e7e..3a2333f4f36 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -501,6 +501,7 @@ CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS) +CREATE_DUAL_EVENT_CALLBACK(as_update, INTEL_DS_QUEUE_STAGE_AS) CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS) void diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index 696d666f4bb..dcd97c97071 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -263,6 +263,11 @@ def define_tracepoints(args): Arg(type='uint32_t', var='key', c_format='%x'), Arg(type='uint32_t', var='n_leaves', c_format='%u'), Arg(type='uint32_t', 
var='n_ir_leaves', c_format='%u')]) + begin_end_tp('as_update', repeat_last=True, + tp_args=[Arg(type='uint32_t', var='pass', c_format='%u'), + Arg(type='uint32_t', var='key', c_format='%x'), + Arg(type='uint32_t', var='n_leaves', c_format='%u'), + Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')]) begin_end_tp('as_copy', repeat_last=True) begin_end_tp('rays', diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 5f458e22340..7b13878f8d9 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2559,6 +2559,7 @@ enum anv_object_key_bvh_type { ANV_OBJECT_KEY_BVH_ENCODE = VK_META_OBJECT_KEY_DRIVER_OFFSET, ANV_OBJECT_KEY_BVH_HEADER, ANV_OBJECT_KEY_BVH_COPY, + ANV_OBJECT_KEY_BVH_UPDATE, }; enum bvh_dump_type { diff --git a/src/intel/vulkan/bvh/anv_build_interface.h b/src/intel/vulkan/bvh/anv_build_interface.h index d9fd9620ef1..eb393553521 100644 --- a/src/intel/vulkan/bvh/anv_build_interface.h +++ b/src/intel/vulkan/bvh/anv_build_interface.h @@ -6,6 +6,8 @@ #ifndef ANV_BVH_BUILD_INTERFACE_H #define ANV_BVH_BUILD_INTERFACE_H +#include "vk_build_interface.h" + #ifdef VULKAN #include "anv_build_helpers.h" #else @@ -15,6 +17,21 @@ #define VOID_REF uint64_t #endif +#define ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE (1u << (VK_BUILD_FLAG_COUNT + 0)) + +struct update_args { + VOID_REF output_bvh; + REF(uint32_t) internal_ready_count; + REF(vk_aabb) aabb_scratch; + uint32_t leaf_node_count; + uint32_t primitive_count; + uint32_t output_bvh_offset; + VOID_REF parent_child_map; + VOID_REF leaf_block_offset_map; + + vk_bvh_geometry_data geom_data; +}; + struct encode_args { /* Address within the IR BVH, marking the start of leaves/internal nodes. 
*/ VOID_REF intermediate_bvh; diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index fc6c0cb1beb..3f24fb456a2 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -172,6 +172,12 @@ struct anv_internal_node { */ uint8_t node_type; + /* Note: We are going to use this field to track number of children this + * internal node has. + * + * XXX: Keep an eye out on this field for future platforms in case if anything + * changes. + */ uint8_t reserved; /* 2^exp_x is the size of the grid in x dimension */ diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index 62348a218e2..a9450468a48 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -182,6 +182,9 @@ encode_internal_node(uint32_t child, uint32_t child_block_offset_from_internal_n DEREF(dst_node).exp_z = exp_i8[2]; DEREF(dst_node).node_mask = uint8_t(0xff); DEREF(dst_node).node_type = node_type; + + /* Using reserved field to track number of children. */ + DEREF(dst_node).reserved = uint8_t(child_count); } child_aabb = conservative_aabb(child_aabb); @@ -359,7 +362,9 @@ main() /* Tracks BLOCK where the next children should be encoded. */ DEREF(args.header).dst_node_offset = 1; DEREF(header).instance_count = 0; - DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET; + if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) { + DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET; + } } IR_NODE children[6] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE, @@ -442,7 +447,6 @@ main() REF(vk_ir_box_node)NODE_OFFSET(children[i]); DEREF(child_node).bvh_offset = child_offset; } - child_offset += (type == vk_ir_node_instance) ? 
2 : 1; } @@ -491,18 +495,24 @@ main() child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb; uint32_t type = ir_id_to_type(child); - if (child != VK_BVH_INVALID_NODE && - (type == vk_ir_node_triangle || type == vk_ir_node_aabb)) { - uint32_t ir_offset = ir_id_to_offset(child); - uint32_t leaf_id = ir_offset / intermediate_leaf_node_size; - DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) = child_block; - } + if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) { + if (child != VK_BVH_INVALID_NODE && + (type == vk_ir_node_triangle || type == vk_ir_node_aabb)) { + uint32_t ir_offset = ir_id_to_offset(child); + uint32_t leaf_id = ir_offset / intermediate_leaf_node_size; + /* Block offset 0 is assigned to root, so avoid accidental + * assignment. + */ + DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) = + (child_block != 0) ? child_block : VK_NULL_BVH_OFFSET; + } - /* Track each children's parent in the map. */ - if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) { - uint32_t pcm_val = 0; - pcm_val = (cluster.idx << 26) | internal_node_block; - DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm_val; + /* Track each children's parent in the map. 
*/ + if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) { + uint32_t pcm = 0; + pcm = internal_node_block | (cluster.idx << 26); + DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm; + } } if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) { diff --git a/src/intel/vulkan/bvh/meson.build b/src/intel/vulkan/bvh/meson.build index 81637140d3e..0ec67a5d328 100644 --- a/src/intel/vulkan/bvh/meson.build +++ b/src/intel/vulkan/bvh/meson.build @@ -7,6 +7,7 @@ bvh_shaders = [ 'encode.comp', 'header.comp', 'copy.comp', + 'update.comp', ] # A mapping: [filename version, GFX_VERx10 define version] diff --git a/src/intel/vulkan/bvh/update.comp b/src/intel/vulkan/bvh/update.comp new file mode 100644 index 00000000000..311ece6fe51 --- /dev/null +++ b/src/intel/vulkan/bvh/update.comp @@ -0,0 +1,207 @@ +/* + * Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#version 460 +#include "anv_build_helpers.h" +#include "anv_build_interface.h" +#include "update.h" + +layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout(push_constant) uniform CONSTS { + update_args args; +}; + +vk_aabb +build_and_encode_leaf(uint32_t leaf_local_idx, BLOCK leaf_block) +{ + VOID_REF dst = BLOCK_OFFSET(leaf_block); + vk_aabb bounds = vk_aabb(vec3(0.0f), vec3(0.0f)); + + switch (args.geom_data.geometry_type) { + case VK_GEOMETRY_TYPE_TRIANGLES_KHR: + anv_build_triangle(bounds, dst, args.geom_data, leaf_local_idx); + break; + case VK_GEOMETRY_TYPE_AABBS_KHR: { + VOID_REF src_aabb_ptr = OFFSET(args.geom_data.data, + leaf_local_idx * args.geom_data.stride); + anv_build_aabb(bounds, src_aabb_ptr, dst, args.geom_data.geometry_id, + leaf_local_idx); + break; + } + default: + /* instances typically not updated */ + break; + } + + return bounds; +} + +vk_aabb +recompute_parent(BLOCK parent_block, uint32_t updated_idx, + uint32_t child_count, vk_aabb updated_child_bounds) +{ + REF(anv_internal_node) parent = 
REF(anv_internal_node)(BLOCK_OFFSET(parent_block)); + + vk_aabb box; + box.min = vec3(INFINITY); + box.max = vec3(-INFINITY); + + BLOCK current_child_block = parent_block + DEREF(parent).child_block_offset; + + vk_aabb cache_aabb[6]; + for (uint32_t i = 0; i < child_count; ++i) { + cache_aabb[i] = (i == updated_idx) ? updated_child_bounds : + DEREF(INDEX(vk_aabb, args.aabb_scratch, current_child_block + i)); + aabb_extend(box, cache_aabb[i]); + } + + vk_aabb conservative_child_aabb = conservative_aabb(box); + + float up = 1.0 + ULP; + ivec3 exp; + + vec3 len = aabb_size(conservative_child_aabb) * up; + vec3 mant = frexp(len, exp); + + exp.x += int((mant.x > (255.0f / 256.0f))); + exp.y += int((mant.y > (255.0f / 256.0f))); + exp.z += int((mant.z > (255.0f / 256.0f))); + + i8vec3 exponent_i8 = i8vec3(exp); + i8vec3 exp_i8 = {max(int8_t(-128), exponent_i8.x), + max(int8_t(-128), exponent_i8.y), + max(int8_t(-128), exponent_i8.z)}; + + DEREF(parent).lower[0] = conservative_child_aabb.min.x; + DEREF(parent).lower[1] = conservative_child_aabb.min.y; + DEREF(parent).lower[2] = conservative_child_aabb.min.z; + DEREF(parent).exp_x = exp_i8[0]; + DEREF(parent).exp_y = exp_i8[1]; + DEREF(parent).exp_z = exp_i8[2]; + + vec3 base = conservative_child_aabb.min; + vec3 scale = ldexp(vec3(1.0), exp_i8 - 8); + + for (uint32_t i = 0; i < child_count; ++i) { + vk_aabb child_bounds = cache_aabb[i]; + + vec3 lower = (child_bounds.min - base) / scale; + vec3 upper = (child_bounds.max - base) / scale; + lower = clamp(floor(lower), vec3(0.0), vec3(255.0)); + upper = clamp(ceil(upper), vec3(0.0), vec3(255.0)); + + DEREF(parent).lower_x[i] = uint8_t(lower.x); + DEREF(parent).lower_y[i] = uint8_t(lower.y); + DEREF(parent).lower_z[i] = uint8_t(lower.z); + DEREF(parent).upper_x[i] = uint8_t(upper.x); + DEREF(parent).upper_y[i] = uint8_t(upper.y); + DEREF(parent).upper_z[i] = uint8_t(upper.z); + } + + return box; +} + +void main() +{ + uint32_t leaf_local = gl_GlobalInvocationID.x; + if 
(leaf_local >= args.leaf_node_count || leaf_local >= args.primitive_count) + return; + + uint32_t leaf_id = args.geom_data.first_id + leaf_local; + BLOCK leaf_block = DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)); + + if (leaf_block == VK_NULL_BVH_OFFSET) + return; + + vk_aabb leaf_bounds = build_and_encode_leaf(leaf_local, leaf_block); + DEREF(INDEX(vk_aabb, args.aabb_scratch, leaf_block)) = leaf_bounds; + + /* Ensure scratch update and leaf encoding is visible before atomic in + * following loop. + */ + memoryBarrierBuffer(); + + BLOCK current_block = leaf_block; + vk_aabb current_bounds = leaf_bounds; + + while (true) { + BLOCK parent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, current_block)); + + /* No parent at all */ + if (parent_raw == VK_NULL_BVH_OFFSET && current_block == leaf_block) { + REF(anv_accel_struct_header) hdr = + REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); + DEREF(hdr).aabb = current_bounds; + break; + } + + BLOCK parent = parent_raw & 0x03FFFFFF; + REF(anv_internal_node) internal_node = REF(anv_internal_node)(BLOCK_OFFSET(parent)); + /* Internal node's reserved field is tracking number of children count.*/ + uint32_t valid_child_count = uint32_t(DEREF(internal_node).reserved); + if (valid_child_count == 0) + break; + + /* Last-child-wins atomic */ + uint32_t ready = atomicAdd( + DEREF(INDEX(uint32_t, args.internal_ready_count, parent)), 1, + gl_ScopeDevice, gl_StorageSemanticsBuffer, + gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible); + + /* Not the last child */ + if (ready != valid_child_count - 1) + break; + + uint32_t child_idx = parent_raw >> 26; + /* Encode and quantize parent bounds */ + vk_aabb parent_bounds = recompute_parent(parent, child_idx, valid_child_count, current_bounds); + + /* Store parent bounds for next level */ + DEREF(INDEX(vk_aabb, args.aabb_scratch, parent)) = parent_bounds; + + /* Ensure scratch write and parent encoding is visible in 
the next + * iteration of this loop. + */ + memoryBarrierBuffer(); + + /* Check whether this parent is the root internal node */ + uint32_t grandparent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, parent)); + + if (grandparent_raw == VK_NULL_BVH_OFFSET) { + REF(anv_internal_node) root = REF(anv_internal_node)(BLOCK_OFFSET(parent)); + + vec3 base = vec3(DEREF(root).lower[0], DEREF(root).lower[1], DEREF(root).lower[2]); + ivec3 exp = ivec3( int(DEREF(root).exp_x), int(DEREF(root).exp_y), int(DEREF(root).exp_z)); + vec3 scale = ldexp(vec3(1.0), exp - 8); + + vk_aabb root_bounds = vk_aabb(vec3(INFINITY), vec3(-INFINITY)); + + for (uint32_t i = 0; i < valid_child_count; ++i) { + vec3 lower = vec3(DEREF(root).lower_x[i], + DEREF(root).lower_y[i], + DEREF(root).lower_z[i]); + + vec3 upper = vec3(DEREF(root).upper_x[i], + DEREF(root).upper_y[i], + DEREF(root).upper_z[i]); + + vk_aabb child; + child.min = base + lower * scale; + child.max = base + upper * scale; + + aabb_extend(root_bounds, child); + } + + REF(anv_accel_struct_header) hdr = + REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset); + DEREF(hdr).aabb = root_bounds; + break; + } + + current_block = parent; + current_bounds = parent_bounds; + } +} diff --git a/src/intel/vulkan/bvh/update.h b/src/intel/vulkan/bvh/update.h new file mode 100644 index 00000000000..16c46bed77c --- /dev/null +++ b/src/intel/vulkan/bvh/update.h @@ -0,0 +1,73 @@ +/* + * Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef ANV_BVH_UPDATE_H +#define ANV_BVH_UPDATE_H + +#include "encode.h" + +void +anv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id) +{ + triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id); + + triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride); + + if (geom_data.transform != NULL) { + mat4 transform = mat4(1.0); 
+ + for (uint32_t col = 0; col < 4; col++) { + for (uint32_t row = 0; row < 3; row++) { + transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4)); + } + } + + for (uint32_t i = 0; i < 3; i++) { + vertices.vertex[i] = transform * vertices.vertex[i]; + } + } + + vk_ir_triangle_node node; + + bounds.min = vec3(INFINITY); + bounds.max = vec3(-INFINITY); + + for (uint32_t coord = 0; coord < 3; coord++) { + for (uint32_t comp = 0; comp < 3; comp++) { + node.coords[coord][comp] = vertices.vertex[coord][comp]; + bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]); + bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]); + } + } + + node.triangle_id = global_id; + node.geometry_id_and_flags = geom_data.geometry_id; + + anv_encode_triangle(dst_ptr, node); +} + +void +anv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id) +{ + for (uint32_t vec = 0; vec < 2; vec++) { + for (uint32_t comp = 0; comp < 3; comp++) { + float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3)); + + if (vec == 0) + bounds.min[comp] = coord; + else + bounds.max[comp] = coord; + } + } + + vk_ir_aabb_node node; + node.base.aabb = bounds; + node.primitive_id = global_id; + node.geometry_id_and_flags = geometry_id; + + anv_encode_aabb(dst_ptr, node); +} + +#endif diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index b7ab269cb79..02f42c34920 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -30,6 +30,16 @@ static uint32_t blas_id = 0; static uint32_t tlas_id = 0; +struct update_scratch_layout { + uint32_t internal_ready_count_offset; + uint32_t aabb_offset; + uint32_t size; +}; + +enum anv_encode_key { + ANV_ENCODE_KEY_ALLOW_UPDATE_BVH = (1 << 0), +}; + static void begin_debug_marker(VkCommandBuffer commandBuffer, struct vk_acceleration_structure_build_marker 
*marker) @@ -58,6 +68,9 @@ begin_debug_marker(VkCommandBuffer commandBuffer, case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: trace_intel_begin_as_encode(&cmd_buffer->trace); break; + case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE: + trace_intel_begin_as_update(&cmd_buffer->trace); + break; default: UNREACHABLE("Invalid build step"); } @@ -91,6 +104,7 @@ end_debug_marker(VkCommandBuffer commandBuffer, trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace); break; case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE: + case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE: trace_intel_end_as_encode(&cmd_buffer->trace, marker->encode.pass, marker->encode.key, @@ -233,6 +247,7 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, #define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h) #define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h) #define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h) +#define UPDATE_SPV_PATH STRINGIFY(bvh/genX(update).spv.h) static const uint32_t encode_spv[] = { #include ENCODE_SPV_PATH @@ -246,6 +261,10 @@ static const uint32_t copy_spv[] = { #include COPY_SPV_PATH }; +static const uint32_t update_spv[] = { +#include UPDATE_SPV_PATH +}; + static void get_bvh_layout(const struct vk_acceleration_structure_build_state *state, struct bvh_layout *layout) @@ -290,13 +309,15 @@ get_bvh_layout(const struct vk_acceleration_structure_build_state *state, offset += leaf_count * sizeof(uint64_t); } - uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t); - layout->parent_child_map_offset = offset; - offset += parent_child_map_size; + if (state->config.encode_key[1] & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) { + uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t); + layout->parent_child_map_offset = offset; + offset += parent_child_map_size; - uint64_t leaf_block_offset_size = (internal_count + leaf_count) * sizeof(uint32_t); - layout->leaf_block_map_offset = offset; - offset += 
leaf_block_offset_size; + uint64_t leaf_block_offset_size = leaf_count * sizeof(uint32_t); + layout->leaf_block_map_offset = offset; + offset += leaf_block_offset_size; + } layout->size = align64(offset, 64); } @@ -323,9 +344,13 @@ anv_get_build_config(VkDevice device, struct vk_acceleration_structure_build_sta * the compacted size of an updatable AS as the maximum possible size for * any AS that could also be built from the same number of leaf nodes. */ - state->config.encode_key[1] = + state->config.encode_key[0] = ((flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) && !(flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) ? 1 : 0; + + if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) && + state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) + state->config.encode_key[1] = ANV_ENCODE_KEY_ALLOW_UPDATE_BVH; } static void @@ -375,13 +400,32 @@ anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args, anv_CmdPushConstants2(commandBuffer, &push_info); } +static uint32_t +anv_build_flags(VkCommandBuffer commandBuffer, uint32_t key) +{ + uint32_t flags = 0; + + /* This will write following required maps for update BVH pass: + * 1) Parent-Child offset map + * 2) Leaf block offset map + * 3) Parent slot offset map + * 4) Parent child count map + */ + if (key & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) { + flags |= ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE; + } + + return flags; +} + static VkResult anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_ENCODE, encode_spv, sizeof(encode_spv), - sizeof(struct encode_args), 0); + sizeof(struct encode_args), + anv_build_flags(commandBuffer, state->config.encode_key[1])); return VK_SUCCESS; } @@ -489,7 +533,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru .bvh_offset = 
bvh_layout.bvh_offset, .instance_count = instance_count, .instance_leaves_offset = bvh_layout.instance_leaves_offset, - .is_compacted = (state->config.encode_key[1] == 1), + .is_compacted = (state->config.encode_key[0] == 1), .bvh_size = bvh_layout.size, }; @@ -503,6 +547,133 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru } } +static void +anv_get_update_scratch_layout(struct anv_device *device, + const struct vk_acceleration_structure_build_state *state, + struct update_scratch_layout *scratch) +{ + uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1; + uint32_t offset = 0; + + scratch->internal_ready_count_offset = offset; + offset += sizeof(uint32_t) * (internal_count + state->leaf_node_count); + + scratch->aabb_offset = offset; + offset += sizeof(vk_aabb) * (internal_count + state->leaf_node_count); + + scratch->size = offset; +} + +static VkDeviceSize +anv_get_update_scratch_size(VkDevice _device, + const struct vk_acceleration_structure_build_state *state) +{ + VK_FROM_HANDLE(anv_device, device, _device); + + struct update_scratch_layout scratch; + anv_get_update_scratch_layout(device, state, &scratch); + + return scratch.size; +} + +static void +anv_init_update_scratch(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *states, + uint32_t build_count) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + for (uint32_t i = 0; i < build_count; i++) { + const struct vk_acceleration_structure_build_state *state = &states[i]; + if (state->config.internal_type != VK_INTERNAL_BUILD_TYPE_UPDATE) + continue; + + uint64_t scratch = state->build_info->scratchData.deviceAddress; + + struct update_scratch_layout layout; + anv_get_update_scratch_layout(device, state, &layout); + + anv_cmd_fill_buffer_addr(commandBuffer, scratch, layout.size, 0x0); + } +} + +static void +anv_update_prepare(VkCommandBuffer commandBuffer, + const 
struct vk_acceleration_structure_build_state *state, + bool flushed_cp_after_init_update_scratch, + bool flushed_compute_after_init_update_scratch) +{ + if (!flushed_compute_after_init_update_scratch || + !flushed_cp_after_init_update_scratch) + vk_barrier_compute_w_to_compute_r(commandBuffer); + + anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_UPDATE, + update_spv, sizeof(update_spv), + sizeof(struct update_args), 0); +} + +static void +anv_update_as(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *state) +{ + VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure); + VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); + + struct anv_device *device = cmd_buffer->device; + + struct bvh_layout bvh_layout; + get_bvh_layout(state, &bvh_layout); + + /* Just copy over data from src to dst if mismatch. */ + if (src != dst) { + assert(src->offset == 0 && dst->offset == 0); + struct anv_address src_addr = + anv_address_from_u64(vk_acceleration_structure_get_va(src)); + struct anv_address dst_addr = + anv_address_from_u64(vk_acceleration_structure_get_va(dst)); + + assert(src->size == dst->size); + anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, src->size); + vk_barrier_compute_w_to_compute_r(commandBuffer); + } + + struct update_scratch_layout update_layout; + anv_get_update_scratch_layout(device, state, &update_layout); + + struct update_args update_consts = { + .internal_ready_count = state->build_info->scratchData.deviceAddress + + update_layout.internal_ready_count_offset, + .aabb_scratch = state->build_info->scratchData.deviceAddress + + update_layout.aabb_offset, + .leaf_node_count = state->leaf_node_count, + .parent_child_map = vk_acceleration_structure_get_va(dst) + + bvh_layout.parent_child_map_offset, + .leaf_block_offset_map = vk_acceleration_structure_get_va(dst) + + 
bvh_layout.leaf_block_map_offset, + .output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset, + .output_bvh_offset = bvh_layout.bvh_offset, + }; + + uint32_t first_id = 0; + for (uint32_t i = 0; i < state->build_info->geometryCount; i++) { + const VkAccelerationStructureGeometryKHR *geom = + state->build_info->pGeometries ? &state->build_info->pGeometries[i] :state->build_info->ppGeometries[i]; + const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = + &state->build_range_infos[i]; + + update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info); + update_consts.primitive_count = build_range_info->primitiveCount; + + anv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts)); + anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned) + (commandBuffer, build_range_info->primitiveCount, 1, 1); + + first_id += build_range_info->primitiveCount; + } +} + static const struct vk_acceleration_structure_build_ops anv_build_ops = { .begin_debug_marker = begin_debug_marker, .end_debug_marker = end_debug_marker, @@ -510,6 +681,10 @@ static const struct vk_acceleration_structure_build_ops anv_build_ops = { .get_build_config = anv_get_build_config, .encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline }, .encode_as = { anv_encode_as, anv_init_header }, + .get_update_scratch_size = anv_get_update_scratch_size, + .init_update_scratch = anv_init_update_scratch, + .update_prepare[0] = anv_update_prepare, + .update_as[0] = anv_update_as, }; static VkResult From 16831bf3ca9d93833248a8f66f1f66ef70483d7a Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Tue, 20 Jan 2026 11:48:25 -0800 Subject: [PATCH 7/8] intel: Add debug hook to dump out BVH after update INTEL_DEBUG=bvh_update_as will dump out AS after update pass. 
Signed-off-by: Sagar Ghuge --- src/intel/dev/intel_debug.c | 1 + src/intel/dev/intel_debug.h | 2 ++ src/intel/vulkan/anv_private.h | 1 + src/intel/vulkan/anv_util.c | 3 +++ .../vulkan/genX_acceleration_structure.c | 20 +++++++++++++++++-- 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/intel/dev/intel_debug.c b/src/intel/dev/intel_debug.c index d6b02960248..f9d2770f4c6 100644 --- a/src/intel/dev/intel_debug.c +++ b/src/intel/dev/intel_debug.c @@ -109,6 +109,7 @@ static const struct debug_control_bitset debug_control[] = { OPT1("bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS), OPT1("bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS), OPT1("bvh_pcrel_map", DEBUG_BVH_PCREL_MAP), + OPT1("bvh_update_as", DEBUG_BVH_UPDATE_AS), OPT1("bvh_no_build", DEBUG_BVH_NO_BUILD), OPT1("task", DEBUG_TASK), OPT1("mesh", DEBUG_MESH), diff --git a/src/intel/dev/intel_debug.h b/src/intel/dev/intel_debug.h index 79d76dc6e88..ea4241a8b05 100644 --- a/src/intel/dev/intel_debug.h +++ b/src/intel/dev/intel_debug.h @@ -95,6 +95,7 @@ enum intel_debug_flag { DEBUG_BVH_BLAS_IR_AS, DEBUG_BVH_TLAS_IR_AS, DEBUG_BVH_PCREL_MAP, + DEBUG_BVH_UPDATE_AS, DEBUG_BVH_NO_BUILD, DEBUG_NO_SEND_GATHER, DEBUG_NO_VRT, @@ -136,6 +137,7 @@ extern BITSET_WORD intel_debug[BITSET_WORDS(INTEL_DEBUG_MAX)]; INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) || \ INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) || \ INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) || \ + INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) || \ INTEL_DEBUG(DEBUG_BVH_PCREL_MAP))) extern uint64_t intel_simd; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7b13878f8d9..99761a6e521 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2567,6 +2567,7 @@ enum bvh_dump_type { BVH_IR_HDR, BVH_IR_AS, BVH_ANV_PCREL, + BVH_ANV_UPDATE }; struct anv_bvh_dump { diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index f02a4f09875..47ff08b4d29 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -295,6 +295,9 @@ 
create_bvh_dump_file(struct anv_bvh_dump *bvh) case BVH_ANV_PCREL: dump_sub_directory = "BVH_ANV_PCREL"; break; + case BVH_ANV_UPDATE: + dump_sub_directory = "BVH_ANV_UPDATE"; + break; default: UNREACHABLE("invalid dump type"); } diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index 02f42c34920..6d09f1efe10 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -168,8 +168,15 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer, VkDeviceAddress intermediate_header_addr, VkDeviceAddress intermediate_as_addr, uint32_t leaf_count, - VkGeometryTypeKHR geometry_type) + VkGeometryTypeKHR geometry_type, + bool after_update) { + if (INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) && after_update && + geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { + add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type, + BVH_ANV_UPDATE); + } + if (INTEL_DEBUG(DEBUG_BVH_PCREL_MAP) && geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) { add_bvh_dump(cmd_buffer, header_addr + bvh_layout.parent_child_map_offset, @@ -543,7 +550,8 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru if (INTEL_DEBUG_BVH_ANY) { debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout, intermediate_header_addr, intermediate_bvh_addr, - state->leaf_node_count, geometry_type); + state->leaf_node_count, geometry_type, + false /* after update */); } } @@ -672,6 +680,14 @@ anv_update_as(VkCommandBuffer commandBuffer, first_id += build_range_info->primitiveCount; } + + if (INTEL_DEBUG_BVH_ANY) { + debug_record_as_to_bvh_dump(cmd_buffer, vk_acceleration_structure_get_va(dst), + bvh_layout, 0, 0, + state->leaf_node_count, + vk_get_as_geometry_type(state->build_info), + true /* after update */); + } } static const struct vk_acceleration_structure_build_ops anv_build_ops = { From be9c89fda7a7c449eb5912f95734b4ff33b3b6c9 Mon Sep 17 00:00:00 2001 From: 
Sagar Ghuge Date: Mon, 23 Mar 2026 21:12:10 -0700 Subject: [PATCH 8/8] DEBUG: debug commit Signed-off-by: Sagar Ghuge --- src/intel/vulkan/bvh/anv_build_interface.h | 5 +++++ src/intel/vulkan/bvh/anv_bvh.h | 7 ++++++- src/intel/vulkan/bvh/header.comp | 5 +++++ src/intel/vulkan/genX_acceleration_structure.c | 5 +++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/bvh/anv_build_interface.h b/src/intel/vulkan/bvh/anv_build_interface.h index eb393553521..850fa942105 100644 --- a/src/intel/vulkan/bvh/anv_build_interface.h +++ b/src/intel/vulkan/bvh/anv_build_interface.h @@ -67,6 +67,11 @@ struct header_args { uint64_t bvh_size; uint8_t is_compacted; + + uint64_t parent_child_map_offset; + uint64_t leaf_block_map_offset; + uint32_t leaf_count; + uint32_t total_nodes; }; #define ANV_COPY_MODE_COPY 0 diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index 3f24fb456a2..264c5d6eb73 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -76,7 +76,12 @@ struct anv_accel_struct_header { uint32_t instance_leaves_offset; - uint32_t padding[40]; + uint64_t parent_child_map_offset; + uint64_t leaf_block_map_offset; + uint32_t leaf_count; + uint32_t total_nodes; + + uint32_t padding[34]; }; /* Mixed internal node with type per child */ diff --git a/src/intel/vulkan/bvh/header.comp b/src/intel/vulkan/bvh/header.comp index 4235a1025c0..0069c0d6641 100644 --- a/src/intel/vulkan/bvh/header.comp +++ b/src/intel/vulkan/bvh/header.comp @@ -34,6 +34,11 @@ main(void) DEREF(args.dst).instance_count = args.instance_count; DEREF(args.dst).instance_leaves_offset = args.instance_leaves_offset; + DEREF(args.dst).parent_child_map_offset = args.parent_child_map_offset, + DEREF(args.dst).leaf_block_map_offset = args.leaf_block_map_offset, + DEREF(args.dst).leaf_count = args.leaf_count, + DEREF(args.dst).total_nodes = args.total_nodes, + /* 128 is local_size_x in copy.comp shader, 16bytes is the amount of data * 
copied by each iteration of that shader's loop */ diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index 6d09f1efe10..8d9d8d19590 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -542,6 +542,11 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru .instance_leaves_offset = bvh_layout.instance_leaves_offset, .is_compacted = (state->config.encode_key[0] == 1), .bvh_size = bvh_layout.size, + + .parent_child_map_offset = bvh_layout.parent_child_map_offset, + .leaf_block_map_offset = bvh_layout.leaf_block_map_offset, + .leaf_count = state->leaf_node_count, + .total_nodes = state->leaf_node_count + (MAX2(state->leaf_node_count, 2) - 1), }; anv_bvh_build_set_args(commandBuffer, &args, sizeof(args));