diff --git a/src/intel/vulkan/bvh/anv_build_helpers.h b/src/intel/vulkan/bvh/anv_build_helpers.h index f7793179900..dd047f3e0c3 100644 --- a/src/intel/vulkan/bvh/anv_build_helpers.h +++ b/src/intel/vulkan/bvh/anv_build_helpers.h @@ -18,4 +18,45 @@ TYPE(child_data, 1); TYPE(instance_leaf_part0, 8); TYPE(instance_leaf_part1, 8); +#define ULP 1.1920928955078125e-7f /* 2^-23, i.e. float32 machine epsilon */ + +/* An offset in 64B blocks from args.output_bvh that points to output of + * encoded nodes. Can be a leaf or internal node. + */ +#define BLOCK uint32_t +#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) + +uint32_t +get_instance_flag(uint32_t src) +{ + return ((src >> 24) & 0xff); +} + +vk_aabb +conservative_aabb(vk_aabb input_aabb) +{ + vk_aabb out_aabb; + + vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); + float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); + + out_aabb.min = input_aabb.min - vec3(err); + out_aabb.max = input_aabb.max + vec3(err); + + return out_aabb; +} + +void +aabb_extend(inout vk_aabb v1, vk_aabb v2) +{ + v1.min = min(v1.min, v2.min); + v1.max = max(v1.max, v2.max); +} + +vec3 +aabb_size(vk_aabb input_aabb) +{ + return input_aabb.max - input_aabb.min; +} + #endif diff --git a/src/intel/vulkan/bvh/anv_bvh.h b/src/intel/vulkan/bvh/anv_bvh.h index 011b449dfd9..b8f67635d99 100644 --- a/src/intel/vulkan/bvh/anv_bvh.h +++ b/src/intel/vulkan/bvh/anv_bvh.h @@ -132,7 +132,9 @@ struct anv_quad_leaf_node { * Reserved (9-bits) */ uint32_t prim_index1_delta; - float v[4][3]; + float v[3][3]; + /* Second triangle coords */ + float v1[3]; }; struct anv_procedural_leaf_node { diff --git a/src/intel/vulkan/bvh/encode.comp b/src/intel/vulkan/bvh/encode.comp index a13b97e44f0..8e41099d646 100644 --- a/src/intel/vulkan/bvh/encode.comp +++ b/src/intel/vulkan/bvh/encode.comp @@ -9,8 +9,8 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #include "anv_build_helpers.h" #include "anv_build_interface.h" +#include "encode.h"
-#define ULP 1.1920928955078125e-7f #define READY_TO_WRITE(offset) ((offset) < VK_NULL_BVH_OFFSET) #define ASSIGNED_NODE_TO_ENCODE (gl_GlobalInvocationID.x < DEREF(args.header).ir_internal_node_count) @@ -24,12 +24,6 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #define IR_NODE uint32_t #define NODE_OFFSET(node) (OFFSET(args.intermediate_bvh, ir_id_to_offset(node))) -/* An offset in 64B blocks from args.output_bvh that points to output of - * encoded nodes. Can be a leaf or internal node. - */ -#define BLOCK uint32_t -#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block)) - layout(push_constant) uniform CONSTS { encode_args args; }; @@ -41,12 +35,6 @@ debug_dump(uint32_t offset, uint32_t value) DEREF(msg) = value; } -uint32_t -get_instance_flag(uint32_t src) -{ - return ((src >> 24) & 0xff); -} - struct anv_cluster { /* simd lane inside cluster: 0 .. 7 */ uint32_t idx; @@ -77,153 +65,21 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ if (DEBUG_EXIT_EARLY(type)) return; + switch (type) { case vk_ir_node_triangle: { - REF(anv_quad_leaf_node) quad_leaf = REF(anv_quad_leaf_node)(dst_node); vk_ir_triangle_node src = DEREF(REF(vk_ir_triangle_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_QUAD & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - /* Geometry opqaue (1-bit) is encoded on 30-bit index */ - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - /* Disable the second triangle */ - uint32_t prim_index1_delta = 0; - /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set.
*/ - prim_index1_delta |= (1 << 22); - - DEREF(quad_leaf).prim_index1_delta = prim_index1_delta; - DEREF(quad_leaf).prim_index0 = src.triangle_id; - DEREF(quad_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(quad_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* Setup single triangle */ - for (uint32_t i = 0; i < 3; i++) { - for (uint32_t j = 0; j < 3; j++) { - DEREF(quad_leaf).v[i][j] = src.coords[i][j]; - } - } + anv_encode_triangle(dst_node, src); break; } case vk_ir_node_aabb: { - REF(anv_procedural_leaf_node) aabb_leaf = REF(anv_procedural_leaf_node)(dst_node); vk_ir_aabb_node src = DEREF(REF(vk_ir_aabb_node)(src_node)); - uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff; - - /* sub-type (4-bit) encoded on 24-bit index */ - geometry_id_and_flags |= (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24; - - if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) { - geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30); - } - - DEREF(aabb_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags; - - /* shaderIndex is typically set to match geomIndex - * Geom mask is default to 0xFF - */ - DEREF(aabb_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff); - - /* num primitives = 1 */ - uint32_t dw1 = 1; - /* "last" has only 1 bit, and it is set.
*/ - dw1 |= (1 << 31); - - DEREF(aabb_leaf).DW1 = dw1; - DEREF(aabb_leaf).primIndex[0] = src.primitive_id; + anv_encode_aabb(dst_node, src); break; } case vk_ir_node_instance: { vk_ir_instance_node src = DEREF(REF(vk_ir_instance_node)(src_node)); - - REF(anv_instance_leaf) dst_instance = REF(anv_instance_leaf)(dst_node); - REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); - uint64_t start_node_ptr = uint64_t(src.base_ptr) + args.output_bvh_offset; - -#if GFX_VERx10 >= 300 - DEREF(dst_instance).part0.QW_startNodePtr = start_node_ptr; - uint32_t instance_contribution_and_geom_mask = 0; - instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = instance_contribution_and_geom_mask; - - uint32_t inst_flags_and_the_rest = 0; - inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); - inst_flags_and_the_rest |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? - ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - - DEREF(dst_instance).part0.DW1 = inst_flags_and_the_rest; - -#else - uint32_t shader_index_and_geom_mask = 0; - shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); - DEREF(dst_instance).part0.DW0 = shader_index_and_geom_mask; - - uint32_t instance_contribution_and_geom_flags = 0; - instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; - instance_contribution_and_geom_flags |= - ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
- ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; - DEREF(dst_instance).part0.DW1 = instance_contribution_and_geom_flags; - - DEREF(dst_instance).part0.QW_startNodePtr = - (start_node_ptr & ((1ul << 48) - 1)) | - (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); -#endif - - mat4 transform = mat4(src.otw_matrix); - - mat4 inv_transform = transpose(inverse(transpose(transform))); - mat3x4 wto_matrix = mat3x4(inv_transform); - mat3x4 otw_matrix = mat3x4(transform); - - /* Arrange WTO transformation matrix in column-major order */ - DEREF(dst_instance).part0.world2obj_vx_x = wto_matrix[0][0]; - DEREF(dst_instance).part0.world2obj_vx_y = wto_matrix[1][0]; - DEREF(dst_instance).part0.world2obj_vx_z = wto_matrix[2][0]; - DEREF(dst_instance).part0.obj2world_p_x = otw_matrix[0][3]; - - DEREF(dst_instance).part0.world2obj_vy_x = wto_matrix[0][1]; - DEREF(dst_instance).part0.world2obj_vy_y = wto_matrix[1][1]; - DEREF(dst_instance).part0.world2obj_vy_z = wto_matrix[2][1]; - DEREF(dst_instance).part0.obj2world_p_y = otw_matrix[1][3]; - - DEREF(dst_instance).part0.world2obj_vz_x = wto_matrix[0][2]; - DEREF(dst_instance).part0.world2obj_vz_y = wto_matrix[1][2]; - DEREF(dst_instance).part0.world2obj_vz_z = wto_matrix[2][2]; - DEREF(dst_instance).part0.obj2world_p_z = otw_matrix[2][3]; - - /* Arrange OTW transformation matrix in column-major order */ - DEREF(dst_instance).part1.obj2world_vx_x = otw_matrix[0][0]; - DEREF(dst_instance).part1.obj2world_vx_y = otw_matrix[1][0]; - DEREF(dst_instance).part1.obj2world_vx_z = otw_matrix[2][0]; - DEREF(dst_instance).part1.world2obj_p_x = wto_matrix[0][3]; - - DEREF(dst_instance).part1.obj2world_vy_x = otw_matrix[0][1]; - DEREF(dst_instance).part1.obj2world_vy_y = otw_matrix[1][1]; - DEREF(dst_instance).part1.obj2world_vy_z = otw_matrix[2][1]; - DEREF(dst_instance).part1.world2obj_p_y = wto_matrix[1][3]; - - DEREF(dst_instance).part1.obj2world_vz_x = otw_matrix[0][2]; - DEREF(dst_instance).part1.obj2world_vz_y = otw_matrix[1][2];
- DEREF(dst_instance).part1.obj2world_vz_z = otw_matrix[2][2]; - DEREF(dst_instance).part1.world2obj_p_z = wto_matrix[2][3]; - - DEREF(dst_instance).part1.bvh_ptr = src.base_ptr; - DEREF(dst_instance).part1.instance_index = src.instance_id; - DEREF(dst_instance).part1.instance_id = src.custom_instance_and_mask & 0xffffff; - + anv_encode_instance(dst_node, src); uint64_t instance_leaves_addr_base = args.instance_leaves_addr; uint64_t slot = ir_id_to_offset(child) / SIZEOF(vk_ir_instance_node); DEREF(INDEX(uint64_t, instance_leaves_addr_base, slot)) = dst_node; @@ -232,33 +88,6 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_ } } -vk_aabb -conservative_aabb(vk_aabb input_aabb) -{ - vk_aabb out_aabb; - - vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max)); - float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z)); - - out_aabb.min = input_aabb.min - vec3(err); - out_aabb.max = input_aabb.max + vec3(err); - - return out_aabb; -} - -void -aabb_extend(inout vk_aabb v1, vk_aabb v2) -{ - v1.min = min(v1.min, v2.min); - v1.max = max(v1.max, v2.max); -} - -vec3 -aabb_size(vk_aabb input_aabb) -{ - return input_aabb.max - input_aabb.min; -} - /* Determine the node_type based on type of its children. * If children are all the same leaves, this internal node is a fat leaf; * Otherwise, it's a mixed node.
diff --git a/src/intel/vulkan/bvh/encode.h b/src/intel/vulkan/bvh/encode.h new file mode 100644 index 00000000000..cdf2c7ac10d --- /dev/null +++ b/src/intel/vulkan/bvh/encode.h @@ -0,0 +1,148 @@ +/* Copyright © 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef ANV_BVH_ENCODE_H +#define ANV_BVH_ENCODE_H + +#include "anv_build_helpers.h" +#include "anv_build_interface.h" + +void +anv_encode_triangle(VOID_REF dst_addr, vk_ir_triangle_node src) +{ + REF(anv_quad_leaf_node) dst = REF(anv_quad_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= (src.geometry_id_and_flags & 0xffffff); + /* Geometry opaque (1-bit) is encoded at bit 30. NOTE(review): the inline code this replaces also ORed in (ANV_SUB_TYPE_QUAD & 0xF) << 24 and shifted ANV_GEOMETRY_FLAG_OPAQUE rather than a bool; confirm both changes are intended. */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + /* Disable the second triangle */ + uint32_t prim_index1_delta = 0; + /* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. */ + prim_index1_delta |= (1 << 22); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex; the geom mask defaults to + * 0xFF. + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + DEREF(dst).prim_index1_delta = prim_index1_delta; + DEREF(dst).prim_index0 = src.triangle_id; + DEREF(dst).leaf_desc = desc; + /* Setup single triangle */ + DEREF(dst).v = src.coords; +} + +void +anv_encode_aabb(VOID_REF dst_addr, vk_ir_aabb_node src) +{ + REF(anv_procedural_leaf_node) dst = REF(anv_procedural_leaf_node)(dst_addr); + + uint32_t geometry_id_and_flags = 0; + geometry_id_and_flags |= src.geometry_id_and_flags & 0xffffff; + /* Geometry opaque (1-bit) is encoded at bit 30. NOTE(review): the inline code this replaces also ORed in (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24 and shifted ANV_GEOMETRY_FLAG_OPAQUE rather than a bool; confirm both changes are intended. */ + geometry_id_and_flags |= (uint32_t(bool(src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE)) << 30); + + anv_prim_leaf_desc desc; + desc.geometry_id_and_flags = geometry_id_and_flags; + /* shaderIndex is typically set to match geomIndex; the geom mask
defaults to + * 0xFF. + */ + desc.shader_index_and_geom_mask = (0xFF000000 | (geometry_id_and_flags & 0xffffff)); + + /* num primitives = 1 */ + uint32_t dw1 = 1; + /* "last" has only 1 bit, and it is set. */ + dw1 |= (1 << 31); + + DEREF(dst).leaf_desc = desc; + DEREF(dst).DW1 = dw1; + DEREF(dst).primIndex[0] = src.primitive_id; +} + +void +anv_encode_instance(VOID_REF dst_addr, vk_ir_instance_node src) +{ + REF(anv_instance_leaf) dst = REF(anv_instance_leaf)(dst_addr); + REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr); + uint64_t start_node_ptr = uint64_t(src.base_ptr) + DEREF(blas_header).rootNodeOffset; /* NOTE(review): the inline code this replaces added args.output_bvh_offset instead; confirm the change of offset source is intended. */ + +#if GFX_VERx10 >= 300 + DEREF(dst).part0.QW_startNodePtr = start_node_ptr; + uint32_t instance_contribution_and_geom_mask = 0; + instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = instance_contribution_and_geom_mask; + + uint32_t inst_flags_and_the_rest = 0; + inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags); + inst_flags_and_the_rest |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ? + ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + + DEREF(dst).part0.DW1 = inst_flags_and_the_rest; +#else + uint32_t shader_index_and_geom_mask = 0; + shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000); + DEREF(dst).part0.DW0 = shader_index_and_geom_mask; + + uint32_t instance_contribution_and_geom_flags = 0; + instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff; + instance_contribution_and_geom_flags |= + ((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
+ ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30; + DEREF(dst).part0.DW1 = instance_contribution_and_geom_flags; + + DEREF(dst).part0.QW_startNodePtr = + (start_node_ptr & ((1ul << 48) - 1)) | + (uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48); +#endif + + mat4 transform = mat4(src.otw_matrix); + + mat4 inv_transform = transpose(inverse(transpose(transform))); + mat3x4 wto_matrix = mat3x4(inv_transform); + mat3x4 otw_matrix = mat3x4(transform); + + /* Arrange WTO transformation matrix in column-major order */ + DEREF(dst).part0.world2obj_vx_x = wto_matrix[0][0]; + DEREF(dst).part0.world2obj_vx_y = wto_matrix[1][0]; + DEREF(dst).part0.world2obj_vx_z = wto_matrix[2][0]; + DEREF(dst).part0.obj2world_p_x = otw_matrix[0][3]; + + DEREF(dst).part0.world2obj_vy_x = wto_matrix[0][1]; + DEREF(dst).part0.world2obj_vy_y = wto_matrix[1][1]; + DEREF(dst).part0.world2obj_vy_z = wto_matrix[2][1]; + DEREF(dst).part0.obj2world_p_y = otw_matrix[1][3]; + + DEREF(dst).part0.world2obj_vz_x = wto_matrix[0][2]; + DEREF(dst).part0.world2obj_vz_y = wto_matrix[1][2]; + DEREF(dst).part0.world2obj_vz_z = wto_matrix[2][2]; + DEREF(dst).part0.obj2world_p_z = otw_matrix[2][3]; + + /* Arrange OTW transformation matrix in column-major order */ + DEREF(dst).part1.obj2world_vx_x = otw_matrix[0][0]; + DEREF(dst).part1.obj2world_vx_y = otw_matrix[1][0]; + DEREF(dst).part1.obj2world_vx_z = otw_matrix[2][0]; + DEREF(dst).part1.world2obj_p_x = wto_matrix[0][3]; + + DEREF(dst).part1.obj2world_vy_x = otw_matrix[0][1]; + DEREF(dst).part1.obj2world_vy_y = otw_matrix[1][1]; + DEREF(dst).part1.obj2world_vy_z = otw_matrix[2][1]; + DEREF(dst).part1.world2obj_p_y = wto_matrix[1][3]; + + DEREF(dst).part1.obj2world_vz_x = otw_matrix[0][2]; + DEREF(dst).part1.obj2world_vz_y = otw_matrix[1][2]; + DEREF(dst).part1.obj2world_vz_z = otw_matrix[2][2]; + DEREF(dst).part1.world2obj_p_z = wto_matrix[2][3]; + + DEREF(dst).part1.bvh_ptr = src.base_ptr; + DEREF(dst).part1.instance_index = src.instance_id; + 
DEREF(dst).part1.instance_id = src.custom_instance_and_mask & 0xffffff; +} + +#endif diff --git a/src/intel/vulkan/bvh/meson.build b/src/intel/vulkan/bvh/meson.build index fcfbfc908be..81637140d3e 100644 --- a/src/intel/vulkan/bvh/meson.build +++ b/src/intel/vulkan/bvh/meson.build @@ -23,6 +23,7 @@ anv_bvh_includes = files( 'anv_build_helpers.h', 'anv_build_interface.h', 'anv_bvh.h', + 'encode.h', ) foreach shader : bvh_shaders