mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'review/update_bvh' into 'main'
anv: Implement BVH update See merge request mesa/mesa!39617
This commit is contained in:
commit
5ff7913aeb
16 changed files with 823 additions and 204 deletions
|
|
@ -108,6 +108,8 @@ static const struct debug_control_bitset debug_control[] = {
|
|||
OPT1("bvh_tlas_ir_hdr", DEBUG_BVH_TLAS_IR_HDR),
|
||||
OPT1("bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS),
|
||||
OPT1("bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS),
|
||||
OPT1("bvh_pcrel_map", DEBUG_BVH_PCREL_MAP),
|
||||
OPT1("bvh_update_as", DEBUG_BVH_UPDATE_AS),
|
||||
OPT1("bvh_no_build", DEBUG_BVH_NO_BUILD),
|
||||
OPT1("task", DEBUG_TASK),
|
||||
OPT1("mesh", DEBUG_MESH),
|
||||
|
|
|
|||
|
|
@ -94,6 +94,8 @@ enum intel_debug_flag {
|
|||
DEBUG_BVH_TLAS_IR_HDR,
|
||||
DEBUG_BVH_BLAS_IR_AS,
|
||||
DEBUG_BVH_TLAS_IR_AS,
|
||||
DEBUG_BVH_PCREL_MAP,
|
||||
DEBUG_BVH_UPDATE_AS,
|
||||
DEBUG_BVH_NO_BUILD,
|
||||
DEBUG_NO_SEND_GATHER,
|
||||
DEBUG_NO_VRT,
|
||||
|
|
@ -134,7 +136,9 @@ extern BITSET_WORD intel_debug[BITSET_WORDS(INTEL_DEBUG_MAX)];
|
|||
INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) || \
|
||||
INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) || \
|
||||
INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) || \
|
||||
INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS)))
|
||||
INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) || \
|
||||
INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) || \
|
||||
INTEL_DEBUG(DEBUG_BVH_PCREL_MAP)))
|
||||
|
||||
extern uint64_t intel_simd;
|
||||
extern uint32_t intel_debug_bkp_before_draw_count;
|
||||
|
|
|
|||
|
|
@ -502,6 +502,7 @@ CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS)
|
|||
CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(as_update, INTEL_DS_QUEUE_STAGE_AS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS)
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -266,6 +266,11 @@ def define_tracepoints(args):
|
|||
Arg(type='uint32_t', var='key', c_format='%x'),
|
||||
Arg(type='uint32_t', var='n_leaves', c_format='%u'),
|
||||
Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')])
|
||||
begin_end_tp('as_update', repeat_last=True,
|
||||
tp_args=[Arg(type='uint32_t', var='pass', c_format='%u'),
|
||||
Arg(type='uint32_t', var='key', c_format='%x'),
|
||||
Arg(type='uint32_t', var='n_leaves', c_format='%u'),
|
||||
Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')])
|
||||
begin_end_tp('as_copy', repeat_last=True)
|
||||
|
||||
begin_end_tp('rays',
|
||||
|
|
|
|||
|
|
@ -2517,12 +2517,15 @@ enum anv_object_key_bvh_type {
|
|||
ANV_OBJECT_KEY_BVH_ENCODE = VK_META_OBJECT_KEY_DRIVER_OFFSET,
|
||||
ANV_OBJECT_KEY_BVH_HEADER,
|
||||
ANV_OBJECT_KEY_BVH_COPY,
|
||||
ANV_OBJECT_KEY_BVH_UPDATE,
|
||||
};
|
||||
|
||||
enum bvh_dump_type {
|
||||
BVH_ANV,
|
||||
BVH_IR_HDR,
|
||||
BVH_IR_AS
|
||||
BVH_IR_AS,
|
||||
BVH_ANV_PCREL,
|
||||
BVH_ANV_UPDATE
|
||||
};
|
||||
|
||||
struct anv_bvh_dump {
|
||||
|
|
|
|||
|
|
@ -293,6 +293,12 @@ create_bvh_dump_file(struct anv_bvh_dump *bvh)
|
|||
case BVH_IR_AS:
|
||||
dump_sub_directory = "BVH_IR_AS";
|
||||
break;
|
||||
case BVH_ANV_PCREL:
|
||||
dump_sub_directory = "BVH_ANV_PCREL";
|
||||
break;
|
||||
case BVH_ANV_UPDATE:
|
||||
dump_sub_directory = "BVH_ANV_UPDATE";
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid dump type");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,4 +18,45 @@ TYPE(child_data, 1);
|
|||
TYPE(instance_leaf_part0, 8);
|
||||
TYPE(instance_leaf_part1, 8);
|
||||
|
||||
#define ULP 1.1920928955078125e-7f
|
||||
|
||||
/* A BLOCK is an offset, counted in 64B blocks from args.output_bvh, that
 * points to the output of an encoded node. Can be a leaf or internal node.
 */
#define BLOCK uint32_t

/* Address of the given block. The argument is parenthesized so that a
 * compound expression such as BLOCK_OFFSET(first + i) scales the whole block
 * index rather than only its first operand.
 */
#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * (block)))
|
||||
|
||||
/* Return the 8-bit value stored in the top byte (bits 24..31) of src.
 * The instance encoder passes sbt_offset_and_flags here to obtain the
 * instance flags.
 */
uint32_t
get_instance_flag(uint32_t src)
{
   const uint32_t flag_shift = 24;
   const uint32_t flag_mask = 0xff;

   return (src >> flag_shift) & flag_mask;
}
|
||||
|
||||
/* Return input_aabb grown on every side by a safety margin.
 *
 * The margin is ULP multiplied by the largest absolute coordinate found in
 * either corner of the box, and is applied uniformly to all three axes.
 */
vk_aabb
conservative_aabb(vk_aabb input_aabb)
{
   vec3 abs_extent = max(abs(input_aabb.min), abs(input_aabb.max));
   float largest = max(abs_extent.x, max(abs_extent.y, abs_extent.z));
   float margin = ULP * largest;

   vk_aabb padded;
   padded.min = input_aabb.min - vec3(margin);
   padded.max = input_aabb.max + vec3(margin);

   return padded;
}
|
||||
|
||||
/* Grow v1 in place so that it also encloses v2. */
void
aabb_extend(inout vk_aabb v1, vk_aabb v2)
{
   vec3 lo = min(v1.min, v2.min);
   vec3 hi = max(v1.max, v2.max);

   v1.min = lo;
   v1.max = hi;
}
|
||||
|
||||
/* Per-axis extent (max - min) of input_aabb. */
vec3
aabb_size(vk_aabb input_aabb)
{
   vec3 extent = input_aabb.max - input_aabb.min;

   return extent;
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef ANV_BVH_BUILD_INTERFACE_H
|
||||
#define ANV_BVH_BUILD_INTERFACE_H
|
||||
|
||||
#include "vk_build_interface.h"
|
||||
|
||||
#ifdef VULKAN
|
||||
#include "anv_build_helpers.h"
|
||||
#else
|
||||
|
|
@ -15,6 +17,21 @@
|
|||
#define VOID_REF uint64_t
|
||||
#endif
|
||||
|
||||
#define ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE (1u << (VK_BUILD_FLAG_COUNT + 0))
|
||||
|
||||
/* Push constants consumed by the update.comp shader. */
struct update_args {
|
||||
/* Base address that block offsets are added to (see BLOCK_OFFSET). */
VOID_REF output_bvh;
|
||||
/* One counter per block; update.comp atomically increments the entry of a
 * parent block to detect the last child to arrive.
 */
REF(uint32_t) internal_ready_count;
|
||||
/* One vk_aabb per block; update.comp stores refreshed leaf bounds here and
 * reads sibling bounds back when recomputing a parent.
 */
REF(vk_aabb) aabb_scratch;
|
||||
/* Invocations whose index is >= either of these two counts exit early. */
uint32_t leaf_node_count;
|
||||
uint32_t primitive_count;
|
||||
/* Subtracted from output_bvh to locate the anv_accel_struct_header. */
uint32_t output_bvh_offset;
|
||||
/* One uint32_t per block: update.comp takes the low 26 bits as the parent
 * block and the bits above as the child slot index.
 */
VOID_REF parent_child_map;
|
||||
/* One uint32_t per leaf id giving the leaf's block; VK_NULL_BVH_OFFSET
 * entries are skipped by update.comp.
 */
VOID_REF leaf_block_offset_map;
|
||||
|
||||
/* Geometry being updated; update.comp reads geometry_type, data, stride,
 * geometry_id and first_id from it.
 */
vk_bvh_geometry_data geom_data;
|
||||
};
|
||||
|
||||
struct encode_args {
|
||||
/* Address within the IR BVH, marking the start of leaves/internal nodes. */
|
||||
VOID_REF intermediate_bvh;
|
||||
|
|
@ -32,6 +49,9 @@ struct encode_args {
|
|||
|
||||
uint32_t leaf_node_count;
|
||||
uint32_t geometry_type;
|
||||
|
||||
VOID_REF parent_child_map;
|
||||
VOID_REF leaf_block_offset_map;
|
||||
};
|
||||
|
||||
struct header_args {
|
||||
|
|
@ -47,6 +67,11 @@ struct header_args {
|
|||
|
||||
uint64_t bvh_size;
|
||||
uint8_t is_compacted;
|
||||
|
||||
uint64_t parent_child_map_offset;
|
||||
uint64_t leaf_block_map_offset;
|
||||
uint32_t leaf_count;
|
||||
uint32_t total_nodes;
|
||||
};
|
||||
|
||||
#define ANV_COPY_MODE_COPY 0
|
||||
|
|
|
|||
|
|
@ -76,7 +76,12 @@ struct anv_accel_struct_header {
|
|||
|
||||
uint32_t instance_leaves_offset;
|
||||
|
||||
uint32_t padding[40];
|
||||
uint64_t parent_child_map_offset;
|
||||
uint64_t leaf_block_map_offset;
|
||||
uint32_t leaf_count;
|
||||
uint32_t total_nodes;
|
||||
|
||||
uint32_t padding[34];
|
||||
};
|
||||
|
||||
/* Mixed internal node with type per child */
|
||||
|
|
@ -132,7 +137,9 @@ struct anv_quad_leaf_node {
|
|||
* Reserved (9-bits)
|
||||
*/
|
||||
uint32_t prim_index1_delta;
|
||||
float v[4][3];
|
||||
float v[3][3];
|
||||
/* Second triangle coords */
|
||||
float v1[3];
|
||||
};
|
||||
|
||||
struct anv_procedural_leaf_node {
|
||||
|
|
@ -170,6 +177,12 @@ struct anv_internal_node {
|
|||
*/
|
||||
uint8_t node_type;
|
||||
|
||||
/* Note: We are going to use this field to track number of children this
|
||||
* internal node has.
|
||||
*
|
||||
* XXX: Keep an eye on this field for future platforms in case anything
|
||||
* changes.
|
||||
*/
|
||||
uint8_t reserved;
|
||||
|
||||
/* 2^exp_x is the size of the grid in x dimension */
|
||||
|
|
@ -325,13 +338,14 @@ struct anv_instance_leaf {
|
|||
| start with root node, |
|
||||
| followed by interleaving |
|
||||
| internal nodes and leaves |
|
||||
|-------------------------------|
|
||||
| padding to align to |
|
||||
| 64 bytes boundary |
|
||||
|-------------------------------| bvh_layout.instance_leaves_offset
|
||||
| For a TLAS, the pointers |
|
||||
| to all anv_instance_leaves |
|
||||
| For a BLAS, nothing here |
|
||||
|-------------------------------| bvh_layout.parent_child_map_offset
|
||||
| Parent - child map |
|
||||
|-------------------------------| bvh_layout.leaf_block_map_offset
|
||||
| Leaf block offset map |
|
||||
|-------------------------------|
|
||||
| padding to align to |
|
||||
| 64 bytes boundary | bvh_layout.size
|
||||
|
|
@ -344,13 +358,23 @@ struct bvh_layout {
|
|||
*/
|
||||
uint64_t bvh_offset;
|
||||
|
||||
/* This tracks pointers to all anv_instance_leaves for a TLAS. */
|
||||
uint64_t instance_leaves_offset;
|
||||
|
||||
/* This map stores parent BVH offset for each child
|
||||
*
|
||||
* Lower 26bits - parent block index
|
||||
* upper 6bits - parent child slot index
|
||||
* */
|
||||
uint64_t parent_child_map_offset;
|
||||
|
||||
/* This map stores BVH block index for each leaf id (IR ID) */
|
||||
uint64_t leaf_block_map_offset;
|
||||
|
||||
/* Total size = bvh_offset + leaves + internal_nodes (assuming there's no
|
||||
* internal node collapsed)
|
||||
*/
|
||||
uint64_t size;
|
||||
|
||||
/* This tracks pointers to all anv_instance_leaves for a TLAS. */
|
||||
uint64_t instance_leaves_offset;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
|||
|
||||
#include "anv_build_helpers.h"
|
||||
#include "anv_build_interface.h"
|
||||
#include "encode.h"
|
||||
|
||||
#define ULP 1.1920928955078125e-7f
|
||||
#define READY_TO_WRITE(offset) ((offset) < VK_NULL_BVH_OFFSET)
|
||||
#define ASSIGNED_NODE_TO_ENCODE (gl_GlobalInvocationID.x < DEREF(args.header).ir_internal_node_count)
|
||||
|
||||
|
|
@ -24,12 +24,6 @@ layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
|||
#define IR_NODE uint32_t
|
||||
#define NODE_OFFSET(node) (OFFSET(args.intermediate_bvh, ir_id_to_offset(node)))
|
||||
|
||||
/* An offset in 64B blocks from args.output_bvh that points to output of
|
||||
* encoded nodes. Can be a leaf or internal node.
|
||||
*/
|
||||
#define BLOCK uint32_t
|
||||
#define BLOCK_OFFSET(block) (OFFSET(args.output_bvh, ANV_RT_BLOCK_SIZE * block))
|
||||
|
||||
layout(push_constant) uniform CONSTS {
|
||||
encode_args args;
|
||||
};
|
||||
|
|
@ -41,12 +35,6 @@ debug_dump(uint32_t offset, uint32_t value)
|
|||
DEREF(msg) = value;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
get_instance_flag(uint32_t src)
|
||||
{
|
||||
return ((src >> 24) & 0xff);
|
||||
}
|
||||
|
||||
struct anv_cluster {
|
||||
/* simd lane inside cluster: 0 .. 7 */
|
||||
uint32_t idx;
|
||||
|
|
@ -77,153 +65,21 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_
|
|||
|
||||
if (DEBUG_EXIT_EARLY(type))
|
||||
return;
|
||||
|
||||
switch (type) {
|
||||
case vk_ir_node_triangle: {
|
||||
REF(anv_quad_leaf_node) quad_leaf = REF(anv_quad_leaf_node)(dst_node);
|
||||
|
||||
vk_ir_triangle_node src = DEREF(REF(vk_ir_triangle_node)(src_node));
|
||||
uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff;
|
||||
|
||||
/* sub-type (4-bit) encoded on 24-bit index */
|
||||
geometry_id_and_flags |= (ANV_SUB_TYPE_QUAD & 0xF) << 24;
|
||||
|
||||
if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
|
||||
/* Geometry opqaue (1-bit) is encoded on 30-bit index */
|
||||
geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30);
|
||||
}
|
||||
|
||||
/* Disable the second triangle */
|
||||
uint32_t prim_index1_delta = 0;
|
||||
/* For now, blockIncr are all 1, so every quad leaf has its "last" bit set. */
|
||||
prim_index1_delta |= (1 << 22);
|
||||
|
||||
DEREF(quad_leaf).prim_index1_delta = prim_index1_delta;
|
||||
DEREF(quad_leaf).prim_index0 = src.triangle_id;
|
||||
DEREF(quad_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags;
|
||||
|
||||
/* shaderIndex is typically set to match geomIndex
|
||||
* Geom mask is default to 0xFF
|
||||
*/
|
||||
DEREF(quad_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff);
|
||||
|
||||
/* Setup single triangle */
|
||||
for (uint32_t i = 0; i < 3; i++) {
|
||||
for (uint32_t j = 0; j < 3; j++) {
|
||||
DEREF(quad_leaf).v[i][j] = src.coords[i][j];
|
||||
}
|
||||
}
|
||||
anv_encode_triangle(dst_node, src);
|
||||
break;
|
||||
}
|
||||
case vk_ir_node_aabb: {
|
||||
REF(anv_procedural_leaf_node) aabb_leaf = REF(anv_procedural_leaf_node)(dst_node);
|
||||
|
||||
vk_ir_aabb_node src = DEREF(REF(vk_ir_aabb_node)(src_node));
|
||||
uint32_t geometry_id_and_flags = src.geometry_id_and_flags & 0xffffff;
|
||||
|
||||
/* sub-type (4-bit) encoded on 24-bit index */
|
||||
geometry_id_and_flags |= (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24;
|
||||
|
||||
if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
|
||||
geometry_id_and_flags |= (ANV_GEOMETRY_FLAG_OPAQUE << 30);
|
||||
}
|
||||
|
||||
DEREF(aabb_leaf).leaf_desc.geometry_id_and_flags = geometry_id_and_flags;
|
||||
|
||||
/* shaderIndex is typically set to match geomIndex
|
||||
* Geom mask is default to 0xFF
|
||||
*/
|
||||
DEREF(aabb_leaf).leaf_desc.shader_index_and_geom_mask = 0xFF000000 | (geometry_id_and_flags & 0xffffff);
|
||||
|
||||
/* num primitives = 1 */
|
||||
uint32_t dw1 = 1;
|
||||
/* "last" has only 1 bit, and it is set. */
|
||||
dw1 |= (1 << 31);
|
||||
|
||||
DEREF(aabb_leaf).DW1 = dw1;
|
||||
DEREF(aabb_leaf).primIndex[0] = src.primitive_id;
|
||||
anv_encode_aabb(dst_node, src);
|
||||
break;
|
||||
}
|
||||
case vk_ir_node_instance: {
|
||||
vk_ir_instance_node src = DEREF(REF(vk_ir_instance_node)(src_node));
|
||||
|
||||
REF(anv_instance_leaf) dst_instance = REF(anv_instance_leaf)(dst_node);
|
||||
REF(anv_accel_struct_header) blas_header = REF(anv_accel_struct_header)(src.base_ptr);
|
||||
uint64_t start_node_ptr = uint64_t(src.base_ptr) + args.output_bvh_offset;
|
||||
|
||||
#if GFX_VERx10 >= 300
|
||||
DEREF(dst_instance).part0.QW_startNodePtr = start_node_ptr;
|
||||
uint32_t instance_contribution_and_geom_mask = 0;
|
||||
instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff;
|
||||
instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000);
|
||||
DEREF(dst_instance).part0.DW0 = instance_contribution_and_geom_mask;
|
||||
|
||||
uint32_t inst_flags_and_the_rest = 0;
|
||||
inst_flags_and_the_rest |= get_instance_flag(src.sbt_offset_and_flags);
|
||||
inst_flags_and_the_rest |=
|
||||
((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
|
||||
ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;
|
||||
|
||||
DEREF(dst_instance).part0.DW1 = inst_flags_and_the_rest;
|
||||
|
||||
#else
|
||||
uint32_t shader_index_and_geom_mask = 0;
|
||||
shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000);
|
||||
DEREF(dst_instance).part0.DW0 = shader_index_and_geom_mask;
|
||||
|
||||
uint32_t instance_contribution_and_geom_flags = 0;
|
||||
instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff;
|
||||
instance_contribution_and_geom_flags |=
|
||||
((get_instance_flag(src.sbt_offset_and_flags) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
|
||||
ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;
|
||||
DEREF(dst_instance).part0.DW1 = instance_contribution_and_geom_flags;
|
||||
|
||||
DEREF(dst_instance).part0.QW_startNodePtr =
|
||||
(start_node_ptr & ((1ul << 48) - 1)) |
|
||||
(uint64_t(get_instance_flag(src.sbt_offset_and_flags)) << 48);
|
||||
#endif
|
||||
|
||||
mat4 transform = mat4(src.otw_matrix);
|
||||
|
||||
mat4 inv_transform = transpose(inverse(transpose(transform)));
|
||||
mat3x4 wto_matrix = mat3x4(inv_transform);
|
||||
mat3x4 otw_matrix = mat3x4(transform);
|
||||
|
||||
/* Arrange WTO transformation matrix in column-major order */
|
||||
DEREF(dst_instance).part0.world2obj_vx_x = wto_matrix[0][0];
|
||||
DEREF(dst_instance).part0.world2obj_vx_y = wto_matrix[1][0];
|
||||
DEREF(dst_instance).part0.world2obj_vx_z = wto_matrix[2][0];
|
||||
DEREF(dst_instance).part0.obj2world_p_x = otw_matrix[0][3];
|
||||
|
||||
DEREF(dst_instance).part0.world2obj_vy_x = wto_matrix[0][1];
|
||||
DEREF(dst_instance).part0.world2obj_vy_y = wto_matrix[1][1];
|
||||
DEREF(dst_instance).part0.world2obj_vy_z = wto_matrix[2][1];
|
||||
DEREF(dst_instance).part0.obj2world_p_y = otw_matrix[1][3];
|
||||
|
||||
DEREF(dst_instance).part0.world2obj_vz_x = wto_matrix[0][2];
|
||||
DEREF(dst_instance).part0.world2obj_vz_y = wto_matrix[1][2];
|
||||
DEREF(dst_instance).part0.world2obj_vz_z = wto_matrix[2][2];
|
||||
DEREF(dst_instance).part0.obj2world_p_z = otw_matrix[2][3];
|
||||
|
||||
/* Arrange OTW transformation matrix in column-major order */
|
||||
DEREF(dst_instance).part1.obj2world_vx_x = otw_matrix[0][0];
|
||||
DEREF(dst_instance).part1.obj2world_vx_y = otw_matrix[1][0];
|
||||
DEREF(dst_instance).part1.obj2world_vx_z = otw_matrix[2][0];
|
||||
DEREF(dst_instance).part1.world2obj_p_x = wto_matrix[0][3];
|
||||
|
||||
DEREF(dst_instance).part1.obj2world_vy_x = otw_matrix[0][1];
|
||||
DEREF(dst_instance).part1.obj2world_vy_y = otw_matrix[1][1];
|
||||
DEREF(dst_instance).part1.obj2world_vy_z = otw_matrix[2][1];
|
||||
DEREF(dst_instance).part1.world2obj_p_y = wto_matrix[1][3];
|
||||
|
||||
DEREF(dst_instance).part1.obj2world_vz_x = otw_matrix[0][2];
|
||||
DEREF(dst_instance).part1.obj2world_vz_y = otw_matrix[1][2];
|
||||
DEREF(dst_instance).part1.obj2world_vz_z = otw_matrix[2][2];
|
||||
DEREF(dst_instance).part1.world2obj_p_z = wto_matrix[2][3];
|
||||
|
||||
DEREF(dst_instance).part1.bvh_ptr = src.base_ptr;
|
||||
DEREF(dst_instance).part1.instance_index = src.instance_id;
|
||||
DEREF(dst_instance).part1.instance_id = src.custom_instance_and_mask & 0xffffff;
|
||||
|
||||
anv_encode_instance(dst_node, src);
|
||||
uint64_t instance_leaves_addr_base = args.instance_leaves_addr;
|
||||
uint64_t slot = ir_id_to_offset(child) / SIZEOF(vk_ir_instance_node);
|
||||
DEREF(INDEX(uint64_t, instance_leaves_addr_base, slot)) = dst_node;
|
||||
|
|
@ -232,33 +88,6 @@ encode_leaf_node(uint32_t type, IR_NODE child, uint64_t dst_node, REF(anv_accel_
|
|||
}
|
||||
}
|
||||
|
||||
vk_aabb
|
||||
conservative_aabb(vk_aabb input_aabb)
|
||||
{
|
||||
vk_aabb out_aabb;
|
||||
|
||||
vec3 reduce_value = max(abs(input_aabb.min), abs(input_aabb.max));
|
||||
float err = ULP * max(reduce_value.x, max(reduce_value.y, reduce_value.z));
|
||||
|
||||
out_aabb.min = input_aabb.min - vec3(err);
|
||||
out_aabb.max = input_aabb.max + vec3(err);
|
||||
|
||||
return out_aabb;
|
||||
}
|
||||
|
||||
void
|
||||
aabb_extend(inout vk_aabb v1, vk_aabb v2)
|
||||
{
|
||||
v1.min = min(v1.min, v2.min);
|
||||
v1.max = max(v1.max, v2.max);
|
||||
}
|
||||
|
||||
vec3
|
||||
aabb_size(vk_aabb input_aabb)
|
||||
{
|
||||
return input_aabb.max - input_aabb.min;
|
||||
}
|
||||
|
||||
/* Determine the node_type based on type of its children.
|
||||
* If children are all the same leaves, this internal node is a fat leaf;
|
||||
* Otherwise, it's a mixed node.
|
||||
|
|
@ -353,6 +182,9 @@ encode_internal_node(uint32_t child, uint32_t child_block_offset_from_internal_n
|
|||
DEREF(dst_node).exp_z = exp_i8[2];
|
||||
DEREF(dst_node).node_mask = uint8_t(0xff);
|
||||
DEREF(dst_node).node_type = node_type;
|
||||
|
||||
/* Using reserved field to track number of children. */
|
||||
DEREF(dst_node).reserved = uint8_t(child_count);
|
||||
}
|
||||
|
||||
child_aabb = conservative_aabb(child_aabb);
|
||||
|
|
@ -530,6 +362,9 @@ main()
|
|||
/* Tracks BLOCK where the next children should be encoded. */
|
||||
DEREF(args.header).dst_node_offset = 1;
|
||||
DEREF(header).instance_count = 0;
|
||||
if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) {
|
||||
DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET;
|
||||
}
|
||||
}
|
||||
|
||||
IR_NODE children[6] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE,
|
||||
|
|
@ -612,7 +447,6 @@ main()
|
|||
REF(vk_ir_box_node)NODE_OFFSET(children[i]);
|
||||
DEREF(child_node).bvh_offset = child_offset;
|
||||
}
|
||||
|
||||
child_offset += (type == vk_ir_node_instance) ? 2 : 1;
|
||||
}
|
||||
|
||||
|
|
@ -661,9 +495,30 @@ main()
|
|||
child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb;
|
||||
|
||||
uint32_t type = ir_id_to_type(child);
|
||||
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal)
|
||||
encode_leaf_node(type, child, BLOCK_OFFSET(child_block),
|
||||
header);
|
||||
if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) {
|
||||
if (child != VK_BVH_INVALID_NODE &&
|
||||
(type == vk_ir_node_triangle || type == vk_ir_node_aabb)) {
|
||||
uint32_t ir_offset = ir_id_to_offset(child);
|
||||
uint32_t leaf_id = ir_offset / intermediate_leaf_node_size;
|
||||
/* Block offset 0 is assigned to root, so avoid accidental
|
||||
* assignment.
|
||||
*/
|
||||
DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) =
|
||||
(child_block != 0) ? child_block : VK_NULL_BVH_OFFSET;
|
||||
}
|
||||
|
||||
/* Track each children's parent in the map. */
|
||||
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) {
|
||||
uint32_t pcm = 0;
|
||||
pcm = internal_node_block | (cluster.idx << 26);
|
||||
DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm;
|
||||
}
|
||||
}
|
||||
|
||||
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) {
|
||||
encode_leaf_node(type, child,
|
||||
BLOCK_OFFSET(child_block), header);
|
||||
}
|
||||
}
|
||||
|
||||
BLOCK child_block_offset =
|
||||
|
|
|
|||
148
src/intel/vulkan/bvh/encode.h
Normal file
148
src/intel/vulkan/bvh/encode.h
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
/* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef ANV_BVH_ENCODE_H
|
||||
#define ANV_BVH_ENCODE_H
|
||||
|
||||
#include "anv_build_helpers.h"
|
||||
#include "anv_build_interface.h"
|
||||
|
||||
/* Encode one IR triangle node as an anv_quad_leaf_node at dst_addr.
 *
 * Only the first triangle of the quad is populated; the second triangle is
 * left disabled.
 */
void
anv_encode_triangle(VOID_REF dst_addr, vk_ir_triangle_node src)
{
   REF(anv_quad_leaf_node) leaf = REF(anv_quad_leaf_node)(dst_addr);

   uint32_t geom_id = src.geometry_id_and_flags & 0xffffff;
   uint32_t opaque_bit = 0;
   if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
      opaque_bit = 1;
   }

   anv_prim_leaf_desc leaf_desc;
   /* Geometry opaque (1-bit) is encoded at bit 30.
    *
    * NOTE(review): the inline encoder removed from encode.comp by this change
    * also OR'ed (ANV_SUB_TYPE_QUAD & 0xF) << 24 into this word; confirm that
    * omitting it here is intended.
    */
   leaf_desc.geometry_id_and_flags = geom_id | (opaque_bit << 30);
   /* shaderIndex is set to match geomIndex; the geom mask defaults to 0xFF. */
   leaf_desc.shader_index_and_geom_mask = (0xFF000000 | geom_id);

   /* The second triangle is disabled, so the delta word carries only bit 22.
    * For now, blockIncr are all 1, so every quad leaf has its "last" bit set.
    */
   uint32_t last_only = 1u << 22;

   DEREF(leaf).prim_index1_delta = last_only;
   DEREF(leaf).prim_index0 = src.triangle_id;
   DEREF(leaf).leaf_desc = leaf_desc;
   /* Vertices of the single triangle */
   DEREF(leaf).v = src.coords;
}
|
||||
|
||||
/* Encode one IR AABB node as an anv_procedural_leaf_node at dst_addr,
 * holding exactly one primitive.
 */
void
anv_encode_aabb(VOID_REF dst_addr, vk_ir_aabb_node src)
{
   REF(anv_procedural_leaf_node) leaf = REF(anv_procedural_leaf_node)(dst_addr);

   uint32_t geom_id = src.geometry_id_and_flags & 0xffffff;
   uint32_t opaque_bit = 0;
   if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
      opaque_bit = 1;
   }

   anv_prim_leaf_desc leaf_desc;
   /* Geometry opaque (1-bit) is encoded at bit 30.
    *
    * NOTE(review): the inline encoder removed from encode.comp by this change
    * also OR'ed (ANV_SUB_TYPE_PROCEDURAL & 0xF) << 24 into this word; confirm
    * that omitting it here is intended.
    */
   leaf_desc.geometry_id_and_flags = geom_id | (opaque_bit << 30);
   /* shaderIndex is set to match geomIndex; the geom mask defaults to 0xFF. */
   leaf_desc.shader_index_and_geom_mask = (0xFF000000 | geom_id);

   /* num primitives = 1 in the low bits; "last" has only 1 bit (bit 31) and
    * it is set.
    */
   uint32_t count_and_last = 1u | (1u << 31);

   DEREF(leaf).leaf_desc = leaf_desc;
   DEREF(leaf).DW1 = count_and_last;
   DEREF(leaf).primIndex[0] = src.primitive_id;
}
|
||||
|
||||
/* Encode one IR instance node as an anv_instance_leaf at dst_addr.
 *
 * The start node pointer is the referenced acceleration structure's base
 * address plus the rootNodeOffset read from the header located at that base
 * address. Both the object-to-world matrix and its inverse are written.
 */
void
anv_encode_instance(VOID_REF dst_addr, vk_ir_instance_node src)
{
   REF(anv_instance_leaf) leaf = REF(anv_instance_leaf)(dst_addr);

   REF(anv_accel_struct_header) blas = REF(anv_accel_struct_header)(src.base_ptr);
   uint64_t root_ptr = uint64_t(src.base_ptr) + DEREF(blas).rootNodeOffset;

   /* Pieces shared by both hardware layouts below. */
   uint32_t instance_flags = get_instance_flag(src.sbt_offset_and_flags);
   uint32_t sbt_offset = src.sbt_offset_and_flags & 0xffffff;
   uint32_t geom_mask = (src.custom_instance_and_mask & 0xff000000);
   uint32_t opaque_bit =
      ((instance_flags & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
       ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;

#if GFX_VERx10 >= 300
   /* Full 64-bit node pointer; the instance flags live in DW1. */
   DEREF(leaf).part0.QW_startNodePtr = root_ptr;
   DEREF(leaf).part0.DW0 = sbt_offset | geom_mask;
   DEREF(leaf).part0.DW1 = instance_flags | opaque_bit;
#else
   /* The instance flags share the QW with a 48-bit node pointer. */
   DEREF(leaf).part0.DW0 = geom_mask;
   DEREF(leaf).part0.DW1 = sbt_offset | opaque_bit;
   DEREF(leaf).part0.QW_startNodePtr =
      (root_ptr & ((1ul << 48) - 1)) |
      (uint64_t(instance_flags) << 48);
#endif

   mat4 object_to_world4 = mat4(src.otw_matrix);
   mat4 world_to_object4 = transpose(inverse(transpose(object_to_world4)));

   mat3x4 wto = mat3x4(world_to_object4);
   mat3x4 otw = mat3x4(object_to_world4);

   /* part0: world-to-object vectors, each followed by one component of the
    * object-to-world translation.
    */
   DEREF(leaf).part0.world2obj_vx_x = wto[0][0];
   DEREF(leaf).part0.world2obj_vx_y = wto[1][0];
   DEREF(leaf).part0.world2obj_vx_z = wto[2][0];
   DEREF(leaf).part0.obj2world_p_x = otw[0][3];

   DEREF(leaf).part0.world2obj_vy_x = wto[0][1];
   DEREF(leaf).part0.world2obj_vy_y = wto[1][1];
   DEREF(leaf).part0.world2obj_vy_z = wto[2][1];
   DEREF(leaf).part0.obj2world_p_y = otw[1][3];

   DEREF(leaf).part0.world2obj_vz_x = wto[0][2];
   DEREF(leaf).part0.world2obj_vz_y = wto[1][2];
   DEREF(leaf).part0.world2obj_vz_z = wto[2][2];
   DEREF(leaf).part0.obj2world_p_z = otw[2][3];

   /* part1: object-to-world vectors, each followed by one component of the
    * world-to-object translation.
    */
   DEREF(leaf).part1.obj2world_vx_x = otw[0][0];
   DEREF(leaf).part1.obj2world_vx_y = otw[1][0];
   DEREF(leaf).part1.obj2world_vx_z = otw[2][0];
   DEREF(leaf).part1.world2obj_p_x = wto[0][3];

   DEREF(leaf).part1.obj2world_vy_x = otw[0][1];
   DEREF(leaf).part1.obj2world_vy_y = otw[1][1];
   DEREF(leaf).part1.obj2world_vy_z = otw[2][1];
   DEREF(leaf).part1.world2obj_p_y = wto[1][3];

   DEREF(leaf).part1.obj2world_vz_x = otw[0][2];
   DEREF(leaf).part1.obj2world_vz_y = otw[1][2];
   DEREF(leaf).part1.obj2world_vz_z = otw[2][2];
   DEREF(leaf).part1.world2obj_p_z = wto[2][3];

   DEREF(leaf).part1.bvh_ptr = src.base_ptr;
   DEREF(leaf).part1.instance_index = src.instance_id;
   DEREF(leaf).part1.instance_id = src.custom_instance_and_mask & 0xffffff;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -34,6 +34,11 @@ main(void)
|
|||
DEREF(args.dst).instance_count = args.instance_count;
|
||||
DEREF(args.dst).instance_leaves_offset = args.instance_leaves_offset;
|
||||
|
||||
/* Copy the lookup-map offsets and node counts from the push constants into
 * the destination header. Each store is its own statement; terminating them
 * with ',' would chain them into whatever statement follows.
 */
DEREF(args.dst).parent_child_map_offset = args.parent_child_map_offset;
DEREF(args.dst).leaf_block_map_offset = args.leaf_block_map_offset;
DEREF(args.dst).leaf_count = args.leaf_count;
DEREF(args.dst).total_nodes = args.total_nodes;
|
||||
|
||||
/* 128 is local_size_x in copy.comp shader, 16bytes is the amount of data
|
||||
* copied by each iteration of that shader's loop
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ bvh_shaders = [
|
|||
'encode.comp',
|
||||
'header.comp',
|
||||
'copy.comp',
|
||||
'update.comp',
|
||||
]
|
||||
|
||||
# A mapping: [filename version, GFX_VERx10 define version]
|
||||
|
|
@ -23,6 +24,7 @@ anv_bvh_includes = files(
|
|||
'anv_build_helpers.h',
|
||||
'anv_build_interface.h',
|
||||
'anv_bvh.h',
|
||||
'encode.h',
|
||||
)
|
||||
|
||||
foreach shader : bvh_shaders
|
||||
|
|
|
|||
207
src/intel/vulkan/bvh/update.comp
Normal file
207
src/intel/vulkan/bvh/update.comp
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#version 460
|
||||
#include "anv_build_helpers.h"
|
||||
#include "anv_build_interface.h"
|
||||
#include "update.h"
|
||||
|
||||
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform CONSTS {
|
||||
update_args args;
|
||||
};
|
||||
|
||||
/* Re-encode the leaf stored at leaf_block from the current geometry data and
 * return the bounds reported by the builder.
 *
 * leaf_local_idx is the primitive index within args.geom_data. For geometry
 * types other than triangles and AABBs nothing is written and the returned
 * box keeps its initial value of min = max = (0, 0, 0).
 */
vk_aabb
build_and_encode_leaf(uint32_t leaf_local_idx, BLOCK leaf_block)
{
   vk_aabb leaf_bounds = vk_aabb(vec3(0.0f), vec3(0.0f));
   VOID_REF leaf_addr = BLOCK_OFFSET(leaf_block);

   if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
      anv_build_triangle(leaf_bounds, leaf_addr, args.geom_data, leaf_local_idx);
   } else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) {
      /* Input AABBs are laid out at a fixed stride from geom_data.data. */
      VOID_REF src_aabb_ptr = OFFSET(args.geom_data.data,
                                     leaf_local_idx * args.geom_data.stride);
      anv_build_aabb(leaf_bounds, src_aabb_ptr, leaf_addr,
                     args.geom_data.geometry_id, leaf_local_idx);
   }
   /* else: instances typically not updated */

   return leaf_bounds;
}
|
||||
|
||||
/* Rewrite the origin, exponents and quantized child boxes of the internal
 * node at parent_block after one of its children changed.
 *
 * The child at slot updated_idx uses updated_child_bounds; every other child
 * is read from args.aabb_scratch at (first child block + slot). Returns the
 * union of all child bounds before the conservative padding is applied.
 *
 * NOTE(review): the local cache holds 6 boxes, so child_count must not
 * exceed 6 - confirm callers guarantee this.
 */
vk_aabb
recompute_parent(BLOCK parent_block, uint32_t updated_idx,
                 uint32_t child_count, vk_aabb updated_child_bounds)
{
   REF(anv_internal_node) node = REF(anv_internal_node)(BLOCK_OFFSET(parent_block));

   BLOCK first_child_block = parent_block + DEREF(node).child_block_offset;

   /* Start from an empty box so the first aabb_extend() adopts the child. */
   vk_aabb merged;
   merged.min = vec3(INFINITY);
   merged.max = vec3(-INFINITY);

   vk_aabb child_aabbs[6];
   for (uint32_t i = 0; i < child_count; ++i) {
      if (i == updated_idx) {
         child_aabbs[i] = updated_child_bounds;
      } else {
         child_aabbs[i] =
            DEREF(INDEX(vk_aabb, args.aabb_scratch, first_child_block + i));
      }
      aabb_extend(merged, child_aabbs[i]);
   }

   vk_aabb padded = conservative_aabb(merged);

   /* Split the slightly enlarged extent into mantissa and exponent, bumping
    * the exponent of any axis whose mantissa is above 255/256.
    */
   ivec3 exponent;
   vec3 mantissa = frexp(aabb_size(padded) * (1.0 + ULP), exponent);

   if (mantissa.x > (255.0f / 256.0f)) {
      exponent.x += 1;
   }
   if (mantissa.y > (255.0f / 256.0f)) {
      exponent.y += 1;
   }
   if (mantissa.z > (255.0f / 256.0f)) {
      exponent.z += 1;
   }

   /* NOTE(review): the max() below runs after the narrowing conversion to
    * 8 bits, where -128 is already the smallest value, so it cannot change
    * the result - confirm whether the clamp was meant to precede narrowing.
    */
   i8vec3 narrowed = i8vec3(exponent);
   i8vec3 exp_i8 = i8vec3(max(int8_t(-128), narrowed.x),
                          max(int8_t(-128), narrowed.y),
                          max(int8_t(-128), narrowed.z));

   DEREF(node).lower[0] = padded.min.x;
   DEREF(node).lower[1] = padded.min.y;
   DEREF(node).lower[2] = padded.min.z;
   DEREF(node).exp_x = exp_i8.x;
   DEREF(node).exp_y = exp_i8.y;
   DEREF(node).exp_z = exp_i8.z;

   /* Children are quantized to 8 bits per axis relative to the padded box
    * origin, with a cell size of 2^(exp - 8).
    */
   vec3 origin = padded.min;
   vec3 cell = ldexp(vec3(1.0), exp_i8 - 8);

   for (uint32_t i = 0; i < child_count; ++i) {
      vec3 lo = (child_aabbs[i].min - origin) / cell;
      vec3 hi = (child_aabbs[i].max - origin) / cell;

      /* Round outwards so the quantized box never shrinks. */
      lo = clamp(floor(lo), vec3(0.0), vec3(255.0));
      hi = clamp(ceil(hi), vec3(0.0), vec3(255.0));

      DEREF(node).lower_x[i] = uint8_t(lo.x);
      DEREF(node).lower_y[i] = uint8_t(lo.y);
      DEREF(node).lower_z[i] = uint8_t(lo.z);
      DEREF(node).upper_x[i] = uint8_t(hi.x);
      DEREF(node).upper_y[i] = uint8_t(hi.y);
      DEREF(node).upper_z[i] = uint8_t(hi.z);
   }

   return merged;
}
|
||||
|
||||
/* BVH update entry point.
 *
 * Each invocation handles one leaf of the geometry currently being updated:
 * it re-encodes the leaf, stores its bounds in the AABB scratch array and
 * then climbs towards the root. At every internal node an atomic counter
 * decides which of the children's invocations continues upwards: only the
 * one that observes all other valid children as already arrived re-encodes
 * the parent. The invocation that reaches a node without a parent writes the
 * resulting bounds into the acceleration structure header.
 */
void main()
{
   uint32_t local_leaf = gl_GlobalInvocationID.x;
   if (local_leaf >= args.leaf_node_count)
      return;
   if (local_leaf >= args.primitive_count)
      return;

   uint32_t global_leaf = args.geom_data.first_id + local_leaf;
   BLOCK leaf_block = DEREF(INDEX(uint32_t, args.leaf_block_offset_map, global_leaf));

   /* A null entry in the leaf block map means there is no block to refit
    * for this leaf.
    */
   if (leaf_block == VK_NULL_BVH_OFFSET)
      return;

   /* The header sits output_bvh_offset bytes in front of the BVH data. */
   REF(anv_accel_struct_header) header =
      REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);

   vk_aabb node_bounds = build_and_encode_leaf(local_leaf, leaf_block);
   DEREF(INDEX(vk_aabb, args.aabb_scratch, leaf_block)) = node_bounds;

   /* The scratch store and the leaf encoding have to be visible before the
    * atomic increment performed while walking up the tree.
    */
   memoryBarrierBuffer();

   BLOCK node = leaf_block;

   for (;;) {
      BLOCK packed_parent = DEREF(INDEX(uint32_t, args.parent_child_map, node));

      /* The leaf itself has no parent: its bounds are the bounds of the
       * whole acceleration structure.
       */
      if (packed_parent == VK_NULL_BVH_OFFSET && node == leaf_block) {
         DEREF(header).aabb = node_bounds;
         return;
      }

      /* Low 26 bits: parent block, remaining high bits: slot of this node
       * within the parent.
       */
      BLOCK parent = packed_parent & 0x03FFFFFF;
      REF(anv_internal_node) parent_node = REF(anv_internal_node)(BLOCK_OFFSET(parent));

      /* The reserved field of an internal node holds its number of valid
       * children.
       */
      uint32_t expected_children = uint32_t(DEREF(parent_node).reserved);
      if (expected_children == 0)
         return;

      /* Count this child as arrived; the returned value is the number of
       * children that arrived before it.
       */
      uint32_t arrived_before = atomicAdd(
         DEREF(INDEX(uint32_t, args.internal_ready_count, parent)), 1,
         gl_ScopeDevice, gl_StorageSemanticsBuffer,
         gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      /* Only the last child to arrive carries on with the parent. */
      if (arrived_before != expected_children - 1)
         return;

      uint32_t slot_in_parent = packed_parent >> 26;

      /* Re-encode the parent with freshly quantized child bounds. */
      vk_aabb parent_bounds =
         recompute_parent(parent, slot_in_parent, expected_children, node_bounds);

      /* Publish the parent bounds for the level above. */
      DEREF(INDEX(vk_aabb, args.aabb_scratch, parent)) = parent_bounds;

      /* Make the scratch store and the parent encoding visible before the
       * next step up the tree.
       */
      memoryBarrierBuffer();

      uint32_t packed_grandparent = DEREF(INDEX(uint32_t, args.parent_child_map, parent));

      if (packed_grandparent == VK_NULL_BVH_OFFSET) {
         /* The parent is the root internal node: rebuild the header bounds
          * by decoding the quantized child boxes that were just written.
          */
         vec3 origin = vec3(DEREF(parent_node).lower[0],
                            DEREF(parent_node).lower[1],
                            DEREF(parent_node).lower[2]);
         ivec3 exponents = ivec3(int(DEREF(parent_node).exp_x),
                                 int(DEREF(parent_node).exp_y),
                                 int(DEREF(parent_node).exp_z));
         vec3 step_size = ldexp(vec3(1.0), exponents - 8);

         vk_aabb root_bounds = vk_aabb(vec3(INFINITY), vec3(-INFINITY));

         for (uint32_t c = 0; c < expected_children; ++c) {
            vec3 q_lower = vec3(DEREF(parent_node).lower_x[c],
                                DEREF(parent_node).lower_y[c],
                                DEREF(parent_node).lower_z[c]);
            vec3 q_upper = vec3(DEREF(parent_node).upper_x[c],
                                DEREF(parent_node).upper_y[c],
                                DEREF(parent_node).upper_z[c]);

            vk_aabb decoded;
            decoded.min = origin + q_lower * step_size;
            decoded.max = origin + q_upper * step_size;

            aabb_extend(root_bounds, decoded);
         }

         DEREF(header).aabb = root_bounds;
         return;
      }

      node = parent;
      node_bounds = parent_bounds;
   }
}
|
||||
73
src/intel/vulkan/bvh/update.h
Normal file
73
src/intel/vulkan/bvh/update.h
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef ANV_BVH_UPDATE_H
|
||||
#define ANV_BVH_UPDATE_H
|
||||
|
||||
#include "encode.h"
|
||||
|
||||
/* Loads triangle `global_id` of the given geometry, applies the optional
 * geometry transform, writes the bounds of the (transformed) vertices to
 * `bounds` and encodes the triangle at `dst_ptr`.
 */
void
anv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
{
   triangle_indices tri_indices = load_indices(geom_data.indices, geom_data.index_format, global_id);
   triangle_vertices tri = load_vertices(geom_data.data, tri_indices, geom_data.vertex_format, geom_data.stride);

   if (geom_data.transform != NULL) {
      /* The transform is stored as 12 floats, three rows of four; the last
       * row of the matrix stays at identity.
       */
      mat4 xform = mat4(1.0);
      for (uint32_t k = 0; k < 12; k++)
         xform[k % 4][k / 4] = DEREF(INDEX(float, geom_data.transform, k));

      for (uint32_t v = 0; v < 3; v++)
         tri.vertex[v] = xform * tri.vertex[v];
   }

   vk_ir_triangle_node node;
   node.triangle_id = global_id;
   node.geometry_id_and_flags = geom_data.geometry_id;

   bounds.min = vec3(INFINITY);
   bounds.max = vec3(-INFINITY);

   /* Copy the vertex positions into the node while growing the bounds. */
   for (uint32_t v = 0; v < 3; v++) {
      for (uint32_t axis = 0; axis < 3; axis++) {
         node.coords[v][axis] = tri.vertex[v][axis];
         bounds.min[axis] = min(bounds.min[axis], tri.vertex[v][axis]);
         bounds.max[axis] = max(bounds.max[axis], tri.vertex[v][axis]);
      }
   }

   anv_encode_triangle(dst_ptr, node);
}
|
||||
|
||||
/* Reads six floats from `src_ptr` (minimum corner first, then maximum
 * corner), stores them in `bounds` and encodes the matching AABB leaf at
 * `dst_ptr`.
 */
void
anv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
{
   for (uint32_t axis = 0; axis < 3; axis++)
      bounds.min[axis] = DEREF(INDEX(float, src_ptr, axis));

   for (uint32_t axis = 0; axis < 3; axis++)
      bounds.max[axis] = DEREF(INDEX(float, src_ptr, axis + 3));

   vk_ir_aabb_node node;
   node.primitive_id = global_id;
   node.geometry_id_and_flags = geometry_id;
   node.base.aabb = bounds;

   anv_encode_aabb(dst_ptr, node);
}
|
||||
|
||||
#endif
|
||||
|
|
@ -30,6 +30,16 @@
|
|||
static uint32_t blas_id = 0;
|
||||
static uint32_t tlas_id = 0;
|
||||
|
||||
/* Byte layout of the scratch buffer used by the BVH update pass. All values
 * are relative to the start of the scratch allocation.
 */
struct update_scratch_layout {
|
||||
/* Start of the array of uint32_t ready counters, one entry per node. */
uint32_t internal_ready_count_offset;
|
||||
/* Start of the array of vk_aabb bounds, one entry per node. */
uint32_t aabb_offset;
|
||||
/* Total number of scratch bytes required. */
uint32_t size;
|
||||
};
|
||||
|
||||
/* Bit flags stored in config.encode_key[1] of a build state. */
enum anv_encode_key {
|
||||
/* The acceleration structure also carries the lookup maps needed to update it later. */
ANV_ENCODE_KEY_ALLOW_UPDATE_BVH = (1 << 0),
|
||||
};
|
||||
|
||||
static void
|
||||
begin_debug_marker(VkCommandBuffer commandBuffer,
|
||||
struct vk_acceleration_structure_build_marker *marker)
|
||||
|
|
@ -58,6 +68,9 @@ begin_debug_marker(VkCommandBuffer commandBuffer,
|
|||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
|
||||
trace_intel_begin_as_encode(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE:
|
||||
trace_intel_begin_as_update(&cmd_buffer->trace);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Invalid build step");
|
||||
}
|
||||
|
|
@ -91,6 +104,7 @@ end_debug_marker(VkCommandBuffer commandBuffer,
|
|||
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE:
|
||||
trace_intel_end_as_encode(&cmd_buffer->trace,
|
||||
marker->encode.pass,
|
||||
marker->encode.key,
|
||||
|
|
@ -150,21 +164,36 @@ add_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
|
|||
static void
|
||||
debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
|
||||
VkDeviceAddress header_addr,
|
||||
uint64_t bvh_anv_size,
|
||||
struct bvh_layout bvh_layout,
|
||||
VkDeviceAddress intermediate_header_addr,
|
||||
VkDeviceAddress intermediate_as_addr,
|
||||
uint32_t leaf_count,
|
||||
VkGeometryTypeKHR geometry_type)
|
||||
VkGeometryTypeKHR geometry_type,
|
||||
bool after_update)
|
||||
{
|
||||
if (INTEL_DEBUG(DEBUG_BVH_UPDATE_AS) && after_update &&
|
||||
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
||||
add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type,
|
||||
BVH_ANV_UPDATE);
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BVH_PCREL_MAP) &&
|
||||
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
||||
add_bvh_dump(cmd_buffer, header_addr + bvh_layout.parent_child_map_offset,
|
||||
bvh_layout.leaf_block_map_offset - bvh_layout.parent_child_map_offset,
|
||||
geometry_type,
|
||||
BVH_ANV_PCREL);
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BVH_BLAS) &&
|
||||
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
||||
add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
|
||||
add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type,
|
||||
BVH_ANV);
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BVH_TLAS) &&
|
||||
geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
||||
add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
|
||||
add_bvh_dump(cmd_buffer, header_addr, bvh_layout.size, geometry_type,
|
||||
BVH_ANV);
|
||||
}
|
||||
|
||||
|
|
@ -225,6 +254,7 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
|
|||
#define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h)
|
||||
#define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h)
|
||||
#define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h)
|
||||
#define UPDATE_SPV_PATH STRINGIFY(bvh/genX(update).spv.h)
|
||||
|
||||
static const uint32_t encode_spv[] = {
|
||||
#include ENCODE_SPV_PATH
|
||||
|
|
@ -238,10 +268,17 @@ static const uint32_t copy_spv[] = {
|
|||
#include COPY_SPV_PATH
|
||||
};
|
||||
|
||||
static const uint32_t update_spv[] = {
|
||||
#include UPDATE_SPV_PATH
|
||||
};
|
||||
|
||||
static void
|
||||
get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
|
||||
get_bvh_layout(const struct vk_acceleration_structure_build_state *state,
|
||||
struct bvh_layout *layout)
|
||||
{
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
uint32_t leaf_count = state->leaf_node_count;
|
||||
|
||||
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
|
||||
|
||||
uint64_t offset = ANV_RT_BVH_HEADER_SIZE;
|
||||
|
|
@ -270,7 +307,6 @@ get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
|
|||
UNREACHABLE("Unknown VkGeometryTypeKHR");
|
||||
}
|
||||
|
||||
offset = align64(offset, 64);
|
||||
layout->instance_leaves_offset = offset;
|
||||
|
||||
/* For a TLAS, we store the address of anv_instance_leaf after header
|
||||
|
|
@ -280,6 +316,16 @@ get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
|
|||
offset += leaf_count * sizeof(uint64_t);
|
||||
}
|
||||
|
||||
if (state->config.encode_key[1] & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) {
|
||||
uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t);
|
||||
layout->parent_child_map_offset = offset;
|
||||
offset += parent_child_map_size;
|
||||
|
||||
uint64_t leaf_block_offset_size = leaf_count * sizeof(uint32_t);
|
||||
layout->leaf_block_map_offset = offset;
|
||||
offset += leaf_block_offset_size;
|
||||
}
|
||||
|
||||
layout->size = align64(offset, 64);
|
||||
}
|
||||
|
||||
|
|
@ -287,7 +333,7 @@ static VkDeviceSize
|
|||
anv_get_as_size(VkDevice device, const struct vk_acceleration_structure_build_state *state)
|
||||
{
|
||||
struct bvh_layout layout;
|
||||
get_bvh_layout(vk_get_as_geometry_type(state->build_info), state->leaf_node_count, &layout);
|
||||
get_bvh_layout(state, &layout);
|
||||
return layout.size;
|
||||
}
|
||||
|
||||
|
|
@ -305,9 +351,13 @@ anv_get_build_config(VkDevice device, struct vk_acceleration_structure_build_sta
|
|||
* the compacted size of an updatable AS as the maximum possible size for
|
||||
* any AS that could also be built from the same number of leaf nodes.
|
||||
*/
|
||||
state->config.encode_key[1] =
|
||||
state->config.encode_key[0] =
|
||||
((flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) &&
|
||||
!(flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) ? 1 : 0;
|
||||
|
||||
if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
|
||||
state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
|
||||
state->config.encode_key[1] = ANV_ENCODE_KEY_ALLOW_UPDATE_BVH;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -357,13 +407,32 @@ anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args,
|
|||
anv_CmdPushConstants2(commandBuffer, &push_info);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
anv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
|
||||
{
|
||||
uint32_t flags = 0;
|
||||
|
||||
/* This will write following required maps for update BVH pass:
|
||||
* 1) Parent-Child offset map
|
||||
* 2) Leaf block offset map
|
||||
* 3) Parent slot offset map
|
||||
* 4) Parent child count map
|
||||
*/
|
||||
if (key & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) {
|
||||
flags |= ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE;
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
|
||||
{
|
||||
anv_bvh_build_bind_pipeline(commandBuffer,
|
||||
ANV_OBJECT_KEY_BVH_ENCODE,
|
||||
encode_spv, sizeof(encode_spv),
|
||||
sizeof(struct encode_args), 0);
|
||||
sizeof(struct encode_args),
|
||||
anv_build_flags(commandBuffer, state->config.encode_key[1]));
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -390,7 +459,7 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct
|
|||
|
||||
struct bvh_layout bvh_layout;
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout);
|
||||
get_bvh_layout(state, &bvh_layout);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD)) {
|
||||
/* Zero out the whole BVH when we run with BVH_NO_BUILD debug option. */
|
||||
|
|
@ -419,6 +488,10 @@ anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struct
|
|||
.geometry_type = geometry_type,
|
||||
.instance_leaves_addr = vk_acceleration_structure_get_va(dst) +
|
||||
bvh_layout.instance_leaves_offset,
|
||||
.parent_child_map = vk_acceleration_structure_get_va(dst) +
|
||||
bvh_layout.parent_child_map_offset,
|
||||
.leaf_block_offset_map = vk_acceleration_structure_get_va(dst) +
|
||||
bvh_layout.leaf_block_map_offset,
|
||||
};
|
||||
anv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
|
||||
|
||||
|
|
@ -454,7 +527,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru
|
|||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
|
||||
struct bvh_layout bvh_layout;
|
||||
get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout);
|
||||
get_bvh_layout(state, &bvh_layout);
|
||||
|
||||
VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);
|
||||
|
||||
|
|
@ -467,17 +540,158 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru
|
|||
.bvh_offset = bvh_layout.bvh_offset,
|
||||
.instance_count = instance_count,
|
||||
.instance_leaves_offset = bvh_layout.instance_leaves_offset,
|
||||
.is_compacted = (state->config.encode_key[1] == 1),
|
||||
.is_compacted = (state->config.encode_key[0] == 1),
|
||||
.bvh_size = bvh_layout.size,
|
||||
|
||||
.parent_child_map_offset = bvh_layout.parent_child_map_offset,
|
||||
.leaf_block_map_offset = bvh_layout.leaf_block_map_offset,
|
||||
.leaf_count = state->leaf_node_count,
|
||||
.total_nodes = state->leaf_node_count + (MAX2(state->leaf_node_count, 2) - 1),
|
||||
};
|
||||
|
||||
anv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
|
||||
vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
|
||||
|
||||
if (INTEL_DEBUG_BVH_ANY) {
|
||||
debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size,
|
||||
debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout,
|
||||
intermediate_header_addr, intermediate_bvh_addr,
|
||||
state->leaf_node_count, geometry_type);
|
||||
state->leaf_node_count, geometry_type,
|
||||
false /* after update */);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
anv_get_update_scratch_layout(struct anv_device *device,
|
||||
const struct vk_acceleration_structure_build_state *state,
|
||||
struct update_scratch_layout *scratch)
|
||||
{
|
||||
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
|
||||
uint32_t offset = 0;
|
||||
|
||||
scratch->internal_ready_count_offset = offset;
|
||||
offset += sizeof(uint32_t) * (internal_count + state->leaf_node_count);
|
||||
|
||||
scratch->aabb_offset = offset;
|
||||
offset += sizeof(vk_aabb) * (internal_count + state->leaf_node_count);
|
||||
|
||||
scratch->size = offset;
|
||||
}
|
||||
|
||||
static VkDeviceSize
|
||||
anv_get_update_scratch_size(VkDevice _device,
|
||||
const struct vk_acceleration_structure_build_state *state)
|
||||
{
|
||||
VK_FROM_HANDLE(anv_device, device, _device);
|
||||
|
||||
struct update_scratch_layout scratch;
|
||||
anv_get_update_scratch_layout(device, state, &scratch);
|
||||
|
||||
return scratch.size;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_init_update_scratch(VkCommandBuffer commandBuffer,
|
||||
const struct vk_acceleration_structure_build_state *states,
|
||||
uint32_t build_count)
|
||||
{
|
||||
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
|
||||
for (uint32_t i = 0; i < build_count; i++) {
|
||||
const struct vk_acceleration_structure_build_state *state = &states[i];
|
||||
if (state->config.internal_type != VK_INTERNAL_BUILD_TYPE_UPDATE)
|
||||
continue;
|
||||
|
||||
uint64_t scratch = state->build_info->scratchData.deviceAddress;
|
||||
|
||||
struct update_scratch_layout layout;
|
||||
anv_get_update_scratch_layout(device, state, &layout);
|
||||
|
||||
anv_cmd_fill_buffer_addr(commandBuffer, scratch, layout.size, 0x0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
anv_update_prepare(VkCommandBuffer commandBuffer,
|
||||
const struct vk_acceleration_structure_build_state *state,
|
||||
bool flushed_cp_after_init_update_scratch,
|
||||
bool flushed_compute_after_init_update_scratch)
|
||||
{
|
||||
if (!flushed_compute_after_init_update_scratch ||
|
||||
!flushed_cp_after_init_update_scratch)
|
||||
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
||||
|
||||
anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_UPDATE,
|
||||
update_spv, sizeof(update_spv),
|
||||
sizeof(struct update_args), 0);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_update_as(VkCommandBuffer commandBuffer,
|
||||
const struct vk_acceleration_structure_build_state *state)
|
||||
{
|
||||
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
|
||||
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
|
||||
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
|
||||
struct bvh_layout bvh_layout;
|
||||
get_bvh_layout(state, &bvh_layout);
|
||||
|
||||
/* Just copy over data from src to dst if mismatch. */
|
||||
if (src != dst) {
|
||||
assert(src->offset == 0 && dst->offset == 0);
|
||||
struct anv_address src_addr =
|
||||
anv_address_from_u64(vk_acceleration_structure_get_va(src));
|
||||
struct anv_address dst_addr =
|
||||
anv_address_from_u64(vk_acceleration_structure_get_va(dst));
|
||||
|
||||
assert(src->size == dst->size);
|
||||
anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, src->size);
|
||||
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
||||
}
|
||||
|
||||
struct update_scratch_layout update_layout;
|
||||
anv_get_update_scratch_layout(device, state, &update_layout);
|
||||
|
||||
struct update_args update_consts = {
|
||||
.internal_ready_count = state->build_info->scratchData.deviceAddress +
|
||||
update_layout.internal_ready_count_offset,
|
||||
.aabb_scratch = state->build_info->scratchData.deviceAddress +
|
||||
update_layout.aabb_offset,
|
||||
.leaf_node_count = state->leaf_node_count,
|
||||
.parent_child_map = vk_acceleration_structure_get_va(dst) +
|
||||
bvh_layout.parent_child_map_offset,
|
||||
.leaf_block_offset_map = vk_acceleration_structure_get_va(dst) +
|
||||
bvh_layout.leaf_block_map_offset,
|
||||
.output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
|
||||
.output_bvh_offset = bvh_layout.bvh_offset,
|
||||
};
|
||||
|
||||
uint32_t first_id = 0;
|
||||
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
|
||||
const VkAccelerationStructureGeometryKHR *geom =
|
||||
state->build_info->pGeometries ? &state->build_info->pGeometries[i] :state->build_info->ppGeometries[i];
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info =
|
||||
&state->build_range_infos[i];
|
||||
|
||||
update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
|
||||
update_consts.primitive_count = build_range_info->primitiveCount;
|
||||
|
||||
anv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));
|
||||
anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
|
||||
(commandBuffer, build_range_info->primitiveCount, 1, 1);
|
||||
|
||||
first_id += build_range_info->primitiveCount;
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG_BVH_ANY) {
|
||||
debug_record_as_to_bvh_dump(cmd_buffer, vk_acceleration_structure_get_va(dst),
|
||||
bvh_layout, 0, 0,
|
||||
state->leaf_node_count,
|
||||
vk_get_as_geometry_type(state->build_info),
|
||||
true /* after update */);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -488,6 +702,10 @@ static const struct vk_acceleration_structure_build_ops anv_build_ops = {
|
|||
.get_build_config = anv_get_build_config,
|
||||
.encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline },
|
||||
.encode_as = { anv_encode_as, anv_init_header },
|
||||
.get_update_scratch_size = anv_get_update_scratch_size,
|
||||
.init_update_scratch = anv_init_update_scratch,
|
||||
.update_prepare[0] = anv_update_prepare,
|
||||
.update_as[0] = anv_update_as,
|
||||
};
|
||||
|
||||
static VkResult
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue