anv: Add INTEL_DEBUG for bvh dump and visualization tools

This commit allows you to dump different regions of memory related to
bvh building. An additional script to decode the memory dump is also
added, and you're able to view the built bvh in 3D view in html. See the
included README.md for usage.

Rework:
- you can now view the actual child_coord in internalNode in html
- change exponent to be int8_t in the interpreter
- fix the actual coordinates using an updated formula
- now you can have 3D view of the bvh
- blockIncr could be 2 and vk_aabb should be first
- Now, if any bvh dump is enabled, we will zero out tlas, to prevent gpu
  hang caused by incorrect tlas traversal
- rootNodeOffset is back to the beginning
- Add INTEL_DEBUG=bvh_no_build.
- Fix type of dump_size
- add assertion for a 4B alignment
- when clearing out bvh, only clear out everything after
  (header+bvh_offset)
- TODO: instead of dumping on destroy, track in the command buffer

Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31588>
This commit is contained in:
Kevin Chuang 2024-07-09 00:54:47 -07:00 committed by Marge Bot
parent 5561db68c3
commit 5098c0c5df
7 changed files with 1120 additions and 0 deletions

View file

@ -94,6 +94,13 @@ static const struct debug_control debug_control[] = {
DEBUG_TES | DEBUG_GS | DEBUG_CS |
DEBUG_RT | DEBUG_TASK | DEBUG_MESH },
{ "rt", DEBUG_RT },
{ "bvh_blas", DEBUG_BVH_BLAS},
{ "bvh_tlas", DEBUG_BVH_TLAS},
{ "bvh_blas_ir_hdr", DEBUG_BVH_BLAS_IR_HDR},
{ "bvh_tlas_ir_hdr", DEBUG_BVH_TLAS_IR_HDR},
{ "bvh_blas_ir_as", DEBUG_BVH_BLAS_IR_AS},
{ "bvh_tlas_ir_as", DEBUG_BVH_TLAS_IR_AS},
{ "bvh_no_build", DEBUG_BVH_NO_BUILD},
{ "task", DEBUG_TASK },
{ "mesh", DEBUG_MESH },
{ "stall", DEBUG_STALL },

View file

@ -99,6 +99,13 @@ extern uint64_t intel_debug;
#define DEBUG_REG_PRESSURE (1ull << 51)
#define DEBUG_SHADER_PRINT (1ull << 52)
#define DEBUG_CL_QUIET (1ull << 53)
#define DEBUG_BVH_BLAS (1ull << 54)
#define DEBUG_BVH_TLAS (1ull << 55)
#define DEBUG_BVH_BLAS_IR_HDR (1ull << 56)
#define DEBUG_BVH_TLAS_IR_HDR (1ull << 57)
#define DEBUG_BVH_BLAS_IR_AS (1ull << 58)
#define DEBUG_BVH_TLAS_IR_AS (1ull << 59)
#define DEBUG_BVH_NO_BUILD (1ull << 60)
#define DEBUG_ANY (~0ull)
@ -110,6 +117,12 @@ extern uint64_t intel_debug;
(DEBUG_NO_DUAL_OBJECT_GS | DEBUG_SPILL_FS | \
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)
/* Flags to determine what bvh to dump out */
#define DEBUG_BVH_ANV (DEBUG_BVH_BLAS | DEBUG_BVH_TLAS)
#define DEBUG_BVH_IR_HDR (DEBUG_BVH_BLAS_IR_HDR | DEBUG_BVH_TLAS_IR_HDR)
#define DEBUG_BVH_IR_AS (DEBUG_BVH_BLAS_IR_AS | DEBUG_BVH_TLAS_IR_AS)
#define DEBUG_BVH_ANY (DEBUG_BVH_ANV | DEBUG_BVH_IR_HDR | DEBUG_BVH_IR_AS)
extern uint64_t intel_simd;
extern uint32_t intel_debug_bkp_before_draw_count;
extern uint32_t intel_debug_bkp_after_draw_count;

View file

@ -1834,6 +1834,20 @@ enum anv_rt_bvh_build_method {
*/
#define ANV_RT_UUID_MACRO "ANV_RT_BVH_0001"
/* Selects which region of bvh-build memory a dump entry captures. The
 * enumerator names match the bvh_dump/ sub-directories described in the
 * bvh README.
 */
enum bvh_dump_type {
/* anv bvh: from the anv_accel_struct_header to the end of the bvh */
BVH_ANV,
/* vk_ir_header located at the beginning of the ir bvh */
BVH_IR_HDR,
/* ir leaf nodes and vk_ir_box_node entries of the ir bvh */
BVH_IR_AS
};
/* One pending bvh dump: a buffer holding a copy of some bvh-related memory
 * plus the metadata stored alongside it by the bvh debug code.
 */
struct bvh_dump_struct {
/* Buffer object the dumped memory is copied into */
struct anv_bo *bo;
/* Id of the blas or tlas this dump belongs to */
uint32_t bvh_id;
/* Number of bytes copied into bo */
uint64_t dump_size;
/* Geometry type of the build; instances denote a tlas */
VkGeometryTypeKHR geometry_type;
/* Which memory region was captured */
enum bvh_dump_type dump_type;
};
struct anv_device_astc_emu {
struct vk_texcompress_astc_state *texcompress;

View file

@ -0,0 +1,38 @@
## BVH Debug
1. `INTEL_DEBUG=bvh_tlas,bvh_blas` will generate `tlas_{id}.txt` or `blas_{id}.txt` in `bvh_dump/BVH_ANV` directory.
2. `INTEL_DEBUG=bvh_tlas_ir_hdr,bvh_blas_ir_hdr` will generate `tlas_{id}.txt` or `blas_{id}.txt` in `bvh_dump/BVH_IR_HDR` directory.
3. `INTEL_DEBUG=bvh_tlas_ir_as,bvh_blas_ir_as` will generate `tlas_{id}.txt` or `blas_{id}.txt` in `bvh_dump/BVH_IR_AS` directory.
4. `INTEL_DEBUG=bvh_no_build` will skip the intel-specific-encoding part. If a gpu hang is seen, this is the first step to isolate the problem. If the gpu doesn't hang anymore with this option toggled on, then either encode.comp was spinning, or the built bvh has issues that made the gpu hang during bvh traversal.
The dumped text file contains memory dump, byte-by-byte in hex. The contents are contiguous memory of a certain region.
1. The dump in `BVH_ANV` starts from the beginning of `anv_accel_struct_header` to the end of the bvh. Nodes/leaves are packed tightly after the header, encoded in a way that our HW expects.
2. The dump in `BVH_IR_HDR` records the contents of `vk_ir_header` sitting at the beginning of ir bvh.
3. The dump in `BVH_IR_AS` records all `vk_ir_{leaf_type}_node` and `vk_ir_box_node` in ir bvh. The region starts from where leaves are encoded to the end of ir bvh.
### Decode the dump
We have a way to decode the dump in `BVH_ANV`.
- To decode this memory dump, use the python script `interpret.py` to parse the file and generate a human-readable json.
- To further visualize the tree, there is an HTML page (`visualize_json.html`) that parses the json and draws the tree topology and 3D views of the bounding boxes.
```
# Using blas_0 as an example
xxd -r -p bvh_dump/BVH_ANV/blas_0.txt > input.bin
# Use a python script to generate a human-readable json file
cd mesa/src/intel/vulkan/bvh/
python3 interpret.py <path/to/input.bin>
# To further visualize the tree, the html parses the json and draws it in 3D.
cd mesa/src/intel/vulkan/bvh/
python3 -m http.server 8000
# go to localhost:8000/visualize_json.html
```
### Note and Limitations:
1. The python script uses `ctypes` to interpret the memory dump, so the structures defined in the script must match the structures defined in the driver.
2. The memory dump is a snapshot of a VkBuffer captured at the end of `CmdBuildAccelerationStructure` call. It won't capture any bvh obtained from `CmdCopy`.
3. Every time `DestroyAccelerationStructure` is called, the memory dumps of all bvhs captured so far are saved to files.
4. If ANY dump is enabled, we will nullify anv tlas bvh and send all 0s to the gpu. Doing this can prevent gpu hang caused by incorrect bvh traversal. However, the actual contents are still saved to files for debugging.

View file

@ -0,0 +1,379 @@
#!/usr/bin/env python3
import ctypes
import sys
import json
def get_header_properties(header):
    """Convert a decoded acceleration-structure header into a plain dict.

    `header` must expose the fields of AnvAccelStructHeader. The padding
    words are not copied; only their count is reported.
    """
    box = header.aabb
    pad_words = len(header.padding)
    return {
        'rootNodeOffset': header.rootNodeOffset,
        'aabb': {
            name: getattr(box, name)
            for name in ('min_x', 'min_y', 'min_z', 'max_x', 'max_y', 'max_z')
        },
        'instance_flags': header.instance_flags,
        'copy_dispatch_size': [dim for dim in header.copy_dispatch_size],
        'compacted_size': header.compacted_size,
        'serialization_size': header.serialization_size,
        'size': header.size,
        'instance_count': header.instance_count,
        'self_ptr': header.self_ptr,
        'padding': f"{pad_words} uint32_t paddings",
    }
def get_aabb_leaf_properties(node):
    """Decode a procedural (AABB) leaf node into a plain dict.

    The two leaf-descriptor dwords are split into their bit fields; the
    primIndex array is summarised by its length only.
    """
    shader_dw = node.leaf_desc.shader_index_and_geom_mask
    geom_dw = node.leaf_desc.geometry_id_and_flags

    leaf_desc = {
        'shaderIndex': shader_dw & 0xFFFFFF,
        'geomMask': (shader_dw >> 24) & 0xFF,
        'geomIndex': geom_dw & 0xFFFFFF,
        'subType': (geom_dw >> 24) & 0xF,
    }
    # Bits 28..31 of the second dword are single-bit fields, lowest bit first.
    single_bit_fields = ('reserved0', 'DisableOpacityCull',
                         'OpaqueGeometry', 'IgnoreRayMultiplier')
    for bit, field in enumerate(single_bit_fields, start=28):
        leaf_desc[field] = (geom_dw >> bit) & 0x1

    prim_count = len(node.primIndex)
    return {
        'leaf_desc': leaf_desc,
        'DW1': node.DW1,
        'primIndex': f"{prim_count} uint32",
    }
def get_quad_leaf_properties(node):
    """Decode a quad leaf node (descriptor, primitive indices, 4 vertices)."""
    shader_dw = node.leaf_desc.shader_index_and_geom_mask
    geom_dw = node.leaf_desc.geometry_id_and_flags
    prim1_dw = node.prim_index1_and_flags

    leaf_desc = {
        'shaderIndex': shader_dw & 0xFFFFFF,
        'geomMask': (shader_dw >> 24) & 0xFF,
        'geomIndex': geom_dw & 0xFFFFFF,
        'subType': (geom_dw >> 24) & 0xF,
        'reserved0': (geom_dw >> 28) & 0x1,
        'DisableOpacityCull': (geom_dw >> 29) & 0x1,
        'OpaqueGeometry': (geom_dw >> 30) & 0x1,
        'IgnoreRayMultiplier': (geom_dw >> 31) & 0x1,
    }

    prim1 = {'primIndex1Delta': prim1_dw & 0xFFFF}
    # j0, j1 and j2 are consecutive 2-bit fields starting at bit 16.
    for slot in range(3):
        prim1[f'j{slot}'] = (prim1_dw >> (16 + 2 * slot)) & 0x3
    prim1['last'] = (prim1_dw >> 22) & 0x1
    prim1['pad'] = (prim1_dw >> 23) & 0x1FF

    # Four vertices with three components each.
    vertices = []
    for i in range(4):
        vertex = node.v[i]
        vertices.append([vertex[0], vertex[1], vertex[2]])

    return {
        'leaf_desc': leaf_desc,
        'prim_index0': node.prim_index0,
        'prim_index1_and_flags': prim1,
        'v': vertices,
    }
def get_internal_node_properties(node):
    """Decode an internal node and de-quantize its six child boxes.

    Besides the raw fields, the result carries 'actual_coords': one dict per
    child slot with the box bounds reconstructed as
    lower[axis] + 2**exp_axis * (quantized / 256), for visualisation and
    debugging only.
    """
    # (axis name, index into node.lower, exponent, quantized lower, quantized upper)
    axes = (
        ('x', 0, node.exp_x, node.lower_x, node.upper_x),
        ('y', 1, node.exp_y, node.lower_y, node.upper_y),
        ('z', 2, node.exp_z, node.lower_z, node.upper_z),
    )

    actual_coords = []
    for child in range(6):
        box = {}
        for axis, component, exponent, quant_lo, quant_hi in axes:
            origin = node.lower[component]
            scale = 2 ** exponent
            # The 8-bit value is the fractional part of the mantissa.
            box[f'{axis}_lower'] = origin + scale * (quant_lo[child] / 256.0)
            box[f'{axis}_upper'] = origin + scale * (quant_hi[child] / 256.0)
        actual_coords.append(box)

    child_data = []
    for child in range(6):
        packed = node.child_data[child].blockIncr_and_startPrim
        child_data.append({
            'blockIncr': packed & 0x3,
            'startPrim': (packed >> 2) & 0xf,
        })

    packed_type = node.node_type
    return {
        'lower': list(node.lower),
        'child_offset': node.child_offset,
        'node_type': {
            'nodeType': packed_type & 0xF,
            'subType': (packed_type >> 4) & 0xF,
        },
        'reserved': node.reserved,
        'exp_x': node.exp_x,
        'exp_y': node.exp_y,
        'exp_z': node.exp_z,
        'node_mask': node.node_mask,
        'child_data': child_data,
        'lower_x': list(node.lower_x),
        'upper_x': list(node.upper_x),
        'lower_y': list(node.lower_y),
        'upper_y': list(node.upper_y),
        'lower_z': list(node.lower_z),
        'upper_z': list(node.upper_z),
        'actual_coords': actual_coords,
    }
def get_instance_leaf_properties(node):
    """Decode both halves (part0 / part1) of an instance leaf into a dict."""
    p0 = node.part0
    p1 = node.part1

    def vec3(part, prefix):
        # Gather the <prefix>_x/_y/_z float fields of `part` into a list.
        return [getattr(part, f'{prefix}_{axis}') for axis in ('x', 'y', 'z')]

    shader_dw = p0.shader_index_and_geom_mask
    contrib_dw = p0.instance_contribution_and_geom_flags
    start_qw = p0.start_node_ptr_and_inst_flags

    part0 = {
        'shaderIndex': shader_dw & 0xFFFFFF,
        'geomMask': (shader_dw >> 24) & 0xFF,
        'instanceContribution': contrib_dw & 0xFFFFFF,
        'pad0': (contrib_dw >> 24) & 0x1F,
        'DisableOpacityCull': (contrib_dw >> 29) & 0x1,
        'OpaqueGeometry': (contrib_dw >> 30) & 0x1,
        'pad1': (contrib_dw >> 31) & 0x1,
        'startNodePtr': start_qw & 0xFFFFFFFFFFFF,
        'instFlags': (start_qw >> 48) & 0xFF,
        'ComparisonMode': (start_qw >> 56) & 0x1,
        'ComparisonValue': (start_qw >> 57) & 0x7F,
    }
    for name in ('world2obj_vx', 'world2obj_vy', 'world2obj_vz', 'obj2world_p'):
        part0[name] = vec3(p0, name)

    part1 = {
        'bvh_ptr': p1.bvh_ptr,
        'instance_id': p1.instance_id,
        'instance_index': p1.instance_index,
    }
    for name in ('obj2world_vx', 'obj2world_vy', 'obj2world_vz', 'world2obj_p'):
        part1[name] = vec3(p1, name)

    return {'part0': part0, 'part1': part1}
class NodeType:
    """Numeric node-type codes found in the dumped BVH nodes."""

    # Both names share code 0: a "mixed" node is an internal node whose
    # children carry their own type.
    NODE_TYPE_MIXED = 0
    NODE_TYPE_INTERNAL = NODE_TYPE_MIXED

    NODE_TYPE_INSTANCE = 1
    NODE_TYPE_QUAD128_STOC = 2
    NODE_TYPE_PROCEDURAL = 3
    NODE_TYPE_QUAD = 4
    NODE_TYPE_QUAD128 = 5
    NODE_TYPE_MESHLET = 6
    NODE_TYPE_INVALID = 7
class VkAabb(ctypes.Structure):
    """Axis-aligned bounding box: three float minima followed by three maxima."""

    # Expands to min_x, min_y, min_z, max_x, max_y, max_z.
    _fields_ = [
        (f'{bound}_{axis}', ctypes.c_float)
        for bound in ('min', 'max')
        for axis in 'xyz'
    ]
class AnvAccelStructHeader(ctypes.Structure):
    """Header found at the very start of a BVH_ANV dump.

    The field layout must stay in sync with the driver's
    anv_accel_struct_header, otherwise every offset decoded after it is wrong.
    """

    _fields_ = [
        # Byte offset of the root node, relative to the start of the dump.
        ('rootNodeOffset', ctypes.c_uint64),
        ('aabb', VkAabb),
        ('instance_flags', ctypes.c_uint32),
        ('copy_dispatch_size', ctypes.c_uint32 * 3),
        ('compacted_size', ctypes.c_uint64),
        ('serialization_size', ctypes.c_uint64),
        ('size', ctypes.c_uint64),
        ('instance_count', ctypes.c_uint64),
        ('self_ptr', ctypes.c_uint64),
        ('padding', ctypes.c_uint32 * 42),
    ]
class ChildData(ctypes.Structure):
    """Per-child byte of an internal node.

    The decoder reads blockIncr from bits 0-1 and startPrim from bits 2-5.
    """

    _fields_ = [
        ('blockIncr_and_startPrim', ctypes.c_uint8),
    ]
class AnvInternalNode(ctypes.Structure):
    """Internal BVH node with up to six children and quantized child bounds."""

    _fields_ = [
        # Origin used when de-quantizing the child bounds.
        ('lower', ctypes.c_float * 3),
        # Distance to the first child, counted in 64-byte blocks from this node.
        ('child_offset', ctypes.c_uint32),
        ('node_type', ctypes.c_uint8),
        ('reserved', ctypes.c_uint8),
        # Signed per-axis exponents applied to the quantized bounds.
        ('exp_x', ctypes.c_int8),
        ('exp_y', ctypes.c_int8),
        ('exp_z', ctypes.c_int8),
        ('node_mask', ctypes.c_uint8),
        ('child_data', ChildData * 6),
    ] + [
        # lower_x, upper_x, lower_y, upper_y, lower_z, upper_z: one byte per child.
        (f'{bound}_{axis}', ctypes.c_uint8 * 6)
        for axis in 'xyz'
        for bound in ('lower', 'upper')
    ]
class AnvPrimLeafDesc(ctypes.Structure):
    """Two packed dwords shared by the quad and procedural leaf nodes."""

    _fields_ = [
        # Low 24 bits: shader index; top 8 bits: geometry mask.
        ('shader_index_and_geom_mask', ctypes.c_uint32),
        # Low 24 bits: geometry index; upper bits: sub-type and flag bits.
        ('geometry_id_and_flags', ctypes.c_uint32),
    ]
class AnvQuadLeafNode(ctypes.Structure):
    """Quad leaf: descriptor, two primitive-index words and four vertices."""

    _fields_ = [
        ('leaf_desc', AnvPrimLeafDesc),
        ('prim_index0', ctypes.c_uint32),
        ('prim_index1_and_flags', ctypes.c_uint32),
        # v[vertex][component]: 4 vertices of 3 floats each.
        ('v', (ctypes.c_float * 3) * 4),
    ]
class AnvProceduralLeafNode(ctypes.Structure):
    """Procedural (AABB) leaf: descriptor, one dword and 13 primitive indices."""

    _fields_ = [
        ('leaf_desc', AnvPrimLeafDesc),
        ('DW1', ctypes.c_uint32),
        ('primIndex', ctypes.c_uint32 * 13),
    ]
class InstanceLeafPart0(ctypes.Structure):
    """First 64 bytes of an instance leaf: packed words plus four float3s."""

    _fields_ = [
        ('shader_index_and_geom_mask', ctypes.c_uint32),
        ('instance_contribution_and_geom_flags', ctypes.c_uint32),
        ('start_node_ptr_and_inst_flags', ctypes.c_uint64),
    ] + [
        # Each vector expands to three consecutive floats: <name>_x, _y, _z.
        (f'{vector}_{axis}', ctypes.c_float)
        for vector in ('world2obj_vx', 'world2obj_vy', 'world2obj_vz', 'obj2world_p')
        for axis in 'xyz'
    ]
class InstanceLeafPart1(ctypes.Structure):
    """Second 64 bytes of an instance leaf: bvh pointer, ids and four float3s."""

    _fields_ = [
        ('bvh_ptr', ctypes.c_uint64),
        ('instance_id', ctypes.c_uint32),
        ('instance_index', ctypes.c_uint32),
    ] + [
        # Each vector expands to three consecutive floats: <name>_x, _y, _z.
        (f'{vector}_{axis}', ctypes.c_float)
        for vector in ('obj2world_vx', 'obj2world_vy', 'obj2world_vz', 'world2obj_p')
        for axis in 'xyz'
    ]
class AnvInstanceLeaf(ctypes.Structure):
    """Instance leaf node, stored as two consecutive parts (part0 then part1)."""

    _fields_ = [
        ('part0', InstanceLeafPart0),
        ('part1', InstanceLeafPart1),
    ]
class BVHInterpreter:
    """Decode a BVH_ANV memory dump into JSON-serialisable nodes.

    The dump must start with an AnvAccelStructHeader. Nodes are visited
    depth-first from the root node and numbered in visiting order.
    """

    def __init__(self, data):
        # Raw bytes of the dump.
        self.data = data
        # One {'id', 'type', 'properties'} dict per visited node.
        self.nodes = []
        # Maps a node id to the list of its child node ids.
        self.relationships = {}
        # Id handed to the next visited node.
        self.node_counter = 0

    def interpret_structure(self, offset, structure):
        """Return a `structure` instance copied from the dump at `offset`.

        Raises:
            ValueError: the dump ends before `offset + sizeof(structure)`.
        """
        size = ctypes.sizeof(structure)
        if offset + size > len(self.data):
            raise ValueError("Not enough data to interpret this structure.")
        return structure.from_buffer_copy(self.data[offset:offset + size])

    def parse_bvh(self):
        """Decode the header and the whole tree.

        Returns:
            dict with 'header', 'nodes' and 'relationships' entries.
        """
        header = self.interpret_structure(0, AnvAccelStructHeader)
        # rootNodeOffset is counted from the start of the dump.
        self.dfs_interpret_node(header.rootNodeOffset, AnvInternalNode)
        return {
            'header': get_header_properties(header),
            'nodes': self.nodes,
            'relationships': self.relationships,
        }

    def determine_child_structure(self, child_node_type):
        """Map a numeric node type to the ctypes structure that decodes it.

        Raises:
            ValueError: the node type has no decoder in this script.
        """
        if child_node_type == NodeType.NODE_TYPE_MIXED:
            return AnvInternalNode
        elif child_node_type == NodeType.NODE_TYPE_INSTANCE:
            return AnvInstanceLeaf
        elif child_node_type == NodeType.NODE_TYPE_QUAD:
            return AnvQuadLeafNode
        elif child_node_type == NodeType.NODE_TYPE_PROCEDURAL:
            return AnvProceduralLeafNode
        else:
            raise ValueError(f"Unknown node type: {child_node_type}")

    def dfs_interpret_node(self, offset, structure):
        """Decode the node at `offset` as `structure`, then recurse into children.

        Returns:
            The id assigned to the decoded node.
        """
        node = self.interpret_structure(offset, structure)
        node_id = self.node_counter
        self.node_counter += 1

        if structure == AnvInternalNode:
            node_type_str = "AnvInternalNode"
            node_properties = get_internal_node_properties(node)
        elif structure == AnvInstanceLeaf:
            node_type_str = "AnvInstanceLeaf"
            node_properties = get_instance_leaf_properties(node)
        elif structure == AnvQuadLeafNode:
            node_type_str = "AnvQuadLeafNode"
            node_properties = get_quad_leaf_properties(node)
        elif structure == AnvProceduralLeafNode:
            node_type_str = "AnvProceduralLeafNode"
            node_properties = get_aabb_leaf_properties(node)
        else:
            raise ValueError(f"Unknown structure type: {structure}")

        self.nodes.append({
            'id': node_id,
            'type': node_type_str,
            'properties': node_properties
        })
        self.relationships[node_id] = []

        if structure != AnvInternalNode:
            # Leaves have no children to visit.
            return node_id

        # Children start child_offset 64-byte blocks after this node.
        children_offset_start = offset + node.child_offset * 64

        # Only the low nibble of node_type is the node type (the high nibble
        # is decoded as subType elsewhere in this script), so mask before
        # comparing against NodeType values.
        parent_node_type = node.node_type & 0xF
        # A non-mixed internal node is a "fat leaf": all of its children
        # share the parent's node type.
        is_fat_leaf = parent_node_type != NodeType.NODE_TYPE_MIXED

        added_blocks = 0
        for i in range(6):
            # A slot is skipped when lower_x has bit 7 set and upper_x does not.
            child_is_valid = not (node.lower_x[i] & 0x80) or (node.upper_x[i] & 0x80)
            if not child_is_valid:
                continue

            packed = node.child_data[i].blockIncr_and_startPrim
            block_incr = packed & 0x3
            # In a mixed node each child carries its own type in bits 2-5.
            child_node_type = parent_node_type if is_fat_leaf else (packed >> 2) & 0xf

            # Children are packed back to back; blockIncr is the number of
            # 64-byte blocks the current child occupies.
            child_offset = children_offset_start + 64 * added_blocks
            added_blocks += block_incr

            child_node_id = self.dfs_interpret_node(
                child_offset, self.determine_child_structure(child_node_type))
            self.relationships[node_id].append(child_node_id)

        return node_id
def main():
    """Decode the binary dump named on the command line into bvh_dump.json.

    Exits with a usage message when no input path is given. The output is
    always written to "bvh_dump.json" in the current directory, which is the
    file name visualize_json.html loads.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path/to/input.bin>")

    with open(sys.argv[1], 'rb') as file1:
        data = file1.read()

    interpreter = BVHInterpreter(data)
    json_output = interpreter.parse_bvh()

    with open("bvh_dump.json", 'w') as f:
        json.dump(json_output, f, indent=4)
if __name__=="__main__":
main()

View file

@ -0,0 +1,394 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>BVH Visualization</title>
</head>
<style>
body {
font-family: Arial, sans-serif;
background-color: #f0f0f0;
margin: 0;
padding: 20px;
display: flex;
flex-direction: column;
align-items: center;
height: 100vh;
}
#container {
width: 100%;
height: 50%;
border: 2px solid #ccc;
background-color: #fff;
padding: 10px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
.node circle {
fill: #999;
stroke: steelblue;
stroke-width: 1.5px;
cursor: pointer;
}
.node text {
font: 12px sans-serif;
}
.link {
fill: none;
stroke: #555;
stroke-opacity: 0.4;
stroke-width: 1.5px;
}
.tooltip {
position: absolute;
text-align: left;
width: 300px;
height: auto;
padding: 10px;
font: 12px sans-serif;
background: lightsteelblue;
border: 1px solid #333;
border-radius: 8px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.5);
overflow-y: auto;
max-height: 400px;
visibility: hidden;
}
.tooltip h3 {
margin: 0;
font-size: 14px;
font-weight: bold;
}
.tooltip p {
margin: 5px 0;
font-size: 12px;
word-wrap: break-word;
}
.tooltip .indent {
margin-left: 20px;
}
#header {
margin-bottom: 20px;
padding: 10px;
background-color: #e0e0e0;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
#header h2 {
margin-top: 0;
color: #333;
}
#header p {
margin: 5px 0;
font-size: 14px;
line-height: 1.5;
}
#threeContainer {
width: 75%;
height: 100%;
}
#toggleControls {
width: 25%;
height: 100%;
overflow-y: auto;
margin-left: 20px;
border: 2px solid #ccc;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
padding: 10px;
}
.toggle-container {
display: flex;
align-items: center;
margin-bottom: 10px;
}
.toggle-container label {
font-size: 14px;
margin-left: 10px;
}
#mainContainer {
display: flex;
width: 100%;
height: 50%;
justify-content: space-between;
margin-bottom: 20px;
}
</style>
<body>
<div id="header"></div>
<div id="container">
<div class="tooltip"></div>
<svg id="canvas"></svg>
</div>
<br></br>
<br></br>
<br></br>
<div id="mainContainer">
<div id="threeContainer"></div>
<div id="toggleControls"></div>
</div>
<br></br>
<br></br>
<br></br>
<script src="https://d3js.org/d3.v6.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/three@0.130.1/build/three.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/three@0.130.1/examples/js/controls/OrbitControls.js"></script>
<script>
// Render an object as nested HTML paragraphs, one per property. Property
// names are shown as dotted paths (parent.child); nesting deeper than
// maxDepth is replaced by an ellipsis and long strings are cut at 100 chars.
function formatProperties(properties, depth = 0, maxDepth = 2, index = '') {
    if (depth > maxDepth) {
        return '<p><strong>...</strong></p>';
    }
    return Object.entries(properties).map(([key, value]) => {
        const path = index ? `${index}.${key}` : key;
        const isContainer = value !== null && typeof value === 'object';
        if (isContainer) {
            const nested = formatProperties(value, depth + 1, maxDepth, path);
            return `<p><strong>${path}:</strong><div class="indent">${nested}</div></p>`;
        }
        const tooLong = typeof value === 'string' && value.length > 100;
        const shown = tooLong ? value.substring(0, 100) + '...' : value;
        return `<p><strong>${path}:</strong> ${shown}</p>`;
    }).join('');
}
// Load the JSON written by interpret.py, then render the header summary,
// the tree topology (d3) and finally the 3D view (three.js).
d3.json('bvh_dump.json').then(function(treeData) {
    document.getElementById('header').innerHTML = `<h2>Header Information</h2>${formatProperties(treeData.header)}`;

    var width = 1400,
        height = 400; // Adjusted for half the height

    // Zoom/pan is applied to the inner group `g`, which is assigned right
    // after this chain; the handler only runs later, on user interaction.
    var svg = d3.select("#canvas")
        .attr("width", width)
        .attr("height", height)
        .call(d3.zoom().on("zoom", function (event) {
            g.attr("transform", event.transform);
        }))
        .append("g");
    var g = svg.append("g");

    // Index the nodes by id once, so building the hierarchy does not scan
    // the whole node list for every child.
    var nodesById = new Map(treeData.nodes.map(n => [n.id, n]));
    var root = d3.hierarchy(nodesById.get(0), function(d) {
        return treeData.relationships[d.id].map(id => nodesById.get(id));
    });

    var treeLayout = d3.tree().size([height, width - 250]);
    treeLayout(root);

    // The tree is drawn left-to-right, hence the swapped x/y below.
    var link = g.selectAll(".link")
        .data(root.links())
        .enter().append("path")
        .attr("class", "link")
        .attr("d", d3.linkHorizontal()
            .x(d => d.y)
            .y(d => d.x));

    var node = g.selectAll(".node")
        .data(root.descendants())
        .enter().append("g")
        .attr("class", "node")
        .attr("transform", d => `translate(${d.y},${d.x})`);

    // Clicking a node shows its decoded properties next to the cursor and
    // enlarges the clicked circle.
    node.append("circle")
        .attr("r", 10)
        .on("click", function(event, d) {
            const propertiesHtml = formatProperties(d.data.properties);
            d3.select(".tooltip")
                .html(`<h3>ID: ${d.data.id}</h3>${propertiesHtml}`)
                .style("visibility", "visible")
                .style("left", (event.pageX + 20) + "px")
                .style("top", (event.pageY - 20) + "px");
            d3.selectAll("circle").attr("r", 10);
            d3.select(this).attr("r", 15);
        });

    node.append("text")
        .attr("dy", 3)
        .attr("x", d => d.children ? -12 : 12)
        .style("text-anchor", d => d.children ? "end" : "start")
        .text(d => d.data.type);

    // Clicking anywhere outside a node hides the tooltip again.
    d3.select("body").on("click", function(event) {
        if (!event.target.closest(".node")) {
            d3.select(".tooltip").style("visibility", "hidden");
            d3.selectAll("circle").attr("r", 10);
        }
    });

    initThreeJs(treeData);
});
// Build the 3D view: the header AABB, one wireframe box per valid child slot
// of every internal node, and a wireframe mesh per quad leaf. Every drawn
// object gets a checkbox in #toggleControls to show or hide it.
function initThreeJs(treeData) {
    const scene = new THREE.Scene();
    const camera = new THREE.PerspectiveCamera(75, window.innerWidth * 0.7 / 800, 0.1, 1000); // Adjusted for the new layout
    const renderer = new THREE.WebGLRenderer({ antialias: true });
    renderer.setSize(window.innerWidth * 0.7, 800); // Adjusted for half the height
    document.getElementById('threeContainer').appendChild(renderer.domElement);

    // Add orbit controls for zoom and rotate
    const controls = new THREE.OrbitControls(camera, renderer.domElement);
    controls.enableDamping = true;
    controls.dampingFactor = 0.25;
    controls.screenSpacePanning = false;
    controls.maxPolarAngle = Math.PI / 2;

    // Add grid helper and axis helper for reference
    const gridHelper = new THREE.GridHelper(10, 10);
    scene.add(gridHelper);
    const axesHelper = new THREE.AxesHelper(5);
    scene.add(axesHelper);

    const geometries = [];

    // Draw the edges of an axis-aligned box given its lower/upper bounds.
    function createBox(coord, color, id) {
        const geometry = new THREE.BoxGeometry(
            coord.x_upper - coord.x_lower,
            coord.y_upper - coord.y_lower,
            coord.z_upper - coord.z_lower
        );
        const edges = new THREE.EdgesGeometry(geometry);
        const material = new THREE.LineBasicMaterial({ color: color });
        const box = new THREE.LineSegments(edges, material);
        // BoxGeometry is centred on the origin, so move it to the box centre.
        box.position.set(
            (coord.x_upper + coord.x_lower) / 2,
            (coord.y_upper + coord.y_lower) / 2,
            (coord.z_upper + coord.z_lower) / 2
        );
        scene.add(box);
        geometries.push(box);
        addToggleControl(box, id);
    }

    // Draw a quad leaf's vertex list as a wireframe mesh.
    function createTriangle(vertices, leafColor, id) {
        const geometry = new THREE.BufferGeometry();
        const verticesArray = new Float32Array(vertices.flat());
        geometry.setAttribute('position', new THREE.BufferAttribute(verticesArray, 3));
        const material = new THREE.MeshBasicMaterial({ color: leafColor, side: THREE.DoubleSide, wireframe: true });
        const triangle = new THREE.Mesh(geometry, material);
        scene.add(triangle);
        geometries.push(triangle);
        addToggleControl(triangle, id);
    }

    // Draw an instance leaf as a small sphere at its part0.obj2world_p
    // position. NOTE: handleNode() below does not call this; instance leaves
    // are represented by their parent's child box.
    function createInstance(instanceProperties, color, id) {
        const geometry = new THREE.SphereGeometry(0.1, 32, 32); // Create a sphere geometry
        const material = new THREE.MeshBasicMaterial({ color: color, wireframe: false });
        const sphere = new THREE.Mesh(geometry, material); // Create a mesh
        const { obj2world_p } = instanceProperties.part0;
        sphere.position.set(obj2world_p[0], obj2world_p[1], obj2world_p[2]);
        scene.add(sphere);
        geometries.push(sphere);
        addToggleControl(sphere, id);
    }

    // Add a labelled checkbox that toggles the visibility of `geometry`.
    function addToggleControl(geometry, id) {
        const toggleContainer = document.createElement('div');
        toggleContainer.className = 'toggle-container';
        const checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.checked = true;
        checkbox.addEventListener('change', () => {
            geometry.visible = checkbox.checked;
        });
        const labelElement = document.createElement('label');
        labelElement.textContent = `${id}`;
        toggleContainer.appendChild(checkbox);
        toggleContainer.appendChild(labelElement);
        document.getElementById('toggleControls').appendChild(toggleContainer);
    }

    // Draw whatever a single decoded node contributes to the scene.
    function handleNode(node) {
        const leafColor = 0xffa500;
        const internalColor = 0x00ff00;
        if (node.type === 'AnvInternalNode') {
            // Check if the internal node is a fatLeaf
            const isFatProceduralLeaf = node.properties.node_type.nodeType === 0x3;
            const isFatInstanceLeaf = node.properties.node_type.nodeType === 0x1;
            node.properties.child_data.forEach((child, index) => {
                // Only slots with a blockIncr of 1 or 2 are drawn.
                if (child.blockIncr !== 1 && child.blockIncr !== 2) {
                    return;
                }
                const childIsProcedural = child.startPrim === 0x3 || isFatProceduralLeaf;
                const childIsInstance = child.startPrim === 0x1 || isFatInstanceLeaf;
                const color = (childIsProcedural || childIsInstance) ? leafColor : internalColor;
                let label = node.id + "'s child box";
                label += (childIsProcedural) ? " also a procedural leaf" : "";
                label += (childIsInstance) ? " also a instance leaf" : "";
                createBox(node.properties.actual_coords[index], color, label);
            });
        } else {
            // Type strings are the ones emitted by interpret.py.
            switch (node.type) {
                case 'AnvQuadLeafNode':
                    createTriangle(node.properties.v, leafColor, `Triangle. NodeID=${node.id}`);
                    break;
                case 'AnvInstanceLeaf':
                    // Skip. Already drawn by parents
                    break;
                case 'AnvProceduralLeafNode':
                    // Skip. Already drawn by parents
                    break;
            }
        }
    }

    // Draw AABB from header
    const headerAABB = treeData.header.aabb;
    createBox({
        x_lower: headerAABB.min_x,
        x_upper: headerAABB.max_x,
        y_lower: headerAABB.min_y,
        y_upper: headerAABB.max_y,
        z_lower: headerAABB.min_z,
        z_upper: headerAABB.max_z
    }, 0xff00ff, 'Root AABB');

    // Draw nodes
    treeData.nodes.forEach(node => {
        handleNode(node);
    });

    camera.position.z = 5;

    // Render loop; controls.update() is needed every frame because damping
    // is enabled.
    function animate() {
        requestAnimationFrame(animate);
        controls.update();
        renderer.render(scene, camera);
    }
    animate();
}
</script>
</body>
</html>

View file

@ -4,6 +4,7 @@
#include "anv_private.h"
#include <sys/stat.h>
#include <math.h>
#include "util/u_debug.h"
@ -26,6 +27,165 @@
#if GFX_VERx10 >= 125
/* TODO: Dumping things on destroy doesn't look robust. Would be nice to track
 * the debug operation when the command buffer is executed and synchronously
 * wait on the command buffer and write the data to disk upon completion.
 *
 * Each time a CmdBuildAS is completed, we append one element to bvhDumpArray.
 * Every time DestroyAccelerationStructure is called, we dump the elements
 * accumulated so far to files.
 */
static uint32_t blas_id = 0;
static uint32_t tlas_id = 0;
static struct bvh_dump_struct *bvhDumpArray = NULL;
static uint32_t bvh_dump_array_size = 0;
/* Zero-fill the acceleration structure from (header_addr + bvh_offset) up to
 * its end, then emit a pipe control with the sync/flush bits listed below.
 *
 * The number of bytes to clear is expected to be a multiple of 4.
 */
static void
clear_out_anv_bvh(struct anv_cmd_buffer *cmd_buffer,
                  VkDeviceAddress header_addr, struct bvh_layout bvh_layout)
{
   const uint64_t first_byte = bvh_layout.bvh_offset;
   const uint64_t byte_count = bvh_layout.size - first_byte;

   assert(byte_count % 4 == 0);

   anv_cmd_buffer_fill_area(cmd_buffer,
                            anv_address_from_u64(header_addr + first_byte),
                            byte_count, 0, false);

   /* End-of-pipe sync plus data / HDC / untyped-dataport cache flushes,
    * emitted right after the fill.
    */
   genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                cmd_buffer->state.current_pipeline,
                                ANV_PIPE_END_OF_PIPE_SYNC_BIT |
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                                ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                                ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT);
}
/* Grow the global bvh dump array by one trailing, uninitialized element.
 *
 * Returns true on success. On allocation failure the existing array and
 * bvh_dump_array_size are left untouched, an error is printed and false is
 * returned.
 */
static bool
expand_bvh_dump_array(void)
{
   /* Keep the old pointer until realloc is known to have succeeded, so a
    * failure neither leaks nor loses the elements recorded so far.
    */
   struct bvh_dump_struct *grown =
      realloc(bvhDumpArray,
              (bvh_dump_array_size + 1) * sizeof(*bvhDumpArray));
   if (grown == NULL) {
      perror("Failed to reallocate memory for bvh dump array.");
      return false;
   }

   bvhDumpArray = grown;
   bvh_dump_array_size++;
   return true;
}

/* Record a GPU copy of dump_size bytes starting at src into a freshly
 * allocated, host-mapped BO and append it to the global bvh dump array.
 *
 * dump_size must be a multiple of 4. If either the array growth or the BO
 * allocation fails, an error is set on the command buffer and no element is
 * added to the array.
 */
static void
append_bvh_dump(struct anv_cmd_buffer *cmd_buffer, VkDeviceAddress src,
                uint64_t dump_size, VkGeometryTypeKHR geometry_type,
                enum bvh_dump_type dump_type)
{
   assert(dump_size % 4 == 0);

   struct anv_device *device = cmd_buffer->device;

   if (!expand_bvh_dump_array()) {
      vk_command_buffer_set_error(&cmd_buffer->vk,
                                  VK_ERROR_OUT_OF_HOST_MEMORY);
      return;
   }

   struct anv_bo *bo = NULL;
   VkResult result = anv_device_alloc_bo(device, "dump_bvh", dump_size,
                                         ANV_BO_ALLOC_MAPPED |
                                         ANV_BO_ALLOC_HOST_CACHED_COHERENT, 0,
                                         &bo);
   if (result != VK_SUCCESS) {
      printf("Failed to allocate bvh for dump\n");
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      /* Drop the slot added above; it was never initialized and must not be
       * visible to whoever walks the array later.
       */
      bvh_dump_array_size--;
      return;
   }

   struct bvh_dump_struct *latestElement =
      bvhDumpArray + bvh_dump_array_size - 1;

   latestElement->bo = bo;
   /* A tlas is identified by the instances geometry type. */
   latestElement->bvh_id = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
                           tlas_id : blas_id;
   latestElement->dump_size = dump_size;
   latestElement->geometry_type = geometry_type;
   latestElement->dump_type = dump_type;

   struct anv_address dst_addr = { .bo = latestElement->bo, .offset = 0 };
   struct anv_address src_addr = anv_address_from_u64(src);
   anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, latestElement->dump_size);

   /* CS stall emitted right after the copy. */
   genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                cmd_buffer->state.current_pipeline,
                                ANV_PIPE_CS_STALL_BIT);
}
/* Queue every dump that the INTEL_DEBUG bvh flags request for one
 * acceleration structure, then advance the matching id counter.
 *
 * header_addr / bvh_anv_size:  region dumped as BVH_ANV.
 * intermediate_header_addr:    region dumped as BVH_IR_HDR
 *                              (sizeof(struct vk_ir_header) bytes).
 * intermediate_as_addr:        region dumped as BVH_IR_AS; its size is the
 *                              internal box nodes plus leaf_count leaves.
 * geometry_type:               INSTANCES selects the TLAS flags and tlas_id,
 *                              anything else the BLAS flags and blas_id.
 */
static void
debug_dump_bvh(struct anv_cmd_buffer *cmd_buffer, VkDeviceAddress header_addr,
               uint64_t bvh_anv_size, VkDeviceAddress intermediate_header_addr,
               VkDeviceAddress intermediate_as_addr, uint32_t leaf_count,
               VkGeometryTypeKHR geometry_type)
{
   const bool is_tlas = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR;

   /* Resolve which of the three dump kinds is enabled for this AS level. */
   bool want_anv;
   bool want_ir_hdr;
   bool want_ir_as;
   if (is_tlas) {
      want_anv = INTEL_DEBUG(DEBUG_BVH_TLAS);
      want_ir_hdr = INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR);
      want_ir_as = INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS);
   } else {
      want_anv = INTEL_DEBUG(DEBUG_BVH_BLAS);
      want_ir_hdr = INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR);
      want_ir_as = INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS);
   }

   if (want_anv) {
      append_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
                      BVH_ANV);
   }

   if (want_ir_hdr) {
      append_bvh_dump(cmd_buffer, intermediate_header_addr,
                      sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
   }

   if (want_ir_as) {
      /* The leaf node layout depends on what the AS is built from. */
      size_t leaf_node_size;
      switch (geometry_type) {
      case VK_GEOMETRY_TYPE_INSTANCES_KHR:
         leaf_node_size = sizeof(struct vk_ir_instance_node);
         break;
      case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
         leaf_node_size = sizeof(struct vk_ir_triangle_node);
         break;
      case VK_GEOMETRY_TYPE_AABBS_KHR:
         leaf_node_size = sizeof(struct vk_ir_aabb_node);
         break;
      default:
         unreachable("invalid geometry type");
      }

      const uint32_t box_node_count = MAX2(leaf_count, 2) - 1;
      const uint64_t box_bytes = sizeof(struct vk_ir_box_node) *
                                 box_node_count;
      const uint64_t leaf_bytes = leaf_node_size * leaf_count;

      append_bvh_dump(cmd_buffer, intermediate_as_addr,
                      box_bytes + leaf_bytes, geometry_type, BVH_IR_AS);
   }

   /* The id is bumped once per AS, whether or not anything was dumped. */
   if (is_tlas)
      tlas_id++;
   else
      blas_id++;
}
/* 32-bit words whose contents are pulled in verbatim from bvh/encode.spv.h.
 * NOTE(review): judging by the names this is the SPIR-V binary of the BVH
 * encode shader, with the header produced at build time — confirm.
 */
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};
@ -183,6 +343,9 @@ anv_encode_as(VkCommandBuffer commandBuffer,
uint32_t key,
struct vk_acceleration_structure *dst)
{
if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD))
return;
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_device *device = cmd_buffer->device;
@ -354,6 +517,19 @@ anv_init_header(VkCommandBuffer commandBuffer,
anv_cmd_buffer_update_addr(cmd_buffer, addr, 0, header_size,
header_ptr, false);
}
if (INTEL_DEBUG(DEBUG_BVH_ANY)) {
debug_dump_bvh(cmd_buffer, header_addr, bvh_layout.size,
intermediate_header_addr, intermediate_as_addr,
leaf_count, geometry_type);
/* Nullify tlas and send zeros to gpu, so that tlas traversal will return
* early. Doing this can prevent the gpu hang caused by incorrect bvh
* traversal.
*/
if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR)
clear_out_anv_bvh(cmd_buffer, header_addr, bvh_layout);
}
}
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
@ -739,4 +915,103 @@ genX(WriteAccelerationStructuresPropertiesKHR)(
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
/* Make sure "<dir>" and "<dir>/<sub_dir>" both exist.
 *
 * Directories that already exist are not treated as an error. Any other
 * mkdir failure is reported with perror and stops the function; the
 * sub directory is not attempted if the parent could not be created.
 */
static void
create_directory(const char *dir, const char *sub_dir)
{
   char sub_path[PATH_MAX];
   snprintf(sub_path, sizeof(sub_path), "%s/%s", dir, sub_dir);

   /* Parent first, then child; each level has its own error message. */
   const char *const levels[] = { dir, sub_path };
   const char *const failures[] = {
      "Error creating directory",
      "Error creating sub directory",
   };

   for (int level = 0; level < 2; level++) {
      if (mkdir(levels[level], 0777) == -1 && errno != EEXIST) {
         perror(failures[level]);
         return;
      }
   }
}
/* Write the contents of one recorded bvh dump to a text file.
 *
 * The file is placed at bvh_dump/<dump type>/{tlas,blas}_<bvh_id>.txt, with
 * the directories created on demand. The dump is written as hexadecimal
 * bytes separated by spaces, 16 bytes per line.
 *
 * Nothing is written (and an error is printed to stderr) when bvh is NULL or
 * when the entry has no mapped BO to read from.
 */
static void
create_dump_file(struct bvh_dump_struct *bvh)
{
   if (bvh == NULL) {
      fprintf(stderr, "Error: BVH DUMP structure is NULL\n");
      return;
   }

   /* Bail out before creating an empty file if there is nothing to read. */
   if (bvh->bo == NULL || bvh->bo->map == NULL) {
      fprintf(stderr, "Error: BVH dump buffer is not mapped\n");
      return;
   }

   char file_name[256];
   const char *dump_directory = "bvh_dump";
   const char *dump_sub_directory = NULL;

   switch (bvh->dump_type) {
   case BVH_ANV:
      dump_sub_directory = "BVH_ANV";
      break;
   case BVH_IR_HDR:
      dump_sub_directory = "BVH_IR_HDR";
      break;
   case BVH_IR_AS:
      dump_sub_directory = "BVH_IR_AS";
      break;
   default:
      unreachable("invalid dump type");
   }

   create_directory(dump_directory, dump_sub_directory);

   snprintf(file_name, sizeof(file_name),
            bvh->geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR
               ? "%s/%s/tlas_%d.txt"
               : "%s/%s/blas_%d.txt",
            dump_directory, dump_sub_directory, bvh->bvh_id);

   FILE *file = fopen(file_name, "w");
   if (file == NULL) {
      perror("Error creating file");
      return;
   }

   fprintf(stderr, "Dump File created: %s\n", file_name);

   /* Read through a volatile pointer, as the original code did, so every
    * byte is actually loaded from the mapping.
    */
   const volatile uint8_t *bytes = (const volatile uint8_t *)bvh->bo->map;

   /* Dump every bytes like this: B0 B1 B2 B3 ... B15 */
   for (uint64_t i = 0; i < bvh->dump_size; i++) {
      uint8_t result = bytes[i];
      fprintf(file, "%02" PRIx8 " ", result);
      if ((i + 1) % 16 == 0) {
         fprintf(file, "\n");
      }
   }

   /* fclose flushes buffered output, so a failure here means a short dump. */
   if (fclose(file) != 0) {
      perror("Error closing file");
   }
}
/* Driver entry point for vkDestroyAccelerationStructureKHR.
 *
 * When any bvh dump debug flag is set, every dump recorded so far is written
 * to disk, its BO is released, and the global dump array is freed and reset
 * before the destroy is forwarded to the common implementation. Note that
 * this flushes all pending dumps, not only those belonging to
 * accelerationStructure.
 */
void
genX(DestroyAccelerationStructureKHR)(
    VkDevice                                    _device,
    VkAccelerationStructureKHR                  accelerationStructure,
    const VkAllocationCallbacks*                pAllocator)
{
   if (INTEL_DEBUG(DEBUG_BVH_ANY)) {
      ANV_FROM_HANDLE(anv_device, device, _device);

      /* Flush each pending dump to its file, then drop its backing BO. */
      uint32_t idx = 0;
      while (idx < bvh_dump_array_size) {
         struct bvh_dump_struct *entry = &bvhDumpArray[idx];
         idx++;

         create_dump_file(entry);

         if (entry == NULL || entry->bo == NULL)
            continue;
         anv_device_release_bo(device, entry->bo);
      }

      /* Reset the global bookkeeping so later builds start from scratch. */
      free(bvhDumpArray);
      bvh_dump_array_size = 0;
      bvhDumpArray = NULL;
   }

   vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
                                             pAllocator);
}
#endif