vulkan/runtime/bvh: Propagate opaqueness information through the BVH

AMD hardware can early-cull a box node if its leaves are all opaque or all
non-opaque and the ray flags request skipping opaque or non-opaque geometry,
respectively. This works even across TLAS/BLAS boundaries.

Propagate information about whether all (or none) of a node's children are
opaque through the BVH to allow RADV to set these flags per box node.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32417>
This commit is contained in:
Natalie Vock 2024-12-01 10:31:34 +01:00 committed by Marge Bot
parent 6afa638b18
commit 7301e92d49
8 changed files with 99 additions and 10 deletions

View file

@ -57,6 +57,7 @@ main(void)
vk_aabb previous_bounds;
previous_bounds.min = vec3(INFINITY);
previous_bounds.max = vec3(-INFINITY);
uint32_t previous_flags;
for (;;) {
uint32_t count = 0;
@ -82,6 +83,7 @@ main(void)
REF(vk_ir_box_node) node = REF(vk_ir_box_node)(OFFSET(args.bvh, current_offset));
vk_aabb bounds = previous_bounds;
uint32_t flags = VK_BVH_BOX_FLAG_ONLY_OPAQUE | VK_BVH_BOX_FLAG_NO_OPAQUE;
lbvh_node_info info = DEREF(INDEX(lbvh_node_info, args.node_info, idx));
@ -101,8 +103,14 @@ main(void)
vk_aabb child_bounds = DEREF(child).aabb;
bounds.min = min(bounds.min, child_bounds.min);
bounds.max = max(bounds.max, child_bounds.max);
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
flags &= fetch_child_flags(args.bvh, children[0]);
}
previous_child_index = 0;
} else {
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
flags &= previous_flags;
}
/* Fetch the non-cached child */
@ -112,14 +120,21 @@ main(void)
vk_aabb child_bounds = DEREF(child).aabb;
bounds.min = min(bounds.min, child_bounds.min);
bounds.max = max(bounds.max, child_bounds.max);
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
flags &= fetch_child_flags(args.bvh, children[1 - previous_child_index]);
}
vk_ir_box_node node_value;
node_value.base.aabb = bounds;
node_value.bvh_offset = VK_UNKNOWN_BVH_OFFSET;
node_value.children = children;
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
node_value.flags = flags;
DEREF(node) = node_value;
if (info.parent == VK_BVH_INVALID_NODE)
@ -132,6 +147,7 @@ main(void)
previous_id = current_id;
previous_bounds = bounds;
previous_flags = flags;
memoryBarrierBuffer();
}

View file

@ -184,6 +184,18 @@ build_instance(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_
DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags;
DEREF(node).instance_id = global_id;
if (!VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
return true;
uint32_t root_flags = 0;
if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR << 24)) != 0)
root_flags = VK_BVH_BOX_FLAG_ONLY_OPAQUE;
else if ((instance.sbt_offset_and_flags & (VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR << 24)) != 0)
root_flags = VK_BVH_BOX_FLAG_NO_OPAQUE;
else
root_flags = DEREF(REF(uint32_t)(instance.accelerationStructureReference + ROOT_FLAGS_OFFSET));
DEREF(node).root_flags = root_flags;
return true;
}

View file

@ -145,18 +145,24 @@ push_node(uint32_t children[2], vk_aabb bounds[2])
total_bounds.min = vec3(INFINITY);
total_bounds.max = vec3(-INFINITY);
for (uint i = 0; i < 2; ++i) {
VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i]));
REF(vk_ir_node) child = REF(vk_ir_node)(node);
uint32_t dst_flags = VK_BVH_BOX_FLAG_ONLY_OPAQUE | VK_BVH_BOX_FLAG_NO_OPAQUE;
for (uint i = 0; i < 2; ++i) {
total_bounds.min = min(total_bounds.min, bounds[i].min);
total_bounds.max = max(total_bounds.max, bounds[i].max);
DEREF(dst_node).children[i] = children[i];
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
dst_flags &= fetch_child_flags(args.bvh, children[i]);
}
DEREF(dst_node).base.aabb = total_bounds;
DEREF(dst_node).bvh_offset = VK_UNKNOWN_BVH_OFFSET;
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
DEREF(dst_node).flags = dst_flags;
return dst_id;
}

View file

@ -407,6 +407,28 @@ load_vertices(VOID_REF vertices, triangle_indices indices, uint32_t vertex_forma
return result;
}
/* Fetch the flags of child nodes used to determine whether all/no children are opaque.
 * Leaf nodes derive their flag from the geometry's VK_GEOMETRY_OPAQUE bit, internal
 * and instance nodes return the flags already propagated into them. Unknown node
 * types yield 0 (neither ONLY_OPAQUE nor NO_OPAQUE). */
uint32_t fetch_child_flags(VOID_REF bvh, uint32_t node_ptr)
{
   VOID_REF node = OFFSET(bvh, ir_id_to_offset(node_ptr));
   uint32_t type = ir_id_to_type(node_ptr);

   /* Non-leaf nodes carry precomputed flags. */
   if (type == vk_ir_node_internal)
      return DEREF(REF(vk_ir_box_node)(node)).flags;
   if (type == vk_ir_node_instance)
      return DEREF(REF(vk_ir_instance_node)(node)).root_flags;

   /* Leaf nodes: derive the flag from the geometry opacity bit. */
   uint32_t geometry_id_and_flags;
   if (type == vk_ir_node_triangle)
      geometry_id_and_flags = DEREF(REF(vk_ir_triangle_node)(node)).geometry_id_and_flags;
   else if (type == vk_ir_node_aabb)
      geometry_id_and_flags = DEREF(REF(vk_ir_aabb_node)(node)).geometry_id_and_flags;
   else
      return 0;

   return (geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0 ? VK_BVH_BOX_FLAG_ONLY_OPAQUE
                                                            : VK_BVH_BOX_FLAG_NO_OPAQUE;
}
/** Compute ceiling of integer quotient of A divided by B.
From macros.h */
#define DIV_ROUND_UP(A, B) (((A) + (B)-1) / (B))

View file

@ -36,14 +36,17 @@
#define SUBGROUP_SIZE_ID 0
#define BVH_BOUNDS_OFFSET_ID 1
#define BUILD_FLAGS_ID 2
#define ROOT_FLAGS_OFFSET_ID 3
#ifdef VULKAN
layout (constant_id = SUBGROUP_SIZE_ID) const int SUBGROUP_SIZE = 64;
layout (constant_id = BVH_BOUNDS_OFFSET_ID) const int BVH_BOUNDS_OFFSET = 0;
layout (constant_id = BUILD_FLAGS_ID) const int BUILD_FLAGS = 0;
layout (constant_id = ROOT_FLAGS_OFFSET_ID) const int ROOT_FLAGS_OFFSET = -1;
#endif
#define VK_BUILD_FLAG_ALWAYS_ACTIVE (1u << 0)
#define VK_BUILD_FLAG_COUNT 1
#define VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS (1u << 1)
#define VK_BUILD_FLAG_COUNT 2
#define VK_BUILD_FLAG(flag) ((BUILD_FLAGS & flag) != 0)

View file

@ -111,10 +111,16 @@ struct vk_ir_node {
#define VK_UNKNOWN_BVH_OFFSET 0xFFFFFFFF
#define VK_NULL_BVH_OFFSET 0xFFFFFFFE
/* Box node contains only opaque leaves */
#define VK_BVH_BOX_FLAG_ONLY_OPAQUE 0x1
/* Box node contains no opaque leaves */
#define VK_BVH_BOX_FLAG_NO_OPAQUE 0x2
/* Internal (box) node of the intermediate BVH representation, holding two children. */
struct vk_ir_box_node {
vk_ir_node base;
/* IR node IDs of the two children. */
uint32_t children[2];
/* Offset of this node in the final BVH; VK_UNKNOWN_BVH_OFFSET until assigned. */
uint32_t bvh_offset;
/* VK_BVH_BOX_FLAG_* opaqueness info, ANDed together across this node's children
 * when VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS is set; otherwise left unwritten. */
uint32_t flags;
};
struct vk_ir_aabb_node {
@ -139,6 +145,8 @@ struct vk_ir_instance_node {
uint32_t sbt_offset_and_flags;
mat3x4 otw_matrix;
uint32_t instance_id;
/* The root node's flags. */
uint32_t root_flags;
};
#define VK_BVH_INVALID_NODE 0xFFFFFFFF

View file

@ -303,7 +303,7 @@ vk_get_bvh_build_pipeline_spv(struct vk_device *device, struct vk_meta_device *m
.pCode = spv,
};
VkSpecializationMapEntry spec_map[3] = {
VkSpecializationMapEntry spec_map[4] = {
{
.constantID = SUBGROUP_SIZE_ID,
.offset = 0,
@ -319,12 +319,19 @@ vk_get_bvh_build_pipeline_spv(struct vk_device *device, struct vk_meta_device *m
.offset = sizeof(args->subgroup_size) + sizeof(args->bvh_bounds_offset),
.size = sizeof(flags),
},
{
.constantID = ROOT_FLAGS_OFFSET_ID,
.offset = sizeof(args->subgroup_size) +
sizeof(args->bvh_bounds_offset),
.size = sizeof(args->root_flags_offset),
}
};
uint32_t spec_constants[3] = {
uint32_t spec_constants[4] = {
args->subgroup_size,
args->bvh_bounds_offset,
flags,
args->root_flags_offset,
};
VkSpecializationInfo spec_info = {
@ -489,9 +496,14 @@ build_leaves(VkCommandBuffer commandBuffer,
spirv_size = device->as_build_ops->leaf_spirv_override_size;
}
uint32_t flags = 0;
if (updateable)
flags |= VK_BUILD_FLAG_ALWAYS_ACTIVE;
if (args->propagate_cull_flags)
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LEAF,
spirv, spirv_size, sizeof(struct leaf_args),
args, updateable ? VK_BUILD_FLAG_ALWAYS_ACTIVE : 0,
args, flags,
&pipeline);
if (result != VK_SUCCESS)
return result;
@ -847,9 +859,13 @@ lbvh_build_internal(VkCommandBuffer commandBuffer,
VkPipeline pipeline;
VkPipelineLayout layout;
uint32_t flags = 0;
if (args->propagate_cull_flags)
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LBVH_MAIN,
lbvh_main_spv, sizeof(lbvh_main_spv),
sizeof(struct lbvh_main_args), args, 0,
sizeof(struct lbvh_main_args), args, flags,
&pipeline);
if (result != VK_SUCCESS)
return result;
@ -893,7 +909,7 @@ lbvh_build_internal(VkCommandBuffer commandBuffer,
result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_LBVH_GENERATE_IR,
lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
sizeof(struct lbvh_generate_ir_args), args, 0,
sizeof(struct lbvh_generate_ir_args), args, flags,
&pipeline);
if (result != VK_SUCCESS)
return result;
@ -937,9 +953,13 @@ ploc_build_internal(VkCommandBuffer commandBuffer,
VkPipeline pipeline;
VkPipelineLayout layout;
uint32_t flags = 0;
if (args->propagate_cull_flags)
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_PLOC, ploc_spv,
sizeof(ploc_spv), sizeof(struct ploc_args),
args, 0, &pipeline);
args, flags, &pipeline);
if (result != VK_SUCCESS)
return result;

View file

@ -133,6 +133,8 @@ struct vk_acceleration_structure_build_ops {
/* Driver-supplied parameters for the common acceleration-structure build path. */
struct vk_acceleration_structure_build_args {
uint32_t subgroup_size;
uint32_t bvh_bounds_offset;
/* Byte offset of the root node's VK_BVH_BOX_FLAG_* word within a built BLAS;
 * read through the instance's accelerationStructureReference when resolving
 * instance root_flags during a TLAS build. */
uint32_t root_flags_offset;
/* When true, build shaders are specialized with VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS
 * so opaqueness flags are propagated up through box nodes. */
bool propagate_cull_flags;
/* NOTE(review): semantics not visible in this chunk — presumably enables debug
 * markers around build dispatches; confirm against the build implementation. */
bool emit_markers;
const radix_sort_vk_t *radix_sort;
};