mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
radv/bvh: Use box16 nodes when bvh8 is not used
Using box16 nodes trades BVH quality for lower memory bandwidth usage; the net effect on performance seems to be roughly neutral.

Stats, assuming box16 nodes are as expensive as box32 nodes:

Totals from 7668 (79.68% of 9624) affected BVHs:
compacted_size: 951666944 -> 742347648 (-22.00%)
max_depth: 57606 -> 57615 (+0.02%)
sah: 129114796242 -> 129998517775 (+0.68%); split: -0.00%, +0.68%
scene_sah: 188564162 -> 192063633 (+1.86%); split: -0.02%, +1.88%
box16_node_count: 0 -> 3270600 (+inf%)
box32_node_count: 3365707 -> 95100 (-97.17%)
This commit is contained in:
parent
4950f6e23d
commit
19a4c1c4b5
7 changed files with 109 additions and 39 deletions
|
|
@ -8,6 +8,7 @@
|
|||
#define BVH_BUILD_HELPERS_H
|
||||
|
||||
#include "bvh.h"
|
||||
#include "spirv_internal_exts.h"
|
||||
#include "vk_build_helpers.h"
|
||||
|
||||
TYPE(radv_accel_struct_serialization_header, 8);
|
||||
|
|
@ -110,4 +111,7 @@ radv_encode_blas_pointer_flags(uint32_t flags, uint32_t geometry_type)
|
|||
return ptr_flags;
|
||||
}
|
||||
|
||||
/* float -> float16_t conversions bound directly to the "MesaInternal" SPIR-V
 * instructions SpvOpFConvertRUMesa and SpvOpFConvertRDMesa.
 * NOTE(review): judging by the names, the RU variant presumably rounds toward
 * +inf and the RD variant toward -inf -- confirm against spirv_internal_exts.h.
 */
spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRUMesa) float16_t radv_f32_to_f16_pos_inf(float f);
|
||||
spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRDMesa) float16_t radv_f32_to_f16_neg_inf(float f);
|
||||
|
||||
#endif /* BVH_BUILD_HELPERS_H */
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#define RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 5))
|
||||
#define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 6))
|
||||
#define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES_RETRY (1u << (VK_BUILD_FLAG_COUNT + 7))
|
||||
#define RADV_BUILD_FLAG_USE_BOX16 (1u << (VK_BUILD_FLAG_COUNT + 8))
|
||||
|
||||
#define RADV_COPY_MODE_COPY 0
|
||||
#define RADV_COPY_MODE_SERIALIZE 1
|
||||
|
|
|
|||
|
|
@ -22,6 +22,32 @@ void set_parent(uint32_t child, uint32_t parent)
|
|||
DEREF(REF(uint32_t)(addr)) = parent;
|
||||
}
|
||||
|
||||
/* Converts a 32-bit float AABB (vk_aabb) into a radv_aabb16.
 *
 * Every min component is converted with radv_f32_to_f16_neg_inf and every max
 * component with radv_f32_to_f16_pos_inf.
 * NOTE(review): presumably this rounds min down and max up so that the f16 box
 * always encloses the f32 box -- confirm against the intrinsic declarations.
 */
radv_aabb16
|
||||
radv_aabb_f32_to_f16(vk_aabb aabb)
|
||||
{
|
||||
radv_aabb16 aabb16;
|
||||
aabb16.min_x = radv_f32_to_f16_neg_inf(aabb.min.x);
|
||||
aabb16.min_y = radv_f32_to_f16_neg_inf(aabb.min.y);
|
||||
aabb16.min_z = radv_f32_to_f16_neg_inf(aabb.min.z);
|
||||
aabb16.max_x = radv_f32_to_f16_pos_inf(aabb.max.x);
|
||||
aabb16.max_y = radv_f32_to_f16_pos_inf(aabb.max.y);
|
||||
aabb16.max_z = radv_f32_to_f16_pos_inf(aabb.max.z);
|
||||
return aabb16;
|
||||
}
|
||||
|
||||
/* Widens a radv_aabb16 back into a vk_aabb by converting each of the six
 * half-float components to float with a plain float() constructor.
 * In the visible encode code the result is only passed to aabb_surface_area()
 * to measure how much the box grew when it was stored as f16.
 */
vk_aabb
|
||||
radv_aabb_f16_to_f32(radv_aabb16 aabb16)
|
||||
{
|
||||
vk_aabb aabb;
|
||||
aabb.min.x = float(aabb16.min_x);
|
||||
aabb.min.y = float(aabb16.min_y);
|
||||
aabb.min.z = float(aabb16.min_z);
|
||||
aabb.max.x = float(aabb16.max_x);
|
||||
aabb.max.y = float(aabb16.max_y);
|
||||
aabb.max.z = float(aabb16.max_z);
|
||||
return aabb;
|
||||
}
|
||||
|
||||
void
|
||||
main()
|
||||
{
|
||||
|
|
@ -89,18 +115,15 @@ main()
|
|||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
|
||||
if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
|
||||
uint32_t node_id = is_root_node ? RADV_BVH_ROOT_NODE : DEREF(src_node).bvh_offset;
|
||||
if (node_id == VK_UNKNOWN_BVH_OFFSET)
|
||||
continue;
|
||||
|
||||
if (bvh_offset == VK_NULL_BVH_OFFSET)
|
||||
if (node_id == VK_NULL_BVH_OFFSET)
|
||||
break;
|
||||
|
||||
uint32_t flags = 0;
|
||||
|
||||
REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
|
||||
uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);
|
||||
|
||||
uint32_t found_child_count = 0;
|
||||
uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};
|
||||
|
|
@ -158,20 +181,33 @@ main()
|
|||
break;
|
||||
}
|
||||
|
||||
REF(radv_bvh_box16_node) dst_node_f16 = REF(radv_bvh_box16_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
|
||||
REF(radv_bvh_box32_node) dst_node_f32 = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
|
||||
bool is_box16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && id_to_type(node_id) == radv_bvh_node_box16;
|
||||
|
||||
for (uint32_t i = 0; i < found_child_count; ++i) {
|
||||
uint32_t type = ir_id_to_type(children[i]);
|
||||
uint32_t offset = ir_id_to_offset(children[i]);
|
||||
uint32_t dst_offset;
|
||||
uint32_t child_node_id;
|
||||
|
||||
vk_aabb child_aabb = DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
|
||||
|
||||
if (type == vk_ir_node_internal) {
|
||||
dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
|
||||
radv_aabb16 child_aabb16 = radv_aabb_f32_to_f16(child_aabb);
|
||||
float surface_area_f16 = aabb_surface_area(radv_aabb_f16_to_f32(child_aabb16));
|
||||
float surface_area_f32 = aabb_surface_area(child_aabb);
|
||||
bool child_use_f16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && surface_area_f16 < surface_area_f32 * 1.5;
|
||||
|
||||
REF(vk_ir_box_node) child_node = REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, offset);
|
||||
DEREF(child_node).bvh_offset = dst_offset;
|
||||
uint32_t dst_offset = atomicAdd(DEREF(args.header).dst_node_offset,
|
||||
child_use_f16 ? SIZEOF(radv_bvh_box16_node) : SIZEOF(radv_bvh_box32_node));
|
||||
child_node_id = pack_node_id(dst_offset, child_use_f16 ? radv_bvh_node_box16 : radv_bvh_node_box32);
|
||||
|
||||
REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset);
|
||||
DEREF(child_node).bvh_offset = child_node_id;
|
||||
flags |= (DEREF(child_node).flags & 0x3) << i * 8;
|
||||
} else {
|
||||
uint32_t child_index = offset / ir_leaf_node_size;
|
||||
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
|
||||
uint32_t dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
|
||||
|
||||
if (type == vk_ir_node_instance) {
|
||||
vk_ir_instance_node src_node =
|
||||
|
|
@ -182,47 +218,65 @@ main()
|
|||
uint32_t child_flags = fetch_child_flags(args.intermediate_bvh, children[i]);
|
||||
flags |= (child_flags & 0x3) << i * 8;
|
||||
}
|
||||
|
||||
child_node_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
|
||||
}
|
||||
|
||||
vk_aabb child_aabb =
|
||||
DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
|
||||
|
||||
/* On gfx11, infinities in AABB coords can cause garbage child nodes to be
|
||||
* returned by box intersection tests with non-default box sorting modes.
|
||||
* Subtract 1 from the integer representation of inf/-inf to turn it into
|
||||
* the maximum/minimum representable floating-point value as a workaround.
|
||||
*/
|
||||
if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (isinf(child_aabb.min[i]))
|
||||
child_aabb.min[i] = uintBitsToFloat(floatBitsToUint(child_aabb.min[i]) - 1);
|
||||
if (isinf(child_aabb.max[i]))
|
||||
child_aabb.max[i] = uintBitsToFloat(floatBitsToUint(child_aabb.max[i]) - 1);
|
||||
if (is_box16) {
|
||||
DEREF(dst_node_f16).coords[i] = radv_aabb_f32_to_f16(child_aabb);
|
||||
} else {
|
||||
/* On gfx11, infinities in AABB coords can cause garbage child nodes to be
|
||||
* returned by box intersection tests with non-default box sorting modes.
|
||||
* Subtract 1 from the integer representation of inf/-inf to turn it into
|
||||
* the maximum/minimum representable floating-point value as a workaround.
|
||||
*/
|
||||
if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (isinf(child_aabb.min[i]))
|
||||
child_aabb.min[i] = uintBitsToFloat(floatBitsToUint(child_aabb.min[i]) - 1);
|
||||
if (isinf(child_aabb.max[i]))
|
||||
child_aabb.max[i] = uintBitsToFloat(floatBitsToUint(child_aabb.max[i]) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
DEREF(dst_node_f32).coords[i] = child_aabb;
|
||||
}
|
||||
|
||||
DEREF(dst_node).coords[i] = child_aabb;
|
||||
|
||||
uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
|
||||
children[i] = child_id;
|
||||
set_parent(child_id, node_id);
|
||||
children[i] = child_node_id;
|
||||
set_parent(child_node_id, node_id);
|
||||
}
|
||||
|
||||
for (uint i = found_child_count; i < 4; ++i) {
|
||||
if (is_box16) {
|
||||
radv_aabb16 null_aabb;
|
||||
null_aabb.min_x = NAN_F16;
|
||||
null_aabb.min_y = NAN_F16;
|
||||
null_aabb.min_z = NAN_F16;
|
||||
null_aabb.max_x = NAN_F16;
|
||||
null_aabb.max_y = NAN_F16;
|
||||
null_aabb.max_z = NAN_F16;
|
||||
for (uint i = found_child_count; i < 4; ++i)
|
||||
DEREF(dst_node_f16).coords[i] = null_aabb;
|
||||
} else {
|
||||
for (uint i = found_child_count; i < 4; ++i) {
|
||||
for (uint comp = 0; comp < 3; ++comp) {
|
||||
DEREF(dst_node).coords[i].min[comp] = NAN;
|
||||
DEREF(dst_node).coords[i].max[comp] = NAN;
|
||||
DEREF(dst_node_f32).coords[i].min[comp] = NAN;
|
||||
DEREF(dst_node_f32).coords[i].max[comp] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Make changes to the children's BVH offset value available to the other invocations. */
|
||||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
DEREF(dst_node).children = children;
|
||||
if (is_box16) {
|
||||
DEREF(dst_node_f16).children = children;
|
||||
} else {
|
||||
DEREF(dst_node_f32).children = children;
|
||||
|
||||
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
|
||||
DEREF(dst_node).flags = flags;
|
||||
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
|
||||
DEREF(dst_node_f32).flags = flags;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ bvh_includes = files(
|
|||
bvh_spv = []
|
||||
foreach s : bvh_shaders
|
||||
command = [
|
||||
prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5',
|
||||
prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5',
|
||||
'-x', '-o', '@OUTPUT@', '@INPUT@', glslang_depfile, glslang_quiet,
|
||||
]
|
||||
command += vk_glsl_shader_preamble
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ enum radv_encode_key_bits {
|
|||
RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS = (1 << 0),
|
||||
RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 = (1 << 1),
|
||||
RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12 = (1 << 2),
|
||||
RADV_ENCODE_KEY_USE_BOX16 = (1 << 3),
|
||||
};
|
||||
|
||||
static void
|
||||
|
|
@ -287,6 +288,8 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
|
|||
VK_FROM_HANDLE(radv_device, device, _device);
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
|
||||
uint32_t encode_key = 0;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/*
|
||||
|
|
@ -302,11 +305,13 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
|
|||
state->build_info->type != VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
|
||||
encode_key |= RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS;
|
||||
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
if (!(state->build_info->flags & (VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR |
|
||||
VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR)) &&
|
||||
geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
|
||||
encode_key |= RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
|
||||
} else if (!radv_emulate_rt(pdev)) {
|
||||
if (!(state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
|
||||
encode_key |= RADV_ENCODE_KEY_USE_BOX16;
|
||||
}
|
||||
|
||||
state->config.encode_key[0] = encode_key;
|
||||
|
|
@ -391,6 +396,8 @@ radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
|
|||
flags |= RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES;
|
||||
if (key & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
|
||||
flags |= RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES;
|
||||
if (key & RADV_ENCODE_KEY_USE_BOX16)
|
||||
flags |= RADV_BUILD_FLAG_USE_BOX16;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ bvh_shaders = [
|
|||
],
|
||||
]
|
||||
|
||||
spirv_include_dir = dir_source_root + '/src/compiler/spirv'
|
||||
vk_bvh_include_dir = dir_source_root + '/src/vulkan/runtime/bvh'
|
||||
|
||||
vk_bvh_includes = files(
|
||||
|
|
@ -50,6 +51,7 @@ vk_bvh_includes = files(
|
|||
'vk_build_interface.h',
|
||||
'vk_bvh.h',
|
||||
'vk_debug.h',
|
||||
spirv_include_dir + '/spirv_internal_exts.h',
|
||||
)
|
||||
|
||||
vk_glsl_shader_extensions = [
|
||||
|
|
@ -69,6 +71,7 @@ vk_glsl_shader_extensions = [
|
|||
'GL_KHR_shader_subgroup_ballot',
|
||||
'GL_KHR_shader_subgroup_clustered',
|
||||
'GL_EXT_shader_atomic_int64',
|
||||
'GL_EXT_spirv_intrinsics',
|
||||
]
|
||||
|
||||
vk_glsl_shader_preamble = []
|
||||
|
|
@ -79,7 +82,7 @@ endforeach
|
|||
bvh_spv = []
|
||||
foreach s : bvh_shaders
|
||||
command = [
|
||||
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
|
||||
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
|
||||
] + (with_mesa_debug ? ['-g'] : [])
|
||||
command += glslang_quiet
|
||||
command += vk_glsl_shader_preamble
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@
|
|||
|
||||
#define INFINITY (1.0 / 0.0)
|
||||
#define NAN (0.0 / 0.0)
|
||||
#define NAN_F16 (0.0hf / 0.0hf)
|
||||
|
||||
#define INDEX(type, ptr, index) REF(type)(OFFSET(ptr, (index)*SIZEOF(type)))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue