diff --git a/src/amd/vulkan/bvh/build_helpers.h b/src/amd/vulkan/bvh/build_helpers.h
index a63a534d349..895e1606d6d 100644
--- a/src/amd/vulkan/bvh/build_helpers.h
+++ b/src/amd/vulkan/bvh/build_helpers.h
@@ -8,6 +8,7 @@
 #define BVH_BUILD_HELPERS_H
 
 #include "bvh.h"
+#include "spirv_internal_exts.h"
 #include "vk_build_helpers.h"
 
 TYPE(radv_accel_struct_serialization_header, 8);
@@ -110,4 +111,12 @@ radv_encode_blas_pointer_flags(uint32_t flags, uint32_t geometry_type)
    return ptr_flags;
 }
 
+/* Float32 -> float16 conversions bound to Mesa-internal SPIR-V opcodes through
+ * GL_EXT_spirv_intrinsics. NOTE(review): the RU/RD opcode names and the
+ * pos_inf/neg_inf suffixes suggest rounding toward +infinity and -infinity
+ * respectively; confirm against spirv_internal_exts.h.
+ */
+spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRUMesa) float16_t radv_f32_to_f16_pos_inf(float f);
+spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRDMesa) float16_t radv_f32_to_f16_neg_inf(float f);
+
 #endif /* BUILD_HELPERS_H */
diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h
index 15a7a2aaf5e..d3b726d296b 100644
--- a/src/amd/vulkan/bvh/build_interface.h
+++ b/src/amd/vulkan/bvh/build_interface.h
@@ -26,6 +26,7 @@
 #define RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 5))
 #define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 6))
 #define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES_RETRY (1u << (VK_BUILD_FLAG_COUNT + 7))
+#define RADV_BUILD_FLAG_USE_BOX16 (1u << (VK_BUILD_FLAG_COUNT + 8))
 
 #define RADV_COPY_MODE_COPY 0
 #define RADV_COPY_MODE_SERIALIZE 1
diff --git a/src/amd/vulkan/bvh/encode.comp b/src/amd/vulkan/bvh/encode.comp
index 53c6f853d2c..1fb4dc5d728 100644
--- a/src/amd/vulkan/bvh/encode.comp
+++ b/src/amd/vulkan/bvh/encode.comp
@@ -22,6 +22,37 @@ void set_parent(uint32_t child, uint32_t parent)
    DEREF(REF(uint32_t)(addr)) = parent;
 }
 
+/* Narrows an AABB to float16. Min components go through
+ * radv_f32_to_f16_neg_inf and max components through radv_f32_to_f16_pos_inf.
+ * NOTE(review): presumably this keeps the float16 box a superset of the input.
+ */
+radv_aabb16
+radv_aabb_f32_to_f16(vk_aabb aabb)
+{
+   radv_aabb16 aabb16;
+   aabb16.min_x = radv_f32_to_f16_neg_inf(aabb.min.x);
+   aabb16.min_y = radv_f32_to_f16_neg_inf(aabb.min.y);
+   aabb16.min_z = radv_f32_to_f16_neg_inf(aabb.min.z);
+   aabb16.max_x = radv_f32_to_f16_pos_inf(aabb.max.x);
+   aabb16.max_y = radv_f32_to_f16_pos_inf(aabb.max.y);
+   aabb16.max_z = radv_f32_to_f16_pos_inf(aabb.max.z);
+   return aabb16;
+}
+
+/* Widens a float16 AABB back to float32, component by component. */
+vk_aabb
+radv_aabb_f16_to_f32(radv_aabb16 aabb16)
+{
+   vk_aabb aabb;
+   aabb.min.x = float(aabb16.min_x);
+   aabb.min.y = float(aabb16.min_y);
+   aabb.min.z = float(aabb16.min_z);
+   aabb.max.x = float(aabb16.max_x);
+   aabb.max.y = float(aabb16.max_y);
+   aabb.max.z = float(aabb16.max_z);
+   return aabb;
+}
+
 void
 main()
 {
@@ -89,18 +120,15 @@ main()
       memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                     gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
 
-      uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
-      if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
+      uint32_t node_id = is_root_node ? RADV_BVH_ROOT_NODE : DEREF(src_node).bvh_offset;
+      if (node_id == VK_UNKNOWN_BVH_OFFSET)
          continue;
 
-      if (bvh_offset == VK_NULL_BVH_OFFSET)
+      if (node_id == VK_NULL_BVH_OFFSET)
          break;
 
       uint32_t flags = 0;
 
-      REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
-      uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);
-
       uint32_t found_child_count = 0;
       uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
                               RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};
@@ -158,20 +186,37 @@ main()
             break;
       }
 
+      REF(radv_bvh_box16_node) dst_node_f16 = REF(radv_bvh_box16_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
+      REF(radv_bvh_box32_node) dst_node_f32 = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
+      bool is_box16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && id_to_type(node_id) == radv_bvh_node_box16;
+
       for (uint32_t i = 0; i < found_child_count; ++i) {
          uint32_t type = ir_id_to_type(children[i]);
          uint32_t offset = ir_id_to_offset(children[i]);
-         uint32_t dst_offset;
+         uint32_t child_node_id;
+
+         vk_aabb child_aabb = DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
 
          if (type == vk_ir_node_internal) {
-            dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
+            /* Use a box16 node for this child only if box16 is enabled and the
+             * surface area of its float16-rounded bounds is less than 1.5x the
+             * surface area of the float32 bounds; otherwise use a box32 node.
+             */
+            radv_aabb16 child_aabb16 = radv_aabb_f32_to_f16(child_aabb);
+            float surface_area_f16 = aabb_surface_area(radv_aabb_f16_to_f32(child_aabb16));
+            float surface_area_f32 = aabb_surface_area(child_aabb);
+            bool child_use_f16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && surface_area_f16 < surface_area_f32 * 1.5;
 
-            REF(vk_ir_box_node) child_node = REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, offset);
-            DEREF(child_node).bvh_offset = dst_offset;
+            uint32_t dst_offset = atomicAdd(DEREF(args.header).dst_node_offset,
+                                            child_use_f16 ? SIZEOF(radv_bvh_box16_node) : SIZEOF(radv_bvh_box32_node));
+            child_node_id = pack_node_id(dst_offset, child_use_f16 ? radv_bvh_node_box16 : radv_bvh_node_box32);
+
+            REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset);
+            DEREF(child_node).bvh_offset = child_node_id;
             flags |= (DEREF(child_node).flags & 0x3) << i * 8;
          } else {
             uint32_t child_index = offset / ir_leaf_node_size;
-            dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
+            uint32_t dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
 
             if (type == vk_ir_node_instance) {
                vk_ir_instance_node src_node =
@@ -182,47 +227,65 @@ main()
                uint32_t child_flags = fetch_child_flags(args.intermediate_bvh, children[i]);
                flags |= (child_flags & 0x3) << i * 8;
             }
+
+            child_node_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
          }
 
-         vk_aabb child_aabb =
-            DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
-
-         /* On gfx11, infinities in AABB coords can cause garbage child nodes to be
-          * returned by box intersection tests with non-default box sorting modes.
-          * Subtract 1 from the integer representation of inf/-inf to turn it into
-          * the maximum/minimum representable floating-point value as a workaround.
-          */
-         if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
-            for (uint32_t i = 0; i < 3; ++i) {
-               if (isinf(child_aabb.min[i]))
-                  child_aabb.min[i] = uintBitsToFloat(floatBitsToUint(child_aabb.min[i]) - 1);
-               if (isinf(child_aabb.max[i]))
-                  child_aabb.max[i] = uintBitsToFloat(floatBitsToUint(child_aabb.max[i]) - 1);
+         if (is_box16) {
+            DEREF(dst_node_f16).coords[i] = radv_aabb_f32_to_f16(child_aabb);
+         } else {
+            /* On gfx11, infinities in AABB coords can cause garbage child nodes to be
+             * returned by box intersection tests with non-default box sorting modes.
+             * Subtract 1 from the integer representation of inf/-inf to turn it into
+             * the maximum/minimum representable floating-point value as a workaround.
+             */
+            if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
+               for (uint32_t comp = 0; comp < 3; ++comp) {
+                  if (isinf(child_aabb.min[comp]))
+                     child_aabb.min[comp] = uintBitsToFloat(floatBitsToUint(child_aabb.min[comp]) - 1);
+                  if (isinf(child_aabb.max[comp]))
+                     child_aabb.max[comp] = uintBitsToFloat(floatBitsToUint(child_aabb.max[comp]) - 1);
+               }
             }
+
+            DEREF(dst_node_f32).coords[i] = child_aabb;
          }
 
-         DEREF(dst_node).coords[i] = child_aabb;
-
-         uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
-         children[i] = child_id;
-         set_parent(child_id, node_id);
+         children[i] = child_node_id;
+         set_parent(child_node_id, node_id);
       }
 
-      for (uint i = found_child_count; i < 4; ++i) {
-         for (uint comp = 0; comp < 3; ++comp) {
-            DEREF(dst_node).coords[i].min[comp] = NAN;
-            DEREF(dst_node).coords[i].max[comp] = NAN;
-         }
+      if (is_box16) {
+         radv_aabb16 null_aabb;
+         null_aabb.min_x = NAN_F16;
+         null_aabb.min_y = NAN_F16;
+         null_aabb.min_z = NAN_F16;
+         null_aabb.max_x = NAN_F16;
+         null_aabb.max_y = NAN_F16;
+         null_aabb.max_z = NAN_F16;
+         for (uint i = found_child_count; i < 4; ++i)
+            DEREF(dst_node_f16).coords[i] = null_aabb;
+      } else {
+         for (uint i = found_child_count; i < 4; ++i) {
+            for (uint comp = 0; comp < 3; ++comp) {
+               DEREF(dst_node_f32).coords[i].min[comp] = NAN;
+               DEREF(dst_node_f32).coords[i].max[comp] = NAN;
+            }
+         }
       }
 
       /* Make changes to the children's BVH offset value available to the other invocations. */
       memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                     gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
 
-      DEREF(dst_node).children = children;
+      if (is_box16) {
+         DEREF(dst_node_f16).children = children;
+      } else {
+         DEREF(dst_node_f32).children = children;
 
-      if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
-         DEREF(dst_node).flags = flags;
+         if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
+            DEREF(dst_node_f32).flags = flags;
+      }
 
       break;
    }
diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build
index 3320ef67428..c0328db82c7 100644
--- a/src/amd/vulkan/bvh/meson.build
+++ b/src/amd/vulkan/bvh/meson.build
@@ -56,7 +56,7 @@ bvh_includes = files(
 bvh_spv = []
 foreach s : bvh_shaders
   command = [
-    prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5',
+    prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5',
     '-x', '-o', '@OUTPUT@', '@INPUT@', glslang_depfile, glslang_quiet,
   ]
   command += vk_glsl_shader_preamble
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 4271a0be143..607d29866dd 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -75,6 +75,7 @@ enum radv_encode_key_bits {
    RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS = (1 << 0),
    RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 = (1 << 1),
    RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12 = (1 << 2),
+   RADV_ENCODE_KEY_USE_BOX16 = (1 << 3),
 };
 
 static void
@@ -287,6 +288,8 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
    VK_FROM_HANDLE(radv_device, device, _device);
    struct radv_physical_device *pdev = radv_device_physical(device);
 
+   VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
+
    uint32_t encode_key = 0;
    if (radv_use_bvh8(pdev)) {
       /*
@@ -302,11 +305,13 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
           state->build_info->type != VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
          encode_key |= RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS;
 
-      VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
       if (!(state->build_info->flags & (VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR |
                                         VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR)) &&
           geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
          encode_key |= RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
+   } else if (!radv_emulate_rt(pdev)) {
+      if (!(state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
+         encode_key |= RADV_ENCODE_KEY_USE_BOX16;
    }
 
    state->config.encode_key[0] = encode_key;
@@ -391,6 +396,8 @@ radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
       flags |= RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES;
    if (key & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
       flags |= RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES;
+   if (key & RADV_ENCODE_KEY_USE_BOX16)
+      flags |= RADV_BUILD_FLAG_USE_BOX16;
 
    return flags;
 }
diff --git a/src/vulkan/runtime/bvh/meson.build b/src/vulkan/runtime/bvh/meson.build
index 02b2afb4163..add1590b70f 100644
--- a/src/vulkan/runtime/bvh/meson.build
+++ b/src/vulkan/runtime/bvh/meson.build
@@ -42,6 +42,7 @@ bvh_shaders = [
   ],
 ]
 
+spirv_include_dir = dir_source_root + '/src/compiler/spirv'
 vk_bvh_include_dir = dir_source_root + '/src/vulkan/runtime/bvh'
 
 vk_bvh_includes = files(
@@ -50,6 +51,7 @@ vk_bvh_includes = files(
   'vk_build_interface.h',
   'vk_bvh.h',
   'vk_debug.h',
+  spirv_include_dir + '/spirv_internal_exts.h',
 )
 
 vk_glsl_shader_extensions = [
@@ -69,6 +71,7 @@ vk_glsl_shader_extensions = [
   'GL_KHR_shader_subgroup_ballot',
   'GL_KHR_shader_subgroup_clustered',
   'GL_EXT_shader_atomic_int64',
+  'GL_EXT_spirv_intrinsics',
 ]
 
 vk_glsl_shader_preamble = []
@@ -79,7 +82,7 @@ endforeach
 bvh_spv = []
 foreach s : bvh_shaders
   command = [
-    prog_glslang, '-V', '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
+    prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
   ] + (with_mesa_debug ? ['-g'] : [])
   command += glslang_quiet
   command += vk_glsl_shader_preamble
diff --git a/src/vulkan/runtime/bvh/vk_build_helpers.h b/src/vulkan/runtime/bvh/vk_build_helpers.h
index 01acb4db715..dd5795855b2 100644
--- a/src/vulkan/runtime/bvh/vk_build_helpers.h
+++ b/src/vulkan/runtime/bvh/vk_build_helpers.h
@@ -180,6 +180,7 @@
 
 #define INFINITY (1.0 / 0.0)
 #define NAN (0.0 / 0.0)
+#define NAN_F16 (0.0hf / 0.0hf)
 
 #define INDEX(type, ptr, index) REF(type)(OFFSET(ptr, (index)*SIZEOF(type)))
 