diff --git a/src/amd/vulkan/bvh/internal.comp b/src/amd/vulkan/bvh/internal.comp index de7b4ed44d4..f9d9375fe57 100644 --- a/src/amd/vulkan/bvh/internal.comp +++ b/src/amd/vulkan/bvh/internal.comp @@ -33,20 +33,72 @@ #extension GL_EXT_buffer_reference : require #extension GL_EXT_buffer_reference2 : require -layout(scalar) uniform; -layout(scalar) buffer; - layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; -#include "internal.h" +#include "build_helpers.h" -layout(push_constant) uniform CONSTS -{ - internal_kernel_args args; -}; +layout(push_constant) uniform CONSTS { + VOID_REF bvh; + REF(key_id_pair) src_ids; + REF(key_id_pair) dst_ids; + uint32_t dst_offset; + uint32_t fill_count; +} args; void main(void) { - internal_kernel(args, gl_GlobalInvocationID.x); + uint32_t global_id = gl_GlobalInvocationID.x; + + bool fill_header = (args.fill_count & 0x80000000u) != 0; + uint32_t src_count = args.fill_count & 0x7FFFFFFFu; + + uint32_t src_index = global_id * 4; + uint32_t child_count = min(src_count - src_index, 4); + + uint32_t dst_offset = args.dst_offset + global_id * SIZEOF(radv_bvh_box32_node); + + REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.bvh, dst_offset)); + + AABB total_bounds; + total_bounds.min = vec3(INFINITY); + total_bounds.max = vec3(-INFINITY); + + for (uint32_t i = 0; i < 4; i++) { + AABB bounds; + bounds.min = vec3(NAN); + bounds.max = vec3(NAN); + + uint32_t child_id = DEREF(INDEX(key_id_pair, args.src_ids, src_index + i)).id; + + if (i < child_count) { + DEREF(dst_node).children[i] = child_id; + + bounds = calculate_node_bounds(args.bvh, child_id); + total_bounds.min = min(total_bounds.min, bounds.min); + total_bounds.max = max(total_bounds.max, bounds.max); + } + + DEREF(dst_node).coords[i][0][0] = bounds.min.x; + DEREF(dst_node).coords[i][0][1] = bounds.min.y; + DEREF(dst_node).coords[i][0][2] = bounds.min.z; + DEREF(dst_node).coords[i][1][0] = bounds.max.x; + DEREF(dst_node).coords[i][1][1] = bounds.max.y; + DEREF(dst_node).coords[i][1][2] = bounds.max.z; + } + + uint32_t node_id = pack_node_id(dst_offset, radv_bvh_node_internal); + DEREF(INDEX(key_id_pair, args.dst_ids, global_id)).id = node_id; + + if (fill_header) { + REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.bvh); + DEREF(header).root_node_offset = node_id; + + DEREF(header).aabb[0][0] = total_bounds.min.x; + DEREF(header).aabb[0][1] = total_bounds.min.y; + DEREF(header).aabb[0][2] = total_bounds.min.z; + DEREF(header).aabb[1][0] = total_bounds.max.x; + DEREF(header).aabb[1][1] = total_bounds.max.y; + DEREF(header).aabb[1][2] = total_bounds.max.z; + } } diff --git a/src/amd/vulkan/bvh/internal.h b/src/amd/vulkan/bvh/internal.h deleted file mode 100644 index a9c06ac679d..00000000000 --- a/src/amd/vulkan/bvh/internal.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2022 Konstantin Seurer - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BVH_INTERNAL_H -#define BVH_INTERNAL_H - -#include "build_helpers.h" - -struct internal_kernel_args { - VOID_REF bvh; - REF(key_id_pair) src_ids; - REF(key_id_pair) dst_ids; - uint32_t dst_offset; - uint32_t fill_count; -}; -TYPE(internal_kernel_args, 32); - -void -internal_kernel(internal_kernel_args args, uint32_t global_id) -{ - bool fill_header = (args.fill_count & 0x80000000u) != 0; - uint32_t src_count = args.fill_count & 0x7FFFFFFFu; - - uint32_t src_index = global_id * 4; - uint32_t child_count = min(src_count - src_index, 4); - - uint32_t dst_offset = args.dst_offset + global_id * SIZEOF(radv_bvh_box32_node); - - REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.bvh, dst_offset)); - - AABB total_bounds; - total_bounds.min = vec3(INFINITY); - total_bounds.max = vec3(-INFINITY); - - for (uint32_t i = 0; i < 4; i++) { - AABB bounds; - bounds.min = vec3(NAN); - bounds.max = vec3(NAN); - - uint32_t child_id = DEREF(INDEX(key_id_pair, args.src_ids, src_index + i)).id; - - if (i < child_count) { - DEREF(dst_node).children[i] = child_id; - - bounds = calculate_node_bounds(args.bvh, child_id); - total_bounds.min = min(total_bounds.min, bounds.min); - total_bounds.max = max(total_bounds.max, bounds.max); - } - - DEREF(dst_node).coords[i][0][0] = bounds.min.x; - DEREF(dst_node).coords[i][0][1] = bounds.min.y; - DEREF(dst_node).coords[i][0][2] = bounds.min.z; - DEREF(dst_node).coords[i][1][0] = bounds.max.x; - DEREF(dst_node).coords[i][1][1] = bounds.max.y; - DEREF(dst_node).coords[i][1][2] = bounds.max.z; - } - - uint32_t node_id = pack_node_id(dst_offset, radv_bvh_node_internal); - DEREF(INDEX(key_id_pair, args.dst_ids, global_id)).id = node_id; - - if (fill_header) { - REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.bvh); - DEREF(header).root_node_offset = node_id; - - DEREF(header).aabb[0][0] = total_bounds.min.x; - DEREF(header).aabb[0][1] = total_bounds.min.y; - DEREF(header).aabb[0][2] = total_bounds.min.z; - DEREF(header).aabb[1][0] = total_bounds.max.x; - DEREF(header).aabb[1][1] = total_bounds.max.y; - DEREF(header).aabb[1][2] = total_bounds.max.z; - } -} - -#endif diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp index b4af8c929c5..4c5930bc6a9 100644 --- a/src/amd/vulkan/bvh/leaf.comp +++ b/src/amd/vulkan/bvh/leaf.comp @@ -34,20 +34,318 @@ #extension GL_EXT_buffer_reference : require #extension GL_EXT_buffer_reference2 : require -layout(scalar) uniform; -layout(scalar) buffer; - layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; -#include "leaf.h" +#include "build_helpers.h" -layout(push_constant) uniform CONSTS -{ - leaf_kernel_args args; +layout(push_constant) uniform CONSTS { + VOID_REF bvh; + REF(AABB) bounds; + REF(key_id_pair) ids; + + VOID_REF data; + VOID_REF indices; + VOID_REF transform; + + uint32_t dst_offset; + uint32_t first_id; + uint32_t geometry_type; + uint32_t geometry_id; + + uint32_t stride; + uint32_t vertex_format; + uint32_t index_format; +} args; + +/* Just a wrapper for 3 
uints. */ +struct triangle_indices { + uint32_t index[3]; }; +TYPE(triangle_indices, 12); + +triangle_indices +load_indices(VOID_REF indices, uint32_t index_format, uint32_t global_id) +{ + triangle_indices result; + + uint32_t index_base = global_id * 3; + + switch (index_format) { + case VK_INDEX_TYPE_UINT16: { + result.index[0] = DEREF(INDEX(uint16_t, indices, index_base + 0)); + result.index[1] = DEREF(INDEX(uint16_t, indices, index_base + 1)); + result.index[2] = DEREF(INDEX(uint16_t, indices, index_base + 2)); + break; + } + case VK_INDEX_TYPE_UINT32: { + result.index[0] = DEREF(INDEX(uint32_t, indices, index_base + 0)); + result.index[1] = DEREF(INDEX(uint32_t, indices, index_base + 1)); + result.index[2] = DEREF(INDEX(uint32_t, indices, index_base + 2)); + break; + } + case VK_INDEX_TYPE_NONE_KHR: { + result.index[0] = index_base + 0; + result.index[1] = index_base + 1; + result.index[2] = index_base + 2; + break; + } + case VK_INDEX_TYPE_UINT8_EXT: { + result.index[0] = DEREF(INDEX(uint8_t, indices, index_base + 0)); + result.index[1] = DEREF(INDEX(uint8_t, indices, index_base + 1)); + result.index[2] = DEREF(INDEX(uint8_t, indices, index_base + 2)); + break; + } + } + + return result; +} + +/* Just a wrapper for 3 vec4s. */ +struct triangle_vertices { + vec4 vertex[3]; +}; +TYPE(triangle_vertices, 48); + +TYPE(float16_t, 2); + +triangle_vertices +load_vertices(VOID_REF vertices, triangle_indices indices, uint32_t vertex_format, uint32_t stride) +{ + triangle_vertices result; + + for (uint32_t i = 0; i < 3; i++) { + VOID_REF vertex_ptr = OFFSET(vertices, indices.index[i] * stride); + vec4 vertex = vec4(0.0, 0.0, 0.0, 1.0); + + switch (vertex_format) { + case VK_FORMAT_R32G32_SFLOAT: + vertex.x = DEREF(INDEX(float, vertex_ptr, 0)); + vertex.y = DEREF(INDEX(float, vertex_ptr, 1)); + break; + case VK_FORMAT_R32G32B32_SFLOAT: + case VK_FORMAT_R32G32B32A32_SFLOAT: + vertex.x = DEREF(INDEX(float, vertex_ptr, 0)); + vertex.y = DEREF(INDEX(float, vertex_ptr, 1)); + vertex.z = DEREF(INDEX(float, vertex_ptr, 2)); + break; + case VK_FORMAT_R16G16_SFLOAT: + vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0)); + vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1)); + break; + case VK_FORMAT_R16G16B16_SFLOAT: + case VK_FORMAT_R16G16B16A16_SFLOAT: + vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0)); + vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1)); + vertex.z = DEREF(INDEX(float16_t, vertex_ptr, 2)); + break; + case VK_FORMAT_R16G16_SNORM: + vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF)); + vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF)); + break; + case VK_FORMAT_R16G16B16A16_SNORM: + vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF)); + vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF)); + vertex.z = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 2)) / float(0x7FFF)); + break; + case VK_FORMAT_R8G8_SNORM: + vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F)); + vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F)); + break; + case VK_FORMAT_R8G8B8A8_SNORM: + vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F)); + vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F)); + vertex.z = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 2)) / float(0x7F)); + break; + case VK_FORMAT_R16G16_UNORM: + vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF); + vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF); + break; + 
case VK_FORMAT_R16G16B16A16_UNORM: + vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF); + vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF); + vertex.z = DEREF(INDEX(uint16_t, vertex_ptr, 2)) / float(0xFFFF); + break; + case VK_FORMAT_R8G8_UNORM: + vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF); + vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF); + break; + case VK_FORMAT_R8G8B8A8_UNORM: + vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF); + vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF); + vertex.z = DEREF(INDEX(uint8_t, vertex_ptr, 2)) / float(0xFF); + break; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { + uint32_t data = DEREF(REF(uint32_t)(vertex_ptr)); + vertex.x = float(data & 0x3FF) / 0x3FF; + vertex.y = float((data >> 10) & 0x3FF) / 0x3FF; + vertex.z = float((data >> 20) & 0x3FF) / 0x3FF; + break; + } + } + + result.vertex[i] = vertex; + } + + return result; +} + +/* A GLSL-adapted copy of VkAccelerationStructureInstanceKHR. */ +struct AccelerationStructureInstance { + float transform[12]; + uint32_t custom_instance_and_mask; + uint32_t sbt_offset_and_flags; + uint64_t accelerationStructureReference; +}; +TYPE(AccelerationStructureInstance, 64); + +void +build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id) +{ + REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr); + + AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr)); + if (instance.accelerationStructureReference == 0) + return; + + mat4 transform = mat4(1.0); + for (uint32_t col = 0; col < 4; col++) + for (uint32_t row = 0; row < 3; row++) + transform[col][row] = instance.transform[col + row * 4]; + + mat4 inv_transform = inverse(transform); + for (uint32_t col = 0; col < 3; col++) + for (uint32_t row = 0; row < 3; row++) + DEREF(node).wto_matrix[col + row * 4] = inv_transform[col][row]; + + DEREF(node).wto_matrix[3] = transform[3][0]; + DEREF(node).wto_matrix[7] = transform[3][1]; + DEREF(node).wto_matrix[11] = transform[3][2]; + + for (uint32_t col = 0; col < 3; col++) + for (uint32_t row = 0; row < 3; row++) + DEREF(node).otw_matrix[col + row * 3] = transform[col][row]; + + radv_accel_struct_header instance_header = + DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference)); + DEREF(node).base_ptr = + instance.accelerationStructureReference | instance_header.root_node_offset; + + for (uint32_t comp = 0; comp < 3; ++comp) { + bounds.min[comp] = transform[3][comp]; + bounds.max[comp] = transform[3][comp]; + for (uint32_t col = 0; col < 3; ++col) { + bounds.min[comp] += min(transform[col][comp] * instance_header.aabb[0][col], + transform[col][comp] * instance_header.aabb[1][col]); + bounds.max[comp] += max(transform[col][comp] * instance_header.aabb[0][col], + transform[col][comp] * instance_header.aabb[1][col]); + } + } + + DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask; + DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags; + DEREF(node).instance_id = global_id; + + DEREF(node).aabb[0][0] = bounds.min.x; + DEREF(node).aabb[0][1] = bounds.min.y; + DEREF(node).aabb[0][2] = bounds.min.z; + DEREF(node).aabb[1][0] = bounds.max.x; + DEREF(node).aabb[1][1] = bounds.max.y; + DEREF(node).aabb[1][2] = bounds.max.z; +} void main(void) { - leaf_kernel(args, gl_GlobalInvocationID.x); + uint32_t global_id = gl_GlobalInvocationID.x; + + REF(key_id_pair) id_ptr = INDEX(key_id_pair, args.ids, args.first_id + 
global_id); + uint32_t src_offset = global_id * args.stride; + + uint32_t dst_stride; + uint32_t node_type; + if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { + dst_stride = SIZEOF(radv_bvh_triangle_node); + node_type = radv_bvh_node_triangle; + } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { + dst_stride = SIZEOF(radv_bvh_aabb_node); + node_type = radv_bvh_node_aabb; + } else { + dst_stride = SIZEOF(radv_bvh_instance_node); + node_type = radv_bvh_node_instance; + } + + uint32_t dst_offset = args.dst_offset + global_id * dst_stride; + + DEREF(id_ptr).id = pack_node_id(dst_offset, node_type); + + VOID_REF dst_ptr = OFFSET(args.bvh, dst_offset); + + AABB bounds; + if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { + triangle_indices indices = load_indices(args.indices, args.index_format, global_id); + + triangle_vertices vertices = + load_vertices(args.data, indices, args.vertex_format, args.stride); + + if (args.transform != NULL) { + mat4 transform = mat4(1.0); + + for (uint32_t col = 0; col < 4; col++) + for (uint32_t row = 0; row < 3; row++) + transform[col][row] = DEREF(INDEX(float, args.transform, col + row * 4)); + + for (uint32_t i = 0; i < 3; i++) + vertices.vertex[i] = transform * vertices.vertex[i]; + } + + REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr); + + bounds.min = vec3(INFINITY); + bounds.max = vec3(-INFINITY); + + for (uint32_t coord = 0; coord < 3; coord++) + for (uint32_t comp = 0; comp < 3; comp++) { + DEREF(node).coords[coord][comp] = vertices.vertex[coord][comp]; + bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]); + bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]); + } + + DEREF(node).triangle_id = global_id; + DEREF(node).geometry_id_and_flags = args.geometry_id; + DEREF(node).id = 9; + + } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { + VOID_REF src_ptr = OFFSET(args.data, src_offset); + + REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr); + + for (uint32_t vec = 0; vec < 2; vec++) + for (uint32_t comp = 0; comp < 3; comp++) { + float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3)); + DEREF(node).aabb[vec][comp] = coord; + + if (vec == 0) + bounds.min[comp] = coord; + else + bounds.max[comp] = coord; + } + + DEREF(node).primitive_id = global_id; + DEREF(node).geometry_id_and_flags = args.geometry_id; + } else { + VOID_REF src_ptr = OFFSET(args.data, src_offset); + /* arrayOfPointers */ + if (args.stride == 8) { + src_ptr = DEREF(REF(VOID_REF)(src_ptr)); + } + + build_instance(bounds, src_ptr, dst_ptr, global_id); + } + + min_float_emulated(INDEX(int32_t, args.bounds, 0), bounds.min.x); + min_float_emulated(INDEX(int32_t, args.bounds, 1), bounds.min.y); + min_float_emulated(INDEX(int32_t, args.bounds, 2), bounds.min.z); + max_float_emulated(INDEX(int32_t, args.bounds, 3), bounds.max.x); + max_float_emulated(INDEX(int32_t, args.bounds, 4), bounds.max.y); + max_float_emulated(INDEX(int32_t, args.bounds, 5), bounds.max.z); } diff --git a/src/amd/vulkan/bvh/leaf.h b/src/amd/vulkan/bvh/leaf.h deleted file mode 100644 index 4995a44f3fc..00000000000 --- a/src/amd/vulkan/bvh/leaf.h +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright © 2022 Konstantin Seurer - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BVH_LEAF_H -#define BVH_LEAF_H - -#include "build_helpers.h" - -struct leaf_kernel_args { - VOID_REF bvh; - REF(AABB) bounds; - REF(key_id_pair) ids; - - VOID_REF data; - VOID_REF indices; - VOID_REF transform; - - uint32_t dst_offset; - uint32_t first_id; - uint32_t geometry_type; - uint32_t geometry_id; - - uint32_t stride; - uint32_t vertex_format; - uint32_t index_format; -}; -TYPE(leaf_kernel_args, 80); - -/* Just a wrapper for 3 uints. */ -struct triangle_indices { - uint32_t index[3]; -}; -TYPE(triangle_indices, 12); - -triangle_indices -load_indices(VOID_REF indices, uint32_t index_format, uint32_t global_id) -{ - triangle_indices result; - - uint32_t index_base = global_id * 3; - - switch (index_format) { - case VK_INDEX_TYPE_UINT16: { - result.index[0] = DEREF(INDEX(uint16_t, indices, index_base + 0)); - result.index[1] = DEREF(INDEX(uint16_t, indices, index_base + 1)); - result.index[2] = DEREF(INDEX(uint16_t, indices, index_base + 2)); - break; - } - case VK_INDEX_TYPE_UINT32: { - result.index[0] = DEREF(INDEX(uint32_t, indices, index_base + 0)); - result.index[1] = DEREF(INDEX(uint32_t, indices, index_base + 1)); - result.index[2] = DEREF(INDEX(uint32_t, indices, index_base + 2)); - break; - } - case VK_INDEX_TYPE_NONE_KHR: { - result.index[0] = index_base + 0; - result.index[1] = index_base + 1; - result.index[2] = index_base + 2; - break; - } - case VK_INDEX_TYPE_UINT8_EXT: { - result.index[0] = DEREF(INDEX(uint8_t, indices, index_base + 0)); - result.index[1] = DEREF(INDEX(uint8_t, indices, index_base + 1)); - result.index[2] = DEREF(INDEX(uint8_t, indices, index_base + 2)); - break; - } - } - - return result; -} - -/* Just a wrapper for 3 vec4s. 
*/ -struct triangle_vertices { - vec4 vertex[3]; -}; -TYPE(triangle_vertices, 48); - -TYPE(float16_t, 2); - -triangle_vertices -load_vertices(VOID_REF vertices, triangle_indices indices, uint32_t vertex_format, uint32_t stride) -{ - triangle_vertices result; - - for (uint32_t i = 0; i < 3; i++) { - VOID_REF vertex_ptr = OFFSET(vertices, indices.index[i] * stride); - vec4 vertex = vec4(0.0, 0.0, 0.0, 1.0); - - switch (vertex_format) { - case VK_FORMAT_R32G32_SFLOAT: - vertex.x = DEREF(INDEX(float, vertex_ptr, 0)); - vertex.y = DEREF(INDEX(float, vertex_ptr, 1)); - break; - case VK_FORMAT_R32G32B32_SFLOAT: - case VK_FORMAT_R32G32B32A32_SFLOAT: - vertex.x = DEREF(INDEX(float, vertex_ptr, 0)); - vertex.y = DEREF(INDEX(float, vertex_ptr, 1)); - vertex.z = DEREF(INDEX(float, vertex_ptr, 2)); - break; - case VK_FORMAT_R16G16_SFLOAT: - vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0)); - vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1)); - break; - case VK_FORMAT_R16G16B16_SFLOAT: - case VK_FORMAT_R16G16B16A16_SFLOAT: - vertex.x = DEREF(INDEX(float16_t, vertex_ptr, 0)); - vertex.y = DEREF(INDEX(float16_t, vertex_ptr, 1)); - vertex.z = DEREF(INDEX(float16_t, vertex_ptr, 2)); - break; - case VK_FORMAT_R16G16_SNORM: - vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF)); - vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF)); - break; - case VK_FORMAT_R16G16B16A16_SNORM: - vertex.x = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 0)) / float(0x7FFF)); - vertex.y = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 1)) / float(0x7FFF)); - vertex.z = max(-1.0, DEREF(INDEX(int16_t, vertex_ptr, 2)) / float(0x7FFF)); - break; - case VK_FORMAT_R8G8_SNORM: - vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F)); - vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F)); - break; - case VK_FORMAT_R8G8B8A8_SNORM: - vertex.x = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 0)) / float(0x7F)); - vertex.y = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 1)) / float(0x7F)); - vertex.z = max(-1.0, DEREF(INDEX(int8_t, vertex_ptr, 2)) / float(0x7F)); - break; - case VK_FORMAT_R16G16_UNORM: - vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF); - vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF); - break; - case VK_FORMAT_R16G16B16A16_UNORM: - vertex.x = DEREF(INDEX(uint16_t, vertex_ptr, 0)) / float(0xFFFF); - vertex.y = DEREF(INDEX(uint16_t, vertex_ptr, 1)) / float(0xFFFF); - vertex.z = DEREF(INDEX(uint16_t, vertex_ptr, 2)) / float(0xFFFF); - break; - case VK_FORMAT_R8G8_UNORM: - vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF); - vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF); - break; - case VK_FORMAT_R8G8B8A8_UNORM: - vertex.x = DEREF(INDEX(uint8_t, vertex_ptr, 0)) / float(0xFF); - vertex.y = DEREF(INDEX(uint8_t, vertex_ptr, 1)) / float(0xFF); - vertex.z = DEREF(INDEX(uint8_t, vertex_ptr, 2)) / float(0xFF); - break; - case VK_FORMAT_A2B10G10R10_UNORM_PACK32: { - uint32_t data = DEREF(REF(uint32_t)(vertex_ptr)); - vertex.x = float(data & 0x3FF) / 0x3FF; - vertex.y = float((data >> 10) & 0x3FF) / 0x3FF; - vertex.z = float((data >> 20) & 0x3FF) / 0x3FF; - break; - } - } - - result.vertex[i] = vertex; - } - - return result; -} - -/* A GLSL-adapted copy of VkAccelerationStructureInstanceKHR. 
*/ -struct AccelerationStructureInstance { - float transform[12]; - uint32_t custom_instance_and_mask; - uint32_t sbt_offset_and_flags; - uint64_t accelerationStructureReference; -}; -TYPE(AccelerationStructureInstance, 64); - -void -build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t global_id) -{ - REF(radv_bvh_instance_node) node = REF(radv_bvh_instance_node)(dst_ptr); - - AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr)); - if (instance.accelerationStructureReference == 0) - return; - - mat4 transform = mat4(1.0); - for (uint32_t col = 0; col < 4; col++) - for (uint32_t row = 0; row < 3; row++) - transform[col][row] = instance.transform[col + row * 4]; - - mat4 inv_transform = inverse(transform); - for (uint32_t col = 0; col < 3; col++) - for (uint32_t row = 0; row < 3; row++) - DEREF(node).wto_matrix[col + row * 4] = inv_transform[col][row]; - - DEREF(node).wto_matrix[3] = transform[3][0]; - DEREF(node).wto_matrix[7] = transform[3][1]; - DEREF(node).wto_matrix[11] = transform[3][2]; - - for (uint32_t col = 0; col < 3; col++) - for (uint32_t row = 0; row < 3; row++) - DEREF(node).otw_matrix[col + row * 3] = transform[col][row]; - - radv_accel_struct_header instance_header = - DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference)); - DEREF(node).base_ptr = - instance.accelerationStructureReference | instance_header.root_node_offset; - - for (uint32_t comp = 0; comp < 3; ++comp) { - bounds.min[comp] = transform[3][comp]; - bounds.max[comp] = transform[3][comp]; - for (uint32_t col = 0; col < 3; ++col) { - bounds.min[comp] += min(transform[col][comp] * instance_header.aabb[0][col], - transform[col][comp] * instance_header.aabb[1][col]); - bounds.max[comp] += max(transform[col][comp] * instance_header.aabb[0][col], - transform[col][comp] * instance_header.aabb[1][col]); - } - } - - DEREF(node).custom_instance_and_mask = instance.custom_instance_and_mask; - DEREF(node).sbt_offset_and_flags = instance.sbt_offset_and_flags; - DEREF(node).instance_id = global_id; - - DEREF(node).aabb[0][0] = bounds.min.x; - DEREF(node).aabb[0][1] = bounds.min.y; - DEREF(node).aabb[0][2] = bounds.min.z; - DEREF(node).aabb[1][0] = bounds.max.x; - DEREF(node).aabb[1][1] = bounds.max.y; - DEREF(node).aabb[1][2] = bounds.max.z; -} - -void -leaf_kernel(leaf_kernel_args args, uint32_t global_id) -{ - REF(key_id_pair) id_ptr = INDEX(key_id_pair, args.ids, args.first_id + global_id); - uint32_t src_offset = global_id * args.stride; - - uint32_t dst_stride; - uint32_t node_type; - if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { - dst_stride = SIZEOF(radv_bvh_triangle_node); - node_type = radv_bvh_node_triangle; - } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - dst_stride = SIZEOF(radv_bvh_aabb_node); - node_type = radv_bvh_node_aabb; - } else { - dst_stride = SIZEOF(radv_bvh_instance_node); - node_type = radv_bvh_node_instance; - } - - uint32_t dst_offset = args.dst_offset + global_id * dst_stride; - - DEREF(id_ptr).id = pack_node_id(dst_offset, node_type); - - VOID_REF dst_ptr = OFFSET(args.bvh, dst_offset); - - AABB bounds; - if (args.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { - triangle_indices indices = load_indices(args.indices, args.index_format, global_id); - - triangle_vertices vertices = - load_vertices(args.data, indices, args.vertex_format, args.stride); - - if (args.transform != NULL) { - mat4 transform = mat4(1.0); - - for (uint32_t col = 0; col < 4; col++) - for (uint32_t row = 
0; row < 3; row++) - transform[col][row] = DEREF(INDEX(float, args.transform, col + row * 4)); - - for (uint32_t i = 0; i < 3; i++) - vertices.vertex[i] = transform * vertices.vertex[i]; - } - - REF(radv_bvh_triangle_node) node = REF(radv_bvh_triangle_node)(dst_ptr); - - bounds.min = vec3(INFINITY); - bounds.max = vec3(-INFINITY); - - for (uint32_t coord = 0; coord < 3; coord++) - for (uint32_t comp = 0; comp < 3; comp++) { - DEREF(node).coords[coord][comp] = vertices.vertex[coord][comp]; - bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]); - bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]); - } - - DEREF(node).triangle_id = global_id; - DEREF(node).geometry_id_and_flags = args.geometry_id; - DEREF(node).id = 9; - - } else if (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR) { - VOID_REF src_ptr = OFFSET(args.data, src_offset); - - REF(radv_bvh_aabb_node) node = REF(radv_bvh_aabb_node)(dst_ptr); - - for (uint32_t vec = 0; vec < 2; vec++) - for (uint32_t comp = 0; comp < 3; comp++) { - float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3)); - DEREF(node).aabb[vec][comp] = coord; - - if (vec == 0) - bounds.min[comp] = coord; - else - bounds.max[comp] = coord; - } - - DEREF(node).primitive_id = global_id; - DEREF(node).geometry_id_and_flags = args.geometry_id; - } else { - VOID_REF src_ptr = OFFSET(args.data, src_offset); - /* arrayOfPointers */ - if (args.stride == 8) { - src_ptr = DEREF(REF(VOID_REF)(src_ptr)); - } - - build_instance(bounds, src_ptr, dst_ptr, global_id); - } - - min_float_emulated(INDEX(int32_t, args.bounds, 0), bounds.min.x); - min_float_emulated(INDEX(int32_t, args.bounds, 1), bounds.min.y); - min_float_emulated(INDEX(int32_t, args.bounds, 2), bounds.min.z); - max_float_emulated(INDEX(int32_t, args.bounds, 3), bounds.max.x); - max_float_emulated(INDEX(int32_t, args.bounds, 4), bounds.max.y); - max_float_emulated(INDEX(int32_t, args.bounds, 5), bounds.max.z); -} - -#endif diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build index 00ee8da353c..f4b968137ea 100644 --- a/src/amd/vulkan/bvh/meson.build +++ b/src/amd/vulkan/bvh/meson.build @@ -29,9 +29,6 @@ bvh_include_dir = meson.source_root() + '/src/amd/vulkan/bvh' bvh_includes = files( 'build_helpers.h', 'bvh.h', - 'internal.h', - 'leaf.h', - 'morton.h' ) bvh_spv = [] diff --git a/src/amd/vulkan/bvh/morton.comp b/src/amd/vulkan/bvh/morton.comp index 9c33651c95f..b68a44c6ad7 100644 --- a/src/amd/vulkan/bvh/morton.comp +++ b/src/amd/vulkan/bvh/morton.comp @@ -33,20 +33,57 @@ #extension GL_EXT_buffer_reference : require #extension GL_EXT_buffer_reference2 : require -layout(scalar) uniform; -layout(scalar) buffer; - layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; -#include "morton.h" +#include "build_helpers.h" -layout(push_constant) uniform CONSTS +layout(push_constant) uniform CONSTS { + VOID_REF bvh; + REF(AABB) bounds; + REF(key_id_pair) ids; +} args; + +uint32_t +morton_component(uint32_t x) { - morton_kernel_args args; -}; + x = (x * 0x00000101u) & 0x0F00F00Fu; + x = (x * 0x00000011u) & 0xC30C30C3u; + x = (x * 0x00000005u) & 0x49249249u; + return x; +} + +uint32_t +morton_code(uint32_t x, uint32_t y, uint32_t z) +{ + return (morton_component(x) << 2) | (morton_component(y) << 1) | morton_component(z); +} + +uint32_t +lbvh_key(float x01, float y01, float z01) +{ + return morton_code(uint32_t(x01 * 255.0), uint32_t(y01 * 255.0), uint32_t(z01 * 255.0)) << 8; +} void main(void) { - morton_kernel(args, gl_GlobalInvocationID.x); + uint32_t 
global_id = gl_GlobalInvocationID.x; + + REF(key_id_pair) key_id = INDEX(key_id_pair, args.ids, global_id); + + uint32_t id = DEREF(key_id).id; + AABB bounds = calculate_node_bounds(args.bvh, id); + vec3 center = (bounds.min + bounds.max) * 0.5; + + AABB bvh_bounds; + bvh_bounds.min.x = load_minmax_float_emulated(VOID_REF(args.bounds)); + bvh_bounds.min.y = load_minmax_float_emulated(OFFSET(args.bounds, 4)); + bvh_bounds.min.z = load_minmax_float_emulated(OFFSET(args.bounds, 8)); + bvh_bounds.max.x = load_minmax_float_emulated(OFFSET(args.bounds, 12)); + bvh_bounds.max.y = load_minmax_float_emulated(OFFSET(args.bounds, 16)); + bvh_bounds.max.z = load_minmax_float_emulated(OFFSET(args.bounds, 20)); + + vec3 normalized_center = (center - bvh_bounds.min) / (bvh_bounds.max - bvh_bounds.min); + + DEREF(key_id).key = lbvh_key(normalized_center.x, normalized_center.y, normalized_center.z); } diff --git a/src/amd/vulkan/bvh/morton.h b/src/amd/vulkan/bvh/morton.h deleted file mode 100644 index 171bb1ed776..00000000000 --- a/src/amd/vulkan/bvh/morton.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright © 2022 Konstantin Seurer - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef BVH_MORTON_H -#define BVH_MORTON_H - -#include "build_helpers.h" - -uint32_t -morton_component(uint32_t x) -{ - x = (x * 0x00000101u) & 0x0F00F00Fu; - x = (x * 0x00000011u) & 0xC30C30C3u; - x = (x * 0x00000005u) & 0x49249249u; - return x; -} - -uint32_t -morton_code(uint32_t x, uint32_t y, uint32_t z) -{ - return (morton_component(x) << 2) | (morton_component(y) << 1) | morton_component(z); -} - -uint32_t -lbvh_key(float x01, float y01, float z01) -{ - return morton_code(uint32_t(x01 * 255.0), uint32_t(y01 * 255.0), uint32_t(z01 * 255.0)) << 8; -} - -struct morton_kernel_args { - VOID_REF bvh; - REF(AABB) bounds; - REF(key_id_pair) ids; -}; -TYPE(morton_kernel_args, 24); - -void -morton_kernel(morton_kernel_args args, uint32_t global_id) -{ - REF(key_id_pair) key_id = INDEX(key_id_pair, args.ids, global_id); - - uint32_t id = DEREF(key_id).id; - AABB bounds = calculate_node_bounds(args.bvh, id); - vec3 center = (bounds.min + bounds.max) * 0.5; - - AABB bvh_bounds; - bvh_bounds.min.x = load_minmax_float_emulated(VOID_REF(args.bounds)); - bvh_bounds.min.y = load_minmax_float_emulated(OFFSET(args.bounds, 4)); - bvh_bounds.min.z = load_minmax_float_emulated(OFFSET(args.bounds, 8)); - bvh_bounds.max.x = load_minmax_float_emulated(OFFSET(args.bounds, 12)); - bvh_bounds.max.y = load_minmax_float_emulated(OFFSET(args.bounds, 16)); - bvh_bounds.max.z = load_minmax_float_emulated(OFFSET(args.bounds, 20)); - - vec3 normalized_center = (center - bvh_bounds.min) / (bvh_bounds.max - bvh_bounds.min); - - DEREF(key_id).key = lbvh_key(normalized_center.x, normalized_center.y, normalized_center.z); -} - -#endif
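
Notes on the inlined kernels follow; none of the snippets below are part of the patch itself.

In internal.comp, the fill_count push constant is a packed value: bit 31 marks the final collapse pass (which also writes the radv_accel_struct_header), and the low 31 bits carry the number of source nodes to collapse. A minimal GLSL sketch of the corresponding packing, using the hypothetical helper name pack_fill_count:

    uint32_t
    pack_fill_count(uint32_t src_count, bool fill_header)
    {
       /* Bit 31 selects the final pass that also writes the header; the low
        * 31 bits are the source node count, matching the unpacking at the
        * top of main(): args.fill_count & 0x7FFFFFFFu. */
       return src_count | (fill_header ? 0x80000000u : 0u);
    }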
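
In build_instance() (leaf.comp), the referenced BLAS bounds are transformed into world space column by column: each output component starts from the translation and then adds, per input axis, whichever of column * min or column * max is smaller (respectively larger). This is the standard tight affine AABB transform. A standalone sketch of the same idiom, assuming the AABB struct (vec3 min/max members) from build_helpers.h:

    AABB
    transform_aabb(mat4 transform, AABB in_bounds)
    {
       AABB out_bounds;
       for (uint32_t comp = 0; comp < 3; ++comp) {
          /* Start from the translation column. */
          out_bounds.min[comp] = transform[3][comp];
          out_bounds.max[comp] = transform[3][comp];
          /* Each input axis contributes either its product with the box
           * minimum or with the box maximum, whichever is smaller/larger;
           * summing the three axes yields a tight transformed AABB. */
          for (uint32_t col = 0; col < 3; ++col) {
             out_bounds.min[comp] += min(transform[col][comp] * in_bounds.min[col],
                                         transform[col][comp] * in_bounds.max[col]);
             out_bounds.max[comp] += max(transform[col][comp] * in_bounds.min[col],
                                         transform[col][comp] * in_bounds.max[col]);
          }
       }
       return out_bounds;
    }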
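
The Morton helpers inlined into morton.comp use the usual multiply-and-mask bit spread: each step duplicates the value at a shifted offset and masks, so input bit i of an 8-bit component ends up at output bit 3 * i, and morton_code() interleaves x/y/z with x in the most significant slot of each bit triple (e.g. morton_component(0xFF) == 0x249249, morton_code(255, 0, 0) == 0x924924). lbvh_key() quantizes the normalized centroid to 8 bits per axis and shifts the code left by 8, so the low 8 bits of every key stay zero. A loop-based reference equivalent, as a sketch (GLSL, assuming the explicit arithmetic types these shaders already enable):

    uint32_t
    morton_component_ref(uint32_t x)
    {
       /* Spread the low 8 bits of x so bit i lands at bit 3 * i; equivalent
        * to the multiply-and-mask chain in morton_component(). */
       uint32_t result = 0u;
       for (uint32_t i = 0u; i < 8u; i++)
          result |= ((x >> i) & 1u) << (3u * i);
       return result;
    }

    uint32_t
    morton_code_ref(uint32_t x, uint32_t y, uint32_t z)
    {
       /* x occupies the most significant bit of each triple. */
       return (morton_component_ref(x) << 2) | (morton_component_ref(y) << 1) |
              morton_component_ref(z);
    }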