mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 13:30:12 +01:00
tu: Support VK_KHR_acceleration_structure
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28447>
This commit is contained in:
parent
78b5999c1e
commit
671e3a65a6
21 changed files with 1996 additions and 5 deletions
|
|
@ -529,7 +529,7 @@ Vulkan 1.4 -- all DONE: anv, lvp, nvk, radv/gfx8+, tu/a7xx+
|
||||||
|
|
||||||
Khronos extensions that are not part of any Vulkan version:
|
Khronos extensions that are not part of any Vulkan version:
|
||||||
|
|
||||||
VK_KHR_acceleration_structure DONE (anv/gfx12.5+, lvp, radv/gfx10.3+)
|
VK_KHR_acceleration_structure DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, tu/a740+)
|
||||||
VK_KHR_android_surface not started
|
VK_KHR_android_surface not started
|
||||||
VK_KHR_calibrated_timestamps DONE (anv, nvk, radv, tu/a750+)
|
VK_KHR_calibrated_timestamps DONE (anv, nvk, radv, tu/a750+)
|
||||||
VK_KHR_compute_shader_derivatives DONE (anv, nvk, radv, tu/a7xx+)
|
VK_KHR_compute_shader_derivatives DONE (anv, nvk, radv, tu/a7xx+)
|
||||||
|
|
|
||||||
|
|
@ -642,7 +642,7 @@ endif
|
||||||
prog_glslang = find_program(
|
prog_glslang = find_program(
|
||||||
'glslangValidator',
|
'glslangValidator',
|
||||||
native : true,
|
native : true,
|
||||||
required : with_vulkan_overlay_layer or with_aco_tests or with_amd_vk or with_intel_vk or with_swrast_vk
|
required : with_vulkan_overlay_layer or with_aco_tests or with_amd_vk or with_intel_vk or with_swrast_vk or with_freedreno_vk
|
||||||
)
|
)
|
||||||
|
|
||||||
if prog_glslang.found()
|
if prog_glslang.found()
|
||||||
|
|
|
||||||
106
src/freedreno/vulkan/bvh/copy.comp
Normal file
106
src/freedreno/vulkan/bvh/copy.comp
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Bas Nieuwenhuizen
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 460
|
||||||
|
|
||||||
|
#extension GL_GOOGLE_include_directive : require
|
||||||
|
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||||
|
#extension GL_EXT_scalar_block_layout : require
|
||||||
|
#extension GL_EXT_buffer_reference : require
|
||||||
|
#extension GL_EXT_buffer_reference2 : require
|
||||||
|
|
||||||
|
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
#include "tu_build_interface.h"
|
||||||
|
|
||||||
|
layout(push_constant) uniform CONSTS {
|
||||||
|
copy_args args;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Copy / serialize / deserialize an acceleration structure.  Each invocation
 * moves 16 bytes (one uvec4) per iteration in a strided loop.  Instance
 * pointers embedded in a TLAS are rewritten inline by the same invocation
 * that copies them, so no extra pass or synchronization is needed.
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;
   uint32_t total_lanes = gl_NumWorkGroups.x * 128;
   uint32_t stride = total_lanes * 16;

   uint64_t src_base = args.src_addr;
   uint64_t dst_base = args.dst_addr;

   /* Serialized data begins with a driver-independent header followed by a
    * table of BLAS addresses (one uint64_t per instance); the BVH payload
    * comes after that.
    */
   if (args.mode == TU_COPY_MODE_DESERIALIZE) {
      src_base += SIZEOF(vk_accel_struct_serialization_header) +
                  DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(tu_accel_struct_header) header = REF(tu_accel_struct_header)(src_base);

   /* Byte range [instance_offset, instance_end) of the copied data holds the
    * tu_instance_descriptor array that needs pointer fixups.
    */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);
   uint64_t instance_offset = SIZEOF(tu_accel_struct_header);
   uint64_t instance_end = DEREF(header).instance_count * SIZEOF(tu_instance_descriptor);
   if (instance_end > 0)
      instance_end += instance_offset;

   if (args.mode == TU_COPY_MODE_SERIALIZE) {
      dst_base += SIZEOF(vk_accel_struct_serialization_header) +
                  DEREF(REF(tu_accel_struct_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);

      /* A single invocation fills in the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;
      }

      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == TU_COPY_MODE_COPY) {
      /* A plain copy keeps device addresses as-is: skip all fixups. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;
   for (uint64_t offset = global_id * 16; offset < size; offset += stride) {
      DEREF(REF(uvec4)(dst_base + offset)) =
         DEREF(REF(uvec4)(src_base + offset));

      /* Do the adjustment inline in the same invocation that copies the data
       * so that we don't have to synchronize.
       */
      if (offset < instance_end && offset >= instance_offset &&
          (offset - instance_offset) % SIZEOF(tu_instance_descriptor) == 0) {
         uint64_t idx = (offset - instance_offset) / SIZEOF(tu_instance_descriptor);

         uint32_t bvh_offset = DEREF(REF(tu_instance_descriptor)(src_base + offset)).bvh_offset;
         if (args.mode == TU_COPY_MODE_SERIALIZE) {
            /* Record the device-independent BLAS base address in the table. */
            DEREF(INDEX(uint64_t, instance_base, idx)) =
               DEREF(REF(tu_instance_descriptor)(src_base + offset)).bvh_ptr - bvh_offset;
         } else { /* TU_COPY_MODE_DESERIALIZE */
            /* Rebase the instance's BVH pointer onto the new BLAS address. */
            uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
            DEREF(REF(tu_instance_descriptor)(dst_base + offset)).bvh_ptr = blas_addr + bvh_offset;
         }
      }
   }
}
|
||||||
502
src/freedreno/vulkan/bvh/encode.comp
Normal file
502
src/freedreno/vulkan/bvh/encode.comp
Normal file
|
|
@ -0,0 +1,502 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Friedrich Vock
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 460
|
||||||
|
|
||||||
|
#extension GL_GOOGLE_include_directive : require
|
||||||
|
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||||
|
#extension GL_EXT_scalar_block_layout : require
|
||||||
|
#extension GL_EXT_buffer_reference : require
|
||||||
|
#extension GL_EXT_buffer_reference2 : require
|
||||||
|
#extension GL_KHR_memory_scope_semantics : require
|
||||||
|
|
||||||
|
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
#include "tu_build_helpers.h"
|
||||||
|
#include "tu_build_interface.h"
|
||||||
|
|
||||||
|
layout(push_constant) uniform CONSTS {
|
||||||
|
encode_args args;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Record the parent id of a child node.  Parent links live in a
 * downward-growing array placed immediately below the start of the output
 * BVH (entry i at output_bvh - 4 * (i + 1)).
 */
void set_parent(uint32_t child, uint32_t parent)
{
   uint64_t parent_entry = args.output_bvh - 4 * (child + 1);
   DEREF(REF(uint32_t)(parent_entry)) = parent;
}
|
||||||
|
|
||||||
|
/* This encoder struct is designed to encode a compressed node without keeping
|
||||||
|
* all the data live at once, making sure register pressure isn't too high.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This encoder struct is designed to encode a compressed node without keeping
 * all the data live at once, making sure register pressure isn't too high.
 */
struct tu_encoder {
   uint32_t cur_value;            /* partially assembled 32-bit output word */
   uint word_offset;              /* index of the next word in node->data */
   uint bit_offset;               /* bits already occupied in cur_value */
   REF(tu_compressed_node) node;  /* destination compressed node */
};
|
||||||
|
|
||||||
|
/* Begin a fresh bit stream targeting the given compressed node. */
void encode_init(out tu_encoder encoder, REF(tu_compressed_node) node)
{
   encoder.cur_value = 0;
   encoder.word_offset = 0;
   encoder.bit_offset = 0;
   encoder.node = node;
}
|
||||||
|
|
||||||
|
/* Append the low 'bits' bits of 'val' to the stream.  Assumes 'val' has no
 * bits set above 'bits' (stray high bits would leak into the stream via the
 * OR below).
 */
void encode(inout tu_encoder encoder, uint32_t val, uint bits)
{
   encoder.cur_value |= val << encoder.bit_offset;
   if (encoder.bit_offset + bits < 32) {
      encoder.bit_offset += bits;
   } else {
      /* Current word is full: flush it and carry the spilled-over high bits
       * of val into the next word.
       * NOTE(review): if bit_offset == 0 and bits == 32 this shifts by 32,
       * which is undefined in GLSL -- presumably callers never encode a full
       * 32-bit word in one call; verify against the build shaders.
       */
      DEREF(encoder.node).data[encoder.word_offset] = encoder.cur_value;
      encoder.cur_value = val >> (32 - encoder.bit_offset);
      encoder.word_offset++;
      encoder.bit_offset = encoder.bit_offset + bits - 32;
   }
}
|
||||||
|
|
||||||
|
/* Advance the write position by 'bits' without encoding anything.
 * NOTE(review): when crossing a word boundary, cur_value is flushed but not
 * cleared, so bits of the previous word remain in cur_value for the next
 * word -- looks intentional for the compressed-node layout, but confirm
 * against callers.
 */
void encode_skip(inout tu_encoder encoder, uint bits)
{
   if (encoder.bit_offset + bits < 32) {
      encoder.bit_offset += bits;
   } else {
      DEREF(encoder.node).data[encoder.word_offset] = encoder.cur_value;
      encoder.word_offset++;
      encoder.bit_offset = encoder.bit_offset + bits - 32;
   }
}
|
||||||
|
|
||||||
|
/* Flush the final, possibly partial, word of the stream. */
void encode_finalize(tu_encoder encoder)
{
   DEREF(encoder.node).data[encoder.word_offset] = encoder.cur_value;
}
|
||||||
|
|
||||||
|
/* Convert one intermediate (vk_ir_*) leaf node into the hardware leaf-node
 * layout at dst_node.  For instance nodes this also emits the
 * tu_instance_descriptor into the TLAS instance array at dst_instances.
 * The aggregate opacity flags in dst_header are narrowed with atomics as
 * leaves of each opacity kind are encountered.
 */
void
encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node, uint64_t dst_instances, REF(tu_accel_struct_header) dst_header)
{
   float coords[3][3];
   uint32_t id;
   uint32_t geometry_id;
   uint32_t type_flags = TU_NODE_TYPE_LEAF;

   switch (type) {
   case vk_ir_node_triangle: {
      vk_ir_triangle_node src = DEREF(REF(vk_ir_triangle_node)(src_node));

      /* Triangles store their three vertices directly. */
      coords = src.coords;
      uint32_t geometry_id_and_flags = src.geometry_id_and_flags;
      if ((geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
         atomicAnd(DEREF(dst_header).instance_flags, ~TU_INSTANCE_ALL_NONOPAQUE);
      } else {
         type_flags |= TU_NODE_TYPE_NONOPAQUE;
         atomicAnd(DEREF(dst_header).instance_flags, ~TU_INSTANCE_ALL_OPAQUE);
      }
      geometry_id = geometry_id_and_flags & 0xffffff;
      id = src.triangle_id;
      break;
   }
   case vk_ir_node_aabb: {
      vk_ir_aabb_node src = DEREF(REF(vk_ir_aabb_node)(src_node));
      vk_aabb aabb = src.base.aabb;

      /* AABBs store min in coords[0] and max in coords[1]. */
      coords[0][0] = aabb.min[0];
      coords[0][1] = aabb.min[1];
      coords[0][2] = aabb.min[2];
      coords[1][0] = aabb.max[0];
      coords[1][1] = aabb.max[1];
      coords[1][2] = aabb.max[2];

      type_flags |= TU_NODE_TYPE_AABB;

      if ((src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0) {
         atomicAnd(DEREF(dst_header).instance_flags, ~TU_INSTANCE_ALL_NONOPAQUE);
      } else {
         type_flags |= TU_NODE_TYPE_NONOPAQUE;
         atomicAnd(DEREF(dst_header).instance_flags, ~TU_INSTANCE_ALL_OPAQUE);
      }
      geometry_id = src.geometry_id_and_flags & 0xffffff;
      id = src.primitive_id;
      break;
   }
   case vk_ir_node_instance: {
      vk_ir_instance_node src = DEREF(REF(vk_ir_instance_node)(src_node));

      id = src.instance_id;
      geometry_id = 0;
      REF(tu_instance_descriptor) dst_instance = REF(tu_instance_descriptor)(dst_instances + SIZEOF(tu_instance_descriptor) * id);

      /* bvh_offset records where the BLAS BVH sits relative to its
       * acceleration-structure base so serialization can rebase it.
       */
      REF(tu_accel_struct_header) blas_header = REF(tu_accel_struct_header)(src.base_ptr);
      uint64_t bvh_ptr = DEREF(blas_header).bvh_ptr;
      uint32_t bvh_offset = uint32_t(bvh_ptr - src.base_ptr);

      uint32_t sbt_offset_and_flags = src.sbt_offset_and_flags;
      uint32_t custom_instance_and_mask = src.custom_instance_and_mask;
      DEREF(dst_instance).bvh_ptr = bvh_ptr;
      DEREF(dst_instance).custom_instance_index = custom_instance_and_mask & 0xffffffu;
      DEREF(dst_instance).sbt_offset_and_flags = sbt_offset_and_flags;
      DEREF(dst_instance).bvh_offset = bvh_offset;

      mat4 transform = mat4(src.otw_matrix);

      /* transpose(inverse(transpose(M))) is mathematically inverse(M). */
      mat4 inv_transform = transpose(inverse(transpose(transform)));
      DEREF(dst_instance).wto_matrix = mat3x4(inv_transform);
      DEREF(dst_instance).otw_matrix = mat3x4(transform);

      vk_aabb aabb = src.base.aabb;
      coords[0][0] = aabb.min[0];
      coords[0][1] = aabb.min[1];
      coords[0][2] = aabb.min[2];
      coords[1][0] = aabb.max[0];
      coords[1][1] = aabb.max[1];
      coords[1][2] = aabb.max[2];

      type_flags |= TU_NODE_TYPE_TLAS;

      uint32_t instance_flags = DEREF(blas_header).instance_flags;

      /* Apply VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR and
       * VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR to correct the
       * ALL_OPAQUE/ALL_NONOPAQUE flags.
       */
      if (((sbt_offset_and_flags >> 24) & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR |
                                           VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR)) != 0) {
         instance_flags &= ~(VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR |
                             VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR);
         instance_flags |= (sbt_offset_and_flags >> 24) & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR |
                                                           VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR);
      }
      uint32_t cull_mask_and_flags = ((custom_instance_and_mask >> 16) & 0xff00) | instance_flags;

      /* The cull mask and flags are smuggled through the third coords row. */
      coords[2][0] = uintBitsToFloat(cull_mask_and_flags);
      break;
   }
   }

   REF(tu_leaf_node) dst = REF(tu_leaf_node)(dst_node);
   DEREF(dst).coords = coords;
   DEREF(dst).id = id;
   DEREF(dst).geometry_id = geometry_id;
   DEREF(dst).type_flags = type_flags;
}
|
||||||
|
|
||||||
|
/* Truncate to bfloat16 while rounding down. bfloat16 is used to store the bases.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Truncate to bfloat16 while rounding down (towards -infinity).  bfloat16 is
 * used to store the bases.  Positive values round down by plain truncation of
 * the low 16 bits; negative values (sign bit set) must round away from zero,
 * so the magnitude is bumped before truncating.
 */
u16vec3 to_bfloat_round_down(vec3 coord)
{
   u32vec3 bits = floatBitsToUint(coord);
   u32vec3 truncated = bits >> 16;
   u32vec3 bumped = (bits + 0xffff) >> 16;
   return u16vec3(mix(truncated, bumped, notEqual(bits & u32vec3(0x80000000), u32vec3(0))));
}
|
||||||
|
|
||||||
|
/* Approximate subtraction while rounding up. Return a result greater than or
|
||||||
|
* equal to the infinitely-precise result. This just uses the native
|
||||||
|
* subtraction and then shifts one ULP towards infinity. Because the result is
|
||||||
|
* further rounded, it should usually be good enough while being faster than
|
||||||
|
* emulated floating-point math.
|
||||||
|
*
|
||||||
|
* We assume here that the result is always nonnegative, because it's only used
|
||||||
|
* to subtract away the base.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Approximate subtraction while rounding up: returns a value greater than or
 * equal to the infinitely-precise a - b.  Uses the native subtraction and
 * shifts the result one ULP towards infinity; since the result is rounded
 * further downstream this is good enough and much cheaper than emulated
 * floating-point math.  Assumes the result is nonnegative (it is only used to
 * subtract away the base).
 */
vec3 subtract_round_up_approx(vec3 a, vec3 b)
{
   vec3 diff = a - b;
   u32vec3 bumped = floatBitsToUint(diff);

   bumped++;

   /* Handle infinity/zero special cases */
   bumped = mix(bumped, floatBitsToUint(diff), isinf(diff));
   bumped = mix(bumped, floatBitsToUint(diff), equal(diff, vec3(0)));

   return uintBitsToFloat(bumped);
}
|
||||||
|
|
||||||
|
/* Counterpart of subtract_round_up_approx: returns a value less than or equal
 * to the infinitely-precise a - b by shifting the native result one ULP
 * towards -infinity.  Same nonnegative-result assumption applies.
 */
vec3 subtract_round_down_approx(vec3 a, vec3 b)
{
   vec3 diff = a - b;
   u32vec3 bumped = floatBitsToUint(diff);

   bumped--;

   /* Handle infinity/zero special cases */
   bumped = mix(bumped, floatBitsToUint(diff), isinf(diff));
   bumped = mix(bumped, floatBitsToUint(diff), equal(diff, vec3(0)));

   return uintBitsToFloat(bumped);
}
|
||||||
|
|
||||||
|
/* Per-component 24-bit mantissa with the implicit leading 1 made explicit;
 * exactly-zero components yield 0.
 */
u32vec3 extract_mantissa(vec3 f)
{
   u32vec3 explicit_mantissa = (floatBitsToUint(f) & 0x7fffff) | 0x800000;
   return mix(explicit_mantissa, u32vec3(0), equal(f, vec3(0)));
}
|
||||||
|
|
||||||
|
/* Emit one hardware internal node at index bvh_offset of the output BVH.
 * Child boxes are stored compressed: a shared bfloat16 base per axis, a
 * shared 8-bit exponent per axis, and 8-bit mantissas per child for min/max.
 * Unused child slots get an inverted (empty) box so they can never be hit.
 */
void
encode_internal_node(uint32_t children[8], uint32_t children_offset, uint child_count,
                     vec3 min_offset, vec3 max_offset, uint32_t bvh_offset)
{
   REF(tu_internal_node) dst_node = REF(tu_internal_node)(OFFSET(args.output_bvh, SIZEOF(tu_internal_node) * bvh_offset));

   DEREF(dst_node).id = children_offset;

   /* Base rounded down so the quantized boxes only ever grow. */
   u16vec3 base_bfloat = to_bfloat_round_down(min_offset);
   vec3 base_float = uintBitsToFloat(u32vec3(base_bfloat) << 16);
   DEREF(dst_node).bases[0] = base_bfloat.x;
   DEREF(dst_node).bases[1] = base_bfloat.y;
   DEREF(dst_node).bases[2] = base_bfloat.z;

   vec3 children_max = subtract_round_up_approx(max_offset, base_float);

   /* The largest child offset will be encoded in 8 bits, including the
    * explicit leading 1.  We need to downcast to this precision while rounding
    * up to catch cases where the exponent is increased by rounding up, then
    * extract the exponent.  Because children_max is always nonnegative, we can
    * do the downcast with "(floatBitsToUint(children_max) + 0xffff) >> 16",
    * and then we further shift to get the rounded exponent.
    */
   u16vec3 exponents = u16vec3((floatBitsToUint(children_max) + 0xffff) >> 23);
   u8vec3 exponents_u8 = u8vec3(exponents);
   DEREF(dst_node).exponents[0] = exponents_u8.x;
   DEREF(dst_node).exponents[1] = exponents_u8.y;
   DEREF(dst_node).exponents[2] = exponents_u8.z;

   for (uint32_t i = 0; i < child_count; i++) {
      uint32_t offset = ir_id_to_offset(children[i]);

      vk_aabb child_aabb =
         DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;

      /* Note: because we subtract from the minimum, we should never have a
       * negative value here.
       */
      vec3 child_min = subtract_round_down_approx(child_aabb.min, base_float);
      vec3 child_max = subtract_round_up_approx(child_aabb.max, base_float);

      u16vec3 child_min_exponents = u16vec3(floatBitsToUint(child_min) >> 23);
      u16vec3 child_max_exponents = u16vec3(floatBitsToUint(child_max) >> 23);

      u16vec3 child_min_shift = u16vec3(16) + exponents - child_min_exponents;
      /* Divide the mantissa by 2**child_min_shift, rounding down */
      u8vec3 child_min_mantissas =
         mix(u8vec3(extract_mantissa(child_min) >> child_min_shift), u8vec3(0),
             greaterThanEqual(child_min_shift, u16vec3(32)));
      u16vec3 child_max_shift = u16vec3(16) + exponents - child_max_exponents;
      /* Divide the mantissa by 2**child_max_shift, rounding up */
      u8vec3 child_max_mantissas =
         mix(u8vec3((extract_mantissa(child_max) + ((u32vec3(1u) << u32vec3(child_max_shift)) - 1)) >> child_max_shift),
             u8vec3(notEqual(extract_mantissa(child_max), u32vec3(0))),
             greaterThanEqual(child_max_shift, u16vec3(32)));

      DEREF(dst_node).mantissas[i][0][0] = child_min_mantissas.x;
      DEREF(dst_node).mantissas[i][0][1] = child_min_mantissas.y;
      DEREF(dst_node).mantissas[i][0][2] = child_min_mantissas.z;
      DEREF(dst_node).mantissas[i][1][0] = child_max_mantissas.x;
      DEREF(dst_node).mantissas[i][1][1] = child_max_mantissas.y;
      DEREF(dst_node).mantissas[i][1][2] = child_max_mantissas.z;
   }

   /* Fill the remaining slots with an empty box (min > max). */
   for (uint32_t i = child_count; i < 8; i++) {
      DEREF(dst_node).mantissas[i][0][0] = uint8_t(0xff);
      DEREF(dst_node).mantissas[i][0][1] = uint8_t(0xff);
      DEREF(dst_node).mantissas[i][0][2] = uint8_t(0xff);
      DEREF(dst_node).mantissas[i][1][0] = uint8_t(0);
      DEREF(dst_node).mantissas[i][1][1] = uint8_t(0);
      DEREF(dst_node).mantissas[i][1][2] = uint8_t(0);
   }

   DEREF(dst_node).child_count = uint8_t(child_count);
   DEREF(dst_node).type_flags = uint16_t(args.geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? (TU_NODE_TYPE_TLAS >> 16) : 0);
}
|
||||||
|
|
||||||
|
/* Encode one intermediate internal node into the output BVH.  The binary
 * intermediate BVH is collapsed into 8-wide nodes: each invocation waits
 * (spinning on bvh_offset) until a parent invocation has assigned it a slot,
 * then greedily pulls grandchildren up into its node by surface area.
 */
void
main()
{
   /* Revert the order so we start at the root */
   uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;

   /* Leaf size in the intermediate BVH depends on the geometry kind. */
   uint32_t intermediate_leaf_node_size;
   switch (args.geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      intermediate_leaf_node_size = SIZEOF(vk_ir_triangle_node);
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR:
      intermediate_leaf_node_size = SIZEOF(vk_ir_aabb_node);
      break;
   default: /* instances */
      intermediate_leaf_node_size = SIZEOF(vk_ir_instance_node);
      break;
   }

   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * intermediate_leaf_node_size;

   /* Internal nodes follow all the leaves in the intermediate BVH. */
   REF(vk_ir_box_node) intermediate_internal_nodes =
      REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size);
   REF(vk_ir_box_node) src_node = INDEX(vk_ir_box_node, intermediate_internal_nodes, global_id);
   vk_ir_box_node src = DEREF(src_node);

   uint64_t dst_instances = args.output_bvh - args.output_bvh_offset + SIZEOF(tu_accel_struct_header);

   bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1;

   REF(tu_accel_struct_header) header = REF(tu_accel_struct_header)(args.output_bvh - args.output_bvh_offset);

   if (is_root_node) {
      DEREF(header).instance_flags =
         (args.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR ? TU_INSTANCE_ALL_AABB : 0) |
         /* These will be removed when processing leaf nodes */
         TU_INSTANCE_ALL_NONOPAQUE | TU_INSTANCE_ALL_OPAQUE;
      DEREF(args.header).dst_node_offset = 1;
   }

   for (;;) {
      /* Make changes to the current node's BVH offset value visible. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      uint32_t bvh_offset = is_root_node ? 0 : DEREF(src_node).bvh_offset;
      /* Not yet assigned a slot by the parent: spin. */
      if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
         continue;

      /* This node was collapsed into its parent: nothing to emit. */
      if (bvh_offset == VK_NULL_BVH_OFFSET)
         break;

      uint32_t found_child_count = 0;
      uint32_t children[8] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE,
                              VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE,
                              VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE,
                              VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE};

      for (uint32_t i = 0; i < 2; ++i)
         if (src.children[i] != VK_BVH_INVALID_NODE)
            children[found_child_count++] = src.children[i];

      /* Widen the node: repeatedly replace the internal child with the
       * largest surface area by its own children until 8 slots are used or
       * only leaves remain.
       */
      while (found_child_count < 8) {
         int32_t collapsed_child_index = -1;
         float largest_surface_area = -INFINITY;

         for (int32_t i = 0; i < found_child_count; ++i) {
            if (ir_id_to_type(children[i]) != vk_ir_node_internal)
               continue;

            vk_aabb bounds =
               DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh,
                                           ir_id_to_offset(children[i]))).aabb;

            float surface_area = aabb_surface_area(bounds);
            if (surface_area > largest_surface_area) {
               largest_surface_area = surface_area;
               collapsed_child_index = i;
            }
         }

         if (collapsed_child_index != -1) {
            REF(vk_ir_box_node) child_node =
               REF(vk_ir_box_node)OFFSET(args.intermediate_bvh,
                                         ir_id_to_offset(children[collapsed_child_index]));
            uint32_t grandchildren[2] = DEREF(child_node).children;
            uint32_t valid_grandchild_count = 0;

            if (grandchildren[1] != VK_BVH_INVALID_NODE)
               ++valid_grandchild_count;

            if (grandchildren[0] != VK_BVH_INVALID_NODE)
               ++valid_grandchild_count;
            else
               grandchildren[0] = grandchildren[1];

            if (valid_grandchild_count > 1)
               children[found_child_count++] = grandchildren[1];

            if (valid_grandchild_count > 0)
               children[collapsed_child_index] = grandchildren[0];
            else {
               found_child_count--;
               children[collapsed_child_index] = children[found_child_count];
            }

            /* Mark the absorbed child so its own invocation emits nothing. */
            DEREF(child_node).bvh_offset = VK_NULL_BVH_OFFSET;
         } else
            break;
      }

      /* If there is only one child, collapse the current node by setting the
       * child's offset to this node's offset.  Otherwise, use an atomic to
       * allocate contiguous space for all of the children.
       */
      uint32_t children_offset = bvh_offset;
      if (found_child_count > 1) {
         children_offset = atomicAdd(DEREF(args.header).dst_node_offset, found_child_count);
      }

      vec3 min_offset = vec3(INFINITY);
      vec3 max_offset = vec3(-INFINITY);
      for (uint32_t i = 0; i < found_child_count; ++i) {
         uint32_t type = ir_id_to_type(children[i]);
         uint32_t offset = ir_id_to_offset(children[i]);
         uint32_t dst_offset;

         dst_offset = children_offset + i;

         if (type == vk_ir_node_internal) {
            /* Hand the child its output slot; its invocation will encode it. */
            REF(vk_ir_box_node) child_node = REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, offset);
            DEREF(child_node).bvh_offset = dst_offset;
         } else {
            encode_leaf_node(type, args.intermediate_bvh + offset,
                             args.output_bvh + SIZEOF(tu_internal_node) * dst_offset, dst_instances,
                             header);
         }

         vk_aabb child_aabb =
            DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;

         min_offset = min(min_offset, child_aabb.min);
         max_offset = max(max_offset, child_aabb.max);

         if (found_child_count > 1) {
            set_parent(dst_offset, bvh_offset);
         }
      }

      /* Make changes to the children's BVH offset value available to the other invocations. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      if (found_child_count > 1 || found_child_count == 0)
         encode_internal_node(children, children_offset, found_child_count, min_offset, max_offset, bvh_offset);

      break;
   }

   if (is_root_node) {
      DEREF(header).aabb = src.base.aabb;
      DEREF(header).bvh_ptr = args.output_bvh;

      set_parent(0, VK_BVH_INVALID_NODE);
   }
}
|
||||||
65
src/freedreno/vulkan/bvh/header.comp
Normal file
65
src/freedreno/vulkan/bvh/header.comp
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 460

#extension GL_GOOGLE_include_directive : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require

layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

#include "tu_build_interface.h"

layout(push_constant) uniform CONSTS
{
   header_args args;
};

/* Single-invocation shader that fills in the size-related fields of the
 * tu_accel_struct_header after a build, when the actual compacted size is
 * only known on the GPU (ALLOW_COMPACTION builds).
 */
void
main(void)
{
   /* Size of the destination AS up to the last node the encoder wrote. */
   uint32_t compacted_size =
      args.bvh_offset + DEREF(args.src).dst_node_offset * SIZEOF(tu_internal_node);

   /* Keep this in sync with the CPU-side computation in header() in
    * tu_acceleration_structure.cc: the serialized blob is the compacted AS
    * plus the serialization header and one uint64_t (BLAS pointer) per
    * instance. The original expression was missing the serialization-header
    * term, which made "size" below disagree with the CPU path.
    */
   uint32_t serialization_size = compacted_size +
      SIZEOF(vk_accel_struct_serialization_header) +
      SIZEOF(uint64_t) * args.instance_count;

   /* The in-memory size is the serialized size minus the serialization-only
    * parts, i.e. exactly compacted_size.
    */
   uint32_t size = serialization_size - SIZEOF(vk_accel_struct_serialization_header) -
                   SIZEOF(uint64_t) * args.instance_count;

   DEREF(args.dst).compacted_size = compacted_size;

   /* Workgroup count for the copy shader: 128 invocations, 16 bytes each. */
   DEREF(args.dst).copy_dispatch_size[0] = DIV_ROUND_UP(compacted_size, 16 * 128);
   DEREF(args.dst).copy_dispatch_size[1] = 1;
   DEREF(args.dst).copy_dispatch_size[2] = 1;

   DEREF(args.dst).serialization_size = serialization_size;

   DEREF(args.dst).size = size;
}
|
||||||
66
src/freedreno/vulkan/bvh/meson.build
Normal file
66
src/freedreno/vulkan/bvh/meson.build
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
# Copyright © 2022 Konstantin Seurer
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
# source file, output name, defines
|
||||||
|
# Each entry: [source file, output basename, extra -D defines]
bvh_shaders = [
  ['encode.comp', 'encode', []],
  ['header.comp', 'header', []],
  ['copy.comp',   'copy',   []],
]

tu_bvh_include_dir = dir_source_root + '/src/freedreno/vulkan/bvh'

# Headers shared between the build shaders; listed as dependencies so the
# SPIR-V targets rebuild when they change.
tu_bvh_includes = files(
  'tu_build_helpers.h',
  'tu_build_interface.h',
  'tu_bvh.h',
)

bvh_spv = []
foreach s : bvh_shaders
  command = [
    prog_glslang, '-V',
    '-I' + vk_bvh_include_dir,
    '-I' + tu_bvh_include_dir,
    '--target-env', 'spirv1.5',
    '-x', '-o', '@OUTPUT@', '@INPUT@',
  ]
  command += glslang_quiet

  foreach define : s[2]
    command += '-D' + define
  endforeach

  bvh_spv += custom_target(
    s[1] + '.spv.h',
    input : s[0],
    output : s[1] + '.spv.h',
    command : command,
    depend_files: [vk_bvh_includes, tu_bvh_includes],
  )
endforeach
|
||||||
37
src/freedreno/vulkan/bvh/tu_build_helpers.h
Normal file
37
src/freedreno/vulkan/bvh/tu_build_helpers.h
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TU_BVH_BUILD_HELPERS_H
#define TU_BVH_BUILD_HELPERS_H

#include "vk_build_helpers.h"
#include "tu_bvh.h"

/* Declare buffer-reference accessors for the turnip BVH structs; the second
 * argument is presumably the buffer_reference alignment required by the
 * TYPE() macro from vk_build_helpers.h — confirm against that header.
 */
TYPE(tu_accel_struct_header, 8);
TYPE(tu_leaf_node, 4);
TYPE(tu_internal_node, 4);
TYPE(tu_compressed_node, 4);
TYPE(tu_instance_descriptor, 8);

#endif
|
||||||
|
|
||||||
64
src/freedreno/vulkan/bvh/tu_build_interface.h
Normal file
64
src/freedreno/vulkan/bvh/tu_build_interface.h
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2022 Konstantin Seurer
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TU_BVH_BUILD_INTERFACE_H
#define TU_BVH_BUILD_INTERFACE_H

/* This header is shared between GLSL (build shaders) and C++ (driver code):
 * under VULKAN the REF()/VOID_REF macros come from the build helpers, on the
 * host side device addresses are plain 64-bit integers.
 */
#ifdef VULKAN
#include "tu_build_helpers.h"
#else
#include <stdint.h>
#include "tu_bvh.h"
#define REF(type) uint64_t
#define VOID_REF  uint64_t
#endif

/* Push constants for encode.comp. */
struct encode_args {
   VOID_REF intermediate_bvh;          /* common vk_ir_* intermediate BVH */
   VOID_REF output_bvh;                /* destination HW BVH nodes */
   REF(vk_ir_header) header;           /* intermediate build header */
   uint32_t output_bvh_offset;         /* offset of the BVH within the AS */
   uint32_t leaf_node_count;
   uint32_t geometry_type;             /* VkGeometryTypeKHR */
};

/* Push constants for header.comp. */
struct header_args {
   REF(vk_ir_header) src;
   REF(tu_accel_struct_header) dst;
   uint32_t bvh_offset;
   uint32_t instance_count;
};

/* Modes for copy.comp, matching vkCmdCopy*AccelerationStructure*KHR. */
#define TU_COPY_MODE_COPY        0
#define TU_COPY_MODE_SERIALIZE   1
#define TU_COPY_MODE_DESERIALIZE 2

/* Push constants for copy.comp. */
struct copy_args {
   VOID_REF src_addr;
   VOID_REF dst_addr;
   uint32_t mode;
};

#endif
|
||||||
|
|
||||||
138
src/freedreno/vulkan/bvh/tu_bvh.h
Normal file
138
src/freedreno/vulkan/bvh/tu_bvh.h
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2021 Bas Nieuwenhuizen
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TU_BVH_H
#define TU_BVH_H

#ifdef VULKAN
#define VK_UUID_SIZE 16
#else
#include <vulkan/vulkan.h>
#endif

#include "vk_bvh.h"

/* The size in bytes of each record in the D3D-style UAV descriptor for
 * acceleration structures. The first record is the acceleration struct header
 * and the rest are the instances.
 */
#define AS_RECORD_SIZE 128

/* The size of a BVH node as defined by the HW. */
#define AS_NODE_SIZE 64

struct tu_accel_struct_header {
   vk_aabb aabb;

   uint64_t bvh_ptr;

   /* This word contains flags that should be set in the leaf nodes for
    * instances pointing to this BLAS. ALL_NODES_{OPAQUE_NONOPAQUE} may be
    * modified by the FORCE_OPAQUE and FORCE_NON_OPAQUE instance flags.
    */
   uint32_t instance_flags;

   /* Everything after this gets either updated/copied from the CPU or written
    * by header.comp.
    */
   uint32_t copy_dispatch_size[3];

   uint64_t compacted_size;
   uint64_t serialization_size;
   uint64_t size;

   /* Everything after this gets updated/copied from the CPU. */
   uint64_t instance_count;

   uint64_t self_ptr;

   /* Pad the header out to AS_RECORD_SIZE. */
   uint32_t padding[10];
};

/* See
 * https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/a7xx-ray-tracing
 * for details of the encoding.
 */

#define TU_NODE_TYPE_TLAS      (1u << 24)
#define TU_NODE_TYPE_LEAF      (1u << 25)
#define TU_NODE_TYPE_NONOPAQUE (1u << 26)
#define TU_NODE_TYPE_AABB      (1u << 27)

#define TU_INTERSECTION_TYPE_TLAS      (1u << 8)
#define TU_INTERSECTION_TYPE_LEAF      (1u << 9)
#define TU_INTERSECTION_TYPE_NONOPAQUE (1u << 10)
#define TU_INTERSECTION_TYPE_AABB      (1u << 11)
#define TU_INTERSECTION_BACK_FACE      (1u << 12)

#define TU_INSTANCE_ALL_OPAQUE    (1u << 2)
#define TU_INSTANCE_ALL_NONOPAQUE (1u << 3)
#define TU_INSTANCE_ALL_AABB      (1u << 6)

struct tu_leaf_node {
   uint32_t id;
   float coords[3][3];
   uint32_t geometry_id; /* Ignored by HW, we use it to stash the geometry ID */
   uint32_t padding[4];
   uint32_t type_flags;
};

struct tu_internal_node {
   uint32_t id;
   uint16_t bases[3];
   uint8_t mantissas[8][2][3];
   uint8_t exponents[3];
   uint8_t child_count;
   uint16_t type_flags;
};

struct tu_compressed_node {
   uint32_t id;
   uint32_t bases[3];
   uint32_t data[12];
};

struct tu_instance_descriptor {
   uint64_t bvh_ptr;

   uint32_t custom_instance_index;

   /* lower 24 bits are the sbt offset, upper 8 bits are the
    * VkGeometryInstanceFlagsKHR
    */
   uint32_t sbt_offset_and_flags;

   mat3x4 wto_matrix;

   uint32_t bvh_offset;

   /* Pad to make the size a power of 2 so that addressing math is
    * simplified.
    */
   uint32_t reserved[3];

   /* Object to world matrix inverted from the initial transform. */
   mat3x4 otw_matrix;
};

#endif
|
||||||
|
|
||||||
|
|
@ -19,7 +19,10 @@ tu_entrypoints = custom_target(
|
||||||
|
|
||||||
|
|
||||||
libtu_files = files(
|
libtu_files = files(
|
||||||
|
'bvh/tu_bvh.h',
|
||||||
|
'bvh/tu_build_interface.h',
|
||||||
'layers/tu_rmv_layer.cc',
|
'layers/tu_rmv_layer.cc',
|
||||||
|
'tu_acceleration_structure.cc',
|
||||||
'tu_autotune.cc',
|
'tu_autotune.cc',
|
||||||
'tu_buffer.cc',
|
'tu_buffer.cc',
|
||||||
'tu_buffer_view.cc',
|
'tu_buffer_view.cc',
|
||||||
|
|
@ -47,6 +50,8 @@ libtu_files = files(
|
||||||
'tu_util.cc',
|
'tu_util.cc',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
subdir('bvh')
|
||||||
|
|
||||||
libtu_includes = [
|
libtu_includes = [
|
||||||
inc_include,
|
inc_include,
|
||||||
inc_src,
|
inc_src,
|
||||||
|
|
@ -160,7 +165,7 @@ endif
|
||||||
|
|
||||||
libvulkan_freedreno = shared_library(
|
libvulkan_freedreno = shared_library(
|
||||||
'vulkan_freedreno',
|
'vulkan_freedreno',
|
||||||
[libtu_files, tu_entrypoints, tu_tracepoints, freedreno_xml_header_files, sha1_h, u_format_pack_h],
|
[libtu_files, tu_entrypoints, tu_tracepoints, freedreno_xml_header_files, sha1_h, u_format_pack_h, bvh_spv],
|
||||||
include_directories : libtu_includes,
|
include_directories : libtu_includes,
|
||||||
link_with : [
|
link_with : [
|
||||||
libfreedreno_ir3,
|
libfreedreno_ir3,
|
||||||
|
|
|
||||||
763
src/freedreno/vulkan/tu_acceleration_structure.cc
Normal file
763
src/freedreno/vulkan/tu_acceleration_structure.cc
Normal file
|
|
@ -0,0 +1,763 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2021 Bas Nieuwenhuizen
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "tu_buffer.h"
|
||||||
|
#include "tu_device.h"
|
||||||
|
#include "tu_cmd_buffer.h"
|
||||||
|
|
||||||
|
#include "vk_acceleration_structure.h"
|
||||||
|
#include "tu_acceleration_structure.h"
|
||||||
|
#include "radix_sort/radix_sort_u64.h"
|
||||||
|
|
||||||
|
#include "util/u_hexdump.h"
|
||||||
|
|
||||||
|
#include "bvh/tu_build_interface.h"
|
||||||
|
|
||||||
|
static const uint32_t encode_spv[] = {
|
||||||
|
#include "bvh/encode.spv.h"
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint32_t header_spv[] = {
|
||||||
|
#include "bvh/header.spv.h"
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint32_t copy_spv[] = {
|
||||||
|
#include "bvh/copy.spv.h"
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(struct tu_instance_descriptor) == AS_RECORD_SIZE);
|
||||||
|
static_assert(sizeof(struct tu_accel_struct_header) == AS_RECORD_SIZE);
|
||||||
|
static_assert(sizeof(struct tu_internal_node) == AS_NODE_SIZE);
|
||||||
|
static_assert(sizeof(struct tu_leaf_node) == AS_NODE_SIZE);
|
||||||
|
|
||||||
|
/* Look up (or create and cache via vk_meta) the compute pipeline and layout
 * for one of the embedded BVH shaders. The shader name doubles as the meta
 * cache key; push_constant_size sizes the single push-constant range.
 */
static VkResult
get_pipeline_spv(struct tu_device *device,
                 const char *name, const uint32_t *spv, uint32_t spv_size,
                 unsigned push_constant_size,
                 VkPipeline *pipeline, VkPipelineLayout *layout)
{
   size_t key_size = strlen(name);

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .offset = 0,
      .size = push_constant_size,
   };

   VkResult result = vk_meta_get_pipeline_layout(&device->vk,
                                                 &device->meta, NULL,
                                                 &pc_range, name, key_size,
                                                 layout);
   if (result != VK_SUCCESS)
      return result;

   /* Fast path: pipeline already built for this key. */
   VkPipeline cached = vk_meta_lookup_pipeline(&device->meta, name, key_size);
   if (cached != VK_NULL_HANDLE) {
      *pipeline = cached;
      return VK_SUCCESS;
   }

   VkShaderModuleCreateInfo module_info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .codeSize = spv_size,
      .pCode = spv,
   };

   /* The module create info is chained into the stage rather than creating a
    * separate VkShaderModule.
    */
   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = &module_info,
      .flags = 0,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .flags = 0,
      .stage = shader_stage,
      .layout = *layout,
   };

   return vk_meta_create_compute_pipeline(&device->vk, &device->meta, &pipeline_info,
                                          name, key_size, pipeline);
}
|
||||||
|
|
||||||
|
struct bvh_layout {
|
||||||
|
uint64_t bvh_offset;
|
||||||
|
uint64_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
get_bvh_layout(VkGeometryTypeKHR geometry_type,
|
||||||
|
uint32_t leaf_count,
|
||||||
|
struct bvh_layout *layout)
|
||||||
|
{
|
||||||
|
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
|
||||||
|
|
||||||
|
uint64_t offset = sizeof(struct tu_accel_struct_header);
|
||||||
|
|
||||||
|
/* Instance descriptors, one per instance. */
|
||||||
|
if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
||||||
|
offset += leaf_count * sizeof(struct tu_instance_descriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent links, which have to go directly before bvh_offset as we index
|
||||||
|
* them using negative offsets from there.
|
||||||
|
*/
|
||||||
|
offset += (internal_count + leaf_count) * sizeof(uint32_t);
|
||||||
|
|
||||||
|
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
|
||||||
|
offset = ALIGN(offset, 64);
|
||||||
|
layout->bvh_offset = offset;
|
||||||
|
|
||||||
|
offset += internal_count * sizeof(struct tu_internal_node) +
|
||||||
|
leaf_count * sizeof(struct tu_leaf_node);
|
||||||
|
|
||||||
|
layout->size = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* vk_acceleration_structure_build_ops::get_as_size callback: total size in
 * bytes of the destination AS for the given build info and leaf count.
 */
VkDeviceSize get_bvh_size(VkDevice device,
                          const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
                          uint32_t leaf_count)
{
   struct bvh_layout layout;
   get_bvh_layout(vk_get_as_geometry_type(pBuildInfo), leaf_count, &layout);
   return layout.size;
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
encode_key(VkAccelerationStructureTypeKHR type,
|
||||||
|
VkBuildAccelerationStructureFlagBitsKHR flags)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Bind the encode pipeline before the common build code dispatches it. */
static VkResult
encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   VkPipeline pipeline;
   VkPipelineLayout layout;
   VkResult result =
      get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
                       sizeof(encode_args), &pipeline, &layout);
   if (result != VK_SUCCESS)
      return result;

   tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   return VK_SUCCESS;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
encode(VkCommandBuffer commandBuffer,
|
||||||
|
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
||||||
|
const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
|
||||||
|
VkDeviceAddress intermediate_as_addr,
|
||||||
|
VkDeviceAddress intermediate_header_addr,
|
||||||
|
uint32_t leaf_count,
|
||||||
|
uint32_t key,
|
||||||
|
struct vk_acceleration_structure *dst)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||||
|
struct tu_device *device = cmdbuf->device;
|
||||||
|
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);
|
||||||
|
|
||||||
|
VkPipeline pipeline;
|
||||||
|
VkPipelineLayout layout;
|
||||||
|
get_pipeline_spv(device, "encode", encode_spv, sizeof(encode_spv),
|
||||||
|
sizeof(encode_args), &pipeline, &layout);
|
||||||
|
|
||||||
|
struct bvh_layout bvh_layout;
|
||||||
|
get_bvh_layout(geometry_type, leaf_count, &bvh_layout);
|
||||||
|
|
||||||
|
const struct encode_args args = {
|
||||||
|
.intermediate_bvh = intermediate_as_addr,
|
||||||
|
.output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
|
||||||
|
.header = intermediate_header_addr,
|
||||||
|
.output_bvh_offset = bvh_layout.bvh_offset,
|
||||||
|
.leaf_node_count = leaf_count,
|
||||||
|
.geometry_type = geometry_type,
|
||||||
|
};
|
||||||
|
vk_common_CmdPushConstants(commandBuffer, layout,
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
|
||||||
|
&args);
|
||||||
|
|
||||||
|
tu_dispatch_unaligned_indirect(commandBuffer,
|
||||||
|
intermediate_header_addr +
|
||||||
|
offsetof(struct vk_ir_header, ir_internal_node_count));
|
||||||
|
|
||||||
|
*(VkDeviceSize *)
|
||||||
|
util_sparse_array_get(&device->accel_struct_ranges,
|
||||||
|
vk_acceleration_structure_get_va(dst)) = dst->size;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Don't bother copying over the compacted size using a compute shader if
|
||||||
|
* compaction is never going to happen.
|
||||||
|
*/
|
||||||
|
enum tu_header_key {
|
||||||
|
HEADER_NO_DISPATCH,
|
||||||
|
HEADER_USE_DISPATCH
|
||||||
|
};
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
header_key(VkAccelerationStructureTypeKHR type,
|
||||||
|
VkBuildAccelerationStructureFlagBitsKHR flags)
|
||||||
|
{
|
||||||
|
return (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) ?
|
||||||
|
HEADER_USE_DISPATCH : HEADER_NO_DISPATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Bind the header pipeline (compaction builds only), with a compute->compute
 * barrier so it observes the encode pass's writes.
 */
static VkResult
header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   struct tu_device *device = cmdbuf->device;

   /* HEADER_NO_DISPATCH builds write the header from the CP instead. */
   if (key == HEADER_USE_DISPATCH) {
      VkPipeline pipeline;
      VkPipelineLayout layout;
      VkResult result =
         get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
                          sizeof(header_args), &pipeline, &layout);
      if (result != VK_SUCCESS)
         return result;

      static const VkMemoryBarrier mb = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
      };

      vk_common_CmdPipelineBarrier(commandBuffer,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   0, 1, &mb, 0, NULL, 0, NULL);

      tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   }

   return VK_SUCCESS;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
header(VkCommandBuffer commandBuffer,
|
||||||
|
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
||||||
|
const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
|
||||||
|
VkDeviceAddress intermediate_as_addr,
|
||||||
|
VkDeviceAddress intermediate_header_addr,
|
||||||
|
uint32_t leaf_count,
|
||||||
|
uint32_t key,
|
||||||
|
struct vk_acceleration_structure *dst)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||||
|
struct tu_device *device = cmdbuf->device;
|
||||||
|
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);
|
||||||
|
|
||||||
|
struct bvh_layout bvh_layout;
|
||||||
|
get_bvh_layout(geometry_type, leaf_count, &bvh_layout);
|
||||||
|
|
||||||
|
VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);
|
||||||
|
|
||||||
|
size_t base = offsetof(struct tu_accel_struct_header, copy_dispatch_size);
|
||||||
|
|
||||||
|
uint32_t instance_count =
|
||||||
|
geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? leaf_count : 0;
|
||||||
|
|
||||||
|
if (key == HEADER_USE_DISPATCH) {
|
||||||
|
base = offsetof(struct tu_accel_struct_header, instance_count);
|
||||||
|
VkPipeline pipeline;
|
||||||
|
VkPipelineLayout layout;
|
||||||
|
get_pipeline_spv(device, "header", header_spv, sizeof(header_spv),
|
||||||
|
sizeof(header_args), &pipeline, &layout);
|
||||||
|
|
||||||
|
struct header_args args = {
|
||||||
|
.src = intermediate_header_addr,
|
||||||
|
.dst = vk_acceleration_structure_get_va(dst),
|
||||||
|
.bvh_offset = bvh_layout.bvh_offset,
|
||||||
|
.instance_count = instance_count,
|
||||||
|
};
|
||||||
|
|
||||||
|
vk_common_CmdPushConstants(commandBuffer, layout,
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args),
|
||||||
|
&args);
|
||||||
|
|
||||||
|
vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct tu_accel_struct_header header = {};
|
||||||
|
|
||||||
|
header.instance_count = instance_count;
|
||||||
|
header.self_ptr = header_addr;
|
||||||
|
header.compacted_size = bvh_layout.size;
|
||||||
|
|
||||||
|
header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 128);
|
||||||
|
header.copy_dispatch_size[1] = 1;
|
||||||
|
header.copy_dispatch_size[2] = 1;
|
||||||
|
|
||||||
|
header.serialization_size =
|
||||||
|
header.compacted_size +
|
||||||
|
sizeof(struct vk_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count;
|
||||||
|
|
||||||
|
header.size = header.serialization_size - sizeof(struct vk_accel_struct_serialization_header) -
|
||||||
|
sizeof(uint64_t) * header.instance_count;
|
||||||
|
|
||||||
|
struct tu_cs *cs = &cmdbuf->cs;
|
||||||
|
|
||||||
|
size_t header_size = sizeof(struct tu_accel_struct_header) - base;
|
||||||
|
assert(base % sizeof(uint32_t) == 0);
|
||||||
|
assert(header_size % sizeof(uint32_t) == 0);
|
||||||
|
uint32_t *header_ptr = (uint32_t *)((char *)&header + base);
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + header_size / sizeof(uint32_t));
|
||||||
|
tu_cs_emit_qw(cs, header_addr + base);
|
||||||
|
tu_cs_emit_array(cs, header_ptr, header_size / sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Callbacks handed to the common vk_acceleration_structure build framework.
 * Index 0 is the encode pass, index 1 the header pass.
 */
const struct vk_acceleration_structure_build_ops tu_as_build_ops = {
   .get_as_size = get_bvh_size,
   .get_encode_key = { encode_key, header_key },
   .encode_bind_pipeline = { encode_bind_pipeline, header_bind_pipeline },
   .encode_as = { encode, header },
};
|
||||||
|
|
||||||
|
/* Tuning parameters for the Fuchsia radix-sort library used to sort 64-bit
 * morton-code keyvals during the build.
 */
struct radix_sort_vk_target_config tu_radix_sort_config = {
   .keyval_dwords = 2,
   .init = { .workgroup_size_log2 = 8, },
   .fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
   .histogram = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .prefix = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
   },
   .scatter = {
      .workgroup_size_log2 = 8,
      .subgroup_size_log2 = 7,
      .block_rows = 14, /* TODO tune this */
   },
   .nonsequential_dispatch = false,
};
|
||||||
|
|
||||||
|
/* Lazily create the device-wide radix-sort instance on first AS build,
 * using double-checked locking around device->radix_sort.
 *
 * NOTE(review): the fast-path read of device->radix_sort happens outside the
 * mutex without atomics — presumably acceptable on the targeted platforms,
 * but worth confirming against the project's memory-ordering expectations.
 */
static VkResult
init_radix_sort(struct tu_device *device)
{
   if (!device->radix_sort) {
      mtx_lock(&device->radix_sort_mutex);
      /* Re-check under the lock: another thread may have won the race. */
      if (!device->radix_sort) {
         device->radix_sort =
            vk_create_radix_sort_u64(tu_device_to_handle(device),
                                     &device->vk.alloc,
                                     VK_NULL_HANDLE, tu_radix_sort_config);
         if (!device->radix_sort) {
            /* TODO plumb through the error here */
            mtx_unlock(&device->radix_sort_mutex);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
         }
      }
      mtx_unlock(&device->radix_sort_mutex);
   }

   return VK_SUCCESS;
}
|
||||||
|
|
||||||
|
struct tu_saved_compute_state {
|
||||||
|
uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
|
||||||
|
struct tu_shader *compute_shader;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu_save_compute_state(struct tu_cmd_buffer *cmd,
|
||||||
|
struct tu_saved_compute_state *state)
|
||||||
|
{
|
||||||
|
memcpy(state->push_constants, cmd->push_constants, sizeof(cmd->push_constants));
|
||||||
|
state->compute_shader = cmd->state.shaders[MESA_SHADER_COMPUTE];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu_restore_compute_state(struct tu_cmd_buffer *cmd,
|
||||||
|
struct tu_saved_compute_state *state)
|
||||||
|
{
|
||||||
|
cmd->state.shaders[MESA_SHADER_COMPUTE] = state->compute_shader;
|
||||||
|
if (state->compute_shader) {
|
||||||
|
tu_cs_emit_state_ib(&cmd->cs, state->compute_shader->state);
|
||||||
|
}
|
||||||
|
memcpy(cmd->push_constants, state->push_constants, sizeof(cmd->push_constants));
|
||||||
|
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* vkCmdBuildAccelerationStructuresKHR entry point: delegate the heavy
 * lifting to the common vk_acceleration_structure framework, bracketing it
 * with compute-state save/restore since the build runs meta dispatches.
 */
VKAPI_ATTR void VKAPI_CALL
tu_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_device *device = cmd->device;
   struct tu_saved_compute_state state;

   /* The common build path needs the radix sorter; create it on demand. */
   VkResult result = init_radix_sort(device);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   tu_save_compute_state(cmd, &state);

   struct vk_acceleration_structure_build_args args = {
      .subgroup_size = 128,
      .bvh_bounds_offset = offsetof(tu_accel_struct_header, aabb),
      .emit_markers = false,
      .radix_sort = device->radix_sort,
   };

   vk_cmd_build_acceleration_structures(commandBuffer,
                                        &device->vk,
                                        &device->meta,
                                        infoCount,
                                        pInfos,
                                        ppBuildRangeInfos,
                                        &args);

   tu_restore_compute_state(cmd, &state);
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
|
||||||
|
struct tu_saved_compute_state state;
|
||||||
|
|
||||||
|
VkPipeline pipeline;
|
||||||
|
VkPipelineLayout layout;
|
||||||
|
VkResult result =
|
||||||
|
get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
|
||||||
|
sizeof(copy_args), &pipeline, &layout);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_command_buffer_set_error(&cmd->vk, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tu_save_compute_state(cmd, &state);
|
||||||
|
|
||||||
|
tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||||
|
|
||||||
|
struct copy_args consts = {
|
||||||
|
.src_addr = vk_acceleration_structure_get_va(src),
|
||||||
|
.dst_addr = vk_acceleration_structure_get_va(dst),
|
||||||
|
.mode = TU_COPY_MODE_COPY,
|
||||||
|
};
|
||||||
|
|
||||||
|
vk_common_CmdPushConstants(commandBuffer, layout,
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
|
||||||
|
&consts);
|
||||||
|
|
||||||
|
TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
|
||||||
|
commandBuffer, src->buffer,
|
||||||
|
src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));
|
||||||
|
|
||||||
|
tu_restore_compute_state(cmd, &state);
|
||||||
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
|
||||||
|
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
|
||||||
|
struct tu_saved_compute_state state;
|
||||||
|
|
||||||
|
VkPipeline pipeline;
|
||||||
|
VkPipelineLayout layout;
|
||||||
|
VkResult result =
|
||||||
|
get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
|
||||||
|
sizeof(copy_args), &pipeline, &layout);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_command_buffer_set_error(&cmd->vk, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tu_save_compute_state(cmd, &state);
|
||||||
|
|
||||||
|
tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||||
|
|
||||||
|
const struct copy_args consts = {
|
||||||
|
.src_addr = pInfo->src.deviceAddress,
|
||||||
|
.dst_addr = vk_acceleration_structure_get_va(dst),
|
||||||
|
.mode = TU_COPY_MODE_DESERIALIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
vk_common_CmdPushConstants(commandBuffer, layout,
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
|
||||||
|
&consts);
|
||||||
|
|
||||||
|
vk_common_CmdDispatch(commandBuffer, 256, 1, 1);
|
||||||
|
|
||||||
|
tu_restore_compute_state(cmd, &state);
|
||||||
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
|
||||||
|
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
|
||||||
|
struct tu_saved_compute_state state;
|
||||||
|
|
||||||
|
VkPipeline pipeline;
|
||||||
|
VkPipelineLayout layout;
|
||||||
|
VkResult result =
|
||||||
|
get_pipeline_spv(cmd->device, "copy", copy_spv, sizeof(copy_spv),
|
||||||
|
sizeof(copy_args), &pipeline, &layout);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_command_buffer_set_error(&cmd->vk, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tu_save_compute_state(cmd, &state);
|
||||||
|
|
||||||
|
tu_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||||
|
|
||||||
|
const struct copy_args consts = {
|
||||||
|
.src_addr = vk_acceleration_structure_get_va(src),
|
||||||
|
.dst_addr = pInfo->dst.deviceAddress,
|
||||||
|
.mode = TU_COPY_MODE_SERIALIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
vk_common_CmdPushConstants(commandBuffer, layout,
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts),
|
||||||
|
&consts);
|
||||||
|
|
||||||
|
TU_CALLX(cmd->device, tu_CmdDispatchIndirect)(
|
||||||
|
commandBuffer, src->buffer,
|
||||||
|
src->offset + offsetof(struct tu_accel_struct_header, copy_dispatch_size));
|
||||||
|
|
||||||
|
tu_restore_compute_state(cmd, &state);
|
||||||
|
|
||||||
|
/* Set the header of the serialized data. */
|
||||||
|
uint32_t header_data[2 * VK_UUID_SIZE / 4];
|
||||||
|
memcpy(header_data, cmd->device->physical_device->driver_uuid, VK_UUID_SIZE);
|
||||||
|
memcpy(header_data + VK_UUID_SIZE / 4, cmd->device->physical_device->cache_uuid, VK_UUID_SIZE);
|
||||||
|
|
||||||
|
struct tu_cs *cs = &cmd->cs;
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 2 + ARRAY_SIZE(header_data));
|
||||||
|
tu_cs_emit_qw(cs, pInfo->dst.deviceAddress);
|
||||||
|
tu_cs_emit_array(cs, header_data, ARRAY_SIZE(header_data));
|
||||||
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
|
||||||
|
const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
|
||||||
|
const uint32_t *pMaxPrimitiveCounts,
|
||||||
|
VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_device, device, _device);
|
||||||
|
|
||||||
|
init_radix_sort(device);
|
||||||
|
|
||||||
|
struct vk_acceleration_structure_build_args args = {
|
||||||
|
.subgroup_size = 128,
|
||||||
|
.radix_sort = device->radix_sort,
|
||||||
|
};
|
||||||
|
|
||||||
|
vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
|
||||||
|
pSizeInfo, &args);
|
||||||
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
|
||||||
|
const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
|
||||||
|
VkAccelerationStructureCompatibilityKHR *pCompatibility)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_device, device, _device);
|
||||||
|
bool compat =
|
||||||
|
memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
|
||||||
|
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
|
||||||
|
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
|
||||||
|
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult
|
||||||
|
tu_init_null_accel_struct(struct tu_device *device)
|
||||||
|
{
|
||||||
|
VkResult result = tu_bo_init_new(device, NULL,
|
||||||
|
&device->null_accel_struct_bo,
|
||||||
|
sizeof(tu_accel_struct_header) +
|
||||||
|
sizeof(tu_internal_node),
|
||||||
|
TU_BO_ALLOC_NO_FLAGS, "null AS");
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = tu_bo_map(device, device->null_accel_struct_bo, NULL);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
tu_bo_finish(device, device->null_accel_struct_bo);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct tu_accel_struct_header header = {
|
||||||
|
.bvh_ptr = device->null_accel_struct_bo->iova +
|
||||||
|
sizeof(tu_accel_struct_header),
|
||||||
|
.self_ptr = device->null_accel_struct_bo->iova,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct tu_internal_node node = {
|
||||||
|
.child_count = 0,
|
||||||
|
.type_flags = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
node.mantissas[i][0][0] = 0xff;
|
||||||
|
node.mantissas[i][0][1] = 0xff;
|
||||||
|
node.mantissas[i][0][2] = 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(device->null_accel_struct_bo->map, (void *)&header, sizeof(header));
|
||||||
|
memcpy((char *)device->null_accel_struct_bo->map + sizeof(header),
|
||||||
|
(void *)&node, sizeof(node));
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct tu_node {
|
||||||
|
uint32_t data[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
dump_leaf(struct tu_leaf_node *node)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "\tID: %d\n", node->id);
|
||||||
|
fprintf(stderr, "\tgeometry ID: %d\n", node->geometry_id);
|
||||||
|
bool aabb = node->type_flags & TU_NODE_TYPE_AABB;
|
||||||
|
for (unsigned i = 0; i < (aabb ? 2 : 3); i++) {
|
||||||
|
fprintf(stderr, "\t(");
|
||||||
|
for (unsigned j = 0; j < 3; j++) {
|
||||||
|
if (j != 0)
|
||||||
|
fprintf(stderr, ", ");
|
||||||
|
fprintf(stderr, "%f", node->coords[i][j]);
|
||||||
|
}
|
||||||
|
fprintf(stderr, ")\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
dump_internal(struct tu_internal_node *node, uint32_t *max_child)
|
||||||
|
{
|
||||||
|
*max_child = MAX2(*max_child, node->id + node->child_count);
|
||||||
|
float base[3];
|
||||||
|
unsigned exponents[3];
|
||||||
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
|
base[i] = uif(node->bases[i] << 16);
|
||||||
|
exponents[i] = node->exponents[i] - 134;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < node->child_count; i++) {
|
||||||
|
fprintf(stderr, "\tchild %d\n", node->id + i);
|
||||||
|
for (unsigned vert = 0; vert < 2; vert++) {
|
||||||
|
fprintf(stderr, "\t\t(");
|
||||||
|
for (unsigned coord = 0; coord < 3; coord++) {
|
||||||
|
unsigned mantissa = node->mantissas[i][vert][coord];
|
||||||
|
if (coord != 0)
|
||||||
|
fprintf(stderr, ", ");
|
||||||
|
fprintf(stderr, "%f", base[coord] + ldexp((float)mantissa,
|
||||||
|
exponents[coord]));
|
||||||
|
}
|
||||||
|
fprintf(stderr, ")\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
dump_as(struct vk_acceleration_structure *as)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_buffer, buf, as->buffer);
|
||||||
|
|
||||||
|
struct tu_accel_struct_header *hdr =
|
||||||
|
(struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);
|
||||||
|
|
||||||
|
fprintf(stderr, "dumping AS at %" PRIx64 "\n", buf->iova + as->offset);
|
||||||
|
u_hexdump(stderr, (uint8_t *)hdr, sizeof(*hdr), false);
|
||||||
|
|
||||||
|
char *base = ((char *)buf->bo->map + (hdr->bvh_ptr - buf->iova));
|
||||||
|
struct tu_node *node = (struct tu_node *)base;
|
||||||
|
|
||||||
|
fprintf(stderr, "dumping nodes at %" PRIx64 "\n", hdr->bvh_ptr);
|
||||||
|
|
||||||
|
uint32_t max_child = 1;
|
||||||
|
for (unsigned i = 0; i < max_child; i++) {
|
||||||
|
uint32_t *parent_ptr = (uint32_t*)(base - (4 + 4 * i));
|
||||||
|
uint32_t parent = *parent_ptr;
|
||||||
|
fprintf(stderr, "node %d parent %d\n", i, parent);
|
||||||
|
u_hexdump(stderr, (uint8_t *)node, sizeof(*node), false);
|
||||||
|
if (node->data[15] & TU_NODE_TYPE_LEAF) {
|
||||||
|
/* TODO compressed leaves */
|
||||||
|
dump_leaf((struct tu_leaf_node *)node);
|
||||||
|
} else {
|
||||||
|
dump_internal((struct tu_internal_node *)node, &max_child);
|
||||||
|
}
|
||||||
|
|
||||||
|
node++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
as_finished(struct tu_device *dev, struct vk_acceleration_structure *as)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_buffer, buf, as->buffer);
|
||||||
|
tu_bo_map(dev, buf->bo, NULL);
|
||||||
|
|
||||||
|
struct tu_accel_struct_header *hdr =
|
||||||
|
(struct tu_accel_struct_header *)((char *)buf->bo->map + as->offset);
|
||||||
|
return hdr->self_ptr == buf->iova + as->offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_DestroyAccelerationStructureKHR(VkDevice _device,
|
||||||
|
VkAccelerationStructureKHR accelerationStructure,
|
||||||
|
const VkAllocationCallbacks *pAllocator)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_device, device, _device);
|
||||||
|
if (TU_DEBUG(DUMPAS)) {
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, as, accelerationStructure);
|
||||||
|
if (as_finished(device, as))
|
||||||
|
dump_as(as);
|
||||||
|
}
|
||||||
|
|
||||||
|
vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
|
||||||
|
pAllocator);
|
||||||
|
}
|
||||||
15
src/freedreno/vulkan/tu_acceleration_structure.h
Normal file
15
src/freedreno/vulkan/tu_acceleration_structure.h
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2024 Valve Corporation
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TU_ACCELERATION_STRUCT_H
|
||||||
|
#define TU_ACCELERATION_STRUCT_H
|
||||||
|
|
||||||
|
#include "tu_common.h"
|
||||||
|
|
||||||
|
VkResult tu_init_null_accel_struct(struct tu_device *device);
|
||||||
|
|
||||||
|
extern const vk_acceleration_structure_build_ops tu_as_build_ops;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -3958,13 +3958,26 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only
|
||||||
VK_ACCESS_2_SHADER_READ_BIT |
|
VK_ACCESS_2_SHADER_READ_BIT |
|
||||||
VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
VK_ACCESS_2_SHADER_SAMPLED_READ_BIT |
|
||||||
VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
|
||||||
VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR,
|
VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR |
|
||||||
|
VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR,
|
||||||
VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
|
VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
|
||||||
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
|
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
|
||||||
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT |
|
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT |
|
||||||
|
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
|
||||||
|
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
|
||||||
SHADER_STAGES))
|
SHADER_STAGES))
|
||||||
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
|
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
|
||||||
|
|
||||||
|
/* Reading the AS for copying involves doing CmdDispatchIndirect with the
|
||||||
|
* copy size as a parameter, so it's read by the CP as well as a shader.
|
||||||
|
*/
|
||||||
|
if (gfx_read_access(flags, stages,
|
||||||
|
VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR,
|
||||||
|
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
|
||||||
|
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR))
|
||||||
|
mask |= TU_ACCESS_SYSMEM_READ;
|
||||||
|
|
||||||
|
|
||||||
if (gfx_read_access(flags, stages,
|
if (gfx_read_access(flags, stages,
|
||||||
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT,
|
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT,
|
||||||
SHADER_STAGES))
|
SHADER_STAGES))
|
||||||
|
|
@ -3985,6 +3998,11 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only
|
||||||
SHADER_STAGES))
|
SHADER_STAGES))
|
||||||
mask |= TU_ACCESS_UCHE_WRITE;
|
mask |= TU_ACCESS_UCHE_WRITE;
|
||||||
|
|
||||||
|
if (gfx_write_access(flags, stages,
|
||||||
|
VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||||
|
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR))
|
||||||
|
mask |= TU_ACCESS_UCHE_WRITE | TU_ACCESS_CP_WRITE;
|
||||||
|
|
||||||
/* When using GMEM, the CCU is always flushed automatically to GMEM, and
|
/* When using GMEM, the CCU is always flushed automatically to GMEM, and
|
||||||
* then GMEM is flushed to sysmem. Furthermore, we already had to flush any
|
* then GMEM is flushed to sysmem. Furthermore, we already had to flush any
|
||||||
* previous writes in sysmem mode when transitioning to GMEM. Therefore we
|
* previous writes in sysmem mode when transitioning to GMEM. Therefore we
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
#include "util/mesa-sha1.h"
|
#include "util/mesa-sha1.h"
|
||||||
#include "vk_descriptors.h"
|
#include "vk_descriptors.h"
|
||||||
#include "vk_util.h"
|
#include "vk_util.h"
|
||||||
|
#include "vk_acceleration_structure.h"
|
||||||
|
|
||||||
#include "tu_buffer.h"
|
#include "tu_buffer.h"
|
||||||
#include "tu_buffer_view.h"
|
#include "tu_buffer_view.h"
|
||||||
|
|
@ -33,6 +34,7 @@
|
||||||
#include "tu_image.h"
|
#include "tu_image.h"
|
||||||
#include "tu_formats.h"
|
#include "tu_formats.h"
|
||||||
#include "tu_rmv.h"
|
#include "tu_rmv.h"
|
||||||
|
#include "bvh/tu_build_interface.h"
|
||||||
|
|
||||||
static inline uint8_t *
|
static inline uint8_t *
|
||||||
pool_base(struct tu_descriptor_pool *pool)
|
pool_base(struct tu_descriptor_pool *pool)
|
||||||
|
|
@ -67,6 +69,7 @@ descriptor_size(struct tu_device *dev,
|
||||||
COND(dev->physical_device->info->a7xx.storage_8bit, 1));
|
COND(dev->physical_device->info->a7xx.storage_8bit, 1));
|
||||||
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
|
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
|
||||||
return binding->descriptorCount;
|
return binding->descriptorCount;
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
|
||||||
default:
|
default:
|
||||||
return A6XX_TEX_CONST_DWORDS * 4;
|
return A6XX_TEX_CONST_DWORDS * 4;
|
||||||
}
|
}
|
||||||
|
|
@ -1156,6 +1159,27 @@ write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
|
||||||
memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
|
memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
write_accel_struct(uint32_t *dst, uint64_t va, uint64_t size)
|
||||||
|
{
|
||||||
|
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
|
||||||
|
|
||||||
|
/* The overall range of the entire AS may be more than the max range, but
|
||||||
|
* the SSBO is only used to access the instance descriptors and header.
|
||||||
|
* Make sure that we don't specify a too-large range.
|
||||||
|
*/
|
||||||
|
dst[1] = MAX2(DIV_ROUND_UP(size, AS_RECORD_SIZE), MAX_TEXEL_ELEMENTS);
|
||||||
|
dst[2] =
|
||||||
|
A6XX_TEX_CONST_2_STRUCTSIZETEXELS(AS_RECORD_SIZE / 4) |
|
||||||
|
A6XX_TEX_CONST_2_STARTOFFSETTEXELS(0) |
|
||||||
|
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
|
||||||
|
dst[3] = 0;
|
||||||
|
dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
|
||||||
|
dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
|
||||||
|
for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
|
||||||
|
dst[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* note: this is used with immutable samplers in push descriptors */
|
/* note: this is used with immutable samplers in push descriptors */
|
||||||
static void
|
static void
|
||||||
write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
|
write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
|
||||||
|
|
@ -1203,6 +1227,18 @@ tu_GetDescriptorEXT(
|
||||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||||
write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler);
|
write_sampler_descriptor(dest, *pDescriptorInfo->data.pSampler);
|
||||||
break;
|
break;
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||||
|
if (pDescriptorInfo->data.accelerationStructure == 0) {
|
||||||
|
write_accel_struct(dest, device->null_accel_struct_bo->iova,
|
||||||
|
device->null_accel_struct_bo->size);
|
||||||
|
} else {
|
||||||
|
VkDeviceSize size = *(VkDeviceSize *)
|
||||||
|
util_sparse_array_get(&device->accel_struct_ranges,
|
||||||
|
pDescriptorInfo->data.accelerationStructure);
|
||||||
|
write_accel_struct(dest, pDescriptorInfo->data.accelerationStructure, size);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||||
write_image_descriptor(dest, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
|
write_image_descriptor(dest, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
|
||||||
pDescriptorInfo->data.pInputAttachmentImage);
|
pDescriptorInfo->data.pInputAttachmentImage);
|
||||||
|
|
@ -1236,6 +1272,8 @@ tu_update_descriptor_sets(const struct tu_device *device,
|
||||||
ptr += binding_layout->offset / 4;
|
ptr += binding_layout->offset / 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const VkWriteDescriptorSetAccelerationStructureKHR *accel_structs = NULL;
|
||||||
|
|
||||||
/* for immutable samplers with push descriptors: */
|
/* for immutable samplers with push descriptors: */
|
||||||
const bool copy_immutable_samplers =
|
const bool copy_immutable_samplers =
|
||||||
dstSetOverride && binding_layout->immutable_samplers_offset;
|
dstSetOverride && binding_layout->immutable_samplers_offset;
|
||||||
|
|
@ -1279,6 +1317,9 @@ tu_update_descriptor_sets(const struct tu_device *device,
|
||||||
} while (remaining > 0);
|
} while (remaining > 0);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
} else if (writeset->descriptorType ==
|
||||||
|
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
|
||||||
|
accel_structs = vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR);
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr += binding_layout->size / 4 * writeset->dstArrayElement;
|
ptr += binding_layout->size / 4 * writeset->dstArrayElement;
|
||||||
|
|
@ -1316,6 +1357,18 @@ tu_update_descriptor_sets(const struct tu_device *device,
|
||||||
else if (copy_immutable_samplers)
|
else if (copy_immutable_samplers)
|
||||||
write_sampler_push(ptr, &samplers[writeset->dstArrayElement + j]);
|
write_sampler_push(ptr, &samplers[writeset->dstArrayElement + j]);
|
||||||
break;
|
break;
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accel_structs->pAccelerationStructures[j]);
|
||||||
|
if (accel_struct) {
|
||||||
|
write_accel_struct(ptr,
|
||||||
|
vk_acceleration_structure_get_va(accel_struct),
|
||||||
|
accel_struct->size);
|
||||||
|
} else {
|
||||||
|
write_accel_struct(ptr, device->null_accel_struct_bo->iova,
|
||||||
|
device->null_accel_struct_bo->size);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
unreachable("unimplemented descriptor type");
|
unreachable("unimplemented descriptor type");
|
||||||
break;
|
break;
|
||||||
|
|
@ -1646,6 +1699,18 @@ tu_update_descriptor_set_with_template(
|
||||||
else if (samplers)
|
else if (samplers)
|
||||||
write_sampler_push(ptr, &samplers[j]);
|
write_sampler_push(ptr, &samplers[j]);
|
||||||
break;
|
break;
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, *(const VkAccelerationStructureKHR *)src);
|
||||||
|
if (accel_struct) {
|
||||||
|
write_accel_struct(ptr,
|
||||||
|
vk_acceleration_structure_get_va(accel_struct),
|
||||||
|
accel_struct->size);
|
||||||
|
} else {
|
||||||
|
write_accel_struct(ptr, device->null_accel_struct_bo->iova,
|
||||||
|
device->null_accel_struct_bo->size);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
unreachable("unimplemented descriptor type");
|
unreachable("unimplemented descriptor type");
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@
|
||||||
#include "freedreno/common/freedreno_uuid.h"
|
#include "freedreno/common/freedreno_uuid.h"
|
||||||
#include "freedreno/common/freedreno_stompable_regs.h"
|
#include "freedreno/common/freedreno_stompable_regs.h"
|
||||||
|
|
||||||
|
#include "tu_acceleration_structure.h"
|
||||||
#include "tu_clear_blit.h"
|
#include "tu_clear_blit.h"
|
||||||
#include "tu_cmd_buffer.h"
|
#include "tu_cmd_buffer.h"
|
||||||
#include "tu_cs.h"
|
#include "tu_cs.h"
|
||||||
|
|
@ -143,9 +144,19 @@ static void
|
||||||
get_device_extensions(const struct tu_physical_device *device,
|
get_device_extensions(const struct tu_physical_device *device,
|
||||||
struct vk_device_extension_table *ext)
|
struct vk_device_extension_table *ext)
|
||||||
{
|
{
|
||||||
|
/* device->has_raytracing contains the value of the SW fuse. If the
|
||||||
|
* device doesn't have a fuse (i.e. a740), we have to ignore it because
|
||||||
|
* kgsl returns false. If it does have a fuse, enable raytracing if the
|
||||||
|
* fuse is set and we have ray_intersection.
|
||||||
|
*/
|
||||||
|
bool has_raytracing =
|
||||||
|
device->info->a7xx.has_ray_intersection &&
|
||||||
|
(!device->info->a7xx.has_sw_fuse || device->has_raytracing);
|
||||||
|
|
||||||
*ext = (struct vk_device_extension_table) { .table = {
|
*ext = (struct vk_device_extension_table) { .table = {
|
||||||
.KHR_8bit_storage = device->info->a7xx.storage_8bit,
|
.KHR_8bit_storage = device->info->a7xx.storage_8bit,
|
||||||
.KHR_16bit_storage = device->info->a6xx.storage_16bit,
|
.KHR_16bit_storage = device->info->a6xx.storage_16bit,
|
||||||
|
.KHR_acceleration_structure = has_raytracing,
|
||||||
.KHR_bind_memory2 = true,
|
.KHR_bind_memory2 = true,
|
||||||
.KHR_buffer_device_address = true,
|
.KHR_buffer_device_address = true,
|
||||||
.KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
|
.KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
|
||||||
|
|
@ -153,6 +164,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
||||||
.KHR_copy_commands2 = true,
|
.KHR_copy_commands2 = true,
|
||||||
.KHR_create_renderpass2 = true,
|
.KHR_create_renderpass2 = true,
|
||||||
.KHR_dedicated_allocation = true,
|
.KHR_dedicated_allocation = true,
|
||||||
|
.KHR_deferred_host_operations = true,
|
||||||
.KHR_depth_stencil_resolve = true,
|
.KHR_depth_stencil_resolve = true,
|
||||||
.KHR_descriptor_update_template = true,
|
.KHR_descriptor_update_template = true,
|
||||||
.KHR_device_group = true,
|
.KHR_device_group = true,
|
||||||
|
|
@ -466,6 +478,11 @@ tu_get_features(struct tu_physical_device *pdevice,
|
||||||
/* Vulkan 1.4 */
|
/* Vulkan 1.4 */
|
||||||
features->pushDescriptor = true;
|
features->pushDescriptor = true;
|
||||||
|
|
||||||
|
/* VK_KHR_acceleration_structure */
|
||||||
|
features->accelerationStructure = true;
|
||||||
|
features->accelerationStructureCaptureReplay = pdevice->has_set_iova;
|
||||||
|
features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
|
||||||
|
|
||||||
/* VK_KHR_compute_shader_derivatives */
|
/* VK_KHR_compute_shader_derivatives */
|
||||||
features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
|
features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
|
||||||
features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;
|
features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;
|
||||||
|
|
@ -1219,6 +1236,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
||||||
COND(pdevice->info->a7xx.storage_8bit, 1));
|
COND(pdevice->info->a7xx.storage_8bit, 1));
|
||||||
props->robustStorageBufferDescriptorSize =
|
props->robustStorageBufferDescriptorSize =
|
||||||
props->storageBufferDescriptorSize;
|
props->storageBufferDescriptorSize;
|
||||||
|
props->accelerationStructureDescriptorSize = 4 * A6XX_TEX_CONST_DWORDS;
|
||||||
props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
|
props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
|
||||||
props->maxSamplerDescriptorBufferRange = ~0ull;
|
props->maxSamplerDescriptorBufferRange = ~0ull;
|
||||||
props->maxResourceDescriptorBufferRange = ~0ull;
|
props->maxResourceDescriptorBufferRange = ~0ull;
|
||||||
|
|
@ -1304,6 +1322,16 @@ tu_get_properties(struct tu_physical_device *pdevice,
|
||||||
|
|
||||||
memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
|
memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* VK_KHR_acceleration_structure */
|
||||||
|
props->maxGeometryCount = (1 << 24) - 1;
|
||||||
|
props->maxInstanceCount = (1 << 24) - 1;
|
||||||
|
props->maxPrimitiveCount = (1 << 29) - 1;
|
||||||
|
props->maxPerStageDescriptorAccelerationStructures = max_descriptor_set_size;
|
||||||
|
props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
|
||||||
|
props->maxDescriptorSetAccelerationStructures = max_descriptor_set_size;
|
||||||
|
props->maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
|
||||||
|
props->minAccelerationStructureScratchOffsetAlignment = 128;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
|
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
|
||||||
|
|
@ -2416,6 +2444,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||||
}
|
}
|
||||||
|
|
||||||
device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
|
device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
|
||||||
|
device->vk.as_build_ops = &tu_as_build_ops;
|
||||||
device->vk.check_status = tu_device_check_status;
|
device->vk.check_status = tu_device_check_status;
|
||||||
device->vk.get_timestamp = tu_device_get_timestamp;
|
device->vk.get_timestamp = tu_device_get_timestamp;
|
||||||
|
|
||||||
|
|
@ -2479,6 +2508,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
goto fail_queues;
|
goto fail_queues;
|
||||||
|
|
||||||
|
util_sparse_array_init(&device->accel_struct_ranges, sizeof(VkDeviceSize), 256);
|
||||||
|
|
||||||
|
mtx_init(&device->radix_sort_mutex, mtx_plain);
|
||||||
|
|
||||||
{
|
{
|
||||||
struct ir3_compiler_options ir3_options = {
|
struct ir3_compiler_options ir3_options = {
|
||||||
.push_ubo_with_preamble = true,
|
.push_ubo_with_preamble = true,
|
||||||
|
|
@ -2555,6 +2588,15 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||||
device->global_bo_map = global;
|
device->global_bo_map = global;
|
||||||
tu_init_clear_blit_shaders(device);
|
tu_init_clear_blit_shaders(device);
|
||||||
|
|
||||||
|
if (device->vk.enabled_features.accelerationStructure &&
|
||||||
|
device->vk.enabled_features.nullDescriptor) {
|
||||||
|
result = tu_init_null_accel_struct(device);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_startup_errorf(device->instance, result, "null acceleration struct");
|
||||||
|
goto fail_null_accel_struct;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
result = tu_init_empty_shaders(device);
|
result = tu_init_empty_shaders(device);
|
||||||
if (result != VK_SUCCESS) {
|
if (result != VK_SUCCESS) {
|
||||||
vk_startup_errorf(device->instance, result, "empty shaders");
|
vk_startup_errorf(device->instance, result, "empty shaders");
|
||||||
|
|
@ -2738,6 +2780,9 @@ fail_pipeline_cache:
|
||||||
fail_dynamic_rendering:
|
fail_dynamic_rendering:
|
||||||
tu_destroy_empty_shaders(device);
|
tu_destroy_empty_shaders(device);
|
||||||
fail_empty_shaders:
|
fail_empty_shaders:
|
||||||
|
if (device->null_accel_struct_bo)
|
||||||
|
tu_bo_finish(device, device->null_accel_struct_bo);
|
||||||
|
fail_null_accel_struct:
|
||||||
tu_destroy_clear_blit_shaders(device);
|
tu_destroy_clear_blit_shaders(device);
|
||||||
fail_global_bo_map:
|
fail_global_bo_map:
|
||||||
TU_RMV(resource_destroy, device, device->global_bo);
|
TU_RMV(resource_destroy, device, device->global_bo);
|
||||||
|
|
@ -2752,6 +2797,7 @@ fail_free_zombie_vma:
|
||||||
u_vector_finish(&device->zombie_vmas);
|
u_vector_finish(&device->zombie_vmas);
|
||||||
ir3_compiler_destroy(device->compiler);
|
ir3_compiler_destroy(device->compiler);
|
||||||
fail_compiler:
|
fail_compiler:
|
||||||
|
util_sparse_array_finish(&device->accel_struct_ranges);
|
||||||
vk_meta_device_finish(&device->vk, &device->meta);
|
vk_meta_device_finish(&device->vk, &device->meta);
|
||||||
fail_queues:
|
fail_queues:
|
||||||
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
||||||
|
|
@ -2804,6 +2850,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
||||||
|
|
||||||
vk_meta_device_finish(&device->vk, &device->meta);
|
vk_meta_device_finish(&device->vk, &device->meta);
|
||||||
|
|
||||||
|
util_sparse_array_finish(&device->accel_struct_ranges);
|
||||||
|
|
||||||
ir3_compiler_destroy(device->compiler);
|
ir3_compiler_destroy(device->compiler);
|
||||||
|
|
||||||
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
|
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
|
||||||
|
|
@ -2832,6 +2880,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
||||||
|
|
||||||
tu_bo_finish(device, device->global_bo);
|
tu_bo_finish(device, device->global_bo);
|
||||||
|
|
||||||
|
if (device->null_accel_struct_bo)
|
||||||
|
tu_bo_finish(device, device->null_accel_struct_bo);
|
||||||
|
|
||||||
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
||||||
for (unsigned q = 0; q < device->queue_count[i]; q++)
|
for (unsigned q = 0; q < device->queue_count[i]; q++)
|
||||||
tu_queue_finish(&device->queues[i][q]);
|
tu_queue_finish(&device->queues[i][q]);
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,8 @@
|
||||||
#include "tu_suballoc.h"
|
#include "tu_suballoc.h"
|
||||||
#include "tu_util.h"
|
#include "tu_util.h"
|
||||||
|
|
||||||
|
#include "radix_sort/radix_sort_vk.h"
|
||||||
|
|
||||||
#include "common/freedreno_rd_output.h"
|
#include "common/freedreno_rd_output.h"
|
||||||
#include "util/vma.h"
|
#include "util/vma.h"
|
||||||
#include "util/u_vector.h"
|
#include "util/u_vector.h"
|
||||||
|
|
@ -290,6 +292,11 @@ struct tu_device
|
||||||
|
|
||||||
struct vk_meta_device meta;
|
struct vk_meta_device meta;
|
||||||
|
|
||||||
|
radix_sort_vk_t *radix_sort;
|
||||||
|
mtx_t radix_sort_mutex;
|
||||||
|
|
||||||
|
struct util_sparse_array accel_struct_ranges;
|
||||||
|
|
||||||
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
|
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
|
||||||
|
|
||||||
/* Currently the kernel driver uses a 32-bit GPU address space, but it
|
/* Currently the kernel driver uses a 32-bit GPU address space, but it
|
||||||
|
|
@ -306,6 +313,8 @@ struct tu_device
|
||||||
struct tu_bo *global_bo;
|
struct tu_bo *global_bo;
|
||||||
struct tu6_global *global_bo_map;
|
struct tu6_global *global_bo_map;
|
||||||
|
|
||||||
|
struct tu_bo *null_accel_struct_bo;
|
||||||
|
|
||||||
uint32_t implicit_sync_bo_count;
|
uint32_t implicit_sync_bo_count;
|
||||||
|
|
||||||
/* Device-global BO suballocator for reducing BO management overhead for
|
/* Device-global BO suballocator for reducing BO management overhead for
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
#include "vk_android.h"
|
#include "vk_android.h"
|
||||||
#include "vk_enum_defines.h"
|
#include "vk_enum_defines.h"
|
||||||
#include "vk_util.h"
|
#include "vk_util.h"
|
||||||
|
#include "vk_acceleration_structure.h"
|
||||||
#include "drm-uapi/drm_fourcc.h"
|
#include "drm-uapi/drm_fourcc.h"
|
||||||
|
|
||||||
#include "tu_android.h"
|
#include "tu_android.h"
|
||||||
|
|
@ -270,6 +271,9 @@ tu_physical_device_get_format_properties(
|
||||||
if (vk_format == VK_FORMAT_R8_UINT)
|
if (vk_format == VK_FORMAT_R8_UINT)
|
||||||
optimal |= VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
optimal |= VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||||
|
|
||||||
|
if (vk_acceleration_struct_vtx_format_supported(vk_format))
|
||||||
|
buffer |= VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR;
|
||||||
|
|
||||||
end:
|
end:
|
||||||
out_properties->linearTilingFeatures = linear;
|
out_properties->linearTilingFeatures = linear;
|
||||||
out_properties->optimalTilingFeatures = optimal;
|
out_properties->optimalTilingFeatures = optimal;
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,7 @@ tu6_load_state_size(struct tu_pipeline *pipeline,
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
|
||||||
/* IBO-backed resources only need one packet for all graphics stages */
|
/* IBO-backed resources only need one packet for all graphics stages */
|
||||||
if (stage_count)
|
if (stage_count)
|
||||||
count += 1;
|
count += 1;
|
||||||
|
|
@ -174,7 +175,8 @@ tu6_emit_load_state(struct tu_device *device,
|
||||||
FALLTHROUGH;
|
FALLTHROUGH;
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
|
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||||
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
|
||||||
unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4);
|
unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4);
|
||||||
/* IBO-backed resources only need one packet for all graphics stages */
|
/* IBO-backed resources only need one packet for all graphics stages */
|
||||||
if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
|
if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,11 @@
|
||||||
#include "nir/nir_builder.h"
|
#include "nir/nir_builder.h"
|
||||||
#include "util/os_time.h"
|
#include "util/os_time.h"
|
||||||
|
|
||||||
|
#include "vk_acceleration_structure.h"
|
||||||
#include "vk_util.h"
|
#include "vk_util.h"
|
||||||
|
|
||||||
#include "tu_buffer.h"
|
#include "tu_buffer.h"
|
||||||
|
#include "bvh/tu_build_interface.h"
|
||||||
#include "tu_cmd_buffer.h"
|
#include "tu_cmd_buffer.h"
|
||||||
#include "tu_cs.h"
|
#include "tu_cs.h"
|
||||||
#include "tu_device.h"
|
#include "tu_device.h"
|
||||||
|
|
@ -92,6 +94,11 @@ struct PACKED primitives_generated_query_slot {
|
||||||
uint64_t end;
|
uint64_t end;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PACKED accel_struct_slot {
|
||||||
|
struct query_slot common;
|
||||||
|
uint64_t result;
|
||||||
|
};
|
||||||
|
|
||||||
/* Returns the IOVA or mapped address of a given uint64_t field
|
/* Returns the IOVA or mapped address of a given uint64_t field
|
||||||
* in a given slot of a query pool. */
|
* in a given slot of a query pool. */
|
||||||
#define query_iova(type, pool, query, field) \
|
#define query_iova(type, pool, query, field) \
|
||||||
|
|
@ -251,6 +258,12 @@ tu_CreateQueryPool(VkDevice _device,
|
||||||
pool_size += sizeof(struct tu_perf_query_data) *
|
pool_size += sizeof(struct tu_perf_query_data) *
|
||||||
perf_query_info->counterIndexCount;
|
perf_query_info->counterIndexCount;
|
||||||
break;
|
break;
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
|
slot_size = sizeof(struct accel_struct_slot);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||||
slot_size = sizeof(struct pipeline_stat_query_slot);
|
slot_size = sizeof(struct pipeline_stat_query_slot);
|
||||||
|
|
@ -366,6 +379,10 @@ get_result_count(struct tu_query_pool *pool)
|
||||||
case VK_QUERY_TYPE_OCCLUSION:
|
case VK_QUERY_TYPE_OCCLUSION:
|
||||||
case VK_QUERY_TYPE_TIMESTAMP:
|
case VK_QUERY_TYPE_TIMESTAMP:
|
||||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
return 1;
|
return 1;
|
||||||
/* Transform feedback queries write two integer values */
|
/* Transform feedback queries write two integer values */
|
||||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||||
|
|
@ -586,6 +603,10 @@ tu_GetQueryPoolResults(VkDevice _device,
|
||||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
return get_query_pool_results(device, pool, firstQuery, queryCount,
|
return get_query_pool_results(device, pool, firstQuery, queryCount,
|
||||||
dataSize, pData, stride, flags);
|
dataSize, pData, stride, flags);
|
||||||
default:
|
default:
|
||||||
|
|
@ -735,6 +756,10 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
||||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
return emit_copy_query_pool_results<CHIP>(cmdbuf, cs, pool, firstQuery,
|
return emit_copy_query_pool_results<CHIP>(cmdbuf, cs, pool, firstQuery,
|
||||||
queryCount, buffer, dstOffset,
|
queryCount, buffer, dstOffset,
|
||||||
stride, flags);
|
stride, flags);
|
||||||
|
|
@ -802,6 +827,10 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
||||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
|
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -1710,6 +1739,56 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
||||||
handle_multiview_queries(cmd, pool, query);
|
handle_multiview_queries(cmd, pool, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
tu_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
|
||||||
|
uint32_t accelerationStructureCount,
|
||||||
|
const VkAccelerationStructureKHR *pAccelerationStructures,
|
||||||
|
VkQueryType queryType,
|
||||||
|
VkQueryPool queryPool,
|
||||||
|
uint32_t firstQuery)
|
||||||
|
{
|
||||||
|
VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||||
|
VK_FROM_HANDLE(tu_query_pool, pool, queryPool);
|
||||||
|
|
||||||
|
struct tu_cs *cs = &cmd->cs;
|
||||||
|
|
||||||
|
/* Flush any AS builds */
|
||||||
|
tu_emit_cache_flush<A7XX>(cmd);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < accelerationStructureCount; ++i) {
|
||||||
|
uint32_t query = i + firstQuery;
|
||||||
|
|
||||||
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pAccelerationStructures[i]);
|
||||||
|
uint64_t va = vk_acceleration_structure_get_va(accel_struct);
|
||||||
|
|
||||||
|
switch (queryType) {
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||||
|
va += offsetof(struct tu_accel_struct_header, compacted_size);
|
||||||
|
break;
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||||
|
va += offsetof(struct tu_accel_struct_header, serialization_size);
|
||||||
|
break;
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||||
|
va += offsetof(struct tu_accel_struct_header, instance_count);
|
||||||
|
break;
|
||||||
|
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||||
|
va += offsetof(struct tu_accel_struct_header, size);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unhandle accel struct query type.");
|
||||||
|
}
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
|
||||||
|
tu_cs_emit(cs, CP_MEM_TO_MEM_0_DOUBLE);
|
||||||
|
tu_cs_emit_qw(cs, query_result_iova(pool, query, uint64_t, 0));
|
||||||
|
tu_cs_emit_qw(cs, va);
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
|
||||||
|
tu_cs_emit_qw(cs, query_available_iova(pool, query));
|
||||||
|
tu_cs_emit_qw(cs, 0x1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
VKAPI_ATTR VkResult VKAPI_CALL
|
VKAPI_ATTR VkResult VKAPI_CALL
|
||||||
tu_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
tu_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
||||||
VkPhysicalDevice physicalDevice,
|
VkPhysicalDevice physicalDevice,
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ static const struct debug_control tu_debug_options[] = {
|
||||||
{ "hiprio", TU_DEBUG_HIPRIO },
|
{ "hiprio", TU_DEBUG_HIPRIO },
|
||||||
{ "noconcurrentresolves", TU_DEBUG_NO_CONCURRENT_RESOLVES },
|
{ "noconcurrentresolves", TU_DEBUG_NO_CONCURRENT_RESOLVES },
|
||||||
{ "noconcurrentunresolves", TU_DEBUG_NO_CONCURRENT_UNRESOLVES },
|
{ "noconcurrentunresolves", TU_DEBUG_NO_CONCURRENT_UNRESOLVES },
|
||||||
|
{ "dumpas", TU_DEBUG_DUMPAS },
|
||||||
{ NULL, 0 }
|
{ NULL, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ enum tu_debug_flags
|
||||||
TU_DEBUG_HIPRIO = 1 << 26,
|
TU_DEBUG_HIPRIO = 1 << 26,
|
||||||
TU_DEBUG_NO_CONCURRENT_RESOLVES = 1 << 27,
|
TU_DEBUG_NO_CONCURRENT_RESOLVES = 1 << 27,
|
||||||
TU_DEBUG_NO_CONCURRENT_UNRESOLVES = 1 << 28,
|
TU_DEBUG_NO_CONCURRENT_UNRESOLVES = 1 << 28,
|
||||||
|
TU_DEBUG_DUMPAS = 1 << 29,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tu_env {
|
struct tu_env {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue