mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
radv: Use the BVH8 format on GFX12
Reviewed-by: Natalie Vock <natalie.vock@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34273>
This commit is contained in:
parent
95e7343a7d
commit
97f6287827
26 changed files with 1948 additions and 204 deletions
|
|
@ -1461,6 +1461,8 @@ RADV driver environment variables
|
|||
Dump backend IR (ACO or LLVM) for selected shader stages.
|
||||
``asm``
|
||||
Dump shader disassembly for selected shader stages.
|
||||
``bvh4``
|
||||
Use the BVH4 encoding on GPUs that support the BVH8 encoding.
|
||||
|
||||
.. envvar:: RADV_FORCE_FAMILY
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,10 @@ TYPE(radv_bvh_aabb_node, 4);
|
|||
TYPE(radv_bvh_instance_node, 8);
|
||||
TYPE(radv_bvh_box16_node, 4);
|
||||
TYPE(radv_bvh_box32_node, 4);
|
||||
TYPE(radv_gfx12_box_node, 4);
|
||||
TYPE(radv_gfx12_instance_node, 8);
|
||||
TYPE(radv_gfx12_instance_node_user_data, 4);
|
||||
TYPE(radv_gfx12_primitive_node, 4);
|
||||
|
||||
uint32_t
|
||||
id_to_offset(uint32_t id)
|
||||
|
|
|
|||
|
|
@ -35,6 +35,16 @@ struct encode_args {
|
|||
uint32_t geometry_type;
|
||||
};
|
||||
|
||||
/* Push-constant arguments for the GFX12 BVH8 encode shader.
 * The intermediate (IR) BVH is laid out with all leaf nodes first,
 * followed by the internal box nodes.
 */
struct encode_gfx12_args {
   VOID_REF intermediate_bvh;         /* IR BVH produced by the builder */
   VOID_REF output_base;              /* base address of the output acceleration structure */
   REF(vk_ir_header) header;          /* shared build state (node counts, allocation cursors) */
   uint32_t output_bvh_offset;        /* byte offset of the HW BVH inside the output buffer */
   uint32_t leaf_node_offsets_offset; /* offset (from output_base) of the per-leaf offset table */
   uint32_t leaf_node_count;          /* number of leaf nodes in the intermediate BVH */
   uint32_t geometry_type;            /* VK_GEOMETRY_TYPE_*; selects the IR leaf node size/encoder */
};
|
||||
|
||||
struct header_args {
|
||||
REF(vk_ir_header) src;
|
||||
REF(radv_accel_struct_header) dst;
|
||||
|
|
|
|||
|
|
@ -58,10 +58,12 @@ struct radv_accel_struct_header {
|
|||
uint64_t size;
|
||||
|
||||
/* Everything after this gets updated/copied from the CPU. */
|
||||
uint32_t geometry_type;
|
||||
uint32_t geometry_count;
|
||||
uint32_t primitive_base_indices_offset;
|
||||
uint64_t instance_offset;
|
||||
uint64_t instance_count;
|
||||
uint32_t leaf_node_offsets_offset;
|
||||
uint32_t build_flags;
|
||||
};
|
||||
|
||||
|
|
@ -114,4 +116,60 @@ struct radv_bvh_box32_node {
|
|||
#define RADV_BVH_ROOT_NODE radv_bvh_node_box32
|
||||
#define RADV_BVH_INVALID_NODE 0xffffffffu
|
||||
|
||||
/* GFX12 */
|
||||
|
||||
#define RADV_GFX12_BVH_NODE_SIZE 128
|
||||
|
||||
/* One child slot of a GFX12 box node. The three dwords hold the child's
 * AABB quantized to 12 bits per axis relative to the parent's origin and
 * per-axis power-of-two extents, as packed by the encode shader:
 *   dword0: quantized min.x (12) | quantized min.y << 12
 *   dword1: quantized min.z (12) | (quantized max.x - 1) << 12 | cull_mask << 24
 *   dword2: (quantized max.y - 1) (12) | (quantized max.z - 1) << 12 |
 *           node type << 24 | node size in 128-byte units << 28
 * An invalid child is dword0 = 0xffffffff, dword1 = 0xfff, dword2 = 0.
 */
struct radv_gfx12_box_child {
   uint32_t dword0;
   uint32_t dword1;
   uint32_t dword2;
};

#ifndef VULKAN
typedef struct radv_gfx12_box_child radv_gfx12_box_child;
#endif
|
||||
|
||||
/* GFX12 internal (box) node: up to 8 children with quantized bounds.
 * Exactly RADV_GFX12_BVH_NODE_SIZE (128) bytes.
 */
struct radv_gfx12_box_node {
   uint32_t internal_base_id;      /* packed node id of the first internal child */
   uint32_t primitive_base_id;     /* packed node id of the first leaf child */
   uint32_t unused;
   vec3 origin;                    /* quantization origin (this node's AABB min) */
   /* Per-axis extent float exponents in bits 0-7/8-15/16-23,
    * (child_count - 1) in bits 28-31. */
   uint32_t child_count_exponents;
   uint32_t obb_matrix_index;      /* set to 0x7f by the encoder — presumably "no OBB"; confirm */
   radv_gfx12_box_child children[8];
};
|
||||
|
||||
/* GFX12 hardware instance node (first 128 bytes of a 256-byte instance
 * leaf; radv_gfx12_instance_node_user_data follows immediately after).
 */
struct radv_gfx12_instance_node {
   mat3x4 wto_matrix;               /* world-to-object transform */
   /* BLAS root node pointer in the low bits, instance pointer flags
    * starting at bit 54 (see the encoder and the 0xFFC0000000000000
    * mask used when deserializing). */
   uint64_t pointer_flags_bvh_addr;
   uint32_t unused;
   /* Instance cull mask in bits 24-31; the encoder stores the low 24
    * bits of sbt_offset_and_flags in bits 0-23. */
   uint32_t cull_mask_user_data;
   vec3 origin;                     /* quantization origin for the embedded child bounds */
   uint32_t child_count_exponents;  /* same packing as in radv_gfx12_box_node */
   radv_gfx12_box_child children[4];
};
|
||||
|
||||
/* Driver-side instance metadata stored in the second 128-byte half of a
 * GFX12 instance leaf, right after radv_gfx12_instance_node. Used for
 * shader queries and for serialization/deserialization.
 */
struct radv_gfx12_instance_node_user_data {
   mat3x4 otw_matrix;                      /* object-to-world transform */
   uint32_t custom_instance;               /* low 24 bits of custom_instance_and_mask */
   uint32_t instance_index;
   uint32_t bvh_offset;                    /* copied from the BLAS header */
   uint32_t padding;
   uint64_t blas_addr;                     /* base address of the referenced BLAS */
   uint32_t primitive_base_indices_offset; /* copied from the BLAS header */
   uint32_t leaf_node_offsets_offset;      /* copied from the BLAS header */
   uint32_t unused[12];                    /* pads the struct to RADV_GFX12_BVH_NODE_SIZE (128) bytes */
};
|
||||
|
||||
/* Size of the primitive header section in bits. */
#define RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE 52

/* Size of a primitive pair description in bits. */
#define RADV_GFX12_PRIMITIVE_NODE_PAIR_DESC_SIZE 29

/* GFX12 compressed primitive (triangle-pair / AABB) node. The contents
 * are a 1024-bit stream with no fixed field layout: a 52-bit header,
 * vertex/bounds data, and a 29-bit pair descriptor at the very end
 * (see radv_encode_triangle_gfx12 / radv_encode_aabb_gfx12).
 */
struct radv_gfx12_primitive_node {
   uint32_t dwords[32];
};
|
||||
|
||||
#endif /* BVH_H */
|
||||
|
|
|
|||
|
|
@ -71,7 +71,10 @@ main(void)
|
|||
DEREF(REF(uvec4)(copy_src_addr + offset));
|
||||
|
||||
/* Do the adjustment inline in the same invocation that copies the data so that we don't have
|
||||
* to synchronize. */
|
||||
* to synchronize. This is only possible on pre-GFX12 HW because leaf nodes have a different
|
||||
* order on GFX12.
|
||||
*/
|
||||
#if !GFX12
|
||||
if (offset < node_end && offset >= node_offset &&
|
||||
(offset - node_offset) % SIZEOF(radv_bvh_instance_node) == 0) {
|
||||
uint64_t idx = (offset - node_offset) / SIZEOF(radv_bvh_instance_node);
|
||||
|
|
@ -85,5 +88,6 @@ main(void)
|
|||
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = addr_to_node(blas_addr + bvh_offset);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
|||
65
src/amd/vulkan/bvh/copy_blas_addrs_gfx12.comp
Normal file
65
src/amd/vulkan/bvh/copy_blas_addrs_gfx12.comp
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright © 2022 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#include "build_interface.h"
|
||||
|
||||
layout(push_constant) uniform CONSTS
|
||||
{
|
||||
copy_args args;
|
||||
};
|
||||
|
||||
/* Fix up BLAS addresses for GFX12 TLAS (de)serialization.
 * SERIALIZE: gather each instance's BLAS address into the table that
 * follows the serialization header.
 * DESERIALIZE: rewrite each instance node's BLAS pointer from that table
 * (the BLAS may live at a different address on the target).
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;
   uint32_t total_invocations = gl_NumWorkGroups.x * 64;

   /* When serializing, the acceleration structure is the source;
    * when deserializing, it is the destination. */
   uint64_t accel_struct_addr = args.mode == RADV_COPY_MODE_SERIALIZE ? args.src_addr : args.dst_addr;
   uint64_t serialized_addr = args.mode == RADV_COPY_MODE_SERIALIZE ? args.dst_addr : args.src_addr;

   /* The BLAS address table immediately follows the serialization header. */
   uint64_t blas_addrs = serialized_addr + SIZEOF(radv_accel_struct_serialization_header);

   radv_accel_struct_serialization_header serialization_header =
      DEREF(REF(radv_accel_struct_serialization_header)(serialized_addr));

   radv_accel_struct_header header = DEREF(REF(radv_accel_struct_header)(accel_struct_addr));

   /* Grid-stride over all instances. */
   for (uint32_t i = global_id; i < serialization_header.instance_count; i += total_invocations) {
      /* Instance leaves are located via the per-leaf offset table
       * (offsets are relative to the start of the HW BVH). */
      uint64_t instance_offset_addr = accel_struct_addr + (header.leaf_node_offsets_offset + i * 4);
      uint64_t instance_addr = accel_struct_addr + (header.bvh_offset + DEREF(REF(uint32_t)(instance_offset_addr)));
      REF(radv_gfx12_instance_node) instance_node = REF(radv_gfx12_instance_node)(instance_addr);
      /* The user-data half sits right after the HW instance node. */
      REF(radv_gfx12_instance_node_user_data) instance_data =
         REF(radv_gfx12_instance_node_user_data)(instance_addr + SIZEOF(radv_gfx12_instance_node));

      if (args.mode == RADV_COPY_MODE_SERIALIZE) {
         DEREF(INDEX(uint64_t, blas_addrs, i)) = DEREF(instance_data).blas_addr;
      } else {
         uint32_t bvh_offset = DEREF(instance_data).bvh_offset;

         /* Replace the address while keeping the pointer flags. */
         uint64_t pointer_flags_bvh_addr = DEREF(instance_node).pointer_flags_bvh_addr;
         uint64_t blas_addr = DEREF(INDEX(uint64_t, blas_addrs, i));
         DEREF(instance_node).pointer_flags_bvh_addr =
            (pointer_flags_bvh_addr & 0xFFC0000000000000ul) | addr_to_node(blas_addr + bvh_offset);
         DEREF(instance_data).blas_addr = blas_addr;
      }
   }
}
|
||||
|
|
@ -52,4 +52,273 @@ radv_encode_instance_gfx10_3(VOID_REF dst_addr, vk_ir_instance_node src)
|
|||
DEREF(dst).instance_id = src.instance_id;
|
||||
}
|
||||
|
||||
/* Incremental bit-stream writer used to pack the GFX12 node formats.
 * Bits accumulate LSB-first in 'temp' and are flushed to memory one
 * dword at a time.
 */
struct bit_writer {
   uint64_t addr;        /* base address of the destination node */
   uint32_t offset;      /* byte offset of the next dword to store */
   uint32_t temp;        /* accumulator holding not-yet-flushed bits */
   uint32_t count;       /* number of valid bits in temp (0-31) */
   uint32_t total_count; /* total number of bits written so far */
};
|
||||
|
||||
/* Start an empty bit stream whose output begins at 'addr'. */
void
bit_writer_init(out bit_writer writer, uint64_t addr)
{
   writer = bit_writer(addr, 0u, 0u, 0u, 0u);
}
|
||||
|
||||
/* Append the low 'bit_size' bits of 'data' to the stream.
 * bit_size must be <= 32, so at most one dword is flushed per call.
 */
void
bit_writer_write(inout bit_writer writer, uint32_t data, uint32_t bit_size)
{
   writer.total_count += bit_size;

   if (writer.count + bit_size >= 32) {
      /* The accumulator fills up: top it off with the low bits of
       * 'data' and store the completed dword. */
      writer.temp = writer.temp | (data << writer.count);

      REF(uint32_t) dst = REF(uint32_t)(writer.addr + writer.offset);
      DEREF(dst) = writer.temp;
      writer.offset += 4;

      /* Keep only the bits that did not fit. A shift by 32 is undefined,
       * so the count == 0 case (everything fit exactly) is special-cased. */
      bit_size = bit_size - (32 - writer.count);
      if (writer.count == 0)
         data = 0;
      else
         data = data >> (32 - writer.count);

      writer.temp = 0;
      writer.count = 0;
   }

   /* Stash the (remaining) bits in the accumulator. */
   writer.temp = writer.temp | (data << writer.count);
   writer.count += bit_size;
}
|
||||
|
||||
/* Jump to the absolute bit position 'target' (counted from the start of
 * the stream), flushing any pending bits first. Bits between the old and
 * new position are left untouched in memory.
 *
 * Fix: clear the accumulator after the flush. bit_writer_write only ORs
 * new data into 'temp', so leaving the just-flushed bits in the
 * accumulator would OR them into the first dword written after the skip
 * and corrupt its low 'target % 32' bits (e.g. the primitive-node pair
 * descriptor, which starts at bit 995 → count == 3 after the skip).
 */
void
bit_writer_skip_to(inout bit_writer writer, uint32_t target)
{
   /* Flush the remaining data. */
   if (writer.count > 0) {
      REF(uint32_t) dst = REF(uint32_t)(writer.addr + writer.offset);
      DEREF(dst) = writer.temp;
   }

   writer.temp = 0;
   writer.count = target % 32;
   writer.total_count = target;
   writer.offset = (target / 32) * 4;
}
|
||||
|
||||
/* Flush any buffered bits and reset the writer to its initial state. */
void
bit_writer_finish(inout bit_writer writer)
{
   /* A partially filled accumulator still has to reach memory. */
   if (writer.count != 0)
      DEREF(REF(uint32_t)(writer.addr + writer.offset)) = writer.temp;

   writer.temp = 0;
   writer.count = 0;
   writer.total_count = 0;
}
|
||||
|
||||
/* Encode one triangle into a GFX12 compressed primitive node at 'dst'.
 * The node is a 1024-bit stream: a 52-bit header, the vertex/index data,
 * and a 29-bit pair descriptor aligned to the end of the node. Vertices
 * are stored uncompressed (31+1 bits per component), and only tri0 of
 * the pair is used.
 */
void
radv_encode_triangle_gfx12(VOID_REF dst, vk_ir_triangle_node src)
{
   bit_writer child_writer;
   bit_writer_init(child_writer, dst);

   /* 52-bit header (RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE). */
   bit_writer_write(child_writer, 31, 5); /* x_vertex_bits_minus_one */
   bit_writer_write(child_writer, 31, 5); /* y_vertex_bits_minus_one */
   bit_writer_write(child_writer, 31, 5); /* z_vertex_bits_minus_one */
   bit_writer_write(child_writer, 0, 5); /* trailing_zero_bits */
   bit_writer_write(child_writer, 14, 4); /* geometry_index_base_bits_div_2 */
   bit_writer_write(child_writer, 14, 4); /* geometry_index_bits_div_2 */
   bit_writer_write(child_writer, 0, 3); /* triangle_pair_count_minus_one */
   bit_writer_write(child_writer, 0, 1); /* vertex_type */
   bit_writer_write(child_writer, 28, 5); /* primitive_index_base_bits */
   bit_writer_write(child_writer, 28, 5); /* primitive_index_bits */
   /* header + 9 floats + geometry_id */
   bit_writer_write(child_writer, RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE + 9 * 32 + 28, 10);

   /* Three vertices, stored as raw IEEE-754 bit patterns. */
   bit_writer_write(child_writer, floatBitsToUint(src.coords[0][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[0][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[0][2]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[1][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[1][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[1][2]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[2][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[2][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.coords[2][2]), 32);

   /* 28-bit geometry index base followed by the 28-bit primitive index. */
   bit_writer_write(child_writer, src.geometry_id_and_flags & 0xfffffff, 28);
   bit_writer_write(child_writer, src.triangle_id, 28);

   /* The pair descriptor sits at the very end of the 1024-bit node. */
   bit_writer_skip_to(child_writer, 32 * 32 - RADV_GFX12_PRIMITIVE_NODE_PAIR_DESC_SIZE);

   uint32_t opaque = (src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0 ? 1 : 0;

   bit_writer_write(child_writer, 1, 1); /* prim_range_stop */
   bit_writer_write(child_writer, 0, 1); /* tri1_double_sided */
   bit_writer_write(child_writer, 0, 1); /* tri1_opaque */
   bit_writer_write(child_writer, 0, 4); /* tri1_v0_index */
   bit_writer_write(child_writer, 0, 4); /* tri1_v1_index */
   bit_writer_write(child_writer, 0, 4); /* tri1_v2_index */
   bit_writer_write(child_writer, 0, 1); /* tri0_double_sided */
   bit_writer_write(child_writer, opaque, 1); /* tri0_opaque */
   bit_writer_write(child_writer, 0, 4); /* tri0_v0_index */
   bit_writer_write(child_writer, 1, 4); /* tri0_v1_index */
   bit_writer_write(child_writer, 2, 4); /* tri0_v2_index */

   bit_writer_finish(child_writer);
}
|
||||
|
||||
/* Encode one AABB leaf into a GFX12 compressed primitive node at 'dst'.
 * Same bit-stream layout as the triangle encoding, but with six bounds
 * floats instead of nine vertex components, and tri0_v0/v1 set to 0xf
 * (presumably marking the entry as a procedural/AABB primitive — confirm
 * against the RDNA4 node format docs).
 */
void
radv_encode_aabb_gfx12(VOID_REF dst, vk_ir_aabb_node src)
{
   bit_writer child_writer;
   bit_writer_init(child_writer, dst);

   /* 52-bit header. */
   bit_writer_write(child_writer, 0, 5); /* x_vertex_bits_minus_one */
   bit_writer_write(child_writer, 0, 5); /* y_vertex_bits_minus_one */
   bit_writer_write(child_writer, 0, 5); /* z_vertex_bits_minus_one */
   bit_writer_write(child_writer, 0, 5); /* trailing_zero_bits */
   bit_writer_write(child_writer, 14, 4); /* geometry_index_base_bits_div_2 */
   bit_writer_write(child_writer, 14, 4); /* geometry_index_bits_div_2 */
   bit_writer_write(child_writer, 0, 3); /* triangle_pair_count_minus_one */
   bit_writer_write(child_writer, 0, 1); /* vertex_type */
   bit_writer_write(child_writer, 28, 5); /* primitive_index_base_bits */
   bit_writer_write(child_writer, 28, 5); /* primitive_index_bits */
   /* header + 6 floats + geometry_id */
   bit_writer_write(child_writer, RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE + 6 * 32 + 28, 10);

   /* AABB min/max as raw IEEE-754 bit patterns. */
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.min.x), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.min.y), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.min.z), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.max.x), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.max.y), 32);
   bit_writer_write(child_writer, floatBitsToUint(src.base.aabb.max.z), 32);

   bit_writer_write(child_writer, src.geometry_id_and_flags & 0xfffffff, 28);
   bit_writer_write(child_writer, src.primitive_id, 28);

   /* The pair descriptor sits at the very end of the 1024-bit node. */
   bit_writer_skip_to(child_writer, 32 * 32 - RADV_GFX12_PRIMITIVE_NODE_PAIR_DESC_SIZE);

   uint32_t opaque = (src.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0 ? 1 : 0;

   bit_writer_write(child_writer, 1, 1); /* prim_range_stop */
   bit_writer_write(child_writer, 0, 1); /* tri1_double_sided */
   bit_writer_write(child_writer, 0, 1); /* tri1_opaque */
   bit_writer_write(child_writer, 0, 4); /* tri1_v0_index */
   bit_writer_write(child_writer, 0, 4); /* tri1_v1_index */
   bit_writer_write(child_writer, 0, 4); /* tri1_v2_index */
   bit_writer_write(child_writer, 0, 1); /* tri0_double_sided */
   bit_writer_write(child_writer, opaque, 1); /* tri0_opaque */
   bit_writer_write(child_writer, 0xf, 4); /* tri0_v0_index */
   bit_writer_write(child_writer, 0xf, 4); /* tri0_v1_index */
   bit_writer_write(child_writer, 0, 4); /* tri0_v2_index */

   bit_writer_finish(child_writer);
}
|
||||
|
||||
/* Writes both the HW node and user data.
 * Encodes a TLAS instance as a 256-byte GFX12 leaf: the first 128 bytes
 * are the HW instance node (world-to-object matrix, flagged BLAS pointer,
 * and one embedded box child covering the BLAS root AABB); the second
 * 128 bytes are driver metadata (radv_gfx12_instance_node_user_data).
 */
void
radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src)
{
   bit_writer child_writer;
   bit_writer_init(child_writer, dst);

   radv_accel_struct_header blas_header = DEREF(REF(radv_accel_struct_header)(src.base_ptr));

   /* World-to-object = inverse of the object-to-world transform; written
    * as transpose(inverse(transpose(M))) to get the row layout expected
    * by the HW from the mat3x4 input. */
   mat4 transform = mat4(src.otw_matrix);
   mat4 wto_matrix = transpose(inverse(transpose(transform)));

   /* 12 floats: rows 0-2 of the world-to-object matrix. */
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[0][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[0][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[0][2]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[0][3]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[1][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[1][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[1][2]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[1][3]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[2][0]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[2][1]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[2][2]), 32);
   bit_writer_write(child_writer, floatBitsToUint(wto_matrix[2][3]), 32);

   /* Translate VkGeometryInstanceFlags into HW pointer flags (bit 54+). */
   uint32_t flags = src.sbt_offset_and_flags >> 24;
   uint32_t instance_pointer_flags = 0;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
      instance_pointer_flags |= 1;
   if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) != 0)
      instance_pointer_flags |= 2;
   /* Facing culling is also disabled for AABB-only BLASes. */
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0 ||
       blas_header.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR)
      instance_pointer_flags |= 4;
   if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
      instance_pointer_flags |= 8;

   /* Skip flags by BLAS content type — TODO confirm exact HW meaning. */
   if (blas_header.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
      instance_pointer_flags |= 512;
   else
      instance_pointer_flags |= 256;

   /* BLAS root pointer with the flags packed into the top bits. */
   uint64_t bvh_addr = addr_to_node(src.base_ptr + blas_header.bvh_offset);
   bit_writer_write(child_writer, uint32_t(bvh_addr & 0xffffffff), 32);
   bit_writer_write(child_writer, uint32_t(bvh_addr >> 32) | (instance_pointer_flags << (54 - 32)), 32);
   bit_writer_write(child_writer, src.custom_instance_and_mask & 0xffffff, 32);
   bit_writer_write(child_writer, src.sbt_offset_and_flags & 0xffffff, 24);
   bit_writer_write(child_writer, src.custom_instance_and_mask >> 24, 8);

   /* Quantization origin for the embedded child: the BLAS AABB min. */
   bit_writer_write(child_writer, floatBitsToUint(blas_header.aabb.min.x), 32);
   bit_writer_write(child_writer, floatBitsToUint(blas_header.aabb.min.y), 32);
   bit_writer_write(child_writer, floatBitsToUint(blas_header.aabb.min.z), 32);

   /* Per-axis extent exponents, rounded up so the AABB is covered. */
   vec3 child_extent = blas_header.aabb.max - blas_header.aabb.min;
   uvec3 child_extent_exponents = uvec3(ceil(clamp(log2(child_extent) + 127.0, vec3(0.0), vec3(255))));

   bit_writer_write(child_writer, child_extent_exponents.x, 8);
   bit_writer_write(child_writer, child_extent_exponents.y, 8);
   bit_writer_write(child_writer, child_extent_exponents.z, 8);
   bit_writer_write(child_writer, 0, 4);
   bit_writer_write(child_writer, 0, 4);

   /* Child 0: a full-range box (min = 0, max = 0xfff on every axis)
    * pointing at the BLAS root box node. */
   bit_writer_write(child_writer, 0, 12);
   bit_writer_write(child_writer, 0, 12);
   bit_writer_write(child_writer, 4, 8);
   bit_writer_write(child_writer, 0, 12);
   bit_writer_write(child_writer, 0xfff, 12);
   bit_writer_write(child_writer, 0xff, 8);
   bit_writer_write(child_writer, 0xfff, 12);
   bit_writer_write(child_writer, 0xfff, 12);
   bit_writer_write(child_writer, radv_bvh_node_box32, 4);
   bit_writer_write(child_writer, 1, 4);

   /* Children 1-3: inverted (empty) boxes, i.e. invalid. */
   for (uint32_t remaining_child_index = 0; remaining_child_index < 3; remaining_child_index++) {
      bit_writer_write(child_writer, 0xfff, 12);
      bit_writer_write(child_writer, 0xfff, 12);
      bit_writer_write(child_writer, 0xff, 8);
      bit_writer_write(child_writer, 0xfff, 12);
      bit_writer_write(child_writer, 0, 12);
      bit_writer_write(child_writer, 0, 8);
      bit_writer_write(child_writer, 0, 12);
      bit_writer_write(child_writer, 0, 12);
      bit_writer_write(child_writer, 0, 8);
   }

   bit_writer_finish(child_writer);

   /* Driver metadata in the second 128-byte half of the leaf. */
   REF(radv_gfx12_instance_node_user_data) user_data =
      REF(radv_gfx12_instance_node_user_data)(dst + RADV_GFX12_BVH_NODE_SIZE);
   DEREF(user_data).otw_matrix = src.otw_matrix;
   DEREF(user_data).custom_instance = src.custom_instance_and_mask & 0xffffff;
   DEREF(user_data).instance_index = src.instance_id;
   DEREF(user_data).bvh_offset = blas_header.bvh_offset;
   DEREF(user_data).blas_addr = src.base_ptr;
   DEREF(user_data).primitive_base_indices_offset = blas_header.primitive_base_indices_offset;
   DEREF(user_data).leaf_node_offsets_offset = blas_header.leaf_node_offsets_offset;
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
275
src/amd/vulkan/bvh/encode_gfx12.comp
Normal file
275
src/amd/vulkan/bvh/encode_gfx12.comp
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
/*
|
||||
* Copyright © 2022 Friedrich Vock
|
||||
* Copyright © 2025 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
#extension GL_KHR_memory_scope_semantics : require
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#define GFX12
|
||||
|
||||
#include "build_helpers.h"
|
||||
#include "build_interface.h"
|
||||
#include "encode.h"
|
||||
|
||||
layout(push_constant) uniform CONSTS
|
||||
{
|
||||
encode_gfx12_args args;
|
||||
};
|
||||
|
||||
/* Record 'parent' as the parent of node id 'child'. Parent links live in
 * an array growing downwards from just below the output BVH, indexed by
 * child id / 16 (one entry per 128-byte node). */
void
set_parent(uint32_t child, uint32_t parent)
{
   uint64_t entry_addr = args.output_base + args.output_bvh_offset - 4 - (child / 16) * 4;
   DEREF(REF(uint32_t)(entry_addr)) = parent;
}
|
||||
|
||||
/* Convert the binary intermediate (IR) BVH into the GFX12 BVH8 format.
 * One invocation per IR internal node. Each invocation spin-waits until
 * its node has been assigned an output offset by its parent, then
 * collapses up to 8 descendants into one BVH8 box node, encodes any leaf
 * children, and publishes output offsets for internal children.
 */
void
main()
{
   if (gl_GlobalInvocationID.x >= DEREF(args.header).ir_internal_node_count)
      return;

   /* Revert the order so we start at the root */
   uint32_t global_id = DEREF(args.header).ir_internal_node_count - 1 - gl_GlobalInvocationID.x;

   /* Leaf stride in the IR buffer depends on the geometry type. */
   uint32_t ir_leaf_node_size;
   switch (args.geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
      ir_leaf_node_size = SIZEOF(vk_ir_triangle_node);
      break;
   }
   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      ir_leaf_node_size = SIZEOF(vk_ir_aabb_node);
      break;
   }
   default:
      /* instances */
      ir_leaf_node_size = SIZEOF(vk_ir_instance_node);
      break;
   }

   uint32_t intermediate_leaf_nodes_size = args.leaf_node_count * ir_leaf_node_size;
   /* NOTE(review): this declaration is shadowed by the atomicAdd result
    * inside the loop below and appears unused — confirm. */
   uint32_t dst_internal_offset = id_to_offset(RADV_BVH_ROOT_NODE);

   /* Internal IR nodes follow all leaf nodes in the intermediate buffer. */
   REF(vk_ir_box_node) intermediate_internal_nodes =
      REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, intermediate_leaf_nodes_size);
   REF(vk_ir_box_node) src_node = INDEX(vk_ir_box_node, intermediate_internal_nodes, global_id);
   vk_ir_box_node src = DEREF(src_node);

   /* The root is the last internal node written by the builder. */
   bool is_root_node = global_id == DEREF(args.header).ir_internal_node_count - 1;

   for (;;) {
      /* Make changes to the current node's BVH offset value visible. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      /* Spin until the parent assigned this node an output offset. */
      uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
      if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
         continue;

      /* Node was collapsed into its parent; nothing to emit. */
      if (bvh_offset == VK_NULL_BVH_OFFSET)
         break;

      REF(radv_gfx12_box_node) dst = REF(radv_gfx12_box_node)(args.output_base + (args.output_bvh_offset + bvh_offset));

      uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);

      uint32_t children[8];

      /* Start with the IR node's (up to 2) valid children. */
      uint32_t found_child_count = 0;
      for (uint32_t i = 0; i < 2; i++) {
         if (src.children[i] != RADV_BVH_INVALID_NODE) {
            children[found_child_count] = src.children[i];
            found_child_count++;
         }
      }

      /* Widen binary -> 8-wide by repeatedly replacing internal children
       * with their grandchildren. Absorbed children are marked
       * VK_NULL_BVH_OFFSET so their own invocation exits.
       * TODO: Collapse child nodes with high SAH values. */
      while (found_child_count < 8) {
         bool progress = false;
         for (int32_t i = 0; i < found_child_count; i++) {
            uint32_t child_id = children[i];
            if (ir_id_to_type(child_id) != vk_ir_node_internal)
               continue;

            progress = true;

            REF(vk_ir_box_node) child_node =
               REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, ir_id_to_offset(child_id));
            uint32_t grandchildren[2] = DEREF(child_node).children;
            uint32_t valid_grandchild_count = 0;

            if (grandchildren[1] != RADV_BVH_INVALID_NODE)
               valid_grandchild_count++;

            if (grandchildren[0] != RADV_BVH_INVALID_NODE)
               valid_grandchild_count++;
            else
               grandchildren[0] = grandchildren[1];

            if (valid_grandchild_count > 1) {
               children[found_child_count] = grandchildren[1];
               found_child_count++;
            }

            if (valid_grandchild_count > 0) {
               children[i] = grandchildren[0];
            } else {
               /* Childless internal node: drop it and compact the list. */
               found_child_count--;
               children[i] = children[found_child_count];
            }

            DEREF(child_node).bvh_offset = VK_NULL_BVH_OFFSET;

            if (found_child_count == 8)
               break;
         }

         if (!progress)
            break;
      }

      /* Size of the output ranges needed for this node's children;
       * instance leaves take two 128-byte nodes (HW node + user data). */
      uint32_t child_leaf_nodes_size = 0;
      uint32_t child_internal_nodes_size = 0;
      for (uint32_t i = 0; i < found_child_count; i++) {
         uint32_t type = ir_id_to_type(children[i]);
         if (type == vk_ir_node_internal)
            child_internal_nodes_size += RADV_GFX12_BVH_NODE_SIZE;
         else if (type == vk_ir_node_instance)
            child_leaf_nodes_size += 2 * RADV_GFX12_BVH_NODE_SIZE;
         else
            child_leaf_nodes_size += RADV_GFX12_BVH_NODE_SIZE;
      }

      /* Allocate contiguous output space for internal and leaf children. */
      uint32_t dst_internal_offset = atomicAdd(DEREF(args.header).dst_node_offset, child_internal_nodes_size);
      uint32_t dst_leaf_offset = atomicAdd(DEREF(args.header).dst_leaf_node_offset, child_leaf_nodes_size);

      vec3 origin = src.base.aabb.min;
      vec3 extent = src.base.aabb.max - src.base.aabb.min;

      /* Round each extent up to the next power of two (exponent-only). */
      extent = uintBitsToFloat((floatBitsToUint(extent) + uvec3(0x7fffff)) & 0x7f800000);
      uvec3 extent_exponents = floatBitsToUint(extent) >> 23;

      DEREF(dst).internal_base_id = pack_node_id(dst_internal_offset, 0);
      DEREF(dst).primitive_base_id = pack_node_id(dst_leaf_offset, 0);
      DEREF(dst).origin = origin;
      DEREF(dst).child_count_exponents =
         extent_exponents.x | (extent_exponents.y << 8) | (extent_exponents.z << 16) | ((found_child_count - 1) << 28);
      DEREF(dst).obb_matrix_index = 0x7f;

      for (uint32_t i = 0; i < found_child_count; i++) {
         uint32_t child_id = children[i];
         uint32_t type = ir_id_to_type(child_id);
         uint32_t offset = ir_id_to_offset(child_id);

         uint32_t child_node_size_128b = 1;
         uint32_t encoded_type = 0;
         uint32_t dst_offset = 0;
         uint32_t cull_mask = 0xff;
         if (type == vk_ir_node_internal) {
            encoded_type = 5;
            dst_offset = dst_internal_offset;

            /* Publish the child's output offset; its invocation is
             * spinning on this value. */
            REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset);
            DEREF(child_node).bvh_offset = dst_internal_offset;

            dst_internal_offset += RADV_GFX12_BVH_NODE_SIZE;
         } else {
            dst_offset = dst_leaf_offset;

            /* Write leaf node offset. */
            uint32_t child_index = offset / ir_leaf_node_size;
            REF(uint32_t) child_dst_offset = REF(uint32_t)(args.output_base + args.leaf_node_offsets_offset);
            child_dst_offset = INDEX(uint32_t, child_dst_offset, child_index);
            DEREF(child_dst_offset) = dst_offset;

            VOID_REF dst_leaf_addr = args.output_base + args.output_bvh_offset + dst_leaf_offset;

            /* Encode the leaf in its HW format. */
            switch (args.geometry_type) {
            case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
               vk_ir_triangle_node src_node = DEREF(REF(vk_ir_triangle_node)(OFFSET(args.intermediate_bvh, offset)));
               radv_encode_triangle_gfx12(dst_leaf_addr, src_node);
               dst_leaf_offset += RADV_GFX12_BVH_NODE_SIZE;
               break;
            }
            case VK_GEOMETRY_TYPE_AABBS_KHR: {
               vk_ir_aabb_node src_node = DEREF(REF(vk_ir_aabb_node)(OFFSET(args.intermediate_bvh, offset)));
               radv_encode_aabb_gfx12(dst_leaf_addr, src_node);
               dst_leaf_offset += RADV_GFX12_BVH_NODE_SIZE;
               break;
            }
            default:
               /* instances */
               encoded_type = 6;
               child_node_size_128b = 2;

               vk_ir_instance_node src_node = DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, offset)));
               radv_encode_instance_gfx12(dst_leaf_addr, src_node);

               cull_mask = src_node.custom_instance_and_mask >> 24;

               dst_leaf_offset += 2 * RADV_GFX12_BVH_NODE_SIZE;

               break;
            }
         }

         vk_aabb child_aabb = DEREF(REF(vk_ir_node) OFFSET(args.intermediate_bvh, offset)).aabb;

         /* Quantize the child AABB to 12 bits per axis: min rounds down,
          * max rounds up (stored minus one), so the stored box is
          * conservative. */
         radv_gfx12_box_child child;
         /* TODO: subtree flags culling */
         child.dword0 = min(uint32_t(floor((child_aabb.min.x - origin.x) / extent.x * float(0x1000))), 0xfff) |
                        (min(uint32_t(floor((child_aabb.min.y - origin.y) / extent.y * float(0x1000))), 0xfff) << 12);
         /* TODO: subtree mask culling */
         child.dword1 =
            min(uint32_t(floor((child_aabb.min.z - origin.z) / extent.z * float(0x1000))), 0xfff) |
            (min(uint32_t(ceil((child_aabb.max.x - origin.x) / extent.x * float(0x1000))) - 1, 0xfff) << 12) |
            (cull_mask << 24);
         child.dword2 =
            min(uint32_t(ceil((child_aabb.max.y - origin.y) / extent.y * float(0x1000))) - 1, 0xfff) |
            (min(uint32_t(ceil((child_aabb.max.z - origin.z) / extent.z * float(0x1000))) - 1, 0xfff) << 12) |
            (encoded_type << 24) | (child_node_size_128b << 28);
         DEREF(dst).children[i] = child;

         set_parent(pack_node_id(dst_offset, encoded_type), node_id);
      }

      /* Set remaining children to invalid */
      for (uint32_t i = found_child_count; i < 8; i++) {
         radv_gfx12_box_child null_child;
         null_child.dword0 = 0xffffffff;
         null_child.dword1 = 0xfff;
         null_child.dword2 = 0;
         DEREF(dst).children[i] = null_child;
      }

      /* Make changes to the children's BVH offset value available to the other invocations. */
      memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                    gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
      break;
   }

   /* The root invocation also fills in the result header. */
   if (is_root_node) {
      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_base);
      DEREF(header).aabb = src.base.aabb;
      DEREF(header).bvh_offset = args.output_bvh_offset;

      set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE);
   }
}
|
||||
|
|
@ -3,9 +3,24 @@
|
|||
|
||||
# source file, output name, defines
|
||||
bvh_shaders = [
|
||||
[
|
||||
'copy_blas_addrs_gfx12.comp',
|
||||
'copy_blas_addrs_gfx12',
|
||||
[],
|
||||
],
|
||||
[
|
||||
'copy.comp',
|
||||
'copy',
|
||||
['GFX12=0'],
|
||||
],
|
||||
[
|
||||
'copy.comp',
|
||||
'copy_gfx12',
|
||||
['GFX12=1'],
|
||||
],
|
||||
[
|
||||
'encode_gfx12.comp',
|
||||
'encode_gfx12',
|
||||
[],
|
||||
],
|
||||
[
|
||||
|
|
@ -28,6 +43,11 @@ bvh_shaders = [
|
|||
'update',
|
||||
[],
|
||||
],
|
||||
[
|
||||
'update_gfx12.comp',
|
||||
'update_gfx12',
|
||||
[],
|
||||
],
|
||||
[
|
||||
'leaf.comp',
|
||||
'radv_leaf',
|
||||
|
|
|
|||
|
|
@ -57,10 +57,10 @@ void main() {
|
|||
vk_aabb bounds;
|
||||
bool is_active;
|
||||
if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
|
||||
is_active = radv_build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x);
|
||||
is_active = radv_build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x, false);
|
||||
} else {
|
||||
VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
|
||||
is_active = radv_build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x);
|
||||
is_active = radv_build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x, false);
|
||||
}
|
||||
|
||||
if (!is_active)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,8 @@
|
|||
#include "encode.h"
|
||||
|
||||
bool
|
||||
radv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
|
||||
radv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id,
|
||||
bool gfx12)
|
||||
{
|
||||
bool is_valid = true;
|
||||
triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id);
|
||||
|
|
@ -56,13 +57,17 @@ radv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data
|
|||
node.triangle_id = global_id;
|
||||
node.geometry_id_and_flags = geom_data.geometry_id;
|
||||
|
||||
radv_encode_triangle_gfx10_3(dst_ptr, node);
|
||||
if (gfx12)
|
||||
radv_encode_triangle_gfx12(dst_ptr, node);
|
||||
else
|
||||
radv_encode_triangle_gfx10_3(dst_ptr, node);
|
||||
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
|
||||
radv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id,
|
||||
bool gfx12)
|
||||
{
|
||||
bool is_valid = true;
|
||||
|
||||
|
|
@ -87,10 +92,14 @@ radv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32
|
|||
#endif
|
||||
|
||||
vk_ir_aabb_node node;
|
||||
node.base.aabb = bounds;
|
||||
node.primitive_id = global_id;
|
||||
node.geometry_id_and_flags = geometry_id;
|
||||
|
||||
radv_encode_aabb_gfx10_3(dst_ptr, node);
|
||||
if (gfx12)
|
||||
radv_encode_aabb_gfx12(dst_ptr, node);
|
||||
else
|
||||
radv_encode_aabb_gfx10_3(dst_ptr, node);
|
||||
|
||||
return is_valid;
|
||||
}
|
||||
|
|
|
|||
213
src/amd/vulkan/bvh/update_gfx12.comp
Normal file
213
src/amd/vulkan/bvh/update_gfx12.comp
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
* Copyright © 2025 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
#extension GL_KHR_memory_scope_semantics : require
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#include "build_interface.h"
|
||||
#include "update.h"
|
||||
|
||||
layout(push_constant) uniform CONSTS
|
||||
{
|
||||
update_args args;
|
||||
};
|
||||
|
||||
uint32_t
|
||||
fetch_parent_node(VOID_REF bvh, uint32_t node)
|
||||
{
|
||||
uint64_t addr = bvh - node / 16 * 4 - 4;
|
||||
return DEREF(REF(uint32_t)(addr));
|
||||
}
|
||||
|
||||
void
|
||||
main()
|
||||
{
|
||||
uint32_t bvh_offset = DEREF(args.src).bvh_offset;
|
||||
|
||||
VOID_REF src_bvh = OFFSET(args.src, bvh_offset);
|
||||
VOID_REF dst_bvh = OFFSET(args.dst, bvh_offset);
|
||||
|
||||
VOID_REF leaf_node_offsets = OFFSET(args.src, DEREF(args.src).leaf_node_offsets_offset);
|
||||
|
||||
uint32_t leaf_node_size;
|
||||
if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
|
||||
leaf_node_size = SIZEOF(radv_gfx12_primitive_node);
|
||||
else if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_AABBS_KHR)
|
||||
leaf_node_size = SIZEOF(radv_gfx12_primitive_node);
|
||||
else
|
||||
leaf_node_size = SIZEOF(radv_gfx12_instance_node) + SIZEOF(radv_gfx12_instance_node_user_data);
|
||||
|
||||
uint32_t leaf_node_id = args.geom_data.first_id + gl_GlobalInvocationID.x;
|
||||
uint32_t first_leaf_offset = id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_gfx12_box_node);
|
||||
|
||||
uint32_t dst_offset = DEREF(INDEX(uint32_t, leaf_node_offsets, leaf_node_id));
|
||||
VOID_REF dst_ptr = OFFSET(dst_bvh, dst_offset);
|
||||
uint32_t src_offset = gl_GlobalInvocationID.x * args.geom_data.stride;
|
||||
|
||||
vk_aabb bounds;
|
||||
bool is_active;
|
||||
if (args.geom_data.geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
|
||||
is_active = radv_build_triangle(bounds, dst_ptr, args.geom_data, gl_GlobalInvocationID.x, true);
|
||||
} else {
|
||||
VOID_REF src_ptr = OFFSET(args.geom_data.data, src_offset);
|
||||
is_active = radv_build_aabb(bounds, src_ptr, dst_ptr, args.geom_data.geometry_id, gl_GlobalInvocationID.x, true);
|
||||
}
|
||||
|
||||
if (!is_active)
|
||||
return;
|
||||
|
||||
DEREF(INDEX(vk_aabb, args.leaf_bounds, (dst_offset - first_leaf_offset) / leaf_node_size)) = bounds;
|
||||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
uint32_t node_id = pack_node_id(dst_offset, 0);
|
||||
uint32_t parent_id = fetch_parent_node(src_bvh, node_id);
|
||||
uint32_t internal_nodes_offset = first_leaf_offset + args.leaf_node_count * leaf_node_size;
|
||||
while (parent_id != RADV_BVH_INVALID_NODE) {
|
||||
uint32_t offset = id_to_offset(parent_id);
|
||||
|
||||
uint32_t parent_index = (offset - internal_nodes_offset) / SIZEOF(radv_gfx12_box_node) + 1;
|
||||
if (parent_id == RADV_BVH_ROOT_NODE)
|
||||
parent_index = 0;
|
||||
|
||||
/* Make accesses to internal nodes in dst_bvh available and visible */
|
||||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
REF(radv_gfx12_box_node) src_node = REF(radv_gfx12_box_node) OFFSET(src_bvh, offset);
|
||||
REF(radv_gfx12_box_node) dst_node = REF(radv_gfx12_box_node) OFFSET(dst_bvh, offset);
|
||||
|
||||
uint32_t valid_child_count_minus_one = DEREF(src_node).child_count_exponents >> 28;
|
||||
|
||||
/* Check if all children have been processed. As this is an atomic the last path coming from
|
||||
* a child will pass here, while earlier paths break.
|
||||
*/
|
||||
uint32_t ready_child_count = atomicAdd(
|
||||
DEREF(INDEX(uint32_t, args.internal_ready_count, parent_index)), 1, gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
if (ready_child_count != valid_child_count_minus_one)
|
||||
break;
|
||||
|
||||
uint32_t child_internal_id = DEREF(src_node).internal_base_id;
|
||||
uint32_t child_primitive_id = DEREF(src_node).primitive_base_id;
|
||||
|
||||
DEREF(dst_node).internal_base_id = child_internal_id;
|
||||
DEREF(dst_node).primitive_base_id = child_primitive_id;
|
||||
|
||||
uint32_t child_offsets[8];
|
||||
vk_aabb total_bounds = vk_aabb(vec3(INFINITY), vec3(-INFINITY));
|
||||
for (uint32_t i = 0; i <= valid_child_count_minus_one; i++) {
|
||||
radv_gfx12_box_child child = DEREF(src_node).children[i];
|
||||
uint32_t child_type = (child.dword2 >> 24) & 0xf;
|
||||
uint32_t child_size_id = (child.dword2 >> 28) * RADV_GFX12_BVH_NODE_SIZE / 8;
|
||||
|
||||
uint32_t child_id;
|
||||
if (child_type == radv_bvh_node_box32) {
|
||||
child_id = child_internal_id;
|
||||
child_internal_id += child_size_id;
|
||||
} else {
|
||||
child_id = child_primitive_id;
|
||||
child_primitive_id += child_size_id;
|
||||
}
|
||||
|
||||
child_offsets[i] = id_to_offset(child_id);
|
||||
|
||||
uint32_t child_offset = child_offsets[i];
|
||||
vk_aabb child_aabb;
|
||||
if (child_offset == dst_offset) {
|
||||
child_aabb = bounds;
|
||||
} else {
|
||||
uint32_t child_index;
|
||||
if (child_offset >= internal_nodes_offset) {
|
||||
child_index =
|
||||
(child_offset - internal_nodes_offset) / SIZEOF(radv_gfx12_box_node) + 1 + args.leaf_node_count;
|
||||
} else {
|
||||
child_index = (child_offset - first_leaf_offset) / leaf_node_size;
|
||||
}
|
||||
|
||||
child_aabb = DEREF(INDEX(vk_aabb, args.leaf_bounds, child_index));
|
||||
}
|
||||
|
||||
total_bounds.min = min(total_bounds.min, child_aabb.min);
|
||||
total_bounds.max = max(total_bounds.max, child_aabb.max);
|
||||
}
|
||||
|
||||
vec3 origin = total_bounds.min;
|
||||
vec3 extent = total_bounds.max - total_bounds.min;
|
||||
|
||||
extent = uintBitsToFloat((floatBitsToUint(extent) + uvec3(0x7fffff)) & 0x7f800000);
|
||||
uvec3 extent_exponents = floatBitsToUint(extent) >> 23;
|
||||
|
||||
DEREF(dst_node).origin = origin;
|
||||
DEREF(dst_node).child_count_exponents = extent_exponents.x | (extent_exponents.y << 8) |
|
||||
(extent_exponents.z << 16) | (valid_child_count_minus_one << 28);
|
||||
DEREF(dst_node).obb_matrix_index = 0x7f;
|
||||
|
||||
for (uint32_t i = 0; i <= valid_child_count_minus_one; i++) {
|
||||
uint32_t child_offset = child_offsets[i];
|
||||
vk_aabb child_aabb;
|
||||
if (child_offset == dst_offset) {
|
||||
child_aabb = bounds;
|
||||
} else {
|
||||
uint32_t child_index;
|
||||
if (child_offset >= internal_nodes_offset) {
|
||||
child_index =
|
||||
(child_offset - internal_nodes_offset) / SIZEOF(radv_gfx12_box_node) + 1 + args.leaf_node_count;
|
||||
} else {
|
||||
child_index = (child_offset - first_leaf_offset) / leaf_node_size;
|
||||
}
|
||||
|
||||
child_aabb = DEREF(INDEX(vk_aabb, args.leaf_bounds, child_index));
|
||||
}
|
||||
|
||||
radv_gfx12_box_child child = DEREF(src_node).children[i];
|
||||
|
||||
radv_gfx12_box_child box_child;
|
||||
box_child.dword0 =
|
||||
(child.dword0 & 0xFF000000) |
|
||||
min(uint32_t(floor((child_aabb.min.x - origin.x) / extent.x * float(0x1000))), 0xfff) |
|
||||
(min(uint32_t(floor((child_aabb.min.y - origin.y) / extent.y * float(0x1000))), 0xfff) << 12);
|
||||
box_child.dword1 =
|
||||
(child.dword1 & 0xFF000000) |
|
||||
min(uint32_t(floor((child_aabb.min.z - origin.z) / extent.z * float(0x1000))), 0xfff) |
|
||||
(min(uint32_t(ceil((child_aabb.max.x - origin.x) / extent.x * float(0x1000))) - 1, 0xfff) << 12);
|
||||
box_child.dword2 =
|
||||
(child.dword2 & 0xFF000000) |
|
||||
min(uint32_t(ceil((child_aabb.max.y - origin.y) / extent.y * float(0x1000))) - 1, 0xfff) |
|
||||
(min(uint32_t(ceil((child_aabb.max.z - origin.z) / extent.z * float(0x1000))) - 1, 0xfff) << 12);
|
||||
DEREF(dst_node).children[i] = box_child;
|
||||
}
|
||||
|
||||
for (uint32_t i = valid_child_count_minus_one + 1; i < 8; i++) {
|
||||
radv_gfx12_box_child null_child;
|
||||
null_child.dword0 = 0xffffffff;
|
||||
null_child.dword1 = 0xfff;
|
||||
null_child.dword2 = 0;
|
||||
DEREF(dst_node).children[i] = null_child;
|
||||
}
|
||||
|
||||
if (parent_id == RADV_BVH_ROOT_NODE)
|
||||
DEREF(args.dst).aabb = total_bounds;
|
||||
|
||||
DEREF(INDEX(vk_aabb, args.leaf_bounds, parent_index + args.leaf_node_count)) = total_bounds;
|
||||
|
||||
parent_id = fetch_parent_node(src_bvh, parent_id);
|
||||
}
|
||||
}
|
||||
|
|
@ -147,6 +147,7 @@ libradv_files = files(
|
|||
'radv_rmv.c',
|
||||
'radv_rmv.h',
|
||||
'radv_rra_gfx10_3.c',
|
||||
'radv_rra_gfx12.c',
|
||||
'radv_rra.c',
|
||||
'radv_rra.h',
|
||||
'radv_sampler.c',
|
||||
|
|
|
|||
|
|
@ -241,8 +241,11 @@ enum rq_intersection_type { intersection_type_none, intersection_type_triangle,
|
|||
|
||||
static void
|
||||
lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
|
||||
struct radv_instance *instance)
|
||||
struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
nir_deref_instr *closest = rq_deref(b, rq, closest);
|
||||
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
|
||||
|
||||
|
|
@ -270,7 +273,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
|
||||
.access = ACCESS_NON_WRITEABLE);
|
||||
nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
|
||||
bvh_base = build_addr_to_node(b, bvh_base);
|
||||
bvh_base = build_addr_to_node(device, b, bvh_base, instr->src[2].ssa);
|
||||
|
||||
rq_store(b, rq, root_bvh_base, bvh_base);
|
||||
rq_store(b, rq, trav_bvh_base, bvh_base);
|
||||
|
|
@ -320,44 +323,27 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
return isec_load(b, intersection, frontface);
|
||||
case nir_ray_query_value_intersection_geometry_index:
|
||||
return nir_iand_imm(b, isec_load(b, intersection, geometry_id_and_flags), 0xFFFFFF);
|
||||
case nir_ray_query_value_intersection_instance_custom_index: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
return nir_iand_imm(
|
||||
b,
|
||||
nir_build_load_global(
|
||||
b, 1, 32,
|
||||
nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))),
|
||||
0xFFFFFF);
|
||||
}
|
||||
case nir_ray_query_value_intersection_instance_id: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
return nir_build_load_global(
|
||||
b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
|
||||
}
|
||||
case nir_ray_query_value_intersection_instance_custom_index:
|
||||
return radv_load_custom_instance(device, b, isec_load(b, intersection, instance_addr));
|
||||
case nir_ray_query_value_intersection_instance_id:
|
||||
return radv_load_instance_id(device, b, isec_load(b, intersection, instance_addr));
|
||||
case nir_ray_query_value_intersection_instance_sbt_index:
|
||||
return nir_iand_imm(b, isec_load(b, intersection, sbt_offset_and_flags), 0xFFFFFF);
|
||||
case nir_ray_query_value_intersection_object_ray_direction: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
return nir_build_vec3_mat_mult(b, rq_load(b, rq, direction), wto_matrix, false);
|
||||
}
|
||||
case nir_ray_query_value_intersection_object_ray_origin: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
return nir_build_vec3_mat_mult(b, rq_load(b, rq, origin), wto_matrix, true);
|
||||
}
|
||||
case nir_ray_query_value_intersection_object_to_world: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
nir_def *rows[3];
|
||||
for (unsigned r = 0; r < 3; ++r)
|
||||
rows[r] = nir_build_load_global(
|
||||
b, 4, 32,
|
||||
nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
|
||||
|
||||
return nir_vec3(b, nir_channel(b, rows[0], column), nir_channel(b, rows[1], column),
|
||||
nir_channel(b, rows[2], column));
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(device, b, isec_load(b, intersection, instance_addr), otw_matrix);
|
||||
return nir_vec3(b, nir_channel(b, otw_matrix[0], column), nir_channel(b, otw_matrix[1], column),
|
||||
nir_channel(b, otw_matrix[2], column));
|
||||
}
|
||||
case nir_ray_query_value_intersection_primitive_index:
|
||||
return isec_load(b, intersection, primitive_id);
|
||||
|
|
@ -371,10 +357,8 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
return intersection_type;
|
||||
}
|
||||
case nir_ray_query_value_intersection_world_to_object: {
|
||||
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
|
||||
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -477,6 +461,8 @@ static nir_def *
|
|||
lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
|
||||
struct radv_device *device)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_deref_instr *closest = rq_deref(b, rq, closest);
|
||||
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
|
||||
|
||||
|
|
@ -543,7 +529,11 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
|
|||
|
||||
nir_push_if(b, rq_load(b, rq, incomplete));
|
||||
{
|
||||
nir_def *incomplete = radv_build_ray_traversal(device, b, &args);
|
||||
nir_def *incomplete;
|
||||
if (radv_use_bvh8(pdev))
|
||||
incomplete = radv_build_ray_traversal_gfx12(device, b, &args);
|
||||
else
|
||||
incomplete = radv_build_ray_traversal(device, b, &args);
|
||||
rq_store(b, rq, incomplete, nir_iand(b, rq_load(b, rq, incomplete), incomplete));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
|
@ -571,7 +561,7 @@ bool
|
|||
radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
bool progress = false;
|
||||
struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
|
|
@ -626,7 +616,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
|
|||
lower_rq_generate_intersection(&builder, intrinsic, rq);
|
||||
break;
|
||||
case nir_intrinsic_rq_initialize:
|
||||
lower_rq_initialize(&builder, intrinsic, vars, rq, instance);
|
||||
lower_rq_initialize(&builder, intrinsic, vars, rq, device);
|
||||
break;
|
||||
case nir_intrinsic_rq_load:
|
||||
new_dest = lower_rq_load(device, &builder, intrinsic, rq);
|
||||
|
|
|
|||
|
|
@ -267,11 +267,27 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d
|
|||
}
|
||||
|
||||
nir_def *
|
||||
build_addr_to_node(nir_builder *b, nir_def *addr)
|
||||
build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
const uint64_t bvh_size = 1ull << 42;
|
||||
nir_def *node = nir_ushr_imm(b, addr, 3);
|
||||
return nir_iand_imm(b, node, (bvh_size - 1) << 3);
|
||||
node = nir_iand_imm(b, node, (bvh_size - 1) << 3);
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/* The HW ray flags are the same bits as the API flags.
|
||||
* - SpvRayFlagsTerminateOnFirstHitKHRMask, SpvRayFlagsSkipClosestHitShaderKHRMask are handled in shader code.
|
||||
* - SpvRayFlagsSkipTrianglesKHRMask, SpvRayFlagsSkipAABBsKHRMask do not work.
|
||||
*/
|
||||
flags = nir_iand_imm(b, flags,
|
||||
SpvRayFlagsOpaqueKHRMask | SpvRayFlagsNoOpaqueKHRMask |
|
||||
SpvRayFlagsCullBackFacingTrianglesKHRMask | SpvRayFlagsCullFrontFacingTrianglesKHRMask |
|
||||
SpvRayFlagsCullOpaqueKHRMask | SpvRayFlagsCullNoOpaqueKHRMask);
|
||||
node = nir_ior(b, node, nir_ishl_imm(b, nir_u2u64(b, flags), 54));
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -302,20 +318,57 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool tr
|
|||
return nir_vec(b, result_components, 3);
|
||||
}
|
||||
|
||||
void
|
||||
nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), .align_mul = 64,
|
||||
.align_offset = offset + i * 16);
|
||||
}
|
||||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def *geometry_id,
|
||||
nir_def *primitive_id, uint32_t index)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
nir_def *addr_offsets =
|
||||
nir_build_load_global(b, 4, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
offsetof(struct radv_gfx12_instance_node_user_data, blas_addr)));
|
||||
nir_def *bvh_offset =
|
||||
nir_build_load_global(b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
offsetof(struct radv_gfx12_instance_node_user_data, bvh_offset)));
|
||||
|
||||
nir_def *addr = nir_pack_64_2x32(b, nir_channels(b, addr_offsets, 0x3));
|
||||
|
||||
nir_def *base_index_offset =
|
||||
nir_iadd(b, nir_channel(b, addr_offsets, 2), nir_imul_imm(b, geometry_id, sizeof(uint32_t)));
|
||||
nir_def *base_index = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, base_index_offset)));
|
||||
|
||||
nir_def *offset_offset = nir_iadd(b, nir_channel(b, addr_offsets, 3),
|
||||
nir_imul_imm(b, nir_iadd(b, base_index, primitive_id), sizeof(uint32_t)));
|
||||
nir_def *offset = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, offset_offset)));
|
||||
offset = nir_iadd(b, offset, bvh_offset);
|
||||
|
||||
/* Assume that vertices are uncompressed. */
|
||||
offset = nir_iadd_imm(b, offset,
|
||||
ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float));
|
||||
|
||||
nir_def *data[4];
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(data); i++) {
|
||||
data[i] = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, offset)));
|
||||
offset = nir_iadd_imm(b, offset, 4);
|
||||
}
|
||||
|
||||
uint32_t subdword_offset = RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE % 32;
|
||||
|
||||
nir_def *vertices[3];
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(vertices); i++) {
|
||||
nir_def *lo = nir_ubitfield_extract_imm(b, data[i], subdword_offset, 32 - subdword_offset);
|
||||
nir_def *hi = nir_ubitfield_extract_imm(b, data[i + 1], 0, subdword_offset);
|
||||
vertices[i] = nir_ior(b, lo, nir_ishl_imm(b, hi, 32 - subdword_offset));
|
||||
}
|
||||
|
||||
return nir_vec3(b, vertices[0], vertices[1], vertices[2]);
|
||||
}
|
||||
|
||||
nir_def *bvh_addr_id =
|
||||
nir_build_load_global(b, 1, 64, nir_iadd_imm(b, instance_addr, offsetof(struct radv_bvh_instance_node, bvh_ptr)));
|
||||
nir_def *bvh_addr = build_node_to_addr(device, b, bvh_addr_id, true);
|
||||
|
|
@ -335,6 +388,74 @@ radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *i
|
|||
return nir_build_load_global(b, 3, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
|
||||
}
|
||||
|
||||
void
|
||||
radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
|
||||
if (radv_use_bvh8(pdev))
|
||||
offset = offsetof(struct radv_gfx12_instance_node, wto_matrix);
|
||||
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), .align_mul = 64,
|
||||
.align_offset = (offset + i * 16) % 64);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
unsigned offset = offsetof(struct radv_bvh_instance_node, otw_matrix);
|
||||
if (radv_use_bvh8(pdev))
|
||||
offset =
|
||||
sizeof(struct radv_gfx12_instance_node) + offsetof(struct radv_gfx12_instance_node_user_data, otw_matrix);
|
||||
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16), .align_mul = 64,
|
||||
.align_offset = (offset + i * 16) % 64);
|
||||
}
|
||||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
return nir_build_load_global(
|
||||
b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
offsetof(struct radv_gfx12_instance_node_user_data, custom_instance)));
|
||||
}
|
||||
|
||||
return nir_iand_imm(
|
||||
b,
|
||||
nir_build_load_global(
|
||||
b, 1, 32, nir_iadd_imm(b, instance_addr, offsetof(struct radv_bvh_instance_node, custom_instance_and_mask))),
|
||||
0xFFFFFF);
|
||||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
return nir_build_load_global(
|
||||
b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
offsetof(struct radv_gfx12_instance_node_user_data, instance_index)));
|
||||
}
|
||||
|
||||
return nir_build_load_global(b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
|
||||
}
|
||||
|
||||
/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
|
||||
* is assumed to be an actual hit. */
|
||||
static nir_def *
|
||||
|
|
@ -356,11 +477,12 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
|
|||
* instances at the cost of having to use 64-bit node ids. */
|
||||
const uint64_t bvh_size = 1ull << 42;
|
||||
|
||||
const uint32_t sort_triangles_first = radv_use_bvh8(pdev) ? BITFIELD_BIT(52 - 32) : 0;
|
||||
const uint32_t box_sort_enable = BITFIELD_BIT(63 - 32);
|
||||
const uint32_t triangle_return_mode = BITFIELD_BIT(120 - 96); /* Return IJ for triangles */
|
||||
|
||||
uint32_t dword0 = 0;
|
||||
nir_def *dword1 = nir_imm_intN_t(b, box_sort_enable, 32);
|
||||
nir_def *dword1 = nir_imm_intN_t(b, sort_triangles_first | box_sort_enable, 32);
|
||||
uint32_t dword2 = (bvh_size - 1) & 0xFFFFFFFFu;
|
||||
uint32_t dword3 = ((bvh_size - 1) >> 32) | triangle_return_mode | (1u << 31);
|
||||
|
||||
|
|
@ -373,9 +495,20 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
|
|||
/* Only use largest/midpoint sorting when all invocations have the same ray flags, otherwise
|
||||
* fall back to the default closest point. */
|
||||
dword1 = nir_bcsel(b, nir_vote_any(b, 1, ray_flags->terminate_on_first_hit), dword1,
|
||||
nir_imm_int(b, (box_sort_midpoint << 21) | box_sort_enable));
|
||||
nir_imm_int(b, (box_sort_midpoint << 21) | sort_triangles_first | box_sort_enable));
|
||||
dword1 = nir_bcsel(b, nir_vote_all(b, 1, ray_flags->terminate_on_first_hit),
|
||||
nir_imm_int(b, (box_sort_largest << 21) | box_sort_enable), dword1);
|
||||
nir_imm_int(b, (box_sort_largest << 21) | sort_triangles_first | box_sort_enable), dword1);
|
||||
}
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/* compressed_format_en */
|
||||
dword3 |= BITFIELD_BIT(115 - 96);
|
||||
/* wide_sort_en */
|
||||
dword3 |= BITFIELD_BIT(117 - 96);
|
||||
/* instance_en */
|
||||
dword3 |= BITFIELD_BIT(118 - 96);
|
||||
/* pointer_flags */
|
||||
dword3 |= BITFIELD_BIT(119 - 96);
|
||||
}
|
||||
|
||||
return nir_vec4(b, nir_imm_intN_t(b, dword0, 32), dword1, nir_imm_intN_t(b, dword2, 32), nir_imm_intN_t(b, dword3, 32));
|
||||
|
|
@ -439,6 +572,36 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
|
|||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags,
|
||||
nir_def *result, nir_def *bvh_node)
|
||||
{
|
||||
if (!args->triangle_cb)
|
||||
return;
|
||||
|
||||
struct radv_triangle_intersection intersection;
|
||||
intersection.t = nir_channel(b, result, 0);
|
||||
|
||||
nir_push_if(b, nir_iand(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)),
|
||||
nir_flt(b, args->tmin, intersection.t)));
|
||||
{
|
||||
intersection.frontface = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 3), 1));
|
||||
intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
intersection.base.primitive_id = nir_ishr_imm(b, nir_channel(b, result, 3), 1);
|
||||
intersection.base.geometry_id_and_flags = nir_ishr_imm(b, nir_channel(b, result, 8), 2);
|
||||
intersection.base.opaque = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 2), 1u << 31));
|
||||
intersection.barycentrics = nir_fabs(b, nir_channels(b, result, 0x3 << 1));
|
||||
|
||||
nir_push_if(b, nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque));
|
||||
{
|
||||
args->triangle_cb(b, &intersection, args, ray_flags);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags, nir_def *bvh_node)
|
||||
|
|
@ -466,11 +629,31 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
|
|||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
fetch_parent_node(nir_builder *b, nir_def *bvh, nir_def *node)
|
||||
static void
|
||||
insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
|
||||
{
|
||||
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);
|
||||
if (!args->aabb_cb)
|
||||
return;
|
||||
|
||||
struct radv_leaf_intersection intersection;
|
||||
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
intersection.primitive_id = nir_ishr_imm(b, nir_channel(b, result, 3), 1);
|
||||
intersection.geometry_id_and_flags = nir_ishr_imm(b, nir_channel(b, result, 8), 2);
|
||||
intersection.opaque = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 2), 1u << 31));
|
||||
|
||||
nir_push_if(b, nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque));
|
||||
{
|
||||
args->aabb_cb(b, &intersection, args);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
fetch_parent_node(struct radv_device *device, nir_builder *b, nir_def *bvh, nir_def *node)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, radv_use_bvh8(pdev) ? 16 : 8), 4), 4);
|
||||
return nir_build_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
|
||||
}
|
||||
|
||||
|
|
@ -547,7 +730,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
|
||||
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
|
||||
nir_def *parent = fetch_parent_node(b, bvh_addr, prev);
|
||||
nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev);
|
||||
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
|
||||
{
|
||||
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
|
||||
|
|
@ -615,7 +798,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_build_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
|
||||
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(device, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
|
||||
|
||||
|
|
@ -718,3 +901,205 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
|
||||
return nir_load_var(b, incomplete);
|
||||
}
|
||||
|
||||
nir_def *
|
||||
radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
|
||||
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
|
||||
|
||||
struct radv_ray_flags ray_flags = {
|
||||
.force_opaque = radv_test_flag(b, args, SpvRayFlagsOpaqueKHRMask, true),
|
||||
.force_not_opaque = radv_test_flag(b, args, SpvRayFlagsNoOpaqueKHRMask, true),
|
||||
.terminate_on_first_hit = radv_test_flag(b, args, SpvRayFlagsTerminateOnFirstHitKHRMask, true),
|
||||
.no_cull_front = radv_test_flag(b, args, SpvRayFlagsCullFrontFacingTrianglesKHRMask, false),
|
||||
.no_cull_back = radv_test_flag(b, args, SpvRayFlagsCullBackFacingTrianglesKHRMask, false),
|
||||
.no_cull_opaque = radv_test_flag(b, args, SpvRayFlagsCullOpaqueKHRMask, false),
|
||||
.no_cull_no_opaque = radv_test_flag(b, args, SpvRayFlagsCullNoOpaqueKHRMask, false),
|
||||
.no_skip_triangles = radv_test_flag(b, args, SpvRayFlagsSkipTrianglesKHRMask, false),
|
||||
.no_skip_aabbs = radv_test_flag(b, args, SpvRayFlagsSkipAABBsKHRMask, false),
|
||||
};
|
||||
|
||||
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
|
||||
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE));
|
||||
{
|
||||
/* Early exit if we never overflowed the stack, to avoid having to backtrack to
|
||||
* the root for no reason. */
|
||||
nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_base + args->stack_stride));
|
||||
{
|
||||
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
|
||||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_def *stack_instance_exit =
|
||||
nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack));
|
||||
nir_def *root_instance_exit =
|
||||
nir_ieq(b, nir_load_deref(b, args->vars.previous_node), nir_load_deref(b, args->vars.instance_bottom_node));
|
||||
nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
|
||||
instance_exit->control = nir_selection_control_dont_flatten;
|
||||
{
|
||||
nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1);
|
||||
nir_store_deref(b, args->vars.previous_node, nir_load_deref(b, args->vars.instance_top_node), 1);
|
||||
nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
|
||||
|
||||
nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
|
||||
nir_store_deref(b, args->vars.origin, args->origin, 7);
|
||||
nir_store_deref(b, args->vars.dir, args->dir, 7);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_push_if(
|
||||
b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), nir_load_deref(b, args->vars.stack)));
|
||||
{
|
||||
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
|
||||
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
|
||||
nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev);
|
||||
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
|
||||
{
|
||||
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
|
||||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
nir_store_deref(b, args->vars.current_node, parent, 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_store_deref(b, args->vars.stack,
|
||||
nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
|
||||
|
||||
nir_def *stack_ptr =
|
||||
nir_umod_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride * args->stack_entries);
|
||||
nir_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
|
||||
nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
|
||||
nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);
|
||||
|
||||
nir_def *prev_node = nir_load_deref(b, args->vars.previous_node);
|
||||
nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
|
||||
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
|
||||
|
||||
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
|
||||
|
||||
nir_def *result =
|
||||
nir_bvh8_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, nir_load_deref(b, args->vars.bvh_base)),
|
||||
nir_ishr_imm(b, args->cull_mask, 24), nir_load_deref(b, args->vars.tmax),
|
||||
nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), bvh_node);
|
||||
|
||||
nir_push_if(b, nir_test_mask(b, bvh_node, BITFIELD64_BIT(ffs(radv_bvh_node_box16) - 1)));
|
||||
{
|
||||
nir_push_if(b, nir_test_mask(b, bvh_node, BITFIELD64_BIT(ffs(radv_bvh_node_instance) - 1)));
|
||||
{
|
||||
if (args->vars.iteration_instance_count) {
|
||||
nir_def *iteration_instance_count = nir_load_deref(b, args->vars.iteration_instance_count);
|
||||
iteration_instance_count = nir_iadd_imm(b, iteration_instance_count, 1 << 16);
|
||||
nir_store_deref(b, args->vars.iteration_instance_count, iteration_instance_count, 0x1);
|
||||
}
|
||||
|
||||
nir_def *next_node = nir_iand_imm(b, nir_channel(b, result, 7), 0xff);
|
||||
nir_push_if(b, nir_ieq_imm(b, next_node, 0xff));
|
||||
nir_jump(b, nir_jump_continue);
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
/* instance */
|
||||
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
|
||||
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
|
||||
|
||||
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, result, 6), 1);
|
||||
|
||||
nir_store_deref(b, args->vars.origin, nir_channels(b, result, 0x7 << 10), 0x7);
|
||||
nir_store_deref(b, args->vars.dir, nir_channels(b, result, 0x7 << 13), 0x7);
|
||||
|
||||
nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
|
||||
nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_channels(b, result, 0x3 << 2)), 1);
|
||||
|
||||
/* Push the instance root node onto the stack */
|
||||
nir_store_deref(b, args->vars.current_node, next_node, 0x1);
|
||||
nir_store_deref(b, args->vars.instance_bottom_node, next_node, 1);
|
||||
nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* box */
|
||||
nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE));
|
||||
{
|
||||
nir_def *new_nodes[8];
|
||||
for (unsigned i = 0; i < 8; ++i)
|
||||
new_nodes[i] = nir_channel(b, result, i);
|
||||
|
||||
for (unsigned i = 1; i < 8; ++i)
|
||||
nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE));
|
||||
|
||||
for (unsigned i = 8; i-- > 1;) {
|
||||
nir_def *stack = nir_load_deref(b, args->vars.stack);
|
||||
nir_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
|
||||
args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
|
||||
nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1);
|
||||
|
||||
if (i == 1) {
|
||||
nir_def *new_watermark =
|
||||
nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_entries * args->stack_stride);
|
||||
new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark);
|
||||
nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
|
||||
}
|
||||
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
|
||||
for (unsigned i = 0; i < 7; ++i) {
|
||||
next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), nir_channel(b, result, i + 1),
|
||||
next);
|
||||
}
|
||||
nir_store_deref(b, args->vars.current_node, next, 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_push_if(b, nir_test_mask(b, nir_channel(b, result, 1), 1u << 31));
|
||||
{
|
||||
nir_push_if(b, ray_flags.no_skip_aabbs);
|
||||
insert_traversal_aabb_case_gfx12(device, b, args, &ray_flags, result, global_bvh_node);
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_push_if(b, ray_flags.no_skip_triangles);
|
||||
insert_traversal_triangle_case_gfx12(device, b, args, &ray_flags, result, global_bvh_node);
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
if (args->vars.iteration_instance_count) {
|
||||
nir_def *iteration_instance_count = nir_load_deref(b, args->vars.iteration_instance_count);
|
||||
iteration_instance_count = nir_iadd_imm(b, iteration_instance_count, 1);
|
||||
nir_store_deref(b, args->vars.iteration_instance_count, iteration_instance_count, 0x1);
|
||||
}
|
||||
}
|
||||
nir_pop_loop(b, NULL);
|
||||
|
||||
return nir_load_var(b, incomplete);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,15 +14,21 @@
|
|||
|
||||
struct radv_device;
|
||||
|
||||
nir_def *build_addr_to_node(nir_builder *b, nir_def *addr);
|
||||
nir_def *build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags);
|
||||
|
||||
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
|
||||
|
||||
void nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *instance_addr,
|
||||
nir_def *geometry_id, nir_def *primitive_id, uint32_t index);
|
||||
|
||||
void radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
void radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
nir_def *radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
|
||||
|
||||
nir_def *radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
|
||||
|
||||
struct radv_ray_traversal_args;
|
||||
|
||||
struct radv_ray_flags {
|
||||
|
|
@ -146,4 +152,7 @@ struct radv_ray_traversal_args {
|
|||
nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
nir_def *radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
#endif /* RADV_NIR_RT_COMMON_H */
|
||||
|
|
|
|||
|
|
@ -530,11 +530,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_instance_custom_index: {
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *custom_instance_and_mask = nir_build_load_global(
|
||||
b, 1, 32,
|
||||
nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, custom_instance_and_mask)));
|
||||
ret = nir_iand_imm(b, custom_instance_and_mask, 0xFFFFFF);
|
||||
ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_primitive_id: {
|
||||
|
|
@ -547,9 +543,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
ret = nir_build_load_global(
|
||||
b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
|
||||
ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_flags: {
|
||||
|
|
@ -564,7 +558,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -575,26 +569,21 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
}
|
||||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *rows[3];
|
||||
for (unsigned r = 0; r < 3; ++r)
|
||||
rows[r] = nir_build_load_global(
|
||||
b, 4, 32,
|
||||
nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, otw_matrix) + r * 16));
|
||||
ret = nir_vec3(b, nir_channel(b, rows[0], c), nir_channel(b, rows[1], c), nir_channel(b, rows[2], c));
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix);
|
||||
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
|
||||
nir_channel(b, otw_matrix[2], c));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_origin: {
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_direction: {
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1526,6 +1515,8 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
|
||||
struct rt_traversal_vars trav_vars = init_traversal_vars(b);
|
||||
|
||||
nir_def *cull_mask_and_flags = nir_load_var(b, vars->cull_mask_and_flags);
|
||||
|
||||
nir_store_var(b, trav_vars.hit, nir_imm_false(b), 1);
|
||||
|
||||
nir_def *accel_struct = nir_load_var(b, vars->accel_struct);
|
||||
|
|
@ -1533,7 +1524,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
|
||||
.access = ACCESS_NON_WRITEABLE);
|
||||
nir_def *root_bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
|
||||
root_bvh_base = build_addr_to_node(b, root_bvh_base);
|
||||
root_bvh_base = build_addr_to_node(device, b, root_bvh_base, cull_mask_and_flags);
|
||||
|
||||
nir_store_var(b, trav_vars.bvh_base, root_bvh_base, 1);
|
||||
|
||||
|
|
@ -1589,7 +1580,6 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
.pipeline = pipeline,
|
||||
};
|
||||
|
||||
nir_def *cull_mask_and_flags = nir_load_var(b, vars->cull_mask_and_flags);
|
||||
struct radv_ray_traversal_args args = {
|
||||
.root_bvh_base = root_bvh_base,
|
||||
.flags = cull_mask_and_flags,
|
||||
|
|
@ -1617,7 +1607,10 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
|
||||
nir_def *original_tmax = nir_load_var(b, vars->tmax);
|
||||
|
||||
radv_build_ray_traversal(device, b, &args);
|
||||
if (radv_use_bvh8(pdev))
|
||||
radv_build_ray_traversal_gfx12(device, b, &args);
|
||||
else
|
||||
radv_build_ray_traversal(device, b, &args);
|
||||
|
||||
if (vars->device->rra_trace.ray_history_addr)
|
||||
radv_build_end_trace_token(b, vars, original_tmax, nir_load_var(b, trav_vars.hit),
|
||||
|
|
|
|||
|
|
@ -16,10 +16,18 @@
|
|||
#include "vk_acceleration_structure.h"
|
||||
#include "vk_common_entrypoints.h"
|
||||
|
||||
static const uint32_t copy_blas_addrs_gfx12_spv[] = {
|
||||
#include "bvh/copy_blas_addrs_gfx12.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t copy_spv[] = {
|
||||
#include "bvh/copy.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t copy_gfx12_spv[] = {
|
||||
#include "bvh/copy_gfx12.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t encode_spv[] = {
|
||||
#include "bvh/encode.spv.h"
|
||||
};
|
||||
|
|
@ -28,6 +36,10 @@ static const uint32_t encode_compact_spv[] = {
|
|||
#include "bvh/encode_compact.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t encode_gfx12_spv[] = {
|
||||
#include "bvh/encode_gfx12.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t header_spv[] = {
|
||||
#include "bvh/header.spv.h"
|
||||
};
|
||||
|
|
@ -36,6 +48,10 @@ static const uint32_t update_spv[] = {
|
|||
#include "bvh/update.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t update_gfx12_spv[] = {
|
||||
#include "bvh/update_gfx12.spv.h"
|
||||
};
|
||||
|
||||
static const uint32_t leaf_spv[] = {
|
||||
#include "bvh/radv_leaf.spv.h"
|
||||
};
|
||||
|
|
@ -47,6 +63,7 @@ static const uint32_t leaf_always_active_spv[] = {
|
|||
struct acceleration_structure_layout {
|
||||
uint32_t geometry_info_offset;
|
||||
uint32_t primitive_base_indices_offset;
|
||||
uint32_t leaf_node_offsets_offset;
|
||||
uint32_t bvh_offset;
|
||||
uint32_t leaf_nodes_offset;
|
||||
uint32_t internal_nodes_offset;
|
||||
|
|
@ -68,26 +85,50 @@ radv_get_acceleration_structure_layout(struct radv_device *device, uint32_t leaf
|
|||
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
||||
struct acceleration_structure_layout *accel_struct)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
|
||||
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(build_info);
|
||||
|
||||
uint32_t bvh_leaf_size;
|
||||
switch (geometry_type) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown VkGeometryTypeKHR");
|
||||
uint32_t bvh_node_size_gcd;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
switch (geometry_type) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_gfx12_instance_node) + sizeof(struct radv_gfx12_instance_node_user_data);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown VkGeometryTypeKHR");
|
||||
}
|
||||
bvh_node_size_gcd = RADV_GFX12_BVH_NODE_SIZE;
|
||||
} else {
|
||||
switch (geometry_type) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
||||
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown VkGeometryTypeKHR");
|
||||
}
|
||||
bvh_node_size_gcd = 64;
|
||||
}
|
||||
|
||||
uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
|
||||
uint32_t internal_node_size =
|
||||
radv_use_bvh8(pdev) ? sizeof(struct radv_gfx12_box_node) : sizeof(struct radv_bvh_box32_node);
|
||||
|
||||
uint64_t bvh_size = bvh_leaf_size * leaf_count + internal_node_size * internal_count;
|
||||
uint32_t offset = 0;
|
||||
offset += sizeof(struct radv_accel_struct_header);
|
||||
|
||||
|
|
@ -101,23 +142,30 @@ radv_get_acceleration_structure_layout(struct radv_device *device, uint32_t leaf
|
|||
offset += sizeof(uint32_t) * build_info->geometryCount;
|
||||
}
|
||||
|
||||
/* On GFX12, we need additional space for leaf node offsets since they do not have the same
|
||||
* order as the application provided data.
|
||||
*/
|
||||
accel_struct->leaf_node_offsets_offset = offset;
|
||||
if (radv_use_bvh8(pdev))
|
||||
offset += leaf_count * 4;
|
||||
|
||||
/* Parent links, which have to go directly before bvh_offset as we index them using negative
|
||||
* offsets from there. */
|
||||
offset += bvh_size / 64 * 4;
|
||||
offset += bvh_size / bvh_node_size_gcd * 4;
|
||||
|
||||
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
|
||||
offset = ALIGN(offset, 64);
|
||||
accel_struct->bvh_offset = offset;
|
||||
|
||||
/* root node */
|
||||
offset += sizeof(struct radv_bvh_box32_node);
|
||||
offset += internal_node_size;
|
||||
|
||||
accel_struct->leaf_nodes_offset = offset;
|
||||
offset += bvh_leaf_size * leaf_count;
|
||||
|
||||
accel_struct->internal_nodes_offset = offset;
|
||||
/* Factor out the root node. */
|
||||
offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
|
||||
offset += internal_node_size * (internal_count - 1);
|
||||
|
||||
accel_struct->size = offset;
|
||||
}
|
||||
|
|
@ -134,7 +182,7 @@ radv_get_scratch_layout(struct radv_device *device, uint32_t leaf_count, struct
|
|||
|
||||
uint32_t update_offset = 0;
|
||||
|
||||
update_offset += sizeof(vk_aabb) * leaf_count;
|
||||
update_offset += sizeof(vk_aabb) * (leaf_count + internal_count);
|
||||
scratch->internal_ready_count_offset = update_offset;
|
||||
|
||||
update_offset += sizeof(uint32_t) * internal_count;
|
||||
|
|
@ -154,6 +202,10 @@ radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStruc
|
|||
STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
|
||||
STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
|
||||
STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
|
||||
STATIC_ASSERT(sizeof(struct radv_gfx12_box_node) == RADV_GFX12_BVH_NODE_SIZE);
|
||||
STATIC_ASSERT(sizeof(struct radv_gfx12_primitive_node) == RADV_GFX12_BVH_NODE_SIZE);
|
||||
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node) == RADV_GFX12_BVH_NODE_SIZE);
|
||||
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node_user_data) == RADV_GFX12_BVH_NODE_SIZE);
|
||||
|
||||
if (radv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
|
||||
return;
|
||||
|
|
@ -170,6 +222,7 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
|
|||
struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
|
||||
|
||||
dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
|
||||
dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_blas_addrs_gfx12_pipeline, &state->alloc);
|
||||
dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
|
||||
dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
|
||||
dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
|
||||
|
|
@ -257,7 +310,11 @@ radv_device_init_null_accel_struct(struct radv_device *device)
|
|||
VkDevice _device = radv_device_to_handle(device);
|
||||
|
||||
uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
|
||||
uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
|
||||
uint32_t size = bvh_offset;
|
||||
if (radv_use_bvh8(pdev))
|
||||
size += sizeof(struct radv_gfx12_box_node);
|
||||
else
|
||||
size += sizeof(struct radv_bvh_box32_node);
|
||||
|
||||
VkResult result;
|
||||
|
||||
|
|
@ -321,28 +378,44 @@ radv_device_init_null_accel_struct(struct radv_device *device)
|
|||
};
|
||||
memcpy(data, &header, sizeof(struct radv_accel_struct_header));
|
||||
|
||||
struct radv_bvh_box32_node root = {
|
||||
.children =
|
||||
{
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
},
|
||||
};
|
||||
|
||||
for (uint32_t child = 0; child < 4; child++) {
|
||||
root.coords[child] = (vk_aabb){
|
||||
.min.x = NAN,
|
||||
.min.y = NAN,
|
||||
.min.z = NAN,
|
||||
.max.x = NAN,
|
||||
.max.y = NAN,
|
||||
.max.z = NAN,
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
struct radv_gfx12_box_node root = {
|
||||
.obb_matrix_index = 0x7f,
|
||||
};
|
||||
}
|
||||
|
||||
memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
|
||||
for (uint32_t child = 0; child < 8; child++) {
|
||||
root.children[child] = (struct radv_gfx12_box_child){
|
||||
.dword0 = 0xffffffff,
|
||||
.dword1 = 0xfff,
|
||||
.dword2 = 0,
|
||||
};
|
||||
}
|
||||
|
||||
memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_gfx12_box_node));
|
||||
} else {
|
||||
struct radv_bvh_box32_node root = {
|
||||
.children =
|
||||
{
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE,
|
||||
},
|
||||
};
|
||||
|
||||
for (uint32_t child = 0; child < 4; child++) {
|
||||
root.coords[child] = (vk_aabb){
|
||||
.min.x = NAN,
|
||||
.min.y = NAN,
|
||||
.min.z = NAN,
|
||||
.max.x = NAN,
|
||||
.max.y = NAN,
|
||||
.max.z = NAN,
|
||||
};
|
||||
}
|
||||
|
||||
memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
|
||||
}
|
||||
|
||||
vk_common_UnmapMemory(_device, memory);
|
||||
|
||||
|
|
@ -385,9 +458,15 @@ radv_get_update_scratch_size(struct vk_device *vk_device, uint32_t leaf_count)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
radv_get_encode_key(struct vk_device *device, VkAccelerationStructureTypeKHR type,
|
||||
radv_get_encode_key(struct vk_device *vk_device, VkAccelerationStructureTypeKHR type,
|
||||
VkBuildAccelerationStructureFlagBitsKHR flags)
|
||||
{
|
||||
struct radv_device *device = container_of(vk_device, struct radv_device, vk);
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_use_bvh8(pdev))
|
||||
return RADV_ENCODE_KEY_COMPACT;
|
||||
|
||||
if (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
|
||||
return RADV_ENCODE_KEY_COMPACT;
|
||||
|
||||
|
|
@ -401,9 +480,10 @@ radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
|
|||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
|
||||
bool compact = key & RADV_ENCODE_KEY_COMPACT;
|
||||
device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
|
||||
: device->meta_state.accel_struct_build.encode_pipeline);
|
||||
VkPipeline pipeline = compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
|
||||
: device->meta_state.accel_struct_build.encode_pipeline;
|
||||
|
||||
device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -448,6 +528,47 @@ radv_encode_as(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuild
|
|||
radv_compute_dispatch(cmd_buffer, &dispatch);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_encode_as_gfx12(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *build_range_infos,
|
||||
VkDeviceAddress intermediate_as_addr, VkDeviceAddress intermediate_header_addr,
|
||||
uint32_t leaf_count, uint32_t key, struct vk_acceleration_structure *dst)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
|
||||
struct acceleration_structure_layout layout;
|
||||
radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);
|
||||
|
||||
uint32_t dst_internal_nodes_offset = layout.internal_nodes_offset - layout.bvh_offset;
|
||||
uint32_t dst_leaf_nodes_offset = layout.leaf_nodes_offset - layout.bvh_offset;
|
||||
uint32_t offsets[2] = {dst_internal_nodes_offset, dst_leaf_nodes_offset};
|
||||
radv_update_buffer_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, dst_node_offset), offsets,
|
||||
sizeof(offsets));
|
||||
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
|
||||
|
||||
const struct encode_gfx12_args args = {
|
||||
.intermediate_bvh = intermediate_as_addr,
|
||||
.output_base = vk_acceleration_structure_get_va(dst),
|
||||
.header = intermediate_header_addr,
|
||||
.output_bvh_offset = layout.bvh_offset,
|
||||
.leaf_node_offsets_offset = layout.leaf_node_offsets_offset,
|
||||
.leaf_node_count = leaf_count,
|
||||
.geometry_type = vk_get_as_geometry_type(build_info),
|
||||
};
|
||||
vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.encode_p_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
|
||||
|
||||
struct radv_dispatch_info dispatch = {
|
||||
.unaligned = true,
|
||||
.ordered = true,
|
||||
.blocks = {MAX2(leaf_count, 1), 1, 1},
|
||||
};
|
||||
|
||||
radv_compute_dispatch(cmd_buffer, &dispatch);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, uint32_t key)
|
||||
{
|
||||
|
|
@ -487,7 +608,7 @@ radv_init_header(VkCommandBuffer commandBuffer, const VkAccelerationStructureBui
|
|||
radv_get_acceleration_structure_layout(device, leaf_count, build_info, &layout);
|
||||
|
||||
if (key & RADV_ENCODE_KEY_COMPACT) {
|
||||
base = offsetof(struct radv_accel_struct_header, geometry_count);
|
||||
base = offsetof(struct radv_accel_struct_header, geometry_type);
|
||||
|
||||
struct header_args args = {
|
||||
.src = intermediate_header_addr,
|
||||
|
|
@ -506,6 +627,7 @@ radv_init_header(VkCommandBuffer commandBuffer, const VkAccelerationStructureBui
|
|||
|
||||
header.instance_offset = layout.bvh_offset + sizeof(struct radv_bvh_box32_node);
|
||||
header.instance_count = instance_count;
|
||||
header.leaf_node_offsets_offset = layout.leaf_node_offsets_offset;
|
||||
header.compacted_size = layout.size;
|
||||
|
||||
header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
|
||||
|
|
@ -520,6 +642,7 @@ radv_init_header(VkCommandBuffer commandBuffer, const VkAccelerationStructureBui
|
|||
sizeof(uint64_t) * header.instance_count;
|
||||
|
||||
header.build_flags = build_info->flags;
|
||||
header.geometry_type = vk_get_as_geometry_type(build_info);
|
||||
header.geometry_count = build_info->geometryCount;
|
||||
header.primitive_base_indices_offset = layout.primitive_base_indices_offset;
|
||||
|
||||
|
|
@ -674,26 +797,6 @@ static const struct radix_sort_vk_target_config radix_sort_config = {
|
|||
.scatter.block_rows = 14,
|
||||
};
|
||||
|
||||
static const struct vk_acceleration_structure_build_ops build_ops = {
|
||||
.begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
|
||||
.end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
|
||||
.get_as_size = radv_get_as_size,
|
||||
.get_update_scratch_size = radv_get_update_scratch_size,
|
||||
.get_encode_key[0] = radv_get_encode_key,
|
||||
.get_encode_key[1] = radv_get_encode_key,
|
||||
.encode_bind_pipeline[0] = radv_encode_bind_pipeline,
|
||||
.encode_bind_pipeline[1] = radv_init_header_bind_pipeline,
|
||||
.encode_as[0] = radv_encode_as,
|
||||
.encode_as[1] = radv_init_header,
|
||||
.init_update_scratch = radv_init_update_scratch,
|
||||
.update_bind_pipeline[0] = radv_update_bind_pipeline,
|
||||
.update_as[0] = radv_update_as,
|
||||
.leaf_spirv_override = leaf_spv,
|
||||
.leaf_spirv_override_size = sizeof(leaf_spv),
|
||||
.leaf_always_active_spirv_override = leaf_always_active_spv,
|
||||
.leaf_always_active_spirv_override_size = sizeof(leaf_always_active_spv),
|
||||
};
|
||||
|
||||
static void
|
||||
radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size)
|
||||
{
|
||||
|
|
@ -729,24 +832,49 @@ radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, V
|
|||
VkResult
|
||||
radv_device_init_accel_struct_build_state(struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
VkResult result = VK_SUCCESS;
|
||||
mtx_lock(&device->meta_state.mtx);
|
||||
|
||||
if (device->meta_state.accel_struct_build.radix_sort)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
|
||||
&device->meta_state.accel_struct_build.encode_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
result =
|
||||
create_build_pipeline_spv(device, encode_gfx12_spv, sizeof(encode_gfx12_spv), sizeof(struct encode_gfx12_args),
|
||||
&device->meta_state.accel_struct_build.encode_compact_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result =
|
||||
create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
|
||||
&device->meta_state.accel_struct_build.encode_compact_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
result = create_build_pipeline_spv(device, update_gfx12_spv, sizeof(update_gfx12_spv), sizeof(struct update_args),
|
||||
&device->meta_state.accel_struct_build.update_pipeline,
|
||||
&device->meta_state.accel_struct_build.update_p_layout);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
} else {
|
||||
result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
|
||||
&device->meta_state.accel_struct_build.encode_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result =
|
||||
create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
|
||||
&device->meta_state.accel_struct_build.encode_compact_pipeline,
|
||||
&device->meta_state.accel_struct_build.encode_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
|
||||
&device->meta_state.accel_struct_build.update_pipeline,
|
||||
&device->meta_state.accel_struct_build.update_p_layout);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
|
||||
&device->meta_state.accel_struct_build.header_pipeline,
|
||||
|
|
@ -754,16 +882,36 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
|
|||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
|
||||
&device->meta_state.accel_struct_build.update_pipeline,
|
||||
&device->meta_state.accel_struct_build.update_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
device->meta_state.accel_struct_build.radix_sort = vk_create_radix_sort_u64(
|
||||
radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_config);
|
||||
|
||||
device->vk.as_build_ops = &build_ops;
|
||||
device->meta_state.accel_struct_build.build_ops = (struct vk_acceleration_structure_build_ops){
|
||||
.begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
|
||||
.end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
|
||||
.get_as_size = radv_get_as_size,
|
||||
.get_update_scratch_size = radv_get_update_scratch_size,
|
||||
.get_encode_key[0] = radv_get_encode_key,
|
||||
.get_encode_key[1] = radv_get_encode_key,
|
||||
.encode_bind_pipeline[0] = radv_encode_bind_pipeline,
|
||||
.encode_bind_pipeline[1] = radv_init_header_bind_pipeline,
|
||||
.encode_as[1] = radv_init_header,
|
||||
.init_update_scratch = radv_init_update_scratch,
|
||||
.update_bind_pipeline[0] = radv_update_bind_pipeline,
|
||||
.update_as[0] = radv_update_as,
|
||||
};
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as_gfx12;
|
||||
} else {
|
||||
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as;
|
||||
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override = leaf_spv;
|
||||
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override_size = sizeof(leaf_spv);
|
||||
device->meta_state.accel_struct_build.build_ops.leaf_always_active_spirv_override = leaf_always_active_spv;
|
||||
device->meta_state.accel_struct_build.build_ops.leaf_always_active_spirv_override_size =
|
||||
sizeof(leaf_always_active_spv);
|
||||
}
|
||||
|
||||
device->vk.as_build_ops = &device->meta_state.accel_struct_build.build_ops;
|
||||
device->vk.write_buffer_cp = radv_write_buffer_cp;
|
||||
device->vk.flush_buffer_write_cp = radv_flush_buffer_write_cp;
|
||||
device->vk.cmd_dispatch_unaligned = radv_cmd_dispatch_unaligned;
|
||||
|
|
@ -783,12 +931,30 @@ exit:
|
|||
static VkResult
|
||||
radv_device_init_accel_struct_copy_state(struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
VkResult result;
|
||||
|
||||
mtx_lock(&device->meta_state.mtx);
|
||||
|
||||
VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
|
||||
&device->meta_state.accel_struct_build.copy_pipeline,
|
||||
&device->meta_state.accel_struct_build.copy_p_layout);
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
result = create_build_pipeline_spv(device, copy_gfx12_spv, sizeof(copy_gfx12_spv), sizeof(struct copy_args),
|
||||
&device->meta_state.accel_struct_build.copy_pipeline,
|
||||
&device->meta_state.accel_struct_build.copy_p_layout);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto exit;
|
||||
|
||||
result = create_build_pipeline_spv(device, copy_blas_addrs_gfx12_spv, sizeof(copy_blas_addrs_gfx12_spv),
|
||||
sizeof(struct copy_args),
|
||||
&device->meta_state.accel_struct_build.copy_blas_addrs_gfx12_pipeline,
|
||||
&device->meta_state.accel_struct_build.copy_p_layout);
|
||||
} else {
|
||||
result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
|
||||
&device->meta_state.accel_struct_build.copy_pipeline,
|
||||
&device->meta_state.accel_struct_build.copy_p_layout);
|
||||
}
|
||||
|
||||
exit:
|
||||
mtx_unlock(&device->meta_state.mtx);
|
||||
return result;
|
||||
}
|
||||
|
|
@ -879,6 +1045,7 @@ radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
|
|||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_meta_saved_state saved_state;
|
||||
|
||||
VkResult result = radv_device_init_accel_struct_copy_state(device);
|
||||
|
|
@ -904,6 +1071,21 @@ radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
|
|||
sizeof(consts), &consts);
|
||||
|
||||
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/* Wait for the main copy dispatch to finish. */
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
|
||||
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
|
||||
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
|
||||
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->meta_state.accel_struct_build.copy_blas_addrs_gfx12_pipeline);
|
||||
|
||||
vk_common_CmdDispatch(commandBuffer, 256, 1, 1);
|
||||
}
|
||||
|
||||
radv_meta_restore(&saved_state, cmd_buffer);
|
||||
}
|
||||
|
||||
|
|
@ -945,6 +1127,20 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
|
|||
radv_CmdDispatchIndirect(commandBuffer, vk_buffer_to_handle(src->buffer),
|
||||
src->offset + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
|
||||
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/* Wait for the main copy dispatch to finish. */
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
|
||||
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
|
||||
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
|
||||
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->meta_state.accel_struct_build.copy_blas_addrs_gfx12_pipeline);
|
||||
|
||||
vk_common_CmdDispatch(commandBuffer, 256, 1, 1);
|
||||
}
|
||||
|
||||
radv_meta_restore(&saved_state, cmd_buffer);
|
||||
|
||||
/* Set the header of the serialized data. */
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ enum {
|
|||
RADV_DEBUG_DUMP_ASM = 1ull << 56,
|
||||
RADV_DEBUG_DUMP_BACKEND_IR = 1ull << 57,
|
||||
RADV_DEBUG_PSO_HISTORY = 1ull << 58,
|
||||
RADV_DEBUG_BVH4 = 1ull << 59,
|
||||
RADV_DEBUG_DUMP_SHADERS = RADV_DEBUG_DUMP_VS | RADV_DEBUG_DUMP_TCS | RADV_DEBUG_DUMP_TES | RADV_DEBUG_DUMP_GS |
|
||||
RADV_DEBUG_DUMP_PS | RADV_DEBUG_DUMP_TASK | RADV_DEBUG_DUMP_MESH | RADV_DEBUG_DUMP_CS |
|
||||
RADV_DEBUG_DUMP_NIR | RADV_DEBUG_DUMP_ASM | RADV_DEBUG_DUMP_BACKEND_IR,
|
||||
|
|
|
|||
|
|
@ -100,8 +100,10 @@ struct radv_meta_state {
|
|||
VkPipeline update_pipeline;
|
||||
VkPipelineLayout copy_p_layout;
|
||||
VkPipeline copy_pipeline;
|
||||
VkPipeline copy_blas_addrs_gfx12_pipeline;
|
||||
|
||||
struct radix_sort_vk *radix_sort;
|
||||
struct vk_acceleration_structure_build_ops build_ops;
|
||||
struct vk_acceleration_structure_build_args build_args;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ static const struct debug_control radv_debug_options[] = {{"nofastclears", RADV_
|
|||
{"asm", RADV_DEBUG_DUMP_ASM},
|
||||
{"ir", RADV_DEBUG_DUMP_BACKEND_IR},
|
||||
{"pso_history", RADV_DEBUG_PSO_HISTORY},
|
||||
{"bvh4", RADV_DEBUG_BVH4},
|
||||
{NULL, 0}};
|
||||
|
||||
const char *
|
||||
|
|
|
|||
|
|
@ -157,6 +157,13 @@ radv_emulate_rt(const struct radv_physical_device *pdev)
|
|||
return !pdev->info.has_image_bvh_intersect_ray && instance->drirc.emulate_rt;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_use_bvh8(const struct radv_physical_device *pdev)
|
||||
{
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
return pdev->info.gfx_level >= GFX12 && !radv_emulate_rt(pdev) && !(instance->debug_flags & RADV_DEBUG_BVH4);
|
||||
}
|
||||
|
||||
static void
|
||||
parse_hex(char *out, const char *in, unsigned length)
|
||||
{
|
||||
|
|
@ -186,6 +193,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
|
|||
key->disable_sinking_load_input_fs = instance->drirc.disable_sinking_load_input_fs;
|
||||
key->disable_trunc_coord = instance->drirc.disable_trunc_coord;
|
||||
key->emulate_rt = radv_emulate_rt(pdev);
|
||||
key->bvh8 = radv_use_bvh8(pdev);
|
||||
key->ge_wave32 = pdev->ge_wave_size == 32;
|
||||
key->invariant_geom = !!(instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
|
||||
key->no_fmask = !!(instance->debug_flags & RADV_DEBUG_NO_FMASK);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ struct radv_physical_device_cache_key {
|
|||
uint32_t disable_sinking_load_input_fs : 1;
|
||||
uint32_t disable_trunc_coord : 1;
|
||||
uint32_t emulate_rt : 1;
|
||||
uint32_t bvh8 : 1;
|
||||
uint32_t ge_wave32 : 1;
|
||||
uint32_t invariant_geom : 1;
|
||||
uint32_t no_fmask : 1;
|
||||
|
|
@ -258,6 +259,8 @@ bool radv_enable_rt(const struct radv_physical_device *pdev);
|
|||
|
||||
bool radv_emulate_rt(const struct radv_physical_device *pdev);
|
||||
|
||||
bool radv_use_bvh8(const struct radv_physical_device *pdev);
|
||||
|
||||
uint32_t radv_find_memory_index(const struct radv_physical_device *pdev, VkMemoryPropertyFlags flags);
|
||||
|
||||
VkResult create_null_physical_device(struct vk_instance *vk_instance);
|
||||
|
|
|
|||
|
|
@ -184,8 +184,8 @@ rra_dump_asic_info(const struct radeon_info *gpu_info, FILE *output)
|
|||
}
|
||||
|
||||
static struct rra_accel_struct_header
|
||||
rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, size_t parent_id_table_size,
|
||||
size_t leaf_node_data_size, size_t internal_node_data_size,
|
||||
rra_fill_accel_struct_header_common(const struct radv_physical_device *pdev, struct radv_accel_struct_header *header,
|
||||
size_t parent_id_table_size, struct rra_bvh_info *bvh_info,
|
||||
uint64_t primitive_count)
|
||||
{
|
||||
struct rra_accel_struct_header result = {
|
||||
|
|
@ -199,32 +199,39 @@ rra_fill_accel_struct_header_common(struct radv_accel_struct_header *header, siz
|
|||
/* TODO: calculate active primitives */
|
||||
.active_primitive_count = primitive_count,
|
||||
.geometry_description_count = header->geometry_count,
|
||||
.interior_fp32_node_count = internal_node_data_size / sizeof(struct radv_bvh_box32_node),
|
||||
.interior_fp32_node_count = bvh_info->internal_nodes_size / sizeof(struct radv_bvh_box32_node),
|
||||
.leaf_node_count = primitive_count,
|
||||
.rt_driver_interface_version = 8 << 16,
|
||||
.rt_ip_version = pdev->info.rt_ip_version,
|
||||
};
|
||||
|
||||
if (!radv_use_bvh8(pdev))
|
||||
result.rt_ip_version = MIN2(result.rt_ip_version, RT_1_1);
|
||||
|
||||
result.metadata_size = sizeof(struct rra_accel_struct_metadata) + parent_id_table_size;
|
||||
result.file_size =
|
||||
result.metadata_size + sizeof(struct rra_accel_struct_header) + internal_node_data_size + leaf_node_data_size;
|
||||
result.file_size = result.metadata_size + sizeof(struct rra_accel_struct_header) + bvh_info->internal_nodes_size +
|
||||
bvh_info->leaf_nodes_size;
|
||||
|
||||
result.internal_nodes_offset = sizeof(struct rra_accel_struct_metadata);
|
||||
result.leaf_nodes_offset = result.internal_nodes_offset + internal_node_data_size;
|
||||
result.geometry_infos_offset = result.leaf_nodes_offset + leaf_node_data_size;
|
||||
result.leaf_nodes_offset = result.internal_nodes_offset + bvh_info->internal_nodes_size;
|
||||
result.geometry_infos_offset = result.leaf_nodes_offset + bvh_info->leaf_nodes_size;
|
||||
result.leaf_ids_offset = result.geometry_infos_offset;
|
||||
if (!header->instance_count)
|
||||
if (header->instance_count) {
|
||||
if (radv_use_bvh8(pdev))
|
||||
result.leaf_ids_offset += bvh_info->instance_sideband_data_size;
|
||||
} else {
|
||||
result.leaf_ids_offset += header->geometry_count * sizeof(struct rra_geometry_info);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size, size_t leaf_node_data_size,
|
||||
size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
|
||||
rra_dump_tlas_header(const struct radv_physical_device *pdev, struct radv_accel_struct_header *header,
|
||||
size_t parent_id_table_size, struct rra_bvh_info *bvh_info, uint64_t primitive_count, FILE *output)
|
||||
{
|
||||
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
|
||||
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
|
||||
struct rra_accel_struct_header file_header =
|
||||
rra_fill_accel_struct_header_common(pdev, header, parent_id_table_size, bvh_info, primitive_count);
|
||||
file_header.post_build_info.bvh_type = RRA_BVH_TYPE_TLAS;
|
||||
file_header.geometry_type = VK_GEOMETRY_TYPE_INSTANCES_KHR;
|
||||
|
||||
|
|
@ -232,12 +239,12 @@ rra_dump_tlas_header(struct radv_accel_struct_header *header, size_t parent_id_t
|
|||
}
|
||||
|
||||
static void
|
||||
rra_dump_blas_header(struct radv_accel_struct_header *header, size_t parent_id_table_size,
|
||||
struct radv_accel_struct_geometry_info *geometry_infos, size_t leaf_node_data_size,
|
||||
size_t internal_node_data_size, uint64_t primitive_count, FILE *output)
|
||||
rra_dump_blas_header(const struct radv_physical_device *pdev, struct radv_accel_struct_header *header,
|
||||
size_t parent_id_table_size, struct radv_accel_struct_geometry_info *geometry_infos,
|
||||
struct rra_bvh_info *bvh_info, uint64_t primitive_count, FILE *output)
|
||||
{
|
||||
struct rra_accel_struct_header file_header = rra_fill_accel_struct_header_common(
|
||||
header, parent_id_table_size, leaf_node_data_size, internal_node_data_size, primitive_count);
|
||||
struct rra_accel_struct_header file_header =
|
||||
rra_fill_accel_struct_header_common(pdev, header, parent_id_table_size, bvh_info, primitive_count);
|
||||
file_header.post_build_info.bvh_type = RRA_BVH_TYPE_BLAS;
|
||||
file_header.geometry_type = header->geometry_count ? geometry_infos->type : VK_GEOMETRY_TYPE_TRIANGLES_KHR;
|
||||
|
||||
|
|
@ -281,7 +288,8 @@ rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struc
|
|||
}
|
||||
|
||||
static VkResult
|
||||
rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
|
||||
rra_dump_acceleration_structure(const struct radv_physical_device *pdev,
|
||||
struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
|
||||
struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
|
||||
{
|
||||
struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
|
||||
|
|
@ -297,10 +305,18 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
if (rra_validate_header(accel_struct, header)) {
|
||||
return VK_ERROR_VALIDATION_FAILED_EXT;
|
||||
}
|
||||
if (rra_validate_node_gfx10_3(accel_struct_vas, data + header->bvh_offset,
|
||||
data + header->bvh_offset + src_root_offset, header->geometry_count,
|
||||
accel_struct->size, !is_tlas, 0)) {
|
||||
return VK_ERROR_VALIDATION_FAILED_EXT;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
if (rra_validate_node_gfx12(accel_struct_vas, data + header->bvh_offset,
|
||||
data + header->bvh_offset + src_root_offset, header->geometry_count,
|
||||
accel_struct->size, !is_tlas, 0)) {
|
||||
return VK_ERROR_VALIDATION_FAILED_EXT;
|
||||
}
|
||||
} else {
|
||||
if (rra_validate_node_gfx10_3(accel_struct_vas, data + header->bvh_offset,
|
||||
data + header->bvh_offset + src_root_offset, header->geometry_count,
|
||||
accel_struct->size, !is_tlas, 0)) {
|
||||
return VK_ERROR_VALIDATION_FAILED_EXT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -321,7 +337,10 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
struct rra_bvh_info bvh_info = {
|
||||
.geometry_infos = rra_geometry_infos,
|
||||
};
|
||||
rra_gather_bvh_info_gfx10_3(data + header->bvh_offset, RADV_BVH_ROOT_NODE, &bvh_info);
|
||||
if (radv_use_bvh8(pdev))
|
||||
rra_gather_bvh_info_gfx12(data + header->bvh_offset, RADV_BVH_ROOT_NODE, &bvh_info);
|
||||
else
|
||||
rra_gather_bvh_info_gfx10_3(data + header->bvh_offset, RADV_BVH_ROOT_NODE, &bvh_info);
|
||||
|
||||
leaf_indices = calloc(header->geometry_count, sizeof(struct rra_geometry_info));
|
||||
if (!leaf_indices) {
|
||||
|
|
@ -343,6 +362,8 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
|
||||
uint32_t node_parent_table_size =
|
||||
((bvh_info.leaf_nodes_size + bvh_info.internal_nodes_size) / 64) * sizeof(uint32_t);
|
||||
if (radv_use_bvh8(pdev))
|
||||
node_parent_table_size = 0;
|
||||
|
||||
node_parent_table = calloc(node_parent_table_size, 1);
|
||||
if (!node_parent_table) {
|
||||
|
|
@ -355,7 +376,9 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size, 1);
|
||||
dst_structure_data = calloc(RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size +
|
||||
bvh_info.instance_sideband_data_size,
|
||||
1);
|
||||
if (!dst_structure_data) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
|
|
@ -366,13 +389,20 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
.dst = dst_structure_data,
|
||||
.dst_leaf_offset = RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size,
|
||||
.dst_internal_offset = RRA_ROOT_NODE_OFFSET,
|
||||
.dst_instance_sideband_data_offset =
|
||||
RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
|
||||
.parent_id_table = node_parent_table,
|
||||
.parent_id_table_size = node_parent_table_size,
|
||||
.leaf_node_ids = leaf_node_ids,
|
||||
.leaf_indices = leaf_indices,
|
||||
};
|
||||
|
||||
rra_transcode_node_gfx10_3(&ctx, 0xFFFFFFFF, RADV_BVH_ROOT_NODE, header->aabb);
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
ctx.dst_internal_offset += sizeof(struct radv_gfx12_box_node);
|
||||
rra_transcode_node_gfx12(&ctx, 0xFFFFFFFF, RADV_BVH_ROOT_NODE, RRA_ROOT_NODE_OFFSET);
|
||||
} else {
|
||||
rra_transcode_node_gfx10_3(&ctx, 0xFFFFFFFF, RADV_BVH_ROOT_NODE, header->aabb);
|
||||
}
|
||||
|
||||
struct rra_accel_struct_chunk_header chunk_header = {
|
||||
.metadata_offset = 0,
|
||||
|
|
@ -395,8 +425,12 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
* the top bits are masked away.
|
||||
* In order to make sure BLASes can be found in the hashmap, we have
|
||||
* to replicate that mask here.
|
||||
* On GFX12, we mask away the top 16 bits because the instance BLAS addresses
|
||||
* use pointer flags.
|
||||
*/
|
||||
uint64_t va = accel_struct->va & 0x1FFFFFFFFFFFFFF;
|
||||
if (radv_use_bvh8(pdev))
|
||||
va &= 0xFFFFFFFFFFFF;
|
||||
memcpy(chunk_header.virtual_address, &va, sizeof(uint64_t));
|
||||
|
||||
struct rra_accel_struct_metadata rra_metadata = {
|
||||
|
|
@ -411,15 +445,13 @@ rra_dump_acceleration_structure(struct radv_rra_accel_struct_data *accel_struct,
|
|||
fwrite(node_parent_table, 1, node_parent_table_size, output);
|
||||
|
||||
if (is_tlas)
|
||||
rra_dump_tlas_header(header, node_parent_table_size, bvh_info.leaf_nodes_size, bvh_info.internal_nodes_size,
|
||||
primitive_count, output);
|
||||
rra_dump_tlas_header(pdev, header, node_parent_table_size, &bvh_info, primitive_count, output);
|
||||
else
|
||||
rra_dump_blas_header(header, node_parent_table_size, geometry_infos, bvh_info.leaf_nodes_size,
|
||||
bvh_info.internal_nodes_size, primitive_count, output);
|
||||
rra_dump_blas_header(pdev, header, node_parent_table_size, geometry_infos, &bvh_info, primitive_count, output);
|
||||
|
||||
/* Write acceleration structure data */
|
||||
fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1, bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size,
|
||||
output);
|
||||
fwrite(dst_structure_data + RRA_ROOT_NODE_OFFSET, 1,
|
||||
bvh_info.internal_nodes_size + bvh_info.leaf_nodes_size + bvh_info.instance_sideband_data_size, output);
|
||||
|
||||
if (!is_tlas)
|
||||
fwrite(rra_geometry_infos, sizeof(struct rra_geometry_info), header->geometry_count, output);
|
||||
|
|
@ -974,7 +1006,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
|||
continue;
|
||||
|
||||
accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
|
||||
result = rra_dump_acceleration_structure(data, mapped_data, device->rra_trace.accel_struct_vas,
|
||||
result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas,
|
||||
device->rra_trace.validate_as, file);
|
||||
|
||||
rra_unmap_accel_struct_data(©_ctx, i);
|
||||
|
|
|
|||
|
|
@ -285,6 +285,7 @@ radv_node_to_addr(uint64_t node)
|
|||
struct rra_bvh_info {
|
||||
uint32_t leaf_nodes_size;
|
||||
uint32_t internal_nodes_size;
|
||||
uint32_t instance_sideband_data_size;
|
||||
struct rra_geometry_info *geometry_infos;
|
||||
};
|
||||
|
||||
|
|
@ -293,6 +294,7 @@ struct rra_transcoding_context {
|
|||
uint8_t *dst;
|
||||
uint32_t dst_leaf_offset;
|
||||
uint32_t dst_internal_offset;
|
||||
uint32_t dst_instance_sideband_data_offset;
|
||||
uint32_t *parent_id_table;
|
||||
uint32_t parent_id_table_size;
|
||||
uint32_t *leaf_node_ids;
|
||||
|
|
@ -307,4 +309,12 @@ void rra_gather_bvh_info_gfx10_3(const uint8_t *bvh, uint32_t node_id, struct rr
|
|||
uint32_t rra_transcode_node_gfx10_3(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
|
||||
vk_aabb bounds);
|
||||
|
||||
bool rra_validate_node_gfx12(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node,
|
||||
uint32_t geometry_count, uint32_t size, bool is_bottom_level, uint32_t depth);
|
||||
|
||||
void rra_gather_bvh_info_gfx12(const uint8_t *bvh, uint32_t node_id, struct rra_bvh_info *dst);
|
||||
|
||||
void rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
|
||||
uint32_t dst_offset);
|
||||
|
||||
#endif /* RADV_RRA_H */
|
||||
|
|
|
|||
184
src/amd/vulkan/radv_rra_gfx12.c
Normal file
184
src/amd/vulkan/radv_rra_gfx12.c
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright © 2025 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* GFX12 specific code for RRA. */
|
||||
|
||||
#include "bvh/bvh.h"
|
||||
#include "radv_rra.h"
|
||||
|
||||
#include "util/bitset.h"
|
||||
|
||||
/* Per-instance sideband data appended after the leaf nodes when dumping a
 * GFX12 (BVH8) TLAS for RRA; one entry is accumulated per instance node
 * (see rra_gather_bvh_info_gfx12). */
struct rra_instance_sideband_data {
   uint32_t instance_index;
   uint32_t custom_instance_and_flags;
   uint32_t blas_metadata_size;
   uint32_t padding; /* keeps otw_matrix 16-byte aligned within the struct */
   /* NOTE(review): presumably the object-to-world transform of the instance
    * ("otw") — confirm against the RRA file format consumer. */
   mat3x4 otw_matrix;
};
|
||||
bool
|
||||
rra_validate_node_gfx12(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *node, uint32_t geometry_count,
|
||||
uint32_t size, bool is_bottom_level, uint32_t depth)
|
||||
{
|
||||
struct rra_validation_context ctx = {0};
|
||||
|
||||
if (depth > 1024) {
|
||||
rra_validation_fail(&ctx, "depth > 1024");
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t cur_offset = (uint8_t *)node - data;
|
||||
snprintf(ctx.location, sizeof(ctx.location), "internal node (offset=%u)", cur_offset);
|
||||
|
||||
return ctx.failed;
|
||||
}
|
||||
|
||||
/* Extract the geometry index from a GFX12 leaf node. Used by
 * rra_gather_bvh_info_gfx12 to index rra_bvh_info::geometry_infos. */
static uint32_t
get_geometry_id(const void *node, uint32_t node_type)
{
   /* Instance nodes do not carry a geometry index; report geometry 0. */
   if (node_type == radv_bvh_node_instance)
      return 0;

   /* Primitive nodes store a 10-bit bit-position at bit 42 ("indices
    * midpoint"); the 28-bit geometry index ends at that position. */
   uint32_t indices_midpoint = BITSET_EXTRACT(node, 42, 10);
   return BITSET_EXTRACT(node, indices_midpoint - 28, 28);
}
|
||||
|
||||
/* Recursively walk a GFX12 (BVH8) acceleration structure starting at node_id
 * and accumulate into *dst: total internal/leaf node sizes, instance sideband
 * data size, and per-geometry primitive counts. Sizes are later used to lay
 * out the transcoded RRA dump. */
void
rra_gather_bvh_info_gfx12(const uint8_t *bvh, uint32_t node_id, struct rra_bvh_info *dst)
{
   /* The low 3 bits of a node ID encode the node type. */
   uint32_t node_type = node_id & 7;

   switch (node_type) {
   case radv_bvh_node_box32:
      dst->internal_nodes_size += sizeof(struct radv_gfx12_box_node);
      break;
   case radv_bvh_node_instance:
      dst->leaf_nodes_size += sizeof(struct radv_gfx12_instance_node);
      /* Instances additionally get an RRA sideband entry in the dump. */
      dst->instance_sideband_data_size += sizeof(struct rra_instance_sideband_data);
      break;
   case radv_bvh_node_triangle:
      dst->leaf_nodes_size += sizeof(struct radv_gfx12_primitive_node);
      break;
   default:
      unreachable("Invalid node type");
      break;
   }

   /* Node IDs address 8-byte units: clear the type bits, shift to bytes. */
   const void *node = bvh + ((node_id & (~7u)) << 3);
   if (node_type == radv_bvh_node_box32) {
      const struct radv_gfx12_box_node *src = node;

      /* Top 4 bits of child_count_exponents hold (valid child count - 1). */
      uint32_t valid_child_count_minus_one = src->child_count_exponents >> 28;

      /* Children are not stored with explicit IDs: internal and primitive
       * children are packed consecutively after internal_base_id and
       * primitive_base_id respectively, so the IDs are reconstructed by
       * advancing a running cursor per child. */
      uint32_t internal_id = src->internal_base_id;
      uint32_t primitive_id = src->primitive_base_id;
      for (uint32_t i = 0; i <= valid_child_count_minus_one; i++) {
         /* dword2 bits [24..27]: child node type; bits [28..31]: child size
          * in units of RADV_GFX12_BVH_NODE_SIZE. */
         uint32_t child_type = (src->children[i].dword2 >> 24) & 0xf;
         uint32_t child_size = src->children[i].dword2 >> 28;

         uint32_t child_id;
         if (child_type == radv_bvh_node_box32) {
            child_id = internal_id | child_type;
            internal_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
         } else {
            child_id = primitive_id | child_type;
            primitive_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
         }

         rra_gather_bvh_info_gfx12(bvh, child_id, dst);
      }
   } else {
      /* Leaf: count the primitive against its geometry. */
      dst->geometry_infos[get_geometry_id(node, node_type)].primitive_count++;
   }
}
|
||||
|
||||
/* Transcode one GFX12 box (BVH8) node at dst_offset, then recurse into its
 * children. Children are packed into two separate destination regions:
 * internal (box) nodes starting at ctx->dst_internal_offset and leaf nodes
 * starting at ctx->dst_leaf_offset. parent_id is stashed in the node's
 * unused field so RRA can walk the tree upwards.
 */
static void
rra_transcode_box8_node(struct rra_transcoding_context *ctx, const struct radv_gfx12_box_node *src, uint32_t parent_id,
                        uint32_t dst_offset)
{
   struct radv_gfx12_box_node *dst = (struct radv_gfx12_box_node *)(ctx->dst + dst_offset);

   /* Start from a verbatim copy, then patch the fields that refer to the new layout. */
   memcpy(dst, src, sizeof(struct radv_gfx12_box_node));
   /* Base IDs address 8-byte units, hence the >> 3. */
   dst->internal_base_id = ctx->dst_internal_offset >> 3;
   dst->primitive_base_id = ctx->dst_leaf_offset >> 3;
   dst->unused = parent_id;

   /* The top 4 bits of child_count_exponents store child_count - 1. */
   uint32_t valid_child_count_minus_one = dst->child_count_exponents >> 28;

   /* First pass: count internal vs. leaf children so we can reserve both
    * destination regions up front (child type lives in bits 24..27 of dword2).
    */
   uint32_t internal_child_count = 0;
   uint32_t leaf_child_count = 0;
   for (uint32_t i = 0; i <= valid_child_count_minus_one; ++i) {
      uint32_t child_type = (src->children[i].dword2 >> 24) & 0xf;
      if (child_type == radv_bvh_node_box32)
         internal_child_count++;
      else
         leaf_child_count++;
   }

   /* Reserve destination space for all children before recursing; the
    * recursion advances the ctx offsets further for grandchildren.
    */
   uint32_t dst_internal_offset = ctx->dst_internal_offset;
   ctx->dst_internal_offset += internal_child_count * RADV_GFX12_BVH_NODE_SIZE;

   uint32_t dst_leaf_offset = ctx->dst_leaf_offset;
   ctx->dst_leaf_offset += leaf_child_count * RADV_GFX12_BVH_NODE_SIZE;

   /* Second pass: reconstruct each source child ID from the running base IDs
    * (bits 28..31 of dword2 hold the child's size in nodes), assign its
    * destination slot and transcode it.
    */
   uint32_t internal_id = src->internal_base_id;
   uint32_t primitive_id = src->primitive_base_id;
   for (uint32_t i = 0; i <= valid_child_count_minus_one; ++i) {
      uint32_t child_type = (src->children[i].dword2 >> 24) & 0xf;
      uint32_t child_size = src->children[i].dword2 >> 28;

      uint32_t child_id;
      uint32_t child_dst_offset;
      if (child_type == radv_bvh_node_box32) {
         child_id = internal_id | child_type;
         internal_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
         child_dst_offset = dst_internal_offset;
         dst_internal_offset += RADV_GFX12_BVH_NODE_SIZE;
      } else {
         child_id = primitive_id | child_type;
         primitive_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
         child_dst_offset = dst_leaf_offset;
         dst_leaf_offset += RADV_GFX12_BVH_NODE_SIZE;
      }

      rra_transcode_node_gfx12(ctx, radv_bvh_node_box32 | (dst_offset >> 3), child_id, child_dst_offset);

      /* Every transcoded child occupies exactly one node, so force the
       * encoded child size (bits 28..31) to 1.
       */
      dst->children[i].dword2 = (dst->children[i].dword2 & 0x0fffffff) | (1 << 28);
   }
}
|
||||
|
||||
void
|
||||
rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id, uint32_t dst_offset)
|
||||
{
|
||||
uint32_t node_type = src_id & 7;
|
||||
uint32_t src_offset = (src_id & (~7u)) << 3;
|
||||
|
||||
const void *src_child_node = ctx->src + src_offset;
|
||||
if (node_type == radv_bvh_node_box32) {
|
||||
rra_transcode_box8_node(ctx, src_child_node, parent_id, dst_offset);
|
||||
} else {
|
||||
memcpy(ctx->dst + dst_offset, src_child_node, RADV_GFX12_BVH_NODE_SIZE);
|
||||
|
||||
if (node_type == radv_bvh_node_instance) {
|
||||
struct radv_gfx12_instance_node *dst = (void *)(ctx->dst + dst_offset);
|
||||
|
||||
struct rra_instance_sideband_data *sideband_data = (void *)(ctx->dst + ctx->dst_instance_sideband_data_offset);
|
||||
ctx->dst_instance_sideband_data_offset += sizeof(struct rra_instance_sideband_data);
|
||||
|
||||
const struct radv_gfx12_instance_node_user_data *user_data =
|
||||
(const void *)((const uint8_t *)src_child_node + sizeof(struct radv_gfx12_instance_node));
|
||||
|
||||
dst->pointer_flags_bvh_addr = dst->pointer_flags_bvh_addr - (user_data->bvh_offset >> 3) +
|
||||
(sizeof(struct rra_accel_struct_metadata) >> 3);
|
||||
dst->unused = parent_id;
|
||||
|
||||
sideband_data->instance_index = user_data->instance_index;
|
||||
sideband_data->custom_instance_and_flags = user_data->custom_instance;
|
||||
sideband_data->blas_metadata_size = offsetof(struct rra_accel_struct_metadata, unused);
|
||||
sideband_data->otw_matrix = user_data->otw_matrix;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue