anv: Add shader to copy acceleration structures

Rework (Kevin)
- encode the address of anv_instance_leaf after header in order to
  handle serialization and deserialization part.
- draw serialized data layout and explanation

Co-authored-by: Kevin Chuang <kaiwenjon23@gmail.com>
Co-authored-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31588>
This commit is contained in:
Sagar Ghuge 2024-06-12 16:03:18 -07:00 committed by Marge Bot
parent a6b1a1fce1
commit 692b5fa9f2

View file

@ -0,0 +1,147 @@
/* Copyright © 2022 Bas Nieuwenhuizen
* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#version 460
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
#include "anv_build_interface.h"
/* Push constants for the copy dispatch. main() reads args.src_addr,
 * args.dst_addr, args.mode, args.driver_uuid and args.accel_struct_compat.
 * copy_args itself is not declared in this file; presumably it comes from
 * anv_build_interface.h (confirm).
 */
layout(push_constant) uniform CONSTS {
copy_args args;
};
// Layout of serialized data
/**************************************|
| vk_accel_struct_serialization_header |
|--------------------------------------|
| For a TLAS, all handles to the BLAS |
| within this TLAS. |
| For a BLAS, nothing. |
|--------------------------------------|
| Driver-specific part. |
| For Intel, this starts with |
| anv_accel_struct_header as drawn |
| in anv_bvh.h |
|**************************************/
/*
* Explanation of BLAS handles:
* According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
* for a TLAS, the handles of all BLAS/instances within this TLAS are
* tightly stored after vk_accel_struct_serialization_header, making this
* serialized-memory a semi-opaque object. The application might be able
* to swap/replace these handles with other handles. In fact this is what
* dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
* is doing.
*
* Therefore, if the application updates the handles, we need to replace
* the old handles in each anv_instance_leaf with the new ones. To reach
* the anv_instance_leaf structures without traversing the TLAS, pointers
* to them are stored right after anv_accel_struct_header, so we know
* immediately where they are in the TLAS.
*
* That said, it does seem a bit odd that the application can swap in new
* BLAS handles without rebuilding the TLAS.
*/
/*
 * Copy, serialize or deserialize an acceleration structure, as selected by
 * args.mode.
 *
 * Every invocation of the dispatch copies 8-byte words of the blob that
 * starts with anv_accel_struct_header and is compacted_size bytes long:
 * invocation i handles byte offsets i * 8, i * 8 + stride, ... where stride
 * is 8 bytes times the total number of invocations.
 *
 * - ANV_COPY_MODE_COPY: plain word copy from args.src_addr to args.dst_addr.
 * - ANV_COPY_MODE_SERIALIZE: invocation 0 fills in the
 *   vk_accel_struct_serialization_header at args.dst_addr. The blob is
 *   written behind that header and the handle array. For every instance-leaf
 *   address slot that follows anv_accel_struct_header, the low 48 bits of the
 *   leaf's bvh_ptr are stored into the handle array.
 * - any other mode (ANV_COPY_MODE_DESERIALIZE): the blob is read from behind
 *   the serialization header and handle array of the source. For every
 *   instance-leaf address slot, the matching handle is written back into the
 *   leaf (bvh_ptr and the low 48 bits of start_node_ptr_and_inst_flags).
 */
void
main(void)
{
   /* Low 48 bits: the part of bvh_ptr and start_node_ptr_and_inst_flags that
    * is read or rewritten as an address below.
    */
   const uint64_t addr_mask = 0x0000fffffffffffful;

   uint32_t global_id = gl_GlobalInvocationID.x;

   /* Take the lane count from the declared workgroup size instead of
    * repeating the literal used in the layout() qualifier, and do the offset
    * math in 64 bits so the products cannot wrap before being widened.
    */
   uint64_t lanes = uint64_t(gl_NumWorkGroups.x) * uint64_t(gl_WorkGroupSize.x);
   uint64_t stride = lanes * uint64_t(8);

   uint64_t copy_src_addr = args.src_addr;
   uint64_t copy_dst_addr = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The blob sits behind the serialization header and the handle array. */
      copy_src_addr += SIZEOF(vk_accel_struct_serialization_header) +
         DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(anv_accel_struct_header) header = REF(anv_accel_struct_header)(copy_src_addr);

   /* Start of the handle array; points into the source unless serializing. */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);

   /* The addresses of the instance leaves are stored right after the bvh
    * header, one uint64_t per instance. [instance_offset, instance_end) is
    * that range, expressed as byte offsets into the blob; it is empty when
    * there are no instances.
    */
   uint64_t instance_offset = SIZEOF(anv_accel_struct_header);
   uint64_t slot_bytes = DEREF(header).instance_count * SIZEOF(uint64_t);
   uint64_t instance_end = 0;
   if (slot_bytes > 0)
      instance_end = instance_offset + slot_bytes;

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* In this mode header is located at args.src_addr, so slot_bytes is
       * also the size of the handle array written behind the serialization
       * header.
       */
      copy_dst_addr += SIZEOF(vk_accel_struct_serialization_header) + slot_bytes;

      /* Only one invocation needs to write the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);

         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(ser_header).driver_uuid[i] = args.driver_uuid[i];
            DEREF(ser_header).accel_struct_compat[i] = args.accel_struct_compat[i];
         }
      }

      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* A plain copy never touches handles: make the slot range empty. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;

   for (uint64_t offset = uint64_t(global_id) * uint64_t(8); offset < size; offset += stride) {
      /* Copy 8 bytes per iteration. */
      DEREF(REF(uint64_t)(copy_dst_addr + offset)) =
         DEREF(REF(uint64_t)(copy_src_addr + offset));

      /* Do the handle adjustment in the same invocation that copied the
       * slot, so that no synchronization is needed for the slot itself.
       */
      bool is_leaf_slot = offset >= instance_offset && offset < instance_end &&
                          (offset - instance_offset) % SIZEOF(uint64_t) == 0;
      if (!is_leaf_slot)
         continue;

      uint64_t idx = (offset - instance_offset) / SIZEOF(uint64_t);

      if (args.mode == ANV_COPY_MODE_SERIALIZE) {
         /* Follow the slot to the anv_instance_leaf and store its BLAS
          * pointer in the handle array after the serialization header.
          */
         uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_src_addr + offset));
         REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);

         uint64_t blas_ptr = DEREF(instance_leaf).part1.bvh_ptr & addr_mask;
         DEREF(INDEX(uint64_t, instance_base, idx)) = blas_ptr;
      } else { /* ANV_COPY_MODE_DESERIALIZE */
         /* Follow the freshly copied slot to the anv_instance_leaf and
          * replace its bvh_ptr with the handle stored after the
          * serialization header.
          *
          * NOTE(review): the leaf address is taken verbatim from the copied
          * slot. Confirm that it refers to the destination structure and
          * that the leaf's own words cannot be copied concurrently by
          * another invocation.
          */
         uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_dst_addr + offset));
         REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);

         uint64_t blas_ptr = DEREF(INDEX(uint64_t, instance_base, idx));
         DEREF(instance_leaf).part1.bvh_ptr = blas_ptr & addr_mask;

         /* Point the start node at blas_ptr + ANV_RT_BVH_HEADER_SIZE while
          * keeping the bits above the low 48 untouched.
          */
         uint64_t new_start_node_ptr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;
         uint64_t packed = DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags;
         DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags =
            (packed & ~addr_mask) | (new_start_node_ptr & addr_mask);
      }
   }
}