mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-12 01:20:17 +01:00
anv: Add shader to copy acceleration structures
Rework (Kevin) - encode the address of anv_instance_leaf after header in order to handle serialization and deserialization part. - draw serialized data layout and explanation Co-authored-by: Kevin Chuang <kaiwenjon23@gmail.com> Co-authored-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31588>
This commit is contained in:
parent
a6b1a1fce1
commit
692b5fa9f2
1 changed files with 147 additions and 0 deletions
147
src/intel/vulkan/bvh/copy.comp
Normal file
147
src/intel/vulkan/bvh/copy.comp
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
/* Copyright © 2022 Bas Nieuwenhuizen
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
|
||||
/* Workgroup shape: 128 invocations along X, one along Y and one along Z. */
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#include "anv_build_interface.h"
|
||||
|
||||
/* Push constants for the copy dispatch. main() reads args.mode, args.src_addr,
 * args.dst_addr, args.driver_uuid and args.accel_struct_compat. The copy_args
 * type is declared outside this file (presumably in anv_build_interface.h —
 * confirm).
 */
layout(push_constant) uniform CONSTS {
|
||||
copy_args args;
|
||||
};
|
||||
|
||||
// Layout of serialized data
|
||||
/**************************************|
|
||||
| vk_accel_struct_serialization_header |
|
||||
|--------------------------------------|
|
||||
| For a TLAS, all handles to the BLAS |
|
||||
| within this TLAS. |
|
||||
| For a BLAS, nothing. |
|
||||
|--------------------------------------|
|
||||
| Driver-specific part. |
|
||||
| For Intel, this starts with |
|
||||
| anv_accel_struct_header as drawn |
|
||||
| in anv_bvh.h |
|
||||
|**************************************/
|
||||
|
||||
/*
|
||||
* Explanation of BLAS handles:
|
||||
* According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
|
||||
* for a TLAS, the handles of all BLAS/instances within this TLAS are
|
||||
* tightly stored after vk_accel_struct_serialization_header, making this
|
||||
* serialized-memory a semi-opaque object. The application might be able
|
||||
* to swap/replace these handles with other handles. In fact this is what
|
||||
* dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
|
||||
* is doing.
|
||||
*
|
||||
* Therefore, if the application updates the handles, we need to replace
|
||||
* the old handles in anv_instance_leaf with the new ones. To access
|
||||
* anv_instance_leaf without traversing the TLAS, pointers to these
|
||||
* anv_instance_leaf are stored right after anv_accel_struct_header,
|
||||
* allowing us to know where they are in the TLAS instantly.
|
||||
*
|
||||
* That said, it is a bit odd that the application can swap/replace the
|
||||
* BLAS handles without rebuilding the TLAS.
|
||||
*/
|
||||
|
||||
/*
 * Entry point. Copies one acceleration structure from args.src_addr to
 * args.dst_addr, 8 bytes per invocation per loop trip, in one of the modes
 * selected by args.mode:
 *
 *  - ANV_COPY_MODE_COPY: copies compacted_size bytes and performs no fix-ups.
 *  - ANV_COPY_MODE_SERIALIZE: invocation 0 writes the
 *    vk_accel_struct_serialization_header at args.dst_addr; the BVH is copied
 *    after that header and the handle table, and the table is filled with the
 *    48-bit bvh_ptr of every instance leaf.
 *  - otherwise (ANV_COPY_MODE_DESERIALIZE): the BVH is read from behind the
 *    serialization header and handle table, and each instance leaf gets its
 *    bvh_ptr and start node pointer rewritten from the handle table.
 */
void
main(void)
{
   uint32_t invocation = gl_GlobalInvocationID.x;
   uint32_t invocation_count = gl_NumWorkGroups.x * gl_WorkGroupSize.x;
   /* Every invocation moves 8 bytes per loop trip. */
   uint32_t stride = invocation_count * 8;

   /* Low 48 bits: the address part of bvh_ptr and of the start node pointer. */
   const uint64_t addr_mask = 0x0000fffffffffffful;

   uint64_t src = args.src_addr;
   uint64_t dst = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The BVH sits behind the serialization header and the handle table. */
      REF(vk_accel_struct_serialization_header) in_header =
         REF(vk_accel_struct_serialization_header)(args.src_addr);
      src += SIZEOF(vk_accel_struct_serialization_header) +
             DEREF(in_header).instance_count * SIZEOF(uint64_t);
   }

   REF(anv_accel_struct_header) bvh_header = REF(anv_accel_struct_header)(src);

   /* Handle table location; re-pointed at the destination when serializing. */
   uint64_t handle_table = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);

   /* Byte range, relative to the BVH start, holding one instance-leaf address
    * per instance right after the BVH header. The range is empty
    * (leaf_ptrs_end == 0) when there are no instances.
    */
   uint64_t leaf_ptrs_begin = SIZEOF(anv_accel_struct_header);
   uint64_t leaf_ptrs_end = DEREF(bvh_header).instance_count * SIZEOF(uint64_t);
   if (leaf_ptrs_end > 0)
      leaf_ptrs_end += leaf_ptrs_begin;

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* In this mode src == args.src_addr, so bvh_header is the source header. */
      dst += SIZEOF(vk_accel_struct_serialization_header) +
             DEREF(bvh_header).instance_count * SIZEOF(uint64_t);

      /* A single invocation fills in the serialization header. */
      if (invocation == 0) {
         REF(vk_accel_struct_serialization_header) out_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);

         DEREF(out_header).serialization_size = DEREF(bvh_header).serialization_size;
         DEREF(out_header).deserialization_size = DEREF(bvh_header).compacted_size;
         DEREF(out_header).instance_count = DEREF(bvh_header).instance_count;

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(out_header).driver_uuid[i] = args.driver_uuid[i];
         }

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(out_header).accel_struct_compat[i] = args.accel_struct_compat[i];
         }
      }

      handle_table = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* Plain copy: disable the per-instance fix-ups below. */
      leaf_ptrs_end = 0;
   }

   uint64_t total_bytes = DEREF(bvh_header).compacted_size;

   for (uint64_t cursor = invocation * 8; cursor < total_bytes; cursor += stride) {
      /* Move one 64-bit word. */
      DEREF(REF(uint64_t)(dst + cursor)) = DEREF(REF(uint64_t)(src + cursor));

      /* The fix-up runs in the invocation that copied the word, so no
       * synchronization is needed. Skip words outside the leaf-address table.
       */
      bool in_leaf_ptr_table = cursor >= leaf_ptrs_begin && cursor < leaf_ptrs_end;
      if (!in_leaf_ptr_table || (cursor - leaf_ptrs_begin) % SIZEOF(uint64_t) != 0)
         continue;

      uint64_t slot = (cursor - leaf_ptrs_begin) / SIZEOF(uint64_t);

      if (args.mode == ANV_COPY_MODE_SERIALIZE) {
         /* Follow the stored leaf address and publish the leaf's BLAS pointer
          * in the handle table behind the serialization header.
          */
         uint64_t leaf_addr = DEREF(REF(uint64_t)(src + cursor));
         REF(anv_instance_leaf) leaf = REF(anv_instance_leaf)(leaf_addr);
         uint64_t blas_ptr = DEREF(leaf).part1.bvh_ptr & addr_mask;
         DEREF(INDEX(uint64_t, handle_table, slot)) = blas_ptr;
      } else { /* ANV_COPY_MODE_DESERIALIZE */
         /* Read back the leaf address this invocation just wrote to the
          * destination and patch that leaf with the handle found behind the
          * serialization header.
          * NOTE(review): the address is copied verbatim from the serialized
          * data — confirm it refers to a leaf of the destination structure.
          */
         uint64_t leaf_addr = DEREF(REF(uint64_t)(dst + cursor));
         REF(anv_instance_leaf) leaf = REF(anv_instance_leaf)(leaf_addr);
         uint64_t blas_ptr = DEREF(INDEX(uint64_t, handle_table, slot));
         DEREF(leaf).part1.bvh_ptr = (blas_ptr & addr_mask);

         /* Point the start node at blas_ptr + ANV_RT_BVH_HEADER_SIZE, keeping
          * the bits above the low 48 of the combined field untouched.
          */
         uint64_t start_node = blas_ptr + ANV_RT_BVH_HEADER_SIZE;
         DEREF(leaf).part0.start_node_ptr_and_inst_flags =
            (DEREF(leaf).part0.start_node_ptr_and_inst_flags & ~addr_mask) |
            (start_node & addr_mask);
      }
   }
}
|
||||
Loading…
Add table
Reference in a new issue