mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
anv: Add shader to copy acceleration structures
Rework (Kevin) - encode the address of anv_instance_leaf after header in order to handle serialization and deserialization part. - draw serialized data layout and explanation Co-authored-by: Kevin Chuang <kaiwenjon23@gmail.com> Co-authored-by: Sagar Ghuge <sagar.ghuge@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31588>
This commit is contained in:
parent
a6b1a1fce1
commit
692b5fa9f2
1 changed files with 147 additions and 0 deletions
147
src/intel/vulkan/bvh/copy.comp
Normal file
147
src/intel/vulkan/bvh/copy.comp
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
/* Copyright © 2022 Bas Nieuwenhuizen
|
||||||
|
* Copyright © 2024 Intel Corporation
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#version 460
|
||||||
|
|
||||||
|
#extension GL_GOOGLE_include_directive : require
|
||||||
|
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||||
|
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||||
|
#extension GL_EXT_scalar_block_layout : require
|
||||||
|
#extension GL_EXT_buffer_reference : require
|
||||||
|
#extension GL_EXT_buffer_reference2 : require
|
||||||
|
|
||||||
|
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
#include "anv_build_interface.h"
|
||||||
|
|
||||||
|
/* Push-constant block holding the copy_args read by main(): the copy mode,
 * the source and destination addresses, and the driver_uuid /
 * accel_struct_compat bytes written into the serialization header.
 * copy_args is presumably declared in anv_build_interface.h - TODO confirm.
 */
layout(push_constant) uniform CONSTS {
|
||||||
|
copy_args args;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Layout of serialized data
|
||||||
|
/**************************************|
|
||||||
|
| vk_accel_struct_serialization_header |
|
||||||
|
|--------------------------------------|
|
||||||
|
| For a TLAS, all handles to the BLAS |
|
||||||
|
| within this TLAS. |
|
||||||
|
| For a BLAS, nothing. |
|
||||||
|
|--------------------------------------|
|
||||||
|
| Driver-specific part. |
|
||||||
|
| For Intel, this starts with |
|
||||||
|
| anv_accel_struct_header as drawn |
|
||||||
|
| in anv_bvh.h |
|
||||||
|
|**************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Explanation of BLAS handles:
|
||||||
|
* According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
|
||||||
|
* for a TLAS, the handles of all BLAS/instances within this TLAS are
|
||||||
|
* tightly stored after vk_accel_struct_serialization_header, making this
|
||||||
|
* serialized-memory a semi-opaque object. The application might be able
|
||||||
|
* to swap/replace these handles with other handles. In fact this is what
|
||||||
|
* dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
|
||||||
|
* is doing.
|
||||||
|
*
|
||||||
|
* Therefore, if the application updates the handles, we need to replace
|
||||||
|
* the old handles in anv_instance_leaf with the new ones. To access
|
||||||
|
* anv_instance_leaf without traversing the TLAS, pointers to these
|
||||||
|
* anv_instance_leaf are stored right after anv_accel_struct_header,
|
||||||
|
* allowing us to know where they are in the TLAS instantly.
|
||||||
|
*
|
||||||
|
* Although, the fact that the application can swap/replace new handles
|
||||||
|
* of BLAS without rebuilding the TLAS sounds a bit odd.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Copy, serialize or deserialize an acceleration structure, selected by
 * args.mode.
 *
 * Every invocation copies 8-byte words from copy_src_addr to copy_dst_addr in
 * a strided loop covering the compacted_size bytes reported by the source
 * anv_accel_struct_header.
 *
 *  - ANV_COPY_MODE_COPY:        plain copy, no handle processing.
 *  - ANV_COPY_MODE_SERIALIZE:   invocation 0 fills in the
 *                               vk_accel_struct_serialization_header at
 *                               args.dst_addr; the BVH data is written after
 *                               that header and the handle array, and the
 *                               handle array is filled from the bvh_ptr of
 *                               each anv_instance_leaf.
 *  - ANV_COPY_MODE_DESERIALIZE: the BVH data is read from after the
 *                               serialization header and the handle array,
 *                               and each anv_instance_leaf is patched with
 *                               the handle found in that array.
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;

   /* Total number of invocations in the dispatch. gl_WorkGroupSize.x is the
    * local_size_x declared for this shader, so the two cannot get out of sync.
    */
   uint32_t lanes = gl_NumWorkGroups.x * gl_WorkGroupSize.x;

   /* Each invocation handles one 8-byte word per iteration. Widen before
    * multiplying so that neither the first offset nor the stride can wrap
    * around in 32 bits.
    */
   uint64_t first_offset = uint64_t(global_id) * 8ul;
   uint64_t increment = uint64_t(lanes) * 8ul;

   uint64_t copy_src_addr = args.src_addr;
   uint64_t copy_dst_addr = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The source is serialized data: skip the serialization header and the
       * handle array that follows it to reach the driver-specific part.
       */
      copy_src_addr += SIZEOF(vk_accel_struct_serialization_header) +
         DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(anv_accel_struct_header) header = REF(anv_accel_struct_header)(copy_src_addr);

   /* Handle array located right after the serialization header. It defaults
    * to the source side (deserialize) and is switched to the destination
    * side below when serializing.
    */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);
   uint64_t instance_offset = SIZEOF(anv_accel_struct_header);

   /* We store the address of instance_leaf after bvh header */
   uint64_t instance_end = DEREF(header).instance_count * SIZEOF(uint64_t);

   /* [instance_offset, instance_end) is the byte range, relative to the
    * start of the copied data, that holds those addresses. It stays empty
    * when there are no instances.
    */
   if (instance_end > 0)
      instance_end += instance_offset;

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* In this mode copy_src_addr is still args.src_addr, so "header" is the
       * header of the acceleration structure being serialized.
       */
      copy_dst_addr += SIZEOF(vk_accel_struct_serialization_header) +
         DEREF(header).instance_count * SIZEOF(uint64_t);

      /* A single invocation writes the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;

         for (uint32_t offset = 0; offset < VK_UUID_SIZE; offset++) {
            DEREF(ser_header).driver_uuid[offset] = args.driver_uuid[offset];
         }

         for (uint32_t offset = 0; offset < VK_UUID_SIZE; offset++) {
            DEREF(ser_header).accel_struct_compat[offset] = args.accel_struct_compat[offset];
         }
      }

      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* Empty range: a plain copy never touches the handles. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;
   for (uint64_t offset = first_offset; offset < size; offset += increment) {
      /* copy 8 bytes per iteration */
      DEREF(REF(uint64_t)(copy_dst_addr + offset)) =
         DEREF(REF(uint64_t)(copy_src_addr + offset));

      /* Do the adjustment inline in the same invocation that copies the data so that we don't have
       * to synchronize.
       */
      if (offset < instance_end && offset >= instance_offset &&
          (offset - instance_offset) % SIZEOF(uint64_t) == 0) {
         /* Index of the instance whose leaf address sits at this offset. */
         uint64_t idx = (offset - instance_offset) / SIZEOF(uint64_t);

         if (args.mode == ANV_COPY_MODE_SERIALIZE) {
            /* Indirectly access the anv_instance_leaf, and store the blas_ptrs after ser_header */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_src_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);

            /* Only the low 48 bits of bvh_ptr are kept as the handle. */
            uint64_t blas_ptr = DEREF(instance_leaf).part1.bvh_ptr & 0xfffffffffffful;
            DEREF(INDEX(uint64_t, instance_base, idx)) = blas_ptr;
         } else { /* ANV_COPY_MODE_DESERIALIZE */
            /* Indirectly access the anv_instance_leaf, and replace the bvh_ptr with the ones after ser_header */
            /* NOTE(review): the leaf is reached through the address that was
             * just copied to the destination. If that leaf lies inside the
             * range another invocation is copying, the two writes are
             * unordered - TODO confirm this is safe.
             */
            uint64_t instance_leaf_addr = DEREF(REF(uint64_t)(copy_dst_addr + offset));
            REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(instance_leaf_addr);
            uint64_t blas_ptr = DEREF(INDEX(uint64_t, instance_base, idx));
            DEREF(instance_leaf).part1.bvh_ptr = (blas_ptr & 0xfffffffffffful);

            /* set the startNodePtr to blas_ptr + ANV_HEADER_SIZE */
            uint64_t mask = 0x0000fffffffffffful;
            uint64_t new_startNodePtr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;

            /* clear bits and set: the low 48 bits take the new pointer, the
             * bits above the mask are preserved.
             */
            DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags =
               (DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags & ~mask) | (new_startNodePtr & mask);
         }
      }
   }
}
|
||||||
Loading…
Add table
Reference in a new issue