anv: Implement update BVH

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
This commit is contained in:
Sagar Ghuge 2025-12-19 21:50:34 -08:00
parent 26af065e60
commit ac3c99edc5
10 changed files with 518 additions and 22 deletions

View file

@ -501,6 +501,7 @@ CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_update, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS)
void

View file

@ -263,6 +263,11 @@ def define_tracepoints(args):
Arg(type='uint32_t', var='key', c_format='%x'),
Arg(type='uint32_t', var='n_leaves', c_format='%u'),
Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')])
begin_end_tp('as_update', repeat_last=True,
tp_args=[Arg(type='uint32_t', var='pass', c_format='%u'),
Arg(type='uint32_t', var='key', c_format='%x'),
Arg(type='uint32_t', var='n_leaves', c_format='%u'),
Arg(type='uint32_t', var='n_ir_leaves', c_format='%u')])
begin_end_tp('as_copy', repeat_last=True)
begin_end_tp('rays',

View file

@ -2559,6 +2559,7 @@ enum anv_object_key_bvh_type {
ANV_OBJECT_KEY_BVH_ENCODE = VK_META_OBJECT_KEY_DRIVER_OFFSET,
ANV_OBJECT_KEY_BVH_HEADER,
ANV_OBJECT_KEY_BVH_COPY,
ANV_OBJECT_KEY_BVH_UPDATE,
};
enum bvh_dump_type {

View file

@ -6,6 +6,8 @@
#ifndef ANV_BVH_BUILD_INTERFACE_H
#define ANV_BVH_BUILD_INTERFACE_H
#include "vk_build_interface.h"
#ifdef VULKAN
#include "anv_build_helpers.h"
#else
@ -15,6 +17,21 @@
#define VOID_REF uint64_t
#endif
#define ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE (1u << (VK_BUILD_FLAG_COUNT + 0))
/* Push constants for the BVH update (refit) dispatch, one per geometry. */
struct update_args {
   /* VA of the output BVH node area (AS base + output_bvh_offset). */
   VOID_REF output_bvh;
   /* Per-node atomic "children refit" counters; zeroed before dispatch. */
   REF(uint32_t) internal_ready_count;
   /* Per-block scratch AABBs written bottom-up during the refit. */
   REF(vk_aabb) aabb_scratch;
   /* Total number of leaf nodes in the BVH. */
   uint32_t leaf_node_count;
   /* Number of primitives in the geometry of this dispatch. */
   uint32_t primitive_count;
   /* Byte offset of output_bvh from the acceleration structure base;
    * subtracted from output_bvh to reach the anv_accel_struct_header.
    */
   uint32_t output_bvh_offset;
   /* Per-block map: (child slot << 26) | parent block; the root entry is
    * VK_NULL_BVH_OFFSET. Written by the encode pass.
    */
   VOID_REF parent_child_map;
   /* Map from global leaf id to the leaf's block in the output BVH
    * (VK_NULL_BVH_OFFSET when unmapped). Written by the encode pass.
    */
   VOID_REF leaf_block_offset_map;
   /* Source geometry being refit. */
   vk_bvh_geometry_data geom_data;
};
struct encode_args {
/* Address within the IR BVH, marking the start of leaves/internal nodes. */
VOID_REF intermediate_bvh;

View file

@ -172,6 +172,12 @@ struct anv_internal_node {
*/
uint8_t node_type;
/* Note: We are going to use this field to track number of children this
* internal node has.
*
* XXX: Keep an eye out on this field for future platforms in case if anything
* changes.
*/
uint8_t reserved;
/* 2^exp_x is the size of the grid in x dimension */

View file

@ -182,6 +182,9 @@ encode_internal_node(uint32_t child, uint32_t child_block_offset_from_internal_n
DEREF(dst_node).exp_z = exp_i8[2];
DEREF(dst_node).node_mask = uint8_t(0xff);
DEREF(dst_node).node_type = node_type;
/* Using reserved field to track number of children. */
DEREF(dst_node).reserved = uint8_t(child_count);
}
child_aabb = conservative_aabb(child_aabb);
@ -359,7 +362,9 @@ main()
/* Tracks BLOCK where the next children should be encoded. */
DEREF(args.header).dst_node_offset = 1;
DEREF(header).instance_count = 0;
DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET;
if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) {
DEREF(INDEX(uint32_t, args.parent_child_map, 0)) = VK_NULL_BVH_OFFSET;
}
}
IR_NODE children[6] = {VK_BVH_INVALID_NODE, VK_BVH_INVALID_NODE,
@ -442,7 +447,6 @@ main()
REF(vk_ir_box_node)NODE_OFFSET(children[i]);
DEREF(child_node).bvh_offset = child_offset;
}
child_offset += (type == vk_ir_node_instance) ? 2 : 1;
}
@ -491,18 +495,24 @@ main()
child_aabb = DEREF(REF(vk_ir_node)NODE_OFFSET(child)).aabb;
uint32_t type = ir_id_to_type(child);
if (child != VK_BVH_INVALID_NODE &&
(type == vk_ir_node_triangle || type == vk_ir_node_aabb)) {
uint32_t ir_offset = ir_id_to_offset(child);
uint32_t leaf_id = ir_offset / intermediate_leaf_node_size;
DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) = child_block;
}
if (VK_BUILD_FLAG(ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE)) {
if (child != VK_BVH_INVALID_NODE &&
(type == vk_ir_node_triangle || type == vk_ir_node_aabb)) {
uint32_t ir_offset = ir_id_to_offset(child);
uint32_t leaf_id = ir_offset / intermediate_leaf_node_size;
/* Block offset 0 is assigned to root, so avoid accidental
* assignment.
*/
DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id)) =
(child_block != 0) ? child_block : VK_NULL_BVH_OFFSET;
}
/* Track each children's parent in the map. */
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) {
uint32_t pcm_val = 0;
pcm_val = (cluster.idx << 26) | internal_node_block;
DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm_val;
/* Track each children's parent in the map. */
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_instance) {
uint32_t pcm = 0;
pcm = internal_node_block | (cluster.idx << 26);
DEREF(INDEX(uint32_t, args.parent_child_map, child_block)) = pcm;
}
}
if (child != VK_BVH_INVALID_NODE && type != vk_ir_node_internal) {

View file

@ -7,6 +7,7 @@ bvh_shaders = [
'encode.comp',
'header.comp',
'copy.comp',
'update.comp',
]
# A mapping: [filename version, GFX_VERx10 define version]

View file

@ -0,0 +1,207 @@
/*
* Copyright © 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#version 460
#include "anv_build_helpers.h"
#include "anv_build_interface.h"
#include "update.h"
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
layout(push_constant) uniform CONSTS {
update_args args;
};
/* Re-build and re-encode one leaf node in place and return its new bounds.
 *
 * leaf_local_idx - primitive index within the current geometry.
 * leaf_block     - destination BLOCK of the leaf inside the output BVH.
 *
 * Instances fall through the default case and return zero bounds — updates
 * are presumably BLAS-only (TODO confirm against anv_get_build_config).
 */
vk_aabb
build_and_encode_leaf(uint32_t leaf_local_idx, BLOCK leaf_block)
{
   VOID_REF dst = BLOCK_OFFSET(leaf_block);
   vk_aabb bounds = vk_aabb(vec3(0.0f), vec3(0.0f));

   switch (args.geom_data.geometry_type) {
   case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
      anv_build_triangle(bounds, dst, args.geom_data, leaf_local_idx);
      break;
   case VK_GEOMETRY_TYPE_AABBS_KHR: {
      /* Application AABBs are strided; index into the source buffer. */
      VOID_REF src_aabb_ptr = OFFSET(args.geom_data.data,
                                     leaf_local_idx * args.geom_data.stride);
      anv_build_aabb(bounds, src_aabb_ptr, dst, args.geom_data.geometry_id,
                     leaf_local_idx);
      break;
   }
   default:
      /* instances typically not updated */
      break;
   }
   return bounds;
}
/* Recompute one internal node from its children's AABBs and re-quantize it
 * in place, mirroring the quantization done by encode_internal_node in the
 * encode pass.
 *
 * parent_block         - BLOCK of the internal node to rewrite.
 * updated_idx          - child slot this invocation just refit.
 * child_count          - number of valid children (node's reserved field).
 * updated_child_bounds - fresh bounds for slot updated_idx; used directly
 *                        rather than re-read from scratch.
 *
 * Returns the un-quantized union of all child bounds.
 */
vk_aabb
recompute_parent(BLOCK parent_block, uint32_t updated_idx,
                 uint32_t child_count, vk_aabb updated_child_bounds)
{
   REF(anv_internal_node) parent = REF(anv_internal_node)(BLOCK_OFFSET(parent_block));

   vk_aabb box;
   box.min = vec3(INFINITY);
   box.max = vec3(-INFINITY);

   /* First child block; children are assumed contiguous, one block each
    * (NOTE(review): instance leaves occupy two blocks in the encode pass —
    * confirm instances can never reach this path).
    */
   BLOCK current_child_block = parent_block + DEREF(parent).child_block_offset;

   /* Gather the (at most 6) child AABBs and accumulate their union. */
   vk_aabb cache_aabb[6];
   for (uint32_t i = 0; i < child_count; ++i) {
      cache_aabb[i] = (i == updated_idx) ? updated_child_bounds :
         DEREF(INDEX(vk_aabb, args.aabb_scratch, current_child_block + i));
      aabb_extend(box, cache_aabb[i]);
   }

   vk_aabb conservative_child_aabb = conservative_aabb(box);

   /* Pick per-axis power-of-two exponents so the node extent fits the 8-bit
    * quantization grid; bump the exponent when the mantissa would exceed
    * 255/256 after the ULP-sized inflation.
    */
   float up = 1.0 + ULP;
   ivec3 exp;
   vec3 len = aabb_size(conservative_child_aabb) * up;
   vec3 mant = frexp(len, exp);
   exp.x += int((mant.x > (255.0f / 256.0f)));
   exp.y += int((mant.y > (255.0f / 256.0f)));
   exp.z += int((mant.z > (255.0f / 256.0f)));

   i8vec3 exponent_i8 = i8vec3(exp);
   i8vec3 exp_i8 = {max(int8_t(-128), exponent_i8.x),
                    max(int8_t(-128), exponent_i8.y),
                    max(int8_t(-128), exponent_i8.z)};

   /* Write the node origin and quantization exponents. */
   DEREF(parent).lower[0] = conservative_child_aabb.min.x;
   DEREF(parent).lower[1] = conservative_child_aabb.min.y;
   DEREF(parent).lower[2] = conservative_child_aabb.min.z;
   DEREF(parent).exp_x = exp_i8[0];
   DEREF(parent).exp_y = exp_i8[1];
   DEREF(parent).exp_z = exp_i8[2];

   /* Quantize every child box relative to the node origin: floor the lower
    * corner and ceil the upper corner so the stored box stays conservative.
    */
   vec3 base = conservative_child_aabb.min;
   vec3 scale = ldexp(vec3(1.0), exp_i8 - 8);
   for (uint32_t i = 0; i < child_count; ++i) {
      vk_aabb child_bounds = cache_aabb[i];

      vec3 lower = (child_bounds.min - base) / scale;
      vec3 upper = (child_bounds.max - base) / scale;

      lower = clamp(floor(lower), vec3(0.0), vec3(255.0));
      upper = clamp(ceil(upper), vec3(0.0), vec3(255.0));

      DEREF(parent).lower_x[i] = uint8_t(lower.x);
      DEREF(parent).lower_y[i] = uint8_t(lower.y);
      DEREF(parent).lower_z[i] = uint8_t(lower.z);
      DEREF(parent).upper_x[i] = uint8_t(upper.x);
      DEREF(parent).upper_y[i] = uint8_t(upper.y);
      DEREF(parent).upper_z[i] = uint8_t(upper.z);
   }

   return box;
}
/* Bottom-up BVH refit. Each invocation re-encodes one leaf, then walks
 * toward the root. At every internal node an atomic counter elects the
 * last-arriving child to refit the parent; all other invocations stop, so
 * each node is rewritten exactly once and only after all its children.
 */
void main()
{
   uint32_t leaf_local = gl_GlobalInvocationID.x;
   if (leaf_local >= args.leaf_node_count || leaf_local >= args.primitive_count)
      return;

   /* Global leaf id across all geometries of this acceleration structure. */
   uint32_t leaf_id = args.geom_data.first_id + leaf_local;

   BLOCK leaf_block = DEREF(INDEX(uint32_t, args.leaf_block_offset_map, leaf_id));
   if (leaf_block == VK_NULL_BVH_OFFSET)
      return;

   vk_aabb leaf_bounds = build_and_encode_leaf(leaf_local, leaf_block);

   DEREF(INDEX(vk_aabb, args.aabb_scratch, leaf_block)) = leaf_bounds;

   /* Ensure scratch update and leaf encoding is visible before atomic in
    * following loop.
    */
   memoryBarrierBuffer();

   BLOCK current_block = leaf_block;
   vk_aabb current_bounds = leaf_bounds;

   while (true) {
      BLOCK parent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, current_block));

      /* No parent at all */
      if (parent_raw == VK_NULL_BVH_OFFSET && current_block == leaf_block) {
         /* Degenerate case: this leaf has no recorded parent, so its bounds
          * become the whole acceleration structure's bounds.
          */
         REF(anv_accel_struct_header) hdr =
            REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
         DEREF(hdr).aabb = current_bounds;
         break;
      }

      /* Low 26 bits of the map entry are the parent's block offset. */
      BLOCK parent = parent_raw & 0x03FFFFFF;
      REF(anv_internal_node) internal_node = REF(anv_internal_node)(BLOCK_OFFSET(parent));

      /* Internal node's reserved field is tracking number of children count.*/
      uint32_t valid_child_count = uint32_t(DEREF(internal_node).reserved);
      if (valid_child_count == 0)
         break;

      /* Last-child-wins atomic */
      uint32_t ready = atomicAdd(
         DEREF(INDEX(uint32_t, args.internal_ready_count, parent)), 1,
         gl_ScopeDevice, gl_StorageSemanticsBuffer,
         gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);

      /* Not the last child */
      if (ready != valid_child_count - 1)
         break;

      /* Upper 6 bits of the map entry are this child's slot in the parent. */
      uint32_t child_idx = parent_raw >> 26;

      /* Encode and quantize parent bounds */
      vk_aabb parent_bounds = recompute_parent(parent, child_idx, valid_child_count, current_bounds);

      /* Store parent bounds for next level */
      DEREF(INDEX(vk_aabb, args.aabb_scratch, parent)) = parent_bounds;

      /* Ensure scratch write and parent encoding is visible in the next
       * iteration of this loop.
       */
      memoryBarrierBuffer();

      /* Check whether this parent is the root internal node */
      uint32_t grandparent_raw = DEREF(INDEX(uint32_t, args.parent_child_map, parent));
      if (grandparent_raw == VK_NULL_BVH_OFFSET) {
         /* Reached the root: rebuild the global bounds by dequantizing the
          * root node's children and write them to the AS header.
          */
         REF(anv_internal_node) root = REF(anv_internal_node)(BLOCK_OFFSET(parent));

         vec3 base = vec3(DEREF(root).lower[0], DEREF(root).lower[1], DEREF(root).lower[2]);
         ivec3 exp = ivec3( int(DEREF(root).exp_x), int(DEREF(root).exp_y), int(DEREF(root).exp_z));
         /* Dequantization scale is 2^(exp - 8) per axis. */
         vec3 scale = ldexp(vec3(1.0), exp - 8);

         vk_aabb root_bounds = vk_aabb(vec3(INFINITY), vec3(-INFINITY));
         for (uint32_t i = 0; i < valid_child_count; ++i) {
            vec3 lower = vec3(DEREF(root).lower_x[i],
                              DEREF(root).lower_y[i],
                              DEREF(root).lower_z[i]);
            vec3 upper = vec3(DEREF(root).upper_x[i],
                              DEREF(root).upper_y[i],
                              DEREF(root).upper_z[i]);
            vk_aabb child;
            child.min = base + lower * scale;
            child.max = base + upper * scale;
            aabb_extend(root_bounds, child);
         }

         REF(anv_accel_struct_header) hdr =
            REF(anv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
         DEREF(hdr).aabb = root_bounds;
         break;
      }

      /* Continue upward as this subtree's sole representative. */
      current_block = parent;
      current_bounds = parent_bounds;
   }
}

View file

@ -0,0 +1,73 @@
/*
* Copyright © 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#ifndef ANV_BVH_UPDATE_H
#define ANV_BVH_UPDATE_H
#include "encode.h"
/* Load, optionally transform, and encode one triangle, returning its AABB.
 *
 * bounds    - out: bounding box of the (transformed) triangle.
 * dst_ptr   - destination address for the encoded leaf node.
 * geom_data - source geometry (vertex/index buffers, optional transform).
 * global_id - primitive index within the geometry.
 */
void
anv_build_triangle(inout vk_aabb bounds, VOID_REF dst_ptr, vk_bvh_geometry_data geom_data, uint32_t global_id)
{
   triangle_indices indices = load_indices(geom_data.indices, geom_data.index_format, global_id);
   triangle_vertices vertices = load_vertices(geom_data.data, indices, geom_data.vertex_format, geom_data.stride);

   /* Apply the optional 3x4 transform. Source element (row, col) is read
    * from index col + row * 4; the 4th matrix row keeps (0, 0, 0, 1) from
    * mat4(1.0) so the multiply acts as an affine transform.
    */
   if (geom_data.transform != NULL) {
      mat4 transform = mat4(1.0);
      for (uint32_t col = 0; col < 4; col++) {
         for (uint32_t row = 0; row < 3; row++) {
            transform[col][row] = DEREF(INDEX(float, geom_data.transform, col + row * 4));
         }
      }
      for (uint32_t i = 0; i < 3; i++) {
         vertices.vertex[i] = transform * vertices.vertex[i];
      }
   }

   vk_ir_triangle_node node;

   bounds.min = vec3(INFINITY);
   bounds.max = vec3(-INFINITY);

   /* Copy the three vertices into the IR node while growing the AABB. */
   for (uint32_t coord = 0; coord < 3; coord++) {
      for (uint32_t comp = 0; comp < 3; comp++) {
         node.coords[coord][comp] = vertices.vertex[coord][comp];
         bounds.min[comp] = min(bounds.min[comp], vertices.vertex[coord][comp]);
         bounds.max[comp] = max(bounds.max[comp], vertices.vertex[coord][comp]);
      }
   }

   node.triangle_id = global_id;
   node.geometry_id_and_flags = geom_data.geometry_id;

   /* Encode straight into the destination BVH leaf. */
   anv_encode_triangle(dst_ptr, node);
}
/* Load one procedural AABB from the application buffer and encode it as a
 * leaf node, returning the bounds that were read.
 *
 * bounds      - out: the AABB read from src_ptr.
 * src_ptr     - source address of 6 floats: min.xyz followed by max.xyz.
 * dst_ptr     - destination address for the encoded leaf node.
 * geometry_id - geometry index stored in the leaf.
 * global_id   - primitive index within the geometry.
 */
void
anv_build_aabb(inout vk_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t geometry_id, uint32_t global_id)
{
   /* vec == 0 reads min.xyz, vec == 1 reads max.xyz. */
   for (uint32_t vec = 0; vec < 2; vec++) {
      for (uint32_t comp = 0; comp < 3; comp++) {
         float coord = DEREF(INDEX(float, src_ptr, comp + vec * 3));
         if (vec == 0)
            bounds.min[comp] = coord;
         else
            bounds.max[comp] = coord;
      }
   }

   vk_ir_aabb_node node;
   node.base.aabb = bounds;
   node.primitive_id = global_id;
   node.geometry_id_and_flags = geometry_id;

   anv_encode_aabb(dst_ptr, node);
}
#endif

View file

@ -30,6 +30,16 @@
static uint32_t blas_id = 0;
static uint32_t tlas_id = 0;
/* Layout of the per-build update scratch buffer (zero-filled before the
 * update dispatch).
 */
struct update_scratch_layout {
   /* Byte offset of the per-node atomic "children ready" counters. */
   uint32_t internal_ready_count_offset;
   /* Byte offset of the per-node vk_aabb scratch array. */
   uint32_t aabb_offset;
   /* Total scratch size in bytes. */
   uint32_t size;
};
/* Bits carried in config.encode_key[1] to specialize the encode pass. */
enum anv_encode_key {
   /* Updatable BLAS build: encode must also emit the lookup maps that the
    * update (refit) pass consumes.
    */
   ANV_ENCODE_KEY_ALLOW_UPDATE_BVH = (1 << 0),
};
static void
begin_debug_marker(VkCommandBuffer commandBuffer,
struct vk_acceleration_structure_build_marker *marker)
@ -58,6 +68,9 @@ begin_debug_marker(VkCommandBuffer commandBuffer,
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
trace_intel_begin_as_encode(&cmd_buffer->trace);
break;
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE:
trace_intel_begin_as_update(&cmd_buffer->trace);
break;
default:
UNREACHABLE("Invalid build step");
}
@ -91,6 +104,7 @@ end_debug_marker(VkCommandBuffer commandBuffer,
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
break;
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE:
trace_intel_end_as_encode(&cmd_buffer->trace,
marker->encode.pass,
marker->encode.key,
@ -233,6 +247,7 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
#define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h)
#define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h)
#define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h)
#define UPDATE_SPV_PATH STRINGIFY(bvh/genX(update).spv.h)
static const uint32_t encode_spv[] = {
#include ENCODE_SPV_PATH
@ -246,6 +261,10 @@ static const uint32_t copy_spv[] = {
#include COPY_SPV_PATH
};
static const uint32_t update_spv[] = {
#include UPDATE_SPV_PATH
};
static void
get_bvh_layout(const struct vk_acceleration_structure_build_state *state,
struct bvh_layout *layout)
@ -290,13 +309,15 @@ get_bvh_layout(const struct vk_acceleration_structure_build_state *state,
offset += leaf_count * sizeof(uint64_t);
}
uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t);
layout->parent_child_map_offset = offset;
offset += parent_child_map_size;
if (state->config.encode_key[1] & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) {
uint64_t parent_child_map_size = (internal_count + leaf_count) * sizeof(uint32_t);
layout->parent_child_map_offset = offset;
offset += parent_child_map_size;
uint64_t leaf_block_offset_size = (internal_count + leaf_count) * sizeof(uint32_t);
layout->leaf_block_map_offset = offset;
offset += leaf_block_offset_size;
uint64_t leaf_block_offset_size = leaf_count * sizeof(uint32_t);
layout->leaf_block_map_offset = offset;
offset += leaf_block_offset_size;
}
layout->size = align64(offset, 64);
}
@ -323,9 +344,13 @@ anv_get_build_config(VkDevice device, struct vk_acceleration_structure_build_sta
* the compacted size of an updatable AS as the maximum possible size for
* any AS that could also be built from the same number of leaf nodes.
*/
state->config.encode_key[1] =
state->config.encode_key[0] =
((flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) &&
!(flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) ? 1 : 0;
if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
state->config.encode_key[1] = ANV_ENCODE_KEY_ALLOW_UPDATE_BVH;
}
static void
@ -375,13 +400,32 @@ anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args,
anv_CmdPushConstants2(commandBuffer, &push_info);
}
/* Translate encode-key bits into VK_BUILD_FLAG specialization flags for the
 * encode shader. commandBuffer is currently unused.
 */
static uint32_t
anv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
{
   uint32_t flags = 0;

   /* For updatable BVHs, make the encode pass also write the lookup maps
    * consumed by the update pass: the parent-child offset map and the leaf
    * block offset map (see struct update_args).
    */
   if (key & ANV_ENCODE_KEY_ALLOW_UPDATE_BVH) {
      flags |= ANV_BUILD_FLAG_WRITE_LOOKUP_MAPS_FOR_UPDATE;
   }

   return flags;
}
/* Bind the encode pipeline, specialized with the build flags derived from
 * this build's encode key (e.g. emitting update lookup maps for updatable
 * BVHs). Always succeeds.
 */
static VkResult
anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
   const uint32_t build_flags =
      anv_build_flags(commandBuffer, state->config.encode_key[1]);

   anv_bvh_build_bind_pipeline(commandBuffer,
                               ANV_OBJECT_KEY_BVH_ENCODE,
                               encode_spv, sizeof(encode_spv),
                               sizeof(struct encode_args),
                               build_flags);

   return VK_SUCCESS;
}
@ -489,7 +533,7 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru
.bvh_offset = bvh_layout.bvh_offset,
.instance_count = instance_count,
.instance_leaves_offset = bvh_layout.instance_leaves_offset,
.is_compacted = (state->config.encode_key[1] == 1),
.is_compacted = (state->config.encode_key[0] == 1),
.bvh_size = bvh_layout.size,
};
@ -503,6 +547,133 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru
}
}
/* Compute the byte layout of the update scratch buffer: one uint32_t ready
 * counter per node followed by one vk_aabb per node, where the node count
 * covers both internal nodes and leaves. device is currently unused.
 */
static void
anv_get_update_scratch_layout(struct anv_device *device,
                              const struct vk_acceleration_structure_build_state *state,
                              struct update_scratch_layout *scratch)
{
   /* Upper bound on internal nodes for a BVH with this many leaves. */
   const uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
   const uint32_t node_count = internal_count + state->leaf_node_count;

   scratch->internal_ready_count_offset = 0;
   scratch->aabb_offset = node_count * sizeof(uint32_t);
   scratch->size = scratch->aabb_offset + node_count * sizeof(vk_aabb);
}
/* Report the scratch buffer size an update of this build state requires:
 * simply the end of the computed scratch layout.
 */
static VkDeviceSize
anv_get_update_scratch_size(VkDevice _device,
                            const struct vk_acceleration_structure_build_state *state)
{
   VK_FROM_HANDLE(anv_device, device, _device);

   struct update_scratch_layout layout;
   anv_get_update_scratch_layout(device, state, &layout);

   return layout.size;
}
/* Zero the update scratch (ready counters and AABB scratch) for every build
 * in the batch that is an update; non-update builds are skipped.
 */
static void
anv_init_update_scratch(VkCommandBuffer commandBuffer,
                        const struct vk_acceleration_structure_build_state *states,
                        uint32_t build_count)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   for (uint32_t idx = 0; idx < build_count; idx++) {
      const struct vk_acceleration_structure_build_state *build = &states[idx];

      if (build->config.internal_type != VK_INTERNAL_BUILD_TYPE_UPDATE)
         continue;

      struct update_scratch_layout layout;
      anv_get_update_scratch_layout(device, build, &layout);

      const uint64_t scratch_addr = build->build_info->scratchData.deviceAddress;
      anv_cmd_fill_buffer_addr(commandBuffer, scratch_addr, layout.size, 0x0);
   }
}
/* Bind the BVH update pipeline. If the scratch-clearing writes from
 * anv_init_update_scratch have not been flushed on both paths, insert a
 * compute-write -> compute-read barrier first so the update shader sees the
 * zeroed counters.
 */
static void
anv_update_prepare(VkCommandBuffer commandBuffer,
                   const struct vk_acceleration_structure_build_state *state,
                   bool flushed_cp_after_init_update_scratch,
                   bool flushed_compute_after_init_update_scratch)
{
   const bool scratch_visible = flushed_cp_after_init_update_scratch &&
                                flushed_compute_after_init_update_scratch;
   if (!scratch_visible)
      vk_barrier_compute_w_to_compute_r(commandBuffer);

   anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_UPDATE,
                               update_spv, sizeof(update_spv),
                               sizeof(struct update_args), 0);
}
/* Record the BVH update: copy src to dst when they differ, then dispatch
 * the update shader once per geometry, each dispatch refitting that
 * geometry's primitives (the pipeline was bound by anv_update_prepare).
 */
static void
anv_update_as(VkCommandBuffer commandBuffer,
              const struct vk_acceleration_structure_build_state *state)
{
   VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
   VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
   struct anv_device *device = cmd_buffer->device;

   struct bvh_layout bvh_layout;
   get_bvh_layout(state, &bvh_layout);

   /* Just copy over data from src to dst if mismatch. */
   if (src != dst) {
      assert(src->offset == 0 && dst->offset == 0);
      struct anv_address src_addr =
         anv_address_from_u64(vk_acceleration_structure_get_va(src));
      struct anv_address dst_addr =
         anv_address_from_u64(vk_acceleration_structure_get_va(dst));

      assert(src->size == dst->size);
      anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, src->size);

      /* The update dispatches below read what the copy wrote. */
      vk_barrier_compute_w_to_compute_r(commandBuffer);
   }

   struct update_scratch_layout update_layout;
   anv_get_update_scratch_layout(device, state, &update_layout);

   /* Geometry-independent push constants; geom_data/primitive_count are
    * filled per geometry in the loop below.
    */
   struct update_args update_consts = {
      .internal_ready_count = state->build_info->scratchData.deviceAddress +
                              update_layout.internal_ready_count_offset,
      .aabb_scratch = state->build_info->scratchData.deviceAddress +
                      update_layout.aabb_offset,
      .leaf_node_count = state->leaf_node_count,
      .parent_child_map = vk_acceleration_structure_get_va(dst) +
                          bvh_layout.parent_child_map_offset,
      .leaf_block_offset_map = vk_acceleration_structure_get_va(dst) +
                               bvh_layout.leaf_block_map_offset,
      .output_bvh = vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
      .output_bvh_offset = bvh_layout.bvh_offset,
   };

   /* first_id gives each geometry's primitives a contiguous global leaf-id
    * range, used to index the leaf block offset map.
    */
   uint32_t first_id = 0;
   for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
      const VkAccelerationStructureGeometryKHR *geom =
         state->build_info->pGeometries ? &state->build_info->pGeometries[i] :
                                          state->build_info->ppGeometries[i];

      const VkAccelerationStructureBuildRangeInfoKHR *build_range_info =
         &state->build_range_infos[i];

      update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
      update_consts.primitive_count = build_range_info->primitiveCount;

      anv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));

      /* One invocation per primitive of this geometry. */
      anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
         (commandBuffer, build_range_info->primitiveCount, 1, 1);

      first_id += build_range_info->primitiveCount;
   }
}
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
.begin_debug_marker = begin_debug_marker,
.end_debug_marker = end_debug_marker,
@ -510,6 +681,10 @@ static const struct vk_acceleration_structure_build_ops anv_build_ops = {
.get_build_config = anv_get_build_config,
.encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline },
.encode_as = { anv_encode_as, anv_init_header },
.get_update_scratch_size = anv_get_update_scratch_size,
.init_update_scratch = anv_init_update_scratch,
.update_prepare[0] = anv_update_prepare,
.update_as[0] = anv_update_as,
};
static VkResult