diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index 3afba3c2493..a93af54c41c 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -5,6 +5,7 @@ */ #include "meta/radv_meta.h" +#include "radv_buffer.h" #include "radv_cs.h" #include "radv_entrypoints.h" @@ -328,7 +329,7 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s if (device->meta_state.accel_struct_build.build_args.propagate_cull_flags) update_key |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS; - state->config.update_key[0] = update_key; + state->config.update_key[1] = update_key; } static void @@ -403,7 +404,7 @@ radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key) } static VkResult -radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) +radv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_spv, sizeof(encode_spv), sizeof(struct encode_args), @@ -413,8 +414,7 @@ radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceler } static VkResult -radv_encode_bind_pipeline_gfx12(VkCommandBuffer commandBuffer, - const struct vk_acceleration_structure_build_state *state) +radv_encode_prepare_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_gfx12_spv, sizeof(encode_gfx12_spv), sizeof(struct encode_gfx12_args), @@ -524,8 +524,8 @@ radv_encode_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration } static VkResult -radv_encode_triangles_bind_pipeline_gfx12(VkCommandBuffer commandBuffer, - const struct vk_acceleration_structure_build_state *state) +radv_encode_triangles_prepare_gfx12(VkCommandBuffer 
commandBuffer, + const struct vk_acceleration_structure_build_state *state) { bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12; if (!compress_triangles) @@ -578,8 +578,8 @@ radv_encode_triangles_gfx12(VkCommandBuffer commandBuffer, const struct vk_accel } static VkResult -radv_encode_triangles_retry_bind_pipeline_gfx12(VkCommandBuffer commandBuffer, - const struct vk_acceleration_structure_build_state *state) +radv_encode_triangles_retry_prepare_gfx12(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *state) { bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12; if (!compress_triangles) @@ -636,7 +636,7 @@ radv_encode_triangles_retry_gfx12(VkCommandBuffer commandBuffer, } static VkResult -radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) +radv_init_header_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { /* Wait for encoding to finish. 
*/ vk_barrier_compute_r(commandBuffer); @@ -703,6 +703,7 @@ radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_str sizeof(header) - base); if (layout.geometry_info_offset != RADV_OFFSET_UNUSED) { + VK_FROM_HANDLE(radv_buffer, dst_buffer, vk_buffer_to_handle(dst->buffer)); uint64_t geometry_infos_size = state->build_info->geometryCount * sizeof(struct radv_accel_struct_geometry_info); struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size); @@ -717,8 +718,14 @@ radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_str geometry_infos[i].primitive_count = state->build_range_infos[i].primitiveCount; } - radv_CmdUpdateBuffer(commandBuffer, vk_buffer_to_handle(dst->buffer), dst->offset + layout.geometry_info_offset, - geometry_infos_size, geometry_infos); + radv_update_memory(cmd_buffer, vk_acceleration_structure_get_va(dst) + layout.geometry_info_offset, + geometry_infos_size, geometry_infos, radv_get_copy_flags_from_bo(dst_buffer->bo)); + + /* radv_update_memory might use compute, which clobbers the pipeline bind point. If that happens, rebind the header + * pipeline to restore the state expected at entry of radv_init_header. + */ + radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_HEADER, header_spv, sizeof(header_spv), + sizeof(struct header_args), 0); free(geometry_infos); } @@ -741,7 +748,7 @@ radv_init_update_scratch(VkCommandBuffer commandBuffer, const struct vk_accelera layout.size - layout.internal_ready_count_offset, 0x0, RADV_COPY_FLAGS_DEVICE_LOCAL); /* geometryCount == 1 passes the data as push constant. 
*/ - if (radv_use_bvh8(pdev) && !(state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY)) { + if (radv_use_bvh8(pdev) && !(state->config.update_key[1] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY)) { uint32_t data_size = sizeof(struct vk_bvh_geometry_data) * state->build_info->geometryCount; struct vk_bvh_geometry_data *data = malloc(data_size); if (!data) { @@ -769,10 +776,39 @@ radv_init_update_scratch(VkCommandBuffer commandBuffer, const struct vk_accelera } static void -radv_update_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state, - bool flushed_cp_after_init_update_scratch, bool flushed_compute_after_init_update_scratch) +radv_update_copy_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state, + bool flushed_cp_after_init_update_scratch, bool flushed_compute_after_init_update_scratch) +{ +} + +static void +radv_update_copy(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure); + VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + + if (src != dst) { + struct acceleration_structure_layout layout; + radv_get_acceleration_structure_layout(device, state, &layout); + + /* Copy header/metadata */ + const uint64_t src_va = vk_acceleration_structure_get_va(src); + const uint64_t dst_va = vk_acceleration_structure_get_va(dst); + + radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL, + RADV_COPY_FLAGS_DEVICE_LOCAL); + } +} + +static void +radv_update_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state, + bool flushed_cp_after_init_update_scratch, bool 
flushed_compute_after_init_update_scratch) +{ + VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure); + VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -785,7 +821,14 @@ radv_update_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceler cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2; } - uint32_t flags = state->config.update_key[0]; + uint32_t flags = state->config.update_key[1]; + + /* If we copied anything, we need to wait for that copy. */ + if (!(flags & RADV_BUILD_FLAG_UPDATE_IN_PLACE)) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | + radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL); + } if (radv_use_bvh8(pdev)) { radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_UPDATE, update_gfx12_spv, @@ -814,18 +857,6 @@ radv_update_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_struc VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - if (src != dst) { - struct acceleration_structure_layout layout; - radv_get_acceleration_structure_layout(device, state, &layout); - - /* Copy header/metadata */ - const uint64_t src_va = vk_acceleration_structure_get_va(src); - const uint64_t dst_va = vk_acceleration_structure_get_va(dst); - - radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL, - RADV_COPY_FLAGS_DEVICE_LOCAL); - } - struct update_scratch_layout layout; radv_get_update_scratch_layout(device, state, &layout); @@ -861,18 +892,6 @@ radv_update_as_gfx12(VkCommandBuffer 
commandBuffer, const struct vk_acceleration VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - if (src != dst) { - struct acceleration_structure_layout layout; - radv_get_acceleration_structure_layout(device, state, &layout); - - /* Copy header/metadata */ - const uint64_t src_va = vk_acceleration_structure_get_va(src); - const uint64_t dst_va = vk_acceleration_structure_get_va(dst); - - radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL, - RADV_COPY_FLAGS_DEVICE_LOCAL); - } - struct update_scratch_layout layout; radv_get_update_scratch_layout(device, state, &layout); @@ -885,7 +904,7 @@ radv_update_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration .leaf_node_count = state->leaf_node_count, }; - if (state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY) { + if (state->config.update_key[1] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY) { const VkAccelerationStructureGeometryKHR *geom = state->build_info->pGeometries ? 
&state->build_info->pGeometries[0] : state->build_info->ppGeometries[0]; update_consts.geom_data0 = vk_fill_geometry_data(state->build_info->type, 0, 0, geom, state->build_range_infos); @@ -969,27 +988,27 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) .get_as_size = radv_get_as_size, .get_update_scratch_size = radv_get_update_scratch_size, .init_update_scratch = radv_init_update_scratch, - .update_bind_pipeline[0] = radv_update_bind_pipeline, + .update_prepare[0] = radv_update_copy_prepare, + .update_as[0] = radv_update_copy, + .update_prepare[1] = radv_update_prepare, }; if (radv_use_bvh8(pdev)) { - device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as_gfx12; + device->meta_state.accel_struct_build.build_ops.update_as[1] = radv_update_as_gfx12; device->meta_state.accel_struct_build.build_ops.get_encode_scratch_size = radv_get_encode_scratch_size; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline_gfx12; + device->meta_state.accel_struct_build.build_ops.encode_prepare[0] = radv_encode_prepare_gfx12; device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as_gfx12; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[1] = - radv_encode_triangles_bind_pipeline_gfx12; + device->meta_state.accel_struct_build.build_ops.encode_prepare[1] = radv_encode_triangles_prepare_gfx12; device->meta_state.accel_struct_build.build_ops.encode_as[1] = radv_encode_triangles_gfx12; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[2] = - radv_encode_triangles_retry_bind_pipeline_gfx12; + device->meta_state.accel_struct_build.build_ops.encode_prepare[2] = radv_encode_triangles_retry_prepare_gfx12; device->meta_state.accel_struct_build.build_ops.encode_as[2] = radv_encode_triangles_retry_gfx12; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[3] = radv_init_header_bind_pipeline; + 
device->meta_state.accel_struct_build.build_ops.encode_prepare[3] = radv_init_header_prepare; device->meta_state.accel_struct_build.build_ops.encode_as[3] = radv_init_header; } else { - device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline; + device->meta_state.accel_struct_build.build_ops.update_as[1] = radv_update_as; + device->meta_state.accel_struct_build.build_ops.encode_prepare[0] = radv_encode_prepare; device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as; - device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[1] = radv_init_header_bind_pipeline; + device->meta_state.accel_struct_build.build_ops.encode_prepare[1] = radv_init_header_prepare; device->meta_state.accel_struct_build.build_ops.encode_as[1] = radv_init_header; device->meta_state.accel_struct_build.build_ops.leaf_spirv_override = leaf_spv; device->meta_state.accel_struct_build.build_ops.leaf_spirv_override_size = sizeof(leaf_spv); diff --git a/src/freedreno/vulkan/tu_acceleration_structure.cc b/src/freedreno/vulkan/tu_acceleration_structure.cc index f7658eabe51..1d4722dbe24 100644 --- a/src/freedreno/vulkan/tu_acceleration_structure.cc +++ b/src/freedreno/vulkan/tu_acceleration_structure.cc @@ -174,8 +174,8 @@ tu_get_build_config(VkDevice device, } static VkResult -encode_bind_pipeline(VkCommandBuffer commandBuffer, - const struct vk_acceleration_structure_build_state *state) +encode_prepare(VkCommandBuffer commandBuffer, + const struct vk_acceleration_structure_build_state *state) { VK_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); struct tu_device *device = cmdbuf->device; @@ -348,7 +348,7 @@ header(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_ops tu_as_build_ops = { .get_build_config = tu_get_build_config, .get_as_size = get_bvh_size, - .encode_bind_pipeline = { encode_bind_pipeline, 
header_bind_pipeline }, + .encode_prepare = { encode_prepare, header_bind_pipeline }, .encode_as = { encode, header }, }; diff --git a/src/gallium/frontends/lavapipe/lvp_acceleration_structure.c b/src/gallium/frontends/lavapipe/lvp_acceleration_structure.c index 4cbe33e283b..5c458416de9 100644 --- a/src/gallium/frontends/lavapipe/lvp_acceleration_structure.c +++ b/src/gallium/frontends/lavapipe/lvp_acceleration_structure.c @@ -620,14 +620,14 @@ lvp_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKH } static VkResult -lvp_encode_bind_pipeline(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state) +lvp_encode_prepare(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state) { return VK_SUCCESS; } const struct vk_acceleration_structure_build_ops accel_struct_ops = { .get_as_size = lvp_get_as_size, - .encode_bind_pipeline[0] = lvp_encode_bind_pipeline, + .encode_prepare[0] = lvp_encode_prepare, .encode_as[0] = lvp_enqueue_encode_as, }; diff --git a/src/intel/vulkan/genX_acceleration_structure.c b/src/intel/vulkan/genX_acceleration_structure.c index a5e30ffce85..a1c539b9fea 100644 --- a/src/intel/vulkan/genX_acceleration_structure.c +++ b/src/intel/vulkan/genX_acceleration_structure.c @@ -358,7 +358,7 @@ anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args, } static VkResult -anv_encode_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) +anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state) { anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_ENCODE, @@ -543,8 +543,7 @@ static const struct vk_acceleration_structure_build_ops anv_build_ops = { .end_debug_marker = end_debug_marker, .get_as_size = anv_get_as_size, .get_build_config = anv_get_build_config, - .encode_bind_pipeline = { anv_encode_bind_pipeline, - anv_init_header_bind_pipeline }, + 
.encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline }, .encode_as = { anv_encode_as, anv_init_header }, }; diff --git a/src/vulkan/runtime/vk_acceleration_structure.c b/src/vulkan/runtime/vk_acceleration_structure.c index cc212e35ee4..a2110d4933c 100644 --- a/src/vulkan/runtime/vk_acceleration_structure.c +++ b/src/vulkan/runtime/vk_acceleration_structure.c @@ -1401,11 +1401,11 @@ vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer, } if (update) { - ops->update_bind_pipeline[pass](commandBuffer, &bvh_states[i].vk, + ops->update_prepare[pass](commandBuffer, &bvh_states[i].vk, flushed_cp_after_init_update_scratch, flushed_compute_after_init_update_scratch); } else { - ops->encode_bind_pipeline[pass](commandBuffer, &bvh_states[i].vk); + ops->encode_prepare[pass](commandBuffer, &bvh_states[i].vk); } } else { if (update != (bvh_states[i].vk.config.internal_type == diff --git a/src/vulkan/runtime/vk_acceleration_structure.h b/src/vulkan/runtime/vk_acceleration_structure.h index 3d27538b6b4..4900ba53a0f 100644 --- a/src/vulkan/runtime/vk_acceleration_structure.h +++ b/src/vulkan/runtime/vk_acceleration_structure.h @@ -139,11 +139,11 @@ struct vk_acceleration_structure_build_ops { VkDeviceSize (*get_encode_scratch_size)(VkDevice device, const struct vk_acceleration_structure_build_state *state); VkDeviceSize (*get_update_scratch_size)(VkDevice device, const struct vk_acceleration_structure_build_state *state); - VkResult (*encode_bind_pipeline[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state); + VkResult (*encode_prepare[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state); void (*encode_as[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state); void (*init_update_scratch)(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state); - void 
(*update_bind_pipeline[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state, + void (*update_prepare[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state, bool flushed_cp_after_init_update_scratch, bool flushed_compute_after_init_update_scratch); void (*update_as[MAX_ENCODE_PASSES])(VkCommandBuffer cmd_buffer, const struct vk_acceleration_structure_build_state *state);