radv: Add BVH IR header.

The header holds GPU state that is passed between build stages but does not belong in a node. Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19292>
2026-05-05 05:18:08 +02:00 · 2022-09-29 01:40:50 +02:00 · 2022-09-29 01:40:50 +02:00 · 0e23df959e
commit 0e23df959e
parent 37525c11d1
6 changed files with 36 additions and 32 deletions
--- a/src/amd/vulkan/bvh/build_helpers.h
+++ b/src/amd/vulkan/bvh/build_helpers.h
@ -236,6 +236,7 @@ TYPE(radv_bvh_instance_node, 8);
 TYPE(radv_bvh_box16_node, 4);
 TYPE(radv_bvh_box32_node, 4);

+TYPE(radv_ir_header, 4);
 TYPE(radv_ir_node, 4);
 TYPE(radv_ir_box_node, 4);
 TYPE(radv_ir_triangle_node, 4);
--- a/src/amd/vulkan/bvh/build_interface.h
+++ b/src/amd/vulkan/bvh/build_interface.h
@ -34,7 +34,7 @@

 struct leaf_args {
   VOID_REF bvh;
-   REF(AABB) bounds;
+   REF(radv_ir_header) header;
   REF(key_id_pair) ids;

   VOID_REF data;
@ -53,7 +53,7 @@ struct leaf_args {

 struct morton_args {
   VOID_REF bvh;
-   REF(AABB) bounds;
+   REF(radv_ir_header) header;
   REF(key_id_pair) ids;
 };

--- a/src/amd/vulkan/bvh/bvh.h
+++ b/src/amd/vulkan/bvh/bvh.h
@ -120,6 +120,11 @@ struct radv_ir_instance_node {
   uint32_t instance_id;
 };

+struct radv_ir_header { /* Build state shared between shader stages, kept outside the IR nodes. */
+   int32_t min_bounds[3]; /* Per-axis minimum as to_emulated_float() ints; updated with atomicMin. */
+   int32_t max_bounds[3]; /* Per-axis maximum as to_emulated_float() ints; updated with atomicMax. */
+};
+
 struct radv_bvh_triangle_node {
   float coords[3][3];
   uint32_t reserved[3];
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@ -310,10 +310,10 @@ main(void)

   DEREF(id_ptr).id = is_active ? pack_ir_node_id(dst_offset, node_type) : RADV_BVH_INVALID_NODE;

-   atomicMin(DEREF(INDEX(int32_t, args.bounds, 0)), to_emulated_float(bounds.min.x));
-   atomicMin(DEREF(INDEX(int32_t, args.bounds, 1)), to_emulated_float(bounds.min.y));
-   atomicMin(DEREF(INDEX(int32_t, args.bounds, 2)), to_emulated_float(bounds.min.z));
-   atomicMax(DEREF(INDEX(int32_t, args.bounds, 3)), to_emulated_float(bounds.max.x));
-   atomicMax(DEREF(INDEX(int32_t, args.bounds, 4)), to_emulated_float(bounds.max.y));
-   atomicMax(DEREF(INDEX(int32_t, args.bounds, 5)), to_emulated_float(bounds.max.z));
+   atomicMin(DEREF(args.header).min_bounds[0], to_emulated_float(bounds.min.x));
+   atomicMin(DEREF(args.header).min_bounds[1], to_emulated_float(bounds.min.y));
+   atomicMin(DEREF(args.header).min_bounds[2], to_emulated_float(bounds.min.z));
+   atomicMax(DEREF(args.header).max_bounds[0], to_emulated_float(bounds.max.x));
+   atomicMax(DEREF(args.header).max_bounds[1], to_emulated_float(bounds.max.y));
+   atomicMax(DEREF(args.header).max_bounds[2], to_emulated_float(bounds.max.z));
 }
--- a/src/amd/vulkan/bvh/morton.comp
+++ b/src/amd/vulkan/bvh/morton.comp
@ -78,12 +78,12 @@ main(void)
      vec3 center = (bounds.min + bounds.max) * 0.5;

      AABB bvh_bounds;
-      bvh_bounds.min.x = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 0)));
-      bvh_bounds.min.y = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 1)));
-      bvh_bounds.min.z = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 2)));
-      bvh_bounds.max.x = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 3)));
-      bvh_bounds.max.y = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 4)));
-      bvh_bounds.max.z = from_emulated_float(DEREF(INDEX(int32_t, args.bounds, 5)));
+      bvh_bounds.min.x = from_emulated_float(DEREF(args.header).min_bounds[0]);
+      bvh_bounds.min.y = from_emulated_float(DEREF(args.header).min_bounds[1]);
+      bvh_bounds.min.z = from_emulated_float(DEREF(args.header).min_bounds[2]);
+      bvh_bounds.max.x = from_emulated_float(DEREF(args.header).max_bounds[0]);
+      bvh_bounds.max.y = from_emulated_float(DEREF(args.header).max_bounds[1]);
+      bvh_bounds.max.z = from_emulated_float(DEREF(args.header).max_bounds[2]);

      vec3 normalized_center = (center - bvh_bounds.min) / (bvh_bounds.max - bvh_bounds.min);

--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@ -55,9 +55,6 @@ static const uint32_t convert_internal_spv[] = {
 #include "bvh/converter_internal.comp.spv.h"
 };

-/* Min and max bounds of the bvh used to compute morton codes */
-#define SCRATCH_TOTAL_BOUNDS_SIZE (6 * sizeof(float))
-
 #define KEY_ID_PAIR_SIZE 8

 struct acceleration_structure_layout {
@ -68,7 +65,7 @@ struct acceleration_structure_layout {
 struct scratch_layout {
   uint32_t size;

-   uint32_t bounds_offset;
+   uint32_t header_offset;

   uint32_t sort_buffer_offset[2];
   uint32_t sort_internal_offset;
@ -144,8 +141,8 @@ get_build_layout(struct radv_device *device, uint32_t leaf_count,

      uint32_t offset = 0;

-      scratch->bounds_offset = offset;
-      offset += SCRATCH_TOTAL_BOUNDS_SIZE;
+      scratch->header_offset = offset;
+      offset += sizeof(struct radv_ir_header);

      scratch->sort_buffer_offset[0] = offset;
      offset += requirements.keyvals_size;
@ -458,7 +455,7 @@ build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
   for (uint32_t i = 0; i < infoCount; ++i) {
      struct leaf_args leaf_consts = {
         .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
-         .bounds = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.bounds_offset,
+         .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
         .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
         .dst_offset = 0,
      };
@ -551,7 +548,7 @@ morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
   for (uint32_t i = 0; i < infoCount; ++i) {
      const struct morton_args consts = {
         .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
-         .bounds = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.bounds_offset,
+         .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
         .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
      };

@ -745,16 +742,6 @@ radv_CmdBuildAccelerationStructuresKHR(
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));

-   for (uint32_t i = 0; i < infoCount; ++i) {
-      /* Clear the bvh bounds with int max/min. */
-      si_cp_dma_clear_buffer(cmd_buffer, pInfos[i].scratchData.deviceAddress, 3 * sizeof(float),
-                             0x7fffffff);
-      si_cp_dma_clear_buffer(cmd_buffer, pInfos[i].scratchData.deviceAddress + 3 * sizeof(float),
-                             3 * sizeof(float), 0x80000000);
-   }
-
-   cmd_buffer->state.flush_bits |= flush_bits;
-
   for (uint32_t i = 0; i < infoCount; ++i) {
      uint32_t leaf_node_count = 0;
      for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
@ -763,8 +750,19 @@ radv_CmdBuildAccelerationStructuresKHR(

      get_build_layout(cmd_buffer->device, leaf_node_count, pInfos + i, &bvh_states[i].accel_struct,
                       &bvh_states[i].scratch);
+
+      struct radv_ir_header header = {
+         .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
+         .max_bounds = {0x80000000, 0x80000000, 0x80000000},
+      };
+
+      radv_update_buffer_cp(
+         cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
+         &header, sizeof(header));
   }

+   cmd_buffer->state.flush_bits |= flush_bits;
+
   build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, flush_bits);

   morton_generate(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);