radv: Merge the leaf and internal converter

We already have everything we need in the internal converter, so we can just
encode leaf nodes there. Since this functionality is no longer split, the
shader was renamed to "encode".

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20768>
Konstantin Seurer 2023-01-18 16:34:55 +01:00 committed by Marge Bot
parent 883f18f761
commit 3eb646efd0
8 changed files with 100 additions and 216 deletions
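For orientation: after this change the encode shader handles both node kinds in a single pass. When a child's offset in the intermediate BVH falls inside the IR leaf region, it is encoded as a leaf on the spot (see the new encode_leaf_node() call in the shader hunk further down); anything past that region is an internal node and takes the old converter path. Below is a minimal C-style sketch of that decision; the sizes and names are illustrative and not taken from the actual Mesa code.

#include <stdint.h>
#include <stdio.h>

/* Illustrative node sizes only; the real shader derives them from the
 * geometry type and SIZEOF(radv_ir_box_node). */
#define IR_LEAF_SIZE  64u
#define OUT_LEAF_SIZE 64u

/* Leaves sit at the start of the intermediate BVH, so any offset below
 * the leaf-region size identifies a leaf and is encoded directly;
 * larger offsets belong to internal nodes. */
static uint32_t
child_dst_offset(uint32_t src_offset, uint32_t ir_leaf_bytes,
                 uint32_t dst_leaf_base)
{
   if (src_offset < ir_leaf_bytes) {
      uint32_t child_index = src_offset / IR_LEAF_SIZE;
      return dst_leaf_base + child_index * OUT_LEAF_SIZE; /* leaf path */
   }
   return UINT32_MAX; /* internal-node path, elided in this sketch */
}

int
main(void)
{
   printf("%u\n", child_dst_offset(64u, 192u, 256u));  /* leaf */
   printf("%u\n", child_dst_offset(256u, 192u, 256u)); /* internal node */
   return 0;
}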


@@ -95,7 +95,7 @@ struct copy_args {
uint32_t mode;
};
struct convert_internal_args {
struct encode_args {
VOID_REF intermediate_bvh;
VOID_REF output_bvh;
REF(radv_ir_header) header;
@@ -104,12 +104,6 @@ struct convert_internal_args {
uint32_t geometry_type;
};
struct convert_leaf_args {
VOID_REF intermediate_bvh;
VOID_REF output_bvh;
uint32_t geometry_type;
};
struct ploc_prefix_scan_partition {
uint32_t aggregate;
uint32_t inclusive_sum;


@@ -158,7 +158,7 @@ struct radv_ir_header {
int32_t min_bounds[3];
int32_t max_bounds[3];
uint32_t active_leaf_count;
/* Indirect dispatch dimensions for the internal node converter.
/* Indirect dispatch dimensions for the encoder.
* ir_internal_node_count is the thread count in the X dimension,
* while Y and Z are always set to 1. */
uint32_t ir_internal_node_count;
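Driver-side, these fields are consumed by an indirect dispatch: vkCmdDispatchIndirect reads a VkDispatchIndirectCommand (three consecutive uint32_t: x, y, z) at a buffer offset, so pointing it at ir_internal_node_count launches one thread per IR internal node while the two words that follow stay at 1. A hedged sketch of that wiring follows; the helper and its parameters are illustrative and not the actual RADV code, which goes through radv_indirect_unaligned_dispatch().

#include <vulkan/vulkan.h>

/* Illustration only: the encoder is launched indirectly, with its
 * dispatch arguments living inside radv_ir_header:
 *   x = ir_internal_node_count (bumped atomically by the build shaders)
 *   y = 1
 *   z = 1
 */
static void
launch_encoder(VkCommandBuffer cmd, VkBuffer header_buffer,
               VkDeviceSize node_count_offset)
{
   /* node_count_offset points at ir_internal_node_count; the next two
    * uint32_t in the header are expected to be 1. */
   vkCmdDispatchIndirect(cmd, header_buffer, node_count_offset);
}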


@@ -1,127 +0,0 @@
/*
* Copyright © 2022 Friedrich Vock
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#version 460
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference2 : require
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
#include "build_helpers.h"
#include "build_interface.h"
layout(push_constant) uniform CONSTS {
convert_leaf_args args;
};
uint32_t
convert_geometry_id_and_flags(uint32_t src)
{
uint32_t flags = src >> 28;
uint32_t ret = src & 0xfffffffu;
if ((flags & VK_GEOMETRY_OPAQUE_BIT_KHR) != 0)
ret |= RADV_GEOMETRY_OPAQUE;
return ret;
}
uint32_t
convert_sbt_offset_and_flags(uint32_t src)
{
uint32_t flags = src >> 24;
uint32_t ret = src & 0xffffffu;
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
ret |= RADV_INSTANCE_FORCE_OPAQUE;
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
return ret;
}
void
main()
{
uint32_t global_id = gl_GlobalInvocationID.x;
uint32_t dst_leaf_offset =
id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node);
VOID_REF dst_leaves = OFFSET(args.output_bvh, dst_leaf_offset);
switch (args.geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
radv_ir_triangle_node src =
DEREF(INDEX(radv_ir_triangle_node, args.intermediate_bvh, global_id));
REF(radv_bvh_triangle_node) dst =
INDEX(radv_bvh_triangle_node, dst_leaves, global_id);
DEREF(dst).coords = src.coords;
DEREF(dst).triangle_id = src.triangle_id;
DEREF(dst).geometry_id_and_flags = convert_geometry_id_and_flags(src.geometry_id_and_flags);
DEREF(dst).id = src.id;
break;
}
case VK_GEOMETRY_TYPE_AABBS_KHR: {
radv_ir_aabb_node src =
DEREF(INDEX(radv_ir_aabb_node, args.intermediate_bvh, global_id));
REF(radv_bvh_aabb_node) dst =
INDEX(radv_bvh_aabb_node, dst_leaves, global_id);
DEREF(dst).aabb = src.base.aabb;
DEREF(dst).primitive_id = src.primitive_id;
DEREF(dst).geometry_id_and_flags = convert_geometry_id_and_flags(src.geometry_id_and_flags);
break;
}
default: { /* instances */
radv_ir_instance_node src =
DEREF(INDEX(radv_ir_instance_node, args.intermediate_bvh, global_id));
REF(radv_bvh_instance_node) dst =
INDEX(radv_bvh_instance_node, dst_leaves, global_id);
uint32_t bvh_offset = 0;
if (src.base_ptr != 0)
bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags);
DEREF(dst).instance_id = src.instance_id;
DEREF(dst).bvh_offset = bvh_offset;
mat4 transform = mat4(src.otw_matrix);
mat4 inv_transform = transpose(inverse(transpose(transform)));
DEREF(dst).wto_matrix = mat3x4(inv_transform);
DEREF(dst).otw_matrix = mat3x4(transform);
break;
}
}
}


@@ -41,7 +41,7 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
#include "build_interface.h"
layout(push_constant) uniform CONSTS {
convert_internal_args args;
encode_args args;
};
void set_parent(uint32_t child, uint32_t parent)
@@ -50,6 +50,76 @@ void set_parent(uint32_t child, uint32_t parent)
DEREF(REF(uint32_t)(addr)) = parent;
}
uint32_t
encode_geometry_id_and_flags(uint32_t src)
{
uint32_t flags = src >> 28;
uint32_t ret = src & 0xfffffffu;
if ((flags & VK_GEOMETRY_OPAQUE_BIT_KHR) != 0)
ret |= RADV_GEOMETRY_OPAQUE;
return ret;
}
uint32_t
encode_sbt_offset_and_flags(uint32_t src)
{
uint32_t flags = src >> 24;
uint32_t ret = src & 0xffffffu;
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0)
ret |= RADV_INSTANCE_FORCE_OPAQUE;
if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0)
ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE;
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0)
ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE;
if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0)
ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING;
return ret;
}
void
encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node)
{
switch (type) {
case radv_ir_node_triangle: {
radv_ir_triangle_node src = DEREF(REF(radv_ir_triangle_node)(src_node));
REF(radv_bvh_triangle_node) dst = REF(radv_bvh_triangle_node)(dst_node);
DEREF(dst).coords = src.coords;
DEREF(dst).triangle_id = src.triangle_id;
DEREF(dst).geometry_id_and_flags = encode_geometry_id_and_flags(src.geometry_id_and_flags);
DEREF(dst).id = src.id;
break;
}
case radv_ir_node_aabb: {
radv_ir_aabb_node src = DEREF(REF(radv_ir_aabb_node)(src_node));
REF(radv_bvh_aabb_node) dst = REF(radv_bvh_aabb_node)(dst_node);
DEREF(dst).aabb = src.base.aabb;
DEREF(dst).primitive_id = src.primitive_id;
DEREF(dst).geometry_id_and_flags = encode_geometry_id_and_flags(src.geometry_id_and_flags);
break;
}
case radv_ir_node_instance: {
radv_ir_instance_node src = DEREF(REF(radv_ir_instance_node)(src_node));
REF(radv_bvh_instance_node) dst = REF(radv_bvh_instance_node)(dst_node);
uint32_t bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
DEREF(dst).sbt_offset_and_flags = encode_sbt_offset_and_flags(src.sbt_offset_and_flags);
DEREF(dst).instance_id = src.instance_id;
DEREF(dst).bvh_offset = bvh_offset;
mat4 transform = mat4(src.otw_matrix);
mat4 inv_transform = transpose(inverse(transpose(transform)));
DEREF(dst).wto_matrix = mat3x4(inv_transform);
DEREF(dst).otw_matrix = mat3x4(transform);
break;
}
}
}
void
main()
{
@@ -165,6 +235,8 @@ main()
if (offset < intermediate_leaf_nodes_size) {
uint32_t child_index = offset / intermediate_leaf_node_size;
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
encode_leaf_node(type, args.intermediate_bvh + offset, args.output_bvh + dst_offset);
} else {
uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size;
uint32_t child_index = offset_in_internal_nodes / SIZEOF(radv_ir_box_node);


@@ -72,7 +72,7 @@ main(void)
break;
/* We allocate nodes on demand with the atomic here to ensure children come before their
* parents, which is a requirement of the converter.
* parents, which is a requirement of the encoder.
*/
uint32_t dst_idx =
atomicAdd(DEREF(REF(radv_ir_header)(args.header)).ir_internal_node_count, 1);
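The guarantee this comment refers to can be stated concretely: every internal node takes its slot from the same monotonically increasing counter, and a node is only allocated once its children already have slots, so a child's index (and hence its offset) is always smaller than its parent's. The encoder can therefore process nodes in index order and rely on children having been written first. A small self-contained sketch of that invariant, in C rather than GLSL and not taken from the Mesa sources:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint32_t ir_internal_node_count;

/* Mirrors the on-demand allocation above: a slot is only taken once the
 * node's children exist, so the returned index is strictly greater than
 * either child's index. */
static uint32_t
alloc_internal_node(void)
{
   return atomic_fetch_add(&ir_internal_node_count, 1);
}

int
main(void)
{
   uint32_t left = alloc_internal_node();
   uint32_t right = alloc_internal_node();
   uint32_t parent = alloc_internal_node(); /* allocated after its children */
   assert(left < parent && right < parent);
   return 0;
}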


@@ -25,6 +25,11 @@ bvh_shaders = [
'copy',
[],
],
[
'encode.comp',
'encode',
[],
],
[
'lbvh_generate_ir.comp',
'lbvh_generate_ir',
@@ -55,16 +60,6 @@ bvh_shaders = [
'ploc_internal_extended',
['EXTENDED_SAH=1'],
],
[
'converter_internal.comp',
'converter_internal',
[],
],
[
'converter_leaf.comp',
'converter_leaf',
[],
],
]
bvh_include_dir = dir_source_root + '/src/amd/vulkan/bvh'


@@ -61,12 +61,8 @@ static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};
static const uint32_t convert_leaf_spv[] = {
#include "bvh/converter_leaf.spv.h"
};
static const uint32_t convert_internal_spv[] = {
#include "bvh/converter_internal.spv.h"
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};
#define KEY_ID_PAIR_SIZE 8
@@ -357,10 +353,8 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->accel_struct_build.convert_leaf_pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->accel_struct_build.convert_internal_pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline,
&state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
@@ -374,9 +368,7 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->accel_struct_build.leaf_p_layout, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->accel_struct_build.convert_leaf_p_layout, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->accel_struct_build.convert_internal_p_layout, &state->alloc);
state->accel_struct_build.encode_p_layout, &state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->accel_struct_build.morton_p_layout, &state->alloc);
@@ -622,18 +614,10 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
if (result != VK_SUCCESS)
return result;
result = create_build_pipeline_spv(device, convert_leaf_spv, sizeof(convert_leaf_spv),
sizeof(struct convert_leaf_args),
&device->meta_state.accel_struct_build.convert_leaf_pipeline,
&device->meta_state.accel_struct_build.convert_leaf_p_layout);
if (result != VK_SUCCESS)
return result;
result =
create_build_pipeline_spv(device, convert_internal_spv, sizeof(convert_internal_spv),
sizeof(struct convert_internal_args),
&device->meta_state.accel_struct_build.convert_internal_pipeline,
&device->meta_state.accel_struct_build.convert_internal_p_layout);
create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
&device->meta_state.accel_struct_build.encode_pipeline,
&device->meta_state.accel_struct_build.encode_p_layout);
if (result != VK_SUCCESS)
return result;
@@ -947,43 +931,13 @@ ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
}
static void
convert_leaf_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
struct bvh_state *bvh_states)
encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
struct bvh_state *bvh_states)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.convert_leaf_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (!pInfos[i].geometryCount)
continue;
RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
pInfos[i].dstAccelerationStructure);
const struct convert_leaf_args args = {
.intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.output_bvh = accel_struct->va + bvh_states[i].accel_struct.bvh_offset,
.geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType
: pInfos[i].ppGeometries[0]->geometryType,
};
radv_CmdPushConstants(commandBuffer,
cmd_buffer->device->meta_state.accel_struct_build.convert_leaf_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
radv_unaligned_dispatch(cmd_buffer, bvh_states[i].leaf_node_count, 1, 1);
}
/* This is the final access to the leaf nodes, no need to flush */
}
static void
convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
struct bvh_state *bvh_states)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
cmd_buffer->device->meta_state.accel_struct_build.convert_internal_pipeline);
cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
pInfos[i].dstAccelerationStructure);
@@ -997,7 +951,7 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType
: pInfos[i].ppGeometries[0]->geometryType;
const struct convert_internal_args args = {
const struct encode_args args = {
.intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.output_bvh = accel_struct->va + bvh_states[i].accel_struct.bvh_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
@@ -1005,9 +959,9 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
.leaf_node_count = bvh_states[i].leaf_node_count,
.geometry_type = geometry_type,
};
radv_CmdPushConstants(
commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.convert_internal_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
radv_CmdPushConstants(commandBuffer,
cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
radv_indirect_unaligned_dispatch(cmd_buffer, NULL,
pInfos[i].scratchData.deviceAddress +
bvh_states[i].scratch.header_offset +
@@ -1084,9 +1038,7 @@ radv_CmdBuildAccelerationStructuresKHR(
cmd_buffer->state.flush_bits |= flush_bits;
convert_leaf_nodes(commandBuffer, infoCount, pInfos, bvh_states);
convert_internal_nodes(commandBuffer, infoCount, pInfos, bvh_states);
encode_nodes(commandBuffer, infoCount, pInfos, bvh_states);
for (uint32_t i = 0; i < infoCount; ++i) {
RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,


@@ -689,10 +689,8 @@ struct radv_meta_state {
VkPipelineLayout ploc_p_layout;
VkPipeline ploc_pipeline;
VkPipeline ploc_extended_pipeline;
VkPipelineLayout convert_leaf_p_layout;
VkPipeline convert_leaf_pipeline;
VkPipelineLayout convert_internal_p_layout;
VkPipeline convert_internal_pipeline;
VkPipelineLayout encode_p_layout;
VkPipeline encode_pipeline;
VkPipelineLayout copy_p_layout;
VkPipeline copy_pipeline;