radv: Store parent node IDs inside nodes on GFX12

Storing the parent ID in the dword that was previously unused in the GFX12 box and instance nodes makes the separate parent-link array in front of bvh_offset unnecessary for the BVH8 layout, which saves some space.

Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36691>
Author: Konstantin Seurer, 2025-05-11 19:44:07 +02:00 (committed by Marge Bot)
Parent: 8c20947f69
Commit: cc0dc4b566
9 changed files with 53 additions and 29 deletions
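For orientation before the per-file diffs, here is a hedged C sketch contrasting how a parent link was recorded before and after this change. It paraphrases the removed set_parent() GLSL helper and the new in-node field using plain pointers instead of the shader's VOID_REF/DEREF macros; the name set_parent_old is mine, not RADV API.

#include <stdint.h>

/* Before: one 4-byte parent link per node, stored in an array that sits
 * directly in front of bvh_offset and is indexed backwards by node ID
 * (a paraphrase of the removed set_parent() helper further down). */
static void
set_parent_old(uint8_t *output_base, uint32_t output_bvh_offset,
               uint32_t child_id, uint32_t parent_id)
{
   uint32_t *link = (uint32_t *)(output_base + output_bvh_offset -
                                 child_id / 16 * 4 - 4);
   *link = parent_id;
}

/* After: GFX12 box and instance nodes store the parent ID in the dword that
 * used to be "unused", so the encoder writes the field directly and the
 * external link array is no longer allocated for the BVH8 layout. */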

View file

@@ -159,7 +159,7 @@ typedef struct radv_gfx12_box_child radv_gfx12_box_child;
struct radv_gfx12_box_node {
uint32_t internal_base_id;
uint32_t primitive_base_id;
- uint32_t unused;
+ uint32_t parent_id;
vec3 origin;
uint32_t child_count_exponents;
uint32_t obb_matrix_index;
@@ -169,7 +169,7 @@ struct radv_gfx12_box_node {
struct radv_gfx12_instance_node {
mat3x4 wto_matrix;
uint64_t pointer_flags_bvh_addr;
- uint32_t unused;
+ uint32_t parent_id;
uint32_t cull_mask_user_data;
vec3 origin;
uint32_t child_count_exponents;

View file

@@ -300,7 +300,7 @@ radv_encode_aabb_gfx12(VOID_REF dst, vk_ir_aabb_node src)
/* Writes both the HW node and user data. */
void
- radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src)
+ radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src, uint32_t parent_id)
{
bit_writer child_writer;
bit_writer_init(child_writer, dst);
@@ -330,7 +330,7 @@ radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src)
bvh_addr |= radv_encode_blas_pointer_flags(flags, blas_header.geometry_type);
bit_writer_write(child_writer, uint32_t(bvh_addr & 0xffffffff), 32);
bit_writer_write(child_writer, uint32_t(bvh_addr >> 32), 32);
- bit_writer_write(child_writer, src.custom_instance_and_mask & 0xffffff, 32);
+ bit_writer_write(child_writer, parent_id, 32);
bit_writer_write(child_writer, src.sbt_offset_and_flags & 0xffffff, 24);
bit_writer_write(child_writer, src.custom_instance_and_mask >> 24, 8);

View file

@@ -38,13 +38,6 @@ layout(push_constant) uniform CONSTS
encode_gfx12_args args;
};
- void
- set_parent(uint32_t child, uint32_t parent)
- {
-    uint64_t addr = args.output_base + args.output_bvh_offset - child / 16 * 4 - 4;
-    DEREF(REF(uint32_t)(addr)) = parent;
- }
void
encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_internal_nodes, uint32_t node_index)
{
@@ -197,6 +190,8 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
if (cluster.invocation_index == 0) {
DEREF(dst).internal_base_id = pack_node_id(dst_internal_offset, 0);
DEREF(dst).primitive_base_id = pack_node_id(dst_leaf_offset, 0);
+ if (is_root_node)
+    DEREF(dst).parent_id = RADV_BVH_INVALID_NODE;
DEREF(dst).origin = origin;
DEREF(dst).child_count_exponents = extent_exponents.x | (extent_exponents.y << 8) |
(extent_exponents.z << 16) | ((valid_child_count - 1) << 28);
@@ -207,6 +202,8 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
uint32_t type = ir_id_to_type(child);
uint32_t offset = ir_id_to_offset(child);
+ VOID_REF dst_child_addr = args.output_base + args.output_bvh_offset + dst_offset;
uint32_t child_node_size_128b = 1;
uint32_t encoded_type = 0;
uint32_t cull_mask = 0xff;
@@ -215,6 +212,9 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
encoded_type = 5;
REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset);
cull_flags = DEREF(child_node).flags & 0x3;
+ REF(radv_gfx12_box_node) child_box = REF(radv_gfx12_box_node)(dst_child_addr);
+ DEREF(child_box).parent_id = node_id;
} else {
if (VK_BUILD_FLAG(RADV_BUILD_FLAG_WRITE_LEAF_NODE_OFFSETS)) {
/* Write leaf node offset. */
@@ -233,8 +233,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
}
}
- VOID_REF dst_leaf_addr = args.output_base + args.output_bvh_offset + dst_offset;
switch (args.geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR: {
vk_ir_triangle_node src_node0 = DEREF(REF(vk_ir_triangle_node)(OFFSET(args.intermediate_bvh, offset)));
@@ -249,15 +247,15 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
opaque = (src_node1.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0;
cull_flags &= opaque ? VK_BVH_BOX_FLAG_ONLY_OPAQUE : VK_BVH_BOX_FLAG_NO_OPAQUE;
- radv_encode_triangle_gfx12(dst_leaf_addr, src_node0, src_node1);
+ radv_encode_triangle_gfx12(dst_child_addr, src_node0, src_node1);
} else {
- radv_encode_triangle_gfx12(dst_leaf_addr, src_node0);
+ radv_encode_triangle_gfx12(dst_child_addr, src_node0);
}
break;
}
case VK_GEOMETRY_TYPE_AABBS_KHR: {
vk_ir_aabb_node src_node = DEREF(REF(vk_ir_aabb_node)(OFFSET(args.intermediate_bvh, offset)));
- radv_encode_aabb_gfx12(dst_leaf_addr, src_node);
+ radv_encode_aabb_gfx12(dst_child_addr, src_node);
bool opaque = (src_node.geometry_id_and_flags & VK_GEOMETRY_OPAQUE) != 0;
cull_flags = opaque ? VK_BVH_BOX_FLAG_ONLY_OPAQUE : VK_BVH_BOX_FLAG_NO_OPAQUE;
@@ -269,7 +267,7 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
child_node_size_128b = 2;
vk_ir_instance_node src_node = DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, offset)));
- radv_encode_instance_gfx12(dst_leaf_addr, src_node);
+ radv_encode_instance_gfx12(dst_child_addr, src_node, node_id);
cull_mask = src_node.custom_instance_and_mask >> 24;
cull_flags = src_node.root_flags & 0x3;
@@ -300,8 +298,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
(min(uint32_t(ceil((child_aabb.max.z - origin.z) / extent.z * float(0x1000))) - 1, 0xfff) << 12) |
(encoded_type << 24) | (child_node_size_128b << 28);
DEREF(dst).children[child_index] = box_child;
- set_parent(pack_node_id(dst_offset, encoded_type), node_id);
} else {
child_index =
bitCount(radv_ballot(cluster, true) & ((1u << cluster.invocation_index) - 1)) + valid_child_count;
@@ -322,8 +318,6 @@ encode_gfx12(uint32_t ir_leaf_node_size, REF(vk_ir_box_node) intermediate_intern
REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_base);
DEREF(header).aabb = src.base.aabb;
DEREF(header).bvh_offset = args.output_bvh_offset;
- set_parent(RADV_BVH_ROOT_NODE, RADV_BVH_INVALID_NODE);
}
}

View file

@@ -79,6 +79,7 @@ enum radv_ray_query_field {
radv_ray_query_trav_stack_low_watermark,
radv_ray_query_trav_current_node,
radv_ray_query_trav_previous_node,
+ radv_ray_query_trav_parent_node,
radv_ray_query_trav_instance_top_node,
radv_ray_query_trav_instance_bottom_node,
radv_ray_query_trav_second_iteration,
@@ -117,6 +118,7 @@ radv_get_ray_query_type()
FIELD(trav_stack_low_watermark, glsl_uint_type());
FIELD(trav_current_node, glsl_uint_type());
FIELD(trav_previous_node, glsl_uint_type());
+ FIELD(trav_parent_node, glsl_uint_type());
FIELD(trav_instance_top_node, glsl_uint_type());
FIELD(trav_instance_bottom_node, glsl_uint_type());
FIELD(trav_second_iteration, glsl_bool_type());
@@ -319,6 +321,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
rq_store(b, rq, trav_current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE));
rq_store(b, rq, trav_previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE));
+ rq_store(b, rq, trav_parent_node, nir_imm_int(b, RADV_BVH_INVALID_NODE));
rq_store(b, rq, trav_instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE));
rq_store(b, rq, trav_instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT));
rq_store(b, rq, trav_second_iteration, nir_imm_false(b));
@@ -525,6 +528,7 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
.stack_low_watermark = rq_deref(b, rq, trav_stack_low_watermark),
.current_node = rq_deref(b, rq, trav_current_node),
.previous_node = rq_deref(b, rq, trav_previous_node),
+ .parent_node = rq_deref(b, rq, trav_parent_node),
.instance_top_node = rq_deref(b, rq, trav_instance_top_node),
.instance_bottom_node = rq_deref(b, rq, trav_instance_bottom_node),
.second_iteration = rq_deref(b, rq, trav_second_iteration),

View file

@@ -1196,9 +1196,29 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_push_if(b, overflow_cond);
{
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
- nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
- nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev);
+ nir_def *loaded_parent_id;
+ nir_def *primitive_parent_id;
+ nir_push_if(b, nir_test_mask(b, prev, BITFIELD64_BIT(ffs(radv_bvh_node_box16) - 1)));
+ {
+    nir_def *is_instance = nir_test_mask(b, prev, BITFIELD64_BIT(ffs(radv_bvh_node_instance) - 1));
+    nir_def *field_offset = nir_bcsel(
+       b, is_instance,
+       nir_imm_int(
+          b, (int32_t)offsetof(struct radv_gfx12_instance_node, parent_id) - (radv_bvh_node_instance << 3)),
+       nir_imm_int(b,
+                   (int32_t)offsetof(struct radv_gfx12_box_node, parent_id) - (radv_bvh_node_box32 << 3)));
+    nir_def *offset = nir_iadd(b, nir_ishl_imm(b, prev, 3), field_offset);
+    nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
+    loaded_parent_id = nir_build_load_global(b, 1, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
+ }
+ nir_push_else(b, NULL);
+ {
+    primitive_parent_id = nir_load_deref(b, args->vars.parent_node);
+ }
+ nir_pop_if(b, NULL);
+ nir_def *parent = nir_if_phi(b, loaded_parent_id, primitive_parent_id);
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
{
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
@@ -1299,6 +1319,8 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
}
nir_push_else(b, NULL);
{
+ nir_store_deref(b, args->vars.parent_node, bvh_node, 0x1);
/* box */
if (args->use_bvh_stack_rtn) {
nir_store_var(b, last_visited_node, prev_node, 0x1);
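To make the new backtracking path easier to follow, here is a minimal C sketch of the lookup that the NIR above builds; it is illustrative, not the actual implementation. It assumes a GFX12 node ID packs (byte_offset >> 3) with the node type in the low three bits, and the two predicate helpers are hypothetical stand-ins for the nir_test_mask() checks on the type bits.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-ins for the nir_test_mask() type-bit checks above. */
extern bool node_has_parent_field(uint32_t id); /* box or instance node */
extern bool node_is_instance(uint32_t id);

static uint32_t
lookup_parent_id(const uint8_t *bvh, uint32_t prev_id, uint32_t tracked_parent,
                 uint32_t box_parent_offset, uint32_t instance_parent_offset)
{
   /* Triangle/AABB leaves have no spare dword for a parent ID, so the
    * traversal falls back to the parent recorded on the way down
    * (the new parent_node variable). */
   if (!node_has_parent_field(prev_id))
      return tracked_parent;

   /* (id << 3) - (type << 3) recovers the node's byte offset inside the BVH,
    * matching the field_offset arithmetic in the NIR above. */
   uint32_t type = prev_id & 0x7u;
   uint32_t field = node_is_instance(prev_id) ? instance_parent_offset
                                              : box_parent_offset;
   uint32_t parent_id;
   memcpy(&parent_id,
          bvh + (((uint64_t)prev_id << 3) - (type << 3)) + field,
          sizeof(parent_id));
   return parent_id;
}

The two offset parameters stand for the offsetof(struct radv_gfx12_box_node, parent_id) and offsetof(struct radv_gfx12_instance_node, parent_id) values that appear in the NIR above.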

View file

@@ -104,6 +104,8 @@ struct radv_ray_traversal_vars {
*/
nir_deref_instr *previous_node;
+ nir_deref_instr *parent_node;
/* When entering an instance these are the instance node and the root node of the BLAS */
nir_deref_instr *instance_top_node;
nir_deref_instr *instance_bottom_node;

View file

@@ -1224,6 +1224,7 @@ struct rt_traversal_vars {
nir_variable *stack_low_watermark;
nir_variable *current_node;
nir_variable *previous_node;
+ nir_variable *parent_node;
nir_variable *instance_top_node;
nir_variable *instance_bottom_node;
nir_variable *second_iteration;
@@ -1249,6 +1250,7 @@ init_traversal_vars(nir_builder *b)
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_low_watermark");
ret.current_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node;");
ret.previous_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node");
+ ret.parent_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "parent_node");
ret.instance_top_node = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node");
ret.instance_bottom_node =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node");
@@ -1578,6 +1580,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
nir_store_var(b, trav_vars.stack, stack_idx, 1);
nir_store_var(b, trav_vars.stack_low_watermark, nir_load_var(b, trav_vars.stack), 1);
nir_store_var(b, trav_vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
+ nir_store_var(b, trav_vars.parent_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
nir_store_var(b, trav_vars.instance_top_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
nir_store_var(b, trav_vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
nir_store_var(b, trav_vars.second_iteration, nir_imm_false(b), 0x1);
@@ -1595,6 +1598,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
.stack_low_watermark = nir_build_deref_var(b, trav_vars.stack_low_watermark),
.current_node = nir_build_deref_var(b, trav_vars.current_node),
.previous_node = nir_build_deref_var(b, trav_vars.previous_node),
+ .parent_node = nir_build_deref_var(b, trav_vars.parent_node),
.instance_top_node = nir_build_deref_var(b, trav_vars.instance_top_node),
.instance_bottom_node = nir_build_deref_var(b, trav_vars.instance_bottom_node),
.second_iteration = nir_build_deref_var(b, trav_vars.second_iteration),

View file

@@ -84,7 +84,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
uint32_t bvh_leaf_size;
- uint32_t bvh_node_size_gcd;
if (radv_use_bvh8(pdev)) {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
@@ -99,7 +98,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
- bvh_node_size_gcd = RADV_GFX12_BVH_NODE_SIZE;
} else {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
@@ -114,7 +112,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
- bvh_node_size_gcd = 64;
}
uint32_t internal_node_size =
@@ -140,7 +137,8 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
/* Parent links, which have to go directly before bvh_offset as we index them using negative
* offsets from there. */
- offset += bvh_size / bvh_node_size_gcd * 4;
+ if (!radv_use_bvh8(pdev))
+    offset += bvh_size / 64 * 4;
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
offset = ALIGN(offset, 64);
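As a rough illustration of where the savings come from (not RADV code): the helper below sketches the parent-link reservation before and after this commit, assuming the GFX12 node size (RADV_GFX12_BVH_NODE_SIZE) is 128 bytes.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: bytes reserved for parent links in front of bvh_offset. */
static uint32_t
parent_link_bytes(bool use_bvh8, uint32_t bvh_size, bool after_this_commit)
{
   if (use_bvh8)
      return after_this_commit ? 0 : bvh_size / 128 * 4; /* assumed 128-byte nodes */
   return bvh_size / 64 * 4; /* legacy layout still needs the array */
}

For example, a 1 MiB GFX12 BVH previously set aside 1 MiB / 128 * 4 = 32 KiB of parent links; with the parent ID stored inside each node, that space is no longer reserved.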

View file

@@ -202,7 +202,7 @@ rra_transcode_box8_node(struct rra_transcoding_context *ctx, const struct radv_g
memcpy(dst, src, sizeof(struct radv_gfx12_box_node));
dst->internal_base_id = ctx->dst_internal_offset >> 3;
dst->primitive_base_id = ctx->dst_leaf_offset >> 3;
- dst->unused = parent_id;
+ dst->parent_id = parent_id;
uint32_t valid_child_count_minus_one = dst->child_count_exponents >> 28;
if (valid_child_count_minus_one == 0xf)
@@ -279,7 +279,7 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id
dst->pointer_flags_bvh_addr = dst->pointer_flags_bvh_addr - (user_data->bvh_offset >> 3) +
(sizeof(struct rra_accel_struct_metadata) >> 3);
- dst->unused = parent_id;
+ dst->parent_id = parent_id;
sideband_data->instance_index = user_data->instance_index;
sideband_data->custom_instance_and_flags = user_data->custom_instance;