mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
Merge branch 'radv-box16' into 'main'
radv: Use box16 nodes when bvh8 is not used. See merge request mesa/mesa!37883
This commit is contained in:
commit
ddef04ff5b
26 changed files with 647 additions and 53 deletions
|
|
@ -279,6 +279,13 @@ emit_set_mode_block(fp_mode_ctx* ctx, Block* block)
|
|||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
else
|
||||
instr->opcode = aco_opcode::s_cvt_f16_f32;
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtpi ||
|
||||
instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtni) {
|
||||
set_mode |= fp_state.require(mode_round16_64, instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtpi ? fp_round_pi : fp_round_ni);
|
||||
set_mode |= fp_state.require(mode_fp16_ovfl, default_state.fields[mode_fp16_ovfl]);
|
||||
set_mode |= fp_state.require(mode_denorm16_64, default_state.fields[mode_denorm16_64]);
|
||||
set_mode |= fp_state.require(mode_denorm32, default_state.fields[mode_denorm32]);
|
||||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_pk_fp8_f32_ovfl) {
|
||||
set_mode |= fp_state.require(mode_fp16_ovfl, 1);
|
||||
instr->opcode = aco_opcode::v_cvt_pk_fp8_f32;
|
||||
|
|
|
|||
|
|
@ -718,6 +718,8 @@ instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
|
|||
/* VOP1 */
|
||||
case aco_opcode::v_cvt_f16_f32:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtne:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtpi:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtni:
|
||||
case aco_opcode::v_cvt_f16_u16:
|
||||
case aco_opcode::v_cvt_f16_i16:
|
||||
case aco_opcode::v_rcp_f16:
|
||||
|
|
|
|||
|
|
@ -1029,6 +1029,8 @@ VOP1 = {
|
|||
("v_cvt_i32_f32", dst(U32), src(F32), op(0x08)),
|
||||
("v_cvt_f16_f32", dst(F16), src(F32), op(0x0a)),
|
||||
("p_v_cvt_f16_f32_rtne", dst(F16), src(F32), op(-1)),
|
||||
("p_v_cvt_f16_f32_rtpi", dst(F16), src(F32), op(-1)),
|
||||
("p_v_cvt_f16_f32_rtni", dst(F16), src(F32), op(-1)),
|
||||
("v_cvt_f32_f16", dst(F32), src(F16), op(0x0b)),
|
||||
("v_cvt_rpi_i32_f32", dst(U32), src(F32), op(0x0c)), #v_cvt_nearest_i32_f32 in GFX11
|
||||
("v_cvt_flr_i32_f32", dst(U32), src(F32), op(0x0d)),#v_cvt_floor_i32_f32 in GFX11
|
||||
|
|
|
|||
|
|
@ -453,7 +453,9 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_sdot_2x16_iadd_sat:
|
||||
case nir_op_bfdot2_bfadd:
|
||||
case nir_op_byte_perm_amd:
|
||||
case nir_op_alignbyte_amd: type = RegType::vgpr; break;
|
||||
case nir_op_alignbyte_amd:
|
||||
case nir_op_f2f16_ru:
|
||||
case nir_op_f2f16_rd: type = RegType::vgpr; break;
|
||||
case nir_op_fmul:
|
||||
case nir_op_ffma:
|
||||
case nir_op_fadd:
|
||||
|
|
|
|||
|
|
@ -2615,6 +2615,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_f2f16_ru:
|
||||
case nir_op_f2f16_rd:
|
||||
ctx->program->needs_fp_mode_insertion = true;
|
||||
bld.vop1(instr->op == nir_op_f2f16_ru ? aco_opcode::p_v_cvt_f16_f32_rtpi
|
||||
: aco_opcode::p_v_cvt_f16_f32_rtni,
|
||||
Definition(dst), Operand(get_alu_src(ctx, instr->src[0])));
|
||||
break;
|
||||
case nir_op_f2f32: {
|
||||
if (dst.regClass() == s1) {
|
||||
assert(instr->src[0].src.ssa->bit_size == 16);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
#define BVH_BUILD_HELPERS_H
|
||||
|
||||
#include "bvh.h"
|
||||
#include "spirv_internal_exts.h"
|
||||
#include "vk_build_helpers.h"
|
||||
|
||||
TYPE(radv_accel_struct_serialization_header, 8);
|
||||
|
|
@ -110,4 +111,7 @@ radv_encode_blas_pointer_flags(uint32_t flags, uint32_t geometry_type)
|
|||
return ptr_flags;
|
||||
}
|
||||
|
||||
spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRUMesa) float16_t radv_f32_to_f16_pos_inf(float f);
|
||||
spirv_instruction(set = "MesaInternal", id = SpvOpFConvertRDMesa) float16_t radv_f32_to_f16_neg_inf(float f);
|
||||
|
||||
#endif /* BVH_BUILD_HELPERS_H */
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#define RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 5))
|
||||
#define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES (1u << (VK_BUILD_FLAG_COUNT + 6))
|
||||
#define RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES_RETRY (1u << (VK_BUILD_FLAG_COUNT + 7))
|
||||
#define RADV_BUILD_FLAG_USE_BOX16 (1u << (VK_BUILD_FLAG_COUNT + 8))
|
||||
|
||||
#define RADV_COPY_MODE_COPY 0
|
||||
#define RADV_COPY_MODE_SERIALIZE 1
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#else
|
||||
#include <vulkan/vulkan.h>
|
||||
typedef uint16_t float16_t;
|
||||
typedef struct radv_aabb16 radv_aabb16;
|
||||
#endif
|
||||
|
||||
struct radv_accel_struct_serialization_header {
|
||||
|
|
@ -112,9 +113,18 @@ struct radv_bvh_instance_node {
|
|||
mat3x4 otw_matrix;
|
||||
};
|
||||
|
||||
/* Axis-aligned bounding box whose six bounds are stored as float16_t values:
 * the minimum corner (min_x/min_y/min_z) followed by the maximum corner
 * (max_x/max_y/max_z). It is the element type of radv_bvh_box16_node::coords.
 * On the non-shader side float16_t is a plain uint16_t typedef, so the fields
 * hold raw half-precision bit patterns there.
 */
struct radv_aabb16 {
|
||||
float16_t min_x;
|
||||
float16_t min_y;
|
||||
float16_t min_z;
|
||||
float16_t max_x;
|
||||
float16_t max_y;
|
||||
float16_t max_z;
|
||||
};
|
||||
|
||||
struct radv_bvh_box16_node {
|
||||
uint32_t children[4];
|
||||
float16_t coords[4][2][3];
|
||||
radv_aabb16 coords[4];
|
||||
};
|
||||
|
||||
struct radv_bvh_box32_node {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,32 @@ void set_parent(uint32_t child, uint32_t parent)
|
|||
DEREF(REF(uint32_t)(addr)) = parent;
|
||||
}
|
||||
|
||||
/* Narrow a 32-bit AABB to half precision. The maximum corner is converted with
 * radv_f32_to_f16_pos_inf() and the minimum corner with
 * radv_f32_to_f16_neg_inf(), so each bound is rounded away from the interior of
 * the box rather than into it.
 */
radv_aabb16
radv_aabb_f32_to_f16(vk_aabb aabb)
{
   radv_aabb16 half_box;

   /* Upper bounds: round towards +infinity. */
   half_box.max_x = radv_f32_to_f16_pos_inf(aabb.max.x);
   half_box.max_y = radv_f32_to_f16_pos_inf(aabb.max.y);
   half_box.max_z = radv_f32_to_f16_pos_inf(aabb.max.z);

   /* Lower bounds: round towards -infinity. */
   half_box.min_x = radv_f32_to_f16_neg_inf(aabb.min.x);
   half_box.min_y = radv_f32_to_f16_neg_inf(aabb.min.y);
   half_box.min_z = radv_f32_to_f16_neg_inf(aabb.min.z);

   return half_box;
}
|
||||
|
||||
/* Widen a half-precision AABB back to 32-bit floats, component by component. */
vk_aabb
radv_aabb_f16_to_f32(radv_aabb16 aabb16)
{
   vk_aabb widened;

   widened.max.x = float(aabb16.max_x);
   widened.max.y = float(aabb16.max_y);
   widened.max.z = float(aabb16.max_z);

   widened.min.x = float(aabb16.min_x);
   widened.min.y = float(aabb16.min_y);
   widened.min.z = float(aabb16.min_z);

   return widened;
}
|
||||
|
||||
void
|
||||
main()
|
||||
{
|
||||
|
|
@ -89,18 +115,15 @@ main()
|
|||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
uint32_t bvh_offset = is_root_node ? id_to_offset(RADV_BVH_ROOT_NODE) : DEREF(src_node).bvh_offset;
|
||||
if (bvh_offset == VK_UNKNOWN_BVH_OFFSET)
|
||||
uint32_t node_id = is_root_node ? RADV_BVH_ROOT_NODE : DEREF(src_node).bvh_offset;
|
||||
if (node_id == VK_UNKNOWN_BVH_OFFSET)
|
||||
continue;
|
||||
|
||||
if (bvh_offset == VK_NULL_BVH_OFFSET)
|
||||
if (node_id == VK_NULL_BVH_OFFSET)
|
||||
break;
|
||||
|
||||
uint32_t flags = 0;
|
||||
|
||||
REF(radv_bvh_box32_node) dst_node = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, bvh_offset));
|
||||
uint32_t node_id = pack_node_id(bvh_offset, radv_bvh_node_box32);
|
||||
|
||||
uint32_t found_child_count = 0;
|
||||
uint32_t children[4] = {RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE,
|
||||
RADV_BVH_INVALID_NODE, RADV_BVH_INVALID_NODE};
|
||||
|
|
@ -158,20 +181,33 @@ main()
|
|||
break;
|
||||
}
|
||||
|
||||
REF(radv_bvh_box16_node) dst_node_f16 = REF(radv_bvh_box16_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
|
||||
REF(radv_bvh_box32_node) dst_node_f32 = REF(radv_bvh_box32_node)(OFFSET(args.output_bvh, id_to_offset(node_id)));
|
||||
bool is_box16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && id_to_type(node_id) == radv_bvh_node_box16;
|
||||
|
||||
for (uint32_t i = 0; i < found_child_count; ++i) {
|
||||
uint32_t type = ir_id_to_type(children[i]);
|
||||
uint32_t offset = ir_id_to_offset(children[i]);
|
||||
uint32_t dst_offset;
|
||||
uint32_t child_node_id;
|
||||
|
||||
vk_aabb child_aabb = DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
|
||||
|
||||
if (type == vk_ir_node_internal) {
|
||||
dst_offset = atomicAdd(DEREF(args.header).dst_node_offset, SIZEOF(radv_bvh_box32_node));
|
||||
radv_aabb16 child_aabb16 = radv_aabb_f32_to_f16(child_aabb);
|
||||
float surface_area_f16 = aabb_surface_area(radv_aabb_f16_to_f32(child_aabb16));
|
||||
float surface_area_f32 = aabb_surface_area(child_aabb);
|
||||
bool child_use_f16 = VK_BUILD_FLAG(RADV_BUILD_FLAG_USE_BOX16) && surface_area_f16 < surface_area_f32 * 1.5;
|
||||
|
||||
REF(vk_ir_box_node) child_node = REF(vk_ir_box_node)OFFSET(args.intermediate_bvh, offset);
|
||||
DEREF(child_node).bvh_offset = dst_offset;
|
||||
uint32_t dst_offset = atomicAdd(DEREF(args.header).dst_node_offset,
|
||||
child_use_f16 ? SIZEOF(radv_bvh_box16_node) : SIZEOF(radv_bvh_box32_node));
|
||||
child_node_id = pack_node_id(dst_offset, child_use_f16 ? radv_bvh_node_box16 : radv_bvh_node_box32);
|
||||
|
||||
REF(vk_ir_box_node) child_node = REF(vk_ir_box_node) OFFSET(args.intermediate_bvh, offset);
|
||||
DEREF(child_node).bvh_offset = child_node_id;
|
||||
flags |= (DEREF(child_node).flags & 0x3) << i * 8;
|
||||
} else {
|
||||
uint32_t child_index = offset / ir_leaf_node_size;
|
||||
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
|
||||
uint32_t dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
|
||||
|
||||
if (type == vk_ir_node_instance) {
|
||||
vk_ir_instance_node src_node =
|
||||
|
|
@ -182,47 +218,65 @@ main()
|
|||
uint32_t child_flags = fetch_child_flags(args.intermediate_bvh, children[i]);
|
||||
flags |= (child_flags & 0x3) << i * 8;
|
||||
}
|
||||
|
||||
child_node_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
|
||||
}
|
||||
|
||||
vk_aabb child_aabb =
|
||||
DEREF(REF(vk_ir_node)OFFSET(args.intermediate_bvh, offset)).aabb;
|
||||
|
||||
/* On gfx11, infinities in AABB coords can cause garbage child nodes to be
|
||||
* returned by box intersection tests with non-default box sorting modes.
|
||||
* Subtract 1 from the integer representation of inf/-inf to turn it into
|
||||
* the maximum/minimum representable floating-point value as a workaround.
|
||||
*/
|
||||
if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (isinf(child_aabb.min[i]))
|
||||
child_aabb.min[i] = uintBitsToFloat(floatBitsToUint(child_aabb.min[i]) - 1);
|
||||
if (isinf(child_aabb.max[i]))
|
||||
child_aabb.max[i] = uintBitsToFloat(floatBitsToUint(child_aabb.max[i]) - 1);
|
||||
if (is_box16) {
|
||||
DEREF(dst_node_f16).coords[i] = radv_aabb_f32_to_f16(child_aabb);
|
||||
} else {
|
||||
/* On gfx11, infinities in AABB coords can cause garbage child nodes to be
|
||||
* returned by box intersection tests with non-default box sorting modes.
|
||||
* Subtract 1 from the integer representation of inf/-inf to turn it into
|
||||
* the maximum/minimum representable floating-point value as a workaround.
|
||||
*/
|
||||
if (VK_BUILD_FLAG(RADV_BUILD_FLAG_NO_INFS)) {
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (isinf(child_aabb.min[i]))
|
||||
child_aabb.min[i] = uintBitsToFloat(floatBitsToUint(child_aabb.min[i]) - 1);
|
||||
if (isinf(child_aabb.max[i]))
|
||||
child_aabb.max[i] = uintBitsToFloat(floatBitsToUint(child_aabb.max[i]) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
DEREF(dst_node_f32).coords[i] = child_aabb;
|
||||
}
|
||||
|
||||
DEREF(dst_node).coords[i] = child_aabb;
|
||||
|
||||
uint32_t child_id = pack_node_id(dst_offset, ir_type_to_bvh_type(type));
|
||||
children[i] = child_id;
|
||||
set_parent(child_id, node_id);
|
||||
children[i] = child_node_id;
|
||||
set_parent(child_node_id, node_id);
|
||||
}
|
||||
|
||||
for (uint i = found_child_count; i < 4; ++i) {
|
||||
if (is_box16) {
|
||||
radv_aabb16 null_aabb;
|
||||
null_aabb.min_x = NAN_F16;
|
||||
null_aabb.min_y = NAN_F16;
|
||||
null_aabb.min_z = NAN_F16;
|
||||
null_aabb.max_x = NAN_F16;
|
||||
null_aabb.max_y = NAN_F16;
|
||||
null_aabb.max_z = NAN_F16;
|
||||
for (uint i = found_child_count; i < 4; ++i)
|
||||
DEREF(dst_node_f16).coords[i] = null_aabb;
|
||||
} else {
|
||||
for (uint i = found_child_count; i < 4; ++i) {
|
||||
for (uint comp = 0; comp < 3; ++comp) {
|
||||
DEREF(dst_node).coords[i].min[comp] = NAN;
|
||||
DEREF(dst_node).coords[i].max[comp] = NAN;
|
||||
DEREF(dst_node_f32).coords[i].min[comp] = NAN;
|
||||
DEREF(dst_node_f32).coords[i].max[comp] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Make changes to the children's BVH offset value available to the other invocations. */
|
||||
memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
|
||||
gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
|
||||
|
||||
DEREF(dst_node).children = children;
|
||||
if (is_box16) {
|
||||
DEREF(dst_node_f16).children = children;
|
||||
} else {
|
||||
DEREF(dst_node_f32).children = children;
|
||||
|
||||
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
|
||||
DEREF(dst_node).flags = flags;
|
||||
if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS))
|
||||
DEREF(dst_node_f32).flags = flags;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ bvh_includes = files(
|
|||
bvh_spv = []
|
||||
foreach s : bvh_shaders
|
||||
command = [
|
||||
prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5',
|
||||
prog_glslang, '-V', '-I' + bvh_include_dir, '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5',
|
||||
'-x', '-o', '@OUTPUT@', '@INPUT@', glslang_depfile, glslang_quiet,
|
||||
]
|
||||
command += vk_glsl_shader_preamble
|
||||
|
|
|
|||
|
|
@ -374,7 +374,16 @@ rra_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *p
|
|||
struct radv_device *device = radv_queue_device(queue);
|
||||
|
||||
VkResult result = device->layer_dispatch.rra.QueueSubmit2KHR(_queue, submitCount, pSubmits, _fence);
|
||||
if (result != VK_SUCCESS || !device->rra_trace.triggered)
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (radv_bvh_stats_file()) {
|
||||
result = radv_dump_bvh_stats(_queue);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
|
||||
if (!device->rra_trace.triggered)
|
||||
return result;
|
||||
|
||||
uint32_t total_trace_count = 0;
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ enum radv_encode_key_bits {
|
|||
RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS = (1 << 0),
|
||||
RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 = (1 << 1),
|
||||
RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12 = (1 << 2),
|
||||
RADV_ENCODE_KEY_USE_BOX16 = (1 << 3),
|
||||
};
|
||||
|
||||
static void
|
||||
|
|
@ -287,6 +288,8 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
|
|||
VK_FROM_HANDLE(radv_device, device, _device);
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
|
||||
uint32_t encode_key = 0;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
/*
|
||||
|
|
@ -302,11 +305,13 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s
|
|||
state->build_info->type != VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
|
||||
encode_key |= RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS;
|
||||
|
||||
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
||||
if (!(state->build_info->flags & (VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR |
|
||||
VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR)) &&
|
||||
geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
|
||||
encode_key |= RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
|
||||
} else if (!radv_emulate_rt(pdev)) {
|
||||
if (!(state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
|
||||
encode_key |= RADV_ENCODE_KEY_USE_BOX16;
|
||||
}
|
||||
|
||||
state->config.encode_key[0] = encode_key;
|
||||
|
|
@ -391,6 +396,8 @@ radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
|
|||
flags |= RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES;
|
||||
if (key & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
|
||||
flags |= RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES;
|
||||
if (key & RADV_ENCODE_KEY_USE_BOX16)
|
||||
flags |= RADV_BUILD_FLAG_USE_BOX16;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -722,7 +722,7 @@ radv_device_init_tools(struct radv_device *device)
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if ((instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(pdev)) {
|
||||
if (radv_bvh_dumping_enabled(instance) && radv_enable_rt(pdev)) {
|
||||
result = radv_rra_trace_init(device);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
@ -798,7 +798,7 @@ init_dispatch_tables(struct radv_device *device, struct radv_physical_device *pd
|
|||
if (instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
|
||||
add_entrypoints(&b, &sqtt_device_entrypoints, RADV_RGP_DISPATCH_TABLE);
|
||||
|
||||
if ((instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(pdev))
|
||||
if (radv_bvh_dumping_enabled(instance) && radv_enable_rt(pdev))
|
||||
add_entrypoints(&b, &rra_device_entrypoints, RADV_RRA_DISPATCH_TABLE);
|
||||
|
||||
#ifndef _WIN32
|
||||
|
|
|
|||
|
|
@ -115,4 +115,17 @@ const char *radv_get_perftest_option_name(int id);
|
|||
|
||||
bool radv_is_rt_wave64_enabled(const struct radv_instance *instance);
|
||||
|
||||
/* Return the value of the RADV_BVH_STATS_FILE option as reported by
 * os_get_option(). Callers treat a NULL result as "BVH stats dumping disabled".
 *
 * Declared inline because it is defined in a header: a plain static function
 * would be instantiated (and flagged as unused) in every translation unit that
 * includes the header without calling it. The explicit (void) makes this a
 * real prototype.
 */
static inline const char *
radv_bvh_stats_file(void)
{
   return os_get_option("RADV_BVH_STATS_FILE");
}
|
||||
|
||||
static bool
|
||||
radv_bvh_dumping_enabled(const struct radv_instance *instance)
|
||||
{
|
||||
/* Gathering bvh stats uses a large part of the rra code for dumping bvhs. */
|
||||
return (instance->vk.trace_mode & RADV_TRACE_MODE_RRA) || radv_bvh_stats_file();
|
||||
}
|
||||
|
||||
#endif /* RADV_INSTANCE_H */
|
||||
|
|
|
|||
|
|
@ -198,7 +198,8 @@ rra_fill_accel_struct_header_common(const struct radv_physical_device *pdev, str
|
|||
/* TODO: calculate active primitives */
|
||||
.active_primitive_count = primitive_count,
|
||||
.geometry_description_count = header->geometry_count,
|
||||
.interior_fp32_node_count = bvh_info->internal_nodes_size / sizeof(struct radv_bvh_box32_node),
|
||||
.interior_fp32_node_count = bvh_info->box32_count,
|
||||
.interior_fp16_node_count = bvh_info->box16_count,
|
||||
.leaf_node_count = primitive_count,
|
||||
.rt_driver_interface_version = 8 << 16,
|
||||
.rt_ip_version = pdev->info.rt_ip_version,
|
||||
|
|
@ -488,6 +489,10 @@ radv_rra_trace_init(struct radv_device *device)
|
|||
|
||||
device->rra_trace.ray_history = UTIL_DYNARRAY_INIT;
|
||||
|
||||
/* BVH stats dumping does not need ray history. */
|
||||
if (!(radv_physical_device_instance(pdev)->vk.trace_mode & RADV_TRACE_MODE_RRA))
|
||||
return VK_SUCCESS;
|
||||
|
||||
device->rra_trace.ray_history_buffer_size = debug_get_num_option("RADV_RRA_TRACE_HISTORY_SIZE", 100 * 1024 * 1024);
|
||||
if (device->rra_trace.ray_history_buffer_size <
|
||||
sizeof(struct radv_ray_history_header) + sizeof(struct radv_packed_end_trace_token))
|
||||
|
|
@ -624,6 +629,9 @@ radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
|
|||
simple_mtx_destroy(&data->data_mtx);
|
||||
_mesa_hash_table_destroy(data->accel_structs, NULL);
|
||||
_mesa_hash_table_u64_destroy(data->accel_struct_vas);
|
||||
|
||||
if (data->stats_file)
|
||||
fclose(data->stats_file);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -789,7 +797,7 @@ rra_map_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
|
|||
if (radv_GetEventStatus(ctx->device, data->build_event) != VK_EVENT_SET)
|
||||
return NULL;
|
||||
|
||||
if (data->buffer->memory) {
|
||||
if (data->buffer && data->buffer->memory) {
|
||||
VkMemoryMapInfo memory_map_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_MAP_INFO,
|
||||
.memory = data->buffer->memory,
|
||||
|
|
@ -1297,3 +1305,167 @@ cleanup:
|
|||
free(accel_struct_offsets);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_bvh_stats(struct radv_device *device, struct vk_acceleration_structure *accel_struct,
|
||||
struct radv_rra_accel_struct_data *accel_struct_data, uint8_t *data, struct hash_table_u64 *blas_sah,
|
||||
bool tlas_pass)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
|
||||
|
||||
bool is_tlas = header->instance_count > 0;
|
||||
if (is_tlas != tlas_pass)
|
||||
return;
|
||||
|
||||
/* convert root node id to offset */
|
||||
uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;
|
||||
|
||||
if (rra_validate_header(accel_struct_data, header)) {
|
||||
return;
|
||||
}
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
if (rra_validate_node_gfx12(device->rra_trace.accel_struct_vas, data + header->bvh_offset,
|
||||
data + header->bvh_offset + src_root_offset, header->geometry_count,
|
||||
accel_struct_data->size, !is_tlas, 0)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (rra_validate_node_gfx10_3(device->rra_trace.accel_struct_vas, data + header->bvh_offset,
|
||||
data + header->bvh_offset + src_root_offset, header->geometry_count,
|
||||
accel_struct_data->size, !is_tlas, 0)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!device->rra_trace.stats_file) {
|
||||
device->rra_trace.stats_file = fopen(radv_bvh_stats_file(), "w");
|
||||
fprintf(device->rra_trace.stats_file, "app,name,type,allocated_size,compacted_size");
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
fprintf(device->rra_trace.stats_file, ",max_depth,box_node_count,primitive_node_count,instance_node_count");
|
||||
} else {
|
||||
fprintf(device->rra_trace.stats_file, ",max_depth,box16_node_count,box32_node_count,triangle_node_count,"
|
||||
"instance_node_count,procedual_node_count");
|
||||
}
|
||||
fprintf(device->rra_trace.stats_file, ",sah,scene_sah\n");
|
||||
}
|
||||
|
||||
fprintf(device->rra_trace.stats_file, "\"%s\",%s,%s,%" PRIu64 ",%" PRIu64, instance->vk.app_info.app_name,
|
||||
vk_object_base_name(&accel_struct->base), is_tlas ? "tlas" : "blas", accel_struct_data->size,
|
||||
header->compacted_size);
|
||||
|
||||
float extent[3] = {
|
||||
header->aabb.max.x - header->aabb.min.x,
|
||||
header->aabb.max.y - header->aabb.min.y,
|
||||
header->aabb.max.z - header->aabb.min.z,
|
||||
};
|
||||
float surface_area = 2 * (extent[0] * extent[1] + extent[0] * extent[2] + extent[1] * extent[2]);
|
||||
|
||||
float sah;
|
||||
float instance_sah;
|
||||
if (radv_use_bvh8(pdev)) {
|
||||
struct radv_bvh_stats_gfx12 stats = {};
|
||||
radv_gather_bvh_stats_gfx12(data + header->bvh_offset, RADV_BVH_ROOT_NODE, 1, surface_area, blas_sah, &stats);
|
||||
sah = stats.sah;
|
||||
instance_sah = stats.instance_sah;
|
||||
fprintf(device->rra_trace.stats_file, ",%u,%u,%u,%u", stats.max_depth, stats.box_node_count,
|
||||
stats.primitive_node_count, stats.instance_node_count);
|
||||
} else {
|
||||
struct radv_bvh_stats_gfx10_3 stats = {};
|
||||
radv_gather_bvh_stats_gfx10_3(data + header->bvh_offset, RADV_BVH_ROOT_NODE, 1, surface_area, blas_sah, &stats);
|
||||
sah = stats.sah;
|
||||
instance_sah = stats.instance_sah;
|
||||
fprintf(device->rra_trace.stats_file, ",%u,%u,%u,%u,%u,%u", stats.max_depth, stats.box16_node_count,
|
||||
stats.box32_node_count, stats.triangle_node_count, stats.instance_node_count, stats.procedual_node_count);
|
||||
}
|
||||
|
||||
fprintf(device->rra_trace.stats_file, ",%u", (uint32_t)(sah / surface_area * 1000000));
|
||||
|
||||
if (is_tlas) {
|
||||
fprintf(device->rra_trace.stats_file, ",%u\n", (uint32_t)((sah + instance_sah) / surface_area * 1000000));
|
||||
} else {
|
||||
fprintf(device->rra_trace.stats_file, ",0\n");
|
||||
|
||||
float *sah_ptr = ralloc(blas_sah, float);
|
||||
*sah_ptr = sah / surface_area;
|
||||
_mesa_hash_table_u64_insert(blas_sah, vk_acceleration_structure_get_va(accel_struct), sah_ptr);
|
||||
}
|
||||
|
||||
fflush(device->rra_trace.stats_file);
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_dump_bvh_stats(VkQueue vk_queue)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_queue, queue, vk_queue);
|
||||
struct radv_device *device = radv_queue_device(queue);
|
||||
VkDevice vk_device = radv_device_to_handle(device);
|
||||
|
||||
VkResult result = vk_common_DeviceWaitIdle(vk_device);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct hash_entry **hash_entries = NULL;
|
||||
struct hash_table_u64 *blas_sah = NULL;
|
||||
|
||||
uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
|
||||
|
||||
hash_entries = malloc(sizeof(*hash_entries) * struct_count);
|
||||
if (!hash_entries) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
struct hash_entry *last_entry = NULL;
|
||||
for (unsigned i = 0; (last_entry = _mesa_hash_table_next_entry(device->rra_trace.accel_structs, last_entry)); ++i)
|
||||
hash_entries[i] = last_entry;
|
||||
|
||||
qsort(hash_entries, struct_count, sizeof(*hash_entries), accel_struct_entry_cmp);
|
||||
|
||||
struct rra_copy_context copy_ctx = {
|
||||
.device = vk_device,
|
||||
.queue = vk_queue,
|
||||
.entries = hash_entries,
|
||||
.family_index = queue->vk.queue_family_index,
|
||||
.min_size = device->rra_trace.ray_history_buffer_size,
|
||||
};
|
||||
|
||||
result = rra_copy_context_init(©_ctx);
|
||||
if (result != VK_SUCCESS)
|
||||
goto cleanup;
|
||||
|
||||
blas_sah = _mesa_hash_table_u64_create(NULL);
|
||||
|
||||
for (unsigned i = 0; i < struct_count; i++) {
|
||||
void *mapped_data = rra_map_accel_struct_data(©_ctx, i);
|
||||
if (!mapped_data)
|
||||
continue;
|
||||
|
||||
dump_bvh_stats(device, (void *)hash_entries[i]->key, hash_entries[i]->data, mapped_data, blas_sah, false);
|
||||
|
||||
rra_unmap_accel_struct_data(©_ctx, i);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < struct_count; i++) {
|
||||
if (_mesa_hash_table_u64_search(blas_sah, vk_acceleration_structure_get_va(hash_entries[i]->key)))
|
||||
continue;
|
||||
|
||||
void *mapped_data = rra_map_accel_struct_data(©_ctx, i);
|
||||
if (!mapped_data)
|
||||
continue;
|
||||
|
||||
dump_bvh_stats(device, (void *)hash_entries[i]->key, hash_entries[i]->data, mapped_data, blas_sah, true);
|
||||
|
||||
rra_unmap_accel_struct_data(©_ctx, i);
|
||||
}
|
||||
|
||||
rra_copy_context_finish(©_ctx);
|
||||
|
||||
result = VK_SUCCESS;
|
||||
cleanup:
|
||||
_mesa_hash_table_u64_destroy(blas_sah);
|
||||
free(hash_entries);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ struct radv_rra_trace_data {
|
|||
struct hash_table *accel_structs;
|
||||
struct hash_table_u64 *accel_struct_vas;
|
||||
simple_mtx_t data_mtx;
|
||||
FILE *stats_file;
|
||||
bool validate_as;
|
||||
bool copy_after_build;
|
||||
bool triggered;
|
||||
|
|
@ -288,6 +289,8 @@ struct rra_bvh_info {
|
|||
uint32_t leaf_nodes_size;
|
||||
uint32_t internal_nodes_size;
|
||||
uint32_t instance_sideband_data_size;
|
||||
uint32_t box32_count;
|
||||
uint32_t box16_count;
|
||||
struct rra_geometry_info *geometry_infos;
|
||||
};
|
||||
|
||||
|
|
@ -320,4 +323,32 @@ void rra_gather_bvh_info_gfx12(const uint8_t *bvh, uint32_t node_id, struct rra_
|
|||
void rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id, uint32_t src_id,
|
||||
uint32_t dst_offset);
|
||||
|
||||
/* Statistics accumulated by radv_gather_bvh_stats_gfx10_3() while walking a BVH.
 *
 * max_depth is the largest depth passed to any visited node. sah sums a
 * per-node-type cost multiplied by the surface area passed in for that node.
 * instance_sah sums, for each instance node, the SAH recorded for the
 * referenced BLAS multiplied by the instance's surface area. The *_node_count
 * fields count the visited nodes of each type; procedual_node_count counts
 * radv_bvh_node_aabb nodes.
 */
struct radv_bvh_stats_gfx10_3 {
|
||||
uint32_t max_depth;
|
||||
float sah;
|
||||
float instance_sah;
|
||||
uint32_t box16_node_count;
|
||||
uint32_t box32_node_count;
|
||||
uint32_t triangle_node_count;
|
||||
uint32_t instance_node_count;
|
||||
uint32_t procedual_node_count;
|
||||
};
|
||||
|
||||
/* Statistics accumulated by radv_gather_bvh_stats_gfx12() while walking a BVH.
 *
 * max_depth is the largest depth passed to any visited node, box_node_count
 * counts visited box nodes and sah accumulates a per-node cost scaled by the
 * node's surface area.
 * NOTE(review): instance_sah, primitive_node_count and instance_node_count are
 * presumably filled like their gfx10.3 counterparts; the code updating them is
 * not visible here - confirm against radv_gather_bvh_stats_gfx12().
 */
struct radv_bvh_stats_gfx12 {
|
||||
uint32_t max_depth;
|
||||
float sah;
|
||||
float instance_sah;
|
||||
uint32_t box_node_count;
|
||||
uint32_t primitive_node_count;
|
||||
uint32_t instance_node_count;
|
||||
};
|
||||
|
||||
void radv_gather_bvh_stats_gfx10_3(const uint8_t *bvh, uint32_t node_id, uint32_t depth, float p,
|
||||
struct hash_table_u64 *blas_sah, struct radv_bvh_stats_gfx10_3 *stats);
|
||||
|
||||
void radv_gather_bvh_stats_gfx12(const uint8_t *bvh, uint32_t node_id, uint32_t depth, float p,
|
||||
struct hash_table_u64 *blas_sah, struct radv_bvh_stats_gfx12 *stats);
|
||||
|
||||
VkResult radv_dump_bvh_stats(VkQueue vk_queue);
|
||||
|
||||
#endif /* RADV_RRA_H */
|
||||
|
|
|
|||
|
|
@ -177,9 +177,11 @@ rra_gather_bvh_info_gfx10_3(const uint8_t *bvh, uint32_t node_id, struct rra_bvh
|
|||
switch (node_type) {
|
||||
case radv_bvh_node_box16:
|
||||
dst->internal_nodes_size += sizeof(struct rra_box16_node);
|
||||
dst->box16_count++;
|
||||
break;
|
||||
case radv_bvh_node_box32:
|
||||
dst->internal_nodes_size += sizeof(struct rra_box32_node);
|
||||
dst->box32_count++;
|
||||
break;
|
||||
case radv_bvh_node_instance:
|
||||
dst->leaf_nodes_size += sizeof(struct rra_instance_node);
|
||||
|
|
@ -283,15 +285,15 @@ rra_transcode_box16_node(struct rra_transcoding_context *ctx, const struct radv_
|
|||
vk_aabb bounds = {
|
||||
.min =
|
||||
{
|
||||
_mesa_half_to_float(src->coords[i][0][0]),
|
||||
_mesa_half_to_float(src->coords[i][0][1]),
|
||||
_mesa_half_to_float(src->coords[i][0][2]),
|
||||
_mesa_half_to_float(src->coords[i].min_x),
|
||||
_mesa_half_to_float(src->coords[i].min_y),
|
||||
_mesa_half_to_float(src->coords[i].min_z),
|
||||
},
|
||||
.max =
|
||||
{
|
||||
_mesa_half_to_float(src->coords[i][1][0]),
|
||||
_mesa_half_to_float(src->coords[i][1][1]),
|
||||
_mesa_half_to_float(src->coords[i][1][2]),
|
||||
_mesa_half_to_float(src->coords[i].max_x),
|
||||
_mesa_half_to_float(src->coords[i].max_y),
|
||||
_mesa_half_to_float(src->coords[i].max_z),
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -355,3 +357,78 @@ rra_transcode_node_gfx10_3(struct rra_transcoding_context *ctx, uint32_t parent_
|
|||
|
||||
return dst_id;
|
||||
}
|
||||
|
||||
void
|
||||
radv_gather_bvh_stats_gfx10_3(const uint8_t *bvh, uint32_t node_id, uint32_t depth, float p,
|
||||
struct hash_table_u64 *blas_sah, struct radv_bvh_stats_gfx10_3 *stats)
|
||||
{
|
||||
uint32_t node_type = node_id & 7;
|
||||
const void *node = bvh + ((node_id & (~7u)) << 3);
|
||||
|
||||
stats->max_depth = MAX2(stats->max_depth, depth);
|
||||
|
||||
switch (node_type) {
|
||||
case radv_bvh_node_box16: {
|
||||
stats->sah += 1.0 * p;
|
||||
stats->box16_node_count++;
|
||||
|
||||
const struct radv_bvh_box16_node *box16 = node;
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
if (box16->children[i] != 0xffffffff) {
|
||||
float extent[3] = {
|
||||
_mesa_half_to_float(box16->coords[i].max_x) - _mesa_half_to_float(box16->coords[i].min_x),
|
||||
_mesa_half_to_float(box16->coords[i].max_y) - _mesa_half_to_float(box16->coords[i].min_y),
|
||||
_mesa_half_to_float(box16->coords[i].max_z) - _mesa_half_to_float(box16->coords[i].min_z),
|
||||
};
|
||||
float surface_area = 2 * (extent[0] * extent[1] + extent[0] * extent[2] + extent[1] * extent[2]);
|
||||
radv_gather_bvh_stats_gfx10_3(bvh, box16->children[i], depth + 1, surface_area, blas_sah, stats);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_box32: {
|
||||
stats->sah += 1.5 * p;
|
||||
stats->box32_node_count++;
|
||||
|
||||
const struct radv_bvh_box32_node *box32 = node;
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
if (box32->children[i] != 0xffffffff) {
|
||||
float extent[3] = {
|
||||
box32->coords[i].max.x - box32->coords[i].min.x,
|
||||
box32->coords[i].max.y - box32->coords[i].min.y,
|
||||
box32->coords[i].max.z - box32->coords[i].min.z,
|
||||
};
|
||||
float surface_area = 2 * (extent[0] * extent[1] + extent[0] * extent[2] + extent[1] * extent[2]);
|
||||
radv_gather_bvh_stats_gfx10_3(bvh, box32->children[i], depth + 1, surface_area, blas_sah, stats);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_instance: {
|
||||
stats->sah += 2.0 * p;
|
||||
stats->instance_node_count++;
|
||||
|
||||
const struct radv_bvh_instance_node *instance = node;
|
||||
uint64_t blas_va = radv_node_to_addr(instance->bvh_ptr) - instance->bvh_offset;
|
||||
float *sah = _mesa_hash_table_u64_search(blas_sah, blas_va);
|
||||
if (sah)
|
||||
stats->instance_sah += *sah * p;
|
||||
else
|
||||
fprintf(stderr, "radv: Could not find SAH for BLAS at address 0x%lx\n", blas_va);
|
||||
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_triangle:
|
||||
stats->sah += 2.0 * p;
|
||||
stats->triangle_node_count++;
|
||||
break;
|
||||
case radv_bvh_node_aabb:
|
||||
stats->sah += 4.0 * p;
|
||||
stats->procedual_node_count++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include "radv_rra.h"
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "util/compiler.h"
|
||||
|
||||
struct rra_instance_sideband_data {
|
||||
uint32_t instance_index;
|
||||
|
|
@ -306,3 +307,98 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
radv_gather_bvh_stats_gfx12(const uint8_t *bvh, uint32_t node_id, uint32_t depth, float surface_area,
|
||||
struct hash_table_u64 *blas_sah, struct radv_bvh_stats_gfx12 *stats)
|
||||
{
|
||||
uint32_t node_type = node_id & 0xf;
|
||||
const void *node = bvh + ((node_id & (~0xf)) << 3);
|
||||
|
||||
stats->max_depth = MAX2(stats->max_depth, depth);
|
||||
|
||||
switch (node_type) {
|
||||
case radv_bvh_node_box32: {
|
||||
stats->box_node_count++;
|
||||
stats->sah += 0.5 * surface_area;
|
||||
|
||||
const struct radv_gfx12_box_node *src = node;
|
||||
|
||||
uint32_t valid_child_count_minus_one = src->child_count_exponents >> 28;
|
||||
|
||||
if (valid_child_count_minus_one != 0xf) {
|
||||
uint32_t internal_id = src->internal_base_id;
|
||||
uint32_t primitive_id = src->primitive_base_id;
|
||||
|
||||
uint32_t exponents[3] = {
|
||||
src->child_count_exponents & 0xff,
|
||||
(src->child_count_exponents >> 8) & 0xff,
|
||||
(src->child_count_exponents >> 16) & 0xff,
|
||||
};
|
||||
float extent[3] = {
|
||||
uif(exponents[0] << 23),
|
||||
uif(exponents[1] << 23),
|
||||
uif(exponents[2] << 23),
|
||||
};
|
||||
|
||||
for (uint32_t i = 0; i <= valid_child_count_minus_one; i++) {
|
||||
uint32_t child_type = (src->children[i].dword2 >> 24) & 0xf;
|
||||
uint32_t child_size = src->children[i].dword2 >> 28;
|
||||
|
||||
uint32_t child_id;
|
||||
if (child_type == radv_bvh_node_box32) {
|
||||
child_id = internal_id | child_type;
|
||||
internal_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
|
||||
} else {
|
||||
child_id = primitive_id | child_type;
|
||||
primitive_id += (child_size * RADV_GFX12_BVH_NODE_SIZE) >> 3;
|
||||
}
|
||||
|
||||
float min[3] = {
|
||||
(float)(src->children[i].dword0 & 0xfff) / 0x1000 * extent[0],
|
||||
(float)((src->children[i].dword0 >> 12) & 0xfff) / 0x1000 * extent[1],
|
||||
(float)(src->children[i].dword1 & 0xfff) / 0x1000 * extent[2],
|
||||
};
|
||||
float max[3] = {
|
||||
(float)(((src->children[i].dword1 >> 12) & 0xfff) + 1) / 0x1000 * extent[0],
|
||||
(float)((src->children[i].dword2 & 0xfff) + 1) / 0x1000 * extent[1],
|
||||
(float)(((src->children[i].dword2 >> 12) & 0xfff) + 1) / 0x1000 * extent[2],
|
||||
};
|
||||
float child_extent[3] = {
|
||||
max[0] - min[0],
|
||||
max[1] - min[1],
|
||||
max[2] - min[2],
|
||||
};
|
||||
float child_surface_area = 2 * (child_extent[0] * child_extent[1] + child_extent[0] * child_extent[2] +
|
||||
child_extent[1] * child_extent[2]);
|
||||
|
||||
radv_gather_bvh_stats_gfx12(bvh, child_id, depth + 1, child_surface_area, blas_sah, stats);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_instance: {
|
||||
stats->instance_node_count++;
|
||||
stats->sah += 0.7 * surface_area;
|
||||
|
||||
struct radv_gfx12_instance_node *instance = (struct radv_gfx12_instance_node *)(node);
|
||||
const struct radv_gfx12_instance_node_user_data *user_data =
|
||||
(const void *)((const uint8_t *)node + sizeof(struct radv_gfx12_instance_node));
|
||||
uint64_t blas_va = radv_node_to_addr(instance->pointer_flags_bvh_addr) - user_data->bvh_offset;
|
||||
float *sah = _mesa_hash_table_u64_search(blas_sah, blas_va);
|
||||
if (sah)
|
||||
stats->instance_sah += *sah * surface_area;
|
||||
else
|
||||
fprintf(stderr, "radv: Could not find SAH for BLAS at address 0x%lx\n", blas_va);
|
||||
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_triangle:
|
||||
stats->primitive_node_count++;
|
||||
FALLTHROUGH;
|
||||
default:
|
||||
stats->sah += 1.0 * surface_area;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ for src_t in [tint, tuint, tfloat, tbool]:
|
|||
for dst_t in dst_types:
|
||||
for dst_bit_size in type_sizes(dst_t):
|
||||
if dst_bit_size == 16 and dst_t == tfloat and src_t == tfloat:
|
||||
rnd_modes = ['_rtne', '_rtz', '']
|
||||
rnd_modes = ['_rtne', '_rtz', '_ru', '_rd', '']
|
||||
for rnd_mode in rnd_modes:
|
||||
if rnd_mode == '_rtne':
|
||||
conv_expr = """
|
||||
|
|
@ -279,6 +279,22 @@ for src_t in [tint, tuint, tfloat, tbool]:
|
|||
dst = src0;
|
||||
}
|
||||
"""
|
||||
elif rnd_mode == '_ru':
|
||||
conv_expr = """
|
||||
if (bit_size > 16) {
|
||||
dst = _mesa_half_to_float(_mesa_float_to_float16_ru(src0));
|
||||
} else {
|
||||
dst = src0;
|
||||
}
|
||||
"""
|
||||
elif rnd_mode == '_rd':
|
||||
conv_expr = """
|
||||
if (bit_size > 16) {
|
||||
dst = _mesa_half_to_float(_mesa_float_to_float16_rd(src0));
|
||||
} else {
|
||||
dst = src0;
|
||||
}
|
||||
"""
|
||||
else:
|
||||
conv_expr = """
|
||||
if (bit_size > 32) {
|
||||
|
|
|
|||
13
src/compiler/spirv/spirv_internal_exts.h
Normal file
13
src/compiler/spirv/spirv_internal_exts.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* Copyright © 2025 Valve Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef SPIRV_INTERNAL_EXTS_H
#define SPIRV_INTERNAL_EXTS_H

/* Instruction numbers of the "MesaInternal" extended instruction set.
 *
 * These stay plain object-like macros; presumably the header is also
 * included from shader sources, so avoid C-only constructs here (confirm
 * against the build files before changing the form).
 */

/* Float conversion to 16 bits using the "_ru" rounding variant. */
#define SpvOpFConvertRUMesa 0

/* Float conversion to 16 bits using the "_rd" rounding variant. */
#define SpvOpFConvertRDMesa 1

#endif /* SPIRV_INTERNAL_EXTS_H */
|
||||
|
|
@ -923,6 +923,29 @@ vtn_handle_non_semantic_debug_info(struct vtn_builder *b, SpvOp ext_opcode,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
vtn_handle_mesa_internal(struct vtn_builder *b, SpvOp ext_opcode,
|
||||
const uint32_t *w, unsigned count)
|
||||
{
|
||||
uint32_t instr = w[4];
|
||||
|
||||
switch (instr) {
|
||||
case SpvOpFConvertRUMesa: {
|
||||
struct vtn_ssa_value *arg = vtn_ssa_value(b, w[5]);
|
||||
vtn_push_nir_ssa(b, w[2], nir_f2f16_ru(&b->nb, arg->def));
|
||||
break;
|
||||
}
|
||||
case SpvOpFConvertRDMesa: {
|
||||
struct vtn_ssa_value *arg = vtn_ssa_value(b, w[5]);
|
||||
vtn_push_nir_ssa(b, w[2], nir_f2f16_rd(&b->nb, arg->def));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
|
||||
const uint32_t *w, unsigned count)
|
||||
|
|
@ -958,6 +981,8 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
|
|||
val->ext_handler = vtn_handle_debug_printf;
|
||||
} else if (strstr(ext, "NonSemantic.") == ext) {
|
||||
val->ext_handler = vtn_handle_non_semantic_instruction;
|
||||
} else if (strstr(ext, "MesaInternal") == ext) {
|
||||
val->ext_handler = vtn_handle_mesa_internal;
|
||||
} else {
|
||||
vtn_fail("Unsupported extension: %s", ext);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "spirv.h"
|
||||
#include "spirv_info.h"
|
||||
#include "vtn_generator_ids.h"
|
||||
#include "spirv_internal_exts.h"
|
||||
|
||||
extern uint32_t mesa_spirv_debug;
|
||||
|
||||
|
|
|
|||
|
|
@ -211,3 +211,41 @@ uint16_t _mesa_uint16_div_64k_to_half(uint16_t v)
|
|||
|
||||
return (e << 10) | m;
|
||||
}
|
||||
|
||||
/**
 * Step a half-float bit pattern to the adjacent representable value.
 *
 * \param x   Half-float bit pattern.
 * \param up  true to step towards +infinity, false towards -infinity.
 * \return The neighbouring bit pattern. NaN inputs, +infinity when stepping
 *         up and -infinity when stepping down are returned unchanged.
 */
static uint16_t
util_nextafter16(uint16_t x, bool up)
{
   const uint16_t smallest_positive = 0x0001;
   const uint16_t smallest_negative = 0x8001;

   const float value = _mesa_half_to_float(x);

   /* Nothing lies beyond NaN or the infinity we are heading towards. */
   if (isnan(value))
      return x;
   if (up ? value == INFINITY : value == -INFINITY)
      return x;

   /* Both zeros step straight to the smallest magnitude of the requested
    * sign; plain integer +/- 1 on a zero pattern would not give that.
    */
   if (value == 0)
      return up ? smallest_positive : smallest_negative;

   /* Otherwise the neighbour is one integer step away: growing the magnitude
    * moves away from zero, shrinking it moves towards zero.
    */
   const bool negative = value < 0;
   return (uint16_t)(up != negative ? x + 1 : x - 1);
}
|
||||
|
||||
/**
 * Convert a float to a half-float bit pattern that is never below \p val.
 *
 * The value is first converted with _mesa_float_to_half(); when that result
 * compares less than \p val it is bumped to the next half above it.
 */
uint16_t
_mesa_float_to_float16_ru(float val)
{
   const uint16_t converted = _mesa_float_to_half(val);
   const bool landed_below = _mesa_half_to_float(converted) < val;

   return landed_below ? util_nextafter16(converted, true) : converted;
}
|
||||
|
||||
/**
 * Convert a float to a half-float bit pattern that is never above \p val.
 *
 * The value is first converted with _mesa_float_to_half(); when that result
 * compares greater than \p val it is lowered to the next half below it.
 */
uint16_t
_mesa_float_to_float16_rd(float val)
{
   const uint16_t converted = _mesa_float_to_half(val);
   const bool landed_above = _mesa_half_to_float(converted) > val;

   return landed_above ? util_nextafter16(converted, false) : converted;
}
|
||||
|
|
|
|||
|
|
@ -113,6 +113,9 @@ _mesa_float_to_float16_rtz(float val)
|
|||
return _mesa_float_to_float16_rtz_slow(val);
|
||||
}
|
||||
|
||||
uint16_t _mesa_float_to_float16_ru(float val);
|
||||
uint16_t _mesa_float_to_float16_rd(float val);
|
||||
|
||||
static inline uint16_t
|
||||
_mesa_float_to_float16_rtne(float val)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ bvh_shaders = [
|
|||
],
|
||||
]
|
||||
|
||||
spirv_include_dir = dir_source_root + '/src/compiler/spirv'
|
||||
vk_bvh_include_dir = dir_source_root + '/src/vulkan/runtime/bvh'
|
||||
|
||||
vk_bvh_includes = files(
|
||||
|
|
@ -50,6 +51,7 @@ vk_bvh_includes = files(
|
|||
'vk_build_interface.h',
|
||||
'vk_bvh.h',
|
||||
'vk_debug.h',
|
||||
spirv_include_dir + '/spirv_internal_exts.h',
|
||||
)
|
||||
|
||||
vk_glsl_shader_extensions = [
|
||||
|
|
@ -69,6 +71,7 @@ vk_glsl_shader_extensions = [
|
|||
'GL_KHR_shader_subgroup_ballot',
|
||||
'GL_KHR_shader_subgroup_clustered',
|
||||
'GL_EXT_shader_atomic_int64',
|
||||
'GL_EXT_spirv_intrinsics',
|
||||
]
|
||||
|
||||
vk_glsl_shader_preamble = []
|
||||
|
|
@ -79,7 +82,7 @@ endforeach
|
|||
bvh_spv = []
|
||||
foreach s : bvh_shaders
|
||||
command = [
|
||||
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
|
||||
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + spirv_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
|
||||
] + (with_mesa_debug ? ['-g'] : [])
|
||||
command += glslang_quiet
|
||||
command += vk_glsl_shader_preamble
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@
|
|||
|
||||
#define INFINITY (1.0 / 0.0)
|
||||
#define NAN (0.0 / 0.0)
|
||||
#define NAN_F16 (0.0hf / 0.0hf)
|
||||
|
||||
#define INDEX(type, ptr, index) REF(type)(OFFSET(ptr, (index)*SIZEOF(type)))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue