radv: Switch to the GLSL morton implementation

Signed-off-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17028>
This commit is contained in:
Konstantin Seurer 2022-06-12 22:13:13 +02:00 committed by Marge Bot
parent f79fe32cdc
commit b1a8797de9

View file

@ -29,6 +29,10 @@
#include "radix_sort/radv_radix_sort.h"
static const uint32_t morton_spv[] = {
#include "bvh/morton.comp.spv.h"
};
/* Min and max bounds of the bvh used to compute morton codes */
#define SCRATCH_TOTAL_BOUNDS_SIZE (6 * sizeof(float))
@ -383,8 +387,9 @@ struct build_primitive_constants {
};
struct morton_constants {
uint64_t node_addr;
uint64_t scratch_addr;
uint64_t bvh_addr;
uint64_t bounds_addr;
uint64_t ids_addr;
};
struct build_internal_constants {
@ -438,16 +443,6 @@ atomic_fminmax(struct radv_device *dev, nir_builder *b, nir_ssa_def *addr, bool
nir_global_atomic_imin(b, 32, addr, val);
}
static nir_ssa_def *
read_fminmax_atomic(struct radv_device *dev, nir_builder *b, unsigned channels, nir_ssa_def *addr)
{
nir_ssa_def *val = nir_build_load_global(b, channels, 32, addr,
.access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER);
return nir_bcsel(b, nir_ilt(b, val, nir_imm_int(b, 0)),
nir_isub(b, nir_imm_int(b, -2147483648), val), val);
}
static nir_shader *
build_leaf_shader(struct radv_device *dev)
{
@ -790,88 +785,6 @@ determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
nir_pop_if(b, NULL);
}
/* https://developer.nvidia.com/blog/thinking-parallel-part-iii-tree-construction-gpu/ */
static nir_ssa_def *
build_morton_component(nir_builder *b, nir_ssa_def *x)
{
x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000101u), 0x0F00F00Fu);
x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000011u), 0xC30C30C3u);
x = nir_iand_imm(b, nir_imul_imm(b, x, 0x00000005u), 0x49249249u);
return x;
}
static nir_shader *
build_morton_shader(struct radv_device *dev)
{
const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
nir_builder b = create_accel_build_shader(dev, "accel_build_morton_shader");
/*
* push constants:
* i32 x 2: node address
* i32 x 2: scratch address
*/
nir_ssa_def *pconst0 =
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 0b0011));
nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 0b1100));
nir_ssa_def *global_id =
nir_iadd(&b,
nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
b.shader->info.workgroup_size[0]),
nir_load_local_invocation_index(&b));
nir_ssa_def *node_id_addr =
nir_iadd(&b, nir_iadd_imm(&b, scratch_addr, SCRATCH_TOTAL_BOUNDS_SIZE),
nir_u2u64(&b, nir_imul_imm(&b, global_id, KEY_ID_PAIR_SIZE)));
nir_ssa_def *node_id =
nir_build_load_global(&b, 1, 32, node_id_addr, .align_mul = 4, .align_offset = 0);
nir_variable *node_bounds[2] = {
nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
};
determine_bounds(&b, node_addr, node_id, node_bounds);
nir_ssa_def *node_min = nir_load_var(&b, node_bounds[0]);
nir_ssa_def *node_max = nir_load_var(&b, node_bounds[1]);
nir_ssa_def *node_pos =
nir_fmul(&b, nir_fadd(&b, node_min, node_max), nir_imm_vec3(&b, 0.5, 0.5, 0.5));
nir_ssa_def *bvh_min = read_fminmax_atomic(dev, &b, 3, scratch_addr);
nir_ssa_def *bvh_max = read_fminmax_atomic(dev, &b, 3, nir_iadd_imm(&b, scratch_addr, 12));
nir_ssa_def *bvh_size = nir_fsub(&b, bvh_max, bvh_min);
nir_ssa_def *normalized_node_pos = nir_fdiv(&b, nir_fsub(&b, node_pos, bvh_min), bvh_size);
nir_ssa_def *x_int =
nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 0), 255.0));
nir_ssa_def *x_morton = build_morton_component(&b, x_int);
nir_ssa_def *y_int =
nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 1), 255.0));
nir_ssa_def *y_morton = build_morton_component(&b, y_int);
nir_ssa_def *z_int =
nir_f2u32(&b, nir_fmul_imm(&b, nir_channel(&b, normalized_node_pos, 2), 255.0));
nir_ssa_def *z_morton = build_morton_component(&b, z_int);
nir_ssa_def *morton_code = nir_iadd(
&b, nir_iadd(&b, nir_ishl_imm(&b, x_morton, 2), nir_ishl_imm(&b, y_morton, 1)), z_morton);
nir_ssa_def *key = nir_ishl_imm(&b, morton_code, 8);
nir_ssa_def *dst_addr =
nir_iadd(&b, nir_iadd_imm(&b, scratch_addr, SCRATCH_TOTAL_BOUNDS_SIZE),
nir_u2u64(&b, nir_iadd_imm(&b, nir_imul_imm(&b, global_id, KEY_ID_PAIR_SIZE), 4)));
nir_build_store_global(&b, key, dst_addr, .align_mul = 4);
return b.shader;
}
static nir_shader *
build_internal_shader(struct radv_device *dev)
{
@ -1354,11 +1267,10 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
if (result != VK_SUCCESS)
return result;
nir_shader *morton_cs = build_morton_shader(device);
result = create_build_pipeline(device, morton_cs, sizeof(struct morton_constants),
&device->meta_state.accel_struct_build.morton_pipeline,
&device->meta_state.accel_struct_build.morton_p_layout);
result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv),
sizeof(struct morton_constants),
&device->meta_state.accel_struct_build.morton_pipeline,
&device->meta_state.accel_struct_build.morton_p_layout);
if (result != VK_SUCCESS)
return result;
@ -1507,8 +1419,9 @@ radv_CmdBuildAccelerationStructuresKHR(
pInfos[i].dstAccelerationStructure);
const struct morton_constants consts = {
.node_addr = radv_accel_struct_get_va(accel_struct),
.scratch_addr = pInfos[i].scratchData.deviceAddress,
.bvh_addr = radv_accel_struct_get_va(accel_struct),
.bounds_addr = pInfos[i].scratchData.deviceAddress,
.ids_addr = pInfos[i].scratchData.deviceAddress + SCRATCH_TOTAL_BOUNDS_SIZE,
};
radv_CmdPushConstants(commandBuffer,