mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 10:18:05 +02:00
lavapipe: Add ray traversal code
Basically the software implementation in radv_rt_common without traversal stack. Acked-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25616>
This commit is contained in:
parent
897ccbd180
commit
b69ae8b355
3 changed files with 608 additions and 0 deletions
503
src/gallium/frontends/lavapipe/lvp_nir_ray_tracing.c
Normal file
503
src/gallium/frontends/lavapipe/lvp_nir_ray_tracing.c
Normal file
|
|
@ -0,0 +1,503 @@
|
|||
/*
|
||||
* Copyright © 2021 Google
|
||||
* Copyright © 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "lvp_nir_ray_tracing.h"
|
||||
#include "lvp_acceleration_structure.h"
|
||||
#include "lvp_private.h"
|
||||
|
||||
#include "compiler/spirv/spirv.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
static nir_def *
|
||||
lvp_build_intersect_ray_box(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
|
||||
nir_def *origin, nir_def *dir, nir_def *inv_dir)
|
||||
{
|
||||
const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
|
||||
const struct glsl_type *uvec2_type = glsl_vector_type(GLSL_TYPE_UINT, 2);
|
||||
|
||||
nir_variable *distances =
|
||||
nir_variable_create(b->shader, nir_var_shader_temp, vec2_type, "distances");
|
||||
nir_store_var(b, distances, nir_imm_vec2(b, INFINITY, INFINITY), 0xf);
|
||||
|
||||
nir_variable *child_indices =
|
||||
nir_variable_create(b->shader, nir_var_shader_temp, uvec2_type, "child_indices");
|
||||
nir_store_var(b, child_indices, nir_imm_ivec2(b, 0xffffffffu, 0xffffffffu), 0xf);
|
||||
|
||||
inv_dir = nir_bcsel(b, nir_feq_imm(b, dir, 0), nir_imm_float(b, FLT_MAX), inv_dir);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
const uint32_t child_offset = offsetof(struct lvp_bvh_box_node, children[i]);
|
||||
const uint32_t coord_offsets[2] = {
|
||||
offsetof(struct lvp_bvh_box_node, bounds[i].min.x),
|
||||
offsetof(struct lvp_bvh_box_node, bounds[i].max.x),
|
||||
};
|
||||
|
||||
nir_def *child_index =
|
||||
nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset));
|
||||
|
||||
nir_def *node_coords[2] = {
|
||||
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
|
||||
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
|
||||
};
|
||||
|
||||
/* If x of the aabb min is NaN, then this is an inactive aabb.
|
||||
* We don't need to care about any other components being NaN as that is UB.
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
|
||||
*/
|
||||
nir_def *min_x = nir_channel(b, node_coords[0], 0);
|
||||
nir_def *min_x_is_not_nan =
|
||||
nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
|
||||
|
||||
nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
|
||||
nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
|
||||
|
||||
nir_def *tmin =
|
||||
nir_fmax(b,
|
||||
nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
|
||||
nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
|
||||
nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
|
||||
|
||||
nir_def *tmax =
|
||||
nir_fmin(b,
|
||||
nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
|
||||
nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
|
||||
nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
|
||||
|
||||
nir_push_if(b,
|
||||
nir_iand(b, min_x_is_not_nan,
|
||||
nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
|
||||
nir_flt(b, tmin, ray_tmax))));
|
||||
{
|
||||
nir_def *new_child_indices[2] = {child_index, child_index};
|
||||
nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 2), 1u << i);
|
||||
|
||||
nir_def *new_distances[2] = {tmin, tmin};
|
||||
nir_store_var(b, distances, nir_vec(b, new_distances, 2), 1u << i);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
nir_def *ssa_distances = nir_load_var(b, distances);
|
||||
nir_def *ssa_indices = nir_load_var(b, child_indices);
|
||||
nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, 1), nir_channel(b, ssa_distances, 0)));
|
||||
{
|
||||
nir_store_var(b, child_indices,
|
||||
nir_vec2(b, nir_channel(b, ssa_indices, 0), nir_channel(b, ssa_indices, 1)),
|
||||
0b11);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
return nir_load_var(b, child_indices);
|
||||
}
|
||||
|
||||
/* Intersects a ray against the triangle stored in a lvp_bvh_triangle_node.
 *
 * Returns vec4(t, det, v, w): t is the hit distance (already divided by det),
 * the sign of det encodes the facing (used by the caller to compute
 * frontface), and v/w are barycentric coordinates. On a miss the result keeps
 * its initial value of (INFINITY, 1.0, 0.0, 0.0).
 */
static nir_def *
lvp_build_intersect_ray_tri(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);

   const uint32_t coord_offsets[3] = {
      offsetof(struct lvp_bvh_triangle_node, coords[0]),
      offsetof(struct lvp_bvh_triangle_node, coords[1]),
      offsetof(struct lvp_bvh_triangle_node, coords[2]),
   };

   /* Load the three vertex positions from the node. */
   nir_def *node_coords[3] = {
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[2])),
   };

   nir_variable *result = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "result");
   nir_store_var(b, result, nir_imm_vec4(b, INFINITY, 1.0f, 0.0f, 0.0f), 0xf);

   /* Based on watertight Ray/Triangle intersection from
    * http://jcgt.org/published/0002/01/05/paper.pdf */

   /* Calculate the dimension where the ray direction is largest */
   nir_def *abs_dir = nir_fabs(b, dir);

   nir_def *abs_dirs[3] = {
      nir_channel(b, abs_dir, 0),
      nir_channel(b, abs_dir, 1),
      nir_channel(b, abs_dir, 2),
   };
   /* Find index of greatest value of abs_dir and put that as kz. */
   nir_def *kz = nir_bcsel(
      b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
      nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
      nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
   /* kx/ky are the remaining two axes, in cyclic order after kz. */
   nir_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
   nir_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
   nir_def *k_indices[3] = {kx, ky, kz};
   nir_def *k = nir_vec(b, k_indices, 3);

   /* Swap kx and ky dimensions to preserve winding order */
   unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
   k = nir_bcsel(b, nir_flt_imm(b, nir_vector_extract(b, dir, kz), 0.0f),
                 nir_swizzle(b, k, swap_xy_swizzle, 3), k);

   kx = nir_channel(b, k, 0);
   ky = nir_channel(b, k, 1);
   kz = nir_channel(b, k, 2);

   /* Calculate shear constants */
   nir_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
   nir_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
   nir_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);

   /* Calculate vertices relative to ray origin */
   nir_def *v_a = nir_fsub(b, node_coords[0], origin);
   nir_def *v_b = nir_fsub(b, node_coords[1], origin);
   nir_def *v_c = nir_fsub(b, node_coords[2], origin);

   /* Perform shear and scale */
   nir_def *ax =
      nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
   nir_def *ay =
      nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
   nir_def *bx =
      nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
   nir_def *by =
      nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
   nir_def *cx =
      nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
   nir_def *cy =
      nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));

   /* The edge functions are evaluated in f64 so the watertightness guarantees
    * hold without the paper's fallback path. */
   ax = nir_f2f64(b, ax);
   ay = nir_f2f64(b, ay);
   bx = nir_f2f64(b, bx);
   by = nir_f2f64(b, by);
   cx = nir_f2f64(b, cx);
   cy = nir_f2f64(b, cy);

   /* Scaled barycentric coordinates (edge functions). */
   nir_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
   nir_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
   nir_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));

   /* Perform edge tests. */
   nir_def *cond_back = nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)),
                                nir_flt_imm(b, w, 0.0f));

   nir_def *cond_front = nir_ior(
      b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_fgt_imm(b, w, 0.0f));

   /* Hit only if all edge functions share a sign (all >= 0 or all <= 0). */
   nir_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));

   nir_push_if(b, cond);
   {
      nir_def *det = nir_fadd(b, u, nir_fadd(b, v, w));

      sz = nir_f2f64(b, sz);

      v_a = nir_f2f64(b, v_a);
      v_b = nir_f2f64(b, v_b);
      v_c = nir_f2f64(b, v_c);

      nir_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
      nir_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
      nir_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));

      /* Scaled hit distance; divided by det below. */
      nir_def *t =
         nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));

      /* Reject hits behind the ray origin regardless of winding. */
      nir_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);

      nir_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));

      nir_push_if(b, det_cond_front);
      {
         /* Normalize and narrow back to f32 for the result. */
         t = nir_f2f32(b, nir_fdiv(b, t, det));
         det = nir_f2f32(b, det);
         v = nir_fdiv(b, nir_f2f32(b, v), det);
         w = nir_fdiv(b, nir_f2f32(b, w), det);

         nir_def *indices[4] = {t, det, v, w};
         nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, result);
}
|
||||
|
||||
static nir_def *
|
||||
lvp_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation)
|
||||
{
|
||||
nir_def *result_components[3] = {
|
||||
nir_channel(b, matrix[0], 3),
|
||||
nir_channel(b, matrix[1], 3),
|
||||
nir_channel(b, matrix[2], 3),
|
||||
};
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
|
||||
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
|
||||
}
|
||||
}
|
||||
return nir_vec(b, result_components, 3);
|
||||
}
|
||||
|
||||
static void
|
||||
lvp_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
unsigned offset = offsetof(struct lvp_bvh_instance_node, wto_matrix);
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
out[i] = nir_build_load_global(b, 4, 32, nir_iadd_imm(b, instance_addr, offset + i * 16));
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lvp_build_hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags,
|
||||
const struct lvp_ray_flags *ray_flags, nir_def *geometry_id_and_flags)
|
||||
{
|
||||
nir_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
|
||||
LVP_INSTANCE_FORCE_OPAQUE | LVP_INSTANCE_NO_FORCE_NOT_OPAQUE);
|
||||
opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
|
||||
opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
|
||||
return opaque;
|
||||
}
|
||||
|
||||
/* Handles a triangle-node intersection result: applies distance, facing and
 * opacity culling, fills in a lvp_triangle_intersection and invokes the
 * user-supplied triangle callback for hits that survive all culls.
 */
static void
lvp_build_triangle_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
                        const struct lvp_ray_flags *ray_flags, nir_def *result,
                        nir_def *node_addr)
{
   /* Nothing to do if the caller is not interested in triangle hits. */
   if (!args->triangle_cb)
      return;

   struct lvp_triangle_intersection intersection;
   /* result is vec4(t, det, v, w) from lvp_build_intersect_ray_tri. */
   intersection.t = nir_channel(b, result, 0);
   intersection.barycentrics = nir_channels(b, result, 0xc);

   /* Only consider hits closer than the current committed hit (tmax). */
   nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
   {
      /* Facing from the sign of the determinant, optionally flipped by the
       * instance's winding-flip flag. */
      intersection.frontface = nir_fgt_imm(b, nir_channel(b, result, 1), 0);
      nir_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                          LVP_INSTANCE_TRIANGLE_FLIP_FACING);
      intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);

      nir_def *not_cull = ray_flags->no_skip_triangles;
      nir_def *not_facing_cull =
         nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);

      /* Facing culls can be disabled per instance. */
      not_cull =
         nir_iand(b, not_cull,
                  nir_ior(b, not_facing_cull,
                          nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                        LVP_INSTANCE_TRIANGLE_FACING_CULL_DISABLE)));

      /* Reject hits at or before tmin, then fetch the leaf data. */
      nir_push_if(b, nir_iand(b, nir_flt(b, args->tmin, intersection.t), not_cull));
      {
         intersection.base.node_addr = node_addr;
         nir_def *triangle_info = nir_build_load_global(
            b, 2, 32,
            nir_iadd_imm(b, intersection.base.node_addr,
                         offsetof(struct lvp_bvh_triangle_node, primitive_id)));
         intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
         intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
         intersection.base.opaque =
            lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
                                    intersection.base.geometry_id_and_flags);

         /* Finally apply the opaque/no-opaque culls from the ray flags. */
         not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque,
                              ray_flags->no_cull_no_opaque);
         nir_push_if(b, not_cull);
         {
            args->triangle_cb(b, &intersection, args, ray_flags);
         }
         nir_pop_if(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
}
|
||||
|
||||
/* Handles an AABB leaf node: fills in a lvp_leaf_intersection, applies the
 * opacity and skip-AABBs culls from the ray flags and invokes the
 * user-supplied AABB callback for candidates that survive.
 */
static void
lvp_build_aabb_case(nir_builder *b, const struct lvp_ray_traversal_args *args,
                    const struct lvp_ray_flags *ray_flags, nir_def *node_addr)
{
   /* Nothing to do if the caller is not interested in AABB hits. */
   if (!args->aabb_cb)
      return;

   struct lvp_leaf_intersection intersection;
   intersection.node_addr = node_addr;
   /* primitive_id and geometry_id_and_flags are adjacent 32-bit words. */
   nir_def *triangle_info = nir_build_load_global(
      b, 2, 32,
      nir_iadd_imm(b, intersection.node_addr, offsetof(struct lvp_bvh_aabb_node, primitive_id)));
   intersection.primitive_id = nir_channel(b, triangle_info, 0);
   intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
   intersection.opaque = lvp_build_hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags),
                                                 ray_flags, intersection.geometry_id_and_flags);

   nir_def *not_cull =
      nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
   not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
   nir_push_if(b, not_cull);
   {
      args->aabb_cb(b, &intersection, args);
   }
   nir_pop_if(b, NULL);
}
|
||||
|
||||
static void
|
||||
lvp_build_push_stack(nir_builder *b, const struct lvp_ray_traversal_args *args, nir_def *node)
|
||||
{
|
||||
nir_def *stack_ptr = nir_load_deref(b, args->vars.stack_ptr);
|
||||
nir_store_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr), node, 0x1);
|
||||
nir_store_deref(b, args->vars.stack_ptr, nir_iadd_imm(b, nir_load_deref(b, args->vars.stack_ptr), 1), 0x1);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lvp_build_pop_stack(nir_builder *b, const struct lvp_ray_traversal_args *args)
|
||||
{
|
||||
nir_def *stack_ptr = nir_iadd_imm(b, nir_load_deref(b, args->vars.stack_ptr), -1);
|
||||
nir_store_deref(b, args->vars.stack_ptr, stack_ptr, 0x1);
|
||||
return nir_load_deref(b, nir_build_deref_array(b, args->vars.stack, stack_ptr));
|
||||
}
|
||||
|
||||
/* Emits the BVH traversal loop. Returns a boolean nir_def that is true while
 * traversal is incomplete (i.e. a callback broke out of the loop), matching
 * rayQueryProceedEXT semantics — see the header comment on the prototype.
 */
nir_def *
lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *args)
{
   nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
   nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

   nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);

   /* Predecode the SPIR-V ray flags once; the "no_*" members are true when
    * the corresponding cull/skip bit is NOT set. */
   struct lvp_ray_flags ray_flags = {
      .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
      .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
      .terminate_on_first_hit =
         nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
      .no_cull_front = nir_ieq_imm(
         b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
      .no_cull_back =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
      .no_cull_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
      .no_cull_no_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
      .no_skip_triangles =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
      .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
   };

   nir_push_loop(b);
   {
      /* No node in flight: pop the next one, or finish when the stack is
       * empty. */
      nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), LVP_BVH_INVALID_NODE));
      {
         nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.stack_ptr), 0));
         {
            nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);

         /* Stack dropped back to (or below) where we entered the current
          * BLAS: we are leaving it, so restore the TLAS-space ray. */
         nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_base), nir_load_deref(b, args->vars.stack_ptr)));
         {
            nir_store_deref(b, args->vars.stack_base, nir_imm_int(b, -1), 1);

            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
            nir_store_deref(b, args->vars.origin, args->origin, 7);
            nir_store_deref(b, args->vars.dir, args->dir, 7);
            nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
         }
         nir_pop_if(b, NULL);

         nir_store_deref(b, args->vars.current_node, lvp_build_pop_stack(b, args), 0x1);
      }
      nir_pop_if(b, NULL);

      /* Consume the current node; it is re-set below for box nodes that have
       * a child to descend into. */
      nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);
      nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_INVALID_NODE), 0x1);

      /* Node ids pack the type in the low 2 bits and the offset above them. */
      nir_def *node_addr = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, nir_iand_imm(b, bvh_node, ~3u)));

      nir_def *node_type = nir_iand_imm(b, bvh_node, 3);
      nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_internal));
      {
         nir_push_if(b, nir_uge_imm(b, node_type, lvp_bvh_node_instance));
         {
            nir_push_if(b, nir_ieq_imm(b, node_type, lvp_bvh_node_aabb));
            {
               lvp_build_aabb_case(b, args, &ray_flags, node_addr);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_store_deref(b, args->vars.instance_addr, node_addr, 1);

               /* instance_data: .xy = BLAS pointer (64-bit), .z = custom
                * index + mask, .w = SBT offset + flags. */
               nir_def *instance_data = nir_build_load_global(
                  b, 4, 32,
                  nir_iadd_imm(b, node_addr, offsetof(struct lvp_bvh_instance_node, bvh_ptr)));

               nir_def *wto_matrix[3];
               lvp_build_wto_matrix_load(b, node_addr, wto_matrix);

               nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                               1);

               /* Skip the instance when its mask (top 8 bits) does not
                * intersect the ray's cull mask. */
               nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
               nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask),
                                      nir_imm_int(b, 1 << 24)));
               {
                  nir_jump(b, nir_jump_continue);
               }
               nir_pop_if(b, NULL);

               nir_store_deref(b, args->vars.bvh_base,
                               nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);

               /* Remember where we entered the BLAS so the pop path above can
                * detect when we leave it again. */
               nir_store_deref(b, args->vars.stack_base, nir_load_deref(b, args->vars.stack_ptr), 0x1);

               /* Push the instance root node onto the stack */
               nir_store_deref(b, args->vars.current_node, nir_imm_int(b, LVP_BVH_ROOT_NODE), 0x1);

               /* Transform the ray into object space */
               nir_store_deref(b, args->vars.origin,
                               lvp_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7);
               nir_store_deref(b, args->vars.dir,
                               lvp_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
               nir_store_deref(b, args->vars.inv_dir,
                               nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            /* Internal box node: descend into the nearer hit child and push
             * the other one. */
            nir_def *result = lvp_build_intersect_ray_box(
               b, node_addr, nir_load_deref(b, args->vars.tmax),
               nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
               nir_load_deref(b, args->vars.inv_dir));

            nir_store_deref(b, args->vars.current_node, nir_channel(b, result, 0), 0x1);

            nir_push_if(b, nir_ine_imm(b, nir_channel(b, result, 1), LVP_BVH_INVALID_NODE));
            {
               lvp_build_push_stack(b, args, nir_channel(b, result, 1));
            }
            nir_pop_if(b, NULL);
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         /* Triangle leaf. */
         nir_def *result = lvp_build_intersect_ray_tri(
            b, node_addr, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
            nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));

         lvp_build_triangle_case(b, args, &ray_flags, result, node_addr);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_loop(b, NULL);

   return nir_load_var(b, incomplete);
}
|
||||
103
src/gallium/frontends/lavapipe/lvp_nir_ray_tracing.h
Normal file
103
src/gallium/frontends/lavapipe/lvp_nir_ray_tracing.h
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright © 2021 Google
|
||||
* Copyright © 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef LVP_NIR_RAY_TRACING_H
|
||||
#define LVP_NIR_RAY_TRACING_H
|
||||
|
||||
#include "nir/nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
|
||||
struct lvp_ray_traversal_args;
|
||||
|
||||
/* SPIR-V ray flags predecoded into boolean nir_defs. The "no_*" members are
 * true when the corresponding cull/skip flag is NOT set, so they can be
 * AND-ed directly into "keep this hit" conditions. */
struct lvp_ray_flags {
   nir_def *force_opaque;
   nir_def *force_not_opaque;
   nir_def *terminate_on_first_hit;
   nir_def *no_cull_front;
   nir_def *no_cull_back;
   nir_def *no_cull_opaque;
   nir_def *no_cull_no_opaque;
   nir_def *no_skip_triangles;
   nir_def *no_skip_aabbs;
};
|
||||
|
||||
/* Data passed to the AABB callback for a candidate leaf intersection. */
struct lvp_leaf_intersection {
   nir_def *node_addr;
   nir_def *primitive_id;
   nir_def *geometry_id_and_flags;
   nir_def *opaque;
};

/* Called for each AABB leaf that passes the culling tests. */
typedef void (*lvp_aabb_intersection_cb)(nir_builder *b, struct lvp_leaf_intersection *intersection,
                                         const struct lvp_ray_traversal_args *args);
|
||||
|
||||
/* Data passed to the triangle callback for a candidate triangle hit. */
struct lvp_triangle_intersection {
   struct lvp_leaf_intersection base;

   nir_def *t;            /* hit distance along the ray */
   nir_def *frontface;    /* boolean: true for front-facing hits */
   nir_def *barycentrics; /* vec2 of barycentric coordinates */
};

/* Called for each triangle hit that passes the culling tests. */
typedef void (*lvp_triangle_intersection_cb)(nir_builder *b,
                                             struct lvp_triangle_intersection *intersection,
                                             const struct lvp_ray_traversal_args *args,
                                             const struct lvp_ray_flags *ray_flags);
|
||||
|
||||
/* Derefs of the caller-allocated variables that hold the traversal state. */
struct lvp_ray_traversal_vars {
   /* For each accepted hit, tmax will be set to the t value. This allows for automatic intersection
    * culling.
    */
   nir_deref_instr *tmax;

   /* Those variables change when entering and exiting BLASes. */
   nir_deref_instr *origin;
   nir_deref_instr *dir;
   nir_deref_instr *inv_dir;

   /* The base address of the current TLAS/BLAS. */
   nir_deref_instr *bvh_base;

   /* The node currently being processed; LVP_BVH_INVALID_NODE when the next
    * node must be popped from the stack. */
   nir_deref_instr *current_node;

   /* Traversal stack: stack_ptr indexes the next free slot, stack_base marks
    * the stack depth at which the current BLAS was entered (-1 in the TLAS). */
   nir_deref_instr *stack_base;
   nir_deref_instr *stack_ptr;
   nir_deref_instr *stack;

   /* Information about the current instance used for culling. */
   nir_deref_instr *instance_addr;
   nir_deref_instr *sbt_offset_and_flags;
};
|
||||
|
||||
/* Everything lvp_build_ray_traversal needs: the ray, the acceleration
 * structure root, traversal state variables and the hit callbacks. */
struct lvp_ray_traversal_args {
   nir_def *root_bvh_base;
   nir_def *flags;     /* SPIR-V ray flags */
   nir_def *cull_mask; /* instance cull mask, in the top 8 bits */
   nir_def *origin;
   nir_def *tmin;
   nir_def *dir;

   struct lvp_ray_traversal_vars vars;

   /* The increment/decrement used for lvp_ray_traversal_vars::stack, and how many entries are
    * available. stack_base is the base address of the stack. */
   uint32_t stack_stride;
   uint32_t stack_entries;
   uint32_t stack_base;

   /* Optional callbacks; a NULL callback skips that leaf type entirely. */
   lvp_aabb_intersection_cb aabb_cb;
   lvp_triangle_intersection_cb triangle_cb;

   /* Opaque pointer forwarded to the callbacks. */
   void *data;
};
|
||||
|
||||
/* Builds the ray traversal loop and returns whether traversal is incomplete, similar to
|
||||
* rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified
|
||||
* callbacks breaks out of the traversal loop.
|
||||
*/
|
||||
nir_def *lvp_build_ray_traversal(nir_builder *b, const struct lvp_ray_traversal_args *args);
|
||||
|
||||
#endif
|
||||
|
|
@ -25,6 +25,8 @@ liblvp_files = files(
|
|||
'lvp_lower_vulkan_resource.c',
|
||||
'lvp_lower_vulkan_resource.h',
|
||||
'lvp_lower_input_attachments.c',
|
||||
'lvp_nir_ray_tracing.c',
|
||||
'lvp_nir_ray_tracing.h',
|
||||
'lvp_pipe_sync.c',
|
||||
'lvp_pipeline.c',
|
||||
'lvp_pipeline_cache.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue