diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 53bcfd43710..7837f622a9a 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -69,6 +69,7 @@ libradv_files = files( 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', + 'radv_nir_lower_ray_queries.c', 'radv_nir_lower_ycbcr_textures.c', 'radv_pass.c', 'radv_perfcounter.c', diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c new file mode 100644 index 00000000000..894abcc25ca --- /dev/null +++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c @@ -0,0 +1,983 @@ +/* + * Copyright © 2022 Konstantin Seurer + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" + +#include "util/hash_table.h" + +#include "radv_acceleration_structure.h" +#include "radv_debug.h" +#include "radv_private.h" +#include "radv_rt_common.h" +#include "radv_shader.h" + +typedef struct { + nir_variable *variable; + unsigned array_length; +} rq_variable; + +static rq_variable * +rq_variable_create(nir_shader *shader, nir_function_impl *impl, unsigned array_length, + const struct glsl_type *type, const char *name) +{ + rq_variable *result = ralloc(shader ? (void *)shader : (void *)impl, rq_variable); + result->array_length = array_length; + + const struct glsl_type *variable_type = type; + if (array_length != 1) + variable_type = glsl_array_type(type, array_length, glsl_get_explicit_stride(type)); + + if (shader) { + result->variable = nir_variable_create(shader, nir_var_shader_temp, variable_type, name); + } else { + result->variable = nir_local_variable_create(impl, variable_type, name); + } + + return result; +} + +static nir_ssa_def * +nir_load_array(nir_builder *b, nir_variable *array, nir_ssa_def *index) +{ + return nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index)); +} + +static void +nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, + unsigned writemask) +{ + nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, + writemask); +} + +static nir_ssa_def * +rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var) +{ + if (var->array_length == 1) + return nir_load_var(b, var->variable); + + return nir_load_array(b, var->variable, index); +} + +static void +rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, + unsigned writemask) +{ + if (var->array_length == 1) { + nir_store_var(b, var->variable, value, writemask); + } else { + nir_store_array(b, var->variable, index, value, writemask); + } +} + +static void +rq_copy_var(nir_builder *b, nir_ssa_def *index, rq_variable *dst, rq_variable *src, unsigned mask) +{ + rq_store_var(b, index, dst, rq_load_var(b, index, src), mask); +} + +static nir_ssa_def * +rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index) +{ + if (var->array_length == 1) + return nir_load_array(b, var->variable, array_index); + + return nir_load_deref( + b, + nir_build_deref_array( + b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index)); +} + +static void +rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, + nir_ssa_def *value, unsigned writemask) +{ + if (var->array_length == 1) { + nir_store_array(b, var->variable, array_index, value, writemask); + } else { + nir_store_deref( + b, + nir_build_deref_array( + b, nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index), array_index), + value, writemask); + } +} + +struct ray_query_traversal_vars { + rq_variable *origin; + rq_variable *direction; + + rq_variable *inv_dir; + rq_variable *bvh_base; + rq_variable *stack; + rq_variable *top_stack; +}; + +struct ray_query_intersection_vars { + rq_variable *primitive_id; + rq_variable *geometry_id_and_flags; + rq_variable *instance_id; + rq_variable *instance_addr; + rq_variable *intersection_type; + rq_variable *opaque; + rq_variable *frontface; + rq_variable *custom_instance_and_mask; + rq_variable *sbt_offset_and_flags; + rq_variable *barycentrics; + rq_variable *t; +}; + +struct ray_query_vars { + rq_variable *accel_struct; + rq_variable *flags; + rq_variable *cull_mask; + rq_variable *origin; + rq_variable *tmin; + rq_variable *direction; + + rq_variable *incomplete; + + struct ray_query_intersection_vars closest; + struct ray_query_intersection_vars candidate; + + struct ray_query_traversal_vars trav; + + rq_variable *stack; +}; + +#define VAR_NAME(name) \ + strcat(strcpy(ralloc_size(impl, strlen(base_name) + strlen(name) + 1), base_name), name) + +static struct ray_query_traversal_vars +init_ray_query_traversal_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_length, + const char *base_name) +{ + struct ray_query_traversal_vars result; + + const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + + result.origin = rq_variable_create(shader, impl, array_length, vec3_type, VAR_NAME("_origin")); + result.direction = + rq_variable_create(shader, impl, array_length, vec3_type, VAR_NAME("_direction")); + + result.inv_dir = rq_variable_create(shader, impl, array_length, vec3_type, VAR_NAME("_inv_dir")); + result.bvh_base = + rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(), VAR_NAME("_bvh_base")); + result.stack = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_stack")); + result.top_stack = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_top_stack")); + + return result; +} + +static struct ray_query_intersection_vars +init_ray_query_intersection_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_length, + const char *base_name) +{ + struct ray_query_intersection_vars result; + + const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2); + + result.primitive_id = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_primitive_id")); + result.geometry_id_and_flags = rq_variable_create(shader, impl, array_length, glsl_uint_type(), + VAR_NAME("_geometry_id_and_flags")); + result.instance_id = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_instance_id")); + result.instance_addr = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(), + VAR_NAME("_instance_addr")); + result.intersection_type = rq_variable_create(shader, impl, array_length, glsl_uint_type(), + VAR_NAME("_intersection_type")); + result.opaque = + rq_variable_create(shader, impl, array_length, glsl_bool_type(), VAR_NAME("_opaque")); + result.frontface = + rq_variable_create(shader, impl, array_length, glsl_bool_type(), VAR_NAME("_frontface")); + result.custom_instance_and_mask = rq_variable_create( + shader, impl, array_length, glsl_uint_type(), VAR_NAME("_custom_instance_and_mask")); + result.sbt_offset_and_flags = rq_variable_create(shader, impl, array_length, glsl_uint_type(), + VAR_NAME("_sbt_offset_and_flags")); + result.barycentrics = + rq_variable_create(shader, impl, array_length, vec2_type, VAR_NAME("_barycentrics")); + result.t = rq_variable_create(shader, impl, array_length, glsl_float_type(), VAR_NAME("_t")); + + return result; +} + +static void +init_ray_query_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_length, + struct ray_query_vars *dst, const char *base_name) +{ + const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + + dst->accel_struct = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(), + VAR_NAME("_accel_struct")); + dst->flags = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_flags")); + dst->cull_mask = + rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_cull_mask")); + dst->origin = rq_variable_create(shader, impl, array_length, vec3_type, VAR_NAME("_origin")); + dst->tmin = rq_variable_create(shader, impl, array_length, glsl_float_type(), VAR_NAME("_tmin")); + dst->direction = + rq_variable_create(shader, impl, array_length, vec3_type, VAR_NAME("_direction")); + + dst->incomplete = + rq_variable_create(shader, impl, array_length, glsl_bool_type(), VAR_NAME("_incomplete")); + + dst->closest = + init_ray_query_intersection_vars(shader, impl, array_length, VAR_NAME("_closest")); + dst->candidate = + init_ray_query_intersection_vars(shader, impl, array_length, VAR_NAME("_candidate")); + + dst->trav = init_ray_query_traversal_vars(shader, impl, array_length, VAR_NAME("_top")); + + dst->stack = rq_variable_create(shader, impl, array_length, + glsl_array_type(glsl_uint_type(), MAX_STACK_ENTRY_COUNT, + glsl_get_explicit_stride(glsl_uint_type())), + VAR_NAME("_stack")); +} + +#undef VAR_NAME + +static void +lower_ray_query(nir_shader *shader, nir_function_impl *impl, nir_variable *ray_query, + struct hash_table *ht) +{ + struct ray_query_vars *vars = ralloc(impl, struct ray_query_vars); + + unsigned array_length = 1; + if (glsl_type_is_array(ray_query->type)) + array_length = glsl_get_length(ray_query->type); + + init_ray_query_vars(shader, impl, array_length, vars, + ray_query->name == NULL ? "" : ray_query->name); + + _mesa_hash_table_insert(ht, ray_query, vars); +} + +static void +copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars) +{ + rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3); + rq_copy_var(b, index, vars->closest.custom_instance_and_mask, + vars->candidate.custom_instance_and_mask, 0x1); + rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, + 0x1); + rq_copy_var(b, index, vars->closest.instance_addr, vars->candidate.instance_addr, 0x1); + rq_copy_var(b, index, vars->closest.instance_id, vars->candidate.instance_id, 0x1); + rq_copy_var(b, index, vars->closest.intersection_type, vars->candidate.intersection_type, 0x1); + rq_copy_var(b, index, vars->closest.opaque, vars->candidate.opaque, 0x1); + rq_copy_var(b, index, vars->closest.frontface, vars->candidate.frontface, 0x1); + rq_copy_var(b, index, vars->closest.sbt_offset_and_flags, vars->candidate.sbt_offset_and_flags, + 0x1); + rq_copy_var(b, index, vars->closest.primitive_id, vars->candidate.primitive_id, 0x1); + rq_copy_var(b, index, vars->closest.t, vars->candidate.t, 0x1); +} + +static void +insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, + bool break_on_terminate) +{ + nir_ssa_def *terminate_on_first_hit = + nir_ine(b, + nir_iand(b, rq_load_var(b, index, vars->flags), + nir_imm_int(b, 4 /* TerminateOnFirstHitKHR */)), + nir_imm_int(b, 0)); + nir_push_if(b, terminate_on_first_hit); + { + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1); + if (break_on_terminate) + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); +} + +static void +lower_rq_confirm_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, + struct ray_query_vars *vars) +{ + copy_candidate_to_closest(b, index, vars); + insert_terminate_on_first_hit(b, index, vars, false); +} + +static void +lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, + struct ray_query_vars *vars) +{ + nir_push_if(b, nir_iand(b, nir_flt(b, instr->src[1].ssa, rq_load_var(b, index, vars->closest.t)), + nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin)))); + { + copy_candidate_to_closest(b, index, vars); + insert_terminate_on_first_hit(b, index, vars, false); + rq_store_var(b, index, vars->closest.t, instr->src[1].ssa, 0x1); + } + nir_pop_if(b, NULL); +} + +enum rq_intersection_type { + intersection_type_none, + intersection_type_triangle, + intersection_type_aabb +}; + +static void +lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, + struct ray_query_vars *vars) +{ + rq_store_var(b, index, vars->accel_struct, instr->src[1].ssa, 0x1); + rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1); + rq_store_var(b, index, vars->cull_mask, nir_iand(b, instr->src[3].ssa, nir_imm_int(b, 0xff)), + 0x1); + + rq_store_var(b, index, vars->origin, instr->src[4].ssa, 0x7); + rq_store_var(b, index, vars->trav.origin, instr->src[4].ssa, 0x7); + + rq_store_var(b, index, vars->tmin, instr->src[5].ssa, 0x1); + + rq_store_var(b, index, vars->direction, instr->src[6].ssa, 0x7); + rq_store_var(b, index, vars->trav.direction, instr->src[6].ssa, 0x7); + + nir_ssa_def *vec3ones = nir_channels(b, nir_imm_vec4(b, 1.0, 1.0, 1.0, 1.0), 0x7); + rq_store_var(b, index, vars->trav.inv_dir, nir_fdiv(b, vec3ones, instr->src[6].ssa), 0x7); + + rq_store_var(b, index, vars->closest.t, instr->src[7].ssa, 0x1); + rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), + 0x1); + + nir_ssa_def *accel_struct = rq_load_var(b, index, vars->accel_struct); + + nir_push_if(b, nir_ine(b, accel_struct, nir_imm_int64(b, 0))); + { + rq_store_var(b, index, vars->trav.bvh_base, build_addr_to_node(b, accel_struct), 1); + + nir_ssa_def *bvh_root = + nir_build_load_global(b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE, + .align_mul = 64, .align_offset = 0); + + rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 1), 0x1); + rq_store_array(b, index, vars->stack, nir_imm_int(b, 0), bvh_root, 0x1); + + rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1); + + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, true), 0x1); + } + nir_push_else(b, NULL); + { + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1); + } + nir_pop_if(b, NULL); +} + +static nir_ssa_def * +lower_rq_load(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, + nir_ssa_def *committed, nir_ray_query_value value, unsigned column) +{ + switch (value) { + case nir_ray_query_value_flags: + return rq_load_var(b, index, vars->flags); + case nir_ray_query_value_intersection_barycentrics: + return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.barycentrics), + rq_load_var(b, index, vars->candidate.barycentrics)); + case nir_ray_query_value_intersection_candidate_aabb_opaque: + return nir_iand(b, rq_load_var(b, index, vars->candidate.opaque), + nir_ieq(b, rq_load_var(b, index, vars->candidate.intersection_type), + nir_imm_int(b, intersection_type_aabb))); + case nir_ray_query_value_intersection_front_face: + return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.frontface), + rq_load_var(b, index, vars->candidate.frontface)); + case nir_ray_query_value_intersection_geometry_index: + return nir_iand( + b, + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.geometry_id_and_flags), + rq_load_var(b, index, vars->candidate.geometry_id_and_flags)), + nir_imm_int(b, 0xFFFFFF)); + case nir_ray_query_value_intersection_instance_custom_index: + return nir_iand( + b, + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.custom_instance_and_mask), + rq_load_var(b, index, vars->candidate.custom_instance_and_mask)), + nir_imm_int(b, 0xFFFFFF)); + case nir_ray_query_value_intersection_instance_id: + return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_id), + rq_load_var(b, index, vars->candidate.instance_id)); + case nir_ray_query_value_intersection_instance_sbt_index: + return nir_iand( + b, + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.sbt_offset_and_flags), + rq_load_var(b, index, vars->candidate.sbt_offset_and_flags)), + nir_imm_int(b, 0xFFFFFF)); + case nir_ray_query_value_intersection_object_ray_direction: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + nir_ssa_def *wto_matrix[3]; + nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); + return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction), wto_matrix, false); + } + case nir_ray_query_value_intersection_object_ray_origin: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + nir_ssa_def *wto_matrix[] = { + nir_build_load_global(b, 4, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 16)), + .align_mul = 64, .align_offset = 16), + nir_build_load_global(b, 4, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 32)), + .align_mul = 64, .align_offset = 32), + nir_build_load_global(b, 4, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 48)), + .align_mul = 64, .align_offset = 48)}; + return nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin), wto_matrix); + } + case nir_ray_query_value_intersection_object_to_world: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + + if (column == 3) { + nir_ssa_def *wto_matrix[3]; + nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); + + nir_ssa_def *vals[3]; + for (unsigned i = 0; i < 3; ++i) + vals[i] = nir_channel(b, wto_matrix[i], column); + + return nir_vec(b, vals, 3); + } + + return nir_build_load_global( + b, 3, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 92 + column * 12)), + .align_mul = 4, .align_offset = 0); + } + case nir_ray_query_value_intersection_primitive_index: + return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.primitive_id), + rq_load_var(b, index, vars->candidate.primitive_id)); + case nir_ray_query_value_intersection_t: + return nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.t), + rq_load_var(b, index, vars->candidate.t)); + case nir_ray_query_value_intersection_type: + return nir_bcsel( + b, committed, rq_load_var(b, index, vars->closest.intersection_type), + nir_isub(b, rq_load_var(b, index, vars->candidate.intersection_type), nir_imm_int(b, 1))); + case nir_ray_query_value_intersection_world_to_object: { + nir_ssa_def *instance_node_addr = + nir_bcsel(b, committed, rq_load_var(b, index, vars->closest.instance_addr), + rq_load_var(b, index, vars->candidate.instance_addr)); + + nir_ssa_def *wto_matrix[3]; + nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); + + nir_ssa_def *vals[3]; + for (unsigned i = 0; i < 3; ++i) + vals[i] = nir_channel(b, wto_matrix[i], column); + + if (column == 3) + return nir_fneg(b, nir_build_vec3_mat_mult(b, nir_vec(b, vals, 3), wto_matrix, false)); + + return nir_vec(b, vals, 3); + } + case nir_ray_query_value_tmin: + return rq_load_var(b, index, vars->tmin); + case nir_ray_query_value_world_ray_direction: + return rq_load_var(b, index, vars->direction); + case nir_ray_query_value_world_ray_origin: + return rq_load_var(b, index, vars->origin); + default: + unreachable("Invalid nir_ray_query_value!"); + } + + return NULL; +} + +static void +insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, nir_ssa_def *index, + nir_ssa_def *result, struct ray_query_vars *vars, + nir_ssa_def *bvh_node) +{ + nir_ssa_def *dist = nir_vector_extract(b, result, nir_imm_int(b, 0)); + nir_ssa_def *div = nir_vector_extract(b, result, nir_imm_int(b, 1)); + dist = nir_fdiv(b, dist, div); + nir_ssa_def *frontface = nir_flt(b, nir_imm_float(b, 0), div); + nir_ssa_def *switch_ccw = nir_ine( + b, + nir_iand( + b, rq_load_var(b, index, vars->candidate.sbt_offset_and_flags), + nir_imm_int(b, 2 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT */)), + nir_imm_int(b, 0)); + frontface = nir_ixor(b, frontface, switch_ccw); + rq_store_var(b, index, vars->candidate.frontface, frontface, 0x1); + + nir_ssa_def *not_cull = nir_ieq(b, + nir_iand(b, rq_load_var(b, index, vars->flags), + nir_imm_int(b, 256 /* RayFlagsSkipTriangles */)), + nir_imm_int(b, 0)); + nir_ssa_def *not_facing_cull = nir_ieq( + b, + nir_iand(b, rq_load_var(b, index, vars->flags), + nir_bcsel(b, frontface, nir_imm_int(b, 32 /* RayFlagsCullFrontFacingTriangles */), + nir_imm_int(b, 16 /* RayFlagsCullBackFacingTriangles */))), + nir_imm_int(b, 0)); + + not_cull = nir_iand( + b, not_cull, + nir_ior( + b, not_facing_cull, + nir_ine( + b, + nir_iand( + b, rq_load_var(b, index, vars->candidate.sbt_offset_and_flags), + nir_imm_int(b, 1 << 24 /* VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT */)), + nir_imm_int(b, 0)))); + + nir_push_if(b, nir_iand(b, + nir_iand(b, nir_flt(b, dist, rq_load_var(b, index, vars->closest.t)), + nir_fge(b, dist, rq_load_var(b, index, vars->tmin))), + not_cull)); + { + nir_ssa_def *triangle_info = nir_build_load_global( + b, 2, 32, + nir_iadd(b, build_node_to_addr(device, b, bvh_node), + nir_imm_int64(b, offsetof(struct radv_bvh_triangle_node, triangle_id))), + .align_mul = 4, .align_offset = 0); + nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); + nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); + nir_ssa_def *is_opaque = + hit_is_opaque(b, rq_load_var(b, index, vars->candidate.sbt_offset_and_flags), + rq_load_var(b, index, vars->flags), geometry_id_and_flags); + + not_cull = + nir_ieq(b, + nir_iand(b, rq_load_var(b, index, vars->flags), + nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))), + nir_imm_int(b, 0)); + nir_push_if(b, not_cull); + { + nir_ssa_def *divs[2] = {div, div}; + nir_ssa_def *ij = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); + + rq_store_var(b, index, vars->candidate.barycentrics, ij, 3); + rq_store_var(b, index, vars->candidate.primitive_id, primitive_id, 1); + rq_store_var(b, index, vars->candidate.geometry_id_and_flags, geometry_id_and_flags, 1); + rq_store_var(b, index, vars->candidate.t, dist, 0x1); + rq_store_var(b, index, vars->candidate.opaque, is_opaque, 0x1); + rq_store_var(b, index, vars->candidate.intersection_type, + nir_imm_int(b, intersection_type_triangle), 0x1); + + nir_push_if(b, is_opaque); + { + copy_candidate_to_closest(b, index, vars); + insert_terminate_on_first_hit(b, index, vars, true); + } + nir_push_else(b, NULL); + { + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); + } + nir_pop_if(b, NULL); + } + nir_pop_if(b, NULL); +} + +static void +insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, nir_ssa_def *index, + struct ray_query_vars *vars, nir_ssa_def *bvh_node) +{ + nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *triangle_info = nir_build_load_global( + b, 2, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 24)), .align_mul = 4, .align_offset = 0); + nir_ssa_def *primitive_id = nir_channel(b, triangle_info, 0); + nir_ssa_def *geometry_id_and_flags = nir_channel(b, triangle_info, 1); + nir_ssa_def *is_opaque = + hit_is_opaque(b, rq_load_var(b, index, vars->candidate.sbt_offset_and_flags), + rq_load_var(b, index, vars->flags), geometry_id_and_flags); + + nir_ssa_def *not_skip_aabb = nir_ieq( + b, + nir_iand(b, rq_load_var(b, index, vars->flags), nir_imm_int(b, 512 /* RayFlagsSkipAABB */)), + nir_imm_int(b, 0)); + nir_ssa_def *not_cull = nir_iand( + b, not_skip_aabb, + nir_ieq(b, + nir_iand(b, rq_load_var(b, index, vars->flags), + nir_bcsel(b, is_opaque, nir_imm_int(b, 0x40), nir_imm_int(b, 0x80))), + nir_imm_int(b, 0))); + nir_push_if(b, not_cull); + { + nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7); + nir_ssa_def *vec3_inf = + nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7); + + nir_ssa_def *bvh_lo = nir_build_load_global( + b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 0)), .align_mul = 4, .align_offset = 0); + nir_ssa_def *bvh_hi = nir_build_load_global( + b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 12)), .align_mul = 4, .align_offset = 0); + + bvh_lo = nir_fsub(b, bvh_lo, rq_load_var(b, index, vars->trav.origin)); + bvh_hi = nir_fsub(b, bvh_hi, rq_load_var(b, index, vars->trav.origin)); + nir_ssa_def *t_vec = + nir_fmin(b, nir_fmul(b, bvh_lo, rq_load_var(b, index, vars->trav.inv_dir)), + nir_fmul(b, bvh_hi, rq_load_var(b, index, vars->trav.inv_dir))); + nir_ssa_def *t2_vec = + nir_fmax(b, nir_fmul(b, bvh_lo, rq_load_var(b, index, vars->trav.inv_dir)), + nir_fmul(b, bvh_hi, rq_load_var(b, index, vars->trav.inv_dir))); + /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */ + t2_vec = nir_bcsel(b, nir_feq(b, rq_load_var(b, index, vars->trav.direction), vec3_zero), + vec3_inf, t2_vec); + + nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1)); + t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2)); + + nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1)); + t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2)); + + nir_push_if(b, nir_iand(b, nir_flt(b, t_min, rq_load_var(b, index, vars->closest.t)), + nir_fge(b, t_max, rq_load_var(b, index, vars->tmin)))); + { + rq_store_var(b, index, vars->candidate.t, + nir_fmax(b, t_min, rq_load_var(b, index, vars->tmin)), 0x1); + rq_store_var(b, index, vars->candidate.primitive_id, primitive_id, 1); + rq_store_var(b, index, vars->candidate.geometry_id_and_flags, geometry_id_and_flags, 1); + rq_store_var(b, index, vars->candidate.opaque, is_opaque, 0x1); + rq_store_var(b, index, vars->candidate.intersection_type, + nir_imm_int(b, intersection_type_aabb), 0x1); + + nir_push_if(b, is_opaque); + { + copy_candidate_to_closest(b, index, vars); + } + nir_pop_if(b, NULL); + + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); + } + nir_pop_if(b, NULL); +} + +static nir_ssa_def * +lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, + struct radv_device *device) +{ + nir_push_if(b, rq_load_var(b, index, vars->incomplete)); + { + nir_ssa_def *desc = create_bvh_descriptor(b); + nir_ssa_def *vec3ones = nir_channels(b, nir_imm_vec4(b, 1.0, 1.0, 1.0, 1.0), 0x7); + + nir_push_loop(b); + { + nir_push_if(b, nir_uge(b, rq_load_var(b, index, vars->trav.top_stack), + rq_load_var(b, index, vars->trav.stack))); + { + nir_push_if(b, nir_ieq(b, rq_load_var(b, index, vars->trav.stack), nir_imm_int(b, 0))); + { + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1); + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); + + rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1); + rq_store_var(b, index, vars->trav.bvh_base, + build_addr_to_node(b, rq_load_var(b, index, vars->accel_struct)), 1); + rq_store_var(b, index, vars->trav.origin, rq_load_var(b, index, vars->origin), 7); + rq_store_var(b, index, vars->trav.direction, rq_load_var(b, index, vars->direction), 7); + rq_store_var(b, index, vars->trav.inv_dir, + nir_fdiv(b, vec3ones, rq_load_var(b, index, vars->direction)), 7); + } + nir_pop_if(b, NULL); + + rq_store_var(b, index, vars->trav.stack, + nir_isub(b, rq_load_var(b, index, vars->trav.stack), nir_imm_int(b, 1)), 1); + + nir_ssa_def *bvh_node = + rq_load_array(b, index, vars->stack, rq_load_var(b, index, vars->trav.stack)); + nir_ssa_def *bvh_node_type = bvh_node; + + bvh_node = + nir_iadd(b, rq_load_var(b, index, vars->trav.bvh_base), nir_u2u(b, bvh_node, 64)); + nir_ssa_def *intrinsic_result = NULL; + if (device->physical_device->rad_info.chip_class >= GFX10_3 && + !(device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT)) { + intrinsic_result = nir_bvh64_intersect_ray_amd( + b, 32, desc, nir_unpack_64_2x32(b, bvh_node), rq_load_var(b, index, vars->closest.t), + rq_load_var(b, index, vars->trav.origin), + rq_load_var(b, index, vars->trav.direction), + rq_load_var(b, index, vars->trav.inv_dir)); + } + + /* if (node.type_flags & aabb) */ + nir_push_if(b, + nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 4)), nir_imm_int(b, 0))); + { + /* if (node.type_flags & leaf) */ + nir_push_if( + b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 2)), nir_imm_int(b, 0))); + { + /* custom */ + nir_push_if( + b, nir_ine(b, nir_iand(b, bvh_node_type, nir_imm_int(b, 1)), nir_imm_int(b, 0))); + { + insert_traversal_aabb_case(device, b, index, vars, bvh_node); + } + nir_push_else(b, NULL); + { + /* instance */ + nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *instance_data = nir_build_load_global( + b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); + nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); + nir_ssa_def *instance_mask = nir_ushr(b, instance_and_mask, nir_imm_int(b, 24)); + + nir_push_if( + b, + nir_ieq(b, nir_iand(b, instance_mask, rq_load_var(b, index, vars->cull_mask)), + nir_imm_int(b, 0))); + { + nir_jump(b, nir_jump_continue); + } + nir_pop_if(b, NULL); + + nir_ssa_def *wto_matrix[] = { + nir_build_load_global(b, 4, 32, + nir_iadd(b, instance_node_addr, nir_imm_int64(b, 16)), + .align_mul = 64, .align_offset = 16), + nir_build_load_global(b, 4, 32, + nir_iadd(b, instance_node_addr, nir_imm_int64(b, 32)), + .align_mul = 64, .align_offset = 32), + nir_build_load_global(b, 4, 32, + nir_iadd(b, instance_node_addr, nir_imm_int64(b, 48)), + .align_mul = 64, .align_offset = 48)}; + nir_ssa_def *instance_id = nir_build_load_global( + b, 1, 32, nir_iadd(b, instance_node_addr, nir_imm_int64(b, 88)), + .align_mul = 4, .align_offset = 0); + + rq_store_var(b, index, vars->trav.top_stack, + rq_load_var(b, index, vars->trav.stack), 1); + rq_store_var(b, index, vars->trav.bvh_base, + build_addr_to_node( + b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))), + 1); + + rq_store_array(b, index, vars->stack, rq_load_var(b, index, vars->trav.stack), + nir_iand(b, nir_channel(b, instance_data, 0), nir_imm_int(b, 63)), + 0x1); + rq_store_var( + b, index, vars->trav.stack, + nir_iadd(b, rq_load_var(b, index, vars->trav.stack), nir_imm_int(b, 1)), 1); + + rq_store_var(b, index, vars->trav.origin, + nir_build_vec3_mat_mult_pre(b, rq_load_var(b, index, vars->origin), + wto_matrix), + 7); + rq_store_var(b, index, vars->trav.direction, + nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction), + wto_matrix, false), + 7); + rq_store_var(b, index, vars->trav.inv_dir, + nir_fdiv(b, vec3ones, rq_load_var(b, index, vars->trav.direction)), + 7); + + rq_store_var(b, index, vars->candidate.sbt_offset_and_flags, + nir_channel(b, instance_data, 3), 1); + rq_store_var(b, index, vars->candidate.custom_instance_and_mask, + instance_and_mask, 1); + rq_store_var(b, index, vars->candidate.instance_id, instance_id, 1); + rq_store_var(b, index, vars->candidate.instance_addr, instance_node_addr, 1); + } + nir_pop_if(b, NULL); + } + nir_push_else(b, NULL); + { + nir_ssa_def *result = intrinsic_result; + if (!result) { + /* If we didn't run the intrinsic cause the hardware didn't support it, + * emulate ray/box intersection here */ + result = intersect_ray_amd_software_box( + device, b, bvh_node, rq_load_var(b, index, vars->closest.t), + rq_load_var(b, index, vars->trav.origin), + rq_load_var(b, index, vars->trav.direction), + rq_load_var(b, index, vars->trav.inv_dir)); + } + + /* box */ + for (unsigned i = 4; i-- > 0;) { + nir_ssa_def *new_node = nir_vector_extract(b, result, nir_imm_int(b, i)); + nir_push_if(b, nir_ine(b, new_node, nir_imm_int(b, 0xffffffff))); + { + rq_store_array(b, index, vars->stack, rq_load_var(b, index, vars->trav.stack), + new_node, 0x1); + rq_store_var( + b, index, vars->trav.stack, + nir_iadd(b, rq_load_var(b, index, vars->trav.stack), nir_imm_int(b, 1)), 1); + } + nir_pop_if(b, NULL); + } + } + nir_pop_if(b, NULL); + } + nir_push_else(b, NULL); + { + nir_ssa_def *result = intrinsic_result; + if (!result) { + /* If we didn't run the intrinsic cause the hardware didn't support it, + * emulate ray/tri intersection here */ + result = intersect_ray_amd_software_tri(device, b, bvh_node, + rq_load_var(b, index, vars->closest.t), + rq_load_var(b, index, vars->trav.origin), + rq_load_var(b, index, vars->trav.direction), + rq_load_var(b, index, vars->trav.inv_dir)); + } + insert_traversal_triangle_case(device, b, index, result, vars, bvh_node); + } + nir_pop_if(b, NULL); + } + nir_pop_loop(b, NULL); + } + nir_pop_if(b, NULL); + + return rq_load_var(b, index, vars->incomplete); +} + +static void +lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, + struct ray_query_vars *vars) +{ + rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, false), 0x1); +} + +static bool +is_rq_intrinsic(nir_intrinsic_op intrinsic) +{ + switch (intrinsic) { + case nir_intrinsic_rq_confirm_intersection: + case nir_intrinsic_rq_generate_intersection: + case nir_intrinsic_rq_initialize: + case nir_intrinsic_rq_load: + case nir_intrinsic_rq_proceed: + case nir_intrinsic_rq_terminate: + return true; + default: + return false; + } +} + +bool +radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device) +{ + bool contains_ray_query = false; + struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL); + + nir_foreach_variable_in_list (var, &shader->variables) { + if (!var->data.ray_query) + continue; + + lower_ray_query(shader, NULL, var, query_ht); + contains_ray_query = true; + } + + nir_foreach_function (function, shader) { + if (!function->impl) + continue; + + nir_builder builder; + nir_builder_init(&builder, function->impl); + + nir_foreach_variable_in_list (var, &function->impl->locals) { + if (!var->data.ray_query) + continue; + + lower_ray_query(NULL, function->impl, var, query_ht); + contains_ray_query = true; + } + + if (!contains_ray_query) + continue; + + nir_foreach_block (block, function->impl) { + nir_foreach_instr_safe (instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); + + if (!is_rq_intrinsic(intrinsic->intrinsic)) + continue; + + nir_deref_instr *ray_query_deref = + nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr); + nir_ssa_def *index = NULL; + + if (ray_query_deref->deref_type == nir_deref_type_array) { + index = ray_query_deref->arr.index.ssa; + ray_query_deref = nir_instr_as_deref(ray_query_deref->parent.ssa->parent_instr); + } + + assert(ray_query_deref->deref_type == nir_deref_type_var); + + struct ray_query_vars *vars = + (struct ray_query_vars *)_mesa_hash_table_search(query_ht, ray_query_deref->var) + ->data; + + builder.cursor = nir_before_instr(instr); + + nir_ssa_def *new_dest = NULL; + + switch (intrinsic->intrinsic) { + case nir_intrinsic_rq_confirm_intersection: + lower_rq_confirm_intersection(&builder, index, intrinsic, vars); + break; + case nir_intrinsic_rq_generate_intersection: + lower_rq_generate_intersection(&builder, index, intrinsic, vars); + break; + case nir_intrinsic_rq_initialize: + lower_rq_initialize(&builder, index, intrinsic, vars); + break; + case nir_intrinsic_rq_load: + new_dest = lower_rq_load(&builder, index, vars, intrinsic->src[1].ssa, + (nir_ray_query_value)nir_intrinsic_base(intrinsic), + nir_intrinsic_column(intrinsic)); + break; + case nir_intrinsic_rq_proceed: + new_dest = lower_rq_proceed(&builder, index, vars, device); + break; + case nir_intrinsic_rq_terminate: + lower_rq_terminate(&builder, index, intrinsic, vars); + break; + default: + unreachable("Unsupported ray query intrinsic!"); + } + + if (new_dest) + nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, new_dest); + + nir_instr_remove(instr); + nir_instr_free(instr); + } + } + + nir_metadata_preserve(function->impl, nir_metadata_none); + } + + ralloc_free(query_ht); + + return contains_ray_query; +} diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index b0d31452676..ae904cb2764 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -736,6 +736,11 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module * nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + if (nir->info.ray_queries > 0) { + NIR_PASS_V(nir, nir_opt_ray_queries); + NIR_PASS_V(nir, radv_nir_lower_ray_queries, device); + } + if (nir->info.stage == MESA_SHADER_GEOMETRY) { unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index e6842b1a473..bfc1fb9feba 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -509,6 +509,8 @@ void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets); bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout); +bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device); + nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info,