diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 1e7a41b62ff..a66277db04e 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -172,6 +172,8 @@ struct ray_query_vars { rq_variable *stack; uint32_t shared_base; uint32_t stack_entries; + + nir_intrinsic_instr *initialize; }; #define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name) @@ -387,6 +389,8 @@ lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1); rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1); + + vars->initialize = instr; } static nir_def * @@ -555,8 +559,18 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal } static nir_def * -lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device) +lower_rq_proceed(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars, + struct radv_device *device) { + nir_metadata_require(nir_cf_node_get_function(&instr->instr.block->cf_node), nir_metadata_dominance); + + bool ignore_cull_mask = false; + if (nir_block_dominates(vars->initialize->instr.block, instr->instr.block)) { + nir_src cull_mask = vars->initialize->src[3]; + if (nir_src_is_const(cull_mask) && nir_src_as_uint(cull_mask) == 0xFF) + ignore_cull_mask = true; + } + nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir"); nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7); @@ -591,6 +605,7 @@ lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, st .dir = rq_load_var(b, index, vars->direction), .vars = trav_vars, .stack_entries = vars->stack_entries, + .ignore_cull_mask = ignore_cull_mask, .stack_store_cb = store_stack_entry, .stack_load_cb = load_stack_entry, .aabb_cb = handle_candidate_aabb, @@ -695,7 +710,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device new_dest = lower_rq_load(&builder, index, intrinsic, vars); break; case nir_intrinsic_rq_proceed: - new_dest = lower_rq_proceed(&builder, index, vars, device); + new_dest = lower_rq_proceed(&builder, index, intrinsic, vars, device); break; case nir_intrinsic_rq_terminate: lower_rq_terminate(&builder, index, intrinsic, vars); diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 781df7aafc9..5aab4dc9e9c 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -574,12 +574,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1); - nir_def *instance_and_mask = nir_channel(b, instance_data, 2); - nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24))); - { - nir_jump(b, nir_jump_continue); + if (!args->ignore_cull_mask) { + nir_def *instance_and_mask = nir_channel(b, instance_data, 2); + nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24))); + { + nir_jump(b, nir_jump_continue); + } + nir_pop_if(b, NULL); } - nir_pop_if(b, NULL); nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1); diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index 949a301daff..9b098c2d193 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ -144,6 +144,8 @@ struct radv_ray_traversal_args { uint32_t stack_entries; uint32_t stack_base; + bool ignore_cull_mask; + radv_rt_stack_store_cb stack_store_cb; radv_rt_stack_load_cb stack_load_cb;