From e0cf4fbf38e30cc2c4c286e91797abd7c6e2ab1d Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 16 Sep 2023 19:35:21 +0200 Subject: [PATCH] radv/ray_queries: Skip cull_mask handling if it is FF Stats for Metro Exodus: Totals from 26 (0.99% of 2627) affected shaders: Instrs: 14586 -> 14232 (-2.43%) CodeSize: 77024 -> 75192 (-2.38%) VGPRs: 1408 -> 1208 (-14.20%) Latency: 315076 -> 309898 (-1.64%) InvThroughput: 42345 -> 41677 (-1.58%) VClause: 366 -> 374 (+2.19%) Copies: 2840 -> 2800 (-1.41%); split: -1.48%, +0.07% Branches: 587 -> 561 (-4.43%) PreSGPRs: 897 -> 853 (-4.91%) PreVGPRs: 1290 -> 1122 (-13.02%) Reviewed-by: Bas Nieuwenhuizen Part-of: --- .../vulkan/nir/radv_nir_lower_ray_queries.c | 19 +++++++++++++++++-- src/amd/vulkan/radv_rt_common.c | 12 +++++++----- src/amd/vulkan/radv_rt_common.h | 2 ++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 1e7a41b62ff..a66277db04e 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -172,6 +172,8 @@ struct ray_query_vars { rq_variable *stack; uint32_t shared_base; uint32_t stack_entries; + + nir_intrinsic_instr *initialize; }; #define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name) @@ -387,6 +389,8 @@ lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1); rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1); + + vars->initialize = instr; } static nir_def * @@ -555,8 +559,18 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal } static nir_def * -lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device) +lower_rq_proceed(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars, + struct radv_device *device) { + nir_metadata_require(nir_cf_node_get_function(&instr->instr.block->cf_node), nir_metadata_dominance); + + bool ignore_cull_mask = false; + if (nir_block_dominates(vars->initialize->instr.block, instr->instr.block)) { + nir_src cull_mask = vars->initialize->src[3]; + if (nir_src_is_const(cull_mask) && nir_src_as_uint(cull_mask) == 0xFF) + ignore_cull_mask = true; + } + nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir"); nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7); @@ -591,6 +605,7 @@ lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, st .dir = rq_load_var(b, index, vars->direction), .vars = trav_vars, .stack_entries = vars->stack_entries, + .ignore_cull_mask = ignore_cull_mask, .stack_store_cb = store_stack_entry, .stack_load_cb = load_stack_entry, .aabb_cb = handle_candidate_aabb, @@ -695,7 +710,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device new_dest = lower_rq_load(&builder, index, intrinsic, vars); break; case nir_intrinsic_rq_proceed: - new_dest = lower_rq_proceed(&builder, index, vars, device); + new_dest = lower_rq_proceed(&builder, index, intrinsic, vars, device); break; case nir_intrinsic_rq_terminate: lower_rq_terminate(&builder, index, intrinsic, vars); diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 781df7aafc9..5aab4dc9e9c 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -574,12 +574,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1); - nir_def *instance_and_mask = nir_channel(b, instance_data, 2); - nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24))); - { - nir_jump(b, nir_jump_continue); + if (!args->ignore_cull_mask) { + nir_def *instance_and_mask = nir_channel(b, instance_data, 2); + nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24))); + { + nir_jump(b, nir_jump_continue); + } + nir_pop_if(b, NULL); } - nir_pop_if(b, NULL); nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1); diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index 949a301daff..9b098c2d193 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ -144,6 +144,8 @@ struct radv_ray_traversal_args { uint32_t stack_entries; uint32_t stack_base; + bool ignore_cull_mask; + radv_rt_stack_store_cb stack_store_cb; radv_rt_stack_load_cb stack_load_cb;