radv/ray_queries: Skip cull_mask handling if it is 0xFF

Stats for Metro Exodus:

Totals from 26 (0.99% of 2627) affected shaders:
Instrs: 14586 -> 14232 (-2.43%)
CodeSize: 77024 -> 75192 (-2.38%)
VGPRs: 1408 -> 1208 (-14.20%)
Latency: 315076 -> 309898 (-1.64%)
InvThroughput: 42345 -> 41677 (-1.58%)
VClause: 366 -> 374 (+2.19%)
Copies: 2840 -> 2800 (-1.41%); split: -1.48%, +0.07%
Branches: 587 -> 561 (-4.43%)
PreSGPRs: 897 -> 853 (-4.91%)
PreVGPRs: 1290 -> 1122 (-13.02%)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25268>
This commit is contained in:
Konstantin Seurer 2023-09-16 19:35:21 +02:00 committed by Marge Bot
parent 3e7850f97b
commit e0cf4fbf38
3 changed files with 26 additions and 7 deletions

View file

@@ -172,6 +172,8 @@ struct ray_query_vars {
 rq_variable *stack;
 uint32_t shared_base;
 uint32_t stack_entries;
+nir_intrinsic_instr *initialize;
 };
 #define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name)
@@ -387,6 +389,8 @@ lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr,
 rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1);
 rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
+vars->initialize = instr;
 }
 static nir_def *
@@ -555,8 +559,18 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
 }
 static nir_def *
-lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device)
+lower_rq_proceed(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
+struct radv_device *device)
 {
+nir_metadata_require(nir_cf_node_get_function(&instr->instr.block->cf_node), nir_metadata_dominance);
+bool ignore_cull_mask = false;
+if (nir_block_dominates(vars->initialize->instr.block, instr->instr.block)) {
+nir_src cull_mask = vars->initialize->src[3];
+if (nir_src_is_const(cull_mask) && nir_src_as_uint(cull_mask) == 0xFF)
+ignore_cull_mask = true;
+}
 nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
 nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7);
@@ -591,6 +605,7 @@ lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, st
 .dir = rq_load_var(b, index, vars->direction),
 .vars = trav_vars,
 .stack_entries = vars->stack_entries,
+.ignore_cull_mask = ignore_cull_mask,
 .stack_store_cb = store_stack_entry,
 .stack_load_cb = load_stack_entry,
 .aabb_cb = handle_candidate_aabb,
@@ -695,7 +710,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
 new_dest = lower_rq_load(&builder, index, intrinsic, vars);
 break;
 case nir_intrinsic_rq_proceed:
-new_dest = lower_rq_proceed(&builder, index, vars, device);
+new_dest = lower_rq_proceed(&builder, index, intrinsic, vars, device);
 break;
 case nir_intrinsic_rq_terminate:
 lower_rq_terminate(&builder, index, intrinsic, vars);

View file

@@ -574,12 +574,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
-nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
-nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
-{
-nir_jump(b, nir_jump_continue);
+if (!args->ignore_cull_mask) {
+nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
+nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
+{
+nir_jump(b, nir_jump_continue);
+}
+nir_pop_if(b, NULL);
 }
-nir_pop_if(b, NULL);
 nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
 nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);

View file

@@ -144,6 +144,8 @@ struct radv_ray_traversal_args {
 uint32_t stack_entries;
 uint32_t stack_base;
+bool ignore_cull_mask;
 radv_rt_stack_store_cb stack_store_cb;
 radv_rt_stack_load_cb stack_load_cb;