ac/nir/ngg: Prepare deferred shader part before adding culling code.

The previous concept was to emit the non-deferred shader part
first, including the culling code, and then modify the
non-deferred part accordingly.

This caused some issues because it was really impossible to tell
which sysvals the deferred part needs after DCE, so we had to
run an additional cleanup pass afterwards.

The new concept is to prepare the deferred part first by applying
reusable variables (from the non-deferred part) and run DCE.
This opens the possibility to accurately gather info about what
the deferred part needs.

This idea is further expanded in the next commits.

Fossil DB stats on Navi 21:

Totals from 17 (0.02% of 79377) affected shaders:
Instrs: 18063 -> 18064 (+0.01%)
CodeSize: 93368 -> 93372 (+0.00%)
Latency: 49889 -> 49899 (+0.02%); split: -0.01%, +0.03%
SALU: 2416 -> 2417 (+0.04%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22073>
This commit is contained in:
Timur Kristóf 2025-03-20 17:05:15 +01:00 committed by Marge Bot
parent e9e58fa412
commit e4c91c01e3

View file

@ -934,29 +934,22 @@ save_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
}
/**
* Reuses suitable variables from the top part of the shader,
* by deleting their stores from the bottom part.
* Reuses suitable variables from the non-deferred (top) part of the shader,
* by deleting their stores from the deferred (bottom) part.
*/
static void
apply_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
apply_reusable_variables(nir_function_impl *impl, lower_ngg_nogs_state *s)
{
if (!u_vector_length(&s->reusable_nondeferred_variables)) {
u_vector_finish(&s->reusable_nondeferred_variables);
return;
}
nir_foreach_block_reverse_safe(block, b->impl) {
nir_foreach_block_reverse_safe(block, impl) {
nir_foreach_instr_reverse_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
/* When we found any of these intrinsics, it means
* we reached the top part and we must stop.
*/
if (intrin->intrinsic == nir_intrinsic_sendmsg_amd)
goto done;
if (intrin->intrinsic != nir_intrinsic_store_deref)
continue;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
@ -972,7 +965,6 @@ apply_reusable_variables(nir_builder *b, lower_ngg_nogs_state *s)
}
}
done:
u_vector_finish(&s->reusable_nondeferred_variables);
}
@ -1056,6 +1048,35 @@ ngg_nogs_get_culling_pervertex_lds_size(gl_shader_stage stage,
return (lds_es_arg_0 + num_repacked * 4u) | 4u;
}
static nir_cf_list *
prepare_shader_for_culling(nir_shader *shader, nir_function_impl *impl,
nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *s)
{
/* Reinsert a clone of the original shader code. */
struct hash_table *orig_remap_table = _mesa_pointer_hash_table_create(NULL);
nir_cf_list_clone_and_reinsert(original_extracted_cf, &impl->cf_node, nir_after_impl(impl), orig_remap_table);
_mesa_hash_table_destroy(orig_remap_table, NULL);
/* Apply reusable variables. */
apply_reusable_variables(impl, s);
apply_repacked_pos_outputs(shader, s);
/* Cleanup. This is done so that we can accurately gather info from the deferred part. */
bool progress;
do {
progress = false;
NIR_PASS(progress, shader, nir_opt_undef);
NIR_PASS(progress, shader, nir_copy_prop);
NIR_PASS(progress, shader, nir_opt_dce);
NIR_PASS(progress, shader, nir_opt_dead_cf);
} while (progress);
/* Extract the shader code again. This will be reinserted as the deferred shader part. */
nir_cf_list *prepared_extracted = rzalloc(shader, nir_cf_list);
nir_cf_extract(prepared_extracted, nir_before_impl(impl), nir_after_impl(impl));
return prepared_extracted;
}
static void
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *s)
{
@ -1113,10 +1134,8 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
*/
nir_store_var(b, s->position_value_var, nir_imm_vec4(b, 0.0f, 0.0f, 0.0f, 1.0f), 0xfu);
/* Now reinsert a clone of the shader code */
struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
nir_cf_list_clone_and_reinsert(original_extracted_cf, &if_es_thread->cf_node, b->cursor, remap_table);
_mesa_hash_table_destroy(remap_table, NULL);
/* Now reinsert the shader code. */
nir_cf_reinsert(original_extracted_cf, b->cursor);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
/* Remember the current thread's shader arguments */
@ -1651,9 +1670,16 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
save_reusable_variables(b, &state);
}
nir_cf_list extracted;
nir_cf_extract(&extracted, nir_before_impl(impl),
nir_cf_list *extracted = rzalloc(shader, nir_cf_list);
nir_cf_extract(extracted, nir_before_impl(impl),
nir_after_impl(impl));
nir_cf_list *non_deferred_cf = NULL;
if (options->can_cull) {
non_deferred_cf = extracted;
extracted = prepare_shader_for_culling(shader, impl, extracted, &state);
}
b->cursor = nir_before_impl(impl);
ngg_nogs_init_vertex_indices_vars(b, impl, &state);
@ -1687,7 +1713,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
else
nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, &state), 0x1u);
} else {
add_deferred_attribute_culling(b, &extracted, &state);
add_deferred_attribute_culling(b, non_deferred_cf, &state);
ralloc_free(non_deferred_cf);
b->cursor = nir_after_impl(impl);
if (state.early_prim_export)
@ -1736,7 +1764,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
nir_if *if_es_thread = nir_push_if(b, es_thread);
{
/* Run the actual shader */
nir_cf_reinsert(&extracted, b->cursor);
nir_cf_reinsert(extracted, b->cursor);
ralloc_free(extracted);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
if (options->export_primitive_id)
@ -1744,14 +1773,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
}
nir_pop_if(b, if_es_thread);
if (options->can_cull) {
/* Replace uniforms. */
apply_reusable_variables(b, &state);
/* Reuse the position value calculated in the non-deferred shader part. */
apply_repacked_pos_outputs(shader, &state);
}
/* Gather outputs data and types */
ngg_nogs_gather_outputs(b, &if_es_thread->then_list, &state);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);