diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build index 90a3a2e5af9..a58ce0598f5 100644 --- a/src/nouveau/compiler/meson.build +++ b/src/nouveau/compiler/meson.build @@ -32,6 +32,7 @@ libnak_c_files = files( 'nak_nir_lower_scan_reduce.c', 'nak_nir_lower_tex.c', 'nak_nir_lower_vtg_io.c', + 'nak_nir_mark_lcssa_invariants.c', 'nak_nir_split_64bit_conversions.c', ) diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 39463097ca8..4d6d28ce8ff 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1045,10 +1045,13 @@ nak_postprocess_nir(nir_shader *nir, if (nak->sm < 70) OPT(nir, nak_nir_split_64bit_conversions); - nir_convert_to_lcssa(nir, true, true); + bool lcssa_progress = nir_convert_to_lcssa(nir, false, false); nir_divergence_analysis(nir); if (nak->sm >= 75) { + if (lcssa_progress) { + OPT(nir, nak_nir_mark_lcssa_invariants); + } if (OPT(nir, nak_nir_lower_non_uniform_ldcx)) { OPT(nir, nir_copy_prop); OPT(nir, nir_opt_dce); diff --git a/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c b/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c index 231a891dba8..d6929a8faa1 100644 --- a/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c +++ b/src/nouveau/compiler/nak_nir_lower_non_uniform_ldcx.c @@ -7,6 +7,13 @@ #include "nir_builder.h" #include "nir_vla.h" +/* + * This pass relies on nak_nir_mark_lcssa_invariants being run first, because it + * assumes that convergent values used in convergent control flow can be + * allocated to uniform registers. See the example in that file for more + * details. + */ + static void lower_ldcx_to_global(nir_builder *b, nir_intrinsic_instr *load) { diff --git a/src/nouveau/compiler/nak_nir_mark_lcssa_invariants.c b/src/nouveau/compiler/nak_nir_mark_lcssa_invariants.c new file mode 100644 index 00000000000..3166e87986e --- /dev/null +++ b/src/nouveau/compiler/nak_nir_mark_lcssa_invariants.c @@ -0,0 +1,118 @@ +/* + * Copyright 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "nak_private.h" +#include "nir_builder.h" + +/* + * Divergent loops can define convergent values that escape the loop. eg. + * + * div loop { + * con %72 = ... + * ... + * } + * ... = @ldcx_nv (%72, 0x30) + * + * When that happens, we would like to have the opportunity to move the value + * into a uniform register once we reach uniform control flow. This pass allows + * that by inserting nir_intrinsic_as_uniform after the loop-closed phi, which + * becomes r2ur in the backend. + * + * With nir_to_lcssa and this pass, our example becomes something like: + * + * div loop { + * con %72 = ... + * ... + * } + * con %73 = phi(%72, %72) + * con %74 = @as_uniform(%73) + * ... = @ldcx_nv (%74, 0x30) + * + * nir_to_lcssa(nir, false, false) and nir_divergence_analysis must be run + * before this pass. + */ + +static bool +lower_block(nir_builder *b, nir_block *block) +{ + bool progress = false; + + b->cursor = nir_after_phis(block); + nir_foreach_phi_safe(phi, block) { + if (phi->def.divergent) { + continue; + } + + nir_def* x = nir_as_uniform(b, &phi->def); + x->divergent = false; + nir_def_rewrite_uses_after(&phi->def, x, x->parent_instr); + + progress = true; + } + + return progress; +} + + +static bool +lower_cf_list(nir_builder *b, struct exec_list *cf_list) +{ + bool progress = false; + + foreach_list_typed_safe(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: + break; + + case nir_cf_node_if: { + nir_if *nif = nir_cf_node_as_if(node); + if (nir_src_is_divergent(&nif->condition)) { + nir_block *succ = nir_cf_node_as_block(nir_cf_node_next(node)); + progress |= lower_block(b, succ); + } else { + progress |= lower_cf_list(b, &nif->then_list); + progress |= lower_cf_list(b, &nif->else_list); + } + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + if (nir_loop_is_divergent(loop)) { + nir_block *succ = nir_cf_node_as_block(nir_cf_node_next(node)); + progress |= lower_block(b, succ); + } else { + progress |= lower_cf_list(b, &loop->body); + progress |= lower_cf_list(b, &loop->continue_list); + } + break; + } + + default: + unreachable("Unknown CF node type"); + } + } + + return progress; +} + +bool +nak_nir_mark_lcssa_invariants(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function_impl(impl, shader) { + nir_builder b = nir_builder_create(impl); + + if (lower_cf_list(&b, &impl->body)) { + progress = true; + nir_metadata_preserve(impl, nir_metadata_control_flow); + } else { + nir_metadata_preserve(impl, nir_metadata_all); + } + } + + return progress; +} diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index 286df447a43..22b0a8f34ec 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -226,6 +226,7 @@ enum nak_fs_out { #define NAK_FS_OUT_COLOR(n) (NAK_FS_OUT_COLOR0 + (n) * 16) +bool nak_nir_mark_lcssa_invariants(nir_shader *nir); bool nak_nir_split_64bit_conversions(nir_shader *nir); bool nak_nir_lower_non_uniform_ldcx(nir_shader *nir); bool nak_nir_add_barriers(nir_shader *nir, const struct nak_compiler *nak);