ir3: lower 64b registers before creating preamble

ir3_nir_lower_preamble cannot handle 64b @load/store_preamble so we have
to make sure ir3_nir_opt_preamble will never produce them. Up to now,
nir_lower_locals_to_regs was run after preamble lowering, so 64b locals
could still be around when lowering the preamble. This patch moves
nir_lower_locals_to_regs, as well as ir3_nir_lower_64b_regs, to run
before the preamble lowering.

Fixed Piglit tests:
- spec@arb_gpu_shader_fp64@execution@fs-indirect-temp-double-dst
- spec@arb_gpu_shader_fp64@execution@built-in-functions@fs-frexp-dvec4-variable-index

This patch has no impact on shader-db.

Note: a few cleanup passes used to be run after nir_lower_locals_to_regs
(nir_opt_algebraic, nir_opt_constant_folding) and after
ir3_nir_lower_64b_regs (nir_lower_alu_to_scalar, nir_copy_prop). As far
as I can tell, these are not necessary anymore when running the register
lowering earlier so this patch removes them.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26737>
This commit is contained in:
Job Noorman 2023-12-15 11:43:46 +01:00 committed by Marge Bot
parent 6cad2fc230
commit 2d273c520c
3 changed files with 10 additions and 30 deletions

View file

@@ -84,42 +84,14 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
ctx->s = nir_shader_clone(ctx, shader->nir);
ir3_nir_lower_variant(so, ctx->s);
/* this needs to be the last pass run, so do this here instead of
* in ir3_optimize_nir():
*/
bool progress = false;
bool needs_late_alg = false;
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
if (progress) {
bool regs_progress = false;
/* Split 64b registers into two 32b ones. */
NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
if (regs_progress) {
/* After splitting registers, we might still have some 64b vecs. Run
* some passes to get rid of them.
*/
NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(ctx->s, nir_copy_prop);
}
}
/* we could need cleanup after lower_locals_to_regs */
while (progress) {
progress = false;
NIR_PASS(progress, ctx->s, nir_opt_algebraic);
NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
needs_late_alg = true;
}
/* We want to lower nir_op_imul as late as possible, to catch also
* those generated by earlier passes (e.g,
* nir_lower_locals_to_regs). However, we want a final swing of a
* few passes to have a chance at optimizing the result.
*/
progress = false;
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
while (progress) {
progress = false;

View file

@@ -722,6 +722,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
/* Lower scratch writemasks */
progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
if (OPT(s, nir_lower_locals_to_regs, 1)) {
progress = true;
/* Split 64b registers into two 32b ones. */
OPT_V(s, ir3_nir_lower_64b_regs);
}
progress |= OPT(s, ir3_nir_lower_wide_load_store);
progress |= OPT(s, ir3_nir_lower_64b_global);
progress |= OPT(s, ir3_nir_lower_64b_intrinsics);

View file

@@ -440,8 +440,9 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
* ...
*/
b->cursor = nir_before_impl(main);
/* @decl_regs need to stay in the first block. */
b->cursor = nir_after_reg_decls(main);
nir_if *outer_if = nir_push_if(b, nir_preamble_start_ir3(b, 1));
{
nir_if *inner_if = nir_push_if(b, nir_elect(b, 1));