From 73580de2e8caafa2de972d7e267e9307b7e139e3 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 9 Aug 2021 15:05:33 -0700
Subject: [PATCH] nir/loop_analyze: Fix get_iteration for nir_op_fneu

Consider the loop:

    float i = 0.0;
    while (true) {
       if (i != 0.0)
          break;

       i = i + 1.0;
    }

This loop clearly executes exactly one time.

Some trickery is necessary to handle cases where the initial loop value
is very large and the increment is, by comparison, very small.  From the
fneu_once test case,

    float i = -604462909807314587353088.0;
    while (true) {
       if (i != -604462909807314587353088.0)
          break;

       i = i + 36028797018963968.0;
    }

This loop should also execute exactly once, but this is much more
challenging to calculate due to precision issues.

Going towards smaller magnitude (i.e., adding a small positive value to
a large negative value) requires a smaller delta to make a difference
than going towards a larger magnitude. For this reason,
-604462909807314587353088.0 + 36028797018963968.0 !=
-604462909807314587353088.0, but -604462909807314587353088.0 +
-36028797018963968.0 == -604462909807314587353088.0. Math class is
tough.

No changes in shader-db or fossil-db.

v2: Fix major bug in checking result of the eval_const_binop(nir_op_feq,
...) discovered while developing fneu_once_easy unit test. Fix a typo in
the comment just above that. Add fneu_once_easy test.

v3: Skip the iteration count adjustment tests for nir_op_fneu and
nir_op_ine. Since the iteration count is either 1 or unknown, all this
function can do is add numerical error. Add fneu_once tests.

v4: Change the initial value in the fneu_once test from large positive
to large negative. Change check in get_iteration from nir_op_fsub to
nir_op_fadd. Both changes from discussion with M Henning. Also add some
more explanation in fneu_once.

v5: Rename test cases.

Fixes: 6772a17acc8 ("nir: Add a loop analysis pass")
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19732>
(cherry picked from commit f75c83c4aae2b01013f3740a7414cf207f60b0ab)
---
 .pick_status.json                   |  2 +-
 src/compiler/nir/nir_loop_analyze.c | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index e22da8dc2f7..719d8fc0bc1 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -733,7 +733,7 @@
         "description": "nir/loop_analyze: Fix get_iteration for nir_op_fneu",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "6772a17acc8ee90f9398348251a4455f988208fd"
     },
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 4fd192489ef..d9eca46f765 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -779,10 +779,27 @@ get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
                               execution_mode);
       break;
 
+   case nir_op_fneu:
+      /* In order for execution to be here, limit must be the same as initial.
+       * Otherwise will_break_on_first_iteration would have returned false.
+       * If step is zero, the loop is infinite.  Otherwise the loop will
+       * execute once.
+       *
+       * This is a little more tricky for floating point since X-Y might still
+       * be X even if Y is not zero.  Instead check that (initial + step) !=
+       * initial.
+       */
+      span = eval_const_binop(nir_op_fadd, bit_size, initial, step,
+                              execution_mode);
+      iter = eval_const_binop(nir_op_feq, bit_size, initial,
+                              span, execution_mode);
+
+      /* return (initial + step) == initial ? -1 : 1 */
+      return iter.b ? -1 : 1;
+
    case nir_op_fge:
    case nir_op_flt:
    case nir_op_feq:
-   case nir_op_fneu:
       span = eval_const_binop(nir_op_fsub, bit_size, limit, initial,
                               execution_mode);
       iter = eval_const_binop(nir_op_fdiv, bit_size, span,
@@ -953,6 +970,9 @@ calculate_iterations(nir_const_value initial, nir_const_value step,
    if (iter_int < 0)
       return -1;
 
+   if (alu_op == nir_op_ine || alu_op == nir_op_fneu)
+      return iter_int;
+
    /* An explanation from the GLSL unrolling pass:
     *
     * Make sure that the calculated number of iterations satisfies the exit