nir/lower_int64: replace uadd_sat with ior for find_lsb64 and ufind_msb64

Using ior here is equivalent to using uadd_sat, but works for every driver and shouldn't hurt anywhere. I forgot to fix this up when fixing up some vvl errors with zink. Fixes crashes with the integer_ctz CL CTS tests in zink. Fixes: 39ec184db6 ("zink: lower 64 bit find_lsb, ufind_msb and bit_count") Reviewed-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30535> (cherry picked from commit 48acf9d358)
2026-05-08 09:08:10 +02:00 · 2024-08-06 14:09:08 +02:00 · 2024-08-06 14:09:08 +02:00 · ff7759cdfe
commit ff7759cdfe
parent f3f0b87bf8
2 changed files with 10 additions and 23 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@ -2204,7 +2204,7 @@
        "description": "nir/lower_int64: replace uadd_sat with ior for find_lsb64 and ufind_msb64",
        "nominated": true,
        "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "39ec184db6e9d614fd540b89e9cba4e6cb77de50",
        "notes": null
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@ -683,24 +683,13 @@ lower_ufind_msb64(nir_builder *b, nir_def *x)
   nir_def *lo_count = nir_ufind_msb(b, x_lo);
   nir_def *hi_count = nir_ufind_msb(b, x_hi);

-   if (b->shader->options->lower_uadd_sat) {
-      nir_def *valid_hi_bits = nir_ine_imm(b, x_hi, 0);
-      nir_def *hi_res = nir_iadd_imm(b, hi_count, 32);
-      return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
-   } else {
-      /* If hi_count was -1, it will still be -1 after this uadd_sat. As a
-       * result, hi_count is either -1 or the correct return value for 64-bit
-       * ufind_msb.
-       */
-      nir_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);
-
-      /* hi_res is either -1 or a value in the range [63, 32]. lo_count is
-       * either -1 or a value in the range [31, 0]. The imax will pick
-       * lo_count only when hi_res is -1. In those cases, lo_count is
-       * guaranteed to be the correct answer.
-       */
-      return nir_imax(b, hi_res, lo_count);
-   }
+   /* hi_count is either -1 or a value in the range [31, 0]. lo_count is
+    * the same. The imax will pick lo_count only when hi_count is -1. In those
+    * cases, lo_count is guaranteed to be the correct answer.
+    * The ior 32 is always safe here as with -1 the value won't change,
+    * otherwise it adds 32, which is what we want anyway.
+    */
+   return nir_imax(b, lo_count, nir_ior_imm(b, hi_count, 32));
 }

 static nir_def *
@ -713,11 +702,9 @@ lower_find_lsb64(nir_builder *b, nir_def *x)

   /* Use umin so that -1 (no bits found) becomes larger (0xFFFFFFFF)
    * than any actual bit position, so we return a found bit instead.
-    * This is similar to the ufind_msb lowering. If you need this lowering
-    * without uadd_sat, add code like in lower_ufind_msb64.
+    * This is similar to the ufind_msb lowering.
    */
-   assert(!b->shader->options->lower_uadd_sat);
-   return nir_umin(b, lo_lsb, nir_uadd_sat(b, hi_lsb, nir_imm_int(b, 32)));
+   return nir_umin(b, lo_lsb, nir_ior_imm(b, hi_lsb, 32));
 }

 static nir_def *