r600/sfn: make sure f2u32 is lowered late and correctly for 64 bit floats

With the latest changes in opt_algebraic, f2u32 instructions end up in the final code, although they should be lowered before the conversion to assembly.

Fixes: b3685f3ba7 nir/algebraic: insert patterns inside optimizations list
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22640>
(cherry picked from commit 6a78af1dbb)
2026-05-08 06:58:05 +02:00 · 2023-04-11 14:17:40 +02:00 · 2023-04-11 14:17:40 +02:00 · 4b6649c163
commit 4b6649c163
parent df6085eb9c
3 changed files with 6 additions and 3 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -121,7 +121,7 @@
        "description": "r600/sfn: make sure f2u32 is lowered late and correctly for 64 bit floats",
        "nominated": true,
        "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "b3685f3ba7fddbe73f363ff4d53ca734841e4b06"
    },
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -924,6 +924,9 @@ r600_shader_from_nir(struct r600_context *rctx,
   while (optimize_once(sh))
      ;

+   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64)
+      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
+
   bool late_algebraic_progress;
   do {
      late_algebraic_progress = false;
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -212,11 +212,11 @@ class LowerSplit64op : public NirLowerInstruction {
             * rounds, we have to remove the fractional part in the hi bits
             * For values > UINT_MAX the result is undefined */
            auto src = nir_ssa_for_alu_src(b, alu, 0);
-            src = nir_fsub(b, src, nir_ffract(b, src));
+            src = nir_fadd(b, src, nir_fneg(b, nir_ffract(b, src)));
            auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
            auto highval = nir_fmul_imm(b, src, 1.0 / 65536.0);
            auto fract = nir_ffract(b, highval);
-            auto high = nir_f2u32(b, nir_f2f32(b, nir_fsub(b, highval, fract)));
+            auto high = nir_f2u32(b, nir_f2f32(b, nir_fadd(b, highval, nir_fneg(b, fract))));
            auto lowval = nir_fmul_imm(b, fract, 65536.0);
            auto low = nir_f2u32(b, nir_f2f32(b, lowval));
            return nir_bcsel(b,