From 206ec1ff779e139e3cc7604c6cac6d73bf6d3472 Mon Sep 17 00:00:00 2001
From: Gert Wollny <gert.wollny@collabora.com>
Date: Thu, 7 Aug 2025 22:59:04 +0200
Subject: [PATCH] r600/sfn: lower u2f64 and i2f64 in nir

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36587>
---
 .../drivers/r600/sfn/sfn_instr_alu.cpp        | 69 -------------------
 .../drivers/r600/sfn/sfn_nir_lower_64bit.cpp  | 44 ++++++++----
 2 files changed, 31 insertions(+), 82 deletions(-)

diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
index d6d54c2f9e1..957561f5d2f 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp
@@ -1358,8 +1358,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
 static bool
 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
 static bool
-emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
-static bool
 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
 static bool
 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
@@ -1491,10 +1489,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
          return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
       case nir_op_f2f64:
          return emit_alu_f2f64(*alu, shader);
-      case nir_op_i2f64:
-         return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
-      case nir_op_u2f64:
-         return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
       case nir_op_f2f32:
          return emit_alu_f2f32(*alu, shader);
       case nir_op_fabs:
@@ -2145,69 +2139,6 @@ emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
    return true;
 }
 
-static bool
-emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
-{
-   /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
-   auto& value_factory = shader.value_factory();
-   auto group = new AluGroup();
-   AluInstr *ir = nullptr;
-
-   assert(alu.def.num_components == 1);
-
-   auto tmpx = value_factory.temp_register();
-   shader.emit_instruction(new AluInstr(op2_and_int,
-                                        tmpx,
-                                        value_factory.src(alu.src[0], 0),
-                                        value_factory.literal(0xffffff00),
-                                        AluInstr::write));
-   auto tmpy = value_factory.temp_register();
-   shader.emit_instruction(new AluInstr(op2_and_int,
-                                        tmpy,
-                                        value_factory.src(alu.src[0], 0),
-                                        value_factory.literal(0xff),
-                                        AluInstr::last_write));
-
-   auto tmpx2 = value_factory.temp_register();
-   auto tmpy2 = value_factory.temp_register();
-   shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
-   shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
-
-   auto tmpx3 = value_factory.temp_register(0);
-   auto tmpy3 = value_factory.temp_register(1);
-   auto tmpz3 = value_factory.temp_register(2);
-   auto tmpw3 = value_factory.temp_register(3);
-
-   ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
-   group->add_instruction(ir);
-   ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
-   group->add_instruction(ir);
-   ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
-   group->add_instruction(ir);
-   ir =
-      new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
-   group->add_instruction(ir);
-   shader.emit_instruction(group);
-
-   group = new AluGroup();
-
-   ir = new AluInstr(op2_add_64,
-                     value_factory.dest(alu.def, 0, pin_chan),
-                     tmpy3,
-                     tmpw3,
-                     AluInstr::write);
-   group->add_instruction(ir);
-   ir = new AluInstr(op2_add_64,
-                     value_factory.dest(alu.def, 1, pin_chan),
-                     tmpx3,
-                     tmpz3,
-                     AluInstr::write);
-   group->add_instruction(ir);
-   shader.emit_instruction(group);
-
-   return true;
-}
-
 static bool
 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
 {
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
index 10032e59991..5189b30bdbf 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_64bit.cpp
@@ -136,15 +136,17 @@ class LowerSplit64op : public NirLowerInstruction {
          auto alu = nir_instr_as_alu(instr);
          switch (alu->op) {
          case nir_op_bcsel:
-         case nir_op_b2f64:
             return alu->def.bit_size == 64;
          case nir_op_f2i32:
          case nir_op_f2u32:
          case nir_op_f2i64:
          case nir_op_f2u64:
-         case nir_op_u2f64:
-         case nir_op_i2f64:
             return nir_src_bit_size(alu->src[0].src) == 64;
+         case nir_op_i2f64:
+         case nir_op_u2f64:
+            return nir_src_bit_size(alu->src[0].src) >= 32;
+         case nir_op_b2f64:
+            return true;
          default:
             return false;
          }
@@ -206,19 +208,19 @@ class LowerSplit64op : public NirLowerInstruction {
          }        
          case nir_op_u2f64: {
             auto src = nir_ssa_for_alu_src(b, alu, 0);
-            auto low = nir_unpack_64_2x32_split_x(b, src);
-            auto high = nir_unpack_64_2x32_split_y(b, src);
-            auto flow = nir_u2f64(b, low);
-            auto fhigh = nir_u2f64(b, high);
-            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+            if (src->bit_size == 64) {
+               return lower_i64_to_f64(src, nir_op_u2f64);
+            } else {
+               return lower_i32_to_f64(src, nir_op_u2f32);
+            }
          }
          case nir_op_i2f64: {
             auto src = nir_ssa_for_alu_src(b, alu, 0);
-            auto low = nir_unpack_64_2x32_split_x(b, src);
-            auto high = nir_unpack_64_2x32_split_y(b, src);
-            auto flow = nir_u2f64(b, low);
-            auto fhigh = nir_i2f64(b, high);
-            return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow);
+            if (src->bit_size == 64) {
+               return lower_i64_to_f64(src, nir_op_i2f64);
+            } else {
+               return lower_i32_to_f64(src, nir_op_i2f32);
+            }
          }
          case nir_op_b2f64: {
             auto src = nir_b2b32(b, nir_ssa_for_alu_src(b, alu, 0));
@@ -251,6 +253,22 @@ class LowerSplit64op : public NirLowerInstruction {
          UNREACHABLE("Trying to lower instruction that was not in filter");
       }
    }
+
+   nir_def *lower_i64_to_f64(nir_def *src, nir_op op)
+   {
+      auto flow = nir_u2f64(b, nir_unpack_64_2x32_split_x(b, src)); /* low word: always unsigned */
+      auto fhigh = nir_build_alu1(b, op, nir_unpack_64_2x32_split_y(b, src)); /* op converts the high word */
+      return nir_fadd(b, nir_fmul_imm(b, fhigh, 65536.0 * 65536.0), flow); /* high * 2^32 + low */
+   }
+
+   nir_def *lower_i32_to_f64(nir_def *src, nir_op op)
+   {
+      /* Split src into its low 8 and high 24 bits; each part has at most 24 significant bits. */
+      auto low8 = nir_u2f32(b, nir_iand(b, src, nir_imm_int(b, 0x000000ff)));
+      auto high24 = nir_build_alu1(b, op, nir_iand(b, src, nir_imm_int(b, 0xffffff00)));
+      auto parts = nir_f2f64(b, nir_vec2(b, low8, high24));
+      return nir_fadd(b, nir_channel(b, parts, 0), nir_channel(b, parts, 1));
+   }
 };
 
 bool