From ded8690336efbc781aadaa98e1db3dee54219c4f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 1 May 2024 16:03:33 -0700
Subject: [PATCH] intel/brw: Remove dsign optimization

This bit from the comment should have been a big red flag:

    There are currently zero instances of fsign(double(x))*IMM in
    shader-db or any test suite, so it is hard to care at this time.

The implementation of that path was incorrect. The XOR instructions
should be predicated like the OR instruction in the non-multiplication
path. As a result, dsign(zero_value) * x will not produce the correct
result.

Instead of fixing this code that is never exercised by anything, replace
it with the simple lowering in NIR.

No shader-db or fossil-db changes on any Intel platform.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29095>
---
 src/intel/compiler/brw_compiler.c |  1 +
 src/intel/compiler/brw_fs_nir.cpp | 40 +------------------------------
 2 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 9f1aec6b91e..884efae4dff 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -117,6 +117,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
       nir_lower_drcp |
       nir_lower_dsqrt |
       nir_lower_drsq |
+      nir_lower_dsign |
       nir_lower_dtrunc |
       nir_lower_dfloor |
       nir_lower_dceil |
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index d1f7dabd103..d4c5e7daec8 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -940,45 +940,7 @@ emit_fsign(nir_to_brw_state &ntb, const fs_builder &bld, const nir_alu_instr *in
 
       inst->predicate = BRW_PREDICATE_NORMAL;
    } else {
-      /* For doubles we do the same but we need to consider:
-       *
-       * - 2-src instructions can't operate with 64-bit immediates
-       * - The sign is encoded in the high 32-bit of each DF
-       * - We need to produce a DF result.
-       */
-
-      fs_reg zero = bld.MOV(brw_imm_df(0.0));
-      bld.CMP(bld.null_reg_df(), op[0], zero, BRW_CONDITIONAL_NZ);
-
-      bld.MOV(result, zero);
-
-      fs_reg r = subscript(result, BRW_TYPE_UD, 1);
-      bld.AND(r, subscript(op[0], BRW_TYPE_UD, 1),
-              brw_imm_ud(0x80000000u));
-
-      if (instr->op == nir_op_fsign) {
-         set_predicate(BRW_PREDICATE_NORMAL,
-                       bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
-      } else {
-         if (devinfo->has_64bit_int) {
-            /* This could be done better in some cases.  If the scale is an
-             * immediate with the low 32-bits all 0, emitting a separate XOR and
-             * OR would allow an algebraic optimization to remove the OR.  There
-             * are currently zero instances of fsign(double(x))*IMM in shader-db
-             * or any test suite, so it is hard to care at this time.
-             */
-            fs_reg result_int64 = retype(result, BRW_TYPE_UQ);
-            inst = bld.XOR(result_int64, result_int64,
-                           retype(op[1], BRW_TYPE_UQ));
-         } else {
-            fs_reg result_int64 = retype(result, BRW_TYPE_UQ);
-            bld.MOV(subscript(result_int64, BRW_TYPE_UD, 0),
-                    subscript(op[1], BRW_TYPE_UD, 0));
-            bld.XOR(subscript(result_int64, BRW_TYPE_UD, 1),
-                    subscript(result_int64, BRW_TYPE_UD, 1),
-                    subscript(op[1], BRW_TYPE_UD, 1));
-         }
-      }
+      unreachable("Should have been lowered by nir_opt_algebraic.");
    }
 }