From ed70b256cea526dd233bc21a9749ad2bb14e48d9 Mon Sep 17 00:00:00 2001
From: Rhys Perry <pendingchaos02@gmail.com>
Date: Wed, 2 Jun 2021 15:14:41 +0100
Subject: [PATCH] nir: add ffma creation helpers

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8056>
---
 src/compiler/nir/nir.h                |  7 +++++--
 src/compiler/nir/nir_builder.h        | 29 +++++++++++++++++++++++++++
 src/compiler/nir/nir_opt_algebraic.py |  4 ++--
 src/intel/compiler/brw_compiler.c     |  2 +-
 4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f750035beae..3eea054e5e7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3685,8 +3685,11 @@ typedef struct nir_shader_compiler_options {
     */
    bool intel_vec4;
 
-   /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
-   bool lower_bfe_with_two_constants;
+   /**
+    * On most Intel GPUs, no ternary operation (such as FMA or BFE) can take
+    * immediates, so two to three instructions may eventually be needed.
+    */
+   bool avoid_ternary_with_two_constants;
 
    /** Whether 8-bit ALU is supported. */
    bool support_8bit_alu;
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index e1e248b1514..962fdf0fafd 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -977,6 +977,35 @@ nir_uclamp(nir_builder *b,
    return nir_umin(b, nir_umax(b, x, min_val), max_val);
 }
 
+static inline nir_ssa_def *
+nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
+{ /* Builds src0 * src1 + src2 where src1 and src2 are both immediates. */
+   if (build->shader->options->avoid_ternary_with_two_constants)
+      return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2); /* separate fmul + fadd instead of one ffma */
+   else
+      return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), /* immediates use src0's bit size */
+                             nir_imm_floatN_t(build, src2, src0->bit_size));
+}
+
+static inline nir_ssa_def *
+nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
+{ /* Builds nir_ffma(src0, src1, src2) with src1 given as an immediate; always emits ffma. */
+   return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2); /* immediate uses src0's bit size */
+}
+
+static inline nir_ssa_def *
+nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
+{ /* Builds nir_ffma(src0, src1, src2) with src2 given as an immediate; always emits ffma. */
+   return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size)); /* immediate uses src0's bit size */
+}
+
+static inline nir_ssa_def *
+nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
+               nir_ssa_def *src2)
+{ /* Builds src0 - src1 * src2 (a = src0, b = src1, c = src2) as one ffma. */
+   return nir_ffma(build, nir_fneg(build, src1), src2, src0); /* (-src1) * src2 + src0 */
+}
+
 static inline nir_ssa_def *
 nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
 {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 51f2d18c2ca..376069514c3 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2410,7 +2410,7 @@ late_optimizations = [
    # result, it is very easy for 3-source instruction combined with either
    # loads of immediate values or copies from weird register strides to be
    # more expensive than the primitive instructions it represents.
-   (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->lower_bfe_with_two_constants'),
+   (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->avoid_ternary_with_two_constants'),
 
    # b is the lowest order bit to be extracted and c is the number of bits to
    # extract.  The inner shift removes the bits above b + c by shifting left
@@ -2418,7 +2418,7 @@ late_optimizations = [
    # -(b + c).  The outer shift moves the bit that was at b to bit zero.
    # After the first shift, that bit is now at b + (32 - (b + c)) or 32 - c.
    # This means that it must be shifted right by 32 - c or -c bits.
-   (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->lower_bfe_with_two_constants'),
+   (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->avoid_ternary_with_two_constants'),
 
    # Clean up no-op shifts that may result from the bfe lowerings.
    (('ishl', a, 0), a),
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 4336ff73a08..ec6b591cd12 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -67,7 +67,7 @@
    .lower_unpack_unorm_4x8 = true,                                            \
    .lower_usub_sat64 = true,                                                  \
    .lower_hadd64 = true,                                                      \
-   .lower_bfe_with_two_constants = true,                                      \
+   .avoid_ternary_with_two_constants = true,                                  \
    .max_unroll_iterations = 32,                                               \
    .force_indirect_unrolling = nir_var_function_temp