brw/algebraic: Constant fold multiplicands of MAD

v2: Move the full constant folding part to
brw_constant_fold_instruction. Suggested by Caio. I did this by
extracting the core part of the folding to a helper function.

v3: Delete stale comment. Noticed by Caio.

shader-db:

All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 18090847 -> 18090843 (<.01%)
instructions in affected programs: 150 -> 146 (-2.67%)
helped: 1 / HURT: 0

total cycles in shared programs: 919664648 -> 919663210 (<.01%)
cycles in affected programs: 3426 -> 1988 (-41.97%)
helped: 1 / HURT: 0

LOST: 1
GAINED: 0

fossil-db:

All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 220496486 -> 220496403 (-0.00%)
Cycle count: 31610880908 -> 31610879044 (-0.00%); split: -0.00%, +0.00%

Totals from 70 (0.01% of 702439) affected shaders:
Instrs: 47018 -> 46935 (-0.18%)
Cycle count: 6335504 -> 6333640 (-0.03%); split: -0.11%, +0.09%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32436>
2025-12-21 11:30:11 +01:00 · 2024-11-14 10:04:26 -08:00 · 2024-11-14 10:04:26 -08:00 · b605f76b2a
commit b605f76b2a
parent 3a16ad71b7
1 changed files with 108 additions and 0 deletions
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@ -5,6 +5,7 @@

 #include "brw_fs.h"
 #include "brw_fs_builder.h"
+#include "util/half_float.h"

 using namespace brw;

@ -37,6 +38,26 @@ src_as_uint(const brw_reg &src)
   }
 }

+/**
+ * Reads the value of a floating-point immediate as a double.
+ *
+ * The register must be an immediate (asserted).  HF, F and DF immediates
+ * are handled; any other type reaches unreachable().
+ */
+static double
+src_as_float(const brw_reg &src)
+{
+   assert(src.file == IMM);
+
+   if (src.type == BRW_TYPE_HF) {
+      /* The half-float bit pattern is the low 16 bits of the immediate. */
+      const uint16_t half_bits = (uint16_t)src.d;
+      return _mesa_half_to_float(half_bits);
+   }
+
+   if (src.type == BRW_TYPE_F)
+      return src.f;
+
+   if (src.type == BRW_TYPE_DF)
+      return src.df;
+
+   unreachable("Invalid float type.");
+}
+
 static brw_reg
 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
 {
@ -64,6 +85,55 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
   }
 }

+/**
+ * Turns a MAD into an ADD by multiplying its two multiplicands at compile
+ * time.
+ *
+ * Both src[1] and src[2] must be non-vector immediates (asserted).  Their
+ * product is stored in src[1], typed as the larger of the two source types;
+ * the opcode then becomes ADD and the source list is resized to two entries.
+ * src[0] is not modified.
+ */
+static void
+fold_multiplicands_of_MAD(fs_inst *inst)
+{
+   assert(inst->opcode == BRW_OPCODE_MAD);
+   assert(inst->src[1].file == IMM &&
+          inst->src[2].file == IMM &&
+          !brw_type_is_vector_imm(inst->src[1].type) &&
+          !brw_type_is_vector_imm(inst->src[2].type));
+
+   if (!brw_type_is_int(inst->src[1].type)) {
+      /* Floating-point multiplicands: multiply in double, then narrow to
+       * the result type.
+       */
+      const double product = src_as_float(inst->src[1]) *
+         src_as_float(inst->src[2]);
+      const enum brw_reg_type result_type =
+         brw_type_larger_of(inst->src[1].type, inst->src[2].type);
+
+      if (result_type == BRW_TYPE_HF) {
+         /* Build the half-float bit pattern as a W immediate and relabel
+          * it as HF.
+          */
+         const brw_reg half_bits = brw_imm_w(_mesa_float_to_half(product));
+         inst->src[1] = retype(half_bits, BRW_TYPE_HF);
+      } else if (result_type == BRW_TYPE_F) {
+         inst->src[1] = brw_imm_f(product);
+      } else if (result_type == BRW_TYPE_DF) {
+         unreachable("float64 should be impossible.");
+      } else {
+         unreachable("Invalid float type.");
+      }
+   } else {
+      /* Integer multiplicands: the product is computed in 64 bits.
+       *
+       * NOTE(review): the product is handed to brw_imm_ud, which presumably
+       * keeps only the low 32 bits -- confirm that is intended when the
+       * larger source type is wider than 32 bits.
+       */
+      const uint64_t product = src_as_uint(inst->src[1]) *
+         src_as_uint(inst->src[2]);
+      const brw_reg folded = brw_imm_ud(product);
+
+      inst->src[1] = retype(folded,
+                            brw_type_larger_of(inst->src[1].type,
+                                               inst->src[2].type));
+   }
+
+   /* Only src[0] and the folded product in src[1] remain. */
+   inst->opcode = BRW_OPCODE_ADD;
+   inst->resize_sources(2);
+}
+
 bool
 brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
 {
@ -120,6 +190,25 @@ brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)

      break;

+   case BRW_OPCODE_MAD:
+      if (inst->src[1].file == IMM &&
+          inst->src[2].file == IMM &&
+          inst->src[3].file == IMM &&
+          !brw_type_is_vector_imm(inst->src[1].type) &&
+          !brw_type_is_vector_imm(inst->src[2].type) &&
+          !brw_type_is_vector_imm(inst->src[3].type)) {
+         fold_multiplicands_of_MAD(inst);
+         assert(inst->opcode == BRW_OPCODE_ADD);
+
+         ASSERTED bool folded = brw_constant_fold_instruction(devinfo, inst);
+         assert(folded);
+
+         progress = true;
+         break;
+      }
+
+      break;
+
   case BRW_OPCODE_MUL:
      if (brw_type_is_float(inst->src[1].type))
         break;
@ -560,6 +649,25 @@ brw_fs_opt_algebraic(fs_visitor &s)
         }
         break;
      case BRW_OPCODE_MAD:
+         if (brw_constant_fold_instruction(devinfo, inst)) {
+            progress = true;
+            break;
+         }
+
+         if (inst->src[1].file == IMM &&
+             inst->src[2].file == IMM &&
+             !brw_type_is_vector_imm(inst->src[1].type) &&
+             !brw_type_is_vector_imm(inst->src[2].type)) {
+            fold_multiplicands_of_MAD(inst);
+
+            /* This could result in (x + 0). For floats, we want to leave this
+             * as an ADD so that a subnormal x will get flushed to zero.
+             */
+            assert(inst->opcode == BRW_OPCODE_ADD);
+            progress = true;
+            break;
+         }
+
         if (inst->src[0].type != BRW_TYPE_F ||
             inst->src[1].type != BRW_TYPE_F ||
             inst->src[2].type != BRW_TYPE_F)