brw/algebraic: Constant fold multiplicands of MAD

v2: Move the full constant folding part to brw_constant_fold_instruction. Suggested by Caio. I did this by extracting the core part of the folding to a helper function. v3: Delete stale comment. Noticed by Caio. shader-db: All Intel platforms had similar results. (Lunar Lake shown) total instructions in shared programs: 18090847 -> 18090843 (<.01%) instructions in affected programs: 150 -> 146 (-2.67%) helped: 1 / HURT: 0 total cycles in shared programs: 919664648 -> 919663210 (<.01%) cycles in affected programs: 3426 -> 1988 (-41.97%) helped: 1 / HURT: 0 LOST: 1 GAINED: 0 fossil-db: All Intel platforms had similar results. (Lunar Lake shown) Totals: Instrs: 220496486 -> 220496403 (-0.00%) Cycle count: 31610880908 -> 31610879044 (-0.00%); split: -0.00%, +0.00% Totals from 70 (0.01% of 702439) affected shaders: Instrs: 47018 -> 46935 (-0.18%) Cycle count: 6335504 -> 6333640 (-0.03%); split: -0.11%, +0.09% Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32436>
2025-12-21 22:20:14 +01:00 · 2024-11-14 10:04:26 -08:00 · 2024-11-14 10:04:26 -08:00 · b605f76b2a
commit b605f76b2a
parent 3a16ad71b7
1 changed files with 108 additions and 0 deletions
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@ -5,6 +5,7 @@
 #include "brw_fs.h"
 #include "brw_fs_builder.h"
 #include "util/half_float.h"
 using namespace brw;
@ -37,6 +38,26 @@ src_as_uint(const brw_reg &src)
   }
 }
 /**
  * Reads a floating-point immediate and returns its value as a double.
  *
  * \param src  Register that must be an immediate (file == IMM) of type HF, F
  *             or DF; any other type hits unreachable().
  *
  * For HF the low 16 bits of the immediate's integer payload are taken as the
  * raw half-float bit pattern and expanded with _mesa_half_to_float(). F and
  * DF values are read directly from their own fields.
  */
 static double
 src_as_float(const brw_reg &src)
 {
   assert(src.file == IMM);

   if (src.type == BRW_TYPE_HF) {
      /* Only the low 16 bits of the payload carry the half-float encoding. */
      const uint16_t half_bits = (uint16_t)src.d;
      return _mesa_half_to_float(half_bits);
   }

   if (src.type == BRW_TYPE_F)
      return src.f;

   if (src.type == BRW_TYPE_DF)
      return src.df;

   unreachable("Invalid float type.");
 }
 static brw_reg
 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
 {
@ -64,6 +85,55 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
   }
 }
 /**
  * Converts a MAD to an ADD by folding the multiplicand sources.
  *
  * Preconditions (asserted, not checked): the instruction is a MAD, and both
  * src[1] and src[2] are non-vector immediates. Callers must test for this
  * before calling.
  *
  * On return src[1] holds the product of the two immediates, the opcode is
  * ADD, and resize_sources(2) has been called. src[0] is not modified here.
  * Whether the integer or the floating-point path is taken depends only on
  * the type of src[1].
  */
 static void
 fold_multiplicands_of_MAD(fs_inst *inst)
 {
   assert(inst->opcode == BRW_OPCODE_MAD);
   assert (inst->src[1].file == IMM &&
           inst->src[2].file == IMM &&
           !brw_type_is_vector_imm(inst->src[1].type) &&
           !brw_type_is_vector_imm(inst->src[2].type));

   if (!brw_type_is_int(inst->src[1].type)) {
      /* Floating-point multiplicands: multiply as doubles, then encode the
       * result in the larger of the two source types.
       */
      const double lhs = src_as_float(inst->src[1]);
      const double rhs = src_as_float(inst->src[2]);
      const double product = lhs * rhs;

      switch (brw_type_larger_of(inst->src[1].type,
                                 inst->src[2].type)) {
      case BRW_TYPE_F:
         inst->src[1] = brw_imm_f(product);
         break;

      case BRW_TYPE_HF:
         /* Build the immediate from the raw half-float bits as a W value,
          * then relabel it as HF.
          */
         inst->src[1] = retype(brw_imm_w(_mesa_float_to_half(product)),
                               BRW_TYPE_HF);
         break;

      case BRW_TYPE_DF:
         unreachable("float64 should be impossible.");
         break;

      default:
         unreachable("Invalid float type.");
      }
   } else {
      /* Integer multiplicands: multiply the zero/sign-handled 64-bit views
       * returned by src_as_uint().
       */
      const uint64_t lhs = src_as_uint(inst->src[1]);
      const uint64_t rhs = src_as_uint(inst->src[2]);

      /* NOTE(review): brw_imm_ud() presumably takes a 32-bit value, so the
       * 64-bit product would be narrowed here - confirm against its
       * declaration.
       */
      brw_reg folded = brw_imm_ud(lhs * rhs);

      inst->src[1] = retype(folded,
                            brw_type_larger_of(inst->src[1].type,
                                               inst->src[2].type));
   }

   /* The multiply is gone; what remains is src[0] + src[1]. */
   inst->opcode = BRW_OPCODE_ADD;
   inst->resize_sources(2);
 }
 bool
 brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
 {
@ -120,6 +190,25 @@ brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
      break;
   case BRW_OPCODE_MAD:
      if (inst->src[1].file == IMM &&
          inst->src[2].file == IMM &&
          inst->src[3].file == IMM &&
          !brw_type_is_vector_imm(inst->src[1].type) &&
          !brw_type_is_vector_imm(inst->src[2].type) &&
          !brw_type_is_vector_imm(inst->src[3].type)) {
         fold_multiplicands_of_MAD(inst);
         assert(inst->opcode == BRW_OPCODE_ADD);
         ASSERTED bool folded = brw_constant_fold_instruction(devinfo, inst);
         assert(folded);
         progress = true;
         break;
      }
      break;
   case BRW_OPCODE_MUL:
      if (brw_type_is_float(inst->src[1].type))
         break;
@ -560,6 +649,25 @@ brw_fs_opt_algebraic(fs_visitor &s)
         }
         break;
      case BRW_OPCODE_MAD:
         if (brw_constant_fold_instruction(devinfo, inst)) {
            progress = true;
            break;
         }
         if (inst->src[1].file == IMM &&
             inst->src[2].file == IMM &&
             !brw_type_is_vector_imm(inst->src[1].type) &&
             !brw_type_is_vector_imm(inst->src[2].type)) {
            fold_multiplicands_of_MAD(inst);
            /* This could result in (x + 0). For floats, we want to leave this
             * as an ADD so that a subnormal x will get flushed to zero.
             */
            assert(inst->opcode == BRW_OPCODE_ADD);
            progress = true;
            break;
         }
         if (inst->src[0].type != BRW_TYPE_F ||
             inst->src[1].type != BRW_TYPE_F ||
             inst->src[2].type != BRW_TYPE_F)