intel/brw: Combine constants for src0 of integer multiply too

The majority of cases that would have been affected by this actually had both sources as integer constants. The earlier commit "intel/rt: Don't directly generate umul_32x16" allowed those to be constant folded. v2: Move the a*-1 block to be near the existing a*-1 block. No shader-db changes on any Intel platform. fossil-db results: All Intel platforms had similar results. (Ice Lake shown) Totals: Instrs: 165510246 -> 165510222 (-0.00%) Cycles: 15125198238 -> 15125195835 (-0.00%); split: -0.00%, +0.00% Totals from 46 (0.01% of 656118) affected shaders: Instrs: 36010 -> 35986 (-0.07%) Cycles: 2613658 -> 2611255 (-0.09%); split: -0.17%, +0.07% Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27552>
2025-12-24 15:20:10 +01:00 · 2024-01-16 17:25:39 -08:00 · 2024-01-16 17:25:39 -08:00 · e7480f94c1
commit e7480f94c1
parent dd3bed1d92
3 changed files with 33 additions and 5 deletions
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@ -1378,6 +1378,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)

      case BRW_OPCODE_ASR:
      case BRW_OPCODE_BFI1:
+      case BRW_OPCODE_MUL:
      case BRW_OPCODE_ROL:
      case BRW_OPCODE_ROR:
      case BRW_OPCODE_SHL:
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@ -1017,12 +1017,12 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
         inst->src[arg] = val;
         progress = true;
      } else if (arg == 0 && inst->src[1].file != IMM) {
-         /* Don't copy propagate the constant in situations like
+         /* We used to not copy propagate the constant in situations like
          *
          *    mov(8)          g8<1>D          0x7fffffffD
          *    mul(8)          g16<1>D         g8<8,8,1>D      g15<16,8,2>W
          *
-          * On platforms that only have a 32x16 multiplier, this will
+          * On platforms that only have a 32x16 multiplier, this would
          * result in lowering the multiply to
          *
          *    mul(8)          g15<1>D         g14<8,8,1>D     0xffffUW
@ -1030,7 +1030,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
          *    add(8)          g15.1<2>UW      g15.1<16,8,2>UW g16<16,8,2>UW
          *
          * On Gfx8 and Gfx9, which have the full 32x32 multiplier, it
-          * results in
+          * would result in
          *
          *    mul(8)          g16<1>D         g15<16,8,2>W    0x7fffffffD
          *
@ -1038,11 +1038,19 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
          *
          *    When multiplying a DW and any lower precision integer, the
          *    DW operand must on src0.
+          *
+          * So it would have been invalid. However, brw_fs_combine_constants
+          * will now "fix" the constant.
          */
         if (inst->opcode == BRW_OPCODE_MUL &&
             type_sz(inst->src[1].type) < 4 &&
-             type_sz(val.type) == 4)
+             (inst->src[0].type == BRW_REGISTER_TYPE_D ||
+              inst->src[0].type == BRW_REGISTER_TYPE_UD)) {
+            inst->src[0] = val;
+            inst->src[0].type = BRW_REGISTER_TYPE_D;
+            progress = true;
            break;
+         }

         /* Fit this constant in by commuting the operands.
          * Exception: we can't do this for 32-bit integer MUL/MACH
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@ -148,7 +148,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
         break;

      case BRW_OPCODE_MUL:
-         if (inst->src[1].file != IMM)
+         if (inst->src[0].file != IMM && inst->src[1].file != IMM)
            continue;

         if (brw_reg_type_is_floating_point(inst->src[1].type))
@ -177,6 +177,15 @@ brw_fs_opt_algebraic(fs_visitor &s)
              inst->writes_accumulator_implicitly(devinfo)))
            break;

+         if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = brw_imm_d(0);
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
         /* a * 1.0 = a */
         if (inst->src[1].is_one()) {
            inst->opcode = BRW_OPCODE_MOV;
@ -187,6 +196,16 @@ brw_fs_opt_algebraic(fs_visitor &s)
         }

         /* a * -1.0 = -a */
+         if (inst->src[0].is_negative_one()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = inst->src[1];
+            inst->src[0].negate = !inst->src[0].negate;
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
         if (inst->src[1].is_negative_one()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->sources = 1;