brw/algebraic: Refactor constant folding out of brw_fs_opt_algebraic

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31729>
2025-12-22 04:50:11 +01:00 · 2024-10-10 14:07:04 -07:00 · 2024-10-10 14:07:04 -07:00 · 2cc1575a31
commit 2cc1575a31
parent 90ad5d3f06
2 changed files with 176 additions and 134 deletions
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@ -654,6 +654,7 @@ bool brw_fs_lower_subgroup_ops(fs_visitor &s);
 bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
 void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
 bool brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
 bool brw_fs_opt_algebraic(fs_visitor &s);
 bool brw_fs_opt_bank_conflicts(fs_visitor &s);
 bool brw_fs_opt_cmod_propagation(fs_visitor &s);
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@ -64,6 +64,176 @@ brw_imm_for_type(uint64_t value, enum brw_reg_type type)
   }
 }
 bool
 brw_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst)
 {
   bool progress = false;
   switch (inst->opcode) {
   case BRW_OPCODE_ADD:
      if (inst->src[1].file != IMM)
         break;
      if (brw_type_is_int(inst->src[1].type) &&
          inst->src[1].is_zero()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->resize_sources(1);
         progress = true;
         break;
      }
      if (inst->src[0].file == IMM) {
         assert(inst->src[0].type == BRW_TYPE_F);
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0].f += inst->src[1].f;
         inst->resize_sources(1);
         progress = true;
         break;
      }
      break;
   case BRW_OPCODE_AND:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         const uint64_t src0 = src_as_uint(inst->src[0]);
         const uint64_t src1 = src_as_uint(inst->src[1]);
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
         break;
      }
      break;
   case BRW_OPCODE_MUL:
      if ((inst->src[0].file != IMM && inst->src[1].file != IMM) ||
          brw_type_is_float(inst->src[1].type))
         break;
      /* From the BDW PRM, Vol 2a, "mul - Multiply":
       *
       *    "When multiplying integer datatypes, if src0 is DW and src1
       *    is W, irrespective of the destination datatype, the
       *    accumulator maintains full 48-bit precision."
       *    ...
       *    "When multiplying integer data types, if one of the sources
       *    is a DW, the resulting full precision data is stored in
       *    the accumulator."
       *
       * There are also similar notes in earlier PRMs.
       *
       * The MOV instruction can copy the bits of the source, but it
       * does not clear the higher bits of the accumulator. So, because
       * we might use the full accumulator in the MUL/MACH macro, we
       * shouldn't replace such MULs with MOVs.
       */
      if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
           brw_type_size_bytes(inst->src[1].type) == 4) &&
          (inst->dst.is_accumulator() ||
           inst->writes_accumulator_implicitly(devinfo)))
         break;
      if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_d(0);
         inst->resize_sources(1);
         progress = true;
         break;
      }
      /* a * 1 = a */
      if (inst->src[1].is_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->resize_sources(1);
         return true;
      }
      /* a * -1 = -a */
      if (inst->src[0].is_negative_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = inst->src[1];
         inst->src[0].negate = !inst->src[0].negate;
         inst->resize_sources(1);
         progress = true;
         break;
      }
      if (inst->src[1].is_negative_one()) {
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0].negate = !inst->src[0].negate;
         inst->resize_sources(1);
         progress = true;
         break;
      }
      break;
   case BRW_OPCODE_OR:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         const uint64_t src0 = src_as_uint(inst->src[0]);
         const uint64_t src1 = src_as_uint(inst->src[1]);
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
         break;
      }
      break;
   case BRW_OPCODE_SHL:
      if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
         /* It's not currently possible to generate this, and this constant
          * folding does not handle it.
          */
         assert(!inst->saturate);
         brw_reg result;
         switch (brw_type_size_bytes(inst->src[0].type)) {
         case 2:
            result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
            break;
         case 4:
            result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
            break;
         case 8:
            result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
            break;
         default:
            /* Just in case a future platform re-enables B or UB types. */
            unreachable("Invalid source size.");
         }
         inst->opcode = BRW_OPCODE_MOV;
         inst->src[0] = retype(result, inst->dst.type);
         inst->resize_sources(1);
         progress = true;
      }
      break;
   default:
      break;
   }
 #ifndef NDEBUG
   /* The function is only intended to do constant folding, so the result of
    * progress must be a MOV of an immediate value.
    */
   if (progress) {
      assert(inst->opcode == BRW_OPCODE_MOV);
      assert(inst->src[0].file == IMM);
   }
 #endif
   return progress;
 }
 bool
 brw_fs_opt_algebraic(fs_visitor &s)
 {
@ -107,119 +277,17 @@ brw_fs_opt_algebraic(fs_visitor &s)
         break;
      case BRW_OPCODE_MUL:
         if (inst->src[0].file != IMM && inst->src[1].file != IMM)
            continue;
         if (brw_type_is_float(inst->src[1].type))
            break;
         /* From the BDW PRM, Vol 2a, "mul - Multiply":
          *
          *    "When multiplying integer datatypes, if src0 is DW and src1
          *    is W, irrespective of the destination datatype, the
          *    accumulator maintains full 48-bit precision."
          *    ...
          *    "When multiplying integer data types, if one of the sources
          *    is a DW, the resulting full precision data is stored in
          *    the accumulator."
          *
          * There are also similar notes in earlier PRMs.
          *
          * The MOV instruction can copy the bits of the source, but it
          * does not clear the higher bits of the accumulator. So, because
          * we might use the full accumulator in the MUL/MACH macro, we
          * shouldn't replace such MULs with MOVs.
          */
         if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
              brw_type_size_bytes(inst->src[1].type) == 4) &&
             (inst->dst.is_accumulator() ||
              inst->writes_accumulator_implicitly(devinfo)))
            break;
         if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = brw_imm_d(0);
            inst->resize_sources(1);
            progress = true;
            break;
         }
         /* a * 1.0 = a */
         if (inst->src[1].is_one()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->resize_sources(1);
            progress = true;
            break;
         }
         /* a * -1.0 = -a */
         if (inst->src[0].is_negative_one()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = inst->src[1];
            inst->src[0].negate = !inst->src[0].negate;
            inst->resize_sources(1);
            progress = true;
            break;
         }
         if (inst->src[1].is_negative_one()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0].negate = !inst->src[0].negate;
            inst->resize_sources(1);
            progress = true;
            break;
         }
         break;
      case BRW_OPCODE_ADD:
         if (inst->src[1].file != IMM)
            continue;
         if (brw_type_is_int(inst->src[1].type) &&
             inst->src[1].is_zero()) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->resize_sources(1);
            progress = true;
            break;
         }
         if (inst->src[0].file == IMM) {
            assert(inst->src[0].type == BRW_TYPE_F);
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0].f += inst->src[1].f;
            inst->resize_sources(1);
            progress = true;
            break;
         }
         break;
      case BRW_OPCODE_AND:
-         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+         if (brw_constant_fold_instruction(devinfo, inst))
            const uint64_t src0 = src_as_uint(inst->src[0]);
            const uint64_t src1 = src_as_uint(inst->src[1]);
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
            inst->resize_sources(1);
            progress = true;
            break;
         }
         break;
      case BRW_OPCODE_OR:
-         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+         if (brw_constant_fold_instruction(devinfo, inst)) {
            const uint64_t src0 = src_as_uint(inst->src[0]);
            const uint64_t src1 = src_as_uint(inst->src[1]);
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
            inst->resize_sources(1);
            progress = true;
-            break;
+         } else if (inst->src[0].equals(inst->src[1]) ||
         }
         if (inst->src[0].equals(inst->src[1]) ||
                    inst->src[1].is_zero()) {
            /* On Gfx8+, the OR instruction can have a source modifier that
             * performs logical not on the operand.  Cases of 'OR r0, ~r1, 0'
@ -388,35 +456,8 @@ brw_fs_opt_algebraic(fs_visitor &s)
         }
         break;
      case BRW_OPCODE_SHL:
-         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+         if (brw_constant_fold_instruction(devinfo, inst))
            /* It's not currently possible to generate this, and this constant
             * folding does not handle it.
             */
            assert(!inst->saturate);
            brw_reg result;
            switch (brw_type_size_bytes(inst->src[0].type)) {
            case 2:
               result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
               break;
            case 4:
               result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
               break;
            case 8:
               result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
               break;
            default:
               /* Just in case a future platform re-enables B or UB types. */
               unreachable("Invalid source size.");
            }
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = retype(result, inst->dst.type);
            inst->resize_sources(1);
            progress = true;
         }
         break;
      case SHADER_OPCODE_BROADCAST: