intel/elk: Remove Gfx9+-only passes

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
Caio Oliveira 2024-02-13 12:58:15 -08:00 committed by Marge Bot
parent fb2eee2aaa
commit 9f5213923e
2 changed files with 0 additions and 210 deletions

View file

@@ -5311,65 +5311,6 @@ elk_fs_visitor::lower_barycentrics()
return progress;
}
/**
 * Rewrite a derivative instruction as the floating-point difference of two
 * quad swizzles of its source, specified as \p swz0 and \p swz1.
 *
 * The original instruction is turned in place into an ADD computing
 * swizzle(\p swz1) - swizzle(\p swz0).  Always returns true (progress).
 */
static bool
lower_derivative(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst,
                 unsigned swz0, unsigned swz1)
{
   const fs_builder ubld = fs_builder(v, block, inst).exec_all();

   /* Materialize the two swizzled copies of the source in fresh VGRFs of
    * the source's type.
    */
   const elk_fs_reg shuffled0 = ubld.vgrf(inst->src[0].type);
   const elk_fs_reg shuffled1 = ubld.vgrf(inst->src[0].type);
   ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, shuffled0, inst->src[0],
             elk_imm_ud(swz0));
   ubld.emit(ELK_SHADER_OPCODE_QUAD_SWIZZLE, shuffled1, inst->src[0],
             elk_imm_ud(swz1));

   /* Repurpose the original instruction as the subtraction, expressed as
    * an ADD with the first operand negated.
    */
   inst->opcode = ELK_OPCODE_ADD;
   inst->resize_sources(2);
   inst->src[0] = negate(shuffled0);
   inst->src[1] = shuffled1;

   return true;
}
/**
 * Lower derivative instructions on platforms where codegen cannot implement
 * them efficiently (i.e. XeHP).
 */
bool
elk_fs_visitor::lower_derivatives()
{
   bool progress = false;

   /* Only XeHP (verx10 >= 125) needs this lowering. */
   if (devinfo->verx10 < 125)
      return false;

   foreach_block_and_inst(block, elk_fs_inst, inst, cfg) {
      /* Each derivative opcode maps to a fixed pair of quad swizzles whose
       * difference yields the derivative.
       */
      switch (inst->opcode) {
      case ELK_FS_OPCODE_DDX_COARSE:
         progress |= lower_derivative(this, block, inst,
                                      ELK_SWIZZLE_XXXX, ELK_SWIZZLE_YYYY);
         break;
      case ELK_FS_OPCODE_DDX_FINE:
         progress |= lower_derivative(this, block, inst,
                                      ELK_SWIZZLE_XXZZ, ELK_SWIZZLE_YYWW);
         break;
      case ELK_FS_OPCODE_DDY_COARSE:
         progress |= lower_derivative(this, block, inst,
                                      ELK_SWIZZLE_XXXX, ELK_SWIZZLE_ZZZZ);
         break;
      case ELK_FS_OPCODE_DDY_FINE:
         progress |= lower_derivative(this, block, inst,
                                      ELK_SWIZZLE_XYXY, ELK_SWIZZLE_ZWZW);
         break;
      default:
         break;
      }
   }

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
bool
elk_fs_visitor::lower_find_live_channel()
{
@@ -5844,8 +5785,6 @@ elk_fs_visitor::optimize()
OPT(opt_algebraic);
}
OPT(fixup_nomask_control_flow);
if (progress) {
if (OPT(opt_copy_propagation))
OPT(opt_algebraic);
@@ -5897,7 +5836,6 @@ elk_fs_visitor::optimize()
}
progress = false;
OPT(lower_derivatives);
OPT(lower_regioning);
if (progress) {
if (OPT(opt_copy_propagation))
@@ -5935,152 +5873,6 @@ elk_fs_visitor::fixup_3src_null_dest()
DEPENDENCY_VARIABLES);
}
/**
 * Return the first instruction of the program that might open a region of
 * divergent control flow due to a HALT jump, or NULL if there is none.
 *
 * There is no matching find_halt_control_flow_region_end(): the region of
 * divergence always extends until the only ELK_SHADER_OPCODE_HALT_TARGET
 * in the program.
 */
static const elk_fs_inst *
find_halt_control_flow_region_start(const elk_fs_visitor *v)
{
   foreach_block_and_inst(block, elk_fs_inst, inst, v->cfg) {
      const bool starts_halt_region =
         inst->opcode == ELK_OPCODE_HALT ||
         inst->opcode == ELK_SHADER_OPCODE_HALT_TARGET;

      if (starts_halt_region)
         return inst;
   }

   return NULL;
}
/**
 * Work around the Gfx12 hardware bug filed as Wa_1407528679. EU fusion
 * can cause a BB to be executed with all channels disabled, which will lead
 * to the execution of any NoMask instructions in it, even though any
 * execution-masked instructions will be correctly shot down. This may break
 * assumptions of some NoMask SEND messages whose descriptor depends on data
 * generated by live invocations of the shader.
 *
 * This avoids the problem by predicating certain instructions on an ANY
 * horizontal predicate that makes sure that their execution is omitted when
 * all channels of the program are disabled.
 *
 * Returns true if any instruction was predicated (in which case instruction
 * and variable analyses are invalidated).
 */
bool
elk_fs_visitor::fixup_nomask_control_flow()
{
   /* The workaround only applies to Gfx12 hardware. */
   if (devinfo->ver != 12)
      return false;

   /* Pick the ANY-horizontal predicate matching the dispatch width, so the
    * predicate covers exactly the channels of this shader.
    */
   const elk_predicate pred = dispatch_width > 16 ? ELK_PREDICATE_ALIGN1_ANY32H :
                              dispatch_width > 8 ? ELK_PREDICATE_ALIGN1_ANY16H :
                              ELK_PREDICATE_ALIGN1_ANY8H;
   const elk_fs_inst *halt_start = find_halt_control_flow_region_start(this);
   /* Number of enclosing divergent control-flow constructs at the current
    * scan position.  Because the scan below is backwards, block *ends*
    * (WHILE/ENDIF/HALT_TARGET) increment it and block *starts* (DO/IF)
    * decrement it.
    */
   unsigned depth = 0;
   bool progress = false;
   const fs_live_variables &live_vars = live_analysis.require();

   /* Scan the program backwards in order to be able to easily determine
    * whether the flag register is live at any point.
    */
   foreach_block_reverse_safe(block, cfg) {
      BITSET_WORD flag_liveout = live_vars.block_data[block->num]
                                          .flag_liveout[0];
      /* The liveness tracking below only handles a single flag_liveout
       * word; make sure that's all there is.
       */
      STATIC_ASSERT(ARRAY_SIZE(live_vars.block_data[0].flag_liveout) == 1);

      foreach_inst_in_block_reverse_safe(elk_fs_inst, inst, block) {
         /* An unpredicated instruction of at least 8 channels fully
          * overwrites the flag bits it writes, so those bits are not live
          * above this point.
          */
         if (!inst->predicate && inst->exec_size >= 8)
            flag_liveout &= ~inst->flags_written(devinfo);

         switch (inst->opcode) {
         case ELK_OPCODE_DO:
         case ELK_OPCODE_IF:
            /* Note that this doesn't handle ELK_OPCODE_HALT since only
             * the first one in the program closes the region of divergent
             * control flow due to any HALT instructions -- Instead this is
             * handled with the halt_start check below.
             */
            depth--;
            break;

         case ELK_OPCODE_WHILE:
         case ELK_OPCODE_ENDIF:
         case ELK_SHADER_OPCODE_HALT_TARGET:
            depth++;
            break;

         default:
            /* Note that the vast majority of NoMask SEND instructions in the
             * program are harmless while executed in a block with all
             * channels disabled, since any instructions with side effects we
             * could hit here should be execution-masked.
             *
             * The main concern is NoMask SEND instructions where the message
             * descriptor or header depends on data generated by live
             * invocations of the shader (RESINFO and
             * ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD with a dynamically
             * computed surface index seem to be the only examples right now
             * where this could easily lead to GPU hangs). Unfortunately we
             * have no straightforward way to detect that currently, so just
             * predicate any NoMask SEND instructions we find under control
             * flow.
             *
             * If this proves to have a measurable performance impact it can
             * be easily extended with a whitelist of messages we know we can
             * safely omit the predication for.
             */
            if (depth && inst->force_writemask_all &&
                is_send(inst) && !inst->predicate) {
               /* We need to load the execution mask into the flag register by
                * using a builder with channel group matching the whole shader
                * (rather than the default which is derived from the original
                * instruction), in order to avoid getting a right-shifted
                * value.
                */
               const fs_builder ubld = fs_builder(this, block, inst)
                                       .exec_all().group(dispatch_width, 0);
               const elk_fs_reg flag = retype(elk_flag_reg(0, 0),
                                              ELK_REGISTER_TYPE_UD);

               /* Due to the lack of flag register allocation we need to save
                * and restore the flag register if it's live.
                */
               const bool save_flag = flag_liveout &
                                      flag_mask(flag, dispatch_width / 8);
               const elk_fs_reg tmp = ubld.group(8, 0).vgrf(flag.type);

               if (save_flag) {
                  ubld.group(8, 0).UNDEF(tmp);
                  ubld.group(1, 0).MOV(tmp, flag);
               }

               /* Load the live-channel mask into the flag register and
                * predicate the offending SEND on it.
                */
               ubld.emit(ELK_FS_OPCODE_LOAD_LIVE_CHANNELS);

               set_predicate(pred, inst);
               inst->flag_subreg = 0;
               inst->predicate_trivial = true;

               /* Restore the saved flag value right after the SEND. */
               if (save_flag)
                  ubld.group(1, 0).at(block, inst->next).MOV(flag, tmp);

               progress = true;
            }
            break;
         }

         /* Crossing halt_start (while scanning backwards) leaves the region
          * of divergence introduced by HALT jumps.
          */
         if (inst == halt_start)
            depth--;

         flag_liveout |= inst->flags_read(devinfo);
      }
   }

   if (progress)
      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);

   return progress;
}
uint32_t
elk_fs_visitor::compute_max_register_pressure()
{

View file

@@ -213,7 +213,6 @@ public:
void allocate_registers(bool allow_spilling);
uint32_t compute_max_register_pressure();
void fixup_3src_null_dest();
bool fixup_nomask_control_flow();
void assign_curb_setup();
void assign_urb_setup();
void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
@@ -275,7 +274,6 @@ public:
bool lower_minmax();
bool lower_simd_width();
bool lower_barycentrics();
bool lower_derivatives();
bool lower_find_live_channel();
bool lower_scoreboard();
bool lower_sub_sat();