brw: Remove brw_inst::no_dd_check/no_dd_clear

These dependency hints were primarily useful for the vec4 backend, where it was common to write subsets of a vec4's components across multiple instructions. In the scalar backend, we rarely used them. They also no longer exist on Tigerlake and later in favor of software scoreboarding. Dropping this allows us to clean up the IR a bit. We still use the hardware hints in the generator in a couple places: - Gfx9-12.0 scratch headers - Quad swizzles - Indirect MOV lowering In theory we might want them back if we moved that lowering to the IR. For scratch at least, I suspect it won't have a huge impact, as we're already incurring the cost of spills/fills. The others are fairly rare as well, so it may not be worth keeping. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
2026-03-17 14:40:43 +01:00 · 2025-08-13 14:05:43 -07:00 · 2025-08-13 14:05:43 -07:00 · 6281a12822
commit 6281a12822
parent b26c68e40a
5 changed files with 45 additions and 64 deletions
--- a/src/intel/compiler/brw_analysis_performance.cpp
+++ b/src/intel/compiler/brw_analysis_performance.cpp
@ -936,26 +936,24 @@ namespace {
      }

      /* Stall on any write dependencies. */
-      if (!inst->no_dd_check) {
-         if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
-            for (unsigned j = 0; j < regs_written(inst); j++)
-               stall_on_dependency(
-                  st, reg_dependency_id(devinfo, inst->dst, j));
-         }
+      if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
+         for (unsigned j = 0; j < regs_written(inst); j++)
+            stall_on_dependency(
+               st, reg_dependency_id(devinfo, inst->dst, j));
+      }

-         if (inst->writes_accumulator_implicitly(devinfo)) {
-            for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
-                 j <= accum_reg_of_channel(devinfo, inst, info.tx,
-                                           inst->exec_size - 1); j++)
-               stall_on_dependency(
-                  st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
-         }
+      if (inst->writes_accumulator_implicitly(devinfo)) {
+         for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0);
+              j <= accum_reg_of_channel(devinfo, inst, info.tx,
+                                        inst->exec_size - 1); j++)
+            stall_on_dependency(
+               st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
+      }

-         if (const unsigned mask = inst->flags_written(devinfo)) {
-            for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) {
-               if (mask & (1 << i))
-                  stall_on_dependency(st, flag_dependency_id(i));
-            }
+      if (const unsigned mask = inst->flags_written(devinfo)) {
+         for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) {
+            if (mask & (1 << i))
+               stall_on_dependency(st, flag_dependency_id(i));
         }
      }

--- a/src/intel/compiler/brw_generator.cpp
+++ b/src/intel/compiler/brw_generator.cpp
@ -1354,19 +1354,15 @@ brw_generator::generate_code(const brw_shader &s,
      if (multiple_instructions_emitted)
         continue;

-      if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
+      if (inst->conditional_mod) {
         assert(p->next_insn_offset == last_insn_offset + 16 ||
-                !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
+                !"conditional_mod for IR "
                 "emitting more than 1 instruction");

         brw_eu_inst *last = &p->store[last_insn_offset / 16];

         if (inst->conditional_mod)
            brw_eu_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
-         if (devinfo->ver < 12) {
-            brw_eu_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
-            brw_eu_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
-         }
      }

      /* When enabled, insert sync NOP after every instruction and make sure
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@ -194,8 +194,6 @@ public:
         bool predicate_inverse:1;
         bool writes_accumulator:1; /**< instruction implicitly writes accumulator */
         bool force_writemask_all:1;
-         bool no_dd_clear:1;
-         bool no_dd_check:1;
         bool saturate:1;
         bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
         bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
--- a/src/intel/compiler/brw_lower_scoreboard.cpp
+++ b/src/intel/compiler/brw_lower_scoreboard.cpp
@ -1272,41 +1272,39 @@ namespace {
            add_dependency(ids, deps[ip],
                           dependency(TGL_SBID_SET, ip, exec_all));

-         if (!inst->no_dd_check) {
-            if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
-                !inst->dst.is_accumulator()) {
-               for (unsigned j = 0; j < regs_written(inst); j++) {
-                  add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
-                     sb.get(byte_offset(inst->dst, REG_SIZE * j))));
-               }
+         if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
+             !inst->dst.is_accumulator()) {
+            for (unsigned j = 0; j < regs_written(inst); j++) {
+               add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
+                  sb.get(byte_offset(inst->dst, REG_SIZE * j))));
            }
+         }

-            if (inst->writes_accumulator_implicitly(devinfo) ||
-                inst->dst.is_accumulator()) {
-               /* Wa_22012725308:
-                *
-                * "When the accumulator registers are used as source and/or
-                *  destination, hardware does not ensure prevention of write
-                *  after read hazard across execution pipes."
-                */
-               const dependency dep = sb.get(brw_acc_reg(8));
+         if (inst->writes_accumulator_implicitly(devinfo) ||
+             inst->dst.is_accumulator()) {
+            /* Wa_22012725308:
+             *
+             * "When the accumulator registers are used as source and/or
+             *  destination, hardware does not ensure prevention of write
+             *  after read hazard across execution pipes."
+             */
+            const dependency dep = sb.get(brw_acc_reg(8));
+            if (dep.ordered && !is_single_pipe(dep.jp, p))
+               add_dependency(ids, deps[ip], dep);
+         }
+
+         /* flags_written returns a bit set per byte of the flags register
+          * file that is writtten.
+          */
+         unsigned flags = inst->flags_written(devinfo);
+         for (unsigned i = 0; flags != 0; i++) {
+            if ((flags & 0x0f) != 0) {
+               const dependency dep = sb.get(brw_flag_reg(i, 0));
               if (dep.ordered && !is_single_pipe(dep.jp, p))
                  add_dependency(ids, deps[ip], dep);
            }

-            /* flags_written returns a bit set per byte of the flags register
-             * file that is writtten.
-             */
-            unsigned flags = inst->flags_written(devinfo);
-            for (unsigned i = 0; flags != 0; i++) {
-               if ((flags & 0x0f) != 0) {
-                  const dependency dep = sb.get(brw_flag_reg(i, 0));
-                  if (dep.ordered && !is_single_pipe(dep.jp, p))
-                     add_dependency(ids, deps[ip], dep);
-               }
-
-               flags >>= 4;
-            }
+            flags >>= 4;
         }

         update_inst_scoreboard(shader, jps, inst, ip, sb);
@ -1439,7 +1437,6 @@ namespace {

         /* Update the IR. */
         inst->sched = swsb;
-         inst->no_dd_check = inst->no_dd_clear = false;
         ip++;
      }

--- a/src/intel/compiler/brw_reg_allocate.cpp
+++ b/src/intel/compiler/brw_reg_allocate.cpp
@ -1275,14 +1275,6 @@ brw_reg_alloc::spill_reg(unsigned spill_reg)
          */
         inst->dst.offset %= REG_SIZE * reg_unit(devinfo);

-         /* If we're immediately spilling the register, we should not use
-          * destination dependency hints.  Doing so will cause the GPU do
-          * try to read and write the register at the same time and may
-          * hang the GPU.
-          */
-         inst->no_dd_clear = false;
-         inst->no_dd_check = false;
-
         /* Calculate the execution width of the scratch messages (which work
          * in terms of 32 bit components so we have a fixed number of eight
          * channels per spilled register).  We attempt to write one