intel/fs/gen12: Demodernize software scoreboard lowering pass.

Kept as a separate commit in order to avoid distracting reviewers of the software scoreboard pass with memory management boilerplate. Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
2026-01-04 00:30:11 +01:00 · 2019-10-09 18:47:29 -07:00 · 2019-10-09 18:47:29 -07:00 · 396f6b27a7
commit 396f6b27a7
parent 265c7c8971
1 changed files with 163 additions and 81 deletions
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@ -52,9 +52,6 @@
 *  - tdr0 thread dependency register
 */

-#include <tuple>
-#include <vector>
-
 #include "brw_fs.h"
 #include "brw_cfg.h"

@ -103,19 +100,30 @@ namespace {
    */
   typedef int ordered_address;

+   /**
+    * Return the number of instructions in the program.
+    */
+   unsigned
+   num_instructions(const backend_shader *shader)
+   {
+      return shader->cfg->blocks[shader->cfg->num_blocks - 1]->end_ip + 1;
+   }
+
   /**
    * Calculate the local ordered_address instruction counter at every
    * instruction of the shader for subsequent constant-time look-up.
    */
-   std::vector<ordered_address>
+   ordered_address *
   ordered_inst_addresses(const fs_visitor *shader)
   {
-      std::vector<ordered_address> jps;
+      ordered_address *jps = new ordered_address[num_instructions(shader)];
      ordered_address jp = 0;
+      unsigned ip = 0;

      foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
-         jps.push_back(jp);
+         jps[ip] = jp;
         jp += ordered_unit(inst);
+         ip++;
      }

      return jps;
@ -174,6 +182,17 @@ namespace {
    * only if i == j for every pair of unsigned integers i and j.
    */
   struct equivalence_relation {
+      equivalence_relation(unsigned n) : is(new unsigned[n]), n(n)
+      {
+         for (unsigned i = 0; i < n; i++)
+            is[i] = i;
+      }
+
+      ~equivalence_relation()
+      {
+         delete[] is;
+      }
+
      /**
       * Return equivalence class index of the specified element.  Effectively
       * this is the numeric value of an arbitrary representative from the
@ -185,7 +204,7 @@ namespace {
      unsigned
      lookup(unsigned i) const
      {
-         if (i < is.size() && is[i] != i)
+         if (i < n && is[i] != i)
            return lookup(is[i]);
         else
            return i;
@ -195,12 +214,13 @@ namespace {
       * Create an array with the results of the lookup() method for
       * constant-time evaluation.
       */
-      std::vector<unsigned>
-      flatten() const {
-         std::vector<unsigned> ids;
+      unsigned *
+      flatten() const
+      {
+         unsigned *ids = new unsigned[n];

-         for (const auto i : is)
-            ids.push_back(lookup(i));
+         for (unsigned i = 0; i < n; i++)
+            ids[i] = lookup(i);

         return ids;
      }
@ -223,6 +243,11 @@ namespace {
      }

   private:
+      equivalence_relation(const equivalence_relation &);
+
+      equivalence_relation &
+      operator=(const equivalence_relation &);
+
      /**
       * Assign the representative of \p from to be equivalent to \p to.
       *
@ -233,17 +258,17 @@ namespace {
      assign(unsigned from, unsigned to)
      {
         if (from != to) {
-            if (from < is.size() && is[from] != from)
-               assign(is[from], to);
+            assert(from < n);

-            for (unsigned i = is.size(); i <= from; i++)
-               is.push_back(i);
+            if (is[from] != from)
+               assign(is[from], to);

            is[from] = to;
         }
      }

-      std::vector<unsigned> is;
+      unsigned *is;
+      unsigned n;
   };

   /**
@ -559,32 +584,74 @@ namespace {
    * Dependency list handling.
    * @{
    */
+   struct dependency_list {
+      dependency_list() : deps(NULL), n(0) {}
+
+      ~dependency_list()
+      {
+         free(deps);
+      }
+
+      void
+      push_back(const dependency &dep)
+      {
+         deps = (dependency *)realloc(deps, (n + 1) * sizeof(*deps));
+         deps[n++] = dep;
+      }
+
+      unsigned
+      size() const
+      {
+         return n;
+      }
+
+      const dependency &
+      operator[](unsigned i) const
+      {
+         assert(i < n);
+         return deps[i];
+      }
+
+      dependency &
+      operator[](unsigned i)
+      {
+         assert(i < n);
+         return deps[i];
+      }
+
+   private:
+      dependency_list(const dependency_list &);
+      dependency_list &
+      operator=(const dependency_list &);
+
+      dependency *deps;
+      unsigned n;
+   };

   /**
    * Add dependency \p dep to the list of dependencies of an instruction
    * \p deps.
    */
   void
-   add_dependency(const std::vector<unsigned> &ids,
-                  std::vector<dependency> &deps, dependency dep)
+   add_dependency(const unsigned *ids, dependency_list &deps, dependency dep)
   {
      if (is_valid(dep)) {
         /* Translate the unordered dependency token first in order to keep
          * the list minimally redundant.
          */
-         if (dep.unordered && dep.id < ids.size())
+         if (dep.unordered)
            dep.id = ids[dep.id];

         /* Try to combine the specified dependency with any existing ones. */
-         for (auto &dep1 : deps) {
-            if (dep.ordered && dep1.ordered) {
-               dep1.jp = MAX2(dep1.jp, dep.jp);
-               dep1.ordered |= dep.ordered;
+         for (unsigned i = 0; i < deps.size(); i++) {
+            if (dep.ordered && deps[i].ordered) {
+               deps[i].jp = MAX2(deps[i].jp, dep.jp);
+               deps[i].ordered |= dep.ordered;
               dep.ordered = TGL_REGDIST_NULL;
            }

-            if (dep.unordered && dep1.unordered && dep1.id == dep.id) {
-               dep1.unordered |= dep.unordered;
+            if (dep.unordered && deps[i].unordered && deps[i].id == dep.id) {
+               deps[i].unordered |= dep.unordered;
               dep.unordered = TGL_SBID_NULL;
            }
         }
@ -601,16 +668,16 @@ namespace {
    * \p jp.
    */
   tgl_swsb
-   ordered_dependency_swsb(const std::vector<dependency> &deps,
+   ordered_dependency_swsb(const dependency_list &deps,
                           const ordered_address &jp)
   {
      unsigned min_dist = ~0u;

-      for (const auto &dep : deps) {
-         if (dep.ordered) {
-            const unsigned dist = jp - dep.jp;
+      for (unsigned i = 0; i < deps.size(); i++) {
+         if (deps[i].ordered) {
+            const unsigned dist = jp - deps[i].jp;
            const unsigned max_dist = 10;
-            assert(jp > dep.jp);
+            assert(jp > deps[i].jp);
            if (dist <= max_dist)
               min_dist = MIN3(min_dist, dist, 7);
         }
@ -624,7 +691,7 @@ namespace {
    * ordered_address \p jp has any non-trivial ordered dependencies.
    */
   bool
-   find_ordered_dependency(const std::vector<dependency> &deps,
+   find_ordered_dependency(const dependency_list &deps,
                           const ordered_address &jp)
   {
      return ordered_dependency_swsb(deps, jp).regdist;
@ -636,13 +703,13 @@ namespace {
    * no such dependency is present.
    */
   tgl_sbid_mode
-   find_unordered_dependency(const std::vector<dependency> &deps,
+   find_unordered_dependency(const dependency_list &deps,
                             tgl_sbid_mode unordered)
   {
      if (unordered) {
-         for (const auto &dep : deps) {
-            if (unordered & dep.unordered)
-               return dep.unordered;
+         for (unsigned i = 0; i < deps.size(); i++) {
+            if (unordered & deps[i].unordered)
+               return deps[i].unordered;
         }
      }

@ -657,7 +724,7 @@ namespace {
    */
   tgl_sbid_mode
   baked_unordered_dependency_mode(const fs_inst *inst,
-                                   const std::vector<dependency> &deps,
+                                   const dependency_list &deps,
                                   const ordered_address &jp)
   {
      const bool has_ordered = find_ordered_dependency(deps, jp);
@ -687,8 +754,7 @@ namespace {
    * instruction \p inst.
    */
   void
-   update_inst_scoreboard(const fs_visitor *shader,
-                          const std::vector<ordered_address> &jps,
+   update_inst_scoreboard(const fs_visitor *shader, const ordered_address *jps,
                          const fs_inst *inst, unsigned ip, scoreboard &sb)
   {
      /* Track any source registers that may be fetched asynchronously by this
@ -730,11 +796,11 @@ namespace {
    * unconditionally resolved) dependencies at the end of each block of the
    * program.
    */
-   std::vector<scoreboard>
+   scoreboard *
   gather_block_scoreboards(const fs_visitor *shader,
-                            const std::vector<ordered_address> &jps)
+                            const ordered_address *jps)
   {
-      std::vector<scoreboard> sbs(shader->cfg->num_blocks);
+      scoreboard *sbs = new scoreboard[shader->cfg->num_blocks];
      unsigned ip = 0;

      foreach_block_and_inst(block, fs_inst, inst, shader->cfg)
@ -750,15 +816,14 @@ namespace {
    * Calculates the set of dependencies potentially pending at the beginning
    * of each block, and returns it as an array of scoreboard objects.
    */
-   std::pair<std::vector<scoreboard>, std::vector<unsigned>>
+   scoreboard *
   propagate_block_scoreboards(const fs_visitor *shader,
-                               const std::vector<ordered_address> &jps)
+                               const ordered_address *jps,
+                               equivalence_relation &eq)
   {
-      const std::vector<scoreboard> delta_sbs =
-         gather_block_scoreboards(shader, jps);
-      std::vector<scoreboard> in_sbs(shader->cfg->num_blocks);
-      std::vector<scoreboard> out_sbs(shader->cfg->num_blocks);
-      equivalence_relation eq;
+      const scoreboard *delta_sbs = gather_block_scoreboards(shader, jps);
+      scoreboard *in_sbs = new scoreboard[shader->cfg->num_blocks];
+      scoreboard *out_sbs = new scoreboard[shader->cfg->num_blocks];

      for (bool progress = true; progress;) {
         progress = false;
@ -784,63 +849,66 @@ namespace {
         }
      }

-      return { std::move(in_sbs), eq.flatten() };
+      delete[] delta_sbs;
+      delete[] out_sbs;
+
+      return in_sbs;
   }

   /**
    * Return the list of potential dependencies of each instruction in the
    * shader based on the result of global dependency analysis.
    */
-   std::vector<std::vector<dependency>>
+   dependency_list *
   gather_inst_dependencies(const fs_visitor *shader,
-                            const std::vector<ordered_address> &jps)
+                            const ordered_address *jps)
   {
-      std::vector<scoreboard> sbs;
-      std::vector<unsigned> ids;
-      std::vector<std::vector<dependency>> deps;
+      equivalence_relation eq(num_instructions(shader));
+      scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq);
+      const unsigned *ids = eq.flatten();
+      dependency_list *deps = new dependency_list[num_instructions(shader)];
      unsigned ip = 0;

-      std::tie(sbs, ids) = propagate_block_scoreboards(shader, jps);
-
      foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
         scoreboard &sb = sbs[block->num];
-         std::vector<dependency> inst_deps;

         for (unsigned i = 0; i < inst->sources; i++) {
            for (unsigned j = 0; j < regs_read(inst, i); j++)
-               add_dependency(ids, inst_deps, dependency_for_read(
+               add_dependency(ids, deps[ip], dependency_for_read(
                  sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
         }

         if (is_send(inst) && inst->base_mrf != -1) {
            for (unsigned j = 0; j < inst->mlen; j++)
-               add_dependency(ids, inst_deps, dependency_for_read(
+               add_dependency(ids, deps[ip], dependency_for_read(
                  sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
         }

         if (is_unordered(inst))
-            add_dependency(ids, inst_deps, dependency(TGL_SBID_SET, ip));
+            add_dependency(ids, deps[ip], dependency(TGL_SBID_SET, ip));

         if (!inst->no_dd_check) {
            if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
               for (unsigned j = 0; j < regs_written(inst); j++) {
-                  add_dependency(ids, inst_deps, dependency_for_write(inst,
+                  add_dependency(ids, deps[ip], dependency_for_write(inst,
                     sb.get(byte_offset(inst->dst, REG_SIZE * j))));
               }
            }

            if (is_send(inst) && inst->base_mrf != -1) {
               for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
-                  add_dependency(ids, inst_deps, dependency_for_write(inst,
+                  add_dependency(ids, deps[ip], dependency_for_write(inst,
                     sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
            }
         }

-         deps.push_back(inst_deps);
         update_inst_scoreboard(shader, jps, inst, ip, sb);
         ip++;
      }

+      delete[] sbs;
+      delete[] ids;
+
      return deps;
   }

@ -850,30 +918,39 @@ namespace {
    * Allocate SBID tokens to track the execution of every out-of-order
    * instruction of the shader.
    */
-   std::vector<std::vector<dependency>>
+   dependency_list *
   allocate_inst_dependencies(const fs_visitor *shader,
-                              const std::vector<std::vector<dependency>> &deps0)
+                              const dependency_list *deps0)
   {
      /* XXX - Use bin-packing algorithm to assign hardware SBIDs optimally in
       *       shaders with a large number of SEND messages.
       */
-      std::vector<std::vector<dependency>> deps1;
-      std::vector<unsigned> ids(deps0.size(), ~0u);
+
+      /* Allocate an unordered dependency ID to hardware SBID translation
+       * table with as many entries as instructions there are in the shader,
+       * which is the maximum number of unordered IDs we can find in the
+       * program.
+       */
+      unsigned *ids = new unsigned[num_instructions(shader)];
+      for (unsigned ip = 0; ip < num_instructions(shader); ip++)
+         ids[ip] = ~0u;
+
+      dependency_list *deps1 = new dependency_list[num_instructions(shader)];
      unsigned next_id = 0;

-      for (const auto &inst_deps0 : deps0) {
-         std::vector<dependency> inst_deps1;
+      for (unsigned ip = 0; ip < num_instructions(shader); ip++) {
+         for (unsigned i = 0; i < deps0[ip].size(); i++) {
+            const dependency &dep = deps0[ip][i];

-         for (const auto &dep : inst_deps0) {
            if (dep.unordered && ids[dep.id] == ~0u)
               ids[dep.id] = (next_id++) & 0xf;

-            add_dependency(ids, inst_deps1, dep);
+            add_dependency(ids, deps1[ip], dep);
         }
-
-         deps1.push_back(inst_deps1);
      }

+      delete[] ids;
+
      return deps1;
   }

@ -884,8 +961,8 @@ namespace {
    */
   void
   emit_inst_dependencies(fs_visitor *shader,
-                          const std::vector<ordered_address> &jps,
-                          const std::vector<std::vector<dependency>> &deps)
+                          const ordered_address *jps,
+                          const dependency_list *deps)
   {
      unsigned ip = 0;

@ -894,7 +971,9 @@ namespace {
         const tgl_sbid_mode unordered_mode =
            baked_unordered_dependency_mode(inst, deps[ip], jps[ip]);

-         for (const auto &dep : deps[ip]) {
+         for (unsigned i = 0; i < deps[ip].size(); i++) {
+            const dependency &dep = deps[ip][i];
+
            if (dep.unordered) {
               if (unordered_mode == dep.unordered && !swsb.mode) {
                  /* Bake unordered dependency into the instruction's SWSB if
@ -929,10 +1008,13 @@ bool
 fs_visitor::lower_scoreboard()
 {
   if (devinfo->gen >= 12) {
-      const std::vector<ordered_address> jps = ordered_inst_addresses(this);
-      emit_inst_dependencies(this, jps,
-         allocate_inst_dependencies(this,
-            gather_inst_dependencies(this, jps)));
+      const ordered_address *jps = ordered_inst_addresses(this);
+      const dependency_list *deps0 = gather_inst_dependencies(this, jps);
+      const dependency_list *deps1 = allocate_inst_dependencies(this, deps0);
+      emit_inst_dependencies(this, jps, deps1);
+      delete[] deps1;
+      delete[] deps0;
+      delete[] jps;
   }

   return true;