intel/compiler: Use array to iterate the scheduler nodes

For all the preparatory data collection that happens before the
actual scheduling, it is possible to walk the schedule nodes
in order by iterating over the range of the array dedicated to
a given block.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25841>
This commit is contained in:
Caio Oliveira 2023-10-20 00:09:37 -07:00 committed by Marge Bot
parent fe6ac5a184
commit ddff6428c5

View file

@ -620,7 +620,7 @@ public:
this->post_reg_alloc = (mode == SCHEDULE_POST);
this->mode = mode;
this->reg_pressure = 0;
this->block_idx = 0;
this->last_grf_write = linear_zalloc_array(lin_ctx, schedule_node *, grf_count * grf_write_scale);
if (!post_reg_alloc) {
this->reg_pressure_in = linear_zalloc_array(lin_ctx, int, block_count);
@ -678,6 +678,12 @@ public:
n++;
}
assert(n == nodes + nodes_len);
current.block = NULL;
current.start = NULL;
current.end = NULL;
current.len = 0;
current.time = 0;
}
~instruction_scheduler()
@ -690,7 +696,7 @@ public:
void add_dep(schedule_node *before, schedule_node *after);
void run(cfg_t *cfg);
void add_insts_from_block(bblock_t *block);
void set_current_block(bblock_t *block);
void compute_delays();
void compute_exits();
virtual void calculate_deps() = 0;
@ -710,7 +716,7 @@ public:
virtual void update_register_pressure(backend_instruction *inst) = 0;
virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
void schedule_instructions(bblock_t *block);
void schedule_instructions();
void *mem_ctx;
linear_ctx *lin_ctx;
@ -718,11 +724,24 @@ public:
schedule_node *nodes;
int nodes_len;
/* Current block being processed. */
struct {
bblock_t *block;
/* Range of nodes in the block. End will point to first node
* address after the block, i.e. the range is [start, end).
*/
schedule_node *start;
schedule_node *end;
int len;
int time;
} current;
bool post_reg_alloc;
int grf_count;
unsigned hw_reg_count;
int reg_pressure;
int block_idx;
exec_list instructions;
const backend_shader *bs;
@ -928,6 +947,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
int benefit = 0;
const int block_idx = current.block->num;
if (inst->dst.file == VGRF) {
if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
@ -1003,19 +1023,25 @@ vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *)
}
void
instruction_scheduler::add_insts_from_block(bblock_t *block)
instruction_scheduler::set_current_block(bblock_t *block)
{
schedule_node *start = nodes + block->start_ip;
schedule_node *end = nodes + block->end_ip + 1;
for (schedule_node *n = start; n < end; n++)
current.block = block;
current.start = nodes + block->start_ip;
current.len = block->end_ip - block->start_ip + 1;
current.end = current.start + current.len;
current.time = 0;
assert(instructions.is_empty());
for (schedule_node *n = current.start; n < current.end; n++) {
instructions.push_tail(n);
}
}
/** Computation of the delay member of each node. */
void
instruction_scheduler::compute_delays()
{
foreach_in_list_reverse(schedule_node, n, &instructions) {
for (schedule_node *n = current.end - 1; n >= current.start; n--) {
if (!n->child_count) {
n->delay = issue_time(n->inst);
} else {
@ -1034,7 +1060,7 @@ instruction_scheduler::compute_exits()
* graph. This is analogous to the node's critical path but calculated
* from the top instead of from the bottom of the block.
*/
foreach_in_list(schedule_node, n, &instructions) {
for (schedule_node *n = current.start; n < current.end; n++) {
for (int i = 0; i < n->child_count; i++) {
n->children[i]->unblocked_time =
MAX2(n->children[i]->unblocked_time,
@ -1047,7 +1073,7 @@ instruction_scheduler::compute_exits()
* nodes of its children which can be unblocked first according to the
* optimistic unblocked time estimate calculated above.
*/
foreach_in_list_reverse(schedule_node, n, &instructions) {
for (schedule_node *n = current.end - 1; n >= current.start; n--) {
n->exit = (n->inst->opcode == BRW_OPCODE_HALT ? n : NULL);
for (int i = 0; i < n->child_count; i++) {
@ -1150,25 +1176,16 @@ has_cross_lane_access(const fs_inst *inst)
void
instruction_scheduler::add_barrier_deps(schedule_node *n)
{
schedule_node *prev = (schedule_node *)n->prev;
schedule_node *next = (schedule_node *)n->next;
if (prev) {
while (!prev->is_head_sentinel()) {
add_dep(prev, n, 0);
if (is_scheduling_barrier(prev->inst))
break;
prev = (schedule_node *)prev->prev;
}
for (schedule_node *prev = n - 1; prev >= current.start; prev--) {
add_dep(prev, n, 0);
if (is_scheduling_barrier(prev->inst))
break;
}
if (next) {
while (!next->is_tail_sentinel()) {
add_dep(n, next, 0);
if (is_scheduling_barrier(next->inst))
break;
next = (schedule_node *)next->next;
}
for (schedule_node *next = n + 1; next < current.end; next++) {
add_dep(n, next, 0);
if (is_scheduling_barrier(next->inst))
break;
}
}
@ -1180,14 +1197,9 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
void
instruction_scheduler::add_cross_lane_deps(schedule_node *n)
{
schedule_node *prev = (schedule_node *)n->prev;
if (prev) {
while (!prev->is_head_sentinel()) {
if (has_cross_lane_access((fs_inst *)prev->inst))
add_dep(prev, n, 0);
prev = (schedule_node *)prev->prev;
}
for (schedule_node *prev = n - 1; prev >= current.start; prev--) {
if (has_cross_lane_access((fs_inst*)prev->inst))
add_dep(prev, n, 0);
}
}
@ -1215,7 +1227,7 @@ void
fs_instruction_scheduler::clear_last_grf_write()
{
if (!post_reg_alloc) {
foreach_in_list(schedule_node, n, &instructions) {
for (schedule_node *n = current.start; n < current.end; n++) {
fs_inst *inst = (fs_inst *)n->inst;
if (inst->dst.file == VGRF) {
@ -1248,7 +1260,7 @@ fs_instruction_scheduler::calculate_deps()
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
foreach_in_list(schedule_node, n, &instructions) {
for (schedule_node *n = current.start; n < current.end; n++) {
fs_inst *inst = (fs_inst *)n->inst;
if (is_scheduling_barrier(inst))
@ -1385,7 +1397,7 @@ fs_instruction_scheduler::calculate_deps()
last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
foreach_in_list_reverse_safe(schedule_node, n, &instructions) {
for (schedule_node *n = current.end - 1; n >= current.start; n--) {
fs_inst *inst = (fs_inst *)n->inst;
/* write-after-read deps. */
@ -1516,7 +1528,7 @@ vec4_instruction_scheduler::calculate_deps()
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
foreach_in_list(schedule_node, n, &instructions) {
for (schedule_node *n = current.start; n < current.end; n++) {
vec4_instruction *inst = (vec4_instruction *)n->inst;
if (is_scheduling_barrier(inst))
@ -1605,7 +1617,7 @@ vec4_instruction_scheduler::calculate_deps()
last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
foreach_in_list_reverse_safe(schedule_node, n, &instructions) {
for (schedule_node *n = current.end - 1; n >= current.start; n--) {
vec4_instruction *inst = (vec4_instruction *)n->inst;
/* write-after-read deps. */
@ -1842,18 +1854,17 @@ vec4_instruction_scheduler::issue_time(backend_instruction *)
}
void
instruction_scheduler::schedule_instructions(bblock_t *block)
instruction_scheduler::schedule_instructions()
{
const struct intel_device_info *devinfo = bs->devinfo;
int time = 0;
int instructions_to_schedule = block->end_ip - block->start_ip + 1;
if (!post_reg_alloc)
reg_pressure = reg_pressure_in[block->num];
block_idx = block->num;
reg_pressure = reg_pressure_in[current.block->num];
int scheduled = 0;
/* Remove non-DAG heads from the list. */
foreach_in_list_safe(schedule_node, n, &instructions) {
for (schedule_node *n = current.start; n < current.end; n++) {
if (n->parent_count != 0)
n->remove();
}
@ -1866,8 +1877,8 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
assert(chosen);
chosen->remove();
chosen->inst->exec_node::remove();
block->instructions.push_tail(chosen->inst);
instructions_to_schedule--;
current.block->instructions.push_tail(chosen->inst);
scheduled++;
if (!post_reg_alloc) {
reg_pressure -= get_register_pressure_benefit(chosen->inst);
@ -1880,15 +1891,15 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
* we're unblocked. After this, we have the time when the chosen
* instruction will start executing.
*/
time = MAX2(time, chosen->unblocked_time);
current.time = MAX2(current.time, chosen->unblocked_time);
/* Update the clock for how soon an instruction could start after the
* chosen one.
*/
time += issue_time(chosen->inst);
current.time += issue_time(chosen->inst);
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
fprintf(stderr, "clock %4d, scheduled: ", current.time);
bs->dump_instruction(chosen->inst);
if (!post_reg_alloc)
fprintf(stderr, "(register pressure %d)\n", reg_pressure);
@ -1903,7 +1914,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
schedule_node *child = chosen->children[i];
child->unblocked_time = MAX2(child->unblocked_time,
time + chosen->child_latency[i]);
current.time + chosen->child_latency[i]);
if (debug) {
fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count);
@ -1930,12 +1941,12 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
foreach_in_list(schedule_node, n, &instructions) {
if (n->inst->is_math())
n->unblocked_time = MAX2(n->unblocked_time,
time + chosen->latency);
current.time + chosen->latency);
}
}
}
assert(instructions_to_schedule == 0);
assert(scheduled == current.len);
}
void
@ -1964,14 +1975,14 @@ instruction_scheduler::run(cfg_t *cfg)
count_reads_remaining(inst);
}
add_insts_from_block(block);
set_current_block(block);
calculate_deps();
compute_delays();
compute_exits();
schedule_instructions(block);
schedule_instructions();
}
if (debug && !post_reg_alloc) {