r600/sfn: Split ALU blocks in scheduler to fit into 128 slots

With that the backend doesn't split these ALU CFs any more.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24297>
This commit is contained in:
Gert Wollny 2023-07-21 17:04:46 +02:00 committed by Marge Bot
parent 440cf7439d
commit 6aafa2bb49

View file

@ -187,6 +187,8 @@ private:
bool schedule_exports(Shader::ShaderBlocks& out_blocks, bool schedule_exports(Shader::ShaderBlocks& out_blocks,
std::list<ExportInstr *>& ready_list); std::list<ExportInstr *>& ready_list);
int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
template <typename I> bool schedule(std::list<I *>& ready_list); template <typename I> bool schedule(std::list<I *>& ready_list);
template <typename I> bool schedule_block(std::list<I *>& ready_list); template <typename I> bool schedule_block(std::list<I *>& ready_list);
@ -513,11 +515,17 @@ BlockScheduler::schedule_block(Block& in_block,
if (cir.m_cf_instr) { if (cir.m_cf_instr) {
// Assert that if condition is ready // Assert that if condition is ready
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
}
m_current_block->push_back(cir.m_cf_instr); m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled(); cir.m_cf_instr->set_scheduled();
} }
out_blocks.push_back(m_current_block); if (m_current_block->type() == Block::alu)
maybe_split_alu_block(out_blocks);
else
out_blocks.push_back(m_current_block);
} }
void void
@ -677,6 +685,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
m_idx1_pending |= m_idx1_loading; m_idx1_pending |= m_idx1_loading;
m_idx1_loading = false; m_idx1_loading = false;
if (!m_current_block->lds_group_active() &&
m_current_block->expected_ar_uses() == 0 &&
(!addr || is_index)) {
group->set_instr_flag(Instr::no_lds_or_addr_group);
}
if (group->has_lds_group_start()) if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin()); m_current_block->lds_group_start(*group->begin());
@ -704,7 +718,7 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
auto ii = tex_ready.begin(); auto ii = tex_ready.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size()) if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
start_new_block(out_blocks, Block::tex); start_new_block(out_blocks, Block::tex);
for (auto prep : (*ii)->prepare_instr()) { for (auto prep : (*ii)->prepare_instr()) {
@ -749,9 +763,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
if (!m_current_block->empty()) { if (!m_current_block->empty()) {
sfn_log << SfnLog::schedule << "Start new block\n"; sfn_log << SfnLog::schedule << "Start new block\n";
assert(!m_current_block->lds_group_active()); assert(!m_current_block->lds_group_active());
out_blocks.push_back(m_current_block);
m_current_block = int next_id = m_current_block->id() + 1;
new Block(m_current_block->nesting_depth(), m_current_block->id()); if (m_current_block->type() != Block::alu)
out_blocks.push_back(m_current_block);
else
next_id = maybe_split_alu_block(out_blocks);
m_current_block = new Block(m_current_block->nesting_depth(), next_id);
m_current_block->set_instr_flag(Instr::force_cf); m_current_block->set_instr_flag(Instr::force_cf);
m_idx0_pending = m_idx1_pending = false; m_idx0_pending = m_idx1_pending = false;
@ -759,6 +777,70 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
m_current_block->set_type(type, m_chip_class); m_current_block->set_type(type, m_chip_class);
} }
// Split the current ALU block into sub-blocks that fit the hardware's
// 128-slot ALU clause limit, pushing each finished sub-block onto
// out_blocks. Works in two passes: pass 1 tags the groups where a new
// CF clause must start (Instr::force_cf), pass 2 rebuilds the block
// contents into freshly allocated sub-blocks, cutting at those tags.
// Returns the block id the caller should use for the next block.
int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
{
// TODO: needs fixing
// Fast path: the block still has free slots, so no split is needed;
// emit it unchanged and hand back the next id.
if (m_current_block->remaining_slots() > 0) {
out_blocks.push_back(m_current_block);
return m_current_block->id() + 1;
}
// Pass 1: walk the ALU groups, tracking slots already committed to the
// current sub-block (used_slots) plus slots accumulated since the last
// legal split point (pending_slots). next_block_start remembers that
// split-point group.
int used_slots = 0;
int pending_slots = 0;
Instr *next_block_start = nullptr;
for (auto cur_group : *m_current_block) {
/* This limit is a bit fishy, it should be 128 */
if (used_slots + pending_slots + cur_group->slots() < 128) {
// Still fits. A group that can legally begin a new ALU clause
// (presumably what can_start_alu_block() reports — confirm in
// AluGroup) becomes the new candidate split point; everything
// accumulated before it is now committed.
if (cur_group->can_start_alu_block()) {
next_block_start = cur_group;
used_slots += pending_slots;
pending_slots = cur_group->slots();
} else {
// Can't split before this group (e.g. inside an LDS/addr
// group); it must stay with the pending run.
pending_slots += cur_group->slots();
}
} else {
// Overflow: force a new CF clause at the last legal split point.
// The pending run moves into the new sub-block, and the current
// group starts a fresh pending run.
// NOTE(review): next_block_start is not reset here, and cur_group
// is not considered as a new split candidate — looks intentional
// given the TODO above, but verify.
assert(next_block_start);
next_block_start->set_instr_flag(Instr::force_cf);
used_slots = pending_slots;
pending_slots = cur_group->slots();
}
}
// Pass 2: re-distribute the block's instructions into sub-blocks.
// Sub-block ids start at the original block's id and count up.
int sub_block_id = m_current_block->id();
Block *sub_block = new Block(m_current_block->nesting_depth(),
sub_block_id++);
sub_block->set_type(Block::alu, m_chip_class);
sub_block->set_instr_flag(Instr::force_cf);
for (auto instr : *m_current_block) {
auto group = instr->as_alu_group();
if (!group) {
// Non-ALU-group instruction (e.g. the trailing CF instr): keep it
// in the current sub-block as-is.
sub_block->push_back(instr);
continue;
}
// Cut before groups tagged in pass 1 (group_force_alu_cf()
// presumably reflects the force_cf flag set above — confirm).
// A split must never land inside an active LDS group.
if (group->group_force_alu_cf()) {
assert(!sub_block->lds_group_active());
out_blocks.push_back(sub_block);
sub_block = new Block(m_current_block->nesting_depth(),
sub_block_id++);
sub_block->set_type(Block::alu, m_chip_class);
sub_block->set_instr_flag(Instr::force_cf);
}
sub_block->push_back(group);
// Carry the LDS group open/close bookkeeping over to the sub-block
// so lds_group_active() stays accurate across the split.
if (group->has_lds_group_start())
sub_block->lds_group_start(*group->begin());
if (group->has_lds_group_end())
sub_block->lds_group_end();
}
if (!sub_block->empty())
out_blocks.push_back(sub_block);
// NOTE(review): if the last sub_block is empty it is neither pushed
// nor deleted — possible leak unless blocks are pool-allocated; verify.
return sub_block_id;
}
template <typename I> template <typename I>
bool bool
BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)