r600/sfn: Split ALU blocks in scheduler to fit into 128 slots

With that the backend doesn't split these ALU CFs any more.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24297>
This commit is contained in:
Gert Wollny 2023-07-21 17:04:46 +02:00 committed by Marge Bot
parent 440cf7439d
commit 6aafa2bb49

View file

@ -187,6 +187,8 @@ private:
bool schedule_exports(Shader::ShaderBlocks& out_blocks, bool schedule_exports(Shader::ShaderBlocks& out_blocks,
std::list<ExportInstr *>& ready_list); std::list<ExportInstr *>& ready_list);
int maybe_split_alu_block(Shader::ShaderBlocks& out_blocks);
template <typename I> bool schedule(std::list<I *>& ready_list); template <typename I> bool schedule(std::list<I *>& ready_list);
template <typename I> bool schedule_block(std::list<I *>& ready_list); template <typename I> bool schedule_block(std::list<I *>& ready_list);
@ -513,11 +515,17 @@ BlockScheduler::schedule_block(Block& in_block,
if (cir.m_cf_instr) { if (cir.m_cf_instr) {
// Assert that if condition is ready // Assert that if condition is ready
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
}
m_current_block->push_back(cir.m_cf_instr); m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled(); cir.m_cf_instr->set_scheduled();
} }
out_blocks.push_back(m_current_block); if (m_current_block->type() == Block::alu)
maybe_split_alu_block(out_blocks);
else
out_blocks.push_back(m_current_block);
} }
void void
@ -677,6 +685,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
m_idx1_pending |= m_idx1_loading; m_idx1_pending |= m_idx1_loading;
m_idx1_loading = false; m_idx1_loading = false;
if (!m_current_block->lds_group_active() &&
m_current_block->expected_ar_uses() == 0 &&
(!addr || is_index)) {
group->set_instr_flag(Instr::no_lds_or_addr_group);
}
if (group->has_lds_group_start()) if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin()); m_current_block->lds_group_start(*group->begin());
@ -704,7 +718,7 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
auto ii = tex_ready.begin(); auto ii = tex_ready.begin();
sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size()) if ((unsigned)m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
start_new_block(out_blocks, Block::tex); start_new_block(out_blocks, Block::tex);
for (auto prep : (*ii)->prepare_instr()) { for (auto prep : (*ii)->prepare_instr()) {
@ -749,9 +763,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
if (!m_current_block->empty()) { if (!m_current_block->empty()) {
sfn_log << SfnLog::schedule << "Start new block\n"; sfn_log << SfnLog::schedule << "Start new block\n";
assert(!m_current_block->lds_group_active()); assert(!m_current_block->lds_group_active());
out_blocks.push_back(m_current_block);
m_current_block = int next_id = m_current_block->id() + 1;
new Block(m_current_block->nesting_depth(), m_current_block->id()); if (m_current_block->type() != Block::alu)
out_blocks.push_back(m_current_block);
else
next_id = maybe_split_alu_block(out_blocks);
m_current_block = new Block(m_current_block->nesting_depth(), next_id);
m_current_block->set_instr_flag(Instr::force_cf); m_current_block->set_instr_flag(Instr::force_cf);
m_idx0_pending = m_idx1_pending = false; m_idx0_pending = m_idx1_pending = false;
@ -759,6 +777,70 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
m_current_block->set_type(type, m_chip_class); m_current_block->set_type(type, m_chip_class);
} }
// Split the current ALU block into sub-blocks that fit the hardware's
// 128-slot ALU clause limit, pushing each finished sub-block onto
// out_blocks. Works in two passes: pass 1 tags the groups where a new
// CF clause must start (Instr::force_cf), pass 2 rebuilds the block
// contents into freshly allocated sub-blocks, cutting at those tags.
// Returns the block id the caller should use for the next block.
int BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
{
// TODO: needs fixing
// Fast path: the block still has free slots, so no split is needed;
// emit it unchanged and hand back the next id.
if (m_current_block->remaining_slots() > 0) {
out_blocks.push_back(m_current_block);
return m_current_block->id() + 1;
}
// Pass 1: walk the ALU groups, tracking slots already committed to the
// current sub-block (used_slots) plus slots accumulated since the last
// legal split point (pending_slots). next_block_start remembers that
// split-point group.
int used_slots = 0;
int pending_slots = 0;
Instr *next_block_start = nullptr;
for (auto cur_group : *m_current_block) {
/* This limit is a bit fishy, it should be 128 */
if (used_slots + pending_slots + cur_group->slots() < 128) {
// Still fits. A group that can legally begin a new ALU clause
// (presumably what can_start_alu_block() reports — confirm in
// AluGroup) becomes the new candidate split point; everything
// accumulated before it is now committed.
if (cur_group->can_start_alu_block()) {
next_block_start = cur_group;
used_slots += pending_slots;
pending_slots = cur_group->slots();
} else {
// Can't split before this group (e.g. inside an LDS/addr
// group); it must stay with the pending run.
pending_slots += cur_group->slots();
}
} else {
// Overflow: force a new CF clause at the last legal split point.
// The pending run moves into the new sub-block, and the current
// group starts a fresh pending run.
// NOTE(review): next_block_start is not reset here, and cur_group
// is not considered as a new split candidate — looks intentional
// given the TODO above, but verify.
assert(next_block_start);
next_block_start->set_instr_flag(Instr::force_cf);
used_slots = pending_slots;
pending_slots = cur_group->slots();
}
}
// Pass 2: re-distribute the block's instructions into sub-blocks.
// Sub-block ids start at the original block's id and count up.
int sub_block_id = m_current_block->id();
Block *sub_block = new Block(m_current_block->nesting_depth(),
sub_block_id++);
sub_block->set_type(Block::alu, m_chip_class);
sub_block->set_instr_flag(Instr::force_cf);
for (auto instr : *m_current_block) {
auto group = instr->as_alu_group();
if (!group) {
// Non-ALU-group instruction (e.g. the trailing CF instr): keep it
// in the current sub-block as-is.
sub_block->push_back(instr);
continue;
}
// Cut before groups tagged in pass 1 (group_force_alu_cf()
// presumably reflects the force_cf flag set above — confirm).
// A split must never land inside an active LDS group.
if (group->group_force_alu_cf()) {
assert(!sub_block->lds_group_active());
out_blocks.push_back(sub_block);
sub_block = new Block(m_current_block->nesting_depth(),
sub_block_id++);
sub_block->set_type(Block::alu, m_chip_class);
sub_block->set_instr_flag(Instr::force_cf);
}
sub_block->push_back(group);
// Carry the LDS group open/close bookkeeping over to the sub-block
// so lds_group_active() stays accurate across the split.
if (group->has_lds_group_start())
sub_block->lds_group_start(*group->begin());
if (group->has_lds_group_end())
sub_block->lds_group_end();
}
if (!sub_block->empty())
out_blocks.push_back(sub_block);
// NOTE(review): if the last sub_block is empty it is neither pushed
// nor deleted — possible leak unless blocks are pool-allocated; verify.
return sub_block_id;
}
template <typename I> template <typename I>
bool bool
BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)