From 8a7d34e3bdf35dee35bd1d1fd43738d8ebe05218 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 21 Jul 2022 17:52:48 +0200 Subject: [PATCH] r600/sfn: Fix the kcache failure handling Instead of starting a new block when the kcache handling failed, try to continue scheduling instructions until kcache allocation fails for all ready instructions. With that we avoid a CF split within an LDS fetch/read group. Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_instr.cpp | 75 ++++++--- src/gallium/drivers/r600/sfn/sfn_instr.h | 10 +- .../drivers/r600/sfn/sfn_scheduler.cpp | 144 ++++++++++-------- 3 files changed, 138 insertions(+), 91 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.cpp b/src/gallium/drivers/r600/sfn/sfn_instr.cpp index d81e329531e..6ab2518d3fa 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp @@ -302,17 +302,43 @@ void Block::push_back(PInst instr) bool Block::try_reserve_kcache(const AluGroup& group) { + auto kcache = m_kcache; + auto kcache_constants = group.get_kconsts(); for (auto& kc : kcache_constants) { auto u = kc->as_uniform(); assert(u); - if (!try_reserve_kcache(*u)) + if (!try_reserve_kcache(*u, kcache)) { + m_kcache_alloc_failed = true; return false; + } } + + m_kcache = kcache; + m_kcache_alloc_failed = false; return true; } -bool Block::try_reserve_kcache(const UniformValue& u) +bool Block::try_reserve_kcache(const AluInstr& instr) +{ + auto kcache = m_kcache; + + for (auto& src : instr.sources()) { + auto u = src->as_uniform(); + if (u) { + if (!try_reserve_kcache(*u, kcache)) { + m_kcache_alloc_failed = true; + return false; + } + } + } + m_kcache = kcache; + m_kcache_alloc_failed = false; + return true; +} + +bool Block::try_reserve_kcache(const UniformValue& u, + std::array& kcache) const { const int kcache_banks = 4; // TODO: handle pre-evergreen @@ -323,49 +349,50 @@ bool Block::try_reserve_kcache(const UniformValue& u) bool found = false; 
for (int i = 0; i < kcache_banks && !found; ++i) { - if (m_kcache[i].mode) { - if (m_kcache[i].bank < bank) + if (kcache[i].mode) { + if (kcache[i].bank < bank) continue; - if ((m_kcache[i].bank == bank && - m_kcache[i].addr > line + 1) || - m_kcache[i].bank > bank) { - if (m_kcache[kcache_banks - 1].mode) + if ((kcache[i].bank == bank && + kcache[i].addr > line + 1) || + kcache[i].bank > bank) { + if (kcache[kcache_banks - 1].mode) return false; - memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine)); - m_kcache[i].mode = KCacheLine::lock_1; - m_kcache[i].bank = bank; - m_kcache[i].addr = line; + memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine)); + kcache[i].mode = KCacheLine::lock_1; + kcache[i].bank = bank; + kcache[i].addr = line; return true; } - int d = line - m_kcache[i].addr; + int d = line - kcache[i].addr; if (d == -1) { - m_kcache[i].addr--; - if (m_kcache[i].mode == KCacheLine::lock_2) { + kcache[i].addr--; + if (kcache[i].mode == KCacheLine::lock_2) { /* we are prepending the line to the current set, - * discarding the existing second line, - * so we'll have to insert line+2 after it */ + * discarding the existing second line, + * so we'll have to insert line+2 after it */ line += 2; continue; - } else if (m_kcache[i].mode == KCacheLine::lock_1) { - m_kcache[i].mode = KCacheLine::lock_2; + } else if (kcache[i].mode == KCacheLine::lock_1) { + kcache[i].mode = KCacheLine::lock_2; return true; } else { /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ return false; } } else if (d == 1) { - m_kcache[i].mode = KCacheLine::lock_2; + kcache[i].mode = KCacheLine::lock_2; return true; - } else if (d == 0) + } else if (d == 0) { return true; + } } else { /* free kcache set - use it */ - m_kcache[i].mode = KCacheLine::lock_1; - m_kcache[i].bank = bank; - m_kcache[i].addr = line; + kcache[i].mode = KCacheLine::lock_1; + kcache[i].bank = bank; + kcache[i].addr = line; return true; } } diff --git 
a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h index c70427e8575..19f118149bd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -196,7 +196,8 @@ public: void set_type(Type t); uint32_t remaining_slots() const { return m_remaining_slots;} - bool try_reserve_kcache(const AluGroup& group); + bool try_reserve_kcache(const AluGroup& group); + bool try_reserve_kcache(const AluInstr& instr); auto last_lds_instr() {return m_last_lds_instr;} void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;} @@ -207,8 +208,11 @@ public: size_t size() const { return m_instructions.size();} + bool kcache_reservation_failed() const { return m_kcache_alloc_failed;} + private: - bool try_reserve_kcache(const UniformValue& u); + bool try_reserve_kcache(const UniformValue& u, + std::array& kcache) const; bool do_ready() const override {return true;}; void do_print(std::ostream& os) const override; @@ -221,11 +225,13 @@ private: uint32_t m_remaining_slots{0xffff}; std::array m_kcache; + bool m_kcache_alloc_failed{false}; Instr *m_last_lds_instr{nullptr}; int m_lds_group_requirement{0}; AluInstr *m_lds_group_start{nullptr}; + }; class InstrWithVectorResult : public Instr { diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index faf116a5cdb..87ab579f8a3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -489,83 +489,84 @@ bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks) bool has_lds_ready = !alu_vec_ready.empty() && (*alu_vec_ready.begin())->has_lds_access(); + /* If we have ready ALU instructions we have to start a new ALU block */ + if (has_alu_ready || !alu_groups_ready.empty()) { + if (m_current_block->type() != Block::alu) { + start_new_block(out_blocks, Block::alu); + m_alu_groups_schduled = 0; + } + } + /* Schedule groups first. 
unless we have a pending LDS instuction * We don't want the LDS instructions to be too far apart because the * fetch + read from queue has to be in the same ALU CF block */ if (!alu_groups_ready.empty() && !has_lds_ready) { group = *alu_groups_ready.begin(); - alu_groups_ready.erase(alu_groups_ready.begin()); - sfn_log << SfnLog::schedule << "Schedule ALU group\n"; - success = true; - } else { - if (has_alu_ready) { - group = new AluGroup(); - sfn_log << SfnLog::schedule << "START new ALU group\n"; - } - } - - if (group) { - int free_slots = group->free_slots(); - - if (free_slots && has_alu_ready) { - if (!alu_vec_ready.empty()) - success |= schedule_alu_to_group_vec(group); - - /* Apparently one can't schedule a t-slot if there is already - * and LDS instruction scheduled. - * TODO: check whether this is only relevant for actual LDS instructions - * or also for instructions that read from the LDS return value queue */ - - if (free_slots & 0x10 && !has_lds_ready) { - sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n"; - if (!alu_trans_ready.empty()) - success |= schedule_alu_to_group_trans(group, alu_trans_ready); - if (!alu_vec_ready.empty()) - success |= schedule_alu_to_group_trans(group, alu_vec_ready); - } - } - - sfn_log << SfnLog::schedule << "Finalize ALU group\n"; - group->set_scheduled(); - group->fix_last_flag(); - group->set_nesting_depth(m_current_block->nesting_depth()); - - - if (m_current_block->type() != Block::alu) { - start_new_block(out_blocks, Block::alu); - m_alu_groups_schduled = 0; - } - - /* Pessimistic hack: If we have started an LDS group, - * make sure 8 instructions groups still fit into the CF - * TODO: take care of Address slot emission - * TODO: maybe do this CF split only in the assembler - */ - /*if (group->slots() > m_current_block->remaining_slots() || - (group->has_lds_group_start() && - m_current_block->remaining_slots() < 7 * 8)) { - //assert(!m_current_block->lds_group_active()); - start_new_block(out_blocks, 
Block::alu); - }*/ - if (!m_current_block->try_reserve_kcache(*group)) { - assert(!m_current_block->lds_group_active()); start_new_block(out_blocks, Block::alu); m_current_block->set_instr_flag(Instr::force_cf); } - assert(m_current_block->try_reserve_kcache(*group)); - - if (group->has_lds_group_start()) - m_current_block->lds_group_start(*group->begin()); - - m_current_block->push_back(group); - if (group->has_lds_group_end()) - m_current_block->lds_group_end(); + if (!m_current_block->try_reserve_kcache(*group)) + unreachable("Scheduling a group in a new block should always succeed"); + alu_groups_ready.erase(alu_groups_ready.begin()); + sfn_log << SfnLog::schedule << "Schedule ALU group\n"; + success = true; + } else if (has_alu_ready) { + group = new AluGroup(); + sfn_log << SfnLog::schedule << "START new ALU group\n"; + } else { + return false; } - if (success) - ++m_alu_groups_schduled; + assert(group); + + int free_slots = group->free_slots(); + + while (free_slots && has_alu_ready) { + if (!alu_vec_ready.empty()) + success |= schedule_alu_to_group_vec(group); + + /* Apparently one can't schedule a t-slot if there is already + * and LDS instruction scheduled. 
+ * TODO: check whether this is only relevant for actual LDS instructions + * or also for instructions that read from the LDS return value queue */ + + if (free_slots & 0x10 && !has_lds_ready) { + sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n"; + if (!alu_trans_ready.empty()) + success |= schedule_alu_to_group_trans(group, alu_trans_ready); + if (!alu_vec_ready.empty()) + success |= schedule_alu_to_group_trans(group, alu_vec_ready); + } + + if (success) { + ++m_alu_groups_schduled; + break; + } else if (m_current_block->kcache_reservation_failed()) { + // LDS read groups should not lead to impossible + // kcache constellations + assert(!m_current_block->lds_group_active()); + + // kcache reservation failed, so we have to start a new CF + start_new_block(out_blocks, Block::alu); + m_current_block->set_instr_flag(Instr::force_cf); + } else { + return false; + } + } + + sfn_log << SfnLog::schedule << "Finalize ALU group\n"; + group->set_scheduled(); + group->fix_last_flag(); + group->set_nesting_depth(m_current_block->nesting_depth()); + m_current_block->push_back(group); + + if (group->has_lds_group_start()) + m_current_block->lds_group_start(*group->begin()); + + if (group->has_lds_group_end()) + m_current_block->lds_group_end(); return success; } @@ -652,6 +653,13 @@ bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group) auto e = alu_vec_ready.end(); while (i != e) { sfn_log << SfnLog::schedule << "Try schedule to vec " << **i; + + if (!m_current_block->try_reserve_kcache(**i)) { + sfn_log << SfnLog::schedule << " failed (kcache)\n"; + ++i; + continue; + } + if (group->add_vec_instructions(*i)) { auto old_i = i; ++i; @@ -679,6 +687,12 @@ bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::listtry_reserve_kcache(**i)) { + sfn_log << SfnLog::schedule << " failed (kcache)\n"; + ++i; + continue; + } + if (group->add_trans_instructions(*i)) { auto old_i = i; ++i;