diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h index 23b4fbe5533..3254100e868 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -135,9 +135,10 @@ public: (void)addr; unreachable("Instruction type has no indirect addess"); }; + const InstrList& required_instr() const { return m_required_instr; } protected: - const InstrList& required_instr() const { return m_required_instr; } + private: virtual void forward_set_blockid(int id, int index); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 991f5fb16c7..6158c50cd47 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -367,7 +367,8 @@ public: void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value) { - if (new_addr->sel() == 0 && value.addr()->as_register()) + if (new_addr->sel() == 0 && value.addr() + && value.addr()->as_register()) value.set_addr(new_addr); } @@ -1233,7 +1234,13 @@ AluInstr::do_ready() const * we have to make sure that required ops are already * scheduled before marking this one ready */ for (auto i : required_instr()) { - if (!i->is_scheduled()) + if (i->is_dead()) + continue; + + bool is_older_instr = i->block_id() <= block_id() && + i->index() < index(); + bool is_lds = i->as_alu() && i->as_alu()->has_lds_access(); + if (!i->is_scheduled() && (is_older_instr || is_lds)) return false; } @@ -1265,7 +1272,13 @@ AluInstr::do_ready() const * update are scheduled, otherwise we may use the updated value when we * shouldn't */ for (auto u : m_dest->uses()) { - if (u->block_id() <= block_id() && u->index() < index() && !u->is_scheduled()) { + /* TODO: This is working around some sloppy use updates, dead instrzuctions + * should remove themselves from uses. */ + if (u->is_dead()) + continue; + if (!u->is_scheduled() && + u->block_id() <= block_id() && + u->index() < index()) { return false; } } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 859c8806e4f..0eed9bed877 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -1338,6 +1338,13 @@ Shader::emit_wait_ack() return true; } +static uint32_t get_array_hash(const VirtualValue& value) +{ + assert (value.pin() == pin_array); + const LocalArrayValue& av = static_cast(value); + return av.chan() | (av.array().base_sel() << 2); +} + void Shader::InstructionChain::visit(AluInstr *instr) { if (instr->is_kill()) { @@ -1351,6 +1358,35 @@ void Shader::InstructionChain::visit(AluInstr *instr) if (last_ssbo_instr) instr->add_required_instr(last_ssbo_instr); } + + /* Make sure array reads and writes depends on the last indirect access + * so that we don't overwrite array elements too early */ + + if (auto d = instr->dest()) { + if (d->pin() == pin_array) { + if (d->addr()) { + last_alu_with_indirect_reg[get_array_hash(*d)] = instr; + return; + } + auto pos = last_alu_with_indirect_reg.find(get_array_hash(*d)); + if (pos != last_alu_with_indirect_reg.end()) { + instr->add_required_instr(pos->second); + } + } + } + + for (auto& s : instr->sources()) { + if (s->pin() == pin_array) { + if (s->get_addr()) { + last_alu_with_indirect_reg[get_array_hash(*s)] = instr; + return; + } + auto pos = last_alu_with_indirect_reg.find(get_array_hash(*s)); + if (pos != last_alu_with_indirect_reg.end()) { + instr->add_required_instr(pos->second); + } + } + } } void diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h index 236a62b837b..559d9d7f8f9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader.h @@ -396,6 +396,7 @@ private: Instr *last_gds_instr{nullptr}; Instr *last_ssbo_instr{nullptr}; Instr *last_kill_instr{nullptr}; + std::unordered_map last_alu_with_indirect_reg; bool prepare_mem_barrier{false}; }; diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp index 5664d886d65..804d1bac2e4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp @@ -975,6 +975,13 @@ LocalArray::element(size_t offset, PVirtualValue indirect, uint32_t chan) return reg; } +void LocalArray::add_parent_to_elements(int chan, Instr *instr) +{ + for (auto& e : m_values) + if (e->chan() == chan) + e->add_parent(instr); +} + bool LocalArray::ready_for_direct(int block, int index, int chan) const { @@ -1069,7 +1076,8 @@ LocalArrayValue::accept(ConstRegisterVisitor& vistor) const void LocalArrayValue::add_parent_to_array(Instr *instr) { - m_array.add_parent(instr); + if (m_addr) + m_array.add_parent_to_elements(chan(), instr); } void diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h index ccf3c565b33..d1f275818df 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h @@ -444,7 +444,7 @@ public: uint32_t nchannels() const; uint32_t frac() const { return m_frac; } - void add_parent_to_elements(Instr *instr); + void add_parent_to_elements(int chan, Instr *instr); const Register& operator()(size_t idx, size_t chan) const;