r600/sfn: Add handling for R600 indirect access alias handling

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21347>
This commit is contained in:
Gert Wollny 2023-02-19 18:29:07 +01:00 committed by Marge Bot
parent d955633319
commit e57643cf54
5 changed files with 481 additions and 32 deletions

View file

@ -268,7 +268,7 @@ AluInstr::do_print(std::ostream& os) const
}
os << " : ";
} else {
os << "__." << swzchar[dest_chan()] << " : ";
os << " __." << swzchar[dest_chan()] << " : ";
}
}

View file

@ -383,18 +383,6 @@ LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
if (old_src->pin() == pin_array || new_src->pin() == pin_array)
return false;
<<<<<<< HEAD
if (new_src->get_addr()) {
for (auto& s : m_srcs) {
auto addr = s->get_addr();
/* can't have two different indirect addresses in the same instr */
if (addr && !addr->equal_to(*new_src->get_addr()))
return false;
}
}
=======
>>>>>>> 74c0ddf158e (r600/sfn: Don't copy-propagate indirect access into LDS instr)
for (unsigned i = 0; i < m_srcs.size(); ++i) {
if (old_src->equal_to(*m_srcs[i])) {
m_srcs[i] = new_src;

View file

@ -26,6 +26,9 @@
#include "sfn_scheduler.h"
#include "amd_family.h"
#include "r600_isa.h"
#include "sfn_alu_defines.h"
#include "sfn_debug.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
@ -135,9 +138,21 @@ public:
AluInstr *m_last_lds_instr{nullptr};
};
/**
 * Hash functor for the (first, second) integer pairs stored in ArrayCheckSet.
 *
 * The first pair member is shifted left by three bits, the second member is
 * OR-ed into the result, and the combined value is run through
 * std::hash<size_t>.
 */
struct ArrayChanHash {
   std::size_t operator()(std::pair<int, int> const& s) const noexcept
   {
      const std::size_t shifted_first = static_cast<std::size_t>(s.first) << 3;
      const std::size_t combined = shifted_first | s.second;
      return std::hash<std::size_t>{}(combined);
   }
};

/** Set of integer pairs hashed with ArrayChanHash. */
using ArrayCheckSet = std::unordered_set<std::pair<int, int>, ArrayChanHash>;
class BlockScheduler {
public:
BlockScheduler(r600_chip_class chip_class);
BlockScheduler(r600_chip_class chip_class,
radeon_family family);
void run(Shader *shader);
void finalize();
@ -176,6 +191,10 @@ private:
template <typename I> bool schedule_block(std::list<I *>& ready_list);
void update_array_writes(const AluGroup& group);
bool check_array_reads(const AluInstr& instr);
bool check_array_reads(const AluGroup& group);
std::list<AluInstr *> alu_vec_ready;
std::list<AluInstr *> alu_trans_ready;
std::list<AluGroup *> alu_groups_ready;
@ -208,10 +227,17 @@ private:
int m_lds_addr_count{0};
int m_alu_groups_scheduled{0};
r600_chip_class m_chip_class;
radeon_family m_chip_family;
bool m_idx0_loading{false};
bool m_idx1_loading{false};
bool m_idx0_pending{false};
bool m_idx1_pending{false};
bool m_nop_after_rel_dest{false};
bool m_nop_befor_rel_src{false};
ArrayCheckSet m_last_indirect_array_write;
ArrayCheckSet m_last_direct_array_write;
};
Shader *
@ -231,7 +257,9 @@ schedule(Shader *original)
// to be able to re-start scheduling
auto scheduled_shader = original;
BlockScheduler s(original->chip_class());
BlockScheduler s(original->chip_class(), original->chip_family());
s.run(scheduled_shader);
s.finalize();
@ -245,7 +273,8 @@ schedule(Shader *original)
return scheduled_shader;
}
BlockScheduler::BlockScheduler(r600_chip_class chip_class):
BlockScheduler::BlockScheduler(r600_chip_class chip_class,
radeon_family chip_family):
current_shed(sched_alu),
m_last_pos(nullptr),
m_last_pixel(nullptr),
@ -253,6 +282,12 @@ BlockScheduler::BlockScheduler(r600_chip_class chip_class):
m_current_block(nullptr),
m_chip_class(chip_class)
{
m_nop_after_rel_dest = chip_family == CHIP_RV770;
m_nop_befor_rel_src = m_chip_class == ISA_CC_R600 &&
chip_family != CHIP_RV670 &&
chip_family != CHIP_RS780 &&
chip_family != CHIP_RS880;
}
void
@ -503,6 +538,9 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
if (!alu_groups_ready.empty() && !has_lds_ready) {
group = *alu_groups_ready.begin();
if (!check_array_reads(*group)) {
sfn_log << SfnLog::schedule << "try schedule " <<
*group << "\n";
@ -523,6 +561,8 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
sfn_log << SfnLog::schedule << "Don't add group because of " <<
m_current_block->expected_ar_uses()
<< "pending AR loads\n";
group = nullptr;
}
}
}
}
@ -569,10 +609,20 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
// kcache reservation failed, so we have to start a new CF
start_new_block(out_blocks, Block::alu);
} else {
// Ready is not empty, but we didn't schedule anything. This
// means we had an indirect array read or write conflict that we
// can resolve with an extra group that has a NOP instruction
if (!alu_trans_ready.empty() || !alu_vec_ready.empty()) {
group->add_vec_instructions(new AluInstr(op0_nop, 0));
break;
} else {
return false;
}
}
}
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
group->set_scheduled();
@ -595,6 +645,8 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
m_current_block->push_back(group);
update_array_writes(*group);
m_idx0_pending |= m_idx0_loading;
m_idx0_loading = false;
@ -705,6 +757,11 @@ BlockScheduler::schedule_alu_to_group_vec(AluGroup *group)
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
if (check_array_reads(**i)) {
++i;
continue;
}
if (!m_current_block->try_reserve_kcache(**i)) {
sfn_log << SfnLog::schedule << " failed (kcache)\n";
++i;
@ -770,6 +827,12 @@ BlockScheduler::schedule_alu_to_group_trans(AluGroup *group,
auto i = readylist.begin();
auto e = readylist.end();
while (i != e) {
if (check_array_reads(**i)) {
++i;
continue;
}
sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
if (!m_current_block->try_reserve_kcache(**i)) {
sfn_log << SfnLog::schedule << " failed (kcache)\n";
@ -1020,4 +1083,118 @@ BlockScheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& availa
return !ready.empty();
}
/**
 * Visitor helper that supplies empty handlers for Register, LocalArray,
 * UniformValue, LiteralConstant and InlineConstant operands, so that classes
 * deriving from it only need to implement the handlers they care about.
 */
class CheckArrayAccessVisitor : public ConstRegisterVisitor {
public:
   // All handlers below intentionally ignore their argument.
   void visit(const Register&) override {}
   void visit(const LocalArray&) override {}
   void visit(const UniformValue&) override {}
   void visit(const LiteralConstant&) override {}
   void visit(const InlineConstant&) override {}
};
/**
 * Destination visitor that records which array elements are written.
 *
 * Every visited LocalArrayValue is turned into a (base selector, channel)
 * key. Values that carry an address are added to the indirect set; values
 * without an address are added to the direct set, but only when direct-write
 * tracking was requested at construction time.
 */
class UpdateArrayWrite : public CheckArrayAccessVisitor {
public:
   /**
    * @param indirect_arrays set receiving keys of writes that have an address
    * @param direct_arrays   set receiving keys of writes without an address
    * @param tdw             when false, writes without an address are ignored
    */
   UpdateArrayWrite(ArrayCheckSet& indirect_arrays,
                    ArrayCheckSet& direct_arrays,
                    bool tdw):
       last_indirect_array_write(indirect_arrays),
       last_direct_array_write(direct_arrays),
       track_direct_writes(tdw)
   {
   }

   void visit(const LocalArrayValue& value) override
   {
      const int base = value.array().base_sel();
      const auto key = std::make_pair(base, value.chan());

      if (value.addr()) {
         last_indirect_array_write.insert(key);
         return;
      }

      if (track_direct_writes)
         last_direct_array_write.insert(key);
   }

private:
   ArrayCheckSet& last_indirect_array_write;
   ArrayCheckSet& last_direct_array_write;
   bool track_direct_writes {false};
};
void BlockScheduler::update_array_writes(const AluGroup& group)
{
if (m_nop_after_rel_dest || m_nop_befor_rel_src) {
m_last_direct_array_write.clear();
m_last_indirect_array_write.clear();
UpdateArrayWrite visitor(m_last_indirect_array_write,
m_last_direct_array_write,
m_nop_befor_rel_src);
for (auto alu : group) {
if (alu && alu->dest())
alu->dest()->accept(visitor);
}
}
}
/**
 * Source visitor that reports whether a read conflicts with a recorded
 * array write.
 *
 * A visited LocalArrayValue is converted to a (base selector, channel) key.
 * need_extra_group is set when
 *  - the key is contained in the indirect-write set, or
 *  - the value carries an address and the key is contained in the
 *    direct-write set.
 *
 * The flag is never reset by visit(), so one visitor instance accumulates the
 * result over all operands it is applied to.
 */
class CheckArrayRead : public CheckArrayAccessVisitor {
public:
   /**
    * @param indirect_arrays keys of array elements written with an address
    * @param direct_arrays   keys of array elements written without an address
    */
   CheckArrayRead(const ArrayCheckSet& indirect_arrays,
                  const ArrayCheckSet& direct_arrays):
       last_indirect_array_write(indirect_arrays),
       last_direct_array_write(direct_arrays)
   {
   }

   void visit(const LocalArrayValue& value) override
   {
      int array_base = value.array().base_sel();
      auto entry = std::make_pair(array_base, value.chan());

      if (last_indirect_array_write.find(entry) !=
          last_indirect_array_write.end())
         need_extra_group = true;

      // The result of find() must be compared with end() of the very same
      // container; comparing iterators of two different sets is undefined.
      if (value.addr() && last_direct_array_write.find(entry) !=
                             last_direct_array_write.end()) {
         need_extra_group = true;
      }
   }

   const ArrayCheckSet& last_indirect_array_write;
   const ArrayCheckSet& last_direct_array_write;
   // Result flag read by the caller after visiting the operands.
   bool need_extra_group {false};
};
bool BlockScheduler::check_array_reads(const AluInstr& instr)
{
if (m_nop_after_rel_dest || m_nop_befor_rel_src) {
CheckArrayRead visitor(m_last_indirect_array_write,
m_last_direct_array_write);
for (auto& s : instr.sources()) {
s->accept(visitor);
}
return visitor.need_extra_group;
}
return false;
}
/**
 * Test the sources of all instructions in an ALU group against the recorded
 * array writes.
 *
 * @return true when CheckArrayRead flags at least one source of any
 *         instruction in @p group, false otherwise or when neither NOP
 *         workaround flag is set.
 */
bool BlockScheduler::check_array_reads(const AluGroup& group)
{
   if (m_nop_after_rel_dest || m_nop_befor_rel_src) {

      CheckArrayRead visitor(m_last_indirect_array_write,
                             m_last_direct_array_write);

      for (auto alu : group) {
         // update_array_writes() guards against null entries when walking a
         // group, so do the same here instead of dereferencing blindly.
         if (!alu)
            continue;

         for (auto& s : alu->sources()) {
            s->accept(visitor);
         }
      }
      return visitor.need_extra_group;
   }
   return false;
}
} // namespace r600

View file

@ -453,6 +453,8 @@ public:
/// Const iterator obtained from m_values.begin().
Values::const_iterator begin() const
{
   return m_values.begin();
}
/// Const iterator obtained from m_values.end().
Values::const_iterator end() const
{
   return m_values.end();
}
/// Read-only access to the stored m_base_sel value.
uint32_t base_sel() const
{
   return m_base_sel;
}
private:
uint32_t m_base_sel;
uint32_t m_nchannels;

View file

@ -471,3 +471,285 @@ BLOCK_END
auto sh = from_string(input);
check(schedule(sh), expect);
}
// Scheduling test for CHIPCLASS R600 / FAMILY R600.
// The input writes A1 through an indirect index (A1[S1.x].x) and then reads
// A1[1].x directly. The expected schedule contains a group that holds only a
// NOP between the group with the indirect write and the group with the read.
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDest)
{
// Shader before scheduling, in the textual form accepted by from_string().
const char *input =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Expected result: the index is loaded with MOVA_INT into AR in its own
// group, and a NOP-only group separates the A1[AR].x write from the ADD.
const char *expect =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Build the shader from text, apply split_address_loads() and optimize(),
// then hand the output of schedule() together with the expected text to
// check() (presumably a textual comparison -- confirm in the fixture).
auto sh = from_string(input);
split_address_loads(*sh);
optimize(*sh);
check(schedule(sh), expect);
}
// Scheduling test for CHIPCLASS R600 / FAMILY R600.
// The input writes A1[0].x directly and then reads A1 through an indirect
// index (A1[S1.x].x). The expected schedule contains a group that holds only
// a NOP between the direct write and the indirect read.
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPBeforIndirectSrc)
{
// Shader before scheduling, in the textual form accepted by from_string().
const char *input =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[0].x : KC0[0].y {WL}
ALU ADD S2.x : A1[S1.x].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Expected result: MOVA_INT and the direct A1[0].x write share the first
// group, followed by a NOP-only group and then the ADD reading A1[AR].x.
const char *expect =
R"(FS
CHIPCLASS R600
FAMILY R600
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {}
ALU MOV A1[0].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[AR].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Build the shader from text, apply split_address_loads() and optimize(),
// then hand the output of schedule() together with the expected text to
// check() (presumably a textual comparison -- confirm in the fixture).
auto sh = from_string(input);
split_address_loads(*sh);
optimize(*sh);
check(schedule(sh), expect);
}
// Scheduling test for CHIPCLASS R600 / FAMILY RV670.
// Same instruction sequence as the FAMILY R600 indirect-destination case
// (indirect write to A1 followed by a direct read of A1[1].x), but the
// expected schedule contains no NOP group between the write and the read.
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV670)
{
// Shader before scheduling, in the textual form accepted by from_string().
const char *input =
R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Expected result: three ALU groups (MOVA_INT, indirect MOV, ADD) with the
// ADD directly following the A1[AR].x write.
const char *expect =
R"(FS
CHIPCLASS R600
FAMILY RV670
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Build the shader from text, apply split_address_loads() and optimize(),
// then hand the output of schedule() together with the expected text to
// check() (presumably a textual comparison -- confirm in the fixture).
auto sh = from_string(input);
split_address_loads(*sh);
optimize(*sh);
check(schedule(sh), expect);
}
// Scheduling test for CHIPCLASS EVERGREEN / FAMILY BARTS.
// An indirect write to A1 is followed by a direct read of A1[1].x; the
// expected schedule contains no NOP group between the write and the read.
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestEG)
{
// Shader before scheduling, in the textual form accepted by from_string().
const char *input =
R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Expected result: three ALU groups (MOVA_INT, indirect MOV, ADD) with the
// ADD directly following the A1[AR].x write.
const char *expect =
R"(FS
CHIPCLASS EVERGREEN
FAMILY BARTS
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Build the shader from text, apply split_address_loads() and optimize(),
// then hand the output of schedule() together with the expected text to
// check() (presumably a textual comparison -- confirm in the fixture).
auto sh = from_string(input);
split_address_loads(*sh);
optimize(*sh);
check(schedule(sh), expect);
}
// Scheduling test for CHIPCLASS R700 / FAMILY RV770.
// An indirect write to A1 is followed by a direct read of A1[1].x; the
// expected schedule contains a group that holds only a NOP between the group
// with the indirect write and the group with the read.
TEST_F(TestShaderFromNir, ScheduleSplitLoadAddrAndNOPAfterIndirectDestRV770)
{
// Shader before scheduling, in the textual form accepted by from_string().
const char *input =
R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU MOV S1.x : KC0[0].x {WL}
ALU MOV A1[S1.x].x : KC0[0].y {WL}
ALU ADD S2.x : A1[1].x KC0[1].x {WL}
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Expected result: the index is loaded with MOVA_INT into AR in its own
// group, and a NOP-only group separates the A1[AR].x write from the ADD.
const char *expect =
R"(FS
CHIPCLASS R700
FAMILY RV770
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
PROP WRITE_ALL_COLORS:0
OUTPUT LOC:0 NAME:1 MASK:15
ARRAYS A1[2].x
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU MOVA_INT AR : KC0[0].x {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU MOV A1[AR].x : KC0[0].y {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU NOP __.x : {L}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@chgr : A1[1].x KC0[1].x {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S2.xxxx
BLOCK_END)";
// Build the shader from text, apply split_address_loads() and optimize(),
// then hand the output of schedule() together with the expected text to
// check() (presumably a textual comparison -- confirm in the fixture).
auto sh = from_string(input);
split_address_loads(*sh);
optimize(*sh);
check(schedule(sh), expect);
}