// Patch overview. This is free text placed ahead of the first "diff --git"
// header; every line of the patch itself follows unchanged.
//
// Purpose: the scheduler-local AluAllowSlotSwitch visitor in
// sfn_instr_alugroup.cpp is removed and replaced by a virtual query,
// allowed_dest_chan_mask(), that each instruction type answers for itself.
//
// Per-file summary of what the hunks below show:
//  * sfn_instr.h
//      Adds `virtual uint8_t allowed_dest_chan_mask() const` to the
//      instruction base class; the default implementation returns 0.
//  * sfn_instr_alu.h / sfn_instr_alu.cpp
//      AluInstr override: returns 0xf when alu_slots() == 1; when
//      alu_slots() != 1 it returns (1 << alu_slots()) - 1 if the
//      alu_is_cayman_trans flag is set and 0 otherwise.
//  * sfn_instr_alugroup.cpp
//      - Removes class AluAllowSlotSwitch.
//      - add_vec_instructions(): starts from free_mask = 0xf, ANDs in
//        allowed_dest_chan_mask() of every entry of dest->uses(), and returns
//        false as soon as the mask becomes 0. The channel search then skips
//        channels that are occupied in m_slots or not set in free_mask.
//        The branch is now also taken for destinations pinned pin_group
//        (previously only pin_free).
//      - try_readport(): a pin_free destination becomes pin_chan and a
//        pin_group destination becomes pin_chgr.
//      - Adds includes of sfn_instr_export.h, sfn_instr_mem.h, sfn_instr_tex.h.
//  * sfn_instr_export.h / sfn_instr_export.cpp
//      ExportInstr override returns value().free_chan_mask(). The first
//      header hunk only adds blank lines.
//  * sfn_instr_tex.h / sfn_instr_tex.cpp
//      TexInstr override returns m_src.free_chan_mask().
//  * sfn_virtualvalues.h
//      Adds free_chan_mask(): starts from 0xf and, for each of the four
//      m_values entries whose value()->chan() is <= 3, clears that channel's
//      bit; the remaining bits are returned as uint8_t.
//  * sfn_virtualvalues.cpp
//      RegisterVec4::print() now prints chanchar[] indexed by each value's
//      chan() instead of by m_swz[i].
//  * tests/sfn_liverange_test.cpp
//      SimpleArrayAccess: the S3 vec4 is created with pin_chgr instead of
//      pin_group.
//  * tests/sfn_optimizer_test.cpp, tests/sfn_test_shaders.{cpp,h}
//      Adds a scheduling test that parses fs_sched_tex_coord_init, runs
//      schedule() on it and checks the result against
//      fs_sched_tex_coord_expect; both strings are declared extern in the
//      header. In the expected output the two MUL_IEEE results are S3.z@chgr
//      and S3.w@chgr and the second TEX reads S3.zw__ (the input has
//      S3.x@group, S3.y@group and S3.xy__).
//
// NOTE(review): the new test is named "fs_shed_tex_coord" while the shader
//   strings are "fs_sched_tex_coord_*" -- looks like a typo; confirm.
// NOTE(review): the three includes added to sfn_instr_alugroup.cpp do not
//   appear to be needed by the visible hunk, since the query is a virtual
//   call on the use -- confirm before keeping them.
// NOTE(review): because the base-class default is 0, a destination used by
//   any instruction type that does not override allowed_dest_chan_mask()
//   makes add_vec_instructions() return false in this branch -- presumably
//   intentional; verify against the remaining instruction classes.
diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h index 720f9a3a93c..dee1744d51b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -127,6 +127,8 @@ public: const InstrList& dependend_instr() { return m_dependend_instr;} virtual AluInstr *as_alu() { return nullptr;} + virtual uint8_t allowed_dest_chan_mask() const { return 0; } + protected: const InstrList& required_instr() const {return m_required_instr; } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index a55148edfcc..80c0c1e7908 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -444,6 +444,18 @@ void AluInstr::set_sources(SrcValues src) } } +uint8_t AluInstr::allowed_dest_chan_mask() const +{ + if (alu_slots() != 1) { + if (has_alu_flag(alu_is_cayman_trans)) { + return (1 << alu_slots()) - 1; + } else { + return 0; + } + } + return 0xf; +} + bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr) { if (m_dest->equal_to(*new_dest)) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h index a2ff2968588..1c282702c64 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h @@ -160,6 +160,8 @@ public: AluInstr *as_alu() override { return this;} + uint8_t allowed_dest_chan_mask() const override; + private: friend class AluGroup; diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp index 35b76763921..9f2cc872dcf 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp @@ -25,6 +25,9 @@ */ #include "sfn_instr_alugroup.h" +#include "sfn_instr_export.h" +#include "sfn_instr_mem.h" +#include "sfn_instr_tex.h" #include "sfn_debug.h" #include @@ -158,25 
+161,6 @@ int AluGroup::free_slots() const return free_mask; } -class AluAllowSlotSwitch : public AluInstrVisitor { -public: - using AluInstrVisitor::visit; - - void visit(AluInstr *alu) { - if (alu->alu_slots() != 1) { - if (alu->has_alu_flag(alu_is_cayman_trans)) { - free_mask &= (1 << alu->alu_slots()) - 1; - } else { - yes = false; - } - } - } - - bool yes{true}; - uint8_t free_mask{0xf}; - -}; - bool AluGroup::add_vec_instructions(AluInstr *instr) { if (!update_indirect_access(instr)) @@ -213,18 +197,18 @@ bool AluGroup::add_vec_instructions(AluInstr *instr) } else { auto dest = instr->dest(); - if (dest && dest->pin() == pin_free) { + if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) { - AluAllowSlotSwitch swich_allowed; + int free_mask = 0xf; for (auto u : dest->uses()) { - u->accept(swich_allowed); - if (!swich_allowed.yes) + free_mask &= u->allowed_dest_chan_mask(); + if (!free_mask) return false; } int free_chan = 0; while (free_chan < 4 && (m_slots[free_chan] || - !(swich_allowed.free_mask & (1 << free_chan)))) + !(free_mask & (1 << free_chan)))) free_chan++; if (free_chan < 4) { @@ -255,8 +239,12 @@ bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle) m_has_lds_op |= instr->has_lds_access(); sfn_log << SfnLog::schedule << "V: " << *instr << "\n"; auto dest = instr->dest(); - if (dest && dest->pin() == pin_free) - dest->set_pin(pin_chan); + if (dest) { + if (dest->pin() == pin_free) + dest->set_pin(pin_chan); + else if (dest->pin() == pin_group) + dest->set_pin(pin_chgr); + } instr->pin_sources_to_chan(); return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp index 5d4a3b7f5a6..dac0e101855 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.cpp @@ -147,6 +147,11 @@ ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactor return new ExportInstr( type, pos, value); } 
+uint8_t ExportInstr::allowed_dest_chan_mask() const +{ + return value().free_chan_mask(); +} + ScratchIOInstr::ScratchIOInstr(const RegisterVec4& value, PRegister addr, int align, int align_offset, int writemask, int array_size, bool is_read): diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_export.h b/src/gallium/drivers/r600/sfn/sfn_instr_export.h index d19580a7746..d8ab4f0cd89 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_export.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_export.h @@ -43,6 +43,8 @@ public: const RegisterVec4& value() const {return m_value;}; RegisterVec4& value() {return m_value;}; + + private: RegisterVec4 m_value; @@ -79,6 +81,8 @@ public: static Instr::Pointer from_string(std::istream& is, ValueFactory &vf); static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf); + uint8_t allowed_dest_chan_mask() const override; + private: static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp index bb480a8f2d4..f687392c394 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp @@ -376,6 +376,11 @@ bool TexInstr::replace_source(PRegister old_src, PVirtualValue new_src) return success; } +uint8_t TexInstr::allowed_dest_chan_mask() const +{ + return m_src.free_chan_mask(); +} + struct SamplerId { int id; bool indirect; diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h index 0eddf6347f0..4b40a13991c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.h @@ -152,6 +152,7 @@ public: bool replace_source(PRegister old_src, PVirtualValue new_src) override; + uint8_t allowed_dest_chan_mask() const override; private: bool do_ready() const override; diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp 
b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp index b41e705b0df..3b408954349 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp @@ -451,7 +451,7 @@ void RegisterVec4::print(std::ostream& os) const { os << (m_values[0]->value()->is_ssa() ? 'S' : 'R') << sel() << "."; for (int i = 0; i < 4; ++i) - os << VirtualValue::chanchar[m_swz[i]]; + os << VirtualValue::chanchar[m_values[i]->value()->chan()]; } bool operator == (const RegisterVec4& lhs, const RegisterVec4& rhs) diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h index 52f817fc628..d71c0fd4db9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.h @@ -297,6 +297,17 @@ public: } } + uint8_t free_chan_mask() const { + int mask = 0xf; + for (int i = 0; i < 4; ++i) { + int chan = m_values[i]->value()->chan(); + if (chan <= 3) { + mask &= ~(1 << chan); + } + } + return mask; + } + bool ready(int block_id, int index) const; private: int m_sel; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp index f12f600204f..1a9b5e11d81 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_liverange_test.cpp @@ -173,7 +173,7 @@ TEST_F(LiveRangeTests, SimpleArrayAccess) auto s2x = vf.dest_from_string("S2.x"); auto s2y = vf.dest_from_string("S2.y"); - auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_group); + auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_chgr); LiveRangeMap expect = vf.prepare_live_range_map(); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp index 487cb20e461..12f7288dfcd 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp +++ 
b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp @@ -277,6 +277,13 @@ TEST_F(TestShaderFromNir, fs_opt_tex_coord) check(sh, fs_opt_tex_coord_expect); } +TEST_F(TestShaderFromNir, fs_shed_tex_coord) +{ + auto sh = from_string(fs_sched_tex_coord_init); + check(schedule(sh), fs_sched_tex_coord_expect); +} + + void TestShaderFromNir::check(Shader *s, const char *expect_orig) { ostringstream test_str; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index c1e312e6013..1f874ac8876 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -2409,6 +2409,89 @@ ALU ADD S5.z@group : S3.z@group S4.z@group {W} ALU ADD S5.w@group : S3.w@group S4.w@group {W} EXPORT_DONE PIXEL 0 S5.xyzw)"; +const char *fs_sched_tex_coord_init = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x@fully R0.y@fully +SHADER +ALU_GROUP_BEGIN + ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210 + ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210 + ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 + ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 + ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 + ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210 + ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU ADD S2.x@group : S1.x@chan S1.z@chan {W} +ALU ADD S2.y@group : S1.y@chan S1.w@chan {WL} +ALU MUL_IEEE S3.x@group : S1.x@chan S1.z@chan {W} +ALU MUL_IEEE S3.y@group : S1.y@chan S1.w@chan {WL} + +TEX SAMPLE S4.xyzw : S2.xy__ RID:18 SID:0 NNNN +TEX SAMPLE S5.xyzw : S3.xy__ RID:18 SID:0 NNNN +ALU ADD S6.x@group : S5.x@group S4.x@group {W} +ALU ADD 
S6.y@group : S5.y@group S4.y@group {W} +ALU ADD S6.z@group : S5.z@group S4.z@group {W} +ALU ADD S6.w@group : S5.w@group S4.w@group {W} +EXPORT_DONE PIXEL 0 S5.xyzw)"; + + +const char *fs_sched_tex_coord_expect = +R"(FS +CHIPCLASS EVERGREEN +PROP MAX_COLOR_EXPORTS:1 +PROP COLOR_EXPORTS:1 +PROP COLOR_EXPORT_MASK:15 +INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10 +OUTPUT LOC:0 NAME:1 MASK:15 +REGISTERS R0.x@fully R0.y@fully +SHADER +BLOCK_START +ALU_GROUP_BEGIN + ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210 + ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210 + ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 + ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 + ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 + ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210 + ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210 +ALU_GROUP_END +ALU_GROUP_BEGIN + ALU ADD S2.x@group : S1.x@chan S1.z@chan {W} + ALU ADD S2.y@group : S1.y@chan S1.w@chan {W} + ALU MUL_IEEE S3.z@chgr : S1.x@chan S1.z@chan {W} + ALU MUL_IEEE S3.w@chgr : S1.y@chan S1.w@chan {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +TEX SAMPLE S4.xyzw : S2.xy__ RID:18 SID:0 NNNN +TEX SAMPLE S5.xyzw : S3.zw__ RID:18 SID:0 NNNN +BLOCK_END +BLOCK_START +ALU_GROUP_BEGIN +ALU ADD S6.x@group : S5.x@group S4.x@group {W} +ALU ADD S6.y@group : S5.y@group S4.y@group {W} +ALU ADD S6.z@group : S5.z@group S4.z@group {W} +ALU ADD S6.w@group : S5.w@group S4.w@group {WL} +ALU_GROUP_END +BLOCK_END +BLOCK_START +EXPORT_DONE PIXEL 0 S5.xyzw +BLOCK_END)"; + + const char *fs_with_loop_multislot_reuse = R"(FS CHIPCLASS CAYMAN diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h index bcd224e174d..e56506b2df6 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h +++ 
b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.h @@ -98,6 +98,9 @@ extern const char *vtx_for_tcs_sched; extern const char *fs_opt_tex_coord_init; extern const char *fs_opt_tex_coord_expect; +extern const char *fs_sched_tex_coord_init; +extern const char *fs_sched_tex_coord_expect; + class TestShader : public ::testing::Test { void SetUp() override;