r600/sfn: improve scheduling of tex sources

Let tex sources switch the channel to unused channels.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19300>
This commit is contained in:
Gert Wollny 2022-10-07 20:36:43 +02:00 committed by Marge Bot
parent 8f7100e48f
commit a73b51c187
14 changed files with 151 additions and 28 deletions

View file

@ -127,6 +127,8 @@ public:
// Read-only reference to the m_dependend_instr list held by this instruction.
const InstrList& dependend_instr() { return m_dependend_instr;}
// Default implementation: returns nullptr. AluInstr overrides this to return
// itself.
virtual AluInstr *as_alu() { return nullptr;}
// Bit mask (one bit per channel) that AluGroup::add_vec_instructions ANDs
// together over all uses of a destination register when looking for a channel
// to place it in; an empty mask stops the search. The default returns 0;
// AluInstr, ExportInstr and TexInstr provide their own masks.
virtual uint8_t allowed_dest_chan_mask() const { return 0; }
protected:
// Read-only reference to the m_required_instr list, available to derived
// classes.
const InstrList& required_instr() const {return m_required_instr; }

View file

@ -444,6 +444,18 @@ void AluInstr::set_sources(SrcValues src)
}
}
// Channel mask reported by this ALU instruction:
//  - exactly one slot            -> 0xf
//  - several slots, cayman trans -> the low alu_slots() bits
//  - several slots otherwise     -> 0
uint8_t AluInstr::allowed_dest_chan_mask() const
{
   // Single-slot instructions report all four channel bits.
   if (alu_slots() == 1)
      return 0xf;

   // Multi-slot instructions report an empty mask unless they carry the
   // alu_is_cayman_trans flag.
   if (!has_alu_flag(alu_is_cayman_trans))
      return 0;

   // Set one bit per slot, starting at bit 0.
   return (1 << alu_slots()) - 1;
}
bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
{
if (m_dest->equal_to(*new_dest))

View file

@ -160,6 +160,8 @@ public:
// Override of the base hook: an AluInstr returns itself.
AluInstr *as_alu() override { return this;}
uint8_t allowed_dest_chan_mask() const override;
private:
friend class AluGroup;

View file

@ -25,6 +25,9 @@
*/
#include "sfn_instr_alugroup.h"
#include "sfn_instr_export.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_tex.h"
#include "sfn_debug.h"
#include <algorithm>
@ -158,25 +161,6 @@ int AluGroup::free_slots() const
return free_mask;
}
// Visitor that accumulates, over the ALU instructions it is shown, whether a
// slot switch is still possible (`yes`) and which channel bits remain usable
// (`free_mask`).
class AluAllowSlotSwitch : public AluInstrVisitor {
public:
   using AluInstrVisitor::visit;

   void visit(AluInstr *alu) {
      const auto slot_count = alu->alu_slots();

      // Single-slot instructions put no restriction on the result.
      if (slot_count == 1)
         return;

      // A multi-slot instruction without the cayman trans flag vetoes the
      // switch altogether.
      if (!alu->has_alu_flag(alu_is_cayman_trans)) {
         yes = false;
         return;
      }

      // Otherwise only the low `slot_count` channel bits stay available.
      free_mask &= (1 << slot_count) - 1;
   }

   bool yes{true};
   uint8_t free_mask{0xf};
};
bool AluGroup::add_vec_instructions(AluInstr *instr)
{
if (!update_indirect_access(instr))
@ -213,18 +197,18 @@ bool AluGroup::add_vec_instructions(AluInstr *instr)
} else {
auto dest = instr->dest();
if (dest && dest->pin() == pin_free) {
if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) {
AluAllowSlotSwitch swich_allowed;
int free_mask = 0xf;
for (auto u : dest->uses()) {
u->accept(swich_allowed);
if (!swich_allowed.yes)
free_mask &= u->allowed_dest_chan_mask();
if (!free_mask)
return false;
}
int free_chan = 0;
while (free_chan < 4 && (m_slots[free_chan] ||
!(swich_allowed.free_mask & (1 << free_chan))))
!(free_mask & (1 << free_chan))))
free_chan++;
if (free_chan < 4) {
@ -255,8 +239,12 @@ bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
m_has_lds_op |= instr->has_lds_access();
sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
auto dest = instr->dest();
if (dest && dest->pin() == pin_free)
dest->set_pin(pin_chan);
if (dest) {
if (dest->pin() == pin_free)
dest->set_pin(pin_chan);
else if (dest->pin() == pin_group)
dest->set_pin(pin_chgr);
}
instr->pin_sources_to_chan();
return true;
}

View file

@ -147,6 +147,11 @@ ExportInstr::Pointer ExportInstr::from_string_impl(std::istream& is, ValueFactor
return new ExportInstr( type, pos, value);
}
uint8_t ExportInstr::allowed_dest_chan_mask() const
{
return value().free_chan_mask();
}
ScratchIOInstr::ScratchIOInstr(const RegisterVec4& value, PRegister addr,
int align, int align_offset, int writemask,
int array_size, bool is_read):

View file

@ -43,6 +43,8 @@ public:
// Read-only access to the stored m_value vector.
const RegisterVec4& value() const {return m_value;};
// Mutable access to the stored m_value vector.
RegisterVec4& value() {return m_value;};
private:
RegisterVec4 m_value;
@ -79,6 +81,8 @@ public:
static Instr::Pointer from_string(std::istream& is, ValueFactory &vf);
static Instr::Pointer last_from_string(std::istream& is, ValueFactory &vf);
uint8_t allowed_dest_chan_mask() const override;
private:
static ExportInstr::Pointer from_string_impl(std::istream& is, ValueFactory &vf);

View file

@ -376,6 +376,11 @@ bool TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
return success;
}
// Reports the channels not occupied by the source vector (m_src) of this
// texture instruction, as computed by its free_chan_mask().
uint8_t TexInstr::allowed_dest_chan_mask() const
{
   const auto& tex_source = m_src;
   return tex_source.free_chan_mask();
}
struct SamplerId {
int id;
bool indirect;

View file

@ -152,6 +152,7 @@ public:
bool replace_source(PRegister old_src, PVirtualValue new_src) override;
uint8_t allowed_dest_chan_mask() const override;
private:
bool do_ready() const override;

View file

@ -451,7 +451,7 @@ void RegisterVec4::print(std::ostream& os) const
{
os << (m_values[0]->value()->is_ssa() ? 'S' : 'R') << sel() << ".";
for (int i = 0; i < 4; ++i)
os << VirtualValue::chanchar[m_swz[i]];
os << VirtualValue::chanchar[m_values[i]->value()->chan()];
}
bool operator == (const RegisterVec4& lhs, const RegisterVec4& rhs)

View file

@ -297,6 +297,17 @@ public:
}
}
// Returns a 4-bit mask with a bit set for every channel in 0..3 that none of
// the four elements of this vector currently sits in. Elements whose channel
// is above 3 do not occupy any bit.
uint8_t free_chan_mask() const {
   int occupied = 0;
   for (int idx = 0; idx < 4; ++idx) {
      const int chan = m_values[idx]->value()->chan();
      if (chan < 4)
         occupied |= 1 << chan;
   }
   return 0xf & ~occupied;
}
bool ready(int block_id, int index) const;
private:
int m_sel;

View file

@ -173,7 +173,7 @@ TEST_F(LiveRangeTests, SimpleArrayAccess)
auto s2x = vf.dest_from_string("S2.x");
auto s2y = vf.dest_from_string("S2.y");
auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_group);
auto s3 = vf.dest_vec4_from_string("S3.xy01", dummy, pin_chgr);
LiveRangeMap expect = vf.prepare_live_range_map();

View file

@ -277,6 +277,13 @@ TEST_F(TestShaderFromNir, fs_opt_tex_coord)
check(sh, fs_opt_tex_coord_expect);
}
// Parses fs_sched_tex_coord_init, runs it through schedule(), and compares the
// outcome with fs_sched_tex_coord_expect.
TEST_F(TestShaderFromNir, fs_shed_tex_coord)
{
   auto shader = from_string(fs_sched_tex_coord_init);
   auto scheduled = schedule(shader);
   check(scheduled, fs_sched_tex_coord_expect);
}
void TestShaderFromNir::check(Shader *s, const char *expect_orig)
{
ostringstream test_str;

View file

@ -2409,6 +2409,89 @@ ALU ADD S5.z@group : S3.z@group S4.z@group {W}
ALU ADD S5.w@group : S3.w@group S4.w@group {W}
EXPORT_DONE PIXEL 0 S5.xyzw)";
// Input shader text for the fs_shed_tex_coord test: it is parsed with
// from_string() and the resulting shader is handed to schedule().
const char *fs_sched_tex_coord_init =
R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
OUTPUT LOC:0 NAME:1 MASK:15
REGISTERS R0.x@fully R0.y@fully
SHADER
ALU_GROUP_BEGIN
ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210
ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210
ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210
ALU_GROUP_END
ALU ADD S2.x@group : S1.x@chan S1.z@chan {W}
ALU ADD S2.y@group : S1.y@chan S1.w@chan {WL}
ALU MUL_IEEE S3.x@group : S1.x@chan S1.z@chan {W}
ALU MUL_IEEE S3.y@group : S1.y@chan S1.w@chan {WL}
TEX SAMPLE S4.xyzw : S2.xy__ RID:18 SID:0 NNNN
TEX SAMPLE S5.xyzw : S3.xy__ RID:18 SID:0 NNNN
ALU ADD S6.x@group : S5.x@group S4.x@group {W}
ALU ADD S6.y@group : S5.y@group S4.y@group {W}
ALU ADD S6.z@group : S5.z@group S4.z@group {W}
ALU ADD S6.w@group : S5.w@group S4.w@group {W}
EXPORT_DONE PIXEL 0 S5.xyzw)";
// Expected shader text for the fs_shed_tex_coord test: check() compares the
// result of schedule() against this string. Here the second texture source
// reads S3.zw__, where the input shader has S3.xy__.
const char *fs_sched_tex_coord_expect =
R"(FS
CHIPCLASS EVERGREEN
PROP MAX_COLOR_EXPORTS:1
PROP COLOR_EXPORTS:1
PROP COLOR_EXPORT_MASK:15
INPUT LOC:0 NAME:5 INTERP:2 SID:9 SPI_SID:10
OUTPUT LOC:0 NAME:1 MASK:15
REGISTERS R0.x@fully R0.y@fully
SHADER
BLOCK_START
ALU_GROUP_BEGIN
ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210
ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
ALU INTERP_ZW S1.z@chan : R0.y@fully Param0.z {W} VEC_210
ALU INTERP_ZW S1.w@chan : R0.x@fully Param0.w {WL} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU ADD S2.x@group : S1.x@chan S1.z@chan {W}
ALU ADD S2.y@group : S1.y@chan S1.w@chan {W}
ALU MUL_IEEE S3.z@chgr : S1.x@chan S1.z@chan {W}
ALU MUL_IEEE S3.w@chgr : S1.y@chan S1.w@chan {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
TEX SAMPLE S4.xyzw : S2.xy__ RID:18 SID:0 NNNN
TEX SAMPLE S5.xyzw : S3.zw__ RID:18 SID:0 NNNN
BLOCK_END
BLOCK_START
ALU_GROUP_BEGIN
ALU ADD S6.x@group : S5.x@group S4.x@group {W}
ALU ADD S6.y@group : S5.y@group S4.y@group {W}
ALU ADD S6.z@group : S5.z@group S4.z@group {W}
ALU ADD S6.w@group : S5.w@group S4.w@group {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S5.xyzw
BLOCK_END)";
const char *fs_with_loop_multislot_reuse =
R"(FS
CHIPCLASS CAYMAN

View file

@ -98,6 +98,9 @@ extern const char *vtx_for_tcs_sched;
extern const char *fs_opt_tex_coord_init;
extern const char *fs_opt_tex_coord_expect;
extern const char *fs_sched_tex_coord_init;
extern const char *fs_sched_tex_coord_expect;
class TestShader : public ::testing::Test {
void SetUp() override;