mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
r600/sfn: Implement source mod optimization in backend
Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23702>
This commit is contained in:
parent
da92733d5a
commit
ae7d904a73
4 changed files with 344 additions and 0 deletions
|
|
@ -468,6 +468,33 @@ bool AluInstr::do_replace_source(PRegister old_src, PVirtualValue new_src)
|
|||
return process;
|
||||
}
|
||||
|
||||
bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
|
||||
SourceMod to_clear)
|
||||
{
|
||||
auto old_src = m_src[i]->as_register();
|
||||
assert(old_src);
|
||||
|
||||
if (!can_replace_source(old_src, new_src)) {
|
||||
std::cerr << "Can't replace src " << *old_src << " with " << *new_src << "\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(old_src);
|
||||
old_src->del_use(this);
|
||||
|
||||
m_src[i] = new_src;
|
||||
|
||||
auto r = new_src->as_register();
|
||||
if (r)
|
||||
r->add_use(this);
|
||||
|
||||
m_source_modifiers |= to_set << (2 * i);
|
||||
m_source_modifiers &= ~(to_clear << (2 * i));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
|
||||
{
|
||||
if (!check_readport_validation(old_src, new_src))
|
||||
|
|
|
|||
|
|
@ -199,6 +199,9 @@ public:
|
|||
/* Bump the m_num_ar_uses counter by one. */
void inc_ar_uses()
{
   ++m_num_ar_uses;
}
|
||||
/* Current value of the m_num_ar_uses counter. */
auto num_ar_uses() const
{
   return m_num_ar_uses;
}
|
||||
|
||||
bool replace_src(int i, PVirtualValue new_src, uint32_t to_set,
|
||||
SourceMod to_clear);
|
||||
|
||||
/* OR the modifier bits `mod` into the two-bit field of source slot `src`. */
void set_source_mod(int src, SourceMod mod)
{
   const int slot_shift = 2 * src;
   m_source_modifiers |= mod << slot_shift;
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ public:
|
|||
|
||||
// NOTE(review): definition is not visible here; presumably rewrites `alu`
// into a MOV of its source `src_idx` - confirm against the implementation.
void convert_to_mov(AluInstr *alu, int src_idx);
|
||||
|
||||
// For every SSA register source of `alu` that is written by a single MOV
// carrying abs/neg source modifiers, read the MOV's source directly and
// merge the modifiers into `alu`; sets `progress` when a source is replaced.
void apply_source_mods(AluInstr *alu);
|
||||
// Move the dst clamp flag from `alu` to the ALU instruction that is the
// only parent of its (single-use) register source, provided that parent's
// opcode can clamp; sets `progress` on success.
void apply_dest_clamp(AluInstr *alu);
|
||||
|
||||
// Becomes true once one of the peephole rewrites changed an instruction.
bool progress{false};
|
||||
};
|
||||
|
||||
|
|
@ -81,6 +84,10 @@ void
|
|||
PeepholeVisitor::visit(AluInstr *instr)
|
||||
{
|
||||
switch (instr->opcode()) {
|
||||
case op1_mov:
|
||||
if (instr->has_alu_flag(alu_dst_clamp))
|
||||
apply_dest_clamp(instr);
|
||||
break;
|
||||
case op2_add:
|
||||
case op2_add_int:
|
||||
if (value_is_const_uint(instr->src(0), 0))
|
||||
|
|
@ -110,8 +117,13 @@ PeepholeVisitor::visit(AluInstr *instr)
|
|||
progress |= visitor.success;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
auto opinfo = alu_ops.at(instr->opcode());
|
||||
if (opinfo.can_srcmod && !opinfo.is_fp64)
|
||||
apply_source_mods(instr);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -154,6 +166,108 @@ PeepholeVisitor::visit(IfInstr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Fold the abs/neg source modifiers of a MOV into the instruction that
 * consumes the MOV result.
 *
 * For every source of @p alu that is an SSA register with exactly one
 * parent, where that parent is a non-clamping MOV with abs and/or neg on
 * its source, the register is replaced by the MOV's own source and the
 * modifiers are merged into the source slot of @p alu.
 *
 * Modifiers are combined as printed in the IR ("-|x|"): abs is applied
 * first, neg afterwards. Consequently a neg coming from the MOV
 *   - toggles the neg of the consumer if the consumer applies no abs, and
 *   - is dropped if the consumer already applies abs (|-x| == |x|), in
 *     which case the consumer's own neg must stay untouched.
 *
 * Sets `progress` when at least one source was replaced.
 */
void PeepholeVisitor::apply_source_mods(AluInstr *alu)
{
   // Instructions with three sources per slot are treated as not being able
   // to take an abs modifier.
   const bool has_abs = alu->n_sources() / alu->alu_slots() < 3;

   for (unsigned i = 0; i < alu->n_sources(); ++i) {

      auto reg = alu->psrc(i)->as_register();
      if (!reg)
         continue;
      if (!reg->has_flag(Register::ssa))
         continue;
      if (reg->parents().size() != 1)
         continue;

      auto p = (*reg->parents().begin())->as_alu();
      if (!p)
         continue;

      if (p->opcode() != op1_mov)
         continue;

      const bool parent_abs = p->has_source_mod(0, AluInstr::mod_abs);
      const bool parent_neg = p->has_source_mod(0, AluInstr::mod_neg);

      // A plain MOV carries nothing to fold.
      if (!parent_abs && !parent_neg)
         continue;

      if (parent_abs && !has_abs)
         continue;

      // A clamping MOV changes the value, so it can't be bypassed.
      if (p->has_alu_flag(alu_dst_clamp))
         continue;

      auto new_src = p->psrc(0);
      bool new_src_not_pinned = new_src->pin() == pin_free ||
                                new_src->pin() == pin_none;

      bool old_src_not_pinned = reg->pin() == pin_free ||
                                reg->pin() == pin_none;

      bool sources_equal_channel = reg->pin() == pin_chan &&
                                   new_src->pin() == pin_chan &&
                                   new_src->chan() == reg->chan();

      // Skip only if both values are pinned and they are not pinned to the
      // same channel.
      if (!new_src_not_pinned &&
          !old_src_not_pinned &&
          !sources_equal_channel)
         continue;

      uint32_t to_set = 0;
      AluInstr::SourceMod to_clear = AluInstr::mod_none;

      if (parent_abs)
         to_set |= AluInstr::mod_abs;

      // An abs already applied by the consumer absorbs the sign produced by
      // the MOV; only without it does the MOV's neg toggle the consumer's.
      if (parent_neg && !alu->has_source_mod(i, AluInstr::mod_abs)) {
         if (!alu->has_source_mod(i, AluInstr::mod_neg))
            to_set |= AluInstr::mod_neg;
         else
            to_clear = AluInstr::mod_neg;
      }

      progress |= alu->replace_src(i, new_src, to_set, to_clear);
   }
}
|
||||
|
||||
/**
 * Try to hoist the destination clamp of the MOV @p alu into the ALU
 * instruction that produces its source.
 *
 * The flag is moved only when the MOV source carries no abs/neg modifier,
 * the MOV destination is an SSA register, the source is a register with
 * exactly one parent and exactly one use, and that parent is an ALU
 * instruction whose opcode can clamp. Sets `progress` on success.
 */
void PeepholeVisitor::apply_dest_clamp(AluInstr *alu)
{
   const bool src_is_modified = alu->has_source_mod(0, AluInstr::mod_abs) ||
                                alu->has_source_mod(0, AluInstr::mod_neg);
   if (src_is_modified)
      return;

   auto dst_reg = alu->dest();
   assert(dst_reg);
   if (!dst_reg->has_flag(Register::ssa))
      return;

   auto src_reg = alu->psrc(0)->as_register();
   if (!src_reg || src_reg->parents().size() != 1 || src_reg->uses().size() != 1)
      return;

   auto producer = (*src_reg->parents().begin())->as_alu();
   if (!producer || !alu_ops.at(producer->opcode()).can_clamp)
      return;

   // The producer clamps from now on; copy propagation is left to deal with
   // the MOV that has become a plain copy.
   producer->set_alu_flag(alu_dst_clamp);
   alu->reset_alu_flag(alu_dst_clamp);

   progress = true;
}
|
||||
|
||||
|
||||
static EAluOp
|
||||
pred_from_op(EAluOp pred_op, EAluOp op)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -369,6 +369,206 @@ BLOCK_END
|
|||
check(sh, expect);
|
||||
};
|
||||
|
||||
// abs/neg modifiers of MOVs from a constant buffer are expected to end up
// directly on the ADD sources once the shader has been optimized.
TEST_F(TestShaderFromNir, PeeholeSoureModsSimple)
{
   const char *const shader_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU ADD S8.y@free{s} : S3.y@free{s} S7.x{s} {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S7.y{s} {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S7.z{s} {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S7.w{s} {WL}
ALU EXP_IEEE S12.y@free{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";

   const char *const optimized_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
SHADER
BLOCK_START
ALU ADD S8.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] -KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] KC0[0].w {WL}
ALU EXP_IEEE S18.x@group{s} : S8.y@free{s} + S8.y@free{s} + S8.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";

   auto shader = from_string(shader_text);
   optimize(*shader);
   check(shader, optimized_text);
}
|
||||
|
||||
// Two chained MOVs with source modifiers feed each ADD; the modifiers have
// to be combined with abs applied before neg:
//   x:  | |k| |    ->  |k|
//   y:  -( -k )    ->  k
//   z:  -| -|k| |  ->  -|k|   (the inner neg is absorbed by the outer abs,
//                              the outer neg has to survive)
//   w:  -| |k| |   ->  -|k|
TEST_F(TestShaderFromNir, PeeholeSoureModsAbsNegTwice)
{
   const char *input =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
SHADER
BLOCK_START
ALU MOV S2.x@free{s} : I[0] {WL}
ALU MOV S3.y@free{s} : L[0x40c00000] {WL}
ALU MOV S4.z@free{s} : L[0xc1140000] {WL}
ALU MOV S5.w@free{s} : L[0xbfe00000] {WL}
ALU MOV S6.x@free{s} : L[0x3fa00000] {WL}
ALU MOV S7.x{s} : |KC0[0].x| {W}
ALU MOV S7.y{s} : -KC0[0].y {W}
ALU MOV S7.z{s} : -|KC0[0].z| {W}
ALU MOV S7.w{s} : KC0[0].w {WL}
ALU MOV S8.x : |S7.x| {W}
ALU MOV S8.y : -S7.y {W}
ALU MOV S8.z : -|S7.z| {W}
ALU MOV S8.w : -|S7.x| {WL}
ALU ADD S19.y@free{s} : S3.y@free{s} S8.x {WL}
ALU ADD S9.z@free{s} : S4.z@free{s} S8.y {WL}
ALU ADD S10.w@free{s} : S5.w@free{s} S8.z {WL}
ALU ADD S11.x@free{s} : S6.x@free{s} S8.w {WL}
ALU EXP_IEEE S12.y@free{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {WL}
ALU EXP_IEEE S13.z@free{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {WL}
ALU EXP_IEEE S14.x@free{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {WL}
ALU EXP_IEEE S15.y@free{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
ALU MOV S17.x{s} : S12.y@free{s} {W}
ALU MOV S17.y{s} : S13.z@free{s} {W}
ALU MOV S17.z{s} : S14.x@free{s} {W}
ALU MOV S17.w{s} : S15.y@free{s} {WL}
ALU MOV S18.x@group{s} : S17.x{s} {W}
ALU MOV S18.y@group{s} : S17.y{s} {W}
ALU MOV S18.z@group{s} : S17.z{s} {W}
ALU MOV S18.w@group{s} : S17.w{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END)";

   const char *expect =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
OUTPUT LOC:1 NAME:5 MASK:15 SID:9 SPI_SID:10
SHADER
BLOCK_START
ALU ADD S19.y@free{s} : L[0x40c00000] |KC0[0].x| {WL}
ALU ADD S9.z@free{s} : L[0xc1140000] KC0[0].y {WL}
ALU ADD S10.w@free{s} : L[0xbfe00000] -|KC0[0].z| {WL}
ALU ADD S11.x@free{s} : L[0x3fa00000] -|KC0[0].x| {WL}
ALU EXP_IEEE S18.x@group{s} : S19.y@free{s} + S19.y@free{s} + S19.y@free{s} {W}
ALU EXP_IEEE S18.y@group{s} : S9.z@free{s} + S9.z@free{s} + S9.z@free{s} {W}
ALU EXP_IEEE S18.z@group{s} : S10.w@free{s} + S10.w@free{s} + S10.w@free{s} {W}
ALU EXP_IEEE S18.w@group{s} : S11.x@free{s} + S11.x@free{s} + S11.x@free{s} + S11.x@free{s} {WL}
EXPORT_DONE PARAM 0 S18.xyzw
BLOCK_END
)";

   auto sh = from_string(input);
   optimize(*sh);
   check(sh, expect);
}
|
||||
|
||||
// The clamp of the trailing MOV is expected to move onto the ADD, which in
// turn picks up the abs/neg modifiers of the MOVs feeding it.
TEST_F(TestShaderFromNir, PeeholeSoureModsClamp)
{
   const char *const shader_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
SHADER
BLOCK_START
ALU MOV S1.x{s} : |KC0[0].x| {W}
ALU MOV S2.y{s} : -KC0[0].y {W}
ALU ADD S3.x : S1.x S2.y {W}
ALU MOV CLAMP S4.x : S3.x {W}
EXPORT_DONE PARAM 0 S4.xxxx
BLOCK_END)";

   const char *const optimized_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
SHADER
BLOCK_START
ALU ADD CLAMP S3.x : |KC0[0].x| -KC0[0].y {W}
EXPORT_DONE PARAM 0 S3.xxxx
BLOCK_END
)";

   auto shader = from_string(shader_text);
   optimize(*shader);
   check(shader, optimized_text);
}
|
||||
|
||||
// Source modifiers are expected to be folded into every slot of a
// multi-slot instruction (DOT4), for constant-buffer and register sources.
TEST_F(TestShaderFromNir, PeeholeSoureModsMuliSlot)
{
   const char *const shader_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
ALU MOV S1.x{s} : |KC0[0].x| {W}
ALU MOV S1.y{s} : -KC0[0].y {W}
ALU MOV S1.z{s} : |KC0[0].z| {W}
ALU MOV S1.w{s} : KC0[0].w {W}
ALU MOV S2.x{s} : |R1.x| {W}
ALU MOV S2.y{s} : R1.y {W}
ALU MOV S2.z{s} : -R1.z {W}
ALU MOV S2.w{s} : -R1.w {W}
ALU DOT4 S5.x : S1.x S2.x + S1.y S2.y + S1.z S2.z + S1.w S2.w {W}
EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END)";

   const char *const optimized_text =
R"(VS
CHIPCLASS CAYMAN
INPUT LOC:0 NAME:15
OUTPUT LOC:0 NAME:0 MASK:15
REGISTERS R1.xyzw
SHADER
BLOCK_START
ALU DOT4 S5.x : |KC0[0].x| |R1.x| + -KC0[0].y R1.y + |KC0[0].z| -R1.z + KC0[0].w -R1.w {W}
EXPORT_DONE PARAM 0 S5.xxxx
BLOCK_END
)";

   auto shader = from_string(shader_text);
   optimize(*shader);
   check(shader, optimized_text);
}
|
||||
|
||||
|
||||
TEST_F(TestShaderFromNir, OptimizeIntoGroup)
|
||||
{
|
||||
const char *input =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue