r600/sfn: Copy propagate into TEX source

This is possible if all components of the source vector are plain copies
of values that live in the same register (i.e. share one register ID).

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18619>
This commit is contained in:
Gert Wollny 2022-09-07 08:22:04 +02:00 committed by Marge Bot
parent 5416d1bc4b
commit c0b6c59e08
3 changed files with 82 additions and 21 deletions

View file

@ -376,6 +376,7 @@ bool TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
success = true;
}
}
m_src.validate();
if (success) {
old_src->del_use(this);
new_src->as_register()->add_use(this);

View file

@ -244,7 +244,7 @@ public:
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
void visit(ExportInstr *instr) override {(void)instr;}
void visit(ExportInstr *instr) override;
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;}
@ -261,6 +261,8 @@ public:
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
void propagate_to(RegisterVec4& src, Instr *instr);
bool progress;
};
@ -386,7 +388,69 @@ void CopyPropFwdVisitor::visit(AluGroup *instr)
/* Visit a texture instruction: hand its vec4 source (instr->src()) to
 * propagate_to(), which tries to replace copied components by the
 * registers they were copied from. */
void CopyPropFwdVisitor::visit(TexInstr *instr)
{
/* NOTE(review): this cast only silences an unused-parameter warning and
 * is redundant now that instr is used on the next line. */
(void)instr;
propagate_to(instr->src(), instr);
}
/* Visit an export instruction: hand its vec4 value (instr->value()) to
 * propagate_to(), which tries to replace copied components by the
 * registers they were copied from. */
void CopyPropFwdVisitor::visit(ExportInstr *instr)
{
propagate_to(instr->value(), instr);
}
/* Try to copy-propagate into a vec4 source.
 *
 * For every component of src that is an SSA value with chan() < 4, the
 * single parent instruction is looked up. The propagation is only done if
 * every such parent is a plain ALU MOV (no neg/abs source modifier, no
 * destination clamp, no relative source access) whose source is an SSA
 * register, and if all these MOV sources share the same sel(). In that case
 * each of these components of src is replaced by the source of its MOV.
 *
 * src    vec4 value to update in place
 * instr  instruction that uses src; its use entries are dropped and re-added
 *        around each replacement
 *
 * Sets the visitor's progress flag if at least one component was replaced.
 */
void CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
{
   AluInstr *parents[4] = {nullptr};
   for (int i = 0; i < 4; ++i) {
      /* NOTE(review): presumably chan() >= 4 marks an unused component;
       * such components and non-SSA values are left untouched. */
      if (src[i]->chan() < 4 && src[i]->is_ssa()) {
         /* We have a pre-defined value, so we can't propagate a copy */
         if (src[i]->parents().empty())
            return;

         assert(src[i]->parents().size() == 1);
         parents[i] = (*src[i]->parents().begin())->as_alu();

         /* The parent is not an ALU instruction, so this component can't
          * be replaced. Bail out completely instead of skipping just this
          * component: replacing only the other components would leave src
          * with values from different register IDs. */
         if (!parents[i])
            return;
      }
   }

   PRegister new_src[4] = {nullptr};
   int sel = -1;
   for (int i = 0; i < 4; ++i) {
      if (!parents[i])
         continue;

      /* Only an unmodified MOV is a pure copy */
      if ((parents[i]->opcode() != op1_mov) ||
          parents[i]->has_alu_flag(alu_src0_neg) ||
          parents[i]->has_alu_flag(alu_src0_abs) ||
          parents[i]->has_alu_flag(alu_dst_clamp) ||
          parents[i]->has_alu_flag(alu_src0_rel))
         return;

      auto copy_src = parents[i]->src(0).as_register();
      if (!copy_src || !copy_src->is_ssa())
         return;

      /* All replacement registers must have the same sel() */
      if (sel < 0)
         sel = copy_src->sel();
      else if (sel != copy_src->sel())
         return;

      new_src[i] = copy_src;
   }

   bool changed = false;
   for (int i = 0; i < 4; ++i) {
      if (!parents[i])
         continue;

      src.del_use(instr);
      src.set_value(i, new_src[i]);

      /* The register is now part of a vec4 value: unless it is already
       * fully pinned, upgrade a channel pin to pin_chgr and any other pin
       * to pin_group. */
      if (new_src[i]->pin() != pin_fully) {
         if (new_src[i]->pin() == pin_chan)
            new_src[i]->set_pin(pin_chgr);
         else
            new_src[i]->set_pin(pin_group);
      }
      src.add_use(instr);
      changed = true;
   }

   /* Only validate and report progress when this call changed src */
   if (changed) {
      progress = true;
      src.validate();
   }
}
void CopyPropFwdVisitor::visit(FetchInstr *instr)

View file

@ -504,13 +504,13 @@ ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S1029.xyzw
@ -540,10 +540,10 @@ ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {}
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L}
ALU_GROUP_END
ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
EXPORT_DONE PIXEL 0 S1029.xyzw
)";
@ -585,12 +585,8 @@ ALU MOV S2.x : KC0[1].x {W}
ALU MOV S2.y : KC0[1].y {W}
ALU MOV S2.z : KC0[1].z {W}
ALU MOV S2.w : KC0[1].w {WL}
ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
ALU MOV S4.x : S3.x@free {W}
ALU MOV S4.y : S3.x@free {W}
ALU MOV S4.z : S3.x@free {W}
ALU MOV S4.w : S3.x@free {W}
EXPORT_DONE PIXEL 0 S4.xyzw
ALU DOT4_IEEE S3.x@group : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
EXPORT_DONE PIXEL 0 S3.xxxx
)";
const char *glxgears_vs2_nir =
@ -2188,16 +2184,16 @@ SHADER
ALU_GROUP_BEGIN
ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210
ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210
ALU INTERP_ZW S1.z@chgr : R0.y@fully Param0.z {W} VEC_210
ALU INTERP_ZW S1.w@chgr : R0.x@fully Param0.w {WL} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210
ALU INTERP_XY S1.x@chgr : R0.y@fully Param0.x {W} VEC_210
ALU INTERP_XY S1.y@chgr : R0.x@fully Param0.y {W} VEC_210
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
EXPORT_DONE PIXEL 0 S2.xyzw
EXPORT_DONE PIXEL 0 S1.xyzw
)";
const char *shader_group_chan_pin_combined_sheduled =