mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 10:00:14 +01:00
r600/sfn: Copy propagate into TEX source
This is possible if all register values are actually from the same register ID. Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18619>
This commit is contained in:
parent
5416d1bc4b
commit
c0b6c59e08
3 changed files with 82 additions and 21 deletions
|
|
@ -376,6 +376,7 @@ bool TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
|
|||
success = true;
|
||||
}
|
||||
}
|
||||
m_src.validate();
|
||||
if (success) {
|
||||
old_src->del_use(this);
|
||||
new_src->as_register()->add_use(this);
|
||||
|
|
|
|||
|
|
@ -244,7 +244,7 @@ public:
|
|||
void visit(AluInstr *instr) override;
|
||||
void visit(AluGroup *instr) override;
|
||||
void visit(TexInstr *instr) override;
|
||||
void visit(ExportInstr *instr) override {(void)instr;}
|
||||
void visit(ExportInstr *instr) override;
|
||||
void visit(FetchInstr *instr) override;
|
||||
void visit(Block *instr) override;
|
||||
void visit(ControlFlowInstr *instr) override {(void)instr;}
|
||||
|
|
@ -261,6 +261,8 @@ public:
|
|||
void visit(LDSAtomicInstr *instr) override {(void)instr;};
|
||||
void visit(LDSReadInstr *instr) override {(void)instr;};
|
||||
|
||||
void propagate_to(RegisterVec4& src, Instr *instr);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
|
|
@ -386,7 +388,69 @@ void CopyPropFwdVisitor::visit(AluGroup *instr)
|
|||
|
||||
/* Try to forward-propagate copies into the source vector of a TEX
 * instruction. The actual work is done by propagate_to(). */
void CopyPropFwdVisitor::visit(TexInstr *instr)
{
   propagate_to(instr->src(), instr);
}
|
||||
|
||||
/* Try to forward-propagate copies into the value vector that an export
 * instruction reads. The actual work is done by propagate_to(). */
void CopyPropFwdVisitor::visit(ExportInstr *instr)
{
   auto& exported = instr->value();
   propagate_to(exported, instr);
}
|
||||
|
||||
/* Forward copy propagation into a vec4 register group (called for TEX
 * sources and export values).
 *
 * Each SSA component of `src` that is written by a plain ALU MOV (no neg/abs
 * source modifier, no destination clamp, no relative source addressing) is
 * replaced by the source register of that MOV. The rewrite is all-or-nothing:
 * it is only done when every candidate component qualifies and all MOV
 * sources are SSA registers sharing one register index (sel); otherwise
 * `src` is left untouched.
 *
 * \param src   vec4 register group that is rewritten in place
 * \param instr instruction reading `src`; it is removed from / added to the
 *              use lists of the registers held by `src`
 *
 * Sets the `progress` member when at least one component was replaced.
 */
void CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
{
   /* Step 1: collect the defining instruction of every SSA component. */
   AluInstr *parents[4] = {nullptr};
   for (int i = 0; i < 4; ++i) {
      if (src[i]->chan() < 4 && src[i]->is_ssa()) {
         /* We have a pre-defined value, so we can't propagate a copy */
         if (src[i]->parents().empty())
            return;

         assert(src[i]->parents().size() == 1);
         parents[i] = (*src[i]->parents().begin())->as_alu();

         /* The parent is not an ALU instruction (as_alu() gave no result),
          * so this component can't be copy-propagated. Bail out completely:
          * replacing only the other components could leave the vec4 with
          * components from different registers. */
         if (!parents[i])
            return;
      }
   }

   /* Step 2: check that every parent is an unmodified MOV from an SSA
    * register and that all these registers have the same index. */
   PRegister new_src[4] = {nullptr};
   int sel = -1;
   for (int i = 0; i < 4; ++i) {
      if (!parents[i])
         continue;

      if ((parents[i]->opcode() != op1_mov) ||
          parents[i]->has_alu_flag(alu_src0_neg) ||
          parents[i]->has_alu_flag(alu_src0_abs) ||
          parents[i]->has_alu_flag(alu_dst_clamp) ||
          parents[i]->has_alu_flag(alu_src0_rel))
         return;

      /* Named differently from the `src` parameter to avoid shadowing it. */
      auto mov_src = parents[i]->src(0).as_register();
      if (!mov_src || !mov_src->is_ssa())
         return;

      if (sel < 0)
         sel = mov_src->sel();
      else if (sel != mov_src->sel())
         return;

      new_src[i] = mov_src;
   }

   /* Step 3: all checks passed, replace the components. */
   for (int i = 0; i < 4; ++i) {
      if (!parents[i])
         continue;

      src.del_use(instr);
      src.set_value(i, new_src[i]);

      /* The register is now part of a vec4 group; unless it is already
       * fully pinned, upgrade its pin (chan -> chgr, anything else ->
       * group). */
      if (new_src[i]->pin() != pin_fully) {
         if (new_src[i]->pin() == pin_chan)
            new_src[i]->set_pin(pin_chgr);
         else
            new_src[i]->set_pin(pin_group);
      }

      src.add_use(instr);
      progress = true;
   }

   if (progress)
      src.validate();
}
|
||||
|
||||
void CopyPropFwdVisitor::visit(FetchInstr *instr)
|
||||
|
|
|
|||
|
|
@ -504,13 +504,13 @@ ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
|
|||
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
|
||||
ALU_GROUP_END
|
||||
ALU_GROUP_BEGIN
|
||||
ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
|
||||
ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
|
||||
ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
|
||||
ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
|
||||
ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
|
||||
ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
|
||||
ALU_GROUP_END
|
||||
BLOCK_END
|
||||
BLOCK_START
|
||||
TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
|
||||
TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
|
||||
BLOCK_END
|
||||
BLOCK_START
|
||||
EXPORT_DONE PIXEL 0 S1029.xyzw
|
||||
|
|
@ -540,10 +540,10 @@ ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W}
|
|||
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {}
|
||||
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L}
|
||||
ALU_GROUP_END
|
||||
ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
|
||||
ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
|
||||
ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
|
||||
TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
|
||||
ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
|
||||
ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
|
||||
ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
|
||||
TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
|
||||
EXPORT_DONE PIXEL 0 S1029.xyzw
|
||||
)";
|
||||
|
||||
|
|
@ -585,12 +585,8 @@ ALU MOV S2.x : KC0[1].x {W}
|
|||
ALU MOV S2.y : KC0[1].y {W}
|
||||
ALU MOV S2.z : KC0[1].z {W}
|
||||
ALU MOV S2.w : KC0[1].w {WL}
|
||||
ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
|
||||
ALU MOV S4.x : S3.x@free {W}
|
||||
ALU MOV S4.y : S3.x@free {W}
|
||||
ALU MOV S4.z : S3.x@free {W}
|
||||
ALU MOV S4.w : S3.x@free {W}
|
||||
EXPORT_DONE PIXEL 0 S4.xyzw
|
||||
ALU DOT4_IEEE S3.x@group : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
|
||||
EXPORT_DONE PIXEL 0 S3.xxxx
|
||||
)";
|
||||
|
||||
const char *glxgears_vs2_nir =
|
||||
|
|
@ -2188,16 +2184,16 @@ SHADER
|
|||
ALU_GROUP_BEGIN
|
||||
ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
|
||||
ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
|
||||
ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210
|
||||
ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210
|
||||
ALU INTERP_ZW S1.z@chgr : R0.y@fully Param0.z {W} VEC_210
|
||||
ALU INTERP_ZW S1.w@chgr : R0.x@fully Param0.w {WL} VEC_210
|
||||
ALU_GROUP_END
|
||||
ALU_GROUP_BEGIN
|
||||
ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
|
||||
ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210
|
||||
ALU INTERP_XY S1.x@chgr : R0.y@fully Param0.x {W} VEC_210
|
||||
ALU INTERP_XY S1.y@chgr : R0.x@fully Param0.y {W} VEC_210
|
||||
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
|
||||
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
|
||||
ALU_GROUP_END
|
||||
EXPORT_DONE PIXEL 0 S2.xyzw
|
||||
EXPORT_DONE PIXEL 0 S1.xyzw
|
||||
)";
|
||||
|
||||
const char *shader_group_chan_pin_combined_sheduled =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue