gm107/ir: use CS2R for SV_CLOCK

This instruction seems to be faster than S2R and requires no barrier,
though the range of special registers it can read from is limited.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
This commit is contained in:
Rhys Perry 2018-07-19 16:58:46 +01:00 committed by Karol Herbst
parent 94cf964586
commit 3b6edd0b59
3 changed files with 25 additions and 2 deletions

View file

@ -124,6 +124,7 @@ private:
void emitMOV();
void emitS2R();
void emitCS2R();
void emitF2F();
void emitF2I();
void emitI2F();
@ -749,6 +750,14 @@ CodeEmitterGM107::emitS2R()
emitGPR (0x00, insn->def(0));
}
/* Emit the CS2R form of a special-register read for the current
 * instruction (insn).
 *
 * emitInstruction() selects this path for OP_RDSV when the target reports
 * that the system value is CS2R-readable; all other system values keep
 * using emitS2R().
 *
 * NOTE(review): the first argument of emitSYS()/emitGPR() is presumably the
 * bit offset of the operand field inside the instruction word -- confirm
 * against those helpers.
 */
void
CodeEmitterGM107::emitCS2R()
{
   const int sysFieldPos = 0x14; /* where source 0 (the system value) goes */
   const int gprFieldPos = 0x00; /* where def 0 (the destination) goes */

   emitInsn(0x50c80000);
   emitSYS(sysFieldPos, insn->src(0));
   emitGPR(gprFieldPos, insn->def(0));
}
void
CodeEmitterGM107::emitF2F()
{
@ -3192,7 +3201,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitMOV();
break;
case OP_RDSV:
emitS2R();
if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
emitCS2R();
else
emitS2R();
break;
case OP_ABS:
case OP_NEG:

View file

@ -153,9 +153,10 @@ TargetGM107::isBarrierRequired(const Instruction *insn) const
case OP_AFETCH:
case OP_PFETCH:
case OP_PIXLD:
case OP_RDSV:
case OP_SHFL:
return true;
case OP_RDSV:
return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
default:
break;
}
@ -232,6 +233,8 @@ TargetGM107::getLatency(const Instruction *insn) const
if (insn->dType != TYPE_F64)
return 6;
break;
case OP_RDSV:
return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
case OP_ABS:
case OP_CEIL:
case OP_CVT:
@ -321,6 +324,12 @@ TargetGM107::getReadLatency(const Instruction *insn) const
return 0;
}
/* Tell whether the system value @sv is read with CS2R rather than S2R.
 *
 * Only SV_CLOCK qualifies. The result is consulted for OP_RDSV when choosing
 * the emitted instruction, when deciding whether a barrier is required, and
 * when reporting the latency.
 */
bool
TargetGM107::isCS2RSV(SVSemantic sv) const
{
   if (sv != SV_CLOCK)
      return false;

   return true;
}
bool
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
{

View file

@ -23,6 +23,8 @@ public:
virtual bool canDualIssue(const Instruction *, const Instruction *) const;
virtual int getLatency(const Instruction *) const;
virtual int getReadLatency(const Instruction *) const;
virtual bool isCS2RSV(SVSemantic) const;
};
} // namespace nv50_ir