gk110/ir: Use the new rcp/rsq in library

v2: (Karol Herbst <kherbst@redhat.com>)
 * fix Value setup for the builtins

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
[imirkin: track the fp64 flag when switching ops to calls]
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: 19.0 <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 656ad06051)
This commit is contained in:
Boyan Ding 2017-03-09 13:55:19 +08:00 committed by Dylan Baker
parent c5b9774eb4
commit 81810fa5db
5 changed files with 42 additions and 0 deletions

View file

@ -1119,6 +1119,7 @@ Program::Program(Type type, Target *arch)
binSize = 0;
maxGPR = -1;
fp64 = false;
main = new Function(this, "MAIN", ~0);
calls.insert(&main->call);

View file

@ -1311,6 +1311,7 @@ public:
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
int maxGPR;
bool fp64;
MemoryPool mem_Instruction;
MemoryPool mem_CmpInstruction;

View file

@ -83,6 +83,38 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
delete_Instruction(prog, i);
}
// Replace a double-precision OP_RCP / OP_RSQ with a call into the builtin
// library.
//
//  i    - instruction being replaced; its def 0 receives the merged 64-bit
//         result and the instruction itself is deleted before returning.
//  src  - two values forming the operand (the caller obtains them from
//         mkSplit()); only src[0] and src[1] are read.
//
// Side effect: sets prog->fp64.
void
NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[])
{
   // Anything that is not OP_RCP is routed to the RSQ routine.
   const int builtin =
      (i->op == OP_RCP) ? NVC0_BUILTIN_RCP_F64 : NVC0_BUILTIN_RSQ_F64;

   // Pass the operand to the builtin through registers 0 and 1. The
   // instructions returned here are not needed afterwards: the result is
   // read back into fresh SSA values below.
   bld.mkMovToReg(0, src[0]);
   bld.mkMovToReg(1, src[1]);

   FlowInstruction *call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);

   // Fetch the result from registers 0 and 1 into new SSA values.
   Value *def[2];
   def[0] = bld.getSSA();
   def[1] = bld.getSSA();
   bld.mkMovFromReg(def[0], 0);
   bld.mkMovFromReg(def[1], 1);

   // NOTE(review): the masks presumably describe the GPRs / predicates the
   // library routine overwrites (RSQ uses one more predicate than RCP) --
   // confirm against the builtin's actual register usage.
   bld.mkClobber(FILE_GPR, 0x3fc, 2);
   bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0);

   // Recombine the two result values into the original 64-bit destination.
   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]);

   call->fixed = 1;
   call->absolute = call->builtin = 1;
   call->target.builtin = builtin;
   delete_Instruction(prog, i);

   // The op is now a call, so record on the program that fp64 is in use.
   prog->fp64 = true;
}
void
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
{
@ -96,6 +128,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
Value *src[2], *dst[2], *def = i->getDef(0);
bld.mkSplit(src, 4, i->getSrc(0));
int chip = prog->getTarget()->getChipset();
if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
handleRCPRSQLib(i, src);
return;
}
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
dst[0] = bld.loadImm(NULL, 0);
dst[1] = bld.getSSA();

View file

@ -62,6 +62,7 @@ private:
// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQLib(Instruction *, Value *[]); // f64 recip/rsqrt via a builtin library call
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
void handleFTZ(Instruction *);
void handleSET(CmpInstruction *);

View file

@ -399,6 +399,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
}
}
}
info->io.fp64 |= fp64;
info->bin.relocData = emit->getRelocInfo();
info->bin.fixupData = emit->getFixupInfo();