mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
gk110/ir: Use the new rcp/rsq in library
v2: (Karol Herbst <kherbst@redhat.com>)
* fix Value setup for the builtins
Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
[imirkin: track the fp64 flag when switching ops to calls]
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: 19.0 <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 656ad06051)
This commit is contained in:
parent
c5b9774eb4
commit
81810fa5db
5 changed files with 42 additions and 0 deletions
|
|
@ -1119,6 +1119,7 @@ Program::Program(Type type, Target *arch)
|
|||
binSize = 0;
|
||||
|
||||
maxGPR = -1;
|
||||
fp64 = false;
|
||||
|
||||
main = new Function(this, "MAIN", ~0);
|
||||
calls.insert(&main->call);
|
||||
|
|
|
|||
|
|
@ -1311,6 +1311,7 @@ public:
|
|||
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
|
||||
|
||||
int maxGPR;
|
||||
bool fp64;
|
||||
|
||||
MemoryPool mem_Instruction;
|
||||
MemoryPool mem_CmpInstruction;
|
||||
|
|
|
|||
|
|
@ -83,6 +83,38 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
|
|||
delete_Instruction(prog, i);
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[])
|
||||
{
|
||||
FlowInstruction *call;
|
||||
Value *def[2];
|
||||
int builtin;
|
||||
|
||||
def[0] = bld.mkMovToReg(0, src[0])->getDef(0);
|
||||
def[1] = bld.mkMovToReg(1, src[1])->getDef(0);
|
||||
|
||||
if (i->op == OP_RCP)
|
||||
builtin = NVC0_BUILTIN_RCP_F64;
|
||||
else
|
||||
builtin = NVC0_BUILTIN_RSQ_F64;
|
||||
|
||||
call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
|
||||
def[0] = bld.getSSA();
|
||||
def[1] = bld.getSSA();
|
||||
bld.mkMovFromReg(def[0], 0);
|
||||
bld.mkMovFromReg(def[1], 1);
|
||||
bld.mkClobber(FILE_GPR, 0x3fc, 2);
|
||||
bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0);
|
||||
bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]);
|
||||
|
||||
call->fixed = 1;
|
||||
call->absolute = call->builtin = 1;
|
||||
call->target.builtin = builtin;
|
||||
delete_Instruction(prog, i);
|
||||
|
||||
prog->fp64 = true;
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
|
||||
{
|
||||
|
|
@ -96,6 +128,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
|
|||
Value *src[2], *dst[2], *def = i->getDef(0);
|
||||
bld.mkSplit(src, 4, i->getSrc(0));
|
||||
|
||||
int chip = prog->getTarget()->getChipset();
|
||||
if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
|
||||
handleRCPRSQLib(i, src);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
|
||||
dst[0] = bld.loadImm(NULL, 0);
|
||||
dst[1] = bld.getSSA();
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ private:
|
|||
|
||||
// we want to insert calls to the builtin library only after optimization
|
||||
void handleDIV(Instruction *); // integer division, modulus
|
||||
void handleRCPRSQLib(Instruction *, Value *[]);
|
||||
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
|
||||
void handleFTZ(Instruction *);
|
||||
void handleSET(CmpInstruction *);
|
||||
|
|
|
|||
|
|
@ -399,6 +399,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
|
|||
}
|
||||
}
|
||||
}
|
||||
info->io.fp64 |= fp64;
|
||||
info->bin.relocData = emit->getRelocInfo();
|
||||
info->bin.fixupData = emit->getFixupInfo();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue