nv50/ir: import new shader backend code

This commit is contained in:
Christoph Bumiller 2011-09-14 16:18:23 +02:00
parent a42eca84c5
commit 57594065c3
28 changed files with 16435 additions and 2 deletions

View file

@ -3,7 +3,7 @@ include $(TOP)/configs/current
LIBNAME = nv50
# get C_SOURCES
# get C/CPP_SOURCES
include Makefile.sources
LIBRARY_INCLUDES = \

View file

@ -21,3 +21,17 @@ C_SOURCES := \
nv50_pc_regalloc.c \
nv50_push.c \
nv50_query.c
CPP_SOURCES := \
codegen/nv50_ir.cpp \
codegen/nv50_ir_bb.cpp \
codegen/nv50_ir_build_util.cpp \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_peephole.cpp \
codegen/nv50_ir_print.cpp \
codegen/nv50_ir_ra.cpp \
codegen/nv50_ir_ssa.cpp \
codegen/nv50_ir_target.cpp \
codegen/nv50_ir_util.cpp

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,409 @@
#include "nv50_ir.h"
namespace nv50_ir {
// Construct a function called @fnName inside program @p and register it
// with that program via Program::add().
Function::Function(Program *p, const char *fnName)
   : call(this),
     name(fnName),
     prog(p)
{
   // no analysis results or block array exist yet
   bbArray = NULL;
   bbCount = 0;
   domTree = NULL;
   cfgExit = NULL;

   regClobberMax = 0;
   loopNestingBound = 0;

   binSize = 0;
   binPos = 0;

   prog->add(this, id);
}
// Release the dominator tree, the block array and every basic block that
// was registered in allBBlocks.
Function::~Function()
{
   // deleting a null pointer is a no-op, so no guards are required
   delete domTree;
   delete[] bbArray;

   ArrayList::Iterator it = allBBlocks.iterator();
   while (!it.end()) {
      delete reinterpret_cast<BasicBlock *>(it.get());
      it.next();
   }
}
// Create an empty basic block owned by @fn and register it with the function.
BasicBlock::BasicBlock(Function *fn) : cfg(this), dom(this), func(fn)
{
   program = func->getProgram();

   // the instruction list starts out empty
   exit = NULL;
   entry = NULL;
   phi = NULL;
   joinAt = NULL;
   numInsns = 0;

   binSize = 0;
   binPos = 0;

   explicitCont = false;

   func->add(this, this->id);
}
// Destructor; currently there is nothing to clean up.
BasicBlock::~BasicBlock()
{
}
// Return the block whose dominator-tree node is the parent of this block's
// node, or NULL if there is no parent.
BasicBlock *
BasicBlock::idom() const
{
   Graph::Node *const parentNode = dom.parent();

   if (!parentNode)
      return NULL;
   return BasicBlock::get(parentNode);
}
// Insert @inst at the head of the block.
//
// A phi instruction goes in front of the existing phis; any other instruction
// goes in front of the first non-phi instruction (entry), or directly behind
// the phis if the block contains only phis.
// @inst must not be linked into any list yet.
void
BasicBlock::insertHead(Instruction *inst)
{
   assert(inst->next == 0 && inst->prev == 0);

   if (inst->op == OP_PHI) {
      if (phi) {
         insertBefore(phi, inst);
      } else if (entry) {
         // No phi yet: the new phi goes right before the first instruction.
         // (The previous code passed the null 'phi' pointer here instead of
         // the instruction being inserted.)
         insertBefore(entry, inst);
      } else {
         // block is completely empty
         assert(!exit);
         phi = exit = inst;
         inst->bb = this;
         ++numInsns;
      }
   } else {
      if (entry) {
         insertBefore(entry, inst);
      } else if (phi) {
         // only phis present; insertAfter() updates entry and exit as needed
         insertAfter(phi, inst);
      } else {
         // block is completely empty
         assert(!exit);
         entry = exit = inst;
         inst->bb = this;
         ++numInsns;
      }
   }
}
// Append @inst to the block: a phi is placed in front of the first non-phi
// instruction (or after the last phi if there is none), anything else is
// placed after the current last instruction.
// @inst must not be linked into any list yet.
void
BasicBlock::insertTail(Instruction *inst)
{
   assert(inst->next == 0 && inst->prev == 0);

   const bool isPhi = (inst->op == OP_PHI);

   if (isPhi && entry) {
      insertBefore(entry, inst);
      return;
   }

   if (exit) {
      // a phi can only follow 'exit' here if the block holds phis only
      assert(!isPhi || phi);
      insertAfter(exit, inst);
      return;
   }

   // empty block: the instruction becomes both first and last
   assert(!phi);
   if (isPhi)
      phi = inst;
   else
      entry = inst;
   exit = inst;
   inst->bb = this;
   ++numInsns;
}
// Link the unlinked instruction @p immediately in front of @q and update the
// block's phi/entry markers if @q was one of them.
void
BasicBlock::insertBefore(Instruction *q, Instruction *p)
{
   assert(p && q);
   assert(p->next == 0 && p->prev == 0);

   const bool pIsPhi = (p->op == OP_PHI);

   if (q == entry) {
      if (!pIsPhi)
         entry = p;      // new first non-phi instruction
      else if (!phi)
         phi = p;        // first phi of the block
   } else if (q == phi) {
      assert(pIsPhi);
      phi = p;
   }

   Instruction *const before = q->prev;

   p->prev = before;
   p->next = q;
   if (before)
      before->next = p;
   q->prev = p;

   p->bb = this;
   ++numInsns;
}
// Link the unlinked instruction @q immediately behind @p and update the
// block's exit/entry markers. A phi may only be placed after another phi.
void
BasicBlock::insertAfter(Instruction *p, Instruction *q)
{
   assert(p && q);
   assert(q->op != OP_PHI || p->op == OP_PHI);
   assert(q->next == 0 && q->prev == 0);

   if (exit == p)
      exit = q;
   // first non-phi instruction placed directly behind a phi
   if (q->op != OP_PHI && p->op == OP_PHI)
      entry = q;

   Instruction *const after = p->next;

   q->next = after;
   q->prev = p;
   if (after)
      after->prev = q;
   p->next = q;

   q->bb = this;
   ++numInsns;
}
// Unlink @insn from this block and fix up the phi/entry/exit markers.
// The instruction itself is not destroyed; its bb/next/prev are reset.
void
BasicBlock::remove(Instruction *insn)
{
   assert(insn->bb == this);

   if (insn->prev)
      insn->prev->next = insn->next;

   if (insn->next)
      insn->next->prev = insn->prev;
   else
      exit = insn->prev;

   if (insn == entry) {
      // 'entry' marks the first non-phi instruction, so it must never end up
      // pointing at a phi. The predecessor of the entry is normally a phi
      // (or absent), in which case the block has no non-phi instruction left.
      if (insn->next)
         entry = insn->next;
      else if (insn->prev && insn->prev->op != OP_PHI)
         entry = insn->prev;
      else
         entry = NULL;
   }

   if (insn == phi)
      phi = (insn->next && insn->next->op == OP_PHI) ? insn->next : 0;

   --numInsns;
   insn->bb = NULL;
   insn->next =
   insn->prev = NULL;
}
// Swap two adjacent, non-phi instructions of the same block.
// The arguments may be given in either order.
void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
{
   assert(a->bb == b->bb);

   // normalize so that a is the one directly in front of b
   if (a->next != b) {
      Instruction *i = a;
      a = b;
      b = i;
   }
   assert(a->next == b);
   assert(a->op != OP_PHI && b->op != OP_PHI);

   if (b == exit)
      exit = a;
   if (a == entry)
      entry = b;

   b->prev = a->prev;
   a->next = b->next;
   b->next = a;
   a->prev = b;

   if (b->prev)
      b->prev->next = b;
   // a->prev is b (never null) at this point; the pointer that may be null
   // and is dereferenced here is a->next, e.g. when b used to be the last
   // instruction of the block.
   if (a->next)
      a->next->prev = a;
}
// Walk up the dominator tree from this block; return true if @that is this
// block itself or is found among its ancestors.
bool
BasicBlock::dominatedBy(BasicBlock *that)
{
   Graph::Node *const wanted = &that->dom;

   for (Graph::Node *n = &this->dom; n; n = n->parent())
      if (n == wanted)
         return true;
   return false;
}
// Classify the control flow leaving this block.
//
// Return values, as produced by the code below:
//   0   - the block does not have exactly two outgoing CFG edges, or none of
//         the patterns below matched
//   0x2 - the first outgoing edge of successor out[1] is a CROSS or BACK edge
//   0x1 - out[1] has a single successor and that successor is out[0]
//   0x3 - out[0] and out[1] each have a single successor and it is the same
//         node
//
// NOTE(review): the edge type of out[1]'s first outgoing edge is read before
// out[1]->outgoingCount() is checked; confirm that EdgeIterator::getType()
// is safe to call when out[1] has no outgoing edges.
unsigned int
BasicBlock::initiatesSimpleConditional() const
{
Graph::Node *out[2];
int n;
Graph::Edge::Type eR;
if (cfg.outgoingCount() != 2) // -> if and -> else/endif
return false;
// collect the two successors in iteration order
n = 0;
for (Graph::EdgeIterator ei = cfg.outgoing(); !ei.end(); ei.next())
out[n++] = ei.getNode();
eR = out[1]->outgoing().getType();
// IF block is out edge to the right
if (eR == Graph::Edge::CROSS || eR == Graph::Edge::BACK)
return 0x2;
if (out[1]->outgoingCount() != 1) // 0 is IF { RET; }, >1 is more divergence
return 0x0;
// do they reconverge immediately ?
if (out[1]->outgoing().getNode() == out[0])
return 0x1;
if (out[0]->outgoingCount() == 1)
if (out[0]->outgoing().getNode() == out[1]->outgoing().getNode())
return 0x3;
return 0x0;
}
// Make @bb the root of the function's CFG.
// Fails (returns false) if the CFG already has a root.
bool
Function::setEntry(BasicBlock *bb)
{
   const bool hasRoot = cfg.getRoot() ? true : false;

   if (!hasRoot)
      cfg.insert(&bb->cfg);
   return !hasRoot;
}
// Record @bb's CFG node as the function's exit node.
// Fails (returns false) if an exit node has already been set.
bool
Function::setExit(BasicBlock *bb)
{
   if (!cfgExit) {
      cfgExit = &bb->cfg;
      return true;
   }
   return false;
}
// Visit the blocks in CFG iteration order and insert every instruction into
// @result, passing the instruction's 'serial' member to ArrayList::insert().
// Returns the resulting size of @result.
unsigned int
Function::orderInstructions(ArrayList &result)
{
   Iterator *blocks = cfg.iteratorCFG();

   while (!blocks->end()) {
      Instruction *insn = BasicBlock::get(*blocks)->getFirst();
      while (insn) {
         result.insert(insn, insn->serial);
         insn = insn->next;
      }
      blocks->next();
   }
   cfg.putIterator(blocks);

   return result.getSize();
}
// Entry point for running the pass on a whole program: clears the error
// flag, remembers the program and delegates to doRun().
bool
Pass::run(Program *prog, bool ordered, bool skipPhi)
{
   err = false;
   this->prog = prog;

   const bool ok = doRun(prog, ordered, skipPhi);
   return ok;
}
// Run the pass on each function of @prog in turn; stops and returns false
// as soon as one function fails, otherwise reports the error flag.
bool
Pass::doRun(Program *prog, bool ordered, bool skipPhi)
{
   ArrayList::Iterator funcs = prog->allFuncs.iterator();

   while (!funcs.end()) {
      Function *const f = reinterpret_cast<Function *>(funcs.get());
      if (!doRun(f, ordered, skipPhi))
         return false;
      funcs.next();
   }
   return !err;
}
// Entry point for running the pass on a single function: clears the error
// flag, picks up the function's program and delegates to doRun().
bool
Pass::run(Function *func, bool ordered, bool skipPhi)
{
   err = false;
   prog = func->getProgram();

   const bool ok = doRun(func, ordered, skipPhi);
   return ok;
}
// Run the pass on one function.
//
// visit(func) is called first; if it fails nothing else is done. Blocks are
// then traversed in CFG order (@ordered) or DFS order. A failing visit(bb)
// stops the whole traversal, a failing visit(insn) only stops the current
// block. With @skipPhi the instruction walk starts at getEntry() instead of
// getFirst(). The result is the negated error flag.
bool
Pass::doRun(Function *func, bool ordered, bool skipPhi)
{
   this->func = func;
   if (!visit(func))
      return false;

   Iterator *blocks;
   if (ordered)
      blocks = func->cfg.iteratorCFG();
   else
      blocks = func->cfg.iteratorDFS();

   while (!blocks->end()) {
      BasicBlock *const block =
         BasicBlock::get(reinterpret_cast<Graph::Node *>(blocks->get()));
      if (!visit(block))
         break;

      Instruction *cur = skipPhi ? block->getEntry() : block->getFirst();
      while (cur != NULL) {
         // fetch the successor first: visit() may unlink the instruction
         Instruction *const following = cur->next;
         if (!visit(cur))
            break;
         cur = following;
      }
      blocks->next();
   }
   func->cfg.putIterator(blocks);

   return !err;
}
void
Function::printCFGraph(const char *filePath)
{
FILE *out = fopen(filePath, "a");
if (!out) {
ERROR("failed to open file: %s\n", filePath);
return;
}
INFO("printing control flow graph to: %s\n", filePath);
fprintf(out, "digraph G {\n");
Iterator *iter;
for (iter = cfg.iteratorDFS(); !iter->end(); iter->next()) {
BasicBlock *bb = BasicBlock::get(
reinterpret_cast<Graph::Node *>(iter->get()));
int idA = bb->getId();
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
int idB = BasicBlock::get(ei.getNode())->getId();
switch (ei.getType()) {
case Graph::Edge::TREE:
fprintf(out, "\t%i -> %i;\n", idA, idB);
break;
case Graph::Edge::FORWARD:
fprintf(out, "\t%i -> %i [color=green];\n", idA, idB);
break;
case Graph::Edge::CROSS:
fprintf(out, "\t%i -> %i [color=red];\n", idA, idB);
break;
case Graph::Edge::BACK:
fprintf(out, "\t%i -> %i;\n", idA, idB);
break;
case Graph::Edge::DUMMY:
fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB);
break;
default:
assert(0);
break;
}
}
}
cfg.putIterator(iter);
fprintf(out, "}\n");
fclose(out);
}
} // namespace nv50_ir

View file

@ -0,0 +1,501 @@
#include "nv50_ir.h"
#include "nv50_ir_build_util.h"
namespace nv50_ir {
// Create a builder that is not yet attached to any program, function or
// insertion position; setPosition() must be called before inserting.
BuildUtil::BuildUtil()
{
   prog = NULL;
   func = NULL;
   bb = NULL;
   pos = NULL;

   // Give 'tail' a defined value; it used to be left uninitialized until
   // the first setPosition() call.
   tail = true;

   // empty immediate hash table
   memset(imms, 0, sizeof(imms));
   immCount = 0;
}
// Enter @imm into the immediate hash table (open addressing with linear
// probing). Nothing is stored once the table is more than 3/4 full.
void
BuildUtil::addImmediate(ImmediateValue *imm)
{
   if (immCount > (NV50_IR_BUILD_IMM_HT_SIZE * 3) / 4)
      return;

   unsigned int slot = u32Hash(imm->reg.data.u32);
   for (; imms[slot]; slot = (slot + 1) % NV50_IR_BUILD_IMM_HT_SIZE) {
      // probe for the next free slot
   }

   imms[slot] = imm;
   ++immCount;
}
// Build a one-source instruction "dst = op(src)" of type @ty and insert it
// at the current position.
Instruction *
BuildUtil::mkOp1(operation op, DataType ty, Value *dst, Value *src)
{
   Instruction *const i = new_Instruction(func, op, ty);

   i->setDef(0, dst);
   i->setSrc(0, src);

   insert(i);
   return i;
}
// Build a two-source instruction "dst = op(src0, src1)" of type @ty and
// insert it at the current position.
Instruction *
BuildUtil::mkOp2(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1)
{
   Instruction *const i = new_Instruction(func, op, ty);

   i->setDef(0, dst);
   i->setSrc(0, src0);
   i->setSrc(1, src1);

   insert(i);
   return i;
}
// Build a three-source instruction "dst = op(src0, src1, src2)" of type @ty
// and insert it at the current position.
Instruction *
BuildUtil::mkOp3(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1, Value *src2)
{
   Instruction *const i = new_Instruction(func, op, ty);

   i->setDef(0, dst);
   i->setSrc(0, src0);
   i->setSrc(1, src1);
   i->setSrc(2, src2);

   insert(i);
   return i;
}
// Insert an OP_LOAD of type @ty from @mem into a fresh scratch value and
// return that value. If @ptr is given it is attached as indirect address of
// source 0.
LValue *
BuildUtil::mkLoad(DataType ty, Symbol *mem, Value *ptr)
{
   Instruction *const ld = new_Instruction(func, OP_LOAD, ty);
   LValue *const result = getScratch();

   ld->setDef(0, result);
   ld->setSrc(0, mem);
   if (ptr)
      ld->setIndirect(0, 0, ptr);

   insert(ld);
   return result;
}
// Insert a store-like instruction @op of type @ty: source 0 is the memory
// symbol, source 1 the value to store. If @ptr is given it is attached as
// indirect address of source 0.
Instruction *
BuildUtil::mkStore(operation op, DataType ty, Symbol *mem, Value *ptr,
                   Value *stVal)
{
   Instruction *const st = new_Instruction(func, op, ty);

   st->setSrc(0, mem);
   st->setSrc(1, stVal);
   if (ptr)
      st->setIndirect(0, 0, ptr);

   insert(st);
   return st;
}
// Insert an OP_VFETCH into @dst from a new symbol at @offset in @file
// (file index 0). @attrRel and @primRel are attached as indirect operands
// 0 and 1 of source 0.
Instruction *
BuildUtil::mkFetch(Value *dst, DataType ty, DataFile file, int32_t offset,
                   Value *attrRel, Value *primRel)
{
   // mkOp1() has already inserted the instruction when it returns
   Instruction *const fetch =
      mkOp1(OP_VFETCH, ty, dst, mkSymbol(file, 0, ty, offset));

   fetch->setIndirect(0, 0, attrRel);
   fetch->setIndirect(0, 1, primRel);

   return fetch;
}
// Insert "dst = mov(src)" of type @ty at the current position.
Instruction *
BuildUtil::mkMov(Value *dst, Value *src, DataType ty)
{
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   mov->setSrc(0, src);

   insert(mov);
   return mov;
}
// Insert a move of @src into a new GPR value whose register id is set to
// @id. The instruction type is derived from the size of @src.
Instruction *
BuildUtil::mkMovToReg(int id, Value *src)
{
   const DataType ty = typeOfSize(src->reg.size);
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, new_LValue(func, FILE_GPR));
   mov->getDef(0)->reg.data.id = id;
   mov->setSrc(0, src);

   insert(mov);
   return mov;
}
// Insert a move into @dst from a new GPR value whose register id is set to
// @id. The instruction type is derived from the size of @dst.
Instruction *
BuildUtil::mkMovFromReg(Value *dst, int id)
{
   const DataType ty = typeOfSize(dst->reg.size);
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   mov->setSrc(0, new_LValue(func, FILE_GPR));
   mov->getSrc(0)->reg.data.id = id;

   insert(mov);
   return mov;
}
// Insert a conversion-style instruction @op with separate destination and
// source types.
Instruction *
BuildUtil::mkCvt(operation op,
                 DataType dstTy, Value *dst, DataType srcTy, Value *src)
{
   Instruction *const cvt = new_Instruction(func, op, dstTy);

   cvt->setType(dstTy, srcTy);
   cvt->setDef(0, dst);
   cvt->setSrc(0, src);

   insert(cvt);
   return cvt;
}
// Insert a comparison @op with condition @cc on operands of type @ty.
// If @dst lives in the predicate file the result type is TYPE_U8, otherwise
// it is @ty. The third source is optional.
Instruction *
BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
                 Value *src0, Value *src1, Value *src2)
{
   CmpInstruction *const cmp = new_CmpInstruction(func, op);
   const bool toPredicate = (dst->reg.file == FILE_PREDICATE);

   cmp->setType(toPredicate ? TYPE_U8 : ty, ty);
   cmp->setCondition(cc);

   cmp->setDef(0, dst);
   cmp->setSrc(0, src0);
   cmp->setSrc(1, src1);
   if (src2)
      cmp->setSrc(2, src2);

   insert(cmp);
   return cmp;
}
// Create a texture instruction @op for target @targ using texture/sampler
// indices @tic/@tsc. Up to four entries of @def and @src are attached; the
// lists end at the first NULL entry.
//
// NOTE(review): unlike the other mk* helpers in this file, the instruction
// is not passed to insert() here - confirm that callers insert it themselves.
Instruction *
BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc,
                 Value **def, Value **src)
{
   TexInstruction *const tex = new_TexInstruction(func, op);
   int n;

   for (n = 0; n < 4; ++n) {
      if (!def[n])
         break;
      tex->setDef(n, def[n]);
   }
   for (n = 0; n < 4; ++n) {
      if (!src[n])
         break;
      tex->setSrc(n, src[n]);
   }

   tex->setTexture(targ, tic, tsc);

   return tex;
}
// Insert an F32 OP_QUADOP with sub-operation @q and lane mask @l.
Instruction *
BuildUtil::mkQuadop(uint8_t q, Value *def, uint8_t l, Value *src0, Value *src1)
{
   Instruction *const insn = mkOp2(OP_QUADOP, TYPE_F32, def, src0, src1);

   insn->lanes = l;
   insn->subOp = q;

   return insn;
}
// Emit "dst = pred ? trSrc : flSrc" as two predicated moves into fresh SSA
// values that are merged by an OP_UNION. Returns the UNION instruction.
Instruction *
BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
{
   LValue *def0 = getSSA();
   LValue *def1 = getSSA();

   mkMov(def0, trSrc)->setPredicate(CC_P, pred);
   mkMov(def1, flSrc)->setPredicate(CC_NOT_P, pred);

   // mkOp2() already inserts the instruction at the current position;
   // inserting it a second time would link it into the block twice.
   return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1);
}
// Insert a flow instruction @op targeting @targ; it is predicated on
// (@cc, @pred) when @pred is not NULL.
FlowInstruction *
BuildUtil::mkFlow(operation op, BasicBlock *targ, CondCode cc, Value *pred)
{
   FlowInstruction *const flow = new_FlowInstruction(func, op, targ);

   if (pred != NULL)
      flow->setPredicate(cc, pred);

   insert(flow);
   return flow;
}
// Insert OP_NOP instructions whose definitions cover the registers of file
// @f selected by @rMask (bit n set = register n). Register sizes are scaled
// by 1 << @unit.
//
// The mask is processed four bits at a time. For each 4-bit group the table
// gives at most two runs of consecutive registers that cover the set bits,
// encoded as nibbles:
//   bits  3..0  start of run 1      bits  7..4  length of run 1
//   bits 11..8  start of run 2      bits 15..12 length of run 2 (0 = none)
void
BuildUtil::mkClobber(DataFile f, uint32_t rMask, int unit)
{
   static const uint16_t baseSize2[16] =
   {
      // 0000    0001    0010    0011    0100    0101    0110    0111
      0x0000, 0x0010, 0x0011, 0x0020, 0x0012, 0x1210, 0x1211, 0x1220,
      // 1000    1001    1010    1011    1100    1101    1110    1111
      0x0013, 0x1310, 0x1311, 0x1320, 0x0022, 0x2210, 0x2211, 0x0040,
   };

   int base = 0;

   for (; rMask; rMask >>= 4, base += 4) {
      const uint32_t mask = rMask & 0xf;
      if (!mask)
         continue;
      int base1 = (baseSize2[mask] >>  0) & 0xf;
      int size1 = (baseSize2[mask] >>  4) & 0xf;
      int base2 = (baseSize2[mask] >>  8) & 0xf;
      int size2 = (baseSize2[mask] >> 12) & 0xf;
      Instruction *insn = mkOp(OP_NOP, TYPE_NONE, NULL);
      if (1) { // size1 can't be 0
         LValue *reg = new_LValue(func, f);
         reg->reg.size = size1 << unit;
         reg->reg.data.id = base + base1;
         insn->setDef(0, reg);
      }
      if (size2) {
         LValue *reg = new_LValue(func, f);
         reg->reg.size = size2 << unit;
         reg->reg.data.id = base + base2;
         insn->setDef(1, reg);
      }
   }
}
// Return an immediate with the 32-bit value @u. The hash table is probed
// first; if no entry with this value is found a new immediate is created
// and handed to addImmediate().
ImmediateValue *
BuildUtil::mkImm(uint32_t u)
{
   unsigned int slot = u32Hash(u);

   for (;;) {
      ImmediateValue *const cached = imms[slot];
      if (!cached)
         break;
      if (cached->reg.data.u32 == u)
         return cached;
      slot = (slot + 1) % NV50_IR_BUILD_IMM_HT_SIZE;
   }

   ImmediateValue *const fresh = new_ImmediateValue(prog, u);
   addImmediate(fresh);
   return fresh;
}
// Create a new 64-bit immediate with value @u. The immediate hash table is
// not consulted for 64-bit values.
ImmediateValue *
BuildUtil::mkImm(uint64_t u)
{
   ImmediateValue *const wide = new_ImmediateValue(prog, (uint32_t)0);

   wide->reg.type = TYPE_U64;
   wide->reg.size = 8;
   wide->reg.data.u64 = u;

   return wide;
}
// Return an immediate holding the bit pattern of the float @f; delegates to
// the uint32_t overload.
ImmediateValue *
BuildUtil::mkImm(float f)
{
   union {
      float asFloat;
      uint32_t asBits;
   } pun;

   pun.asFloat = f;
   const uint32_t bits = pun.asBits;

   return mkImm(bits);
}
// Insert an F32 move of the immediate @f into @dst (a new scratch value if
// @dst is NULL) and return the destination.
Value *
BuildUtil::loadImm(Value *dst, float f)
{
   Value *const target = dst ? dst : getScratch();

   return mkOp1v(OP_MOV, TYPE_F32, target, mkImm(f));
}
// Insert a U32 move of the immediate @u into @dst (a new scratch value if
// @dst is NULL) and return the destination.
Value *
BuildUtil::loadImm(Value *dst, uint32_t u)
{
   Value *const target = dst ? dst : getScratch();

   return mkOp1v(OP_MOV, TYPE_U32, target, mkImm(u));
}
// Insert a U64 move of the immediate @u into @dst (a new 8-byte scratch
// value if @dst is NULL) and return the destination.
Value *
BuildUtil::loadImm(Value *dst, uint64_t u)
{
   Value *const target = dst ? dst : getScratch(8);

   return mkOp1v(OP_MOV, TYPE_U64, target, mkImm(u));
}
// Create a symbol in (@file, @fileIndex) at offset @baseAddr with type @ty;
// the symbol's size is the size of that type.
Symbol *
BuildUtil::mkSymbol(DataFile file, int8_t fileIndex, DataType ty,
                    uint32_t baseAddr)
{
   Symbol *const s = new_Symbol(prog, file, fileIndex);

   s->setOffset(baseAddr);
   s->reg.size = typeSizeof(ty);
   s->reg.type = ty;

   return s;
}
// Create a FILE_SYSTEM_VALUE symbol for system value @svName, component
// @svIndex. Position, face, ydir, point size/coord, clip distance and tess
// factor are typed F32; all other system values are typed U32.
Symbol *
BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
{
   Symbol *const sv = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);

   // only clip distances and tess factors may use an index of 4 or more
   assert(svIndex < 4 ||
          (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));

   bool isFloat;
   switch (svName) {
   case SV_POSITION:
   case SV_FACE:
   case SV_YDIR:
   case SV_POINT_SIZE:
   case SV_POINT_COORD:
   case SV_CLIP_DISTANCE:
   case SV_TESS_FACTOR:
      isFloat = true;
      break;
   default:
      isFloat = false;
      break;
   }

   if (isFloat)
      sv->reg.type = TYPE_F32;
   else
      sv->reg.type = TYPE_U32;
   sv->reg.size = typeSizeof(sv->reg.type);

   sv->reg.data.sv.sv = svName;
   sv->reg.data.sv.index = svIndex;

   return sv;
}
void
BuildUtil::DataArray::init()
{
values = NULL;
baseAddr = 0;
arrayLen = 0;
vecDim = 4;
eltSize = 2;
file = FILE_GPR;
regOnly = true;
}
// Create an array without a parent builder. The parent pointer starts out
// as NULL so that the assert(!up) in setParent() tests a defined value.
BuildUtil::DataArray::DataArray() : up(NULL)
{
   init();
}
// Create an array whose values and instructions are built through @bld.
BuildUtil::DataArray::DataArray(BuildUtil *bld)
   : up(bld)
{
   init();
}
// Free the value pointer table; the pointed-to values are not deleted here.
BuildUtil::DataArray::~DataArray()
{
   // delete[] on a null pointer is a no-op
   delete[] values;
}
// Describe the array and allocate its (zeroed) value table of
// len * v entries. For memory files a base symbol at @base is created too.
void
BuildUtil::DataArray::setup(uint32_t base, int len, int v, int size,
                            DataFile f, int8_t fileIndex)
{
   baseAddr = base;
   arrayLen = len;
   vecDim = v;
   eltSize = size;
   file = f;
   regOnly = !isMemoryFile(f);

   values = new Value * [arrayLen * vecDim];
   if (values)
      memset(values, 0, arrayLen * vecDim * sizeof(Value *));

   if (regOnly)
      return;

   baseSym = new_Symbol(up->getProgram(), file, fileIndex);
   baseSym->setOffset(baseAddr);
   baseSym->reg.size = size;
}
// Return a value that can be written for element (@i, @c): the (lazily
// created) register value for register-only arrays, or a fresh scratch
// value for arrays in memory.
Value *
BuildUtil::DataArray::acquire(int i, int c)
{
   const unsigned int checkedIdx = i * vecDim + c;
   assert(checkedIdx < arrayLen * vecDim);

   if (!regOnly)
      return up->getScratch();

   const unsigned int regIdx = i * 4 + c; // vecDim always 4 if regOnly
   if (!values[regIdx])
      values[regIdx] = new_LValue(up->getFunction(), file);
   return values[regIdx];
}
// Read element (@i, @c). Register-only arrays return the (lazily created)
// register value; memory arrays emit a load from the element's symbol, with
// @ptr as optional indirect address, and return the loaded value.
Value *
BuildUtil::DataArray::load(int i, int c, Value *ptr)
{
   const unsigned int slot = i * vecDim + c;
   assert(slot < arrayLen * vecDim);

   if (regOnly) {
      if (!values[slot])
         values[slot] = new_LValue(up->getFunction(), file);
      return values[slot];
   }

   // memory arrays cache the per-element symbol in the value table
   Symbol *elemSym = reinterpret_cast<Symbol *>(values[slot]);
   if (!elemSym) {
      elemSym = this->mkSymbol(i, c, baseSym);
      values[slot] = elemSym;
   }
   return up->mkLoad(typeOfSize(eltSize), elemSym, ptr);
}
void
BuildUtil::DataArray::store(int i, int c, Value *ptr, Value *value)
{
const unsigned int idx = i * vecDim + c;
assert(idx < arrayLen * vecDim);
if (regOnly) {
assert(!ptr);
assert(!values[idx] || values[idx] == value);
values[idx] = value;
} else {
Symbol *sym = reinterpret_cast<Symbol *>(values[idx]);
if (!sym)
values[idx] = sym = this->mkSymbol(i, c, baseSym);
up->mkStore(OP_STORE, typeOfSize(value->reg.size), sym, ptr, value);
}
}
// Create the symbol for element (@i, @c): eltSize bytes at
// baseAddr + (i * vecDim + c) * eltSize, addressed relative to @base.
Symbol *
BuildUtil::DataArray::mkSymbol(int i, int c, Symbol *base)
{
   const unsigned int slot = i * vecDim + c;

   Symbol *const elem = new_Symbol(up->getProgram(), file, 0);

   assert(base || (slot < arrayLen && c < vecDim));

   elem->reg.type = typeOfSize(eltSize);
   elem->reg.size = eltSize;
   elem->setAddress(base, baseAddr + slot * eltSize);

   return elem;
}
} // namespace nv50_ir

View file

@ -0,0 +1,245 @@
#ifndef __NV50_IR_BUILD_UTIL__
#define __NV50_IR_BUILD_UTIL__
namespace nv50_ir {
// Helper for building IR: keeps a current insertion position (block and
// optional instruction) and offers mk* functions that create an instruction
// and insert it at that position.
class BuildUtil
{
public:
BuildUtil();
inline void setProgram(Program *);
inline Program *getProgram() const { return prog; }
inline Function *getFunction() const { return func; }
// keeps inserting at head/tail of block
inline void setPosition(BasicBlock *, bool tail);
// position advances only if @after is true
inline void setPosition(Instruction *, bool after);
inline BasicBlock *getBB() { return bb; }
// insert at the current position (see setPosition)
inline void insert(Instruction *);
inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); }
// new GPR value of the given size in bytes
inline LValue *getScratch(int size = 4);
inline LValue *getSSA(int size = 4); // scratch value for a single assignment
// mkOp*: create an instruction with 0..3 sources and insert it
inline Instruction *mkOp(operation, DataType, Value *);
Instruction *mkOp1(operation, DataType, Value *, Value *);
Instruction *mkOp2(operation, DataType, Value *, Value *, Value *);
Instruction *mkOp3(operation, DataType, Value *, Value *, Value *, Value *);
// mkOp*v: like mkOp*, but return the destination as LValue
LValue *mkOp1v(operation, DataType, Value *, Value *);
LValue *mkOp2v(operation, DataType, Value *, Value *, Value *);
LValue *mkOp3v(operation, DataType, Value *, Value *, Value *, Value *);
// load into a new scratch value, which is returned
LValue *mkLoad(DataType, Symbol *, Value *ptr);
Instruction *mkStore(operation, DataType, Symbol *, Value *ptr, Value *val);
Instruction *mkMov(Value *, Value *, DataType = TYPE_U32);
// moves to/from a new GPR value with the given register id
Instruction *mkMovToReg(int id, Value *);
Instruction *mkMovFromReg(Value *, int id);
Instruction *mkFetch(Value *, DataType, DataFile, int32_t offset,
Value *attrRel, Value *primRel);
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
Instruction *mkCmp(operation, CondCode, DataType,
Value *,
Value *, Value *, Value * = NULL);
Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
Value **def, Value **src);
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
FlowInstruction *mkFlow(operation, BasicBlock *target,
CondCode, Value *pred);
Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc);
void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
// immediates; 32-bit values are looked up in / added to the hash table below
ImmediateValue *mkImm(float);
ImmediateValue *mkImm(uint32_t);
ImmediateValue *mkImm(uint64_t);
ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }
// move an immediate into dst (or a new scratch value if dst is NULL)
Value *loadImm(Value *dst, float);
Value *loadImm(Value *dst, uint32_t);
Value *loadImm(Value *dst, uint64_t);
Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }
// Array of vectors of values that lives either purely in registers or in a
// memory file; in the latter case load/store emit memory instructions.
class DataArray
{
public:
DataArray();
DataArray(BuildUtil *);
~DataArray();
inline void setParent(BuildUtil *bld) { assert(!up); up = bld; }
void setup(uint32_t base, int len, int vecDim, int size,
DataFile, int8_t fileIndex = 0);
inline bool exists(unsigned int i, unsigned int c);
Value *load(int i, int c, Value *ptr);
void store(int i, int c, Value *ptr, Value *value);
Value *acquire(int i, int c);
private:
Symbol *mkSymbol(int i, int c, Symbol *base);
private:
Value **values; // arrayLen * vecDim entries, allocated by setup()
uint32_t baseAddr;
uint32_t arrayLen;
Symbol *baseSym; // only created by setup() for memory files
uint8_t vecDim;
uint8_t eltSize; // in bytes
DataFile file;
bool regOnly; // true if file is not a memory file
BuildUtil *up; // builder used to create values and instructions
void init();
};
Symbol *mkSymbol(DataFile file, int8_t fileIndex,
DataType ty, uint32_t baseAddress);
Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex);
private:
void addImmediate(ImmediateValue *);
inline unsigned int u32Hash(uint32_t);
protected:
Program *prog;
Function *func;
Instruction *pos; // NULL: insert at head/tail of bb
BasicBlock *bb;
bool tail;
// hash table of 32-bit immediates (linear probing, see mkImm/addImmediate)
#define NV50_IR_BUILD_IMM_HT_SIZE 256
ImmediateValue *imms[NV50_IR_BUILD_IMM_HT_SIZE];
unsigned int immCount;
};
// Map a 32-bit value to a slot index of the immediate hash table.
unsigned int BuildUtil::u32Hash(uint32_t u)
{
   const uint32_t folded = u % 273;

   return folded % NV50_IR_BUILD_IMM_HT_SIZE;
}
// Set the program that new program-level objects are created in.
void BuildUtil::setProgram(Program *program)
{
   this->prog = program;
}
// Insert subsequent instructions at the tail (@atTail) or head of @block.
// Also switches the builder to the block's program and function.
void
BuildUtil::setPosition(BasicBlock *block, bool atTail)
{
   bb = block;
   tail = atTail;
   pos = NULL;   // no reference instruction: use the block's head/tail

   func = bb->getFunction();
   prog = bb->getProgram();
}
// Insert subsequent instructions after (@after) or before instruction @i.
// Also switches the builder to the program and function of @i's block.
// @i must already be part of a basic block.
void
BuildUtil::setPosition(Instruction *i, bool after)
{
   // check before the block pointer is dereferenced below
   assert(i && i->bb);

   bb = i->bb;
   prog = bb->getProgram();
   func = bb->getFunction();
   pos = i;
   tail = after;
}
// Create a new GPR value; its size is overridden only if @size is not 4.
LValue *
BuildUtil::getScratch(int size)
{
   LValue *const tmp = new_LValue(func, FILE_GPR);

   if (size == 4)
      return tmp;

   tmp->reg.size = size;
   return tmp;
}
// Create a new GPR value flagged as SSA; its size is overridden only if
// @size is not 4.
LValue *
BuildUtil::getSSA(int size)
{
   LValue *const tmp = new_LValue(func, FILE_GPR);

   tmp->ssa = 1;
   if (size == 4)
      return tmp;

   tmp->reg.size = size;
   return tmp;
}
// Insert @i at the current position. Without a reference instruction the
// block's head or tail is used. With one, @i goes after it (and becomes the
// new reference) in tail mode, or before it otherwise.
void BuildUtil::insert(Instruction *i)
{
   if (pos) {
      if (!tail) {
         bb->insertBefore(pos, i);
         return;
      }
      bb->insertAfter(pos, i);
      pos = i;
      return;
   }

   if (tail)
      bb->insertTail(i);
   else
      bb->insertHead(i);
}
// Create a source-less instruction @op of type @ty defining @dst and insert
// it. DISCARD, EXIT, JOIN, QUADON, QUADPOP, EMIT and RESTART are marked
// 'fixed'.
Instruction *
BuildUtil::mkOp(operation op, DataType ty, Value *dst)
{
   Instruction *const i = new_Instruction(func, op, ty);

   i->setDef(0, dst);
   insert(i);

   switch (op) {
   case OP_DISCARD:
   case OP_EXIT:
   case OP_JOIN:
   case OP_QUADON:
   case OP_QUADPOP:
   case OP_EMIT:
   case OP_RESTART:
      i->fixed = 1;
      break;
   default:
      break;
   }
   return i;
}
// Same as mkOp1(), but returns the destination as LValue.
inline LValue *
BuildUtil::mkOp1v(operation op, DataType ty, Value *dst, Value *src)
{
   (void)mkOp1(op, ty, dst, src);

   LValue *const result = dst->asLValue();
   return result;
}
// Same as mkOp2(), but returns the destination as LValue.
inline LValue *
BuildUtil::mkOp2v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1)
{
   (void)mkOp2(op, ty, dst, src0, src1);

   LValue *const result = dst->asLValue();
   return result;
}
// Same as mkOp3(), but returns the destination as LValue.
inline LValue *
BuildUtil::mkOp3v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1, Value *src2)
{
   (void)mkOp3(op, ty, dst, src0, src1, src2);

   LValue *const result = dst->asLValue();
   return result;
}
// Elements of memory arrays always exist; elements of register-only arrays
// exist once a value has been recorded for them.
bool
BuildUtil::DataArray::exists(unsigned int i, unsigned int c)
{
   assert(i < arrayLen && c < vecDim);

   if (!regOnly)
      return true;
   return values[i * vecDim + c];
}
} // namespace nv50_ir
#endif // __NV50_IR_BUILD_UTIL__

View file

@ -0,0 +1,149 @@
#ifndef __NV50_IR_DRIVER_H__
#define __NV50_IR_DRIVER_H__
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
/*
* This struct constitutes linkage information in TGSI terminology.
*
* It is created by the code generator and handed to the pipe driver
* for input/output slot assignment.
*
* One instance describes one vec4 register; nv50_ir_prog_info holds arrays
* of these for system values, inputs and outputs.
*/
struct nv50_ir_varying
{
uint8_t slot[4]; /* native slots for xyzw (addresses in 32-bit words) */
unsigned mask : 4; /* vec4 mask */
unsigned linear : 1; /* linearly interpolated if true (and not flat) */
unsigned flat : 1;
unsigned centroid : 1;
unsigned patch : 1; /* patch constant value */
unsigned regular : 1; /* driver-specific meaning (e.g. input in sreg) */
unsigned input : 1; /* indicates direction of system values */
unsigned oread : 1; /* true if output is read from parallel TCP */
ubyte id; /* TGSI register index */
ubyte sn; /* TGSI semantic name */
ubyte si; /* TGSI semantic index */
};
/* Source representations, stored in nv50_ir_prog_info::bin.sourceRep. */
#define NV50_PROGRAM_IR_TGSI 0
#define NV50_PROGRAM_IR_SM4 1
#define NV50_PROGRAM_IR_GLSL 2
#define NV50_PROGRAM_IR_LLVM 3
/*
* Debug flag bits. Without DEBUG all flags are defined as 0, so any test of
* the form (flags & NV50_IR_DEBUG_x) is constant false.
* Note that (2 << 0) has the value 2, i.e. VERBOSE occupies bit 1.
*/
#ifdef DEBUG
# define NV50_IR_DEBUG_BASIC (1 << 0)
# define NV50_IR_DEBUG_VERBOSE (2 << 0)
# define NV50_IR_DEBUG_REG_ALLOC (1 << 2)
#else
# define NV50_IR_DEBUG_BASIC 0
# define NV50_IR_DEBUG_VERBOSE 0
# define NV50_IR_DEBUG_REG_ALLOC 0
#endif
/*
* Description of one shader program; this is the argument of
* nv50_ir_generate_code() and of the assignSlots driver callback.
*
* NOTE(review): which members are filled in by the caller and which by the
* code generator is not visible in this header - confirm against the
* implementation before relying on it.
*/
struct nv50_ir_prog_info
{
uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */
uint8_t type; /* PIPE_SHADER */
uint8_t optLevel; /* optimization level (0 to 3) */
uint8_t dbgFlags; /* presumably a mask of NV50_IR_DEBUG_* - TODO confirm */
/* program source and generated binary */
struct {
int16_t maxGPR; /* may be -1 if none used */
int16_t maxOutput;
uint32_t tlsSpace; /* required local memory per thread */
uint32_t *code;
uint32_t codeSize;
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
} bin;
/* linkage information for system values, inputs and outputs */
struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS];
struct nv50_ir_varying in[PIPE_MAX_SHADER_INPUTS];
struct nv50_ir_varying out[PIPE_MAX_SHADER_OUTPUTS];
uint8_t numInputs;
uint8_t numOutputs;
uint8_t numPatchConstants; /* also included in numInputs/numOutputs */
uint8_t numSysVals;
/* immediate data */
struct {
uint32_t *buf; /* for IMMEDIATE_ARRAY */
uint16_t bufSize; /* size of immediate array */
uint16_t count; /* count of inline immediates */
uint32_t *data; /* inline immediate data */
uint8_t *type; /* for each vec4 (128 bit) */
} immd;
/* properties specific to one shader stage; only one member is meaningful */
union {
struct {
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
} vp;
struct {
uint8_t inputPatchSize;
uint8_t outputPatchSize;
uint8_t partitioning; /* PIPE_TESS_PART */
int8_t winding; /* +1 (clockwise) / -1 (counter-clockwise) */
uint8_t domain; /* PIPE_PRIM_{QUADS,TRIANGLES,LINES} */
uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */
} tp;
struct {
uint8_t inputPrim;
uint8_t outputPrim;
unsigned instanceCount;
unsigned maxVertices;
} gp;
struct {
unsigned numColourResults;
boolean writesDepth;
boolean earlyFragTests;
boolean separateFragData;
boolean usesDiscard;
} fp;
} prop;
/* indices of special inputs/outputs */
struct {
uint8_t clipDistance; /* index of first clip distance output */
uint8_t clipDistanceCount;
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
uint8_t pointSize; /* output index for PointSize */
uint8_t edgeFlagIn;
uint8_t edgeFlagOut;
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
} io;
/* driver callback to assign input/output locations */
int (*assignSlots)(struct nv50_ir_prog_info *);
};
/* The entry points below have C linkage so that C code can call them. */
#ifdef __cplusplus
extern "C" {
#endif
/* Generate code for the program described by the given info structure.
* NOTE(review): the meaning of the return value is not visible here. */
extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);
/* Relocate @code using @relocData (cf. nv50_ir_prog_info::bin.relocData)
* for the given code, library and data positions. */
extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t codePos,
uint32_t libPos,
uint32_t dataPos);
/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
#ifdef __cplusplus
}
#endif
#endif // __NV50_IR_DRIVER_H__

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,381 @@
#include "nv50_ir_graph.h"
namespace nv50_ir {
// Create an empty graph: no root, no nodes, sequence counter at zero.
Graph::Graph()
{
   sequence = 0;
   size = 0;
   root = NULL;
}
// Cut every node reached by a DFS from the graph, which deletes the edges
// attached to it. The nodes themselves are not deleted.
Graph::~Graph()
{
   Iterator *nodes = this->safeIteratorDFS();

   while (!nodes->end()) {
      Node *const n = reinterpret_cast<Node *>(nodes->get());
      n->cut();
      nodes->next();
   }
   putIterator(nodes);
}
// Add @node to the graph: it becomes the root of an empty graph, otherwise
// it is attached to the root with a TREE edge.
void Graph::insert(Node *node)
{
   if (root) {
      root->attach(node, Edge::TREE);
      return;
   }

   node->graph = this;
   root = node;
   size = 1;
}
void Graph::Edge::unlink()
{
if (origin) {
prev[0]->next[0] = next[0];
next[0]->prev[0] = prev[0];
if (origin->out == this)
origin->out = (next[0] == this) ? NULL : next[0];
--origin->outCount;
}
if (target) {
prev[1]->next[1] = next[1];
next[1]->prev[1] = prev[1];
if (target->in == this)
target->in = (next[1] == this) ? NULL : next[1];
--target->inCount;
}
}
// Return a short lower-case name for the edge type; "unk" for UNKNOWN and
// for any unexpected value.
const char *Graph::Edge::typeStr() const
{
   if (type == TREE)
      return "tree";
   if (type == FORWARD)
      return "forward";
   if (type == BACK)
      return "back";
   if (type == CROSS)
      return "cross";
   if (type == DUMMY)
      return "dummy";
   return "unk";
}
// Create a node carrying the opaque pointer @priv. The node starts without
// edges and does not belong to any graph yet.
Graph::Node::Node(void *priv) : data(priv),
in(0), out(0), graph(0),
visited(0),
inCount(0), outCount(0)
{
// nothing to do
}
// Create an edge of type @kind from this node to @node and make it the head
// of both this node's outgoing list and @node's incoming list. @node joins
// this node's graph if it has none. An UNKNOWN edge triggers a
// reclassification of all edges.
void Graph::Node::attach(Node *node, Edge::Type kind)
{
   Edge *const e = new Edge(this, node, kind);

   // link in front of the current head of our outgoing list
   Edge *const outHead = this->out;
   if (outHead) {
      e->next[0] = outHead;
      e->prev[0] = outHead->prev[0];
      e->prev[0]->next[0] = e;
      outHead->prev[0] = e;
   }
   this->out = e;

   // link in front of the current head of the target's incoming list
   Edge *const inHead = node->in;
   if (inHead) {
      e->next[1] = inHead;
      e->prev[1] = inHead->prev[1];
      e->prev[1]->next[1] = e;
      inHead->prev[1] = e;
   }
   node->in = e;

   ++this->outCount;
   ++node->inCount;

   assert(this->graph);
   if (!node->graph) {
      node->graph = this->graph;
      ++node->graph->size;
   }

   if (kind == Edge::UNKNOWN)
      graph->classifyEdges();
}
// Delete the first outgoing edge that leads to @node.
// Returns false (and logs an error) if there is no such edge.
bool Graph::Node::detach(Graph::Node *node)
{
   EdgeIterator ei = this->outgoing();

   while (!ei.end() && ei.getNode() != node)
      ei.next();

   if (ei.end()) {
      ERROR("no such node attached\n");
      return false;
   }

   delete ei.getEdge();
   return true;
}
// Cut a node from the graph, deleting all attached edges.
// Nothing happens for a node that has no graph or no edges at all.
void Graph::Node::cut()
{
   if (!graph)
      return;
   if (!in && !out)
      return;

   // each delete is expected to remove the edge from the list, so the
   // heads advance until the lists are empty
   while (out)
      delete out;
   while (in)
      delete in;

   if (this == graph->root)
      graph->root = NULL;
}
// Create an edge from @org to @tgt of type @kind. The edge starts as a
// single-element circular list in both directions; Node::attach() links it
// into the nodes' lists.
Graph::Edge::Edge(Node *org, Node *tgt, Type kind)
{
   origin = org;
   target = tgt;
   type = kind;

   for (int d = 0; d < 2; ++d) {
      next[d] = this;
      prev[d] = this;
   }
}
// Return true if this node can be reached from @node by following outgoing
// edges other than BACK and DUMMY edges, without passing through @term.
bool
Graph::Node::reachableBy(Node *node, Node *term)
{
   Stack pending;
   Node *cur;
   const int seq = graph->nextSequence();

   pending.push(node);

   while (pending.getSize()) {
      cur = reinterpret_cast<Node *>(pending.pop().u.p);

      if (cur == this)
         return true;
      if (cur == term)
         continue; // do not search beyond the terminator

      EdgeIterator ei = cur->outgoing();
      while (!ei.end()) {
         const bool skip =
            ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY;
         // visit() tells us whether the node is seen for the first time
         if (!skip && ei.getNode()->visit(seq))
            pending.push(ei.getNode());
         ei.next();
      }
   }
   return cur == this;
}
// Iterator over the nodes reachable from the graph's root in depth-first
// order. The complete order is computed in the constructor and stored in an
// array; iterating afterwards only walks that array.
class DFSIterator : public Graph::GraphIterator
{
public:
// @preorder selects whether a node is recorded before (true) or after
// (false) the nodes reached through its outgoing edges.
DFSIterator(Graph *graph, const bool preorder)
{
unsigned int seq = graph->nextSequence();
// one extra slot, set to 0 below
nodes = new Graph::Node * [graph->getSize() + 1];
count = 0;
pos = 0;
nodes[graph->getSize()] = 0;
if (graph->getRoot()) {
graph->getRoot()->visit(seq);
search(graph->getRoot(), preorder, seq);
}
}
~DFSIterator()
{
if (nodes)
delete[] nodes;
}
// Recursive DFS; visit(sequence) decides whether a node is still to be
// entered.
void search(Graph::Node *node, const bool preorder, const int sequence)
{
if (preorder)
nodes[count++] = node;
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
if (ei.getNode()->visit(sequence))
search(ei.getNode(), preorder, sequence);
if (!preorder)
nodes[count++] = node;
}
virtual bool end() const { return pos >= count; }
virtual void next() { if (pos < count) ++pos; }
virtual void *get() const { return nodes[pos]; }
// restart the iteration from the first recorded node
void reset() { pos = 0; }
protected:
Graph::Node **nodes; // recorded nodes, in iteration order
int count; // number of recorded nodes
int pos; // current index into nodes
};
// Create a depth-first iterator over this graph.
// The iterator is heap-allocated; callers in this file hand it back through
// putIterator() when done.
Graph::GraphIterator *Graph::iteratorDFS(bool preorder)
{
   DFSIterator *const dfs = new DFSIterator(this, preorder);

   return dfs;
}
// Currently identical to iteratorDFS().
Graph::GraphIterator *Graph::safeIteratorDFS(bool preorder)
{
   Graph::GraphIterator *const dfs = this->iteratorDFS(preorder);

   return dfs;
}
// Iterator that yields the nodes of a control flow graph in an order where a
// node reached through TREE/FORWARD/DUMMY edges is only emitted after all
// such incoming edges have been seen. The order is computed once in the
// constructor and stored in an array.
class CFGIterator : public Graph::GraphIterator
{
public:
CFGIterator(Graph *graph)
{
// one extra slot, set to 0 below
nodes = new Graph::Node * [graph->getSize() + 1];
count = 0;
pos = 0;
nodes[graph->getSize()] = 0;
// TODO: argh, use graph->sequence instead of tag and just raise it by > 1
// reset the per-node counters used by search()
Iterator *iter;
for (iter = graph->iteratorDFS(); !iter->end(); iter->next())
reinterpret_cast<Graph::Node *>(iter->get())->tag = 0;
graph->putIterator(iter);
if (graph->getRoot())
search(graph->getRoot(), graph->nextSequence());
}
~CFGIterator()
{
if (nodes)
delete[] nodes;
}
virtual void *get() const { return nodes[pos]; }
virtual bool end() const { return pos >= count; }
virtual void next() { if (pos < count) ++pos; }
private:
// Work-list traversal starting at @node.
//  bb:    nodes ready to be emitted
//  cross: targets of CROSS edges; only processed once bb has run empty
// Each node's tag counts how many incoming edges have been seen so far.
void search(Graph::Node *node, const int sequence)
{
Stack bb, cross;
bb.push(node);
while (bb.getSize()) {
node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
assert(node);
// skip nodes that visit() reports as already handled
if (!node->visit(sequence))
continue;
node->tag = 0;
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
switch (ei.getType()) {
case Graph::Edge::TREE:
case Graph::Edge::FORWARD:
case Graph::Edge::DUMMY:
// ready once the count reaches incidentCountFwd()
if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd())
bb.push(ei.getNode());
break;
case Graph::Edge::BACK:
continue;
case Graph::Edge::CROSS:
// remember the target the first time it is seen
if (++(ei.getNode()->tag) == 1)
cross.push(ei.getNode());
break;
default:
assert(!"unknown edge kind in CFG");
break;
}
}
nodes[count++] = node;
if (bb.getSize() == 0)
cross.moveTo(bb);
}
}
private:
Graph::Node **nodes; // emitted nodes, in iteration order
int count; // number of emitted nodes
int pos; // current index into nodes
};
// Returns a newly allocated control-flow-order iterator over this graph.
Graph::GraphIterator *Graph::iteratorCFG()
{
   CFGIterator *const it = new CFGIterator(this);
   return it;
}
// "Safe" CFG iterator; at present this just forwards to iteratorCFG().
Graph::GraphIterator *Graph::safeIteratorCFG()
{
   Graph::GraphIterator *const it = iteratorCFG();
   return it;
}
// Re-derives the type of every non-DUMMY edge reachable from the root with a
// depth-first walk (see classifyDFS). Afterwards each visited node's visit mark
// holds its DFS number and the graph's sequence counter equals the number of
// nodes numbered.
void Graph::classifyEdges()
{
   int seq = 0;

   // reset the visit marks and tags that classifyDFS relies on
   DFSIterator *iter = new DFSIterator(this, true);
   for (; !iter->end(); iter->next()) {
      Node *node = reinterpret_cast<Node *>(iter->get());
      node->visit(0);
      node->tag = 0;
   }
   putIterator(iter);

   // an empty graph has no root; classifyDFS would dereference NULL
   if (root)
      classifyDFS(root, seq);

   sequence = seq;
}
// Recursive helper of classifyEdges().
//
// Numbers curr with the next DFS sequence value and then types each non-DUMMY
// edge touching it, first the outgoing ones (looking at the target), then the
// incident ones (looking at the origin):
//   - other end not numbered yet      -> TREE, and recurse into it
//   - other end has a higher number   -> FORWARD
//   - other end still on the DFS path -> BACK   (tag != 0)
//   - otherwise                       -> CROSS
//
// The edge lists are walked with EdgeIterator rather than by following next[]
// until NULL: EdgeIterator::next() stops when the link returns to the first
// edge (or is NULL), so the walk terminates on circular edge lists as well.
//
// curr: node to number, must not be NULL
// seq:  running DFS counter, incremented for every node numbered
void Graph::classifyDFS(Node *curr, int& seq)
{
   curr->visit(++seq);
   curr->tag = 1; // curr is on the active DFS path

   for (EdgeIterator ei = curr->outgoing(); !ei.end(); ei.next()) {
      Edge *edge = ei.getEdge();
      Node *node = edge->target;

      if (edge->type == Edge::DUMMY)
         continue;

      if (node->getSequence() == 0) {
         edge->type = Edge::TREE;
         classifyDFS(node, seq);
      } else
      if (node->getSequence() > curr->getSequence()) {
         edge->type = Edge::FORWARD;
      } else {
         edge->type = node->tag ? Edge::BACK : Edge::CROSS;
      }
   }

   for (EdgeIterator ei = curr->incident(); !ei.end(); ei.next()) {
      Edge *edge = ei.getEdge();
      Node *node = edge->origin;

      if (edge->type == Edge::DUMMY)
         continue;

      if (node->getSequence() == 0) {
         edge->type = Edge::TREE;
         classifyDFS(node, seq);
      } else
      if (node->getSequence() > curr->getSequence()) {
         edge->type = Edge::FORWARD;
      } else {
         edge->type = node->tag ? Edge::BACK : Edge::CROSS;
      }
   }

   curr->tag = 0; // curr leaves the active DFS path
}
} // namespace nv50_ir

View file

@ -0,0 +1,207 @@
#ifndef __NV50_IR_GRAPH_H__
#define __NV50_IR_GRAPH_H__
#include "nv50_ir_util.h"
namespace nv50_ir {
// Convenience casts: reinterpret the void * returned by an iterator's get() as a
// graph node or a graph edge. No type check is performed.
#define ITER_NODE(x) reinterpret_cast<Graph::Node *>((x).get())
#define ITER_EDGE(x) reinterpret_cast<Graph::Edge *>((x).get())
// A connected graph.
// Nodes are linked by typed, directed edges; the graph itself only stores the
// root node, a node count and a sequence counter used to mark visited nodes.
class Graph
{
public:
class Node;
// Base class of the iterators handed out by iteratorDFS()/iteratorCFG().
class GraphIterator : public Iterator
{
public:
virtual ~GraphIterator() { };
};
// Directed edge from origin to target.
class Edge
{
public:
// Edge classification (assigned by the creator or by Graph::classifyEdges()).
enum Type
{
UNKNOWN,
TREE,
FORWARD,
BACK,
CROSS, // e.g. loop break
DUMMY
};
Edge(Node *dst, Node *src, Type kind);
// destroying an edge unlinks it first
~Edge() { unlink(); }
inline Node *getOrigin() const { return origin; }
inline Node *getTarget() const { return target; }
inline Type getType() const { return type; }
const char *typeStr() const;
private:
Node *origin;
Node *target;
Type type;
// index 0: list of edges leaving origin, index 1: list of edges entering target
Edge *next[2]; // next edge outgoing/incident from/to origin/target
Edge *prev[2];
void unlink();
friend class Graph;
};
// Iterates over one edge list of a node.
// d selects the list: 0 follows next[0] and getNode() yields the target,
// 1 follows next[1] and getNode() yields the origin.
class EdgeIterator : public Iterator
{
public:
EdgeIterator() : e(0), t(0), d(0) { }
EdgeIterator(Graph::Edge *first, int dir) : e(first), t(first), d(dir) { }
// iteration ends when the next link is the first edge again (or is NULL)
virtual void next() { e = (e->next[d] == t) ? 0 : e->next[d]; }
virtual bool end() const { return !e; }
virtual void *get() const { return e; }
inline Node *getNode() const { assert(e); return d ?
e->origin : e->target; }
inline Edge *getEdge() const { return e; }
// UNKNOWN is returned once the iterator is at its end
inline Edge::Type getType() { return e ? e->getType() : Edge::UNKNOWN; }
private:
Graph::Edge *e; // current edge, NULL at end
Graph::Edge *t; // first edge of the list, used to detect wrap-around
int d; // direction / list selector, see above
};
// Graph node; `data` is an opaque pointer supplied by the owner.
class Node
{
public:
Node(void *);
// destroying a node calls cut() first
~Node() { cut(); }
void attach(Node *, Edge::Type);
bool detach(Node *);
void cut();
inline EdgeIterator outgoing() const;
inline EdgeIterator incident() const;
inline Node *parent() const; // returns NULL if count(incident edges) != 1
bool reachableBy(Node *node, Node *term);
// mark with a sequence value; returns false if already marked with it
inline bool visit(int);
// value last passed to visit()
inline int getSequence() const;
inline int incidentCountFwd() const; // count of incident non-back edges
inline int incidentCount() const { return inCount; }
inline int outgoingCount() const { return outCount; }
Graph *getGraph() const { return graph; }
void *data;
private:
Edge *in;
Edge *out;
Graph *graph;
int visited;
int16_t inCount;
int16_t outCount;
public:
int tag; // for temporary use
friend class Graph;
};
public:
Graph();
~Graph(); // does *not* free the nodes (make it an option ?)
inline Node *getRoot() const { return root; }
inline unsigned int getSize() const { return size; }
// increments and returns the sequence counter
inline int nextSequence();
void insert(Node *node); // attach to or set as root
// the iterators below are heap-allocated; putIterator() deletes them
GraphIterator *iteratorDFS(bool preorder = true);
GraphIterator *iteratorCFG();
// safe iterators are unaffected by changes to the *edges* of the graph
GraphIterator *safeIteratorDFS(bool preorder = true);
GraphIterator *safeIteratorCFG();
inline void putIterator(Iterator *); // should be GraphIterator *
void classifyEdges();
private:
void classifyDFS(Node *, int&);
private:
Node *root;
unsigned int size;
int sequence;
};
// Advances the graph's sequence counter and returns the new value.
int Graph::nextSequence()
{
   sequence += 1;
   return sequence;
}
// Origin of the single incident edge, or NULL when the node does not have
// exactly one incident edge.
Graph::Node *Graph::Node::parent() const
{
   if (inCount == 1) {
      assert(in);
      return in->origin;
   }
   return NULL;
}
// Marks the node with v. Returns true if the mark changed, false if the node
// already carried v.
bool Graph::Node::visit(int v)
{
   const bool fresh = (visited != v);
   visited = v;
   return fresh;
}
// Returns the value most recently stored by visit().
int Graph::Node::getSequence() const
{
   const int mark = visited;
   return mark;
}
// Destroys an iterator obtained from one of the iterator*() factories.
void Graph::putIterator(Iterator *iter)
{
   GraphIterator *const doomed = reinterpret_cast<GraphIterator *>(iter);
   delete doomed;
}
// Iterator over the edges leaving this node (getNode() yields the targets).
Graph::EdgeIterator Graph::Node::outgoing() const
{
   EdgeIterator it(out, 0);
   return it;
}
// Iterator over the edges entering this node (getNode() yields the origins).
Graph::EdgeIterator Graph::Node::incident() const
{
   EdgeIterator it(in, 1);
   return it;
}
// Counts the incident edges whose type is not BACK.
int Graph::Node::incidentCountFwd() const
{
   int fwd = 0;
   EdgeIterator ei = incident();

   while (!ei.end()) {
      if (ei.getType() != Edge::BACK)
         fwd++;
      ei.next();
   }
   return fwd;
}
} // namespace nv50_ir
#endif // __NV50_IR_GRAPH_H__

View file

@ -0,0 +1,328 @@
#ifndef __NV50_IR_INLINES_H__
#define __NV50_IR_INLINES_H__
// Returns cc with bits 0 and 2 of its low three bits exchanged (the table maps
// 1<->4 and 3<->6); all higher bits are passed through unchanged.
static inline CondCode reverseCondCode(CondCode cc)
{
   static const uint8_t ccRev[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   const int low = cc & 7;
   const int high = cc & ~7;

   return static_cast<CondCode>(ccRev[low] | high);
}
// Returns cc with its three low bits flipped; higher bits are kept.
static inline CondCode inverseCondCode(CondCode cc)
{
   const int flipped = cc ^ 7;
   return static_cast<CondCode>(flipped);
}
// True if f lies in the enum range FILE_MEMORY_CONST .. FILE_MEMORY_LOCAL.
static inline bool isMemoryFile(DataFile f)
{
   if (f < FILE_MEMORY_CONST)
      return false;
   return !(f > FILE_MEMORY_LOCAL);
}
// True if op lies in the enum range OP_TEX .. OP_TEXCSAA.
static inline bool isTextureOp(operation op)
{
   if (op < OP_TEX)
      return false;
   return !(op > OP_TEXCSAA);
}
// Size of a data type in bytes; 0 for any type not listed here.
static inline unsigned int typeSizeof(DataType ty)
{
   unsigned int bytes = 0;

   switch (ty) {
   case TYPE_U8:
   case TYPE_S8:
      bytes = 1;
      break;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      bytes = 2;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      bytes = 4;
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      bytes = 8;
      break;
   case TYPE_B96:
      bytes = 12;
      break;
   case TYPE_B128:
      bytes = 16;
      break;
   default:
      break;
   }
   return bytes;
}
// Picks a data type for a size in bytes.
//   flt: prefer the floating point type of that size (ignored for 1, 12, 16)
//   sgn: prefer the signed integer type (ignored for 12 and 16)
// Any size other than 1, 2, 8, 12 or 16 is treated like 4.
static inline DataType typeOfSize(unsigned int size,
                                  bool flt = false, bool sgn = false)
{
   if (size == 1)
      return sgn ? TYPE_S8 : TYPE_U8;
   if (size == 2)
      return flt ? TYPE_F16 : (sgn ? TYPE_S16 : TYPE_U16);
   if (size == 8)
      return flt ? TYPE_F64 : (sgn ? TYPE_S64 : TYPE_U64);
   if (size == 12)
      return TYPE_B96;
   if (size == 16)
      return TYPE_B128;

   // 4 bytes and everything unrecognised
   if (flt)
      return TYPE_F32;
   return sgn ? TYPE_S32 : TYPE_U32;
}
// True if ty lies in the enum range TYPE_F16 .. TYPE_F64.
static inline bool isFloatType(DataType ty)
{
   if (ty < TYPE_F16)
      return false;
   return !(ty > TYPE_F64);
}
// True for the signed integer types of every width, including the 64 bit one
// (TYPE_S64 was previously missing from the list).
static inline bool isSignedIntType(DataType ty)
{
   switch (ty) {
   case TYPE_S8:
   case TYPE_S16:
   case TYPE_S32:
   case TYPE_S64:
      return true;
   default:
      return false;
   }
}
// False for TYPE_NONE, the unsigned integer types and the B96/B128 types,
// true for everything else (signed integers and floats).
// TYPE_U64 is now listed with the other unsigned types; it used to fall
// through to the default case and be reported as signed.
static inline bool isSignedType(DataType ty)
{
   switch (ty) {
   case TYPE_NONE:
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
   case TYPE_U64:
   case TYPE_B96:
   case TYPE_B128:
      return false;
   default:
      return true;
   }
}
// Returns the source reference of the owning instruction that serves as
// indirect address for dimension dim, or NULL if isIndirect(dim) is false.
const ValueRef *ValueRef::getIndirect(int dim) const
{
   if (!isIndirect(dim))
      return NULL;
   return &insn->src[indirect[dim]];
}
// Register file of the referenced value, FILE_NULL if no value is set.
DataFile ValueRef::getFile() const
{
   if (!value)
      return FILE_NULL;
   return value->reg.file;
}
// reg.size of the referenced value, 0 if no value is set.
unsigned int ValueRef::getSize() const
{
   if (!value)
      return 0;
   return value->reg.size;
}
// Returns value->join for the referenced value; a value must be set.
Value *ValueRef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
// Returns value->join for the defined value; a value must be set.
Value *ValueDef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
// Register file of the defined value, FILE_NULL if no value is set.
DataFile ValueDef::getFile() const
{
   if (!value)
      return FILE_NULL;
   return value->reg.file;
}
// reg.size of the defined value, 0 if no value is set.
unsigned int ValueDef::getSize() const
{
   if (!value)
      return 0;
   return value->reg.size;
}
// Replaces the defined value by lval and makes this def lval's definition.
// The previously defined value is not lost: its pointer is stashed in `prev`
// (reinterpreted as a ValueDef *) and can be read back through preSSA().
// Statement order matters: set(NULL) runs before prev is overwritten.
void ValueDef::setSSA(LValue *lval)
{
Value *save = value;
this->set(NULL);
// prev now holds the old Value pointer, not a real ValueDef
prev = reinterpret_cast<ValueDef *>(save);
value = lval;
lval->defs = this;
}
// If this def is alone in its list (next points back at itself), make prev
// point at itself as well; otherwise leave prev untouched.
void ValueDef::restoreDefList()
{
   if (next != this)
      return;
   prev = this;
}
// Returns the pointer kept in prev, reinterpreted as an LValue
// (setSSA() stores the pre-SSA value there).
const LValue *ValueDef::preSSA() const
{
   const LValue *const old = reinterpret_cast<LValue *>(prev);
   return old;
}
// Instruction of the first definition, or NULL if the value has no defs.
// In debug builds it additionally checks that getUniqueInsn() finds one.
Instruction *Value::getInsn() const
{
   if (!defs)
      return NULL;
   assert(getUniqueInsn());
   return defs->getInsn();
}
// Returns the instruction defining this very value (not one it was joined
// with), or NULL if the value has no definitions.
Instruction *Value::getUniqueInsn() const
{
   if (!defs)
      return NULL;

   if (join != this) {
      // joined into another value: look for our own def in the shared list
      ValueDef::Iterator it = defs->iterator();
      for (; !it.end(); it.next())
         if (it.get()->get() == this)
            break;
      assert(it.get()->get() == this);
      return it.get()->getInsn();
   }

   // after regalloc, the definitions of coalesced values are linked
   if (reg.data.id < 0) {
      int nDef = 0;
      ValueDef::Iterator it = defs->iterator();
      while (!it.end() && nDef < 2) {
         if (it.get()->get() == this) // don't count joined values
            ++nDef;
         it.next();
      }
      if (nDef > 1)
         WARN("value %%%i not uniquely defined\n", id); // return NULL ?
   }

   assert(defs->get() == this);
   return defs->getInsn();
}
// Value used as indirect address for source s in dimension dim, NULL if that
// source is not indirect in that dimension.
Value *Instruction::getIndirect(int s, int dim) const
{
   if (!src[s].isIndirect(dim))
      return NULL;
   return getSrc(src[s].indirect[dim]);
}
// Source selected by predSrc, or NULL when predSrc is negative.
Value *Instruction::getPredicate() const
{
   if (predSrc < 0)
      return NULL;
   return getSrc(predSrc);
}
// Source selected by tex.rIndirectSrc, or NULL when that index is negative.
Value *TexInstruction::getIndirectR() const
{
   if (tex.rIndirectSrc < 0)
      return NULL;
   return getSrc(tex.rIndirectSrc);
}
// Source selected by tex.sIndirectSrc, or NULL when that index is negative.
// (This used to read tex.rIndirectSrc, i.e. it was a copy of getIndirectR().)
Value *TexInstruction::getIndirectS() const
{
   return tex.sIndirectSrc >= 0 ? getSrc(tex.sIndirectSrc) : NULL;
}
// Downcast to CmpInstruction for ops in OP_SET_AND .. OP_SLCT except OP_SELP;
// NULL for every other op.
CmpInstruction *Instruction::asCmp()
{
   const bool inRange = (op >= OP_SET_AND && op <= OP_SLCT);

   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<CmpInstruction *>(this);
}
// Const overload: downcast to CmpInstruction for ops in OP_SET_AND .. OP_SLCT
// except OP_SELP; NULL for every other op.
const CmpInstruction *Instruction::asCmp() const
{
   const bool inRange = (op >= OP_SET_AND && op <= OP_SLCT);

   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<const CmpInstruction *>(this);
}
// Downcast to FlowInstruction for ops in OP_BRA .. OP_JOIN, NULL otherwise.
FlowInstruction *Instruction::asFlow()
{
   if (op < OP_BRA || op > OP_JOIN)
      return NULL;
   return static_cast<FlowInstruction *>(this);
}
// Const overload: downcast to FlowInstruction for ops in OP_BRA .. OP_JOIN,
// NULL otherwise.
// The upper bound used to be OP_JOINAT, which made this overload disagree with
// the non-const one and reject OP_JOIN.
const FlowInstruction *Instruction::asFlow() const
{
   if (op >= OP_BRA && op <= OP_JOIN)
      return static_cast<const FlowInstruction *>(this);
   return NULL;
}
// Downcast to TexInstruction for ops in OP_TEX .. OP_TEXCSAA, NULL otherwise.
TexInstruction *Instruction::asTex()
{
   if (op < OP_TEX || op > OP_TEXCSAA)
      return NULL;
   return static_cast<TexInstruction *>(this);
}
// Const overload: downcast to TexInstruction for ops in OP_TEX .. OP_TEXCSAA,
// NULL otherwise.
const TexInstruction *Instruction::asTex() const
{
   if (op < OP_TEX || op > OP_TEXCSAA)
      return NULL;
   return static_cast<const TexInstruction *>(this);
}
// XXX: use a virtual function so we're really really safe ?
// Downcast to LValue, decided purely by the register file: files in the enum
// range FILE_GPR .. FILE_ADDRESS qualify, anything else yields NULL.
LValue *Value::asLValue()
{
   if (reg.file < FILE_GPR || reg.file > FILE_ADDRESS)
      return NULL;
   return static_cast<LValue *>(this);
}
// Downcast to Symbol for register files at or above FILE_MEMORY_CONST in the
// enum order, NULL otherwise.
Symbol *Value::asSym()
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<Symbol *>(this);
}
// Const overload: downcast to Symbol for register files at or above
// FILE_MEMORY_CONST in the enum order, NULL otherwise.
const Symbol *Value::asSym() const
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<const Symbol *>(this);
}
void Symbol::setOffset(int32_t offset)
{
reg.data.offset = offset;
}
// Sets both the base symbol and the offset stored in reg.data.offset.
void Symbol::setAddress(Symbol *base, int32_t offset)
{
   this->reg.data.offset = offset;
   this->baseSym = base;
}
// Records a system value semantic and its index in reg.data.sv.
void Symbol::setSV(SVSemantic sv, uint32_t index)
{
   this->reg.data.sv.index = index;
   this->reg.data.sv.sv = sv;
}
// Downcast to ImmediateValue if the register file is FILE_IMMEDIATE,
// NULL otherwise.
ImmediateValue *Value::asImm()
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<ImmediateValue *>(this);
}
// Const overload: downcast to ImmediateValue if the register file is
// FILE_IMMEDIATE, NULL otherwise.
const ImmediateValue *Value::asImm() const
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<const ImmediateValue *>(this);
}
// Reinterprets the iterator's current element as a Value (no type check).
Value *Value::get(Iterator &it)
{
   void *const raw = it.get();
   return reinterpret_cast<Value *>(raw);
}
// Forwards to Graph::Node::reachableBy() on the CFG nodes of the three blocks.
bool BasicBlock::reachableBy(BasicBlock *by, BasicBlock *term)
{
   Graph::Node *const from = &by->cfg;
   Graph::Node *const stop = &term->cfg;

   return cfg.reachableBy(from, stop);
}
// Reinterprets the iterator's current element as a BasicBlock (no type check).
BasicBlock *BasicBlock::get(Iterator &iter)
{
   void *const raw = iter.get();
   return reinterpret_cast<BasicBlock *>(raw);
}
// Returns the BasicBlock stored in the node's data pointer; node must not be
// NULL.
BasicBlock *BasicBlock::get(Graph::Node *node)
{
   assert(node);
   void *const payload = node->data;
   return reinterpret_cast<BasicBlock *>(payload);
}
// Looks up entry id of allLValues; id must be a valid index (asserted).
LValue *Function::getLValue(int id)
{
   const unsigned int index = (unsigned int)id;
   const unsigned int limit = (unsigned int)allLValues.getSize();

   assert(index < limit);
   return reinterpret_cast<LValue *>(allLValues.get(id));
}
#endif // __NV50_IR_INLINES_H__

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,558 @@
#include "nv50_ir.h"
#include "nv50_ir_target.h"
namespace nv50_ir {
// Categories of printed text; each value is used as an index into colour[].
enum TextStyle
{
TXT_DEFAULT,
TXT_GPR,
TXT_REGISTER,
TXT_FLAGS,
TXT_MEM,
TXT_IMMD,
TXT_BRA,
TXT_INSN
};
// Escape sequence emitted before text of each TextStyle (indexed by TextStyle).
// Flip the #if to 0 to get the all-empty table, i.e. uncoloured output.
static const char *colour[8] =
{
#if 1
"\x1b[00m",
"\x1b[34m",
"\x1b[35m",
"\x1b[35m",
"\x1b[36m",
"\x1b[33m",
"\x1b[37m",
"\x1b[32m"
#else
"", "", "", "", "", "", "", ""
#endif
};
// Printable mnemonic for every operation, indexed by the operation enum value;
// the final entry belongs to OP_LAST.
const char *operationStr[OP_LAST + 1] =
{
   "nop", "phi", "union", "split", "merge", "consec",
   "mov", "ld", "st",
   "add", "sub", "mul", "div", "mod", "mad", "fma", "sad",
   "abs", "neg", "not",
   "and", "or", "xor", "shl", "shr",
   "max", "min", "sat",
   "ceil", "floor", "trunc", "cvt",
   "set and", "set or", "set xor", "set", "selp", "slct",
   "rcp", "rsq", "lg2", "sin", "cos", "ex2", "exp", "log",
   "presin", "preex2", "sqrt", "pow",
   "bra", "call", "ret", "cont", "break",
   "preret", "precont", "prebreak", "brkpt",
   "joinat", "join", "discard", "exit", "barrier",
   "vfetch", "pfetch", "export", "linterp", "pinterp",
   "emit", "restart",
   "tex", "texbias", "texlod", "texfetch", "texquery", "texgrad",
   "texgather", "texcsaa",
   "suld", "sust",
   "dfdx", "dfdy",
   "rdsv", "wrsv",
   "pixld", "quadop", "quadon", "quadpop",
   "popcnt", "insbf", "extbf",
   "(invalid)"
};
// Printable name of each data type, indexed by the DataType value.
static const char *DataTypeStr[] =
{
   "-",
   "u8",
   "s8",
   "u16",
   "s16",
   "u32",
   "s32",
   "u64",
   "s64",
   "f16",
   "f32",
   "f64",
   "b96",
   "b128"
};
// Printable suffix of each rounding mode, indexed by the rounding mode value;
// the first mode prints as nothing.
static const char *RoundModeStr[] =
{
   "",
   "rm",
   "rz",
   "rp",
   "rni",
   "rmi",
   "rzi",
   "rpi"
};
// Printable name of each condition code, indexed by the CondCode value.
static const char *CondCodeStr[] =
{
   // 0 .. 7
   "never", "lt", "eq", "le", "gt", "ne", "ge", "",
   // 8 .. 15
   "(invalid)", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
   // 16 .. 23
   "no", "nc", "ns", "na", "a", "s", "c", "o"
};
// Printable name of each system value semantic, indexed by the semantic's enum
// value; the final entry belongs to SV_LAST.
static const char *SemanticStr[SV_LAST + 1] =
{
   "POSITION", "VERTEX_ID", "INSTANCE_ID", "INVOCATION_ID",
   "PRIMITIVE_ID", "VERTEX_COUNT", "LAYER", "VIEWPORT_INDEX",
   "Y_DIR", "FACE", "POINT_SIZE", "POINT_COORD",
   "CLIP_DISTANCE", "SAMPLE_INDEX", "TESS_FACTOR", "TESS_COORD",
   "TID", "CTAID", "NTID", "GRIDID",
   "NCTAID", "LANEID", "PHYSID", "NPHYSID",
   "CLOCK", "LBASE", "SBASE", "?",
   "(INVALID)"
};
// Append helpers for the print() functions below. They expect three names in
// the enclosing scope: `char *buf`, `size_t size` (capacity of buf) and
// `size_t pos` (current write offset).
//
// snprintf() returns the length the text would have had without truncation, so
// blindly adding it to pos can push pos past size; the following
// `size - pos` then wraps around and later writes run off the buffer.
// Here pos is clamped to size on truncation and nothing more is written once
// the buffer is full, so pos <= size always holds afterwards.

// Append formatted text.
#define PRINT(args...)                                                     \
   do {                                                                    \
      if (pos < size) {                                                    \
         const int len_ = snprintf(&buf[pos], size - pos, args);           \
         if (len_ > 0)                                                     \
            pos = (static_cast<size_t>(len_) < size - pos) ? pos + len_    \
                                                           : size;         \
      } else {                                                             \
         pos = size;                                                       \
      }                                                                    \
   } while(0)

// Append a blank if cond holds (and there is room), then formatted text.
#define SPACE_PRINT(cond, args...)                                         \
   do {                                                                    \
      if ((cond) && pos < size)                                            \
         buf[pos++] = ' ';                                                 \
      PRINT(args);                                                         \
   } while(0)

// Append a single blank if there is room.
#define SPACE()                                                            \
   do {                                                                    \
      if (pos < size)                                                      \
         buf[pos++] = ' ';                                                 \
   } while(0)
// Writes the set modifier bits as blank-separated words ("not", "sat", "neg",
// "abs", in that order) into buf, preceded by the instruction colour if any bit
// is set. Returns the offset reached in buf.
int Modifier::print(char *buf, size_t size) const
{
   static const struct {
      int flag;
      const char *text;
   } names[] = {
      { NV50_IR_MOD_NOT, "not" },
      { NV50_IR_MOD_SAT, "sat" },
      { NV50_IR_MOD_NEG, "neg" },
      { NV50_IR_MOD_ABS, "abs" }
   };
   size_t pos = 0;

   if (bits)
      PRINT("%s", colour[TXT_INSN]);

   // a separating blank is only wanted once some word has been written
   const size_t base = pos;

   for (unsigned int k = 0; k < sizeof(names) / sizeof(names[0]); ++k) {
      if (!(bits & names[k].flag))
         continue;
      SPACE_PRINT(pos > base && pos < size, "%s", names[k].text);
   }
   return pos;
}
// Writes the register name into buf: colour, '$' plus the assigned id of the
// joined value if one is assigned (>= 0) or '%' plus this value's own id
// otherwise, a letter for the register file and a size postfix.
// ty is not used. Returns the offset reached in buf.
int LValue::print(char *buf, size_t size, DataType ty) const
{
   const bool assigned = join->reg.data.id >= 0;
   const int idx = assigned ? join->reg.data.id : id;
   const char p = assigned ? '$' : '%';

   const char *postFix = "";
   int col = TXT_DEFAULT;
   char r;
   size_t pos = 0;

   switch (reg.file) {
   case FILE_GPR:
      r = 'r';
      col = TXT_GPR;
      if (reg.size == 8)
         postFix = "d";
      else if (reg.size == 16)
         postFix = "q";
      break;
   case FILE_PREDICATE:
      r = 'p';
      col = TXT_REGISTER;
      if (reg.size == 2)
         postFix = "d";
      else if (reg.size == 4)
         postFix = "q";
      break;
   case FILE_FLAGS:
      r = 'c';
      col = TXT_FLAGS;
      break;
   case FILE_ADDRESS:
      r = 'a';
      col = TXT_REGISTER;
      break;
   default:
      assert(!"invalid file for lvalue");
      r = '?';
      break;
   }

   PRINT("%s%c%c%i%s", colour[col], p, r, idx, postFix);

   return pos;
}
// Writes the immediate into buf, interpreted as type ty: floats with %f,
// unsigned integers as zero-padded hex, signed integers as decimal. 64 bit
// integers and any unlisted type are printed as 16-digit hex of the u64 view.
// Returns the offset reached in buf.
int ImmediateValue::print(char *buf, size_t size, DataType ty) const
{
   size_t pos = 0;

   PRINT("%s", colour[TXT_IMMD]);

   switch (ty) {
   case TYPE_F32: PRINT("%f", reg.data.f32); break;
   case TYPE_F64: PRINT("%f", reg.data.f64); break;
   case TYPE_U8:  PRINT("0x%02x", reg.data.u8); break;
   case TYPE_S8:  PRINT("%i", reg.data.s8); break;
   case TYPE_U16: PRINT("0x%04x", reg.data.u16); break;
   case TYPE_S16: PRINT("%i", reg.data.s16); break;
   case TYPE_U32: PRINT("0x%08x", reg.data.u32); break;
   case TYPE_S32: PRINT("%i", reg.data.s32); break;
   case TYPE_U64:
   case TYPE_S64:
   default:
      // "%lx" only matches a 64 bit value where long is 64 bits wide;
      // unsigned long long with "%llx" is correct on every platform
      PRINT("0x%016llx", static_cast<unsigned long long>(reg.data.u64));
      break;
   }
   return pos;
}
int Symbol::print(char *buf, size_t size, DataType ty) const
{
return print(buf, size, NULL, NULL, ty);
}
// Writes the symbol into buf and returns the offset reached.
//   rel:    value printed as relative address before the offset, may be NULL
//   dimRel: value printed in an extra leading "[...]" (ignored for system
//           values), may be NULL
//   ty:     TYPE_NONE is replaced by a type derived from reg.size; the result
//           is not used further in this function
// System values print as sv[NAME:index(+rel)], everything else as
// <file letter>[(dimRel][)(rel+/-)0xoffset], with the file index after the
// letter for the constant file.
int Symbol::print(char *buf, size_t size,
Value *rel, Value *dimRel, DataType ty) const
{
size_t pos = 0;
char c;
if (ty == TYPE_NONE)
ty = typeOfSize(reg.size);
if (reg.file == FILE_SYSTEM_VALUE) {
PRINT("%ssv[%s%s:%i%s", colour[TXT_MEM],
colour[TXT_REGISTER],
SemanticStr[reg.data.sv.sv], reg.data.sv.index, colour[TXT_MEM]);
if (rel) {
PRINT("%s+", colour[TXT_DEFAULT]);
pos += rel->print(&buf[pos], size - pos);
}
PRINT("%s]", colour[TXT_MEM]);
return pos;
}
// one letter per memory file
switch (reg.file) {
case FILE_MEMORY_CONST: c = 'c'; break;
case FILE_SHADER_INPUT: c = 'a'; break;
case FILE_SHADER_OUTPUT: c = 'o'; break;
case FILE_MEMORY_GLOBAL: c = 'g'; break;
case FILE_MEMORY_SHARED: c = 's'; break;
case FILE_MEMORY_LOCAL: c = 'l'; break;
default:
assert(!"invalid file");
c = '?';
break;
}
if (c == 'c')
PRINT("%s%c%i[", colour[TXT_MEM], c, reg.fileIndex);
else
PRINT("%s%c[", colour[TXT_MEM], c);
if (dimRel) {
pos += dimRel->print(&buf[pos], size - pos, TYPE_S32);
PRINT("%s][", colour[TXT_MEM]);
}
if (rel) {
pos += rel->print(&buf[pos], size - pos);
// the sign is printed here, the magnitude below
PRINT("%s%c", colour[TXT_DEFAULT], (reg.data.offset < 0) ? '-' : '+');
} else {
// without a relative part a negative offset is not expected
assert(reg.data.offset >= 0);
}
PRINT("%s0x%x%s]", colour[TXT_IMMD], abs(reg.data.offset), colour[TXT_MEM]);
return pos;
}
// Formats the instruction into a local buffer and emits it with INFO(),
// followed by encSize in parentheses.
// Layout: [join] [predicate] [sat] op [modifiers/type] [rounding] defs
//         [set condition] [source type] sources
void Instruction::print() const
{
// NOTE: BUFSZ stays defined for the rest of the translation unit
#define BUFSZ 512
const size_t size = BUFSZ;
char buf[BUFSZ];
int s, d;
size_t pos = 0;
PRINT("%s", colour[TXT_INSN]);
if (join)
PRINT("join ");
// predicate: "not" or a condition code name, then the predicate value
if (predSrc >= 0) {
const size_t pre = pos;
if (getSrc(predSrc)->reg.file == FILE_PREDICATE) {
if (cc == CC_NOT_P)
PRINT("not");
} else {
PRINT("%s", CondCodeStr[cc]);
}
// separate with a blank only if more than one character was written
if (pos > pre + 1)
SPACE();
pos += src[predSrc].get()->print(&buf[pos], BUFSZ - pos);
PRINT(" %s", colour[TXT_INSN]);
}
if (saturate)
PRINT("sat ");
if (asFlow()) {
// flow instructions print their target instead of a data type
PRINT("%s", operationStr[op]);
if (op == OP_CALL && asFlow()->builtin) {
PRINT(" %sBUILTIN:%i", colour[TXT_BRA], asFlow()->target.builtin);
} else
if (op == OP_CALL && asFlow()->target.fn) {
PRINT(" %s%s", colour[TXT_BRA], asFlow()->target.fn->getName());
} else
if (asFlow()->target.bb)
PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId());
} else {
PRINT("%s ", operationStr[op]);
if (perPatch)
PRINT("patch ");
if (asTex())
PRINT("%s ", asTex()->tex.target.getName());
if (postFactor)
PRINT("x2^%i ", postFactor);
PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);
}
if (rnd != ROUND_N)
PRINT(" %s", RoundModeStr[rnd]);
// several definitions are wrapped in braces
if (def[1].exists())
PRINT(" {");
for (d = 0; defExists(d); ++d) {
SPACE();
pos += def[d].get()->print(&buf[pos], size - pos);
}
if (d > 1)
PRINT(" %s}", colour[TXT_INSN]);
else
// '#' stands in for a missing definition of a non-flow instruction
if (!d && !asFlow())
PRINT(" %s#", colour[TXT_INSN]);
if (asCmp())
PRINT(" %s%s", colour[TXT_INSN], CondCodeStr[asCmp()->setCond]);
if (sType != dType)
PRINT(" %s%s", colour[TXT_INSN], DataTypeStr[sType]);
for (s = 0; srcExists(s); ++s) {
// the predicate was printed above; pointer sources are printed as part
// of the source that uses them
if (s == predSrc || src[s].usedAsPtr)
continue;
const size_t pre = pos;
SPACE();
pos += src[s].mod.print(&buf[pos], BUFSZ - pos);
if (pos > pre + 1)
SPACE();
if (src[s].isIndirect(0) || src[s].isIndirect(1))
pos += src[s].get()->asSym()->print(&buf[pos], BUFSZ - pos,
getIndirect(s, 0),
getIndirect(s, 1));
else
pos += src[s].get()->print(&buf[pos], BUFSZ - pos, sType);
}
PRINT("%s", colour[TXT_DEFAULT]);
// terminate inside the buffer even if pos ran past it
buf[MIN2(pos, BUFSZ - 1)] = 0;
INFO("%s (%u)\n", buf, encSize);
}
// Pass that prints every function, basic block and instruction it visits.
class PrintPass : public Pass
{
public:
PrintPass() : serial(0) { }
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
virtual bool visit(Instruction *);
private:
// running number printed in front of each instruction
int serial;
};
// Prints the function name as a heading; always continues the pass.
bool
PrintPass::visit(Function *fn)
{
   const char *const fnName = fn->getName();

   INFO("\n%s:\n", fnName);

   return true;
}
// Prints a summary line for the block (id, instruction count, immediate
// dominator if any, dominance frontier) followed by one line per outgoing
// edge with the target block and the edge type. Always continues the pass.
bool
PrintPass::visit(BasicBlock *bb)
{
// disabled: would also list the incident edges
#if 0
INFO("---\n");
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next())
INFO(" <- BB:%i (%s)\n",
BasicBlock::get(ei.getNode())->getId(),
ei.getEdge()->typeStr());
#endif
INFO("BB:%i (%u instructions) - ", bb->getId(), bb->getInsnCount());
if (bb->idom())
INFO("idom = BB:%i, ", bb->idom()->getId());
INFO("df = { ");
for (DLList::Iterator df = bb->getDF().iterator(); !df.end(); df.next())
INFO("BB:%i ", BasicBlock::get(df)->getId());
INFO("}\n");
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next())
INFO(" -> BB:%i (%s)\n",
BasicBlock::get(ei.getNode())->getId(),
ei.getEdge()->typeStr());
return true;
}
// Prints the running serial number followed by the instruction itself;
// always continues the pass.
bool
PrintPass::visit(Instruction *insn)
{
   const int number = serial;
   serial += 1;

   INFO("%3i: ", number);
   insn->print();

   return true;
}
void
Function::print()
{
PrintPass pass;
pass.run(this, true, false);
}
void
Program::print()
{
PrintPass pass;
pass.run(this, true, false);
}
// Prints the live interval of every LValue of this function that has a
// non-empty one.
void
Function::printLiveIntervals() const
{
   INFO("printing live intervals ...\n");

   ArrayList::Iterator it = allLValues.iterator();
   for (; !it.end(); it.next()) {
      const Value *lval = Value::get(it)->asLValue();

      if (!lval || lval->livei.isEmpty())
         continue;
      INFO("livei(%%%i): ", lval->id);
      lval->livei.print();
   }
}
} // namespace nv50_ir

View file

@ -0,0 +1,963 @@
#include "nv50_ir.h"
#include "nv50_ir_target.h"
#include "nv50/nv50_debug.h"
namespace nv50_ir {
// Upper bound on the number of allocation units per register file; sizes the
// occupancy bitmaps in RegisterSet.
#define MAX_REGISTER_FILE_SIZE 256
// Occupancy bitmaps for the register files up to and including FILE_ADDRESS.
// A set bit means the corresponding allocation unit is taken.
class RegisterSet
{
public:
RegisterSet();
RegisterSet(const Target *);
void init(const Target *);
void reset(); // reset allocation status, but not max assigned regs
// for every word of file f: set the lock bits, then clear the unlock bits
void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
// ORs the occupancy bits of the other set into this one for file f
void intersect(DataFile f, const RegisterSet *);
// find free space for nr values and store the ids in them; false if none
bool assign(Value **, int nr);
void release(const Value *);
void occupy(const Value *);
// highest unit index handed out so far in file f, -1 if none
int getMaxAssigned(DataFile f) const { return fill[f]; }
void print() const;
private:
uint32_t bits[FILE_ADDRESS + 1][(MAX_REGISTER_FILE_SIZE + 31) / 32];
int unit[FILE_ADDRESS + 1]; // log2 of allocation granularity
int last[FILE_ADDRESS + 1]; // index of the last unit of each file (size - 1)
int fill[FILE_ADDRESS + 1]; // highest unit index marked as used so far
};
// Clears every occupancy bit of every file; unit/last/fill are left untouched.
void
RegisterSet::reset()
{
   memset(&bits[0][0], 0, sizeof(bits));
}
// Creates a set with all bits clear; init() must still be called to set the
// per-file limits.
RegisterSet::RegisterSet()
{
   this->reset();
}
void
RegisterSet::init(const Target *targ)
{
for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
DataFile f = static_cast<DataFile>(rf);
last[rf] = targ->getFileSize(f) - 1;
unit[rf] = targ->getFileUnit(f);
fill[rf] = -1;
assert(last[rf] < MAX_REGISTER_FILE_SIZE);
}
}
// Creates a cleared set and takes the per-file limits from targ.
RegisterSet::RegisterSet(const Target *targ)
{
   this->reset();
   this->init(targ);
}
// Applies the same 32 bit pattern to every bitmap word of file f: the bits in
// lock are set, then the bits in unlock are cleared.
void
RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
{
   // last[f] is the highest valid unit index, so last[f] / 32 + 1 words are in
   // use; (last[f] + 31) / 32 missed the final word whenever last[f] was a
   // multiple of 32 (e.g. a file with a single unit)
   const int words = (last[f] + 32) / 32;

   for (int i = 0; i < words; ++i)
      bits[f][i] = (bits[f][i] | lock) & ~unlock;
}
// ORs the occupancy bits of *set into this set for file f. Since a set bit
// means "taken", the units left free are those free in both sets.
void
RegisterSet::intersect(DataFile f, const RegisterSet *set)
{
   // last[f] is the highest valid unit index, so last[f] / 32 + 1 words are in
   // use; (last[f] + 31) / 32 missed the final word whenever last[f] was a
   // multiple of 32
   const int words = (last[f] + 32) / 32;

   for (int i = 0; i < words; ++i)
      bits[f][i] |= set->bits[f][i];
}
// Dumps the occupancy bitmap words of the GPR file as hex.
void
RegisterSet::print() const
{
   // last[] is the highest valid unit index, so last / 32 + 1 words are in
   // use; (last + 31) / 32 missed the final word whenever last was a
   // multiple of 32
   const int words = (last[FILE_GPR] + 32) / 32;

   INFO("GPR:");
   for (int i = 0; i < words; ++i)
      INFO(" %08x", bits[FILE_GPR][i]);
   INFO("\n");
}
// Finds a free, aligned run of allocation units for nr values of the size and
// file of def[0], marks it as taken and stores consecutive ids in the values
// that have a non-empty live interval.
// A request for 3 values reserves space for 4.
// Returns false if no suitable free run was found.
bool
RegisterSet::assign(Value **def, int nr)
{
   DataFile f = def[0]->reg.file;
   int n = nr;
   if (n == 3)
      n = 4;
   int s = (n * def[0]->reg.size) >> unit[f]; // run length in units
   uint32_t m = (1 << s) - 1;                 // mask covering one run

   int id = last[f] + 1;
   int i;

   // Scan every bitmap word that contains valid units. The bound must be <=:
   // with '<' the word holding unit last[f] was skipped whenever last[f] was a
   // multiple of 32, so e.g. a file with a single unit could never be assigned.
   for (i = 0; (i * 32) <= last[f]; ++i) {
      if (bits[f][i] == 0xffffffff)
         continue;

      // try the positions within this word in steps of the run length
      for (id = 0; id < 32; id += s)
         if (!(bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   id += i * 32;
   if (id > last[f])
      return false;

   bits[f][id / 32] |= m << (id % 32);

   if (id + (s - 1) > fill[f])
      fill[f] = id + (s - 1);

   for (i = 0; i < nr; ++i, ++id)
      if (!def[i]->livei.isEmpty()) // XXX: really increased id if empty ?
         def[i]->reg.data.id = id;
   return true;
}
// Marks the units of an already assigned value as taken. Values without an
// assigned id (id < 0) are ignored.
void
RegisterSet::occupy(const Value *val)
{
   const int id = val->reg.data.id;
   if (id < 0)
      return;
   const unsigned int f = val->reg.file;
   const int s = val->reg.size >> unit[f]; // number of units covered

   uint32_t m = (1 << s) - 1;

   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %x\n", f, id, m);

   bits[f][id / 32] |= m << (id % 32);

   // Track the highest unit actually covered, as assign() does; recording
   // only id under-reported the usage of values wider than one unit.
   const int top = (s > 1) ? id + (s - 1) : id;
   if (fill[f] < top)
      fill[f] = top;
}
// Clears the occupancy bits of an assigned value. Values without an assigned
// id (id < 0) are ignored; fill[] is not lowered.
void
RegisterSet::release(const Value *val)
{
   const int id = val->reg.data.id;

   if (id >= 0) {
      unsigned int f = val->reg.file;
      uint32_t m = (1 << (val->reg.size >> unit[f])) - 1;

      INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %x\n", f, id, m);

      const uint32_t taken = m << (id % 32);
      bits[f][id / 32] &= ~taken;
   }
}
// Bit flags for RegAlloc::coalesceValues(): each selects one class of
// instructions whose operands are to be coalesced.
#define JOIN_MASK_PHI (1 << 0)
#define JOIN_MASK_UNION (1 << 1)
#define JOIN_MASK_MOV (1 << 2)
#define JOIN_MASK_TEX (1 << 3)
#define JOIN_MASK_CONSTRAINT (1 << 4)
// Register allocator driver for a program, together with the helper passes it
// runs over each function.
class RegAlloc
{
public:
RegAlloc(Program *program) : prog(program), sequence(0) { }
bool exec();
bool execFunc();
private:
// mask is a combination of JOIN_MASK_* flags
bool coalesceValues(unsigned int mask);
bool linearScan();
bool allocateConstrainedValues();
private:
// inserts MOVs for the operands of phi instructions
class PhiMovesPass : public Pass {
private:
virtual bool visit(BasicBlock *);
inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
};
// computes the live intervals of values from the per-block live sets
class BuildIntervalsPass : public Pass {
private:
virtual bool visit(BasicBlock *);
void collectLiveValues(BasicBlock *);
void addLiveRange(Value *, const BasicBlock *, int end);
};
class InsertConstraintsPass : public Pass {
public:
bool exec(Function *func);
private:
virtual bool visit(BasicBlock *);
bool insertConstraintMoves();
void addHazard(Instruction *i, const ValueRef *src);
void textureMask(TexInstruction *);
void addConstraint(Instruction *, int s, int n);
bool detectConflict(Instruction *, int s);
DLList constrList;
};
bool buildLiveSets(BasicBlock *);
void collectLValues(DLList&, bool assignedOnly);
void insertOrderedTail(DLList&, Value *);
// instruction stored at position serial of insns
inline Instruction *insnBySerial(int);
private:
Program *prog;
Function *func;
// instructions in control flow / chronological order
ArrayList insns;
int sequence; // for manual passes through CFG
};
// Returns the entry of insns at index serial, reinterpreted as an Instruction.
Instruction *
RegAlloc::insnBySerial(int serial)
{
   void *const entry = insns.get(serial);
   return reinterpret_cast<Instruction *>(entry);
}
// Extends the live interval of val by the part of [definition, end) that lies
// inside bb: the range starts at the defining instruction if that is within
// bb, at bb's entry otherwise. Values without a defining instruction are
// ignored, as are empty ranges.
void
RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
                                           const BasicBlock *bb,
                                           int end)
{
   Instruction *const insn = val->getUniqueInsn();
   if (!insn)
      return;

   assert(bb->getFirst()->serial <= bb->getExit()->serial);
   assert(bb->getExit()->serial + 1 >= end);

   int begin = insn->serial;
   const bool beforeBlock = begin < bb->getEntry()->serial;
   if (beforeBlock || begin > bb->getExit()->serial)
      begin = bb->getEntry()->serial; // defined elsewhere: live from entry

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
            val->id, begin, insn->serial, end);

   if (begin == end) // empty ranges are only added as hazards for fixed regs
      return;
   val->livei.extend(begin, end);
}
// True if b has more than one incident edge and p has exactly two outgoing
// TREE/FORWARD edges.
bool
RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
{
   if (b->cfg.incidentCount() <= 1)
      return false;

   int fwd = 0;
   Graph::EdgeIterator ei = p->cfg.outgoing();
   while (!ei.end()) {
      const Graph::Edge::Type kind = ei.getType();
      if (kind == Graph::Edge::TREE || kind == Graph::Edge::FORWARD)
         fwd++;
      ei.next();
   }
   return fwd == 2;
}
// For each operand of each PHI in b, generate a new value by inserting a MOV
// at the end of the block it is coming from and replace the operand with its
// result. This eliminates liveness conflicts and enables us to let values be
// copied to the right register if such a conflict exists nonetheless.
//
// These MOVs are also crucial in making sure the live intervals of phi srces
// are extended until the end of the loop, since they are not included in the
// live-in sets.
//
// Before that, the first predecessor for which needNewElseBlock() holds gets a
// new, empty block spliced in on its edge to bb, so that the MOVs have a place
// of their own. Only one such edge is split per call (the loop stops there).
bool
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
{
Instruction *phi, *mov;
BasicBlock *pb, *pn;
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
pb = pn = BasicBlock::get(ei.getNode());
assert(pb);
if (needNewElseBlock(bb, pb)) {
pn = new BasicBlock(func);
// deletes an edge, iterator is invalid after this:
pb->cfg.detach(&bb->cfg);
pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); // XXX: check order !
assert(pb->getExit()->op != OP_CALL);
// redirect the predecessor's branch to the new block
if (pb->getExit()->asFlow()->target.bb == bb)
pb->getExit()->asFlow()->target.bb = pn;
break;
}
}
// insert MOVs (phi->src[j] should stem from j-th in-BB)
int j = 0;
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
pb = BasicBlock::get(ei.getNode());
// make sure there is a terminating instruction to insert the MOVs before
if (!pb->isTerminated())
pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
mov = new_Instruction(func, OP_MOV, TYPE_U32);
mov->setSrc(0, phi->getSrc(j));
mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
phi->setSrc(j, mov->getDef(0));
pb->insertBefore(pb->getExit(), mov);
}
++j;
}
return true;
}
// Build the set of live-in variables of bb.
// Recurses into the successors not yet visited with the current sequence
// value, merges their live sets, then walks bb's instructions backwards
// clearing definitions and setting LValue sources. Returns false if a
// recursive call fails.
bool
RegAlloc::buildLiveSets(BasicBlock *bb)
{
BasicBlock *bn;
Instruction *i;
unsigned int s, d;
INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
bb->liveSet.allocate(func->allLValues.getSize(), false);
// n counts the successors merged so far
int n = 0;
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
bn = BasicBlock::get(ei.getNode());
if (bn == bb)
continue;
if (bn->cfg.visit(sequence))
if (!buildLiveSets(bn))
return false;
// first successor: copy its set, further ones: OR them in
if (n++ == 0)
bb->liveSet = bn->liveSet;
else
bb->liveSet |= bn->liveSet;
}
if (!n && !bb->liveSet.marker)
bb->liveSet.fill(0);
bb->liveSet.marker = true;
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
INFO("BB:%i live set of out blocks:\n", bb->getId());
bb->liveSet.print();
}
// if (!bb->getEntry())
// return true;
// NOTE(review): getEntry() is dereferenced here whenever getExit() is
// non-NULL - presumably a block with an exit always has an entry; confirm
for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
for (d = 0; i->defExists(d); ++d)
bb->liveSet.clr(i->getDef(d)->id);
for (s = 0; i->srcExists(s); ++s)
if (i->getSrc(s)->asLValue())
bb->liveSet.set(i->getSrc(s)->id);
}
// values defined by phis are not live-in
for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
bb->liveSet.clr(i->getDef(0)->id);
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
INFO("BB:%i live set after propagation:\n", bb->getId());
bb->liveSet.print();
}
return true;
}
// Recomputes bb->liveSet as the union of the live sets of its successors
// reached through non-DUMMY edges. A block without outgoing edges but with
// incident ones gets an empty set.
void
RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
{
BasicBlock *bbA = NULL, *bbB = NULL;
assert(bb->cfg.incidentCount() || bb->liveSet.popCount() == 0);
if (bb->cfg.outgoingCount()) {
// trickery to save a loop of OR'ing liveSets
// aliasing works fine with BitSet::setOr
// bbB always holds the most recent successor, bbA the one before it; from
// the third successor on, the running union lives in bb itself (bbA == bb)
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
if (ei.getType() == Graph::Edge::DUMMY)
continue;
if (bbA) {
bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
bbA = bb;
} else {
bbA = bbB;
}
bbB = BasicBlock::get(ei.getNode());
}
// NOTE(review): bbB is still NULL here if every outgoing edge is DUMMY -
// presumably that cannot happen; confirm
bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
} else
if (bb->cfg.incidentCount()) {
bb->liveSet.fill(0);
}
}
// Builds the live ranges contributed by bb: collects the live-out set,
// corrects it for the phi instructions of the successors, adds a range up to
// the end of the block for every live-out value, and then walks the
// instructions backwards adding a range for each source at its last use.
// Always continues the pass.
bool
RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
{
collectLiveValues(bb);
INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
// go through out blocks and delete phi sources that do not originate from
// the current block from the live set
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
BasicBlock *out = BasicBlock::get(ei.getNode());
for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
bb->liveSet.clr(i->getDef(0)->id);
for (int s = 0; s < NV50_IR_MAX_SRCS && i->src[s].exists(); ++s) {
assert(i->src[s].getInsn());
// a phi source is live-out of bb only if it is defined in bb
if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
bb->liveSet.set(i->getSrc(s)->id);
else
bb->liveSet.clr(i->getSrc(s)->id);
}
}
}
// remaining live-outs are live until end
if (bb->getExit()) {
for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
if (bb->liveSet.test(j))
addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
}
// backwards over the non-phi instructions
for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
for (int d = 0; i->defExists(d); ++d) {
bb->liveSet.clr(i->getDef(d)->id);
if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
i->getDef(d)->livei.extend(i->serial, i->serial);
}
for (int s = 0; i->srcExists(s); ++s) {
if (!i->getSrc(s)->asLValue())
continue;
// first time seen walking backwards == last use in this block
if (!bb->liveSet.test(i->getSrc(s)->id)) {
bb->liveSet.set(i->getSrc(s)->id);
addLiveRange(i->getSrc(s), bb, i->serial);
}
}
}
return true;
}
// Goes through all instructions in serial order and coalesces definitions
// with sources for the instruction classes selected by mask (JOIN_MASK_*).
// Returns false only if coalescing the operands of a phi fails; failures for
// the other classes are ignored.
bool
RegAlloc::coalesceValues(unsigned int mask)
{
int c, n;
for (n = 0; n < insns.getSize(); ++n) {
Instruction *i;
Instruction *insn = insnBySerial(n);
switch (insn->op) {
case OP_PHI:
if (!(mask & JOIN_MASK_PHI))
break;
// def 0 with every source; the only case whose failure is fatal
for (c = 0; insn->srcExists(c); ++c)
if (!insn->getDef(0)->coalesce(insn->getSrc(c), false)) {
ERROR("failed to coalesce phi operands\n");
return false;
}
break;
case OP_UNION:
if (!(mask & JOIN_MASK_UNION))
break;
for (c = 0; insn->srcExists(c); ++c)
insn->getDef(0)->coalesce(insn->getSrc(c), true);
break;
case OP_CONSTRAINT:
if (!(mask & JOIN_MASK_CONSTRAINT))
break;
// def c with source c, for at most four operands
for (c = 0; c < 4 && insn->srcExists(c); ++c)
insn->getDef(c)->coalesce(insn->getSrc(c), true);
break;
case OP_MOV:
if (!(mask & JOIN_MASK_MOV))
break;
// only if the source has a defining instruction without constrained defs
i = insn->getSrc(0)->getUniqueInsn();
if (i && !i->constrainedDefs())
insn->getDef(0)->coalesce(insn->getSrc(0), false);
break;
case OP_TEX:
case OP_TXB:
case OP_TXL:
case OP_TXF:
case OP_TXQ:
case OP_TXD:
case OP_TXG:
case OP_TEXCSAA:
if (!(mask & JOIN_MASK_TEX))
break;
// def c with source c, for at most four operands
for (c = 0; c < 4 && insn->srcExists(c); ++c)
insn->getDef(c)->coalesce(insn->getSrc(c), true);
break;
default:
break;
}
}
return true;
}
// Inserts @val into @list, which is kept sorted by the start of the live
// interval. The search runs from the tail because callers add values in
// roughly increasing order, so the right slot is normally found quickly.
void
RegAlloc::insertOrderedTail(DLList &list, Value *val)
{
   const int start = val->livei.begin();
   DLList::Iterator pos = list.revIterator();

   while (!pos.end()) {
      Value *other = reinterpret_cast<Value *>(pos.get());
      if (other->livei.begin() <= start)
         break;
      pos.next();
   }
   pos.insert(val);
}
// Debug helper: asserts that @list holds no NULL entries, is ordered by
// non-decreasing live interval start, and that every value is its own join
// representative (join == itself).
static void
checkList(DLList &list)
{
   Value *last = NULL;
   DLList::Iterator it = list.iterator();

   while (!it.end()) {
      Value *cur = Value::get(it);
      assert(cur);
      if (last) {
         assert(last->livei.begin() <= cur->livei.begin());
      }
      assert(cur->join == cur);
      last = cur;
      it.next();
   }
}
// Gathers into @list every instruction definition that has a non-empty live
// interval, ordered by interval start. With @assignedOnly set, only values
// that already carry a register id (reg.data.id >= 0) are collected.
void
RegAlloc::collectLValues(DLList &list, bool assignedOnly)
{
   for (int serial = 0; serial < insns.getSize(); ++serial) {
      Instruction *insn = insnBySerial(serial);

      for (int d = 0; insn->defExists(d); ++d) {
         if (insn->getDef(d)->livei.isEmpty())
            continue;
         if (assignedOnly && insn->getDef(d)->reg.data.id < 0)
            continue;
         insertOrderedTail(list, insn->getDef(d));
      }
   }
   // sanity check of the resulting order (asserts only)
   checkList(list);
}
// Assigns registers to the definitions of instructions that define a vector
// of 2 to 4 values (defCount(0xf) >= 2), before the general linear scan runs.
//
// For each such instruction a register set is built per component, with all
// overlapping already-assigned values marked occupied and a periodic mask
// applied; the per-component sets are intersected into regSet[0], which then
// assigns all components at once.
// Returns false if no assignment is possible (no spilling is implemented).
bool
RegAlloc::allocateConstrainedValues()
{
   Value *defs[4];
   RegisterSet regSet[4];
   DLList regVals;
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: allocating constrained values\n");
   // regVals: values that already have a register, ordered by interval start
   collectLValues(regVals, true);
   for (int c = 0; c < 4; ++c)
      regSet[c].init(prog->getTarget());
   for (int n = 0; n < insns.getSize(); ++n) {
      Instruction *i = insnBySerial(n);
      const int vecSize = i->defCount(0xf);
      if (vecSize < 2)
         continue;
      assert(vecSize <= 4);
      // NOTE(review): rep() presumably yields the representative of the
      // coalesced value group - confirm against ValueDef
      for (int c = 0; c < vecSize; ++c)
         defs[c] = i->def[c].rep();
      // already assigned: all components are expected to be assigned too
      if (defs[0]->reg.data.id >= 0) {
         for (int c = 1; c < vecSize; ++c) {
            assert(defs[c]->reg.data.id >= 0);
         }
         continue;
      }
      for (int c = 0; c < vecSize; ++c) {
         uint32_t mask;
         regSet[c].reset();
         // block registers of assigned values whose intervals overlap
         for (DLList::Iterator it = regVals.iterator(); !it.end(); it.next()) {
            Value *rVal = Value::get(it);
            if (rVal->reg.data.id >= 0 && rVal->livei.overlaps(defs[c]->livei))
               regSet[c].occupy(rVal);
         }
         mask = 0x11111111;
         if (vecSize == 2) // granularity is 2 instead of 4
            mask |= 0x11111111 << 2;
         regSet[c].periodicMask(defs[0]->reg.file, 0, ~(mask << c));
         // the value is about to receive a register, so later vectors must
         // see it as occupied
         if (!defs[c]->livei.isEmpty())
            insertOrderedTail(regVals, defs[c]);
      }
      for (int c = 1; c < vecSize; ++c)
         regSet[0].intersect(defs[0]->reg.file, &regSet[c]);
      if (!regSet[0].assign(&defs[0], vecSize)) // TODO: spilling
         return false;
   }
   // NOTE(review): only regSet[0] and regSet[2] are consulted here although
   // assignments above always go through regSet[0] - confirm intent
   for (int c = 0; c < 4; c += 2)
      if (regSet[c].getMaxAssigned(FILE_GPR) > prog->maxGPR)
         prog->maxGPR = regSet[c].getMaxAssigned(FILE_GPR);
   return true;
}
// Linear scan register allocation over all values with a live interval.
//
// Values are processed in order of interval start. Three lists are kept:
//  unhandled - values not processed yet
//  active    - processed values whose interval contains the current start
//  inactive  - processed values that have not ended yet but do not contain
//              the current start point
// `free` tracks the registers occupied by active values; `f` is a per-value
// copy additionally blocking inactive and pre-assigned unhandled values that
// overlap the current one.
// There is no spilling: running out of registers calls abort().
bool
RegAlloc::linearScan()
{
   Value *cur, *val;
   DLList unhandled, active, inactive;
   RegisterSet f(prog->getTarget()), free(prog->getTarget());
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RA: linear scan\n");
   collectLValues(unhandled, false);
   for (DLList::Iterator cI = unhandled.iterator(); !cI.end();) {
      cur = Value::get(cI);
      // erase() moves the iterator on to the next unhandled value
      cI.erase();
      // update the active list relative to cur's start point
      for (DLList::Iterator aI = active.iterator(); !aI.end();) {
         val = Value::get(aI);
         if (val->livei.end() <= cur->livei.begin()) {
            // interval is over: give the register back for good
            free.release(val);
            aI.erase();
         } else
         if (!val->livei.contains(cur->livei.begin())) {
            // in a hole of the interval: register is temporarily available
            free.release(val);
            aI.moveToList(inactive);
         } else {
            aI.next();
         }
      }
      // update the inactive list relative to cur's start point
      for (DLList::Iterator iI = inactive.iterator(); !iI.end();) {
         val = Value::get(iI);
         if (val->livei.end() <= cur->livei.begin()) {
            iI.erase();
         } else
         if (val->livei.contains(cur->livei.begin())) {
            // live again: take the register back
            free.occupy(val);
            iI.moveToList(active);
         } else {
            iI.next();
         }
      }
      f = free;
      // inactive values only conflict if their intervals really overlap cur
      for (DLList::Iterator iI = inactive.iterator(); !iI.end(); iI.next()) {
         val = Value::get(iI);
         if (val->livei.overlaps(cur->livei))
            f.occupy(val);
      }
      // pre-assigned values that come later must not be clobbered either
      for (DLList::Iterator uI = unhandled.iterator(); !uI.end(); uI.next()) {
         val = Value::get(uI);
         if (val->reg.data.id >= 0 && val->livei.overlaps(cur->livei))
            f.occupy(val);
      }
      if (cur->reg.data.id < 0) {
         bool spill = !f.assign(&cur, 1);
         if (spill) {
            ERROR("out of registers of file %u\n", cur->reg.file);
            abort();
         }
      }
      free.occupy(cur);
      active.insert(cur);
   }
   // both sets are consulted for the highest GPR index handed out
   if (f.getMaxAssigned(FILE_GPR) > prog->maxGPR)
      prog->maxGPR = f.getMaxAssigned(FILE_GPR);
   if (free.getMaxAssigned(FILE_GPR) > prog->maxGPR)
      prog->maxGPR = free.getMaxAssigned(FILE_GPR);
   return true;
}
// Runs register allocation on every function of the program, stopping at
// the first function for which it fails.
bool
RegAlloc::exec()
{
   ArrayList::Iterator fi = prog->allFuncs.iterator();

   while (!fi.end()) {
      // execFunc() operates on the `func` member
      func = reinterpret_cast<Function *>(fi.get());
      if (!execFunc())
         return false;
      fi.next();
   }
   return true;
}
// Performs register allocation for the current function (`func`).
//
// Steps, each of which aborts the sequence on failure:
//  1. insert constraint instructions and phi moves
//  2. build live sets, one pass per loop nesting level (+1)
//  3. number the instructions and build live intervals
//  4. coalesce values (phi first, then target specific, then moves)
//  5. allocate constrained values, then run the linear scan
// The live intervals of all l-values are cleared before returning, on
// success as well as on failure.
bool
RegAlloc::execFunc()
{
   InsertConstraintsPass insertConstr;
   PhiMovesPass insertMoves;
   BuildIntervalsPass buildIntervals;
   unsigned int i;
   bool ret;
   ret = insertConstr.exec(func);
   if (!ret)
      goto out;
   ret = insertMoves.run(func);
   if (!ret)
      goto out;
   // a fresh CFG sequence number is taken for every live set pass
   for (sequence = func->cfg.nextSequence(), i = 0;
        ret && i <= func->loopNestingBound;
        sequence = func->cfg.nextSequence(), ++i)
      ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
   if (!ret)
      goto out;
   // assigns the serial numbers used as live interval positions
   func->orderInstructions(this->insns);
   ret = buildIntervals.run(func);
   if (!ret)
      goto out;
   ret = coalesceValues(JOIN_MASK_PHI);
   if (!ret)
      goto out;
   // which instruction classes get coalesced depends on the chipset family
   switch (prog->getTarget()->getChipset() & 0xf0) {
   case 0x50:
      ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_TEX);
      break;
   case 0xc0:
      ret = coalesceValues(JOIN_MASK_UNION | JOIN_MASK_CONSTRAINT);
      break;
   default:
      break;
   }
   if (!ret)
      goto out;
   ret = coalesceValues(JOIN_MASK_MOV);
   if (!ret)
      goto out;
   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      func->print();
      func->printLiveIntervals();
   }
   ret = allocateConstrainedValues() && linearScan();
   if (!ret)
      goto out;
out:
   // TODO: should probably call destructor on LValues later instead
   for (ArrayList::Iterator it = func->allLValues.iterator();
        !it.end(); it.next())
      reinterpret_cast<LValue *>(it.get())->livei.clear();
   return ret;
}
bool Program::registerAllocation()
{
RegAlloc ra(this);
return ra.exec();
}
// Runs the constraint insertion pass over @ir and, if that succeeded,
// resolves conflicting constraints by inserting moves.
bool
RegAlloc::InsertConstraintsPass::exec(Function *ir)
{
   // start from an empty list of constraint instructions
   constrList.clear();

   if (!run(ir, true, true))
      return false;
   return insertConstraintMoves();
}
// TODO: make part of texture insn
//
// Reduces the component mask of a texture instruction to the components
// whose definitions are actually referenced (refCount() != 0), and compacts
// the remaining definitions to the front of the definition list. Unused
// trailing definition slots (up to 4) are set to NULL.
void
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
{
   Value *def[4];
   int c, k, d;
   uint8_t mask = 0;
   // c: component bit, k: index of the def belonging to that component,
   // d: number of defs kept so far
   for (d = 0, k = 0, c = 0; c < 4; ++c) {
      if (!(tex->tex.mask & (1 << c)))
         continue;
      if (tex->getDef(k)->refCount()) {
         mask |= 1 << c;
         def[d++] = tex->getDef(k);
      }
      ++k;
   }
   tex->tex.mask = mask;
#if 0 // reorder or set the unused ones NULL ?
   for (c = 0; c < 4; ++c)
      if (!(tex->tex.mask & (1 << c)))
         def[d++] = tex->getDef(c);
#endif
   for (c = 0; c < d; ++c)
      tex->setDef(c, def[c]);
#if 1
   for (; c < 4; ++c)
      tex->setDef(c, NULL);
#endif
}
// Tells whether source @s of constraint instruction @cst needs to be copied
// into a fresh value. That is the case if
//  - the value is referenced by any instruction other than @cst,
//  - the same value appears again as a later source of @cst, or
//  - the value has no defining instruction, or that instruction has
//    constrained definitions.
// The register allocator cannot deal with a value that takes part in more
// than one constraint.
bool
RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
{
   ValueRef::Iterator use = cst->src[s].iterator();
   while (!use.end()) {
      Instruction *user = use.get()->getInsn();
      if (user != cst)
         return true;
      use.next();
   }

   // earlier sources were already compared against this one when
   // detectConflict was called for them, so only look at s + 1 and up
   for (int c = s + 1; cst->srcExists(c); ++c) {
      if (cst->getSrc(c) == cst->getSrc(s))
         return true;
   }

   Instruction *producer = cst->getSrc(s)->getInsn();
   if (!producer)
      return true;
   return producer->constrainedDefs();
}
// Routes the @n sources of @i starting at index @s through an OP_CONSTRAINT
// instruction, so that they can later be placed in consecutive registers.
//
// If a constraint instruction with the same first @n sources is already in
// constrList, its definitions are reused; otherwise a new one is created,
// inserted right before @i and recorded in constrList.
void
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
{
   Instruction *cst;
   int d;
   // first, look for an existing identical constraint op
   for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
      cst = reinterpret_cast<Instruction *>(it.get());
      // NOTE(review): the search stops at the first constraint whose block
      // does not dominate i's block instead of skipping it - confirm intent
      if (!i->bb->dominatedBy(cst->bb))
         break;
      for (d = 0; d < n; ++d)
         if (cst->getSrc(d) != i->getSrc(d + s))
            break;
      if (d >= n) {
         // all n sources match: reuse the existing constraint's results
         for (d = 0; d < n; ++d, ++s)
            i->setSrc(s, cst->getDef(d));
         return;
      }
   }
   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
   for (d = 0; d < n; ++s, ++d) {
      // each source gets a fresh GPR value as constrained copy
      cst->setDef(d, new_LValue(func, FILE_GPR));
      cst->setSrc(d, i->getSrc(s));
      i->setSrc(s, cst->getDef(d));
   }
   i->bb->insertBefore(i, cst);
   constrList.insert(cst);
}
// Add a dummy use of the pointer source of >= 8 byte loads after the load
// to prevent it from being assigned a register that overlaps the load's
// destination, which would produce random corruptions.
// The dummy use is an OP_NOP placed directly after @i that reads @src.
void
RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
{
   Instruction *dummyUse = new_Instruction(func, OP_NOP, TYPE_NONE);

   dummyUse->setSrc(0, src->get());
   i->bb->insertAfter(i, dummyUse);
}
// Insert constraint markers for instructions whose multiple sources must be
// located in consecutive registers.
//
// Handled per instruction of @bb:
//  - texture ops: the def mask is trimmed, then up to two source groups
//    (the first s sources and the remaining n) are constrained
//  - OP_EXPORT / OP_STORE: the data sources (starting at source 1) that make
//    up typeSizeof(dType) bytes are constrained as one group
//  - OP_LOAD: indirect loads of 8 or more bytes get a hazard on the address
// Always returns true.
bool
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
{
   TexInstruction *tex;
   Instruction *next;
   int s, n, size;
   // `next` is fetched up front because instructions get inserted around i
   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;
      if ((tex = i->asTex())) {
         textureMask(tex);
         // FIXME: this is target specific
         if (tex->op == OP_TXQ) {
            s = tex->srcCount(0xff);
            n = 0;
         } else {
            s = tex->tex.target.getArgCount();
            if (!tex->tex.target.isArray() &&
                (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
               ++s;
            n = tex->srcCount(0xff) - s;
            assert(n <= 4);
         }
         // a group of a single source needs no constraint
         if (s > 1)
            addConstraint(i, 0, s);
         if (n > 1)
            addConstraint(i, s, n);
      } else
      if (i->op == OP_EXPORT || i->op == OP_STORE) {
         // count sources from index 1 until they cover the full data size
         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
            assert(i->srcExists(s));
            size -= i->getSrc(s)->reg.size;
         }
         if ((s - 1) > 1)
            addConstraint(i, 1, s - 1);
      } else
      if (i->op == OP_LOAD) {
         if (i->src[0].isIndirect(0) && typeSizeof(i->dType) >= 8)
            addHazard(i, i->src[0].getIndirect(0));
      }
   }
   return true;
}
// Insert extra moves so that, if multiple register constraints on a value are
// in conflict, these conflicts can be resolved.
bool
RegAlloc::InsertConstraintsPass::insertConstraintMoves()
{
for (DLList::Iterator it = constrList.iterator(); !it.end(); it.next()) {
Instruction *cst = reinterpret_cast<Instruction *>(it.get());
for (int s = 0; cst->srcExists(s); ++s) {
if (!detectConflict(cst, s))
continue;
Instruction *mov = new_Instruction(func, OP_MOV,
typeOfSize(cst->src[s].getSize()));
mov->setSrc(0, cst->getSrc(s));
mov->setDef(0, new_LValue(func, FILE_GPR));
cst->setSrc(s, mov->getDef(0));
cst->bb->insertBefore(cst, mov);
}
}
return true;
}
} // namespace nv50_ir

View file

@ -0,0 +1,463 @@
#include "nv50_ir.h"
#include "nv50_ir_target.h"
namespace nv50_ir {
// Converts nv50 IR generated from TGSI to SSA form.
// DominatorTree implements an algorithm for finding immediate dominators,
// as described by T. Lengauer & R. Tarjan.
//
// The tree is built in the constructor from the given control flow graph;
// the resulting tree edges connect the `dom` nodes of the basic blocks.
class DominatorTree : public Graph
{
public:
   DominatorTree(Graph *cfg);
   ~DominatorTree() { }
   bool dominates(BasicBlock *, BasicBlock *);
   // fills in the dominance frontier list of every basic block
   void findDominanceFrontiers();
private:
   void build();
   void buildDFS(Node *);
   void squash(int);
   inline void link(int, int);
   inline int eval(int);
   void debugPrint();
   Graph *cfg;
   // vert[i]: CFG node with DFS number i (only valid during construction)
   Node **vert;
   // working storage: 5 consecutive arrays of `count` ints each, accessed
   // through the macros below (only valid during construction)
   int *data;
   // number of nodes in the CFG
   const int count;
#define SEMI(i) (data[(i) + 0 * count])
#define ANCESTOR(i) (data[(i) + 1 * count])
#define PARENT(i) (data[(i) + 2 * count])
#define LABEL(i) (data[(i) + 3 * count])
#define DOM(i) (data[(i) + 4 * count])
};
// Dumps the five per-node working arrays, one node after the other.
void DominatorTree::debugPrint()
{
   int node = 0;

   while (node < count) {
      INFO("SEMI(%i) = %i\n", node, SEMI(node));
      INFO("ANCESTOR(%i) = %i\n", node, ANCESTOR(node));
      INFO("PARENT(%i) = %i\n", node, PARENT(node));
      INFO("LABEL(%i) = %i\n", node, LABEL(node));
      INFO("DOM(%i) = %i\n", node, DOM(node));
      ++node;
   }
}
// Builds the dominator tree of @cfgraph.
//
// Nodes are numbered in the order delivered by the CFG's DFS iterator; the
// number is stored in the node's `tag`. The temporary arrays are released
// again once build() has attached the tree edges, so `vert` and `data` must
// not be used after construction.
DominatorTree::DominatorTree(Graph *cfgraph) : cfg(cfgraph),
                                               count(cfg->getSize())
{
   Iterator *iter;
   int i;
   vert = new Node * [count];
   data = new int[5 * count];
   for (i = 0, iter = cfg->iteratorDFS(true); !iter->end(); iter->next(), ++i) {
      vert[i] = reinterpret_cast<Node *>(iter->get());
      vert[i]->tag = i;
      // initially every node is its own label and has no semidominator
      // and no forest ancestor
      LABEL(i) = i;
      SEMI(i) = ANCESTOR(i) = -1;
   }
   cfg->putIterator(iter);
   build();
   delete[] vert;
   delete[] data;
}
// Depth-first walk from @node that initializes SEMI of every reached node to
// its own number and records the DFS tree parent of each node in PARENT.
// A negative SEMI marks a node that has not been visited yet.
void DominatorTree::buildDFS(Graph::Node *node)
{
   SEMI(node->tag) = node->tag;
   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
      if (SEMI(ei.getNode()->tag) < 0) {
         buildDFS(ei.getNode());
         PARENT(ei.getNode()->tag) = node->tag;
      }
   }
}
// Path compression for eval(): recursively shortens the ancestor chain of
// @v and updates LABEL(v) to the label with the smaller semidominator seen
// along the way. Must only be called for nodes with ANCESTOR(v) >= 0, as
// ANCESTOR(v) is used as an index.
void DominatorTree::squash(int v)
{
   if (ANCESTOR(ANCESTOR(v)) >= 0) {
      squash(ANCESTOR(v));
      if (SEMI(LABEL(ANCESTOR(v))) < SEMI(LABEL(v)))
         LABEL(v) = LABEL(ANCESTOR(v));
      ANCESTOR(v) = ANCESTOR(ANCESTOR(v));
   }
}
// Returns @v itself while it has not been linked to an ancestor yet;
// otherwise compresses the ancestor path and returns the label of @v.
int DominatorTree::eval(int v)
{
   if (ANCESTOR(v) >= 0) {
      squash(v);
      return LABEL(v);
   }
   return v;
}
// Makes @v the forest ancestor of @w.
void DominatorTree::link(int v, int w)
{
   ANCESTOR(w) = v;
}
// Computes the immediate dominator of every node and attaches the `dom`
// nodes of the basic blocks to this tree accordingly.
//
// Nodes are processed in decreasing DFS number to compute semidominators
// (SEMI); nodes are kept in the bucket of their semidominator until that
// node's subtree is linked. A second pass in increasing order finalizes DOM.
void DominatorTree::build()
{
   DLList *bucket = new DLList[count];
   Node *nv, *nw;
   int p, u, v, w;
   buildDFS(cfg->getRoot());
   for (w = count - 1; w >= 1; --w) {
      nw = vert[w];
      assert(nw->tag == w);
      // semidominator of w: minimum over all predecessors
      for (Graph::EdgeIterator ei = nw->incident(); !ei.end(); ei.next()) {
         nv = ei.getNode();
         v = nv->tag;
         u = eval(v);
         if (SEMI(u) < SEMI(w))
            SEMI(w) = SEMI(u);
      }
      p = PARENT(w);
      bucket[SEMI(w)].insert(nw);
      link(p, w);
      // the bucket of w's parent can be resolved now; it is emptied while
      // iterating (erase() advances the iterator)
      for (DLList::Iterator it = bucket[p].iterator(); !it.end(); it.erase()) {
         v = reinterpret_cast<Node *>(it.get())->tag;
         u = eval(v);
         DOM(v) = (SEMI(u) < SEMI(v)) ? u : p;
      }
   }
   // fix up the nodes whose dominator was only known relatively
   for (w = 1; w < count; ++w) {
      if (DOM(w) != SEMI(w))
         DOM(w) = DOM(DOM(w));
   }
   DOM(0) = 0;
   // the root's dom node becomes the root of this graph
   insert(&BasicBlock::get(cfg->getRoot())->dom);
   // attach children to parents that are already part of the tree; repeat
   // until a full sweep attaches nothing (p counts attachments per sweep)
   do {
      p = 0;
      for (v = 1; v < count; ++v) {
         nw = &BasicBlock::get(vert[DOM(v)])->dom;;
         nv = &BasicBlock::get(vert[v])->dom;
         if (nw->getGraph() && !nv->getGraph()) {
            ++p;
            nw->attach(nv, Graph::Edge::TREE);
         }
      }
   } while (p);
   delete[] bucket;
}
#undef SEMI
#undef ANCESTOR
#undef PARENT
#undef LABEL
#undef DOM
// Computes the dominance frontier (DF) of every basic block in the tree.
//
// For each block the frontier consists of
//  - its CFG successors that it does not immediately dominate, and
//  - the members of its dominator tree children's frontiers that it does not
//    immediately dominate.
// NOTE(review): the second part requires children to be processed before
// their parent; presumably iteratorDFS(false) delivers that order - confirm.
void DominatorTree::findDominanceFrontiers()
{
   Iterator *dtIter;
   BasicBlock *bb;
   for (dtIter = this->iteratorDFS(false); !dtIter->end(); dtIter->next()) {
      EdgeIterator succIter, chldIter;
      bb = BasicBlock::get(reinterpret_cast<Node *>(dtIter->get()));
      bb->getDF().clear();
      // local part: CFG successors
      for (succIter = bb->cfg.outgoing(); !succIter.end(); succIter.next()) {
         BasicBlock *dfLocal = BasicBlock::get(succIter.getNode());
         if (dfLocal->idom() != bb)
            bb->getDF().insert(dfLocal);
      }
      // inherited part: frontiers of the dominator tree children
      for (chldIter = bb->dom.outgoing(); !chldIter.end(); chldIter.next()) {
         BasicBlock *cb = BasicBlock::get(chldIter.getNode());
         DLList::Iterator dfIter = cb->getDF().iterator();
         for (; !dfIter.end(); dfIter.next()) {
            BasicBlock *dfUp = BasicBlock::get(dfIter);
            if (dfUp->idom() != bb)
               bb->getDF().insert(dfUp);
         }
      }
   }
   this->putIterator(dtIter);
}
// liveIn(bb) = usedBeforeAssigned(bb) U (liveOut(bb) - assigned(bb))
//
// Computes the live-in set of @bb (stored in bb->liveSet) before SSA
// conversion, recursing into successors that have not been visited yet in
// sequence @seq. The live-out set is the union of the successors' sets;
// self-loops are ignored.
void
Function::buildLiveSetsPreSSA(BasicBlock *bb, const int seq)
{
   BitSet usedBeforeAssigned(allLValues.getSize(), true);
   BitSet assigned(allLValues.getSize(), true);
   bb->liveSet.allocate(allLValues.getSize(), false);
   // n counts the successors merged so far
   int n = 0;
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());
      if (out == bb)
         continue;
      if (out->cfg.visit(seq))
         buildLiveSetsPreSSA(out, seq);
      if (!n++)
         bb->liveSet = out->liveSet;
      else
         bb->liveSet |= out->liveSet;
   }
   // no successor contributed and the set was never filled in: start empty
   if (!n && !bb->liveSet.marker)
      bb->liveSet.fill(0);
   bb->liveSet.marker = true;
   for (Instruction *i = bb->getEntry(); i; i = i->next) {
      // sources are checked before the defs of the same instruction are
      // marked as assigned
      for (int s = 0; i->srcExists(s); ++s)
         if (i->getSrc(s)->asLValue() && !assigned.test(i->getSrc(s)->id))
            usedBeforeAssigned.set(i->getSrc(s)->id);
      for (int d = 0; i->defExists(d); ++d)
         assigned.set(i->getDef(d)->id);
   }
   bb->liveSet.andNot(assigned);
   bb->liveSet |= usedBeforeAssigned;
}
// Renaming step of the SSA conversion: walks the dominator tree and replaces
// every definition by a fresh value, rewriting the uses accordingly.
class RenamePass
{
public:
   RenamePass(Function *);
   ~RenamePass();
   bool run();
   // renames the values in one block, then recurses into the blocks it
   // immediately dominates
   void search(BasicBlock *);
   // current SSA name of a pre-SSA value, or 0 if there is none
   inline LValue *getStackTop(Value *);
private:
   // one stack of SSA names per pre-SSA value, indexed by value id
   Stack *stack;
   Function *func;
   Program *prog;
   // instruction whose result stands in for values used without definition
   Instruction *undef;
};
// Converts all functions of the program to SSA form; gives up at the first
// function that fails.
bool
Program::convertToSSA()
{
   ArrayList::Iterator fi = allFuncs.iterator();

   while (!fi.end()) {
      Function *fn = reinterpret_cast<Function *>(fi.get());
      if (!fn->convertToSSA())
         return false;
      fi.next();
   }
   return true;
}
// XXX: add edge from entry to exit ?
// Efficiently Computing Static Single Assignment Form and
// the Control Dependence Graph,
// R. Cytron, J. Ferrante, B. K. Rosen, M. N. Wegman, F. K. Zadeck
//
// Converts this function to (pruned) SSA form:
//  0. compute live-in sets, 1. build the dominator tree and the dominance
//  frontiers, 2. insert phi instructions, 3. rename values (RenamePass).
// Returns the result of the rename pass.
bool
Function::convertToSSA()
{
   // 0. calculate live in variables (for pruned SSA)
   int seq = cfg.nextSequence();
   for (unsigned i = 0; i <= loopNestingBound; seq = cfg.nextSequence(), ++i)
      buildLiveSetsPreSSA(BasicBlock::get(cfg.getRoot()), seq);
   // reset liveSet marker for use in regalloc
   for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next())
      reinterpret_cast<BasicBlock *>(bi.get())->liveSet.marker = false;
   // 1. create the dominator tree
   domTree = new DominatorTree(&cfg);
   reinterpret_cast<DominatorTree *>(domTree)->findDominanceFrontiers();
   // 2. insert PHI functions
   DLList workList;
   LValue *lval;
   BasicBlock *bb;
   int var;
   int iterCount = 0;
   // Two per-block arrays in one allocation, both holding the iterCount of
   // the last variable they were touched for:
   //  hasAlready[b]: block b was already considered for a phi
   //  work[b]:       block b was already put on the work list
   int *hasAlready = new int[allBBlocks.getSize() * 2];
   int *work = &hasAlready[allBBlocks.getSize()];
   memset(hasAlready, 0, allBBlocks.getSize() * 2 * sizeof(int));
   // for each variable
   for (var = 0; var < allLValues.getSize(); ++var) {
      if (!allLValues.get(var))
         continue;
      lval = reinterpret_cast<Value *>(allLValues.get(var))->asLValue();
      if (!lval || !lval->defs)
         continue;
      ++iterCount;
      // TODO: don't add phi functions for values that aren't used outside
      // the BB they're defined in
      // gather blocks with assignments to lval in workList
      for (ValueDef::Iterator d = lval->defs->iterator(); !d.end(); d.next()) {
         bb = d.get()->getInsn()->bb;
         if (!bb)
            continue; // instruction likely been removed but not XXX deleted
         if (work[bb->getId()] == iterCount)
            continue;
         work[bb->getId()] = iterCount;
         workList.insert(bb);
      }
      // for each block in workList, insert a phi for lval in the block's
      // dominance frontier (if we haven't already done so)
      for (DLList::Iterator wI = workList.iterator(); !wI.end(); wI.erase()) {
         bb = BasicBlock::get(wI);
         DLList::Iterator dfIter = bb->getDF().iterator();
         for (; !dfIter.end(); dfIter.next()) {
            Instruction *phi;
            BasicBlock *dfBB = BasicBlock::get(dfIter);
            if (hasAlready[dfBB->getId()] >= iterCount)
               continue;
            hasAlready[dfBB->getId()] = iterCount;
            // pruned SSA: don't need a phi if the value is not live-in
            if (!dfBB->liveSet.test(lval->id))
               continue;
            // TODO: use dedicated PhiInstruction to lift this limit
            assert(dfBB->cfg.incidentCount() <= NV50_IR_MAX_SRCS);
            phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size));
            dfBB->insertTail(phi);
            // def and all sources still name the pre-SSA value; the rename
            // pass replaces them
            phi->setDef(0, lval);
            for (int s = 0; s < dfBB->cfg.incidentCount(); ++s)
               phi->setSrc(s, lval);
            // the phi is a new definition of lval, so its block has to be
            // processed as well
            if (work[dfBB->getId()] < iterCount) {
               work[dfBB->getId()] = iterCount;
               wI.insert(dfBB);
            }
         }
      }
   }
   delete[] hasAlready;
   RenamePass rename(this);
   return rename.run();
}
// Prepares renaming for @fn: inserts an OP_NOP with a fresh GPR result at
// the head of the root block (used as the "undefined" value) and allocates
// one name stack per l-value known to the function at this point.
RenamePass::RenamePass(Function *fn) : func(fn), prog(fn->getProgram())
{
   BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
   undef = new_Instruction(func, OP_NOP, TYPE_U32);
   undef->setDef(0, new_LValue(func, FILE_GPR));
   root->insertHead(undef);
   stack = new Stack[func->allLValues.getSize()];
}
// Releases the per-value name stacks.
RenamePass::~RenamePass()
{
   // delete[] on a null pointer is a no-op, so no check is needed
   delete[] stack;
}
// Returns the value on top of the name stack belonging to @val, or 0 if
// that stack is empty.
LValue *
RenamePass::getStackTop(Value *val)
{
   Stack &names = stack[val->id];

   if (names.getSize())
      return reinterpret_cast<LValue *>(names.peek().u.p);
   return 0;
}
bool RenamePass::run()
{
if (!stack)
return false;
search(BasicBlock::get(func->domTree->getRoot()));
ArrayList::Iterator iter = func->allInsns.iterator();
for (; !iter.end(); iter.next()) {
Instruction *insn = reinterpret_cast<Instruction *>(iter.get());
for (int d = 0; insn->defExists(d); ++d)
insn->def[d].restoreDefList();
}
return true;
}
// Renames the values of @bb and of all blocks it dominates.
//
//  1. sources of non-phi instructions are replaced by the current name of
//     the value (or the undef value if there is none); every definition gets
//     a fresh value that is pushed on the name stack of the original
//  2. in each CFG successor, the phi source slot that corresponds to @bb is
//     set to the current name
//  3. the dominator tree children are processed recursively
//  4. the names pushed in step 1 are popped again
void RenamePass::search(BasicBlock *bb)
{
   LValue *lval;
   int d, s;
   const Target *targ = prog->getTarget();
   for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
      if (stmt->op != OP_PHI) {
         for (s = 0; stmt->srcExists(s); ++s) {
            lval = stmt->getSrc(s)->asLValue();
            if (!lval)
               continue;
            lval = getStackTop(lval);
            if (!lval)
               lval = static_cast<LValue *>(undef->getDef(0));
            stmt->setSrc(s, lval);
         }
      }
      for (d = 0; stmt->defExists(d); ++d) {
         lval = stmt->def[d].get()->asLValue();
         assert(lval);
         stmt->def[d].setSSA(
            new_LValue(func, targ->nativeFile(lval->reg.file)));
         // the new value inherits the register id of the original
         stmt->def[d].get()->reg.data.id = lval->reg.data.id;
         stack[lval->id].push(stmt->def[d].get());
      }
   }
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      Instruction *phi;
      int p = 0;
      BasicBlock *sb = BasicBlock::get(ei.getNode());
      // which predecessor of sb is bb ?
      for (Graph::EdgeIterator ei = sb->cfg.incident(); !ei.end(); ei.next()) {
         if (ei.getNode() == &bb->cfg)
            break;
         ++p;
      }
      assert(p < sb->cfg.incidentCount());
      for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
         lval = getStackTop(phi->getSrc(p));
         if (!lval)
            lval = undef->getDef(0)->asLValue();
         phi->setSrc(p, lval);
      }
   }
   for (Graph::EdgeIterator ei = bb->dom.outgoing(); !ei.end(); ei.next())
      search(BasicBlock::get(ei.getNode()));
   // undo this block's pushes; preSSA() names the original value of a def
   for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
      for (d = 0; stmt->defExists(d); ++d)
         stack[stmt->def[d].preSSA()->id].pop();
   }
}
} // namespace nv50_ir

View file

@ -0,0 +1,304 @@
#include "nv50/codegen/nv50_ir.h"
#include "nv50/codegen/nv50_ir_target.h"
namespace nv50_ir {
// Number of sources per operation, indexed by the operation's enum value
// (OP_LAST + 1 entries). The trailing comments name the operations each row
// covers; the order must match the `operation` enum.
// NOTE(review): entries of 0 for PHI/UNION/... presumably mean "variable or
// not applicable" rather than "no sources" - confirm against users.
const uint8_t Target::operationSrcNr[OP_LAST + 1] =
{
   0, 0, // NOP, PHI
   0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2, // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   1, 1, 1, // ABS, NEG, NOT
   2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
   2, 2, 1, // MAX, MIN, SAT
   1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0, // PRERET,CONT,BREAK
   0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
   1, 1, // EMIT, RESTART
   1, 1, 1, // TEX, TXB, TXL,
   1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
   1, 2, // SULD, SUST
   1, 1, // DFDX, DFDY
   1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, // POPCNT, INSBF, EXTBF
   0
};
extern Target *getTargetNVC0(unsigned int chipset);

// Creates the target description for @chipset. Only the 0xc0 chipset family
// is supported at this point; for every other family an error is reported
// and 0 is returned.
Target *Target::create(unsigned int chipset)
{
   const unsigned int family = chipset & 0xf0;

   if (family == 0xc0)
      return getTargetNVC0(chipset);

   // 0x50, 0x80, 0x90, 0xa0 and anything else
   ERROR("unsupported target: NV%x\n", chipset);
   return 0;
}
// Destroys a target obtained from Target::create(). Passing a null pointer
// is harmless (delete on null is a no-op).
void Target::destroy(Target *targ)
{
   delete targ;
}
// Sets the buffer that code is emitted to and restarts emission: the size
// of the code written so far is reset to 0, @size becomes the new limit.
void
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
{
   codeSizeLimit = size;
   codeSize = 0;
   code = static_cast<uint32_t *>(ptr);
}
// Prints the emitted code as 32 bit hex words, 8 words per line.
// The start of the binary is taken to be codeSize bytes before `code`.
void
CodeEmitter::printBinary() const
{
   const unsigned int numWords = codeSize / 4;
   uint32_t *bin = code - numWords;

   INFO("program binary (%u bytes)", codeSize);

   for (unsigned int w = 0; w < numWords; ++w) {
      // line break before every group of 8 words
      if (!(w & 7))
         INFO("\n");
      INFO("%08x ", bin[w]);
   }
   INFO("\n");
}
// Lays out all functions of @prog back to back: each function starts where
// the previous one ended, and the program size grows by the function size.
void
CodeEmitter::prepareEmission(Program *prog)
{
   ArrayList::Iterator fi = prog->allFuncs.iterator();

   while (!fi.end()) {
      Function *fn = reinterpret_cast<Function *>(fi.get());

      fn->binPos = prog->binSize;
      prepareEmission(fn);
      prog->binSize += fn->binSize;

      fi.next();
   }
}
// Prepares emission of one function: allocates the array that records the
// emission order of its basic blocks and prepares every block in the order
// delivered by the CFG iterator. The root block starts at the function's
// binary position.
void
CodeEmitter::prepareEmission(Function *func)
{
   func->bbCount = 0;
   // sized for all CFG nodes; filled by prepareEmission(BasicBlock *)
   func->bbArray = new BasicBlock * [func->cfg.getSize()];
   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
   Graph::GraphIterator *iter;
   for (iter = func->cfg.iteratorCFG(); !iter->end(); iter->next())
      prepareEmission(BasicBlock::get(*iter));
   func->cfg.putIterator(iter);
}
// Prepares emission of one basic block: determines its binary position,
// appends it to the function's block array and chooses the encoding size of
// each of its instructions.
//
// Branches at the end of preceding blocks that would only jump to this block
// are removed. Short (4 byte) instructions are grouped in pairs where
// possible, if necessary by swapping adjacent instructions; a short
// instruction left without partner is widened to 8 bytes. The exit
// instruction always ends up with an 8 byte encoding.
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;
   // skip trailing blocks without code, then look for branches to bb that
   // have become no-ops
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         // drop the 8 byte branch and move all following blocks up
         in->binSize -= 8;
         func->binSize -= 8;
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;
         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;
   if (!bb->getExit())
      return;
   // determine encoding size, try to group short instructions
   // nShort: length of the current run of short instructions
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;
      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // i is long, the run before it is odd and a short instruction
         // follows: try to bring the two short ones together
         if (i->isCommutationLegal(i->next)) {
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no swap possible: widen the unpaired short instruction
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }
   // the exit instruction must use the long encoding
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
   func->binSize += bb->binSize;
}
// Emits the machine code of the whole program.
//
// The code emitter obtained from the target is owned by this function and is
// destroyed on every path out of it. On success `code` points to a MALLOC'ed
// buffer of binSize bytes and info->bin.relocData receives the relocation
// information collected during emission.
// Returns false if no emitter is available, the program is empty (code is
// set to NULL in that case) or the code buffer cannot be allocated.
bool
Program::emitBinary(struct nv50_ir_prog_info *info)
{
   CodeEmitter *emit = target->getCodeEmitter(progType);
   if (!emit)
      return false;

   emit->prepareEmission(this);

   if (dbgFlags & NV50_IR_DEBUG_BASIC)
      this->print();

   if (!binSize) {
      code = NULL;
      delete emit; // do not leak the emitter on the early exits
      return false;
   }
   code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
   if (!code) {
      delete emit;
      return false;
   }
   emit->setCodeLocation(code, binSize);

   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
      Function *fn = reinterpret_cast<Function *>(fi.get());

      // functions are emitted in the order they were laid out
      assert(emit->getCodeSize() == fn->binPos);

      for (int b = 0; b < fn->bbCount; ++b)
         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
            emit->emitInstruction(i);
   }
   // the relocation data is handed over to the caller before the emitter
   // goes away
   info->bin.relocData = emit->getRelocInfo();

   delete emit;
   return true;
}
// number of relocation entries the table grows by at a time
#define RELOC_ALLOC_INCREMENT 8

// Records a relocation for the instruction currently being emitted.
//
// @ty:   which base address is added on relocation
// @w:    index of the 32 bit word, relative to the current code position
// @data: value added to the base address
// @m:    mask of the bits in the word that receive the value
// @s:    shift applied to the value (left if positive, right if negative)
//
// The table is grown in steps of RELOC_ALLOC_INCREMENT entries.
// Returns false if the table cannot be allocated.
bool
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
                      int s)
{
   unsigned int n = relocInfo ? relocInfo->count : 0;

   if (!(n % RELOC_ALLOC_INCREMENT)) {
      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
      relocInfo = reinterpret_cast<RelocInfo *>(
         REALLOC(relocInfo, n ? size : 0,
                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
      if (!relocInfo)
         return false;
      if (n == 0) {
         // first allocation: the memory is uninitialized, so the header
         // (in particular the entry count incremented below) must be set up
         relocInfo->codePos = 0;
         relocInfo->libPos = 0;
         relocInfo->dataPos = 0;
         relocInfo->count = 0;
      }
   }
   ++relocInfo->count;

   relocInfo->entry[n].data = data;
   relocInfo->entry[n].mask = m;
   relocInfo->entry[n].offset = codeSize + w * 4;
   relocInfo->entry[n].bitPos = s;
   relocInfo->entry[n].type = ty;

   return true;
}
// Applies this relocation to @binary.
//
// The base address selected by `type` is taken from @info, `data` is added,
// the sum is shifted by `bitPos` (right for negative values, left otherwise)
// and the bits selected by `mask` in the 32 bit word at byte `offset` are
// replaced by the result.
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
   uint32_t value = 0;
   switch (type) {
   case TYPE_CODE: value = info->codePos; break;
   case TYPE_BUILTIN: value = info->libPos; break;
   case TYPE_DATA: value = info->dataPos; break;
   default:
      assert(0);
      break;
   }
   value += data;
   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
   binary[offset / 4] &= ~mask;
   binary[offset / 4] |= value & mask;
}
} // namespace nv50_ir
#include "nv50/codegen/nv50_ir_driver.h"
extern "C" {
void
nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t codePos,
uint32_t libPos,
uint32_t dataPos)
{
nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
info->codePos = codePos;
info->libPos = libPos;
info->dataPos = dataPos;
for (unsigned int i = 0; i < info->count; ++i)
info->entry[i].apply(code, info);
}
// Retrieves the builtin library code of the target for @chipset.
// If the chipset is not supported, *code is set to NULL and *size to 0.
void
nv50_ir_get_target_library(uint32_t chipset,
                           const uint32_t **code, uint32_t *size)
{
   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);

   if (!targ) {
      // Target::create() returns 0 for unsupported chipsets
      *code = NULL;
      *size = 0;
      return;
   }
   targ->getBuiltinCode(code, size);
   nv50_ir::Target::destroy(targ);
}
}

View file

@ -0,0 +1,164 @@
#ifndef __NV50_IR_TARGET_H__
#define __NV50_IR_TARGET_H__
#include "nv50_ir.h"
namespace nv50_ir {
struct RelocInfo;
// A single relocation: describes how one 32 bit word of the binary is
// patched once the final addresses are known (see apply()).
struct RelocEntry
{
   // selects the base address that is added
   enum Type
   {
      TYPE_CODE,
      TYPE_BUILTIN,
      TYPE_DATA
   };
   // value added to the base address
   uint32_t data;
   // bits of the target word that are replaced
   uint32_t mask;
   // byte offset of the target word in the binary
   uint32_t offset;
   // shift of the value: left if positive, right if negative
   int8_t bitPos;
   Type type;
   inline void apply(uint32_t *binary, const RelocInfo *info) const;
};
// Relocation table: the three base addresses used when applying relocations,
// followed by `count` entries. The entry array is a trailing variable-length
// member; storage for it is allocated together with the header.
struct RelocInfo
{
   uint32_t codePos;
   uint32_t libPos;
   uint32_t dataPos;
   // number of valid entries
   uint32_t count;
   RelocEntry entry[0];
};
class CodeEmitter
{
public:
// returns whether the instruction was encodable and written
virtual bool emitInstruction(Instruction *) = 0;
virtual uint32_t getMinEncodingSize(const Instruction *) const = 0;
void setCodeLocation(void *, uint32_t size);
inline void *getCodeLocation() const { return code; }
inline uint32_t getCodeSize() const { return codeSize; }
bool addReloc(RelocEntry::Type, int w, uint32_t data, uint32_t m,
int s);
inline void *getRelocInfo() const { return relocInfo; }
void prepareEmission(Program *);
void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
void printBinary() const;
protected:
uint32_t *code;
uint32_t codeSize;
uint32_t codeSizeLimit;
RelocInfo *relocInfo;
};
// Description of a code generation target (chipset family).
//
// Targets are created with Target::create() and destroyed with
// Target::destroy(), which deletes through a pointer to this class, hence
// the virtual destructor.
class Target
{
public:
   virtual ~Target() { }

   static Target *create(uint32_t chipset);
   static void destroy(Target *);

   // 0x50 and 0x84 to 0xaf for nv50
   // 0xc0 to 0xdf for nvc0
   inline uint32_t getChipset() const { return chipset; }

   virtual CodeEmitter *getCodeEmitter(Program::Type) = 0;

   // Drivers should upload this so we can use it from all programs.
   // The address chosen is supplied to the relocation routine.
   virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;

   virtual bool runLegalizePass(Program *, CGStage stage) const = 0;

public:
   // static properties of an operation on this target
   struct OpInfo
   {
      OpInfo *variants;
      operation op;
      uint16_t srcTypes;
      uint16_t dstTypes;
      uint32_t immdBits;
      uint8_t srcNr;
      uint8_t srcMods[3];
      uint8_t dstMods;
      uint8_t srcFiles[3];
      uint8_t dstFiles;
      unsigned int minEncSize  : 4;
      unsigned int vector      : 1;
      unsigned int predicate   : 1;
      unsigned int commutative : 1;
      unsigned int pseudo      : 1;
      unsigned int flow        : 1;
      unsigned int hasDest     : 1;
      unsigned int terminator  : 1;
   };

   inline const OpInfo& getOpInfo(const Instruction *) const;
   inline const OpInfo& getOpInfo(const operation) const;

   inline DataFile nativeFile(DataFile f) const;

   virtual bool insnCanLoad(const Instruction *insn, int s,
                            const Instruction *ld) const = 0;
   virtual bool isOpSupported(operation, DataType) const = 0;
   virtual bool isModSupported(const Instruction *,
                               int s, Modifier) const = 0;
   virtual bool isSatSupported(const Instruction *) const = 0;
   virtual bool mayPredicate(const Instruction *,
                             const Value *) const = 0;

   virtual int getLatency(const Instruction *) const { return 1; }
   virtual int getThroughput(const Instruction *) const { return 1; }

   virtual unsigned int getFileSize(DataFile) const = 0;
   virtual unsigned int getFileUnit(DataFile) const = 0;

   virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0;

public:
   bool joinAnterior; // true if join is executed before the op

   static const uint8_t operationSrcNr[OP_LAST + 1];

protected:
   uint32_t chipset;

   // maps each data file to the file used for it on this target
   DataFile nativeFileMap[DATA_FILE_COUNT];

   OpInfo opInfo[OP_LAST + 1];
};
// Operation info for the operation of @insn. The index is clamped to
// OP_LAST, so operations beyond it share the last table entry.
inline const Target::OpInfo&
Target::getOpInfo(const Instruction *insn) const
{
   return opInfo[MIN2(insn->op, OP_LAST)];
}
// Operation info for @op. No clamping is done here: @op must not exceed
// OP_LAST.
inline const Target::OpInfo&
Target::getOpInfo(const operation op) const
{
   return opInfo[op];
}
// Returns the data file that @f is mapped to on this target
// (lookup in nativeFileMap, no range check).
inline DataFile Target::nativeFile(DataFile f) const
{
   return nativeFileMap[f];
}
} // namespace nv50_ir
#endif // __NV50_IR_TARGET_H__

View file

@ -0,0 +1,253 @@
#include "nv50_ir_util.h"
namespace nv50_ir {
void DLList::clear()
{
for (Item *next, *item = head.next; item != &head; item = next) {
next = item->next;
delete item;
}
head.next = head.prev = &head;
}
void
DLList::Iterator::erase()
{
Item *rem = pos;
if (rem == term)
return;
pos = pos->next;
DLLIST_DEL(rem);
delete rem;
}
// Move the current item to the head of @dest without reallocating it.
// The iterator advances to the item's former 'next' neighbour. @dest must
// be a different list and the iterator must not be at its end.
void DLList::Iterator::moveToList(DLList& dest)
{
   assert(term != &dest.head);
   assert(pos != term);

   Item *const moved = pos;

   pos = moved->next;
   DLLIST_DEL(moved);
   DLLIST_ADDHEAD(&dest.head, moved);
}
bool
DLList::Iterator::insert(void *data)
{
Item *ins = new Item(data);
ins->next = pos->next;
ins->prev = pos;
pos->next->prev = ins;
pos->next = ins;
if (pos == term)
term = ins;
return true;
}
// Append all items of this stack on top of @that in one block copy,
// keeping their relative order (unlike repeated push(pop())), and leave
// this stack empty.
void
Stack::moveTo(Stack& that)
{
   // Nothing to move: also avoids handing memcpy a NULL array pointer,
   // which is undefined even for a zero length.
   if (!this->size)
      return;

   const unsigned int newSize = this->size + that.size;

   // grow the destination until everything fits
   while (newSize > that.limit)
      that.resize();

   memcpy(&that.array[that.size], &array[0], this->size * sizeof(Item));

   that.size = newSize;
   this->size = 0;
}
// Release all ranges owned by the interval.
Interval::~Interval()
{
   this->clear();
}
void
Interval::clear()
{
for (Range *next, *r = head; r; r = next) {
next = r->next;
delete r;
}
}
// Add the range [a, b) to the interval, merging it with any existing range
// it overlaps or touches. The range list is kept sorted by start position.
// Requires a <= b; always returns true.
bool
Interval::extend(int a, int b)
{
// nextp tracks the link that a newly created range has to be stored into
Range *r, **nextp = &head;
// NOTE: we need empty intervals for fixed registers
// if (a == b)
// return false;
assert(a <= b);
for (r = head; r; r = r->next) {
if (b < r->bgn)
break; // insert before
if (a > r->end) {
// insert after
nextp = &r->next;
continue;
}
// overlap
if (a < r->bgn) {
// new range starts earlier: grow r to the left (and right if needed),
// then absorb following ranges that are now reached
r->bgn = a;
if (b > r->end)
r->end = b;
r->coalesce(&tail);
return true;
}
if (b > r->end) {
// new range only sticks out on the right side
r->end = b;
r->coalesce(&tail);
return true;
}
// new range is fully contained in r, nothing to do
assert(a >= r->bgn);
assert(b <= r->end);
return true;
}
// no overlap found: link a new range in front of r (r may be NULL)
(*nextp) = new Range(a, b);
(*nextp)->next = r;
// walk to the end of the list to refresh the tail pointer
for (r = (*nextp); r->next; r = r->next);
tail = r;
return true;
}
// Return true if @pos lies inside one of the half-open ranges [bgn, end).
// The scan stops at the first range that starts after @pos.
bool Interval::contains(int pos)
{
   Range *r = head;

   while (r && r->bgn <= pos) {
      if (pos < r->end)
         return true;
      r = r->next;
   }
   return false;
}
bool Interval::overlaps(const Interval &iv) const
{
for (Range *rA = this->head; rA; rA = rA->next)
for (Range *rB = iv.head; rB; rB = rB->next)
if (rB->bgn < rA->end &&
rB->end > rA->bgn)
return true;
return false;
}
// Merge all ranges of @that into this interval and leave @that empty.
// The two intervals must be distinct objects.
void Interval::unify(Interval &that)
{
   assert(this != &that);
   for (Range *next, *r = that.head; r; r = next) {
      next = r->next;
      this->extend(r->bgn, r->end);
      delete r;
   }
   // Reset both list pointers: a stale tail would be dereferenced by
   // that.end() after its ranges have been freed.
   that.head = NULL;
   that.tail = NULL;
}
// Print all ranges as "[bgn end)" on one line, separated by spaces.
// Prints nothing for an empty interval.
void Interval::print() const
{
   const Range *r = head;

   if (r == NULL)
      return;

   INFO("[%i %i)", r->bgn, r->end);
   while ((r = r->next) != NULL) {
      INFO(" [%i %i)", r->bgn, r->end);
   }
   INFO("\n");
}
// Clear every bit of this set that is set in @set (this &= ~set).
// @set must not be larger than this set.
void
BitSet::andNot(const BitSet &set)
{
   assert(data && set.data);
   assert(size >= set.size);

   const unsigned int numWords = (set.size + 31) / 32;

   for (unsigned int w = 0; w < numWords; ++w)
      data[w] &= ~set.data[w];
}
// Add every bit of @set to this set (this |= set).
// @set must not be larger than this set.
BitSet& BitSet::operator|=(const BitSet &set)
{
   assert(data && set.data);
   assert(size >= set.size);

   const unsigned int numWords = (set.size + 31) / 32;

   for (unsigned int w = 0; w < numWords; ++w)
      data[w] |= set.data[w];
   return *this;
}
// (Re)size the set to @nBits bits.
//
// Existing storage is reused when it was allocated for at least @nBits
// bits, otherwise it is replaced by a fresh zeroed allocation.
// If @zero is set all bits are cleared; otherwise only the unused bits of
// the last word are cleared so that word-wise operations such as popCount()
// do not see garbage.
//
// Returns false (and leaves the set empty) if the allocation failed.
bool BitSet::allocate(unsigned int nBits, bool zero)
{
   if (data && size < nBits) {
      FREE(data);
      data = NULL;
   }
   size = nBits;

   const unsigned int numWords = (size + 31) / 32;

   if (!data) {
      // request at least one word so a 0-bit set still owns valid storage
      data = reinterpret_cast<uint32_t *>(CALLOC(numWords ? numWords : 1, 4));
      if (!data) {
         size = 0;
         return false;
      }
   }

   if (zero) {
      // clear whole words, not just (size + 7) / 8 bytes, so the padding
      // bits of the last word are zero as well
      memset(data, 0, numWords * 4);
   } else
   if (size % 32) {
      // clear unused bits only (e.g. for popCount), keep the valid ones
      data[numWords - 1] &= (1u << (size % 32)) - 1;
   }
   return true;
}
// Count the set bits, one 32-bit word at a time. Zero words are skipped
// without calling util_bitcount.
unsigned int BitSet::popCount() const
{
   const unsigned int numWords = (size + 31) / 32;
   unsigned int total = 0;

   for (unsigned int w = 0; w < numWords; ++w) {
      const uint32_t word = data[w];
      if (word)
         total += util_bitcount(word);
   }
   return total;
}
// Set every 32-bit word of the set to @val.
// For a non-zero @val the unused bits of the last, partially used word are
// cleared again afterwards.
void BitSet::fill(uint32_t val)
{
   const unsigned int numWords = (size + 31) / 32;
   const unsigned int tailBits = size % 32;

   for (unsigned int i = 0; i < numWords; ++i)
      data[i] = val;

   // The last word is data[numWords - 1]; indexing with the loop counter
   // after the loop would write one word past the end. A mask is only
   // needed when the last word is partially used (tailBits != 0, which
   // also implies numWords >= 1).
   if (val && tailBits)
      data[numWords - 1] &= ~(0xffffffffu << tailBits); // BE ?
}
// Store the union of *pA and *pB in this set. If @pB is NULL, *pA is
// simply copied via operator=.
void BitSet::setOr(BitSet *pA, BitSet *pB)
{
   if (pB == NULL) {
      *this = *pA;
      return;
   }

   const unsigned int numWords = (size + 31) / 32;

   for (unsigned int w = 0; w < numWords; ++w)
      data[w] = pA->data[w] | pB->data[w];
}
// Print the size followed by the indices of all set bits, 16 per line.
void BitSet::print() const
{
   const unsigned int numWords = (size + 31) / 32;
   unsigned int printed = 0;

   INFO("BitSet of size %u:\n", size);

   for (unsigned int i = 0; i < numWords; ++i) {
      // visit the set bits of this word from lowest to highest
      for (uint32_t bits = data[i]; bits; bits &= bits - 1) {
         const int pos = ffs(bits) - 1;

         INFO(" %i", i * 32 + pos);
         if ((++printed % 16) == 0) {
            INFO("\n");
         }
      }
   }
   // terminate a partially filled last line
   if (printed % 16) {
      INFO("\n");
   }
}
} // namespace nv50_ir

View file

@ -0,0 +1,585 @@
#ifndef __NV50_IR_UTIL_H__
#define __NV50_IR_UTIL_H__
#include <new>
#include <assert.h>
#include <stdio.h>
#include "util/u_inlines.h"
#include "util/u_memory.h"
#define ERROR(args...) debug_printf("ERROR: " args)
#define WARN(args...) debug_printf("WARNING: " args)
#define INFO(args...) debug_printf(args)
#define INFO_DBG(m, f, args...) \
do { \
if (m & NV50_IR_DEBUG_##f) \
debug_printf(args); \
} while(0)
#define FATAL(args...) \
do { \
fprintf(stderr, args); \
abort(); \
} while(0)
#define NV50_IR_FUNC_ALLOC_OBJ_DEF(obj, f, args...) \
new ((f)->getProgram()->mem_##obj.allocate()) obj(f, args)
#define new_Instruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(Instruction, f, args)
#define new_CmpInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(CmpInstruction, f, args)
#define new_TexInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args)
#define new_FlowInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args)
#define new_LValue(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args)
#define NV50_IR_PROG_ALLOC_OBJ_DEF(obj, p, args...) \
new ((p)->mem_##obj.allocate()) obj(p, args)
#define new_Symbol(p, args...) \
NV50_IR_PROG_ALLOC_OBJ_DEF(Symbol, p, args)
#define new_ImmediateValue(p, args...) \
NV50_IR_PROG_ALLOC_OBJ_DEF(ImmediateValue, p, args)
#define delete_Instruction(p, insn) (p)->releaseInstruction(insn)
#define delete_Value(p, val) (p)->releaseValue(val)
namespace nv50_ir {
// Abstract forward iterator over opaque (void *) items.
class Iterator
{
public:
   // Virtual so that concrete iterators can be destroyed safely through a
   // pointer to this base class.
   virtual ~Iterator() { }

   virtual void next() = 0;           // advance to the next item
   virtual void *get() const = 0;     // item at the current position
   virtual bool end() const = 0; // if true, get will return 0
};
// Iterator interface that can also modify the container it walks.
class ManipIterator : public Iterator
{
public:
virtual bool insert(void *) = 0; // insert after current position
virtual void erase() = 0; // remove the item at the current position
};
// Helpers for circular doubly linked lists built from nodes that have
// 'next' and 'prev' pointer members.
//
// WARNING: do not use a->prev/next for __item or __list
// (the arguments are evaluated several times while the links are rewritten)

// Unlink __item from its list and make it point to itself.
#define DLLIST_DEL(__item) \
do { \
(__item)->prev->next = (__item)->next; \
(__item)->next->prev = (__item)->prev; \
(__item)->next = (__item); \
(__item)->prev = (__item); \
} while(0)
// Link __item directly before __list, i.e. at the tail when __list is the
// list's sentinel node.
#define DLLIST_ADDTAIL(__list, __item) \
do { \
(__item)->next = (__list); \
(__item)->prev = (__list)->prev; \
(__list)->prev->next = (__item); \
(__list)->prev = (__item); \
} while(0)
// Link __item directly after __list, i.e. at the head when __list is the
// list's sentinel node.
#define DLLIST_ADDHEAD(__list, __item) \
do { \
(__item)->prev = (__list); \
(__item)->next = (__list)->next; \
(__list)->next->prev = (__item); \
(__list)->next = (__item); \
} while(0)
// Splice the circular list containing __listB in front of __listA;
// 'ty' is the node pointer type used for the temporary.
#define DLLIST_MERGE(__listA, __listB, ty) \
do { \
ty prevB = (__listB)->prev; \
(__listA)->prev->next = (__listB); \
(__listB)->prev->next = (__listA); \
(__listB)->prev = (__listA)->prev; \
(__listA)->prev = prevB; \
} while(0)
// Iterate over all items of a DLList pointer 'list' with a forward
// iterator named 'it'.
#define DLLIST_FOR_EACH(list, it) \
for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
// Circular doubly linked list of opaque (void *) items.
// 'head' is a sentinel node: an empty list has head.next == head.prev ==
// &head. The list owns its Item nodes (not the data they point to) and
// deletes them in clear() / the destructor.
class DLList
{
public:
// List node; a freshly constructed node links to itself.
class Item
{
public:
Item(void *priv) : next(this), prev(this), data(priv) { }
public:
Item *next;
Item *prev;
void *data;
};
DLList() : head(0) { }
~DLList() { clear(); }
// Insert a new node for @data (must be non-NULL) at the front.
inline void insertHead(void *data)
{
Item *item = new Item(data);
assert(data);
item->prev = &head;
item->next = head.next;
head.next->prev = item;
head.next = item;
}
// Insert a new node for @data (must be non-NULL) at the back.
inline void insertTail(void *data)
{
Item *item = new Item(data);
assert(data);
DLLIST_ADDTAIL(&head, item);
}
inline void insert(void *data) { insertTail(data); }
// Delete all nodes.
void clear();
// Forward or reverse iterator; iteration ends when the position reaches
// the terminator node (initially the list's sentinel).
class Iterator : public ManipIterator
{
public:
Iterator(Item *head, bool r) : rev(r), pos(r ? head->prev : head->next),
term(head) { }
virtual void next() { if (!end()) pos = rev ? pos->prev : pos->next; }
virtual void *get() const { return pos->data; }
virtual bool end() const { return pos == term; }
// caution: if you're at end-2 and erase it, then do next, you're at end
virtual void erase();
virtual bool insert(void *data);
// move item to another list, no consistency with its iterators though
void moveToList(DLList&);
private:
const bool rev; // true: walk from tail to head
Item *pos; // current node
Item *term; // node at which iteration stops
friend class DLList;
};
// Erase the item @pos points at; see Iterator::erase().
inline void erase(Iterator& pos)
{
pos.erase();
}
// Iterator walking from the first to the last item.
Iterator iterator()
{
return Iterator(&head, false);
}
// Iterator walking from the last to the first item.
Iterator revIterator()
{
return Iterator(&head, true);
}
private:
Item head; // sentinel node, carries no data
};
// Growable LIFO stack of small tagged-by-convention values.
// Each Item is a union; the caller has to know which member was pushed.
class Stack
{
public:
   class Item {
   public:
      union {
         void *p;
         int i;
         unsigned int u;
         float f;
         double d;
      } u;

      // zero the whole union so all members read as 0
      Item() { memset(&u, 0, sizeof(u)); }
   };

   Stack() : size(0), limit(0), array(0) { }
   ~Stack() { if (array) FREE(array); }

   inline void push(int i)          { Item data; data.u.i = i; push(data); }
   inline void push(unsigned int u) { Item data; data.u.u = u; push(data); }
   inline void push(void *p)        { Item data; data.u.p = p; push(data); }
   inline void push(float f)        { Item data; data.u.f = f; push(data); }

   // Push an item, growing the storage when it is full.
   inline void push(Item data)
   {
      if (size == limit)
         resize();
      array[size++] = data;
   }

   // Remove and return the top item. Popping an empty stack asserts and
   // returns a zeroed item.
   inline Item pop()
   {
      if (!size) {
         Item data;
         assert(0);
         return data;
      }
      return array[--size];
   }

   inline unsigned int getSize() { return size; }

   // Access the top item without removing it; the stack must not be empty.
   inline Item& peek() { assert(size); return array[size - 1]; }

   // Drop all items. With @releaseStorage the backing array is freed too.
   void clear(bool releaseStorage = false)
   {
      if (releaseStorage && array) {
         FREE(array);
         // forget the freed pointer, otherwise the destructor would free
         // it again and resize() would REALLOC a dangling pointer
         array = NULL;
      }
      size = limit = 0;
   }

   void moveTo(Stack&); // move all items to target (not like push(pop()))

private:
   // Double the capacity (at least 4 items).
   void resize()
   {
      unsigned int sizeOld, sizeNew;

      sizeOld = limit * sizeof(Item);
      limit = MAX2(4, limit + limit);
      sizeNew = limit * sizeof(Item);

      array = (Item *)REALLOC(array, sizeOld, sizeNew);
   }

   unsigned int size;  // number of items currently stored
   unsigned int limit; // capacity of 'array' in items
   Item *array;
};
// Array of pointer-or-u32 slots that grows on demand when a slot beyond
// the current capacity is accessed through the non-const operator[].
class DynArray
{
public:
   class Item
   {
   public:
      union {
         uint32_t u32;
         void *p;
      };
   };

   DynArray() : data(NULL), size(0) { }

   ~DynArray()
   {
      if (data != NULL)
         FREE(data);
   }

   // Mutable access; enlarges the array first if @i is out of range.
   inline Item& operator[](unsigned int i)
   {
      if (size <= i)
         resize(i);
      return data[i];
   }

   // Read-only access; @i is not range checked.
   inline const Item operator[](unsigned int i) const
   {
      return data[i];
   }

   // Grow the capacity (starting at 8, doubling) until @index is valid,
   // then reallocate the storage.
   void resize(unsigned int index)
   {
      const unsigned int bytesBefore = size * sizeof(Item);
      unsigned int capacity = size ? size : 8;

      while (capacity <= index)
         capacity <<= 1;
      size = capacity;

      data = (Item *)REALLOC(data, bytesBefore, size * sizeof(Item));
   }

private:
   Item *data;
   unsigned int size; // capacity in items
};
// Id-indexed collection of pointers. Ids released by remove() are kept on
// a stack and handed out again by later insert() calls; removed slots hold
// NULL and are skipped by the iterator.
class ArrayList
{
public:
   ArrayList() : size(0) { }

   // Store @item and return its id in @id (a recycled id if available).
   void insert(void *item, int& id)
   {
      if (ids.getSize())
         id = ids.pop().u.i;
      else
         id = size++;
      data[id].p = item;
   }

   // Release the slot @id and invalidate @id (set to -1).
   void remove(int& id)
   {
      const unsigned int uid = id;

      assert(uid < size && data[id].p);
      ids.push(uid);
      data[uid].p = NULL;
      id = -1;
   }

   inline int getSize() const { return size; }

   inline void *get(unsigned int id)
   {
      assert(id < size);
      return data[id].p;
   }

   // Iterates over all non-NULL slots in ascending id order.
   class Iterator : public nv50_ir::Iterator
   {
   public:
      Iterator(const ArrayList *array) : pos(0), data(array->data)
      {
         size = array->getSize();
         if (size)
            nextValid();
      }

      // skip forward to the next occupied slot (or to the end)
      void nextValid()
      {
         while ((pos < size) && !data[pos].p)
            ++pos;
      }

      void next()
      {
         if (pos >= size)
            return;
         ++pos;
         nextValid();
      }

      void *get() const { assert(pos < size); return data[pos].p; }
      bool end() const { return pos >= size; }

   private:
      unsigned int pos;
      unsigned int size;
      const DynArray& data;

      friend class ArrayList;
   };

   Iterator iterator() const { return Iterator(this); }

private:
   DynArray data;
   Stack ids;          // ids released by remove(), reused by insert()
   unsigned int size;  // number of ids handed out so far
};
// Set of integer positions stored as a singly linked list of half-open
// ranges [bgn, end), sorted by start position.
class Interval
{
public:
Interval() : head(0), tail(0) { }
~Interval();
// Add [a, b), merging with existing ranges where they meet.
bool extend(int, int);
void unify(Interval&); // clears source interval
void clear();
// Start of the first range, or -1 if the interval is empty.
inline int begin() { return head ? head->bgn : -1; }
// End of the last range, or -1 if there is no tail range.
inline int end() { checkTail(); return tail ? tail->end : -1; }
inline bool isEmpty() const { return !head; }
bool overlaps(const Interval&) const;
bool contains(int pos);
void print() const;
// Debug check that 'tail' really is the last range of the list.
inline void checkTail() const;
private:
class Range
{
public:
Range(int a, int b) : next(0), bgn(a), end(b) { }
Range *next;
int bgn;
int end;
// Absorb all following ranges that start at or before this range's
// end; if this range ends up last, store it in *ptail.
void coalesce(Range **ptail)
{
Range *rnn;
while (next && end >= next->bgn) {
assert(bgn <= next->bgn);
rnn = next->next;
end = MAX2(end, next->end);
delete next;
next = rnn;
}
if (!next)
*ptail = this;
}
};
Range *head; // first range, NULL if empty
Range *tail; // last range
};
// Fixed-size set of bits stored in an array of 32-bit words; bit i lives
// in word i / 32 at bit position i % 32.
class BitSet
{
public:
BitSet() : marker(false), data(0), size(0) { }
BitSet(unsigned int nBits, bool zero) : marker(false), data(0), size(0)
{
allocate(nBits, zero);
}
~BitSet()
{
if (data)
FREE(data);
}
// (Re)size the set to nBits bits, optionally clearing all of them.
bool allocate(unsigned int nBits, bool zero);
inline unsigned int getSize() const { return size; } // size in bits
// Set every word of the set to the given 32-bit pattern.
void fill(uint32_t val);
void setOr(BitSet *, BitSet *); // second BitSet may be NULL
inline void set(unsigned int i)
{
assert(i < size);
data[i / 32] |= 1 << (i % 32);
}
inline void clr(unsigned int i)
{
assert(i < size);
data[i / 32] &= ~(1 << (i % 32));
}
inline bool test(unsigned int i) const
{
assert(i < size);
return data[i / 32] & (1 << (i % 32));
}
BitSet& operator|=(const BitSet&);
// Copy the bits of another set; both sets must already be allocated
// with the same size (no reallocation happens here).
BitSet& operator=(const BitSet& set)
{
assert(data && set.data);
assert(size == set.size);
memcpy(data, set.data, (set.size + 7) / 8);
return *this;
}
// Clear all bits that are set in the argument.
void andNot(const BitSet&);
unsigned int popCount() const;
void print() const;
public:
bool marker; // for user
private:
uint32_t *data; // word array, NULL until allocated
unsigned int size; // number of bits
};
// Debug-only consistency check: verifies that 'tail' points at the last
// range of the list. Compiles to nothing unless register allocation
// debugging (NV50_DEBUG_PROG_RA) is enabled.
void Interval::checkTail() const
{
#if NV50_DEBUG & NV50_DEBUG_PROG_RA
   // end() may be called on an empty interval; there is no list to walk
   if (!head)
      return;

   Range *r = head;
   while (r->next)
      r = r->next;
   assert(tail == r);
#endif
}
// Pool allocator for fixed-size objects.
// Memory is obtained in chunks of (1 << objStepLog2) objects; released
// objects are chained into a free list (the link is stored in the first
// bytes of the released object itself) and handed out again first.
// Chunks are only freed when the pool is destroyed.
class MemoryPool
{
private:
// Grow the chunk pointer array from 'id' entries by 'nr' more entries.
inline bool enlargeAllocationsArray(const unsigned int id, unsigned int nr)
{
const unsigned int size = sizeof(uint8_t *) * id;
const unsigned int incr = sizeof(uint8_t *) * nr;
uint8_t **alloc = (uint8_t **)REALLOC(allocArray, size, size + incr);
if (!alloc)
return false;
allocArray = alloc;
return true;
}
// Allocate the next chunk; the chunk pointer array grows in steps of 32.
inline bool enlargeCapacity()
{
const unsigned int id = count >> objStepLog2;
uint8_t *const mem = (uint8_t *)MALLOC(objSize << objStepLog2);
if (!mem)
return false;
if (!(id % 32)) {
if (!enlargeAllocationsArray(id, 32)) {
FREE(mem);
return false;
}
}
allocArray[id] = mem;
return true;
}
public:
// size: object size in bytes, incr: log2 of the objects per chunk
MemoryPool(unsigned int size, unsigned int incr) : objSize(size),
objStepLog2(incr)
{
allocArray = NULL;
released = NULL;
count = 0;
}
~MemoryPool()
{
// number of chunks = count rounded up to whole chunks
unsigned int allocCount = (count + (1 << objStepLog2) - 1) >> objStepLog2;
for (unsigned int i = 0; i < allocCount && allocArray[i]; ++i)
FREE(allocArray[i]);
if (allocArray)
FREE(allocArray);
}
// Return storage for one object (uninitialized, or whatever a released
// object left behind), or NULL if out of memory.
void *allocate()
{
void *ret;
const unsigned int mask = (1 << objStepLog2) - 1;
// reuse a released object first
if (released) {
ret = released;
released = *(void **)released;
return ret;
}
// current chunk exhausted (or no chunk yet): get a new one
if (!(count & mask))
if (!enlargeCapacity())
return NULL;
ret = allocArray[count >> objStepLog2] + (count & mask) * objSize;
++count;
return ret;
}
// Put an object back on the free list; its first bytes are overwritten
// with the list link.
// NOTE(review): this presumes objSize >= sizeof(void *) - confirm for all pools.
void release(void *ptr)
{
*(void **)ptr = released;
released = ptr;
}
private:
uint8_t **allocArray; // array (list) of MALLOC allocations
void *released; // list of released objects
unsigned int count; // highest allocated object
const unsigned int objSize;
const unsigned int objStepLog2;
};
} // namespace nv50_ir
#endif // __NV50_IR_UTIL_H__

View file

@ -3,7 +3,7 @@ include $(TOP)/configs/current
LIBNAME = nvc0
# get C_SOURCES
# get C/CPP_SOURCES
include Makefile.sources
LIBRARY_INCLUDES = \

View file

@ -22,3 +22,8 @@ C_SOURCES := \
nvc0_push.c \
nvc0_push2.c \
nvc0_query.c
CPP_SOURCES := \
codegen/nv50_ir_emit_nvc0.cpp \
codegen/nv50_ir_lowering_nvc0.cpp \
codegen/nv50_ir_target_nvc0.cpp

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,705 @@
#include "nv50/codegen/nv50_ir.h"
#include "nv50/codegen/nv50_ir_build_util.h"
#include "nv50_ir_target_nvc0.h"
namespace nv50_ir {
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV2 3
#define QUADOP(q, r, s, t) \
((QOP_##q << 0) | (QOP_##r << 2) | \
(QOP_##s << 4) | (QOP_##t << 6))
// Legalization pass run at the SSA stage (see runLegalizePass): replaces
// operations that are implemented as calls into the builtin library.
class NVC0LegalizeSSA : public Pass
{
private:
virtual bool visit(BasicBlock *);
virtual bool visit(Function *);
// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
private:
BuildUtil bld; // instruction builder, bound to the program in visit(Function *)
};
// Replace a 32-bit integer DIV or MOD by a call to the matching builtin
// library routine.
//
// The operands are moved to $r0 / $r1, the call is emitted, and the result
// is copied from $r0 (DIV) or $r1 (MOD) to the original destination. The
// other output register and the predicates used by the routine are marked
// as clobbered. Instructions of any other type are left untouched.
void
NVC0LegalizeSSA::handleDIV(Instruction *i)
{
   FlowInstruction *call;
   int builtin;
   Value *def[2];

   // Pick the routine before emitting anything, so that an unsupported
   // type does not leave stray moves to the fixed registers behind.
   switch (i->dType) {
   case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
   case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
   default:
      return;
   }

   bld.setPosition(i, false);
   def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
   def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);

   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
   bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
   bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
   bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);

   call->fixed = 1;
   call->absolute = call->builtin = 1;
   call->target.builtin = builtin;
   delete_Instruction(prog, i);
}
// Placeholder for lowering double precision RCP / RSQ; currently does
// nothing, so such instructions pass through this pass unchanged.
void
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
{
// TODO
}
// Bind the instruction builder to the program owning @fn.
bool
NVC0LegalizeSSA::visit(Function *fn)
{
   Program *const owner = fn->getProgram();

   bld.setProgram(owner);
   return true;
}
// Walk the block and hand DIV/MOD and F64 RCP/RSQ to their handlers.
// Instructions with an F32 result type are never touched here.
bool
NVC0LegalizeSSA::visit(BasicBlock *bb)
{
   for (Instruction *insn = bb->getEntry(), *following; insn;
        insn = following) {
      // handlers may delete insn, so fetch the successor first
      following = insn->next;

      if (insn->dType == TYPE_F32)
         continue;

      const operation op = insn->op;

      if (op == OP_DIV || op == OP_MOD) {
         handleDIV(insn);
      } else
      if ((op == OP_RCP || op == OP_RSQ) && insn->dType == TYPE_F64) {
         handleRCPRSQ(insn);
      }
   }
   return true;
}
// Legalization pass run after register allocation (see runLegalizePass).
class NVC0LegalizePostRA : public Pass
{
private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
// replace zero immediate sources by r63
void replaceZero(Instruction *);
// adjust 64 bit operations (see split64BitOp)
void split64BitOp(Instruction *);
// replace CONT with BRA for a single unconditional continue
bool tryReplaceContWithBra(BasicBlock *);
// replace branches to join blocks with join ops
void propagateJoin(BasicBlock *);
LValue *r63; // GPR value with register id 63, created in visit(Function *)
};
// Create the GPR value with id 63 that replaceZero() substitutes for zero
// immediates.
bool
NVC0LegalizePostRA::visit(Function *fn)
{
   LValue *const zeroReg = new_LValue(fn, FILE_GPR);

   zeroReg->reg.data.id = 63;
   r63 = zeroReg;
   return true;
}
// Replace every immediate source of @i whose 64-bit payload is zero by
// the r63 register value.
void
NVC0LegalizePostRA::replaceZero(Instruction *i)
{
   int s = 0;

   while (i->srcExists(s)) {
      ImmediateValue *const imm = i->getSrc(s)->asImm();

      if (imm != NULL && imm->reg.data.u64 == 0)
         i->setSrc(s, r63);
      ++s;
   }
}
// Handle F64 instructions: MAD becomes FMA; ADD, MUL, FMA, CVT, MIN, MAX
// and SET are kept as they are. Every other F64 operation is retyped to
// U32 and followed by a deep clone of itself.
void
NVC0LegalizePostRA::split64BitOp(Instruction *i)
{
   if (i->dType != TYPE_F64)
      return;

   if (i->op == OP_MAD)
      i->op = OP_FMA;

   switch (i->op) {
   case OP_ADD:
   case OP_MUL:
   case OP_FMA:
   case OP_CVT:
   case OP_MIN:
   case OP_MAX:
   case OP_SET:
      return;
   default:
      break;
   }

   i->sType = TYPE_U32;
   i->dType = TYPE_U32;
   i->bb->insertAfter(i, i->clone(true)); // deep cloning
}
// replace CONT with BRA for single unconditional continue
//
// Applies to a block that starts with PRECONT and has exactly two incoming
// edges, one of which is a back edge from a block ending in an
// unpredicated CONT. Returns true if the replacement was done.
bool
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
{
if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT)
return false;
// look for the back edge among the two incoming edges
Graph::EdgeIterator ei = bb->cfg.incident();
if (ei.getType() != Graph::Edge::BACK)
ei.next();
if (ei.getType() != Graph::Edge::BACK)
return false;
// the source block of the back edge must end in an unconditional CONT
BasicBlock *contBB = BasicBlock::get(ei.getNode());
if (!contBB->getExit() || contBB->getExit()->op != OP_CONT ||
contBB->getExit()->getPredicate())
return false;
contBB->getExit()->op = OP_BRA;
bb->remove(bb->getEntry()); // delete PRECONT
// sanity check: there must be no second back edge
ei.next();
assert(ei.end() || ei.getType() != Graph::Edge::BACK);
return true;
}
// replace branches to join blocks with join ops
//
// If @bb starts with a JOIN that is not marked with 'limit', the JOIN is
// moved into the predecessors: a terminating BRA becomes a JOIN (marked so
// that it is not propagated again), and a predecessor without a terminator
// gets a new JOIN appended. The JOIN at the start of @bb is then removed.
void
NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
{
   if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit)
      return;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      BasicBlock *in = BasicBlock::get(ei.getNode());
      Instruction *exit = in->getExit();
      if (!exit) {
         // Allocate from the program's instruction pool like every other
         // instruction, so that a later release returns it to the
         // allocator it came from.
         in->insertTail(new_FlowInstruction(func, OP_JOIN, bb));
         // there should always be a terminator instruction
         WARN("inserted missing terminator in BB:%i\n", in->getId());
      } else
      if (exit->op == OP_BRA) {
         exit->op = OP_JOIN;
         exit->asFlow()->limit = 1; // must-not-propagate marker
      }
   }
   bb->remove(bb->getEntry());
}
// Per-block post-RA cleanup:
//  - EMIT/RESTART: drop an unreferenced result and replace an immediate
//    first source by r63
//  - remove no-ops
//  - replace zero immediates and handle 64 bit operations
// Afterwards a leading PRECONT / JOIN of the block is resolved.
bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
   // remove pseudo operations and non-fixed no-ops, split 64 bit operations
   for (Instruction *insn = bb->getFirst(), *following; insn;
        insn = following) {
      following = insn->next; // insn may get removed below

      if (insn->op == OP_EMIT || insn->op == OP_RESTART) {
         if (!insn->getDef(0)->refCount())
            insn->setDef(0, NULL);
         if (insn->src[0].getFile() == FILE_IMMEDIATE)
            insn->setSrc(0, r63); // initial value must be 0
         continue;
      }
      if (insn->isNop()) {
         bb->remove(insn);
         continue;
      }
      if (insn->op != OP_MOV && insn->op != OP_PFETCH)
         replaceZero(insn);
      if (typeSizeof(insn->dType) == 8)
         split64BitOp(insn);
   }

   if (bb->getEntry() && !tryReplaceContWithBra(bb))
      propagateJoin(bb);

   return true;
}
// Lowering pass run before SSA construction (see runLegalizePass): turns
// generic operations into sequences this target can encode.
class NVC0LoweringPass : public Pass
{
public:
NVC0LoweringPass(Program *);
private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
virtual bool visit(Instruction *);
// per-operation handlers, dispatched from visit(Instruction *)
bool handleRDSV(Instruction *);
bool handleWRSV(Instruction *);
bool handleEXPORT(Instruction *);
bool handleOUT(Instruction *);
bool handleDIV(Instruction *);
bool handleMOD(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);
bool handleTEX(TexInstruction *);
bool handleTXD(TexInstruction *);
bool handleManualTXD(TexInstruction *);
// make sure the predicate of an instruction lives in the predicate file
void checkPredicate(Instruction *);
// load component c of the tessellation coordinate into dst
void readTessCoord(LValue *dst, int c);
private:
const Target *const targ;
BuildUtil bld;
LValue *gpEmitAddress; // only set up for geometry programs, in visit(Function *)
};
// Bind the pass and its instruction builder to @prog.
// gpEmitAddress starts out as NULL; it is only created for geometry
// programs in visit(Function *), and handleOUT() asserts on it, which is
// meaningless for an uninitialized pointer.
NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()),
                                                    gpEmitAddress(NULL)
{
   bld.setProgram(prog);
}
// For geometry programs, load 0 into a fresh value at the start of the
// function's root block and remember it as gpEmitAddress. Other program
// types need no per-function setup.
bool
NVC0LoweringPass::visit(Function *fn)
{
   if (prog->getType() != Program::TYPE_GEOMETRY)
      return true;

   assert(!strncmp(fn->getName(), "MAIN", 4));

   // TODO: when we generate actual functions pass this value along somehow
   BasicBlock *const root = BasicBlock::get(fn->cfg.getRoot());

   bld.setPosition(root, false);
   gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
   return true;
}
// Nothing to do per block; returns true.
bool
NVC0LoweringPass::visit(BasicBlock *bb)
{
return true;
}
// move array source to first slot, convert to u16, add indirections
//
// If the texture target is an array or uses indirect resource / sampler
// indices, a combined first source is built and the coordinates are
// shifted up by one slot. Texel offsets, if used, are packed into one
// immediate that is appended as an additional source. Always returns true.
bool
NVC0LoweringPass::handleTEX(TexInstruction *i)
{
const int dim = i->tex.target.getDim();
const int arg = i->tex.target.getDim() + i->tex.target.isArray();
// generate and move the tsc/tic/array source to the front
if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(dim) : NULL;
// shift the coordinates up by one to free slot 0
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, arrayIndex);
Value *ticRel = i->getIndirectR();
Value *tscRel = i->getIndirectS();
// low bits: array index converted from F32 to U16, or 0
if (arrayIndex)
bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, arrayIndex);
else
bld.loadImm(src, 0);
// insert the indirect indices as bit fields of the combined source
if (ticRel) {
i->setSrc(i->tex.rIndirectSrc, NULL);
bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
}
if (tscRel) {
i->setSrc(i->tex.sIndirectSrc, NULL);
bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
}
i->setSrc(0, src);
}
// offset is last source (lod 1st, dc 2nd)
if (i->tex.useOffsets) {
uint32_t value = 0;
int n, c;
int s = i->srcCount(0xff);
// pack 4 bits per component, 12 bits per offset vector
for (n = 0; n < i->tex.useOffsets; ++n)
for (c = 0; c < 3; ++c)
value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
i->setSrc(s, bld.loadImm(NULL, value));
}
return true;
}
// Emulate a texture fetch with explicit derivatives by a sequence of plain
// TEX operations, one per lane of the quad: for each lane l, the lane's
// coordinates are broadcast to all lanes, dPdx / dPdy are added for the
// neighbouring lanes using quad operations, the texture is sampled, and
// the result is kept for lane l only. The per-lane results are combined
// with UNION operations and the original instruction is removed.
bool
NVC0LoweringPass::handleManualTXD(TexInstruction *i)
{
// quad operation selectors per lane: [l][0] for dPdx, [l][1] for dPdy
static const uint8_t qOps[4][2] =
{
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
};
Value *def[4][4]; // [result component][lane]
Value *crd[3];
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
// texture
bld.insert(tex = i->clone(true));
for (c = 0; c < dim; ++c)
tex->setSrc(c, crd[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
def[c][l] = bld.getSSA();
mov = bld.mkMov(def[c][l], tex->getDef(c));
mov->fixed = 1;
mov->lanes = 1 << l; // result is only kept for lane l
}
}
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
// merge the four per-lane results of each component
for (c = 0; i->defExists(c); ++c) {
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
for (l = 0; l < 4; ++l)
u->setSrc(l, def[c][l]);
}
i->bb->remove(i);
return true;
}
// Lower a texture fetch with explicit derivatives.
// After the common TEX handling, targets with more than 2 dimensions and
// shadow targets go through handleManualTXD(). Otherwise the derivatives
// are appended as regular sources (dPdx, dPdy interleaved per component)
// behind the existing arguments.
bool
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
   const int dim = txd->tex.target.getDim();
   int arg = txd->tex.target.getDim() + txd->tex.target.isArray();

   handleTEX(txd);
   if (txd->src[arg].exists())
      ++arg;

   if (dim > 2 || txd->tex.target.isShadow())
      return handleManualTXD(txd);

   // at most s/t/array, x, y, offset
   assert(arg <= 4 && !txd->src[arg].exists());

   for (int c = 0; c < dim; ++c) {
      const int slot = arg + c * 2;

      txd->src[slot + 0].set(txd->dPdx[c]);
      txd->src[slot + 1].set(txd->dPdy[c]);
      txd->dPdx[c] = NULL;
      txd->dPdy[c] = NULL;
   }
   return true;
}
// Replace a system value write by an EXPORT to the shader output address
// the target assigns to that system value. Returns false (leaving the
// instruction alone) if the address is 0x400 or above.
bool
NVC0LoweringPass::handleWRSV(Instruction *i)
{
   // must replace, $sreg are not writeable
   const uint32_t addr =
      targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
   if (addr >= 0x400)
      return false;

   Symbol *const sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);
   Instruction *const st = bld.mkStore(OP_EXPORT, i->dType, sym,
                                       i->getIndirect(0, 0), i->getSrc(1));
   st->perPatch = i->perPatch;

   bld.getBB()->remove(i);
   return true;
}
// Load component @c of the tessellation coordinate into @dst.
// Components 0 and 1 are fetched from shader output space at 0x2f0 and
// 0x2f4 respectively, addressed by the lane id. Component 2 is computed
// from the other two as 1.0 - (x + y).
void
NVC0LoweringPass::readTessCoord(LValue *dst, int c)
{
   Value *laneid = bld.getSSA();
   Value *x, *y;

   bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));

   if (c == 0) {
      x = dst;
      y = NULL;
   } else
   if (c == 1) {
      x = NULL;
      y = dst;
   } else {
      assert(c == 2);
      x = bld.getSSA();
      y = bld.getSSA();
   }
   if (x)
      bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
   if (y) // the second component goes to y; x is NULL when c == 1
      bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);

   if (c == 2) {
      bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
      bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
   }
}
// Lower a system value read.
// Values whose address is 0x400 or above stay as they are (mov $sreg).
// Everything else is replaced: the position by a linear interpolation,
// the tessellation coordinate by readTessCoord(), and any other value by
// a fetch from shader input space.
bool
NVC0LoweringPass::handleRDSV(Instruction *i)
{
   Symbol *const sym = i->getSrc(0)->asSym();
   const uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);

   if (addr >= 0x400) // mov $sreg
      return true;

   switch (i->getSrc(0)->reg.data.sv.sv) {
   case SV_POSITION: {
      assert(prog->getType() == Program::TYPE_FRAGMENT);
      Instruction *const interp = new_Instruction(func, OP_LINTERP, TYPE_F32);
      interp->setDef(0, i->getDef(0));
      interp->setSrc(0, bld.mkSymbol(FILE_SHADER_INPUT, 0, TYPE_F32, addr));
      interp->setInterpolate(NV50_IR_INTERP_LINEAR);
      bld.getBB()->insertAfter(i, interp);
      break;
   }
   case SV_TESS_COORD:
      assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
      readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
      break;
   default: {
      Value *vtx = NULL;
      if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
         vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
      Instruction *const fetch =
         bld.mkFetch(i->getDef(0), i->dType,
                     FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
      fetch->perPatch = i->perPatch;
      break;
   }
   }
   bld.getBB()->remove(i);
   return true;
}
// Turn a floating point division a / b into a * rcp(b).
// Non-float divisions are left alone here.
bool
NVC0LoweringPass::handleDIV(Instruction *i)
{
   if (isFloatType(i->dType)) {
      Instruction *const recip =
         bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));

      i->op = OP_MUL;
      i->setSrc(1, recip->getDef(0));
   }
   return true;
}
// Lower an F32 modulus to a - b * trunc(a * rcp(b)).
// Other types are left alone here.
bool
NVC0LoweringPass::handleMOD(Instruction *i)
{
   if (i->dType != TYPE_F32)
      return true;

   LValue *const tmp = bld.getScratch();

   bld.mkOp1(OP_RCP, TYPE_F32, tmp, i->getSrc(1));          // 1 / b
   bld.mkOp2(OP_MUL, TYPE_F32, tmp, i->getSrc(0), tmp);     // a / b
   bld.mkOp1(OP_TRUNC, TYPE_F32, tmp, tmp);                 // trunc(a / b)
   bld.mkOp2(OP_MUL, TYPE_F32, tmp, i->getSrc(1), tmp);     // b * trunc(a / b)

   i->op = OP_SUB;
   i->setSrc(1, tmp);
   return true;
}
// Lower sqrt(x) to x * rsq(x).
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
   Value *const operand = i->getSrc(0);
   Instruction *const invRoot =
      bld.mkOp1(OP_RSQ, TYPE_F32, bld.getSSA(), operand);

   i->op = OP_MUL;
   i->setSrc(1, invRoot->getDef(0));
   return true;
}
// Lower pow(x, y) to ex2(preex2(y * lg2(x))). The multiplication has its
// 'dnz' flag set.
bool
NVC0LoweringPass::handlePOW(Instruction *i)
{
   LValue *const tmp = bld.getScratch();

   bld.mkOp1(OP_LG2, TYPE_F32, tmp, i->getSrc(0));

   Instruction *const scaled =
      bld.mkOp2(OP_MUL, TYPE_F32, tmp, i->getSrc(1), tmp);
   scaled->dnz = 1;

   bld.mkOp1(OP_PREEX2, TYPE_F32, tmp, tmp);

   i->op = OP_EX2;
   i->setSrc(0, tmp);
   i->setSrc(1, NULL);
   return true;
}
// Lower an EXPORT.
// Fragment programs: the export becomes a MOV into the GPR whose id is the
// output offset divided by 4 (indirect exports are rejected with false),
// and the program's maxGPR is raised accordingly.
// Geometry programs: gpEmitAddress is added as second-level indirection.
// Other program types are left unchanged.
bool
NVC0LoweringPass::handleEXPORT(Instruction *i)
{
   if (prog->getType() == Program::TYPE_FRAGMENT) {
      const int id = i->getSrc(0)->reg.data.offset / 4;

      if (i->src[0].isIndirect(0)) // TODO, ugly
         return false;

      i->op = OP_MOV;
      i->src[0].set(i->src[1]);
      i->setSrc(1, NULL);

      LValue *const out = new_LValue(func, FILE_GPR);
      i->setDef(0, out);
      i->getDef(0)->reg.data.id = id;

      prog->maxGPR = MAX2(prog->maxGPR, id);
      return true;
   }

   if (prog->getType() == Program::TYPE_GEOMETRY)
      i->setIndirect(0, 1, gpEmitAddress);

   return true;
}
// Lower EMIT / RESTART.
// A RESTART directly following an EMIT is folded into that EMIT as a
// sub-operation and deleted. Otherwise gpEmitAddress becomes both the
// result and the first source; an existing first source moves to slot 1.
bool
NVC0LoweringPass::handleOUT(Instruction *i)
{
   const bool followsEmit = i->op == OP_RESTART &&
                            i->prev && i->prev->op == OP_EMIT;

   if (followsEmit) {
      i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
      delete_Instruction(prog, i);
      return true;
   }

   assert(gpEmitAddress);
   i->setDef(0, gpEmitAddress);
   if (i->srcExists(0))
      i->setSrc(1, i->getSrc(0));
   i->setSrc(0, gpEmitAddress);
   return true;
}
// Make sure the predicate of @insn lives in the predicate register file.
// If the instruction is predicated by some other kind of value (e.g. an
// f32), a comparison of that value against 0 (not-equal) is inserted and
// its result is used as the predicate instead.
void
NVC0LoweringPass::checkPredicate(Instruction *insn)
{
   Value *const pred = insn->getPredicate();

   if (pred == NULL)
      return;
   if (pred->reg.file == FILE_PREDICATE)
      return;

   Value *const flag = new_LValue(func, FILE_PREDICATE);

   // CAUTION: don't use pdst->getInsn, the definition might not be unique,
   //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass

   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, flag, bld.mkImm(0), pred);

   insn->setPredicate(insn->cc, flag);
}
//
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
// Per-instruction entry point of the lowering pass: positions the builder
// next to the instruction, fixes up its predicate if needed, and
// dispatches to the handler for its operation. Returns the handler's
// result, or true for operations handled inline / not handled at all.
bool
NVC0LoweringPass::visit(Instruction *i)
{
// New instructions are inserted right before i: after its predecessor if
// there is one, else before its successor, else at the end of the block.
if (i->prev)
bld.setPosition(i->prev, true);
else
if (i->next)
bld.setPosition(i->next, false);
else
bld.setPosition(i->bb, true);
if (i->cc != CC_ALWAYS)
checkPredicate(i);
switch (i->op) {
case OP_TEX:
case OP_TXB:
case OP_TXL:
case OP_TXF:
case OP_TXQ:
case OP_TXG:
return handleTEX(i->asTex());
case OP_TXD:
return handleTXD(i->asTex());
case OP_EX2:
// insert a PREEX2 on the source, writing to and reading back from the
// instruction's own destination value
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
i->setSrc(0, i->getDef(0));
break;
case OP_POW:
return handlePOW(i);
case OP_DIV:
return handleDIV(i);
case OP_MOD:
return handleMOD(i);
case OP_SQRT:
return handleSQRT(i);
case OP_EXPORT:
return handleEXPORT(i);
case OP_EMIT:
case OP_RESTART:
return handleOUT(i);
case OP_RDSV:
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
case OP_LOAD:
// loads from shader inputs become VFETCH (not expected in fragment
// programs)
if (i->src[0].getFile() == FILE_SHADER_INPUT) {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT);
}
break;
case OP_PINTERP:
// inputs in the offset range [0x280, 0x2c0) get the SC interpolation flag
if (i->getSrc(0)->reg.data.offset >= 0x280 &&
i->getSrc(0)->reg.data.offset < 0x2c0)
i->setInterpolate(i->getSampleMode() | NV50_IR_INTERP_SC);
break;
case OP_LINTERP:
// input at offset 0x3fc (named 'face' below): post-process the result
// with a shift and an xor inserted after the interpolation
if (i->getSrc(0)->reg.data.offset == 0x3fc) {
Value *face = i->getDef(0);
bld.setPosition(i, true);
bld.mkOp2(OP_SHL, TYPE_U32, face, face, bld.mkImm(31));
bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000));
}
break;
default:
break;
}
return true;
}
// Run the legalization pass that belongs to code generation stage @stage
// over @prog. Returns the pass result, or false for a stage without a
// pass.
bool
TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
{
   switch (stage) {
   case CG_STAGE_PRE_SSA: {
      NVC0LoweringPass pass(prog);
      return pass.run(prog, false, true);
   }
   case CG_STAGE_POST_RA: {
      NVC0LegalizePostRA pass;
      return pass.run(prog, false, true);
   }
   case CG_STAGE_SSA: {
      NVC0LegalizeSSA pass;
      return pass.run(prog, false, true);
   }
   default:
      return false;
   }
}
} // namespace nv50_ir

View file

@ -0,0 +1,568 @@
#include "nv50_ir_target_nvc0.h"
namespace nv50_ir {
// Factory: create a new NVC0 target description for @chipset.
// The returned object is allocated with new.
Target *getTargetNVC0(unsigned int chipset)
{
   Target *const target = new TargetNVC0(chipset);
   return target;
}
// Remember the chipset id and set up the per-operation info table.
TargetNVC0::TargetNVC0(unsigned int card)
{
   this->chipset = card;
   this->initOpInfo();
}
// BUILTINS / LIBRARY FUNCTIONS:
// laziness -> will just hardcode everything for the time being
// Will probably make this nicer once we support subroutines properly,
// i.e. when we have an input IR that provides function declarations.
// Hardcoded code words for the builtin routines, stored back to back in the
// order DIV U32, DIV S32, RCP F64, RSQ F64.
// A row with two words contributes 8 bytes, a row with a single word 4 bytes
// (NOTE(review): presumably long vs. short instruction encodings - confirm).
// The SIZE given for each routine is its total length in bytes; the entries
// of nvc0_builtin_offsets are sums of these sizes, so both must be kept in
// sync whenever a routine is edited.
static const uint32_t nvc0_builtin_code[] =
{
// DIV U32: slow unsigned integer division
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// SIZE: 22 * 8 bytes (#if 1 variant) / 14 * 8 bytes (#else variant)
//
#if 1
0x04009c03, 0x78000000,
0x7c209cdd,
0x0010dd18,
0x08309c03, 0x60000000,
0x05605c18,
0x0810dc2a,
0x0c209c43, 0x20040000,
0x0810dc03, 0x50000000,
0x0c209c43, 0x20040000,
0x0810dc03, 0x50000000,
0x0c209c43, 0x20040000,
0x0810dc03, 0x50000000,
0x0c209c43, 0x20040000,
0x0810dc03, 0x50000000,
0x0c209c43, 0x20040000,
0x0000dde4, 0x28000000,
0x08001c43, 0x50000000,
0x05609c18,
0x0010430d,
0x0811dc03, 0x1b0e0000,
0x08104103, 0x48000000,
0x04000002, 0x08000000,
0x0811c003, 0x1b0e0000,
0x08104103, 0x48000000,
0x040000ac,
0x90001dff,
#else
0x0401dc03, 0x1b0e0000,
0x00008003, 0x78000000,
0x0400c003, 0x78000000,
0x0c20c103, 0x48000000,
0x0c108003, 0x60000000,
0x00005c28,
0x00001d18,
0x0031c023, 0x1b0ec000,
0xb000a1e7, 0x40000000,
0x04000003, 0x6000c000,
0x0813dc03, 0x1b000000,
0x0420446c,
0x040004bd,
0x04208003, 0x5800c000,
0x0430c103, 0x4800c000,
0x0ffc5dff,
0x90001dff,
#endif
// DIV S32: slow signed integer division
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// SIZE: 18 * 8 bytes
//
0xfc05dc23, 0x188e0000,
0xfc17dc23, 0x18c40000,
0x03301e18,
0x07305e18,
0x0401dc03, 0x1b0e0000,
0x00008003, 0x78000000,
0x0400c003, 0x78000000,
0x0c20c103, 0x48000000,
0x0c108003, 0x60000000,
0x00005c28,
0x00001d18,
0x0031c023, 0x1b0ec000,
0xb000a1e7, 0x40000000,
0x04000003, 0x6000c000,
0x0813dc03, 0x1b000000,
0x0420446c,
0x040004bd,
0x04208003, 0x5800c000,
0x0430c103, 0x4800c000,
0x0ffc5dff,
0x01700e18,
0x05704a18,
0x90001dff,
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
0x9810dc08,
0x00009c28,
0x4001df18,
0x00019d18,
0x08011e01, 0x200c0000,
0x10209c01, 0x50000000,
0x08011e01, 0x200c0000,
0x10209c01, 0x50000000,
0x08011e01, 0x200c0000,
0x10201c01, 0x50000000,
0x00001de7, 0x90000000,
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
0x9c10dc08,
0x00009c28,
0x00019d18,
0x3fe1df18,
0x18001c01, 0x50000000,
0x0001dde2, 0x18ffe000,
0x08211c01, 0x50000000,
0x10011e01, 0x200c0000,
0x10209c01, 0x50000000,
0x08211c01, 0x50000000,
0x10011e01, 0x200c0000,
0x10209c01, 0x50000000,
0x08211c01, 0x50000000,
0x10011e01, 0x200c0000,
0x10201c01, 0x50000000,
0x00001de7, 0x90000000,
};
// Byte offset of each builtin inside nvc0_builtin_code, indexed by the
// NVC0_BUILTIN_* constants. Each offset is the sum of the sizes (counted in
// 8-byte units) of all routines stored in front of it.
static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
{
   (0) * 8,           // DIV U32: first routine
   (22) * 8,          // DIV S32: after DIV U32 (22 units)
   (22 + 18) * 8,     // RCP F64: after DIV S32 (18 units)
   (22 + 18 + 9) * 8  // RSQ F64: after RCP F64 (9 units)
};
// Hands out the complete builtin code table.
//   code: receives a pointer to the first word of nvc0_builtin_code
//   size: receives the size of the whole table in bytes
void
TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
   *size = sizeof(nvc0_builtin_code);
   *code = nvc0_builtin_code;
}
// Returns the byte offset of the given builtin routine inside the table
// returned by getBuiltinCode().
//   builtin: one of the NVC0_BUILTIN_* indices, i.e. 0 <= builtin <
//            NVC0_BUILTIN_COUNT
uint32_t
TargetNVC0::getBuiltinOffset(int builtin) const
{
   // The index is signed, so both ends of the range have to be checked;
   // a negative value would otherwise read in front of the offsets table.
   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT);
   return nvc0_builtin_offsets[builtin];
}
// One row of the _initProps table: per-operation capabilities that
// TargetNVC0::initOpInfo() merges into opInfo[op].
// For the per-source masks, bit s (s = 0..2) refers to source operand s.
struct opProperties
{
operation op; // operation this row describes
unsigned int mNeg : 4; // bit s set: source s accepts NV50_IR_MOD_NEG
unsigned int mAbs : 4; // bit s set: source s accepts NV50_IR_MOD_ABS
unsigned int mNot : 4; // bit s set: source s accepts NV50_IR_MOD_NOT
unsigned int mSat : 4; // 0x8 set: destination accepts NV50_IR_MOD_SAT
unsigned int fConst : 3; // bit s set: source s may be FILE_MEMORY_CONST
unsigned int fImmd : 4; // bit s set: source s may be FILE_IMMEDIATE; the last bit (0x8) indicates that a full 32 bit immediate is supported
};
// Capability table consumed by TargetNVC0::initOpInfo().
// Columns, in struct opProperties order: op, mNeg, mAbs, mNot, mSat,
// fConst (c[]), fImmd (imm). In the per-source masks bit s stands for source
// s; 0x8 in the sat column marks saturate support on the destination and
// 0x8 in the imm column marks support for full 32 bit immediates.
// Operations not listed here keep the defaults set up by initOpInfo().
static const struct opProperties _initProps[] =
{
// neg abs not sat c[] imm
{ OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
{ OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
{ OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
{ OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
{ OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
// saturate only:
{ OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
{ OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
};
// Fills in the target description tables: joinAnterior, nativeFileMap and
// one opInfo entry per operation.
// Every operation first receives a default description (F32 source and
// destination types, GPR-only operands, no modifiers, has a destination);
// the noDest list and the _initProps table then refine individual entries.
void TargetNVC0::initOpInfo()
{
   // In the two masks below, bit (n % 32) of word (n / 32) describes
   // operation n.
   static const uint32_t commutative[(OP_LAST + 31) / 32] =
   {
      // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
      0x0670ca00, 0x0000003f, 0x00000000
   };

   // NOTE(review): the first word equals the one of the commutative mask
   // although the comment names a different set of operations - verify the
   // value against the operation enum.
   static const uint32_t shortForm[(OP_LAST + 31) / 32] =
   {
      // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
      0x0670ca00, 0x00000000, 0x00000000
   };

   // operations that do not produce a result value
   static const operation noDest[] =
   {
      OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
      OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
      OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
      OP_QUADON, OP_QUADPOP
   };

   unsigned int n, k;

   joinAnterior = false;

   // every file maps to itself, except that addresses live in GPRs
   for (n = 0; n < DATA_FILE_COUNT; ++n)
      nativeFileMap[n] = (DataFile)n;
   nativeFileMap[FILE_ADDRESS] = FILE_GPR;

   // default description for every operation
   for (n = 0; n < OP_LAST; ++n) {
      const unsigned int word = n / 32;
      const unsigned int bit = n % 32;

      opInfo[n].variants = NULL;
      opInfo[n].op = (operation)n;

      opInfo[n].srcTypes = 1 << (int)TYPE_F32;
      opInfo[n].dstTypes = 1 << (int)TYPE_F32;
      opInfo[n].immdBits = 0;

      opInfo[n].srcNr = operationSrcNr[n];
      // only the slots below srcNr are initialised here
      for (k = 0; k < opInfo[n].srcNr; ++k) {
         opInfo[n].srcMods[k] = 0;
         opInfo[n].srcFiles[k] = 1 << (int)FILE_GPR;
      }
      opInfo[n].dstMods = 0;
      opInfo[n].dstFiles = 1 << (int)FILE_GPR;

      opInfo[n].hasDest = 1;
      opInfo[n].vector = (n >= OP_TEX && n <= OP_TEXCSAA);
      opInfo[n].commutative = (commutative[word] >> bit) & 1;
      opInfo[n].pseudo = (n < OP_MOV);
      opInfo[n].predicate = !opInfo[n].pseudo;
      opInfo[n].flow = (n >= OP_BRA && n <= OP_JOIN);

      if ((shortForm[word] >> bit) & 1)
         opInfo[n].minEncSize = 4;
      else
         opInfo[n].minEncSize = 8;
   }

   for (n = 0; n < sizeof(noDest) / sizeof(noDest[0]); ++n)
      opInfo[noDest[n]].hasDest = 0;

   // merge the per-operation capabilities from _initProps
   for (n = 0; n < sizeof(_initProps) / sizeof(_initProps[0]); ++n) {
      const struct opProperties &prop = _initProps[n];

      for (int src = 0; src < 3; ++src) {
         const unsigned int srcBit = 1 << src;

         if (prop.mNeg & srcBit)
            opInfo[prop.op].srcMods[src] |= NV50_IR_MOD_NEG;
         if (prop.mAbs & srcBit)
            opInfo[prop.op].srcMods[src] |= NV50_IR_MOD_ABS;
         if (prop.mNot & srcBit)
            opInfo[prop.op].srcMods[src] |= NV50_IR_MOD_NOT;
         if (prop.fConst & srcBit)
            opInfo[prop.op].srcFiles[src] |= 1 << (int)FILE_MEMORY_CONST;
         if (prop.fImmd & srcBit)
            opInfo[prop.op].srcFiles[src] |= 1 << (int)FILE_IMMEDIATE;
      }

      // 0x8 in the immediate mask: full 32 bit immediates are supported
      if (prop.fImmd & 8)
         opInfo[prop.op].immdBits = 0xffffffff;
      // 0x8 in the saturate mask: destination may be saturated
      if (prop.mSat & 8)
         opInfo[prop.op].dstMods = NV50_IR_MOD_SAT;
   }
}
// Returns the size this target reports for a data file
// (NOTE(review): the unit of the value is not visible here - confirm with
// the callers). Unknown files trip an assertion and report 0.
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
   unsigned int size = 0;

   switch (file) {
   case FILE_NULL:
   case FILE_ADDRESS:
   case FILE_IMMEDIATE:
      // these files report a size of 0
      break;
   case FILE_GPR:
      size = 63;
      break;
   case FILE_PREDICATE:
      size = 7;
      break;
   case FILE_FLAGS:
      size = 1;
      break;
   case FILE_MEMORY_CONST:
      size = 65536;
      break;
   case FILE_SHADER_INPUT:
   case FILE_SHADER_OUTPUT:
      size = 0x400;
      break;
   case FILE_MEMORY_GLOBAL:
      size = 0xffffffff;
      break;
   case FILE_MEMORY_SHARED:
      size = 16 << 10;
      break;
   case FILE_MEMORY_LOCAL:
      size = 48 << 10;
      break;
   case FILE_SYSTEM_VALUE:
      size = 32;
      break;
   default:
      assert(!"invalid file");
      break;
   }
   return size;
}
// Returns the unit value of a data file: 2 for GPRs, address registers and
// system values, 0 for everything else
// (NOTE(review): presumably log2 of the element size in bytes - confirm).
unsigned int
TargetNVC0::getFileUnit(DataFile file) const
{
   switch (file) {
   case FILE_GPR:
   case FILE_ADDRESS:
   case FILE_SYSTEM_VALUE:
      return 2;
   default:
      return 0;
   }
}
// Translates a system value symbol into its address.
//   shaderFile: only consulted for SV_PRIMITIVE_ID, which has a different
//               address when accessed as a shader input
//   sym:        symbol carrying the system value kind and its index
// Indexed values occupy 4 bytes per index. System values without an entry
// below yield 0xffffffff.
uint32_t
TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
{
   const SVSemantic semantic = sym->reg.data.sv.sv;
   const int index = sym->reg.data.sv.index;

   switch (semantic) {
   case SV_TESS_FACTOR:
      return 0x000 + index * 4;
   case SV_PRIMITIVE_ID:
      if (shaderFile == FILE_SHADER_INPUT)
         return 0x060;
      return 0x040;
   case SV_LAYER:
      return 0x064;
   case SV_VIEWPORT_INDEX:
      return 0x068;
   case SV_POINT_SIZE:
      return 0x06c;
   case SV_POSITION:
      return 0x070 + index * 4;
   case SV_CLIP_DISTANCE:
      return 0x2c0 + index * 4;
   case SV_POINT_COORD:
      return 0x2e0 + index * 4;
   case SV_TESS_COORD:
      return 0x2f0 + index * 4;
   case SV_INSTANCE_ID:
      return 0x2f8;
   case SV_VERTEX_ID:
      return 0x2fc;
   case SV_FACE:
      return 0x3fc;
   default:
      return 0xffffffff;
   }
}
bool
TargetNVC0::insnCanLoad(const Instruction *i, int s,
const Instruction *ld) const
{
DataFile sf = ld->src[0].getFile();
// immediate 0 can be represented by GPR $r63
if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
if (s > opInfo[i->op].srcNr)
return false;
if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
return false;
// indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
if (ld->src[0].isIndirect(0))
return false;
for (int k = 0; i->srcExists(k); ++k) {
if (i->src[k].getFile() == FILE_IMMEDIATE) {
if (i->getSrc(k)->reg.data.u64 != 0)
return false;
} else
if (i->src[k].getFile() != FILE_GPR &&
i->src[k].getFile() != FILE_PREDICATE) {
return false;
}
}
// not all instructions support full 32 bit immediates
if (sf == FILE_IMMEDIATE) {
Storage &reg = ld->getSrc(0)->asImm()->reg;
if (opInfo[i->op].immdBits != 0xffffffff) {
if (i->sType == TYPE_F32) {
if (reg.data.u32 & 0xfff)
return false;
} else
if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
// with u32, 0xfffff counts as 0xffffffff as well
if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
return false;
}
} else
if (i->op == OP_MAD || i->op == OP_FMA) {
// requires src == dst, cannot decide before RA
// (except if we implement more constraints)
if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
return false;
}
}
return true;
}
// Tells whether the target supports operation op on data type ty:
//  - MAD and FMA only for F32
//  - SAD only for S32
//  - POW, SQRT, DIV and MOD never
//  - everything else is reported as supported
bool
TargetNVC0::isOpSupported(operation op, DataType ty) const
{
   switch (op) {
   case OP_MAD:
   case OP_FMA:
      return ty == TYPE_F32;
   case OP_SAD:
      return ty == TYPE_S32;
   case OP_POW:
   case OP_SQRT:
   case OP_DIV:
   case OP_MOD:
      return false;
   default:
      return true;
   }
}
// Tells whether source s of insn may carry the modifier(s) mod.
//   insn: instruction whose operand is examined
//   s:    source operand index
//   mod:  modifier set that should be applied to that source
// For non-float destination types only a fixed list of operations takes
// source modifiers at all, with extra restrictions for ADD and SUB.
bool
TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
{
   if (!isFloatType(insn->dType)) {
      switch (insn->op) {
      case OP_ABS:
      case OP_NEG:
      case OP_CVT:
      case OP_CEIL:
      case OP_FLOOR:
      case OP_TRUNC:
      case OP_AND:
      case OP_OR:
      case OP_XOR:
         break;
      case OP_ADD:
         // refused if the other source is already negated
         if (insn->src[s ? 0 : 1].mod.neg())
            return false;
         break;
      case OP_SUB:
         // source 0: allowed exactly if source 1 is not negated
         if (s == 0)
            return insn->src[1].mod.neg() ? false : true;
         break;
      default:
         return false;
      }
   }
   // initOpInfo() only ever fills srcMods for sources 0..2, so index 3 must
   // be rejected too (NOTE(review): assumes srcMods has 3 entries - the
   // declaration is not in this file).
   if (s >= 3)
      return false;
   return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
// An instruction may be predicated if it does not have a predicate yet and
// its operation is flagged as predicable in opInfo. The pred argument is not
// inspected.
bool
TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
{
   return !insn->getPredicate() && opInfo[insn->op].predicate;
}
// Tells whether the result of insn may be saturated.
// CVT always may. Any other operation needs the SAT destination modifier in
// opInfo and then depends on the destination type: F32 is accepted, U32 only
// for ADD and MAD, all other types are refused.
bool
TargetNVC0::isSatSupported(const Instruction *insn) const
{
   if (insn->op == OP_CVT)
      return true;

   if (opInfo[insn->op].dstMods & NV50_IR_MOD_SAT) {
      switch (insn->dType) {
      case TYPE_U32:
         return (insn->op == OP_ADD) || (insn->op == OP_MAD);
      case TYPE_F32:
         return true;
      default:
         return false;
      }
   }
   return false;
}
// Rough latency estimate for an instruction: 24 for anything that is not a
// load, 48 for loads, 700 for loads using the CACHE_CV cache mode.
// TODO: better values
int TargetNVC0::getLatency(const Instruction *i) const
{
   if (i->op != OP_LOAD)
      return 24;
   return (i->cache == CACHE_CV) ? 700 : 48;
}
// Returns the "inverse" throughput of an instruction, i.e. the number of
// cycles needed to issue it for a full warp (32 threads).
//
// With more than one warp in flight, a higher issue latency leads to a lower
// result latency, because the MP has spent more time on other warps in the
// meantime. The value also helps to determine the number of cycles between
// instructions of a single warp.
//
int TargetNVC0::getThroughput(const Instruction *i) const
{
   // TODO: better values
   switch (i->dType) {
   case TYPE_F32:
      switch (i->op) {
      case OP_ADD:
      case OP_MUL:
      case OP_MAD:
      case OP_FMA:
         return 1;
      case OP_CVT:
      case OP_CEIL:
      case OP_FLOOR:
      case OP_TRUNC:
      case OP_SET:
      case OP_SLCT:
      case OP_MIN:
      case OP_MAX:
         return 2;
      default:
         // RCP, RSQ, LG2, SIN, COS, PRESIN, PREEX2 and everything else
         return 8;
      }
   case TYPE_U32:
   case TYPE_S32:
      switch (i->op) {
      case OP_ADD:
      case OP_AND:
      case OP_OR:
      case OP_XOR:
      case OP_NOT:
         return 1;
      default:
         // MUL, MAD, CVT, SET, SLCT, SHL, SHR, NEG, ABS, MIN, MAX
         // and everything else
         return 2;
      }
   case TYPE_F64:
      return 2;
   default:
      return 1;
   }
}
} // namespace nv50_ir

View file

@ -0,0 +1,46 @@
#include "nv50/codegen/nv50_ir_target.h"
namespace nv50_ir {
// Indices of the builtin routines, as accepted by
// TargetNVC0::getBuiltinOffset(). The values follow the order in which the
// routines are stored in the builtin code table; NVC0_BUILTIN_COUNT is the
// number of routines and sizes the offsets table.
#define NVC0_BUILTIN_DIV_U32 0
#define NVC0_BUILTIN_DIV_S32 1
#define NVC0_BUILTIN_RCP_F64 2
#define NVC0_BUILTIN_RSQ_F64 3
#define NVC0_BUILTIN_COUNT 4
// Target description for NVC0 chipsets: implements the Target interface
// with the NVC0 specific operation properties, operand constraints and
// builtin library code.
class TargetNVC0 : public Target
{
public:
// Stores the chipset id and initializes the operation info tables.
TargetNVC0(unsigned int chipset);
virtual CodeEmitter *getCodeEmitter(Program::Type);
virtual bool runLegalizePass(Program *, CGStage stage) const;
// Returns the builtin code table and its size in bytes.
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
// Whether the value loaded by ld can be used directly as source s of insn.
virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const;
virtual bool isOpSupported(operation, DataType) const;
// Whether source s of the instruction may carry the given modifier.
virtual bool isModSupported(const Instruction *, int s, Modifier) const;
// Whether the result of the instruction may be saturated.
virtual bool isSatSupported(const Instruction *) const;
// Whether the instruction can still receive a predicate.
virtual bool mayPredicate(const Instruction *, const Value *) const;
// Rough cost estimates; see the TODOs in the implementation.
virtual int getLatency(const Instruction *) const;
virtual int getThroughput(const Instruction *) const;
virtual unsigned int getFileSize(DataFile) const;
virtual unsigned int getFileUnit(DataFile) const;
// Address of a system value; 0xffffffff if the value has no address.
virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;
// Byte offset of builtin routine NVC0_BUILTIN_* inside the builtin code.
uint32_t getBuiltinOffset(int builtin) const;
private:
// Fills opInfo and the other target tables; called from the constructor.
void initOpInfo();
};
} // namespace nv50_ir

View file

@ -118,6 +118,7 @@ pipe_SOURCES += pipe_i965.c
endif
ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),)
LDFLAGS += -lstdc++
pipe_TARGETS += $(PIPE_PREFIX)nouveau.so
pipe_SOURCES += pipe_nouveau.c
endif