mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-02 17:30:29 +01:00
nvc0/ir: add support for all the new int64 tgsi opcodes
A few thoughts:
- Some of that LegalizeSSA logic should really live much earlier and be subject to the likes of DCE and other useful passes.
- Some of the "lowering" done in from_tgsi should be done later so that proper optimization might be done.

However, this all works, and the above can be improved upon later.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
009c54aa7a
commit
1aefd6159c
6 changed files with 302 additions and 5 deletions
|
|
@ -354,6 +354,14 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
|||
case TGSI_OPCODE_DSGE:
|
||||
case TGSI_OPCODE_DSEQ:
|
||||
case TGSI_OPCODE_DSNE:
|
||||
case TGSI_OPCODE_U64SEQ:
|
||||
case TGSI_OPCODE_U64SNE:
|
||||
case TGSI_OPCODE_I64SLT:
|
||||
case TGSI_OPCODE_U64SLT:
|
||||
case TGSI_OPCODE_I64SGE:
|
||||
case TGSI_OPCODE_U64SGE:
|
||||
case TGSI_OPCODE_I642F:
|
||||
case TGSI_OPCODE_U642F:
|
||||
switch (util_bitcount(mask)) {
|
||||
case 1: return 0x3;
|
||||
case 2: return 0xf;
|
||||
|
|
@ -557,6 +565,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
|
|||
case TGSI_OPCODE_SHL:
|
||||
case TGSI_OPCODE_U2F:
|
||||
case TGSI_OPCODE_U2D:
|
||||
case TGSI_OPCODE_U2I64:
|
||||
case TGSI_OPCODE_UADD:
|
||||
case TGSI_OPCODE_UDIV:
|
||||
case TGSI_OPCODE_UMOD:
|
||||
|
|
@ -587,6 +596,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
|
|||
return nv50_ir::TYPE_U32;
|
||||
case TGSI_OPCODE_I2F:
|
||||
case TGSI_OPCODE_I2D:
|
||||
case TGSI_OPCODE_I2I64:
|
||||
case TGSI_OPCODE_IDIV:
|
||||
case TGSI_OPCODE_IMUL_HI:
|
||||
case TGSI_OPCODE_IMAX:
|
||||
|
|
@ -608,6 +618,8 @@ nv50_ir::DataType Instruction::inferSrcType() const
|
|||
case TGSI_OPCODE_D2F:
|
||||
case TGSI_OPCODE_D2I:
|
||||
case TGSI_OPCODE_D2U:
|
||||
case TGSI_OPCODE_D2I64:
|
||||
case TGSI_OPCODE_D2U64:
|
||||
case TGSI_OPCODE_DABS:
|
||||
case TGSI_OPCODE_DNEG:
|
||||
case TGSI_OPCODE_DADD:
|
||||
|
|
@ -630,6 +642,34 @@ nv50_ir::DataType Instruction::inferSrcType() const
|
|||
case TGSI_OPCODE_DFLR:
|
||||
case TGSI_OPCODE_DROUND:
|
||||
return nv50_ir::TYPE_F64;
|
||||
case TGSI_OPCODE_U64SEQ:
|
||||
case TGSI_OPCODE_U64SNE:
|
||||
case TGSI_OPCODE_U64SLT:
|
||||
case TGSI_OPCODE_U64SGE:
|
||||
case TGSI_OPCODE_U64MIN:
|
||||
case TGSI_OPCODE_U64MAX:
|
||||
case TGSI_OPCODE_U64ADD:
|
||||
case TGSI_OPCODE_U64MUL:
|
||||
case TGSI_OPCODE_U64SHL:
|
||||
case TGSI_OPCODE_U64SHR:
|
||||
case TGSI_OPCODE_U64DIV:
|
||||
case TGSI_OPCODE_U64MOD:
|
||||
case TGSI_OPCODE_U642F:
|
||||
case TGSI_OPCODE_U642D:
|
||||
return nv50_ir::TYPE_U64;
|
||||
case TGSI_OPCODE_I64ABS:
|
||||
case TGSI_OPCODE_I64SSG:
|
||||
case TGSI_OPCODE_I64NEG:
|
||||
case TGSI_OPCODE_I64SLT:
|
||||
case TGSI_OPCODE_I64SGE:
|
||||
case TGSI_OPCODE_I64MIN:
|
||||
case TGSI_OPCODE_I64MAX:
|
||||
case TGSI_OPCODE_I64SHR:
|
||||
case TGSI_OPCODE_I64DIV:
|
||||
case TGSI_OPCODE_I64MOD:
|
||||
case TGSI_OPCODE_I642F:
|
||||
case TGSI_OPCODE_I642D:
|
||||
return nv50_ir::TYPE_S64;
|
||||
default:
|
||||
return nv50_ir::TYPE_F32;
|
||||
}
|
||||
|
|
@ -650,17 +690,35 @@ nv50_ir::DataType Instruction::inferDstType() const
|
|||
case TGSI_OPCODE_DSGE:
|
||||
case TGSI_OPCODE_DSLT:
|
||||
case TGSI_OPCODE_DSNE:
|
||||
case TGSI_OPCODE_I64SLT:
|
||||
case TGSI_OPCODE_I64SGE:
|
||||
case TGSI_OPCODE_U64SEQ:
|
||||
case TGSI_OPCODE_U64SNE:
|
||||
case TGSI_OPCODE_U64SLT:
|
||||
case TGSI_OPCODE_U64SGE:
|
||||
case TGSI_OPCODE_PK2H:
|
||||
return nv50_ir::TYPE_U32;
|
||||
case TGSI_OPCODE_I2F:
|
||||
case TGSI_OPCODE_U2F:
|
||||
case TGSI_OPCODE_D2F:
|
||||
case TGSI_OPCODE_I642F:
|
||||
case TGSI_OPCODE_U642F:
|
||||
case TGSI_OPCODE_UP2H:
|
||||
return nv50_ir::TYPE_F32;
|
||||
case TGSI_OPCODE_I2D:
|
||||
case TGSI_OPCODE_U2D:
|
||||
case TGSI_OPCODE_F2D:
|
||||
case TGSI_OPCODE_I642D:
|
||||
case TGSI_OPCODE_U642D:
|
||||
return nv50_ir::TYPE_F64;
|
||||
case TGSI_OPCODE_I2I64:
|
||||
case TGSI_OPCODE_U2I64:
|
||||
case TGSI_OPCODE_F2I64:
|
||||
case TGSI_OPCODE_D2I64:
|
||||
return nv50_ir::TYPE_S64;
|
||||
case TGSI_OPCODE_F2U64:
|
||||
case TGSI_OPCODE_D2U64:
|
||||
return nv50_ir::TYPE_U64;
|
||||
default:
|
||||
return inferSrcType();
|
||||
}
|
||||
|
|
@ -676,6 +734,8 @@ nv50_ir::CondCode Instruction::getSetCond() const
|
|||
case TGSI_OPCODE_USLT:
|
||||
case TGSI_OPCODE_FSLT:
|
||||
case TGSI_OPCODE_DSLT:
|
||||
case TGSI_OPCODE_I64SLT:
|
||||
case TGSI_OPCODE_U64SLT:
|
||||
return CC_LT;
|
||||
case TGSI_OPCODE_SLE:
|
||||
return CC_LE;
|
||||
|
|
@ -684,6 +744,8 @@ nv50_ir::CondCode Instruction::getSetCond() const
|
|||
case TGSI_OPCODE_USGE:
|
||||
case TGSI_OPCODE_FSGE:
|
||||
case TGSI_OPCODE_DSGE:
|
||||
case TGSI_OPCODE_I64SGE:
|
||||
case TGSI_OPCODE_U64SGE:
|
||||
return CC_GE;
|
||||
case TGSI_OPCODE_SGT:
|
||||
return CC_GT;
|
||||
|
|
@ -691,10 +753,12 @@ nv50_ir::CondCode Instruction::getSetCond() const
|
|||
case TGSI_OPCODE_USEQ:
|
||||
case TGSI_OPCODE_FSEQ:
|
||||
case TGSI_OPCODE_DSEQ:
|
||||
case TGSI_OPCODE_U64SEQ:
|
||||
return CC_EQ;
|
||||
case TGSI_OPCODE_SNE:
|
||||
case TGSI_OPCODE_FSNE:
|
||||
case TGSI_OPCODE_DSNE:
|
||||
case TGSI_OPCODE_U64SNE:
|
||||
return CC_NEU;
|
||||
case TGSI_OPCODE_USNE:
|
||||
return CC_NE;
|
||||
|
|
@ -832,6 +896,35 @@ static nv50_ir::operation translateOpcode(uint opcode)
|
|||
NV50_IR_OPCODE_CASE(DFLR, FLOOR);
|
||||
NV50_IR_OPCODE_CASE(DROUND, CVT);
|
||||
|
||||
NV50_IR_OPCODE_CASE(U64SEQ, SET);
|
||||
NV50_IR_OPCODE_CASE(U64SNE, SET);
|
||||
NV50_IR_OPCODE_CASE(U64SLT, SET);
|
||||
NV50_IR_OPCODE_CASE(U64SGE, SET);
|
||||
NV50_IR_OPCODE_CASE(I64SLT, SET);
|
||||
NV50_IR_OPCODE_CASE(I64SGE, SET);
|
||||
NV50_IR_OPCODE_CASE(I2I64, CVT);
|
||||
NV50_IR_OPCODE_CASE(U2I64, CVT);
|
||||
NV50_IR_OPCODE_CASE(F2I64, CVT);
|
||||
NV50_IR_OPCODE_CASE(F2U64, CVT);
|
||||
NV50_IR_OPCODE_CASE(D2I64, CVT);
|
||||
NV50_IR_OPCODE_CASE(D2U64, CVT);
|
||||
NV50_IR_OPCODE_CASE(I642F, CVT);
|
||||
NV50_IR_OPCODE_CASE(U642F, CVT);
|
||||
NV50_IR_OPCODE_CASE(I642D, CVT);
|
||||
NV50_IR_OPCODE_CASE(U642D, CVT);
|
||||
|
||||
NV50_IR_OPCODE_CASE(I64MIN, MIN);
|
||||
NV50_IR_OPCODE_CASE(U64MIN, MIN);
|
||||
NV50_IR_OPCODE_CASE(I64MAX, MAX);
|
||||
NV50_IR_OPCODE_CASE(U64MAX, MAX);
|
||||
NV50_IR_OPCODE_CASE(I64ABS, ABS);
|
||||
NV50_IR_OPCODE_CASE(I64NEG, NEG);
|
||||
NV50_IR_OPCODE_CASE(U64ADD, ADD);
|
||||
NV50_IR_OPCODE_CASE(U64MUL, MUL);
|
||||
NV50_IR_OPCODE_CASE(U64SHL, SHL);
|
||||
NV50_IR_OPCODE_CASE(I64SHR, SHR);
|
||||
NV50_IR_OPCODE_CASE(U64SHR, SHR);
|
||||
|
||||
NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
|
||||
NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
|
||||
|
||||
|
|
@ -3721,6 +3814,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
case TGSI_OPCODE_INTERP_OFFSET:
|
||||
handleINTERP(dst0);
|
||||
break;
|
||||
case TGSI_OPCODE_I642F:
|
||||
case TGSI_OPCODE_U642F:
|
||||
case TGSI_OPCODE_D2I:
|
||||
case TGSI_OPCODE_D2U:
|
||||
case TGSI_OPCODE_D2F: {
|
||||
|
|
@ -3737,16 +3832,79 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_I2I64:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
dst0[c] = fetchSrc(0, c / 2);
|
||||
mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_U2I64:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
dst0[c] = fetchSrc(0, c / 2);
|
||||
dst0[c + 1] = zero;
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_F2I64:
|
||||
case TGSI_OPCODE_F2U64:
|
||||
case TGSI_OPCODE_I2D:
|
||||
case TGSI_OPCODE_U2D:
|
||||
case TGSI_OPCODE_F2D:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
Value *dreg = getSSA(8);
|
||||
mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
|
||||
Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
|
||||
if (!isFloatType(dstTy))
|
||||
cvt->rnd = ROUND_Z;
|
||||
mkSplit(&dst0[c], 4, dreg);
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_D2I64:
|
||||
case TGSI_OPCODE_D2U64:
|
||||
case TGSI_OPCODE_I642D:
|
||||
case TGSI_OPCODE_U642D:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = getSSA(8);
|
||||
Value *dst = getSSA(8), *tmp[2];
|
||||
tmp[0] = fetchSrc(0, c);
|
||||
tmp[1] = fetchSrc(0, c + 1);
|
||||
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
|
||||
Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
|
||||
if (!isFloatType(dstTy))
|
||||
cvt->rnd = ROUND_Z;
|
||||
mkSplit(&dst0[c], 4, dst);
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_I64NEG:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = getSSA(8);
|
||||
Value *dst = getSSA(8), *tmp[2];
|
||||
tmp[0] = fetchSrc(0, c);
|
||||
tmp[1] = fetchSrc(0, c + 1);
|
||||
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
|
||||
mkOp2(OP_SUB, dstTy, dst, zero, src0);
|
||||
mkSplit(&dst0[c], 4, dst);
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_I64ABS:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = getSSA(8);
|
||||
Value *neg = getSSA(8), *srcComp[2], *negComp[2];
|
||||
srcComp[0] = fetchSrc(0, c);
|
||||
srcComp[1] = fetchSrc(0, c + 1);
|
||||
mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
|
||||
mkOp2(OP_SUB, dstTy, neg, zero, src0);
|
||||
mkSplit(negComp, 4, neg);
|
||||
mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
|
||||
negComp[0], srcComp[0], srcComp[1]);
|
||||
mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
|
||||
negComp[1], srcComp[1], srcComp[1]);
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_DABS:
|
||||
case TGSI_OPCODE_DNEG:
|
||||
case TGSI_OPCODE_DRCP:
|
||||
|
|
@ -3779,6 +3937,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_U64SEQ:
|
||||
case TGSI_OPCODE_U64SNE:
|
||||
case TGSI_OPCODE_U64SLT:
|
||||
case TGSI_OPCODE_U64SGE:
|
||||
case TGSI_OPCODE_I64SLT:
|
||||
case TGSI_OPCODE_I64SGE:
|
||||
case TGSI_OPCODE_DSLT:
|
||||
case TGSI_OPCODE_DSGE:
|
||||
case TGSI_OPCODE_DSEQ:
|
||||
|
|
@ -3800,6 +3964,46 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_U64MIN:
|
||||
case TGSI_OPCODE_U64MAX:
|
||||
case TGSI_OPCODE_I64MIN:
|
||||
case TGSI_OPCODE_I64MAX: {
|
||||
dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
Value *flag = getSSA(1, FILE_FLAGS);
|
||||
src0 = fetchSrc(0, c + 1);
|
||||
src1 = fetchSrc(1, c + 1);
|
||||
geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
|
||||
geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
|
||||
geni->setFlagsDef(1, flag);
|
||||
|
||||
src0 = fetchSrc(0, c);
|
||||
src1 = fetchSrc(1, c);
|
||||
geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
|
||||
geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
|
||||
geni->setFlagsSrc(2, flag);
|
||||
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_U64SHL:
|
||||
case TGSI_OPCODE_I64SHR:
|
||||
case TGSI_OPCODE_U64SHR:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = getSSA(8);
|
||||
Value *dst = getSSA(8), *tmp[2];
|
||||
tmp[0] = fetchSrc(0, c);
|
||||
tmp[1] = fetchSrc(0, c + 1);
|
||||
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
|
||||
src1 = fetchSrc(1, c / 2);
|
||||
mkOp2(op, dstTy, dst, src0, src1);
|
||||
mkSplit(&dst0[c], 4, dst);
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_U64ADD:
|
||||
case TGSI_OPCODE_U64MUL:
|
||||
case TGSI_OPCODE_DADD:
|
||||
case TGSI_OPCODE_DMUL:
|
||||
case TGSI_OPCODE_DDIV:
|
||||
|
|
@ -3873,6 +4077,22 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
mkSplit(&dst0[c], 4, dst);
|
||||
c++;
|
||||
}
|
||||
case TGSI_OPCODE_I64SSG:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = getSSA(8);
|
||||
Value *tmp[2];
|
||||
tmp[0] = fetchSrc(0, c);
|
||||
tmp[1] = fetchSrc(0, c + 1);
|
||||
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
|
||||
|
||||
val0 = getScratch();
|
||||
val1 = getScratch();
|
||||
mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
|
||||
mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
|
||||
mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
|
||||
mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
|
||||
c++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
|
||||
|
|
|
|||
|
|
@ -147,6 +147,59 @@ NVC0LegalizeSSA::handleTEXLOD(TexInstruction *i)
|
|||
i->moveSources(arg + 1, -1);
|
||||
}
|
||||
|
||||
// Rewrites a shift instruction with a 64-bit source type as a pair of
// instructions with 32-bit destinations: the original one plus a newly
// created one. The 64-bit source is split into two 4-byte halves, each
// instruction is given three sources built from those halves and the shift
// amount, and the two 32-bit results are merged back into the value the
// original instruction used to define.
//
// lo: the shift instruction to rewrite. NVC0LegalizeSSA::visit(BasicBlock *)
//     calls this for OP_SHR / OP_SHL whose sType is 8 bytes wide.
void
|
||||
NVC0LegalizeSSA::handleShift(Instruction *lo)
|
||||
{
|
||||
// Create the second instruction with the same opcode and a U32 type, and
// place it directly after the original in the same basic block.
Instruction *hi = new_Instruction(func, lo->op, TYPE_U32);
|
||||
lo->bb->insertAfter(lo, hi);
|
||||
// NOTE(review): presumably positions the builder before `lo`, so the split
// below is emitted ahead of both shifts -- confirm against
// BuildUtil::setPosition.
bld.setPosition(lo, false);
|
||||
|
||||
// src[]: the two 4-byte halves of the 64-bit operand.
// dst[]: fresh SSA values for the two 32-bit results.
Value *src[2], *dst[2] = {bld.getSSA(), bld.getSSA()};
|
||||
// Remember the original 64-bit def and the shift amount before any
// defs/sources are overwritten.
Value *dst64 = lo->getDef(0), *shift = lo->getSrc(1);
|
||||
bld.mkSplit(src, 4, lo->getSrc(0));
|
||||
|
||||
// The new instruction inherits the original's source type, while the
// original's destination type is narrowed to U32.
hi->sType = lo->sType;
|
||||
lo->dType = TYPE_U32;
|
||||
|
||||
// Defs are assigned BEFORE the possible swap below: the original
// instruction always defines dst[0] and the new one always defines dst[1].
hi->setDef(0, dst[1]);
|
||||
// For right shifts, the new instruction is tagged with the SHIFT_HIGH subop.
if (lo->op == OP_SHR)
|
||||
hi->subOp |= NV50_IR_SUBOP_SHIFT_HIGH;
|
||||
lo->setDef(0, dst[0]);
|
||||
|
||||
// NOTE(review): presumably moves the builder to just after the new
// instruction, so the final merge follows both shifts -- confirm.
bld.setPosition(hi, true);
|
||||
|
||||
// For left shifts the two pointers trade roles: from here on `hi` names the
// original instruction and `lo` names the new one. Both carry the same
// opcode, so the `lo->op == OP_SHL` test below still holds after the swap.
if (lo->op == OP_SHL)
|
||||
std::swap(hi, lo);
|
||||
|
||||
// `hi` gets an immediate 0 as source 0, the shift amount as source 1, and
// as source 2 one of the split halves: src[0] for SHL, src[1] for SHR.
hi->setSrc(0, new_ImmediateValue(prog, 0u));
|
||||
hi->setSrc(1, shift);
|
||||
hi->setSrc(2, lo->op == OP_SHL ? src[0] : src[1]);
|
||||
|
||||
// `lo` gets both halves of the operand around the shift amount.
lo->setSrc(0, src[0]);
|
||||
lo->setSrc(1, shift);
|
||||
lo->setSrc(2, src[1]);
|
||||
|
||||
// Recombine the two 32-bit results into the original 64-bit destination so
// existing users of dst64 are unaffected.
bld.mkOp2(OP_MERGE, TYPE_U64, dst64, dst[0], dst[1]);
|
||||
}
|
||||
|
||||
// Rewrites a compare instruction on 64-bit integer sources so that it works
// on 32-bit halves. A SUB of the two low halves is emitted that only defines
// a flags value; the compare is then changed to take the two high halves as
// its sources plus that flags value, and its source type is narrowed to
// 32 bits.
//
// cmp: the compare to rewrite. NVC0LegalizeSSA::visit(BasicBlock *) calls
//      this for OP_SET / OP_SET_AND / OP_SET_OR / OP_SET_XOR whose sType is
//      8 bytes wide and not TYPE_F64.
void
|
||||
NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
|
||||
{
|
||||
// 32-bit type used for the halves: signed when the compare is S64,
// unsigned otherwise.
DataType hTy = cmp->sType == TYPE_S64 ? TYPE_S32 : TYPE_U32;
|
||||
Value *carry;
|
||||
// src0[] / src1[]: the two 4-byte halves of each 64-bit operand.
Value *src0[2], *src1[2];
|
||||
// NOTE(review): presumably positions the builder before `cmp`, so the
// splits and the SUB are emitted ahead of it -- confirm against
// BuildUtil::setPosition.
bld.setPosition(cmp, false);
|
||||
|
||||
bld.mkSplit(src0, 4, cmp->getSrc(0));
|
||||
bld.mkSplit(src1, 4, cmp->getSrc(1));
|
||||
// Subtract the low halves with no regular destination (NULL); the only
// result is a fresh FILE_FLAGS SSA value, stored in `carry`.
bld.mkOp2(OP_SUB, hTy, NULL, src0[0], src1[0])
|
||||
->setFlagsDef(1, (carry = bld.getSSA(1, FILE_FLAGS)));
|
||||
// Attach that flags value to the compare as a flags source, at index
// cmp->srcCount().
cmp->setFlagsSrc(cmp->srcCount(), carry);
|
||||
// The compare itself now only looks at the high halves.
cmp->setSrc(0, src0[1]);
|
||||
cmp->setSrc(1, src1[1]);
|
||||
cmp->sType = hTy;
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LegalizeSSA::visit(Function *fn)
|
||||
{
|
||||
|
|
@ -179,6 +232,18 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
|
|||
case OP_TXF:
|
||||
handleTEXLOD(i->asTex());
|
||||
break;
|
||||
case OP_SHR:
|
||||
case OP_SHL:
|
||||
if (typeSizeof(i->sType) == 8)
|
||||
handleShift(i);
|
||||
break;
|
||||
case OP_SET:
|
||||
case OP_SET_AND:
|
||||
case OP_SET_OR:
|
||||
case OP_SET_XOR:
|
||||
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
|
||||
handleSET(i->asCmp());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -612,7 +677,7 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
|
|||
} else {
|
||||
// TODO: Move this to before register allocation for operations that
|
||||
// need the $c register !
|
||||
if (typeSizeof(i->dType) == 8) {
|
||||
if (typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) {
|
||||
Instruction *hi;
|
||||
hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry);
|
||||
if (hi)
|
||||
|
|
|
|||
|
|
@ -35,7 +35,9 @@ private:
|
|||
void handleDIV(Instruction *); // integer division, modulus
|
||||
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
|
||||
void handleFTZ(Instruction *);
|
||||
void handleSET(CmpInstruction *);
|
||||
void handleTEXLOD(TexInstruction *);
|
||||
void handleShift(Instruction *);
|
||||
|
||||
protected:
|
||||
BuildUtil bld;
|
||||
|
|
|
|||
|
|
@ -1054,8 +1054,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
|
||||
}
|
||||
break;
|
||||
case OP_ADD:
|
||||
case OP_SUB:
|
||||
if (imm0.isInteger(0) && s == 0 && typeSizeof(i->dType) == 8 &&
|
||||
!isFloatType(i->dType))
|
||||
break;
|
||||
/* fallthrough */
|
||||
case OP_ADD:
|
||||
if (i->usesFlags())
|
||||
break;
|
||||
if (imm0.isInteger(0)) {
|
||||
|
|
|
|||
|
|
@ -392,7 +392,8 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
|
|||
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
|
||||
emit->emitInstruction(i);
|
||||
info->bin.instructions++;
|
||||
if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
|
||||
if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
|
||||
(isFloatType(i->sType) || isFloatType(i->dType)))
|
||||
info->io.fp64 = true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -329,6 +329,10 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
|
|||
// indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
|
||||
if (ld->src(0).isIndirect(0))
|
||||
return false;
|
||||
// these are implemented using shf.r and shf.l which can't load consts
|
||||
if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
|
||||
sf == FILE_MEMORY_CONST)
|
||||
return false;
|
||||
|
||||
for (int k = 0; i->srcExists(k); ++k) {
|
||||
if (i->src(k).getFile() == FILE_IMMEDIATE) {
|
||||
|
|
@ -340,7 +344,8 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
|
|||
return false;
|
||||
} else
|
||||
if (i->src(k).getFile() != FILE_GPR &&
|
||||
i->src(k).getFile() != FILE_PREDICATE) {
|
||||
i->src(k).getFile() != FILE_PREDICATE &&
|
||||
i->src(k).getFile() != FILE_FLAGS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue