radeon/llvm: Remove AMDIL FTOI and ITOF instructions

This commit is contained in:
Tom Stellard 2012-05-24 08:37:49 -04:00
parent a8ba697c1e
commit 9d41a401dc
7 changed files with 7 additions and 316 deletions

View file

@ -48,7 +48,7 @@ my $FILE_TYPE = $ARGV[0];
open AMDIL, '<', 'AMDILInstructions.td';
my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'SMULHI_i32', 'SMUL_i32', 'LOG_f32', 'RSQ_f32', 'SIN_f32', 'COS_f32');
my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'SMULHI_i32', 'SMUL_i32', 'LOG_f32', 'RSQ_f32', 'SIN_f32', 'COS_f32');
while (<AMDIL>) {
if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) {

View file

@ -48,8 +48,6 @@ bool AMDGPU::isTransOp(unsigned opcode)
case AMDIL::COS_r600:
case AMDIL::COS_eg:
case AMDIL::RSQ_f32:
case AMDIL::FTOI:
case AMDIL::ITOF:
case AMDIL::MULLIT:
case AMDIL::MUL_LIT_r600:
case AMDIL::MUL_LIT_eg:

View file

@ -190,16 +190,6 @@ def sitol_i64:Pat < (i64 (sext GPRI32:$src)),
def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)),
(f32
(ITOF
(SHR_i32
(SHL_i32
(IL_ASINT_i8 GPRI8:$src),
(LOADCONST_i32 24)),
(LOADCONST_i32 24)))) >;
def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)),
(f32
(UTOF
@ -210,16 +200,6 @@ def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)),
(LOADCONST_i32 24)))) >;
def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)),
(f64 (FTOD
(ITOF
(SHR_i32
(SHL_i32
(IL_ASINT_i8 GPRI8:$src),
(LOADCONST_i32 24)),
(LOADCONST_i32 24))))) >;
def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)),
(f64 (FTOD
(UTOF
@ -229,16 +209,6 @@ def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)),
(LOADCONST_i32 24)),
(LOADCONST_i32 24))))) >;
def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)),
(f32
(ITOF
(SHR_i32
(SHL_i32
(IL_ASINT_i16 GPRI16:$src),
(LOADCONST_i32 16)),
(LOADCONST_i32 16)))) >;
def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)),
(f32
(UTOF
@ -248,16 +218,6 @@ def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)),
(LOADCONST_i32 16)),
(LOADCONST_i32 16)))) >;
def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)),
(f64 (FTOD
(ITOF
(SHR_i32
(SHL_i32
(IL_ASINT_i16 GPRI16:$src),
(LOADCONST_i32 16)),
(LOADCONST_i32 16))))) >;
def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)),
(f64 (FTOD
(UTOF
@ -452,16 +412,6 @@ def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)),
def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
(v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i8 GPRV2I8:$src),
(VCREATE_v2i32 (LOADCONST_i32 24))),
(VCREATE_v2i32 (LOADCONST_i32 24))))) >;
def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
(v2f32
(UTOF_v2f32
@ -472,32 +422,6 @@ def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
(VCREATE_v2i32 (LOADCONST_i32 24))))) >;
def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
(v2f64
(VINSERT_v2f64
(VCREATE_v2f64
(FTOD
(VEXTRACT_v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i8 GPRV2I8:$src),
(VCREATE_v2i32 (LOADCONST_i32 24))),
(VCREATE_v2i32 (LOADCONST_i32 24)))),
1)
)),
(FTOD
(VEXTRACT_v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i8 GPRV2I8:$src),
(VCREATE_v2i32 (LOADCONST_i32 24))),
(VCREATE_v2i32 (LOADCONST_i32 24)))),
2)
), 1, 256)
) >;
def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
(v2f64
(VINSERT_v2f64
@ -524,15 +448,6 @@ def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
), 1, 256)
) >;
def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
(v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i16 GPRV2I16:$src),
(VCREATE_v2i32 (LOADCONST_i32 16))),
(VCREATE_v2i32 (LOADCONST_i32 16))))) >;
def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
(v2f32
@ -544,32 +459,6 @@ def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
(VCREATE_v2i32 (LOADCONST_i32 16))))) >;
def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
(v2f64
(VINSERT_v2f64
(VCREATE_v2f64
(FTOD
(VEXTRACT_v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i16 GPRV2I16:$src),
(VCREATE_v2i32 (LOADCONST_i32 16))),
(VCREATE_v2i32 (LOADCONST_i32 16)))),
1)
)),
(FTOD
(VEXTRACT_v2f32
(ITOF_v2f32
(SHRVEC_v2i32
(SHLVEC_v2i32
(IL_ASV2INT_v2i16 GPRV2I16:$src),
(VCREATE_v2i32 (LOADCONST_i32 16))),
(VCREATE_v2i32 (LOADCONST_i32 16)))),
2)
), 1, 256)
) >;
def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
(v2f64
(VINSERT_v2f64
@ -679,16 +568,6 @@ def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)),
def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
(v4f32
(ITOF_v4f32
(SHRVEC_v4i32
(SHLVEC_v4i32
(IL_ASV4INT_v4i8 GPRV4I8:$src),
(VCREATE_v4i32 (LOADCONST_i32 24))),
(VCREATE_v4i32 (LOADCONST_i32 24))))) >;
def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
(v4f32
(UTOF_v4f32
@ -699,16 +578,6 @@ def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
(VCREATE_v4i32 (LOADCONST_i32 24))))) >;
def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
(v4f32
(ITOF_v4f32
(SHRVEC_v4i32
(SHLVEC_v4i32
(IL_ASV4INT_v4i16 GPRV4I16:$src),
(VCREATE_v4i32 (LOADCONST_i32 16))),
(VCREATE_v4i32 (LOADCONST_i32 16))))) >;
def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
(v4f32
(UTOF_v4f32

View file

@ -713,9 +713,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
// TODO: Implement custom UREM/SREM routines
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISDBITCAST, VT, Custom);
setOperationAction(ISD::GlobalAddress, VT, Custom);
@ -809,9 +807,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
setOperationAction(ISD::Constant , MVT::i64 , Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
@ -830,9 +826,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
// We want to expand vector conversions into their scalar
// counterparts.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
@ -1579,9 +1573,7 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
LOWER(JumpTable);
LOWER(ConstantPool);
LOWER(ExternalSymbol);
LOWER(FP_TO_SINT);
LOWER(FP_TO_UINT);
LOWER(SINT_TO_FP);
LOWER(UINT_TO_FP);
LOWER(MUL);
LOWER(SUB);
@ -2505,62 +2497,6 @@ AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
}
return res;
}
// Custom lowering for ISD::FP_TO_SINT on the AMDIL target.
// Vector f64 sources on devices newer than HD6XXX are scalarized
// element-by-element; scalar f64 sources dispatch on the destination
// integer width, using software conversion helpers on older devices.
// (NOTE(review): this routine is removed by the enclosing commit; the
// text here is the pre-change body as shown in the diff.)
SDValue
AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
{
SDValue RHS = Op.getOperand(0);
EVT RHSVT = RHS.getValueType();
MVT RST = RHSVT.getScalarType().getSimpleVT();
EVT LHSVT = Op.getValueType();
MVT LST = LHSVT.getScalarType().getSimpleVT();
DebugLoc DL = Op.getDebugLoc();
SDValue DST;
// Reach through the target machine to query the device generation.
const AMDILTargetMachine*
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (RST == MVT::f64 && RHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// We don't support vector 64-bit floating point conversions:
// extract each f64 lane, convert it as a scalar, and rebuild the
// result vector one element at a time.
for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
if (!x) {
// First lane seeds the vector via VBUILD.
DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
} else {
DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
DST, op, DAG.getTargetConstant(x, MVT::i32));
}
}
} else {
if (RST == MVT::f64
&& LST == MVT::i32) {
if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// Post-HD6XXX devices: keep the node unchanged (handled natively).
DST = SDValue(Op.getNode(), 0);
} else {
// Older devices: expand via the signed f64 -> i32 software helper.
DST = genf64toi32(RHS, DAG, true);
}
} else if (RST == MVT::f64
&& LST == MVT::i64) {
// f64 -> i64 always goes through the signed software helper.
DST = genf64toi64(RHS, DAG, true);
} else if (RST == MVT::f64
&& (LST == MVT::i8 || LST == MVT::i16)) {
// Narrow destinations: produce an i32 result first, then truncate.
if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
} else {
SDValue ToInt = genf64toi32(RHS, DAG, true);
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
}
} else {
// Non-f64 sources: leave the node for default lowering.
DST = SDValue(Op.getNode(), 0);
}
}
return DST;
}
SDValue
AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
@ -2854,104 +2790,6 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
return DST;
}
// Custom lowering for ISD::SINT_TO_FP on the AMDIL target.
// Vector f64 destinations on devices newer than HD6XXX are scalarized;
// scalar i32/i64 -> f64 conversions are expanded as
// abs(x) converted unsigned, with the sign bit reinserted into the f64
// bit pattern afterwards.
// (NOTE(review): this routine is removed by the enclosing commit; the
// text here is the pre-change body as shown in the diff.)
SDValue
AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
SDValue RHS = Op.getOperand(0);
EVT RHSVT = RHS.getValueType();
MVT RST = RHSVT.getScalarType().getSimpleVT();
EVT INTVT;
EVT LONGVT;
SDValue DST;
bool isVec = RHSVT.isVector();
DebugLoc DL = Op.getDebugLoc();
EVT LHSVT = Op.getValueType();
MVT LST = LHSVT.getScalarType().getSimpleVT();
// Reach through the target machine to query the device generation.
const AMDILTargetMachine*
amdtm = reinterpret_cast<const AMDILTargetMachine*>
(&this->getTargetMachine());
const AMDILSubtarget*
stm = static_cast<const AMDILSubtarget*>(
amdtm->getSubtargetImpl());
if (LST == MVT::f64 && LHSVT.isVector()
&& stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// We don't support vector 64-bit floating point conversions:
// extract each lane, convert it as a scalar, and rebuild the vector.
for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
// NOTE(review): this builds UINT_TO_FP inside the *signed* lowering
// routine — looks like a copy-paste from LowerUINT_TO_FP; confirm
// whether ISD::SINT_TO_FP was intended here.
op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
if (!x) {
// First lane seeds the vector via VBUILD.
DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
} else {
DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
op, DAG.getTargetConstant(x, MVT::i32));
}
}
} else {
// Pick i32/i64 working types with the same lane count as the source.
if (isVec) {
LONGVT = EVT(MVT::getVectorVT(MVT::i64,
RHSVT.getVectorNumElements()));
INTVT = EVT(MVT::getVectorVT(MVT::i32,
RHSVT.getVectorNumElements()));
} else {
LONGVT = EVT(MVT::i64);
INTVT = EVT(MVT::i32);
}
// NOTE(review): shadows the outer RST with an identical value —
// harmless but redundant.
MVT RST = RHSVT.getScalarType().getSimpleVT();
if ((RST == MVT::i32 || RST == MVT::i64)
&& LST == MVT::f64) {
if (RST == MVT::i32) {
if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// Post-HD6XXX devices handle i32 -> f64 natively; bail out early.
DST = SDValue(Op.getNode(), 0);
return DST;
}
}
SDValue c31 = DAG.getConstant( 31, INTVT );
SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
SDValue S; // Sign, as 0 or -1
SDValue Sbit; // Sign bit, as one bit, MSB only.
if (RST == MVT::i32) {
Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
} else { // 64-bit case... SRA of 64-bit values is slow
// Derive sign mask from the high 32 bits only, then replicate it
// into both halves with LCREATE to form the 64-bit 0/-1 mask.
SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
}
// get abs() of input value, given sign as S (0 or -1)
// SpI = RHS + S
SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
// SpIxS = SpI ^ S
SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
// Convert unsigned value to double precision
SDValue R;
if (RST == MVT::i32) {
// r = cast_u32_to_f64(SpIxS)
R = genu32tof64(SpIxS, LHSVT, DAG);
} else {
// r = cast_u64_to_f64(SpIxS)
R = genu64tof64(SpIxS, LHSVT, DAG);
}
// drop in the sign bit: bit-cast the f64 to i64, OR the saved sign
// bit into the high half, and bit-cast back.
SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
} else {
// All other type pairs: leave the node for default lowering.
DST = SDValue(Op.getNode(), 0);
}
}
return DST;
}
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{

View file

@ -17,15 +17,9 @@
// unsigned: f32 -> i64
def FTOUL : Pat<(i64 (fp_to_uint GPRF32:$src)),
(LCREATE (FTOU GPRF32:$src), (LOADCONST_i32 0))>;
// signed: f32 -> i64
def FTOL : Pat<(i64 (fp_to_sint GPRF32:$src)),
(LCREATE (FTOI GPRF32:$src), (LOADCONST_i32 0))>;
// unsigned: i64 -> f32
def ULTOF : Pat<(f32 (uint_to_fp GPRI64:$src)),
(UTOF (LLO GPRI64:$src))>;
// signed: i64 -> f32
def LTOF : Pat<(f32 (sint_to_fp GPRI64:$src)),
(ITOF (LLO GPRI64:$src))>;
// LLVM isn't lowering this correctly, so writing a pattern that
// matches it instead.

View file

@ -664,14 +664,6 @@ def ULLT : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
def FTOD : UnaryOp<IL_OP_F_2_D, fextend, GPRF64, GPRF32>;
// f64 ==> f32
def DTOF : UnaryOp<IL_OP_D_2_F, IL_d2f, GPRF32, GPRF64>;
// f32 ==> i32 signed
def FTOI : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRI32, GPRF32>;
def FTOI_v2i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV2I32, GPRV2F32>;
def FTOI_v4i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV4I32, GPRV4F32>;
// i32 ==> f32 signed
def ITOF : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRF32, GPRI32>;
def ITOF_v2f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV2F32, GPRV2I32>;
def ITOF_v4f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV4F32, GPRV4I32>;
// f32 ==> i32 unsigned
def FTOU : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRI32, GPRF32>;
def FTOU_v2i32 : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRV2I32, GPRV2F32>;

View file

@ -626,14 +626,14 @@ class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
>;
class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
inst, "FLT_TO_INT", []> {
let AMDILOp = AMDILInst.FTOI;
}
inst, "FLT_TO_INT",
[(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
>;
class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
inst, "INT_TO_FLT", []> {
let AMDILOp = AMDILInst.ITOF;
}
inst, "INT_TO_FLT",
[(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
>;
class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
inst, "LOG_CLAMPED",