radeon/llvm: Add flag operand to some instructions

This new operand replaces the MachineOperand flags in LLVM, which
will be deprecated soon.  Eventually all instructions should have a flag
operand, but for now this operand has only been added to instructions
that need it.
This commit is contained in:
Tom Stellard 2012-08-22 15:04:58 +00:00
parent 3a7a56e7aa
commit 67a47a445b
7 changed files with 97 additions and 33 deletions

View file

@@ -31,6 +31,7 @@
#define MO_FLAG_MASK (1 << 3)
#define MO_FLAG_PUSH (1 << 4)
#define MO_FLAG_LAST (1 << 5)
#define NUM_MO_FLAGS 6
#define OPCODE_IS_ZERO_INT 0x00000045
#define OPCODE_IS_NOT_ZERO_INT 0x00000042

View file

@@ -67,7 +67,7 @@ public:
private:
void EmitALUInstr(MachineInstr &MI);
void EmitSrc(const MachineOperand & MO);
void EmitSrc(const MachineOperand & MO, unsigned SrcIdx);
void EmitDst(const MachineOperand & MO);
void EmitALU(MachineInstr &MI, unsigned numSrc);
void EmitTexInstr(MachineInstr &MI);
@@ -218,6 +218,8 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
unsigned numOperands = MI.getNumExplicitOperands();
if(MI.findFirstPredOperandIdx() > -1)
numOperands--;
if (TII->HasFlagOperand(MI))
numOperands--;
// Some instructions are just place holder instructions that represent
// operations that the GPU does automatically. They should be ignored.
@@ -243,7 +245,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
break;
}
EmitSrc(MI.getOperand(opIndex));
EmitSrc(MI.getOperand(opIndex), opIndex);
}
// Emit zeros for unused sources
@@ -256,8 +258,9 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
EmitALU(MI, numOperands - 1);
}
void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
void R600CodeEmitter::EmitSrc(const MachineOperand & MO, unsigned SrcIdx)
{
const MachineInstr *MI = MO.getParent();
uint32_t value = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
// For other potential instruction operands, (e.g. constant registers) the
@@ -289,8 +292,8 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
}
// XXX: Emit isNegated (1 byte)
if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
&& (MO.getTargetFlags() & MO_FLAG_NEG ||
if ((!(TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_ABS)))
&& (TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_NEG) ||
(MO.isReg() &&
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
EmitByte(1);
@@ -299,7 +302,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
}
// Emit isAbsolute (1 byte)
if (MO.getTargetFlags() & MO_FLAG_ABS) {
if (TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_ABS)) {
EmitByte(1);
} else {
EmitByte(0);
@@ -318,6 +321,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
void R600CodeEmitter::EmitDst(const MachineOperand & MO)
{
const MachineInstr *MI = MO.getParent();
if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) {
// Emit the destination register index (1 byte)
EmitByte(getHWReg(MO.getReg()));
@@ -326,14 +330,14 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
EmitByte(TRI->getHWRegChan(MO.getReg()));
// Emit isClamped (1 byte)
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
if (TII->IsFlagSet(*MI, 0, MO_FLAG_CLAMP)) {
EmitByte(1);
} else {
EmitByte(0);
}
// Emit writemask (1 byte).
if (MO.getTargetFlags() & MO_FLAG_MASK) {
if (TII->IsFlagSet(*MI, 0, MO_FLAG_MASK)) {
EmitByte(0);
} else {
EmitByte(1);
@@ -353,8 +357,7 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
EmitTwoBytes(getBinaryCodeForInstr(MI));
// Emit IsLast (for this instruction group) (1 byte)
if (MI.isInsideBundle() &&
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) {
if (MI.isInsideBundle() && !TII->IsFlagSet(MI, 0, MO_FLAG_LAST)) {
EmitByte(0);
} else {
EmitByte(1);
@@ -508,7 +511,7 @@ void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
unsigned numOperands = MI.getNumOperands();
if (numOperands > 0) {
assert(numOperands == 1);
EmitSrc(MI.getOperand(0));
EmitSrc(MI.getOperand(0), 0);
} else {
EmitNullBytes(SRC_BYTE_COUNT);
}

View file

@@ -149,7 +149,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *NewMI =
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
.addReg(Src0)
.addReg(Src1);
.addReg(Src1)
.addImm(0); // Flag
NewMI->setIsInsideBundle(Chan != 0);
TII->AddFlag(NewMI, 0, Flags);

View file

@@ -64,6 +64,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
break;
@@ -74,6 +75,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
break;
@@ -85,6 +87,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
break;
@@ -200,7 +203,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO);
.addImm(OPCODE_IS_ZERO)
.addImm(0); // Flags
TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
@@ -213,7 +217,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO_INT);
.addImm(OPCODE_IS_ZERO_INT)
.addImm(0); // Flags
TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))

View file

@@ -57,6 +57,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
.addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
.addReg(RI.getSubReg(SrcReg, SubRegIndex))
.addImm(0) // Flag
.addReg(0) // PREDICATE_BIT
.addReg(DestReg, RegState::Define | RegState::Implicit);
}
@@ -68,6 +69,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0) // Flag
.addReg(0); // PREDICATE_BIT
}
}
@@ -520,11 +522,35 @@ int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
//===----------------------------------------------------------------------===//
// Instruction flag setters
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
#define GET_FLAG_OPERAND_IDX(MI) (((MI).getDesc().TSFlags >> 7) & 0x3)
bool R600InstrInfo::HasFlagOperand(const MachineInstr &MI) const
{
return GET_FLAG_OPERAND_IDX(MI) != 0;
}
void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand,
unsigned Flag) const
{
MI->getOperand(Operand).addTargetFlag(Flag);
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(*MI);
assert(FlagIndex != 0 &&
"Instruction flags not supported for this instruction");
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
assert(FlagOp.isImm());
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}
bool R600InstrInfo::IsFlagSet(const MachineInstr &MI, unsigned Operand,
unsigned Flag) const
{
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MI);
if (FlagIndex == 0) {
return false;
}
assert(MI.getOperand(FlagIndex).isImm());
return !!((MI.getOperand(FlagIndex).getImm() >>
(NUM_MO_FLAGS * Operand)) & Flag);
}

View file

@@ -112,8 +112,13 @@ namespace llvm {
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
bool HasFlagOperand(const MachineInstr &MI) const;
///AddFlag - Add one of the MO_FLAG* flags to the specified Operand.
void AddFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
///IsFlagSet - Determine if the specified flag is set on this Operand.
bool IsFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
};
} // End llvm namespace
@@ -127,6 +132,7 @@ namespace R600_InstFlag {
TRIG = (1 << 4),
OP3 = (1 << 5),
VECTOR = (1 << 6)
//FlagOperand bits 7, 8
};
}

View file

@@ -20,7 +20,8 @@ class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
field bits<32> Inst;
bit Trig = 0;
bit Op3 = 0;
bit isVector = 0;
bit isVector = 0;
bits<2> FlagOperandIdx = 0;
let Inst = inst;
let Namespace = "AMDGPU";
@@ -36,6 +37,7 @@ class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
// Vector instructions are instructions that must fill all slots in an
// instruction group
let TSFlags{6} = isVector;
let TSFlags{8-7} = FlagOperandIdx;
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -107,20 +109,19 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst),
(ins R600_Reg32:$src0, i32imm:$src1),
def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"PRED $dst, $src0, $src1",
[]>
[], NullALU>
{
let DisableEncoding = "$src0";
field bits<32> Inst;
bits<32> src1;
let Inst = src1;
let FlagOperandIdx = 3;
}
let isTerminator = 1, isBranch = 1 in {
def JUMP : InstR600 <0x10,
(outs),
@@ -365,7 +366,12 @@ def FLOOR : R600_1OP <
[(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
>;
def MOV : R600_1OP <0x19, "MOV", []>;
def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, i32imm:$flags,
R600_Pred:$p),
"MOV $dst, $src0", [], AnyALU> {
let FlagOperandIdx = 2;
}
class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
(outs R600_Reg32:$dst),
@@ -386,10 +392,15 @@ def : Pat <
(MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
>;
def KILLGT : R600_2OP <
0x2D, "KILLGT",
[]
>;
def KILLGT : InstR600 <0x2D,
(outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
variable_ops),
"KILLGT $dst, $src0, $src1, $flags ($p)",
[],
NullALU>{
let FlagOperandIdx = 3;
}
def AND_INT : R600_2OP <
0x30, "AND_INT",
@@ -588,9 +599,16 @@ class CNDGE_Common <bits<32> inst> : R600_3OP <
class DOT4_Common <bits<32> inst> : R600_REDUCTION <
inst,
(ins R600_Reg128:$src0, R600_Reg128:$src1),
(ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
"DOT4 $dst $src0, $src1",
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
[]
> {
let FlagOperandIdx = 3;
}
class DOT4_Pat <Instruction dot4> : Pat <
(int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
(dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
>;
multiclass CUBE_Common <bits<32> inst> {
@@ -607,10 +625,12 @@ multiclass CUBE_Common <bits<32> inst> {
def _real : InstR600 <
inst,
(outs R600_Reg32:$dst),
(ins R600_Reg32:$src0, R600_Reg32:$src1),
(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
"CUBE $dst, $src0, $src1",
[], VecALU
>;
>{
let FlagOperandIdx = 3;
}
}
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
@ -748,6 +768,7 @@ let Predicates = [isR600] in {
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
def : DOT4_Pat <DOT4_r600>;
defm CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
@@ -864,6 +885,7 @@ let Predicates = [isEGorCayman] in {
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def DOT4_eg : DOT4_Common<0xBE>;
def : DOT4_Pat <DOT4_eg>;
defm CUBE_eg : CUBE_Common<0xC0>;
def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
@@ -1198,12 +1220,12 @@ def MASK_WRITE : AMDGPUShaderInst <
// KIL Patterns
def KILP : Pat <
(int_AMDGPU_kilp),
(MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
(MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0))
>;
def KIL : Pat <
(int_AMDGPU_kill R600_Reg32:$src0),
(MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
(MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0))
>;
// SGT Reverse args