mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 03:08:05 +02:00
radeon/llvm: ExpandSpecialInstrs - Add support for cube instructions
This commit is contained in:
parent
6c99f2101f
commit
1cb07bd3b8
4 changed files with 100 additions and 63 deletions
|
|
@ -49,17 +49,14 @@ private:
|
|||
const R600RegisterInfo * TRI;
|
||||
const R600InstrInfo * TII;
|
||||
|
||||
bool IsCube;
|
||||
unsigned currentElement;
|
||||
bool IsLast;
|
||||
|
||||
unsigned section_start;
|
||||
|
||||
public:
|
||||
|
||||
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
|
||||
_OS(OS), TM(NULL), IsCube(false),
|
||||
IsLast(true) { }
|
||||
_OS(OS), TM(NULL) { }
|
||||
|
||||
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
|
||||
|
||||
|
|
@ -70,7 +67,7 @@ public:
|
|||
private:
|
||||
|
||||
void EmitALUInstr(MachineInstr &MI);
|
||||
void EmitSrc(const MachineOperand & MO, int chan_override = -1);
|
||||
void EmitSrc(const MachineOperand & MO);
|
||||
void EmitDst(const MachineOperand & MO);
|
||||
void EmitALU(MachineInstr &MI, unsigned numSrc);
|
||||
void EmitTexInstr(MachineInstr &MI);
|
||||
|
|
@ -160,7 +157,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
|||
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
|
||||
E = MBB.instr_end(); I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
IsCube = TII->isCubeOp(MI.getOpcode());
|
||||
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -168,15 +164,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
|
|||
EmitTexInstr(MI);
|
||||
} else if (TII->isFCOp(MI.getOpcode())){
|
||||
EmitFCInstr(MI);
|
||||
} else if (IsCube) {
|
||||
IsLast = false;
|
||||
// XXX: On Cayman, some (all?) of the vector instructions only need
|
||||
// to fill the first three slots.
|
||||
for (currentElement = 0; currentElement < 4; currentElement++) {
|
||||
IsLast = (currentElement == 3);
|
||||
EmitALUInstr(MI);
|
||||
}
|
||||
IsCube = false;
|
||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
||||
MI.getOpcode() == AMDGPU::KILL) {
|
||||
|
|
@ -250,25 +237,18 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
|
|||
// Emit instruction type
|
||||
EmitByte(0);
|
||||
|
||||
if (IsCube) {
|
||||
static const int cube_src_swz[] = {2, 2, 0, 1};
|
||||
EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
|
||||
EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
} else {
|
||||
unsigned int opIndex;
|
||||
for (opIndex = 1; opIndex < numOperands; opIndex++) {
|
||||
// Literal constants are always stored as the last operand.
|
||||
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
|
||||
break;
|
||||
}
|
||||
EmitSrc(MI.getOperand(opIndex));
|
||||
unsigned int opIndex;
|
||||
for (opIndex = 1; opIndex < numOperands; opIndex++) {
|
||||
// Literal constants are always stored as the last operand.
|
||||
if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
|
||||
break;
|
||||
}
|
||||
EmitSrc(MI.getOperand(opIndex));
|
||||
}
|
||||
|
||||
// Emit zeros for unused sources
|
||||
for ( ; opIndex < 4; opIndex++) {
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
}
|
||||
// Emit zeros for unused sources
|
||||
for ( ; opIndex < 4; opIndex++) {
|
||||
EmitNullBytes(SRC_BYTE_COUNT);
|
||||
}
|
||||
|
||||
EmitDst(dstOp);
|
||||
|
|
@ -276,7 +256,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
|
|||
EmitALU(MI, numOperands - 1);
|
||||
}
|
||||
|
||||
void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
|
||||
void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
|
||||
{
|
||||
uint32_t value = 0;
|
||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||
|
|
@ -302,9 +282,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
|
|||
}
|
||||
|
||||
// Emit the source channel (1 byte)
|
||||
if (chan_override != -1) {
|
||||
EmitByte(chan_override);
|
||||
} else if (MO.isReg()) {
|
||||
if (MO.isReg()) {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
} else {
|
||||
EmitByte(0);
|
||||
|
|
@ -345,11 +323,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO)
|
|||
EmitByte(getHWReg(MO.getReg()));
|
||||
|
||||
// Emit the element of the destination register (1 byte)
|
||||
if (IsCube) {
|
||||
EmitByte(currentElement);
|
||||
} else {
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
}
|
||||
EmitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
|
||||
// Emit isClamped (1 byte)
|
||||
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
|
||||
|
|
@ -379,9 +353,8 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
|
|||
EmitTwoBytes(getBinaryCodeForInstr(MI));
|
||||
|
||||
// Emit IsLast (for this instruction group) (1 byte)
|
||||
if (!IsLast ||
|
||||
(MI.isInsideBundle() &&
|
||||
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
|
||||
if (MI.isInsideBundle() &&
|
||||
!(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) {
|
||||
EmitByte(0);
|
||||
} else {
|
||||
EmitByte(1);
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
||||
bool IsVector = TII->isVector(MI);
|
||||
if (!IsReduction && !IsVector) {
|
||||
bool IsCube = TII->isCubeOp(MI.getOpcode());
|
||||
if (!IsReduction && !IsVector && !IsCube) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -82,23 +83,73 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
|
||||
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
|
||||
// T0_W (write masked) = MULLO_INT T1_X, T2_X
|
||||
//
|
||||
// Cube instructions:
|
||||
// T0_XYZW = CUBE T1_XYZW
|
||||
// becomes:
|
||||
// T0_X = CUBE T1_Z, T1_Y
|
||||
// T0_Y = CUBE T1_Z, T1_X
|
||||
// T0_Z = CUBE T1_X, T1_Z
|
||||
// T0_W = CUBE T1_Y, T1_Z
|
||||
for (unsigned Chan = 0; Chan < 4; Chan++) {
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
unsigned Src0 = MI.getOperand(1).getReg();
|
||||
unsigned Src1 = MI.getOperand(2).getReg();
|
||||
unsigned Src1 = 0;
|
||||
|
||||
// Determine the correct source registers
|
||||
if (!IsCube) {
|
||||
Src1 = MI.getOperand(2).getReg();
|
||||
}
|
||||
if (IsReduction) {
|
||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
||||
Src0 = TRI.getSubReg(Src0, SubRegIndex);
|
||||
Src1 = TRI.getSubReg(Src1, SubRegIndex);
|
||||
} else if (IsCube) {
|
||||
static const int CubeSrcSwz[] = {2, 2, 0, 1};
|
||||
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
|
||||
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
|
||||
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
|
||||
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
|
||||
}
|
||||
unsigned DstBase = TRI.getHWRegIndex(DstReg);
|
||||
unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
||||
unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
|
||||
|
||||
// Determine the correct destination registers
|
||||
unsigned Flags = 0;
|
||||
if (IsCube) {
|
||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
||||
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
|
||||
} else {
|
||||
// Mask the write if the original instruction does not write to
|
||||
// the current Channel.
|
||||
Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
|
||||
unsigned DstBase = TRI.getHWRegIndex(DstReg);
|
||||
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
||||
}
|
||||
|
||||
// Set the IsLast bit
|
||||
Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
|
||||
MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);
|
||||
|
||||
// Add the new instruction
|
||||
unsigned Opcode;
|
||||
if (IsCube) {
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::CUBE_r600_pseudo:
|
||||
Opcode = AMDGPU::CUBE_r600_real;
|
||||
break;
|
||||
case AMDGPU::CUBE_eg_pseudo:
|
||||
Opcode = AMDGPU::CUBE_eg_real;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unknown CUBE instruction");
|
||||
Opcode = 0;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
Opcode = MI.getOpcode();
|
||||
}
|
||||
MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true);
|
||||
NewDstOp.addTargetFlag(Flags);
|
||||
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode))
|
||||
.addOperand(NewDstOp)
|
||||
.addReg(Src0)
|
||||
.addReg(Src1)
|
||||
|
|
|
|||
|
|
@ -153,8 +153,10 @@ bool R600InstrInfo::isCubeOp(unsigned opcode) const
|
|||
{
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::CUBE_r600:
|
||||
case AMDGPU::CUBE_eg:
|
||||
case AMDGPU::CUBE_r600_pseudo:
|
||||
case AMDGPU::CUBE_r600_real:
|
||||
case AMDGPU::CUBE_eg_pseudo:
|
||||
case AMDGPU::CUBE_eg_real:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -593,14 +593,25 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
|
|||
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
|
||||
>;
|
||||
|
||||
class CUBE_Common <bits<32> inst> : InstR600 <
|
||||
inst,
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins R600_Reg128:$src),
|
||||
"CUBE $dst $src",
|
||||
[(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
|
||||
VecALU
|
||||
>;
|
||||
multiclass CUBE_Common <bits<32> inst> {
|
||||
|
||||
def _pseudo : InstR600 <
|
||||
inst,
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins R600_Reg128:$src),
|
||||
"CUBE $dst $src",
|
||||
[(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
|
||||
VecALU
|
||||
>;
|
||||
|
||||
def _real : InstR600 <
|
||||
inst,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins R600_Reg32:$src0, R600_Reg32:$src1),
|
||||
"CUBE $dst, $src0, $src1",
|
||||
[], VecALU
|
||||
>;
|
||||
}
|
||||
|
||||
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
|
||||
inst, "EXP_IEEE",
|
||||
|
|
@ -737,7 +748,7 @@ let Predicates = [isR600] in {
|
|||
def CNDGT_r600 : CNDGT_Common<0x19>;
|
||||
def CNDGE_r600 : CNDGE_Common<0x1A>;
|
||||
def DOT4_r600 : DOT4_Common<0x50>;
|
||||
def CUBE_r600 : CUBE_Common<0x52>;
|
||||
defm CUBE_r600 : CUBE_Common<0x52>;
|
||||
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
|
||||
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
|
||||
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
|
||||
|
|
@ -853,7 +864,7 @@ let Predicates = [isEGorCayman] in {
|
|||
def SIN_eg : SIN_Common<0x8D>;
|
||||
def COS_eg : COS_Common<0x8E>;
|
||||
def DOT4_eg : DOT4_Common<0xBE>;
|
||||
def CUBE_eg : CUBE_Common<0xC0>;
|
||||
defm CUBE_eg : CUBE_Common<0xC0>;
|
||||
|
||||
def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
|
||||
def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue