radeon/llvm: Create a register class for the M0 register

The Common Subexpression Elimination pass will not operate on
instructions with physical register defs, so we end up with
several redundant copies to M0 when using interpolation.

Adding a register class that only contains the M0 register allows
use to use a virtual register to represent M0, and makes it possible
for the Common Subexpression Elimination pass to remove the extra
copies.
This commit is contained in:
Tom Stellard 2012-08-29 10:33:58 -04:00
parent 733c28a0d9
commit 05113fd266
5 changed files with 24 additions and 16 deletions

View file

@ -171,6 +171,8 @@ def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
STRING

View file

@ -127,7 +127,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
LowerSI_INTERP(MI, *BB, I, MRI);
break;
case AMDGPU::SI_INTERP_CONST:
LowerSI_INTERP_CONST(MI, *BB, I);
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break;
case AMDGPU::SI_KIL:
LowerSI_KIL(MI, *BB, I, MRI);
@ -150,6 +150,7 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
MachineOperand dst = MI->getOperand(0);
MachineOperand iReg = MI->getOperand(1);
MachineOperand jReg = MI->getOperand(2);
@ -157,39 +158,44 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineOperand attr = MI->getOperand(4);
MachineOperand params = MI->getOperand(5);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
.addOperand(iReg)
.addOperand(attr_chan)
.addOperand(attr);
.addOperand(attr)
.addReg(M0);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
.addOperand(dst)
.addReg(tmp)
.addOperand(jReg)
.addOperand(attr_chan)
.addOperand(attr);
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
MachineBasicBlock &BB, MachineBasicBlock::iterator I) const
MachineBasicBlock &BB, MachineBasicBlock::iterator I,
MachineRegisterInfo &MRI) const
{
MachineOperand dst = MI->getOperand(0);
MachineOperand attr_chan = MI->getOperand(1);
MachineOperand attr = MI->getOperand(2);
MachineOperand params = MI->getOperand(3);
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
.addOperand(params);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
.addOperand(dst)
.addOperand(attr_chan)
.addOperand(attr);
.addOperand(attr)
.addReg(M0);
MI->eraseFromParent();
}

View file

@ -32,7 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I) const;
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,

View file

@ -363,7 +363,6 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{31-26} = 0x32; // encoding
let EncodingType = 11; // SIInstrEncodingType::VINTRP
let Uses = [M0];
let neverHasSideEffects = 1;
}

View file

@ -566,33 +566,34 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P1_F32",
[]
>;
[]> {
let DisableEncoding = "$m0";
}
def V_INTERP_P2_F32 : VINTRP <
0x00000001,
(outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_P2_F32",
[]> {
let Constraints = "$src0 = $dst";
let DisableEncoding = "$src0";
let DisableEncoding = "$src0,$m0";
}
def V_INTERP_MOV_F32 : VINTRP <
0x00000002,
(outs VReg_32:$dst),
(ins i32imm:$attr_chan, i32imm:$attr),
(ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
"V_INTERP_MOV_F32",
[]> {
let VSRC = 0;
let DisableEncoding = "$m0";
}
//def V_INTERP_MOV_F32 : VINTRP_32 <0x00000002, "V_INTERP_MOV_F32", []>;
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
let isTerminator = 1 in {