mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
radeon/llvm: Initial flow control support for SI
This adds basic flow control support for If-Then-Else blocks using predicates (stored in the EXEC register) and a predicate stack for nested flow control.
This commit is contained in:
parent
ef0d7e13d7
commit
bfd55711c1
7 changed files with 168 additions and 2 deletions
|
|
@ -25,6 +25,7 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
|||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
|
||||
|
||||
|
|
|
|||
|
|
@ -134,6 +134,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
|
|||
addPass(FinalizeMachineBundlesID);
|
||||
} else {
|
||||
PM->add(createSILowerLiteralConstantsPass(*TM));
|
||||
PM->add(createSILowerFlowControlPass(*TM));
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -2892,6 +2892,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
|
|||
switch (instr->getOpcode()) {
|
||||
case AMDGPU::JUMP:
|
||||
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
|
||||
case AMDGPU::BRANCH:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ include "AMDILIntrinsics.td"
|
|||
// Custom Inserter for Branches and returns, this eventually will be a
|
||||
// separate pass
|
||||
//===---------------------------------------------------------------------===//
|
||||
let isTerminator = 1, usesCustomInserter = 1 in {
|
||||
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
|
||||
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
|
||||
"; Pseudo unconditional branch instruction",
|
||||
[(br bb:$target)]>;
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ CPP_SOURCES := \
|
|||
SIInstrInfo.cpp \
|
||||
SIISelLowering.cpp \
|
||||
SILowerLiteralConstants.cpp \
|
||||
SILowerFlowControl.cpp \
|
||||
SIMachineFunctionInfo.cpp \
|
||||
SIRegisterInfo.cpp \
|
||||
InstPrinter/AMDGPUInstPrinter.cpp \
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
|
||||
case AMDGPU::BRANCH: return BB;
|
||||
case AMDGPU::CLAMP_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
|
|
|
|||
161
src/gallium/drivers/radeon/SILowerFlowControl.cpp
Normal file
161
src/gallium/drivers/radeon/SILowerFlowControl.cpp
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
|
||||
// to predicated instructions.
|
||||
//
|
||||
// All flow control (except loops) is handled using predicated instructions and
|
||||
// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
|
||||
// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
|
||||
// by writing to the 64-bit EXEC register (each bit corresponds to a
|
||||
// single vector ALU). Typically, for predicates, a vector ALU will write
|
||||
// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one for each
|
||||
// Vector ALU) and then the ScalarALU will AND the VCC register with the
|
||||
// EXEC to update the predicates.
|
||||
//
|
||||
// For example:
|
||||
// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
|
||||
// SI_IF_NZ %VCC
|
||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
|
||||
// ELSE
|
||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
|
||||
// ENDIF
|
||||
//
|
||||
// becomes:
|
||||
//
|
||||
// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
|
||||
// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
|
||||
// S_CBRANCH_EXECZ label0 // This instruction is an
|
||||
// // optimization which allows us to
|
||||
// // branch if all the bits of
|
||||
// // EXEC are zero.
|
||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
|
||||
//
|
||||
// label0:
|
||||
// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
|
||||
// // Else block.
|
||||
// %EXEC = S_AND_B64 %SGPR0, %EXEC
|
||||
// S_CBRANCH_EXECZ label1 // Use our branch optimization
|
||||
// // instruction again.
|
||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
|
||||
// label1:
|
||||
// %EXEC = S_MOV_B64 %SGPR0 // Restore the old EXEC value
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SILowerFlowControlPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
const TargetInstrInfo *TII;
|
||||
std::vector<unsigned> PredicateStack;
|
||||
std::vector<unsigned> UnusedRegisters;
|
||||
|
||||
void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
||||
void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
||||
|
||||
public:
|
||||
SILowerFlowControlPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const {
|
||||
return "SI Lower flow control instructions";
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
// Out-of-class definition of the static pass ID declared in
// SILowerFlowControlPass; the constructor passes it to MachineFunctionPass.
char SILowerFlowControlPass::ID = 0;
|
||||
|
||||
/// Factory for the SI flow control lowering pass.
///
/// \param tm Target machine forwarded to the SILowerFlowControlPass
///           constructor.
/// \returns A newly allocated SILowerFlowControlPass.
FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
  FunctionPass *LoweringPass = new SILowerFlowControlPass(tm);
  return LoweringPass;
}
|
||||
|
||||
/// Lower the SI_IF_NZ, ELSE and ENDIF pseudo instructions of \p MF into
/// scalar instructions that update the EXEC register.
///
///   SI_IF_NZ %cond : save EXEC on the predicate stack, then
///                    EXEC = %cond & EXEC
///   ELSE           : EXEC = saved EXEC & ~EXEC
///   ENDIF          : restore the saved EXEC and pop the predicate stack
///
/// \returns true when at least one pseudo instruction was replaced.
bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {

  bool Changed = false;

  // The pass object is reused for every function, so drop whatever the
  // previous function left behind before collecting registers again.
  UnusedRegisters.clear();
  PredicateStack.clear();

  // Find all the unused registers that can be used for the predicate stack.
  // Walk the class from its last register down to (and including) its first
  // one.  The iterator is decremented before it is dereferenced so end() is
  // never read and begin() is not skipped.
  for (TargetRegisterClass::iterator Begin = AMDGPU::SReg_64RegClass.begin(),
                                     I = AMDGPU::SReg_64RegClass.end();
       I != Begin; ) {
    --I;
    unsigned Reg = *I;
    if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
      UnusedRegisters.push_back(Reg);
    }
  }

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
      MachineInstr &MI = *I;
      // MI may be erased below, so remember its successor first.  Next is
      // only computed from a valid instruction iterator, never from end().
      MachineBasicBlock::iterator Next = llvm::next(I);

      switch (MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_IF_NZ:
          // Save EXEC, then keep only the lanes whose condition bit is set.
          pushExecMask(MBB, I);
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                  AMDGPU::EXEC)
                  .addOperand(MI.getOperand(0)) // VCC
                  .addReg(AMDGPU::EXEC);
          MI.eraseFromParent();
          Changed = true;
          break;
        case AMDGPU::ELSE:
          assert(!PredicateStack.empty() &&
                 "ELSE without a saved exec mask on the predicate stack");
          // Enable exactly the lanes that were active before the IF but did
          // not take it: EXEC = saved EXEC & ~EXEC.
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
                  AMDGPU::EXEC)
                  .addReg(AMDGPU::EXEC);
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                  AMDGPU::EXEC)
                  .addReg(PredicateStack.back())
                  .addReg(AMDGPU::EXEC);
          MI.eraseFromParent();
          Changed = true;
          break;
        case AMDGPU::ENDIF:
          // Restore the EXEC value saved by the matching SI_IF_NZ.
          popExecMask(MBB, I);
          MI.eraseFromParent();
          Changed = true;
          break;
      }

      I = Next;
    }
  }
  return Changed;
}
|
||||
|
||||
/// Save the current EXEC value on the predicate stack.
///
/// Takes the register at the back of UnusedRegisters, inserts an S_MOV_B64
/// before \p I in \p MBB that copies EXEC into it, and records the register
/// on PredicateStack.
void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I) {

  assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");

  // Claim the next free register for the saved mask.
  const unsigned SavedExecReg = UnusedRegisters.back();
  UnusedRegisters.pop_back();

  // SavedExecReg = EXEC
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          SavedExecReg)
          .addReg(AMDGPU::EXEC);

  PredicateStack.push_back(SavedExecReg);
}
|
||||
|
||||
/// Restore EXEC from the most recently saved value.
///
/// Pops the register at the back of PredicateStack, returns it to
/// UnusedRegisters, and inserts an S_MOV_B64 before \p I in \p MBB that
/// copies the register back into EXEC.
void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I) {
  // Calling back() on an empty vector is undefined behaviour, so state the
  // precondition the same way pushExecMask does.
  assert(!PredicateStack.empty() && "Predicate stack is empty");

  unsigned StackReg = PredicateStack.back();
  PredicateStack.pop_back();
  // The register no longer holds a live mask and may be reused.
  UnusedRegisters.push_back(StackReg);
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          AMDGPU::EXEC)
          .addReg(StackReg);
}
|
||||
Loading…
Add table
Reference in a new issue