mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 03:28:09 +02:00
radeon/llvm: Rework how immediate operands are handled with SI
Immediate operands were previously handled in the CodeEmitter, but that code was buggy and very confusing. This commit adds a pass that simplifies the handling of immediate operands by splitting the loading of the immediate into a separate instruction that is bundled with the original.
This commit is contained in:
parent
1cee70c5d8
commit
022f6d8861
10 changed files with 150 additions and 44 deletions
|
|
@ -26,6 +26,7 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
|||
// SI Passes
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
|
||||
|
||||
// Passes common to R600 and SI
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
|
|
|
|||
|
|
@ -81,9 +81,13 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
|
|||
VCCUsed = true;
|
||||
continue;
|
||||
}
|
||||
if (reg == AMDGPU::EXEC) {
|
||||
switch (reg) {
|
||||
default: break;
|
||||
case AMDGPU::EXEC:
|
||||
case AMDGPU::SI_LITERAL_CONSTANT:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (AMDGPU::SReg_32RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 1;
|
||||
|
|
|
|||
|
|
@ -137,6 +137,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
|
|||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
PM->add(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(FinalizeMachineBundlesID);
|
||||
} else {
|
||||
PM->add(createSILowerLiteralConstantsPass(*TM));
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -22,11 +22,9 @@
|
|||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#define LITERAL_REG 255
|
||||
#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
|
||||
#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
|
||||
|
||||
|
||||
// These must be kept in sync with SIInstructions.td and also the
|
||||
// InstrEncodingInfo array in SIInstrInfo.cpp.
|
||||
//
|
||||
|
|
@ -91,11 +89,6 @@ public:
|
|||
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
||||
/// i32LiteralEncode - Encode an i32 literal this is used as an operand
|
||||
/// for an instruction in place of a register.
|
||||
virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
||||
/// SMRDmemriEncode - Encoding for SMRD indexed loads
|
||||
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
|
@ -147,7 +140,12 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
|||
} else if (MO.isFPImm()) {
|
||||
// XXX: Not all instructions can use inline literals
|
||||
// XXX: We should make sure this is a 32-bit constant
|
||||
return LITERAL_REG;
|
||||
union {
|
||||
float F;
|
||||
uint32_t I;
|
||||
} Imm;
|
||||
Imm.F = MO.getFPImm();
|
||||
return Imm.I;
|
||||
} else{
|
||||
llvm_unreachable("Encoding of this operand type is not supported yet.");
|
||||
}
|
||||
|
|
@ -176,12 +174,6 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
|
|||
return GPRAlign(MI, OpNo, 2);
|
||||
}
|
||||
|
||||
uint64_t SIMCCodeEmitter::i32LiteralEncode(const MCInst &MI,
|
||||
unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
||||
return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
|
||||
}
|
||||
|
||||
#define SMRD_OFFSET_MASK 0xff
|
||||
#define SMRD_IMM_SHIFT 8
|
||||
#define SMRD_SBASE_MASK 0x3f
|
||||
|
|
@ -262,17 +254,13 @@ unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
|
|||
|
||||
unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
|
||||
|
||||
// Instructions with literal constants are expanded to 64-bits, and
|
||||
// the constant is stored in bits [63:32]
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
||||
if (MI.getOperand(i).isFPImm()) {
|
||||
return 8;
|
||||
}
|
||||
}
|
||||
|
||||
// This instruction always has a literal
|
||||
if (MI.getOpcode() == AMDGPU::S_MOV_IMM_I32) {
|
||||
return 8;
|
||||
// These instructions aren't real instructions with an encoding type, so
|
||||
// we need to manually specify their size.
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_LOAD_LITERAL_I32:
|
||||
case AMDGPU::SI_LOAD_LITERAL_F32:
|
||||
return 4;
|
||||
}
|
||||
|
||||
unsigned encoding_type = getEncodingType(MI);
|
||||
|
|
@ -294,6 +282,7 @@ unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
|
|||
switch (reg) {
|
||||
case AMDGPU::M0: return 124;
|
||||
case AMDGPU::SREG_LIT_0: return 128;
|
||||
case AMDGPU::SI_LITERAL_CONSTANT: return 255;
|
||||
default: return getHWRegNum(reg);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ CPP_SOURCES := \
|
|||
SIAssignInterpRegs.cpp \
|
||||
SIInstrInfo.cpp \
|
||||
SIISelLowering.cpp \
|
||||
SILowerLiteralConstants.cpp \
|
||||
SIMachineFunctionInfo.cpp \
|
||||
SIRegisterInfo.cpp \
|
||||
InstPrinter/AMDGPUInstPrinter.cpp \
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ def VCC : SIReg<"VCC">;
|
|||
def EXEC : SIReg<"EXEC">;
|
||||
def SCC : SIReg<"SCC">;
|
||||
def SREG_LIT_0 : SIReg <"S LIT 0">;
|
||||
def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
|
||||
|
||||
def M0 : SIReg <"M0">;
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ class SITargetLowering : public AMDGPUTargetLowering
|
|||
/// write.
|
||||
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, unsigned Opocde) const;
|
||||
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
|
|
|
|||
|
|
@ -86,16 +86,6 @@ class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
|
|||
let MIOperandInfo = (ops rc:$reg);
|
||||
}
|
||||
|
||||
def i32Literal : Operand <i32> {
|
||||
let EncoderMethod = "i32LiteralEncode";
|
||||
}
|
||||
|
||||
// i64Literal uses the same encoder method as i32 literal, because an
|
||||
// i64Literal is really a i32 literal with the top 32-bits all set to zero.
|
||||
def i64Literal : Operand <i64> {
|
||||
let EncoderMethod = "i32LiteralEncode";
|
||||
}
|
||||
|
||||
def SMRDmemrr : Operand<iPTR> {
|
||||
let MIOperandInfo = (ops SReg_64, SReg_32);
|
||||
let EncoderMethod = "GPR2AlignEncode";
|
||||
|
|
|
|||
|
|
@ -887,21 +887,21 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
|
|||
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
|
||||
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
|
||||
|
||||
class V_MOV_IMM <Operand immType, SDNode immNode> : VOP1 <
|
||||
0x1,
|
||||
class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
|
||||
(outs VReg_32:$dst),
|
||||
(ins immType:$src0),
|
||||
"V_MOV_IMM",
|
||||
[(set VReg_32:$dst, (immNode:$src0))]
|
||||
>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||
|
||||
def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
|
||||
def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
|
||||
|
||||
def S_MOV_IMM_I32 : SOP1 <
|
||||
0x3,
|
||||
def S_MOV_IMM_I32 : InstSI <
|
||||
(outs SReg_32:$dst),
|
||||
(ins i32Literal:$src0),
|
||||
(ins i32imm:$src0),
|
||||
"S_MOV_IMM_I32",
|
||||
[(set SReg_32:$dst, (imm:$src0))]
|
||||
>;
|
||||
|
|
@ -910,14 +910,25 @@ def S_MOV_IMM_I32 : SOP1 <
|
|||
// type for indices on load and store instructions. The pattern for
|
||||
// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
|
||||
// which the hardware can handle.
|
||||
def S_MOV_IMM_I64 : SOP1 <
|
||||
0x3,
|
||||
def S_MOV_IMM_I64 : InstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins i64Literal:$src0),
|
||||
(ins i64imm:$src0),
|
||||
"S_MOV_IMM_I64 $dst, $src0",
|
||||
[(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
|
||||
>;
|
||||
|
||||
} // End isCodeGenOnly, isPseudo = 1
|
||||
|
||||
class SI_LOAD_LITERAL<Operand ImmType> :
|
||||
Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
|
||||
|
||||
bits<32> imm;
|
||||
let Inst{31-0} = imm;
|
||||
}
|
||||
|
||||
def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
|
||||
def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||
|
||||
def SET_M0 : InstSI <
|
||||
|
|
|
|||
105
src/gallium/drivers/radeon/SILowerLiteralConstants.cpp
Normal file
105
src/gallium/drivers/radeon/SILowerLiteralConstants.cpp
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
//===-- SILowerLiteralConstants.cpp - Lower instrs using literal constants ===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This pass performs the following transformation on instructions with
|
||||
// literal constants:
|
||||
//
|
||||
// %VGPR0 = V_MOV_IMM_I32 1
|
||||
//
|
||||
// becomes:
|
||||
//
|
||||
// BUNDLE
|
||||
// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
|
||||
// * SI_LOAD_LITERAL 1
|
||||
//
|
||||
// The resulting sequence matches exactly how the hardware handles immediate
|
||||
// operands, so this transformation greatly simplifies the code generator.
|
||||
//
|
||||
// Only the *_MOV_IMM_* support immediate operands at the moment, but when
|
||||
// support for immediate operands is added to other instructions, they
|
||||
// will be lowered here as well.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineInstrBundle.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SILowerLiteralConstantsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
public:
|
||||
SILowerLiteralConstantsPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const {
|
||||
return "SI Lower literal constants pass";
|
||||
}
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char SILowerLiteralConstantsPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
|
||||
return new SILowerLiteralConstantsPass(tm);
|
||||
}
|
||||
|
||||
/// Walk every basic block and replace each *_MOV_IMM_* pseudo with a bundle:
///   dst = MOV SI_LITERAL_CONSTANT  ;  SI_LOAD_LITERAL_* <imm>
/// The bundled pair is emitted back-to-back, matching the hardware's literal
/// operand encoding. Returns true iff any instruction was rewritten.
bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
  bool Modified = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
                                                  BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    // Compute the successor iterator inside the loop body. The previous
    // form evaluated llvm::next(I) in the for-increment before testing
    // I != MBB.end(), which dereferences/advances a past-the-end iterator
    // on the last iteration (and on an empty block) — undefined behavior.
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
      MachineBasicBlock::iterator Next = llvm::next(I);
      MachineInstr &MI = *I;
      switch (MI.getOpcode()) {
      default: break;
      case AMDGPU::S_MOV_IMM_I32:
      case AMDGPU::S_MOV_IMM_I64:
      case AMDGPU::V_MOV_IMM_F32:
      case AMDGPU::V_MOV_IMM_I32: {
        unsigned MovOpcode;
        unsigned LoadLiteralOpcode;
        // Only read before the original instruction is erased; a const
        // reference avoids copying the MachineOperand.
        const MachineOperand &LiteralOp = MI.getOperand(1);
        // Select the MOV matching the destination register class
        // (vector vs. scalar).
        if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
          MovOpcode = AMDGPU::V_MOV_B32_e32;
        } else {
          MovOpcode = AMDGPU::S_MOV_B32;
        }
        // Integer immediates use the I32 literal loader, everything else
        // (FP immediates) the F32 one.
        if (LiteralOp.isImm()) {
          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
        } else {
          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
        }
        MachineInstr *First =
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
                  MI.getOperand(0).getReg())
                  .addReg(AMDGPU::SI_LITERAL_CONSTANT);
        MachineInstr *Last =
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
                  .addOperand(MI.getOperand(1));
        Last->setIsInsideBundle();
        llvm::finalizeBundle(MBB, First, Last);
        MI.eraseFromParent();
        Modified = true;
        break;
      }
      }
      I = Next;
    }
  }
  // Report modification honestly: runOnMachineFunction must return true
  // when the function was changed (the original always returned false).
  return Modified;
}
|
||||
Loading…
Add table
Reference in a new issue