mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 20:18:12 +02:00
radeonsi: Handle TGSI CONST registers
We now emit LLVM load instructions for TGSI CONST register reads, which are lowered in the backend to S_LOAD_DWORD* instructions.
This commit is contained in:
parent
32b83e0366
commit
467f51613e
12 changed files with 254 additions and 100 deletions
|
|
@ -37,6 +37,10 @@ namespace llvm {
|
|||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||
const {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
|
|
|||
|
|
@ -67,6 +67,9 @@ private:
|
|||
|
||||
SDNode *xformAtomicInst(SDNode *N);
|
||||
|
||||
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "AMDILGenDAGISel.inc"
|
||||
};
|
||||
|
|
@ -513,3 +516,56 @@ AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
|
|||
#undef INT64_C
|
||||
#endif
|
||||
#undef DEBUGTMP
|
||||
|
||||
///==== AMDGPU Functions ====///
|
||||
|
||||
/// SelectADDR8BitOffset - Match an address as (base pointer + constant offset)
/// where the constant, converted from bytes to dwords, fits in an unsigned
/// 8-bit field.  An address that is not an ISD::ADD matches with offset 0.
///
/// \param Addr   The address node to match.
/// \param Base   [out] The non-constant operand of the ADD, or Addr itself
///               when Addr is not an ADD.
/// \param Offset [out] The dword offset, as an i32 target constant.
/// \returns true when Base/Offset describe Addr; false for external symbols,
///          global addresses, and ADDs without a constant operand that fits.
bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                             SDValue& Offset) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::ADD) {
    bool Match = false;

    // Find the base ptr and the offset
    for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
      SDValue Arg = Addr.getOperand(i);
      ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
      // This arg isn't a constant so it must be the base PTR.
      if (!OffsetNode) {
        Base = Arg;
        continue;
      }
      // The offset is in bytes so we need to convert it to dwords.
      const uint64_t DwordOffset = OffsetNode->getZExtValue() >> 2;
      // The offset is encoded as an unsigned 8-bit field, so the whole
      // 0..255 range is usable.  A signed check (isInt<8>) would wrongly
      // reject dword offsets 128..255.
      if (isUInt<8>(DwordOffset)) {
        Match = true;
        Offset = CurDAG->getTargetConstant(DwordOffset, MVT::i32);
      }
    }
    return Match;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||
|
||||
/// SelectADDRReg - Split an ISD::ADD address into its two operands.
///
/// \param Addr   The address node to inspect.
/// \param Base   [out] Operand 0 of the ADD (left untouched for non-ADD nodes).
/// \param Offset [out] Operand 1 of the ADD (left untouched for non-ADD nodes).
/// \returns false on every path.
///
/// NOTE(review): the function reports "no match" even after filling in Base
/// and Offset, so a pattern relying on it can never be selected.  This looks
/// like it may be intentional while register-offset encoding is unfinished -
/// confirm before changing it to return true.
bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
                                      SDValue& Offset) {
  // External symbols and global addresses are not ISD::ADD nodes, so this
  // single test rejects them as well as every other non-ADD address.
  const unsigned Opcode = Addr.getOpcode();
  if (Opcode != ISD::ADD)
    return false;

  Base = Addr.getOperand(0);
  Offset = Addr.getOperand(1);

  return false;
}
|
||||
|
|
|
|||
|
|
@ -796,7 +796,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
|
|||
setPrefLoopAlignment(16);
|
||||
setSelectIsExpensive(true);
|
||||
setJumpIsExpensive(true);
|
||||
computeRegisterProperties();
|
||||
|
||||
maxStoresPerMemcpy = 4096;
|
||||
maxStoresPerMemmove = 4096;
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
|||
addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass);
|
||||
addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass);
|
||||
computeRegisterProperties();
|
||||
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
|
|
|
|||
|
|
@ -65,6 +65,9 @@ namespace {
|
|||
/// for an instruction in place of a register.
|
||||
virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
|
||||
const;
|
||||
/// SMRDmemriEncode - Encoding for SMRD indexed loads
|
||||
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||
const;
|
||||
|
||||
/// VOPPostEncode - Post-Encoder method for VOP instructions
|
||||
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
||||
|
|
@ -238,6 +241,37 @@ uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
|
|||
return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
|
||||
}
|
||||
|
||||
#define SMRD_OFFSET_MASK 0xff
|
||||
#define SMRD_IMM_SHIFT 8
|
||||
#define SMRD_SBASE_MASK 0x3f
|
||||
#define SMRD_SBASE_SHIFT 9
|
||||
/// SMRDmemriEncode - This function is responsibe for encoding the offset
|
||||
/// and the base ptr for SMRD instructions it should return a bit string in
|
||||
/// this format:
|
||||
///
|
||||
/// OFFSET = bits{7-0}
|
||||
/// IMM = bits{8}
|
||||
/// SBASE = bits{14-9}
|
||||
///
|
||||
uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const
|
||||
{
|
||||
uint32_t encoding;
|
||||
|
||||
const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
|
||||
|
||||
//XXX: Use this function for SMRD loads with register offsets
|
||||
assert(OffsetOp.isImm());
|
||||
|
||||
encoding =
|
||||
(getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
|
||||
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
|
||||
| ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
|
||||
;
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
|
||||
/// XXX: It would be nice if we could handle this without a PostEncode function.
|
||||
uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
|
||||
|
|
|
|||
|
|
@ -182,8 +182,8 @@ my @subregs_64 = ('low', 'high');
|
|||
my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
|
||||
my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
|
||||
|
||||
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64', 'iPTRAny'));
|
||||
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32'));
|
||||
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
|
||||
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
|
||||
my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
|
||||
|
||||
my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
|
||||
|
|
|
|||
|
|
@ -25,9 +25,20 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||
{
|
||||
addRegisterClass(MVT::v4f32, &AMDIL::VReg_128RegClass);
|
||||
addRegisterClass(MVT::f32, &AMDIL::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i32, &AMDIL::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i64, &AMDIL::VReg_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDIL::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v8i32, &AMDIL::SReg_256RegClass);
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
||||
|
||||
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
|
|
|
|||
|
|
@ -32,6 +32,21 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
field bits<64> Inst;
|
||||
}
|
||||
|
||||
class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
|
||||
let EncoderMethod = "encodeOperand";
|
||||
let MIOperandInfo = opInfo;
|
||||
}
|
||||
|
||||
def IMM8bit : ImmLeaf <
|
||||
i32,
|
||||
[{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
|
||||
>;
|
||||
|
||||
def IMM12bit : ImmLeaf <
|
||||
i16,
|
||||
[{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
|
||||
>;
|
||||
|
||||
class GPR4Align <RegisterClass rc> : Operand <vAny> {
|
||||
let EncoderMethod = "GPR4AlignEncode";
|
||||
let MIOperandInfo = (ops rc:$reg);
|
||||
|
|
@ -46,6 +61,19 @@ def i32Literal : Operand <i32> {
|
|||
let EncoderMethod = "i32LiteralEncode";
|
||||
}
|
||||
|
||||
// SMRD memory operand made of two register sub-operands (SReg_64, SReg_32),
// encoded through GPR2AlignEncode.
def SMRDmemrr : Operand<iPTR> {
  let EncoderMethod = "GPR2AlignEncode";
  let MIOperandInfo = (ops SReg_64, SReg_32);
}
|
||||
|
||||
// SMRD memory operand made of a register sub-operand (SReg_64) and a 32-bit
// immediate, encoded through SMRDmemriEncode.
def SMRDmemri : Operand<iPTR> {
  let EncoderMethod = "SMRDmemriEncode";
  let MIOperandInfo = (ops SReg_64, i32imm);
}
|
||||
|
||||
def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
|
||||
def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
|
||||
|
||||
def EXP : Enc64<
|
||||
(outs),
|
||||
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
|
||||
|
|
@ -196,9 +224,10 @@ class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|||
Enc32<outs, ins, asm, pattern> {
|
||||
|
||||
bits<7> SDST;
|
||||
bits<8> OFFSET;
|
||||
bits<6> SBASE;
|
||||
bits<1> IMM = 0; // Determined by subclasses
|
||||
bits<15> PTR;
|
||||
bits<8> OFFSET = PTR{7-0};
|
||||
bits<1> IMM = PTR{8};
|
||||
bits<6> SBASE = PTR{14-9};
|
||||
|
||||
let Inst{7-0} = OFFSET;
|
||||
let Inst{8} = IMM;
|
||||
|
|
@ -420,14 +449,15 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
|
|||
}
|
||||
|
||||
/*XXX: We should be able to infer the imm bit based on the arg types */
|
||||
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
|
||||
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
|
||||
ValueType vt> {
|
||||
|
||||
def _SGPR : SMRD <
|
||||
op,
|
||||
(outs dstClass:$dst),
|
||||
(ins SReg_32:$offset, GPR2Align<SReg_64,i64>:$sbase),
|
||||
(ins SMRDmemrr:$src0),
|
||||
asm,
|
||||
[]
|
||||
[(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
|
||||
> {
|
||||
let IMM = 0;
|
||||
}
|
||||
|
|
@ -435,29 +465,13 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
|
|||
def _IMM : SMRD <
|
||||
op,
|
||||
(outs dstClass:$dst),
|
||||
(ins i32imm:$offset, GPR2Align<SReg_64,i64>:$sbase),
|
||||
(ins SMRDmemri:$src0),
|
||||
asm,
|
||||
[]
|
||||
[(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
|
||||
> {
|
||||
let IMM = 1;
|
||||
}
|
||||
}
|
||||
|
||||
class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
|
||||
let EncoderMethod = "encodeOperand";
|
||||
let MIOperandInfo = opInfo;
|
||||
}
|
||||
|
||||
def IMM8bit : ImmLeaf <
|
||||
i32,
|
||||
[{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
|
||||
>;
|
||||
|
||||
def IMM12bit : ImmLeaf <
|
||||
i16,
|
||||
[{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
|
||||
>;
|
||||
|
||||
include "SIInstrFormats.td"
|
||||
|
||||
include "SIInstructions.td"
|
||||
|
|
|
|||
|
|
@ -346,12 +346,10 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
|
|||
//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
|
||||
//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
|
||||
|
||||
let mayLoad = 0, neverHasSideEffects = 1 in {
|
||||
|
||||
defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
|
||||
defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32, f32>;
|
||||
//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
|
||||
defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
|
||||
defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
|
||||
defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
|
||||
defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
|
||||
//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
|
||||
//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
|
||||
//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
|
||||
|
|
@ -359,8 +357,6 @@ defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
|
|||
//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
|
||||
//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
|
||||
|
||||
} // End mayLoad, neverHasSideEffects
|
||||
|
||||
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
|
||||
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
|
||||
//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
|
||||
|
|
@ -866,29 +862,25 @@ def VS_LOAD_BUFFER_INDEX : InstSI <
|
|||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(int_SI_vs_load_input SReg_64:$tlst_sgpr, IMM8bit:$t_offset, IMM12bit:$attr_offset,
|
||||
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
|
||||
VReg_32:$buf_idx_vgpr),
|
||||
(BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
|
||||
VReg_32:$buf_idx_vgpr,
|
||||
(S_LOAD_DWORDX4_IMM imm:$t_offset, SReg_64:$tlst_sgpr),
|
||||
0, 0, (i32 SREG_LIT_0))
|
||||
VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
|
||||
0, 0, (i32 SREG_LIT_0))
|
||||
>;
|
||||
|
||||
/* int_SI_load_const */
|
||||
|
||||
def : Pat <
|
||||
(int_SI_load_const SReg_64:$const_ptr, IMM8bit:$offset),
|
||||
(S_LOAD_DWORD_IMM imm:$offset, SReg_64:$const_ptr)
|
||||
def : Pat<
|
||||
(int_SI_use_sgprptrcf32 imm:$src0),
|
||||
(USE_SGPR_64 imm:$src0)
|
||||
>;
|
||||
|
||||
|
||||
/* XXX: Complete this pattern with some form of a scalar move immediate */
|
||||
/*
|
||||
def : Pat <
|
||||
(int_SI_load_const SReg_64:$const_ptr, imm:$offset),
|
||||
(S_LOAD_DWORD_SGPR imm:$offset, SReg_64:$const_ptr)
|
||||
def : Pat<
|
||||
(int_SI_use_sgprptrci128 imm:$src0),
|
||||
(USE_SGPR_64 imm:$src0)
|
||||
>;
|
||||
def : Pat<
|
||||
(int_SI_use_sgprptrci256 imm:$src0),
|
||||
(USE_SGPR_64 imm:$src0)
|
||||
>;
|
||||
*/
|
||||
|
||||
/* int_SI_export */
|
||||
def : Pat <
|
||||
|
|
@ -900,11 +892,9 @@ def : Pat <
|
|||
|
||||
/* int_SI_sample */
|
||||
def : Pat <
|
||||
(int_SI_sample imm:$writemask, VReg_128:$coord, SReg_64:$rsrc, imm:$rsrc_offset,
|
||||
SReg_64:$sampler, imm:$sampler_offset),
|
||||
(int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
|
||||
(IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
|
||||
(S_LOAD_DWORDX8_IMM imm:$rsrc_offset, SReg_64:$rsrc), /* Resource */
|
||||
(S_LOAD_DWORDX4_IMM imm:$sampler_offset, SReg_64:$sampler)) /* Sampler */
|
||||
SReg_256:$rsrc, SReg_128:$sampler)
|
||||
>;
|
||||
|
||||
def CLAMP_SI : CLAMP<VReg_32>;
|
||||
|
|
|
|||
|
|
@ -18,11 +18,14 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
|||
/* XXX: We may need a separate intrinsic here for loading integer values */
|
||||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
|
||||
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], []>;
|
||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
||||
|
||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty]>;
|
||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
||||
def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>;
|
||||
def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr;
|
||||
def int_SI_use_sgprptrci128 : int_SI_use_sgprptr;
|
||||
def int_SI_use_sgprptrci256 : int_SI_use_sgprptr;
|
||||
|
||||
/* Interpolation Intrinsics */
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ bool SIPropagateImmReadsPass::runOnMachineFunction(MachineFunction &MF)
|
|||
switch (MI.getOpcode()) {
|
||||
case AMDIL::LOADCONST_f32:
|
||||
case AMDIL::LOADCONST_i32:
|
||||
case AMDIL::LOADCONST_i64:
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -66,54 +66,85 @@ static struct si_shader_context * si_shader_context(
|
|||
#define CENTROID_OFSET 4
|
||||
|
||||
#define USE_SGPR_MAX_SUFFIX_LEN 5
|
||||
#define CONST_ADDR_SPACE 2
|
||||
|
||||
enum sgpr_type {
|
||||
SGPR_CONST_PTR_F32,
|
||||
SGPR_CONST_PTR_V4I32,
|
||||
SGPR_CONST_PTR_V8I32,
|
||||
SGPR_I32,
|
||||
SGPR_I64,
|
||||
SGPR_PTR_V4I32,
|
||||
SGPR_PTR_V8I32
|
||||
SGPR_I64
|
||||
};
|
||||
|
||||
/**
|
||||
* Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
|
||||
*
|
||||
* @param offset The offset parameter specifies the number of
|
||||
* elements to offset, not the number of bytes or dwords. An element is the
|
||||
* the type pointed to by the base_ptr parameter (e.g. int is the element of
|
||||
* an int* pointer)
|
||||
*
|
||||
* When LLVM lowers the load instruction, it will convert the element offset
|
||||
* into a dword offset automatically.
|
||||
*
|
||||
*/
|
||||
static LLVMValueRef build_indexed_load(
|
||||
struct gallivm_state * gallivm,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset)
|
||||
{
|
||||
LLVMValueRef computed_ptr = LLVMBuildGEP(
|
||||
gallivm->builder, base_ptr, &offset, 1, "");
|
||||
|
||||
return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: Instead of using an intrinsic to use a specific SGPR, we should be
|
||||
* using load instructions. The loads should load from the USER_SGPR address
|
||||
* space and use the sgpr index as the pointer.
|
||||
*/
|
||||
static LLVMValueRef use_sgpr(
|
||||
struct gallivm_state * gallivm,
|
||||
enum sgpr_type type,
|
||||
unsigned sgpr)
|
||||
{
|
||||
LLVMValueRef sgpr_index;
|
||||
LLVMValueRef sgpr_value;
|
||||
LLVMTypeRef ret_type;
|
||||
|
||||
sgpr_index = lp_build_const_int32(gallivm, sgpr);
|
||||
|
||||
if (type == SGPR_I32) {
|
||||
switch (type) {
|
||||
case SGPR_CONST_PTR_F32:
|
||||
ret_type = LLVMFloatTypeInContext(gallivm->context);
|
||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||
return lp_build_intrinsic_unary(gallivm->builder,
|
||||
"llvm.SI.use.sgprptrcf32.",
|
||||
ret_type, sgpr_index);
|
||||
case SGPR_I32:
|
||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||
return lp_build_intrinsic_unary(gallivm->builder,
|
||||
"llvm.SI.use.sgpr.i32",
|
||||
ret_type, sgpr_index);
|
||||
}
|
||||
|
||||
ret_type = LLVMInt64TypeInContext(gallivm->context);
|
||||
sgpr_value = lp_build_intrinsic_unary(gallivm->builder,
|
||||
case SGPR_I64:
|
||||
ret_type= LLVMInt64TypeInContext(gallivm->context);
|
||||
return lp_build_intrinsic_unary(gallivm->builder,
|
||||
"llvm.SI.use.sgpr.i64",
|
||||
ret_type, sgpr_index);
|
||||
|
||||
switch (type) {
|
||||
case SGPR_I64:
|
||||
return sgpr_value;
|
||||
case SGPR_PTR_V4I32:
|
||||
case SGPR_CONST_PTR_V4I32:
|
||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||
ret_type = LLVMVectorType(ret_type, 4);
|
||||
ret_type = LLVMPointerType(ret_type,
|
||||
0 /*XXX: Specify address space*/);
|
||||
return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
|
||||
ret_type, "");
|
||||
case SGPR_PTR_V8I32:
|
||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||
return lp_build_intrinsic_unary(gallivm->builder,
|
||||
"llvm.SI.use.sgprptrci128.",
|
||||
ret_type, sgpr_index);
|
||||
case SGPR_CONST_PTR_V8I32:
|
||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||
ret_type = LLVMVectorType(ret_type, 8);
|
||||
ret_type = LLVMPointerType(ret_type,
|
||||
0 /*XXX: Specify address space*/);
|
||||
return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
|
||||
ret_type, "");
|
||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||
return lp_build_intrinsic_unary(gallivm->builder,
|
||||
"llvm.SI.use.sgprptrci256.",
|
||||
ret_type, sgpr_index);
|
||||
default:
|
||||
assert(!"Unsupported SGPR type in use_sgpr()");
|
||||
return NULL;
|
||||
|
|
@ -127,9 +158,10 @@ static void declare_input_vs(
|
|||
{
|
||||
LLVMValueRef t_list_ptr;
|
||||
LLVMValueRef t_offset;
|
||||
LLVMValueRef t_list;
|
||||
LLVMValueRef attribute_offset;
|
||||
LLVMValueRef buffer_index_reg;
|
||||
LLVMValueRef args[4];
|
||||
LLVMValueRef args[3];
|
||||
LLVMTypeRef vec4_type;
|
||||
LLVMValueRef input;
|
||||
struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
|
||||
|
|
@ -138,13 +170,17 @@ static void declare_input_vs(
|
|||
struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
|
||||
unsigned chan;
|
||||
|
||||
/* Load the T list */
|
||||
/* XXX: Communicate with the rest of the driver about which SGPR the T#
|
||||
* list pointer is going to be stored in. Hard code to SGPR[6:7] for
|
||||
* now */
|
||||
t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3);
|
||||
t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3);
|
||||
|
||||
t_offset = lp_build_const_int32(base->gallivm,
|
||||
4 * velem->vertex_buffer_index);
|
||||
t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
|
||||
|
||||
t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);
|
||||
|
||||
/* Build the attribute offset */
|
||||
attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);
|
||||
|
||||
/* Load the buffer index, which is always stored in VGPR0
|
||||
|
|
@ -153,12 +189,11 @@ static void declare_input_vs(
|
|||
"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
|
||||
|
||||
vec4_type = LLVMVectorType(base->elem_type, 4);
|
||||
args[0] = t_list_ptr;
|
||||
args[1] = t_offset;
|
||||
args[2] = attribute_offset;
|
||||
args[3] = buffer_index_reg;
|
||||
args[0] = t_list;
|
||||
args[1] = attribute_offset;
|
||||
args[2] = buffer_index_reg;
|
||||
input = lp_build_intrinsic(base->gallivm->builder,
|
||||
"llvm.SI.vs.load.input", vec4_type, args, 4);
|
||||
"llvm.SI.vs.load.input", vec4_type, args, 3);
|
||||
|
||||
/* Break up the vec4 into individual components */
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
|
|
@ -274,7 +309,7 @@ static LLVMValueRef fetch_constant(
|
|||
|
||||
/* XXX: Assume the pointer to the constant buffer is being stored in
|
||||
* SGPR[0:1] */
|
||||
const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0);
|
||||
const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0);
|
||||
|
||||
/* XXX: This assumes that the constant buffer is not packed, so
|
||||
* CONST[0].x will have an offset of 0 and CONST[1].x will have an
|
||||
|
|
@ -282,8 +317,7 @@ static LLVMValueRef fetch_constant(
|
|||
offset = lp_build_const_int32(base->gallivm,
|
||||
(reg->Register.Index * 4) + swizzle);
|
||||
|
||||
return lp_build_intrinsic_binary(base->gallivm->builder,
|
||||
"llvm.SI.load.const", base->elem_type, const_ptr, offset);
|
||||
return build_indexed_load(base->gallivm, const_ptr, offset);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -457,6 +491,9 @@ static void tex_fetch_args(
|
|||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
LLVMValueRef ptr;
|
||||
LLVMValueRef offset;
|
||||
|
||||
/* WriteMask */
|
||||
emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
|
||||
emit_data->inst->Dst[0].Register.WriteMask);
|
||||
|
|
@ -467,14 +504,18 @@ static void tex_fetch_args(
|
|||
0, LP_CHAN_ALL);
|
||||
|
||||
/* Resource */
|
||||
emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2);
|
||||
emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm,
|
||||
8 * emit_data->inst->Src[1].Register.Index);
|
||||
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2);
|
||||
offset = lp_build_const_int32(bld_base->base.gallivm,
|
||||
8 * emit_data->inst->Src[1].Register.Index);
|
||||
emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
|
||||
ptr, offset);
|
||||
|
||||
/* Sampler */
|
||||
emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1);
|
||||
emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm,
|
||||
4 * emit_data->inst->Src[1].Register.Index);
|
||||
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1);
|
||||
offset = lp_build_const_int32(bld_base->base.gallivm,
|
||||
4 * emit_data->inst->Src[1].Register.Index);
|
||||
emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
|
||||
ptr, offset);
|
||||
|
||||
/* Dimensions */
|
||||
/* XXX: We might want to pass this information to the shader at some point. */
|
||||
|
|
@ -482,7 +523,7 @@ static void tex_fetch_args(
|
|||
emit_data->inst->Texture.Texture);
|
||||
*/
|
||||
|
||||
emit_data->arg_count = 6;
|
||||
emit_data->arg_count = 4;
|
||||
/* XXX: To optimize, we could use a float or v2f32, if the last bits of
|
||||
* the writemask are clear */
|
||||
emit_data->dst_type = LLVMVectorType(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue