mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 03:50:13 +01:00
nvir/gv100: initial support
v2:
- add TargetGV100::isBarrierRequired() for OP_BREV
- use NV50_IR_SUBOP_LOP3_LUT() convenience macro where it makes sense
- separated out nir_lower_idiv into its own commit
- make use of the shared function to generate compiler options
- disable lower_fpow, nir's lowering is broken
v3:
- use replaceCvt() instead of custom NEG/ABS/SAT lowering
v4:
- remove WAR from peephole, not needed now we're using replaceCvt()

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
This commit is contained in:
parent
cacf296109
commit
78103abe87
14 changed files with 3683 additions and 19 deletions
|
|
@ -77,6 +77,7 @@ struct nv50_ir_prog_symbol
|
|||
#define NVISA_GK20A_CHIPSET 0xea
|
||||
#define NVISA_GM107_CHIPSET 0x110
|
||||
#define NVISA_GM200_CHIPSET 0x120
|
||||
#define NVISA_GV100_CHIPSET 0x140
|
||||
|
||||
struct nv50_ir_prog_info
|
||||
{
|
||||
|
|
|
|||
2011
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
Normal file
2011
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
Normal file
File diff suppressed because it is too large
Load diff
403
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
Normal file
403
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
/*
|
||||
* Copyright 2020 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __NV50_IR_EMIT_GV100_H__
|
||||
#define __NV50_IR_EMIT_GV100_H__
|
||||
#include "codegen/nv50_ir_target_gv100.h"
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
class CodeEmitterGV100 : public CodeEmitter {
|
||||
public:
|
||||
CodeEmitterGV100(TargetGV100 *target);
|
||||
|
||||
virtual bool emitInstruction(Instruction *);
|
||||
virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; }
|
||||
|
||||
private:
|
||||
const Program *prog;
|
||||
const TargetGV100 *targ;
|
||||
const Instruction *insn;
|
||||
|
||||
virtual void prepareEmission(Program *);
|
||||
virtual void prepareEmission(Function *);
|
||||
virtual void prepareEmission(BasicBlock *);
|
||||
|
||||
inline void emitInsn(uint32_t op) {
|
||||
code[0] = op;
|
||||
code[1] = 0;
|
||||
code[2] = 0;
|
||||
code[3] = 0;
|
||||
if (insn->predSrc >= 0) {
|
||||
emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
|
||||
emitField(15, 1, insn->cc == CC_NOT_P);
|
||||
} else {
|
||||
emitField(12, 3, 7);
|
||||
}
|
||||
};
|
||||
|
||||
inline void emitField(int b, int s, uint64_t v) {
|
||||
if (b >= 0) {
|
||||
uint64_t m = ~0ULL >> (64 - s);
|
||||
uint64_t d = v & m;
|
||||
assert(!(v & ~m) || (v & ~m) == ~m);
|
||||
if (b < 64 && b + s > 64) {
|
||||
*(uint64_t *)&code[0] |= d << b;
|
||||
*(uint64_t *)&code[2] |= d >> (64 - b);
|
||||
} else {
|
||||
*(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline void emitABS(int pos, int src, bool supported)
|
||||
{
|
||||
if (insn->src(src).mod.abs()) {
|
||||
assert(supported);
|
||||
emitField(pos, 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
inline void emitABS(int pos, int src)
|
||||
{
|
||||
emitABS(pos, src, true);
|
||||
}
|
||||
|
||||
inline void emitNEG(int pos, int src, bool supported) {
|
||||
if (insn->src(src).mod.neg()) {
|
||||
assert(supported);
|
||||
emitField(pos, 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
inline void emitNEG(int pos, int src) {
|
||||
emitNEG(pos, src, true);
|
||||
}
|
||||
|
||||
inline void emitNOT(int pos) {
|
||||
emitField(pos, 1, 0);
|
||||
};
|
||||
|
||||
inline void emitNOT(int pos, const ValueRef &ref) {
|
||||
emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
|
||||
}
|
||||
|
||||
// Emit the instruction's saturate flag into the 1-bit field at `pos`.
inline void emitSAT(int pos) {
   const uint64_t sat = insn->saturate;
   emitField(pos, 1, sat);
}
|
||||
|
||||
inline void emitRND(int rmp, RoundMode rnd, int rip) {
|
||||
int rm = 0, ri = 0;
|
||||
switch (rnd) {
|
||||
case ROUND_NI: ri = 1;
|
||||
case ROUND_N : rm = 0; break;
|
||||
case ROUND_MI: ri = 1;
|
||||
case ROUND_M : rm = 1; break;
|
||||
case ROUND_PI: ri = 1;
|
||||
case ROUND_P : rm = 2; break;
|
||||
case ROUND_ZI: ri = 1;
|
||||
case ROUND_Z : rm = 3; break;
|
||||
default:
|
||||
assert(!"invalid round mode");
|
||||
break;
|
||||
}
|
||||
emitField(rip, 1, ri);
|
||||
emitField(rmp, 2, rm);
|
||||
}
|
||||
|
||||
inline void emitRND(int pos) {
|
||||
emitRND(pos, insn->rnd, -1);
|
||||
}
|
||||
|
||||
// Emit the dnz/ftz flags as a `len`-bit field at `pos`: ftz in bit 0, dnz in
// bit 1.
inline void emitFMZ(int pos, int len) {
   const auto flags = (insn->dnz << 1) | insn->ftz;
   emitField(pos, len, flags);
}
|
||||
|
||||
// Emit the instruction's postFactor, biased by 4, into the 3-bit field at
// `pos`.
inline void emitPDIV(int pos) {
   const int biased = insn->postFactor + 4;
   emitField(pos, 3, biased);
}
|
||||
|
||||
// Set the 1-bit field at `pos` when source 0 lives in FILE_SHADER_OUTPUT.
inline void emitO(int pos) {
   const bool isOutput = insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT;
   emitField(pos, 1, isOutput);
}
|
||||
|
||||
// Emit the instruction's perPatch flag into the 1-bit field at `pos`.
inline void emitP(int pos) {
   const uint64_t patch = insn->perPatch;
   emitField(pos, 1, patch);
}
|
||||
|
||||
inline void emitCond3(int pos, CondCode code) {
|
||||
int data = 0;
|
||||
|
||||
switch (code) {
|
||||
case CC_FL : data = 0x00; break;
|
||||
case CC_LTU:
|
||||
case CC_LT : data = 0x01; break;
|
||||
case CC_EQU:
|
||||
case CC_EQ : data = 0x02; break;
|
||||
case CC_LEU:
|
||||
case CC_LE : data = 0x03; break;
|
||||
case CC_GTU:
|
||||
case CC_GT : data = 0x04; break;
|
||||
case CC_NEU:
|
||||
case CC_NE : data = 0x05; break;
|
||||
case CC_GEU:
|
||||
case CC_GE : data = 0x06; break;
|
||||
case CC_TR : data = 0x07; break;
|
||||
default:
|
||||
assert(!"invalid cond3");
|
||||
break;
|
||||
}
|
||||
|
||||
emitField(pos, 3, data);
|
||||
}
|
||||
|
||||
inline void emitCond4(int pos, CondCode code) {
|
||||
int data = 0;
|
||||
|
||||
switch (code) {
|
||||
case CC_FL: data = 0x00; break;
|
||||
case CC_LT: data = 0x01; break;
|
||||
case CC_EQ: data = 0x02; break;
|
||||
case CC_LE: data = 0x03; break;
|
||||
case CC_GT: data = 0x04; break;
|
||||
case CC_NE: data = 0x05; break;
|
||||
case CC_GE: data = 0x06; break;
|
||||
// case CC_NUM: data = 0x07; break;
|
||||
// case CC_NAN: data = 0x08; break;
|
||||
case CC_LTU: data = 0x09; break;
|
||||
case CC_EQU: data = 0x0a; break;
|
||||
case CC_LEU: data = 0x0b; break;
|
||||
case CC_GTU: data = 0x0c; break;
|
||||
case CC_NEU: data = 0x0d; break;
|
||||
case CC_GEU: data = 0x0e; break;
|
||||
case CC_TR: data = 0x0f; break;
|
||||
default:
|
||||
assert(!"invalid cond4");
|
||||
break;
|
||||
}
|
||||
|
||||
emitField(pos, 4, data);
|
||||
}
|
||||
|
||||
inline void emitSYS(int pos, const Value *val) {
|
||||
int id = val ? val->reg.data.id : -1;
|
||||
|
||||
switch (id) {
|
||||
case SV_LANEID : id = 0x00; break;
|
||||
case SV_VERTEX_COUNT : id = 0x10; break;
|
||||
case SV_INVOCATION_ID : id = 0x11; break;
|
||||
case SV_THREAD_KILL : id = 0x13; break;
|
||||
case SV_INVOCATION_INFO: id = 0x1d; break;
|
||||
case SV_COMBINED_TID : id = 0x20; break;
|
||||
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
|
||||
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
|
||||
case SV_LANEMASK_EQ : id = 0x38; break;
|
||||
case SV_LANEMASK_LT : id = 0x39; break;
|
||||
case SV_LANEMASK_LE : id = 0x3a; break;
|
||||
case SV_LANEMASK_GT : id = 0x3b; break;
|
||||
case SV_LANEMASK_GE : id = 0x3c; break;
|
||||
case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
|
||||
default:
|
||||
assert(!"invalid system value");
|
||||
id = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
emitField(pos, 8, id);
|
||||
}
|
||||
|
||||
inline void emitSYS(int pos, const ValueRef &ref) {
|
||||
emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const Value *val, int off) {
|
||||
emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
|
||||
val->reg.data.id + off: 255);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const Value *v) {
|
||||
emitGPR(pos, v, 0);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos) {
|
||||
emitGPR(pos, (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const ValueRef &ref) {
|
||||
emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const ValueRef *ref) {
|
||||
emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const ValueDef &def) {
|
||||
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitGPR(int pos, const ValueDef &def, int off) {
|
||||
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off);
|
||||
}
|
||||
|
||||
inline void emitPRED(int pos, const Value *val) {
|
||||
emitField(pos, 3, val ? val->reg.data.id : 7);
|
||||
};
|
||||
|
||||
inline void emitPRED(int pos) {
|
||||
emitPRED(pos, (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitPRED(int pos, const ValueRef &ref) {
|
||||
emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitPRED(int pos, const ValueDef &def) {
|
||||
emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
|
||||
}
|
||||
|
||||
inline void emitCBUF(int buf, int gpr, int off, int len, int align,
|
||||
const ValueRef &ref) {
|
||||
const Value *v = ref.get();
|
||||
const Symbol *s = v->asSym();
|
||||
|
||||
assert(!(s->reg.data.offset & ((1 << align) - 1)));
|
||||
|
||||
emitField(buf, 5, v->reg.fileIndex);
|
||||
if (gpr >= 0)
|
||||
emitGPR(gpr, ref.getIndirect(0));
|
||||
emitField(off, 16, s->reg.data.offset);
|
||||
}
|
||||
|
||||
inline void emitIMMD(int pos, int len, const ValueRef &ref) {
|
||||
const ImmediateValue *imm = ref.get()->asImm();
|
||||
uint32_t val = imm->reg.data.u32;
|
||||
|
||||
if (insn->sType == TYPE_F64) {
|
||||
assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL));
|
||||
val = imm->reg.data.u64 >> 32;
|
||||
}
|
||||
|
||||
emitField(pos, len, val);
|
||||
}
|
||||
|
||||
inline void emitADDR(int gpr, int off, int len, int shr,
|
||||
const ValueRef &ref) {
|
||||
const Value *v = ref.get();
|
||||
assert(!(v->reg.data.offset & ((1 << shr) - 1)));
|
||||
if (gpr >= 0)
|
||||
emitGPR(gpr, ref.getIndirect(0));
|
||||
emitField(off, len, v->reg.data.offset >> shr);
|
||||
}
|
||||
|
||||
inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2);
|
||||
inline void emitFormA_RRR(uint16_t op, int src1, int src2);
|
||||
inline void emitFormA_RRI(uint16_t op, int src1, int src2);
|
||||
inline void emitFormA_RRC(uint16_t op, int src1, int src2);
|
||||
inline void emitFormA_I32(int src);
|
||||
|
||||
void emitBRA();
|
||||
void emitEXIT();
|
||||
void emitKILL();
|
||||
void emitNOP();
|
||||
void emitWARPSYNC();
|
||||
|
||||
void emitCS2R();
|
||||
void emitF2F();
|
||||
void emitF2I();
|
||||
void emitFRND();
|
||||
void emitI2F();
|
||||
void emitMOV();
|
||||
void emitPRMT();
|
||||
void emitS2R();
|
||||
void emitSEL();
|
||||
void emitSHFL();
|
||||
|
||||
void emitFADD();
|
||||
void emitFFMA();
|
||||
void emitFMNMX();
|
||||
void emitFMUL();
|
||||
void emitFSET_BF();
|
||||
void emitFSETP();
|
||||
void emitFSWZADD();
|
||||
void emitMUFU();
|
||||
|
||||
void emitDADD();
|
||||
void emitDFMA();
|
||||
void emitDMUL();
|
||||
void emitDSETP();
|
||||
|
||||
void emitBMSK();
|
||||
void emitBREV();
|
||||
void emitFLO();
|
||||
void emitIABS();
|
||||
void emitIADD3();
|
||||
void emitIMAD();
|
||||
void emitIMAD_WIDE();
|
||||
void emitISETP();
|
||||
void emitLEA();
|
||||
void emitLOP3_LUT();
|
||||
void emitPOPC();
|
||||
void emitSGXT();
|
||||
void emitSHF();
|
||||
|
||||
void emitALD();
|
||||
void emitAST();
|
||||
void emitATOM();
|
||||
void emitATOMS();
|
||||
void emitIPA();
|
||||
void emitISBERD();
|
||||
void emitLDSTc(int);
|
||||
void emitLDSTs(int, DataType);
|
||||
void emitLD();
|
||||
void emitLDC();
|
||||
void emitLDL();
|
||||
void emitLDS();
|
||||
void emitOUT();
|
||||
void emitRED();
|
||||
void emitST();
|
||||
void emitSTL();
|
||||
void emitSTS();
|
||||
|
||||
void emitTEXs(int);
|
||||
void emitTEX();
|
||||
void emitTLD();
|
||||
void emitTLD4();
|
||||
void emitTMML();
|
||||
void emitTXD();
|
||||
void emitTXQ();
|
||||
|
||||
void emitSUHandle(const int);
|
||||
void emitSUTarget();
|
||||
void emitSUATOM();
|
||||
void emitSULD();
|
||||
void emitSUST();
|
||||
|
||||
void emitAL2P();
|
||||
void emitBAR();
|
||||
void emitCCTL();
|
||||
void emitMEMBAR();
|
||||
void emitPIXLD();
|
||||
void emitPLOP3_LUT();
|
||||
void emitVOTE();
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -3356,21 +3356,21 @@ static nir_shader_compiler_options
|
|||
nvir_nir_shader_compiler_options(int chipset)
|
||||
{
|
||||
return {
|
||||
.lower_fdiv = false,
|
||||
.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_ffma = false,
|
||||
.fuse_ffma = false, /* nir doesn't track mad vs fma */
|
||||
.lower_flrp16 = false,
|
||||
.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fpow = false,
|
||||
.lower_fpow = false, // TODO: nir's lowering is broken, or we could use it
|
||||
.lower_fsat = false,
|
||||
.lower_fsqrt = false, // TODO: only before gm200
|
||||
.lower_sincos = false,
|
||||
.lower_fmod = true,
|
||||
.lower_bitfield_extract = false,
|
||||
.lower_bitfield_extract_to_shifts = false,
|
||||
.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_bitfield_insert = false,
|
||||
.lower_bitfield_insert_to_shifts = false,
|
||||
.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_bitfield_insert_to_bitfield_select = false,
|
||||
.lower_bitfield_reverse = false,
|
||||
.lower_bit_count = false,
|
||||
|
|
@ -3385,8 +3385,8 @@ nvir_nir_shader_compiler_options(int chipset)
|
|||
.lower_vector_cmp = false,
|
||||
.lower_idiv = true,
|
||||
.lower_bitops = false,
|
||||
.lower_isign = false, // TODO
|
||||
.lower_fsign = false,
|
||||
.lower_isign = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_fsign = (chipset >= NVISA_GV100_CHIPSET),
|
||||
.lower_fdph = false,
|
||||
.lower_fdot = false,
|
||||
.fdot_replicates = false, // TODO
|
||||
|
|
@ -3425,18 +3425,35 @@ nvir_nir_shader_compiler_options(int chipset)
|
|||
.unify_interfaces = false,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_mul_2x32_64 = true, // TODO
|
||||
.lower_rotate = true,
|
||||
.lower_rotate = (chipset < NVISA_GV100_CHIPSET),
|
||||
.has_imul24 = false,
|
||||
.intel_vec4 = false,
|
||||
.max_unroll_iterations = 32,
|
||||
.lower_int64_options = (nir_lower_int64_options) ( // TODO
|
||||
.lower_int64_options = (nir_lower_int64_options) (
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
|
||||
nir_lower_divmod64 |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
|
||||
((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
|
||||
nir_lower_ufind_msb64
|
||||
),
|
||||
.lower_doubles_options = (nir_lower_doubles_options) ( // TODO
|
||||
nir_lower_dmod
|
||||
),
|
||||
.lower_doubles_options = (nir_lower_doubles_options) (
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
|
||||
nir_lower_dmod |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
|
||||
((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -3444,10 +3461,14 @@ static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
|
|||
nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);
|
||||
static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
|
||||
nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);
|
||||
static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
|
||||
nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);
|
||||
|
||||
const nir_shader_compiler_options *
|
||||
nv50_ir_nir_shader_compiler_options(int chipset)
|
||||
{
|
||||
if (chipset >= NVISA_GV100_CHIPSET)
|
||||
return &gv100_nir_shader_compiler_options;
|
||||
if (chipset >= NVISA_GM107_CHIPSET)
|
||||
return &gm107_nir_shader_compiler_options;
|
||||
return &gf100_nir_shader_compiler_options;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA
|
|||
private:
|
||||
virtual bool visit(Instruction *);
|
||||
|
||||
protected:
|
||||
void handlePFETCH(Instruction *);
|
||||
void handleLOAD(Instruction *);
|
||||
};
|
||||
|
|
|
|||
477
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
Normal file
477
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
Normal file
|
|
@ -0,0 +1,477 @@
|
|||
/*
|
||||
* Copyright 2020 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "codegen/nv50_ir.h"
|
||||
#include "codegen/nv50_ir_build_util.h"
|
||||
|
||||
#include "codegen/nv50_ir_target_nvc0.h"
|
||||
#include "codegen/nv50_ir_lowering_gv100.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleCMP(Instruction *i)
|
||||
{
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
|
||||
bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
|
||||
i->sType, bld.mkImm(0), i->getSrc(2));
|
||||
bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
|
||||
return true;
|
||||
}
|
||||
|
||||
// NIR deals with most of these for us, but codegen generates more in pointer
|
||||
// calculations from other lowering passes.
|
||||
bool
|
||||
GV100LegalizeSSA::handleIADD64(Instruction *i)
|
||||
{
|
||||
Value *carry = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *def[2] = { bld.getSSA(), bld.getSSA() };
|
||||
Value *src[2][2];
|
||||
|
||||
for (int s = 0; s < 2; s++) {
|
||||
if (i->getSrc(s)->reg.size == 8) {
|
||||
bld.mkSplit(src[s], 4, i->getSrc(s));
|
||||
} else {
|
||||
src[s][0] = i->getSrc(s);
|
||||
src[s][1] = bld.mkImm(0);
|
||||
}
|
||||
}
|
||||
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
|
||||
setFlagsDef(1, carry);
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
|
||||
setFlagsSrc(2, carry);
|
||||
bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
|
||||
{
|
||||
Value *def = bld.getSSA(8), *defs[2];
|
||||
Value *src2;
|
||||
|
||||
if (i->srcExists(2) &&
|
||||
(!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
|
||||
Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
|
||||
bld.mkMov(src2s[0], bld.mkImm(0));
|
||||
bld.mkMov(src2s[1], i->getSrc(2));
|
||||
src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
|
||||
} else {
|
||||
src2 = bld.mkImm(0);
|
||||
}
|
||||
|
||||
bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
|
||||
i->getSrc(0), i->getSrc(1), src2);
|
||||
|
||||
bld.mkSplit(defs, 4, def);
|
||||
i->def(0).replace(defs[1], false);
|
||||
return true;
|
||||
}
|
||||
|
||||
// XXX: We should be able to do this in GV100LoweringPass, but codegen messes
|
||||
// up somehow and swaps the condcode without swapping the sources.
|
||||
// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
|
||||
bool
|
||||
GV100LegalizeSSA::handleIMNMX(Instruction *i)
|
||||
{
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
|
||||
bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
|
||||
i->sType, i->getSrc(0), i->getSrc(1));
|
||||
bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleIMUL(Instruction *i)
|
||||
{
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
return handleIMAD_HIGH(i);
|
||||
|
||||
bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
|
||||
bld.mkImm(0));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleLOP2(Instruction *i)
|
||||
{
|
||||
uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
|
||||
uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
|
||||
uint8_t subOp;
|
||||
|
||||
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
|
||||
src0 = ~src0;
|
||||
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
|
||||
src1 = ~src1;
|
||||
|
||||
switch (i->op) {
|
||||
case OP_AND: subOp = src0 & src1; break;
|
||||
case OP_OR : subOp = src0 | src1; break;
|
||||
case OP_XOR: subOp = src0 ^ src1; break;
|
||||
default:
|
||||
assert(!"invalid LOP2 opcode");
|
||||
break;
|
||||
}
|
||||
|
||||
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
|
||||
bld.mkImm(0))->subOp = subOp;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleNOT(Instruction *i)
|
||||
{
|
||||
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
|
||||
bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handlePREEX2(Instruction *i)
|
||||
{
|
||||
i->def(0).replace(i->src(0), false);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleQUADON(Instruction *i)
|
||||
{
|
||||
handleSHFL(i); // Inserts OP_WARPSYNC
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleQUADPOP(Instruction *i)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleSET(Instruction *i)
|
||||
{
|
||||
Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
|
||||
Instruction *xsetp;
|
||||
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->sType == TYPE_F32)
|
||||
return false; // HW has FSET.BF
|
||||
met = bld.mkImm(0x3f800000);
|
||||
} else {
|
||||
met = bld.mkImm(0xffffffff);
|
||||
}
|
||||
|
||||
xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
|
||||
i->getSrc(0), i->getSrc(1));
|
||||
xsetp->src(0).mod = i->src(0).mod;
|
||||
xsetp->src(1).mod = i->src(1).mod;
|
||||
xsetp->setSrc(2, src2);
|
||||
|
||||
i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
|
||||
i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleSHFL(Instruction *i)
|
||||
{
|
||||
Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
|
||||
sync->fixed = 1;
|
||||
sync->setSrc(0, bld.mkImm(0xffffffff));
|
||||
i->bb->insertBefore(i, sync);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleSHL(Instruction *i)
|
||||
{
|
||||
if (i->src(0).getFile() != FILE_GPR) {
|
||||
bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
|
||||
i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_L |
|
||||
NV50_IR_SUBOP_SHF_HI;
|
||||
} else {
|
||||
bld.mkOp3(OP_SHF, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
|
||||
bld.mkImm(0))->subOp = NV50_IR_SUBOP_SHF_L;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleSHR(Instruction *i)
|
||||
{
|
||||
bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
|
||||
i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_R | NV50_IR_SUBOP_SHF_HI;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::handleSUB(Instruction *i)
|
||||
{
|
||||
Instruction *xadd =
|
||||
bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
|
||||
xadd->src(0).mod = i->src(0).mod;
|
||||
xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LegalizeSSA::visit(Instruction *i)
|
||||
{
|
||||
bool lowered = false;
|
||||
|
||||
bld.setPosition(i, false);
|
||||
|
||||
switch (i->op) {
|
||||
case OP_AND:
|
||||
case OP_OR:
|
||||
case OP_XOR:
|
||||
if (i->def(0).getFile() != FILE_PREDICATE)
|
||||
lowered = handleLOP2(i);
|
||||
break;
|
||||
case OP_NOT:
|
||||
lowered = handleNOT(i);
|
||||
break;
|
||||
case OP_SHL:
|
||||
lowered = handleSHL(i);
|
||||
break;
|
||||
case OP_SHR:
|
||||
lowered = handleSHR(i);
|
||||
break;
|
||||
case OP_SET:
|
||||
case OP_SET_AND:
|
||||
case OP_SET_OR:
|
||||
case OP_SET_XOR:
|
||||
if (i->def(0).getFile() != FILE_PREDICATE)
|
||||
lowered = handleSET(i);
|
||||
break;
|
||||
case OP_SLCT:
|
||||
lowered = handleCMP(i);
|
||||
break;
|
||||
case OP_PREEX2:
|
||||
lowered = handlePREEX2(i);
|
||||
break;
|
||||
case OP_MUL:
|
||||
if (!isFloatType(i->dType))
|
||||
lowered = handleIMUL(i);
|
||||
break;
|
||||
case OP_MAD:
|
||||
if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
lowered = handleIMAD_HIGH(i);
|
||||
break;
|
||||
case OP_SHFL:
|
||||
lowered = handleSHFL(i);
|
||||
break;
|
||||
case OP_QUADON:
|
||||
lowered = handleQUADON(i);
|
||||
break;
|
||||
case OP_QUADPOP:
|
||||
lowered = handleQUADPOP(i);
|
||||
break;
|
||||
case OP_SUB:
|
||||
lowered = handleSUB(i);
|
||||
break;
|
||||
case OP_MAX:
|
||||
case OP_MIN:
|
||||
if (!isFloatType(i->dType))
|
||||
lowered = handleIMNMX(i);
|
||||
break;
|
||||
case OP_ADD:
|
||||
if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8)
|
||||
lowered = handleIADD64(i);
|
||||
break;
|
||||
case OP_PFETCH:
|
||||
handlePFETCH(i);
|
||||
break;
|
||||
case OP_LOAD:
|
||||
handleLOAD(i);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (lowered)
|
||||
delete_Instruction(prog, i);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handleDMNMX(Instruction *i)
|
||||
{
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *src0[2], *src1[2], *dest[2];
|
||||
|
||||
bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
|
||||
i->sType, i->getSrc(0), i->getSrc(1));
|
||||
bld.mkSplit(src0, 4, i->getSrc(0));
|
||||
bld.mkSplit(src1, 4, i->getSrc(1));
|
||||
bld.mkSplit(dest, 4, i->getDef(0));
|
||||
bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
|
||||
bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
|
||||
bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handleEXTBF(Instruction *i)
|
||||
{
|
||||
Value *bit = bld.getScratch();
|
||||
Value *cnt = bld.getScratch();
|
||||
Value *mask = bld.getScratch();
|
||||
Value *zero = bld.mkImm(0);
|
||||
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
|
||||
bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt);
|
||||
bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask);
|
||||
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit);
|
||||
if (isSignedType(i->dType))
|
||||
bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handleFLOW(Instruction *i)
|
||||
{
|
||||
i->op = OP_BRA;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handleI2I(Instruction *i)
|
||||
{
|
||||
bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
|
||||
subOp = i->subOp;
|
||||
bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handleINSBF(Instruction *i)
|
||||
{
|
||||
Value *bit = bld.getScratch();
|
||||
Value *cnt = bld.getScratch();
|
||||
Value *mask = bld.getScratch();
|
||||
Value *src0 = bld.getScratch();
|
||||
Value *zero = bld.mkImm(0);
|
||||
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
|
||||
bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
|
||||
bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt);
|
||||
|
||||
bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
|
||||
bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
|
||||
|
||||
bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
|
||||
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
|
||||
subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handlePINTERP(Instruction *i)
|
||||
{
|
||||
Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
|
||||
Instruction *ipa, *mul;
|
||||
|
||||
ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
|
||||
ipa->ipa = i->ipa;
|
||||
mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
|
||||
|
||||
if (i->getInterpMode() == NV50_IR_INTERP_SC) {
|
||||
ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
|
||||
mul->setPredicate(CC_NOT_P, ipa->getDef(1));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handlePREFLOW(Instruction *i)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::handlePRESIN(Instruction *i)
|
||||
{
|
||||
const float f = 1.0 / (2.0 * 3.14159265);
|
||||
bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GV100LoweringPass::visit(Instruction *i)
|
||||
{
|
||||
bool lowered = false;
|
||||
|
||||
bld.setPosition(i, false);
|
||||
|
||||
switch (i->op) {
|
||||
case OP_BREAK:
|
||||
case OP_CONT:
|
||||
lowered = handleFLOW(i);
|
||||
break;
|
||||
case OP_PREBREAK:
|
||||
case OP_PRECONT:
|
||||
lowered = handlePREFLOW(i);
|
||||
break;
|
||||
case OP_CVT:
|
||||
if (i->src(0).getFile() != FILE_PREDICATE &&
|
||||
i->def(0).getFile() != FILE_PREDICATE &&
|
||||
!isFloatType(i->dType) && !isFloatType(i->sType))
|
||||
lowered = handleI2I(i);
|
||||
break;
|
||||
case OP_EXTBF:
|
||||
lowered = handleEXTBF(i);
|
||||
break;
|
||||
case OP_INSBF:
|
||||
lowered = handleINSBF(i);
|
||||
break;
|
||||
case OP_MAX:
|
||||
case OP_MIN:
|
||||
if (i->dType == TYPE_F64)
|
||||
lowered = handleDMNMX(i);
|
||||
break;
|
||||
case OP_PINTERP:
|
||||
lowered = handlePINTERP(i);
|
||||
break;
|
||||
case OP_PRESIN:
|
||||
lowered = handlePRESIN(i);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (lowered)
|
||||
delete_Instruction(prog, i);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace nv50_ir
|
||||
79
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
Normal file
79
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright 2020 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __NV50_IR_LOWERING_GV100_H__
|
||||
#define __NV50_IR_LOWERING_GV100_H__
|
||||
#include "codegen/nv50_ir_lowering_gm107.h"
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
class GV100LoweringPass : public Pass
|
||||
{
|
||||
public:
|
||||
GV100LoweringPass(Program *p) {
|
||||
bld.setProgram(p);
|
||||
}
|
||||
|
||||
private:
|
||||
BuildUtil bld;
|
||||
|
||||
virtual bool visit(Instruction *);
|
||||
|
||||
bool handleDMNMX(Instruction *);
|
||||
bool handleEXTBF(Instruction *);
|
||||
bool handleFLOW(Instruction *);
|
||||
bool handleI2I(Instruction *);
|
||||
bool handleINSBF(Instruction *);
|
||||
bool handlePINTERP(Instruction *);
|
||||
bool handlePREFLOW(Instruction *);
|
||||
bool handlePRESIN(Instruction *);
|
||||
};
|
||||
|
||||
class GV100LegalizeSSA : public GM107LegalizeSSA
|
||||
{
|
||||
public:
|
||||
GV100LegalizeSSA(Program *p) {
|
||||
bld.setProgram(p);
|
||||
}
|
||||
|
||||
private:
|
||||
virtual bool visit(Function *) { return true; }
|
||||
virtual bool visit(BasicBlock *) { return true; }
|
||||
virtual bool visit(Instruction *);
|
||||
|
||||
bool handleCMP(Instruction *);
|
||||
bool handleIADD64(Instruction *);
|
||||
bool handleIMAD_HIGH(Instruction *);
|
||||
bool handleIMNMX(Instruction *);
|
||||
bool handleIMUL(Instruction *);
|
||||
bool handleLOP2(Instruction *);
|
||||
bool handleNOT(Instruction *);
|
||||
bool handlePREEX2(Instruction *);
|
||||
bool handleQUADON(Instruction *);
|
||||
bool handleQUADPOP(Instruction *);
|
||||
bool handleSET(Instruction *);
|
||||
bool handleSHFL(Instruction *);
|
||||
bool handleSHL(Instruction *);
|
||||
bool handleSHR(Instruction *);
|
||||
bool handleSUB(Instruction *);
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
|
@ -898,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn)
|
|||
gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
|
||||
if (fn->cfgExit) {
|
||||
bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
|
||||
if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET)
|
||||
bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1;
|
||||
bld.mkMovToReg(0, gpEmitAddress);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -988,6 +988,7 @@ GCRA::coalesce(ArrayList& insns)
|
|||
case 0x110:
|
||||
case 0x120:
|
||||
case 0x130:
|
||||
case 0x140:
|
||||
ret = doCoalesce(insns, JOIN_MASK_UNION);
|
||||
break;
|
||||
default:
|
||||
|
|
@ -2297,13 +2298,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
|
|||
if (isTextureOp(tex->op))
|
||||
textureMask(tex);
|
||||
|
||||
if (isScalarTexGM107(tex)) {
|
||||
handleScalarTexGM107(tex);
|
||||
return;
|
||||
}
|
||||
if (targ->getChipset() < NVISA_GV100_CHIPSET) {
|
||||
if (isScalarTexGM107(tex)) {
|
||||
handleScalarTexGM107(tex);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!tex->tex.scalar);
|
||||
condenseDefs(tex);
|
||||
assert(!tex->tex.scalar);
|
||||
condenseDefs(tex);
|
||||
} else {
|
||||
if (isTextureOp(tex->op)) {
|
||||
int defCount = tex->defCount(0xff);
|
||||
if (defCount > 3)
|
||||
condenseDefs(tex, 2, 3);
|
||||
if (defCount > 1)
|
||||
condenseDefs(tex, 0, 1);
|
||||
} else {
|
||||
condenseDefs(tex);
|
||||
}
|
||||
}
|
||||
|
||||
if (isSurfaceOp(tex->op)) {
|
||||
int s = tex->tex.target.getDim() +
|
||||
|
|
@ -2485,6 +2498,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
|
|||
case 0x110:
|
||||
case 0x120:
|
||||
case 0x130:
|
||||
case 0x140:
|
||||
texConstraintGM107(tex);
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -144,6 +144,7 @@ const OpClass Target::operationClass[] =
|
|||
};
|
||||
|
||||
|
||||
extern Target *getTargetGV100(unsigned int chipset);
|
||||
extern Target *getTargetGM107(unsigned int chipset);
|
||||
extern Target *getTargetNVC0(unsigned int chipset);
|
||||
extern Target *getTargetNV50(unsigned int chipset);
|
||||
|
|
@ -153,6 +154,8 @@ Target *Target::create(unsigned int chipset)
|
|||
STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
|
||||
STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
|
||||
switch (chipset & ~0xf) {
|
||||
case 0x140:
|
||||
return getTargetGV100(chipset);
|
||||
case 0x110:
|
||||
case 0x120:
|
||||
case 0x130:
|
||||
|
|
|
|||
594
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
Normal file
594
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
/*
|
||||
* Copyright 2020 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "codegen/nv50_ir_target_gv100.h"
|
||||
#include "codegen/nv50_ir_lowering_gv100.h"
|
||||
#include "codegen/nv50_ir_emit_gv100.h"
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
void
|
||||
TargetGV100::initOpInfo()
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
static const operation commutative[] =
|
||||
{
|
||||
OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN,
|
||||
OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
|
||||
};
|
||||
|
||||
static const operation noDest[] =
|
||||
{
|
||||
OP_EXIT
|
||||
};
|
||||
|
||||
static const operation noPred[] =
|
||||
{
|
||||
};
|
||||
|
||||
for (i = 0; i < DATA_FILE_COUNT; ++i)
|
||||
nativeFileMap[i] = (DataFile)i;
|
||||
nativeFileMap[FILE_ADDRESS] = FILE_GPR;
|
||||
nativeFileMap[FILE_FLAGS] = FILE_PREDICATE;
|
||||
|
||||
for (i = 0; i < OP_LAST; ++i) {
|
||||
opInfo[i].variants = NULL;
|
||||
opInfo[i].op = (operation)i;
|
||||
opInfo[i].srcTypes = 1 << (int)TYPE_F32;
|
||||
opInfo[i].dstTypes = 1 << (int)TYPE_F32;
|
||||
opInfo[i].immdBits = 0;
|
||||
opInfo[i].srcNr = operationSrcNr[i];
|
||||
|
||||
for (j = 0; j < opInfo[i].srcNr; ++j) {
|
||||
opInfo[i].srcMods[j] = 0;
|
||||
opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
|
||||
}
|
||||
opInfo[i].dstMods = 0;
|
||||
opInfo[i].dstFiles = 1 << (int)FILE_GPR;
|
||||
|
||||
opInfo[i].hasDest = 1;
|
||||
opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
|
||||
opInfo[i].commutative = false; /* set below */
|
||||
opInfo[i].pseudo = (i < OP_MOV);
|
||||
opInfo[i].predicate = !opInfo[i].pseudo;
|
||||
opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
|
||||
opInfo[i].minEncSize = 16;
|
||||
}
|
||||
for (i = 0; i < ARRAY_SIZE(commutative); ++i)
|
||||
opInfo[commutative[i]].commutative = true;
|
||||
for (i = 0; i < ARRAY_SIZE(noDest); ++i)
|
||||
opInfo[noDest[i]].hasDest = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(noPred); ++i)
|
||||
opInfo[noPred[i]].predicate = 0;
|
||||
}
|
||||
|
||||
struct opInfo {
|
||||
struct {
|
||||
uint8_t files;
|
||||
uint8_t mods;
|
||||
} src[3];
|
||||
};
|
||||
|
||||
#define SRC_NONE 0
|
||||
#define SRC_R (1 << FILE_GPR)
|
||||
#define SRC_I (1 << FILE_MEMORY_CONST)
|
||||
#define SRC_C (1 << FILE_IMMEDIATE)
|
||||
#define SRC_RC (SRC_R | SRC_C)
|
||||
#define SRC_RI (SRC_R | SRC_I )
|
||||
#define SRC_RIC (SRC_R | SRC_I | SRC_C)
|
||||
|
||||
#define MOD_NONE 0
|
||||
#define MOD_NEG NV50_IR_MOD_NEG
|
||||
#define MOD_ABS NV50_IR_MOD_ABS
|
||||
#define MOD_NOT NV50_IR_MOD_NOT
|
||||
#define MOD_NA (MOD_NEG | MOD_ABS)
|
||||
|
||||
#define OPINFO(O,SA,MA,SB,MB,SC,MC) \
|
||||
static struct opInfo \
|
||||
opInfo_##O = { \
|
||||
.src = { { SRC_##SA, MOD_##MA }, \
|
||||
{ SRC_##SB, MOD_##MB }, \
|
||||
{ SRC_##SC, MOD_##MC }}, \
|
||||
};
|
||||
|
||||
|
||||
/* Handled by GV100LegalizeSSA. */
|
||||
OPINFO(FABS , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(FCMP , R , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods
|
||||
OPINFO(FNEG , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(FSET , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(ICMP , R , NONE, RIC , NONE, RIC , NONE);
|
||||
OPINFO(IMUL , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(INEG , RIC , NEG , NONE, NONE, NONE, NONE);
|
||||
OPINFO(ISET , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(LOP2 , R , NOT , RIC , NOT , NONE, NONE);
|
||||
OPINFO(NOT , RIC , NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SAT , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(SHL , RIC , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(SHR , RIC , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(SUB , R , NONE, RIC , NEG , NONE, NONE);
|
||||
OPINFO(IMNMX , R , NONE, RIC , NONE, NONE, NONE);
|
||||
|
||||
/* Handled by CodeEmitterGV100. */
|
||||
OPINFO(AL2P , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(ALD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(AST , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(ATOM , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(ATOMS , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(BAR , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(BRA , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(BMSK , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(BREV , RIC , NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(CCTL , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
//OPINFO(CS2R , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(DADD , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(DFMA , R , NA , RIC , NA , RIC , NA );
|
||||
OPINFO(DMUL , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(DSETP , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(EXIT , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(F2F , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(F2I , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(FADD , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(FFMA , R , NA , RIC , NA , RIC , NA );
|
||||
OPINFO(FLO , RIC , NOT , NONE, NONE, NONE, NONE);
|
||||
OPINFO(FMNMX , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(FMUL , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(FRND , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(FSET_BF , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(FSETP , R , NA , RIC , NA , NONE, NONE);
|
||||
OPINFO(FSWZADD , R , NONE, R , NONE, NONE, NONE);
|
||||
OPINFO(I2F , RIC , NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(IABS , RIC , NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(IADD3 , R , NEG , RIC , NEG , R , NEG );
|
||||
OPINFO(IMAD , R , NONE, RIC , NONE, RIC , NEG );
|
||||
OPINFO(IMAD_WIDE, R , NONE, RIC , NONE, RC , NEG );
|
||||
OPINFO(IPA , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(ISBERD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(ISETP , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(KILL , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(LD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(LDC , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(LDL , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(LDS , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(LEA , R , NEG , I , NONE, RIC , NEG );
|
||||
OPINFO(LOP3_LUT , R , NONE, RIC , NONE, R , NONE);
|
||||
OPINFO(MEMBAR , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(MOV , RIC , NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(MUFU , RIC , NA , NONE, NONE, NONE, NONE);
|
||||
OPINFO(NOP , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(OUT , R , NONE, RI , NONE, NONE, NONE);
|
||||
OPINFO(PIXLD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(POPC , RIC , NOT , NONE, NONE, NONE, NONE);
|
||||
OPINFO(PRMT , R , NONE, RIC , NONE, RIC , NONE);
|
||||
OPINFO(RED , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SGXT , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(S2R , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SEL , R , NONE, RIC , NONE, NONE, NONE);
|
||||
OPINFO(SHF , R , NONE, RIC , NONE, RIC , NONE);
|
||||
OPINFO(SHFL , R , NONE, R , NONE, R , NONE);
|
||||
OPINFO(ST , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(STL , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(STS , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SUATOM , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SULD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(SUST , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TEX , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TLD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TLD4 , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TMML , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TXD , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(TXQ , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(VOTE , NONE, NONE, NONE, NONE, NONE, NONE);
|
||||
OPINFO(WARPSYNC , R , NONE, NONE, NONE, NONE, NONE);
|
||||
|
||||
static const struct opInfo *
|
||||
getOpInfo(const Instruction *i)
|
||||
{
|
||||
switch (i->op) {
|
||||
case OP_ABS:
|
||||
if (isFloatType(i->dType))
|
||||
return &opInfo_FABS;
|
||||
return &opInfo_IABS;
|
||||
case OP_ADD:
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->dType == TYPE_F32)
|
||||
return &opInfo_FADD;
|
||||
else
|
||||
return &opInfo_DADD;
|
||||
} else {
|
||||
return &opInfo_IADD3;
|
||||
}
|
||||
break;
|
||||
case OP_AFETCH: return &opInfo_AL2P;
|
||||
case OP_AND:
|
||||
case OP_OR:
|
||||
case OP_XOR:
|
||||
if (i->def(0).getFile() == FILE_PREDICATE)
|
||||
return &opInfo_PLOP3_LUT;
|
||||
return &opInfo_LOP2;
|
||||
case OP_ATOM:
|
||||
if (i->src(0).getFile() == FILE_MEMORY_SHARED)
|
||||
return &opInfo_ATOMS;
|
||||
else
|
||||
if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS)
|
||||
return &opInfo_RED;
|
||||
else
|
||||
return &opInfo_ATOM;
|
||||
break;
|
||||
case OP_BAR: return &opInfo_BAR;
|
||||
case OP_BFIND: return &opInfo_FLO;
|
||||
case OP_BMSK: return &opInfo_BMSK;
|
||||
case OP_BREV: return &opInfo_BREV;
|
||||
case OP_BRA:
|
||||
case OP_JOIN: return &opInfo_BRA; //XXX
|
||||
case OP_CCTL: return &opInfo_CCTL;
|
||||
case OP_CEIL:
|
||||
case OP_CVT:
|
||||
case OP_FLOOR:
|
||||
case OP_TRUNC:
|
||||
if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE ||
|
||||
i->src(0).getFile() == FILE_PREDICATE)) {
|
||||
return &opInfo_MOV;
|
||||
} else if (isFloatType(i->dType)) {
|
||||
if (isFloatType(i->sType)) {
|
||||
if (i->sType == i->dType)
|
||||
return &opInfo_FRND;
|
||||
else
|
||||
return &opInfo_F2F;
|
||||
} else {
|
||||
return &opInfo_I2F;
|
||||
}
|
||||
} else {
|
||||
if (isFloatType(i->sType))
|
||||
return &opInfo_F2I;
|
||||
}
|
||||
break;
|
||||
case OP_COS:
|
||||
case OP_EX2:
|
||||
case OP_LG2:
|
||||
case OP_RCP:
|
||||
case OP_RSQ:
|
||||
case OP_SIN:
|
||||
case OP_SQRT: return &opInfo_MUFU;
|
||||
case OP_DISCARD: return &opInfo_KILL;
|
||||
case OP_EMIT:
|
||||
case OP_FINAL:
|
||||
case OP_RESTART: return &opInfo_OUT;
|
||||
case OP_EXIT: return &opInfo_EXIT;
|
||||
case OP_EXPORT: return &opInfo_AST;
|
||||
case OP_FMA:
|
||||
case OP_MAD:
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->dType == TYPE_F32)
|
||||
return &opInfo_FFMA;
|
||||
else
|
||||
return &opInfo_DFMA;
|
||||
} else {
|
||||
if (typeSizeof(i->dType) != 8)
|
||||
return &opInfo_IMAD;
|
||||
else
|
||||
return &opInfo_IMAD_WIDE;
|
||||
}
|
||||
break;
|
||||
case OP_JOINAT: return &opInfo_NOP; //XXX
|
||||
case OP_LINTERP: return &opInfo_IPA;
|
||||
case OP_LOAD:
|
||||
switch (i->src(0).getFile()) {
|
||||
case FILE_MEMORY_CONST : return &opInfo_LDC;
|
||||
case FILE_MEMORY_LOCAL : return &opInfo_LDL;
|
||||
case FILE_MEMORY_SHARED: return &opInfo_LDS;
|
||||
case FILE_MEMORY_GLOBAL: return &opInfo_LD;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case OP_LOP3_LUT: return &opInfo_LOP3_LUT;
|
||||
case OP_MAX:
|
||||
case OP_MIN:
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->dType == TYPE_F32)
|
||||
return &opInfo_FMNMX;
|
||||
} else {
|
||||
return &opInfo_IMNMX;
|
||||
}
|
||||
break;
|
||||
case OP_MEMBAR: return &opInfo_MEMBAR;
|
||||
case OP_MOV: return &opInfo_MOV;
|
||||
case OP_MUL:
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->dType == TYPE_F32)
|
||||
return &opInfo_FMUL;
|
||||
else
|
||||
return &opInfo_DMUL;
|
||||
}
|
||||
return &opInfo_IMUL;
|
||||
case OP_NEG:
|
||||
if (isFloatType(i->dType))
|
||||
return &opInfo_FNEG;
|
||||
return &opInfo_INEG;
|
||||
case OP_NOT: return &opInfo_NOT;
|
||||
case OP_PERMT: return &opInfo_PRMT;
|
||||
case OP_PFETCH: return &opInfo_ISBERD;
|
||||
case OP_PIXLD: return &opInfo_PIXLD;
|
||||
case OP_POPCNT: return &opInfo_POPC;
|
||||
case OP_QUADOP: return &opInfo_FSWZADD;
|
||||
case OP_RDSV:
|
||||
#if 0
|
||||
if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv))
|
||||
return &opInfo_CS2R;
|
||||
#endif
|
||||
return &opInfo_S2R;
|
||||
case OP_SAT: return &opInfo_SAT;
|
||||
case OP_SELP: return &opInfo_SEL;
|
||||
case OP_SET:
|
||||
case OP_SET_AND:
|
||||
case OP_SET_OR:
|
||||
case OP_SET_XOR:
|
||||
if (i->def(0).getFile() != FILE_PREDICATE) {
|
||||
if (isFloatType(i->dType)) {
|
||||
if (i->dType == TYPE_F32)
|
||||
return &opInfo_FSET_BF;
|
||||
} else {
|
||||
if (isFloatType(i->sType))
|
||||
return &opInfo_FSET;
|
||||
return &opInfo_ISET;
|
||||
}
|
||||
} else {
|
||||
if (isFloatType(i->sType))
|
||||
if (i->sType == TYPE_F64)
|
||||
return &opInfo_DSETP;
|
||||
else
|
||||
return &opInfo_FSETP;
|
||||
else
|
||||
return &opInfo_ISETP;
|
||||
}
|
||||
break;
|
||||
case OP_SGXT: return &opInfo_SGXT;
|
||||
case OP_SHF: return &opInfo_SHF;
|
||||
case OP_SHFL: return &opInfo_SHFL;
|
||||
case OP_SHL: return &opInfo_SHL;
|
||||
case OP_SHLADD: return &opInfo_LEA;
|
||||
case OP_SHR: return &opInfo_SHR;
|
||||
case OP_SLCT:
|
||||
if (isFloatType(i->sType))
|
||||
return &opInfo_FCMP;
|
||||
return &opInfo_ICMP;
|
||||
case OP_STORE:
|
||||
switch (i->src(0).getFile()) {
|
||||
case FILE_MEMORY_LOCAL : return &opInfo_STL;
|
||||
case FILE_MEMORY_SHARED: return &opInfo_STS;
|
||||
case FILE_MEMORY_GLOBAL: return &opInfo_ST;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case OP_SUB: return &opInfo_SUB;
|
||||
case OP_SULDB:
|
||||
case OP_SULDP: return &opInfo_SULD;
|
||||
case OP_SUREDB:
|
||||
case OP_SUREDP: return &opInfo_SUATOM;
|
||||
case OP_SUSTB:
|
||||
case OP_SUSTP: return &opInfo_SUST;
|
||||
case OP_TEX:
|
||||
case OP_TXB:
|
||||
case OP_TXL: return &opInfo_TEX;
|
||||
case OP_TXD: return &opInfo_TXD;
|
||||
case OP_TXF: return &opInfo_TLD;
|
||||
case OP_TXG: return &opInfo_TLD4;
|
||||
case OP_TXLQ: return &opInfo_TMML;
|
||||
case OP_TXQ: return &opInfo_TXQ;
|
||||
case OP_VFETCH: return &opInfo_ALD;
|
||||
case OP_VOTE: return &opInfo_VOTE;
|
||||
case OP_WARPSYNC: return &opInfo_WARPSYNC;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::isSatSupported(const Instruction *i) const
|
||||
{
|
||||
switch (i->dType) {
|
||||
case TYPE_F32:
|
||||
switch (i->op) {
|
||||
case OP_ADD:
|
||||
case OP_FMA:
|
||||
case OP_MAD:
|
||||
case OP_MUL: return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const
|
||||
{
|
||||
const struct opInfo *info = nv50_ir::getOpInfo(i);
|
||||
uint8_t mods = 0;
|
||||
if (info && s < (int)ARRAY_SIZE(info->src))
|
||||
mods = info->src[s].mods;
|
||||
return (mod & Modifier(mods)) == mod;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::isOpSupported(operation op, DataType ty) const
|
||||
{
|
||||
if (op == OP_MAD || op == OP_FMA)
|
||||
return true;
|
||||
if (ty == TYPE_F32) {
|
||||
if (op == OP_MAX)
|
||||
return true;
|
||||
}
|
||||
if (op == OP_RSQ)
|
||||
return true;
|
||||
if (op == OP_SET ||
|
||||
op == OP_SET_AND ||
|
||||
op == OP_SET_OR ||
|
||||
op == OP_SET_XOR)
|
||||
return true;
|
||||
if (op == OP_SHLADD)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::isBarrierRequired(const Instruction *i) const
|
||||
{
|
||||
switch (i->op) {
|
||||
case OP_BREV:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return TargetGM107::isBarrierRequired(i);
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::insnCanLoad(const Instruction *i, int s,
|
||||
const Instruction *ld) const
|
||||
{
|
||||
const struct opInfo *info = nv50_ir::getOpInfo(i);
|
||||
uint16_t files = 0;
|
||||
|
||||
if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
|
||||
return (!i->isPseudo() &&
|
||||
!i->asTex() &&
|
||||
i->op != OP_EXPORT && i->op != OP_STORE);
|
||||
|
||||
if (ld->src(0).isIndirect(0))
|
||||
return false;
|
||||
|
||||
if (info && s < (int)ARRAY_SIZE(info->src)) {
|
||||
files = info->src[s].files;
|
||||
if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) ||
|
||||
(s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) {
|
||||
files &= ~(1 << FILE_MEMORY_CONST);
|
||||
files &= ~(1 << FILE_IMMEDIATE);
|
||||
} else
|
||||
if ((i->op == OP_SHL || i->op == OP_SHR) &&
|
||||
((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) ||
|
||||
(s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) {
|
||||
files &= ~(1 << FILE_MEMORY_CONST);
|
||||
files &= ~(1 << FILE_IMMEDIATE);
|
||||
}
|
||||
}
|
||||
|
||||
if (ld->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
if (i->sType == TYPE_F64) {
|
||||
if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return (files & (1 << ld->src(0).getFile()));
|
||||
}
|
||||
|
||||
void
|
||||
TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const
|
||||
{
|
||||
//XXX: find out why gv100 (tu1xx is fine) hangs without this
|
||||
static uint32_t builtin[] = {
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
|
||||
};
|
||||
*code = builtin;
|
||||
*size = sizeof(builtin);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
TargetGV100::getBuiltinOffset(int builtin) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
TargetGV100::runLegalizePass(Program *prog, CGStage stage) const
|
||||
{
|
||||
if (stage == CG_STAGE_PRE_SSA) {
|
||||
GM107LoweringPass pass1(prog);
|
||||
GV100LoweringPass pass2(prog);
|
||||
pass1.run(prog, false, true);
|
||||
pass2.run(prog, false, true);
|
||||
return true;
|
||||
} else
|
||||
if (stage == CG_STAGE_SSA) {
|
||||
GV100LegalizeSSA pass(prog);
|
||||
return pass.run(prog, false, true);
|
||||
} else
|
||||
if (stage == CG_STAGE_POST_RA) {
|
||||
NVC0LegalizePostRA pass(prog);
|
||||
return pass.run(prog, false, true);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Allocate a new GV100 code emitter bound to this target.  The program type
 * argument is not consulted.  The emitter is heap-allocated; releasing it is
 * up to the caller.
 */
CodeEmitter *
TargetGV100::getCodeEmitter(Program::Type type)
{
   CodeEmitterGV100 *emitter = new CodeEmitterGV100(this);
   return emitter;
}
|
||||
|
||||
/* Construct a GV100 target for 'chipset' on top of the GM107 target, then
 * fill in the GV100 opcode information.
 */
TargetGV100::TargetGV100(unsigned int chipset)
   : TargetGM107(chipset)
{
   initOpInfo();
}
|
||||
|
||||
/* Factory used by Target::create(): returns a newly allocated GV100 target
 * for the given chipset.
 */
Target *getTargetGV100(unsigned int chipset)
{
   Target *target = new TargetGV100(chipset);
   return target;
}
|
||||
|
||||
};
|
||||
52
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
Normal file
52
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright 2020 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __NV50_IR_TARGET_GV100_H__
|
||||
#define __NV50_IR_TARGET_GV100_H__
|
||||
#include "codegen/nv50_ir_target_gm107.h"
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
class TargetGV100 : public TargetGM107 {
|
||||
public:
|
||||
TargetGV100(unsigned int chipset);
|
||||
|
||||
virtual CodeEmitter *getCodeEmitter(Program::Type);
|
||||
|
||||
virtual bool runLegalizePass(Program *, CGStage stage) const;
|
||||
|
||||
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
|
||||
virtual uint32_t getBuiltinOffset(int builtin) const;
|
||||
|
||||
virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const;
|
||||
virtual bool isOpSupported(operation, DataType) const;
|
||||
virtual bool isModSupported(const Instruction *, int s, Modifier) const;
|
||||
virtual bool isSatSupported(const Instruction *) const;
|
||||
|
||||
virtual bool isBarrierRequired(const Instruction *) const;
|
||||
|
||||
private:
|
||||
void initOpInfo();
|
||||
void initProps(const struct opProperties *, int);
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset)
|
|||
}
|
||||
|
||||
TargetNVC0::TargetNVC0(unsigned int card) :
|
||||
Target(card < 0x110, false, card >= 0xe4)
|
||||
Target(card < 0x110, false, card >= 0xe4 && card < 0x140)
|
||||
{
|
||||
chipset = card;
|
||||
initOpInfo();
|
||||
|
|
|
|||
|
|
@ -150,13 +150,19 @@ files_libnouveau = files(
|
|||
'codegen/nv50_ir_util.cpp',
|
||||
'codegen/nv50_ir_util.h',
|
||||
'codegen/unordered_set.h',
|
||||
'codegen/nv50_ir_emit_gv100.cpp',
|
||||
'codegen/nv50_ir_emit_gv100.h',
|
||||
'codegen/nv50_ir_emit_gk110.cpp',
|
||||
'codegen/nv50_ir_emit_gm107.cpp',
|
||||
'codegen/nv50_ir_emit_nvc0.cpp',
|
||||
'codegen/nv50_ir_lowering_gv100.cpp',
|
||||
'codegen/nv50_ir_lowering_gv100.h',
|
||||
'codegen/nv50_ir_lowering_gm107.cpp',
|
||||
'codegen/nv50_ir_lowering_gm107.h',
|
||||
'codegen/nv50_ir_lowering_nvc0.cpp',
|
||||
'codegen/nv50_ir_lowering_nvc0.h',
|
||||
'codegen/nv50_ir_target_gv100.cpp',
|
||||
'codegen/nv50_ir_target_gv100.h',
|
||||
'codegen/nv50_ir_target_gm107.cpp',
|
||||
'codegen/nv50_ir_target_gm107.h',
|
||||
'codegen/nv50_ir_target_nvc0.cpp',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue