ir3: Add bindless instruction encoding

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
This commit is contained in:
Connor Abbott 2020-03-06 18:06:06 +01:00 committed by Marge Bot
parent 122a900d7d
commit c8b0f90439
3 changed files with 275 additions and 101 deletions

View file

@ -117,7 +117,10 @@ static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r,
else
fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
} else if ((reg.num == REG_A0) && !c) {
fprintf(ctx->out, "a0.%c", component[reg.comp]);
/* This matches libllvm output, the second (scalar) address register
* seems to be called a1.x instead of a0.y.
*/
fprintf(ctx->out, "a%d.x", reg.comp);
} else if ((reg.num == REG_P0) && !c) {
fprintf(ctx->out, "p0.%c", component[reg.comp]);
} else {
@ -448,15 +451,70 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
[opc_op(OPC_RGETPOS)] = { true, false, false, false, },
[opc_op(OPC_RGETINFO)] = { false, false, false, false, },
};
static const struct {
bool indirect;
bool bindless;
bool use_a1;
bool uniform;
} desc_features[8] = {
[CAT5_NONUNIFORM] = { .indirect = true, },
[CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
[CAT5_BINDLESS_IMM] = { .bindless = true, },
[CAT5_BINDLESS_UNIFORM] = {
.bindless = true,
.indirect = true,
.uniform = true,
},
[CAT5_BINDLESS_NONUNIFORM] = {
.bindless = true,
.indirect = true,
},
[CAT5_BINDLESS_A1_IMM] = {
.bindless = true,
.use_a1 = true,
},
[CAT5_BINDLESS_A1_UNIFORM] = {
.bindless = true,
.indirect = true,
.uniform = true,
.use_a1 = true,
},
[CAT5_BINDLESS_A1_NONUNIFORM] = {
.bindless = true,
.indirect = true,
.use_a1 = true,
},
};
instr_cat5_t *cat5 = &instr->cat5;
int i;
bool desc_indirect =
cat5->is_s2en_bindless &&
desc_features[cat5->s2en_bindless.desc_mode].indirect;
bool bindless =
cat5->is_s2en_bindless &&
desc_features[cat5->s2en_bindless.desc_mode].bindless;
bool use_a1 =
cat5->is_s2en_bindless &&
desc_features[cat5->s2en_bindless.desc_mode].use_a1;
bool uniform =
cat5->is_s2en_bindless &&
desc_features[cat5->s2en_bindless.desc_mode].uniform;
if (cat5->is_3d) fprintf(ctx->out, ".3d");
if (cat5->is_a) fprintf(ctx->out, ".a");
if (cat5->is_o) fprintf(ctx->out, ".o");
if (cat5->is_p) fprintf(ctx->out, ".p");
if (cat5->is_s) fprintf(ctx->out, ".s");
if (cat5->is_s2en) fprintf(ctx->out, ".s2en");
if (desc_indirect) fprintf(ctx->out, ".s2en");
if (uniform) fprintf(ctx->out, ".uniform");
if (bindless) {
unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
fprintf(ctx->out, ".base%d", base);
}
fprintf(ctx->out, " ");
@ -483,34 +541,47 @@ static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
false, false, false);
}
if (cat5->is_s2en) {
if (cat5->is_o || info[cat5->opc].src2) {
fprintf(ctx->out, ", ");
print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full,
false, false, false, false, false, false);
}
if (cat5->is_o || info[cat5->opc].src2) {
fprintf(ctx->out, ", ");
print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false,
false, false, false);
} else {
if (cat5->is_o || info[cat5->opc].src2) {
fprintf(ctx->out, ", ");
print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full,
false, false, false, false, false, false);
print_reg_src(ctx, (reg_t)(cat5->src2), cat5->full,
false, false, false, false, false, false);
}
if (cat5->is_s2en_bindless) {
if (!desc_indirect) {
if (info[cat5->opc].samp) {
if (use_a1)
fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
else
fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
}
if (info[cat5->opc].tex && !use_a1) {
fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
}
}
} else {
if (info[cat5->opc].samp)
fprintf(ctx->out, ", s#%d", cat5->norm.samp);
if (info[cat5->opc].tex)
fprintf(ctx->out, ", t#%d", cat5->norm.tex);
}
if (desc_indirect) {
fprintf(ctx->out, ", ");
print_reg_src(ctx, (reg_t)(cat5->s2en_bindless.src3), bindless,
false, false, false, false, false, false);
}
if (use_a1)
fprintf(ctx->out, ", a1.x");
if (debug & PRINT_VERBOSE) {
if (cat5->is_s2en) {
if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2))
fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
if (cat5->is_s2en_bindless) {
if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
} else {
if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2))
fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
}
}
}
@ -833,46 +904,66 @@ static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
/* Print the modifiers and operands of a cat6 instruction in the a6xx
 * encoding (instr->cat6_a6xx) to ctx->out.
 *
 * NOTE(review): this listing appears to interleave removed and added lines
 * of the change (src1/src2 are declared twice, and the pad fields are
 * dumped by two fprintf calls) -- confirm against the applied file before
 * relying on the statement order shown here.
 */
static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
{
instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
struct reginfo src1, src2;
bool has_dest = _OPC(6, cat6->opc) == OPC_LDIB;
char ss = 0;
struct reginfo src1, src2, ssbo;
bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
/* Per-descriptor-mode flags, looked up with cat6->desc_mode.  Entries
 * that have no initializer below are zero-initialized, i.e. all false.
 */
static const struct {
bool indirect;
bool bindless;
bool uniform;
} desc_features[8] = {
[CAT6_IMM] = { false },
[CAT6_BINDLESS_IMM] = { .bindless = true, },
[CAT6_BINDLESS_UNIFORM] = {
.bindless = true,
.indirect = true,
.uniform = true,
},
[CAT6_BINDLESS_NONUNIFORM] = {
.bindless = true,
.indirect = true,
},
};
bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
bool bindless = desc_features[cat6->desc_mode].bindless;
bool uniform = desc_features[cat6->desc_mode].uniform;
memset(&src1, 0, sizeof(src1));
memset(&src2, 0, sizeof(src2));
memset(&ssbo, 0, sizeof(ssbo));
fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
fprintf(ctx->out, ".%dd", cat6->d + 1);
fprintf(ctx->out, ".%s", type[cat6->type]);
fprintf(ctx->out, ".%u ", cat6->type_size + 1);
if (has_dest) {
src2.reg = (reg_t)(cat6->src2);
src2.full = true; // XXX
print_src(ctx, &src2);
fprintf(ctx->out, ", ");
/* The typed/dimension/type suffixes are skipped for OPC_LDC. */
if (uses_type) {
fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
fprintf(ctx->out, ".%dd", cat6->d + 1);
fprintf(ctx->out, ".%s", type[cat6->type]);
}
fprintf(ctx->out, ".%u", cat6->type_size + 1);
/* NOTE: blob seems to use old encoding for ldl/stl (local memory) */
ss = 'g';
/* The ".base" and ".uniform" suffixes are only printed for descriptor
 * modes whose table entry sets the matching flag.
 */
if (bindless)
fprintf(ctx->out, ".base%d", cat6->base);
if (uniform)
fprintf(ctx->out, ".uniform");
fprintf(ctx->out, " ");
src2.reg = (reg_t)(cat6->src2);
src2.full = true; // XXX
print_src(ctx, &src2);
fprintf(ctx->out, ", ");
fprintf(ctx->out, "%c[%u", ss, cat6->ssbo);
fprintf(ctx->out, "] + ");
src1.reg = (reg_t)(cat6->src1);
src1.full = true; // XXX
print_src(ctx, &src1);
if (!has_dest) {
fprintf(ctx->out, ", ");
src2.reg = (reg_t)(cat6->src2);
src2.full = true; // XXX
print_src(ctx, &src2);
}
fprintf(ctx->out, ", ");
/* The ssbo field is printed as a regular source operand; 'im' is set
 * whenever the descriptor mode is not indirect (presumably marking the
 * field as an immediate index rather than a register -- confirm against
 * print_src).
 */
ssbo.reg = (reg_t)(cat6->ssbo);
ssbo.im = !indirect_ssbo;
ssbo.full = true;
print_src(ctx, &ssbo);
/* Verbose mode additionally dumps the raw pad fields. */
if (debug & PRINT_VERBOSE) {
fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1,
cat6->pad2, cat6->pad3, cat6->pad4);
fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
}
}

View file

@ -567,6 +567,57 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat4_t;
/* Descriptor mode of a cat5 instruction.  It only applies when
 * is_s2en_bindless = 1, and selects whether bindless is enabled and, if so,
 * where the (base, index) pair of the sampler and of the texture is taken
 * from.  The instruction embeds a single base, and the texture always uses
 * that one.
 */
typedef enum {
	/* Traditional GL binding model.  src3 supplies the texture and
	 * sampler index and is not presumed to be uniform.  This stays
	 * backwards-compatible with earlier generations, where the field was
	 * always 0 and nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* Sampler base: low 3 bits of a1.x.  Sampler and texture index: src3,
	 * presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* Texture and sampler use the same base.  Sampler and texture index:
	 * src3, *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* Sampler base: low 3 bits of a1.x.  Sampler and texture index: src3,
	 * *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Traditional GL binding model.  src3 supplies the texture and
	 * sampler index and is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* Texture and sampler use the same base.  Sampler and texture index:
	 * src3, presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* Texture and sampler use the same base.  Sampler index: low 4 bits
	 * of src3.  Texture index: high 4 bits of src3.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* Sampler base: low 3 bits of a1.x.  Texture index: the next 8 bits
	 * of a1.x.  Sampler index: an immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
typedef struct PACKED {
/* dword0: */
union PACKED {
@ -581,39 +632,41 @@ typedef struct PACKED {
} norm;
/* s2en case: */
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t src2 : 11;
uint32_t dummy1 : 1;
uint32_t src3 : 8;
uint32_t dummy2 : 3;
} s2en;
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t src2 : 8;
uint32_t dummy1 : 2;
uint32_t base_hi : 2;
uint32_t src3 : 8;
uint32_t desc_mode : 3;
} s2en_bindless;
/* same in either case: */
// XXX I think, confirm this
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t pad : 23;
uint32_t src2 : 8;
uint32_t pad : 15;
};
};
/* dword1: */
uint32_t dst : 8;
uint32_t wrmask : 4; /* write-mask */
uint32_t type : 3;
uint32_t dummy2 : 1; /* seems to be ignored */
uint32_t is_3d : 1;
uint32_t dst : 8;
uint32_t wrmask : 4; /* write-mask */
uint32_t type : 3;
uint32_t base_lo : 1; /* used with bindless */
uint32_t is_3d : 1;
uint32_t is_a : 1;
uint32_t is_s : 1;
uint32_t is_s2en : 1;
uint32_t is_o : 1;
uint32_t is_p : 1;
uint32_t is_a : 1;
uint32_t is_s : 1;
uint32_t is_s2en_bindless : 1;
uint32_t is_o : 1;
uint32_t is_p : 1;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat5_t;
/* dword0 encoding for src_off: [src1 + off], src2: */
@ -748,43 +801,72 @@ typedef union PACKED {
};
} instr_cat6_t;
/* Descriptor mode of a cat6 instruction: like cat5_desc_mode_t, it
 * describes how the descriptor is loaded.
 */
typedef enum {
	/* Old GL binding model, immediate index.
	 * TODO: find CAT6_UNIFORM and CAT6_NONUNIFORM
	 */
	CAT6_IMM = 0,

	/* Bindless model, immediate index. */
	CAT6_BINDLESS_IMM = 4,

	/* Bindless model, index taken from a uniform register. */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Bindless model, index taken from a register that isn't guaranteed
	 * to be uniform.  Presumably the hardware checks whether the indices
	 * are equal and splits up the load/store, because it works the way
	 * you would expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
/**
* For atomic ops (which return a value):
*
* pad1=1, pad2=c, pad3=0, pad4=3
* pad1=1, pad3=c, pad5=3
* src1 - vecN offset/coords
* src2.x - is actually dest register
* src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
* and src2.z is 'data'
*
* For stib (which does not return a value):
* pad1=0, pad2=c, pad3=0, pad4=2
* pad1=0, pad3=c, pad5=2
* src1 - vecN offset/coords
* src2 - value to store
*
* For ldib:
* pad1=1, pad2=c, pad3=0, pad4=2
* pad1=1, pad3=c, pad5=2
* src1 - vecN offset/coords
*
* for ldc (load from UBO using descriptor):
* pad1=0, pad2=8, pad3=0, pad4=2
* pad1=0, pad3=8, pad5=2
*
* pad2 and pad4 are only observed to be 0.
*/
typedef struct PACKED {
/* dword0: */
uint32_t pad1 : 9;
uint32_t pad1 : 1;
uint32_t base : 3;
uint32_t pad2 : 2;
uint32_t desc_mode : 3;
uint32_t d : 2;
uint32_t typed : 1;
uint32_t type_size : 2;
uint32_t opc : 5;
uint32_t pad2 : 5;
uint32_t pad3 : 5;
uint32_t src1 : 8; /* coordinate/offset */
/* dword1: */
uint32_t src2 : 8; /* or the dst for load instructions */
uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo??
uint32_t pad4 : 1; //mustbe0 ??
uint32_t ssbo : 8; /* ssbo/image binding point */
uint32_t type : 3;
uint32_t pad4 : 7;
uint32_t pad5 : 7;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
@ -869,7 +951,7 @@ static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
* cmdstream traces I have indicates that the pad bit is zero
* in all cases. So we can use this to detect new encoding:
*/
if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
assert(gpu_id >= 600);
assert(instr->cat6.opc == 0);
return false;

View file

@ -482,20 +482,23 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
}
if (src2) {
iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
}
if (instr->flags & IR3_INSTR_S2EN) {
struct ir3_register *samp_tex = instr->regs[1];
if (src2) {
iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
}
iassert(samp_tex->flags & IR3_REG_HALF);
cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
/* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, as
* this is what the blob does and it is presumably faster, but first
* we should confirm it is actually nonuniform and figure out when the
* whole descriptor mode mechanism was introduced.
*/
cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
iassert(!(instr->cat5.samp | instr->cat5.tex));
} else {
if (src2) {
iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
}
cat5->norm.samp = instr->cat5.samp;
cat5->norm.tex = instr->cat5.tex;
}
@ -506,7 +509,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
cat5->is_a = !!(instr->flags & IR3_INSTR_A);
cat5->is_s = !!(instr->flags & IR3_INSTR_S);
cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
cat5->is_s2en_bindless = !!(instr->flags & IR3_INSTR_S2EN);
cat5->is_o = !!(instr->flags & IR3_INSTR_O);
cat5->is_p = !!(instr->flags & IR3_INSTR_P);
cat5->opc = instr->opc;
@ -564,31 +567,29 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
case OPC_ATOMIC_OR:
case OPC_ATOMIC_XOR:
cat6->pad1 = 0x1;
cat6->pad2 = 0xc;
cat6->pad3 = 0x0;
cat6->pad4 = 0x3;
cat6->pad3 = 0xc;
cat6->pad5 = 0x3;
break;
case OPC_STIB:
cat6->pad1 = 0x0;
cat6->pad2 = 0xc;
cat6->pad3 = 0x0;
cat6->pad4 = 0x2;
cat6->pad3 = 0xc;
cat6->pad5 = 0x2;
break;
case OPC_LDIB:
cat6->pad1 = 0x1;
cat6->pad2 = 0xc;
cat6->pad3 = 0x0;
cat6->pad4 = 0x2;
cat6->pad3 = 0xc;
cat6->pad5 = 0x2;
break;
case OPC_LDC:
cat6->pad1 = 0x0;
cat6->pad2 = 0x8;
cat6->pad3 = 0x0;
cat6->pad4 = 0x2;
cat6->pad3 = 0x8;
cat6->pad5 = 0x2;
break;
default:
iassert(0);
}
cat6->pad2 = 0x0;
cat6->pad4 = 0x0;
return 0;
}