ir3: add ldg.a,stg.a which allow complex in-place offset calculation

The full form for ldg.a/stg.a offset is:
 g[reg_address + (reg_offset << (imm_shift + 2)) + (imm_offset << 2)]

where imm_shift is in [0, 3] and imm_offset is in [0, 3]

The a6xx blob was found to produce somewhat simpler offset calculations
for TES/TCS shaders in GTA V:

 [c002000a_03c14215] ldg.a.f32 r2.z, g[r1.y+((r2.z+1)<<2)], 3;
 [c0020004_01c14609] ldg.a.f32 r1.x, g[r1.y+((r1.x+3)<<2)], 1;

Our new syntax:
 stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1
 stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1
 ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3
 ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11431>
This commit is contained in:
Danylo Piliaiev 2021-06-16 14:43:19 +03:00 committed by Marge Bot
parent 4b06db0548
commit fdc0f489e0
14 changed files with 327 additions and 126 deletions

View file

@ -0,0 +1,17 @@
; Exercises stg.a / ldg.a with the "reg<<shift + imm<<2" offset form.
; Per invocation (id taken from r0.x):
;   1. store 0xff at          g[r1.x + (id << 4) + (2 << 2)]
;   2. load that dword back into r4.x and add 1
;   3. store the result at    g[r1.x + (id << 4) + (1 << 2)]
; NOTE(review): r1.x/r1.y are loaded from c2.x/c2.y, presumably the low/high
; halves of the buffer address declared by @buf - confirm.
@localsize 16, 1, 1
@buf 128 (c2.x) ; c2.xy
@invocationid(r0.x) ; r0.xyz
; r0.y = invocation id, used as the register offset below
mov.u32u32 r0.y, r0.x
; r1.x, r1.y = c2.x, c2.y (base address operand)
mov.u32u32 r1.x, c2.x
mov.u32u32 r1.y, c2.y
; value to store
mov.u32u32 r2.x, 0xff
(rpt5)nop
stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1
nop(sy)
; read back from the same address that was just written
ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1
nop(sy)
add.u r4.x, r4.x, 1
(rpt3)nop
; write the incremented value one dword below the first store
stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1
end
nop

View file

@ -303,9 +303,11 @@ static const struct opc_info {
/* category 6: */
OPC(6, OPC_LDG, ldg),
OPC(6, OPC_LDG_A, ldg.a),
OPC(6, OPC_LDL, ldl),
OPC(6, OPC_LDP, ldp),
OPC(6, OPC_STG, stg),
OPC(6, OPC_STG_A, stg.a),
OPC(6, OPC_STL, stl),
OPC(6, OPC_STP, stp),
OPC(6, OPC_LDIB, ldib),

View file

@ -291,6 +291,9 @@ typedef enum {
OPC_ATOMIC_B_OR = _OPC(6, 53),
OPC_ATOMIC_B_XOR = _OPC(6, 54),
OPC_LDG_A = _OPC(6, 55),
OPC_STG_A = _OPC(6, 56),
/* category 7: */
OPC_BAR = _OPC(7, 0),
OPC_FENCE = _OPC(7, 1),

View file

@ -817,7 +817,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
* but for load instructions this arg is the address (and not
* really sure any good way to test a hard-coded immed addr src)
*/
if (is_store(instr) && (n == 1))
if (is_store(instr) && (instr->opc != OPC_STG) && (n == 1))
return false;
if ((instr->opc == OPC_LDL) && (n == 0))
@ -847,7 +847,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
return false;
if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
if (instr->opc == OPC_STG && (n == 2))
return false;
if (instr->opc == OPC_STG_A && (n == 4))
return false;
/* as with atomics, these cat6 instrs can only have an immediate

View file

@ -827,6 +827,7 @@ is_store(struct ir3_instruction *instr)
*/
switch (instr->opc) {
case OPC_STG:
case OPC_STG_A:
case OPC_STGB:
case OPC_STIB:
case OPC_STP:
@ -844,6 +845,7 @@ static inline bool is_load(struct ir3_instruction *instr)
{
switch (instr->opc) {
case OPC_LDG:
case OPC_LDG_A:
case OPC_LDGB:
case OPC_LDIB:
case OPC_LDL:
@ -1731,6 +1733,54 @@ ir3_##name(struct ir3_block *block, \
#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR4(name) __INSTR4(0, name, OPC_##name)
/*
 * Generates a static inline builder named ir3_<name>() for an instruction
 * with five sources.  The generated function creates the instruction via
 * ir3_instr_create(block, opc, 1, 5) (presumably 1 destination and 5
 * sources), attaches an SSA destination, appends the sources a..e in order
 * with their respective flags, ORs `flag` into instr->flags and returns the
 * new instruction.
 *
 * INSTR5(name)     - builder ir3_<name>() for OPC_<name>, no extra flag.
 * INSTR5F(f, name) - builder ir3_<name>_<f>() for OPC_<name> with
 *                    IR3_INSTR_<f> set.
 */
#define __INSTR5(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags, \
struct ir3_instruction *c, unsigned cflags, \
struct ir3_instruction *d, unsigned dflags, \
struct ir3_instruction *e, unsigned eflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, opc, 1, 5); \
__ssa_dst(instr); \
__ssa_src(instr, a, aflags); \
__ssa_src(instr, b, bflags); \
__ssa_src(instr, c, cflags); \
__ssa_src(instr, d, dflags); \
__ssa_src(instr, e, eflags); \
instr->flags |= flag; \
return instr; \
}
#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR5(name) __INSTR5(0, name, OPC_##name)
/*
 * Six-source counterpart of the five-source builder generator: produces a
 * static inline ir3_<name>() that creates the instruction via
 * ir3_instr_create(block, opc, 1, 6) (presumably 1 destination and 6
 * sources), attaches an SSA destination, appends the sources a..f in order
 * with their respective flags, ORs `flag` into instr->flags and returns the
 * new instruction.
 *
 * INSTR6(name)     - builder ir3_<name>() for OPC_<name>, no extra flag.
 * INSTR6F(f, name) - builder ir3_<name>_<f>() for OPC_<name> with
 *                    IR3_INSTR_<f> set.
 */
#define __INSTR6(flag, name, opc) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags, \
struct ir3_instruction *c, unsigned cflags, \
struct ir3_instruction *d, unsigned dflags, \
struct ir3_instruction *e, unsigned eflags, \
struct ir3_instruction *f, unsigned fflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, opc, 1, 6); \
__ssa_dst(instr); \
__ssa_src(instr, a, aflags); \
__ssa_src(instr, b, bflags); \
__ssa_src(instr, c, cflags); \
__ssa_src(instr, d, dflags); \
__ssa_src(instr, e, eflags); \
__ssa_src(instr, f, fflags); \
instr->flags |= flag; \
return instr; \
}
#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR6(name) __INSTR6(0, name, OPC_##name)
/* cat0 instructions: */
INSTR1(B)
INSTR0(JUMP)
@ -1872,7 +1922,7 @@ INSTR3(LDG)
INSTR3(LDL)
INSTR3(LDLW)
INSTR3(LDP)
INSTR3(STG)
INSTR4(STG)
INSTR3(STL)
INSTR3(STLW)
INSTR3(STP)
@ -1893,6 +1943,8 @@ INSTR2(LDC)
#if GPU >= 600
INSTR3(STIB);
INSTR2(LDIB);
INSTR5(LDG_A);
INSTR6(STG_A);
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
@ -1921,8 +1973,6 @@ INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
#endif
INSTR4F(G, STG)
/* cat7 instructions: */
INSTR0(BAR)
INSTR0(FENCE)

View file

@ -357,4 +357,6 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
.emit_intrinsic_load_global_ir3 = NULL,
.emit_intrinsic_store_global_ir3 = NULL,
};

View file

@ -371,6 +371,68 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}
/*
 * Lower nir_intrinsic_load_global_ir3 to an ldg.a instruction (a6xx hook).
 *
 * ctx:  compile context; the instruction is emitted into ctx->block.
 * intr: the intrinsic; src[0] supplies the two address components and
 *       src[1] the offset register.
 * dst:  receives the per-component results split from the load.
 */
static void
emit_intrinsic_load_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
struct ir3_block *b = ctx->block;
unsigned dest_components = nir_intrinsic_dest_components(intr);
struct ir3_instruction *addr, *offset;
/* Collect the first two components of src[0] into one address value
 * (presumably the low/high halves of a 64-bit address - confirm).
 */
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
ir3_get_src(ctx, &intr->src[0])[0],
ir3_get_src(ctx, &intr->src[0])[1]
}, 2);
offset = ir3_get_src(ctx, &intr->src[1])[0];
/* Source order matches the ldg.a encoder: address, offset register,
 * extra dword shift (0), immediate offset (0), component count.
 */
struct ir3_instruction *load =
ir3_LDG_A(b, addr, 0, offset, 0,
create_immed(b, 0), 0,
create_immed(b, 0), 0,
create_immed(b, dest_components), 0);
load->cat6.type = TYPE_U32;
load->dsts[0]->wrmask = MASK(dest_components);
/* Classified as a buffer read that conflicts with buffer writes. */
load->barrier_class = IR3_BARRIER_BUFFER_R;
load->barrier_conflict = IR3_BARRIER_BUFFER_W;
ir3_split_dest(b, dst, load, 0, dest_components);
}
/*
 * Lower nir_intrinsic_store_global_ir3 to an stg.a instruction (a6xx hook).
 *
 * ctx:  compile context; the instruction is emitted into ctx->block.
 * intr: the intrinsic; src[0] is the value to store, src[1] supplies the
 *       two address components and src[2] the offset register.
 */
static void
emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *value, *addr, *offset;
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
/* Collect the first two components of src[1] into one address value
 * (presumably the low/high halves of a 64-bit address - confirm).
 */
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
ir3_get_src(ctx, &intr->src[1])[0],
ir3_get_src(ctx, &intr->src[1])[1]
}, 2);
offset = ir3_get_src(ctx, &intr->src[2])[0];
value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
/* Source order matches the stg.a encoder: address, offset register,
 * extra dword shift (0), immediate offset (0), value, component count.
 */
struct ir3_instruction *stg =
ir3_STG_A(b,
addr, 0,
offset, 0,
create_immed(b, 0), 0,
create_immed(b, 0), 0,
value, 0,
create_immed(b, ncomp), 0);
stg->cat6.type = TYPE_U32;
/* NOTE(review): purpose of iim_val = 1 is not evident from this function;
 * it is carried over unchanged from the previous STG-based lowering.
 */
stg->cat6.iim_val = 1;
/* Register the store in the block's keeps array (presumably so it is not
 * eliminated as dead code, since nothing consumes its result - confirm).
 */
array_insert(b, b->keeps, stg);
/* Classified as a buffer write that conflicts with buffer reads and writes. */
stg->barrier_class = IR3_BARRIER_BUFFER_W;
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
}
const struct ir3_context_funcs ir3_a6xx_funcs = {
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
@ -379,5 +441,7 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size,
.emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
.emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
};

View file

@ -1735,54 +1735,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
end->barrier_conflict = IR3_BARRIER_EVERYTHING;
break;
case nir_intrinsic_store_global_ir3: {
struct ir3_instruction *value, *addr, *offset;
unsigned ncomp = nir_intrinsic_src_components(intr, 0);
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
ir3_get_src(ctx, &intr->src[1])[0],
ir3_get_src(ctx, &intr->src[1])[1]
}, 2);
offset = ir3_get_src(ctx, &intr->src[2])[0];
value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
struct ir3_instruction *stg =
ir3_STG_G(ctx->block, addr, 0, value, 0,
create_immed(ctx->block, ncomp), 0, offset, 0);
stg->cat6.type = TYPE_U32;
stg->cat6.iim_val = 1;
array_insert(b, b->keeps, stg);
stg->barrier_class = IR3_BARRIER_BUFFER_W;
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
case nir_intrinsic_store_global_ir3:
ctx->funcs->emit_intrinsic_store_global_ir3(ctx, intr);
break;
}
case nir_intrinsic_load_global_ir3: {
struct ir3_instruction *addr, *offset;
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
ir3_get_src(ctx, &intr->src[0])[0],
ir3_get_src(ctx, &intr->src[0])[1]
}, 2);
offset = ir3_get_src(ctx, &intr->src[1])[0];
struct ir3_instruction *load =
ir3_LDG(b, addr, 0, offset, 0,
create_immed(ctx->block, dest_components), 0);
load->cat6.type = TYPE_U32;
load->dsts[0]->wrmask = MASK(dest_components);
load->barrier_class = IR3_BARRIER_BUFFER_R;
load->barrier_conflict = IR3_BARRIER_BUFFER_W;
ir3_split_dest(b, dst, load, 0, dest_components);
case nir_intrinsic_load_global_ir3:
ctx->funcs->emit_intrinsic_load_global_ir3(ctx, intr, dst);
break;
}
case nir_intrinsic_load_ubo:
emit_intrinsic_load_ubo(ctx, intr, dst);
@ -3085,10 +3043,12 @@ emit_stream_out(struct ir3_context *ctx)
base = bases[strmout->output[i].output_buffer];
out = ctx->outputs[regid(strmout->output[i].register_index, c)];
stg = ir3_STG(ctx->block, base, 0, out, 0,
create_immed(ctx->block, 1), 0);
stg = ir3_STG(ctx->block,
base, 0,
create_immed(ctx->block, (strmout->output[i].dst_offset + j) * 4), 0,
out, 0,
create_immed(ctx->block, 1), 0);
stg->cat6.type = TYPE_U32;
stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
array_insert(ctx->block, ctx->block->keeps, stg);
}

View file

@ -165,6 +165,9 @@ struct ir3_context_funcs {
struct ir3_instruction * (*emit_intrinsic_atomic_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_image_size)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
};
extern const struct ir3_context_funcs ir3_a4xx_funcs;

View file

@ -296,9 +296,11 @@ static int parse_w(const char *str)
/* category 6: */
"ldg" return TOKEN(T_OP_LDG);
"ldg.a" return TOKEN(T_OP_LDG_A);
"ldl" return TOKEN(T_OP_LDL);
"ldp" return TOKEN(T_OP_LDP);
"stg" return TOKEN(T_OP_STG);
"stg.a" return TOKEN(T_OP_STG_A);
"stl" return TOKEN(T_OP_STL);
"stp" return TOKEN(T_OP_STP);
"ldib" return TOKEN(T_OP_LDIB);

View file

@ -92,7 +92,7 @@ static void new_label(const char *name)
static struct ir3_instruction * new_instr(opc_t opc)
{
instr = ir3_instr_create(block, opc, 4, 4);
instr = ir3_instr_create(block, opc, 4, 6);
instr->flags = iflags.flags;
instr->repeat = iflags.repeat;
instr->nop = iflags.nop;
@ -525,9 +525,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
/* category 6: */
%token <tok> T_OP_LDG
%token <tok> T_OP_LDG_A
%token <tok> T_OP_LDL
%token <tok> T_OP_LDP
%token <tok> T_OP_STG
%token <tok> T_OP_STG_A
%token <tok> T_OP_STL
%token <tok> T_OP_STP
%token <tok> T_OP_LDIB
@ -995,33 +997,40 @@ cat6_dim: '.' T_1D { instr->cat6.d = 1; }
| '.' T_4D { instr->cat6.d = 4; }
cat6_type: '.' type { instr->cat6.type = $2; }
cat6_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
cat6_offset: cat6_imm_offset
| '+' src
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
| '+' src { instr->flags |= IR3_INSTR_G; }
cat6_immed: integer { instr->cat6.iim_val = $1; }
cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' {
/*
 * Offset part of the ldg.a/stg.a address operand.  Two spellings are accepted:
 *
 *   +(reg imm)<<2        shift source = 0,     immediate-offset source = imm
 *   +reg<<N imm<<2       shift source = N - 2, immediate-offset source = imm
 *
 * After the register operand parsed by `src`, each alternative appends two
 * immediate sources: first the extra dword shift, then the immediate offset.
 *
 * NOTE(review): the trailing "<<2" is enforced only through assert(), and
 * neither N nor imm is range-checked here (N < 2 makes N - 2 negative before
 * it is stored in uim_val).  Consider reporting a parse error instead.
 */
cat6_stg_ldg_a6xx_offset:
'+' '(' src offset ')' '<' '<' integer {
/* final shift of the parenthesised form must be exactly 2 */
assert($8 == 2);
new_src(0, IR3_REG_IMMED)->uim_val = 0;
new_src(0, IR3_REG_IMMED)->uim_val = $4;
}
| '+' src '<' '<' integer offset '<' '<' integer {
/* the immediate offset is always written with a shift of 2 */
assert($9 == 2);
/* the written shift is in bytes; the stored value is the shift beyond 2 */
new_src(0, IR3_REG_IMMED)->uim_val = $5 - 2;
new_src(0, IR3_REG_IMMED)->uim_val = $6;
}
cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
| T_OP_LDG_A { new_instr(OPC_LDG_A); } cat6_type dst_reg ',' 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' immediate
| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' {
new_src(0, IR3_REG_IMMED)->iim_val = $8;
} ',' immediate
// TODO some of the cat6 instructions have different syntax for a6xx..
//| T_OP_LDIB { new_instr(OPC_LDIB); } cat6_type dst_reg cat6_offset ',' reg ',' cat6_immed
cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_dst_offset ']' ',' src ',' immediate {
/* fixup src order, the offset reg is expected last currently */
if (instr->flags & IR3_INSTR_G) {
struct ir3_register *offset = instr->srcs[1];
instr->srcs[1] = instr->srcs[2];
instr->srcs[2] = instr->srcs[3];
instr->srcs[3] = offset;
}
}
cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_imm_offset ']' ',' src ',' immediate
| T_OP_STG_A { new_instr(OPC_STG_A); dummy_dst(); } cat6_type 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' src ',' immediate
| T_OP_STP { new_instr(OPC_STP); dummy_dst(); } cat6_type 'p' '[' src cat6_dst_offset ']' ',' src ',' immediate
| T_OP_STL { new_instr(OPC_STL); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate
| T_OP_STLW { new_instr(OPC_STLW); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate

View file

@ -247,6 +247,18 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
break;
case OPC_STG:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
break;
case OPC_STG_A:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->srcs[4], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[5]->flags & IR3_REG_HALF));
break;
case OPC_STL:
case OPC_STP:
case OPC_STLW:

View file

@ -153,20 +153,30 @@ static const struct test {
// TODO is this a real instruction? Or float -6.0 ?
// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
/* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
INSTR_6XX(c0d20906_02800004, "stg.f32 g[r1.x+r1.z], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
INSTR_6XX(c0da052e_01800042, "stg.s32 g[r0.z+r11.z], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
/* Custom-crafted */
INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
/* Found in TCS/TES shaders of GTA V */
INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
/* Custom-crafted */
INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),

View file

@ -42,84 +42,148 @@ SOFTWARE.
<!-- TODO pull more fields up to this level, when they are common across sub-encodings -->
</bitset>
<bitset name="ldg" extends="#instruction-cat6-a3xx">
<doc>
LoaD Global
</doc>
<display>
{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}], {SIZE}
</display>
<override>
<display>
{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
</display>
<expr>!{SRC2_REG}</expr>
<field low="1" high="13" name="OFF" type="offset"/>
</override>
<bitset name="#instruction-cat6-ldg" extends="#instruction-cat6-a3xx">
<pattern pos="0" >1</pattern>
<field low="1" high="8" name="SRC2" type="#reg-gpr"/>
<assert low="9" high="13">00000</assert>
<field low="14" high="21" name="SRC1" type="#reg-gpr"/>
<field pos="22" name="SRC2_REG" type="bool"/>
<pattern pos="23" >1</pattern>
<field low="24" high="31" name="SIZE" type="uint"/>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern low="40" high="48">xxxxxxxxx</pattern>
<pattern low="52" high="53">00</pattern>
<pattern low="54" high="58">00000</pattern> <!-- OPC -->
</bitset>
<bitset name="ldg" extends="#instruction-cat6-ldg">
<doc>
LoaD Global
</doc>
<display>
{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
</display>
<field low="1" high="13" name="OFF" type="offset"/>
<pattern pos="22" >0</pattern> <!-- Imm offset ldg form -->
<encode>
<map name="SRC2_REG">!(src->srcs[1]->flags &amp; IR3_REG_IMMED)</map>
<map name="SRC2">src->srcs[1]</map>
<map name="OFF">src->srcs[1]->iim_val</map>
<map name="SIZE">src->srcs[2]->uim_val</map>
</encode>
</bitset>
<bitset name="stg" extends="#instruction-cat6-a3xx">
<bitset name="ldg.a" extends="#instruction-cat6-ldg">
<doc>
STore Global
LoaD Global
</doc>
<gen min="600"/>
<display>
{SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}], {SRC3}, {SIZE}
{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+({SRC2}{OFF})&lt;&lt;{SRC2_BYTE_SHIFT}], {SIZE}
</display>
<override>
<display>
{SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}&lt;&lt;{SRC2_BYTE_SHIFT}{OFF}&lt;&lt;2], {SIZE}
</display>
<expr>!{G}</expr>
<derived name="OFF" width="13" type="offset">
<expr>({OFF_HI} &lt;&lt; 8) | {OFF_LO}</expr>
</derived>
<field low="9" high="13" name="OFF_HI" type="uint"/>
<field low="32" high="39" name="OFF_LO" type="uint"/>
<expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
</override>
<pattern pos="0" >x</pattern>
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
<assert low="9" high="13">00000</assert> <!-- OFF_HI -->
<pattern low="14" high="21">xxxxxxxx</pattern>
<pattern low="22" high="23">1x</pattern>
<field low="24" high="31" name="SIZE" type="uint"/>
<field low="32" high="39" name="SRC2" type="#reg-gpr"/>
<field pos="40" name="DST_OFF" type="bool"/>
<field low="41" high="48" name="SRC1" type="#reg-gpr"/>
<field pos="52" name="G" type="bool"/>
<pattern pos="53" >x</pattern>
<pattern low="54" high="58">00011</pattern> <!-- OPC -->
<field low="1" high="8" name="SRC2" type="#reg-gpr"/>
<field low="9" high="10" name="OFF" type="uoffset"/>
<assert pos="11" >0</assert>
<field low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
<pattern pos="22" >1</pattern> <!-- Reg offset ldg form -->
<derived name="SRC2_BYTE_SHIFT" width="3" type="uint">
<expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
</derived>
<encode>
<map name="SIZE">src->srcs[2]->uim_val</map>
<map name="SRC2">src->srcs[3]</map>
<map name="DST_OFF" force="true">1</map>
<map name="SRC3">src->srcs[1]</map>
<map name="G">(src->flags &amp; IR3_INSTR_G) &amp;&amp; !(src->srcs[3]->flags &amp; IR3_REG_IMMED)</map>
<map name="OFF_LO">src->cat6.dst_offset</map>
<map name="OFF_HI">src->cat6.dst_offset >> 8</map>
<map name="SRC2">src->srcs[1]</map>
<map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
<map name="OFF">src->srcs[3]->uim_val</map>
<map name="SIZE">src->srcs[4]->uim_val</map>
</encode>
</bitset>
<!--
	Fields and patterns shared by the "stg" (immediate offset) and
	"stg.a" (register offset) encodings, which both extend this bitset.
	Bit 52 is deliberately left to the derived bitsets: they use it to
	select between the two forms.  DST_OFF is always encoded as 1.
-->
<bitset name="#instruction-cat6-stg" extends="#instruction-cat6-a3xx">
<pattern pos="0" >x</pattern>
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
<pattern low="14" high="21">xxxxxxxx</pattern>
<pattern low="22" high="23">1x</pattern>
<field low="24" high="31" name="SIZE" type="uint"/>
<field pos="40" name="DST_OFF" type="bool"/>
<field low="41" high="48" name="SRC1" type="#reg-gpr"/>
<pattern pos="53" >x</pattern>
<pattern low="54" high="58">00011</pattern> <!-- OPC -->
<encode>
<map name="DST_OFF" force="true">1</map>
</encode>
</bitset>
<!--
	Immediate-offset store form (bit 52 = 0).  The 13-bit OFF shown in the
	disassembly is reassembled from OFF_HI (bits 9..13) and OFF_LO
	(bits 32..39).  Encoder source layout: srcs[1] = immediate offset,
	srcs[2] = value register (SRC3), srcs[3] = SIZE.
-->
<bitset name="stg" extends="#instruction-cat6-stg">
<doc>
STore Global
</doc>
<display>
{SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
</display>
<derived name="OFF" width="13" type="offset">
<expr>({OFF_HI} &lt;&lt; 8) | {OFF_LO}</expr>
</derived>
<field low="9" high="13" name="OFF_HI" type="uint"/>
<field low="32" high="39" name="OFF_LO" type="uint"/>
<pattern pos="52" >0</pattern> <!-- Imm offset stg form -->
<encode>
<map name="OFF_LO">src->srcs[1]->iim_val</map>
<map name="OFF_HI">src->srcs[1]->iim_val >> 8</map>
<map name="SRC3">src->srcs[2]</map>
<map name="SIZE">src->srcs[3]->uim_val</map>
</encode>
</bitset>
<!--
	Register-offset store form (bit 52 = 1), a6xx and later.
	OFF is a 2-bit immediate (bits 9..10) and SRC2_ADD_DWORD_SHIFT a 2-bit
	extra shift (bits 12..13); bit 11 must be 0.  The shift printed in the
	disassembly is DST_BYTE_SHIFT = SRC2_ADD_DWORD_SHIFT + 2.

	Display:
	  extra shift == 0:  g[SRC1+(SRC2 OFF)<<DST_BYTE_SHIFT]
	  extra shift  > 0:  g[SRC1+SRC2<<DST_BYTE_SHIFT OFF<<2]

	Encoder source layout: srcs[1] = offset register (SRC2), srcs[2] = extra
	dword shift, srcs[3] = immediate offset, srcs[4] = value register (SRC3),
	srcs[5] = SIZE.
-->
<bitset name="stg.a" extends="#instruction-cat6-stg">
<doc>
STore Global
</doc>
<gen min="600"/>
<display>
{SY}{JP}{NAME}.{TYPE} g[{SRC1}+({SRC2}{OFF})&lt;&lt;{DST_BYTE_SHIFT}], {SRC3}, {SIZE}
</display>
<override>
<display>
{SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}&lt;&lt;{DST_BYTE_SHIFT}{OFF}&lt;&lt;2], {SRC3}, {SIZE}
</display>
<expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
</override>
<derived name="DST_BYTE_SHIFT" width="3" type="uint">
<expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
</derived>
<field low="9" high="10" name="OFF" type="uoffset"/>
<assert pos="11" >0</assert>
<field low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
<field low="32" high="39" name="SRC2" type="#reg-gpr"/>
<pattern pos="52" >1</pattern> <!-- Reg offset stg form -->
<encode>
<map name="SRC2">src->srcs[1]</map>
<map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
<map name="OFF">src->srcs[3]->uim_val</map>
<map name="SRC3">src->srcs[4]</map>
<map name="SIZE">src->srcs[5]->uim_val</map>
</encode>
</bitset>
<bitset name="#instruction-cat6-a3xx-ld" extends="#instruction-cat6-a3xx">
<pattern pos="0" >1</pattern>