diff --git a/src/freedreno/computerator/examples/stg_ldg_offset.asm b/src/freedreno/computerator/examples/stg_ldg_offset.asm new file mode 100644 index 00000000000..53b379d47ff --- /dev/null +++ b/src/freedreno/computerator/examples/stg_ldg_offset.asm @@ -0,0 +1,17 @@ +@localsize 16, 1, 1 +@buf 128 (c2.x) ; c2.xy +@invocationid(r0.x) ; r0.xyz +mov.u32u32 r0.y, r0.x +mov.u32u32 r1.x, c2.x +mov.u32u32 r1.y, c2.y +mov.u32u32 r2.x, 0xff +(rpt5)nop +stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1 +nop(sy) +ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1 +nop(sy) +add.u r4.x, r4.x, 1 +(rpt3)nop +stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1 +end +nop diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index f0b9731fee1..b0cfa73a4c1 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -303,9 +303,11 @@ static const struct opc_info { /* category 6: */ OPC(6, OPC_LDG, ldg), + OPC(6, OPC_LDG_A, ldg.a), OPC(6, OPC_LDL, ldl), OPC(6, OPC_LDP, ldp), OPC(6, OPC_STG, stg), + OPC(6, OPC_STG_A, stg.a), OPC(6, OPC_STL, stl), OPC(6, OPC_STP, stp), OPC(6, OPC_LDIB, ldib), diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 656ee2b6db3..8fb954e0f57 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -291,6 +291,9 @@ typedef enum { OPC_ATOMIC_B_OR = _OPC(6, 53), OPC_ATOMIC_B_XOR = _OPC(6, 54), + OPC_LDG_A = _OPC(6, 55), + OPC_STG_A = _OPC(6, 56), + /* category 7: */ OPC_BAR = _OPC(7, 0), OPC_FENCE = _OPC(7, 1), diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 5ba9134909f..7a340b78eb7 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -817,7 +817,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, * but for load instructions this arg is the address (and not * really sure any good way to test a hard-coded immed addr src) */ - if (is_store(instr) && (n == 1)) + if (is_store(instr) && (instr->opc != OPC_STG) && (n == 1)) return false; if 
((instr->opc == OPC_LDL) && (n == 0)) @@ -847,7 +847,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) return false; - if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2)) + if (instr->opc == OPC_STG && (n == 2)) + return false; + + if (instr->opc == OPC_STG_A && (n == 4)) return false; /* as with atomics, these cat6 instrs can only have an immediate diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 2bd2eebc83e..6dac29a78fe 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -827,6 +827,7 @@ is_store(struct ir3_instruction *instr) */ switch (instr->opc) { case OPC_STG: + case OPC_STG_A: case OPC_STGB: case OPC_STIB: case OPC_STP: @@ -844,6 +845,7 @@ static inline bool is_load(struct ir3_instruction *instr) { switch (instr->opc) { case OPC_LDG: + case OPC_LDG_A: case OPC_LDGB: case OPC_LDIB: case OPC_LDL: @@ -1731,6 +1733,54 @@ ir3_##name(struct ir3_block *block, \ #define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name) #define INSTR4(name) __INSTR4(0, name, OPC_##name) +#define __INSTR5(flag, name, opc) /* Generates ir3_<name>(): creates an opc instruction with ir3_instr_create(block, opc, 1, 5), attaches the dst via __ssa_dst() and the five sources a..e (each with its flags) via __ssa_src(), ORs flag into instr->flags and returns the instruction. */ \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags, \ + struct ir3_instruction *e, unsigned eflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, opc, 1, 5); \ + __ssa_dst(instr); \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + __ssa_src(instr, e, eflags); \ + instr->flags |= flag; \ + return instr; \ +} +#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name) +#define INSTR5(name) __INSTR5(0, name, OPC_##name) + +#define __INSTR6(flag, name, opc) /* Six-source builder generator: same construction as __INSTR5 but with ir3_instr_create(block, opc, 1, 6) and sources a..f. */ \ +static inline struct
ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags, \ + struct ir3_instruction *e, unsigned eflags, \ + struct ir3_instruction *f, unsigned fflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, opc, 1, 6); \ + __ssa_dst(instr); \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + __ssa_src(instr, e, eflags); \ + __ssa_src(instr, f, fflags); \ + instr->flags |= flag; \ + return instr; \ +} +#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, name##_##f, OPC_##name) +#define INSTR6(name) __INSTR6(0, name, OPC_##name) + /* cat0 instructions: */ INSTR1(B) INSTR0(JUMP) @@ -1872,7 +1922,7 @@ INSTR3(LDG) INSTR3(LDL) INSTR3(LDLW) INSTR3(LDP) -INSTR3(STG) +INSTR4(STG) INSTR3(STL) INSTR3(STLW) INSTR3(STP) @@ -1893,6 +1943,8 @@ INSTR2(LDC) #if GPU >= 600 INSTR3(STIB); INSTR2(LDIB); +INSTR5(LDG_A); +INSTR6(STG_A); /* these two expand to the ir3_LDG_A() builder (5 srcs) and the ir3_STG_A() builder (6 srcs) */ INSTR3F(G, ATOMIC_ADD) INSTR3F(G, ATOMIC_SUB) INSTR3F(G, ATOMIC_XCHG) @@ -1921,8 +1973,6 @@ INSTR4F(G, ATOMIC_OR) INSTR4F(G, ATOMIC_XOR) #endif -INSTR4F(G, STG) - /* cat7 instructions: */ INSTR0(BAR) INSTR0(FENCE) diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 486dd1f7cfe..57e5b304ff3 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -357,4 +357,6 @@ const struct ir3_context_funcs ir3_a4xx_funcs = { .emit_intrinsic_store_image = emit_intrinsic_store_image, .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, .emit_intrinsic_image_size = emit_intrinsic_image_size_tex, + .emit_intrinsic_load_global_ir3 = NULL, /* NOTE(review): emit_intrinsic() in ir3_compiler_nir.c calls the load/store_global_ir3 hooks through ctx->funcs with no NULL check; confirm these intrinsics can never reach a context that uses ir3_a4xx_funcs */ + .emit_intrinsic_store_global_ir3 = NULL, }; diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 1fbc8f1248d..501a02ae3d0 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++
b/src/freedreno/ir3/ir3_a6xx.c @@ -371,6 +371,68 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, ir3_split_dest(b, dst, resinfo, 0, intr->num_components); } +/* Emits an ldg.a instruction for nir load_global_ir3. Components 0 and 1 of intr->src[0] are collected into the address operand and component 0 of intr->src[1] is the offset register; the loaded value is split into dest_components entries of dst. */ static void +emit_intrinsic_load_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + unsigned dest_components = nir_intrinsic_dest_components(intr); + struct ir3_instruction *addr, *offset; + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[0])[0], + ir3_get_src(ctx, &intr->src[0])[1] + }, 2); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + struct ir3_instruction *load = + ir3_LDG_A(b, addr, 0, offset, 0, + create_immed(b, 0), 0, /* first immediate: shift operand (same slot the parser rule cat6_stg_ldg_a6xx_offset fills first), left at 0 */ + create_immed(b, 0), 0, /* second immediate: constant offset operand, left at 0 */ + create_immed(b, dest_components), 0); /* last source: number of components to load */ + load->cat6.type = TYPE_U32; + load->dsts[0]->wrmask = MASK(dest_components); + + load->barrier_class = IR3_BARRIER_BUFFER_R; + load->barrier_conflict = IR3_BARRIER_BUFFER_W; + + ir3_split_dest(b, dst, load, 0, dest_components); +} + +/* Emits an stg.a instruction for nir store_global_ir3. Components 0 and 1 of intr->src[1] form the address, component 0 of intr->src[2] is the offset register, and the ncomp components of intr->src[0] are collected as the value to store. */ static void +emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *value, *addr, *offset; + unsigned ncomp = nir_intrinsic_src_components(intr, 0); + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[1])[0], + ir3_get_src(ctx, &intr->src[1])[1] + }, 2); + + offset = ir3_get_src(ctx, &intr->src[2])[0]; + + value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); + + struct ir3_instruction *stg = + ir3_STG_A(b, + addr, 0, + offset, 0, + create_immed(b, 0), 0, /* shift immediate, left at 0 */ + create_immed(b, 0), 0, /* constant offset immediate, left at 0 */ + value, 0, + create_immed(b, ncomp), 0); /* number of components to store */ + stg->cat6.type = TYPE_U32; + stg->cat6.iim_val = 1; /* NOTE(review): carried over unchanged from the ir3_STG_G path this function replaces; confirm the stg.a encoding still reads cat6.iim_val */ + + array_insert(b, b->keeps, stg); /* recorded in the block keeps list; presumably so the store is not dropped as unused, TODO confirm */ + + stg->barrier_class = IR3_BARRIER_BUFFER_W; + stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; +} + const struct ir3_context_funcs ir3_a6xx_funcs
= { .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo, .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo, @@ -379,5 +441,7 @@ const struct ir3_context_funcs ir3_a6xx_funcs = { .emit_intrinsic_store_image = emit_intrinsic_store_image, .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, .emit_intrinsic_image_size = emit_intrinsic_image_size, + .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3, + .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3, }; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index e314f1e79de..88cc9251b7b 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1735,54 +1735,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) end->barrier_conflict = IR3_BARRIER_EVERYTHING; break; - case nir_intrinsic_store_global_ir3: { - struct ir3_instruction *value, *addr, *offset; - unsigned ncomp = nir_intrinsic_src_components(intr, 0); - - addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ - ir3_get_src(ctx, &intr->src[1])[0], - ir3_get_src(ctx, &intr->src[1])[1] - }, 2); - - offset = ir3_get_src(ctx, &intr->src[2])[0]; - - value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); - - struct ir3_instruction *stg = - ir3_STG_G(ctx->block, addr, 0, value, 0, - create_immed(ctx->block, ncomp), 0, offset, 0); - stg->cat6.type = TYPE_U32; - stg->cat6.iim_val = 1; - - array_insert(b, b->keeps, stg); - - stg->barrier_class = IR3_BARRIER_BUFFER_W; - stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + case nir_intrinsic_store_global_ir3: + ctx->funcs->emit_intrinsic_store_global_ir3(ctx, intr); break; - } - - case nir_intrinsic_load_global_ir3: { - struct ir3_instruction *addr, *offset; - - addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ - ir3_get_src(ctx, &intr->src[0])[0], - ir3_get_src(ctx, &intr->src[0])[1] - }, 2); - - offset = ir3_get_src(ctx, 
&intr->src[1])[0]; - - struct ir3_instruction *load = - ir3_LDG(b, addr, 0, offset, 0, - create_immed(ctx->block, dest_components), 0); - load->cat6.type = TYPE_U32; - load->dsts[0]->wrmask = MASK(dest_components); - - load->barrier_class = IR3_BARRIER_BUFFER_R; - load->barrier_conflict = IR3_BARRIER_BUFFER_W; - - ir3_split_dest(b, dst, load, 0, dest_components); + case nir_intrinsic_load_global_ir3: + ctx->funcs->emit_intrinsic_load_global_ir3(ctx, intr, dst); break; - } case nir_intrinsic_load_ubo: emit_intrinsic_load_ubo(ctx, intr, dst); @@ -3085,10 +3043,12 @@ emit_stream_out(struct ir3_context *ctx) base = bases[strmout->output[i].output_buffer]; out = ctx->outputs[regid(strmout->output[i].register_index, c)]; - stg = ir3_STG(ctx->block, base, 0, out, 0, - create_immed(ctx->block, 1), 0); + stg = ir3_STG(ctx->block, + base, 0, + create_immed(ctx->block, (strmout->output[i].dst_offset + j) * 4), 0, + out, 0, + create_immed(ctx->block, 1), 0); stg->cat6.type = TYPE_U32; - stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; array_insert(ctx->block, ctx->block->keeps, stg); } diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 2a0066e069e..31ab63c2129 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -165,6 +165,9 @@ struct ir3_context_funcs { struct ir3_instruction * (*emit_intrinsic_atomic_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr); void (*emit_intrinsic_image_size)(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst); + void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst); + void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr); }; extern const struct ir3_context_funcs ir3_a4xx_funcs; diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index f7d9783c628..c5b263af910 100644 --- a/src/freedreno/ir3/ir3_lexer.l 
+++ b/src/freedreno/ir3/ir3_lexer.l @@ -296,9 +296,11 @@ static int parse_w(const char *str) /* category 6: */ "ldg" return TOKEN(T_OP_LDG); +"ldg.a" return TOKEN(T_OP_LDG_A); "ldl" return TOKEN(T_OP_LDL); "ldp" return TOKEN(T_OP_LDP); "stg" return TOKEN(T_OP_STG); +"stg.a" return TOKEN(T_OP_STG_A); "stl" return TOKEN(T_OP_STL); "stp" return TOKEN(T_OP_STP); "ldib" return TOKEN(T_OP_LDIB); diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 713676e24b1..47ca9eb0f59 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -92,7 +92,7 @@ static void new_label(const char *name) static struct ir3_instruction * new_instr(opc_t opc) { - instr = ir3_instr_create(block, opc, 4, 4); + instr = ir3_instr_create(block, opc, 4, 6); instr->flags = iflags.flags; instr->repeat = iflags.repeat; instr->nop = iflags.nop; @@ -525,9 +525,11 @@ static void print_token(FILE *file, int type, YYSTYPE value) /* category 6: */ %token T_OP_LDG +%token T_OP_LDG_A %token T_OP_LDL %token T_OP_LDP %token T_OP_STG +%token T_OP_STG_A %token T_OP_STL %token T_OP_STP %token T_OP_LDIB @@ -995,33 +997,40 @@ cat6_dim: '.' T_1D { instr->cat6.d = 1; } | '.' T_4D { instr->cat6.d = 4; } cat6_type: '.' 
type { instr->cat6.type = $2; } -cat6_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; } +cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; } +cat6_offset: cat6_imm_offset | '+' src cat6_dst_offset: offset { instr->cat6.dst_offset = $1; } | '+' src { instr->flags |= IR3_INSTR_G; } cat6_immed: integer { instr->cat6.iim_val = $1; } -cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate -| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate -| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate -| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate -| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' { +cat6_stg_ldg_a6xx_offset: /* Offset syntax shared by ldg.a and stg.a. After the offset register src, each alternative appends two immediate sources: first the shift operand, then the constant offset. NOTE(review): the shift values are checked with assert() only, so bad input aborts (or is unchecked when assertions are disabled) instead of reporting a parse error. */ + '+' '(' src offset ')' '<' '<' integer { + assert($8 == 2); /* the (reg+imm) form only accepts a shift of 2 */ + new_src(0, IR3_REG_IMMED)->uim_val = 0; + new_src(0, IR3_REG_IMMED)->uim_val = $4; + } +| '+' src '<' '<' integer offset '<' '<' integer { + assert($9 == 2); /* the shift applied to the immediate must be 2 */ + new_src(0, IR3_REG_IMMED)->uim_val = $5 - 2; /* register shift is stored minus 2; NOTE(review): no range check, a shift below 2 makes this negative */ + new_src(0, IR3_REG_IMMED)->uim_val = $6; + } + +cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate +| T_OP_LDG_A { new_instr(OPC_LDG_A); } cat6_type dst_reg ',' 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' immediate +| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate +| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate +| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate +| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' { new_src(0, IR3_REG_IMMED)->iim_val = $8; } ',' immediate // TODO some of the cat6 instructions have different syntax for a6xx..
//| T_OP_LDIB { new_instr(OPC_LDIB); } cat6_type dst_reg cat6_offset ',' reg ',' cat6_immed -cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_dst_offset ']' ',' src ',' immediate { - /* fixup src order, the offset reg is expected last currently */ - if (instr->flags & IR3_INSTR_G) { - struct ir3_register *offset = instr->srcs[1]; - instr->srcs[1] = instr->srcs[2]; - instr->srcs[2] = instr->srcs[3]; - instr->srcs[3] = offset; - } - } +cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_imm_offset ']' ',' src ',' immediate +| T_OP_STG_A { new_instr(OPC_STG_A); dummy_dst(); } cat6_type 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' src ',' immediate | T_OP_STP { new_instr(OPC_STP); dummy_dst(); } cat6_type 'p' '[' src cat6_dst_offset ']' ',' src ',' immediate | T_OP_STL { new_instr(OPC_STL); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate | T_OP_STLW { new_instr(OPC_STLW); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index e2c132f89b0..38e3ecaea3b 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -247,6 +247,18 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); break; case OPC_STG: /* srcs as built by ir3_STG(): [0] address, [1] immediate offset, [2] value to store, [3] size */ + validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); + validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF)); + validate_reg_size(ctx, instr->srcs[2], instr->cat6.type); + validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF)); + break; + case OPC_STG_A: /* srcs as built by ir3_STG_A(): [0] address, [1] offset register, [2] and [3] immediates, [4] value to store, [5] size. NOTE(review): srcs[1] gets no IR3_REG_HALF check here, unlike the OPC_STG case; confirm that is intentional */ + validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); + validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF)); + validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF)); + validate_reg_size(ctx, instr->srcs[4], instr->cat6.type); + validate_assert(ctx,
!(instr->srcs[5]->flags & IR3_REG_HALF)); + break; case OPC_STL: case OPC_STP: case OPC_STLW: diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index 6f500f0253e..31fcf97bf4a 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -153,20 +153,30 @@ static const struct test { // TODO is this a real instruction? Or float -6.0 ? // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true), /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */ - INSTR_6XX(c0d20906_02800004, "stg.f32 g[r1.x+r1.z], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */ - INSTR_6XX(c0da052e_01800042, "stg.s32 g[r0.z+r11.z], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */ + INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */ + INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */ INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"), INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"), INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"), - INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ - INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ + /* Custom-crafted */ + INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"), + INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"), + + INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ + INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w,
g[r0.w+308], 1"), - INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ - INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ + /* Found in TCS/TES shaders of GTA V */ + INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */ + + /* Custom-crafted */ + INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"), + + INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */ + INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */ INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"), INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 9283d30d653..e4bccd0fc16 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -42,84 +42,148 @@ SOFTWARE.
- - - LoaD Global - - - {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}], {SIZE} - - - - - {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE} - - !{SRC2_REG} - - - + 1 - - 00000 - 1 xxxxxxxxx 00 00000 + + + + + LoaD Global + + + + {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE} + + + + 0 + - !(src->srcs[1]->flags & IR3_REG_IMMED) - src->srcs[1] src->srcs[1]->iim_val src->srcs[2]->uim_val - + - STore Global + LoaD Global + + + - {SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}], {SRC3}, {SIZE} + {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+({SRC2}{OFF})<<{SRC2_BYTE_SHIFT}], {SIZE} - {SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE} + {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}<<{SRC2_BYTE_SHIFT}{OFF}<<2], {SIZE} - !{G} - - ({OFF_HI} << 8) | {OFF_LO} - - - + {SRC2_ADD_DWORD_SHIFT} > 0 - x - - 00000 - xxxxxxxx - 1x - - - - - - x - 00011 + + + 0 + + 1 + + + {SRC2_ADD_DWORD_SHIFT} + 2 + + - src->srcs[2]->uim_val - src->srcs[3] - 1 - src->srcs[1] - (src->flags & IR3_INSTR_G) && !(src->srcs[3]->flags & IR3_REG_IMMED) - src->cat6.dst_offset - src->cat6.dst_offset >> 8 + src->srcs[1] + src->srcs[2]->uim_val + src->srcs[3]->uim_val + src->srcs[4]->uim_val + + x + + xxxxxxxx + 1x + + + + x + 00011 + + + 1 + + + + + + STore Global + + + + {SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE} + + + + ({OFF_HI} << 8) | {OFF_LO} + + + + + 0 + + + src->srcs[1]->iim_val + src->srcs[1]->iim_val >> 8 + src->srcs[2] + src->srcs[3]->uim_val + + + + + + STore Global + + + + + + {SY}{JP}{NAME}.{TYPE} g[{SRC1}+({SRC2}{OFF})<<{DST_BYTE_SHIFT}], {SRC3}, {SIZE} + + + + + {SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}<<{DST_BYTE_SHIFT}{OFF}<<2], {SRC3}, {SIZE} + + {SRC2_ADD_DWORD_SHIFT} > 0 + + + + {SRC2_ADD_DWORD_SHIFT} + 2 + + + + 0 + + + 1 + + + src->srcs[1] + src->srcs[2]->uim_val + src->srcs[3]->uim_val + src->srcs[4] + src->srcs[5]->uim_val + + 1