From 944f4e6f8a1fbfd992a1f594b17bfc0e3d3429ca Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 24 Sep 2021 16:14:30 +0200 Subject: [PATCH] ir3: Better assemble/disassemble stc Add in the type, even though it turns out to not be that useful. Add in support for assembling it. Add some notes based on computerator experiments. And add support for the indirect a1.x mode that's needed for storing c64.x and later. Part-of: --- src/freedreno/ir3/disasm-a3xx.c | 1 + src/freedreno/ir3/ir3_lexer.l | 1 + src/freedreno/ir3/ir3_parser.y | 8 +++++ src/freedreno/ir3/ir3_validate.c | 4 +++ src/freedreno/ir3/tests/disasm.c | 8 +++-- src/freedreno/isa/encode.c | 11 +++++++ src/freedreno/isa/ir3-cat6.xml | 53 ++++++++++++++++++++++++++------ 7 files changed, 75 insertions(+), 11 deletions(-) diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index bb3c4e35575..224ef79b400 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -394,6 +394,7 @@ static const struct opc_info { OPC(6, OPC_GETSPID, getspid), OPC(6, OPC_GETWID, getwid), OPC(6, OPC_GETFIBERID, getfiberid), + OPC(6, OPC_STC, stc), OPC(6, OPC_SPILL_MACRO, spill.macro), OPC(6, OPC_RELOAD_MACRO, reload.macro), diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index 04bff6ab4af..181a17a90e6 100644 --- a/src/freedreno/ir3/ir3_lexer.l +++ b/src/freedreno/ir3/ir3_lexer.l @@ -374,6 +374,7 @@ static int parse_reg(const char *str) "getspid" return TOKEN(T_OP_GETSPID); "getwid" return TOKEN(T_OP_GETWID); "getfiberid" return TOKEN(T_OP_GETFIBERID); +"stc" return TOKEN(T_OP_STC); /* category 7: */ "bar" return TOKEN(T_OP_BAR); diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index b414caef4c5..2aaebd91221 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -613,6 +613,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token T_OP_GETSPID %token T_OP_GETWID %token 
T_OP_GETFIBERID +%token T_OP_STC /* category 7: */ %token T_OP_BAR @@ -1235,6 +1236,12 @@ cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' cat6_bi swap(instr->srcs[0], instr->srcs[1]); } +stc_dst: integer { new_src(0, IR3_REG_IMMED)->iim_val = $1; } +| T_A1 { new_src(0, IR3_REG_IMMED)->iim_val = 0; instr->flags |= IR3_INSTR_A1EN; } +| T_A1 '+' integer { new_src(0, IR3_REG_IMMED)->iim_val = $3; instr->flags |= IR3_INSTR_A1EN; } + +cat6_stc: T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' stc_dst ']' ',' src_reg ',' cat6_immed + cat6_todo: T_OP_G2L { new_instr(OPC_G2L); } | T_OP_L2G { new_instr(OPC_L2G); } | T_OP_RESFMT { new_instr(OPC_RESFMT); } @@ -1249,6 +1256,7 @@ cat6_instr: cat6_load | cat6_id | cat6_bindless_ldc | cat6_bindless_ibo +| cat6_stc | cat6_todo cat7_scope: '.' 'w' { instr->cat7.w = true; } diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 0fcea240d50..84be40ca795 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -346,6 +346,10 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) case OPC_GETWID: validate_reg_size(ctx, instr->dsts[0], instr->cat6.type); break; + case OPC_STC: + validate_reg_size(ctx, instr->srcs[0], instr->cat6.type); + validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF)); + break; default: validate_reg_size(ctx, instr->dsts[0], instr->cat6.type); validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index ee1544c3b42..ae68ffc621c 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -227,9 +227,13 @@ static const struct test { INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"), /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */ - INSTR_6XX(c7020020_01800000, "stc c[32], r0.x, 1", .parse_fail=true), + INSTR_6XX(c7020020_01800000, "stc.f32 c[32], r0.x, 1"), /* stc 
c[32], r0.x, 1 */ /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ - INSTR_6XX(c7060020_03800000, "stc c[32], r0.x, 3", .parse_fail=true), + INSTR_6XX(c7060020_03800000, "stc.u32 c[32], r0.x, 3"), /* stc c[32], r0.x, 3 */ + + /* custom */ + INSTR_6XX(c7060100_03800000, "stc.u32 c[a1.x], r0.x, 3"), /* stc c[a1.x], r0.x, 3 */ + INSTR_6XX(c7060120_03800000, "stc.u32 c[a1.x+32], r0.x, 3"), /* stc c[a1.x+32], r0.x, 3 */ /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */ INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */ diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c index 6f6fdce2d1d..bcf8f50a70a 100644 --- a/src/freedreno/isa/encode.c +++ b/src/freedreno/isa/encode.c @@ -308,6 +308,17 @@ __cat3_src_case(struct encode_state *s, struct ir3_register *reg) } } +typedef enum { + STC_DST_IMM, + STC_DST_A1 +} stc_dst_t; + +static inline stc_dst_t +__stc_dst_case(struct encode_state *s, struct ir3_instruction *instr) +{ + return (instr->flags & IR3_INSTR_A1EN) ? STC_DST_A1 : STC_DST_IMM; +} + #include "encode.h" diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 8dff10c1dcc..4227d558dcf 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -325,21 +325,55 @@ SOFTWARE. 01011 + + + {OFFSET} + + + 0 + + + + + a1.x{OFFSET} + + + 1 + + + + + Encoding for stc destination which can be constant or have an + offset of a1.x. + + + extract_reg_uim(src->srcs[0]) + + + STore Const - used for shader prolog (between shps and shpe) to store "uniform folded" values into CONST file NOTE: TYPE field actually seems to be set to different - values (ie f32 vs u32), but I *think* it does not matter. - (There is SP_MODE_CONTROL.CONSTANT_DEMOTION_ENABLE, but - I think float results are already converted to 32b) + values (ie f32 vs u32), but it seems that only the size (16b vs + 32b) matters. 
Setting a 16-bit type (f16, u16, or s16) doesn't + cause any promotion to 32-bit; instead, it causes the 16-bit sources to + be stored one after the other starting with the low half of the + constant. So e.g. "stc.f16 c[1], hr0.x, 1" copies hr0.x to the + bottom half of c0.y. There seems to be no way to set just the + upper half. In any case, the blob seems to only use the 32-bit + versions. - NOTE: this could be the "old" encoding, although it - would conflict with stgb from earlier gens + The blob disassembly doesn't include the type, but we still + display it so that we can preserve the different values the blob + sets when round-tripping. + + NOTE: this conflicts with stgb from earlier gens - {SY}{JP}{NAME} c[{DST}], {SRC}, {SIZE} + {SY}{JP}{NAME}.{TYPE} c[{DST}], {SRC}, {SIZE} x @@ -348,13 +382,14 @@ SOFTWARE. 1 xxxxx - - xxxxxxxxx + + xxxxxxxx xx 11100 - extract_reg_uim(src->srcs[0]) + src src->srcs[1] + src->cat6.iim_val