diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h index c5a9331171c..c45e89d776f 100644 --- a/src/freedreno/afuc/afuc.h +++ b/src/freedreno/afuc/afuc.h @@ -60,9 +60,13 @@ typedef enum { ALU(MIN) ALU(MAX) ALU(CMP) /* compare src to immed */ + ALU(BIC) /* AND with second source negated */ + OPC_SETBIT, /* Set or clear a bit dynamically */ OPC_MOVI, /* move immediate */ - OPC_SETBIT, /* Set a bit */ + OPC_SETBITI, /* Set a bit */ OPC_CLRBIT, /* Clear a bit */ + OPC_UBFX, /* Unsigned BitField eXtract */ + OPC_BFI, /* BitField Insert */ #undef ALU /* Return the most-significant bit of src2, or 0 if src2 == 0 (the diff --git a/src/freedreno/afuc/afuc.xml b/src/freedreno/afuc/afuc.xml index 58351a1946f..9733d9bb592 100644 --- a/src/freedreno/afuc/afuc.xml +++ b/src/freedreno/afuc/afuc.xml @@ -184,6 +184,17 @@ SOFTWARE. + + + {REP}{NAME} {DST}, {SRC1}, 0x{RIMMED} + + + + + + 10010 + + {REP}{NAME} {DST}, 0x{IMMED} @@ -270,67 +281,151 @@ SOFTWARE. + 01001 + 01001 + + + 10010 + + + + + 0010 + + 0-extending right shift + 01010 + 01010 + + + 10011 + + + + + 0011 + + sign-extending right shift + 01011 + 01011 + + + 10100 + + + + + 0100 + + Rotate left (left shift with wraparound) + 01100 + 01100 + + + 10101 + + + + + 0101 + + Multiply low 8 bits of each source to produce a 16-bit result + 01101 + 01101 + + + 01100 + + + + + 01100 + + Unsigned minimum + 01110 + 01110 + + + 01010 + + + + + 01010 + + Unsigned maximum + 01111 + 01111 + + + 01011 + + + + + 01011 + + Compare two sources and produce a bitfield: @@ -340,19 +435,47 @@ SOFTWARE. Often a "branch on bit set/unset" instruction is used on the result to implement a compare-and-branch macro. + 10000 + 10000 + + + 01101 + + + + + 01101 + + + + + 01001 + + + + + 01001 + + Return the most-significant bit of src2, or 0 if src2 == 0 + 10100 - + + + 11001 + + + {REP}{NAME} {DST}, {SRC}, b{BIT} @@ -368,17 +491,87 @@ SOFTWARE. 
- + + + {REP}{NAME} {DST}, {SRC}, b{BIT} + + + + xxxxxx + 0110 + + + 10010 + + + src->src1 + + + + Set a given bit to 1 + 1 - + Clear a given bit, i.e. set it to 0 + 0 - + + + 1 + + + + + 0 + + + + + Set or clear a given bit. This is the non-immediate form of + setbit/clrbit. Bits 1-5 of src2 are the bit to set, bit 0 is the + value to set it to. + + + 10110 + + + + + {REP}{NAME} {DST}, {SRC}, b{LO}, b{HI} + + + + + xx + + + 10010 + + + src->bit + src->immed + src->src1 + + + + + Unsigned BitField eXtract + + 0111 + + + + BitField Insert + + 1000 + + + Special move-immediate instruction with a shift {SHIFT} == 0 @@ -393,13 +586,22 @@ SOFTWARE. - 10001 src->shift + + + 10001 + + + + + 01110 + + diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c index adfb21d84c7..fd9e0de7b96 100644 --- a/src/freedreno/afuc/disasm.c +++ b/src/freedreno/afuc/disasm.c @@ -225,12 +225,15 @@ static void disasm(struct emu *emu) { uint32_t sizedwords = emu->sizedwords; - uint32_t lpac_offset = 0; + uint32_t lpac_offset = 0, bv_offset = 0; EMU_GPU_REG(CP_SQE_INSTR_BASE); EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE); + EMU_CONTROL_REG(BV_INSTR_BASE); + EMU_CONTROL_REG(LPAC_INSTR_BASE); emu_init(emu); + emu->processor = EMU_PROC_SQE; struct isa_decode_options options; struct decode_state state; @@ -246,12 +249,22 @@ disasm(struct emu *emu) emu_run_bootstrap(emu); - /* Figure out if we have LPAC SQE appended: */ - if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) { - lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) - - emu_get_reg64(emu, &CP_SQE_INSTR_BASE); + /* Figure out if we have BV/LPAC SQE appended: */ + if (gpuver >= 7) { + bv_offset = emu_get_reg64(emu, &BV_INSTR_BASE) - + emu_get_reg64(emu, &CP_SQE_INSTR_BASE); + bv_offset /= 4; + lpac_offset = emu_get_reg64(emu, &LPAC_INSTR_BASE) - + emu_get_reg64(emu, &CP_SQE_INSTR_BASE); lpac_offset /= 4; - sizedwords = lpac_offset; + sizedwords = MIN2(bv_offset, lpac_offset); + } else { + if (emu_get_reg64(emu, 
&CP_LPAC_SQE_INSTR_BASE)) { + lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) - + emu_get_reg64(emu, &CP_SQE_INSTR_BASE); + lpac_offset /= 4; + sizedwords = lpac_offset; + } } setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); @@ -271,25 +284,51 @@ disasm(struct emu *emu) /* print instructions: */ isa_disasm(emu->instrs, sizedwords * 4, stdout, &options); - if (!lpac_offset) - return; + if (bv_offset) { + printf(";\n"); + printf("; BV microcode:\n"); + printf(";\n"); - printf(";\n"); - printf("; LPAC microcode:\n"); - printf(";\n"); + emu_fini(emu); - emu_fini(emu); + emu->processor = EMU_PROC_BV; + emu->instrs += bv_offset; + emu->sizedwords -= bv_offset; - emu->lpac = true; - emu->instrs += lpac_offset; - emu->sizedwords -= lpac_offset; + emu_init(emu); + emu_run_bootstrap(emu); - emu_init(emu); - emu_run_bootstrap(emu); + setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); - setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); + uint32_t sizedwords = lpac_offset - bv_offset; - isa_disasm(emu->instrs, emu->sizedwords * 4, stdout, &options); + isa_disasm(emu->instrs, sizedwords * 4, stdout, &options); + + emu->instrs -= bv_offset; + emu->sizedwords += bv_offset; + } + + if (lpac_offset) { + printf(";\n"); + printf("; LPAC microcode:\n"); + printf(";\n"); + + emu_fini(emu); + + emu->processor = EMU_PROC_LPAC; + emu->instrs += lpac_offset; + emu->sizedwords -= lpac_offset; + + emu_init(emu); + emu_run_bootstrap(emu); + + setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); + + isa_disasm(emu->instrs, emu->sizedwords * 4, stdout, &options); + + emu->instrs -= lpac_offset; + emu->sizedwords += lpac_offset; + } } static void diff --git a/src/freedreno/afuc/emu-regs.c b/src/freedreno/afuc/emu-regs.c index 91802253eac..7d6f8b38c4b 100644 --- a/src/freedreno/afuc/emu-regs.c +++ b/src/freedreno/afuc/emu-regs.c @@ -62,6 +62,9 @@ emu_set_control_reg(struct emu *emu, unsigned n, uint32_t val) 
EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR); EMU_CONTROL_REG(REG_WRITE); EMU_CONTROL_REG(REG_WRITE_ADDR); + EMU_CONTROL_REG(BV_CNTL); + EMU_CONTROL_REG(LPAC_CNTL); + EMU_CONTROL_REG(THREAD_SYNC); assert(n < ARRAY_SIZE(emu->control_regs.val)); BITSET_SET(emu->control_regs.written, n); @@ -86,6 +89,18 @@ emu_set_control_reg(struct emu *emu, unsigned n, uint32_t val) emu_set_gpu_reg(emu, write_addr++, val); emu_set_reg32(emu, &REG_WRITE_ADDR, write_addr | (flags << 16)); + } else if (gpuver >= 7 && n == emu_reg_offset(&BV_CNTL)) { + /* This is sort-of a hack, but emulate what the BV bootstrap routine + * does so that the main bootstrap routine doesn't get stuck. + */ + emu_set_reg32(emu, &THREAD_SYNC, + emu_get_reg32(emu, &THREAD_SYNC) & ~(1u << 1)); + } else if (gpuver >= 7 && n == emu_reg_offset(&LPAC_CNTL)) { + /* This is sort-of a hack, but emulate what the LPAC bootstrap routine + * does so that the main bootstrap routine doesn't get stuck. + */ + emu_set_reg32(emu, &THREAD_SYNC, + emu_get_reg32(emu, &THREAD_SYNC) & ~(1u << 2)); } else if (is_draw_state_control_reg(n)) { emu_set_draw_state_reg(emu, n, val); } diff --git a/src/freedreno/afuc/emu.c b/src/freedreno/afuc/emu.c index 856d0407cd3..2bdda3a5c97 100644 --- a/src/freedreno/afuc/emu.c +++ b/src/freedreno/afuc/emu.c @@ -39,8 +39,6 @@ #include "emu.h" #include "util.h" -extern int gpuver; - #define rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) #define rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) @@ -99,10 +97,17 @@ emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2) else if (src1 == src2) return 0x2b; return 0x1e; + case OPC_BIC: + return src1 & ~src2; case OPC_MSB: if (!src2) return 0; return util_last_bit(src2) - 1; + case OPC_SETBIT: { + unsigned bit = src2 >> 1; + unsigned val = src2 & 1; + return (src1 & ~(1u << bit)) | (val << bit); + } default: printf("unhandled alu opc: 0x%02x\n", opc); exit(1); @@ -132,7 +137,7 @@ emu_instr(struct emu *emu, struct afuc_instr *instr) case OPC_NOP: 
break; case OPC_MSB: - case OPC_ADD ... OPC_CMP: { + case OPC_ADD ... OPC_BIC: { uint32_t val = emu_alu(emu, instr->opc, emu_get_gpr_reg(emu, instr->src1), instr->has_immed ? instr->immed : @@ -180,7 +185,7 @@ emu_instr(struct emu *emu, struct afuc_instr *instr) emu_set_gpr_reg(emu, instr->dst, val); break; } - case OPC_SETBIT: { + case OPC_SETBITI: { uint32_t src = emu_get_gpr_reg(emu, instr->src1); emu_set_gpr_reg(emu, instr->dst, src | (1u << instr->bit)); break; @@ -190,6 +195,20 @@ emu_instr(struct emu *emu, struct afuc_instr *instr) emu_set_gpr_reg(emu, instr->dst, src & ~(1u << instr->bit)); break; } + case OPC_UBFX: { + uint32_t src = emu_get_gpr_reg(emu, instr->src1); + unsigned lo = instr->bit, hi = instr->immed; + uint32_t dst = (src >> lo) & BITFIELD_MASK(hi - lo + 1); + emu_set_gpr_reg(emu, instr->dst, dst); + break; + } + case OPC_BFI: { + uint32_t src = emu_get_gpr_reg(emu, instr->src1); + unsigned lo = instr->bit, hi = instr->immed; + src = (src & BITFIELD_MASK(hi - lo + 1)) << lo; + emu_set_gpr_reg(emu, instr->dst, emu_get_gpr_reg(emu, instr->dst) | src); + break; + } case OPC_CWRITE: { uint32_t src1 = emu_get_gpr_reg(emu, instr->src1); uint32_t src2 = emu_get_gpr_reg(emu, instr->src2); @@ -473,15 +492,29 @@ emu_init(struct emu *emu) EMU_GPU_REG(CP_SQE_INSTR_BASE); EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE); + EMU_CONTROL_REG(BV_INSTR_BASE); + EMU_CONTROL_REG(LPAC_INSTR_BASE); /* Setup the address of the SQE fw, just use the normal CPU ptr address: */ - if (emu->lpac) { - emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE); - } else { + switch (emu->processor) { + case EMU_PROC_SQE: emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE); + break; + case EMU_PROC_BV: + emu_set_reg64(emu, &BV_INSTR_BASE, EMU_INSTR_BASE); + break; + case EMU_PROC_LPAC: + if (gpuver >= 7) + emu_set_reg64(emu, &LPAC_INSTR_BASE, EMU_INSTR_BASE); + else + emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE); + break; } - if (emu->gpu_id == 660) { + if (emu->gpu_id 
== 730) { + emu_set_control_reg(emu, 0xef, 1 << 21); + emu_set_control_reg(emu, 0, 7 << 28); + } else if (emu->gpu_id == 660) { emu_set_control_reg(emu, 0, 3 << 28); } else if (emu->gpu_id == 650) { emu_set_control_reg(emu, 0, 1 << 28); diff --git a/src/freedreno/afuc/emu.h b/src/freedreno/afuc/emu.h index 111b44bb88a..e978d097aaa 100644 --- a/src/freedreno/afuc/emu.h +++ b/src/freedreno/afuc/emu.h @@ -31,6 +31,8 @@ #include "afuc.h" +extern int gpuver; + #define EMU_NUM_GPR_REGS 32 struct emu_gpr_regs { @@ -153,7 +155,11 @@ struct emu { */ bool quiet; - bool lpac; + enum { + EMU_PROC_SQE, + EMU_PROC_BV, + EMU_PROC_LPAC, + } processor; uint32_t *instrs; unsigned sizedwords; diff --git a/src/freedreno/afuc/lexer.l b/src/freedreno/afuc/lexer.l index d42e71f54fa..9383ea1273e 100644 --- a/src/freedreno/afuc/lexer.l +++ b/src/freedreno/afuc/lexer.l @@ -69,9 +69,12 @@ extern YYSTYPE yylval; "min" return TOKEN(T_OP_MIN); "max" return TOKEN(T_OP_MAX); "cmp" return TOKEN(T_OP_CMP); +"bic" return TOKEN(T_OP_BIC); "msb" return TOKEN(T_OP_MSB); "setbit" return TOKEN(T_OP_SETBIT); "clrbit" return TOKEN(T_OP_CLRBIT); +"ubfx" return TOKEN(T_OP_UBFX); +"bfi" return TOKEN(T_OP_BFI); "mov" return TOKEN(T_OP_MOV); "cwrite" return TOKEN(T_OP_CWRITE); "cread" return TOKEN(T_OP_CREAD); diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y index 6735950aec7..39ce7f44c61 100644 --- a/src/freedreno/afuc/parser.y +++ b/src/freedreno/afuc/parser.y @@ -144,9 +144,12 @@ label(const char *str) %token T_OP_MIN %token T_OP_MAX %token T_OP_CMP +%token T_OP_BIC %token T_OP_MSB %token T_OP_SETBIT %token T_OP_CLRBIT +%token T_OP_BFI +%token T_OP_UBFX %token T_OP_MOV %token T_OP_CWRITE %token T_OP_CREAD @@ -226,20 +229,28 @@ alu_2src_op: T_OP_ADD { new_instr(OPC_ADD); } | T_OP_MIN { new_instr(OPC_MIN); } | T_OP_MAX { new_instr(OPC_MAX); } | T_OP_CMP { new_instr(OPC_CMP); } +| T_OP_BIC { new_instr(OPC_BIC); } alu_2src_instr: alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); 
src2($6); } | alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); } -alu_clrsetbit_op: T_OP_SETBIT { new_instr(OPC_SETBIT); } -| T_OP_CLRBIT { new_instr(OPC_CLRBIT); } +alu_setbit_src2: T_BIT { bit($1); instr->opc = OPC_SETBITI; } +| reg { src2($1); } -alu_clrsetbit_instr: alu_clrsetbit_op reg ',' reg ',' T_BIT { dst($2); src1($4); bit($6); } +alu_clrsetbit_instr: T_OP_SETBIT reg ',' reg ',' alu_setbit_src2 { new_instr(OPC_SETBIT); dst($2); src1($4); } +| T_OP_CLRBIT reg ',' reg ',' T_BIT { new_instr(OPC_CLRBIT); dst($2); src1($4); bit($6); } + +alu_bitfield_op: T_OP_UBFX { new_instr(OPC_UBFX); } +| T_OP_BFI { new_instr(OPC_BFI); } + +alu_bitfield_instr: alu_bitfield_op reg ',' reg ',' T_BIT ',' T_BIT { dst($2); src1($4); bit($6); immed($8); } alu_instr: alu_2src_instr | alu_msb_instr | alu_not_instr | alu_mov_instr | alu_clrsetbit_instr +| alu_bitfield_instr load_op: T_OP_LOAD { new_instr(OPC_LOAD); } | T_OP_CREAD { new_instr(OPC_CREAD); } diff --git a/src/freedreno/afuc/util.c b/src/freedreno/afuc/util.c index b19c21c974f..65f7560a7ad 100644 --- a/src/freedreno/afuc/util.c +++ b/src/freedreno/afuc/util.c @@ -252,17 +252,25 @@ afuc_printc(enum afuc_color c, const char *fmt, ...) 
int afuc_util_init(int gpuver, bool colors) { - char *name, *control_reg_name; + char *name, *control_reg_name, *variant; char *pipe_reg_name = NULL; switch (gpuver) { + case 7: + name = "A6XX"; + variant = "A7XX"; + control_reg_name = "A7XX_CONTROL_REG"; + pipe_reg_name = "A7XX_PIPE_REG"; + break; case 6: name = "A6XX"; + variant = "A6XX"; control_reg_name = "A6XX_CONTROL_REG"; pipe_reg_name = "A6XX_PIPE_REG"; break; case 5: name = "A5XX"; + variant = "A5XX"; control_reg_name = "A5XX_CONTROL_REG"; pipe_reg_name = "A5XX_PIPE_REG"; break; @@ -286,7 +294,7 @@ int afuc_util_init(int gpuver, bool colors) control_regs = rnn_finddomain(db, control_reg_name); pipe_regs = rnn_finddomain(db, pipe_reg_name); - rnndec_varadd(ctx, "chip", name); + rnndec_varadd(ctx, "chip", variant); pm4_packets = rnn_findenum(ctx->db, "adreno_pm4_type3_packets"); diff --git a/src/freedreno/registers/adreno/adreno_control_regs.xml b/src/freedreno/registers/adreno/adreno_control_regs.xml index ef428117cab..cee87df50be 100644 --- a/src/freedreno/registers/adreno/adreno_control_regs.xml +++ b/src/freedreno/registers/adreno/adreno_control_regs.xml @@ -239,6 +239,15 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> + + + + + + + + + @@ -250,10 +259,46 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> + Controls whether RB, IB1, IB2, IB3, or SDS is executed + + + Controls high 32 bits used by load and store afuc instructions + + Used to initialize the jump table for handling packets at bootup + + + + + + + + + + + + + Register used to create critical sections when reading/writing + shared memory (0x200-0x2ff). Each bit contains a lock. Writing 1 + to the bit initiates a lock, reads return 1 once the lock is + taken. Writing 0 unlocks. + + + + + + + + The low 3 bits are used as a semaphore to let SQE wait for other + coprocessors to start. SQE sets it to 0x7 before starting the + coprocessors, then each coprocessor atomically clears a bit. 
+ Other bits are used for CP_THREAD_CONTROL::SYNC_THREADS and + other internal syncing. + + diff --git a/src/freedreno/registers/adreno/adreno_pipe_regs.xml b/src/freedreno/registers/adreno/adreno_pipe_regs.xml index 404685e3800..aed3a4a4682 100644 --- a/src/freedreno/registers/adreno/adreno_pipe_regs.xml +++ b/src/freedreno/registers/adreno/adreno_pipe_regs.xml @@ -36,6 +36,9 @@ CP_WAIT_MEM_WRITES: + + + @@ -78,6 +81,35 @@ CP_WAIT_MEM_WRITES: + + + + + + + + + + + + + + + + + + + + + + + + + + + + +