diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h
index c5a9331171c..c45e89d776f 100644
--- a/src/freedreno/afuc/afuc.h
+++ b/src/freedreno/afuc/afuc.h
@@ -60,9 +60,13 @@ typedef enum {
ALU(MIN)
ALU(MAX)
ALU(CMP) /* compare src to immed */
+ ALU(BIC) /* bit clear: src1 AND NOT src2 */
+ OPC_SETBIT, /* Set or clear a bit dynamically (bit index and value come from src2) */
OPC_MOVI, /* move immediate */
- OPC_SETBIT, /* Set a bit */
+ OPC_SETBITI, /* Set a bit given by an immediate bit index */
OPC_CLRBIT, /* Clear a bit */
+ OPC_UBFX, /* Unsigned BitField eXtract */
+ OPC_BFI, /* BitField Insert */
#undef ALU
/* Return the most-significant bit of src2, or 0 if src2 == 0 (the
diff --git a/src/freedreno/afuc/afuc.xml b/src/freedreno/afuc/afuc.xml
index 58351a1946f..9733d9bb592 100644
--- a/src/freedreno/afuc/afuc.xml
+++ b/src/freedreno/afuc/afuc.xml
@@ -184,6 +184,17 @@ SOFTWARE.
+
+
+ {REP}{NAME} {DST}, {SRC1}, 0x{RIMMED}
+
+
+
+
+
+ 10010
+
+
{REP}{NAME} {DST}, 0x{IMMED}
@@ -270,67 +281,151 @@ SOFTWARE.
+
01001
+
01001
+
+
+ 10010
+
+
+
+
+ 0010
+
+
0-extending right shift
+
01010
+
01010
+
+
+ 10011
+
+
+
+
+ 0011
+
+
sign-extending right shift
+
01011
+
01011
+
+
+ 10100
+
+
+
+
+ 0100
+
+
Rotate left (left shift with wraparound)
+
01100
+
01100
+
+
+ 10101
+
+
+
+
+ 0101
+
+
Multiply low 8 bits of each source to produce a 16-bit result
+
01101
+
01101
+
+
+ 01100
+
+
+
+
+ 01100
+
+
Unsigned minimum
+
01110
+
01110
+
+
+ 01010
+
+
+
+
+ 01010
+
+
Unsigned maximum
+
01111
+
01111
+
+
+ 01011
+
+
+
+
+ 01011
+
+
Compare two sources and produce a bitfield:
@@ -340,19 +435,47 @@ SOFTWARE.
Often a "branch on bit set/unset" instruction is used on the
result to implement a compare-and-branch macro.
+
10000
+
10000
+
+
+ 01101
+
+
+
+
+ 01101
+
+
+
+
+ 01001
+
+
+
+
+ 01001
+
+
Return the most-significant bit of src2, or 0 if src2 == 0
+
10100
-
+
+
+ 11001
+
+
+
{REP}{NAME} {DST}, {SRC}, b{BIT}
@@ -368,17 +491,87 @@ SOFTWARE.
-
+
+
+ {REP}{NAME} {DST}, {SRC}, b{BIT}
+
+
+
+ xxxxxx
+ 0110
+
+
+ 10010
+
+
+
+
+
+
+
Set a given bit to 1
+
1
-
+
Clear a given bit, i.e. set it to 0
+
0
-
+
+
+ 1
+
+
+
+
+ 0
+
+
+
+
+ Set or clear a given bit. This is the non-immediate form of
+ setbit/clrbit. Bits 1-5 of src2 are the bit to set, bit 0 is the
+ value to set it to.
+
+
+ 10110
+
+
+
+
+ {REP}{NAME} {DST}, {SRC}, b{LO}, b{HI}
+
+
+
+
+ xx
+
+
+ 10010
+
+
+
+
+
+
+
+
+
+ Unsigned BitField eXtract
+
+ 0111
+
+
+
+ BitField Insert
+
+ 1000
+
+
+
Special move-immediate instruction with a shift
{SHIFT} == 0
@@ -393,13 +586,22 @@ SOFTWARE.
- 10001
+
+
+ 10001
+
+
+
+
+ 01110
+
+
diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c
index adfb21d84c7..fd9e0de7b96 100644
--- a/src/freedreno/afuc/disasm.c
+++ b/src/freedreno/afuc/disasm.c
@@ -225,12 +225,15 @@ static void
disasm(struct emu *emu)
{
uint32_t sizedwords = emu->sizedwords;
- uint32_t lpac_offset = 0;
+ uint32_t lpac_offset = 0, bv_offset = 0;
EMU_GPU_REG(CP_SQE_INSTR_BASE);
EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
+ EMU_CONTROL_REG(BV_INSTR_BASE);
+ EMU_CONTROL_REG(LPAC_INSTR_BASE);
emu_init(emu);
+ emu->processor = EMU_PROC_SQE;
struct isa_decode_options options;
struct decode_state state;
@@ -246,12 +249,22 @@ disasm(struct emu *emu)
emu_run_bootstrap(emu);
- /* Figure out if we have LPAC SQE appended: */
- if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) {
- lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) -
- emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
+ /* Figure out if we have BV/LPAC SQE appended: */
+ if (gpuver >= 7) {
+ bv_offset = emu_get_reg64(emu, &BV_INSTR_BASE) -
+ emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
+ bv_offset /= 4;
+ lpac_offset = emu_get_reg64(emu, &LPAC_INSTR_BASE) -
+ emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
lpac_offset /= 4;
- sizedwords = lpac_offset;
+ sizedwords = MIN2(bv_offset, lpac_offset);
+ } else {
+ if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) {
+ lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) -
+ emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
+ lpac_offset /= 4;
+ sizedwords = lpac_offset;
+ }
}
setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
@@ -271,25 +284,51 @@ disasm(struct emu *emu)
/* print instructions: */
isa_disasm(emu->instrs, sizedwords * 4, stdout, &options);
- if (!lpac_offset)
- return;
+ if (bv_offset) {
+ printf(";\n");
+ printf("; BV microcode:\n");
+ printf(";\n");
- printf(";\n");
- printf("; LPAC microcode:\n");
- printf(";\n");
+ emu_fini(emu);
- emu_fini(emu);
+ emu->processor = EMU_PROC_BV;
+ emu->instrs += bv_offset;
+ emu->sizedwords -= bv_offset;
- emu->lpac = true;
- emu->instrs += lpac_offset;
- emu->sizedwords -= lpac_offset;
+ emu_init(emu);
+ emu_run_bootstrap(emu);
- emu_init(emu);
- emu_run_bootstrap(emu);
+ setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
- setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
+ uint32_t bv_sizedwords = lpac_offset - bv_offset; /* the BV section ends where the LPAC section begins; distinct name avoids shadowing the function-level sizedwords */
- isa_disasm(emu->instrs, emu->sizedwords * 4, stdout, &options);
+ isa_disasm(emu->instrs, bv_sizedwords * 4, stdout, &options);
+
+ emu->instrs -= bv_offset; /* undo the BV adjustment so the LPAC offset below is applied to the original buffer */
+ emu->sizedwords += bv_offset;
+ }
+
+ if (lpac_offset) {
+ printf(";\n");
+ printf("; LPAC microcode:\n");
+ printf(";\n");
+
+ emu_fini(emu);
+
+ emu->processor = EMU_PROC_LPAC;
+ emu->instrs += lpac_offset;
+ emu->sizedwords -= lpac_offset;
+
+ emu_init(emu);
+ emu_run_bootstrap(emu);
+
+ setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
+
+ isa_disasm(emu->instrs, emu->sizedwords * 4, stdout, &options);
+
+ emu->instrs -= lpac_offset;
+ emu->sizedwords += lpac_offset;
+ }
}
static void
diff --git a/src/freedreno/afuc/emu-regs.c b/src/freedreno/afuc/emu-regs.c
index 91802253eac..7d6f8b38c4b 100644
--- a/src/freedreno/afuc/emu-regs.c
+++ b/src/freedreno/afuc/emu-regs.c
@@ -62,6 +62,9 @@ emu_set_control_reg(struct emu *emu, unsigned n, uint32_t val)
EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR);
EMU_CONTROL_REG(REG_WRITE);
EMU_CONTROL_REG(REG_WRITE_ADDR);
+ EMU_CONTROL_REG(BV_CNTL);
+ EMU_CONTROL_REG(LPAC_CNTL);
+ EMU_CONTROL_REG(THREAD_SYNC);
assert(n < ARRAY_SIZE(emu->control_regs.val));
BITSET_SET(emu->control_regs.written, n);
@@ -86,6 +89,18 @@ emu_set_control_reg(struct emu *emu, unsigned n, uint32_t val)
emu_set_gpu_reg(emu, write_addr++, val);
emu_set_reg32(emu, &REG_WRITE_ADDR, write_addr | (flags << 16));
+ } else if (gpuver >= 7 && n == emu_reg_offset(&BV_CNTL)) {
+ /* This is sort-of a hack: clear THREAD_SYNC bit 1, emulating what the BV
+ * bootstrap routine does, so that the main bootstrap routine doesn't get stuck.
+ */
+ emu_set_reg32(emu, &THREAD_SYNC,
+ emu_get_reg32(emu, &THREAD_SYNC) & ~(1u << 1));
+ } else if (gpuver >= 7 && n == emu_reg_offset(&LPAC_CNTL)) {
+ /* This is sort-of a hack: clear THREAD_SYNC bit 2, emulating what the LPAC
+ * bootstrap routine does, so that the main bootstrap routine doesn't get stuck.
+ */
+ emu_set_reg32(emu, &THREAD_SYNC,
+ emu_get_reg32(emu, &THREAD_SYNC) & ~(1u << 2));
} else if (is_draw_state_control_reg(n)) {
emu_set_draw_state_reg(emu, n, val);
}
diff --git a/src/freedreno/afuc/emu.c b/src/freedreno/afuc/emu.c
index 856d0407cd3..2bdda3a5c97 100644
--- a/src/freedreno/afuc/emu.c
+++ b/src/freedreno/afuc/emu.c
@@ -39,8 +39,6 @@
#include "emu.h"
#include "util.h"
-extern int gpuver;
-
#define rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
@@ -99,10 +97,17 @@ emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
else if (src1 == src2)
return 0x2b;
return 0x1e;
+ case OPC_BIC:
+ return src1 & ~src2;
case OPC_MSB:
if (!src2)
return 0;
return util_last_bit(src2) - 1;
+ case OPC_SETBIT: {
+ unsigned bit = (src2 >> 1) & 0x1f; /* bits 1-5 of src2: bit index; masked so the shifts below stay under 32 */
+ unsigned val = src2 & 1; /* bit 0 of src2: value to write into that bit */
+ return (src1 & ~(1u << bit)) | (val << bit);
+ }
default:
printf("unhandled alu opc: 0x%02x\n", opc);
exit(1);
@@ -132,7 +137,7 @@ emu_instr(struct emu *emu, struct afuc_instr *instr)
case OPC_NOP:
break;
case OPC_MSB:
- case OPC_ADD ... OPC_CMP: {
+ case OPC_ADD ... OPC_BIC: { /* NOTE(review): emu_alu also handles OPC_SETBIT, which the opcode enum lists after BIC, i.e. outside this range -- confirm it is dispatched to emu_alu */
uint32_t val = emu_alu(emu, instr->opc,
emu_get_gpr_reg(emu, instr->src1),
instr->has_immed ? instr->immed :
@@ -180,7 +185,7 @@ emu_instr(struct emu *emu, struct afuc_instr *instr)
emu_set_gpr_reg(emu, instr->dst, val);
break;
}
- case OPC_SETBIT: {
+ case OPC_SETBITI: {
uint32_t src = emu_get_gpr_reg(emu, instr->src1);
emu_set_gpr_reg(emu, instr->dst, src | (1u << instr->bit));
break;
@@ -190,6 +195,20 @@ emu_instr(struct emu *emu, struct afuc_instr *instr)
emu_set_gpr_reg(emu, instr->dst, src & ~(1u << instr->bit));
break;
}
+ case OPC_UBFX: {
+ uint32_t src = emu_get_gpr_reg(emu, instr->src1);
+ unsigned lo = instr->bit, hi = instr->immed; /* inclusive source bit range [lo, hi] */
+ uint32_t dst = (src >> lo) & BITFIELD_MASK(hi - lo + 1); /* move the field down to bit 0, keep (hi - lo + 1) bits; assumes BITFIELD_MASK(n) yields the low n bits */
+ emu_set_gpr_reg(emu, instr->dst, dst);
+ break;
+ }
+ case OPC_BFI: {
+ uint32_t src = emu_get_gpr_reg(emu, instr->src1);
+ unsigned lo = instr->bit, hi = instr->immed; /* inclusive destination bit range [lo, hi] */
+ src = (src & BITFIELD_MASK(hi - lo + 1)) << lo; /* take the low (hi - lo + 1) bits of src and move them up to bit lo */
+ emu_set_gpr_reg(emu, instr->dst, emu_get_gpr_reg(emu, instr->dst) | src); /* NOTE(review): the field is OR'd into dst without clearing dst's old bits in [lo, hi] -- confirm this matches hardware */
+ break;
+ }
case OPC_CWRITE: {
uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
@@ -473,15 +492,29 @@ emu_init(struct emu *emu)
EMU_GPU_REG(CP_SQE_INSTR_BASE);
EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
+ EMU_CONTROL_REG(BV_INSTR_BASE);
+ EMU_CONTROL_REG(LPAC_INSTR_BASE);
/* Setup the address of the SQE fw, just use the normal CPU ptr address: */
- if (emu->lpac) {
- emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
- } else {
+ switch (emu->processor) {
+ case EMU_PROC_SQE:
emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
+ break;
+ case EMU_PROC_BV:
+ emu_set_reg64(emu, &BV_INSTR_BASE, EMU_INSTR_BASE);
+ break;
+ case EMU_PROC_LPAC:
+ if (gpuver >= 7)
+ emu_set_reg64(emu, &LPAC_INSTR_BASE, EMU_INSTR_BASE);
+ else
+ emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
+ break;
}
- if (emu->gpu_id == 660) {
+ if (emu->gpu_id == 730) {
+ emu_set_control_reg(emu, 0xef, 1 << 21);
+ emu_set_control_reg(emu, 0, 7 << 28);
+ } else if (emu->gpu_id == 660) {
emu_set_control_reg(emu, 0, 3 << 28);
} else if (emu->gpu_id == 650) {
emu_set_control_reg(emu, 0, 1 << 28);
diff --git a/src/freedreno/afuc/emu.h b/src/freedreno/afuc/emu.h
index 111b44bb88a..e978d097aaa 100644
--- a/src/freedreno/afuc/emu.h
+++ b/src/freedreno/afuc/emu.h
@@ -31,6 +31,8 @@
#include "afuc.h"
+extern int gpuver;
+
#define EMU_NUM_GPR_REGS 32
struct emu_gpr_regs {
@@ -153,7 +155,11 @@ struct emu {
*/
bool quiet;
- bool lpac;
+ enum {
+ EMU_PROC_SQE,
+ EMU_PROC_BV,
+ EMU_PROC_LPAC,
+ } processor;
uint32_t *instrs;
unsigned sizedwords;
diff --git a/src/freedreno/afuc/lexer.l b/src/freedreno/afuc/lexer.l
index d42e71f54fa..9383ea1273e 100644
--- a/src/freedreno/afuc/lexer.l
+++ b/src/freedreno/afuc/lexer.l
@@ -69,9 +69,12 @@ extern YYSTYPE yylval;
"min" return TOKEN(T_OP_MIN);
"max" return TOKEN(T_OP_MAX);
"cmp" return TOKEN(T_OP_CMP);
+"bic" return TOKEN(T_OP_BIC);
"msb" return TOKEN(T_OP_MSB);
"setbit" return TOKEN(T_OP_SETBIT);
"clrbit" return TOKEN(T_OP_CLRBIT);
+"ubfx" return TOKEN(T_OP_UBFX);
+"bfi" return TOKEN(T_OP_BFI);
"mov" return TOKEN(T_OP_MOV);
"cwrite" return TOKEN(T_OP_CWRITE);
"cread" return TOKEN(T_OP_CREAD);
diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y
index 6735950aec7..39ce7f44c61 100644
--- a/src/freedreno/afuc/parser.y
+++ b/src/freedreno/afuc/parser.y
@@ -144,9 +144,12 @@ label(const char *str)
%token T_OP_MIN
%token T_OP_MAX
%token T_OP_CMP
+%token T_OP_BIC
%token T_OP_MSB
%token T_OP_SETBIT
%token T_OP_CLRBIT
+%token T_OP_BFI
+%token T_OP_UBFX
%token T_OP_MOV
%token T_OP_CWRITE
%token T_OP_CREAD
@@ -226,20 +229,28 @@ alu_2src_op: T_OP_ADD { new_instr(OPC_ADD); }
| T_OP_MIN { new_instr(OPC_MIN); }
| T_OP_MAX { new_instr(OPC_MAX); }
| T_OP_CMP { new_instr(OPC_CMP); }
+| T_OP_BIC { new_instr(OPC_BIC); }
alu_2src_instr: alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); src2($6); }
| alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); }
-alu_clrsetbit_op: T_OP_SETBIT { new_instr(OPC_SETBIT); }
-| T_OP_CLRBIT { new_instr(OPC_CLRBIT); }
+alu_setbit_src2: T_BIT { /* immediate bit index: switch the current instruction to the immediate opcode */ bit($1); instr->opc = OPC_SETBITI; }
+| reg { /* register operand: keep OPC_SETBIT */ src2($1); }
-alu_clrsetbit_instr: alu_clrsetbit_op reg ',' reg ',' T_BIT { dst($2); src1($4); bit($6); }
+alu_clrsetbit_instr: T_OP_SETBIT { /* create the instruction first: alu_setbit_src2's action runs before the final action */ new_instr(OPC_SETBIT); } reg ',' reg ',' alu_setbit_src2 { dst($3); src1($5); }
+| T_OP_CLRBIT reg ',' reg ',' T_BIT { new_instr(OPC_CLRBIT); dst($2); src1($4); bit($6); }
+
+alu_bitfield_op: T_OP_UBFX { new_instr(OPC_UBFX); }
+| T_OP_BFI { new_instr(OPC_BFI); }
+
+alu_bitfield_instr: alu_bitfield_op reg ',' reg ',' T_BIT ',' T_BIT { dst($2); src1($4); bit($6); immed($8); }
alu_instr: alu_2src_instr
| alu_msb_instr
| alu_not_instr
| alu_mov_instr
| alu_clrsetbit_instr
+| alu_bitfield_instr
load_op: T_OP_LOAD { new_instr(OPC_LOAD); }
| T_OP_CREAD { new_instr(OPC_CREAD); }
diff --git a/src/freedreno/afuc/util.c b/src/freedreno/afuc/util.c
index b19c21c974f..65f7560a7ad 100644
--- a/src/freedreno/afuc/util.c
+++ b/src/freedreno/afuc/util.c
@@ -252,17 +252,25 @@ afuc_printc(enum afuc_color c, const char *fmt, ...)
int afuc_util_init(int gpuver, bool colors)
{
- char *name, *control_reg_name;
+ char *name, *control_reg_name, *variant;
char *pipe_reg_name = NULL;
switch (gpuver) {
+ case 7:
+ name = "A6XX";
+ variant = "A7XX";
+ control_reg_name = "A7XX_CONTROL_REG";
+ pipe_reg_name = "A7XX_PIPE_REG";
+ break;
case 6:
name = "A6XX";
+ variant = "A6XX";
control_reg_name = "A6XX_CONTROL_REG";
pipe_reg_name = "A6XX_PIPE_REG";
break;
case 5:
name = "A5XX";
+ variant = "A5XX";
control_reg_name = "A5XX_CONTROL_REG";
pipe_reg_name = "A5XX_PIPE_REG";
break;
@@ -286,7 +294,7 @@ int afuc_util_init(int gpuver, bool colors)
control_regs = rnn_finddomain(db, control_reg_name);
pipe_regs = rnn_finddomain(db, pipe_reg_name);
- rnndec_varadd(ctx, "chip", name);
+ rnndec_varadd(ctx, "chip", variant);
pm4_packets = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
diff --git a/src/freedreno/registers/adreno/adreno_control_regs.xml b/src/freedreno/registers/adreno/adreno_control_regs.xml
index ef428117cab..cee87df50be 100644
--- a/src/freedreno/registers/adreno/adreno_control_regs.xml
+++ b/src/freedreno/registers/adreno/adreno_control_regs.xml
@@ -239,6 +239,15 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
+
+
+
+
+
+
+
+
+
@@ -250,10 +259,46 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
+ Controls whether RB, IB1, IB2, IB3, or SDS is executed
+
+
+ Controls high 32 bits used by load and store afuc instructions
+
+
Used to initialize the jump table for handling packets at bootup
+
+
+
+
+
+
+
+
+
+
+
+
+ Register used to create critical sections when reading/writing
+ shared memory (0x200-0x2ff). Each bit contains a lock. Writing 1
+ to the bit initiates a lock, reads return 1 once the lock is
+ taken. Writing 0 unlocks.
+
+
+
+
+
+
+
+ The low 3 bits are used as a semaphore to let SQE wait for other
+ coprocessors to start. SQE sets it to 0x7 before starting the
+ coprocessors, then each coprocessor atomically clears a bit.
+ Other bits are used for CP_THREAD_CONTROL::SYNC_THREADS and
+ other internal syncing.
+
+
diff --git a/src/freedreno/registers/adreno/adreno_pipe_regs.xml b/src/freedreno/registers/adreno/adreno_pipe_regs.xml
index 404685e3800..aed3a4a4682 100644
--- a/src/freedreno/registers/adreno/adreno_pipe_regs.xml
+++ b/src/freedreno/registers/adreno/adreno_pipe_regs.xml
@@ -36,6 +36,9 @@ CP_WAIT_MEM_WRITES:
+
+
+
@@ -78,6 +81,35 @@ CP_WAIT_MEM_WRITES:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+