diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index 94f75a6e985..e4f76813d8e 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -43,6 +43,9 @@ enum agx_dbg {
 
 extern int agx_debug;
 
+/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
+#define AGX_NUM_REGS (256)
+
 enum agx_index_type {
    AGX_INDEX_NULL = 0,
    AGX_INDEX_NORMAL = 1,
@@ -325,6 +328,9 @@ typedef struct agx_block {
    BITSET_WORD *live_in;
    BITSET_WORD *live_out;
 
+   /* Register allocation */
+   BITSET_DECLARE(regs_out, AGX_NUM_REGS);
+
    /* Offset of the block in the emitted binary */
    off_t offset;
 
diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c
index 0977d2be2d5..0f1acdc7c57 100644
--- a/src/asahi/compiler/agx_register_allocate.c
+++ b/src/asahi/compiler/agx_register_allocate.c
@@ -37,8 +37,6 @@ agx_read_registers(agx_instr *I, unsigned s)
    unsigned size = I->src[s].size == AGX_SIZE_32 ? 2 : 1;
 
    switch (I->op) {
-   case AGX_OPCODE_DEVICE_LOAD:
-      return 8;
    default:
       return size;
    }
@@ -58,43 +56,157 @@ agx_write_registers(agx_instr *I, unsigned d)
       return 8;
    case AGX_OPCODE_LD_VARY_FLAT:
       return 6;
+   case AGX_OPCODE_P_COMBINE:
+   {
+      /* One slot per source position, up to the last non-null source */
+      unsigned components = 0;
+
+      for (unsigned i = 0; i < 4; ++i) {
+         if (!agx_is_null(I->src[i]))
+            components = i + 1;
+      }
+
+      return components * size;
+   }
    default:
       return size;
    }
 }
 
+/**
+ * Find the lowest free run of `count` consecutive registers whose base is a
+ * multiple of `align`, mark the run as used in `used_regs`, and return the
+ * base. `align` must be nonzero.
+ */
+static unsigned
+agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align)
+{
+   assert(align > 0 && "zero alignment would never advance the search");
+
+   /* Only consider bases where the whole run fits in the register file, so
+    * we never test or set bits past AGX_NUM_REGS */
+   for (unsigned reg = 0; reg + count <= AGX_NUM_REGS; reg += align) {
+      bool conflict = false;
+
+      for (unsigned j = 0; j < count; ++j)
+         conflict |= BITSET_TEST(used_regs, reg + j);
+
+      if (!conflict) {
+         for (unsigned j = 0; j < count; ++j)
+            BITSET_SET(used_regs, reg + j);
+
+         return reg;
+      }
+   }
+
+   unreachable("Could not find a free register");
+}
+
+/**
+ * Assign registers to SSA values in a block.
+ *
+ * Starts from the union of the predecessors' regs_out, walks the block's
+ * instructions freeing killed sources and allocating destinations, records
+ * each choice in ssa_to_reg, and saves the final occupancy in regs_out.
+ */
+static void
+agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg)
+{
+   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
+
+   agx_foreach_predecessor(block, pred) {
+      for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)
+         used_regs[i] |= pred->regs_out[i];
+   }
+
+   BITSET_SET(used_regs, 0); // control flow writes r0l
+   BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID
+   BITSET_SET(used_regs, (5*2 + 1));
+
+   agx_foreach_instr_in_block(block, I) {
+      /* First, free killed sources. NOTE(review): only agx_read_registers()
+       * registers are released, which may under-free multi-register values */
+      agx_foreach_src(I, s) {
+         if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {
+            unsigned reg = ssa_to_reg[I->src[s].value];
+            unsigned count = agx_read_registers(I, s);
+
+            for (unsigned i = 0; i < count; ++i)
+               BITSET_CLEAR(used_regs, reg + i);
+         }
+      }
+
+      /* Next, assign destinations. Always legal in SSA form. */
+      agx_foreach_dest(I, d) {
+         if (I->dest[d].type == AGX_INDEX_NORMAL) {
+            unsigned count = agx_write_registers(I, d);
+            unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;
+            unsigned reg = agx_assign_regs(used_regs, count, align);
+
+            ssa_to_reg[I->dest[d].value] = reg;
+         }
+      }
+   }
+
+   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
+   memcpy(block->regs_out, used_regs, sizeof(used_regs));
+}
+
+/**
+ * Assign registers to every SSA value in the shader, then lower the RA
+ * pseudo-instructions (p_combine, p_extract) to moves and rewrite the
+ * remaining SSA sources and destinations as registers.
+ */
 void
 agx_ra(agx_context *ctx)
 {
-   unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));
-   unsigned usage = 6*2;
+   agx_compute_liveness(ctx);
+
+   /* Register assigned to each SSA value, indexed by SSA index. Registers
+    * are numbered below AGX_NUM_REGS (256), so a uint8_t holds any of them */
+   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
+   agx_foreach_block(ctx, block)
+      agx_ra_assign_local(block, ssa_to_reg);
+
+   /* TODO: Coalesce combines */
 
    agx_foreach_instr_global_safe(ctx, ins) {
       /* Lower away RA pseudo-instructions */
       if (ins->op == AGX_OPCODE_P_COMBINE) {
          /* TODO: Optimize out the moves! */
-         unsigned components = 0;
-
-         for (unsigned i = 0; i < 4; ++i) {
-            if (!agx_is_null(ins->src[i]))
-               components = i + 1;
-         }
-
-         unsigned size = ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
-         if (size == 2 && usage & 1) usage++;
-         unsigned base = usage;
          assert(ins->dest[0].type == AGX_INDEX_NORMAL);
-         alloc[ins->dest[0].value] = base;
-         usage += (components * size);
+         enum agx_size common_size = ins->dest[0].size;
+         unsigned base = ssa_to_reg[ins->dest[0].value];
+         unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;
 
          /* Move the sources */
          agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
 
+         /* TODO: Eliminate the intermediate copy by handling parallel copies.
+          * For now, stage every source in scratch registers starting at r124
+          * (presumably so sources overlapping the destination are not
+          * clobbered mid-copy). NOTE(review): nothing visible here keeps the
+          * allocator from handing out those registers; confirm. */
          for (unsigned i = 0; i < 4; ++i) {
             if (agx_is_null(ins->src[i])) continue;
-            assert(ins->src[0].type == AGX_INDEX_NORMAL);
-            agx_mov_to(&b, agx_register(base + (i * size), ins->dest[0].size),
-                  agx_register(alloc[ins->src[i].value], ins->src[0].size));
+            unsigned src_reg = ins->src[i].value;
+            if (ins->src[i].type == AGX_INDEX_NORMAL)
+               src_reg = ssa_to_reg[src_reg];
+            else
+               assert(ins->src[i].type == AGX_INDEX_REGISTER);
+
+            assert(ins->src[i].size == common_size);
+
+            agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
+                  agx_register(src_reg, common_size));
+         }
+
+         /* Then copy the staged values into the destination vector */
+         for (unsigned i = 0; i < 4; ++i) {
+            if (agx_is_null(ins->src[i])) continue;
+
+            agx_mov_to(&b, agx_register(base + (i * size), common_size),
+                  agx_register(124*2 + (i * size), common_size));
          }
 
          /* We've lowered away, delete the old */
@@ -102,34 +214,45 @@ agx_ra(agx_context *ctx)
          continue;
       } else if (ins->op == AGX_OPCODE_P_EXTRACT) {
          assert(ins->dest[0].type == AGX_INDEX_NORMAL);
-         assert(ins->src[0].type == AGX_INDEX_NORMAL);
          assert(ins->dest[0].size == ins->src[0].size);
+
+         /* The source vector is either a fixed register or an SSA value */
+         unsigned base = ins->src[0].value;
+
+         if (ins->src[0].type != AGX_INDEX_REGISTER) {
+            assert(ins->src[0].type == AGX_INDEX_NORMAL);
+            base = ssa_to_reg[base];
+         }
 
          unsigned size = ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
-         alloc[ins->dest[0].value] = alloc[ins->src[0].value] + (size * ins->imm);
+         unsigned left = ssa_to_reg[ins->dest[0].value];
+         unsigned right = base + (size * ins->imm);
+
+         /* Only copy if the component is not already where it is wanted */
+         if (left != right) {
+            agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
+            agx_mov_to(&b, agx_register(left, ins->dest[0].size),
+                  agx_register(right, ins->src[0].size));
+         }
+
          agx_remove_instruction(ins);
          continue;
       }
 
       agx_foreach_src(ins, s) {
          if (ins->src[s].type == AGX_INDEX_NORMAL) {
-            unsigned v = alloc[ins->src[s].value];
+            unsigned v = ssa_to_reg[ins->src[s].value];
             ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
          }
       }
 
       agx_foreach_dest(ins, d) {
          if (ins->dest[d].type == AGX_INDEX_NORMAL) {
-            unsigned size = ins->dest[d].size == AGX_SIZE_32 ? 2 : 1;
-            if (size == 2 && usage & 1) usage++;
-            unsigned v = usage;
-            usage += agx_write_registers(ins, d);
-            alloc[ins->dest[d].value] = v;
+            unsigned v = ssa_to_reg[ins->dest[d].value];
             ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
          }
       }
    }
 
-   assert(usage < 256 && "dummy RA");
-   free(alloc);
+   free(ssa_to_reg);
 }