diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index fb079fcae68..9b0dc59f54c 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -779,12 +779,8 @@ struct agx_copy { /* Base register destination of the copy */ unsigned dest; - /* Base register source (or uniform base) of the copy */ - unsigned src; - bool is_uniform; - - /* Size of the copy */ - enum agx_size size; + /* Source of the copy */ + agx_index src; /* Whether the copy has been handled. Callers must leave to false. */ bool done; diff --git a/src/asahi/compiler/agx_lower_parallel_copy.c b/src/asahi/compiler/agx_lower_parallel_copy.c index 1300cdb0a50..576fe86dd24 100644 --- a/src/asahi/compiler/agx_lower_parallel_copy.c +++ b/src/asahi/compiler/agx_lower_parallel_copy.c @@ -40,31 +40,22 @@ * We only handles register-register copies, not general agx_index sources. This * suffices for its internal use for register allocation. */ -static agx_index -copy_src(const struct agx_copy *copy) -{ - if (copy->is_uniform) - return agx_uniform(copy->src, copy->size); - else - return agx_register(copy->src, copy->size); -} - static void do_copy(agx_builder *b, const struct agx_copy *copy) { - agx_mov_to(b, agx_register(copy->dest, copy->size), copy_src(copy)); + agx_mov_to(b, agx_register(copy->dest, copy->src.size), copy->src); } static void do_swap(agx_builder *b, const struct agx_copy *copy) { - assert(!copy->is_uniform && "cannot swap uniform with GPR"); + assert(copy->src.type == AGX_INDEX_REGISTER && "only GPRs are swapped"); - if (copy->dest == copy->src) + if (copy->dest == copy->src.value) return; - agx_index x = agx_register(copy->dest, copy->size); - agx_index y = copy_src(copy); + agx_index x = agx_register(copy->dest, copy->src.size); + agx_index y = copy->src; agx_xor_to(b, x, x, y); agx_xor_to(b, y, x, y); @@ -90,7 +81,7 @@ struct copy_ctx { static bool entry_blocked(struct agx_copy *entry, struct copy_ctx *ctx) { - for (unsigned i = 0; i < agx_size_align_16(entry->size); i++) { + for (unsigned i = 0; i < agx_size_align_16(entry->src.size); i++) { if (ctx->physreg_use_count[entry->dest + i] != 0) return true; } @@ -101,7 +92,7 @@ entry_blocked(struct agx_copy *entry, struct copy_ctx *ctx) static bool is_real(struct agx_copy *entry) { - return !entry->is_uniform; + return entry->src.type == AGX_INDEX_REGISTER; } /* TODO: Generalize to other bit sizes */ @@ -110,14 +101,15 @@ split_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry) { assert(!entry->done); assert(is_real(entry)); - assert(agx_size_align_16(entry->size) == 2); + assert(agx_size_align_16(entry->src.size) == 2); struct agx_copy *new_entry = &ctx->entries[ctx->entry_count++]; new_entry->dest = entry->dest + 1; - new_entry->src = entry->src + 1; + new_entry->src = entry->src; + new_entry->src.value += 1; new_entry->done = false; - entry->size = AGX_SIZE_16; - new_entry->size = AGX_SIZE_16; + entry->src.size = AGX_SIZE_16; + new_entry->src.size = AGX_SIZE_16; ctx->physreg_dest[entry->dest + 1] = new_entry; } @@ -141,9 +133,9 @@ agx_emit_parallel_copies(agx_builder *b, ctx->entries[i] = *entry; - for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) { + for (unsigned j = 0; j < agx_size_align_16(entry->src.size); j++) { if (is_real(entry)) - ctx->physreg_use_count[entry->src + j]++; + ctx->physreg_use_count[entry->src.value + j]++; /* Copies should not have overlapping destinations. */ assert(!ctx->physreg_dest[entry->dest + j]); @@ -170,9 +162,9 @@ agx_emit_parallel_copies(agx_builder *b, entry->done = true; progress = true; do_copy(b, entry); - for (unsigned j = 0; j < agx_size_align_16(entry->size); j++) { + for (unsigned j = 0; j < agx_size_align_16(entry->src.size); j++) { if (is_real(entry)) - ctx->physreg_use_count[entry->src + j]--; + ctx->physreg_use_count[entry->src.value + j]--; ctx->physreg_dest[entry->dest + j] = NULL; } } @@ -193,7 +185,7 @@ agx_emit_parallel_copies(agx_builder *b, */ for (unsigned i = 0; i < ctx->entry_count; i++) { struct agx_copy *entry = &ctx->entries[i]; - if (entry->done || (agx_size_align_16(entry->size) != 2)) + if (entry->done || (agx_size_align_16(entry->src.size) != 2)) continue; if (((ctx->physreg_use_count[entry->dest] == 0 || @@ -249,7 +241,7 @@ agx_emit_parallel_copies(agx_builder *b, assert(is_real(entry)); /* catch trivial copies */ - if (entry->dest == entry->src) { + if (entry->dest == entry->src.value) { entry->done = true; continue; } @@ -259,16 +251,16 @@ agx_emit_parallel_copies(agx_builder *b, /* Split any blocking copies whose sources are only partially * contained within our destination. */ - if (agx_size_align_16(entry->size) == 1) { + if (agx_size_align_16(entry->src.size) == 1) { for (unsigned j = 0; j < ctx->entry_count; j++) { struct agx_copy *blocking = &ctx->entries[j]; if (blocking->done) continue; - if (blocking->src <= entry->dest && - blocking->src + 1 >= entry->dest && - agx_size_align_16(blocking->size) == 2) { + if (blocking->src.value <= entry->dest && + blocking->src.value + 1 >= entry->dest && + agx_size_align_16(blocking->src.size) == 2) { split_32bit_copy(ctx, blocking); } } @@ -281,9 +273,9 @@ agx_emit_parallel_copies(agx_builder *b, */ for (unsigned j = 0; j < ctx->entry_count; j++) { struct agx_copy *blocking = &ctx->entries[j]; - if (blocking->src >= entry->dest && - blocking->src < entry->dest + agx_size_align_16(entry->size)) { - blocking->src = entry->src + (blocking->src - entry->dest); + if (blocking->src.value >= entry->dest && + blocking->src.value < entry->dest + agx_size_align_16(entry->src.size)) { + blocking->src.value = entry->src.value + (blocking->src.value - entry->dest); } } diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c index c9cc3208141..0cbd6739f6e 100644 --- a/src/asahi/compiler/agx_register_allocate.c +++ b/src/asahi/compiler/agx_register_allocate.c @@ -359,8 +359,7 @@ agx_insert_parallel_copies(agx_context *ctx, agx_block *block) copies[i++] = (struct agx_copy) { .dest = dest.value, - .src = src.value, - .size = src.size + .src = src, }; } @@ -444,13 +443,9 @@ agx_ra(agx_context *ctx) if (agx_is_null(ins->src[i])) continue; assert(ins->src[i].size == ins->dest[0].size); - bool is_uniform = ins->src[i].type == AGX_INDEX_UNIFORM; - copies[n++] = (struct agx_copy) { .dest = base + (i * width), - .is_uniform = is_uniform, - .src = is_uniform ? ins->src[i].value : agx_index_to_reg(ssa_to_reg, ins->src[i]), - .size = ins->src[i].size + .src = ins->src[i] }; } @@ -473,8 +468,7 @@ agx_ra(agx_context *ctx) copies[n++] = (struct agx_copy) { .dest = agx_index_to_reg(ssa_to_reg, ins->dest[i]), - .src = base + (i * width), - .size = ins->dest[i].size + .src = agx_register(base + (i * width), ins->dest[i].size) }; } diff --git a/src/asahi/compiler/test/test-lower-parallel-copy.cpp b/src/asahi/compiler/test/test-lower-parallel-copy.cpp index ded6704776a..72c6da5e16a 100644 --- a/src/asahi/compiler/test/test-lower-parallel-copy.cpp +++ b/src/asahi/compiler/test/test-lower-parallel-copy.cpp @@ -62,8 +62,8 @@ protected: TEST_F(LowerParallelCopy, UnrelatedCopies) { struct agx_copy test_1[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 4, .src = 6, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 4, .src = agx_register(6, AGX_SIZE_32) }, }; CASE(test_1, { @@ -72,8 +72,8 @@ TEST_F(LowerParallelCopy, UnrelatedCopies) { }); struct agx_copy test_2[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 4, .src = 5, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 4, .src = agx_register(5, AGX_SIZE_16) }, }; CASE(test_2, { @@ -85,8 +85,8 @@ TEST_F(LowerParallelCopy, UnrelatedCopies) { TEST_F(LowerParallelCopy, RelatedSource) { struct agx_copy test_1[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 4, .src = 2, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 4, .src = agx_register(2, AGX_SIZE_32) }, }; CASE(test_1, { @@ -95,8 +95,8 @@ TEST_F(LowerParallelCopy, RelatedSource) }); struct agx_copy test_2[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 4, .src = 1, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 4, .src = agx_register(1, AGX_SIZE_16) }, }; CASE(test_2, { @@ -108,8 +108,8 @@ TEST_F(LowerParallelCopy, RelatedSource) TEST_F(LowerParallelCopy, DependentCopies) { struct agx_copy test_1[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 4, .src = 0, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 4, .src = agx_register(0, AGX_SIZE_32) }, }; CASE(test_1, { @@ -118,8 +118,8 @@ TEST_F(LowerParallelCopy, DependentCopies) }); struct agx_copy test_2[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 4, .src = 0, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 4, .src = agx_register(0, AGX_SIZE_16) }, }; CASE(test_2, { @@ -131,10 +131,10 @@ TEST_F(LowerParallelCopy, DependentCopies) TEST_F(LowerParallelCopy, ManyDependentCopies) { struct agx_copy test_1[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 4, .src = 0, .size = AGX_SIZE_32 }, - { .dest = 8, .src = 6, .size = AGX_SIZE_32 }, - { .dest = 6, .src = 4, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 4, .src = agx_register(0, AGX_SIZE_32) }, + { .dest = 8, .src = agx_register(6, AGX_SIZE_32) }, + { .dest = 6, .src = agx_register(4, AGX_SIZE_32) }, }; CASE(test_1, { @@ -145,10 +145,10 @@ TEST_F(LowerParallelCopy, ManyDependentCopies) }); struct agx_copy test_2[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 2, .src = 0, .size = AGX_SIZE_16 }, - { .dest = 4, .src = 3, .size = AGX_SIZE_16 }, - { .dest = 3, .src = 2, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 2, .src = agx_register(0, AGX_SIZE_16) }, + { .dest = 4, .src = agx_register(3, AGX_SIZE_16) }, + { .dest = 3, .src = agx_register(2, AGX_SIZE_16) }, }; CASE(test_2, { @@ -161,8 +161,8 @@ TEST_F(LowerParallelCopy, ManyDependentCopies) TEST_F(LowerParallelCopy, Swap) { struct agx_copy test_1[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 2, .src = 0, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 2, .src = agx_register(0, AGX_SIZE_32) }, }; CASE(test_1, { @@ -170,8 +170,8 @@ TEST_F(LowerParallelCopy, Swap) { }); struct agx_copy test_2[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 1, .src = 0, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 1, .src = agx_register(0, AGX_SIZE_16) }, }; CASE(test_2, { @@ -181,9 +181,9 @@ TEST_F(LowerParallelCopy, Swap) { TEST_F(LowerParallelCopy, Cycle3) { struct agx_copy test[] = { - { .dest = 0, .src = 1, .size = AGX_SIZE_16 }, - { .dest = 1, .src = 2, .size = AGX_SIZE_16 }, - { .dest = 2, .src = 0, .size = AGX_SIZE_16 }, + { .dest = 0, .src = agx_register(1, AGX_SIZE_16) }, + { .dest = 1, .src = agx_register(2, AGX_SIZE_16) }, + { .dest = 2, .src = agx_register(0, AGX_SIZE_16) }, }; /* XXX: requires 6 instructions. if we had a temp free, could do it in 4 */ @@ -196,10 +196,10 @@ TEST_F(LowerParallelCopy, Cycle3) { /* Test case from Hack et al */ TEST_F(LowerParallelCopy, TwoSwaps) { struct agx_copy test[] = { - { .dest = 4, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 6, .src = 4, .size = AGX_SIZE_32 }, - { .dest = 2, .src = 6, .size = AGX_SIZE_32 }, - { .dest = 8, .src = 8, .size = AGX_SIZE_32 }, + { .dest = 4, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 6, .src = agx_register(4, AGX_SIZE_32) }, + { .dest = 2, .src = agx_register(6, AGX_SIZE_32) }, + { .dest = 8, .src = agx_register(8, AGX_SIZE_32) }, }; CASE(test, { @@ -211,9 +211,9 @@ TEST_F(LowerParallelCopy, TwoSwaps) { #if 0 TEST_F(LowerParallelCopy, LooksLikeASwap) { struct agx_copy test[] = { - { .dest = 0, .src = 2, .size = AGX_SIZE_32 }, - { .dest = 2, .src = 0, .size = AGX_SIZE_32 }, - { .dest = 4, .src = 2, .size = AGX_SIZE_32 }, + { .dest = 0, .src = agx_register(2, AGX_SIZE_32) }, + { .dest = 2, .src = agx_register(0, AGX_SIZE_32) }, + { .dest = 4, .src = agx_register(2, AGX_SIZE_32) }, }; CASE(test, {