agx: lower swaps late

This is needed for RA validation.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31532>
This commit is contained in:
Alyssa Rosenzweig 2024-09-27 10:17:34 -04:00 committed by Marge Bot
parent 6f1c275c94
commit a07faaf6c9
4 changed files with 48 additions and 46 deletions

View file

@ -73,30 +73,13 @@ do_swap(agx_builder *b, const struct agx_copy *copy)
if (copy->dest == copy->src.value)
return;
/* We can swap lo/hi halves of a 32-bit register with a 32-bit extr */
if (copy->src.size == AGX_SIZE_16 &&
(copy->dest >> 1) == (copy->src.value >> 1) && !copy->dest_mem) {
assert(((copy->dest & 1) == (1 - (copy->src.value & 1))) &&
"no trivial swaps, and only 2 halves of a register");
/* r0 = extr r0, r0, #16
* = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
* = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
* = (r0l << 16) | r0h
*/
agx_index reg32 = agx_register(copy->dest & ~1, AGX_SIZE_32);
agx_extr_to(b, reg32, reg32, reg32, agx_immediate(16), 0);
return;
}
agx_index x = copy->dest_mem
? agx_memory_register(copy->dest, copy->src.size)
: agx_register(copy->dest, copy->src.size);
agx_index y = copy->src;
/* Memory-memory swaps need to be lowered */
assert(x.memory == y.memory);
/* Memory-memory swaps lowered here, GPR swaps lowered later */
if (x.memory) {
agx_index temp1 = agx_register(4, copy->src.size);
agx_index temp2 = agx_register(6, copy->src.size);
@ -105,13 +88,9 @@ do_swap(agx_builder *b, const struct agx_copy *copy)
agx_mov_to(b, temp2, y);
agx_mov_to(b, y, temp1);
agx_mov_to(b, x, temp2);
return;
} else {
agx_swap(b, x, y);
}
/* Otherwise, we're swapping GPRs and fallback on a XOR swap. */
agx_xor_to(b, x, x, y);
agx_xor_to(b, y, x, y);
agx_xor_to(b, x, x, y);
}
struct copy_ctx {

View file

@ -44,6 +44,33 @@ cmpsel_for_break_if(agx_builder *b, agx_instr *I)
return agx_push_exec(b, 0);
}
/*
 * Emit code that exchanges the contents of two GPRs in place. Memory
 * (spilled) operands must already have been lowered to GPR moves; only
 * register operands are handled here.
 */
static void
swap(agx_builder *b, agx_index x, agx_index y)
{
   assert(!x.memory && "already lowered");
   assert(!y.memory && "already lowered");

   /* Special case: the two operands are the 16-bit halves of one 32-bit
    * register. A single 32-bit extr rotates the register by 16 bits, which
    * exchanges the halves:
    *
    *    r0 = extr r0, r0, #16
    *       = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
    *       = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
    *       = (r0l << 16) | r0h
    */
   if (x.size == AGX_SIZE_16 && (x.value >> 1) == (y.value >> 1)) {
      assert(((x.value & 1) == (1 - (y.value & 1))) &&
             "no trivial swaps, and only 2 halves of a register");

      agx_index pair = agx_register(x.value & ~1, AGX_SIZE_32);
      agx_extr_to(b, pair, pair, pair, agx_immediate(16), 0);
      return;
   }

   /* General case: XOR swap, which needs no scratch register. The three
    * instructions must stay in exactly this order.
    */
   agx_xor_to(b, x, x, y);
   agx_xor_to(b, y, x, y);
   agx_xor_to(b, x, x, y);
}
static agx_instr *
lower(agx_builder *b, agx_instr *I)
{
@ -91,6 +118,10 @@ lower(agx_builder *b, agx_instr *I)
return cmpsel_for_break_if(b, I);
}
case AGX_OPCODE_SWAP:
swap(b, I->src[0], I->src[1]);
return (void *)true;
case AGX_OPCODE_EXPORT:
/* We already lowered exports during RA, we just need to remove them late
* after inserting waits.

View file

@ -501,6 +501,11 @@ op("collect", _, srcs = VARIABLE)
op("split", _, srcs = 1, dests = VARIABLE)
op("phi", _, srcs = VARIABLE, schedule_class = "preload")
# The srcs double as destinations. Only deals in registers. This is generated by
# parallel copy lowering and lowered soon after. We need this as a dedicated
# instruction only for RA validation.
op("swap", _, dests = 0, srcs = 2)
op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False)
# Like mov, but takes a register and can only appear at the start. Guaranteed

View file

@ -24,21 +24,6 @@
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
static inline void
extr_swap(agx_builder *b, agx_index x)
{
x.size = AGX_SIZE_32;
agx_extr_to(b, x, x, x, agx_immediate(16), 0);
}
static inline void
xor_swap(agx_builder *b, agx_index x, agx_index y)
{
agx_xor_to(b, x, x, y);
agx_xor_to(b, y, x, y);
agx_xor_to(b, x, x, y);
}
class LowerParallelCopy : public testing::Test {
protected:
LowerParallelCopy()
@ -162,7 +147,7 @@ TEST_F(LowerParallelCopy, Swap)
};
CASE(test_1, {
xor_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
@ -170,7 +155,9 @@ TEST_F(LowerParallelCopy, Swap)
{.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
};
CASE(test_2, { extr_swap(b, agx_register(0, AGX_SIZE_16)); });
CASE(test_2, {
agx_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, Cycle3)
@ -182,8 +169,8 @@ TEST_F(LowerParallelCopy, Cycle3)
};
CASE(test, {
extr_swap(b, agx_register(0, AGX_SIZE_16));
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
agx_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
});
}
@ -213,8 +200,8 @@ TEST_F(LowerParallelCopy, TwoSwaps)
};
CASE(test, {
xor_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
xor_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
}