mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 12:18:09 +02:00
agx: lower swaps late
for RA validation.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31532>
This commit is contained in:
parent
6f1c275c94
commit
a07faaf6c9
4 changed files with 48 additions and 46 deletions
|
|
@ -73,30 +73,13 @@ do_swap(agx_builder *b, const struct agx_copy *copy)
|
|||
if (copy->dest == copy->src.value)
|
||||
return;
|
||||
|
||||
/* We can swap lo/hi halves of a 32-bit register with a 32-bit extr */
|
||||
if (copy->src.size == AGX_SIZE_16 &&
|
||||
(copy->dest >> 1) == (copy->src.value >> 1) && !copy->dest_mem) {
|
||||
|
||||
assert(((copy->dest & 1) == (1 - (copy->src.value & 1))) &&
|
||||
"no trivial swaps, and only 2 halves of a register");
|
||||
|
||||
/* r0 = extr r0, r0, #16
|
||||
* = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
|
||||
* = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
|
||||
* = (r0l << 16) | r0h
|
||||
*/
|
||||
agx_index reg32 = agx_register(copy->dest & ~1, AGX_SIZE_32);
|
||||
agx_extr_to(b, reg32, reg32, reg32, agx_immediate(16), 0);
|
||||
return;
|
||||
}
|
||||
|
||||
agx_index x = copy->dest_mem
|
||||
? agx_memory_register(copy->dest, copy->src.size)
|
||||
: agx_register(copy->dest, copy->src.size);
|
||||
agx_index y = copy->src;
|
||||
|
||||
/* Memory-memory swaps need to be lowered */
|
||||
assert(x.memory == y.memory);
|
||||
|
||||
/* Memory-memory swaps lowered here, GPR swaps lowered later */
|
||||
if (x.memory) {
|
||||
agx_index temp1 = agx_register(4, copy->src.size);
|
||||
agx_index temp2 = agx_register(6, copy->src.size);
|
||||
|
|
@ -105,13 +88,9 @@ do_swap(agx_builder *b, const struct agx_copy *copy)
|
|||
agx_mov_to(b, temp2, y);
|
||||
agx_mov_to(b, y, temp1);
|
||||
agx_mov_to(b, x, temp2);
|
||||
return;
|
||||
} else {
|
||||
agx_swap(b, x, y);
|
||||
}
|
||||
|
||||
/* Otherwise, we're swapping GPRs and fallback on a XOR swap. */
|
||||
agx_xor_to(b, x, x, y);
|
||||
agx_xor_to(b, y, x, y);
|
||||
agx_xor_to(b, x, x, y);
|
||||
}
|
||||
|
||||
struct copy_ctx {
|
||||
|
|
|
|||
|
|
@ -44,6 +44,33 @@ cmpsel_for_break_if(agx_builder *b, agx_instr *I)
|
|||
return agx_push_exec(b, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
swap(agx_builder *b, agx_index x, agx_index y)
|
||||
{
|
||||
assert(!x.memory && "already lowered");
|
||||
assert(!y.memory && "already lowered");
|
||||
|
||||
/* We can swap lo/hi halves of a 32-bit register with a 32-bit extr */
|
||||
if (x.size == AGX_SIZE_16 && (x.value >> 1) == (y.value >> 1)) {
|
||||
|
||||
assert(((x.value & 1) == (1 - (y.value & 1))) &&
|
||||
"no trivial swaps, and only 2 halves of a register");
|
||||
|
||||
/* r0 = extr r0, r0, #16
|
||||
* = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
|
||||
* = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
|
||||
* = (r0l << 16) | r0h
|
||||
*/
|
||||
agx_index reg32 = agx_register(x.value & ~1, AGX_SIZE_32);
|
||||
agx_extr_to(b, reg32, reg32, reg32, agx_immediate(16), 0);
|
||||
} else {
|
||||
/* Otherwise, we're swapping GPRs and fallback on a XOR swap. */
|
||||
agx_xor_to(b, x, x, y);
|
||||
agx_xor_to(b, y, x, y);
|
||||
agx_xor_to(b, x, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
lower(agx_builder *b, agx_instr *I)
|
||||
{
|
||||
|
|
@ -91,6 +118,10 @@ lower(agx_builder *b, agx_instr *I)
|
|||
return cmpsel_for_break_if(b, I);
|
||||
}
|
||||
|
||||
case AGX_OPCODE_SWAP:
|
||||
swap(b, I->src[0], I->src[1]);
|
||||
return (void *)true;
|
||||
|
||||
case AGX_OPCODE_EXPORT:
|
||||
/* We already lowered exports during RA, we just need to remove them late
|
||||
* after inserting waits.
|
||||
|
|
|
|||
|
|
@ -501,6 +501,11 @@ op("collect", _, srcs = VARIABLE)
|
|||
op("split", _, srcs = 1, dests = VARIABLE)
|
||||
op("phi", _, srcs = VARIABLE, schedule_class = "preload")
|
||||
|
||||
# The srcs double as destinations. Only deals in registers. This is generated by
|
||||
# parallel copy lowering and lowered soon after. We need this as a dedicated
|
||||
# instruction only for RA validation.
|
||||
op("swap", _, dests = 0, srcs = 2)
|
||||
|
||||
op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False)
|
||||
|
||||
# Like mov, but takes a register and can only appear at the start. Guaranteed
|
||||
|
|
|
|||
|
|
@ -24,21 +24,6 @@
|
|||
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
|
||||
} while (0)
|
||||
|
||||
static inline void
|
||||
extr_swap(agx_builder *b, agx_index x)
|
||||
{
|
||||
x.size = AGX_SIZE_32;
|
||||
agx_extr_to(b, x, x, x, agx_immediate(16), 0);
|
||||
}
|
||||
|
||||
/*
 * Test helper: emit the three-instruction XOR sequence on x and y.
 * The order of the three instructions matters; each one consumes the
 * result of the previous one. Presumably the first index argument of
 * agx_xor_to is the destination -- confirm against the builder.
 */
static inline void
|
||||
xor_swap(agx_builder *b, agx_index x, agx_index y)
|
||||
{
|
||||
agx_xor_to(b, x, x, y); /* x = x0 ^ y0 */
|
||||
agx_xor_to(b, y, x, y); /* y = (x0 ^ y0) ^ y0 = x0 */
|
||||
agx_xor_to(b, x, x, y); /* x = (x0 ^ y0) ^ x0 = y0 */
|
||||
}
|
||||
|
||||
class LowerParallelCopy : public testing::Test {
|
||||
protected:
|
||||
LowerParallelCopy()
|
||||
|
|
@ -162,7 +147,7 @@ TEST_F(LowerParallelCopy, Swap)
|
|||
};
|
||||
|
||||
CASE(test_1, {
|
||||
xor_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
agx_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
});
|
||||
|
||||
struct agx_copy test_2[] = {
|
||||
|
|
@ -170,7 +155,9 @@ TEST_F(LowerParallelCopy, Swap)
|
|||
{.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
|
||||
};
|
||||
|
||||
CASE(test_2, { extr_swap(b, agx_register(0, AGX_SIZE_16)); });
|
||||
CASE(test_2, {
|
||||
agx_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(LowerParallelCopy, Cycle3)
|
||||
|
|
@ -182,8 +169,8 @@ TEST_F(LowerParallelCopy, Cycle3)
|
|||
};
|
||||
|
||||
CASE(test, {
|
||||
extr_swap(b, agx_register(0, AGX_SIZE_16));
|
||||
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
|
||||
agx_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
|
||||
agx_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -213,8 +200,8 @@ TEST_F(LowerParallelCopy, TwoSwaps)
|
|||
};
|
||||
|
||||
CASE(test, {
|
||||
xor_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
xor_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
agx_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
agx_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue