mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 10:30:08 +01:00
agx: Optimize swaps of 2x16 channels
We can use extr to swap the low and high halves of a 32-bit register in one instruction. No shader-db changes, but it reduces the number of xors on a deqp I'm looking at. Yes, I'm procrastinating on debugging deqps, how'd you guess? Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24635>
This commit is contained in:
parent
efbdc31ce5
commit
d459de85b7
2 changed files with 26 additions and 5 deletions
|
|
@ -36,6 +36,23 @@ do_swap(agx_builder *b, const struct agx_copy *copy)
|
|||
if (copy->dest == copy->src.value)
|
||||
return;
|
||||
|
||||
/* We can swap lo/hi halves of a 32-bit register with a 32-bit extr */
|
||||
if (copy->src.size == AGX_SIZE_16 &&
|
||||
(copy->dest >> 1) == (copy->src.value >> 1)) {
|
||||
|
||||
assert(((copy->dest & 1) == (1 - (copy->src.value & 1))) &&
|
||||
"no trivial swaps, and only 2 halves of a register");
|
||||
|
||||
/* r0 = extr r0, r0, #16
|
||||
* = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
|
||||
* = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
|
||||
* = (r0l << 16) | r0h
|
||||
*/
|
||||
agx_index reg32 = agx_register(copy->dest & ~1, AGX_SIZE_32);
|
||||
agx_extr_to(b, reg32, reg32, reg32, agx_immediate(16), 0);
|
||||
return;
|
||||
}
|
||||
|
||||
agx_index x = agx_register(copy->dest, copy->src.size);
|
||||
agx_index y = copy->src;
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,13 @@
|
|||
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Test helper: emit, through builder b, the single extr instruction that
 * swaps the two 16-bit halves of a 32-bit register.
 *
 * x is taken by value, so widening its size below does not modify the
 * caller's index.
 *
 * NOTE(review): assumes x names the low (even) half of the register pair;
 * the visible callers pass register 0 -- confirm before using it with an
 * odd half.
 */
static inline void
|
||||
extr_swap(agx_builder *b, agx_index x)
|
||||
{
|
||||
/* Reuse the same register index, but address it as a 32-bit operand. */
x.size = AGX_SIZE_32;
|
||||
/* extr x, x, #16 reads 32 bits at bit offset 16 from the pair (x, x),
 * which yields (lo << 16) | hi, i.e. the halves exchanged in place. */
agx_extr_to(b, x, x, x, agx_immediate(16), 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xor_swap(agx_builder *b, agx_index x, agx_index y)
|
||||
{
|
||||
|
|
@ -161,9 +168,7 @@ TEST_F(LowerParallelCopy, Swap)
|
|||
{.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
|
||||
};
|
||||
|
||||
CASE(test_2, {
|
||||
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
|
||||
});
|
||||
CASE(test_2, { extr_swap(b, agx_register(0, AGX_SIZE_16)); });
|
||||
}
|
||||
|
||||
TEST_F(LowerParallelCopy, Cycle3)
|
||||
|
|
@ -174,9 +179,8 @@ TEST_F(LowerParallelCopy, Cycle3)
|
|||
{.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
|
||||
};
|
||||
|
||||
/* XXX: requires 6 instructions. if we had a temp free, could do it in 4 */
|
||||
CASE(test, {
|
||||
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
|
||||
extr_swap(b, agx_register(0, AGX_SIZE_16));
|
||||
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
|
||||
});
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue