mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-24 09:20:49 +02:00
i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions
Each of the [de]tiling functions has three mem_copy calls:
  1) Left edge to tile boundary
  2) Tile boundary to tile boundary, in a loop
  3) Tile boundary to right edge

Copies 2 and 3 start at a tile edge, so the pointer to tiled memory is guaranteed to be at least 16-byte aligned. Copy 1, on the other hand, starts at some arbitrary place in the tile, so it doesn't have any such alignment guarantees.

Cc: "11.1 11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
This commit is contained in:
parent
e5295b5fb4
commit
f6f54a29ca
1 changed file with 43 additions and 32 deletions
|
|
@ -172,6 +172,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
* Copy texture data from linear to X tile layout.
|
||||
*
|
||||
* \copydoc tile_copy_fn
|
||||
*
|
||||
* The mem_copy parameters allow the user to specify an alternative mem_copy
|
||||
* function that, for instance, may do RGBA -> BGRA swizzling. The first
|
||||
* function must handle any memory alignment while the second function must
|
||||
* only handle 16-byte alignment in whichever side (source or destination) is
|
||||
* tiled.
|
||||
*/
|
||||
static inline void
|
||||
linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
||||
|
|
@ -179,7 +185,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
char *dst, const char *src,
|
||||
int32_t src_pitch,
|
||||
uint32_t swizzle_bit,
|
||||
mem_copy_fn mem_copy)
|
||||
mem_copy_fn mem_copy,
|
||||
mem_copy_fn mem_copy_align16)
|
||||
{
|
||||
/* The copy destination offset for each range copied is the sum of
|
||||
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
|
||||
|
|
@ -200,10 +207,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0);
|
||||
|
||||
for (xo = x1; xo < x2; xo += xtile_span) {
|
||||
mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
|
||||
mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span);
|
||||
}
|
||||
|
||||
mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
|
||||
mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
|
||||
|
||||
src += src_pitch;
|
||||
}
|
||||
|
|
@ -220,7 +227,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
char *dst, const char *src,
|
||||
int32_t src_pitch,
|
||||
uint32_t swizzle_bit,
|
||||
mem_copy_fn mem_copy)
|
||||
mem_copy_fn mem_copy,
|
||||
mem_copy_fn mem_copy_align16)
|
||||
{
|
||||
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
|
||||
* as the tile). Thus the destination offset for (x,y) is the sum of:
|
||||
|
|
@ -259,12 +267,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
* at each step so we don't need to calculate it explicitly.
|
||||
*/
|
||||
for (x = x1; x < x2; x += ytile_span) {
|
||||
mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
|
||||
mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span);
|
||||
xo += bytes_per_column;
|
||||
swizzle ^= swizzle_bit;
|
||||
}
|
||||
|
||||
mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
|
||||
mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2);
|
||||
|
||||
src += src_pitch;
|
||||
}
|
||||
|
|
@ -281,7 +289,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
char *dst, const char *src,
|
||||
int32_t dst_pitch,
|
||||
uint32_t swizzle_bit,
|
||||
mem_copy_fn mem_copy)
|
||||
mem_copy_fn mem_copy,
|
||||
mem_copy_fn mem_copy_align16)
|
||||
{
|
||||
/* The copy destination offset for each range copied is the sum of
|
||||
* an X offset 'x0' or 'xo' and a Y offset 'yo.'
|
||||
|
|
@ -302,10 +311,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0);
|
||||
|
||||
for (xo = x1; xo < x2; xo += xtile_span) {
|
||||
mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
|
||||
mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span);
|
||||
}
|
||||
|
||||
mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
|
||||
mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
|
||||
|
||||
dst += dst_pitch;
|
||||
}
|
||||
|
|
@ -322,7 +331,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
char *dst, const char *src,
|
||||
int32_t dst_pitch,
|
||||
uint32_t swizzle_bit,
|
||||
mem_copy_fn mem_copy)
|
||||
mem_copy_fn mem_copy,
|
||||
mem_copy_fn mem_copy_align16)
|
||||
{
|
||||
/* Y tiles consist of columns that are 'ytile_span' wide (and the same height
|
||||
* as the tile). Thus the destination offset for (x,y) is the sum of:
|
||||
|
|
@ -361,12 +371,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
* at each step so we don't need to calculate it explicitly.
|
||||
*/
|
||||
for (x = x1; x < x2; x += ytile_span) {
|
||||
mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
|
||||
mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span);
|
||||
xo += bytes_per_column;
|
||||
swizzle ^= swizzle_bit;
|
||||
}
|
||||
|
||||
mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
|
||||
mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2);
|
||||
|
||||
dst += dst_pitch;
|
||||
}
|
||||
|
|
@ -393,26 +403,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
|
||||
if (mem_copy == memcpy)
|
||||
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, src_pitch, swizzle_bit, memcpy);
|
||||
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_dst)
|
||||
return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, src_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_dst);
|
||||
rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
if (mem_copy == memcpy)
|
||||
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit, memcpy);
|
||||
dst, src, src_pitch, swizzle_bit,
|
||||
memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_dst)
|
||||
return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_dst);
|
||||
rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
linear_to_xtiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit, mem_copy);
|
||||
dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -435,26 +446,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
|
||||
if (mem_copy == memcpy)
|
||||
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, src_pitch, swizzle_bit, memcpy);
|
||||
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_dst)
|
||||
return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, src_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_dst);
|
||||
rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
if (mem_copy == memcpy)
|
||||
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit, memcpy);
|
||||
dst, src, src_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_dst)
|
||||
return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_dst);
|
||||
rgba8_copy_aligned_dst, rgba8_copy_aligned_dst);
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
linear_to_ytiled(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, src_pitch, swizzle_bit, mem_copy);
|
||||
dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -477,26 +488,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) {
|
||||
if (mem_copy == memcpy)
|
||||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy);
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_src)
|
||||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_src);
|
||||
rgba8_copy_aligned_src, rgba8_copy_aligned_src);
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
if (mem_copy == memcpy)
|
||||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy);
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_src)
|
||||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_src);
|
||||
rgba8_copy_aligned_src, rgba8_copy_aligned_src);
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit, mem_copy);
|
||||
dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -519,26 +530,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) {
|
||||
if (mem_copy == memcpy)
|
||||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy);
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_src)
|
||||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_src);
|
||||
rgba8_copy_aligned_src, rgba8_copy_aligned_src);
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
if (mem_copy == memcpy)
|
||||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy);
|
||||
dst, src, dst_pitch, swizzle_bit, memcpy, memcpy);
|
||||
else if (mem_copy == rgba8_copy_aligned_src)
|
||||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy_aligned_src);
|
||||
rgba8_copy_aligned_src, rgba8_copy_aligned_src);
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit, mem_copy);
|
||||
dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue