mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
vc4: Simplify the load/store utile functions.
They now have less of a dependency on the cpp, and don't have to do a divide. Hacking up mesa-demos teximage to do only one subtest and not draw points, I saw 1024x1024 glTexSubImage2D() improve by 4.86939% +/- 1.40408% (n=30) and glGetTexImage() by 2.18978% +/- 0.140268% (n=5).
This commit is contained in:
parent
7b8c67b3cc
commit
e64b1169d3
1 changed files with 22 additions and 10 deletions
|
|
@ -87,6 +87,22 @@ vc4_utile_height(int cpp)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the stride in bytes of a 64-byte microtile.
 *
 * A cpp of 1 gives 8; a cpp of 2, 4 or 8 gives 16.  Any other cpp is
 * treated as unreachable.
 */
static uint32_t
vc4_utile_stride(int cpp)
{
        if (cpp == 1)
                return 8;

        if (cpp == 2 || cpp == 4 || cpp == 8)
                return 16;

        unreachable("bad cpp");
}
|
||||
|
||||
/**
|
||||
* The texture unit uses the same rule as this function to decide which tiling
|
||||
* format a particular miplevel is in, so we lay out our miptrees accordingly.
|
||||
|
|
@ -101,25 +117,21 @@ vc4_size_is_lt(uint32_t width, uint32_t height, int cpp)
|
|||
/**
 * Copies one 64-byte microtile from src into dst.
 *
 * The 64 source bytes are read in consecutive chunks of
 * vc4_utile_stride(cpp) bytes.  Each chunk is written at the current
 * destination position, which then advances by dst_stride bytes.
 *
 * @param dst         destination buffer; advanced by dst_stride per chunk
 * @param src         start of the 64 contiguous source bytes
 * @param dst_stride  byte distance between consecutive chunks in dst
 * @param cpp         forwarded to vc4_utile_stride() to pick the chunk size
 */
void
vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp)
{
        /* Use byte pointers: arithmetic on void * is only a GNU extension. */
        uint8_t *dst_row = dst;
        const uint8_t *src_bytes = src;
        uint32_t src_stride = vc4_utile_stride(cpp);

        for (uint32_t src_offset = 0; src_offset < 64; src_offset += src_stride) {
                memcpy(dst_row, src_bytes + src_offset, src_stride);
                dst_row += dst_stride;
        }
}
|
||||
|
||||
/**
 * Copies one 64-byte microtile from src into dst.
 *
 * The 64 destination bytes are written in consecutive chunks of
 * vc4_utile_stride(cpp) bytes.  Each chunk is read from the current source
 * position, which then advances by src_stride bytes.
 *
 * @param dst         start of the 64 contiguous destination bytes
 * @param src         source buffer; advanced by src_stride per chunk
 * @param src_stride  byte distance between consecutive chunks in src
 * @param cpp         forwarded to vc4_utile_stride() to pick the chunk size
 */
void
vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
{
        /* Use byte pointers: arithmetic on void * is only a GNU extension. */
        uint8_t *dst_bytes = dst;
        const uint8_t *src_row = src;
        uint32_t dst_stride = vc4_utile_stride(cpp);

        for (uint32_t dst_offset = 0; dst_offset < 64; dst_offset += dst_stride) {
                memcpy(dst_bytes + dst_offset, src_row, dst_stride);
                src_row += src_stride;
        }
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue