util: Switch the non-block formats to unpacking rgba rows instead of rects.

We have only a few callers of unpack that do rects, so add a helper that
iterates over y adding the strides.  This saves us 36kb of generated code
and means that adding cpu-specific variants for RGBA format unpack will be
much simpler.

Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10014>
This commit is contained in:
Eric Anholt 2021-04-02 14:35:01 -07:00 committed by Marge Bot
parent 921b05f582
commit 2b5178ee48
11 changed files with 268 additions and 200 deletions

View file

@ -50,9 +50,8 @@ struct translate_generic {
struct { struct {
enum translate_element_type type; enum translate_element_type type;
void (*fetch)(void *restrict dst, unsigned dst_stride, void (*fetch)(void *restrict dst, const uint8_t *restrict src,
const uint8_t *restrict src, unsigned src_stride, unsigned width);
unsigned width, unsigned height);
unsigned buffer; unsigned buffer;
unsigned input_offset; unsigned input_offset;
unsigned instance_divisor; unsigned instance_divisor;
@ -625,7 +624,7 @@ generic_run_one(struct translate_generic *tg,
if (likely(copy_size >= 0)) { if (likely(copy_size >= 0)) {
memcpy(dst, src, copy_size); memcpy(dst, src, copy_size);
} else { } else {
tg->attrib[attr].fetch(data, 0, src, 0, 1, 1); tg->attrib[attr].fetch(data, src, 1);
if (0) if (0)
debug_printf("Fetch linear attr %d from %p stride %d index %d: " debug_printf("Fetch linear attr %d from %p stride %d index %d: "

View file

@ -848,16 +848,13 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
{ {
D3DLOCKED_RECT lock; D3DLOCKED_RECT lock;
HRESULT hr; HRESULT hr;
const struct util_format_unpack_description *unpack =
util_format_unpack_description(surf->base.info.format);
assert(unpack);
hr = NineSurface9_LockRect(surf, &lock, NULL, D3DLOCK_READONLY); hr = NineSurface9_LockRect(surf, &lock, NULL, D3DLOCK_READONLY);
if (FAILED(hr)) if (FAILED(hr))
ret_err("Failed to map cursor source image.\n", ret_err("Failed to map cursor source image.\n",
D3DERR_DRIVERINTERNALERROR); D3DERR_DRIVERINTERNALERROR);
unpack->unpack_rgba_8unorm(ptr, transfer->stride, util_format_unpack_rgba_8unorm_rect(surf->base.info.format, ptr, transfer->stride,
lock.pBits, lock.Pitch, lock.pBits, lock.Pitch,
This->cursor.w, This->cursor.h); This->cursor.w, This->cursor.h);
@ -865,7 +862,8 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
void *data = lock.pBits; void *data = lock.pBits;
/* SetCursor assumes 32x32 argb with pitch 128 */ /* SetCursor assumes 32x32 argb with pitch 128 */
if (lock.Pitch != 128) { if (lock.Pitch != 128) {
unpack->unpack_rgba_8unorm(This->cursor.hw_upload_temp, 128, util_format_unpack_rgba_8unorm_rect(surf->base.info.format,
This->cursor.hw_upload_temp, 128,
lock.pBits, lock.Pitch, lock.pBits, lock.Pitch,
32, 32); 32, 32);
data = This->cursor.hw_upload_temp; data = This->cursor.hw_upload_temp;

View file

@ -1641,7 +1641,7 @@ _mesa_unpack_ubyte_rgba_row(mesa_format format, uint32_t n,
util_format_unpack_description((enum pipe_format)format); util_format_unpack_description((enum pipe_format)format);
if (unpack->unpack_rgba_8unorm) { if (unpack->unpack_rgba_8unorm) {
unpack->unpack_rgba_8unorm((uint8_t *)dst, 0, src, 0, n, 1); unpack->unpack_rgba_8unorm((uint8_t *)dst, src, n);
} else { } else {
/* get float values, convert to ubyte */ /* get float values, convert to ubyte */
{ {

View file

@ -359,6 +359,47 @@ util_get_depth_format_mrd(const struct util_format_description *desc)
return mrd; return mrd;
} }
/**
 * Unpack a w x h rectangle of pixels of the given format into dst.
 *
 * If the format's unpack description supplies a whole-rectangle entry point
 * (unpack_rgba_rect), the full rectangle is handed to it in a single call.
 * Otherwise the per-row unpack_rgba entry point is invoked once for each of
 * the h rows, with src and dst advanced by src_stride and dst_stride bytes
 * between rows.
 */
void
util_format_unpack_rgba_rect(enum pipe_format format,
                             void *dst, unsigned dst_stride,
                             const void *src, unsigned src_stride,
                             unsigned w, unsigned h)
{
   const struct util_format_unpack_description *desc =
      util_format_unpack_description(format);

   /* Formats with a rect unpacker take the whole rectangle at once. */
   if (desc->unpack_rgba_rect) {
      desc->unpack_rgba_rect(dst, dst_stride, src, src_stride, w, h);
      return;
   }

   /* Everything else is unpacked one row at a time. */
   const uint8_t *src_row = src;
   uint8_t *dst_row = dst;
   for (unsigned row = 0; row < h; row++) {
      desc->unpack_rgba(dst_row, src_row, w);
      src_row += src_stride;
      dst_row += dst_stride;
   }
}
/**
 * Unpack a w x h rectangle of pixels of the given format into dst using the
 * format's 8unorm unpack entry points.
 *
 * If the format's unpack description supplies unpack_rgba_8unorm_rect, the
 * full rectangle is passed to it in one call.  Otherwise unpack_rgba_8unorm
 * is called once per row, and src/dst are stepped forward by src_stride and
 * dst_stride bytes after each row.
 */
void
util_format_unpack_rgba_8unorm_rect(enum pipe_format format,
                                    void *dst, unsigned dst_stride,
                                    const void *src, unsigned src_stride,
                                    unsigned w, unsigned h)
{
   const struct util_format_unpack_description *desc =
      util_format_unpack_description(format);

   /* Prefer the whole-rectangle unpacker when the format has one. */
   if (desc->unpack_rgba_8unorm_rect) {
      desc->unpack_rgba_8unorm_rect(dst, dst_stride, src, src_stride, w, h);
      return;
   }

   /* Fall back to the row unpacker, walking both buffers by their strides. */
   const uint8_t *in = src;
   uint8_t *out = dst;
   for (unsigned rows_left = h; rows_left > 0; rows_left--) {
      desc->unpack_rgba_8unorm(out, in, w);
      in += src_stride;
      out += dst_stride;
   }
}
void void
util_format_read_4(enum pipe_format format, util_format_read_4(enum pipe_format format,
@ -367,8 +408,6 @@ util_format_read_4(enum pipe_format format,
unsigned x, unsigned y, unsigned w, unsigned h) unsigned x, unsigned y, unsigned w, unsigned h)
{ {
const struct util_format_description *format_desc; const struct util_format_description *format_desc;
const struct util_format_unpack_description *unpack =
util_format_unpack_description(format);
const uint8_t *src_row; const uint8_t *src_row;
format_desc = util_format_description(format); format_desc = util_format_description(format);
@ -378,7 +417,7 @@ util_format_read_4(enum pipe_format format,
src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
unpack->unpack_rgba(dst, dst_stride, src_row, src_stride, w, h); util_format_unpack_rgba_rect(format, dst, dst_stride, src_row, src_stride, w, h);
} }
@ -413,10 +452,7 @@ void
util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h) util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)
{ {
const struct util_format_description *format_desc; const struct util_format_description *format_desc;
const struct util_format_unpack_description *unpack =
util_format_unpack_description(format);
const uint8_t *src_row; const uint8_t *src_row;
uint8_t *dst_row;
format_desc = util_format_description(format); format_desc = util_format_description(format);
@ -424,9 +460,8 @@ util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride,
assert(y % format_desc->block.height == 0); assert(y % format_desc->block.height == 0);
src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8);
dst_row = dst;
unpack->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); util_format_unpack_rgba_8unorm_rect(format, dst, dst_stride, src_row, src_stride, w, h);
} }
@ -715,7 +750,7 @@ util_format_translate(enum pipe_format dst_format,
return FALSE; return FALSE;
while (height >= y_step) { while (height >= y_step) {
unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step); util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
dst_row += dst_step; dst_row += dst_step;
@ -724,7 +759,7 @@ util_format_translate(enum pipe_format dst_format,
} }
if (height) { if (height) {
unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height); util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height); pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
} }
@ -746,7 +781,7 @@ util_format_translate(enum pipe_format dst_format,
return FALSE; return FALSE;
while (height >= y_step) { while (height >= y_step) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
dst_row += dst_step; dst_row += dst_step;
@ -755,7 +790,7 @@ util_format_translate(enum pipe_format dst_format,
} }
if (height) { if (height) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, height); pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
} }
@ -777,7 +812,7 @@ util_format_translate(enum pipe_format dst_format,
return FALSE; return FALSE;
while (height >= y_step) { while (height >= y_step) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
dst_row += dst_step; dst_row += dst_step;
@ -786,7 +821,7 @@ util_format_translate(enum pipe_format dst_format,
} }
if (height) { if (height) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, height); pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
} }
@ -807,7 +842,7 @@ util_format_translate(enum pipe_format dst_format,
return FALSE; return FALSE;
while (height >= y_step) { while (height >= y_step) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step);
pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step);
dst_row += dst_step; dst_row += dst_step;
@ -816,7 +851,7 @@ util_format_translate(enum pipe_format dst_format,
} }
if (height) { if (height) {
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height);
pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height); pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height);
} }

View file

@ -315,10 +315,20 @@ struct util_format_unpack_description {
* Unpack pixel blocks to R8G8B8A8_UNORM. * Unpack pixel blocks to R8G8B8A8_UNORM.
* Note: strides are in bytes. * Note: strides are in bytes.
* *
* Only defined for non-depth-stencil formats. * Only defined for non-block non-depth-stencil formats.
*/ */
void void
(*unpack_rgba_8unorm)(uint8_t *restrict dst, unsigned dst_stride, (*unpack_rgba_8unorm)(uint8_t *restrict dst, const uint8_t *restrict src,
unsigned width);
/**
* Unpack pixel blocks to R8G8B8A8_UNORM.
* Note: strides are in bytes.
*
* Only defined for block non-depth-stencil formats.
*/
void
(*unpack_rgba_8unorm_rect)(uint8_t *restrict dst, unsigned dst_stride,
const uint8_t *restrict src, unsigned src_stride, const uint8_t *restrict src, unsigned src_stride,
unsigned width, unsigned height); unsigned width, unsigned height);
@ -338,10 +348,22 @@ struct util_format_unpack_description {
* *
* Note: strides are in bytes. * Note: strides are in bytes.
* *
* Only defined for non-depth-stencil formats. * Only defined for non-block non-depth-stencil formats.
*/ */
void void
(*unpack_rgba)(void *restrict dst, unsigned dst_stride, (*unpack_rgba)(void *restrict dst, const uint8_t *restrict src,
unsigned width);
/**
* Unpack pixel blocks to R32G32B32A32_UINT/_INT/_FLOAT based on whether the
* type is pure uint, int, or other.
*
* Note: strides are in bytes.
*
* Only defined for block non-depth-stencil formats.
*/
void
(*unpack_rgba_rect)(void *restrict dst, unsigned dst_stride,
const uint8_t *restrict src, unsigned src_stride, const uint8_t *restrict src, unsigned src_stride,
unsigned width, unsigned height); unsigned width, unsigned height);
@ -1477,7 +1499,7 @@ util_format_unpack_rgba(enum pipe_format format, void *dst,
const struct util_format_unpack_description *desc = const struct util_format_unpack_description *desc =
util_format_unpack_description(format); util_format_unpack_description(format);
desc->unpack_rgba(dst, 0, (const uint8_t *)src, 0, w, 1); desc->unpack_rgba(dst, (const uint8_t *)src, w);
} }
static inline void static inline void
@ -1558,6 +1580,18 @@ util_format_write_4ub(enum pipe_format format,
void *dst, unsigned dst_stride, void *dst, unsigned dst_stride,
unsigned x, unsigned y, unsigned w, unsigned h); unsigned x, unsigned y, unsigned w, unsigned h);
/**
 * Unpack a w x h rectangle of pixels of the given format from src into dst.
 *
 * Uses the format's whole-rectangle unpack function when it has one;
 * otherwise unpacks row by row, advancing src by src_stride and dst by
 * dst_stride (both in bytes) after each row.
 */
void
util_format_unpack_rgba_rect(enum pipe_format format,
void *dst, unsigned dst_stride,
const void *src, unsigned src_stride,
unsigned w, unsigned h);
/**
 * Unpack a w x h rectangle of pixels of the given format from src into dst
 * through the format's 8unorm unpack functions.
 *
 * Uses the format's whole-rectangle 8unorm unpack function when it has one;
 * otherwise unpacks row by row, advancing src by src_stride and dst by
 * dst_stride (both in bytes) after each row.
 */
void
util_format_unpack_rgba_8unorm_rect(enum pipe_format format,
void *dst, unsigned dst_stride,
const void *src, unsigned src_stride,
unsigned w, unsigned h);
/* /*
* Generic format conversion; * Generic format conversion;
*/ */

View file

@ -62,10 +62,14 @@ util_format_bptc_rgba_unorm_unpack_rgba_float(void *restrict dst_row, unsigned d
decompress_rgba_unorm(width, height, decompress_rgba_unorm(width, height,
src_row, src_stride, src_row, src_stride,
temp_block, width * 4 * sizeof(uint8_t)); temp_block, width * 4 * sizeof(uint8_t));
util_format_r8g8b8a8_unorm_unpack_rgba_float( /* Direct call to row unpack instead of util_format_rgba_unpack_rect()
dst_row, dst_stride, * to avoid table lookup that would pull in all unpack symbols.
temp_block, width * 4 * sizeof(uint8_t), */
width, height); for (int y = 0; y < height; y++) {
util_format_r8g8b8a8_unorm_unpack_rgba_float((char *)dst_row + dst_stride * y,
temp_block + 4 * width * y,
width);
}
free((void *) temp_block); free((void *) temp_block);
} }
@ -76,10 +80,15 @@ util_format_bptc_rgba_unorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned
{ {
uint8_t *temp_block; uint8_t *temp_block;
temp_block = malloc(width * height * 4 * sizeof(uint8_t)); temp_block = malloc(width * height * 4 * sizeof(uint8_t));
util_format_r32g32b32a32_float_unpack_rgba_8unorm( /* Direct call to row unpack instead of util_format_rgba_unpack_rect()
temp_block, width * 4 * sizeof(uint8_t), * to avoid table lookup that would pull in all unpack symbols.
(uint8_t *)src_row, src_stride, */
width, height); for (int y = 0; y < height; y++) {
util_format_r32g32b32a32_float_unpack_rgba_8unorm(
temp_block + 4 * width * y,
(uint8_t *)src_row + src_stride * y,
width);
}
compress_rgba_unorm(width, height, compress_rgba_unorm(width, height,
temp_block, width * 4 * sizeof(uint8_t), temp_block, width * 4 * sizeof(uint8_t),
dst_row, dst_stride); dst_row, dst_stride);
@ -131,9 +140,15 @@ util_format_bptc_srgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_st
decompress_rgba_unorm(width, height, decompress_rgba_unorm(width, height,
src_row, src_stride, src_row, src_stride,
temp_block, width * 4 * sizeof(uint8_t)); temp_block, width * 4 * sizeof(uint8_t));
util_format_r8g8b8a8_srgb_unpack_rgba_float(dst_row, dst_stride,
temp_block, width * 4 * sizeof(uint8_t), /* Direct call to row unpack instead of util_format_rgba_unpack_rect()
width, height); * to avoid table lookup that would pull in all unpack symbols.
*/
for (int y = 0; y < height; y++) {
util_format_r8g8b8a8_srgb_unpack_rgba_float((char *)dst_row + dst_stride * y,
temp_block + width * 4 * y,
width);
}
free((void *) temp_block); free((void *) temp_block);
} }
@ -171,10 +186,15 @@ util_format_bptc_rgb_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigne
src_row, src_stride, src_row, src_stride,
temp_block, width * 4 * sizeof(float), temp_block, width * 4 * sizeof(float),
true); true);
util_format_r32g32b32a32_float_unpack_rgba_8unorm( /* Direct call to row unpack instead of util_format_rgba_unpack_rect()
dst_row, dst_stride, * to avoid table lookup that would pull in all unpack symbols.
(const uint8_t *)temp_block, width * 4 * sizeof(float), */
width, height); for (int y = 0; y < height; y++) {
util_format_r32g32b32a32_float_unpack_rgba_8unorm(
dst_row + dst_stride * y,
(const uint8_t *)temp_block + width * 4 * sizeof(float) * y,
width);
}
free((void *) temp_block); free((void *) temp_block);
} }
@ -229,10 +249,14 @@ util_format_bptc_rgb_ufloat_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsign
src_row, src_stride, src_row, src_stride,
temp_block, width * 4 * sizeof(float), temp_block, width * 4 * sizeof(float),
false); false);
util_format_r32g32b32a32_float_unpack_rgba_8unorm( /* Direct call to row unpack instead of util_format_rgba_unpack_8unorm()
dst_row, dst_stride, * to avoid table lookup that would pull in all unpack symbols.
(const uint8_t *)temp_block, width * 4 * sizeof(float), */
width, height); for (int y = 0; y < height; y++) {
util_format_r32g32b32a32_float_unpack_rgba_8unorm(dst_row + dst_stride * y,
(void *)(temp_block + 4 * width * y),
width);
}
free((void *) temp_block); free((void *) temp_block);
} }

View file

@ -33,23 +33,19 @@
void void
util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height) unsigned width)
{ {
unsigned x, y; unsigned x;
for(y = 0; y < height; y += 1) { float *dst = dst_row;
float *dst = dst_row; const uint8_t *src = src_row;
const uint8_t *src = src_row; for(x = 0; x < width; x += 1) {
for(x = 0; x < width; x += 1) { uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); rgb9e5_to_float3(value, dst);
rgb9e5_to_float3(value, dst); dst[3] = 1; /* a */
dst[3] = 1; /* a */ src += 4;
src += 4; dst += 4;
dst += 4;
}
src_row += src_stride;
dst_row = (uint8_t *)dst_row + dst_stride;
} }
} }
@ -85,27 +81,23 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict in_dst, const uint8_t *rest
void void
util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height) unsigned width)
{ {
unsigned x, y; unsigned x;
float p[3]; float p[3];
for(y = 0; y < height; y += 1) { uint8_t *dst = dst_row;
uint8_t *dst = dst_row; const uint8_t *src = src_row;
const uint8_t *src = src_row; for(x = 0; x < width; x += 1) {
for(x = 0; x < width; x += 1) { uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); rgb9e5_to_float3(value, p);
rgb9e5_to_float3(value, p); dst[0] = float_to_ubyte(p[0]); /* r */
dst[0] = float_to_ubyte(p[0]); /* r */ dst[1] = float_to_ubyte(p[1]); /* g */
dst[1] = float_to_ubyte(p[1]); /* g */ dst[2] = float_to_ubyte(p[2]); /* b */
dst[2] = float_to_ubyte(p[2]); /* b */ dst[3] = 255; /* a */
dst[3] = 255; /* a */ src += 4;
src += 4; dst += 4;
dst += 4;
}
src_row += src_stride;
dst_row += dst_stride/sizeof(*dst_row);
} }
} }
@ -137,23 +129,19 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
void void
util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height) unsigned width)
{ {
unsigned x, y; unsigned x;
for(y = 0; y < height; y += 1) { float *dst = dst_row;
float *dst = dst_row; const uint8_t *src = src_row;
const uint8_t *src = src_row; for(x = 0; x < width; x += 1) {
for(x = 0; x < width; x += 1) { uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); r11g11b10f_to_float3(value, dst);
r11g11b10f_to_float3(value, dst); dst[3] = 1; /* a */
dst[3] = 1; /* a */ src += 4;
src += 4; dst += 4;
dst += 4;
}
src_row += src_stride;
dst_row = (uint8_t *)dst_row + dst_stride;
} }
} }
@ -189,27 +177,23 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict in_dst, const uint8_t *res
void void
util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height) unsigned width)
{ {
unsigned x, y; unsigned x;
float p[3]; float p[3];
for(y = 0; y < height; y += 1) { uint8_t *dst = dst_row;
uint8_t *dst = dst_row; const uint8_t *src = src_row;
const uint8_t *src = src_row; for(x = 0; x < width; x += 1) {
for(x = 0; x < width; x += 1) { uint32_t value = util_cpu_to_le32(*(const uint32_t *)src);
uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); r11g11b10f_to_float3(value, p);
r11g11b10f_to_float3(value, p); dst[0] = float_to_ubyte(p[0]); /* r */
dst[0] = float_to_ubyte(p[0]); /* r */ dst[1] = float_to_ubyte(p[1]); /* g */
dst[1] = float_to_ubyte(p[1]); /* g */ dst[2] = float_to_ubyte(p[2]); /* b */
dst[2] = float_to_ubyte(p[2]); /* b */ dst[3] = 255; /* a */
dst[3] = 255; /* a */ src += 4;
src += 4; dst += 4;
dst += 4;
}
src_row += src_stride;
dst_row += dst_stride/sizeof(*dst_row);
} }
} }
@ -256,58 +240,47 @@ r8g8bx_derive(int16_t r, int16_t g)
} }
void void
util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row, unsigned width)
unsigned width, unsigned height)
{ {
unsigned x, y; unsigned x;
float *dst = dst_row;
const uint16_t *src = (const uint16_t *)src_row;
for(x = 0; x < width; x += 1) {
uint16_t value = util_cpu_to_le16(*src++);
int16_t r, g;
for(y = 0; y < height; y += 1) { r = ((int16_t)(value << 8)) >> 8;
float *dst = dst_row; g = ((int16_t)(value << 0)) >> 8;
const uint16_t *src = (const uint16_t *)src_row;
for(x = 0; x < width; x += 1) {
uint16_t value = util_cpu_to_le16(*src++);
int16_t r, g;
r = ((int16_t)(value << 8)) >> 8; dst[0] = (float)(r * (1.0f/0x7f)); /* r */
g = ((int16_t)(value << 0)) >> 8; dst[1] = (float)(g * (1.0f/0x7f)); /* g */
dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[0] = (float)(r * (1.0f/0x7f)); /* r */ dst[3] = 1.0f; /* a */
dst[1] = (float)(g * (1.0f/0x7f)); /* g */ dst += 4;
dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */
dst[3] = 1.0f; /* a */
dst += 4;
}
src_row += src_stride;
dst_row = (uint8_t *)dst_row + dst_stride;
} }
} }
void void
util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height) unsigned width)
{ {
unsigned x, y; unsigned x;
for(y = 0; y < height; y += 1) { const uint16_t *src = (const uint16_t *)src_row;
uint8_t *dst = dst_row; for(x = 0; x < width; x += 1) {
const uint16_t *src = (const uint16_t *)src_row; uint16_t value = util_cpu_to_le16(*src++);
for(x = 0; x < width; x += 1) { int16_t r, g;
uint16_t value = util_cpu_to_le16(*src++);
int16_t r, g;
r = ((int16_t)(value << 8)) >> 8; r = ((int16_t)(value << 8)) >> 8;
g = ((int16_t)(value << 0)) >> 8; g = ((int16_t)(value << 0)) >> 8;
dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */
dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */
dst[2] = r8g8bx_derive(r, g); /* b */ dst[2] = r8g8bx_derive(r, g); /* b */
dst[3] = 255; /* a */ dst[3] = 255; /* a */
dst += 4; dst += 4;
}
src_row += src_stride;
dst_row += dst_stride/sizeof(*dst_row);
} }
} }

View file

@ -34,9 +34,9 @@
void void
util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@ -48,9 +48,9 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict dst, const uint8_t *restric
unsigned i, unsigned j); unsigned i, unsigned j);
void void
util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
@ -59,9 +59,9 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
void void
util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r11g11b10_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@ -73,9 +73,9 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict dst, const uint8_t *restri
unsigned i, unsigned j); unsigned i, unsigned j);
void void
util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
@ -84,9 +84,9 @@ util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned
void void
util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
@ -98,9 +98,9 @@ util_format_r8g8bx_snorm_fetch_rgba(void *restrict dst, const uint8_t *restrict
unsigned i, unsigned j); unsigned i, unsigned j);
void void
util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row,
const uint8_t *restrict src_row, unsigned src_stride, const uint8_t *restrict src_row,
unsigned width, unsigned height); unsigned width);
void void
util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,

View file

@ -617,7 +617,7 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
else: else:
dst_proto_type = 'void' dst_proto_type = 'void'
proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, unsigned dst_stride, const uint8_t *restrict src_row, unsigned src_stride, unsigned width, unsigned height)' % ( proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, const uint8_t *restrict src, unsigned width)' % (
name, dst_suffix, dst_proto_type) name, dst_suffix, dst_proto_type)
print('void %s;' % proto, file=sys.stdout2) print('void %s;' % proto, file=sys.stdout2)
@ -626,19 +626,14 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
print('{') print('{')
if is_format_supported(format): if is_format_supported(format):
print(' unsigned x, y;') print(' %s *dst = dst_row;' % (dst_native_type))
print(' for(y = 0; y < height; y += %u) {' % (format.block_height,)) print(
print(' %s *dst = dst_row;' % (dst_native_type)) ' for (unsigned x = 0; x < width; x += %u) {' % (format.block_width,))
print(' const uint8_t *src = src_row;')
print(' for(x = 0; x < width; x += %u) {' % (format.block_width,))
generate_unpack_kernel(format, dst_channel, dst_native_type) generate_unpack_kernel(format, dst_channel, dst_native_type)
print(' src += %u;' % (format.block_size() / 8,)) print(' src += %u;' % (format.block_size() / 8,))
print(' dst += 4;') print(' dst += 4;')
print(' }')
print(' src_row += src_stride;')
print(' dst_row = (uint8_t *)dst_row + dst_stride;')
print(' }') print(' }')
print('}') print('}')

View file

@ -255,10 +255,17 @@ def write_format_table(formats):
print(" [%s] = {" % (format.name,)) print(" [%s] = {" % (format.name,))
if format.colorspace != ZS and not format.is_pure_color(): if format.colorspace != ZS and not format.is_pure_color():
print(" .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn)
if format.layout == 's3tc' or format.layout == 'rgtc': if format.layout == 's3tc' or format.layout == 'rgtc':
print(" .fetch_rgba_8unorm = &util_format_%s_fetch_rgba_8unorm," % sn) print(" .fetch_rgba_8unorm = &util_format_%s_fetch_rgba_8unorm," % sn)
print(" .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn) if format.block_width > 1:
print(
" .unpack_rgba_8unorm_rect = &util_format_%s_unpack_rgba_8unorm," % sn)
print(
" .unpack_rgba_rect = &util_format_%s_unpack_rgba_float," % sn)
else:
print(
" .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn)
print(" .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn)
if format.has_depth(): if format.has_depth():
print(" .unpack_z_32unorm = &util_format_%s_unpack_z_32unorm," % sn) print(" .unpack_z_32unorm = &util_format_%s_unpack_z_32unorm," % sn)

View file

@ -241,13 +241,11 @@ static boolean
test_format_unpack_rgba(const struct util_format_description *format_desc, test_format_unpack_rgba(const struct util_format_description *format_desc,
const struct util_format_test_case *test) const struct util_format_test_case *test)
{ {
const struct util_format_unpack_description *unpack =
util_format_unpack_description(format_desc->format);
float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
unsigned i, j, k; unsigned i, j, k;
boolean success; boolean success;
unpack->unpack_rgba(&unpacked[0][0][0], sizeof unpacked[0], util_format_unpack_rgba_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0],
test->packed, 0, test->packed, 0,
format_desc->block.width, format_desc->block.height); format_desc->block.width, format_desc->block.height);
@ -361,8 +359,6 @@ static boolean
test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc, test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc,
const struct util_format_test_case *test) const struct util_format_test_case *test)
{ {
const struct util_format_unpack_description *unpack =
util_format_unpack_description(format_desc->format);
uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
unsigned i, j, k; unsigned i, j, k;
@ -371,7 +367,7 @@ test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc
if (util_format_is_pure_integer(format_desc->format)) if (util_format_is_pure_integer(format_desc->format))
return FALSE; return FALSE;
unpack->unpack_rgba_8unorm(&unpacked[0][0][0], sizeof unpacked[0], util_format_unpack_rgba_8unorm_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0],
test->packed, 0, test->packed, 0,
format_desc->block.width, format_desc->block.height); format_desc->block.width, format_desc->block.height);
@ -814,6 +810,13 @@ test_all(void)
} \ } \
} }
/* Run test_format_<name> for the current format when its unpack description
 * provides either the row entry point <name> or the rectangle entry point
 * <name>_rect; a failing test clears `success`.
 */
# define TEST_ONE_UNPACK_RECT_FUNC(name) \
if (util_format_unpack_description(format)->name || util_format_unpack_description(format)->name##_rect) { \
if (!test_one_func(format_desc, &test_format_##name, #name)) { \
success = FALSE; \
} \
}
# define TEST_FORMAT_METADATA(name) \ # define TEST_FORMAT_METADATA(name) \
if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \ if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \
success = FALSE; \ success = FALSE; \
@ -825,9 +828,9 @@ test_all(void)
} }
TEST_ONE_PACK_FUNC(pack_rgba_float); TEST_ONE_PACK_FUNC(pack_rgba_float);
TEST_ONE_UNPACK_FUNC(unpack_rgba); TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba);
TEST_ONE_PACK_FUNC(pack_rgba_8unorm); TEST_ONE_PACK_FUNC(pack_rgba_8unorm);
TEST_ONE_UNPACK_FUNC(unpack_rgba_8unorm); TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba_8unorm);
TEST_ONE_UNPACK_FUNC(unpack_z_32unorm); TEST_ONE_UNPACK_FUNC(unpack_z_32unorm);
TEST_ONE_PACK_FUNC(pack_z_32unorm); TEST_ONE_PACK_FUNC(pack_z_32unorm);