From 2b5178ee4820d69386dfa0ecb28fe9fe7c4f0964 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 Apr 2021 14:35:01 -0700 Subject: [PATCH] util: Switch the non-block formats to unpacking rgba rows instead of rects. We have only a few callers of unpack that do rects, so add a helper that iterates over y adding the strides. This saves us 36kb of generated code and means that adding cpu-specific variants for RGBA format unpack will be much simpler. Reviewed-by: Jesse Natalie Part-of: --- .../auxiliary/translate/translate_generic.c | 7 +- src/gallium/frontends/nine/device9.c | 8 +- src/mesa/main/pack.c | 2 +- src/util/format/u_format.c | 67 ++++-- src/util/format/u_format.h | 44 +++- src/util/format/u_format_bptc.c | 62 ++++-- src/util/format/u_format_other.c | 195 ++++++++---------- src/util/format/u_format_other.h | 36 ++-- src/util/format/u_format_pack.py | 17 +- src/util/format/u_format_table.py | 11 +- src/util/tests/format/u_format_test.c | 19 +- 11 files changed, 268 insertions(+), 200 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 123f077386f..a0c18531caa 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -50,9 +50,8 @@ struct translate_generic { struct { enum translate_element_type type; - void (*fetch)(void *restrict dst, unsigned dst_stride, - const uint8_t *restrict src, unsigned src_stride, - unsigned width, unsigned height); + void (*fetch)(void *restrict dst, const uint8_t *restrict src, + unsigned width); unsigned buffer; unsigned input_offset; unsigned instance_divisor; @@ -625,7 +624,7 @@ generic_run_one(struct translate_generic *tg, if (likely(copy_size >= 0)) { memcpy(dst, src, copy_size); } else { - tg->attrib[attr].fetch(data, 0, src, 0, 1, 1); + tg->attrib[attr].fetch(data, src, 1); if (0) debug_printf("Fetch linear attr %d from %p stride %d index %d: " diff --git 
a/src/gallium/frontends/nine/device9.c b/src/gallium/frontends/nine/device9.c index ec2e9f4bc71..527d40d62ec 100644 --- a/src/gallium/frontends/nine/device9.c +++ b/src/gallium/frontends/nine/device9.c @@ -848,16 +848,13 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, { D3DLOCKED_RECT lock; HRESULT hr; - const struct util_format_unpack_description *unpack = - util_format_unpack_description(surf->base.info.format); - assert(unpack); hr = NineSurface9_LockRect(surf, &lock, NULL, D3DLOCK_READONLY); if (FAILED(hr)) ret_err("Failed to map cursor source image.\n", D3DERR_DRIVERINTERNALERROR); - unpack->unpack_rgba_8unorm(ptr, transfer->stride, + util_format_unpack_rgba_8unorm_rect(surf->base.info.format, ptr, transfer->stride, lock.pBits, lock.Pitch, This->cursor.w, This->cursor.h); @@ -865,7 +862,8 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, void *data = lock.pBits; /* SetCursor assumes 32x32 argb with pitch 128 */ if (lock.Pitch != 128) { - unpack->unpack_rgba_8unorm(This->cursor.hw_upload_temp, 128, + util_format_unpack_rgba_8unorm_rect(surf->base.info.format, + This->cursor.hw_upload_temp, 128, lock.pBits, lock.Pitch, 32, 32); data = This->cursor.hw_upload_temp; diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 4e8db1b7952..257f9dd93e8 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1641,7 +1641,7 @@ _mesa_unpack_ubyte_rgba_row(mesa_format format, uint32_t n, util_format_unpack_description((enum pipe_format)format); if (unpack->unpack_rgba_8unorm) { - unpack->unpack_rgba_8unorm((uint8_t *)dst, 0, src, 0, n, 1); + unpack->unpack_rgba_8unorm((uint8_t *)dst, src, n); } else { /* get float values, convert to ubyte */ { diff --git a/src/util/format/u_format.c b/src/util/format/u_format.c index 43f57caa882..1c3ca57fc76 100644 --- a/src/util/format/u_format.c +++ b/src/util/format/u_format.c @@ -359,6 +359,47 @@ util_get_depth_format_mrd(const struct util_format_description *desc) return mrd; } +void 
+util_format_unpack_rgba_rect(enum pipe_format format, + void *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned w, unsigned h) +{ + const struct util_format_unpack_description *unpack = + util_format_unpack_description(format); + + /* Optimized function for block-compressed formats */ + if (unpack->unpack_rgba_rect) { + unpack->unpack_rgba_rect(dst, dst_stride, src, src_stride, w, h); + } else { + for (unsigned y = 0; y < h; y++) { + unpack->unpack_rgba(dst, src, w); + src = (const char *)src + src_stride; + dst = (char *)dst + dst_stride; + } + } +} + +void +util_format_unpack_rgba_8unorm_rect(enum pipe_format format, + void *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned w, unsigned h) +{ + const struct util_format_unpack_description *unpack = + util_format_unpack_description(format); + + /* Optimized function for block-compressed formats */ + if (unpack->unpack_rgba_8unorm_rect) { + unpack->unpack_rgba_8unorm_rect(dst, dst_stride, src, src_stride, w, h); + } else { + for (unsigned y = 0; y < h; y++) { + unpack->unpack_rgba_8unorm(dst, src, w); + src = (const char *)src + src_stride; + dst = (char *)dst + dst_stride; + } + } +} void util_format_read_4(enum pipe_format format, @@ -367,8 +408,6 @@ util_format_read_4(enum pipe_format format, unsigned x, unsigned y, unsigned w, unsigned h) { const struct util_format_description *format_desc; - const struct util_format_unpack_description *unpack = - util_format_unpack_description(format); const uint8_t *src_row; format_desc = util_format_description(format); @@ -378,7 +417,7 @@ util_format_read_4(enum pipe_format format, src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); - unpack->unpack_rgba(dst, dst_stride, src_row, src_stride, w, h); + util_format_unpack_rgba_rect(format, dst, dst_stride, src_row, src_stride, w, h); } @@ -413,10 +452,7 @@ void util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const 
void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h) { const struct util_format_description *format_desc; - const struct util_format_unpack_description *unpack = - util_format_unpack_description(format); const uint8_t *src_row; - uint8_t *dst_row; format_desc = util_format_description(format); @@ -424,9 +460,8 @@ util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, assert(y % format_desc->block.height == 0); src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); - dst_row = dst; - unpack->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); + util_format_unpack_rgba_8unorm_rect(format, dst, dst_stride, src_row, src_stride, w, h); } @@ -715,7 +750,7 @@ util_format_translate(enum pipe_format dst_format, return FALSE; while (height >= y_step) { - unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step); pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); dst_row += dst_step; @@ -724,7 +759,7 @@ util_format_translate(enum pipe_format dst_format, } if (height) { - unpack->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height); + util_format_unpack_rgba_8unorm_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height); pack->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height); } @@ -746,7 +781,7 @@ util_format_translate(enum pipe_format dst_format, return FALSE; while (height >= y_step) { - unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step); pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); dst_row += dst_step; @@ -755,7 +790,7 @@ util_format_translate(enum pipe_format dst_format, } if (height) { - 
unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height); pack->pack_rgba_sint(dst_row, dst_stride, tmp_row, tmp_stride, width, height); } @@ -777,7 +812,7 @@ util_format_translate(enum pipe_format dst_format, return FALSE; while (height >= y_step) { - unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step); pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); dst_row += dst_step; @@ -786,7 +821,7 @@ util_format_translate(enum pipe_format dst_format, } if (height) { - unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height); pack->pack_rgba_uint(dst_row, dst_stride, tmp_row, tmp_stride, width, height); } @@ -807,7 +842,7 @@ util_format_translate(enum pipe_format dst_format, return FALSE; while (height >= y_step) { - unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, y_step); pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); dst_row += dst_step; @@ -816,7 +851,7 @@ util_format_translate(enum pipe_format dst_format, } if (height) { - unpack->unpack_rgba(tmp_row, tmp_stride, src_row, src_stride, width, height); + util_format_unpack_rgba_rect(src_format, tmp_row, tmp_stride, src_row, src_stride, width, height); pack->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height); } diff --git a/src/util/format/u_format.h b/src/util/format/u_format.h index 9911ff5a40d..a1b5ec1ecbc 100644 --- a/src/util/format/u_format.h +++ b/src/util/format/u_format.h @@ -315,10 +315,20 @@ struct util_format_unpack_description { * Unpack pixel 
blocks to R8G8B8A8_UNORM. * Note: strides are in bytes. * - * Only defined for non-depth-stencil formats. + * Only defined for non-block non-depth-stencil formats. */ void - (*unpack_rgba_8unorm)(uint8_t *restrict dst, unsigned dst_stride, + (*unpack_rgba_8unorm)(uint8_t *restrict dst, const uint8_t *restrict src, + unsigned width); + + /** + * Unpack pixel blocks to R8G8B8A8_UNORM. + * Note: strides are in bytes. + * + * Only defined for block non-depth-stencil formats. + */ + void + (*unpack_rgba_8unorm_rect)(uint8_t *restrict dst, unsigned dst_stride, const uint8_t *restrict src, unsigned src_stride, unsigned width, unsigned height); @@ -338,10 +348,22 @@ struct util_format_unpack_description { * * Note: strides are in bytes. * - * Only defined for non-depth-stencil formats. + * Only defined for non-block non-depth-stencil formats. */ void - (*unpack_rgba)(void *restrict dst, unsigned dst_stride, + (*unpack_rgba)(void *restrict dst, const uint8_t *restrict src, + unsigned width); + + /** + * Unpack pixel blocks to R32G32B32A32_UINT/_INT/_FLOAT based on whether the + * type is pure uint, int, or other. + * + * Note: strides are in bytes. + * + * Only defined for block non-depth-stencil formats. 
+ */ + void + (*unpack_rgba_rect)(void *restrict dst, unsigned dst_stride, const uint8_t *restrict src, unsigned src_stride, unsigned width, unsigned height); @@ -1477,7 +1499,7 @@ util_format_unpack_rgba(enum pipe_format format, void *dst, const struct util_format_unpack_description *desc = util_format_unpack_description(format); - desc->unpack_rgba(dst, 0, (const uint8_t *)src, 0, w, 1); + desc->unpack_rgba(dst, (const uint8_t *)src, w); } static inline void @@ -1558,6 +1580,18 @@ util_format_write_4ub(enum pipe_format format, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h); +void +util_format_unpack_rgba_rect(enum pipe_format format, + void *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned w, unsigned h); + +void +util_format_unpack_rgba_8unorm_rect(enum pipe_format format, + void *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned w, unsigned h); + /* * Generic format conversion; */ diff --git a/src/util/format/u_format_bptc.c b/src/util/format/u_format_bptc.c index 7358124dde2..7cf9cf40e56 100644 --- a/src/util/format/u_format_bptc.c +++ b/src/util/format/u_format_bptc.c @@ -62,10 +62,14 @@ util_format_bptc_rgba_unorm_unpack_rgba_float(void *restrict dst_row, unsigned d decompress_rgba_unorm(width, height, src_row, src_stride, temp_block, width * 4 * sizeof(uint8_t)); - util_format_r8g8b8a8_unorm_unpack_rgba_float( - dst_row, dst_stride, - temp_block, width * 4 * sizeof(uint8_t), - width, height); + /* Direct call to row unpack instead of util_format_unpack_rgba_rect() + * to avoid table lookup that would pull in all unpack symbols. 
+ */ + for (int y = 0; y < height; y++) { + util_format_r8g8b8a8_unorm_unpack_rgba_float((char *)dst_row + dst_stride * y, + temp_block + 4 * width * y, + width); + } free((void *) temp_block); } @@ -76,10 +80,15 @@ util_format_bptc_rgba_unorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned { uint8_t *temp_block; temp_block = malloc(width * height * 4 * sizeof(uint8_t)); - util_format_r32g32b32a32_float_unpack_rgba_8unorm( - temp_block, width * 4 * sizeof(uint8_t), - (uint8_t *)src_row, src_stride, - width, height); + /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect() + * to avoid table lookup that would pull in all unpack symbols. + */ + for (int y = 0; y < height; y++) { + util_format_r32g32b32a32_float_unpack_rgba_8unorm( + temp_block + 4 * width * y, + (uint8_t *)src_row + src_stride * y, + width); + } compress_rgba_unorm(width, height, temp_block, width * 4 * sizeof(uint8_t), dst_row, dst_stride); @@ -131,9 +140,15 @@ util_format_bptc_srgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_st decompress_rgba_unorm(width, height, src_row, src_stride, temp_block, width * 4 * sizeof(uint8_t)); - util_format_r8g8b8a8_srgb_unpack_rgba_float(dst_row, dst_stride, - temp_block, width * 4 * sizeof(uint8_t), - width, height); + + /* Direct call to row unpack instead of util_format_unpack_rgba_rect() + * to avoid table lookup that would pull in all unpack symbols. 
+ */ + for (int y = 0; y < height; y++) { + util_format_r8g8b8a8_srgb_unpack_rgba_float((char *)dst_row + dst_stride * y, + temp_block + width * 4 * y, + width); + } free((void *) temp_block); } @@ -171,10 +186,15 @@ util_format_bptc_rgb_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigne src_row, src_stride, temp_block, width * 4 * sizeof(float), true); - util_format_r32g32b32a32_float_unpack_rgba_8unorm( - dst_row, dst_stride, - (const uint8_t *)temp_block, width * 4 * sizeof(float), - width, height); + /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect() + * to avoid table lookup that would pull in all unpack symbols. + */ + for (int y = 0; y < height; y++) { + util_format_r32g32b32a32_float_unpack_rgba_8unorm( + dst_row + dst_stride * y, + (const uint8_t *)temp_block + width * 4 * sizeof(float) * y, + width); + } free((void *) temp_block); } @@ -229,10 +249,14 @@ util_format_bptc_rgb_ufloat_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsign src_row, src_stride, temp_block, width * 4 * sizeof(float), false); - util_format_r32g32b32a32_float_unpack_rgba_8unorm( - dst_row, dst_stride, - (const uint8_t *)temp_block, width * 4 * sizeof(float), - width, height); + /* Direct call to row unpack instead of util_format_unpack_rgba_8unorm_rect() + * to avoid table lookup that would pull in all unpack symbols. 
+ */ + for (int y = 0; y < height; y++) { + util_format_r32g32b32a32_float_unpack_rgba_8unorm(dst_row + dst_stride * y, + (void *)(temp_block + 4 * width * y), + width); + } free((void *) temp_block); } diff --git a/src/util/format/u_format_other.c b/src/util/format/u_format_other.c index 795ad8cce29..ce4b9d0976a 100644 --- a/src/util/format/u_format_other.c +++ b/src/util/format/u_format_other.c @@ -33,23 +33,19 @@ void -util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) +util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width) { - unsigned x, y; - for(y = 0; y < height; y += 1) { - float *dst = dst_row; - const uint8_t *src = src_row; - for(x = 0; x < width; x += 1) { - uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); - rgb9e5_to_float3(value, dst); - dst[3] = 1; /* a */ - src += 4; - dst += 4; - } - src_row += src_stride; - dst_row = (uint8_t *)dst_row + dst_stride; + unsigned x; + float *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); + rgb9e5_to_float3(value, dst); + dst[3] = 1; /* a */ + src += 4; + dst += 4; } } @@ -85,27 +81,23 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict in_dst, const uint8_t *rest void -util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) +util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width) { - unsigned x, y; + unsigned x; float p[3]; - for(y = 0; y < height; y += 1) { - uint8_t *dst = dst_row; - const uint8_t *src = src_row; - for(x = 0; x < width; x += 1) { - uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); - 
rgb9e5_to_float3(value, p); - dst[0] = float_to_ubyte(p[0]); /* r */ - dst[1] = float_to_ubyte(p[1]); /* g */ - dst[2] = float_to_ubyte(p[2]); /* b */ - dst[3] = 255; /* a */ - src += 4; - dst += 4; - } - src_row += src_stride; - dst_row += dst_stride/sizeof(*dst_row); + uint8_t *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); + rgb9e5_to_float3(value, p); + dst[0] = float_to_ubyte(p[0]); /* r */ + dst[1] = float_to_ubyte(p[1]); /* g */ + dst[2] = float_to_ubyte(p[2]); /* b */ + dst[3] = 255; /* a */ + src += 4; + dst += 4; } } @@ -137,23 +129,19 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned void -util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) +util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width) { - unsigned x, y; - for(y = 0; y < height; y += 1) { - float *dst = dst_row; - const uint8_t *src = src_row; - for(x = 0; x < width; x += 1) { - uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); - r11g11b10f_to_float3(value, dst); - dst[3] = 1; /* a */ - src += 4; - dst += 4; - } - src_row += src_stride; - dst_row = (uint8_t *)dst_row + dst_stride; + unsigned x; + float *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); + r11g11b10f_to_float3(value, dst); + dst[3] = 1; /* a */ + src += 4; + dst += 4; } } @@ -189,27 +177,23 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict in_dst, const uint8_t *res void -util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) 
+util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width) { - unsigned x, y; + unsigned x; float p[3]; - for(y = 0; y < height; y += 1) { - uint8_t *dst = dst_row; - const uint8_t *src = src_row; - for(x = 0; x < width; x += 1) { - uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); - r11g11b10f_to_float3(value, p); - dst[0] = float_to_ubyte(p[0]); /* r */ - dst[1] = float_to_ubyte(p[1]); /* g */ - dst[2] = float_to_ubyte(p[2]); /* b */ - dst[3] = 255; /* a */ - src += 4; - dst += 4; - } - src_row += src_stride; - dst_row += dst_stride/sizeof(*dst_row); + uint8_t *dst = dst_row; + const uint8_t *src = src_row; + for(x = 0; x < width; x += 1) { + uint32_t value = util_cpu_to_le32(*(const uint32_t *)src); + r11g11b10f_to_float3(value, p); + dst[0] = float_to_ubyte(p[0]); /* r */ + dst[1] = float_to_ubyte(p[1]); /* g */ + dst[2] = float_to_ubyte(p[2]); /* b */ + dst[3] = 255; /* a */ + src += 4; + dst += 4; } } @@ -256,58 +240,47 @@ r8g8bx_derive(int16_t r, int16_t g) } void -util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) +util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, unsigned width) { - unsigned x, y; + unsigned x; + float *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = util_cpu_to_le16(*src++); + int16_t r, g; - for(y = 0; y < height; y += 1) { - float *dst = dst_row; - const uint16_t *src = (const uint16_t *)src_row; - for(x = 0; x < width; x += 1) { - uint16_t value = util_cpu_to_le16(*src++); - int16_t r, g; + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; - r = ((int16_t)(value << 8)) >> 8; - g = ((int16_t)(value << 0)) >> 8; - - dst[0] = (float)(r * (1.0f/0x7f)); /* r */ - dst[1] = (float)(g * (1.0f/0x7f)); 
/* g */ - dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ - dst[3] = 1.0f; /* a */ - dst += 4; - } - src_row += src_stride; - dst_row = (uint8_t *)dst_row + dst_stride; + dst[0] = (float)(r * (1.0f/0x7f)); /* r */ + dst[1] = (float)(g * (1.0f/0x7f)); /* g */ + dst[2] = r8g8bx_derive(r, g) * (1.0f/0xff); /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; } } void -util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height) +util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst, + const uint8_t *restrict src_row, + unsigned width) { - unsigned x, y; - for(y = 0; y < height; y += 1) { - uint8_t *dst = dst_row; - const uint16_t *src = (const uint16_t *)src_row; - for(x = 0; x < width; x += 1) { - uint16_t value = util_cpu_to_le16(*src++); - int16_t r, g; + unsigned x; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = util_cpu_to_le16(*src++); + int16_t r, g; - r = ((int16_t)(value << 8)) >> 8; - g = ((int16_t)(value << 0)) >> 8; + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; - dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ - dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ - dst[2] = r8g8bx_derive(r, g); /* b */ - dst[3] = 255; /* a */ - dst += 4; - } - src_row += src_stride; - dst_row += dst_stride/sizeof(*dst_row); + dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ + dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ + dst[2] = r8g8bx_derive(r, g); /* b */ + dst[3] = 255; /* a */ + dst += 4; } } diff --git a/src/util/format/u_format_other.h b/src/util/format/u_format_other.h index 4f656c148f5..3547e9e673f 100644 --- a/src/util/format/u_format_other.h +++ b/src/util/format/u_format_other.h @@ -34,9 +34,9 @@ void -util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, unsigned 
dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r9g9b9e5_float_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, @@ -48,9 +48,9 @@ util_format_r9g9b9e5_float_fetch_rgba(void *restrict dst, const uint8_t *restric unsigned i, unsigned j); void -util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, @@ -59,9 +59,9 @@ util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned void -util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r11g11b10_float_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r11g11b10_float_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, @@ -73,9 +73,9 @@ util_format_r11g11b10_float_fetch_rgba(void *restrict dst, const uint8_t *restri unsigned i, unsigned j); void -util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r11g11b10_float_unpack_rgba_8unorm(uint8_t *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, @@ -84,9 +84,9 @@ util_format_r11g11b10_float_pack_rgba_8unorm(uint8_t *restrict 
dst_row, unsigned void -util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r8g8bx_snorm_unpack_rgba_float(void *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride, @@ -98,9 +98,9 @@ util_format_r8g8bx_snorm_fetch_rgba(void *restrict dst, const uint8_t *restrict unsigned i, unsigned j); void -util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, - const uint8_t *restrict src_row, unsigned src_stride, - unsigned width, unsigned height); +util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *restrict dst_row, + const uint8_t *restrict src_row, + unsigned width); void util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride, diff --git a/src/util/format/u_format_pack.py b/src/util/format/u_format_pack.py index 35e8d341ec4..e32c01f8a08 100644 --- a/src/util/format/u_format_pack.py +++ b/src/util/format/u_format_pack.py @@ -617,7 +617,7 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): else: dst_proto_type = 'void' - proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, unsigned dst_stride, const uint8_t *restrict src_row, unsigned src_stride, unsigned width, unsigned height)' % ( + proto = 'util_format_%s_unpack_%s(%s *restrict dst_row, const uint8_t *restrict src, unsigned width)' % ( name, dst_suffix, dst_proto_type) print('void %s;' % proto, file=sys.stdout2) @@ -626,19 +626,14 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): print('{') if is_format_supported(format): - print(' unsigned x, y;') - print(' for(y = 0; y < height; y += %u) {' % (format.block_height,)) - print(' %s *dst = dst_row;' % (dst_native_type)) - print(' const uint8_t *src = src_row;') - print(' for(x = 0; x < 
width; x += %u) {' % (format.block_width,)) + print(' %s *dst = dst_row;' % (dst_native_type)) + print( + ' for (unsigned x = 0; x < width; x += %u) {' % (format.block_width,)) generate_unpack_kernel(format, dst_channel, dst_native_type) - print(' src += %u;' % (format.block_size() / 8,)) - print(' dst += 4;') - print(' }') - print(' src_row += src_stride;') - print(' dst_row = (uint8_t *)dst_row + dst_stride;') + print(' src += %u;' % (format.block_size() / 8,)) + print(' dst += 4;') print(' }') print('}') diff --git a/src/util/format/u_format_table.py b/src/util/format/u_format_table.py index 607c999f46a..b9176956cc3 100644 --- a/src/util/format/u_format_table.py +++ b/src/util/format/u_format_table.py @@ -255,10 +255,17 @@ def write_format_table(formats): print(" [%s] = {" % (format.name,)) if format.colorspace != ZS and not format.is_pure_color(): - print(" .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn) if format.layout == 's3tc' or format.layout == 'rgtc': print(" .fetch_rgba_8unorm = &util_format_%s_fetch_rgba_8unorm," % sn) - print(" .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn) + if format.block_width > 1: + print( + " .unpack_rgba_8unorm_rect = &util_format_%s_unpack_rgba_8unorm," % sn) + print( + " .unpack_rgba_rect = &util_format_%s_unpack_rgba_float," % sn) + else: + print( + " .unpack_rgba_8unorm = &util_format_%s_unpack_rgba_8unorm," % sn) + print(" .unpack_rgba = &util_format_%s_unpack_rgba_float," % sn) if format.has_depth(): print(" .unpack_z_32unorm = &util_format_%s_unpack_z_32unorm," % sn) diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c index e6473c2bf6d..9f14b671ff8 100644 --- a/src/util/tests/format/u_format_test.c +++ b/src/util/tests/format/u_format_test.c @@ -241,13 +241,11 @@ static boolean test_format_unpack_rgba(const struct util_format_description *format_desc, const struct util_format_test_case *test) { - const struct util_format_unpack_description *unpack = - 
util_format_unpack_description(format_desc->format); float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; unsigned i, j, k; boolean success; - unpack->unpack_rgba(&unpacked[0][0][0], sizeof unpacked[0], + util_format_unpack_rgba_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0], test->packed, 0, format_desc->block.width, format_desc->block.height); @@ -361,8 +359,6 @@ static boolean test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc, const struct util_format_test_case *test) { - const struct util_format_unpack_description *unpack = - util_format_unpack_description(format_desc->format); uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; unsigned i, j, k; @@ -371,7 +367,7 @@ test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc if (util_format_is_pure_integer(format_desc->format)) return FALSE; - unpack->unpack_rgba_8unorm(&unpacked[0][0][0], sizeof unpacked[0], + util_format_unpack_rgba_8unorm_rect(format_desc->format, &unpacked[0][0][0], sizeof unpacked[0], test->packed, 0, format_desc->block.width, format_desc->block.height); @@ -814,6 +810,13 @@ test_all(void) } \ } +# define TEST_ONE_UNPACK_RECT_FUNC(name) \ + if (util_format_unpack_description(format)->name || util_format_unpack_description(format)->name##_rect) { \ + if (!test_one_func(format_desc, &test_format_##name, #name)) { \ + success = FALSE; \ + } \ + } + # define TEST_FORMAT_METADATA(name) \ if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \ success = FALSE; \ @@ -825,9 +828,9 @@ test_all(void) } TEST_ONE_PACK_FUNC(pack_rgba_float); - TEST_ONE_UNPACK_FUNC(unpack_rgba); + TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba); TEST_ONE_PACK_FUNC(pack_rgba_8unorm); - TEST_ONE_UNPACK_FUNC(unpack_rgba_8unorm); + 
TEST_ONE_UNPACK_RECT_FUNC(unpack_rgba_8unorm); TEST_ONE_UNPACK_FUNC(unpack_z_32unorm); TEST_ONE_PACK_FUNC(pack_z_32unorm);