diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index ac6f8a624be..cf79a8c1345 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -23,6 +23,9 @@ */ #include +#ifdef __ARM_NEON__ +#include +#endif #include "errors.h" #include "format_utils.h" @@ -135,6 +138,65 @@ compute_src2dst_component_mapping(uint8_t *src2rgba, uint8_t *rgba2dst, } } +/* Convert pixel format A1B5G5R5 to B5G5R5A1. + * + * This is used to improve performance for 16bit pixel formats. + */ +static __inline void +conversion_16bit_argb2rgba(uint16_t *restrict dst, + const uint16_t *restrict src, + size_t num_pixels) +{ + size_t i = 0; + +#ifdef __ARM_NEON__ + size_t j; + uint16x8_t pix_in; + uint16x8_t pix_out; + uint16x8_t rgb; + uint16x8_t a; + size_t loop_num = num_pixels / 32; + + for (j = 0; j < loop_num; j++) { + pix_in = vld1q_u16(src + i); + rgb = vshrq_n_u16(pix_in, 1); + a = vshlq_n_u16(pix_in, 15); + pix_out = vorrq_u16(rgb, a); + vst1q_u16(dst + i, pix_out); + i += 8; + + pix_in = vld1q_u16(src + i); + rgb = vshrq_n_u16(pix_in, 1); + a = vshlq_n_u16(pix_in, 15); + pix_out = vorrq_u16(rgb, a); + vst1q_u16(dst + i, pix_out); + i += 8; + + pix_in = vld1q_u16(src + i); + rgb = vshrq_n_u16(pix_in, 1); + a = vshlq_n_u16(pix_in, 15); + pix_out = vorrq_u16(rgb, a); + vst1q_u16(dst + i, pix_out); + i += 8; + + pix_in = vld1q_u16(src + i); + rgb = vshrq_n_u16(pix_in, 1); + a = vshlq_n_u16(pix_in, 15); + pix_out = vorrq_u16(rgb, a); + vst1q_u16(dst + i, pix_out); + i += 8; + } + + for (; i < num_pixels; ++i) { + dst[i] = src[i] >> 1 | src[i] << 15; + } +#else + for (i = 0; i < num_pixels; ++i) { + dst[i] = src[i] >> 1 | src[i] << 15; + } +#endif +} + /** * This function is used by clients of _mesa_format_convert to obtain * the rebase swizzle to use in a format conversion based on the base @@ -274,6 +336,7 @@ convert_ubyte_rgba_to_bgra(size_t width, size_t height, * the dst or the src -depending on whether we are doing * an upload or a download respectively- are the same). */ + void _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, void *void_src, uint32_t src_format, size_t src_stride, @@ -620,49 +683,56 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, free(tmp_float); } else { - tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte)); + if (src_format == PIPE_FORMAT_A1B5G5R5_UNORM && dst_format == PIPE_FORMAT_B5G5R5A1_UNORM) { + uint16_t *value_src_ptr = (uint16_t *) src; + uint16_t *value_dst_ptr = (uint16_t *) dst; + + conversion_16bit_argb2rgba(value_dst_ptr, value_src_ptr, width * height); - if (src_format_is_mesa_array_format) { - compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle, - rebased_src2rgba); - for (row = 0; row < height; ++row) { - _mesa_swizzle_and_convert(tmp_ubyte + row * width, - MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, - src, src_type, src_num_channels, - rebased_src2rgba, normalized, width); - src += src_stride; - } } else { - for (row = 0; row < height; ++row) { - _mesa_unpack_ubyte_rgba_row(src_format, width, - src, tmp_ubyte + row * width); - if (rebase_swizzle) + tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte)); + if (src_format_is_mesa_array_format) { + compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle, + rebased_src2rgba); + for (row = 0; row < height; ++row) { _mesa_swizzle_and_convert(tmp_ubyte + row * width, - MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, - tmp_ubyte + row * width, - MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, - rebase_swizzle, normalized, width); - src += src_stride; + MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, + src, src_type, src_num_channels, + rebased_src2rgba, normalized, width); + src += src_stride; + } + } else { + for (row = 0; row < height; ++row) { + _mesa_unpack_ubyte_rgba_row(src_format, width, + src, tmp_ubyte + row * width); + if (rebase_swizzle) + _mesa_swizzle_and_convert(tmp_ubyte + row * width, + MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, + tmp_ubyte + row * width, + MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, + rebase_swizzle, normalized, width); + src += src_stride; + } } - } - if (dst_format_is_mesa_array_format) { - for (row = 0; row < height; ++row) { - _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels, - tmp_ubyte + row * width, - MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, - rgba2dst, normalized, width); - dst += dst_stride; + if (dst_format_is_mesa_array_format) { + for (row = 0; row < height; ++row) { + _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels, + tmp_ubyte + row * width, + MESA_ARRAY_FORMAT_TYPE_UBYTE, 4, + rgba2dst, normalized, width); + dst += dst_stride; + } + } else { + for (row = 0; row < height; ++row) { + _mesa_pack_ubyte_rgba_row(dst_format, width, + (const uint8_t *)(tmp_ubyte + row * width), dst); + dst += dst_stride; + } } - } else { - for (row = 0; row < height; ++row) { - _mesa_pack_ubyte_rgba_row(dst_format, width, - (const uint8_t *)(tmp_ubyte + row * width), dst); - dst += dst_stride; - } - } - free(tmp_ubyte); + free(tmp_ubyte); + } } }