mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
Merge branch 'main' into 'main'
mesa: main: 16-bit Pixel Conversion Improvement See merge request mesa/mesa!38531
This commit is contained in:
commit
1ad0dc9c05
1 changed files with 106 additions and 36 deletions
|
|
@ -23,6 +23,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "errors.h"
|
#include "errors.h"
|
||||||
#include "format_utils.h"
|
#include "format_utils.h"
|
||||||
|
|
@ -135,6 +138,65 @@ compute_src2dst_component_mapping(uint8_t *src2rgba, uint8_t *rgba2dst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert pixel format A1B5G5R5 to B5G5R5A1.
|
||||||
|
*
|
||||||
|
* This is used to improve performance for 16bit pixel formats.
|
||||||
|
*/
|
||||||
|
static __inline void
|
||||||
|
conversion_16bit_argb2rgba(uint16_t *restrict dst,
|
||||||
|
const uint16_t *restrict src,
|
||||||
|
size_t num_pixels)
|
||||||
|
{
|
||||||
|
size_t i = 0;
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
size_t j;
|
||||||
|
uint16x8_t pix_in;
|
||||||
|
uint16x8_t pix_out;
|
||||||
|
uint16x8_t rgb;
|
||||||
|
uint16x8_t a;
|
||||||
|
size_t loop_num = num_pixels / 32;
|
||||||
|
|
||||||
|
for (j = 0; j < loop_num; j++) {
|
||||||
|
pix_in = vld1q_u16(src + i);
|
||||||
|
rgb = vshrq_n_u16(pix_in, 1);
|
||||||
|
a = vshlq_n_u16(pix_in, 15);
|
||||||
|
pix_out = vorrq_u16(rgb, a);
|
||||||
|
vst1q_u16(dst + i, pix_out);
|
||||||
|
i += 8;
|
||||||
|
|
||||||
|
pix_in = vld1q_u16(src + i);
|
||||||
|
rgb = vshrq_n_u16(pix_in, 1);
|
||||||
|
a = vshlq_n_u16(pix_in, 15);
|
||||||
|
pix_out = vorrq_u16(rgb, a);
|
||||||
|
vst1q_u16(dst + i, pix_out);
|
||||||
|
i += 8;
|
||||||
|
|
||||||
|
pix_in = vld1q_u16(src + i);
|
||||||
|
rgb = vshrq_n_u16(pix_in, 1);
|
||||||
|
a = vshlq_n_u16(pix_in, 15);
|
||||||
|
pix_out = vorrq_u16(rgb, a);
|
||||||
|
vst1q_u16(dst + i, pix_out);
|
||||||
|
i += 8;
|
||||||
|
|
||||||
|
pix_in = vld1q_u16(src + i);
|
||||||
|
rgb = vshrq_n_u16(pix_in, 1);
|
||||||
|
a = vshlq_n_u16(pix_in, 15);
|
||||||
|
pix_out = vorrq_u16(rgb, a);
|
||||||
|
vst1q_u16(dst + i, pix_out);
|
||||||
|
i += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < num_pixels; ++i) {
|
||||||
|
dst[i] = src[i] >> 1 | src[i] << 15;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for (i = 0; i < num_pixels; ++i) {
|
||||||
|
dst[i] = src[i] >> 1 | src[i] << 15;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function is used by clients of _mesa_format_convert to obtain
|
* This function is used by clients of _mesa_format_convert to obtain
|
||||||
* the rebase swizzle to use in a format conversion based on the base
|
* the rebase swizzle to use in a format conversion based on the base
|
||||||
|
|
@ -274,6 +336,7 @@ convert_ubyte_rgba_to_bgra(size_t width, size_t height,
|
||||||
* the dst or the src -depending on whether we are doing
|
* the dst or the src -depending on whether we are doing
|
||||||
* an upload or a download respectively- are the same).
|
* an upload or a download respectively- are the same).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
void
|
||||||
_mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
|
_mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
|
||||||
void *void_src, uint32_t src_format, size_t src_stride,
|
void *void_src, uint32_t src_format, size_t src_stride,
|
||||||
|
|
@ -620,49 +683,56 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
|
||||||
|
|
||||||
free(tmp_float);
|
free(tmp_float);
|
||||||
} else {
|
} else {
|
||||||
tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
|
if (src_format == PIPE_FORMAT_A1B5G5R5_UNORM && dst_format == PIPE_FORMAT_B5G5R5A1_UNORM) {
|
||||||
|
uint16_t *value_src_ptr = (uint16_t *) src;
|
||||||
|
uint16_t *value_dst_ptr = (uint16_t *) dst;
|
||||||
|
|
||||||
|
conversion_16bit_argb2rgba(value_dst_ptr, value_src_ptr, width * height);
|
||||||
|
|
||||||
if (src_format_is_mesa_array_format) {
|
|
||||||
compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
|
|
||||||
rebased_src2rgba);
|
|
||||||
for (row = 0; row < height; ++row) {
|
|
||||||
_mesa_swizzle_and_convert(tmp_ubyte + row * width,
|
|
||||||
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
|
||||||
src, src_type, src_num_channels,
|
|
||||||
rebased_src2rgba, normalized, width);
|
|
||||||
src += src_stride;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
for (row = 0; row < height; ++row) {
|
tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
|
||||||
_mesa_unpack_ubyte_rgba_row(src_format, width,
|
if (src_format_is_mesa_array_format) {
|
||||||
src, tmp_ubyte + row * width);
|
compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
|
||||||
if (rebase_swizzle)
|
rebased_src2rgba);
|
||||||
|
for (row = 0; row < height; ++row) {
|
||||||
_mesa_swizzle_and_convert(tmp_ubyte + row * width,
|
_mesa_swizzle_and_convert(tmp_ubyte + row * width,
|
||||||
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
||||||
tmp_ubyte + row * width,
|
src, src_type, src_num_channels,
|
||||||
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
rebased_src2rgba, normalized, width);
|
||||||
rebase_swizzle, normalized, width);
|
src += src_stride;
|
||||||
src += src_stride;
|
}
|
||||||
|
} else {
|
||||||
|
for (row = 0; row < height; ++row) {
|
||||||
|
_mesa_unpack_ubyte_rgba_row(src_format, width,
|
||||||
|
src, tmp_ubyte + row * width);
|
||||||
|
if (rebase_swizzle)
|
||||||
|
_mesa_swizzle_and_convert(tmp_ubyte + row * width,
|
||||||
|
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
||||||
|
tmp_ubyte + row * width,
|
||||||
|
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
||||||
|
rebase_swizzle, normalized, width);
|
||||||
|
src += src_stride;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (dst_format_is_mesa_array_format) {
|
if (dst_format_is_mesa_array_format) {
|
||||||
for (row = 0; row < height; ++row) {
|
for (row = 0; row < height; ++row) {
|
||||||
_mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
|
_mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
|
||||||
tmp_ubyte + row * width,
|
tmp_ubyte + row * width,
|
||||||
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
|
||||||
rgba2dst, normalized, width);
|
rgba2dst, normalized, width);
|
||||||
dst += dst_stride;
|
dst += dst_stride;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (row = 0; row < height; ++row) {
|
||||||
|
_mesa_pack_ubyte_rgba_row(dst_format, width,
|
||||||
|
(const uint8_t *)(tmp_ubyte + row * width), dst);
|
||||||
|
dst += dst_stride;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
for (row = 0; row < height; ++row) {
|
|
||||||
_mesa_pack_ubyte_rgba_row(dst_format, width,
|
|
||||||
(const uint8_t *)(tmp_ubyte + row * width), dst);
|
|
||||||
dst += dst_stride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
free(tmp_ubyte);
|
free(tmp_ubyte);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue