Merge branch 'main' into 'main'

mesa: main: 16-bit Pixel Conversion Improvement

See merge request mesa/mesa!38531
This commit is contained in:
Gustavo Vilches 2025-12-20 00:47:14 +00:00
commit 1ad0dc9c05

View file

@ -23,6 +23,9 @@
*/
#include <stdlib.h>
#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif
#include "errors.h"
#include "format_utils.h"
@ -135,6 +138,65 @@ compute_src2dst_component_mapping(uint8_t *src2rgba, uint8_t *rgba2dst,
}
}
/* Convert pixel format A1B5G5R5 to B5G5R5A1.
*
* This is used to improve performance for 16bit pixel formats.
*/
static __inline void
conversion_16bit_argb2rgba(uint16_t *restrict dst,
const uint16_t *restrict src,
size_t num_pixels)
{
size_t i = 0;
#ifdef __ARM_NEON__
size_t j;
uint16x8_t pix_in;
uint16x8_t pix_out;
uint16x8_t rgb;
uint16x8_t a;
size_t loop_num = num_pixels / 32;
for (j = 0; j < loop_num; j++) {
pix_in = vld1q_u16(src + i);
rgb = vshrq_n_u16(pix_in, 1);
a = vshlq_n_u16(pix_in, 15);
pix_out = vorrq_u16(rgb, a);
vst1q_u16(dst + i, pix_out);
i += 8;
pix_in = vld1q_u16(src + i);
rgb = vshrq_n_u16(pix_in, 1);
a = vshlq_n_u16(pix_in, 15);
pix_out = vorrq_u16(rgb, a);
vst1q_u16(dst + i, pix_out);
i += 8;
pix_in = vld1q_u16(src + i);
rgb = vshrq_n_u16(pix_in, 1);
a = vshlq_n_u16(pix_in, 15);
pix_out = vorrq_u16(rgb, a);
vst1q_u16(dst + i, pix_out);
i += 8;
pix_in = vld1q_u16(src + i);
rgb = vshrq_n_u16(pix_in, 1);
a = vshlq_n_u16(pix_in, 15);
pix_out = vorrq_u16(rgb, a);
vst1q_u16(dst + i, pix_out);
i += 8;
}
for (; i < num_pixels; ++i) {
dst[i] = src[i] >> 1 | src[i] << 15;
}
#else
for (i = 0; i < num_pixels; ++i) {
dst[i] = src[i] >> 1 | src[i] << 15;
}
#endif
}
/**
* This function is used by clients of _mesa_format_convert to obtain
* the rebase swizzle to use in a format conversion based on the base
@ -274,6 +336,7 @@ convert_ubyte_rgba_to_bgra(size_t width, size_t height,
* the dst or the src -depending on whether we are doing
* an upload or a download respectively- are the same).
*/
void
_mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
void *void_src, uint32_t src_format, size_t src_stride,
@ -620,8 +683,14 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
free(tmp_float);
} else {
tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
if (src_format == PIPE_FORMAT_A1B5G5R5_UNORM && dst_format == PIPE_FORMAT_B5G5R5A1_UNORM) {
uint16_t *value_src_ptr = (uint16_t *) src;
uint16_t *value_dst_ptr = (uint16_t *) dst;
conversion_16bit_argb2rgba(value_dst_ptr, value_src_ptr, width * height);
} else {
tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
if (src_format_is_mesa_array_format) {
compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
rebased_src2rgba);
@ -664,6 +733,7 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
free(tmp_ubyte);
}
}
}
/**