Merge branch 'main' into 'main'

mesa: main: 16-bit Pixel Conversion Improvement See merge request mesa/mesa!38531
2025-12-20 05:10:11 +01:00 · 2025-12-20 00:47:14 +00:00 · 2025-12-20 00:47:14 +00:00 · 1ad0dc9c05
commit 1ad0dc9c05
parent c430f394c5 53add8e753
1 changed files with 106 additions and 36 deletions
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@ -23,6 +23,9 @@
 */
 #include <stdlib.h>
 #ifdef __ARM_NEON__
 #include <arm_neon.h>
 #endif
 #include "errors.h"
 #include "format_utils.h"
@ -135,6 +138,65 @@ compute_src2dst_component_mapping(uint8_t *src2rgba, uint8_t *rgba2dst,
   }
 }
 /* Convert pixel format A1B5G5R5 to B5G5R5A1.
 *
 * This is used to improve performance for 16bit pixel formats.
 */
 static __inline void
 conversion_16bit_argb2rgba(uint16_t *restrict dst, 
                           const uint16_t *restrict src,
                           size_t num_pixels)
 {
   size_t i = 0;
 #ifdef __ARM_NEON__
   size_t j;
   uint16x8_t pix_in;
   uint16x8_t pix_out;
   uint16x8_t rgb;
   uint16x8_t a;
   size_t loop_num = num_pixels / 32;
   for (j = 0; j < loop_num; j++) {
      pix_in = vld1q_u16(src + i);
      rgb = vshrq_n_u16(pix_in, 1);
      a = vshlq_n_u16(pix_in, 15);
      pix_out = vorrq_u16(rgb, a);
      vst1q_u16(dst + i, pix_out);
      i += 8;
      pix_in = vld1q_u16(src + i);
      rgb = vshrq_n_u16(pix_in, 1);
      a = vshlq_n_u16(pix_in, 15);
      pix_out = vorrq_u16(rgb, a);
      vst1q_u16(dst + i, pix_out);
      i += 8;
      pix_in = vld1q_u16(src + i);
      rgb = vshrq_n_u16(pix_in, 1);
      a = vshlq_n_u16(pix_in, 15);
      pix_out = vorrq_u16(rgb, a);
      vst1q_u16(dst + i, pix_out);
      i += 8;
      pix_in = vld1q_u16(src + i);
      rgb = vshrq_n_u16(pix_in, 1);
      a = vshlq_n_u16(pix_in, 15);
      pix_out = vorrq_u16(rgb, a);
      vst1q_u16(dst + i, pix_out);
      i += 8;
   }
   for (; i < num_pixels; ++i) {
      dst[i] = src[i] >> 1 | src[i] << 15;
   }
 #else
   for (i = 0; i < num_pixels; ++i) {
      dst[i] = src[i] >> 1 | src[i] << 15;
   }
 #endif
 }
 /**
 * This function is used by clients of _mesa_format_convert to obtain
 * the rebase swizzle to use in a format conversion based on the base
@ -274,6 +336,7 @@ convert_ubyte_rgba_to_bgra(size_t width, size_t height,
 *                        the dst or the src -depending on whether we are doing
 *                        an upload or a download respectively- are the same).
 */
 void
 _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
                     void *void_src, uint32_t src_format, size_t src_stride,
@ -620,49 +683,56 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
      free(tmp_float);
   } else {
-      tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
+      if (src_format == PIPE_FORMAT_A1B5G5R5_UNORM && dst_format == PIPE_FORMAT_B5G5R5A1_UNORM) {
         uint16_t *value_src_ptr = (uint16_t *) src;
         uint16_t *value_dst_ptr = (uint16_t *) dst;
         conversion_16bit_argb2rgba(value_dst_ptr, value_src_ptr, width * height);
      if (src_format_is_mesa_array_format) {
         compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
                                                rebased_src2rgba);
         for (row = 0; row < height; ++row) {
            _mesa_swizzle_and_convert(tmp_ubyte + row * width,
                                      MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
                                      src, src_type, src_num_channels,
                                      rebased_src2rgba, normalized, width);
            src += src_stride;
         }
      } else {
-         for (row = 0; row < height; ++row) {
+         tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte));
-            _mesa_unpack_ubyte_rgba_row(src_format, width,
+         if (src_format_is_mesa_array_format) {
-                                        src, tmp_ubyte + row * width);
+            compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
-            if (rebase_swizzle)
+                                                   rebased_src2rgba);
            for (row = 0; row < height; ++row) {
               _mesa_swizzle_and_convert(tmp_ubyte + row * width,
-                                         MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                       MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
-                                         tmp_ubyte + row * width,
+                                       src, src_type, src_num_channels,
-                                         MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                       rebased_src2rgba, normalized, width);
-                                         rebase_swizzle, normalized, width);
+               src += src_stride;
-            src += src_stride;
+            }
         } else {
            for (row = 0; row < height; ++row) {
               _mesa_unpack_ubyte_rgba_row(src_format, width,
                                          src, tmp_ubyte + row * width);
               if (rebase_swizzle)
                  _mesa_swizzle_and_convert(tmp_ubyte + row * width,
                                          MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
                                          tmp_ubyte + row * width,
                                          MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
                                          rebase_swizzle, normalized, width);
               src += src_stride;
            }
         }
      }
-      if (dst_format_is_mesa_array_format) {
+         if (dst_format_is_mesa_array_format) {
-         for (row = 0; row < height; ++row) {
+            for (row = 0; row < height; ++row) {
-            _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
+               _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
-                                      tmp_ubyte + row * width,
+                                       tmp_ubyte + row * width,
-                                      MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                       MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
-                                      rgba2dst, normalized, width);
+                                       rgba2dst, normalized, width);
-            dst += dst_stride;
+               dst += dst_stride;
            }
         } else {
            for (row = 0; row < height; ++row) {
               _mesa_pack_ubyte_rgba_row(dst_format, width,
                                       (const uint8_t *)(tmp_ubyte + row * width), dst);
               dst += dst_stride;
            }
         }
      } else {
         for (row = 0; row < height; ++row) {
            _mesa_pack_ubyte_rgba_row(dst_format, width,
                                      (const uint8_t *)(tmp_ubyte + row * width), dst);
            dst += dst_stride;
         }
      }
-      free(tmp_ubyte);
+         free(tmp_ubyte);
      }
   }
 }