util: Fix big-endian handling of z/s formats.

channel datatypes in Mesa are the host's endianness. For example, PIPE_FORMAT_R32_UINT doesn't do a bswap in and out in u_format_table.c's pack/unpack functions. So, z32_unorm shouldn't be byte swapping either, and neither should z24s8 which is also a packed format, and once you've got those it becomes clear that all of the swaps in this file were mistaken. Things would mostly work out because it's unusual to read/write Z/S data through the GL API, and even for drivers like softpipe as long as the pack and unpack both swap it could work anyway. However, the bug would be visible in glReadPixels() with the matching datatype which would hit the memcpy fastpath without doing another swap. Caught by a mesa/main unit test on transitioning to using these pack/unpack functions. Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Adam Jackson <ajax@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10336>
2026-04-30 23:28:06 +02:00 · 2021-04-19 15:58:06 -07:00 · 2021-04-19 15:58:06 -07:00 · 8bd91d1368
commit 8bd91d1368
parent 3ccd0891d3
1 changed files with 35 additions and 62 deletions
--- a/src/util/format/u_format_zs.c
+++ b/src/util/format/u_format_zs.c
@ -146,8 +146,7 @@ util_format_z16_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_strid
      float *dst = dst_row;
      const uint16_t *src = (const uint16_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         *dst++ = z16_unorm_to_z32_float(value);
+         *dst++ = z16_unorm_to_z32_float(*src++);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -164,9 +163,7 @@ util_format_z16_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_strid
      const float *src = src_row;
      uint16_t *dst = (uint16_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint16_t value;
-         value = z32_float_to_z16_unorm(*src++);
-         *dst++ = util_le16_to_cpu(value);
+         *dst++ = z32_float_to_z16_unorm(*src++);
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -183,8 +180,7 @@ util_format_z16_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_
      uint32_t *dst = dst_row;
      const uint16_t *src = (const uint16_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint16_t value = util_cpu_to_le16(*src++);
-         *dst++ = z16_unorm_to_z32_unorm(value);
+         *dst++ = z16_unorm_to_z32_unorm(*src++);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -201,9 +197,7 @@ util_format_z16_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_str
      const uint32_t *src = src_row;
      uint16_t *dst = (uint16_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint16_t value;
-         value = z32_unorm_to_z16_unorm(*src++);
-         *dst++ = util_le16_to_cpu(value);
+         *dst++ = z32_unorm_to_z16_unorm(*src++);
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -220,8 +214,7 @@ util_format_z32_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_strid
      float *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z32_unorm_to_z32_float(value);
+         *dst++ = z32_unorm_to_z32_float(*src++);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -238,9 +231,7 @@ util_format_z32_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_strid
      const float *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z32_unorm(*src++);
-         *dst++ = util_le32_to_cpu(value);
+         *dst++ =z32_float_to_z32_unorm(*src++);
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -392,8 +383,7 @@ util_format_z24_unorm_s8_uint_unpack_z_float(float *restrict dst_row, unsigned d
      float *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value =  util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_float((*src++) & 0xffffff);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -410,10 +400,10 @@ util_format_z24_unorm_s8_uint_pack_z_float(uint8_t *restrict dst_row, unsigned d
      const float *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0xff000000;
         value |= z32_float_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -431,8 +421,7 @@ util_format_z24_unorm_s8_uint_unpack_z24(uint8_t *restrict dst_row, unsigned dst
      uint32_t *dst = (uint32_t *)dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value =  util_cpu_to_le32(*src++);
-         *dst++ = (value & 0xffffff);
+         *dst++ = ((*src++) & 0xffffff);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -449,11 +438,11 @@ util_format_z24_unorm_s8_uint_pack_z24(uint8_t *restrict dst_row, unsigned dst_s
      const uint32_t *src = (const uint32_t *)src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0xff000000;
         value |= *src & 0xffffff;
         src++;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -470,8 +459,7 @@ util_format_z24_unorm_s8_uint_unpack_z_32unorm(uint32_t *restrict dst_row, unsig
      uint32_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) & 0xffffff);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -488,10 +476,10 @@ util_format_z24_unorm_s8_uint_pack_z_32unorm(uint8_t *restrict dst_row, unsigned
      const uint32_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0xff000000;
         value |= z32_unorm_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -508,8 +496,7 @@ util_format_z24_unorm_s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned
      uint8_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = value >> 24;
+         *dst++ = (*src++) >> 24;
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -529,7 +516,7 @@ util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned d
         uint32_t value = util_le32_to_cpu(*dst);
         value &= 0x00ffffff;
         value |= (uint32_t)*src++ << 24;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -566,8 +553,7 @@ util_format_s8_uint_z24_unorm_unpack_z_float(float *restrict dst_row, unsigned d
      float *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value >> 8);
+         *dst++ = z24_unorm_to_z32_float((*src++) >> 8);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -584,10 +570,10 @@ util_format_s8_uint_z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned d
      const float *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0x000000ff;
         value |= z32_float_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -604,7 +590,7 @@ util_format_s8_uint_z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsig
      uint32_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
+         uint32_t value = *src++;
         *dst++ = z24_unorm_to_z32_unorm(value >> 8);
      }
      src_row += src_stride/sizeof(*src_row);
@ -622,10 +608,10 @@ util_format_s8_uint_z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned
      const uint32_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0x000000ff;
         value |= *src++ & 0xffffff00;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -642,8 +628,7 @@ util_format_s8_uint_z24_unorm_unpack_s_8uint(uint8_t *restrict dst_row, unsigned
      uint8_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = value & 0xff;
+         *dst++ = (*src++) & 0xff;
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -660,10 +645,10 @@ util_format_s8_uint_z24_unorm_pack_s_8uint(uint8_t *restrict dst_row, unsigned d
      const uint8_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_le32_to_cpu(*dst);
+         uint32_t value = *dst;
         value &= 0xffffff00;
         value |= *src++;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = value;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -680,8 +665,7 @@ util_format_z24x8_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_str
      float *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_float((*src++) & 0xffffff);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -698,9 +682,7 @@ util_format_z24x8_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_str
      const float *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z24_unorm(*src++);
-         *dst++ = util_le32_to_cpu(value);
+         *dst++ = z32_float_to_z24_unorm(*src++);
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -717,8 +699,7 @@ util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned ds
      uint32_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) & 0xffffff);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -735,9 +716,7 @@ util_format_z24x8_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_s
      const uint32_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_unorm_to_z24_unorm(*src++);
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_unorm_to_z24_unorm(*src++);
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -754,8 +733,7 @@ util_format_x8z24_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_str
      float *dst = dst_row;
      const uint32_t *src = (uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_float(value >> 8);
+         *dst++ = z24_unorm_to_z32_float((*src++) >> 8);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -772,9 +750,7 @@ util_format_x8z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_str
      const float *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_float_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_float_to_z24_unorm(*src++) << 8;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -791,8 +767,7 @@ util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned ds
      uint32_t *dst = dst_row;
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value = util_cpu_to_le32(*src++);
-         *dst++ = z24_unorm_to_z32_unorm(value >> 8);
+         *dst++ = z24_unorm_to_z32_unorm((*src++) >> 8);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
@ -809,9 +784,7 @@ util_format_x8z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_s
      const uint32_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for(x = 0; x < width; ++x) {
-         uint32_t value;
-         value = z32_unorm_to_z24_unorm(*src++) << 8;
-         *dst++ = util_cpu_to_le32(value);
+         *dst++ = z32_unorm_to_z24_unorm(*src++) << 8;
      }
      dst_row += dst_stride/sizeof(*dst_row);
      src_row += src_stride/sizeof(*src_row);
@ -921,7 +894,7 @@ util_format_z32_float_s8x24_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigne
      const uint8_t *src = src_row;
      uint32_t *dst = ((uint32_t *)dst_row) + 1;
      for(x = 0; x < width; ++x) {
-         *dst = util_cpu_to_le32(*src);
+         *dst = *src;
         src += 1;
         dst += 2;
      }