tu: Match SW color clear value packing with HW

Color clears may happen via different paths: BLIT_EVENT_CLEAR, R2D, or a draw call. And which path to take may depend sysmem/gmem selection. The "Appendix I: Invariance" of the Vulkan spec encourages implementations to produce the same results for the same operation. Unfortunately I haven't found any ready-made packing functions in the common utils. Tested by writing edge-case color values through Vulkan ways of clearing color, and from fragment shader. E5B9G9R9, B10G11R11, B5G5R5, A2R10G10B10 are not handled due to complexity. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Assisted-by: OpenAI Codex (GPT-5.5) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41972>
2026-06-21 01:38:23 +02:00 · 2026-06-02 15:45:52 +02:00 · 2026-06-02 15:45:52 +02:00 · 38a10950e3
commit 38a10950e3
parent 6a7d3020a7
3 changed files with 121 additions and 13 deletions
--- a/src/freedreno/ci/traces-freedreno-restricted.toml
+++ b/src/freedreno/ci/traces-freedreno-restricted.toml
@ -590,8 +590,8 @@ freedreno-a750 = { checksum = "66854c4c188d6c31b7eee767785096313295a09a8ca3c71ea
 path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none2.trace"
 nonloopable = true
 [traces.devices]
-freedreno-a660 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" }
-freedreno-a750 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" }
+freedreno-a660 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" }
+freedreno-a750 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" }

 [[traces]]
 path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none.trace"
@ -1248,7 +1248,7 @@ freedreno-a660 = { checksum = "8be08d609a38c2410a4abcb79c421b89df0b149bd4a7c3a72
 path = "steam-d3d11/unigine_heaven_1.rdc"
 sysmem = 800
 [traces.devices]
-freedreno-a660 = { checksum = "9f773449f8a418a80ba28fd2c8242b2f31bdcae484fb170fcebfee54afcdb0b7" }
+freedreno-a660 = { checksum = "6dcde8a286917af66f23ecdd81eecfc7e412d7e305d6e6657e639802eac159ef" }

 [[traces]]
 path = "steam-d3d11/unigine_superposition_1.rdc"
--- a/src/freedreno/ci/traces-freedreno.yml
+++ b/src/freedreno/ci/traces-freedreno.yml
@ -297,7 +297,7 @@ traces:
    freedreno-a618:
      checksum: b0ac8fb4b2b2268cfb87944730125886
    zink-a618:
-      checksum: bd713b79eaa2a7ebbf4ebebab3577e40
+      checksum: 47c816354d1ea4dcb64827394abf089e

  humus/DynamicBranching3-v2.trace:
    freedreno-a306:
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@ -28,21 +28,118 @@

 static const VkOffset2D blt_no_coord = { ~0, ~0 };

+/* The helpers below quantize floats to match shader export behavior and avoid
+ * rounding mismatches between hardware paths (R2D blit engine, 3D pipeline,
+ * etc.).
+ *
+ * Vulkan does not guarantee that values written through different commands
+ * will match. However, "Appendix I: Invariance" encourages implementations to
+ * return the same values for the same operations with the same inputs. We would
+ * otherwise violate that because GMEM and sysmem clears use different paths,
+ * and CmdClearAttachments can use either HW clears or 3D clears.
+ */
+
 static uint32_t
 tu_pack_float32_for_unorm(float val, int bits)
 {
-   return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1));
+   val = CLAMP(val, 0.0f, 1.0f);
+
+   uint32_t m = BITFIELD_MASK(bits < 8 ? 8 : bits);
+
+   if (val >= 1.0f)
+      return BITFIELD_MASK(bits);
+
+   float scaled = nextafterf(val * (float) m, INFINITY) + 0.5f;
+   uint32_t result = MIN2((uint32_t) floorf(scaled), m);
+
+   if (bits < 8)
+      result >>= (8 - bits);
+   return result;
 }

-/* Quantize a float to exact UNORMn precision to avoid F32->UNORMn rounding
- * mismatches between different HW paths (R2D blit engine, 3D pipeline, etc).
- */
 static float
-tu_quantize_float_for_unorm(float val, int bits)
+tu_quantize_float32_for_unorm(float val, int bits)
 {
   return (float) tu_pack_float32_for_unorm(val, bits) / (float) ((1 << bits) - 1);
 }

+static int32_t
+tu_pack_float32_for_snorm(float val, int bits)
+{
+   val = CLAMP(val, -1.0f, 1.0f);
+
+   int32_t m = BITFIELD_MASK(bits - 1);
+   float scale = nextafterf((float) m, INFINITY);
+
+   if (val >= 0.0f) {
+      double scaled = (double) val * (double) scale + 0.5;
+      return MIN2((int32_t) floor(scaled), m);
+   } else {
+      double scaled = (double) val * (double) scale - 0.5;
+      return MAX2((int32_t) ceil(scaled), -m);
+   }
+}
+
+static float
+tu_quantize_float32_for_snorm(float val, int bits)
+{
+   return (float) tu_pack_float32_for_snorm(val, bits) / (float) BITFIELD_MASK(bits - 1);
+}
+
+static bool
+tu_pack_float32_for_color(enum pipe_format format, const float src[4], uint32_t clear_value[4])
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc->block.bits > 64)
+      return false;
+
+   bool is_normalized = desc->is_unorm || desc->is_snorm;
+   bool is_float16 = util_format_is_float16(format);
+   unsigned bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
+
+   switch (bits) {
+   case 4:
+   case 8:
+   case 10:
+      if (!is_normalized)
+         return false;
+      break;
+   case 16:
+      if (!is_normalized && !is_float16)
+         return false;
+      break;
+   default:
+      return false;
+   }
+
+   uint64_t packed = 0;
+   for (unsigned i = 0; i < 4; i++) {
+      if (desc->swizzle[i] > PIPE_SWIZZLE_W)
+         continue;
+
+      const struct util_format_channel_description *ch = &desc->channel[i];
+      uint32_t packed_ch = 0;
+
+      if (is_normalized && ch->type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         packed_ch = tu_pack_float32_for_unorm(src[i], ch->size);
+      } else if (is_normalized && ch->type == UTIL_FORMAT_TYPE_SIGNED) {
+         packed_ch = (uint32_t) tu_pack_float32_for_snorm(src[i], ch->size) & BITFIELD_MASK(ch->size);
+      } else if (is_float16) {
+         packed_ch = _mesa_float_to_float16_rtz(src[i]);
+      } else {
+         UNREACHABLE("unsupported format");
+      }
+      packed |= (uint64_t) packed_ch << ch->shift;
+   }
+
+   clear_value[0] = (uint32_t) packed;
+   clear_value[1] = (uint32_t) (packed >> 32);
+   clear_value[2] = 0;
+   clear_value[3] = 0;
+   return true;
+}
+
 static uint32_t
 tu_pack_float32_for_unorm_depth(float val, unsigned bits)
 {
@ -255,15 +352,24 @@ r2d_clear_value(struct tu_cmd_buffer *cmd,
               linear = util_format_linear_to_srgb_float(val->color.float32[i]);

            if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
-               clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
+               clear_value[i] = tu_pack_float32_for_snorm(linear, 8);
            else
               clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
         } else if (ifmt == R2D_FLOAT16) {
-            clear_value[i] = _mesa_float_to_half(val->color.float32[i]);
+            clear_value[i] = _mesa_float_to_float16_rtz(val->color.float32[i]);
         } else {
            assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
                   ifmt == R2D_INT16 || ifmt == R2D_INT8);
-            clear_value[i] = val->color.uint32[i];
+            if (ifmt == R2D_FLOAT32 && ch->normalized) {
+               if (ch->type == UTIL_FORMAT_TYPE_UNSIGNED)
+                  clear_value[i] = fui(tu_quantize_float32_for_unorm(val->color.float32[i], ch->size));
+               else if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
+                  clear_value[i] = fui(tu_quantize_float32_for_snorm(val->color.float32[i], ch->size));
+               else
+                  clear_value[i] = val->color.uint32[i];
+            } else {
+               clear_value[i] = val->color.uint32[i];
+            }
         }
      }
      break;
@ -2103,6 +2209,9 @@ pack_blit_event_clear_value(const VkClearValue *val, enum pipe_format format, ui
         tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
   }

+   if (tu_pack_float32_for_color(format, tmp, clear_value))
+      return;
+
 #define PACK_F(type) util_format_##type##_pack_rgba_float \
   ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
   switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
@ -6352,4 +6461,3 @@ tu_blit_subsampled_apron(struct tu_cmd_buffer *cmd,
   }
 }
 TU_GENX(tu_blit_subsampled_apron);
-