tu: Match SW color clear value packing with HW

Color clears may happen via different paths: BLIT_EVENT_CLEAR, R2D, or a
draw call. Which path is taken may depend on the sysmem/gmem selection.

The "Appendix I: Invariance" of the Vulkan spec encourages implementations
to produce the same results for the same operation.

Unfortunately I haven't found any ready-made packing functions in
the common utils.

Tested by writing edge-case color values through each of the Vulkan
color-clear paths and from a fragment shader.

E5B9G9R9, B10G11R11, B5G5R5, A2R10G10B10 are not handled due to
complexity.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Assisted-by: OpenAI Codex (GPT-5.5)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41972>
This commit is contained in:
Danylo Piliaiev 2026-06-02 15:45:52 +02:00 committed by Marge Bot
parent 6a7d3020a7
commit 38a10950e3
3 changed files with 121 additions and 13 deletions

View file

@ -590,8 +590,8 @@ freedreno-a750 = { checksum = "66854c4c188d6c31b7eee767785096313295a09a8ca3c71ea
path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none2.trace"
nonloopable = true
[traces.devices]
freedreno-a660 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" }
freedreno-a750 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" }
freedreno-a660 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" }
freedreno-a750 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" }
[[traces]]
path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none.trace"
@ -1248,7 +1248,7 @@ freedreno-a660 = { checksum = "8be08d609a38c2410a4abcb79c421b89df0b149bd4a7c3a72
path = "steam-d3d11/unigine_heaven_1.rdc"
sysmem = 800
[traces.devices]
freedreno-a660 = { checksum = "9f773449f8a418a80ba28fd2c8242b2f31bdcae484fb170fcebfee54afcdb0b7" }
freedreno-a660 = { checksum = "6dcde8a286917af66f23ecdd81eecfc7e412d7e305d6e6657e639802eac159ef" }
[[traces]]
path = "steam-d3d11/unigine_superposition_1.rdc"

View file

@ -297,7 +297,7 @@ traces:
freedreno-a618:
checksum: b0ac8fb4b2b2268cfb87944730125886
zink-a618:
checksum: bd713b79eaa2a7ebbf4ebebab3577e40
checksum: 47c816354d1ea4dcb64827394abf089e
humus/DynamicBranching3-v2.trace:
freedreno-a306:

View file

@ -28,21 +28,118 @@
static const VkOffset2D blt_no_coord = { ~0, ~0 };
/* The helpers below quantize floats to match shader export behavior and avoid
* rounding mismatches between hardware paths (R2D blit engine, 3D pipeline,
* etc.).
*
* Vulkan does not guarantee that values written through different commands
* will match. However, "Appendix I: Invariance" encourages implementations to
* return the same values for the same operations with the same inputs. We would
* otherwise violate that because GMEM and sysmem clears use different paths,
* and CmdClearAttachments can use either HW clears or 3D clears.
*/
/* Convert a float to an unsigned-normalized integer code that is `bits` wide.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers. The
 * first `return` in the body looks like the removed pre-change
 * implementation and the statements after it look like its replacement;
 * confirm against the actual tree before relying on either.
 */
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
/* Presumably the pre-change line: clamp to [0, 1], scale by 2^bits - 1 and
 * round with _mesa_lroundevenf(). */
return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1));
val = CLAMP(val, 0.0f, 1.0f);
/* Scale at no less than 8 bits of precision, even for narrower channels.
 * BITFIELD_MASK(n) is presumably the n-bit all-ones value; verify. */
uint32_t m = BITFIELD_MASK(bits < 8 ? 8 : bits);
/* 1.0 (and anything clamped down to it) yields the maximum `bits`-wide code
 * directly, bypassing the scaling and shifting below. */
if (val >= 1.0f)
return BITFIELD_MASK(bits);
/* Bump the scaled value to the next representable float above it, then add
 * 0.5 so the floorf() below rounds rather than truncates. */
float scaled = nextafterf(val * (float) m, INFINITY) + 0.5f;
uint32_t result = MIN2((uint32_t) floorf(scaled), m);
/* Channels narrower than 8 bits were quantized at 8 bits above; keep only
 * the top `bits` bits of that 8-bit code. */
if (bits < 8)
result >>= (8 - bits);
return result;
}
/* Quantize a float to exact UNORMn precision to avoid F32->UNORMn rounding
 * mismatches between different HW paths (R2D blit engine, 3D pipeline, etc).
 *
 * The value is packed to a `bits`-wide code with tu_pack_float32_for_unorm()
 * and then divided by the maximum code (2^bits - 1), so the returned float
 * is that code expressed as a fraction of the maximum.
 *
 * NOTE(review): two signature lines appear below because this listing is a
 * diff rendered without +/- markers; the second one
 * (tu_quantize_float32_for_unorm) looks like the post-change name -- confirm
 * against the actual tree.
 */
static float
tu_quantize_float_for_unorm(float val, int bits)
tu_quantize_float32_for_unorm(float val, int bits)
{
return (float) tu_pack_float32_for_unorm(val, bits) / (float) ((1 << bits) - 1);
}
/* Convert a float to a signed-normalized integer code that is `bits` wide.
 *
 * The input is clamped to [-1, 1] and multiplied (in double precision) by
 * the float immediately above the largest positive code. The product is
 * rounded half away from zero: +0.5 then floor for non-negative inputs,
 * -0.5 then ceil for negative ones. The result is limited to
 * [-max_code, max_code], so the most negative two's-complement value is
 * never produced.
 */
static int32_t
tu_pack_float32_for_snorm(float val, int bits)
{
   const int32_t max_code = BITFIELD_MASK(bits - 1);
   const double scale = (double) nextafterf((float) max_code, INFINITY);
   const float clamped = CLAMP(val, -1.0f, 1.0f);

   if (clamped >= 0.0f) {
      const double biased = (double) clamped * scale + 0.5;
      return MIN2((int32_t) floor(biased), max_code);
   }

   const double biased = (double) clamped * scale - 0.5;
   return MAX2((int32_t) ceil(biased), -max_code);
}
/* Quantize a float to the precision of a `bits`-wide SNORM channel.
 *
 * Returns the signed code from tu_pack_float32_for_snorm() divided by the
 * largest positive code, i.e. the code expressed as a fraction of it.
 */
static float
tu_quantize_float32_for_snorm(float val, int bits)
{
   const float code = (float) tu_pack_float32_for_snorm(val, bits);
   const float max_code = (float) BITFIELD_MASK(bits - 1);

   return code / max_code;
}
/* Pack four float color components into the bit layout of `format`.
 *
 * Only plain-layout formats of at most 64 bits per block are handled, and
 * only when the X component is 4, 8 or 10 bits wide and normalized, or
 * 16 bits wide and either normalized or float16.
 *
 * On success the packed value is written to clear_value[0] (low 32 bits)
 * and clear_value[1] (high 32 bits), clear_value[2] and clear_value[3] are
 * zeroed, and true is returned. For any other format false is returned and
 * clear_value is left untouched.
 */
static bool
tu_pack_float32_for_color(enum pipe_format format, const float src[4], uint32_t clear_value[4])
{
   const struct util_format_description *desc = util_format_description(format);

   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
      return false;
   if (desc->block.bits > 64)
      return false;

   const bool is_normalized = desc->is_unorm || desc->is_snorm;
   const bool is_float16 = util_format_is_float16(format);
   const unsigned bits =
      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);

   /* Narrow widths must be normalized; 16-bit may also be half float. */
   const bool narrow_ok = (bits == 4 || bits == 8 || bits == 10) && is_normalized;
   const bool wide_ok = bits == 16 && (is_normalized || is_float16);
   if (!narrow_ok && !wide_ok)
      return false;

   uint64_t packed = 0;
   for (unsigned c = 0; c < 4; c++) {
      /* Skip components whose swizzle is not one of X/Y/Z/W. */
      if (desc->swizzle[c] > PIPE_SWIZZLE_W)
         continue;

      const struct util_format_channel_description *ch = &desc->channel[c];
      const bool is_unsigned = ch->type == UTIL_FORMAT_TYPE_UNSIGNED;
      const bool is_signed = ch->type == UTIL_FORMAT_TYPE_SIGNED;
      uint32_t field = 0;

      if (is_normalized && is_unsigned) {
         field = tu_pack_float32_for_unorm(src[c], ch->size);
      } else if (is_normalized && is_signed) {
         /* Keep only the channel's own bits of the two's-complement code. */
         const int32_t code = tu_pack_float32_for_snorm(src[c], ch->size);
         field = (uint32_t) code & BITFIELD_MASK(ch->size);
      } else if (is_float16) {
         field = _mesa_float_to_float16_rtz(src[c]);
      } else {
         UNREACHABLE("unsupported format");
      }

      packed |= (uint64_t) field << ch->shift;
   }

   clear_value[0] = (uint32_t) packed;
   clear_value[1] = (uint32_t) (packed >> 32);
   clear_value[2] = 0;
   clear_value[3] = 0;

   return true;
}
static uint32_t
tu_pack_float32_for_unorm_depth(float val, unsigned bits)
{
@ -255,15 +352,24 @@ r2d_clear_value(struct tu_cmd_buffer *cmd,
linear = util_format_linear_to_srgb_float(val->color.float32[i]);
if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
clear_value[i] = tu_pack_float32_for_snorm(linear, 8);
else
clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
} else if (ifmt == R2D_FLOAT16) {
clear_value[i] = _mesa_float_to_half(val->color.float32[i]);
clear_value[i] = _mesa_float_to_float16_rtz(val->color.float32[i]);
} else {
assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
ifmt == R2D_INT16 || ifmt == R2D_INT8);
clear_value[i] = val->color.uint32[i];
if (ifmt == R2D_FLOAT32 && ch->normalized) {
if (ch->type == UTIL_FORMAT_TYPE_UNSIGNED)
clear_value[i] = fui(tu_quantize_float32_for_unorm(val->color.float32[i], ch->size));
else if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
clear_value[i] = fui(tu_quantize_float32_for_snorm(val->color.float32[i], ch->size));
else
clear_value[i] = val->color.uint32[i];
} else {
clear_value[i] = val->color.uint32[i];
}
}
}
break;
@ -2103,6 +2209,9 @@ pack_blit_event_clear_value(const VkClearValue *val, enum pipe_format format, ui
tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
}
if (tu_pack_float32_for_color(format, tmp, clear_value))
return;
#define PACK_F(type) util_format_##type##_pack_rgba_float \
( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
@ -6352,4 +6461,3 @@ tu_blit_subsampled_apron(struct tu_cmd_buffer *cmd,
}
}
TU_GENX(tu_blit_subsampled_apron);