diff --git a/src/freedreno/ci/traces-freedreno-restricted.toml b/src/freedreno/ci/traces-freedreno-restricted.toml index 2c2a7221bee..4fb7deba2f1 100644 --- a/src/freedreno/ci/traces-freedreno-restricted.toml +++ b/src/freedreno/ci/traces-freedreno-restricted.toml @@ -590,8 +590,8 @@ freedreno-a750 = { checksum = "66854c4c188d6c31b7eee767785096313295a09a8ca3c71ea path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none2.trace" nonloopable = true [traces.devices] -freedreno-a660 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" } -freedreno-a750 = { checksum = "a329851d66e946d5b5eeffca4d81e3984a421dd6dab97b50a177a9a420ede165" } +freedreno-a660 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" } +freedreno-a750 = { checksum = "2830bf18501492f5b453fb15874c438810b08d025dfce5a26f9be845d558081a" } [[traces]] path = "steam-d3d9/Oblivion_unknown_dx9_unknown_unknown_none.trace" @@ -1248,7 +1248,7 @@ freedreno-a660 = { checksum = "8be08d609a38c2410a4abcb79c421b89df0b149bd4a7c3a72 path = "steam-d3d11/unigine_heaven_1.rdc" sysmem = 800 [traces.devices] -freedreno-a660 = { checksum = "9f773449f8a418a80ba28fd2c8242b2f31bdcae484fb170fcebfee54afcdb0b7" } +freedreno-a660 = { checksum = "6dcde8a286917af66f23ecdd81eecfc7e412d7e305d6e6657e639802eac159ef" } [[traces]] path = "steam-d3d11/unigine_superposition_1.rdc" diff --git a/src/freedreno/ci/traces-freedreno.yml b/src/freedreno/ci/traces-freedreno.yml index 49a6ec844a5..7f4cdb4770b 100644 --- a/src/freedreno/ci/traces-freedreno.yml +++ b/src/freedreno/ci/traces-freedreno.yml @@ -297,7 +297,7 @@ traces: freedreno-a618: checksum: b0ac8fb4b2b2268cfb87944730125886 zink-a618: - checksum: bd713b79eaa2a7ebbf4ebebab3577e40 + checksum: 47c816354d1ea4dcb64827394abf089e humus/DynamicBranching3-v2.trace: freedreno-a306: diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 742cc442327..ff7452a68c2 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -28,21 +28,118 @@ static const VkOffset2D blt_no_coord = { ~0, ~0 }; +/* The helpers below quantize floats to match shader export behavior and avoid + * rounding mismatches between hardware paths (R2D blit engine, 3D pipeline, + * etc.). + * + * Vulkan does not guarantee that values written through different commands + * will match. However, "Appendix I: Invariance" encourages implementations to + * return the same values for the same operations with the same inputs. We would + * otherwise violate that because GMEM and sysmem clears use different paths, + * and CmdClearAttachments can use either HW clears or 3D clears. + */ + static uint32_t tu_pack_float32_for_unorm(float val, int bits) { - return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1)); + val = CLAMP(val, 0.0f, 1.0f); + + uint32_t m = BITFIELD_MASK(bits < 8 ? 8 : bits); + + if (val >= 1.0f) + return BITFIELD_MASK(bits); + + float scaled = nextafterf(val * (float) m, INFINITY) + 0.5f; + uint32_t result = MIN2((uint32_t) floorf(scaled), m); + + if (bits < 8) + result >>= (8 - bits); + return result; } -/* Quantize a float to exact UNORMn precision to avoid F32->UNORMn rounding - * mismatches between different HW paths (R2D blit engine, 3D pipeline, etc). - */ static float -tu_quantize_float_for_unorm(float val, int bits) +tu_quantize_float32_for_unorm(float val, int bits) { return (float) tu_pack_float32_for_unorm(val, bits) / (float) ((1 << bits) - 1); } +static int32_t +tu_pack_float32_for_snorm(float val, int bits) +{ + val = CLAMP(val, -1.0f, 1.0f); + + int32_t m = BITFIELD_MASK(bits - 1); + float scale = nextafterf((float) m, INFINITY); + + if (val >= 0.0f) { + double scaled = (double) val * (double) scale + 0.5; + return MIN2((int32_t) floor(scaled), m); + } else { + double scaled = (double) val * (double) scale - 0.5; + return MAX2((int32_t) ceil(scaled), -m); + } +} + +static float +tu_quantize_float32_for_snorm(float val, int bits) +{ + return (float) tu_pack_float32_for_snorm(val, bits) / (float) BITFIELD_MASK(bits - 1); +} + +static bool +tu_pack_float32_for_color(enum pipe_format format, const float src[4], uint32_t clear_value[4]) +{ + const struct util_format_description *desc = util_format_description(format); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc->block.bits > 64) + return false; + + bool is_normalized = desc->is_unorm || desc->is_snorm; + bool is_float16 = util_format_is_float16(format); + unsigned bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X); + + switch (bits) { + case 4: + case 8: + case 10: + if (!is_normalized) + return false; + break; + case 16: + if (!is_normalized && !is_float16) + return false; + break; + default: + return false; + } + + uint64_t packed = 0; + for (unsigned i = 0; i < 4; i++) { + if (desc->swizzle[i] > PIPE_SWIZZLE_W) + continue; + + const struct util_format_channel_description *ch = &desc->channel[i]; + uint32_t packed_ch = 0; + + if (is_normalized && ch->type == UTIL_FORMAT_TYPE_UNSIGNED) { + packed_ch = tu_pack_float32_for_unorm(src[i], ch->size); + } else if (is_normalized && ch->type == UTIL_FORMAT_TYPE_SIGNED) { + packed_ch = (uint32_t) tu_pack_float32_for_snorm(src[i], ch->size) & BITFIELD_MASK(ch->size); + } else if (is_float16) { + packed_ch = _mesa_float_to_float16_rtz(src[i]); + } else { + UNREACHABLE("unsupported format"); + } + packed |= (uint64_t) packed_ch << ch->shift; + } + + clear_value[0] = (uint32_t) packed; + clear_value[1] = (uint32_t) (packed >> 32); + clear_value[2] = 0; + clear_value[3] = 0; + return true; +} + static uint32_t tu_pack_float32_for_unorm_depth(float val, unsigned bits) { @@ -255,15 +352,24 @@ r2d_clear_value(struct tu_cmd_buffer *cmd, linear = util_format_linear_to_srgb_float(val->color.float32[i]); if (ch->type == UTIL_FORMAT_TYPE_SIGNED) - clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f); + clear_value[i] = tu_pack_float32_for_snorm(linear, 8); else clear_value[i] = tu_pack_float32_for_unorm(linear, 8); } else if (ifmt == R2D_FLOAT16) { - clear_value[i] = _mesa_float_to_half(val->color.float32[i]); + clear_value[i] = _mesa_float_to_float16_rtz(val->color.float32[i]); } else { assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 || ifmt == R2D_INT16 || ifmt == R2D_INT8); - clear_value[i] = val->color.uint32[i]; + if (ifmt == R2D_FLOAT32 && ch->normalized) { + if (ch->type == UTIL_FORMAT_TYPE_UNSIGNED) + clear_value[i] = fui(tu_quantize_float32_for_unorm(val->color.float32[i], ch->size)); + else if (ch->type == UTIL_FORMAT_TYPE_SIGNED) + clear_value[i] = fui(tu_quantize_float32_for_snorm(val->color.float32[i], ch->size)); + else + clear_value[i] = val->color.uint32[i]; + } else { + clear_value[i] = val->color.uint32[i]; + } } } break; @@ -2103,6 +2209,9 @@ pack_blit_event_clear_value(const VkClearValue *val, enum pipe_format format, ui tmp[i] = util_format_linear_to_srgb_float(tmp[i]); } + if (tu_pack_float32_for_color(format, tmp, clear_value)) + return; + #define PACK_F(type) util_format_##type##_pack_rgba_float \ ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1) switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { @@ -6352,4 +6461,3 @@ tu_blit_subsampled_apron(struct tu_cmd_buffer *cmd, } } TU_GENX(tu_blit_subsampled_apron); -