From dbc4a2e30b99b7be52ce28d57d4c8897dcc9d54d Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 9 Sep 2024 11:39:43 -0400 Subject: [PATCH] tu: Initial support for VK_KHR_calibrated_timestamps on a750 Starting with a750, the ALWAYS_ON counter is initialized from a loadable counter in CX power domain, which is never turned off except during a GPU reset. This means that timestamps should always be monotonic except if the GPU resets, in which case subsequent submits should return DEVICE_LOST anyway. Thus it should be good enough to satisfy the Vulkan requirement that vkCmdWriteTimestamp is monotonic. kgsl tries to synchronize the CX counter to the CPU counter, and additionally adds a synchronization ioctl to improve the accuracy. I'm not sure whether the former is really useful for us, but the latter should eventually be implemented in drm/msm. However for now we can expose the extension without any kernel support. Part-of: --- docs/features.txt | 4 +- src/freedreno/common/freedreno_dev_info.h | 5 ++ src/freedreno/common/freedreno_devices.py | 1 + src/freedreno/vulkan/tu_device.cc | 86 ++++++++++++++++++++++- 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index c0140aca3f8..524867e4bde 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -509,7 +509,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_acceleration_structure DONE (anv/gfx12.5+, lvp, radv/gfx10.3+) VK_KHR_android_surface not started - VK_KHR_calibrated_timestamps DONE (anv, nvk, radv) + VK_KHR_calibrated_timestamps DONE (anv, nvk, radv, tu/a750+) VK_KHR_compute_shader_derivatives DONE (anv, nvk, radv) VK_KHR_cooperative_matrix DONE (anv, radv/gfx11+) VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv) @@ -568,7 +568,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_attachment_feedback_loop_layout DONE (anv, lvp, nvk, radv, tu, v3dv, vn) VK_EXT_border_color_swizzle DONE (anv, hasvk, lvp, nvk, 
radv/gfx10+, tu, v3dv, vn) VK_EXT_buffer_device_address DONE (anv, hasvk, nvk, panvk, radv) - VK_EXT_calibrated_timestamps DONE (anv, hasvk, nvk, lvp, radv, vn) + VK_EXT_calibrated_timestamps DONE (anv, hasvk, nvk, lvp, radv, vn, tu/a750+) VK_EXT_color_write_enable DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn) VK_EXT_conditional_rendering DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_conservative_rasterization DONE (anv, nvk, radv, vn) diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 97e0ef802af..942bd1e6d8e 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -276,6 +276,11 @@ struct fd_dev_info { * example dEQP-VK.image.load_store.with_format.2d.*. */ bool ubwc_coherency_quirk; + + /* Whether CP_ALWAYS_ON_COUNTER only resets on device loss rather than + * on every suspend/resume. + */ + bool has_persistent_counter; } a7xx; }; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 64cf3d6825b..2351f0e2d51 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -893,6 +893,7 @@ a7xx_750 = A7XXProps( ubwc_all_formats_compatible = True, has_compliant_dp4acc = True, ubwc_coherency_quirk = True, + has_persistent_counter = True, ) a730_magic_regs = dict( diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 2fe5a08ea9b..0f6326a13f4 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -147,6 +147,7 @@ get_device_extensions(const struct tu_physical_device *device, .KHR_16bit_storage = device->info->a6xx.storage_16bit, .KHR_bind_memory2 = true, .KHR_buffer_device_address = true, + .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter, .KHR_copy_commands2 = true, .KHR_create_renderpass2 = true, .KHR_dedicated_allocation = true, @@ -230,6 +231,7 @@ get_device_extensions(const 
struct tu_physical_device *device, .EXT_attachment_feedback_loop_dynamic_state = true, .EXT_attachment_feedback_loop_layout = true, .EXT_border_color_swizzle = true, + .EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter, .EXT_color_write_enable = true, .EXT_conditional_rendering = true, .EXT_custom_border_color = true, @@ -867,6 +869,9 @@ tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice, p->maxBufferSize = 1ull << 32; } +/* CP_ALWAYS_ON_COUNTER is fixed 19.2 MHz */ +#define ALWAYS_ON_FREQUENCY 19200000 + static void tu_get_properties(struct tu_physical_device *pdevice, struct vk_properties *props) @@ -973,7 +978,7 @@ tu_get_properties(struct tu_physical_device *pdevice, props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT; props->maxSampleMaskWords = 1; props->timestampComputeAndGraphics = true; - props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */ + props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY; props->maxClipDistances = 8; props->maxCullDistances = 8; props->maxCombinedClipAndCullDistances = 8; @@ -3388,3 +3393,82 @@ tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer) vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer); } + +static inline clockid_t +tu_get_default_cpu_clock_id(void) /* CPU clock used for the begin/end samples in tu_GetCalibratedTimestampsKHR: CLOCK_MONOTONIC_RAW when that macro is defined, otherwise CLOCK_MONOTONIC. */ +{ +#ifdef CLOCK_MONOTONIC_RAW + return CLOCK_MONOTONIC_RAW; +#else + return CLOCK_MONOTONIC; +#endif +} + +VkResult tu_GetCalibratedTimestampsKHR( /* Writes one value per pTimestampInfos entry into pTimestamps, stores the result of vk_time_max_deviation() in *pMaxDeviation, and always returns VK_SUCCESS. */ + VkDevice _device, + uint32_t timestampCount, + const VkCalibratedTimestampInfoKHR *pTimestampInfos, + uint64_t *pTimestamps, + uint64_t *pMaxDeviation) +{ + VK_FROM_HANDLE(tu_device, device, _device); + const uint64_t device_period = DIV_ROUND_UP(1000000000, ALWAYS_ON_FREQUENCY); /* 1e9 divided by the counter frequency via DIV_ROUND_UP, i.e. length of one counter tick in nanoseconds */ + uint32_t d; + uint64_t begin, end; + uint64_t max_clock_period = 0; /* largest per-domain period seen in the loop; stays 0 if no case below raises it */ + + begin = vk_clock_gettime(tu_get_default_cpu_clock_id()); /* CPU time sampled before any requested domain is read */ + + for (d = 0; d < timestampCount; d++) { + switch (pTimestampInfos[d].timeDomain) { + case 
VK_TIME_DOMAIN_DEVICE_KHR: + tu_device_get_gpu_timestamp(device, &pTimestamps[d]); /* NOTE(review): any status reported by this call is not inspected here - confirm that is intended */ + max_clock_period = MAX2(max_clock_period, device_period); + break; + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR: + pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC); + max_clock_period = MAX2(max_clock_period, 1); + break; + +#ifdef CLOCK_MONOTONIC_RAW + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR: + pTimestamps[d] = begin; /* reuses the sample taken before the loop; when this case is compiled in, that sample was read from CLOCK_MONOTONIC_RAW */ + break; +#endif + default: + pTimestamps[d] = 0; /* time domain not handled above: report 0, the call still returns VK_SUCCESS */ + break; + } + } + + end = vk_clock_gettime(tu_get_default_cpu_clock_id()); /* CPU time sampled after every requested domain was read */ + + *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period); + + return VK_SUCCESS; +} + +static const VkTimeDomainKHR tu_time_domains[] = { /* Time domains reported by tu_GetPhysicalDeviceCalibrateableTimeDomainsKHR; the RAW entry exists only when CLOCK_MONOTONIC_RAW is defined. */ + VK_TIME_DOMAIN_DEVICE_KHR, + VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, +#ifdef CLOCK_MONOTONIC_RAW + VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, +#endif +}; + +VkResult tu_GetPhysicalDeviceCalibrateableTimeDomainsKHR( /* Appends every tu_time_domains entry to the caller's output array through the vk_outarray helpers and returns vk_outarray_status(). physicalDevice is not read. */ + VkPhysicalDevice physicalDevice, + uint32_t *pTimeDomainCount, + VkTimeDomainKHR *pTimeDomains) +{ + int d; /* NOTE(review): signed index compared against ARRAY_SIZE() below, which is presumably unsigned - confirm this does not trip a sign-compare warning */ + VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount); + + for (d = 0; d < ARRAY_SIZE(tu_time_domains); d++) { + vk_outarray_append_typed(VkTimeDomainKHR, &out, i) { + *i = tu_time_domains[d]; + } + } + + return vk_outarray_status(&out); +}