tu: Initial support for VK_KHR_calibrated_timestamps on a750

Starting with a750, the ALWAYS_ON counter is initialized from a loadable
counter in the CX power domain, which is never turned off except during a
GPU reset. This means that timestamps should always be monotonic unless
the GPU resets, in which case subsequent submits should return
DEVICE_LOST anyway. Thus it should be good enough to satisfy the Vulkan
requirement that vkCmdWriteTimestamp is monotonic.

kgsl tries to synchronize the CX counter to the CPU counter, and
additionally adds a synchronization ioctl to improve the accuracy. I'm
not sure whether the former is really useful for us, but the latter
should eventually be implemented in drm/msm. However, for now we can
expose the extension without any kernel support.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31100>
This commit is contained in:
Connor Abbott 2024-09-09 11:39:43 -04:00
parent c968c5a740
commit dbc4a2e30b
4 changed files with 93 additions and 3 deletions

View file

@ -509,7 +509,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_acceleration_structure DONE (anv/gfx12.5+, lvp, radv/gfx10.3+) VK_KHR_acceleration_structure DONE (anv/gfx12.5+, lvp, radv/gfx10.3+)
VK_KHR_android_surface not started VK_KHR_android_surface not started
VK_KHR_calibrated_timestamps DONE (anv, nvk, radv) VK_KHR_calibrated_timestamps DONE (anv, nvk, radv, tu/a750+)
VK_KHR_compute_shader_derivatives DONE (anv, nvk, radv) VK_KHR_compute_shader_derivatives DONE (anv, nvk, radv)
VK_KHR_cooperative_matrix DONE (anv, radv/gfx11+) VK_KHR_cooperative_matrix DONE (anv, radv/gfx11+)
VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv) VK_KHR_deferred_host_operations DONE (anv, hasvk, lvp, radv)
@ -568,7 +568,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_attachment_feedback_loop_layout DONE (anv, lvp, nvk, radv, tu, v3dv, vn) VK_EXT_attachment_feedback_loop_layout DONE (anv, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_border_color_swizzle DONE (anv, hasvk, lvp, nvk, radv/gfx10+, tu, v3dv, vn) VK_EXT_border_color_swizzle DONE (anv, hasvk, lvp, nvk, radv/gfx10+, tu, v3dv, vn)
VK_EXT_buffer_device_address DONE (anv, hasvk, nvk, panvk, radv) VK_EXT_buffer_device_address DONE (anv, hasvk, nvk, panvk, radv)
VK_EXT_calibrated_timestamps DONE (anv, hasvk, nvk, lvp, radv, vn) VK_EXT_calibrated_timestamps DONE (anv, hasvk, nvk, lvp, radv, vn, tu/a750+)
VK_EXT_color_write_enable DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn) VK_EXT_color_write_enable DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_conditional_rendering DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_conditional_rendering DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
VK_EXT_conservative_rasterization DONE (anv, nvk, radv, vn) VK_EXT_conservative_rasterization DONE (anv, nvk, radv, vn)

View file

@ -276,6 +276,11 @@ struct fd_dev_info {
* example dEQP-VK.image.load_store.with_format.2d.*. * example dEQP-VK.image.load_store.with_format.2d.*.
*/ */
bool ubwc_coherency_quirk; bool ubwc_coherency_quirk;
/* Whether CP_ALWAYS_ON_COUNTER only resets on device loss rather than
* on every suspend/resume.
*/
bool has_persistent_counter;
} a7xx; } a7xx;
}; };

View file

@ -893,6 +893,7 @@ a7xx_750 = A7XXProps(
ubwc_all_formats_compatible = True, ubwc_all_formats_compatible = True,
has_compliant_dp4acc = True, has_compliant_dp4acc = True,
ubwc_coherency_quirk = True, ubwc_coherency_quirk = True,
has_persistent_counter = True,
) )
a730_magic_regs = dict( a730_magic_regs = dict(

View file

@ -147,6 +147,7 @@ get_device_extensions(const struct tu_physical_device *device,
.KHR_16bit_storage = device->info->a6xx.storage_16bit, .KHR_16bit_storage = device->info->a6xx.storage_16bit,
.KHR_bind_memory2 = true, .KHR_bind_memory2 = true,
.KHR_buffer_device_address = true, .KHR_buffer_device_address = true,
.KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
.KHR_copy_commands2 = true, .KHR_copy_commands2 = true,
.KHR_create_renderpass2 = true, .KHR_create_renderpass2 = true,
.KHR_dedicated_allocation = true, .KHR_dedicated_allocation = true,
@ -230,6 +231,7 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_attachment_feedback_loop_dynamic_state = true, .EXT_attachment_feedback_loop_dynamic_state = true,
.EXT_attachment_feedback_loop_layout = true, .EXT_attachment_feedback_loop_layout = true,
.EXT_border_color_swizzle = true, .EXT_border_color_swizzle = true,
.EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
.EXT_color_write_enable = true, .EXT_color_write_enable = true,
.EXT_conditional_rendering = true, .EXT_conditional_rendering = true,
.EXT_custom_border_color = true, .EXT_custom_border_color = true,
@ -867,6 +869,9 @@ tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
p->maxBufferSize = 1ull << 32; p->maxBufferSize = 1ull << 32;
} }
/* CP_ALWAYS_ON_COUNTER is fixed 19.2 MHz. The value below is that rate in Hz;
 * it is used to derive timestampPeriod and the device clock period reported
 * for calibrated timestamps.
 */
#define ALWAYS_ON_FREQUENCY 19200000
static void static void
tu_get_properties(struct tu_physical_device *pdevice, tu_get_properties(struct tu_physical_device *pdevice,
struct vk_properties *props) struct vk_properties *props)
@ -973,7 +978,7 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT; props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
props->maxSampleMaskWords = 1; props->maxSampleMaskWords = 1;
props->timestampComputeAndGraphics = true; props->timestampComputeAndGraphics = true;
props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */ props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY;
props->maxClipDistances = 8; props->maxClipDistances = 8;
props->maxCullDistances = 8; props->maxCullDistances = 8;
props->maxCombinedClipAndCullDistances = 8; props->maxCombinedClipAndCullDistances = 8;
@ -3388,3 +3393,82 @@ tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer); vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
} }
/* Pick the CPU clock used to bracket calibrated-timestamp sampling:
 * CLOCK_MONOTONIC_RAW when the platform headers define it, otherwise
 * plain CLOCK_MONOTONIC.
 */
static inline clockid_t
tu_get_default_cpu_clock_id(void)
{
#ifndef CLOCK_MONOTONIC_RAW
   const clockid_t clock_id = CLOCK_MONOTONIC;
#else
   const clockid_t clock_id = CLOCK_MONOTONIC_RAW;
#endif

   return clock_id;
}
/**
 * Implements vkGetCalibratedTimestampsKHR.
 *
 * Samples every requested time domain between two reads of the default CPU
 * clock (see tu_get_default_cpu_clock_id()) and reports the resulting
 * uncertainty through vk_time_max_deviation().
 *
 * \param _device          Device handle.
 * \param timestampCount   Number of entries in pTimestampInfos/pTimestamps.
 * \param pTimestampInfos  Time domain requested for each entry.
 * \param pTimestamps      Receives one timestamp per entry.
 * \param pMaxDeviation    Receives the maximum deviation between the
 *                         returned timestamps.
 *
 * \return VK_SUCCESS, or the device-lost result if the GPU timestamp could
 *         not be read.
 */
VkResult tu_GetCalibratedTimestampsKHR(
   VkDevice _device,
   uint32_t timestampCount,
   const VkCalibratedTimestampInfoKHR *pTimestampInfos,
   uint64_t *pTimestamps,
   uint64_t *pMaxDeviation)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   /* Length of one ALWAYS_ON tick in nanoseconds, rounded up. */
   const uint64_t device_period = DIV_ROUND_UP(1000000000, ALWAYS_ON_FREQUENCY);

   /* Coarsest period among the clocks that were actually sampled. */
   uint64_t max_clock_period = 0;

   const uint64_t begin = vk_clock_gettime(tu_get_default_cpu_clock_id());

   for (uint32_t d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_KHR:
         /* Do not report success with an unwritten timestamp if the kernel
          * query fails.
          * NOTE(review): assumes tu_device_get_gpu_timestamp() returns
          * non-zero on failure -- confirm against its declaration.
          */
         if (tu_device_get_gpu_timestamp(device, &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk,
                                      "Failed to read the GPU timestamp");
         }
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
         pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;
#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
         /* 'begin' was already sampled from CLOCK_MONOTONIC_RAW, so reuse it
          * instead of reading the clock again.
          */
         pTimestamps[d] = begin;
         break;
#endif
      default:
         /* Time domain not handled here: report a zero timestamp. */
         pTimestamps[d] = 0;
         break;
      }
   }

   const uint64_t end = vk_clock_gettime(tu_get_default_cpu_clock_id());

   *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);

   return VK_SUCCESS;
}
/* Time domains reported by tu_GetPhysicalDeviceCalibrateableTimeDomainsKHR().
 * The raw monotonic domain is only listed when the platform headers define
 * CLOCK_MONOTONIC_RAW.
 */
static const VkTimeDomainKHR tu_time_domains[] = {
VK_TIME_DOMAIN_DEVICE_KHR,
VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
/**
 * Implements vkGetPhysicalDeviceCalibrateableTimeDomainsKHR.
 *
 * Copies the entries of tu_time_domains into the caller's array using the
 * vk_outarray helpers and returns the status they report.
 */
VkResult tu_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pTimeDomainCount,
   VkTimeDomainKHR *pTimeDomains)
{
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, domains, pTimeDomains, pTimeDomainCount);

   const VkTimeDomainKHR *const table_end =
      tu_time_domains + ARRAY_SIZE(tu_time_domains);

   for (const VkTimeDomainKHR *src = tu_time_domains; src != table_end; src++) {
      vk_outarray_append_typed(VkTimeDomainKHR, &domains, slot) {
         *slot = *src;
      }
   }

   return vk_outarray_status(&domains);
}