mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 04:50:11 +01:00
vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]
Offers three clocks, device, clock monotonic and clock monotonic raw. Could use some kernel support to reduce the deviation between clock values. v2: Ensure deviation is at least as big as the GPU time interval. v3: Set device->lost when returning DEVICE_LOST. Use MAX2 and DIV_ROUND_UP instead of open coding these. Delete spurious TIMESTAMP in radv version. Suggested-by: Jason Ekstrand <jason@jlekstrand.net> Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> v4: Add anv_gem_reg_read to anv_gem_stubs.c Suggested-by: Jason Ekstrand <jason@jlekstrand.net> v5: Adjust maxDeviation computation to max(sampled_clock_period) + sample_interval. Suggested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Suggested-by: Jason Ekstrand <jason@jlekstrand.net> Signed-off-by: Keith Packard <keithp@keithp.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
parent
a11cafbd7a
commit
67a2c1493c
7 changed files with 270 additions and 0 deletions
|
|
@ -4957,3 +4957,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
|
||||||
VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
|
VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
|
||||||
VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
|
VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const VkTimeDomainEXT radv_time_domains[] = {
|
||||||
|
VK_TIME_DOMAIN_DEVICE_EXT,
|
||||||
|
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
|
||||||
|
VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
|
||||||
|
VkPhysicalDevice physicalDevice,
|
||||||
|
uint32_t *pTimeDomainCount,
|
||||||
|
VkTimeDomainEXT *pTimeDomains)
|
||||||
|
{
|
||||||
|
int d;
|
||||||
|
VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
|
||||||
|
|
||||||
|
for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
|
||||||
|
vk_outarray_append(&out, i) {
|
||||||
|
*i = radv_time_domains[d];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return vk_outarray_status(&out);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read a POSIX clock and return its current value in nanoseconds.
 *
 * When CLOCK_MONOTONIC_RAW is requested but cannot be read, the function
 * falls back to CLOCK_MONOTONIC.  Returns 0 if no clock could be read.
 */
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
	struct timespec current;
	int ret;

	ret = clock_gettime(clock_id, &current);
	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
		ret = clock_gettime(CLOCK_MONOTONIC, &current);
	if (ret < 0)
		return 0;

	/* Widen tv_sec before scaling so the multiplication is done in 64 bits. */
	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}
|
||||||
|
|
||||||
|
VkResult radv_GetCalibratedTimestampsEXT(
|
||||||
|
VkDevice _device,
|
||||||
|
uint32_t timestampCount,
|
||||||
|
const VkCalibratedTimestampInfoEXT *pTimestampInfos,
|
||||||
|
uint64_t *pTimestamps,
|
||||||
|
uint64_t *pMaxDeviation)
|
||||||
|
{
|
||||||
|
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||||
|
uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
|
||||||
|
int d;
|
||||||
|
uint64_t begin, end;
|
||||||
|
uint64_t max_clock_period = 0;
|
||||||
|
|
||||||
|
begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||||
|
|
||||||
|
for (d = 0; d < timestampCount; d++) {
|
||||||
|
switch (pTimestampInfos[d].timeDomain) {
|
||||||
|
case VK_TIME_DOMAIN_DEVICE_EXT:
|
||||||
|
pTimestamps[d] = device->ws->query_value(device->ws,
|
||||||
|
RADEON_TIMESTAMP);
|
||||||
|
uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
|
||||||
|
max_clock_period = MAX2(max_clock_period, device_period);
|
||||||
|
break;
|
||||||
|
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
|
||||||
|
pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
|
||||||
|
max_clock_period = MAX2(max_clock_period, 1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
|
||||||
|
pTimestamps[d] = begin;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pTimestamps[d] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The maximum deviation is the sum of the interval over which we
|
||||||
|
* perform the sampling and the maximum period of any sampled
|
||||||
|
* clock. That's because the maximum skew between any two sampled
|
||||||
|
* clock edges is when the sampled clock with the largest period is
|
||||||
|
* sampled at the end of that period but right at the beginning of the
|
||||||
|
* sampling interval and some other clock is sampled right at the
|
||||||
|
* begining of its sampling period and right at the end of the
|
||||||
|
* sampling interval. Let's assume the GPU has the longest clock
|
||||||
|
* period and that the application is sampling GPU and monotonic:
|
||||||
|
*
|
||||||
|
* s e
|
||||||
|
* w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||||
|
* Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
|
||||||
|
*
|
||||||
|
* g
|
||||||
|
* 0 1 2 3
|
||||||
|
* GPU -----_____-----_____-----_____-----_____
|
||||||
|
*
|
||||||
|
* m
|
||||||
|
* x y z 0 1 2 3 4 5 6 7 8 9 a b c
|
||||||
|
* Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
|
||||||
|
*
|
||||||
|
* Interval <----------------->
|
||||||
|
* Deviation <-------------------------->
|
||||||
|
*
|
||||||
|
* s = read(raw) 2
|
||||||
|
* g = read(GPU) 1
|
||||||
|
* m = read(monotonic) 2
|
||||||
|
* e = read(raw) b
|
||||||
|
*
|
||||||
|
* We round the sample interval up by one tick to cover sampling error
|
||||||
|
* in the interval clock
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint64_t sample_interval = end - begin + 1;
|
||||||
|
|
||||||
|
*pMaxDeviation = sample_interval + max_clock_period;
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,7 @@ EXTENSIONS = [
|
||||||
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
||||||
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
||||||
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
||||||
|
Extension('VK_EXT_calibrated_timestamps', 1, True),
|
||||||
Extension('VK_EXT_conditional_rendering', 1, True),
|
Extension('VK_EXT_conditional_rendering', 1, True),
|
||||||
Extension('VK_EXT_conservative_rasterization', 1, 'device->rad_info.chip_class >= GFX9'),
|
Extension('VK_EXT_conservative_rasterization', 1, 'device->rad_info.chip_class >= GFX9'),
|
||||||
Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
||||||
|
|
|
||||||
|
|
@ -3035,6 +3035,133 @@ void anv_DestroyFramebuffer(
|
||||||
vk_free2(&device->alloc, pAllocator, fb);
|
vk_free2(&device->alloc, pAllocator, fb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const VkTimeDomainEXT anv_time_domains[] = {
|
||||||
|
VK_TIME_DOMAIN_DEVICE_EXT,
|
||||||
|
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
|
||||||
|
VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
|
||||||
|
};
|
||||||
|
|
||||||
|
VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
|
||||||
|
VkPhysicalDevice physicalDevice,
|
||||||
|
uint32_t *pTimeDomainCount,
|
||||||
|
VkTimeDomainEXT *pTimeDomains)
|
||||||
|
{
|
||||||
|
int d;
|
||||||
|
VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
|
||||||
|
|
||||||
|
for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
|
||||||
|
vk_outarray_append(&out, i) {
|
||||||
|
*i = anv_time_domains[d];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return vk_outarray_status(&out);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read a POSIX clock and return its current value in nanoseconds.
 *
 * When CLOCK_MONOTONIC_RAW is requested but cannot be read, the function
 * falls back to CLOCK_MONOTONIC.  Returns 0 if no clock could be read.
 */
static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
   if (ret < 0)
      return 0;

   /* Widen tv_sec before scaling so the multiplication is done in 64 bits. */
   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}
|
||||||
|
|
||||||
|
/* Register offset handed to anv_gem_reg_read() when sampling the device
 * time domain in anv_GetCalibratedTimestampsEXT(). */
#define TIMESTAMP 0x2358
|
||||||
|
|
||||||
|
VkResult anv_GetCalibratedTimestampsEXT(
|
||||||
|
VkDevice _device,
|
||||||
|
uint32_t timestampCount,
|
||||||
|
const VkCalibratedTimestampInfoEXT *pTimestampInfos,
|
||||||
|
uint64_t *pTimestamps,
|
||||||
|
uint64_t *pMaxDeviation)
|
||||||
|
{
|
||||||
|
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||||
|
uint64_t timestamp_frequency = device->info.timestamp_frequency;
|
||||||
|
int ret;
|
||||||
|
int d;
|
||||||
|
uint64_t begin, end;
|
||||||
|
uint64_t max_clock_period = 0;
|
||||||
|
|
||||||
|
begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||||
|
|
||||||
|
for (d = 0; d < timestampCount; d++) {
|
||||||
|
switch (pTimestampInfos[d].timeDomain) {
|
||||||
|
case VK_TIME_DOMAIN_DEVICE_EXT:
|
||||||
|
ret = anv_gem_reg_read(device, TIMESTAMP | 1,
|
||||||
|
&pTimestamps[d]);
|
||||||
|
|
||||||
|
if (ret != 0) {
|
||||||
|
device->lost = TRUE;
|
||||||
|
return VK_ERROR_DEVICE_LOST;
|
||||||
|
}
|
||||||
|
uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
|
||||||
|
max_clock_period = MAX2(max_clock_period, device_period);
|
||||||
|
break;
|
||||||
|
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
|
||||||
|
pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
|
||||||
|
max_clock_period = MAX2(max_clock_period, 1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
|
||||||
|
pTimestamps[d] = begin;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pTimestamps[d] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The maximum deviation is the sum of the interval over which we
|
||||||
|
* perform the sampling and the maximum period of any sampled
|
||||||
|
* clock. That's because the maximum skew between any two sampled
|
||||||
|
* clock edges is when the sampled clock with the largest period is
|
||||||
|
* sampled at the end of that period but right at the beginning of the
|
||||||
|
* sampling interval and some other clock is sampled right at the
|
||||||
|
* begining of its sampling period and right at the end of the
|
||||||
|
* sampling interval. Let's assume the GPU has the longest clock
|
||||||
|
* period and that the application is sampling GPU and monotonic:
|
||||||
|
*
|
||||||
|
* s e
|
||||||
|
* w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||||
|
* Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
|
||||||
|
*
|
||||||
|
* g
|
||||||
|
* 0 1 2 3
|
||||||
|
* GPU -----_____-----_____-----_____-----_____
|
||||||
|
*
|
||||||
|
* m
|
||||||
|
* x y z 0 1 2 3 4 5 6 7 8 9 a b c
|
||||||
|
* Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
|
||||||
|
*
|
||||||
|
* Interval <----------------->
|
||||||
|
* Deviation <-------------------------->
|
||||||
|
*
|
||||||
|
* s = read(raw) 2
|
||||||
|
* g = read(GPU) 1
|
||||||
|
* m = read(monotonic) 2
|
||||||
|
* e = read(raw) b
|
||||||
|
*
|
||||||
|
* We round the sample interval up by one tick to cover sampling error
|
||||||
|
* in the interval clock
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint64_t sample_interval = end - begin + 1;
|
||||||
|
|
||||||
|
*pMaxDeviation = sample_interval + max_clock_period;
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* vk_icd.h does not declare this function, so we declare it here to
|
/* vk_icd.h does not declare this function, so we declare it here to
|
||||||
* suppress Wmissing-prototypes.
|
* suppress Wmissing-prototypes.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,7 @@ EXTENSIONS = [
|
||||||
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
|
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
|
||||||
Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'),
|
Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'),
|
||||||
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'),
|
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'),
|
||||||
|
Extension('VK_EXT_calibrated_timestamps', 1, True),
|
||||||
]
|
]
|
||||||
|
|
||||||
class VkVersion:
|
class VkVersion:
|
||||||
|
|
|
||||||
|
|
@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)
|
||||||
return args.handle;
|
return args.handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
|
||||||
|
{
|
||||||
|
struct drm_i915_reg_read args = {
|
||||||
|
.offset = offset
|
||||||
|
};
|
||||||
|
|
||||||
|
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
|
||||||
|
|
||||||
|
*result = args.val;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef SYNC_IOC_MAGIC
|
#ifndef SYNC_IOC_MAGIC
|
||||||
/* duplicated from linux/sync_file.h to avoid build-time dependency
|
/* duplicated from linux/sync_file.h to avoid build-time dependency
|
||||||
* on new (v4.7) kernel headers. Once distro's are mostly using
|
* on new (v4.7) kernel headers. Once distro's are mostly using
|
||||||
|
|
|
||||||
|
|
@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
|
||||||
{
|
{
|
||||||
unreachable("Unused");
|
unreachable("Unused");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Stub build of anv_gem_reg_read(): it is not expected to be called. */
int
anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
{
   unreachable("Unused");
}
|
||||||
|
|
|
||||||
|
|
@ -1109,6 +1109,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
|
||||||
int anv_gem_gpu_get_reset_stats(struct anv_device *device,
|
int anv_gem_gpu_get_reset_stats(struct anv_device *device,
|
||||||
uint32_t *active, uint32_t *pending);
|
uint32_t *active, uint32_t *pending);
|
||||||
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
|
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
|
||||||
|
/* Read the device register at `offset` into *result; returns the status
 * of the underlying register-read request. */
int anv_gem_reg_read(struct anv_device *device, uint32_t offset,
                     uint64_t *result);
|
||||||
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
|
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
|
||||||
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
|
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
|
||||||
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
|
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue