diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c
index a7bc162f3a6..2dfef2949da 100644
--- a/src/intel/vulkan/anv_perf.c
+++ b/src/intel/vulkan/anv_perf.c
@@ -101,6 +101,8 @@ anv_device_perf_close(struct anv_device *device)
    if (device->perf_fd == -1)
       return;
 
+   if (intel_bind_timeline_get_syncobj(&device->perf_timeline))
+      intel_bind_timeline_finish(&device->perf_timeline, device->fd);
    close(device->perf_fd);
    device->perf_fd = -1;
 }
@@ -133,11 +135,19 @@ anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_
    uint64_t period_exponent = 31; /* slowest sampling period */
    int ret;
 
+   if (intel_perf_has_metric_sync(device->physical->perf)) {
+      if (!intel_bind_timeline_init(&device->perf_timeline, device->fd))
+         return -1;
+   }
+
    ret = intel_perf_stream_open(device->physical->perf, device->fd,
                                 anv_device_perf_get_queue_context_or_exec_queue_id(queue),
-                                metric_id, period_exponent, true, true, NULL);
+                                metric_id, period_exponent, true, true,
+                                &device->perf_timeline);
    if (ret >= 0)
       device->perf_queue = queue;
+   else
+      intel_bind_timeline_finish(&device->perf_timeline, device->fd);
 
    return ret;
 }
@@ -290,7 +300,7 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
                                               device->perf_fd,
                                               context_or_exec_queue,
                                               config->config_id,
-                                              NULL);
+                                              &device->perf_timeline);
       if (ret < 0)
          return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
    }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3127fab22a0..683ec5bc3fe 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1964,6 +1964,7 @@ struct anv_device {
 
    int perf_fd; /* -1 if not opened */
    struct anv_queue *perf_queue;
+   struct intel_bind_timeline perf_timeline;
 
    struct intel_aux_map_context *aux_map_ctx;
 
diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c
index 71a2258818c..2ae34b45c8f 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.c
+++ b/src/intel/vulkan/xe/anv_batch_chain.c
@@ -315,7 +315,7 @@ xe_queue_exec_locked(struct anv_queue *queue,
                                       perf_query_pass, &exec);
 
    if (perf_query_pool && cmd_buffer_count) {
-      struct drm_xe_sync xe_syncs[1] = {};
+      struct drm_xe_sync xe_syncs[2] = {};
       struct drm_xe_exec perf_query_exec = {
          .exec_queue_id = queue->exec_queue_id,
          .num_batch_buffer = 1,
@@ -338,7 +338,7 @@ xe_queue_exec_locked(struct anv_queue *queue,
                                                device->perf_fd,
                                                queue->exec_queue_id,
                                                query_info->oa_metrics_set_id,
-                                               NULL);
+                                               &device->perf_timeline);
          if (ret < 0) {
             result = vk_device_set_lost(&device->vk,
                                         "intel_perf_stream_set_metrics_id failed: %s",
@@ -346,11 +346,21 @@ xe_queue_exec_locked(struct anv_queue *queue,
          }
       }
 
+      /* wait on completion of all vm binds */
       xe_syncs[0].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
       xe_syncs[0].flags = 0; /* wait */
       xe_syncs[0].handle = intel_bind_timeline_get_syncobj(&device->bind_timeline);
       xe_syncs[0].timeline_value = intel_bind_timeline_get_last_point(&device->bind_timeline);
 
+      /* wait for metric change if supported */
+      if (intel_bind_timeline_get_syncobj(&device->perf_timeline)) {
+         perf_query_exec.num_syncs++;
+         xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
+         xe_syncs[1].flags = 0; /* wait */
+         xe_syncs[1].handle = intel_bind_timeline_get_syncobj(&device->perf_timeline);
+         xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(&device->perf_timeline);
+      }
+
       if (!device->info->no_hw && result == VK_SUCCESS) {
          if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &perf_query_exec))
             result = vk_device_set_lost(&device->vk, "perf_query_exec failed: %m");