diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 790719ccbad..30a57dc483f 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -389,6 +389,26 @@ gen_perf_new(void *ctx) return perf; } +/** Whether we have the ability to hold off preemption on a batch so we don't + * have to look at the OA buffer to subtract unrelated workloads off the + * values captured through MI_* commands. + */ +static inline bool +gen_perf_has_hold_preemption(const struct gen_perf_config *perf) +{ + return perf->i915_perf_version >= 3; +} + +/** Whether we have the ability to lock EU array power configuration for the + * duration of the performance recording. This is useful on Gen11 where the HW + * architecture requires half the EU for particular workloads. + */ +static inline bool +gen_perf_has_global_sseu(const struct gen_perf_config *perf) +{ + return perf->i915_perf_version >= 4; +} + uint32_t gen_perf_get_n_passes(struct gen_perf_config *perf, const uint32_t *counter_indices, uint32_t counter_indices_count, diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c index ab78e5fc693..288f261f55a 100644 --- a/src/intel/perf/gen_perf_query.c +++ b/src/intel/perf/gen_perf_query.c @@ -348,8 +348,6 @@ gen_perf_close(struct gen_perf_context *perfquery, } } -#define NUM_PERF_PROPERTIES(array) (ARRAY_SIZE(array) / 2) - static bool gen_perf_open(struct gen_perf_context *perf_ctx, int metrics_set_id, @@ -358,28 +356,40 @@ gen_perf_open(struct gen_perf_context *perf_ctx, int drm_fd, uint32_t ctx_id) { - uint64_t properties[] = { - /* Single context sampling */ - DRM_I915_PERF_PROP_CTX_HANDLE, ctx_id, + uint64_t properties[DRM_I915_PERF_PROP_MAX * 2]; + uint32_t p = 0; - /* Include OA reports in samples */ - DRM_I915_PERF_PROP_SAMPLE_OA, true, + /* Single context sampling */ + properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE; + properties[p++] = ctx_id; - /* OA unit configuration */ - DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id, - 
DRM_I915_PERF_PROP_OA_FORMAT, report_format, - DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, + /* Include OA reports in samples */ + properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA; + properties[p++] = true; + + /* OA unit configuration */ + properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET; + properties[p++] = metrics_set_id; + + properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; + properties[p++] = report_format; + + properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; + properties[p++] = period_exponent; + + /* SSEU configuration */ + if (gen_perf_has_global_sseu(perf_ctx->perf)) { + properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; + properties[p++] = to_user_pointer(&perf_ctx->perf->sseu); + } + + assert(p <= ARRAY_SIZE(properties)); - /* SSEU configuration */ - DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&perf_ctx->perf->sseu), - }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED, - .num_properties = perf_ctx->perf->i915_perf_version >= 4 ? - NUM_PERF_PROPERTIES(properties) : - NUM_PERF_PROPERTIES(properties) - 1, + .num_properties = p / 2, .properties_ptr = (uintptr_t) properties, }; int fd = gen_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param); diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index 91fcd6b1e53..0a323994cb6 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -57,7 +57,7 @@ anv_get_perf(const struct gen_device_info *devinfo, int fd) * perf revision 2. */ if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) { - if (perf->i915_perf_version < 3) + if (!gen_perf_has_hold_preemption(perf)) goto err; } @@ -106,7 +106,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id) * enabled we would use only half on Gen11 because of functional * requirements.
*/ - if (device->physical->perf->i915_perf_version >= 4) { + if (gen_perf_has_global_sseu(device->physical->perf)) { properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU; properties[p++] = (uintptr_t) &device->physical->perf->sseu; }