diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py index e8db03dd429..5b9de7ee274 100644 --- a/src/intel/perf/gen_perf.py +++ b/src/intel/perf/gen_perf.py @@ -233,8 +233,8 @@ hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices" hw_vars["$XeCoreTotalCount"] = "perf->sys_vars.n_eu_sub_slices" hw_vars["$EuDualSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices" hw_vars["$EuDualSubslicesSlice0123Count"] = "perf->sys_vars.n_eu_slice0123" -hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu" -hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo.num_thread_per_eu" +hw_vars["$EuThreadsCount"] = "perf->devinfo->num_thread_per_eu" +hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo->num_thread_per_eu" hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask" hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices" # subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+ @@ -242,10 +242,10 @@ hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices" hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask" hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask" hw_vars["$XeCoreMask"] = "perf->sys_vars.subslice_mask" -hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency" +hw_vars["$GpuTimestampFrequency"] = "perf->devinfo->timestamp_frequency" hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq" hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq" -hw_vars["$SkuRevisionId"] = "perf->devinfo.revision" +hw_vars["$SkuRevisionId"] = "perf->devinfo->revision" hw_vars["$QueryMode"] = "perf->sys_vars.query_mode" def resolve_variable(name, set, allow_counters): @@ -253,10 +253,10 @@ def resolve_variable(name, set, allow_counters): return hw_vars[name] m = re.search(r'\$GtSlice([0-9]+)$', name) if m: - return 'intel_device_info_slice_available(&perf->devinfo, {0})'.format(m.group(1)) + return 'intel_device_info_slice_available(perf->devinfo, {0})'.format(m.group(1)) m = re.search(r'\$GtSlice([0-9]+)XeCore([0-9]+)$', name) if m: - return 'intel_device_info_subslice_available(&perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2)) + return 'intel_device_info_subslice_available(perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2)) if allow_counters and name in set.counter_vars: return set.read_funcs[name[1:]] + "(perf, query, results)" return None diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c index e04be2a7d61..385862a4a97 100644 --- a/src/intel/perf/intel_perf.c +++ b/src/intel/perf/intel_perf.c @@ -362,7 +362,7 @@ init_oa_configs(struct intel_perf_config *perf, int fd, static void compute_topology_builtins(struct intel_perf_config *perf) { - const struct intel_device_info *devinfo = &perf->devinfo; + const struct intel_device_info *devinfo = perf->devinfo; perf->sys_vars.slice_mask = devinfo->slice_masks; perf->sys_vars.n_eu_slices = devinfo->num_slices; @@ -725,7 +725,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd, if (devinfo->kmd_type != INTEL_KMD_TYPE_I915) return false; - perf->devinfo = *devinfo; + perf->devinfo = devinfo; /* Consider an invalid as supported. */ if (fd == -1) { @@ -1180,7 +1180,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result, result->accumulator + query->a_offset + 32 + i); } - if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) || + if (can_use_mi_rpc_bc_counters(query->perf->devinfo) || !query->perf->sys_vars.query_mode) { /* A36-37 counters are 32bits */ accumulate_uint32(start + 40, end + 40, @@ -1222,7 +1222,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result, result->accumulator + query->a_offset + 32 + i); } - if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) || + if (can_use_mi_rpc_bc_counters(query->perf->devinfo) || !query->perf->sys_vars.query_mode) { /* 8x 32bit B counters */ for (i = 0; i < 8; i++) { @@ -1328,7 +1328,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result bool no_oa_accumulate) { const struct intel_perf_query_field_layout *layout = &query->perf->query_layout; - const struct intel_device_info *devinfo = &query->perf->devinfo; + const struct intel_device_info *devinfo = query->perf->devinfo; for (uint32_t r = 0; r < layout->n_fields; r++) { const struct intel_perf_query_field *field = &layout->fields[r]; diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h index 011fbf98693..6462a8faf32 100644 --- a/src/intel/perf/intel_perf.h +++ b/src/intel/perf/intel_perf.h @@ -373,7 +373,7 @@ struct intel_perf_config { bool query_mode; /** $QueryMode */ } sys_vars; - struct intel_device_info devinfo; + const struct intel_device_info *devinfo; /* OA metric sets, indexed by GUID, as know by Mesa at build time, to * cross-reference with the GUIDs of configs advertised by the kernel at diff --git a/src/intel/perf/intel_perf_setup.h b/src/intel/perf/intel_perf_setup.h index 25e697c2d7e..6ce39a03b16 100644 --- a/src/intel/perf/intel_perf_setup.h +++ b/src/intel/perf/intel_perf_setup.h @@ -40,7 +40,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters) query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters); /* Accumulation buffer offsets... */ - if (perf->devinfo.verx10 <= 75) { + if (perf->devinfo->verx10 <= 75) { query->oa_format = I915_OA_FORMAT_A45_B8_C8; query->gpu_time_offset = 0; query->a_offset = query->gpu_time_offset + 1; @@ -48,7 +48,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters) query->c_offset = query->b_offset + 8; query->perfcnt_offset = query->c_offset + 8; query->rpstat_offset = query->perfcnt_offset + 2; - } else if (perf->devinfo.verx10 <= 120) { + } else if (perf->devinfo->verx10 <= 120) { query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; query->gpu_time_offset = 0; query->gpu_clock_offset = query->gpu_time_offset + 1;