intel/perf: use a function to do common allocations

This cuts the compile time down for this file on my ryzen from
real	1m4.077s
to
real	0m30.827s

Reviewed-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14630>
This commit is contained in:
Dave Airlie 2022-01-20 11:36:16 +10:00 committed by Marge Bot
parent 521ede8451
commit acc2d08cf9

View file

@ -691,7 +691,48 @@ def main():
/* Minimum / maximum of two expressions.
 *
 * Every use of an argument is parenthesized, including inside the
 * comparison, so that arguments containing operators with lower precedence
 * than the relational operators (for example a & b or x ? y : z) are
 * compared as a whole instead of being re-associated by the expansion.
 *
 * The selected argument is evaluated twice; do not pass expressions with
 * side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* Allocate an OA query descriptor plus an array of ncounters counter slots.
 *
 * The descriptor is obtained from rzalloc() with perf as the context
 * argument, and the counter array from rzalloc_array() with the new query
 * as the context argument. The query is tagged INTEL_PERF_QUERY_TYPE_OA,
 * points back at perf, and starts with n_counters == 0 and
 * oa_metrics_set_id == 0.
 *
 * NOTE(review): the allocation results are not checked in this function.
 */
static struct intel_perf_query_info *
intel_query_alloc(struct intel_perf_config *perf, int ncounters)
{
   struct intel_perf_query_info *q =
      rzalloc(perf, struct intel_perf_query_info);

   q->counters =
      rzalloc_array(q, struct intel_perf_query_counter, ncounters);
   q->n_counters = 0; /* count starts at zero */
   q->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   q->kind = INTEL_PERF_QUERY_TYPE_OA;
   q->perf = perf;

   return q;
}
/* Allocate a query through intel_query_alloc() and configure it for the
 * I915_OA_FORMAT_A45_B8_C8 report format, filling in the accumulation
 * buffer offsets that go with that format.
 */
static struct intel_perf_query_info *
hsw_query_alloc(struct intel_perf_config *perf, int ncounters)
{
   struct intel_perf_query_info *q = intel_query_alloc(perf, ncounters);

   q->oa_format = I915_OA_FORMAT_A45_B8_C8;

   /* Accumulation buffer offsets: each section begins where the previous
    * one ends. The section sizes used here are 1 (gpu time), 45 (A),
    * 8 (B), 8 (C) and 2 (perfcnt).
    */
   q->gpu_time_offset = 0;
   q->a_offset        = q->gpu_time_offset + 1;
   q->b_offset        = q->a_offset + 45;
   q->c_offset        = q->b_offset + 8;
   q->perfcnt_offset  = q->c_offset + 8;
   q->rpstat_offset   = q->perfcnt_offset + 2;

   return q;
}
/* Allocate a query through intel_query_alloc() and configure it for the
 * I915_OA_FORMAT_A32u40_A4u32_B8_C8 report format, filling in the
 * accumulation buffer offsets that go with that format.
 */
static struct intel_perf_query_info *
bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
{
   struct intel_perf_query_info *q = intel_query_alloc(perf, ncounters);

   q->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: each section begins where the previous
    * one ends. The section sizes used here are 1 (gpu time), 1 (gpu
    * clock), 36 (A; presumably the 32 + 4 A counters named in the format,
    * confirm), 8 (B), 8 (C) and 2 (perfcnt).
    */
   q->gpu_time_offset  = 0;
   q->gpu_clock_offset = q->gpu_time_offset + 1;
   q->a_offset         = q->gpu_clock_offset + 1;
   q->b_offset         = q->a_offset + 36;
   q->c_offset         = q->b_offset + 8;
   q->perfcnt_offset   = q->c_offset + 8;
   q->rpstat_offset    = q->perfcnt_offset + 2;

   return q;
}
"""))
# Print out all equation functions.
@ -713,43 +754,15 @@ def main():
c("{\n")
c_indent(3)
c("struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);\n")
if gen.chipset == "hsw":
c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
else:
c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
c("\n")
c("query->perf = perf;\n")
c("query->kind = INTEL_PERF_QUERY_TYPE_OA;\n")
c("query->name = \"" + set.name + "\";\n")
c("query->symbol_name = \"" + set.symbol_name + "\";\n")
c("query->guid = \"" + set.hw_config_guid + "\";\n")
c("query->counters = rzalloc_array(query, struct intel_perf_query_counter, %u);" % len(counters))
c("query->n_counters = 0;")
c("query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */")
if gen.chipset == "hsw":
c(textwrap.dedent("""\
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
/* Accumulation buffer offsets... */
query->gpu_time_offset = 0;
query->a_offset = query->gpu_time_offset + 1;
query->b_offset = query->a_offset + 45;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2;
"""))
else:
c(textwrap.dedent("""\
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
/* Accumulation buffer offsets... */
query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1;
query->a_offset = query->gpu_clock_offset + 1;
query->b_offset = query->a_offset + 36;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2;
"""))
c("\n")
c("struct intel_perf_query_counter *counter = query->counters;\n")