pan/perf: Generate derived counter code

For the derived counters generate functions that read the required
hardware counters, then compute and return the result.
The computations use doubles, as in libGPUCounters: that library performs
all computations in floating point, and we want to match the output of other
tools that use it.
The equation implementations are deduplicated because some counters have
changed their equation over time, but not in every generation.
This commit is contained in:
Christoph Pillmayer 2026-04-17 11:48:33 +02:00
parent 54d1a512a8
commit 3b6b25c7d6

View file

@ -5,9 +5,14 @@ import argparse
import textwrap
import os
import datetime
import re
from dataclasses import dataclass
from typing import ClassVar
import xml.etree.ElementTree as et
TAB_SIZE = 3
class SourceFile:
def __init__(self, filename):
@ -28,6 +33,7 @@ class SourceFile:
CATEGORY_IDX_REMAP = {
"GPU Front-end": "PAN_PERF_COUNTER_CAT_FRONTEND",
"Job Manager": "PAN_PERF_COUNTER_CAT_FRONTEND",
"CSF": "PAN_PERF_COUNTER_CAT_FRONTEND",
"Tiler": "PAN_PERF_COUNTER_CAT_TILER",
@ -46,8 +52,14 @@ class Counter:
self.name = self.xml.get("name")
self.desc = self.xml.get("description")
self.units = self.xml.get("units")
self.offset = int(self.xml.get("offset"))
self.underscore_name = self.xml.get("counter").lower()
self.equation = self.xml.get("equation")
self.offset = int(self.xml.get("offset") or 0)
self.underscore_name = (self.xml.get("counter") or "").lower()
self.source_name = self.xml.get("counter") or ""
self.equation_impl = None
if self.units.endswith("/second"):
self.units = self.units.replace("/second", "_per_second")
class Category:
@ -71,6 +83,7 @@ class Product:
self.filename = filename
self.xml = et.parse(self.filename)
self.name = self.xml.getroot().get('id')
assert(self.name is not None)
self.id = self.name.lower()
self.categories = []
@ -78,6 +91,114 @@ class Product:
self.categories.append(Category(self, xml_cat))
@dataclass
class EquationImpl:
    """One generated C function that evaluates a derived counter equation.

    Attributes:
        fname: Unversioned C function name (``compute_<counter name>``).
        body: C statements making up the function body.
        counter: The counter this implementation was first generated for.
        version: Index of this body among the distinct bodies generated for
            the same counter name; ``-1`` until assigned by :meth:`get`.
    """
    fname: str
    body: str
    counter: "Counter"
    version: int = -1

    # counter name -> {generated body -> implementation}.
    # We don't want duplicate methods wasting space, this makes sure there is
    # only one implementation for each variant of counter hardware locations.
    impls: ClassVar[dict[str, dict[str, 'EquationImpl']]] = {}

    @classmethod
    def get(cls, counter, all_counters):
        """Return the shared implementation for ``counter``'s equation.

        The body is generated from ``counter.equation`` and ``all_counters``.
        If an identical body was already registered under the same counter
        name, that instance is returned; otherwise a new one is registered
        with the next free version number.
        """
        body = cls.generate_body(counter, all_counters)
        bucket = cls.impls.setdefault(counter.name, {})
        impl = bucket.get(body)
        if impl is None:
            fname = f"compute_{counter.name.lower()}"
            impl = cls(fname, body, counter, version=len(bucket))
            bucket[body] = impl
        return impl

    @staticmethod
    def generate_body(counter, counters):
        """Translate ``counter.equation`` into the body of a C function.

        Every raw counter from ``counters`` and every ``MALI_CONFIG*``
        constant referenced by the equation is bound to a local
        ``const double vN`` and replaced by that local in the returned
        expression.

        Names are only matched as whole identifiers, and each distinct name is
        bound exactly once, so repeated names and names that are prefixes of
        other names cannot produce undeclared or mangled identifiers.

        Returns:
            The declarations, one per line, followed by ``return <expr>;``.
        """
        ident = "A-Za-z0-9_"
        eq = counter.equation
        defs = []

        # Longest names first keeps the vN numbering stable and independent of
        # whether a short name happens to be contained in a longer one.
        for c in sorted(counters, key=lambda x: len(x.source_name), reverse=True):
            if not c.source_name or c.source_name not in eq:
                continue
            token = re.compile(
                f"(?<![{ident}]){re.escape(c.source_name)}(?![{ident}])")
            if token.search(eq) is None:
                # Only present as part of a longer identifier.
                continue

            idx = len(defs)
            cat_enum = CATEGORY_IDX_REMAP[c.category.name]

            # MaliAnyUtil for example is from "Shader Core" but it reads GPU_ACTIVE
            # which is from "Front-end". We can not use the block index from the
            # shader core when reading a front-end counter.
            # If reading from another block for the equation the only block index that
            # makes sense is 0 because if the category had more than one block we
            # could not know which one to choose.
            from_block = 'block' if c.category.name == counter.category.name else '0'

            defs.append(
                f"const double v{idx} = pan_perf_counter_read_raw(perf, {cat_enum}, {c.offset}, {from_block});")
            eq = token.sub(f"v{idx}", eq)

        config_vars = {}

        def bind_config(match):
            # Declare each config constant once and reuse its local afterwards.
            config = match.group(0)
            if config not in config_vars:
                idx = len(defs)
                # Only the leading "MALI" is a prefix to rewrite.
                pan_config = config.replace("MALI", "PAN_PERF_DERIVED", 1)
                defs.append(f"const double v{idx} = configs[{pan_config}];")
                config_vars[config] = f"v{idx}"
            return config_vars[config]

        # A single pass over the expression: already substituted text is never
        # rescanned, and the greedy match always consumes a whole identifier.
        eq = re.sub(f"(?<![{ident}])MALI_CONFIG[{ident}]+", bind_config, eq)

        body = "\n".join(defs) + "\n"
        body += f"return {eq};"

        return body

    @property
    def versioned_name(self):
        """C function name including the ``_v<version>`` suffix."""
        assert self.version != -1, "should not emit non versioned"
        return self.fname + f"_v{self.version}"

    @property
    def decl(self):
        """C prototype of the function, without storage class or body."""
        decl = "double " + self.versioned_name + \
            "(const struct pan_perf *perf, const double *configs, uint8_t block)"
        return decl
def generate_equations(prods, c):
    """Attach an equation implementation to every derived counter and emit
    all distinct implementations as static C functions.

    Args:
        prods: Products whose categories and counters are scanned.
        c: Output file object providing ``write``, ``indent`` and ``outdent``.
    """
    for product in prods:
        every_counter = [
            cnt for category in product.categories for cnt in category.counters
        ]
        # Only counters with a source name can be referenced by an equation.
        raw_counters = [cnt for cnt in every_counter if cnt.source_name]

        for derived in every_counter:
            if derived.equation:
                derived.equation_impl = EquationImpl.get(derived, raw_counters)

    # Implementations are shared between products, so emit each one once.
    for variants in EquationImpl.impls.values():
        for impl in variants.values():
            c.write(f"static {impl.decl}{{")
            c.indent(TAB_SIZE)
            c.write(impl.body)
            c.outdent(TAB_SIZE)
            c.write("}\n")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--header", help="Header file to write", required=True)
@ -93,7 +214,7 @@ def main():
for xml_file in args.xml_files:
prods.append(Product(xml_file))
tab_size = 3
tab_size = TAB_SIZE
copyright = textwrap.dedent("""\
/* Autogenerated file, DO NOT EDIT manually! generated by {}
@ -121,6 +242,79 @@ def main():
#include <util/macros.h>
"""))
c.write(textwrap.dedent("""
static inline int max2(int a, int b) {
return MAX2(a, b);
}
static inline int max3(int a, int b, int c) {
return max2(max2(a, b), c);
}
static inline int max4(int a, int b, int c, int d) {
return max2(max3(a, b, c), d);
}
static inline int max5(int a, int b, int c, int d, int e) {
return max2(max4(a, b, c, d), e);
}
static inline int max6(int a, int b, int c, int d, int e, int f) {
return max2(max5(a, b, c, d, e), f);
}
static inline int max7(int a, int b, int c, int d, int e, int f, int g) {
return max2(max6(a, b, c, d, e, f), g);
}
static inline int max8(int a, int b, int c, int d, int e, int f, int g, int h) {
return max2(max7(a, b, c, d, e, f, g), h);
}
static inline int max9(int a, int b, int c, int d, int e, int f, int g, int h, int i) {
return max2(max8(a, b, c, d, e, f, g, h), i);
}
static inline int min2(int a, int b) {
return MIN2(a, b);
}
static inline int min3(int a, int b, int c) {
return min2(min2(a, b), c);
}
static inline int min4(int a, int b, int c, int d) {
return min2(min3(a, b, c), d);
}
static inline int min5(int a, int b, int c, int d, int e) {
return min2(min4(a, b, c, d), e);
}
static inline int min6(int a, int b, int c, int d, int e, int f) {
return min2(min5(a, b, c, d, e), f);
}
static inline int min7(int a, int b, int c, int d, int e, int f, int g) {
return min2(min6(a, b, c, d, e, f), g);
}
static inline int min8(int a, int b, int c, int d, int e, int f, int g, int h) {
return min2(min7(a, b, c, d, e, f, g), h);
}
static inline int min9(int a, int b, int c, int d, int e, int f, int g, int h, int i) {
return min2(min8(a, b, c, d, e, f, g, h), i);
}
#define GET_MACRO(_1,_2,_3,_4,_5,_6,_7,_8,_9,name,...) name
#define min(...) GET_MACRO(__VA_ARGS__, min9, min8, min7, min6, min5, min4, min3, min2)(__VA_ARGS__)
#define max(...) GET_MACRO(__VA_ARGS__, max9, max8, max7, max6, max5, max4, max3, max2)(__VA_ARGS__)
"""))
generate_equations(prods, c)
for prod in prods:
c.write(textwrap.dedent("""
static void UNUSED
@ -176,6 +370,10 @@ def main():
c.write(".units = PAN_PERF_COUNTER_UNITS_%s," % (counter.units.upper()))
c.write(".offset = %u," % (counter.offset))
c.write(".category = %s," % CATEGORY_IDX_REMAP[category.name])
if counter.equation:
c.write(f".derived = {counter.equation_impl.versioned_name},")
else:
c.write(".derived = NULL,")
c.outdent(tab_size)
c.write("}, // counter")