From 51d96c9f751890f3970f2e332a90b6bba133184e Mon Sep 17 00:00:00 2001
From: "Nemallapudi, Jaikrishna"
Date: Thu, 7 May 2026 09:34:58 +0000
Subject: [PATCH 1/2] pps: classify counters into Perfetto counter groups

Perfetto's GpuCounterDescriptor exposes a per-counter "groups" field that
classifies a counter into well-known buckets (VERTICES, FRAGMENTS,
COMPUTE, MEMORY, PRIMITIVES, RAY_TRACING). Tools such as Android CTS'
CtsGpuProfilingDataTest#testProfilingDataProducersAvailable iterate the
counter specs and union their group lists to determine the set of
"supported" GPU counter groups. Until now PPS never populated this field,
so all Mesa-backed producers exposed counters with an empty groups list.

Add a virtual Driver::classify_counter_groups() hook so each driver can
map its own counter naming conventions onto the Perfetto categories
without having to include Perfetto headers, then call it from
add_descriptors() to emit the per-spec groups field.

A neutral CounterGroupType enum is introduced in pps_driver.h. Its
enumerators mirror, by name, the counter groups defined by
perfetto::protos::pbzero::GpuCounterDescriptor; add_descriptors()
translates each enumerator explicitly, so the numeric values do not have
to match and driver implementations stay decoupled from the Perfetto SDK
headers. The default implementation does nothing, leaving counters
unclassified for drivers that have not opted in yet.

Signed-off-by: Nemallapudi, Jaikrishna
---
 src/tool/pps/pps_datasource.cc | 26 ++++++++++++++++++++++++++
 src/tool/pps/pps_driver.h      | 21 +++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/tool/pps/pps_datasource.cc b/src/tool/pps/pps_datasource.cc
index 168eda79c54..c397b602936 100644
--- a/src/tool/pps/pps_datasource.cc
+++ b/src/tool/pps/pps_datasource.cc
@@ -187,6 +187,32 @@ template <typename GpuCounterDescriptor> void add_descriptors(GpuCounterDescript
       }
       spec->add_numerator_units(units);
       spec->set_select_by_default(true);
+
+      // Classify counter into Perfetto counter groups via driver's virtual method.
+      // Each driver maps its own counter naming conventions; unrecognised counters
+      // are left without a group (which is valid).
+      driver.classify_counter_groups(counter.name, [&](CounterGroupType group) {
+         switch (group) {
+         case CounterGroupType::VERTICES:
+            spec->add_groups(GpuCounterDescriptor::VERTICES);
+            break;
+         case CounterGroupType::FRAGMENTS:
+            spec->add_groups(GpuCounterDescriptor::FRAGMENTS);
+            break;
+         case CounterGroupType::COMPUTE:
+            spec->add_groups(GpuCounterDescriptor::COMPUTE);
+            break;
+         case CounterGroupType::MEMORY:
+            spec->add_groups(GpuCounterDescriptor::MEMORY);
+            break;
+         case CounterGroupType::PRIMITIVES:
+            spec->add_groups(GpuCounterDescriptor::PRIMITIVES);
+            break;
+         case CounterGroupType::RAY_TRACING:
+            spec->add_groups(GpuCounterDescriptor::RAY_TRACING);
+            break;
+         }
+      });
    }
 }

diff --git a/src/tool/pps/pps_driver.h b/src/tool/pps/pps_driver.h
index b46c0f58bf6..c79e34ec4aa 100644
--- a/src/tool/pps/pps_driver.h
+++ b/src/tool/pps/pps_driver.h
@@ -9,6 +9,7 @@

 #pragma once

+#include <functional>
 #include
 #include
 #include
@@ -19,6 +20,18 @@

 namespace pps
 {
+/// @brief Perfetto-compatible counter group categories for classifying GPU counters.
+/// These mirror, by name only, the counter groups of perfetto::protos::pbzero::GpuCounterDescriptor;
+/// the data source maps each one explicitly, so drivers do not need to include Perfetto headers.
+enum class CounterGroupType {
+   VERTICES,
+   FRAGMENTS,
+   COMPUTE,
+   MEMORY,
+   PRIMITIVES,
+   RAY_TRACING,
+};
+
 /// @brief Abstract Driver class
 class Driver
 {
@@ -90,6 +103,14 @@ class Driver
    /// than sampling separately CPU & GPU timestamps.
    virtual bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const = 0;

+   /// @brief Classify a counter name into Perfetto counter groups.
+   /// Drivers should call add_group(CounterGroupType::XYZ) for each group
+   /// that applies to the given counter name.
+   /// The default implementation does nothing (counters are left unclassified).
+   virtual void classify_counter_groups(
+      const std::string & /*counter_name*/,
+      const std::function<void(CounterGroupType)> & /*add_group*/) const {}
+
    DrmDevice drm_device;

    /// List of counter groups

From 3ecdfda2b19a6b86f8c8d5a306dd4ce3befb9c6a Mon Sep 17 00:00:00 2001
From: "Nemallapudi, Jaikrishna"
Date: Thu, 7 May 2026 09:35:31 +0000
Subject: [PATCH 2/2] intel/pps: classify OA counters into Perfetto counter groups

Implement Driver::classify_counter_groups() for the Intel PPS driver so
that Perfetto's GpuCounterDescriptor exposes a non-empty 'groups' field
for every Intel OA counter whose name matches one of the heuristics
below. Counters that match none of them are left unclassified.

The classifier upper-cases the counter name and then matches it against
well-known prefixes / suffixes / substrings to assign each counter to one
or more Perfetto buckets:

  VERTICES     starts with VS, VERTEX; contains _VERTEX; ends with _VS
  FRAGMENTS    starts with PS, PIXEL, POSTPS, RASTERIZER, SHADERPIX,
               SHADER_PIX, SAMPLES; ends with _PS
  COMPUTE      starts with CS, ASYNCCS, ASYNC_GPGPU, GPGPU
  MEMORY       starts with GTI, L3, SLM, SHADERMEMORY, LOAD_STORE,
               LOADSTORE, COLOR_L3, GPU_MEMORY, SAMPLER_
  PRIMITIVES   starts with CLIPPER, SO, IA_, STREAMOUT, STRIPSFAN;
               contains PRIMITIVE
  RAY_TRACING  starts with RT_; contains RAYTRACE or _RT_

A counter may belong to several buckets at once -- for example
CLIPPER_INPUT_VERTEX is both PRIMITIVES and VERTICES -- which is allowed
by the repeated GpuCounterSpec.groups field.

This fixes the Android CTS test
CtsGpuProfilingDataTest#testProfilingDataProducersAvailable which
previously failed on Intel platforms with:

    Required counter groups missing.
Found: [] Required: [COMPUTE, FRAGMENTS, MEMORY, PRIMITIVES, VERTICES, RAY_TRACING] Signed-off-by: Nemallapudi, Jaikrishna --- src/intel/ds/intel_pps_driver.cc | 57 ++++++++++++++++++++++++++++++++ src/intel/ds/intel_pps_driver.h | 4 +++ 2 files changed, 61 insertions(+) diff --git a/src/intel/ds/intel_pps_driver.cc b/src/intel/ds/intel_pps_driver.cc index b13377d161b..ee81200d98b 100644 --- a/src/intel/ds/intel_pps_driver.cc +++ b/src/intel/ds/intel_pps_driver.cc @@ -8,6 +8,8 @@ #include "intel_pps_driver.h" +#include +#include #include #include #include @@ -384,4 +386,59 @@ bool IntelDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp, return true; } +void IntelDriver::classify_counter_groups( + const std::string &cname_in, + const std::function &add_group) const +{ + // Normalise to upper-case so the heuristics work for both Mixed-case + // (e.g. "VsThreads", "PostPsDepthTestFails") and UPPER_SNAKE_CASE + // (e.g. "CLIPPER_INPUT_VERTEX") counter names used across Intel OA XMLs. + std::string cname; + cname.reserve(cname_in.size()); + for (char c : cname_in) + cname.push_back(static_cast(std::toupper(static_cast(c)))); + + auto starts_with = [](const std::string &s, const char *prefix) { + return s.compare(0, strlen(prefix), prefix) == 0; + }; + auto ends_with = [](const std::string &s, const char *suffix) { + size_t slen = strlen(suffix); + return s.size() >= slen && s.compare(s.size() - slen, slen, suffix) == 0; + }; + auto contains = [](const std::string &s, const char *needle) { + return s.find(needle) != std::string::npos; + }; + + if (starts_with(cname, "VS") || starts_with(cname, "VERTEX") || + contains(cname, "_VERTEX") || ends_with(cname, "_VS")) { + add_group(CounterGroupType::VERTICES); + } + if (starts_with(cname, "PS") || starts_with(cname, "PIXEL") || + starts_with(cname, "POSTPS") || starts_with(cname, "RASTERIZER") || + starts_with(cname, "SHADERPIX") || starts_with(cname, "SHADER_PIX") || + starts_with(cname, "SAMPLES") || ends_with(cname, "_PS")) 
{ + add_group(CounterGroupType::FRAGMENTS); + } + if (starts_with(cname, "CS") || starts_with(cname, "ASYNCCS") || + starts_with(cname, "ASYNC_GPGPU") || starts_with(cname, "GPGPU")) { + add_group(CounterGroupType::COMPUTE); + } + if (starts_with(cname, "GTI") || starts_with(cname, "L3") || + starts_with(cname, "SLM") || starts_with(cname, "SHADERMEMORY") || + starts_with(cname, "LOAD_STORE") || starts_with(cname, "LOADSTORE") || + starts_with(cname, "COLOR_L3") || starts_with(cname, "GPU_MEMORY") || + starts_with(cname, "SAMPLER_")) { + add_group(CounterGroupType::MEMORY); + } + if (starts_with(cname, "CLIPPER") || starts_with(cname, "SO") || + starts_with(cname, "IA_") || starts_with(cname, "STREAMOUT") || + starts_with(cname, "STRIPSFAN") || contains(cname, "PRIMITIVE")) { + add_group(CounterGroupType::PRIMITIVES); + } + if (starts_with(cname, "RT_") || contains(cname, "RAYTRACE") || + contains(cname, "_RT_")) { + add_group(CounterGroupType::RAY_TRACING); + } +} + } // namespace pps diff --git a/src/intel/ds/intel_pps_driver.h b/src/intel/ds/intel_pps_driver.h index 0daabd41682..2ed57ebe79e 100644 --- a/src/intel/ds/intel_pps_driver.h +++ b/src/intel/ds/intel_pps_driver.h @@ -52,6 +52,10 @@ class IntelDriver : public Driver bool cpu_gpu_timestamp(uint64_t &cpu_timestamp, uint64_t &gpu_timestamp) const override; + void classify_counter_groups( + const std::string &counter_name, + const std::function &add_group) const override; + private: /// @brief Requests the next perf sample /// @return The sample GPU timestamp