diff --git a/src/intel/ds/intel_pps_driver.cc b/src/intel/ds/intel_pps_driver.cc
index b13377d161b..ee81200d98b 100644
--- a/src/intel/ds/intel_pps_driver.cc
+++ b/src/intel/ds/intel_pps_driver.cc
@@ -8,6 +8,8 @@
 
 #include "intel_pps_driver.h"
 
+#include <cctype>
+#include <cstring>
 #include
 #include
 #include
@@ -384,4 +386,79 @@ bool IntelDriver::cpu_gpu_timestamp(uint64_t &cpu_timestamp,
    return true;
 }
 
+/// @brief Classifies an Intel OA counter name into Perfetto counter groups.
+///
+/// The name is matched against fixed prefix, suffix and substring patterns.
+/// add_group is invoked once for every group whose pattern matches, so one
+/// counter may end up in several groups or in none at all.
+///
+/// @param counter_name Name of the counter to classify
+/// @param add_group Callback receiving each group that applies
+void IntelDriver::classify_counter_groups(
+   const std::string &counter_name,
+   const std::function<void(CounterGroupType)> &add_group) const
+{
+   // Invoking an empty std::function is an error, and without a callback
+   // there is nothing to report the groups to.
+   if (!add_group)
+      return;
+
+   // Normalise to upper-case so the heuristics work for both Mixed-case
+   // (e.g. "VsThreads", "PostPsDepthTestFails") and UPPER_SNAKE_CASE
+   // (e.g. "CLIPPER_INPUT_VERTEX") counter names used across Intel OA XMLs.
+   std::string cname;
+   cname.reserve(counter_name.size());
+   for (const char c : counter_name) {
+      // std::toupper requires a value representable as unsigned char (or
+      // EOF), hence the cast before the call.
+      cname.push_back(
+         static_cast<char>(std::toupper(static_cast<unsigned char>(c))));
+   }
+
+   auto starts_with = [](const std::string &s, const char *prefix) {
+      // compare() clamps the length to s.size(), so a name shorter than the
+      // prefix simply compares unequal.
+      return s.compare(0, std::strlen(prefix), prefix) == 0;
+   };
+   auto ends_with = [](const std::string &s, const char *suffix) {
+      const std::size_t slen = std::strlen(suffix);
+      return s.size() >= slen &&
+             s.compare(s.size() - slen, slen, suffix) == 0;
+   };
+   auto contains = [](const std::string &s, const char *needle) {
+      return s.find(needle) != std::string::npos;
+   };
+
+   if (starts_with(cname, "VS") || starts_with(cname, "VERTEX") ||
+       contains(cname, "_VERTEX") || ends_with(cname, "_VS")) {
+      add_group(CounterGroupType::VERTICES);
+   }
+   if (starts_with(cname, "PS") || starts_with(cname, "PIXEL") ||
+       starts_with(cname, "POSTPS") || starts_with(cname, "RASTERIZER") ||
+       starts_with(cname, "SHADERPIX") || starts_with(cname, "SHADER_PIX") ||
+       starts_with(cname, "SAMPLES") || ends_with(cname, "_PS")) {
+      add_group(CounterGroupType::FRAGMENTS);
+   }
+   if (starts_with(cname, "CS") || starts_with(cname, "ASYNCCS") ||
+       starts_with(cname, "ASYNC_GPGPU") || starts_with(cname, "GPGPU")) {
+      add_group(CounterGroupType::COMPUTE);
+   }
+   if (starts_with(cname, "GTI") || starts_with(cname, "L3") ||
+       starts_with(cname, "SLM") || starts_with(cname, "SHADERMEMORY") ||
+       starts_with(cname, "LOAD_STORE") || starts_with(cname, "LOADSTORE") ||
+       starts_with(cname, "COLOR_L3") || starts_with(cname, "GPU_MEMORY") ||
+       starts_with(cname, "SAMPLER_")) {
+      add_group(CounterGroupType::MEMORY);
+   }
+   if (starts_with(cname, "CLIPPER") || starts_with(cname, "SO") ||
+       starts_with(cname, "IA_") || starts_with(cname, "STREAMOUT") ||
+       starts_with(cname, "STRIPSFAN") || contains(cname, "PRIMITIVE")) {
+      add_group(CounterGroupType::PRIMITIVES);
+   }
+   if (starts_with(cname, "RT_") || contains(cname, "RAYTRACE") ||
+       contains(cname, "_RT_")) {
+      add_group(CounterGroupType::RAY_TRACING);
+   }
+}
+
 } // namespace pps
diff --git a/src/intel/ds/intel_pps_driver.h b/src/intel/ds/intel_pps_driver.h
index 0daabd41682..2ed57ebe79e 100644
--- a/src/intel/ds/intel_pps_driver.h
+++ b/src/intel/ds/intel_pps_driver.h
@@ -52,6 +52,10 @@ class IntelDriver : public Driver
    bool cpu_gpu_timestamp(uint64_t &cpu_timestamp,
                           uint64_t &gpu_timestamp) const override;
 
+   void classify_counter_groups(
+      const std::string &counter_name,
+      const std::function<void(CounterGroupType)> &add_group) const override;
+
    private:
    /// @brief Requests the next perf sample
    /// @return The sample GPU timestamp
diff --git a/src/tool/pps/pps_datasource.cc b/src/tool/pps/pps_datasource.cc
index 168eda79c54..c397b602936 100644
--- a/src/tool/pps/pps_datasource.cc
+++ b/src/tool/pps/pps_datasource.cc
@@ -187,6 +187,32 @@ template void add_descriptors(GpuCounterDescript
       }
       spec->add_numerator_units(units);
       spec->set_select_by_default(true);
+
+      // Classify counter into Perfetto counter groups via driver's virtual method.
+      // Each driver maps its own counter naming conventions; unrecognised counters
+      // are left without a group (which is valid).
+      driver.classify_counter_groups(counter.name, [&](CounterGroupType group) {
+         switch (group) {
+         case CounterGroupType::VERTICES:
+            spec->add_groups(GpuCounterDescriptor::VERTICES);
+            break;
+         case CounterGroupType::FRAGMENTS:
+            spec->add_groups(GpuCounterDescriptor::FRAGMENTS);
+            break;
+         case CounterGroupType::COMPUTE:
+            spec->add_groups(GpuCounterDescriptor::COMPUTE);
+            break;
+         case CounterGroupType::MEMORY:
+            spec->add_groups(GpuCounterDescriptor::MEMORY);
+            break;
+         case CounterGroupType::PRIMITIVES:
+            spec->add_groups(GpuCounterDescriptor::PRIMITIVES);
+            break;
+         case CounterGroupType::RAY_TRACING:
+            spec->add_groups(GpuCounterDescriptor::RAY_TRACING);
+            break;
+         }
+      });
    }
 }
 
diff --git a/src/tool/pps/pps_driver.h b/src/tool/pps/pps_driver.h
index b46c0f58bf6..c79e34ec4aa 100644
--- a/src/tool/pps/pps_driver.h
+++ b/src/tool/pps/pps_driver.h
@@ -9,6 +9,7 @@
 
 #pragma once
 
+#include <functional>
 #include
 #include
 #include
@@ -19,6 +20,18 @@
 
 namespace pps
 {
+/// @brief Counter group categories used to classify GPU counters.
+/// Each enumerator is translated by name (not by numeric value) to the matching
+/// Perfetto GpuCounterDescriptor group, so drivers need no Perfetto headers.
+enum class CounterGroupType {
+   VERTICES,
+   FRAGMENTS,
+   COMPUTE,
+   MEMORY,
+   PRIMITIVES,
+   RAY_TRACING,
+};
+
 /// @brief Abstract Driver class
 class Driver
 {
@@ -90,6 +103,14 @@ class Driver
    /// than sampling separately CPU & GPU timestamps.
    virtual bool cpu_gpu_timestamp(uint64_t &cpu_timestamp,
                                   uint64_t &gpu_timestamp) const = 0;
+   /// @brief Classify a counter name into Perfetto counter groups.
+   /// Drivers should call add_group(CounterGroupType::XYZ) once for each group
+   /// that applies to the given counter name; a counter may get several groups.
+   /// The default implementation does nothing (counters are left unclassified).
+   virtual void classify_counter_groups(
+      const std::string & /*counter_name*/,
+      const std::function<void(CounterGroupType)> & /*add_group*/) const {}
+
    DrmDevice drm_device;
 
    /// List of counter groups