mesa/src/util/perf/u_trace_gather.py
Danylo Piliaiev 1e6af961ec util/u_trace: Add scripts for perf analysis based on u_trace results
The intent is to provide an easy way to measure the impact of an
optimization, not by measuring the whole workload completion time
but also by measuring certain chunks of the workload like command
buffers, renderpasses, or even separate draws.

A moderate perf win in a rare case may not translate into statistically
signifacant overall result. An optimization also may hurt perf in some
cases and help in other which is also hard to judge from overall perf.

For best results pin cpu/gpu frequencies and disable gpu suspend.
Exclude all unnecessary tracepoints via TU_GPU_TRACEPOINT.

Usage:
   u_trace_gather.py gather_all \
    --loops 1 --launcher "renderdoccmd replay --loops 12" \
    --traces-list /path/to/traces.txt \
    --traces-dir /path/to/dir/with/traces/ \
    --results /path/to/results/ \
    --alias new-shiny-opt

   u_trace_compare.py compare \
    --results /path/to/results/ \
    --loops-merged true \
    --alias-a default \
    --alias-b new-shiny-opt \
    --event-start start_render_pass \
    --event-end end_render_pass \
    --filter "int(params['drawCount']) > 10"

   u_trace_compare.py details \
    --results /path/to/results/ \
    --trace-name test.rdc \
    --alias default \
    --event-start start_render_pass \
    --event-end end_render_pass

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16914>
2025-07-22 21:15:51 +00:00

168 lines
5 KiB
Python

#!/usr/bin/python3
#
# Copyright © 2024 Igalia S.L.
# SPDX-License-Identifier: MIT
# Usage:
# u_trace_gather.py gather_all \
# --loops 1 --launcher "renderdoccmd replay --loops 12" \
# --traces-list /path/to/traces.txt \
# --traces-dir /path/to/dir/with/traces/ \
# --results /path/to/results/ \
# --alias new-shiny-opt
#
# Where traces.txt:
# trace1.rdc
# ; trace2.rdc // Disabled
# trace3.rdc
from dataclasses import dataclass
from enum import Enum
import os
import argparse
import re
import subprocess
from types import SimpleNamespace
class RunResultStatus(Enum):
SUCCESS = 0,
FAILURE = 1,
RETRY = 2
@dataclass
class RunResult:
status: RunResultStatus
description: str
class DmesgFailureFinder():
DMESG_COMMAND = ['dmesg', '--level', 'emerg,alert,crit,err,warn,notice']
def __init__(self, regex) -> None:
self.regex = regex
self._new_messages = []
self._last_message = None
self.update_dmesg()
def get_workload_result(self):
self.update_dmesg()
if self._new_messages:
for line in self._new_messages:
if self.regex.search(line):
return RunResult(RunResultStatus.FAILURE, line)
return RunResult(RunResultStatus.SUCCESS, "")
def update_dmesg(self):
dmesg = subprocess.check_output(self.DMESG_COMMAND).decode('utf-8')
dmesg = dmesg.strip().splitlines()
l = 0
for index, item in enumerate(reversed(dmesg)):
if item == self._last_message:
l = len(dmesg) - index # don't include the matched element
break
self._new_messages = dmesg[l:]
# Attempt to store the last element of dmesg, unless there was no dmesg
self._last_message = dmesg[-1] if dmesg else None
pass
def gather(args) -> None:
results_dir = f"{args.results}/{args.name}/"
os.makedirs(results_dir, exist_ok=True)
failure_finder = DmesgFailureFinder(re.compile("gpu fault"))
for loop in range(args.loops):
print(f"Start of loop {loop}")
output_file = f"{results_dir}/trace_{args.name}_{args.alias}_{loop}.csv"
output_log = f"{results_dir}/{args.name}_{args.alias}_{loop}.log"
env = os.environ.copy()
env["MESA_VK_ABORT_ON_DEVICE_LOSS"] = "1"
env["MESA_GPU_TRACEFILE"] = output_file
env["MESA_GPU_TRACES"] = "print_csv"
print(f"{args.command}")
with open(output_log, 'w') as file:
ret = subprocess.run(
args.command,
stdout=file,
stderr=subprocess.STDOUT,
stdin=subprocess.PIPE,
env=env,
shell=False,
)
result = failure_finder.get_workload_result()
if ret.returncode != 0:
print(f"\tCommand exited with code {ret}")
if result.status != RunResultStatus.SUCCESS:
print(f"GPU failure: \"{result.description}\"")
if ret.returncode != 0 or result.status != RunResultStatus.SUCCESS:
try:
os.remove(output_file)
except OSError:
pass
break
def gather_all(args) -> None:
trace_files = []
with open(args.traces_list) as file:
for line in file:
if len(line) > 0 and line[0].isalnum():
trace_files.append(line.rstrip())
for i, trace_file in enumerate(trace_files):
full_path = f"{args.traces_dir}/{trace_file}"
print(f"Evaluating [{i + 1}/{len(trace_files)}] {trace_file} (\"{full_path}\")")
gather_args = SimpleNamespace()
gather_args.loops = args.loops
gather_args.command = f"{args.launcher} {full_path}".split(" ")
gather_args.results = args.results
gather_args.name = trace_file
gather_args.alias = args.alias
gather(gather_args)
print("Done.")
def main() -> None:
parser = argparse.ArgumentParser()
sub = parser.add_subparsers()
gather_all_args = sub.add_parser('gather_all', help='')
gather_all_args.add_argument('--loops', type=int, required=True,
help="How many times the command would be repeated in order to average the results.")
gather_all_args.add_argument('--traces-list', type=str, required=True, help="File with a list of trace files.")
gather_all_args.add_argument('--traces-dir', type=str, required=True,
help="Path to the directory with trace files.")
gather_all_args.add_argument('--results', type=str, required=True, help="Folder where to write results.")
gather_all_args.add_argument('--alias', type=str, required=True, help="Alias for the change being tested.")
gather_all_args.add_argument('--launcher', type=str, required=True,
help="Launcher that accepts trace file as an argument.")
gather_all_args.set_defaults(func=gather_all)
args = parser.parse_args()
args.func(args)
if __name__ == "__main__":
main()