intel/tools: create intel_monitor for sampling eu stalls

Created stand alone tool for sampling gfx data on regular
intervals. Tool has inner loop that performs sampling every N
useconds. Press any key to end sampling. Results will be dumped
when intel_monitor exits.

First application of intel_monitor will be to collect eu stall
data. Perhaps more applications can be added at a later date.

How to use:
 0. Set sysctl dev.xe.observation_paranoid=0
 1. Clean shader cache and launch gfx app with INTEL_DEBUG=shaders-lineno.
    Redirect stderr to asm.txt.
 2. When gfx app ready to monitor, begin capturing eustall data by
    launching `intel_monitor -e > eustall.csv` in separate console.
 3. When done collecting, close intel_monitor by pressing any key.
 4. Correlate eustall data in eustall.csv with shader instructions in
    asm.txt by matching instruction offsets. Use data to determine which
    instructions are stalling and why.

Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30142>
This commit is contained in:
Felix DeGrood 2024-07-09 02:11:41 +00:00 committed by Marge Bot
parent 2fcebf2f1e
commit 610ad8d378
5 changed files with 493 additions and 2 deletions

View file

@ -433,7 +433,6 @@ xe_perf_eustall_stream_record_size(int drm_fd)
int
xe_perf_eustall_stream_sample_rate(int drm_fd)
{
int sampling_rate;
struct drm_xe_query_eu_stall *eu_stall_data =
xe_device_query_alloc_fetch(drm_fd, DRM_XE_DEVICE_QUERY_EU_STALL, NULL);
if (!eu_stall_data)
@ -441,7 +440,9 @@ xe_perf_eustall_stream_sample_rate(int drm_fd)
assert(eu_stall_data->sampling_rates[0] > 0 &&
eu_stall_data->sampling_rates[0] < INT_MAX);
sampling_rate = (int)eu_stall_data->sampling_rates[0];
/* pick slowest rate to reduce chance of overflow */
int idx_slowest = eu_stall_data->num_sampling_rates - 1;
int sampling_rate = (int)eu_stall_data->sampling_rates[idx_slowest];
free(eu_stall_data);
return sampling_rate;
}

View file

@ -0,0 +1,234 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*
* Purpose:
* Enables EU stall sampling available in Xe2+ Intel GPUs. Enables
* GPU sampling of EU stalls. Every N microseconds, program collects
* sampled data from GPU. Accumulated data dumped to stdout once
* intel_monitor is closed.
*/
#include <fcntl.h>
#include <getopt.h>
#include <termios.h>
#include <xf86drm.h>
#include "intel_monitor_eustall.h"
#include "dev/intel_device_info.h"
/* Open the first usable Intel render node.
 *
 * Enumerates up to 8 DRM devices. For each PCI device with vendor id 0x8086
 * that exposes a render node, the node is opened and *devinfo is filled in
 * through intel_get_device_info_from_fd(). The first device for which both
 * steps succeed wins.
 *
 * Returns the open file descriptor (owned by the caller), or -1 when no
 * device could be opened and identified.
 */
static int
get_drm_device(struct intel_device_info *devinfo)
{
   drmDevicePtr devices[8];
   const int max_devices = (int)(sizeof(devices) / sizeof(devices[0]));
   int fd = -1;

   /* drmGetDevices2() returns the number of entries filled in, or a
    * negative error code.
    */
   const int num_devices = drmGetDevices2(0, devices, max_devices);
   if (num_devices <= 0)
      return -1;

   for (int i = 0; i < num_devices; i++) {
      if (!(devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) ||
          devices[i]->bustype != DRM_BUS_PCI ||
          devices[i]->deviceinfo.pci->vendor_id != 0x8086)
         continue;

      fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         fd = -1;
         continue;
      }

      if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1)) {
         close(fd);
         fd = -1;
         continue;
      }

      /* Found a device! */
      break;
   }

   /* The device list is only needed to locate the node path; release it so
    * the enumeration does not leak.
    */
   drmFreeDevices(devices, num_devices);

   return fd;
}
/* Keyboard hit function
 *
 * Polls stdin once without blocking. Canonical mode and echo are switched
 * off and O_NONBLOCK is set for the duration of a single getchar(), then the
 * previous terminal attributes and file status flags are restored. A
 * character that was read is pushed back with ungetc().
 *
 * Return true if keypress detected.
 */
static bool
kbhit(void)
{
   struct termios saved_term;

   /* tcgetattr() fails when stdin is not a terminal (pipe, file, ...). In
    * that case there is nothing to switch or restore, and saved_term must
    * not be written back because it was never filled in.
    */
   const bool have_term = tcgetattr(STDIN_FILENO, &saved_term) == 0;
   if (have_term) {
      struct termios raw_term = saved_term;
      raw_term.c_lflag &= ~(ICANON | ECHO);
      tcsetattr(STDIN_FILENO, TCSANOW, &raw_term);
   }

   /* Only touch the file status flags if they could be read; otherwise
    * "-1 | O_NONBLOCK" would be written back as the flag set.
    */
   const int saved_flags = fcntl(STDIN_FILENO, F_GETFL, 0);
   if (saved_flags != -1)
      fcntl(STDIN_FILENO, F_SETFL, saved_flags | O_NONBLOCK);

   const int ch = getchar();

   if (saved_flags != -1)
      fcntl(STDIN_FILENO, F_SETFL, saved_flags);
   if (have_term)
      tcsetattr(STDIN_FILENO, TCSANOW, &saved_term);

   if (ch != EOF) {
      ungetc(ch, stdin);
      return true;
   }

   return false;
}
/* Print the usage text, the EU stall workflow and the meaning of every
 * column of the EU stall report.
 *
 * progname: name substituted into the "Usage:" line.
 * file:     stream the text is written to.
 */
static void
print_help(const char *progname, FILE *file)
{
   fprintf(file,
           "\n"
           "Usage: %s [OPTION]...\n"
           "Sample and collate GPU metrics across system. Studies workloads run in parallel.\n"
           " -h, --help display this help\n"
           " -e, --eustall sample eu stalls. Supported Xe2+.\n"
           " -f, --file name of output file. DEFAULT=stdout\n"
           " -t, --sample_time period of sampling, in microseconds. DEFAULT=1000\n"
           "\n"
           "Eu stall sample usage:\n"
           " 0. Run `sudo sysctl dev.xe.observation_paranoid=0`\n"
           " 1. Launch gfx app with INTEL_DEBUG=shaders-lineno. Redirect stderr to asm.txt.\n"
           " 2. When gfx app ready to monitor, begin capturing eustall data by launching\n"
           " `intel_monitor -e > eustall.csv` in separate console.\n"
           " 3. When enough data has been collected, close intel_monitor by pressing any key.\n"
           " 4. Correlate eustall data in eustall.csv with shader instructions in asm.txt by\n"
           " matching instruction offsets. Use data to determine which instructions are\n"
           " stalling and why.\n"
           "\n"
           "Eu stall definitions:\n"
           "tdr_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " on Pixel Shader dependency. Multiple stall reasons can\n"
           " qualify during the same cycle.\n"
           "other_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " on any other dependency (Flag/EoT etc). Multiple stall reasons\n"
           " can qualify during the same cycle.\n"
           "control_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for JEU to complete branch instruction. Multiple stall reasons\n"
           " can qualify during the same cycle.\n"
           "pipestall_count - Number of cycles EU stalled, with at least one thread ready to\n"
           " be scheduled (Grf conf/send holds etc). Multiple stall reasons\n"
           " can qualify during the same cycle.\n"
           "send_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for SEND message to be dispatched from EU. Multiple stall\n"
           " reasons can qualify during the same cycle.\n"
           "dist_acc_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for ALU to write GRF/ACC register. Multiple stall reasons can\n"
           " qualify during the same cycle.\n"
           "sbid_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for Scoreboard token to be available. Multiple stall reasons\n"
           " can qualify during the same cycle.\n"
           "sync_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for Gateway to write Notify register. Multiple stall reasons\n"
           " can qualify during the same cycle.\n"
           "inst_fetch_count - Number of cycles EU stalled, with at least one thread waiting\n"
           " for Instruction Fetch. Multiple stall reasons can qualify\n"
           " during the same cycle.\n"
           "active_count - Number of cycles no EU stalled.\n\n",
           progname);
}
/* Sampling back-end selected on the command line. */
enum intel_monitor_sampling_mode {
   INTEL_MONITOR_MODE_NONE    = 0, /* nothing selected yet */
   INTEL_MONITOR_MODE_EUSTALL = 1, /* EU stall sampling ('-e') */
};
int
main(int argc, char *argv[])
{
int c, i;
FILE *fd_out = stdout;
uint64_t sample_period_us = 1000;
bool success = true;
void *cfg = NULL;
bool (*do_sample)(void*) = NULL;
void (*do_dump_results)(void*, FILE*) = NULL;
void (*do_close)(void*) = NULL;
enum intel_monitor_sampling_mode sample_mode = INTEL_MONITOR_MODE_NONE;
const struct option intel_monitor_opts[] = {
{ "help", no_argument, NULL, 'h' },
{ "eustall", no_argument, NULL, 'e' },
{ "file", required_argument, NULL, 'f' },
{ "sample_time", required_argument, NULL, 't' },
{ NULL, 0, NULL, 0 }
};
struct intel_device_info devinfo;
int drm_fd = get_drm_device(&devinfo);
if (drm_fd < 0) {
fprintf(stderr, "IMON: Error encountered while getting drm_device. err=%i\n",
drm_fd);
exit(EXIT_FAILURE);
}
/* Parse arguments */
while ((c = getopt_long(argc, argv, "hef:t:", intel_monitor_opts, &i)) != -1) {
switch (c) {
case 'h':
print_help(argv[0], stderr);
return EXIT_SUCCESS;
case 'e':
sample_mode = INTEL_MONITOR_MODE_EUSTALL;
break;
case 'f':
fd_out = fopen(optarg, "w");
if (!fd_out) {
fprintf(stderr, "IMON: Error opening output file '%s'\n", optarg);
exit(EXIT_FAILURE);
}
break;
case 't':
sample_period_us = atoi(optarg);
break;
default:
fprintf(stderr,
"IMON: Error. Unexpected parameter encountered '%c'.\n", c);
exit(EXIT_FAILURE);
}
}
/* Setup cfg based on selected sampling mode */
if (sample_mode == INTEL_MONITOR_MODE_EUSTALL) {
fprintf(stderr, "IMON: Setting up EU Stall Sampling\n");
if (devinfo.ver < 20) {
fprintf(stderr, "IMON: EU Stall Sampler supported on Xe2+ platforms.\n");
exit(EXIT_FAILURE);
}
cfg = eustall_setup(drm_fd, &devinfo);
do_sample = eustall_sample;
do_dump_results = eustall_dump_results;
do_close = eustall_close;
} else {
fprintf(stderr,
"IMON: Error. No sampling mode set. Modes supported: ['-e']\n");
exit(EXIT_FAILURE);
}
/* Main loop */
fprintf(stderr, "IMON: Collecting samples. <Press any key to stop>\n");
while (!kbhit()) {
if (!success)
exit(EXIT_FAILURE);
success = do_sample(cfg);
usleep(sample_period_us);
}
fprintf(stderr, "IMON: Dumping results\n");
do_dump_results(cfg, fd_out);
do_close(cfg);
fprintf(stderr, "IMON: done\n");
return EXIT_SUCCESS;
}

View file

@ -0,0 +1,216 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "intel_monitor_eustall.h"
#include <poll.h>
#include "util/u_qsort.h"
#include "util/xxhash.h"
/* Non-blocking readiness probe for a stream file descriptor.
 *
 * Returns true only when poll() reports POLLIN for fd. A poll() failure is
 * logged to stderr and reported as "not ready".
 */
static bool
oa_stream_ready(int fd)
{
   struct pollfd probe = {
      .fd = fd,
      .events = POLLIN,
      .revents = 0,
   };

   /* Zero timeout: report the current state without waiting. */
   const int rc = poll(&probe, 1, 0);
   if (rc < 0) {
      fprintf(stderr, "IMON: Error polling OA stream\n");
      return false;
   }

   return (probe.revents & POLLIN) != 0;
}
/* Allocate and initialize an eustall_config.
 *
 * The KMD stream itself is not opened here: fd is left at -1, which makes
 * the first eustall_sample() call open and enable it.
 *
 * drm_fd:  DRM file descriptor stored for later queries and stream open.
 * devinfo: device description; only the pointer is stored, so it must
 *          outlive the returned config.
 *
 * Returns the new config (release with eustall_close()), or NULL if any
 * allocation failed.
 */
struct eustall_config*
eustall_setup(int drm_fd, struct intel_device_info *devinfo)
{
   /* Zero-initialize so fields that are only filled in later (record_size,
    * sample_rate, ...) never hold indeterminate values.
    */
   struct eustall_config *eustall_cfg = calloc(1, sizeof(*eustall_cfg));
   if (!eustall_cfg) {
      fprintf(stderr, "IMON: ERROR out of memory allocating eustall config\n");
      return NULL;
   }

   eustall_cfg->min_event_count = 1; /* min records to trigger data flush */
   eustall_cfg->drm_fd = drm_fd;
   eustall_cfg->fd = -1;
   eustall_cfg->devinfo = devinfo;
   eustall_cfg->result.accumulator =
      _mesa_hash_table_create(NULL,
                              _mesa_hash_u64,
                              _mesa_key_u64_equal);

   /* Arbitrarily large buffer for copying stall data into. */
   eustall_cfg->buf_len = 64 * 1024 * 1024;
   eustall_cfg->buf = malloc(eustall_cfg->buf_len);

   if (!eustall_cfg->result.accumulator || !eustall_cfg->buf) {
      fprintf(stderr, "IMON: ERROR out of memory allocating eustall buffers\n");
      if (eustall_cfg->result.accumulator)
         _mesa_hash_table_destroy(eustall_cfg->result.accumulator, NULL);
      free(eustall_cfg->buf);
      free(eustall_cfg);
      return NULL;
   }

   return eustall_cfg;
}
/* Query stream parameters, then open and enable the EU stall stream.
 *
 * Fills in record_size, sample_rate and fd of eustall_cfg. Every failure is
 * logged to stderr.
 *
 * Returns true when the stream is open and enabled. On failure
 * eustall_cfg->fd is negative, so a later eustall_sample() call retries the
 * initialization instead of reading from a stream that was never enabled.
 */
static bool
init_stream(struct eustall_config *eustall_cfg)
{
   eustall_cfg->record_size =
      intel_perf_eustall_stream_record_size(eustall_cfg->devinfo,
                                            eustall_cfg->drm_fd);
   if (eustall_cfg->record_size <= 0) {
      fprintf(stderr, "IMON: ERROR encountered querying record size."
              " err=%i\n", eustall_cfg->record_size);
      return false;
   }

   eustall_cfg->sample_rate =
      intel_perf_eustall_stream_sample_rate(eustall_cfg->devinfo,
                                            eustall_cfg->drm_fd);
   if (eustall_cfg->sample_rate <= 0) {
      fprintf(stderr, "IMON: ERROR encountered querying sampling rate."
              " err=%i\n", eustall_cfg->sample_rate);
      return false;
   }

   eustall_cfg->fd =
      intel_perf_eustall_stream_open(eustall_cfg->devinfo,
                                     eustall_cfg->drm_fd,
                                     eustall_cfg->sample_rate,
                                     eustall_cfg->min_event_count);
   if (eustall_cfg->fd < 0) {
      fprintf(stderr, "IMON: ERROR encountered while opening "
              "eustall stream. err=%i\n", eustall_cfg->fd);
      return false;
   }
   fprintf(stderr, "IMON: intel_perf_eustall_stream_open = %i\n",
           eustall_cfg->fd);

   int err = intel_perf_eustall_stream_set_state(eustall_cfg->devinfo,
                                                 eustall_cfg->fd, true);
   if (err != 0) {
      fprintf(stderr, "IMON: ERROR encountered while enabling stream."
              " err=%i\n", err);
      /* Do not keep a stream around that could not be enabled: release the
       * descriptor and mark the config as "not opened".
       */
      close(eustall_cfg->fd);
      eustall_cfg->fd = -1;
      return false;
   }

   return true;
}
/* Drain every EU stall record currently queued on the KMD stream into the
 * result accumulator.
 *
 * The first call (or any call made while the stream is not open) only opens
 * the stream and returns the outcome of that; no data is read then.
 * Afterwards the function keeps reading for as long as the stream reports
 * readable data. Read errors and buffer overflows are logged to stderr and
 * do not change the return value, which is true.
 */
bool
eustall_sample(void *cfg)
{
   struct eustall_config *ctx = cfg;
   bool overflow;

   const bool stream_open = ctx->fd >= 0 && ctx->record_size > 0;
   if (!stream_open)
      return init_stream(ctx);

   for (;;) {
      if (!oa_stream_ready(ctx->fd))
         break;

      const int nbytes =
         intel_perf_eustall_stream_read_samples(ctx->devinfo, ctx->fd,
                                                ctx->buf, ctx->buf_len,
                                                &overflow);
      if (nbytes < 0)
         fprintf(stderr, "IMON: read_samples returned err=%i\n", nbytes);
      if (nbytes <= 0)
         break;

      if (overflow) {
         fprintf(stderr, "IMON: detected EU stall sampling buffer overflow. "
                 "Some stall data was lost.\n");
      }

      /* Fold the [buf, buf + nbytes) range into the running totals. */
      uint8_t *chunk_end = ctx->buf + nbytes;
      intel_perf_eustall_accumulate_results(&ctx->result, ctx->buf,
                                            chunk_end, ctx->record_size);
   }

   return true;
}
/* qsort() comparator ordering uint64_t values ascending.
 *
 * Compares instead of subtracting: truncating a 64-bit difference to int
 * yields the wrong sign (or zero) whenever the two values differ by 2^31 or
 * more.
 *
 * Returns <0, 0 or >0 when *a is less than, equal to or greater than *b.
 */
static int
compare_ip_addr(const void *a, const void *b)
{
   const uint64_t val_a = *(const uint64_t *)a;
   const uint64_t val_b = *(const uint64_t *)b;

   return (val_a > val_b) - (val_a < val_b);
}
/* Write all previously collected results to file as CSV, one row per
 * instruction address in ascending order, then clear the results.
 *
 * cfg:  struct eustall_config whose accumulator holds the results.
 * file: output stream.
 *
 * Every record that is written is removed from the accumulator and freed.
 * If the temporary key array cannot be allocated nothing is written and the
 * accumulator is left untouched.
 */
void
eustall_dump_results(void *cfg, FILE *file)
{
   struct eustall_config *eustall_cfg = cfg;
   struct hash_table *accumulator = eustall_cfg->result.accumulator;
   const uint32_t num_entries = accumulator->entries;
   uint32_t i = 0;

   /* One slot per table slot, as before; presumably an upper bound on the
    * number of live entries.
    */
   uint64_t *ip_addr_keys = calloc(accumulator->size, sizeof(*ip_addr_keys));
   if (!ip_addr_keys) {
      fprintf(stderr, "IMON: ERROR out of memory while dumping results\n");
      return;
   }

   /* Sort keys so ip_addr appear in order */
   hash_table_foreach(accumulator, entry) {
      struct intel_perf_query_eustall_event *data =
         (struct intel_perf_query_eustall_event*)entry->data;
      ip_addr_keys[i++] = data->ip_addr;
   }
   qsort(ip_addr_keys, num_entries, sizeof(uint64_t), compare_ip_addr);

   fprintf(file, "offset,tdr_count,other_count,control_count,pipestall_count,"
           "send_count,dist_acc_count,sbid_count,sync_count,"
           "inst_fetch_count,active_count,sum\n");

   for (i = 0; i < num_entries; i++) {
      struct hash_entry *entry =
         _mesa_hash_table_search(accumulator, ip_addr_keys + i);
      if (!entry)
         continue;

      struct intel_perf_query_eustall_event *data =
         (struct intel_perf_query_eustall_event*)entry->data;
      /* The offset column is the stored ip_addr shifted left by 3. */
      uint64_t ip_addr = data->ip_addr << 3;
      uint64_t sum = data->tdr_count + data->other_count +
         data->control_count + data->pipestall_count + data->send_count +
         data->dist_acc_count + data->sbid_count + data->sync_count +
         data->inst_fetch_count + data->active_count;

      /* Cast to unsigned long long so the conversion specifiers are correct
       * on every ABI; "%lu" only matches a 64-bit value where long is
       * 64 bits wide.
       */
      fprintf(file,
              "0x%08llx,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n",
              (unsigned long long)ip_addr,
              (unsigned long long)data->tdr_count,
              (unsigned long long)data->other_count,
              (unsigned long long)data->control_count,
              (unsigned long long)data->pipestall_count,
              (unsigned long long)data->send_count,
              (unsigned long long)data->dist_acc_count,
              (unsigned long long)data->sbid_count,
              (unsigned long long)data->sync_count,
              (unsigned long long)data->inst_fetch_count,
              (unsigned long long)data->active_count,
              (unsigned long long)sum);

      /* Unlink first so the table never refers to freed memory. */
      _mesa_hash_table_remove(accumulator, entry);
      free(data);
   }

   free(ip_addr_keys);
}
/* Per-entry callback handed to _mesa_hash_table_destroy(): frees the
 * record stored as the entry's data.
 */
static void
delete_entry(struct hash_entry *entry)
{
   void *record = entry->data;

   free(record);
}
/* Close eustall stream and deconstruct eustall cfg.
 *
 * Frees every record still held by the accumulator, the accumulator itself,
 * the sample buffer and finally the config. cfg must not be used afterwards.
 * A NULL cfg is ignored.
 */
void
eustall_close(void *cfg)
{
   struct eustall_config *eustall_cfg = cfg;

   if (!eustall_cfg)
      return;

   _mesa_hash_table_destroy(eustall_cfg->result.accumulator, delete_entry);
   eustall_cfg->result.accumulator = NULL;

   /* fd stays at -1 until the stream has been opened; only close a
    * descriptor that actually exists.
    */
   if (eustall_cfg->fd >= 0)
      close(eustall_cfg->fd);
   eustall_cfg->fd = -1;

   free(eustall_cfg->buf);
   eustall_cfg->buf = NULL;

   free(eustall_cfg);
}

View file

@ -0,0 +1,28 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "perf/intel_perf.h"
/* State of one EU stall sampling session. */
struct eustall_config {
   /* Accumulated stall counters; result.accumulator is a hash table. */
   struct intel_perf_query_eustall_result result;

   /* Device description handed to the intel_perf_eustall_* helpers. */
   struct intel_device_info *devinfo;

   /* DRM fd used for the queries and for opening the stream. */
   int drm_fd;

   /* Staging buffer the stream is read into, and its size in bytes. */
   uint8_t *buf;
   size_t buf_len;

   /* Record size as queried from the driver. */
   int record_size;

   /* EU stall stream fd; -1 until the stream has been opened. */
   int fd;

   /* NOTE(review): not referenced by the code visible here -- confirm use. */
   uint64_t poll_period_ns;

   /* Sampling rate as queried from the driver. */
   int sample_rate;

   /* Min records to trigger data flush. */
   uint32_t min_event_count;
};
/* Allocate and initialize an eustall_config for the given DRM fd and
 * device info. Release it with eustall_close().
 */
struct eustall_config* eustall_setup(int drm_fd,
struct intel_device_info* devinfo);
/* Sample all eustall data via the KMD stream; the stream is opened on the
 * first call. cfg is the struct eustall_config from eustall_setup().
 */
bool eustall_sample(void *cfg);
/* Write all previously collected results to file and clear them. */
void eustall_dump_results(void *cfg, FILE* file);
/* Close the eustall stream and free cfg. */
void eustall_close(void *cfg);

View file

@ -107,6 +107,18 @@ intel_hang_replay = executable(
install : true
)
# intel_monitor: standalone GPU sampling tool (EU stall sampling front-end
# in intel_monitor.c, back-end in intel_monitor_eustall.c). Installed.
intel_monitor = executable(
'intel_monitor',
files('intel_monitor.c',
'intel_monitor_eustall.c',
'intel_monitor_eustall.h'),
dependencies : [dep_libdrm, idep_intel_driver_ds, dep_valgrind],
include_directories : [inc_include, inc_src, inc_intel],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
install : true
)
sanitize_data = configuration_data()
sanitize_data.set(
'install_libexecdir',