diff --git a/src/intel/perf/xe/intel_perf.c b/src/intel/perf/xe/intel_perf.c index 799668c3f3b..429a9bc6fae 100644 --- a/src/intel/perf/xe/intel_perf.c +++ b/src/intel/perf/xe/intel_perf.c @@ -433,7 +433,6 @@ xe_perf_eustall_stream_record_size(int drm_fd) int xe_perf_eustall_stream_sample_rate(int drm_fd) { - int sampling_rate; struct drm_xe_query_eu_stall *eu_stall_data = xe_device_query_alloc_fetch(drm_fd, DRM_XE_DEVICE_QUERY_EU_STALL, NULL); if (!eu_stall_data) @@ -441,7 +440,9 @@ xe_perf_eustall_stream_sample_rate(int drm_fd) assert(eu_stall_data->sampling_rates[0] > 0 && eu_stall_data->sampling_rates[0] < INT_MAX); - sampling_rate = (int)eu_stall_data->sampling_rates[0]; + /* pick slowest rate to reduce chance of overflow */ + int idx_slowest = eu_stall_data->num_sampling_rates - 1; + int sampling_rate = (int)eu_stall_data->sampling_rates[idx_slowest]; free(eu_stall_data); return sampling_rate; } diff --git a/src/intel/tools/intel_monitor.c b/src/intel/tools/intel_monitor.c new file mode 100644 index 00000000000..b29a738c2b2 --- /dev/null +++ b/src/intel/tools/intel_monitor.c @@ -0,0 +1,234 @@ +/* + * Copyright 2024 Intel Corporation + * SPDX-License-Identifier: MIT + * + * Purpose: + * Enables EU stall sampling available in Xe2+ Intel GPUs. Enables + * GPU sampling of EU stalls. Every N microseconds, program collects + * sampled data from GPU. Accumulated data dumped to stdout once + * intel_monitor is closed. 
+ */
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch; the list below is restored from the identifiers this file uses.
+ * Confirm against the original commit.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include <xf86drm.h>
+
+#include "intel_monitor_eustall.h"
+#include "dev/intel_device_info.h"
+
+/* Open the render node of the first Intel PCI GPU whose device info can be
+ * queried.
+ *
+ * devinfo is filled in by intel_get_device_info_from_fd() for the device
+ * that is returned. Returns an open file descriptor owned by the caller,
+ * or -1 when no usable device is found.
+ */
+static int
+get_drm_device(struct intel_device_info *devinfo)
+{
+   drmDevicePtr devices[8];
+   int fd = -1;
+
+   /* A negative return value signals an error; the loop is then skipped. */
+   const int max_devices = drmGetDevices2(0, devices, 8);
+
+   for (int i = 0; i < max_devices; i++) {
+      const drmDevicePtr dev = devices[i];
+
+      if (!(dev->available_nodes & (1 << DRM_NODE_RENDER)) ||
+          dev->bustype != DRM_BUS_PCI ||
+          dev->deviceinfo.pci->vendor_id != 0x8086)
+         continue;
+
+      fd = open(dev->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
+      if (fd < 0)
+         continue;
+
+      /* Found a device! */
+      if (intel_get_device_info_from_fd(fd, devinfo, -1, -1))
+         break;
+
+      close(fd);
+      fd = -1;
+   }
+
+   /* The device list is allocated by libdrm and has to be handed back. */
+   if (max_devices > 0)
+      drmFreeDevices(devices, max_devices);
+
+   return fd;
+}
+
+/* Keyboard hit function
+ *
+ * Temporarily switches stdin to non-canonical, non-echoing, non-blocking
+ * mode, tries to read one character and restores the previous settings.
+ * A character that was read is pushed back with ungetc().
+ *
+ * Return true if keypress detected.
+ */
+static bool
+kbhit(void)
+{
+   struct termios oldt;
+   int ch = EOF;
+
+   /* stdin may not be a terminal (pipe, /dev/null); only touch and restore
+    * the terminal settings when they could actually be read.
+    */
+   const bool have_termios = tcgetattr(STDIN_FILENO, &oldt) == 0;
+   if (have_termios) {
+      struct termios newt = oldt;
+      newt.c_lflag &= ~(ICANON | ECHO);
+      tcsetattr(STDIN_FILENO, TCSANOW, &newt);
+   }
+
+   const int oldf = fcntl(STDIN_FILENO, F_GETFL, 0);
+   if (oldf != -1) {
+      fcntl(STDIN_FILENO, F_SETFL, oldf | O_NONBLOCK);
+      ch = getchar();
+      fcntl(STDIN_FILENO, F_SETFL, oldf);
+   }
+
+   if (have_termios)
+      tcsetattr(STDIN_FILENO, TCSANOW, &oldt);
+
+   if (ch != EOF) {
+      ungetc(ch, stdin);
+      return true;
+   }
+
+   /* A failed non-blocking read leaves the stream's error indicator set;
+    * reset it so the next poll starts from a clean stream state.
+    */
+   clearerr(stdin);
+   return false;
+}
+
+/* Print the usage text, the EU stall workflow and the meaning of every
+ * output column to file. progname is substituted into the usage line.
+ */
+static void
+print_help(const char *progname, FILE *file)
+{
+   fprintf(file,
+           "\n"
+           "Usage: %s [OPTION]...\n"
+           "Sample and collate GPU metrics across system. Studies workloads run in parallel.\n"
+           " -h, --help display this help\n"
+           " -e, --eustall sample eu stalls. Supported Xe2+.\n"
+           " -f, --file name of output file. DEFAULT=stdout\n"
+           " -t, --sample_time period of sampling, in microseconds. DEFAULT=1000\n"
+           "\n"
+           "Eu stall sample usage:\n"
+           " 0. Run `sudo sysctl dev.xe.observation_paranoid=0`\n"
+           " 1. Launch gfx app with INTEL_DEBUG=shaders-lineno. Redirect stderr to asm.txt.\n"
+           " 2. When gfx app ready to monitor, begin capturing eustall data by launching\n"
+           " `intel_monitor -e > eustall.csv` in separate console.\n"
+           " 3. When enough data has been collected, close intel_monitor by pressing any key.\n"
+           " 4. Correlate eustall data in eustall.csv with shader instructions in asm.txt by\n"
+           " matching instruction offsets. Use data to determine which instructions are\n"
+           " stalling and why.\n"
+           "\n"
+           "Eu stall defintions:\n"
+           "tdr_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " on Pixel Shader dependency. Multiple stall reasons can\n"
+           " qualify during the same cycle.\n"
+           "other_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " on any other dependency (Flag/EoT etc). Multiple stall reasons\n"
+           " can qualify during the same cycle.\n"
+           "control_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for JEU to complete branch instruction. Multiple stall reasons\n"
+           " can qualify during the same cycle.\n"
+           "pipestall_count - Number of cycles EU stalled, with at least one thread ready to\n"
+           " be scheduled (Grf conf/send holds etc). Multiple stall reasons\n"
+           " can qualify during the same cycle.\n"
+           "send_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for SEND message to be dispatched from EU. Multiple stall\n"
+           " reasons can qualify during the same cycle.\n"
+           "dist_acc_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for ALU to write GRF/ACC register. Multiple stall reasons can\n"
+           " qualify during the same cycle.\n"
+           "sbid_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for Scoreboard token to be available. Multiple stall reasons\n"
+           " can qualify during the same cycle.\n"
+           "sync_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for Gateway to write Notify register. Multiple stall reasons\n"
+           " can qualify during the same cycle.\n"
+           "inst_fetch_count - Number of cycles EU stalled, with at least one thread waiting\n"
+           " for Instruction Fetch. Multiple stall reasons can qualify\n"
+           " during the same cycle.\n"
+           "active_count - Number of cycles no EU stalled.\n\n",
+           progname);
+}
+
+enum intel_monitor_sampling_mode
+{
+   /* No sampling requested */
+   INTEL_MONITOR_MODE_NONE = 0,
+
+   /* Sample eu stalls */
+   INTEL_MONITOR_MODE_EUSTALL = 1
+};
+
+/* Entry point.
+ *
+ * Parses the command line, opens the GPU, then samples in a loop until a
+ * key is pressed. On a key press the accumulated results are written to
+ * the selected output (stdout unless -f is given). Returns EXIT_FAILURE
+ * when setup, sampling or writing the output file fails.
+ */
+int
+main(int argc, char *argv[])
+{
+   int c;
+   const char *out_path = NULL;
+   FILE *fd_out = stdout;
+   uint64_t sample_period_us = 1000;
+   int exit_code = EXIT_SUCCESS;
+
+   void *cfg = NULL;
+   bool (*do_sample)(void*) = NULL;
+   void (*do_dump_results)(void*, FILE*) = NULL;
+   void (*do_close)(void*) = NULL;
+
+   enum intel_monitor_sampling_mode sample_mode = INTEL_MONITOR_MODE_NONE;
+   const struct option intel_monitor_opts[] = {
+      { "help", no_argument, NULL, 'h' },
+      { "eustall", no_argument, NULL, 'e' },
+      { "file", required_argument, NULL, 'f' },
+      { "sample_time", required_argument, NULL, 't' },
+      { NULL, 0, NULL, 0 }
+   };
+
+   /* Parse arguments before touching the GPU, so --help and usage errors
+    * work on machines without a supported device.
+    */
+   while ((c = getopt_long(argc, argv, "hef:t:", intel_monitor_opts, NULL)) != -1) {
+      switch (c) {
+      case 'h':
+         print_help(argv[0], stderr);
+         return EXIT_SUCCESS;
+      case 'e':
+         sample_mode = INTEL_MONITOR_MODE_EUSTALL;
+         break;
+      case 'f':
+         /* Opened after setup succeeded; a repeated -f simply wins. */
+         out_path = optarg;
+         break;
+      case 't': {
+         /* strtoull() instead of atoi(): garbage and negative values are
+          * rejected rather than silently turned into 0 or a huge period.
+          * The upper bound keeps the value representable for usleep().
+          */
+         char *end = NULL;
+         errno = 0;
+         const unsigned long long period = strtoull(optarg, &end, 10);
+         if (errno != 0 || end == optarg || *end != '\0' ||
+             optarg[0] == '-' || period > UINT32_MAX) {
+            fprintf(stderr, "IMON: Error. Invalid sample_time '%s'.\n", optarg);
+            exit(EXIT_FAILURE);
+         }
+         sample_period_us = period;
+         break;
+      }
+      default:
+         fprintf(stderr,
+                 "IMON: Error. Unexpected parameter encountered '%c'.\n", c);
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   if (sample_mode == INTEL_MONITOR_MODE_NONE) {
+      fprintf(stderr,
+              "IMON: Error. No sampling mode set. Modes supported: ['-e']\n");
+      exit(EXIT_FAILURE);
+   }
+
+   struct intel_device_info devinfo;
+   int drm_fd = get_drm_device(&devinfo);
+   if (drm_fd < 0) {
+      fprintf(stderr, "IMON: Error encountered while getting drm_device. err=%i\n",
+              drm_fd);
+      exit(EXIT_FAILURE);
+   }
+
+   /* Setup cfg based on selected sampling mode */
+   if (sample_mode == INTEL_MONITOR_MODE_EUSTALL) {
+      fprintf(stderr, "IMON: Setting up EU Stall Sampling\n");
+      if (devinfo.ver < 20) {
+         fprintf(stderr, "IMON: EU Stall Sampler supported on Xe2+ platforms.\n");
+         close(drm_fd);
+         exit(EXIT_FAILURE);
+      }
+
+      cfg = eustall_setup(drm_fd, &devinfo);
+      if (!cfg) {
+         fprintf(stderr, "IMON: Error setting up EU Stall Sampling\n");
+         close(drm_fd);
+         exit(EXIT_FAILURE);
+      }
+
+      do_sample = eustall_sample;
+      do_dump_results = eustall_dump_results;
+      do_close = eustall_close;
+   }
+
+   if (out_path) {
+      fd_out = fopen(out_path, "w");
+      if (!fd_out) {
+         fprintf(stderr, "IMON: Error opening output file '%s'\n", out_path);
+         do_close(cfg);
+         close(drm_fd);
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   /* Main loop */
+   fprintf(stderr, "IMON: Collecting samples. \n");
+   while (!kbhit()) {
+      if (!do_sample(cfg)) {
+         exit_code = EXIT_FAILURE;
+         break;
+      }
+      usleep((useconds_t)sample_period_us);
+   }
+
+   if (exit_code == EXIT_SUCCESS) {
+      fprintf(stderr, "IMON: Dumping results\n");
+      do_dump_results(cfg, fd_out);
+   }
+   do_close(cfg);
+   close(drm_fd);
+
+   /* fclose() flushes; a failure here means the results were not written. */
+   if (fd_out != stdout && fclose(fd_out) != 0) {
+      fprintf(stderr, "IMON: Error writing output file '%s'\n", out_path);
+      exit_code = EXIT_FAILURE;
+   }
+
+   if (exit_code == EXIT_SUCCESS)
+      fprintf(stderr, "IMON: done\n");
+
+   return exit_code;
+}
diff --git a/src/intel/tools/intel_monitor_eustall.c b/src/intel/tools/intel_monitor_eustall.c
new file mode 100644
index 00000000000..4f9fa9e4c3f
--- /dev/null
+++ b/src/intel/tools/intel_monitor_eustall.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2024 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "intel_monitor_eustall.h"
+
+/* NOTE(review): header name was stripped from this copy of the patch;
+ * <poll.h> is restored from the poll()/struct pollfd usage below - confirm.
+ */
+#include <poll.h>
+
+#include "util/u_qsort.h"
+#include "util/xxhash.h"
+
+/* Return true when fd has data to read right now. poll() is called with a
+ * zero timeout, so this never blocks; a poll() error is logged and treated
+ * as "not ready".
+ */
+static bool
+oa_stream_ready(int fd)
+{
+   struct pollfd pfd;
+
+   pfd.fd = fd;
+   pfd.events = POLLIN;
+   pfd.revents = 0;
+
+   if (poll(&pfd, 1, 0) < 0) {
+      fprintf(stderr, "IMON: Error polling OA stream\n");
+      return false;
+   }
+
+   if (!(pfd.revents & POLLIN))
+      return false;
+
+   return true;
+}
+
+/* Initialize eustall_cfg and enable eu stall profiling by
+ * opening stream with KMD. Return eustall_cfg.
+ *
+ * NOTE: only the state is allocated here; the stream itself is opened by
+ * the first eustall_sample() call. Returns NULL when an allocation fails.
+ * The returned object is released with eustall_close().
+ */
+struct eustall_config*
+eustall_setup(int drm_fd, struct intel_device_info *devinfo)
+{
+   /* Zero-initialize so that fields which are only filled in once the
+    * stream is opened (record_size, sample_rate, ...) never hold
+    * indeterminate values.
+    */
+   struct eustall_config *eustall_cfg = calloc(1, sizeof(*eustall_cfg));
+   if (!eustall_cfg)
+      return NULL;
+
+   eustall_cfg->min_event_count = 1; /* min records to trigger data flush */
+   eustall_cfg->drm_fd = drm_fd;
+   eustall_cfg->fd = -1;
+   eustall_cfg->devinfo = devinfo;
+
+   eustall_cfg->result.accumulator =
+      _mesa_hash_table_create(NULL,
+                              _mesa_hash_u64,
+                              _mesa_key_u64_equal);
+
+   /* Arbitrarily large buffer for copying stall data into. */
+   eustall_cfg->buf_len = 64 * 1024 * 1024;
+   eustall_cfg->buf = malloc(eustall_cfg->buf_len);
+
+   if (!eustall_cfg->result.accumulator || !eustall_cfg->buf) {
+      if (eustall_cfg->result.accumulator)
+         _mesa_hash_table_destroy(eustall_cfg->result.accumulator, NULL);
+      free(eustall_cfg->buf);
+      free(eustall_cfg);
+      return NULL;
+   }
+
+   return eustall_cfg;
+}
+
+/* Query the record size and the sampling rate, open the EU stall stream
+ * with them and enable it. Returns false, after logging to stderr, as soon
+ * as one of these steps fails.
+ */
+static bool
+init_stream(struct eustall_config *eustall_cfg)
+{
+   eustall_cfg->record_size =
+      intel_perf_eustall_stream_record_size(eustall_cfg->devinfo,
+                                            eustall_cfg->drm_fd);
+   if (eustall_cfg->record_size <= 0) {
+      fprintf(stderr, "IMON: ERROR encountered querying record size."
+              " err=%i\n", eustall_cfg->record_size);
+      return false;
+   }
+
+   eustall_cfg->sample_rate =
+      intel_perf_eustall_stream_sample_rate(eustall_cfg->devinfo,
+                                            eustall_cfg->drm_fd);
+   if (eustall_cfg->sample_rate <= 0) {
+      fprintf(stderr, "IMON: ERROR encountered querying sampling rate."
+              " err=%i\n", eustall_cfg->sample_rate);
+      return false;
+   }
+
+   eustall_cfg->fd =
+      intel_perf_eustall_stream_open(eustall_cfg->devinfo,
+                                     eustall_cfg->drm_fd,
+                                     eustall_cfg->sample_rate,
+                                     eustall_cfg->min_event_count);
+   if (eustall_cfg->fd < 0) {
+      fprintf(stderr, "IMON: ERROR encountered while opening "
+              "eustall stream. err=%i\n", eustall_cfg->fd);
+      return false;
+   }
+   fprintf(stderr, "IMON: intel_perf_eustall_stream_open = %i\n",
+           eustall_cfg->fd);
+
+   int err = intel_perf_eustall_stream_set_state(eustall_cfg->devinfo,
+                                                 eustall_cfg->fd, true);
+   if (err != 0) {
+      fprintf(stderr, "IMON: ERROR encountered while enabling stream."
+              " err=%i\n", err);
+      /* Do not keep a stream around that could not be enabled. */
+      close(eustall_cfg->fd);
+      eustall_cfg->fd = -1;
+      return false;
+   }
+
+   return true;
+}
+
+/* Sample all eustall data via KMD stream. Open stream on first call.
+ *
+ * The call that opens the stream reads no samples and returns the result
+ * of the initialization. Every later call drains the stream while it is
+ * readable and accumulates the records; a read error is logged and ends
+ * the draining, but the call still returns true.
+ */
+bool
+eustall_sample(void *cfg)
+{
+   struct eustall_config *eustall_cfg = cfg;
+
+   if (eustall_cfg->fd < 0 || eustall_cfg->record_size <= 0)
+      return init_stream(eustall_cfg);
+
+   while (oa_stream_ready(eustall_cfg->fd)) {
+      /* Start from a known value for every read, so a stale or
+       * uninitialized flag is never reported.
+       */
+      bool overflow = false;
+      int bytes_read =
+         intel_perf_eustall_stream_read_samples(eustall_cfg->devinfo,
+                                                eustall_cfg->fd,
+                                                eustall_cfg->buf,
+                                                eustall_cfg->buf_len,
+                                                &overflow);
+      if (bytes_read <= 0) {
+         if (bytes_read < 0)
+            fprintf(stderr, "IMON: read_samples returned err=%i\n", bytes_read);
+         break;
+      }
+
+      if (overflow)
+         fprintf(stderr, "IMON: detected EU stall sampling buffer overflow. "
+                 "Some stall data was lost.\n");
+
+      intel_perf_eustall_accumulate_results(&eustall_cfg->result,
+                                            eustall_cfg->buf,
+                                            eustall_cfg->buf + bytes_read,
+                                            eustall_cfg->record_size);
+   }
+
+   return true;
+}
+
+/* qsort() comparator ordering uint64_t values ascending.
+ *
+ * The values are compared rather than subtracted: a 64-bit difference
+ * truncated to int has the wrong sign, or is 0, whenever the operands
+ * differ by 2^31 or more.
+ */
+static int
+compare_ip_addr(const void *a, const void *b)
+{
+   const uint64_t val_a = *(const uint64_t *)a;
+   const uint64_t val_b = *(const uint64_t *)b;
+   return (val_a > val_b) - (val_a < val_b);
+}
+
+/* Write all previously collected results to fd. Clear results.
+ *
+ * The results are written to file as CSV: one header line, then one row
+ * per accumulated instruction address in ascending order. Every event is
+ * freed and removed from the accumulator as it is written.
+ */
+void
+eustall_dump_results(void *cfg, FILE *file)
+{
+   struct eustall_config *eustall_cfg = cfg;
+   struct hash_table *accumulator = eustall_cfg->result.accumulator;
+   const uint32_t num_entries = accumulator->entries;
+   uint64_t *ip_addr_keys = NULL;
+   uint32_t i = 0;
+
+   if (num_entries > 0) {
+      /* One slot per stored entry is enough. */
+      ip_addr_keys = malloc((size_t)num_entries * sizeof(*ip_addr_keys));
+      if (!ip_addr_keys) {
+         fprintf(stderr, "IMON: Error allocating memory to sort results\n");
+         return;
+      }
+   }
+
+   /* Sort keys so ip_addr appear in order */
+   hash_table_foreach(accumulator, entry) {
+      const struct intel_perf_query_eustall_event *data = entry->data;
+      ip_addr_keys[i++] = data->ip_addr;
+   }
+   if (num_entries > 1)
+      qsort(ip_addr_keys, num_entries, sizeof(*ip_addr_keys), compare_ip_addr);
+
+   fprintf(file, "offset,tdr_count,other_count,control_count,pipestall_count,"
+           "send_count,dist_acc_count,sbid_count,sync_count,"
+           "inst_fetch_count,active_count,sum\n");
+
+   for (i = 0; i < num_entries; i++) {
+      struct hash_entry *entry =
+         _mesa_hash_table_search(accumulator, ip_addr_keys + i);
+      if (!entry)
+         continue;
+
+      struct intel_perf_query_eustall_event *data = entry->data;
+
+      uint64_t ip_addr = data->ip_addr << 3;
+      uint64_t sum = data->tdr_count + data->other_count +
+         data->control_count + data->pipestall_count + data->send_count +
+         data->dist_acc_count + data->sbid_count + data->sync_count +
+         data->inst_fetch_count + data->active_count;
+
+      /* uint64_t is not unsigned long on every target; cast explicitly so
+       * the conversion specifiers match the arguments everywhere.
+       */
+      fprintf(file,
+              "0x%08llx,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n",
+              (unsigned long long)ip_addr,
+              (unsigned long long)data->tdr_count,
+              (unsigned long long)data->other_count,
+              (unsigned long long)data->control_count,
+              (unsigned long long)data->pipestall_count,
+              (unsigned long long)data->send_count,
+              (unsigned long long)data->dist_acc_count,
+              (unsigned long long)data->sbid_count,
+              (unsigned long long)data->sync_count,
+              (unsigned long long)data->inst_fetch_count,
+              (unsigned long long)data->active_count,
+              (unsigned long long)sum);
+
+      /* Unlink the entry before releasing its payload, so the table never
+       * refers to freed memory.
+       */
+      _mesa_hash_table_remove(accumulator, entry);
+      free(data);
+   }
+   free(ip_addr_keys);
+}
+
+/* Destroy callback for the accumulator: releases the event stored in entry. */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   free(entry->data);
+}
+
+/* Close eustall stream and deconstruct eustall cfg.
+ *
+ * Frees every event still held by the accumulator, the sample buffer and
+ * cfg itself, so cfg must not be used afterwards. The stream is only
+ * closed if it was opened. A NULL cfg is ignored.
+ */
+void
+eustall_close(void *cfg)
+{
+   struct eustall_config *eustall_cfg = cfg;
+   if (!eustall_cfg)
+      return;
+
+   _mesa_hash_table_destroy(eustall_cfg->result.accumulator, delete_entry);
+
+   if (eustall_cfg->fd >= 0)
+      close(eustall_cfg->fd);
+
+   free(eustall_cfg->buf);
+   free(eustall_cfg);
+}
diff --git a/src/intel/tools/intel_monitor_eustall.h b/src/intel/tools/intel_monitor_eustall.h
new file mode 100644
index 00000000000..a33c5e35b37
--- /dev/null
+++ b/src/intel/tools/intel_monitor_eustall.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2024 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "perf/intel_perf.h"
+
+/* State of one EU stall sampling session. */
+struct eustall_config {
+   struct intel_perf_query_eustall_result result; /* accumulated stall data */
+   struct intel_device_info *devinfo;
+   int drm_fd;               /* DRM device fd handed to eustall_setup() */
+   uint8_t *buf;             /* buffer the samples are read into */
+   size_t buf_len;           /* size of buf in bytes */
+   int record_size;          /* queried when the stream is opened */
+
+   int fd;                   /* stream fd, -1 while no stream is open */
+   uint64_t poll_period_ns;  /* not referenced by the code in this patch */
+   int sample_rate;          /* rate the stream is opened with */
+   uint32_t min_event_count; /* min records to trigger data flush */
+};
+
+struct eustall_config *eustall_setup(int drm_fd,
+                                     struct intel_device_info *devinfo);
+bool eustall_sample(void *cfg);
+void eustall_dump_results(void *cfg, FILE *file);
+void eustall_close(void *cfg);
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index 0aa34acf216..c4e2126c91b 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -107,6 +107,18 @@ intel_hang_replay = executable(
   install : true
 )
 
+intel_monitor = executable(
+  'intel_monitor',
+  files('intel_monitor.c',
+        'intel_monitor_eustall.c',
+        'intel_monitor_eustall.h'),
+  dependencies : [dep_libdrm, idep_intel_driver_ds, dep_valgrind],
+  include_directories : [inc_include, inc_src, inc_intel],
+  c_args : [no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+  install : true
+)
+
 sanitize_data = configuration_data()
 sanitize_data.set(
   'install_libexecdir',