util: rework AMD cpu L3 cache affinity code.
This changes how the L3 cache affinity code works out the affinity
masks. It works better with multi-CPU systems and should also be
capable of handling big/little type situations if they appear in
the future.
It now iterates over all CPU cores, gets the core count for each
CPU, and works out the L3 ID from the physical CPU ID and the
current core's L3 cache index. It then tracks how many distinct
L3 caches it has seen and reallocates the affinity masks for each one.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4496
Fixes: d8ea509965 ("util: completely rewrite and do AMD Zen L3 cache pinning correctly")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9782>
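For illustration only, here is a tiny standalone program that mirrors the mask-and-shift steps described above and added in the diff below. The sample APIC ID, core count, and cores-per-L3 values are made up, and next_pow2()/log2u() are local stand-ins for Mesa's util_next_power_of_two()/util_logbase2():

#include <stdio.h>

/* Stand-in for util_next_power_of_two(). */
static unsigned next_pow2(unsigned x)
{
   unsigned p = 1;
   while (p < x)
      p <<= 1;
   return p;
}

/* Stand-in for util_logbase2(); x is assumed to be a power of two. */
static unsigned log2u(unsigned x)
{
   unsigned n = 0;
   while (x > 1) {
      x >>= 1;
      n++;
   }
   return n;
}

int main(void)
{
   unsigned apic_id = 0x1a;   /* hypothetical APIC ID from CPUID leaf 1 */
   unsigned core_count = 12;  /* hypothetical core count reported for the package */
   unsigned cores_per_L3 = 6; /* hypothetical cores sharing one L3 (CPUID 0x8000001D) */

   /* Each APIC ID bit field spans a power-of-two range. */
   core_count = next_pow2(core_count);                                    /* 12 -> 16 */

   unsigned local_core_id = apic_id & (core_count - 1);                   /* 0x1a & 0xf = 10 */
   unsigned phys_id = (apic_id & ~(core_count - 1)) >> log2u(core_count); /* package 1 */
   unsigned local_l3 = local_core_id / next_pow2(cores_per_L3);           /* 10 / 8 = 1 */
   unsigned l3_id = phys_id << 16 | local_l3 << 1 | 1;                    /* 0x10003 */

   printf("phys_id=%u local_l3=%u l3_id=0x%x\n", phys_id, local_l3, l3_id);
   return 0;
}

Every distinct l3_id seen while walking the cores gets its own slot in L3_affinity_masks, which is what the second hunk of the diff implements.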
parent f7acdb1d1d
commit 11d2db17c5
1 changed file with 48 additions and 35 deletions
@@ -446,20 +446,14 @@ get_cpu_topology(void)
        util_cpu_caps.family < CPU_AMD_LAST) {
       uint32_t regs[4];
 
-      /* Query the L3 cache count. */
-      cpuid_count(0x8000001D, 3, regs);
-      unsigned cache_level = (regs[0] >> 5) & 0x7;
-      unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1;
-
-      if (cache_level != 3 || cores_per_L3 == util_cpu_caps.nr_cpus)
-         return;
-
       uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0};
       uint32_t mask[UTIL_MAX_CPUS / 32] = {0};
       uint32_t allowed_mask[UTIL_MAX_CPUS / 32] = {0};
-      uint32_t apic_id[UTIL_MAX_CPUS];
       bool saved = false;
+      uint32_t L3_found[UTIL_MAX_CPUS] = {0};
+      uint32_t num_L3_caches = 0;
+      util_affinity_mask *L3_affinity_masks = NULL;
 
       /* Query APIC IDs from each CPU core.
        *
        * An APIC ID is a logical ID of the CPU with respect to the cache
@@ -486,39 +480,58 @@ get_cpu_topology(void)
                                               !saved ? saved_mask : NULL,
                                               util_cpu_caps.num_cpu_mask_bits)) {
             saved = true;
             allowed_mask[i / 32] |= cpu_bit;
 
             /* Query the APIC ID of the current core. */
             cpuid(0x00000001, regs);
-            apic_id[i] = regs[1] >> 24;
+            unsigned apic_id = regs[1] >> 24;
+
+            /* Query the total core count for the CPU */
+            uint32_t core_count = 1;
+            if (regs[3] & (1 << 28))
+               core_count = (regs[1] >> 16) & 0xff;
+
+            core_count = util_next_power_of_two(core_count);
+
+            /* Query the L3 cache count. */
+            cpuid_count(0x8000001D, 3, regs);
+            unsigned cache_level = (regs[0] >> 5) & 0x7;
+            unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1;
+
+            if (cache_level != 3)
+               continue;
+
+            unsigned local_core_id = apic_id & (core_count - 1);
+            unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count);
+            unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3);
+#define L3_ID(p, i) (p << 16 | i << 1 | 1);
+
+            unsigned l3_id = L3_ID(phys_id, local_l3_cache_index);
+            int idx = -1;
+            for (unsigned c = 0; c < num_L3_caches; c++) {
+               if (L3_found[c] == l3_id) {
+                  idx = c;
+                  break;
+               }
+            }
+            if (idx == -1) {
+               idx = num_L3_caches;
+               L3_found[num_L3_caches++] = l3_id;
+               L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches);
+               if (!L3_affinity_masks)
+                  return;
+               memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask));
+            }
+            util_cpu_caps.cpu_to_L3[i] = idx;
+            L3_affinity_masks[idx][i / 32] |= cpu_bit;
          }
          mask[i / 32] = 0;
       }
 
+      util_cpu_caps.num_L3_caches = num_L3_caches;
+      util_cpu_caps.L3_affinity_mask = L3_affinity_masks;
+
       if (saved) {
-
-         /* We succeeded in using at least one CPU. */
-         util_cpu_caps.num_L3_caches = util_cpu_caps.nr_cpus / cores_per_L3;
-         util_cpu_caps.cores_per_L3 = cores_per_L3;
-         util_cpu_caps.L3_affinity_mask = calloc(sizeof(util_affinity_mask),
-                                                 util_cpu_caps.num_L3_caches);
-
-         for (unsigned i = 0; i < util_cpu_caps.nr_cpus && i < UTIL_MAX_CPUS;
-              i++) {
-            uint32_t cpu_bit = 1u << (i % 32);
-
-            if (allowed_mask[i / 32] & cpu_bit) {
-               /* Each APIC ID bit represents a topology level, so we need
-                * to round up to the next power of two.
-                */
-               unsigned L3_index = apic_id[i] /
-                                   util_next_power_of_two(cores_per_L3);
-
-               util_cpu_caps.L3_affinity_mask[L3_index][i / 32] |= cpu_bit;
-               util_cpu_caps.cpu_to_L3[i] = L3_index;
-            }
-         }
-
          if (debug_get_option_dump_cpu()) {
             fprintf(stderr, "CPU <-> L3 cache mapping:\n");
             for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) {
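With the per-core discovery above, a consumer that wants to keep a thread inside one L3 domain only needs the fields populated here. A minimal sketch, written as if it sat next to get_cpu_topology() so that util_cpu_caps and util_set_current_thread_affinity() (both used in the diff) are visible; the wrapper name pin_current_thread_to_L3_of_cpu() is hypothetical and not part of the commit:

/* Hypothetical wrapper sketched against the fields used in the diff above:
 * pin the calling thread to the L3 cache domain that owns a given CPU.
 * Assumes get_cpu_topology() has already run and filled util_cpu_caps. */
static bool
pin_current_thread_to_L3_of_cpu(unsigned cpu)
{
   if (!util_cpu_caps.num_L3_caches || cpu >= UTIL_MAX_CPUS)
      return false;

   unsigned L3 = util_cpu_caps.cpu_to_L3[cpu];

   /* Each L3_affinity_mask entry holds one bit per CPU in that L3 domain. */
   return util_set_current_thread_affinity(util_cpu_caps.L3_affinity_mask[L3],
                                           NULL,
                                           util_cpu_caps.num_cpu_mask_bits);
}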