From 0eab7bfabc2bb231699d4dccf54efd84b6dae851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 12 Nov 2024 12:00:43 -0800 Subject: [PATCH] intel/dev/xe3: Set max_slices and max_subslices_per_slice using hwconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Different Xe3 SKUs can have different max_subslices_per_slice values, and the Xe KMD topology uAPI only provides us the available subslices. Therefore, to correctly calculate the available slices, we need max_subslices_per_slice to match the hardware. This change retrieves this information from hwconfig for Xe3+. This avoids adding all the PTL intel_device_info variants. Additionally, the PTL topology values are currently embargoed and cannot be hard-coded in public source code. This could be simplified if we decide to apply max_slices and max_subslices_per_slice to all platforms where hwconfig is required. Reviewed-by: Jordan Justen Signed-off-by: José Roberto de Souza Part-of: --- src/intel/dev/intel_hwconfig.c | 44 +++++++++++++++++++++++++++- src/intel/dev/xe/intel_device_info.c | 13 +++++--- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/intel/dev/intel_hwconfig.c b/src/intel/dev/intel_hwconfig.c index 847d3835591..8a8bf9ef70f 100644 --- a/src/intel/dev/intel_hwconfig.c +++ b/src/intel/dev/intel_hwconfig.c @@ -212,12 +212,43 @@ should_apply_hwconfig_item(uint16_t always_apply_verx10, #define DEVINFO_HWCONFIG(CVER, F, I) \ DEVINFO_HWCONFIG_KV((CVER), F, (I)->key, (I)->val[0]) +#define CALC_TOPOLOGY_LAYOUT_VERX10 300 + static void process_hwconfig_item(struct intel_device_info *devinfo, const struct hwconfig *item, const bool check_only) { switch (item->key) { + case INTEL_HWCONFIG_MAX_SLICES_SUPPORTED: + /* if we are not applying hwconfig to max_slices and max_subslices_per_slice + * it should be skipped at all, otherwise the upper limit values set in + * xe_compute_topology() will cause hwconfig mismatch warnings in + * 
some SKUs. + */ + if (devinfo->verx10 < CALC_TOPOLOGY_LAYOUT_VERX10) + break; + + DEVINFO_HWCONFIG(CALC_TOPOLOGY_LAYOUT_VERX10, max_slices, item); + break; + case INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED: /* available in Gfx 12.5 */ + case INTEL_HWCONFIG_MAX_SUBSLICE: /* available in Gfx 20+ */ + if (devinfo->verx10 < CALC_TOPOLOGY_LAYOUT_VERX10) + break; + + /* This one is special because it depends on max_slices that is not + * guaranteed to be processed before this one + */ + if (check_only) { + hwconfig_item_warning("max_subslices_per_slice", + devinfo->max_subslices_per_slice, item->key, + item->val[0] / devinfo->max_slices); + } else { + /* it will be later adjusted in late_apply_hwconfig() */ + DEVINFO_HWCONFIG(CALC_TOPOLOGY_LAYOUT_VERX10, + max_subslices_per_slice, item); + } + break; case INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS: DEVINFO_HWCONFIG(125, max_eus_per_subslice, item); break; @@ -272,12 +303,23 @@ apply_hwconfig_item(struct intel_device_info *devinfo, process_hwconfig_item(devinfo, item, false); } +static void +late_apply_hwconfig(struct intel_device_info *devinfo) +{ + if (devinfo->verx10 >= CALC_TOPOLOGY_LAYOUT_VERX10) { + assert((devinfo->max_subslices_per_slice % devinfo->max_slices) == 0); + devinfo->max_subslices_per_slice /= devinfo->max_slices; + } +} + bool intel_hwconfig_process_table(struct intel_device_info *devinfo, void *data, int32_t len) { - if (intel_hwconfig_is_required(devinfo)) + if (intel_hwconfig_is_required(devinfo)) { process_hwconfig_table(devinfo, data, len, apply_hwconfig_item); + late_apply_hwconfig(devinfo); + } return true; } diff --git a/src/intel/dev/xe/intel_device_info.c b/src/intel/dev/xe/intel_device_info.c index f5e33cbd1a0..29410e1bf17 100644 --- a/src/intel/dev/xe/intel_device_info.c +++ b/src/intel/dev/xe/intel_device_info.c @@ -194,7 +194,9 @@ xe_compute_topology(struct intel_device_info * devinfo, * RKL/ADL-S: 1 slice x 2 dual sub slices * DG2: 8 slices x 4 dual sub slices */ - if (devinfo->verx10 >= 125) { + 
if (devinfo->verx10 >= 300) { + /* was set by hwconfig */ + } else if (devinfo->verx10 >= 125) { devinfo->max_slices = 8; devinfo->max_subslices_per_slice = 4; } else { @@ -208,6 +210,8 @@ xe_compute_topology(struct intel_device_info * devinfo, assert((sizeof(uint32_t) * 8) >= devinfo->max_subslices_per_slice); assert((sizeof(uint32_t) * 8) >= devinfo->max_eus_per_subslice); + assert(INTEL_DEVICE_MAX_SLICES >= devinfo->max_slices); + assert(INTEL_DEVICE_MAX_SUBSLICES >= devinfo->max_subslices_per_slice); const uint32_t dss_mask_in_slice = (1u << devinfo->max_subslices_per_slice) - 1; struct slice { @@ -343,12 +347,13 @@ intel_device_info_xe_get_info_from_fd(int fd, struct intel_device_info *devinfo) if (!xe_query_gts(fd, devinfo)) return false; - if (!xe_query_topology(fd, devinfo)) - return false; - if (!xe_query_process_hwconfig(fd, devinfo)) return false; + /* xe_compute_topology() depends on information provided by hwconfig */ + if (!xe_query_topology(fd, devinfo)) + return false; + devinfo->has_context_isolation = true; devinfo->has_mmap_offset = true; devinfo->has_caching_uapi = false;