diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index 4cb1c658a3a..623c6e1788f 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -1103,7 +1103,7 @@ update_pixel_pipes(struct intel_device_info *devinfo) /* Count the number of subslices on each pixel pipe. Assume that every * contiguous group of 4 subslices in the mask belong to the same pixel - * pipe. However note that on TGL the kernel returns a mask of enabled + * pipe. However note that on TGL+ the kernel returns a mask of enabled * *dual* subslices instead of actual subslices somewhat confusingly, so * each pixel pipe only takes 2 bits in the mask even though it's still 4 * subslices. @@ -1111,11 +1111,13 @@ update_pixel_pipes(struct intel_device_info *devinfo) const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4; for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) { const unsigned offset = p * ppipe_bits; + const unsigned subslice_idx = offset / + devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride; const unsigned ppipe_mask = BITFIELD_RANGE(offset % 8, ppipe_bits); - if (offset / 8 < ARRAY_SIZE(devinfo->subslice_masks)) + if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks)) devinfo->ppipe_subslices[p] = - __builtin_popcount(devinfo->subslice_masks[offset / 8] & ppipe_mask); + __builtin_popcount(devinfo->subslice_masks[subslice_idx] & ppipe_mask); else devinfo->ppipe_subslices[p] = 0; } @@ -1141,7 +1143,7 @@ update_pixel_pipes(struct intel_device_info *devinfo) static void update_l3_banks(struct intel_device_info *devinfo) { - if (devinfo->ver != 12 || devinfo->num_slices != 1) + if (devinfo->ver != 12) return; if (devinfo->verx10 >= 125) { @@ -1166,6 +1168,73 @@ update_l3_banks(struct intel_device_info *devinfo) } } +/* At some point in time, some people decided to redefine what topology means, + * from useful HW related information (slice, subslice, etc...), to much less + * useful generic stuff that noone cares about (a single slice with lots of + * subslices). Of course all of this was done without asking the people who + * defined the topology query in the first place, to solve a lack of + * information Gfx10+. This function is here to workaround the fact it's not + * possible to change people's mind even before this stuff goes upstream. Sad + * times... + */ +static void +update_from_single_slice_topology(struct intel_device_info *devinfo, + const struct drm_i915_query_topology_info *topology) +{ + assert(devinfo->verx10 >= 125); + + reset_masks(devinfo); + + assert(topology->max_slices == 1); + assert(topology->max_subslices > 0); + assert(topology->max_eus_per_subslice > 0); + + /* i915 gives us only one slice so we have to rebuild that out of groups of + * 4 dualsubslices. + */ + devinfo->max_subslices_per_slice = 4; + devinfo->max_eus_per_subslice = 16; + devinfo->subslice_slice_stride = 1; + devinfo->eu_slice_stride = DIV_ROUND_UP(16 * 4, 8); + devinfo->eu_subslice_stride = DIV_ROUND_UP(16, 8); + + for (uint32_t ss_idx = 0; ss_idx < topology->max_subslices; ss_idx++) { + const bool ss_idx_available = + (topology->data[topology->subslice_offset + ss_idx / 8] >> + (ss_idx % 8)) & 1; + + if (!ss_idx_available) + continue; + + uint32_t s = ss_idx / 4; + uint32_t ss = ss_idx % 4; + + devinfo->max_slices = MAX2(devinfo->max_slices, s + 1); + devinfo->slice_masks |= 1u << s; + + devinfo->subslice_masks[s * devinfo->subslice_slice_stride + + ss / 8] |= 1u << (ss % 8); + + for (uint32_t eu = 0; eu < devinfo->max_eus_per_subslice; eu++) { + const bool eu_available = + (topology->data[topology->eu_offset + + ss_idx * topology->eu_stride + + eu / 8] >> (eu % 8)) & 1; + + if (!eu_available) + continue; + + devinfo->eu_masks[s * devinfo->eu_slice_stride + + ss * devinfo->eu_subslice_stride + + eu / 8] |= 1u << (eu % 8); + } + } + + update_slice_subslice_counts(devinfo); + update_pixel_pipes(devinfo); + update_l3_banks(devinfo); +} + static void update_from_topology(struct intel_device_info *devinfo, const struct drm_i915_query_topology_info *topology) @@ -1458,7 +1527,10 @@ query_topology(struct intel_device_info *devinfo, int fd) if (topo_info == NULL) return false; - update_from_topology(devinfo, topo_info); + if (devinfo->verx10 >= 125) + update_from_single_slice_topology(devinfo, topo_info); + else + update_from_topology(devinfo, topo_info); free(topo_info);