/* * Copyright 2024 Intel Corporation * SPDX-License-Identifier: MIT */ #include #include "dev/intel_debug.h" #include "intel_common.h" #include "intel_engine.h" #include "util/compiler.h" /* Updates intel_device_info fields that has dependencies on intel/common * functions. */ void intel_common_update_device_info(int fd, struct intel_device_info *devinfo) { struct intel_query_engine_info *engine_info; enum intel_engine_class klass; engine_info = intel_engine_get_info(fd, devinfo->kmd_type); if (!engine_info) return; devinfo->has_compute_engine = intel_engines_count(engine_info, INTEL_ENGINE_CLASS_COMPUTE); for (klass = 0; klass < INTEL_ENGINE_CLASS_INVALID; klass++) devinfo->engine_class_supported_count[klass] = intel_engines_supported_count(fd, devinfo, engine_info, klass); free(engine_info); } void intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo, uint32_t hw_threads_in_wg, bool slm_or_barrier_enabled, uint8_t *ret_pixel_async_compute_thread_limit, uint8_t *ret_z_pass_async_compute_thread_limit, uint8_t *ret_np_z_async_throttle_settings) { /* Spec recommended SW values. * IMPORTANT: values set to this variables are HW values */ uint8_t pixel_async_compute_thread_limit = 2; uint8_t z_pass_async_compute_thread_limit = 0; uint8_t np_z_async_throttle_settings = 0; bool has_vrt = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT); /* When VRT is enabled async threads limits don't have effect */ if (!slm_or_barrier_enabled || has_vrt) { *ret_pixel_async_compute_thread_limit = pixel_async_compute_thread_limit; *ret_z_pass_async_compute_thread_limit = z_pass_async_compute_thread_limit; *ret_np_z_async_throttle_settings = np_z_async_throttle_settings; return; } if (devinfo->verx10 >= 200) { /* Spec give us a table with Throttle value | SIMD | MAX API threads(LWS). * HW threads = MAX API threads(LWS) / SIMD */ switch (hw_threads_in_wg) { case 0 ... 2: /* Minimum is Max 2 but lets use spec recommended value below */ FALLTHROUGH; case 3 ... 8: /* Max 8 */ pixel_async_compute_thread_limit = 2; break; case 9 ... 16: /* Max 16 */ pixel_async_compute_thread_limit = 3; break; case 17 ... 24: /* Max 24 */ pixel_async_compute_thread_limit = 4; break; case 25 ... 32: /* Max 32 */ pixel_async_compute_thread_limit = 5; break; case 33 ... 40: /* Max 40 */ pixel_async_compute_thread_limit = 6; break; case 41 ... 48: /* Max 48 */ pixel_async_compute_thread_limit = 7; break; default: /* No limit applied */ pixel_async_compute_thread_limit = 0; } switch (hw_threads_in_wg) { case 0 ... 32: /* Minimum is Max 32 but lets use spec recommended value below */ FALLTHROUGH; case 33 ... 40: /* Minimum is Max 40 but lets use spec recommended value below */ FALLTHROUGH; case 41 ... 48: /* Minimum is Max 48 but lets use spec recommended value below */ FALLTHROUGH; case 49 ... 56: /* Minimum is Max 56 but lets use spec recommended value below */ FALLTHROUGH; case 57 ... 60: /* Max 60 */ z_pass_async_compute_thread_limit = 0; break; default: /* No limit applied */ z_pass_async_compute_thread_limit = 1; } switch (hw_threads_in_wg) { case 0 ... 32: /* Max 32 */ np_z_async_throttle_settings = 1; break; case 33 ... 40: /* Max 40 */ np_z_async_throttle_settings = 2; break; case 41 ... 48: /* Max 48 */ np_z_async_throttle_settings = 3; break; default: /* Use the same settings as the Pixel shader Async compute setting, * for values >= async compute settings disables the limits */ np_z_async_throttle_settings = 0; } } else { switch (hw_threads_in_wg) { case 0 ... 4: /* Minimum is Max 2 but lets use spec recommended value below */ FALLTHROUGH; case 5 ... 16: /* Max 8 */ pixel_async_compute_thread_limit = 2; break; case 17 ... 32: /* Max 16 */ pixel_async_compute_thread_limit = 3; break; case 33 ... 48: /* Max 24 */ pixel_async_compute_thread_limit = 4; break; case 49 ... 64: /* Max 32 */ pixel_async_compute_thread_limit = 5; break; case 65 ... 80: /* Max 40 */ pixel_async_compute_thread_limit = 6; break; case 81 ... 96: /* Max 48 */ pixel_async_compute_thread_limit = 7; break; default: /* No limit applied */ pixel_async_compute_thread_limit = 0; } switch (hw_threads_in_wg) { case 0 ... 64: /* Minimum is Max 32 but lets use spec recommended value below */ FALLTHROUGH; case 65 ... 80: /* Minimum is Max 40 but lets use spec recommended value below */ FALLTHROUGH; case 81 ... 96: /* Minimum is Max 48 but lets use spec recommended value below */ FALLTHROUGH; case 97 ... 112: /* Minimum is Max 56 but lets use spec recommended value below */ FALLTHROUGH; case 113 ... 120: /* Max 60 */ z_pass_async_compute_thread_limit = 0; break; default: /* Max 64/No limit applied */ z_pass_async_compute_thread_limit = 1; } switch (hw_threads_in_wg) { case 0 ... 64: /* Max 32 */ np_z_async_throttle_settings = 1; break; case 65 ... 80: /* Max 40 */ np_z_async_throttle_settings = 2; break; case 81 ... 96: /* Max 48 */ np_z_async_throttle_settings = 3; break; default: /* Use the same settings as the Pixel shader Async compute setting, * for values >= async compute settings disables the limits */ np_z_async_throttle_settings = 0; } } assert(np_z_async_throttle_settings != 0 || pixel_async_compute_thread_limit == 0); *ret_pixel_async_compute_thread_limit = pixel_async_compute_thread_limit; *ret_z_pass_async_compute_thread_limit = z_pass_async_compute_thread_limit; *ret_np_z_async_throttle_settings = np_z_async_throttle_settings; } int intel_compute_threads_group_dispatch_size(uint32_t hw_threads_in_wg) { /* Following value calculated based on overdispatch is disabled. In case if * compute overdispatch disabled set to 1, then we need to use TG Size 1. */ switch (hw_threads_in_wg) { case 0 ... 16: return 0; case 17 ... 32: return 1; default: return 2; } }