From b37747ce685928c6918afae993ae74e32b7c3d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Mon, 16 Jun 2025 11:05:31 -0700 Subject: [PATCH] blorp: Emit STATE_COMPUTE_MODE before COMPUTE_WALKER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: stable Reviewed-by: Sagar Ghuge Signed-off-by: José Roberto de Souza Part-of: --- src/intel/blorp/blorp_genX_exec_brw.h | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index bebd6b21e87..c923c82b599 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -26,6 +26,7 @@ #include "blorp_priv.h" #include "dev/intel_device_info.h" +#include "common/intel_common.h" #include "common/intel_compute_slm.h" #include "common/intel_sample_positions.h" #include "common/intel_l3_config.h" @@ -1717,6 +1718,39 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) assert(cs_prog_data->local_size[2] == 1); #if GFX_VERx10 >= 125 + +/* Not needed with VRT enabled */ +#if GFX_VERx10 < 300 + uint8_t pixel_async_compute_thread_limit, z_pass_async_compute_thread_limit, + np_z_async_throttle_settings; + bool slm_or_barrier_enabled = prog_data->total_shared != 0 || cs_prog_data->uses_barrier; + + intel_compute_engine_async_threads_limit(devinfo, dispatch.threads, + slm_or_barrier_enabled, + &pixel_async_compute_thread_limit, + &z_pass_async_compute_thread_limit, + &np_z_async_throttle_settings); + blorp_emit(batch, GENX(STATE_COMPUTE_MODE), cm) { +#if GFX_VER >= 20 + cm.AsyncComputeThreadLimit = pixel_async_compute_thread_limit; + cm.ZPassAsyncComputeThreadLimit = z_pass_async_compute_thread_limit; + cm.ZAsyncThrottlesettings = np_z_async_throttle_settings; + cm.AsyncComputeThreadLimitMask = 0x7; + cm.ZPassAsyncComputeThreadLimitMask = 0x7; + cm.ZAsyncThrottlesettingsMask = 0x3; +#else + 
cm.PixelAsyncComputeThreadLimit = pixel_async_compute_thread_limit; + cm.ZPassAsyncComputeThreadLimit = z_pass_async_compute_thread_limit; + cm.PixelAsyncComputeThreadLimitMask = 0x7; + cm.ZPassAsyncComputeThreadLimitMask = 0x7; + if (intel_device_info_is_mtl_or_arl(devinfo)) { + cm.ZAsyncThrottlesettings = np_z_async_throttle_settings; + cm.ZAsyncThrottlesettingsMask = 0x3; + } +#endif + } +#endif /* GFX_VERx10 < 300 */ + uint32_t surfaces_offset = blorp_setup_binding_table(batch, params); uint32_t samplers_offset =