From cb423ee63608551e0d20d9f1fe36598c9e8883fa Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Mon, 9 Mar 2026 10:46:55 -0700 Subject: [PATCH] anv: Fix Wa_14021821874, Wa_14018813551, Wa_14026600921 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WA states that we need to set the maximum number of stackIDs per DSS allocated from RT_DISPATCH_GLOBALS to 2048. We can still throttle/control the CFE_STATE::StackID to be in the range specified by the field. Having CFE_STATE::stackIDs capped to 2K by default does impact performance. The more outstanding ray queries there are, the larger the working set and the greater the impact on the cache hit rate. This affects performance on Xe2 and later: * Boundary Benchmark: 36.2% * Solar Bay extreme: 9.8% * Hitman world of assassination: 3.9% Fixes: c1a44e8d4317 ("anv: force StackIDControl value for Wa_14021821874") Signed-off-by: Sagar Ghuge Reviewed-by: Tapani Pälli Part-of: --- src/intel/compiler/brw/brw_rt.h | 15 ++++++++++++++- src/intel/vulkan/genX_cmd_compute.c | 14 -------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/intel/compiler/brw/brw_rt.h b/src/intel/compiler/brw/brw_rt.h index 09e95c0d6cf..8ec39946a56 100644 --- a/src/intel/compiler/brw/brw_rt.h +++ b/src/intel/compiler/brw/brw_rt.h @@ -262,7 +262,20 @@ brw_rt_ray_queries_stack_ids_per_dss(const struct intel_device_info *devinfo) * "For Sync Ray tracing (i.e. using RayQueries), SW must allocate * space assuming 2K StackIDs" */ - return 2048; + uint32_t num_stack_id_per_dss = 2048; + + /* Wa_14021821874, Wa_14018813551, Wa_14026600921: + * + * "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We + * already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics + * as the workaround also requires. 
+ */ + if (intel_needs_workaround(devinfo, 14021821874) || + intel_needs_workaround(devinfo, 14018813551) || + intel_needs_workaround(devinfo, 14026600921)) + num_stack_id_per_dss = 2048; + + return num_stack_id_per_dss; } static inline uint32_t diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 58e83e7494c..2c078167de3 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -76,20 +76,6 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer, case 2048: cfe.StackIDControl = StackIDs2048; break; default: UNREACHABLE("invalid stack_ids value"); } - -#if INTEL_WA_14021821874_GFX_VER || INTEL_WA_14018813551_GFX_VER || INTEL_WA_14026600921_GFX_VER - /* Wa_14021821874, Wa_14018813551, Wa_14026600921: - * - * "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We - * already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics - * as the workaround also requires. - */ - if (intel_needs_workaround(cmd_buffer->device->info, 14021821874) || - intel_needs_workaround(cmd_buffer->device->info, 14018813551) || - intel_needs_workaround(cmd_buffer->device->info, 14026600921)) - cfe.StackIDControl = StackIDs2048; -#endif - #endif cfe.OverDispatchControl = 2; /* 50% overdispatch */