From ada713b32fc974fd19e64d9895c281d21fccdd12 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Mon, 9 Mar 2026 10:46:55 -0700 Subject: [PATCH] anv: Fix Wa_14021821874, Wa_14018813551, Wa_14026600921 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WA states that we need to set the maximum number of stackIDs per DSS allocated from RT_DISPATCH_GLOBALS to 2048. We can still throttle/control the CFE_STATE::StackID to be in the range specified by the field. Having CFE_STATE::stackIDs capped to 2K by default does impact performance. The more outstanding ray queries there are, the larger the working set and the greater the impact on cache hit rate. This affects performance on Xe2+: * Boundary Benchmark: 36.2% * Solar Bay extreme: 9.8% * Hitman world of assassination: 3.9% Fixes: c1a44e8d4317 ("anv: force StackIDControl value for Wa_14021821874") Signed-off-by: Sagar Ghuge Reviewed-by: Tapani Pälli (cherry picked from commit cb423ee63608551e0d20d9f1fe36598c9e8883fa) Part-of: --- .pick_status.json | 2 +- src/intel/compiler/brw/brw_rt.h | 15 ++++++++++++++- src/intel/vulkan/genX_cmd_compute.c | 14 -------------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index f6f1af02fda..e448b59b7b8 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -364,7 +364,7 @@ "description": "anv: Fix Wa_14021821874, Wa_14018813551, Wa_14026600921", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "c1a44e8d4317e2288bdc620458783149ddfeb8d8", "notes": null diff --git a/src/intel/compiler/brw/brw_rt.h b/src/intel/compiler/brw/brw_rt.h index 3b4fdf4ec92..bbc1d901ca3 100644 --- a/src/intel/compiler/brw/brw_rt.h +++ b/src/intel/compiler/brw/brw_rt.h @@ -277,7 +277,20 @@ brw_rt_ray_queries_stack_ids_per_dss(const struct intel_device_info *devinfo) * "For Sync Ray tracing (i.e. 
using RayQueries), SW must allocate * space assuming 2K StackIDs" */ - return 2048; + uint32_t num_stack_id_per_dss = 2048; + + /* Wa_14021821874, Wa_14018813551, Wa_14026600921: + * + * "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We + * already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics + * as the workaround also requires. + */ + if (intel_needs_workaround(devinfo, 14021821874) || + intel_needs_workaround(devinfo, 14018813551) || + intel_needs_workaround(devinfo, 14026600921)) + num_stack_id_per_dss = 2048; + + return num_stack_id_per_dss; } static inline uint32_t diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index bfae57c4657..562acd7f70f 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -75,20 +75,6 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer, case 2048: cfe.StackIDControl = StackIDs2048; break; default: UNREACHABLE("invalid stack_ids value"); } - -#if INTEL_WA_14021821874_GFX_VER || INTEL_WA_14018813551_GFX_VER || INTEL_WA_14026600921_GFX_VER - /* Wa_14021821874, Wa_14018813551, Wa_14026600921: - * - * "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We - * already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics - * as the workaround also requires. - */ - if (intel_needs_workaround(cmd_buffer->device->info, 14021821874) || - intel_needs_workaround(cmd_buffer->device->info, 14018813551) || - intel_needs_workaround(cmd_buffer->device->info, 14026600921)) - cfe.StackIDControl = StackIDs2048; -#endif - #endif cfe.OverDispatchControl = 2; /* 50% overdispatch */