anv: Fix Wa_14021821874, Wa_14018813551, Wa_14026600921

The WA states that we need to set the maximum number of stackIDs per DSS
in RT_DISPATCH_GLOBALS to 2048.

We can still throttle/control the CFE_STATE::StackID to be in range
specified by the field.

Having CFE_STATE::stackIDs capped to 2K by default does impact
performance: the more outstanding ray queries there are, the larger the
working set and the greater the impact on the cache hit rate.

This affects performance on Xe2+:
* Boundary Benchmark:            36.2%
* Solar Bay extreme:             9.8%
* Hitman world of assassination: 3.9%

Fixes: c1a44e8d43 ("anv: force StackIDControl value for Wa_14021821874")
Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
(cherry picked from commit cb423ee636)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40359>
This commit is contained in:
Sagar Ghuge 2026-03-09 10:46:55 -07:00 committed by Eric Engestrom
parent 446fab4a4a
commit ada713b32f
3 changed files with 15 additions and 16 deletions

View file

@ -364,7 +364,7 @@
"description": "anv: Fix Wa_14021821874, Wa_14018813551, Wa_14026600921",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "c1a44e8d4317e2288bdc620458783149ddfeb8d8",
"notes": null

View file

@ -277,7 +277,20 @@ brw_rt_ray_queries_stack_ids_per_dss(const struct intel_device_info *devinfo)
* "For Sync Ray tracing (i.e. using RayQueries), SW must allocate
* space assuming 2K StackIDs"
*/
return 2048;
uint32_t num_stack_id_per_dss = 2048;
/* Wa_14021821874, Wa_14018813551, Wa_14026600921:
*
* "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We
* already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics
* as the workaround also requires.
*/
if (intel_needs_workaround(devinfo, 14021821874) ||
intel_needs_workaround(devinfo, 14018813551) ||
intel_needs_workaround(devinfo, 14026600921))
num_stack_id_per_dss = 2048;
return num_stack_id_per_dss;
}
static inline uint32_t

View file

@ -75,20 +75,6 @@ genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
case 2048: cfe.StackIDControl = StackIDs2048; break;
default: UNREACHABLE("invalid stack_ids value");
}
#if INTEL_WA_14021821874_GFX_VER || INTEL_WA_14018813551_GFX_VER || INTEL_WA_14026600921_GFX_VER
/* Wa_14021821874, Wa_14018813551, Wa_14026600921:
*
* "StackIDControlOverride_RTGlobals = 0 (i.e. 2k)". We
* already set stack size per ray to 64 in brw_nir_lower_rt_intrinsics
* as the workaround also requires.
*/
if (intel_needs_workaround(cmd_buffer->device->info, 14021821874) ||
intel_needs_workaround(cmd_buffer->device->info, 14018813551) ||
intel_needs_workaround(cmd_buffer->device->info, 14026600921))
cfe.StackIDControl = StackIDs2048;
#endif
#endif
cfe.OverDispatchControl = 2; /* 50% overdispatch */