From d5037a34bb05f4304b1ccae70635f77612c3ada9 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Tue, 15 Jul 2025 06:39:22 +1000
Subject: [PATCH] nak: don't set the divergent flag on uniform sysvals

The list of S2UR allowed sysvals comes from the nvidia NDA docs.

with open shader-db:

Totals:
CodeSize: 14022144 -> 12291088 (-12.35%); split: -12.94%, +0.59%
Number of GPRs: 41467 -> 40560 (-2.19%)
SLM Size: 92344 -> 68824 (-25.47%)
Static cycle count: 5312651 -> 3856674 (-27.41%); split: -27.95%, +0.54%
Spills to memory: 54216 -> 51018 (-5.90%)
Fills from memory: 54216 -> 51018 (-5.90%)
Spills to reg: 7533 -> 7204 (-4.37%); split: -4.42%, +0.05%
Fills from reg: 8406 -> 7987 (-4.98%)
Max warps/SM: 61508 -> 61780 (+0.44%)

Totals from 689 (51.73% of 1332) affected shaders:
CodeSize: 12873552 -> 11142496 (-13.45%); split: -14.09%, +0.64%
Number of GPRs: 26789 -> 25882 (-3.39%)
SLM Size: 89176 -> 65656 (-26.37%)
Static cycle count: 5058667 -> 3602690 (-28.78%); split: -29.35%, +0.57%
Spills to memory: 54216 -> 51018 (-5.90%)
Fills from memory: 54216 -> 51018 (-5.90%)
Spills to reg: 7533 -> 7204 (-4.37%); split: -4.42%, +0.05%
Fills from reg: 8406 -> 7987 (-4.98%)
Max warps/SM: 30908 -> 31180 (+0.88%)

PERCENTAGE DELTAS                 Shaders  CodeSize  Number of GPRs  SLM Size  Static cycle count  Spills to memory  Fills from memory  Spills to reg  Fills from reg  Max warps/SM
google-meet-clvk/BgBlur                49    +6.46%          -5.10%         .              +6.81%                 .                  .              .               .        +1.48%
google-meet-clvk/Relight               81    +5.47%          -4.74%         .              +6.29%                 .                  .              .               .        +1.23%
parallel-rdp/small_subgroup           246    -2.88%          -4.10%         .              +0.41%                 .                  .         -3.65%          -2.39%        +0.73%
parallel-rdp/small_uber_subgroup       55   -23.04%          -1.32%   -36.28%             -42.86%            -1.61%             -1.61%         -6.88%          -9.55%        +0.68%
parallel-rdp/subgroup                 327    -2.78%          -2.64%         .              -0.26%                 .                  .         -3.17%          -2.07%        +0.53%
parallel-rdp/uber_subgroup             55   -25.59%          -1.32%   -29.98%             -41.29%            -9.04%             -9.04%         -7.06%         -10.08%        +0.68%
q2rtx/q2rtx-rt-pipeline                42    -0.38%          -0.25%   -49.40%              +0.84%           -97.48%            -97.48%              .               .             .
sascha-willems/bloom                   12         .               .         .                   .                 .                  .              .               .             .
sascha-willems/computecloth             7    +0.20%               .         .              +0.51%                 .                  .              .               .             .
sascha-willems/computecullandlod        5    +0.21%               .         .              +0.84%                 .                  .              .               .             .
sascha-willems/computeheadless          1   -28.85%               .         .             +27.71%                 .                  .              .               .             .
sascha-willems/computenbody             6    +0.73%               .         .              +1.78%                 .                  .              .               .             .
sascha-willems/computeparticles         5    +0.53%               .         .              +1.24%                 .                  .              .               .             .
sascha-willems/computeraytracing        5    +0.14%               .         .              +0.26%                 .                  .              .               .             .
sascha-willems/computeshader            7    +1.29%               .         .              +2.97%                 .                  .              .               .             .

Part-of:
---
 src/nouveau/compiler/nak_nir.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 750981b70cb..04530c8d760 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -24,9 +24,27 @@ nir_def *
 nak_nir_load_sysval(nir_builder *b, enum nak_sv idx,
                     enum gl_access_qualifier access)
 {
+   bool divergent;
+
+   switch (idx) {
+   case NAK_SV_CTAID_X:
+   case NAK_SV_CTAID_Y:
+   case NAK_SV_CTAID_Z:
+   case NAK_SV_VIRTCFG:
+   case NAK_SV_PRIM_TYPE:
+   case NAK_SV_CLOCK_LO:
+   case NAK_SV_CLOCK_HI:
+   case NAK_SV_VARIABLE_RATE:
+      divergent = false;
+      break;
+   default:
+      divergent = true;
+      break;
+   }
+
    return nir_load_sysval_nv(b, 32, .base = idx,
                              .access = access,
-                             .divergent = true);
+                             .divergent = divergent);
 }
 
 bool