From 88fbcee5855813f3f10cd790beba475fb2221eb9 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 11 Apr 2026 00:40:29 +0200 Subject: [PATCH] nak: allow a bit of spilling when we hit 8 warps per sm If we risk a bit of spilling, we can run 50% more threads concurrently, which should work in our favor most of the time. Increases performance in pixmark_piano by 15%. Totals from 761 (0.06% of 1212873) affected shaders: CodeSize: 53351440 -> 54550752 (+2.25%); split: -0.00%, +2.25% Number of GPRs: 137160 -> 126408 (-7.84%) SLM Size: 12052 -> 47456 (+293.76%) Static cycle count: 85992784 -> 88649262 (+3.09%); split: -0.00%, +3.09% Spills to memory: 0 -> 31656 (+inf%) Fills from memory: 0 -> 31656 (+inf%) Max warps/SM: 6088 -> 9132 (+50.00%) --- src/nouveau/compiler/nak/assign_regs.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index 82de4014b12..644aeb587af 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -1486,6 +1486,28 @@ impl Shader<'_> { ); } + // We try to check if allocating a few fewer registers would allow us + // to run 12 instead of 8 warps concurrently. + // The assumption is that running 50% more threads will give us more + // performance than we lose with a bit of spilling. 
+ let actual_gprs = (total_gprs + hw_reserved_gprs).min(max_gprs); + let warps_per_sm = max_warps_per_sm(self.sm, actual_gprs); + if warps_per_sm == 8 { + let new_max = if max_warps_per_sm(self.sm, actual_gprs - 8) > 8 { + total_gprs - 8 + } else if max_warps_per_sm(self.sm, actual_gprs - 16) > 8 { + // This gives us +15% performance in pixmark_piano + total_gprs - 16 + } else { + 0 + }; + + if new_max != 0 { + max_gprs = (new_max.next_multiple_of(8) - hw_reserved_gprs) + .min(max_gprs); + } + } + if total_gprs > max_gprs { // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy // lowering because it needs to be able lower Mem copies which