From 475c8f058035366da0828a65cc8016d05ab866bc Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 1 Aug 2025 12:33:24 +0200 Subject: [PATCH] nak: set max_gpr to multiple of 8s Optimizations cutting down on GPRs often lead to awkward situations where RA is more restricted and has to insert more mov instructions, pumping up the instruction count. In order to give developers more reliable stats, we just set max_gprs to the next multiple of 8, taking hw reserved registers into account. This does not impact occupancy in any way despite the increase in gprs. Totals: CodeSize: 920980864 -> 914748784 (-0.68%); split: -0.69%, +0.02% Number of GPRs: 3544248 -> 3879749 (+9.47%) Static cycle count: 217345431 -> 216414194 (-0.43%); split: -0.50%, +0.07% Totals from 78493 (89.58% of 87622) affected shaders: CodeSize: 795883088 -> 789651008 (-0.78%); split: -0.80%, +0.02% Number of GPRs: 3108571 -> 3444072 (+10.79%) Static cycle count: 187450578 -> 186519341 (-0.50%); split: -0.58%, +0.08% Reviewed-by: Mel Henning Part-of: --- src/nouveau/compiler/nak/assign_regs.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index 7dbfb72ae9a..75b2b1781f6 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -1472,11 +1472,12 @@ impl Shader<'_> { max_gprs += 2; } + let hw_reserved_gprs = self.sm.hw_reserved_gprs(); if let ShaderStageInfo::Compute(cs_info) = &self.info.stage { max_gprs = min( max_gprs, gpr_limit_from_local_size(&cs_info.local_size) - - self.sm.hw_reserved_gprs(), + - hw_reserved_gprs, ); } @@ -1492,6 +1493,17 @@ impl Shader<'_> { // Re-calculate liveness one last time live = SimpleLiveness::for_function(f); + } else { + // GPRs are allocated in multiple of 8. That means we can give RA a + // bit more freedom by making gprs up until the next multiple + // available. 
+ let next_multiple_gprs = (total_gprs + hw_reserved_gprs) + .next_multiple_of(8) + - hw_reserved_gprs; + let free_gprs = next_multiple_gprs.min(max_gprs) - total_gprs; + + total_gprs += free_gprs; + gpr_limit += free_gprs; } self.info.num_gprs = total_gprs.try_into().unwrap();