From dfdaee5ca74fbf1945326b454079575826df5a89 Mon Sep 17 00:00:00 2001 From: Mel Henning Date: Thu, 11 Dec 2025 15:35:00 -0500 Subject: [PATCH] nak: Use the hardware's max warps_per_sm value This should improve our occupancy estimates. Reviewed-by: Mary Guillemard Part-of: --- src/nouveau/compiler/nak/api.rs | 2 +- src/nouveau/compiler/nak/hw_tests.rs | 3 ++- src/nouveau/compiler/nak/ir.rs | 10 ++++++---- src/nouveau/compiler/nak/nvdisasm_tests.rs | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 4d430c83087..5d8e030be66 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -433,7 +433,7 @@ fn nak_compile_shader_internal( Some(unsafe { &*fs_key }) }; - let sm = ShaderModelInfo::new(nak.sm); + let sm = ShaderModelInfo::new(nak.sm, nak.warps_per_sm); let mut s = nak_shader_from_nir(nak, nir, &sm); if DEBUG.print() { diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index 0a809e936e5..483947f2c17 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -37,7 +37,8 @@ impl RunSingleton { let run = Runner::new(dev_id); let sm_nr = run.dev_info().sm; - let sm = ShaderModelInfo::new(sm_nr); + let sm = + ShaderModelInfo::new(sm_nr, run.dev_info().max_warps_per_mp); RunSingleton { sm, run } }) } diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 8cab8e5c70d..f97e34a5e44 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -9305,11 +9305,12 @@ pub trait ShaderModel { pub struct ShaderModelInfo { sm: u8, + warps_per_sm: u8, } impl ShaderModelInfo { - pub fn new(sm: u8) -> Self { - ShaderModelInfo { sm } + pub fn new(sm: u8, warps_per_sm: u8) -> Self { + ShaderModelInfo { sm, warps_per_sm } } } @@ -9430,7 +9431,7 @@ pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 { min(out, 255) } -pub fn max_warps_per_sm(gprs: u32) -> u32 { +pub fn max_warps_per_sm(sm: &ShaderModelInfo, gprs: u32) -> u32 { fn prev_multiple_of(x: u32, y: u32) -> u32 { (x / y) * y } @@ -9440,7 +9441,7 @@ pub fn max_warps_per_sm(gprs: u32) -> u32 { // GPRs are allocated in multiples of 8 let gprs = gprs.next_multiple_of(8); let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4); - min(max_warps, 48) + min(max_warps, sm.warps_per_sm.into()) } pub struct Shader<'a> { @@ -9600,6 +9601,7 @@ impl Shader<'_> { self.info.uses_fp64 = uses_fp64; self.info.max_warps_per_sm = max_warps_per_sm( + self.sm, self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(), ); diff --git a/src/nouveau/compiler/nak/nvdisasm_tests.rs b/src/nouveau/compiler/nak/nvdisasm_tests.rs index 622d4ba22ad..21df3625e2d 100644 --- a/src/nouveau/compiler/nak/nvdisasm_tests.rs +++ b/src/nouveau/compiler/nak/nvdisasm_tests.rs @@ -87,7 +87,7 @@ fn disassemble_instrs(instrs: Vec, sm: u8) -> Vec { io: ShaderIoInfo::None, }; - let sm = ShaderModelInfo::new(sm); + let sm = ShaderModelInfo::new(sm, 0); let s = Shader { sm: &sm, info: info,