nak: Use the hardware's max warps_per_sm value

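This should improve our occupancy estimates: max_warps_per_sm() previously
clamped its result to a hard-coded limit of 48 warps per SM, and it now clamps
to the warps_per_sm value reported for the device instead.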

Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38913>
This commit is contained in:
Mel Henning 2025-12-11 15:35:00 -05:00 committed by Marge Bot
parent b154071178
commit dfdaee5ca7
4 changed files with 10 additions and 7 deletions

View file

@ -433,7 +433,7 @@ fn nak_compile_shader_internal(
Some(unsafe { &*fs_key }) Some(unsafe { &*fs_key })
}; };
let sm = ShaderModelInfo::new(nak.sm); let sm = ShaderModelInfo::new(nak.sm, nak.warps_per_sm);
let mut s = nak_shader_from_nir(nak, nir, &sm); let mut s = nak_shader_from_nir(nak, nir, &sm);
if DEBUG.print() { if DEBUG.print() {

View file

@ -37,7 +37,8 @@ impl RunSingleton {
let run = Runner::new(dev_id); let run = Runner::new(dev_id);
let sm_nr = run.dev_info().sm; let sm_nr = run.dev_info().sm;
let sm = ShaderModelInfo::new(sm_nr); let sm =
ShaderModelInfo::new(sm_nr, run.dev_info().max_warps_per_mp);
RunSingleton { sm, run } RunSingleton { sm, run }
}) })
} }

View file

@ -9305,11 +9305,12 @@ pub trait ShaderModel {
pub struct ShaderModelInfo { pub struct ShaderModelInfo {
sm: u8, sm: u8,
warps_per_sm: u8,
} }
impl ShaderModelInfo { impl ShaderModelInfo {
pub fn new(sm: u8) -> Self { pub fn new(sm: u8, warps_per_sm: u8) -> Self {
ShaderModelInfo { sm } ShaderModelInfo { sm, warps_per_sm }
} }
} }
@ -9430,7 +9431,7 @@ pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 {
min(out, 255) min(out, 255)
} }
pub fn max_warps_per_sm(gprs: u32) -> u32 { pub fn max_warps_per_sm(sm: &ShaderModelInfo, gprs: u32) -> u32 {
fn prev_multiple_of(x: u32, y: u32) -> u32 { fn prev_multiple_of(x: u32, y: u32) -> u32 {
(x / y) * y (x / y) * y
} }
@ -9440,7 +9441,7 @@ pub fn max_warps_per_sm(gprs: u32) -> u32 {
// GPRs are allocated in multiples of 8 // GPRs are allocated in multiples of 8
let gprs = gprs.next_multiple_of(8); let gprs = gprs.next_multiple_of(8);
let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4); let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4);
min(max_warps, 48) min(max_warps, sm.warps_per_sm.into())
} }
pub struct Shader<'a> { pub struct Shader<'a> {
@ -9600,6 +9601,7 @@ impl Shader<'_> {
self.info.uses_fp64 = uses_fp64; self.info.uses_fp64 = uses_fp64;
self.info.max_warps_per_sm = max_warps_per_sm( self.info.max_warps_per_sm = max_warps_per_sm(
self.sm,
self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(), self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(),
); );

View file

@ -87,7 +87,7 @@ fn disassemble_instrs(instrs: Vec<Instr>, sm: u8) -> Vec<String> {
io: ShaderIoInfo::None, io: ShaderIoInfo::None,
}; };
let sm = ShaderModelInfo::new(sm); let sm = ShaderModelInfo::new(sm, 0);
let s = Shader { let s = Shader {
sm: &sm, sm: &sm,
info: info, info: info,