mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
nak: Use the hardware's max warps_per_sm value
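This should improve our occupancy estimates: the per-SM warp limit is now clamped to the maximum reported by the hardware instead of a hard-coded 48.
Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38913>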
This commit is contained in:
parent
b154071178
commit
dfdaee5ca7
4 changed files with 10 additions and 7 deletions
|
|
@ -433,7 +433,7 @@ fn nak_compile_shader_internal(
|
||||||
Some(unsafe { &*fs_key })
|
Some(unsafe { &*fs_key })
|
||||||
};
|
};
|
||||||
|
|
||||||
let sm = ShaderModelInfo::new(nak.sm);
|
let sm = ShaderModelInfo::new(nak.sm, nak.warps_per_sm);
|
||||||
let mut s = nak_shader_from_nir(nak, nir, &sm);
|
let mut s = nak_shader_from_nir(nak, nir, &sm);
|
||||||
|
|
||||||
if DEBUG.print() {
|
if DEBUG.print() {
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,8 @@ impl RunSingleton {
|
||||||
|
|
||||||
let run = Runner::new(dev_id);
|
let run = Runner::new(dev_id);
|
||||||
let sm_nr = run.dev_info().sm;
|
let sm_nr = run.dev_info().sm;
|
||||||
let sm = ShaderModelInfo::new(sm_nr);
|
let sm =
|
||||||
|
ShaderModelInfo::new(sm_nr, run.dev_info().max_warps_per_mp);
|
||||||
RunSingleton { sm, run }
|
RunSingleton { sm, run }
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9305,11 +9305,12 @@ pub trait ShaderModel {
|
||||||
|
|
||||||
pub struct ShaderModelInfo {
|
pub struct ShaderModelInfo {
|
||||||
sm: u8,
|
sm: u8,
|
||||||
|
warps_per_sm: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ShaderModelInfo {
|
impl ShaderModelInfo {
|
||||||
pub fn new(sm: u8) -> Self {
|
pub fn new(sm: u8, warps_per_sm: u8) -> Self {
|
||||||
ShaderModelInfo { sm }
|
ShaderModelInfo { sm, warps_per_sm }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -9430,7 +9431,7 @@ pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 {
|
||||||
min(out, 255)
|
min(out, 255)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn max_warps_per_sm(gprs: u32) -> u32 {
|
pub fn max_warps_per_sm(sm: &ShaderModelInfo, gprs: u32) -> u32 {
|
||||||
fn prev_multiple_of(x: u32, y: u32) -> u32 {
|
fn prev_multiple_of(x: u32, y: u32) -> u32 {
|
||||||
(x / y) * y
|
(x / y) * y
|
||||||
}
|
}
|
||||||
|
|
@ -9440,7 +9441,7 @@ pub fn max_warps_per_sm(gprs: u32) -> u32 {
|
||||||
// GPRs are allocated in multiples of 8
|
// GPRs are allocated in multiples of 8
|
||||||
let gprs = gprs.next_multiple_of(8);
|
let gprs = gprs.next_multiple_of(8);
|
||||||
let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4);
|
let max_warps = prev_multiple_of((total_regs / 32) / gprs, 4);
|
||||||
min(max_warps, 48)
|
min(max_warps, sm.warps_per_sm.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Shader<'a> {
|
pub struct Shader<'a> {
|
||||||
|
|
@ -9600,6 +9601,7 @@ impl Shader<'_> {
|
||||||
self.info.uses_fp64 = uses_fp64;
|
self.info.uses_fp64 = uses_fp64;
|
||||||
|
|
||||||
self.info.max_warps_per_sm = max_warps_per_sm(
|
self.info.max_warps_per_sm = max_warps_per_sm(
|
||||||
|
self.sm,
|
||||||
self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(),
|
self.info.num_gprs as u32 + self.sm.hw_reserved_gprs(),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ fn disassemble_instrs(instrs: Vec<Instr>, sm: u8) -> Vec<String> {
|
||||||
io: ShaderIoInfo::None,
|
io: ShaderIoInfo::None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let sm = ShaderModelInfo::new(sm);
|
let sm = ShaderModelInfo::new(sm, 0);
|
||||||
let s = Shader {
|
let s = Shader {
|
||||||
sm: &sm,
|
sm: &sm,
|
||||||
info: info,
|
info: info,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue