mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
nak/instr_sched_prepass: take shared mem and block size into account
There is a bug in prepass that sometimes hurts occupancy.

Totals from 6865 (0.57% of 1212873) affected shaders:
CodeSize: 117726624 -> 116684032 (-0.89%); split: -1.03%, +0.15%
Number of GPRs: 326680 -> 377264 (+15.48%); split: -0.01%, +15.49%
Static cycle count: 55771616 -> 55292902 (-0.86%); split: -1.45%, +0.59%
Spills to reg: 14283 -> 14611 (+2.30%); split: -0.19%, +2.49%
Fills from reg: 12409 -> 12708 (+2.41%); split: -0.15%, +2.55%
Max warps/SM: 215877 -> 215753 (-0.06%)
This commit is contained in:
parent
a09af6ce29
commit
467980079d
1 changed files with 37 additions and 6 deletions
|
|
@ -26,9 +26,14 @@ const TARGET_FREE: i32 = 4;
|
|||
/// one more register causes occupancy to plummet. This function figures out how
|
||||
/// many GPRs you can use without costing occupancy, assuming you always need at
|
||||
/// least `x` GPRs.
|
||||
fn next_occupancy_cliff(sm: &ShaderModelInfo, x: u32) -> u32 {
|
||||
fn next_occupancy_cliff(
|
||||
sm: &ShaderModelInfo,
|
||||
x: u32,
|
||||
shared_mem: u16,
|
||||
block_size: u16,
|
||||
) -> u32 {
|
||||
let total_regs: u32 = 65536;
|
||||
let threads = max_warps_per_sm(sm, x, 0, 0) * 32;
|
||||
let threads = max_warps_per_sm(sm, x, shared_mem, block_size) * 32;
|
||||
|
||||
// This function doesn't actually model the maximum number of registers
|
||||
// correctly - callers need to worry about that separately. We do,
|
||||
|
|
@ -44,7 +49,7 @@ fn test_next_occupancy_cliff() {
|
|||
for max_hw_warps in [32, 48, 64] {
|
||||
let sm = ShaderModelInfo::new(75, max_hw_warps, 0, 100 * 1024);
|
||||
for x in 0..255 {
|
||||
let y = next_occupancy_cliff(&sm, x);
|
||||
let y = next_occupancy_cliff(&sm, x, 0, 0);
|
||||
assert!(y >= x);
|
||||
assert_eq!(
|
||||
max_warps_per_sm(&sm, x, 0, 0),
|
||||
|
|
@ -62,10 +67,14 @@ fn next_occupancy_cliff_with_reserved(
|
|||
sm: &ShaderModelInfo,
|
||||
gprs: i32,
|
||||
reserved: i32,
|
||||
shared_mem: u16,
|
||||
block_size: u16,
|
||||
) -> i32 {
|
||||
i32::try_from(next_occupancy_cliff(
|
||||
sm,
|
||||
(gprs + reserved).try_into().unwrap(),
|
||||
shared_mem,
|
||||
block_size,
|
||||
))
|
||||
.unwrap()
|
||||
- reserved
|
||||
|
|
@ -757,11 +766,18 @@ fn get_schedule_types(
|
|||
min_gpr_target: i32,
|
||||
max_gpr_target: i32,
|
||||
reserved_gprs: i32,
|
||||
shared_mem: u16,
|
||||
block_size: u16,
|
||||
) -> Vec<ScheduleType> {
|
||||
let mut out = Vec::new();
|
||||
|
||||
let mut gpr_target =
|
||||
next_occupancy_cliff_with_reserved(sm, min_gpr_target, reserved_gprs);
|
||||
let mut gpr_target = next_occupancy_cliff_with_reserved(
|
||||
sm,
|
||||
min_gpr_target,
|
||||
reserved_gprs,
|
||||
shared_mem,
|
||||
block_size,
|
||||
);
|
||||
while gpr_target < max_regs[RegFile::GPR] {
|
||||
out.push(ScheduleType::RegLimit(gpr_target.try_into().unwrap()));
|
||||
|
||||
|
|
@ -776,6 +792,8 @@ fn get_schedule_types(
|
|||
sm,
|
||||
gpr_target + 1,
|
||||
reserved_gprs,
|
||||
shared_mem,
|
||||
block_size,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -798,6 +816,8 @@ impl Function {
|
|||
&mut self,
|
||||
sm: &ShaderModelInfo,
|
||||
max_regs: PerRegFile<i32>,
|
||||
shared_mem: u16,
|
||||
block_size: u16,
|
||||
) {
|
||||
let liveness = SimpleLiveness::for_function(self);
|
||||
let mut live_out_sets: Vec<LiveSet> = Vec::new();
|
||||
|
|
@ -918,6 +938,8 @@ impl Function {
|
|||
min_gpr_target,
|
||||
max_gpr_target,
|
||||
reserved_gprs,
|
||||
shared_mem,
|
||||
block_size,
|
||||
);
|
||||
schedule_types.reverse();
|
||||
|
||||
|
|
@ -1031,8 +1053,17 @@ impl Shader<'_> {
|
|||
}
|
||||
max_regs[RegFile::GPR] -= SW_RESERVED_GPRS;
|
||||
|
||||
let mut shared_mem = 0;
|
||||
let mut block_size = 0;
|
||||
if let ShaderStageInfo::Compute(compute) = &self.info.stage {
|
||||
shared_mem = compute.smem_size;
|
||||
block_size = compute.local_size.iter().product();
|
||||
}
|
||||
|
||||
for f in &mut self.functions {
|
||||
f.opt_instr_sched_prepass(self.sm, max_regs);
|
||||
f.opt_instr_sched_prepass(
|
||||
self.sm, max_regs, shared_mem, block_size,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue