From 562504f47c20f1f881811bad0c4fb28c8a393a64 Mon Sep 17 00:00:00 2001
From: Mel Henning
Date: Tue, 11 Mar 2025 11:46:34 -0400
Subject: [PATCH] nak: Calc static cycle count in instr_sched

This changes the static cycle count estimate so that it takes into
account estimated variable latency instruction delays. Statistics from
before this commit are not comparable to statistics generated after this
commit.

Part-of:
---
 src/nouveau/compiler/nak/ir.rs                   |  3 ---
 .../compiler/nak/opt_instr_sched_postpass.rs     | 27 ++++++++++++-------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index db68f18c22e..facb3bce267 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -7574,13 +7574,11 @@ impl Shader<'_> {
 
     pub fn gather_info(&mut self) {
         let mut num_instrs = 0;
-        let mut num_static_cycles = 0;
         let mut uses_global_mem = false;
         let mut writes_global_mem = false;
 
         self.for_each_instr(&mut |instr| {
             num_instrs += 1;
-            num_static_cycles += instr.deps.delay as u32;
 
             if !uses_global_mem {
                 uses_global_mem = instr.uses_global_mem();
@@ -7592,7 +7590,6 @@ impl Shader<'_> {
         });
 
         self.info.num_instrs = num_instrs;
-        self.info.num_static_cycles = num_static_cycles;
         self.info.uses_global_mem = uses_global_mem;
         self.info.writes_global_mem = writes_global_mem;
 
diff --git a/src/nouveau/compiler/nak/opt_instr_sched_postpass.rs b/src/nouveau/compiler/nak/opt_instr_sched_postpass.rs
index eadf1036a01..2b21a87429d 100644
--- a/src/nouveau/compiler/nak/opt_instr_sched_postpass.rs
+++ b/src/nouveau/compiler/nak/opt_instr_sched_postpass.rs
@@ -147,7 +147,10 @@ fn generate_dep_graph(
     g
 }
 
-fn generate_order(g: &mut DepGraph, init_ready_list: Vec<usize>) -> Vec<usize> {
+fn generate_order(
+    g: &mut DepGraph,
+    init_ready_list: Vec<usize>,
+) -> (Vec<usize>, u32) {
     let mut ready_instrs: BinaryHeap = BinaryHeap::new();
     let mut future_ready_instrs: BinaryHeap = init_ready_list
         .into_iter()
@@ -203,35 +206,40 @@ fn generate_order(g: &mut DepGraph, init_ready_list: Vec<usize>) -> Vec<usize> {
             }
         }
     }
-    return instr_order;
+    return (instr_order, current_cycle);
 }
 
 fn sched_buffer(
     sm: &dyn ShaderModel,
     instrs: Vec<Box<Instr>>,
-) -> impl Iterator<Item = Box<Instr>> {
+) -> (impl Iterator<Item = Box<Instr>>, u32) {
     let mut g = generate_dep_graph(sm, &instrs);
     let init_ready_list = calc_statistics(&mut g);
     // save_graphviz(&instrs, &g).unwrap();
     g.reverse();
-    let new_order = generate_order(&mut g, init_ready_list);
+    let (new_order, cycle_count) = generate_order(&mut g, init_ready_list);
 
     // Apply the new instruction order
     let mut instrs: Vec<Option<Box<Instr>>> =
         instrs.into_iter().map(|instr| Some(instr)).collect();
-    new_order.into_iter().rev().map(move |i| {
+    let instrs = new_order.into_iter().rev().map(move |i| {
         std::mem::take(&mut instrs[i]).expect("Instruction scheduled twice")
-    })
+    });
+    (instrs, cycle_count)
 }
 
 impl Function {
-    pub fn opt_instr_sched_postpass(&mut self, sm: &dyn ShaderModel) {
+    pub fn opt_instr_sched_postpass(&mut self, sm: &dyn ShaderModel) -> u32 {
+        let mut num_static_cycles = 0;
         for block in &mut self.blocks {
             let orig_instr_count = block.instrs.len();
             let instrs = std::mem::take(&mut block.instrs);
-            block.instrs = sched_buffer(sm, instrs).collect();
+            let (instrs, cycle_count) = sched_buffer(sm, instrs);
+            block.instrs = instrs.collect();
+            num_static_cycles += cycle_count;
             assert_eq!(orig_instr_count, block.instrs.len());
         }
+        num_static_cycles
     }
 }
 
@@ -242,8 +250,9 @@ impl Shader<'_> {
     /// See eg. Cooper & Torczon's "Engineering A Compiler", 3rd ed.
     /// Chapter 12.3 "Local scheduling"
     pub fn opt_instr_sched_postpass(&mut self) {
+        self.info.num_static_cycles = 0;
        for f in &mut self.functions {
-            f.opt_instr_sched_postpass(self.sm);
+            self.info.num_static_cycles += f.opt_instr_sched_postpass(self.sm);
         }
     }
 }