nak: Replace &dyn ShaderModel w/ &ShaderModelInfo

This is mostly an s/dyn ShaderModel/ShaderModelInfo/ substitution with a
few manual fixes. With this change, we now statically dispatch into
ShaderModel, which is a bit faster than dynamic dispatch. Together, this
commit and the last one improve compile times by about 1% (geomean).

Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38913>
This commit is contained in:
Mel Henning 2025-12-11 14:33:38 -05:00 committed by Marge Bot
parent ee65578fa1
commit d7e906d60e
14 changed files with 57 additions and 54 deletions

View file

@ -239,7 +239,7 @@ pub struct ShaderBin {
impl ShaderBin {
pub fn new(
sm: &dyn ShaderModel,
sm: &ShaderModelInfo,
info: &ShaderInfo,
fs_key: Option<&nak_fs_key>,
code: Vec<u32>,

View file

@ -889,11 +889,11 @@ pub trait SSABuilder: Builder {
pub struct InstrBuilder<'a> {
instrs: MappedInstrs,
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
}
impl<'a> InstrBuilder<'a> {
pub fn new(sm: &'a dyn ShaderModel) -> Self {
pub fn new(sm: &'a ShaderModelInfo) -> Self {
Self {
instrs: MappedInstrs::None,
sm,
@ -933,7 +933,7 @@ pub struct SSAInstrBuilder<'a> {
impl<'a> SSAInstrBuilder<'a> {
pub fn new(
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
alloc: &'a mut SSAValueAllocator,
) -> Self {
Self {

View file

@ -597,7 +597,7 @@ fn instr_needs_texbar(instr: &Instr) -> bool {
/// - Instead of pushing by 1 each element in the queue on a `push` op,
/// we could keep track of an in-flight range and use a wrapping timestamp
/// this improves performance but needs careful implementation to avoid bugs
fn insert_texture_barriers(f: &mut Function, sm: &dyn ShaderModel) {
fn insert_texture_barriers(f: &mut Function, sm: &ShaderModelInfo) {
assert!(sm.is_kepler()); // Only kepler has texture barriers!
let mut state_in: Vec<_> = (0..f.blocks.len())
@ -643,7 +643,7 @@ fn insert_texture_barriers(f: &mut Function, sm: &dyn ShaderModel) {
}
}
fn assign_barriers(f: &mut Function, sm: &dyn ShaderModel) {
fn assign_barriers(f: &mut Function, sm: &ShaderModelInfo) {
let mut uses = Box::new(RegTracker::new_with(&|| RegUse::None));
let mut deps = DepGraph::new();
@ -767,7 +767,7 @@ type AccumulatedDelay = u8;
type DelayRegTracker = SparseRegTracker<RegUseMap<RegOrigin, AccumulatedDelay>>;
struct BlockDelayScheduler<'a> {
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
f: &'a Function,
// Map from barrier to last waited cycle
bars: [u32; 6],
@ -915,7 +915,7 @@ impl BlockDelayScheduler<'_> {
}
}
fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) -> u64 {
fn calc_delays(f: &mut Function, sm: &ShaderModelInfo) -> u64 {
let mut instr_cycles: Vec<Vec<u32>> =
f.blocks.iter().map(|b| vec![0; b.instrs.len()]).collect();

View file

@ -318,7 +318,7 @@ enum SyncType {
struct ShaderFromNir<'a> {
nir: &'a nir_shader,
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
info: ShaderInfo,
float_ctl: ShaderFloatControls,
cfg: CFGBuilder<u32, BasicBlock, FxBuildHasher>,
@ -338,7 +338,7 @@ impl<'a> ShaderFromNir<'a> {
fn new(
nak: &nak_compiler,
nir: &'a nir_shader,
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
) -> Self {
Self {
nir: nir,
@ -4381,7 +4381,7 @@ impl<'a> ShaderFromNir<'a> {
pub fn nak_shader_from_nir<'a>(
nak: &nak_compiler,
ns: &'a nir_shader,
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
) -> Shader<'a> {
ShaderFromNir::new(nak, ns, sm).parse_shader()
}

View file

@ -21,7 +21,7 @@ use nv_push_rs::Push as NvPush;
use nvidia_headers::classes::cl90b5::mthd as cl90b5;
struct RunSingleton {
sm: Box<dyn ShaderModel + Send + Sync>,
sm: Box<ShaderModelInfo>,
run: Runner,
}
@ -46,7 +46,7 @@ impl RunSingleton {
const LOCAL_SIZE_X: u16 = 32;
pub struct TestShaderBuilder<'a> {
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
alloc: SSAValueAllocator,
b: InstrBuilder<'a>,
start_block: BasicBlock,
@ -56,7 +56,7 @@ pub struct TestShaderBuilder<'a> {
}
impl<'a> TestShaderBuilder<'a> {
pub fn new(sm: &'a dyn ShaderModel) -> Self {
pub fn new(sm: &'a ShaderModelInfo) -> Self {
let mut alloc = SSAValueAllocator::new();
let mut label_alloc = LabelAllocator::new();
let mut b = SSAInstrBuilder::new(sm, &mut alloc);

View file

@ -1546,7 +1546,7 @@ impl OpFoldData<'_> {
pub trait Foldable: SrcsAsSlice + DstsAsSlice {
// Currently only used by test code
#[allow(dead_code)]
fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>);
fn fold(&self, sm: &ShaderModelInfo, f: &mut OpFoldData<'_>);
}
pub trait DisplayOp: DstsAsSlice {
@ -3254,7 +3254,7 @@ pub struct OpDSetP {
}
impl Foldable for OpDSetP {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let a = f.get_f64_src(self, &self.srcs[0]);
let b = f.get_f64_src(self, &self.srcs[1]);
let accum = f.get_pred_src(self, &self.accum);
@ -3684,7 +3684,7 @@ pub struct OpFlo {
}
impl Foldable for OpFlo {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let src = f.get_u32_src(self, &self.src);
let leading = if self.signed && (src & 0x80000000) != 0 {
(!src).leading_zeros()
@ -3722,7 +3722,7 @@ pub struct OpIAbs {
}
impl Foldable for OpIAbs {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let src = f.get_u32_src(self, &self.src);
let dst = (src as i32).unsigned_abs();
f.set_u32_dst(self, &self.dst, dst);
@ -3750,7 +3750,7 @@ pub struct OpIAdd2 {
}
impl Foldable for OpIAdd2 {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_src(self, &self.srcs[0]),
f.get_u32_src(self, &self.srcs[1]),
@ -3794,7 +3794,7 @@ pub struct OpIAdd2X {
}
impl Foldable for OpIAdd2X {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_bnot_src(self, &self.srcs[0]),
f.get_u32_bnot_src(self, &self.srcs[1]),
@ -3832,7 +3832,7 @@ pub struct OpIAdd3 {
}
impl Foldable for OpIAdd3 {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_src(self, &self.srcs[0]),
f.get_u32_src(self, &self.srcs[1]),
@ -3884,7 +3884,7 @@ pub struct OpIAdd3X {
}
impl Foldable for OpIAdd3X {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_bnot_src(self, &self.srcs[0]),
f.get_u32_bnot_src(self, &self.srcs[1]),
@ -4041,7 +4041,7 @@ pub struct OpIMnMx {
}
impl Foldable for OpIMnMx {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let (a, b) = (
f.get_u32_bnot_src(self, &self.srcs[0]),
f.get_u32_bnot_src(self, &self.srcs[1]),
@ -4092,7 +4092,7 @@ pub struct OpISetP {
}
impl Foldable for OpISetP {
fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let x = f.get_u32_src(self, &self.srcs[0]);
let y = f.get_u32_src(self, &self.srcs[1]);
let accum = f.get_pred_src(self, &self.accum);
@ -4187,7 +4187,7 @@ pub struct OpLea {
}
impl Foldable for OpLea {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let a = f.get_u32_src(self, &self.a);
let mut b = f.get_u32_src(self, &self.b);
let a_high = f.get_u32_src(self, &self.a_high);
@ -4266,7 +4266,7 @@ pub struct OpLeaX {
}
impl Foldable for OpLeaX {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let a = f.get_u32_src(self, &self.a);
let mut b = f.get_u32_src(self, &self.b);
let a_high = f.get_u32_src(self, &self.a_high);
@ -4338,7 +4338,7 @@ impl DisplayOp for OpLop2 {
}
impl Foldable for OpLop2 {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_bnot_src(self, &self.srcs[0]),
f.get_u32_bnot_src(self, &self.srcs[1]),
@ -4366,7 +4366,7 @@ pub struct OpLop3 {
}
impl Foldable for OpLop3 {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_bnot_src(self, &self.srcs[0]),
f.get_u32_bnot_src(self, &self.srcs[1]),
@ -4449,7 +4449,7 @@ impl OpShf {
}
impl Foldable for OpShf {
fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let low = f.get_u32_src(self, &self.low);
let high = f.get_u32_src(self, &self.high);
let shift = f.get_u32_src(self, &self.shift);
@ -4551,7 +4551,7 @@ impl DisplayOp for OpShl {
}
impl Foldable for OpShl {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let x = f.get_u32_src(self, &self.src);
let shift = f.get_u32_src(self, &self.shift);
@ -4604,7 +4604,7 @@ impl OpShr {
}
impl Foldable for OpShr {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let x = f.get_u32_src(self, &self.src);
let shift = f.get_u32_src(self, &self.shift);
@ -5097,7 +5097,7 @@ impl OpPrmt {
}
impl Foldable for OpPrmt {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_u32_src(self, &self.srcs[0]),
f.get_u32_src(self, &self.srcs[1]),
@ -5174,7 +5174,7 @@ impl DisplayOp for OpSgxt {
impl_display_for_op!(OpSgxt);
impl Foldable for OpSgxt {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let a = f.get_u32_src(self, &self.a);
let bits = f.get_u32_src(self, &self.bits);
@ -5282,7 +5282,7 @@ pub struct OpPSetP {
}
impl Foldable for OpPSetP {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let srcs = [
f.get_pred_src(self, &self.srcs[0]),
f.get_pred_src(self, &self.srcs[1]),
@ -5321,7 +5321,7 @@ pub struct OpPopC {
}
impl Foldable for OpPopC {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let src = f.get_u32_bnot_src(self, &self.src);
let dst = src.count_ones();
f.set_u32_dst(self, &self.dst, dst);
@ -5835,7 +5835,7 @@ pub struct OpSuClamp {
}
impl Foldable for OpSuClamp {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let src = f.get_u32_src(self, &self.coords);
let params = f.get_u32_src(self, &self.params);
let imm = self.imm; // i6
@ -5951,7 +5951,7 @@ pub struct OpSuBfm {
}
impl Foldable for OpSuBfm {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let x_raw = f.get_u32_src(self, &self.srcs[0]);
let y_raw = f.get_u32_src(self, &self.srcs[1]);
let z_raw = f.get_u32_src(self, &self.srcs[2]);
@ -6086,7 +6086,7 @@ pub struct OpSuEau {
}
impl Foldable for OpSuEau {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let off_raw = f.get_u32_src(self, &self.off);
let bf_raw = f.get_u32_src(self, &self.bit_field);
let addr = f.get_u32_src(self, &self.addr);
@ -6247,7 +6247,7 @@ pub struct OpIMadSp {
}
impl Foldable for OpIMadSp {
fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
fn fold(&self, _sm: &ShaderModelInfo, f: &mut OpFoldData<'_>) {
let src0 = f.get_u32_src(self, &self.srcs[0]);
let src1 = f.get_u32_src(self, &self.srcs[1]);
let src2 = f.get_u32_src(self, &self.srcs[2]);
@ -9444,7 +9444,7 @@ pub fn max_warps_per_sm(gprs: u32) -> u32 {
}
pub struct Shader<'a> {
pub sm: &'a dyn ShaderModel,
pub sm: &'a ShaderModelInfo,
pub info: ShaderInfo,
pub functions: Vec<Function>,
}

View file

@ -383,7 +383,7 @@ pub struct LegalizeBuilder<'a> {
impl<'a> LegalizeBuilder<'a> {
fn new(
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
alloc: &'a mut SSAValueAllocator,
const_tracker: &'a mut ConstTracker,
) -> Self {
@ -437,7 +437,7 @@ impl<'a> SSABuilder for LegalizeBuilder<'a> {
impl LegalizeBuildHelpers for LegalizeBuilder<'_> {}
fn legalize_instr(
sm: &dyn ShaderModel,
sm: &ShaderModelInfo,
b: &mut LegalizeBuilder,
bl: &impl BlockLiveness,
block_uniform: bool,

View file

@ -90,7 +90,7 @@ fn cycle_use_swap(pc: &OpParCopy, file: RegFile) -> bool {
}
}
fn lower_par_copy(pc: OpParCopy, sm: &dyn ShaderModel) -> MappedInstrs {
fn lower_par_copy(pc: OpParCopy, sm: &ShaderModelInfo) -> MappedInstrs {
let mut graph = CopyGraph::new();
let mut vals = Vec::new();
let mut reg_to_idx = FxHashMap::default();

View file

@ -87,7 +87,7 @@ fn disassemble_instrs(instrs: Vec<Instr>, sm: u8) -> Vec<String> {
io: ShaderIoInfo::None,
};
let sm: Box<dyn ShaderModel> = Box::new(ShaderModelInfo::new(sm));
let sm: Box<ShaderModelInfo> = Box::new(ShaderModelInfo::new(sm));
let s = Shader {
sm: &*sm,
info: info,

View file

@ -53,12 +53,12 @@ enum CopyPropEntry {
}
struct CopyPropPass<'a> {
sm: &'a dyn ShaderModel,
sm: &'a ShaderModelInfo,
ssa_map: FxHashMap<SSAValue, CopyPropEntry>,
}
impl<'a> CopyPropPass<'a> {
pub fn new(sm: &'a dyn ShaderModel) -> Self {
pub fn new(sm: &'a ShaderModelInfo) -> Self {
CopyPropPass {
sm: sm,
ssa_map: Default::default(),

View file

@ -32,7 +32,7 @@ impl<T: Clone> RegUse<T> {
}
}
fn generate_dep_graph(sm: &dyn ShaderModel, instrs: &[Instr]) -> DepGraph {
fn generate_dep_graph(sm: &ShaderModelInfo, instrs: &[Instr]) -> DepGraph {
let mut g = DepGraph::new((0..instrs.len()).map(|_| Default::default()));
// Maps registers to RegUse<ip, src_dst_idx>. Predicates are
@ -201,7 +201,7 @@ fn generate_order(
}
fn sched_buffer(
sm: &dyn ShaderModel,
sm: &ShaderModelInfo,
instrs: Vec<Instr>,
) -> (impl Iterator<Item = Instr> + use<>, u64) {
let mut g = generate_dep_graph(sm, &instrs);
@ -220,7 +220,7 @@ fn sched_buffer(
}
impl Function {
pub fn opt_instr_sched_postpass(&mut self, sm: &dyn ShaderModel) -> u64 {
pub fn opt_instr_sched_postpass(&mut self, sm: &ShaderModelInfo) -> u64 {
let mut num_static_cycles = 0u64;
for i in 0..self.blocks.len() {
let block = &mut self.blocks[i];

View file

@ -6,7 +6,7 @@ use crate::ir::*;
use rustc_hash::FxHashMap;
fn should_lower_to_warp(
sm: &dyn ShaderModel,
sm: &ShaderModelInfo,
instr: &Instr,
r2ur: &FxHashMap<SSAValue, SSAValue>,
) -> bool {

View file

@ -6,6 +6,7 @@ use crate::legalize::{
src_is_reg, src_is_upred_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers,
LegalizeBuilder,
};
use crate::sm70::ShaderModel70;
use bitview::*;
use rustc_hash::FxHashMap;
@ -4155,14 +4156,14 @@ impl SM70Op for Op {
}
pub fn legalize_sm70_op(
_sm: &dyn ShaderModel,
_sm: &ShaderModel70,
b: &mut LegalizeBuilder,
op: &mut Op,
) {
op.legalize(b);
}
pub fn encode_sm70_shader(sm: &dyn ShaderModel, s: &Shader<'_>) -> Vec<u32> {
pub fn encode_sm70_shader(sm: &ShaderModel70, s: &Shader<'_>) -> Vec<u32> {
assert!(s.functions.len() == 1);
let func = &s.functions[0];

View file

@ -4,7 +4,9 @@
extern crate bitview;
extern crate nvidia_headers;
use crate::ir::{ShaderInfo, ShaderIoInfo, ShaderModel, ShaderStageInfo};
use crate::ir::{
ShaderInfo, ShaderIoInfo, ShaderModel, ShaderModelInfo, ShaderStageInfo,
};
use bitview::{
BitMutView, BitMutViewable, BitView, BitViewable, SetBit, SetField,
};
@ -464,7 +466,7 @@ impl ShaderProgramHeader {
}
pub fn encode_header(
sm: &dyn ShaderModel,
sm: &ShaderModelInfo,
shader_info: &ShaderInfo,
fs_key: Option<&nak_fs_key>,
) -> [u32; CURRENT_MAX_SHADER_HEADER_SIZE] {