nak: run nir_normalize_sin_cos on Volta+
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Totals:
CodeSize: 8662115904 -> 8657750080 (-0.05%); split: -0.05%, +0.00%
Number of GPRs: 47507646 -> 47507630 (-0.00%); split: -0.00%, +0.00%
Static cycle count: 4713221465 -> 4713944616 (+0.02%); split: -0.02%, +0.04%
Spills to reg: 149128 -> 149123 (-0.00%); split: -0.00%, +0.00%
Fills from reg: 170693 -> 170692 (-0.00%); split: -0.00%, +0.00%
Max warps/SM: 50565052 -> 50564968 (-0.00%); split: +0.00%, -0.00%

Totals from 79168 (6.81% of 1163204) affected shaders:
CodeSize: 1264418656 -> 1260052832 (-0.35%); split: -0.35%, +0.00%
Number of GPRs: 4044963 -> 4044947 (-0.00%); split: -0.01%, +0.01%
Static cycle count: 702908827 -> 703631978 (+0.10%); split: -0.16%, +0.26%
Spills to reg: 66812 -> 66807 (-0.01%); split: -0.01%, +0.00%
Fills from reg: 81720 -> 81719 (-0.00%); split: -0.00%, +0.00%
Max warps/SM: 3175316 -> 3175232 (-0.00%); split: +0.00%, -0.00%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40541>
This commit is contained in:
Karol Herbst 2026-03-20 16:00:51 +01:00 committed by Karol Herbst
parent b7ca34db13
commit 75b0dc667d
3 changed files with 25 additions and 37 deletions

View file

@@ -289,19 +289,6 @@ pub trait SSABuilder: Builder {
dst
}
/// Emits a floating-point multiply of `x` and `y`.
///
/// A fresh GPR SSA value is allocated for the result and an `OpFMul`
/// writing to it is pushed onto the builder. The op is emitted without
/// saturation, with round-to-nearest-even, and with both `ftz` and `dnz`
/// disabled.
///
/// Returns the SSA value holding the product.
fn fmul(&mut self, x: Src, y: Src) -> SSAValue {
    let product = self.alloc_ssa(RegFile::GPR);
    let mul = OpFMul {
        dst: product.into(),
        srcs: [x, y],
        saturate: false,
        rnd_mode: FRndMode::NearestEven,
        ftz: false,
        dnz: false,
    };
    self.push_op(mul);
    product
}
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
let dst = self.alloc_ssa(RegFile::GPR);
self.push_op(OpFSet {
@@ -754,34 +741,24 @@ pub trait SSABuilder: Builder {
}
/// Emits a sine of `src` for hardware older than SM 70.
///
/// The source is first run through an `OpRro` with `RroOp::SinCos`, and
/// the result of that op is then fed to `MuFuOp::Sin`.
///
/// Returns the SSA value produced by the `mufu` call.
///
/// # Panics
///
/// Panics if `self.sm() >= 70`. This helper no longer scales the input
/// itself on those targets.
/// NOTE(review): SM 70+ is presumably expected to arrive as
/// `nir_op_fsin_normalized_2_pi` and go straight to `mufu` -- confirm
/// against the NIR translation.
fn fsin(&mut self, src: Src) -> SSAValue {
    assert!(self.sm() < 70);

    // Exactly one range-reduction op is emitted; its result is the only
    // input to the MUFU sine.
    let tmp = self.alloc_ssa(RegFile::GPR);
    self.push_op(OpRro {
        dst: tmp.into(),
        op: RroOp::SinCos,
        src,
    });
    self.mufu(MuFuOp::Sin, tmp.into())
}
/// Emits a cosine of `src` for hardware older than SM 70.
///
/// The source is first run through an `OpRro` with `RroOp::SinCos`, and
/// the result of that op is then fed to `MuFuOp::Cos`.
///
/// Returns the SSA value produced by the `mufu` call.
///
/// # Panics
///
/// Panics if `self.sm() >= 70`. This helper no longer scales the input
/// itself on those targets.
/// NOTE(review): SM 70+ is presumably expected to arrive as
/// `nir_op_fcos_normalized_2_pi` and go straight to `mufu` -- confirm
/// against the NIR translation.
fn fcos(&mut self, src: Src) -> SSAValue {
    assert!(self.sm() < 70);

    // Exactly one range-reduction op is emitted; its result is the only
    // input to the MUFU cosine.
    let tmp = self.alloc_ssa(RegFile::GPR);
    self.push_op(OpRro {
        dst: tmp.into(),
        op: RroOp::SinCos,
        src,
    });
    self.mufu(MuFuOp::Cos, tmp.into())
}

View file

@@ -982,6 +982,10 @@ impl<'a> ShaderFromNir<'a> {
dst.into()
}
nir_op_fcos => b.fcos(srcs(0)).into(),
nir_op_fcos_normalized_2_pi => {
assert!(self.sm.sm() >= 70);
b.mufu(MuFuOp::Cos, srcs(0)).into()
}
nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
let src_type =
FloatType::from_bits(alu.get_src(0).bit_size().into());
@@ -1317,6 +1321,10 @@ impl<'a> ShaderFromNir<'a> {
}
}
nir_op_fsin => b.fsin(srcs(0)).into(),
nir_op_fsin_normalized_2_pi => {
assert!(self.sm.sm() >= 70);
b.mufu(MuFuOp::Sin, srcs(0)).into()
}
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs(0)).into(),
nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
let src_bits = alu.get_src(0).src.bit_size();

View file

@@ -1308,6 +1308,9 @@ nak_postprocess_nir(nir_shader *nir,
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
OPT(nir, nir_lower_int64);
if (nak->sm >= 70)
OPT(nir, nir_normalize_sin_cos);
nak_optimize_nir(nir, nak);
do {