mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
nak: run nir_normalize_sin_cos on Volta+
Totals:
CodeSize: 8662115904 -> 8657750080 (-0.05%); split: -0.05%, +0.00%
Number of GPRs: 47507646 -> 47507630 (-0.00%); split: -0.00%, +0.00%
Static cycle count: 4713221465 -> 4713944616 (+0.02%); split: -0.02%, +0.04%
Spills to reg: 149128 -> 149123 (-0.00%); split: -0.00%, +0.00%
Fills from reg: 170693 -> 170692 (-0.00%); split: -0.00%, +0.00%
Max warps/SM: 50565052 -> 50564968 (-0.00%); split: +0.00%, -0.00%

Totals from 79168 (6.81% of 1163204) affected shaders:
CodeSize: 1264418656 -> 1260052832 (-0.35%); split: -0.35%, +0.00%
Number of GPRs: 4044963 -> 4044947 (-0.00%); split: -0.01%, +0.01%
Static cycle count: 702908827 -> 703631978 (+0.10%); split: -0.16%, +0.26%
Spills to reg: 66812 -> 66807 (-0.01%); split: -0.01%, +0.00%
Fills from reg: 81720 -> 81719 (-0.00%); split: -0.00%, +0.00%
Max warps/SM: 3175316 -> 3175232 (-0.00%); split: +0.00%, -0.00%

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40541>
This commit is contained in:
parent
b7ca34db13
commit
75b0dc667d
3 changed files with 25 additions and 37 deletions
|
|
@@ -289,19 +289,6 @@ pub trait SSABuilder: Builder {
|
|||
dst
|
||||
}
|
||||
|
||||
fn fmul(&mut self, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpFMul {
|
||||
dst: dst.into(),
|
||||
srcs: [x, y],
|
||||
saturate: false,
|
||||
rnd_mode: FRndMode::NearestEven,
|
||||
ftz: false,
|
||||
dnz: false,
|
||||
});
|
||||
dst
|
||||
}
|
||||
|
||||
fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSAValue {
|
||||
let dst = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpFSet {
|
||||
|
|
@@ -754,34 +741,24 @@ pub trait SSABuilder: Builder {
|
|||
}
|
||||
|
||||
fn fsin(&mut self, src: Src) -> SSAValue {
|
||||
let tmp = if self.sm() >= 70 {
|
||||
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
|
||||
self.fmul(src, frac_1_2pi.into())
|
||||
} else {
|
||||
let tmp = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpRro {
|
||||
dst: tmp.into(),
|
||||
op: RroOp::SinCos,
|
||||
src,
|
||||
});
|
||||
tmp
|
||||
};
|
||||
assert!(self.sm() < 70);
|
||||
let tmp = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpRro {
|
||||
dst: tmp.into(),
|
||||
op: RroOp::SinCos,
|
||||
src,
|
||||
});
|
||||
self.mufu(MuFuOp::Sin, tmp.into())
|
||||
}
|
||||
|
||||
fn fcos(&mut self, src: Src) -> SSAValue {
|
||||
let tmp = if self.sm() >= 70 {
|
||||
let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
|
||||
self.fmul(src, frac_1_2pi.into())
|
||||
} else {
|
||||
let tmp = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpRro {
|
||||
dst: tmp.into(),
|
||||
op: RroOp::SinCos,
|
||||
src,
|
||||
});
|
||||
tmp
|
||||
};
|
||||
assert!(self.sm() < 70);
|
||||
let tmp = self.alloc_ssa(RegFile::GPR);
|
||||
self.push_op(OpRro {
|
||||
dst: tmp.into(),
|
||||
op: RroOp::SinCos,
|
||||
src,
|
||||
});
|
||||
self.mufu(MuFuOp::Cos, tmp.into())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -982,6 +982,10 @@ impl<'a> ShaderFromNir<'a> {
|
|||
dst.into()
|
||||
}
|
||||
nir_op_fcos => b.fcos(srcs(0)).into(),
|
||||
nir_op_fcos_normalized_2_pi => {
|
||||
assert!(self.sm.sm() >= 70);
|
||||
b.mufu(MuFuOp::Cos, srcs(0)).into()
|
||||
}
|
||||
nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
|
||||
let src_type =
|
||||
FloatType::from_bits(alu.get_src(0).bit_size().into());
|
||||
|
|
@@ -1317,6 +1321,10 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
}
|
||||
nir_op_fsin => b.fsin(srcs(0)).into(),
|
||||
nir_op_fsin_normalized_2_pi => {
|
||||
assert!(self.sm.sm() >= 70);
|
||||
b.mufu(MuFuOp::Sin, srcs(0)).into()
|
||||
}
|
||||
nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs(0)).into(),
|
||||
nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
|
||||
let src_bits = alu.get_src(0).src.bit_size();
|
||||
|
|
|
|||
|
|
@@ -1308,6 +1308,9 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
|
||||
OPT(nir, nir_lower_int64);
|
||||
|
||||
if (nak->sm >= 70)
|
||||
OPT(nir, nir_normalize_sin_cos);
|
||||
|
||||
nak_optimize_nir(nir, nak);
|
||||
|
||||
do {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue