From f5e92e5493c8d9f390c261efaaaf91c541acb46b Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 25 Apr 2026 14:39:07 +0200 Subject: [PATCH] nak: use fmul_rtz for NAK_INTERP_MODE_PERSPECTIVE Fixes rendering artifacts in The Surge 2 and Shadow of the Tomb Raider. This also matches what NVIDIA's driver does. Totals from 170446 (14.05% of 1212873) affected shaders: CodeSize: 2019019440 -> 2026071952 (+0.35%); split: -0.07%, +0.41% Number of GPRs: 8158110 -> 8098382 (-0.73%); split: -0.80%, +0.07% SLM Size: 106448 -> 106440 (-0.01%) Static cycle count: 1398452243 -> 1400038117 (+0.11%); split: -0.17%, +0.28% Spills to memory: 546 -> 520 (-4.76%) Fills from memory: 546 -> 520 (-4.76%) Spills to reg: 22585 -> 22670 (+0.38%); split: -0.31%, +0.68% Fills from reg: 18243 -> 18331 (+0.48%); split: -0.34%, +0.82% Max warps/SM: 6797472 -> 6822196 (+0.36%); split: +0.38%, -0.02% Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/11447 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/11706 Backport-to: 26.1 Reviewed-by: Mel Henning Part-of: --- src/nouveau/compiler/nak_nir_lower_fs_inputs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c index d5a83dcaed3..7f0864efeaf 100644 --- a/src/nouveau/compiler/nak_nir_lower_fs_inputs.c +++ b/src/nouveau/compiler/nak_nir_lower_fs_inputs.c @@ -67,8 +67,15 @@ interp_fs_input(nir_builder *b, unsigned num_components, uint32_t addr, comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), offset, .base = addr + c * 4, .flags = NAK_AS_U32(flags)); - if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE) - comps[c] = nir_fmul(b, comps[c], inv_w); + if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE) { + unsigned fp_math_ctrl = b->fp_math_ctrl; + b->fp_math_ctrl |= nir_fp_exact; + /* It seems critical that this is done as round to zero. + * The Surge 2 and Shadow of the Tomb Raider show artifacts if not. 
+ */ + comps[c] = nir_fmul_rtz(b, comps[c], inv_w); + b->fp_math_ctrl = fp_math_ctrl; + } } return nir_vec(b, comps, num_components); } else if (nak->sm >= 20) {