mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-27 07:40:31 +01:00
nir/opt_uniform_subgroup: optimize uniform ddx/ddy
We can't just use 0.0 as the replacement because of NaN/Inf: for a uniform NaN or Inf input, x - x is NaN, not 0.0. But turning the intrinsic into a simple fsub should still be better or at least equal.

Foz-DB Navi48:
Totals from 128 (0.10% of 125402) affected shaders:
MaxWaves: 3684 -> 3708 (+0.65%)
Instrs: 111150 -> 111055 (-0.09%); split: -0.20%, +0.11%
CodeSize: 587176 -> 590800 (+0.62%); split: -0.01%, +0.63%
VGPRs: 6540 -> 6480 (-0.92%)
Latency: 382775 -> 383332 (+0.15%); split: -0.15%, +0.29%
InvThroughput: 80909 -> 80530 (-0.47%); split: -0.51%, +0.04%
VClause: 1433 -> 1430 (-0.21%)
SClause: 1834 -> 1841 (+0.38%); split: -0.11%, +0.49%
Copies: 6130 -> 6096 (-0.55%); split: -1.29%, +0.73%
PreSGPRs: 7352 -> 7356 (+0.05%)
PreVGPRs: 4797 -> 4721 (-1.58%)
VALU: 71892 -> 71435 (-0.64%); split: -0.64%, +0.01%
SALU: 12665 -> 13056 (+3.09%); split: -0.06%, +3.14%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39112>
This commit is contained in:
parent
7ed6679361
commit
369a3b22b4
1 changed file with 15 additions and 0 deletions
|
|
@@ -221,6 +221,21 @@ opt_uniform_subgroup_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *_s
|
|||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ddx:
|
||||
case nir_intrinsic_ddx_coarse:
|
||||
case nir_intrinsic_ddx_fine:
|
||||
case nir_intrinsic_ddy:
|
||||
case nir_intrinsic_ddy_coarse:
|
||||
case nir_intrinsic_ddy_fine:
|
||||
if (nir_src_is_divergent(&intrin->src[0]))
|
||||
return false;
|
||||
|
||||
nir_def *x = intrin->src[0].ssa;
|
||||
b->fp_math_ctrl = nir_fp_no_fast_math;
|
||||
replacement = nir_fsub(b, x, x);
|
||||
b->fp_math_ctrl = nir_fp_fast_math;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_reduce:
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
case nir_intrinsic_inclusive_scan: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue