From 2e50bf19cd91169498e2a04823f901eb50672028 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 5 Oct 2022 15:01:05 +0200 Subject: [PATCH] nir: move fusing csel and comparisons to opt_late_algebraic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With that, simple comparisons are cleaned up properly. This helps with some tessellation shaders on r600. Shader-db stats R600/Cayman: -------------------------------------------------------------- total dw in shared programs: 1621806 -> 1620884 (-0.06%) dw in affected programs: 41650 -> 40728 (-2.21%) helped: 211 HURT: 4 helped stats (abs) min: 2 max: 26 x̄: 4.46 x̃: 4 helped stats (rel) min: 0.30% max: 9.68% x̄: 2.87% x̃: 2.52% HURT stats (abs) min: 2 max: 8 x̄: 5.00 x̃: 5 HURT stats (rel) min: 0.23% max: 1.67% x̄: 1.02% x̃: 1.09% 95% mean confidence interval for dw value: -4.81 -3.77 95% mean confidence interval for dw %-change: -3.03% -2.57% Dw are helped. total gprs in shared programs: 41192 -> 41182 (-0.02%) gprs in affected programs: 731 -> 721 (-1.37%) helped: 53 HURT: 45 helped stats (abs) min: 1 max: 3 x̄: 1.23 x̃: 1 helped stats (rel) min: 5.88% max: 40.00% x̄: 16.56% x̃: 14.29% HURT stats (abs) min: 1 max: 2 x̄: 1.22 x̃: 1 HURT stats (rel) min: 7.69% max: 40.00% x̄: 19.42% x̃: 20.00% 95% mean confidence interval for gprs value: -0.37 0.16 95% mean confidence interval for gprs %-change: -3.92% 3.85% Inconclusive result (value mean confidence interval includes 0). total alu_groups in shared programs: 203677 -> 203632 (-0.02%) alu_groups in affected programs: 2876 -> 2831 (-1.56%) helped: 68 HURT: 30 helped stats (abs) min: 1 max: 4 x̄: 1.46 x̃: 1 helped stats (rel) min: 0.84% max: 25.00% x̄: 7.48% x̃: 5.41% HURT stats (abs) min: 1 max: 6 x̄: 1.80 x̃: 1 HURT stats (rel) min: 1.98% max: 33.33% x̄: 10.09% x̃: 5.61% 95% mean confidence interval for alu_groups value: -0.81 -0.11 95% mean confidence interval for alu_groups %-change: -4.20% <.01% Alu_groups are helped. 
total loops in shared programs: 72 -> 72 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cf in shared programs: 88230 -> 88233 (<.01%) cf in affected programs: 71 -> 74 (4.23%) helped: 1 HURT: 4 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 33.33% max: 33.33% x̄: 33.33% x̃: 33.33% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 1.89% max: 33.33% x̄: 17.14% x̃: 16.67% 95% mean confidence interval for cf value: -0.51 1.71 95% mean confidence interval for cf %-change: -24.20% 38.29% Inconclusive result (value mean confidence interval includes 0). total stack in shared programs: 3827 -> 3827 (0.00%) stack in affected programs: 0 -> 0 helped: 0 HURT: 0 LOST: 0 GAINED: 0 Total CPU time (seconds): 45.32 -> 41.69 (-8.01%) -------------------------------------------------------------- v2: Simplify replacement pattern (Rhys Perry) v3: fix ws (Alexander Orzechowski) v4: move the original lowering to opt_late_algebraic and drop cleanup code (Alyssa) v5: Add shader-db stats (Alyssa) Signed-off-by: Gert Wollny Reviewed-by: Alyssa Rosenzweig Reviewed-by: Emma Anholt Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index ae9f50ee417..f898f708cfa 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2018,22 +2018,6 @@ optimizations.extend([ (('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'), (('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'), (('imul24', a, 0), (0)), - - (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), - (('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), - (('fcsel', 
('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), - (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), - - (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"), - (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"), - - (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), - ]) # bit_size dependent lowerings @@ -2891,6 +2875,21 @@ late_optimizations += [ (('i2imp', a), ('u2u16', a)), (('u2fmp', a), ('u2f16', a)), (('fisfinite', a), ('flt', ('fabs', a), float("inf"))), + + (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), + (('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), + (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), + (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), + + (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"), + (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"), + (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, 
c), "options->has_fused_comp_and_csel"), + (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"), + + (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), + (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), + (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), + (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), ] distribute_src_mods = [