From 1e7d82c8813647916325e460a93fbec1d428a4e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 1 Oct 2020 11:32:45 -0400
Subject: [PATCH] nir/algebraic: always lower idiv to shifts if bitops are
 allowed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

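Lowering idiv by a power of two to shifts is always preferable when bit
operations are allowed, so gate these rules on `!options->lower_bitops`
and drop the separate `lower_idiv` option.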

The only platform significantly affected by this is Intel, where neither
`lower_idiv` nor `lower_bitops` is set today.  There it still seems to be
a boon overall: instruction counts go up, but cycle counts drop.

Shader-db results on Ice Lake:

    total instructions in shared programs: 19719051 -> 19735766 (0.08%)
    instructions in affected programs: 106992 -> 123707 (15.62%)
    helped: 0
    HURT: 445
    HURT stats (abs)   min: 3 max: 295 x̄: 37.56 x̃: 44
    HURT stats (rel)   min: 0.16% max: 33.33% x̄: 19.60% x̃: 19.38%
    95% mean confidence interval for instructions value: 33.60 41.53
    95% mean confidence interval for instructions %-change: 18.97% 20.23%
    Instructions are HURT.

    total loops in shared programs: 5973 -> 5973 (0.00%)
    loops in affected programs: 0 -> 0
    helped: 0
    HURT: 0

    total cycles in shared programs: 489405810 -> 486917482 (-0.51%)
    cycles in affected programs: 4759097 -> 2270769 (-52.29%)
    helped: 406
    HURT: 34
    helped stats (abs) min: 2 max: 64661 x̄: 6291.95 x̃: 3126
    helped stats (rel) min: 0.02% max: 79.42% x̄: 43.32% x̃: 55.83%
    HURT stats (abs)   min: 2 max: 29376 x̄: 1947.12 x̃: 30
    HURT stats (rel)   min: 0.04% max: 23.82% x̄: 4.66% x̃: 1.33%
    95% mean confidence interval for cycles value: -6753.06 -4557.52
    95% mean confidence interval for cycles %-change: -42.60% -36.63%
    Cycles are helped.

    total spills in shared programs: 12481 -> 12482 (<.01%)
    spills in affected programs: 47 -> 48 (2.13%)
    helped: 0
    HURT: 1

    total fills in shared programs: 12816 -> 12819 (0.02%)
    fills in affected programs: 71 -> 74 (4.23%)
    helped: 0
    HURT: 1

    total sends in shared programs: 1010124 -> 1010124 (0.00%)
    sends in affected programs: 0 -> 0
    helped: 0
    HURT: 0

    LOST:   1
    GAINED: 0

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6963>
---
 src/compiler/nir/nir.h                                   | 3 ---
 src/compiler/nir/nir_opt_algebraic.py                    | 4 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 1 -
 src/gallium/drivers/r600/r600_pipe_common.c              | 2 --
 src/panfrost/bifrost/bifrost_compile.h                   | 1 -
 src/panfrost/midgard/midgard_compile.h                   | 1 -
 6 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 28b9a6eb8ea..46301741b55 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3148,9 +3148,6 @@ typedef struct nir_shader_compiler_options {
    /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
    bool lower_vector_cmp;
 
-   /** enables rules to lower idiv by power-of-two: */
-   bool lower_idiv;
-
    /** enable rules to avoid bit ops */
    bool lower_bitops;
 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index be836f954ed..2bb9b41fb15 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -103,8 +103,8 @@ optimizations = [
    (('umod', a, 1), 0),
    (('imod', a, 1), 0),
    (('udiv', a, '#b(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b)), '!options->lower_bitops'),
-   (('idiv', a, '#b(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), 'options->lower_idiv'),
-   (('idiv', a, '#b(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), 'options->lower_idiv'),
+   (('idiv', a, '#b(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), '!options->lower_bitops'),
+   (('idiv', a, '#b(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), '!options->lower_bitops'),
    (('umod', a, '#b(is_pos_power_of_two)'),    ('iand', a, ('isub', b, 1))),
 
    (('~fneg', ('fneg', a)), a),
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 64453edf19b..9141b149581 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3236,7 +3236,6 @@ nvir_nir_shader_compiler_options(int chipset)
    op.lower_sub = true;
    op.lower_scmp = true; // TODO: not implemented yet
    op.lower_vector_cmp = false;
-   op.lower_idiv = true;
    op.lower_bitops = false;
    op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
    op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 9dc00cf1aef..5380fbb7056 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1187,7 +1187,6 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
 	.lower_flrp64 = true,
 	.lower_fpow = true,
 	.lower_fdiv = true,
-	.lower_idiv = true,
         .lower_isign = true,
         .lower_fsign = true,
 	.lower_fmod = true,
@@ -1212,7 +1211,6 @@ const struct nir_shader_compiler_options r600_nir_options = {
 	.lower_flrp64 = true,
 	.lower_fpow = true,
 	.lower_fdiv = true,
-	.lower_idiv = true,
 	.lower_fmod = true,
 	.lower_doubles_options = nir_lower_fp64_full_software,
 	.lower_int64_options = 0,
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index 15b90788133..b4182f28860 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -38,7 +38,6 @@ static const nir_shader_compiler_options bifrost_nir_options = {
         .lower_ffract = true,
         .lower_fmod = true,
         .lower_fdiv = true,
-        .lower_idiv = true,
         .lower_isign = true,
         .lower_fpow = true,
         .lower_find_lsb = true,
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index b4c5a07056f..ffdfb90598c 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -46,7 +46,6 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .lower_ffract = true,
         .lower_fmod = true,
         .lower_fdiv = true,
-        .lower_idiv = true,
         .lower_isign = true,
         .lower_fpow = true,
         .lower_find_lsb = true,