From e1f89a1da2169e52edbac6725efff7dfd5db885a Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev <dpiliaiev@igalia.com>
Date: Tue, 30 Nov 2021 18:06:53 +0200
Subject: [PATCH] ir3: Make nir compiler options a part of ir3_compiler

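Each ir3_compiler now holds its own copy of the NIR compiler options
instead of handing out a pointer to one of two static tables.  This
allows sub-gens to have different options.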

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
---
 src/freedreno/ir3/ir3_compiler.c | 127 +++++++++++++++++++++++++++++++
 src/freedreno/ir3/ir3_compiler.h |   6 ++
 src/freedreno/ir3/ir3_nir.c      | 123 ------------------------------
 src/freedreno/ir3/ir3_nir.h      |   2 -
 4 files changed, 133 insertions(+), 125 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index dfc6aec037f..8d98014632a 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -69,6 +69,121 @@ ir3_compiler_destroy(struct ir3_compiler *compiler)
    ralloc_free(compiler);
 }
 
+static const nir_shader_compiler_options options = { /* used for gen < 6 */
+   .lower_fpow = true,
+   .lower_scmp = true,
+   .lower_flrp16 = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_ffract = true,
+   .lower_fmod = true,
+   .lower_fdiv = true,
+   .lower_isign = true,
+   .lower_ldexp = true,
+   .lower_uadd_carry = true,
+   .lower_usub_borrow = true,
+   .lower_mul_high = true,
+   .lower_mul_2x32_64 = true,
+   .fuse_ffma16 = true,
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .vertex_id_zero_based = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_helper_invocation = true,
+   .lower_bitfield_insert_to_shifts = true,
+   .lower_bitfield_extract_to_shifts = true,
+   .lower_pack_half_2x16 = true,
+   .lower_pack_snorm_4x8 = true,
+   .lower_pack_snorm_2x16 = true,
+   .lower_pack_unorm_4x8 = true,
+   .lower_pack_unorm_2x16 = true,
+   .lower_unpack_half_2x16 = true,
+   .lower_unpack_snorm_4x8 = true,
+   .lower_unpack_snorm_2x16 = true,
+   .lower_unpack_unorm_4x8 = true,
+   .lower_unpack_unorm_2x16 = true,
+   .lower_pack_split = true,
+   .use_interpolated_input_intrinsics = true,
+   .lower_rotate = true,
+   .lower_to_scalar = true,
+   .has_imul24 = true,
+   .has_fsub = true,
+   .has_isub = true,
+   .lower_wpos_pntc = true,
+   .lower_cs_local_index_from_id = true,
+
+   /* Set every int64 lowering flag.  Only needed for the spirv_to_nir()
+    * pass done in ir3_cmdline.c, but that should be harmless for GL since
+    * 64b is not supported there.
+    */
+   .lower_int64_options = (nir_lower_int64_options)~0,
+   .lower_uniforms_to_ubo = true,
+   .use_scoped_barrier = true,
+};
+
+/* Used for gen >= 6; we don't want to lower vertex_id to _zero_based there: */
+static const nir_shader_compiler_options options_a6xx = {
+   .lower_fpow = true,
+   .lower_scmp = true,
+   .lower_flrp16 = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_ffract = true,
+   .lower_fmod = true,
+   .lower_fdiv = true,
+   .lower_isign = true,
+   .lower_ldexp = true,
+   .lower_uadd_carry = true,
+   .lower_usub_borrow = true,
+   .lower_mul_high = true,
+   .lower_mul_2x32_64 = true,
+   .fuse_ffma16 = true,
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .vertex_id_zero_based = false,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_helper_invocation = true,
+   .lower_bitfield_insert_to_shifts = true,
+   .lower_bitfield_extract_to_shifts = true,
+   .lower_pack_half_2x16 = true,
+   .lower_pack_snorm_4x8 = true,
+   .lower_pack_snorm_2x16 = true,
+   .lower_pack_unorm_4x8 = true,
+   .lower_pack_unorm_2x16 = true,
+   .lower_unpack_half_2x16 = true,
+   .lower_unpack_snorm_4x8 = true,
+   .lower_unpack_snorm_2x16 = true,
+   .lower_unpack_unorm_4x8 = true,
+   .lower_unpack_unorm_2x16 = true,
+   .lower_pack_split = true,
+   .use_interpolated_input_intrinsics = true,
+   .lower_rotate = true,
+   .vectorize_io = true,
+   .lower_to_scalar = true,
+   .has_imul24 = true,
+   .has_fsub = true,
+   .has_isub = true,
+   .max_unroll_iterations = 32,
+   .force_indirect_unrolling = nir_var_all,
+   .lower_wpos_pntc = true,
+   .lower_cs_local_index_from_id = true,
+
+   /* Set every int64 lowering flag.  Only needed for the spirv_to_nir()
+    * pass done in ir3_cmdline.c, but that should be harmless for GL since
+    * 64b is not supported there.
+    */
+   .lower_int64_options = (nir_lower_int64_options)~0,
+   .lower_uniforms_to_ubo = true,
+   .lower_device_index_to_zero = true,
+   .use_scoped_barrier = true,
+};
+
 struct ir3_compiler *
 ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
                     bool robust_ubo_access)
@@ -192,7 +307,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
 
    compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32;
 
+   if (compiler->gen >= 6) {
+      compiler->nir_options = options_a6xx;
+   } else {
+      compiler->nir_options = options;
+   }
+
    ir3_disk_cache_init(compiler);
 
    return compiler;
 }
+
+const nir_shader_compiler_options *
+ir3_get_compiler_options(struct ir3_compiler *compiler)
+{
+   return &compiler->nir_options; /* copy embedded in *compiler, not a static */
+}
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 68e5d944ea5..d8bfe1c0925 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -27,6 +27,7 @@
 #ifndef IR3_COMPILER_H_
 #define IR3_COMPILER_H_
 
+#include "compiler/nir/nir.h"
 #include "util/disk_cache.h"
 #include "util/log.h"
 
@@ -45,6 +46,8 @@ struct ir3_compiler {
 
    struct disk_cache *disk_cache;
 
+   struct nir_shader_compiler_options nir_options;
+
    /* If true, UBO accesses are assumed to be bounds-checked as defined by
     * VK_EXT_robustness2 and optimizations may have to be more conservative.
     */
@@ -186,6 +189,9 @@ bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
 void ir3_disk_cache_store(struct ir3_compiler *compiler,
                           struct ir3_shader_variant *v);
 
+const nir_shader_compiler_options *
+ir3_get_compiler_options(struct ir3_compiler *compiler);
+
 int ir3_compile_shader_nir(struct ir3_compiler *compiler,
                            struct ir3_shader_variant *so);
 
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 6951c3af835..9db9b367f84 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -31,129 +31,6 @@
 #include "ir3_nir.h"
 #include "ir3_shader.h"
 
-static const nir_shader_compiler_options options = {
-   .lower_fpow = true,
-   .lower_scmp = true,
-   .lower_flrp16 = true,
-   .lower_flrp32 = true,
-   .lower_flrp64 = true,
-   .lower_ffract = true,
-   .lower_fmod = true,
-   .lower_fdiv = true,
-   .lower_isign = true,
-   .lower_ldexp = true,
-   .lower_uadd_carry = true,
-   .lower_usub_borrow = true,
-   .lower_mul_high = true,
-   .lower_mul_2x32_64 = true,
-   .fuse_ffma16 = true,
-   .fuse_ffma32 = true,
-   .fuse_ffma64 = true,
-   .vertex_id_zero_based = true,
-   .lower_extract_byte = true,
-   .lower_extract_word = true,
-   .lower_insert_byte = true,
-   .lower_insert_word = true,
-   .lower_helper_invocation = true,
-   .lower_bitfield_insert_to_shifts = true,
-   .lower_bitfield_extract_to_shifts = true,
-   .lower_pack_half_2x16 = true,
-   .lower_pack_snorm_4x8 = true,
-   .lower_pack_snorm_2x16 = true,
-   .lower_pack_unorm_4x8 = true,
-   .lower_pack_unorm_2x16 = true,
-   .lower_unpack_half_2x16 = true,
-   .lower_unpack_snorm_4x8 = true,
-   .lower_unpack_snorm_2x16 = true,
-   .lower_unpack_unorm_4x8 = true,
-   .lower_unpack_unorm_2x16 = true,
-   .lower_pack_split = true,
-   .use_interpolated_input_intrinsics = true,
-   .lower_rotate = true,
-   .lower_to_scalar = true,
-   .has_imul24 = true,
-   .has_fsub = true,
-   .has_isub = true,
-   .lower_wpos_pntc = true,
-   .lower_cs_local_index_from_id = true,
-
-   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
-    * but that should be harmless for GL since 64b is not
-    * supported there.
-    */
-   .lower_int64_options = (nir_lower_int64_options)~0,
-   .lower_uniforms_to_ubo = true,
-   .use_scoped_barrier = true,
-};
-
-/* we don't want to lower vertex_id to _zero_based on newer gpus: */
-static const nir_shader_compiler_options options_a6xx = {
-   .lower_fpow = true,
-   .lower_scmp = true,
-   .lower_flrp16 = true,
-   .lower_flrp32 = true,
-   .lower_flrp64 = true,
-   .lower_ffract = true,
-   .lower_fmod = true,
-   .lower_fdiv = true,
-   .lower_isign = true,
-   .lower_ldexp = true,
-   .lower_uadd_carry = true,
-   .lower_usub_borrow = true,
-   .lower_mul_high = true,
-   .lower_mul_2x32_64 = true,
-   .fuse_ffma16 = true,
-   .fuse_ffma32 = true,
-   .fuse_ffma64 = true,
-   .vertex_id_zero_based = false,
-   .lower_extract_byte = true,
-   .lower_extract_word = true,
-   .lower_insert_byte = true,
-   .lower_insert_word = true,
-   .lower_helper_invocation = true,
-   .lower_bitfield_insert_to_shifts = true,
-   .lower_bitfield_extract_to_shifts = true,
-   .lower_pack_half_2x16 = true,
-   .lower_pack_snorm_4x8 = true,
-   .lower_pack_snorm_2x16 = true,
-   .lower_pack_unorm_4x8 = true,
-   .lower_pack_unorm_2x16 = true,
-   .lower_unpack_half_2x16 = true,
-   .lower_unpack_snorm_4x8 = true,
-   .lower_unpack_snorm_2x16 = true,
-   .lower_unpack_unorm_4x8 = true,
-   .lower_unpack_unorm_2x16 = true,
-   .lower_pack_split = true,
-   .use_interpolated_input_intrinsics = true,
-   .lower_rotate = true,
-   .vectorize_io = true,
-   .lower_to_scalar = true,
-   .has_imul24 = true,
-   .has_fsub = true,
-   .has_isub = true,
-   .max_unroll_iterations = 32,
-   .force_indirect_unrolling = nir_var_all,
-   .lower_wpos_pntc = true,
-   .lower_cs_local_index_from_id = true,
-
-   /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
-    * but that should be harmless for GL since 64b is not
-    * supported there.
-    */
-   .lower_int64_options = (nir_lower_int64_options)~0,
-   .lower_uniforms_to_ubo = true,
-   .lower_device_index_to_zero = true,
-   .use_scoped_barrier = true,
-};
-
-const nir_shader_compiler_options *
-ir3_get_compiler_options(struct ir3_compiler *compiler)
-{
-   if (compiler->gen >= 6)
-      return &options_a6xx;
-   return &options;
-}
-
 static bool
 ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              unsigned bit_size, unsigned num_components,
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index aa236e77321..11649263be5 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -62,8 +62,6 @@ bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
 bool ir3_nir_lower_64b_undef(nir_shader *shader);
 bool ir3_nir_lower_64b_global(nir_shader *shader);
 
-const nir_shader_compiler_options *
-ir3_get_compiler_options(struct ir3_compiler *compiler);
 void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
 void ir3_nir_lower_io_to_temporaries(nir_shader *s);
 void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);