From e1f89a1da2169e52edbac6725efff7dfd5db885a Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 30 Nov 2021 18:06:53 +0200 Subject: [PATCH] ir3: Make nir compiler options a part of ir3_compiler This would allow for sub-gens to have different options. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3_compiler.c | 127 +++++++++++++++++++++++++++++++ src/freedreno/ir3/ir3_compiler.h | 6 ++ src/freedreno/ir3/ir3_nir.c | 123 ------------------------------ src/freedreno/ir3/ir3_nir.h | 2 - 4 files changed, 133 insertions(+), 125 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index dfc6aec037f..8d98014632a 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -69,6 +69,121 @@ ir3_compiler_destroy(struct ir3_compiler *compiler) ralloc_free(compiler); } +static const nir_shader_compiler_options options = { + .lower_fpow = true, + .lower_scmp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_fdiv = true, + .lower_isign = true, + .lower_ldexp = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + .lower_mul_high = true, + .lower_mul_2x32_64 = true, + .fuse_ffma16 = true, + .fuse_ffma32 = true, + .fuse_ffma64 = true, + .vertex_id_zero_based = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, + .lower_helper_invocation = true, + .lower_bitfield_insert_to_shifts = true, + .lower_bitfield_extract_to_shifts = true, + .lower_pack_half_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_pack_split = true, + .use_interpolated_input_intrinsics = true, + .lower_rotate = true, + .lower_to_scalar = true, + .has_imul24 = true, + .has_fsub = true, + .has_isub = true, + .lower_wpos_pntc = true, + .lower_cs_local_index_from_id = true, + + /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c + * but that should be harmless for GL since 64b is not + * supported there. + */ + .lower_int64_options = (nir_lower_int64_options)~0, + .lower_uniforms_to_ubo = true, + .use_scoped_barrier = true, +}; + +/* we don't want to lower vertex_id to _zero_based on newer gpus: */ +static const nir_shader_compiler_options options_a6xx = { + .lower_fpow = true, + .lower_scmp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_fdiv = true, + .lower_isign = true, + .lower_ldexp = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + .lower_mul_high = true, + .lower_mul_2x32_64 = true, + .fuse_ffma16 = true, + .fuse_ffma32 = true, + .fuse_ffma64 = true, + .vertex_id_zero_based = false, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, + .lower_helper_invocation = true, + .lower_bitfield_insert_to_shifts = true, + .lower_bitfield_extract_to_shifts = true, + .lower_pack_half_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_pack_split = true, + .use_interpolated_input_intrinsics = true, + .lower_rotate = true, + .vectorize_io = true, + .lower_to_scalar = true, + .has_imul24 = true, + .has_fsub = true, + .has_isub = true, + .max_unroll_iterations = 32, + .force_indirect_unrolling = nir_var_all, + .lower_wpos_pntc = true, + .lower_cs_local_index_from_id = true, + + /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c + * but that should be harmless for GL since 64b is not + * supported there. + */ + .lower_int64_options = (nir_lower_int64_options)~0, + .lower_uniforms_to_ubo = true, + .lower_device_index_to_zero = true, + .use_scoped_barrier = true, +}; + struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, bool robust_ubo_access) @@ -192,7 +307,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32; + if (compiler->gen >= 6) { + compiler->nir_options = options_a6xx; + } else { + compiler->nir_options = options; + } + ir3_disk_cache_init(compiler); return compiler; } + +const nir_shader_compiler_options * +ir3_get_compiler_options(struct ir3_compiler *compiler) +{ + return &compiler->nir_options; +} diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 68e5d944ea5..d8bfe1c0925 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -27,6 +27,7 @@ #ifndef IR3_COMPILER_H_ #define IR3_COMPILER_H_ +#include "compiler/nir/nir.h" #include "util/disk_cache.h" #include "util/log.h" @@ -45,6 +46,8 @@ struct ir3_compiler { struct disk_cache *disk_cache; + struct nir_shader_compiler_options nir_options; + /* If true, UBO accesses are assumed to be bounds-checked as defined by * VK_EXT_robustness2 and optimizations may have to be more conservative. */ @@ -186,6 +189,9 @@ bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler, void ir3_disk_cache_store(struct ir3_compiler *compiler, struct ir3_shader_variant *v); +const nir_shader_compiler_options * +ir3_get_compiler_options(struct ir3_compiler *compiler); + int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_variant *so); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 6951c3af835..9db9b367f84 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -31,129 +31,6 @@ #include "ir3_nir.h" #include "ir3_shader.h" -static const nir_shader_compiler_options options = { - .lower_fpow = true, - .lower_scmp = true, - .lower_flrp16 = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_fdiv = true, - .lower_isign = true, - .lower_ldexp = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, - .lower_mul_high = true, - .lower_mul_2x32_64 = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, - .vertex_id_zero_based = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_insert_byte = true, - .lower_insert_word = true, - .lower_helper_invocation = true, - .lower_bitfield_insert_to_shifts = true, - .lower_bitfield_extract_to_shifts = true, - .lower_pack_half_2x16 = true, - .lower_pack_snorm_4x8 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_unorm_2x16 = true, - .lower_unpack_half_2x16 = true, - .lower_unpack_snorm_4x8 = true, - .lower_unpack_snorm_2x16 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_unorm_2x16 = true, - .lower_pack_split = true, - .use_interpolated_input_intrinsics = true, - .lower_rotate = true, - .lower_to_scalar = true, - .has_imul24 = true, - .has_fsub = true, - .has_isub = true, - .lower_wpos_pntc = true, - .lower_cs_local_index_from_id = true, - - /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c - * but that should be harmless for GL since 64b is not - * supported there. - */ - .lower_int64_options = (nir_lower_int64_options)~0, - .lower_uniforms_to_ubo = true, - .use_scoped_barrier = true, -}; - -/* we don't want to lower vertex_id to _zero_based on newer gpus: */ -static const nir_shader_compiler_options options_a6xx = { - .lower_fpow = true, - .lower_scmp = true, - .lower_flrp16 = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_fdiv = true, - .lower_isign = true, - .lower_ldexp = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, - .lower_mul_high = true, - .lower_mul_2x32_64 = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, - .vertex_id_zero_based = false, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_insert_byte = true, - .lower_insert_word = true, - .lower_helper_invocation = true, - .lower_bitfield_insert_to_shifts = true, - .lower_bitfield_extract_to_shifts = true, - .lower_pack_half_2x16 = true, - .lower_pack_snorm_4x8 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_unorm_2x16 = true, - .lower_unpack_half_2x16 = true, - .lower_unpack_snorm_4x8 = true, - .lower_unpack_snorm_2x16 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_unorm_2x16 = true, - .lower_pack_split = true, - .use_interpolated_input_intrinsics = true, - .lower_rotate = true, - .vectorize_io = true, - .lower_to_scalar = true, - .has_imul24 = true, - .has_fsub = true, - .has_isub = true, - .max_unroll_iterations = 32, - .force_indirect_unrolling = nir_var_all, - .lower_wpos_pntc = true, - .lower_cs_local_index_from_id = true, - - /* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c - * but that should be harmless for GL since 64b is not - * supported there. - */ - .lower_int64_options = (nir_lower_int64_options)~0, - .lower_uniforms_to_ubo = true, - .lower_device_index_to_zero = true, - .use_scoped_barrier = true, -}; - -const nir_shader_compiler_options * -ir3_get_compiler_options(struct ir3_compiler *compiler) -{ - if (compiler->gen >= 6) - return &options_a6xx; - return &options; -} - static bool ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index aa236e77321..11649263be5 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -62,8 +62,6 @@ bool ir3_nir_lower_64b_intrinsics(nir_shader *shader); bool ir3_nir_lower_64b_undef(nir_shader *shader); bool ir3_nir_lower_64b_global(nir_shader *shader); -const nir_shader_compiler_options * -ir3_get_compiler_options(struct ir3_compiler *compiler); void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_lower_io_to_temporaries(nir_shader *s); void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);