From 2ac7e6614a7c985683865bde391f75f64ff9c015 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 14 Aug 2023 19:21:52 +0200 Subject: [PATCH] nir: unify lower_bitfield_extract with has_bfe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Daniel Schürmann Acked-by: Faith Ekstrand Part-of: --- src/amd/vulkan/radv_shader.c | 1 + src/broadcom/vulkan/v3dv_pipeline.c | 2 +- src/compiler/nir/nir.h | 7 ++++--- src/compiler/nir/nir_algebraic.py | 9 --------- src/compiler/nir/nir_opt_algebraic.py | 8 ++++---- src/freedreno/ir3/ir3_compiler.c | 2 +- src/gallium/drivers/i915/i915_screen.c | 2 +- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- src/gallium/drivers/r600/r600_pipe_common.c | 4 ++-- src/gallium/drivers/radeonsi/si_get.c | 1 + src/gallium/drivers/v3d/v3d_screen.c | 2 +- src/intel/compiler/brw_compiler.c | 4 ++-- src/microsoft/compiler/nir_to_dxil.c | 1 + src/nouveau/codegen/nv50_ir_from_nir.cpp | 3 +-- src/panfrost/compiler/bifrost_compile.h | 2 +- src/panfrost/midgard/midgard_compile.h | 2 +- 16 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 4ce9679d5c8..7fbe4b0d91b 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -98,6 +98,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s .lower_hadd = true, .lower_mul_32x16 = true, .lower_uclz = true, + .has_bfe = true, .has_bfm = true, .has_bitfield_select = true, .has_fsub = true, diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 4861e82e616..fca9192ccec 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -194,7 +194,7 @@ const nir_shader_compiler_options v3dv_nir_options = { .lower_insert_byte = true, .lower_insert_word = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_bitfield_reverse = true, .lower_bit_count = true, .lower_cs_local_id_to_index = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0afb3b19ab0..63aeabe9ac5 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3411,10 +3411,8 @@ typedef struct nir_shader_compiler_options { bool lower_fsqrt; bool lower_sincos; bool lower_fmod; - /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */ + /** Lowers ibitfield_extract/ubitfield_extract. */ bool lower_bitfield_extract; - /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ - bool lower_bitfield_extract_to_shifts; /** Lowers bitfield_insert. */ bool lower_bitfield_insert; /** Lowers bitfield_reverse to shifts. */ @@ -3744,6 +3742,9 @@ typedef struct nir_shader_compiler_options { /** Backend supports bitz/bitnz. */ bool has_bit_test; + /** Backend supports ubfe/ibfe. */ + bool has_bfe; + /** Backend supports bfm. */ bool has_bfm; diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index c57b79f32c9..065810bb393 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -1177,15 +1177,6 @@ ${pass_name}(nir_shader *shader) (void) options; (void) info; - /* This is not a great place for this, but it seems to be the best place - * for it. Check that at most one kind of lowering is requested for - * bitfield extract and bitfield insert. Otherwise the lowering can fight - * with each other and optimizations. - */ - assert((int)options->lower_bitfield_extract + - (int)options->lower_bitfield_extract_to_shifts <= 1); - - STATIC_ASSERT(${str(cache["next_index"])} == ARRAY_SIZE(${pass_name}_values)); % for index, condition in enumerate(condition_list): condition_flags[${index}] = ${condition}; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 19431a39db3..6e594ea451a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1999,12 +1999,12 @@ optimizations.extend([ (('ibitfield_extract', 'value', 'offset', 'bits'), ('bcsel', ('ult', 31, 'bits'), 'value', ('ibfe', 'value', 'offset', 'bits')), - 'options->lower_bitfield_extract'), + 'options->lower_bitfield_extract && options->has_bfe'), (('ubitfield_extract', 'value', 'offset', 'bits'), ('bcsel', ('ult', 31, 'bits'), 'value', ('ubfe', 'value', 'offset', 'bits')), - 'options->lower_bitfield_extract'), + 'options->lower_bitfield_extract && options->has_bfe'), # (src0 & src1) | (~src0 & src2). Constant fold if src2 is 0. (('bitfield_select', a, b, 0), ('iand', a, b)), @@ -2056,7 +2056,7 @@ optimizations.extend([ ('ishr', ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')), ('isub', 32, 'bits'))), - 'options->lower_bitfield_extract_to_shifts'), + 'options->lower_bitfield_extract && !options->has_bfe'), (('ubitfield_extract', 'value', 'offset', 'bits'), ('iand', @@ -2064,7 +2064,7 @@ optimizations.extend([ ('bcsel', ('ieq', 'bits', 32), 0xffffffff, ('isub', ('ishl', 1, 'bits'), 1))), - 'options->lower_bitfield_extract_to_shifts'), + 'options->lower_bitfield_extract && !options->has_bfe'), (('ifind_msb', 'value'), ('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')), diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 9d75d5665f6..61cc68a0836 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -96,7 +96,7 @@ static const nir_shader_compiler_options ir3_base_options = { .lower_insert_word = true, .lower_helper_invocation = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_pack_half_2x16 = true, .lower_pack_snorm_4x8 = true, .lower_pack_snorm_2x16 = true, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 16026f1b048..ae5c9f782f0 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -135,7 +135,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_flrp64 = true, .lower_fsat = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_fdph = true, .lower_ffma16 = true, .lower_ffma32 = true, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index bc270460902..aebf2a89056 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -589,7 +589,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_flrp64 = true, .lower_fsat = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_fdot = true, .lower_fdph = true, .lower_ffma16 = true, diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 05b6c59c028..c0a34156703 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1339,6 +1339,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, .lower_fmod = true, .lower_uadd_carry = true, .lower_usub_borrow = true, + .lower_bitfield_extract = true, .lower_bitfield_insert = true, .lower_extract_byte = true, .lower_extract_word = true, @@ -1381,7 +1382,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->nir_options.force_indirect_unrolling_sampler = true; if (rscreen->info.gfx_level >= EVERGREEN) { - rscreen->nir_options.lower_bitfield_extract = true; + rscreen->nir_options.has_bfe = true; rscreen->nir_options.has_bfm = true; rscreen->nir_options.has_bitfield_select = true; } @@ -1390,7 +1391,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, /* Pre-EG doesn't have these ALU ops */ rscreen->nir_options.lower_bit_count = true; rscreen->nir_options.lower_bitfield_reverse = true; - rscreen->nir_options.lower_bitfield_extract_to_shifts = true; } if (rscreen->info.gfx_level < CAYMAN) { diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 02ac8921535..8de9124c0c1 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1323,6 +1323,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11, .has_udot_4x8 = sscreen->info.has_accelerated_dot_product, .has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11, + .has_bfe = true, .has_bfm = true, .has_bitfield_select = true, .optimize_sample_mask_in = true, diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 6375de9214f..77dbfe8fcb2 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -700,7 +700,7 @@ static const nir_shader_compiler_options v3d_nir_options = { .lower_insert_byte = true, .lower_insert_word = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_bitfield_reverse = true, .lower_bit_count = true, .lower_cs_local_id_to_index = true, diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 50ab8fabf0b..54c11acaa1f 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -40,6 +40,7 @@ .lower_fisnormal = true, \ .lower_isign = true, \ .lower_ldexp = true, \ + .lower_bitfield_extract = true, \ .lower_bitfield_insert = true, \ .lower_device_index_to_zero = true, \ .vectorize_io = true, \ @@ -183,8 +184,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11; nir_options->lower_fpow = devinfo->ver >= 12; - nir_options->lower_bitfield_extract = devinfo->ver >= 7; - nir_options->lower_bitfield_extract_to_shifts = devinfo->ver < 7; + nir_options->has_bfe = devinfo->ver >= 7; nir_options->has_bfm = devinfo->ver >= 7; nir_options->has_bfi = devinfo->ver >= 7; diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index 92926717d95..002b65b217b 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -137,6 +137,7 @@ nir_options = { .lower_interpolate_at = true, .has_fsub = true, .has_isub = true, + .has_bfe = true, .vertex_id_zero_based = true, .lower_base_vertex = true, .lower_helper_invocation = true, diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index 8f3ecc3f4d0..96844a5e207 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3385,8 +3385,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) op.lower_fsqrt = false; // TODO: only before gm200 op.lower_sincos = false; op.lower_fmod = true; - op.lower_bitfield_extract = false; - op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET); + op.lower_bitfield_extract = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET); op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET); op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET); op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET); diff --git a/src/panfrost/compiler/bifrost_compile.h b/src/panfrost/compiler/bifrost_compile.h index e46d340b261..5d7b3710f2b 100644 --- a/src/panfrost/compiler/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost_compile.h @@ -53,7 +53,7 @@ void bifrost_compile_shader_nir(nir_shader *nir, .lower_fsign = true, \ \ .lower_bitfield_insert = true, \ - .lower_bitfield_extract_to_shifts = true, \ + .lower_bitfield_extract = true, \ .lower_insert_byte = true, \ .lower_rotate = true, \ \ diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 541bac676fa..2b9c9cf334b 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -67,7 +67,7 @@ static const nir_shader_compiler_options midgard_nir_options = { .lower_bit_count = true, .lower_bitfield_reverse = true, .lower_bitfield_insert = true, - .lower_bitfield_extract_to_shifts = true, + .lower_bitfield_extract = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_insert_byte = true,