From 6a09d33549ec62d8613925762a372c1ce426cb81 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 3 Sep 2024 11:41:37 -0700 Subject: [PATCH] nir: Add a pass to generate BFI instructions from logical operations Inspired by a commit message in !30934, I set about optimizing the code generated for nir_copysign. It would be possible to just implement an opt_algebraic pattern for the specific values used by nir_copysign, but this casts a slightly larger net. As noted in a comment in the code, there may be variations of the pattern that this pass misses. The opt_algebraic pattern would miss them too. v2: Use nir_def_replace. Suggested by Alyssa. Allow more "root" instruction types. Suggested by Georg. v3: Treat extract_u16(x, 0) as (x & 0x0000ffff), and treat extract_u8(x, 0) as (x & 0x000000ff). v4: Use nir_scalar. Suggested by Georg. Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_opt_generate_bfi.c | 160 ++++++++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 src/compiler/nir/nir_opt_generate_bfi.c diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index 000bdaf026a..3ba61c45c3c 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -231,6 +231,7 @@ files_libnir = files( 'nir_opt_find_array_copies.c', 'nir_opt_fragdepth.c', 'nir_opt_gcm.c', + 'nir_opt_generate_bfi.c', 'nir_opt_idiv_const.c', 'nir_opt_if.c', 'nir_opt_intrinsics.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 097c240d195..4acf970e8bf 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6701,6 +6701,8 @@ bool nir_opt_fragdepth(nir_shader *shader); bool nir_opt_gcm(nir_shader *shader, bool value_number); +bool nir_opt_generate_bfi(nir_shader *shader); + bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); bool nir_opt_mqsad(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_generate_bfi.c b/src/compiler/nir/nir_opt_generate_bfi.c new file mode 100644 index 00000000000..65b108a7ff8 --- /dev/null +++ b/src/compiler/nir/nir_opt_generate_bfi.c @@ -0,0 +1,160 @@ +/* + * Copyright 2024 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +/** + * \file + * Identify sequences of logical operations to convert to bfi + * + * It is difficult for opt_algebraic to match general expressions like + * + * (a & some_constant) | (b & ~some_constant) + * + * Common cases like some_constant = 0x7fffffff can be added, but this may + * miss other opportunities. This pass implements that general pattern + * matching. + * + * Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass. + * + * Future work may also detect cases like: + * + * (a & some_constant) | ~(b | some_constant) + * ~((a | some_constant) & (b | ~some_constant)) + * etc. + */ + +#include "nir_builder.h" + +static bool +parse_iand(nir_scalar alu, nir_scalar *value, uint32_t *mask) +{ + if (nir_scalar_alu_op(alu) == nir_op_iand) { + /* If both source are constants, do not perform the conversion. There + * are lowerings in opt_algebraic that can generate this pattern on + * platforms that set has_bfi and avoid_ternary_with_two_constants. + * Undoing that lowering would result in infinite optimization loops. + */ + nir_scalar left = nir_scalar_chase_alu_src(alu, 0); + nir_scalar right = nir_scalar_chase_alu_src(alu, 1); + if (nir_scalar_is_const(left) && nir_scalar_is_const(right)) + return false; + + if (nir_scalar_is_const(left)) { + *mask = nir_scalar_as_uint(left); + *value = right; + return true; + } else if (nir_scalar_is_const(right)) { + *mask = nir_scalar_as_uint(right); + *value = left; + return true; + } + } else if (nir_scalar_alu_op(alu) == nir_op_extract_u16 || + nir_scalar_alu_op(alu) == nir_op_extract_u8) { + /* There may be leftovers from opt_algebraic that haven't been constant + * folded yet. + */ + nir_scalar left = nir_scalar_chase_alu_src(alu, 0); + if (nir_scalar_is_const(left)) + return false; + + if (nir_scalar_as_uint(nir_scalar_chase_alu_src(alu, 1)) == 0) { + *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0x0000ffff : 0x000000ff; + *value = left; + return true; + } + } + + return false; +} + +static bool +nir_opt_generate_bfi_instr(nir_builder *b, + nir_alu_instr *alu, + UNUSED void *cb_data) +{ + /* Since none of the source bits will overlap, these are equvalent. */ + if ((alu->op != nir_op_ior && + alu->op != nir_op_ixor && + alu->op != nir_op_iadd) || + alu->def.num_components != 1 || alu->def.bit_size != 32) + return false; + + nir_scalar alu_scalar = nir_get_scalar(&alu->def, 0); + nir_scalar left = nir_scalar_chase_alu_src(alu_scalar, 0); + nir_scalar right = nir_scalar_chase_alu_src(alu_scalar, 1); + + if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right)) + return false; + + nir_scalar src1; + nir_scalar src2; + uint32_t mask1; + uint32_t mask2; + + if (!parse_iand(left, &src1, &mask1)) + return false; + + if (!parse_iand(right, &src2, &mask2)) + return false; + + if (mask1 != ~mask2) + return false; + + nir_scalar insert; + nir_scalar base; + uint32_t mask; + + /* The mask used by the bfi instruction must be odd. When the mask is odd, + * the implict shift applied by the bfi is by zero bits. Since one of the + * masks must be odd, the rule can always be applied. + * + * bitfield_select does not have this restriction, but it doesn't hurt. + */ + if ((mask1 & 1) != 0) { + /* Because mask1 == ~mask2. */ + assert((mask2 & 1) == 0); + + mask = mask1; + insert = src1; + base = src2; + } else { + /* Because mask1 == ~mask2. */ + assert((mask2 & 1) != 0); + + mask = mask2; + insert = src2; + base = src1; + } + + b->cursor = nir_before_instr(&alu->instr); + + nir_def *bfi; + + if (b->shader->options->has_bfi) { + bfi = nir_bfi(b, + nir_imm_int(b, mask), + nir_channel(b, insert.def, insert.comp), + nir_channel(b, base.def, base.comp)); + } else { + assert(b->shader->options->has_bitfield_select); + + bfi = nir_bitfield_select(b, + nir_imm_int(b, mask), + nir_channel(b, insert.def, insert.comp), + nir_channel(b, base.def, base.comp)); + } + + nir_def_replace(&alu->def, bfi); + return true; +} + +bool +nir_opt_generate_bfi(nir_shader *shader) +{ + if (!shader->options->has_bfi && !shader->options->has_bitfield_select) + return false; + + return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr, + nir_metadata_control_flow, NULL); +}