mesa/src/compiler/nir/nir_opt_generate_bfi.c
Ian Romanick 6a09d33549 nir: Add a pass to generate BFI instructions from logical operations
Inspired by a commit message in !30934, I set about optimizing the code
generated for nir_copysign. It would be possible to just implement an
opt_algebraic pattern for the specific values used by nir_copysign, but
this casts a slightly larger net.

As noted in a comment in the code, there may be variations of the
pattern that this pass misses. The opt_algebraic pattern would miss them
too.

v2: Use nir_def_replace. Suggested by Alyssa. Allow more "root"
instruction types. Suggested by Georg.

v3: Treat extract_u16(x, 0) as (x & 0x0000ffff), and treat extract_u8(x,
0) as (x & 0x000000ff).

v4: Use nir_scalar. Suggested by Georg.

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31006>
2024-09-13 00:21:00 +00:00

160 lines
4.6 KiB
C

/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
/**
* \file
* Identify sequences of logical operations to convert to bfi
*
* It is difficult for opt_algebraic to match general expressions like
*
* (a & some_constant) | (b & ~some_constant)
*
* Common cases like some_constant = 0x7fffffff can be added, but this may
* miss other opportunities. This pass implements that general pattern
* matching.
*
* Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass.
*
* Future work may also detect cases like:
*
* (a & some_constant) | ~(b | some_constant)
* ~((a | some_constant) & (b | ~some_constant))
* etc.
*/
#include "nir_builder.h"
/**
 * Try to interpret an ALU scalar as "value & constant_mask".
 *
 * Two shapes are recognized:
 *
 *  - iand where exactly one of the two sources is a constant.
 *  - extract_u16(x, 0) or extract_u8(x, 0) with a non-constant x, which are
 *    treated as (x & 0x0000ffff) and (x & 0x000000ff) respectively.
 *
 * \param alu    Scalar whose ALU opcode is inspected. nir_scalar_alu_op() is
 *               applied unconditionally, so the caller must already know the
 *               scalar comes from an ALU instruction
 *               (nir_opt_generate_bfi_instr checks nir_scalar_is_alu first).
 * \param value  Receives the non-constant operand on success.
 * \param mask   Receives the constant mask on success.
 *
 * \return true if one of the shapes matched.  \p value and \p mask are only
 *         written when true is returned.
 */
static bool
parse_iand(nir_scalar alu, nir_scalar *value, uint32_t *mask)
{
   if (nir_scalar_alu_op(alu) == nir_op_iand) {
      /* If both sources are constants, do not perform the conversion. There
       * are lowerings in opt_algebraic that can generate this pattern on
       * platforms that set has_bfi and avoid_ternary_with_two_constants.
       * Undoing that lowering would result in infinite optimization loops.
       */
      nir_scalar left = nir_scalar_chase_alu_src(alu, 0);
      nir_scalar right = nir_scalar_chase_alu_src(alu, 1);

      if (nir_scalar_is_const(left) && nir_scalar_is_const(right))
         return false;

      if (nir_scalar_is_const(left)) {
         *mask = nir_scalar_as_uint(left);
         *value = right;
         return true;
      } else if (nir_scalar_is_const(right)) {
         *mask = nir_scalar_as_uint(right);
         *value = left;
         return true;
      }
   } else if (nir_scalar_alu_op(alu) == nir_op_extract_u16 ||
              nir_scalar_alu_op(alu) == nir_op_extract_u8) {
      /* There may be leftovers from opt_algebraic that haven't been constant
       * folded yet.
       */
      nir_scalar left = nir_scalar_chase_alu_src(alu, 0);

      if (nir_scalar_is_const(left))
         return false;

      /* Only element 0 is equivalent to a plain low-bits mask. Check that
       * the index really is a constant before reading it as one, matching
       * the guarded constant reads in the iand case above.
       */
      nir_scalar index = nir_scalar_chase_alu_src(alu, 1);

      if (nir_scalar_is_const(index) && nir_scalar_as_uint(index) == 0) {
         *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0x0000ffff : 0x000000ff;
         *value = left;
         return true;
      }
   }

   return false;
}
/**
 * Per-instruction callback handed to nir_shader_alu_pass.
 *
 * Matches a scalar 32-bit ior/ixor/iadd whose two operands both parse (via
 * parse_iand) as "value & constant" with bitwise-complementary constants,
 * and replaces it with a single bfi or bitfield_select.
 *
 * \param b        Builder used to emit the replacement instruction.
 * \param alu      Candidate root instruction.
 * \param cb_data  Unused.
 *
 * \return true if \p alu was replaced, false if it was left untouched.
 */
static bool
nir_opt_generate_bfi_instr(nir_builder *b,
                           nir_alu_instr *alu,
                           UNUSED void *cb_data)
{
   /* Only scalar 32-bit results are handled. */
   if (alu->def.num_components != 1 || alu->def.bit_size != 32)
      return false;

   /* The two masked operands never have overlapping bits, so OR, XOR and ADD
    * are all equivalent ways of combining them.
    */
   switch (alu->op) {
   case nir_op_ior:
   case nir_op_ixor:
   case nir_op_iadd:
      break;
   default:
      return false;
   }

   nir_scalar root = nir_get_scalar(&alu->def, 0);
   nir_scalar lhs = nir_scalar_chase_alu_src(root, 0);
   nir_scalar rhs = nir_scalar_chase_alu_src(root, 1);

   if (!(nir_scalar_is_alu(lhs) && nir_scalar_is_alu(rhs)))
      return false;

   nir_scalar lhs_value;
   uint32_t lhs_mask;
   if (!parse_iand(lhs, &lhs_value, &lhs_mask))
      return false;

   nir_scalar rhs_value;
   uint32_t rhs_mask;
   if (!parse_iand(rhs, &rhs_value, &rhs_mask))
      return false;

   /* The masks must be exact bitwise complements of one another. */
   if (lhs_mask != ~rhs_mask)
      return false;

   /* The mask used by the bfi instruction must be odd. When the mask is odd,
    * the implicit shift applied by the bfi is by zero bits. Complementary
    * masks always contain exactly one odd mask, so the rule can always be
    * applied.
    *
    * bitfield_select does not have this restriction, but picking the odd
    * mask for it as well does no harm.
    */
   const bool lhs_is_odd = (lhs_mask & 1) != 0;

   /* Because lhs_mask == ~rhs_mask, exactly one of them has bit 0 set. */
   assert(((rhs_mask & 1) != 0) != lhs_is_odd);

   const uint32_t mask = lhs_is_odd ? lhs_mask : rhs_mask;
   nir_scalar insert = lhs_is_odd ? lhs_value : rhs_value;
   nir_scalar base = lhs_is_odd ? rhs_value : lhs_value;

   b->cursor = nir_before_instr(&alu->instr);

   const bool use_bfi = b->shader->options->has_bfi;
   assert(use_bfi || b->shader->options->has_bitfield_select);

   nir_def *mask_def = nir_imm_int(b, mask);
   nir_def *insert_def = nir_channel(b, insert.def, insert.comp);
   nir_def *base_def = nir_channel(b, base.def, base.comp);

   nir_def *replacement =
      use_bfi ? nir_bfi(b, mask_def, insert_def, base_def)
              : nir_bitfield_select(b, mask_def, insert_def, base_def);

   nir_def_replace(&alu->def, replacement);
   return true;
}
/**
 * Run the bfi-generation pass over every ALU instruction in \p shader.
 *
 * Nothing is done unless the shader's compiler options advertise at least
 * one of has_bfi or has_bitfield_select.
 *
 * \return false when neither instruction is available; otherwise the result
 *         of nir_shader_alu_pass.
 */
bool
nir_opt_generate_bfi(nir_shader *shader)
{
   const bool has_bfi = shader->options->has_bfi;
   const bool has_bitfield_select = shader->options->has_bitfield_select;

   if (has_bfi || has_bitfield_select) {
      return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr,
                                 nir_metadata_control_flow, NULL);
   }

   return false;
}