mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-18 13:48:06 +02:00
Foz-DB Navi21: Totals from 1 (0.00% of 80255) affected shaders: Instrs: 295 -> 293 (-0.68%) CodeSize: 1588 -> 1572 (-1.01%) InvThroughput: 350 -> 347 (-0.86%) VALU: 231 -> 229 (-0.87%) Acked-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36141>
175 lines
5.3 KiB
C
175 lines
5.3 KiB
C
/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
/**
 * \file
 * Identify sequences of logical operations to convert to bfi
 *
 * It is difficult for opt_algebraic to match general expressions like
 *
 *    (a & some_constant) | (b & ~some_constant)
 *
 * Common cases like some_constant = 0x7fffffff can be added, but this may
 * miss other opportunities. This pass implements that general pattern
 * matching.
 *
 * Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass.
 *
 * Future work may also detect cases like:
 *
 *    (a & some_constant) | ~(b | some_constant)
 *    ~((a | some_constant) & (b | ~some_constant))
 *    etc.
 */
|
|
|
|
#include "nir_builder.h"
|
|
|
|
static bool
|
|
parse_iand(nir_scalar alu, nir_scalar *value, uint64_t *mask)
|
|
{
|
|
if (nir_scalar_alu_op(alu) == nir_op_iand) {
|
|
/* If both source are constants, do not perform the conversion. There
|
|
* are lowerings in opt_algebraic that can generate this pattern on
|
|
* platforms that set has_bfi and avoid_ternary_with_two_constants.
|
|
* Undoing that lowering would result in infinite optimization loops.
|
|
*/
|
|
nir_scalar left = nir_scalar_chase_alu_src(alu, 0);
|
|
nir_scalar right = nir_scalar_chase_alu_src(alu, 1);
|
|
if (nir_scalar_is_const(left) && nir_scalar_is_const(right))
|
|
return false;
|
|
|
|
if (nir_scalar_is_const(left)) {
|
|
*mask = nir_scalar_as_uint(left);
|
|
*value = right;
|
|
return true;
|
|
} else if (nir_scalar_is_const(right)) {
|
|
*mask = nir_scalar_as_uint(right);
|
|
*value = left;
|
|
return true;
|
|
}
|
|
} else if (nir_scalar_alu_op(alu) == nir_op_extract_u16 ||
|
|
nir_scalar_alu_op(alu) == nir_op_extract_u8) {
|
|
/* There may be leftovers from opt_algebraic that haven't been constant
|
|
* folded yet.
|
|
*/
|
|
nir_scalar left = nir_scalar_chase_alu_src(alu, 0);
|
|
if (nir_scalar_is_const(left))
|
|
return false;
|
|
|
|
if (nir_scalar_as_uint(nir_scalar_chase_alu_src(alu, 1)) == 0) {
|
|
*mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0xffff : 0xff;
|
|
*value = left;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
nir_opt_generate_bfi_instr(nir_builder *b,
|
|
nir_alu_instr *alu,
|
|
UNUSED void *cb_data)
|
|
{
|
|
/* Since none of the source bits will overlap, these are equvalent. */
|
|
if (alu->op != nir_op_ior &&
|
|
alu->op != nir_op_ixor &&
|
|
alu->op != nir_op_iadd)
|
|
return false;
|
|
|
|
if (alu->def.bit_size == 1)
|
|
return false;
|
|
|
|
/* bfi only supports 32bit. */
|
|
if (!b->shader->options->has_bitfield_select && alu->def.bit_size != 32)
|
|
return false;
|
|
|
|
nir_scalar insert[NIR_MAX_VEC_COMPONENTS];
|
|
nir_scalar base[NIR_MAX_VEC_COMPONENTS];
|
|
nir_const_value mask_cvals[NIR_MAX_VEC_COMPONENTS];
|
|
|
|
for (unsigned i = 0; i < alu->def.num_components; i++) {
|
|
nir_scalar alu_scalar = nir_get_scalar(&alu->def, i);
|
|
nir_scalar left = nir_scalar_chase_alu_src(alu_scalar, 0);
|
|
nir_scalar right = nir_scalar_chase_alu_src(alu_scalar, 1);
|
|
|
|
if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right))
|
|
return false;
|
|
|
|
nir_scalar src1;
|
|
nir_scalar src2;
|
|
uint64_t mask1;
|
|
uint64_t mask2;
|
|
|
|
if (!parse_iand(left, &src1, &mask1))
|
|
return false;
|
|
|
|
if (!parse_iand(right, &src2, &mask2))
|
|
return false;
|
|
|
|
if (mask1 != (~mask2 & BITFIELD64_MASK(alu->def.bit_size)))
|
|
return false;
|
|
|
|
/* The mask used by the bfi instruction must be odd. When the mask is odd,
|
|
* the implict shift applied by the bfi is by zero bits. Since one of the
|
|
* masks must be odd, the rule can always be applied.
|
|
*
|
|
* bitfield_select does not have this restriction, but don't do it for vectors
|
|
* because swapping only part of the components would hurt.
|
|
*/
|
|
uint64_t mask;
|
|
if (b->shader->options->has_bitfield_select && alu->def.num_components > 1) {
|
|
/* Just pick one. */
|
|
mask = mask1;
|
|
insert[i] = src1;
|
|
base[i] = src2;
|
|
} else if ((mask1 & 1) != 0) {
|
|
/* Because mask1 == ~mask2. */
|
|
assert((mask2 & 1) == 0);
|
|
|
|
mask = mask1;
|
|
insert[i] = src1;
|
|
base[i] = src2;
|
|
} else {
|
|
/* Because mask1 == ~mask2. */
|
|
assert((mask2 & 1) != 0);
|
|
|
|
mask = mask2;
|
|
insert[i] = src2;
|
|
base[i] = src1;
|
|
}
|
|
|
|
mask_cvals[i] = nir_const_value_for_uint(mask, alu->def.bit_size);
|
|
}
|
|
|
|
b->cursor = nir_before_instr(&alu->instr);
|
|
|
|
nir_def *mask_vec = nir_build_imm(b, alu->def.num_components, alu->def.bit_size, mask_cvals);
|
|
nir_def *insert_vec = nir_vec_scalars(b, insert, alu->def.num_components);
|
|
nir_def *base_vec = nir_vec_scalars(b, base, alu->def.num_components);
|
|
|
|
nir_def *bfi;
|
|
|
|
if (b->shader->options->has_bitfield_select) {
|
|
bfi = nir_bitfield_select(b, mask_vec, insert_vec, base_vec);
|
|
} else {
|
|
assert(b->shader->options->has_bfi);
|
|
|
|
bfi = nir_bfi(b, mask_vec, insert_vec, base_vec);
|
|
}
|
|
|
|
nir_def_replace(&alu->def, bfi);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
nir_opt_generate_bfi(nir_shader *shader)
|
|
{
|
|
if (!shader->options->has_bfi && !shader->options->has_bitfield_select)
|
|
return false;
|
|
|
|
return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr,
|
|
nir_metadata_control_flow, NULL);
|
|
}
|