nir/opt_algebraic: Add various bitfield extract patterns.

v2 (Georg Lehmann):
- fixed incorrect imin in ubfe_ubfe
- simplified outer_bits of ushr((ubfe, ...), ...) opt
- added is_used_once to iand(ushr(), ...) opt to improve stats

Fossil-DB Navi21:
Totals from 3309 (4.18% of 79206) affected shaders:
Instrs: 5295291 -> 5282128 (-0.25%); split: -0.28%, +0.03%
CodeSize: 28299320 -> 28298456 (-0.00%); split: -0.07%, +0.06%
Latency: 51566173 -> 51521923 (-0.09%); split: -0.09%, +0.01%
InvThroughput: 13222050 -> 13204557 (-0.13%); split: -0.14%, +0.01%
VClause: 116451 -> 116458 (+0.01%); split: -0.02%, +0.02%
SClause: 160356 -> 160324 (-0.02%); split: -0.03%, +0.01%
Copies: 424152 -> 423670 (-0.11%); split: -0.20%, +0.09%
Branches: 156701 -> 156192 (-0.32%); split: -0.33%, +0.01%
PreSGPRs: 168507 -> 168500 (-0.00%); split: -0.02%, +0.01%
PreVGPRs: 151477 -> 151474 (-0.00%)
VALU: 3486077 -> 3476675 (-0.27%); split: -0.31%, +0.04%
SALU: 786467 -> 783109 (-0.43%); split: -0.45%, +0.03%
VMEM: 188035 -> 188060 (+0.01%)
SMEM: 259632 -> 259630 (-0.00%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31852>
This commit is contained in:
Timur Kristóf 2023-07-03 17:08:21 +02:00 committed by Marge Bot
parent 78f23bf295
commit be68aeafdc
2 changed files with 74 additions and 0 deletions

View file

@ -540,7 +540,36 @@ for size, mask in ((8, 0xff), (16, 0xffff), (32, 0xffffffff), (64, 0xfffffffffff
(('ushr', ('ishl', a_sz, '#b'), b), ('iand', a, ('ushr', mask, b))),
])
# Collapses ubfe(ubfe(a, b, c), d, e) when b, c, d, e are constants.
def ubfe_ubfe(a, b, c, d, e):
    # Hardware bfe uses only the low 5 bits of the offset/size operands.
    mask5 = lambda x: ('iand', x, 0x1f)
    off_in = mask5(b)
    off_out = mask5(d)
    # The inner extract yields at most the bits remaining above its offset.
    bits_in = ('umin', mask5(c), ('isub', 32, off_in))
    bits_out = mask5(e)
    total_off = ('iadd', off_in, off_out)
    # The outer extract reads the inner result, so clamp its size to the
    # inner bits that remain past the outer offset (never below zero).
    total_bits = ('umin', bits_out, ('imax', ('isub', bits_in, off_out), 0))
    merged = ('ubfe', a, total_off, total_bits)
    # A combined offset past bit 31 leaves nothing to extract: fold to 0.
    # This bcsel will be constant-folded to either 0 or the merged ubfe,
    # whose offset and bits operands will also be constant folded.
    return ('bcsel', ('ilt', 31, total_off), 0, merged)
optimizations.extend([
    # Create bitfield extract from right-shift + and pattern.
    # A consecutive-from-LSB mask AND'd onto a right-shifted value extracts
    # popcount(mask) bits starting at the shift amount.  is_used_once keeps
    # the ushr from being duplicated when it has other users.
    (('iand@32', ('ushr@32(is_used_once)', a, b), '#c(is_const_bitmask)'),
     ('ubfe', a, b, ('bit_count', c)),
     'options->has_bfe && !options->avoid_ternary_with_two_constants'),
    # Collapse two bitfield extracts with constant operands into a single one.
    (('ubfe', ('ubfe', a, '#b', '#c'), '#d', '#e'),
     ubfe_ubfe(a, b, c, d, e)),
    # Collapse non-zero right-shift into bitfield extract.
    # ushr by d is the same as extracting up to 31 bits starting at d
    # (d is known non-zero mod 32, so at most 31 bits remain).
    (('ushr@32', ('ubfe', a, '#b', '#c'), '#d(is_5lsb_not_zero)'),
     ubfe_ubfe(a, b, c, d, 31)),
    # Shifting left by >= 2 already clears the low 2 bits, so the
    # iand with -4 (~3) is a no-op.
    (('iand', ('ishl', 'a@32', '#b(is_first_5_bits_uge_2)'), -4), ('ishl', a, b)),
    # Multiplying by a multiple of 4 likewise leaves the low 2 bits zero.
    (('iand', ('imul', a, '#b(is_unsigned_multiple_of_4)'), -4), ('imul', a, b)),
])

View file

@ -699,6 +699,51 @@ is_lower_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *in
return true;
}
/**
 * Returns whether an operand is a constant bit-mask, meaning that it
 * only has consecutive 1 bits starting from the LSB.
 * Numbers whose MSB is 1 are excluded because they are not useful
 * for the optimizations where this function is used.
 */
static inline bool
is_const_bitmask(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   /* The bit size belongs to the whole source, not to a component, so it
    * is loop-invariant: look it up once instead of on every iteration.
    */
   const unsigned bit_size = instr->src[src].src.ssa->bit_size;

   for (unsigned i = 0; i < num_components; i++) {
      const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
      const unsigned num_bits = util_bitcount64(c);

      /* Accept only values of the form (1 << num_bits) - 1, and reject
       * all-ones values (num_bits == bit_size), whose MSB is set.
       */
      if (c != BITFIELD64_MASK(num_bits) || num_bits == bit_size)
         return false;
   }

   return true;
}
/**
 * Returns whether the 5 LSBs of an operand are non-zero.
 */
static inline bool
is_5lsb_not_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
                 unsigned src, unsigned num_components,
                 const uint8_t *swizzle)
{
   if (nir_src_as_const_value(instr->src[src].src) == NULL)
      return false;

   /* Every component must have at least one of its low 5 bits set. */
   for (unsigned comp = 0; comp < num_components; comp++) {
      const uint64_t val =
         nir_src_comp_as_uint(instr->src[src].src, swizzle[comp]);

      if (!(val & 0x1f))
         return false;
   }

   return true;
}
static inline bool
no_signed_wrap(const nir_alu_instr *instr)
{