mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 03:08:05 +02:00
nir/opt_algebraic: Add various bitfield extract patterns.
v2 (Georg Lehmann): - fixed incorrect imin in ubfe_ubfe - simplified outer_bits of ushr((ubfe, ...), ...) opt - added is_used_once to iand(ushr(), ...) opt to improve stats Foz-DB Navi21: Totals from 3309 (4.18% of 79206) affected shaders: Instrs: 5295291 -> 5282128 (-0.25%); split: -0.28%, +0.03% CodeSize: 28299320 -> 28298456 (-0.00%); split: -0.07%, +0.06% Latency: 51566173 -> 51521923 (-0.09%); split: -0.09%, +0.01% InvThroughput: 13222050 -> 13204557 (-0.13%); split: -0.14%, +0.01% VClause: 116451 -> 116458 (+0.01%); split: -0.02%, +0.02% SClause: 160356 -> 160324 (-0.02%); split: -0.03%, +0.01% Copies: 424152 -> 423670 (-0.11%); split: -0.20%, +0.09% Branches: 156701 -> 156192 (-0.32%); split: -0.33%, +0.01% PreSGPRs: 168507 -> 168500 (-0.00%); split: -0.02%, +0.01% PreVGPRs: 151477 -> 151474 (-0.00%) VALU: 3486077 -> 3476675 (-0.27%); split: -0.31%, +0.04% SALU: 786467 -> 783109 (-0.43%); split: -0.45%, +0.03% VMEM: 188035 -> 188060 (+0.01%) SMEM: 259632 -> 259630 (-0.00%) Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31852>
This commit is contained in:
parent
78f23bf295
commit
be68aeafdc
2 changed files with 74 additions and 0 deletions
|
|
@ -540,7 +540,36 @@ for size, mask in ((8, 0xff), (16, 0xffff), (32, 0xffffffff), (64, 0xfffffffffff
|
|||
(('ushr', ('ishl', a_sz, '#b'), b), ('iand', a, ('ushr', mask, b))),
|
||||
])
|
||||
|
||||
# Builds the replacement expression for ubfe(ubfe(a, b, c), d, e), merging
# the two extracts into one. Intended for constant b, c, d and e.
def ubfe_ubfe(a, b, c, d, e):
    # Offsets and widths are taken from the low five bits of each operand.
    def low5(operand):
        return ('iand', operand, 0x1f)

    first_offset = low5(b)
    second_offset = low5(d)

    # The inner extract cannot yield more bits than remain above its offset.
    first_width = ('umin', low5(c), ('isub', 32, first_offset))
    second_width = low5(e)

    # Bits of the inner result still available once the outer offset has
    # been skipped; never negative.
    remaining = ('imax', ('isub', first_width, second_offset), 0)

    total_offset = ('iadd', first_offset, second_offset)
    merged = ('ubfe', a, total_offset, ('umin', second_width, remaining))

    # With constant operands this select is expected to constant-fold to
    # either 0 (combined offset above 31) or the merged ubfe, whose offset
    # and width expressions fold as well.
    return ('bcsel', ('ilt', 31, total_offset), 0, merged)
|
||||
|
||||
optimizations.extend([
    # (a >> b) & c, where c passes is_const_bitmask, becomes a bitfield
    # extract of bit_count(c) bits at offset b. Only matches when the shift
    # result has no other use and the has_bfe /
    # avoid_ternary_with_two_constants options allow it.
    (
        ('iand@32', ('ushr@32(is_used_once)', a, b), '#c(is_const_bitmask)'),
        ('ubfe', a, b, ('bit_count', c)),
        'options->has_bfe && !options->avoid_ternary_with_two_constants',
    ),

    # Two nested bitfield extracts with constant offsets and widths merge
    # into a single extract.
    (
        ('ubfe', ('ubfe', a, '#b', '#c'), '#d', '#e'),
        ubfe_ubfe(a, b, c, d, e),
    ),

    # A right shift of an extract, by a constant accepted by
    # is_5lsb_not_zero, is handled as an outer extract of 31 bits at
    # offset d.
    (
        ('ushr@32', ('ubfe', a, '#b', '#c'), '#d(is_5lsb_not_zero)'),
        ubfe_ubfe(a, b, c, d, 31),
    ),

    # Masking with -4 clears the two lowest bits, so the mask is dropped
    # here. NOTE(review): presumably is_first_5_bits_uge_2 guarantees a
    # shift of at least 2 - confirm against the predicate.
    (
        ('iand', ('ishl', 'a@32', '#b(is_first_5_bits_uge_2)'), -4),
        ('ishl', a, b),
    ),

    # Same idea for a multiply. NOTE(review): presumably
    # is_unsigned_multiple_of_4 guarantees the constant factor is a
    # multiple of 4 - confirm against the predicate.
    (
        ('iand', ('imul', a, '#b(is_unsigned_multiple_of_4)'), -4),
        ('imul', a, b),
    ),
])
|
||||
|
|
|
|||
|
|
@ -699,6 +699,51 @@ is_lower_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *in
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether an operand is a constant bit-mask, meaning that it
|
||||
* only has consecutive 1 bits starting from the LSB.
|
||||
* Numbers whose MSB is 1 are excluded because they are not useful
|
||||
* for the optimizations where this function is used.
|
||||
*/
|
||||
static inline bool
|
||||
is_const_bitmask(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
{
|
||||
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
const unsigned bit_size = instr->src[src].src.ssa->bit_size;
|
||||
const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
|
||||
const unsigned num_bits = util_bitcount64(c);
|
||||
if (c != BITFIELD64_MASK(num_bits) || num_bits == bit_size)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the 5 LSBs of an operand are non-zero.
|
||||
*/
|
||||
static inline bool
|
||||
is_5lsb_not_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
{
|
||||
if (nir_src_as_const_value(instr->src[src].src) == NULL)
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
const uint64_t c = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
|
||||
if ((c & 0x1f) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
no_signed_wrap(const nir_alu_instr *instr)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue