mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-06 14:20:39 +01:00
nir/algebraic: Fix up extract_[iu]8 after loop unrolling
Skylake, Broadwell, and Haswell had similar results. (Skylake shown) total instructions in shared programs: 15256840 -> 15256837 (<.01%) instructions in affected programs: 4713 -> 4710 (-0.06%) helped: 3 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.06% max: 0.08% x̄: 0.06% x̃: 0.06% total cycles in shared programs: 372286583 -> 372286583 (0.00%) cycles in affected programs: 198516 -> 198516 (0.00%) helped: 1 HURT: 1 helped stats (abs) min: 10 max: 10 x̄: 10.00 x̃: 10 helped stats (rel) min: <.01% max: <.01% x̄: <.01% x̃: <.01% HURT stats (abs) min: 10 max: 10 x̄: 10.00 x̃: 10 HURT stats (rel) min: 0.01% max: 0.01% x̄: 0.01% x̃: 0.01% No changes on any other Intel platform. v2: Use a loop to generate patterns. Suggested by Jason. v3: Fix a copy-and-paste bug in the extract_[iu]8 of ishl loop that would replace an extract_i8 with an extract_u8. This broke ~180 tests. This bug was introduced in v2. Reviewed-by: Matt Turner <mattst88@gmail.com> [v1] Reviewed-by: Dylan Baker <dylan@pnwbakers.com> [v2] Acked-by: Jason Ekstrand <jason@jlekstrand.net> [v2]
This commit is contained in:
parent
b779baa9bf
commit
c152672e68
1 changed files with 20 additions and 2 deletions
|
|
@ -621,8 +621,26 @@ optimizations = [
|
|||
(('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
|
||||
(('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
|
||||
(('iand', 0xff, ('ushr', a, 8)), ('extract_u8', a, 1), '!options->lower_extract_byte'),
|
||||
(('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
|
||||
(('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
|
||||
]
|
||||
|
||||
# The ('extract_u8', a, 0) pattern, above, can trigger in cases where the
|
||||
# shift count is based on a loop induction variable. Once the loop is
|
||||
# unrolled, constant folding will generate patterns like those below.
|
||||
for op in ('ushr', 'ishr'):
|
||||
optimizations.extend([(('extract_u8', (op, 'a@16', 8), 0), ('extract_u8', a, 1))])
|
||||
optimizations.extend([(('extract_u8', (op, 'a@32', 8 * i), 0), ('extract_u8', a, i)) for i in range(1, 4)])
|
||||
optimizations.extend([(('extract_u8', (op, 'a@64', 8 * i), 0), ('extract_u8', a, i)) for i in range(1, 8)])
|
||||
|
||||
optimizations.extend([(('extract_u8', ('extract_u16', a, 1), 0), ('extract_u8', a, 2))])
|
||||
|
||||
# The ('extract_[iu]8', a, 3) patterns, above, can trigger in cases where the
|
||||
# shift count is based on a loop induction variable. Once the loop is
|
||||
# unrolled, constant folding will generate patterns like those below.
|
||||
for op in ('extract_u8', 'extract_i8'):
|
||||
optimizations.extend([((op, ('ishl', 'a@32', 24 - 8 * i), 3), (op, a, i)) for i in range(2, -1, -1)])
|
||||
|
||||
optimizations.extend([
|
||||
# Word extraction
|
||||
(('ushr', ('ishl', 'a@32', 16), 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
|
||||
(('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
|
||||
|
|
@ -806,7 +824,7 @@ optimizations = [
|
|||
'options->lower_unpack_snorm_4x8'),
|
||||
|
||||
(('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
|
||||
]
|
||||
])
|
||||
|
||||
# bit_size dependent lowerings
|
||||
for bit_size in [8, 16, 32, 64]:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue