mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 08:50:13 +01:00
nir/algebraic: Optimize some extract_* expressions
v2: Add missing '!options->lower_extract_byte' to the last two patterns. Every driver except Asahi sets both or neither.

shader-db:

All Intel platforms had similar results. (DG2 shown)
total instructions in shared programs: 19659360 -> 19659356 (<.01%)
instructions in affected programs: 44 -> 40 (-9.09%)
helped: 2 / HURT: 0

total cycles in shared programs: 823432524 -> 823432520 (<.01%)
cycles in affected programs: 1722 -> 1718 (-0.23%)
helped: 2 / HURT: 0

fossil-db:

All Intel platforms had similar results. (DG2 shown)
Totals:
Instrs: 153989787 -> 153989617 (-0.00%)
Cycle count: 17562079230 -> 17562079493 (+0.00%); split: -0.00%, +0.00%

Totals from 24 (0.00% of 631369) affected shaders:
Instrs: 13733 -> 13563 (-1.24%)
Cycle count: 341392 -> 341655 (+0.08%); split: -0.25%, +0.33%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> [v1]
Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27891>
This commit is contained in:
parent
0fa17962d6
commit
1b8cf06fc7
1 changed file with 17 additions and 0 deletions
|
|
@ -1698,6 +1698,8 @@ optimizations.extend([
|
|||
(('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
|
||||
(('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
|
||||
(('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
|
||||
(('ishr', ('iand', a, 0x0000ff00), 8), ('extract_u8', a, 1), '!options->lower_extract_byte'),
|
||||
(('ishr', ('iand', a, 0x00ff0000), 16), ('extract_u8', a, 2), '!options->lower_extract_byte'),
|
||||
|
||||
# Common pattern in many Vulkan CTS tests that read 8-bit integers from a
|
||||
# storage buffer.
|
||||
|
|
@ -1720,6 +1722,16 @@ optimizations.extend([
|
|||
(('extract_u8', ('extract_i8', a, b), 0), ('extract_u8', a, b)),
|
||||
(('extract_u8', ('extract_u8', a, b), 0), ('extract_u8', a, b)),
|
||||
|
||||
# The extract_X8(a & 0xff) patterns aren't included because the iand will
|
||||
# already be converted to extract_u8.
|
||||
(('extract_i8', ('iand', a, 0x0000ff00), 1), ('extract_i8', a, 1)),
|
||||
(('extract_i8', ('iand', a, 0x00ff0000), 2), ('extract_i8', a, 2)),
|
||||
(('extract_i8', ('iand', a, 0xff000000), 3), ('extract_i8', a, 3)),
|
||||
|
||||
(('extract_u8', ('iand', a, 0x0000ff00), 1), ('extract_u8', a, 1)),
|
||||
(('extract_u8', ('iand', a, 0x00ff0000), 2), ('extract_u8', a, 2)),
|
||||
(('extract_u8', ('iand', a, 0xff000000), 3), ('extract_u8', a, 3)),
|
||||
|
||||
# Word extraction
|
||||
(('ushr', ('ishl', 'a@32', 16), 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
|
||||
(('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
|
||||
|
|
@ -1739,6 +1751,11 @@ optimizations.extend([
|
|||
(('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)),
|
||||
(('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)),
|
||||
|
||||
# The extract_X16(a & 0xff) patterns aren't included because the iand will
|
||||
# already be converted to extract_u8.
|
||||
# The 0x00ff0000 mask clears bits 24..31, so the upper 16-bit word of the
# masked value is 0x00XX: its sign bit is always zero and the signed word
# extract cannot produce a negative result.  The unsigned byte extract
# therefore yields the same value.
(('extract_i16', ('iand', a, 0x00ff0000), 1), ('extract_u8', a, 2), '!options->lower_extract_byte'), # extract_u8 (not extract_i8) is correct
|
||||
(('extract_u16', ('iand', a, 0x00ff0000), 1), ('extract_u8', a, 2), '!options->lower_extract_byte'),
|
||||
|
||||
# Lower pack/unpack
|
||||
(('pack_64_2x32_split', a, b), ('ior', ('u2u64', a), ('ishl', ('u2u64', b), 32)), 'options->lower_pack_64_2x32_split'),
|
||||
(('pack_32_2x16_split', a, b), ('ior', ('u2u32', a), ('ishl', ('u2u32', b), 16)), 'options->lower_pack_32_2x16_split || options->lower_pack_split'),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue