nir/opt_algebraic: optimize downcast followed by upcast to extract

Totals from 217 (0.10% of 208640) affected shaders: (Navi48)
Instrs: 283561 -> 282870 (-0.24%)
CodeSize: 1604864 -> 1601136 (-0.23%); split: -0.24%, +0.01%
Latency: 2992301 -> 2990107 (-0.07%); split: -0.09%, +0.02%
InvThroughput: 602722 -> 601316 (-0.23%); split: -0.23%, +0.00%
Copies: 26490 -> 26471 (-0.07%); split: -0.10%, +0.03%
VALU: 147735 -> 147176 (-0.38%)
SALU: 51545 -> 51541 (-0.01%)
VOPD: 11140 -> 11204 (+0.57%)
This commit is contained in:
Daniel Schürmann 2026-05-06 09:31:49 +02:00
parent a8c7a25fb1
commit 22fd4cf885

View file

@ -2186,6 +2186,16 @@ optimizations.extend([
(('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)),
(('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)),
# Downcast followed by upcast: converting a 32-bit value down to 8 or 16 bits
# and then back up to 32 bits only keeps the low byte/word of a, so the pair
# collapses to a single extract of element 0.  The signedness of the downcast
# does not matter (u2uN and i2iN map to the same replacement); only the upcast
# selects between zero-extension (extract_u*) and sign-extension (extract_i*).
# Each rule is applied only when the corresponding lower_extract_byte /
# lower_extract_word option is not set.
(('u2u32', ('u2u8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'),
(('u2u32', ('i2i8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'),
(('i2i32', ('i2i8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'),
(('i2i32', ('u2u8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'),
(('u2u32', ('u2u16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'),
(('u2u32', ('i2i16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'),
(('i2i32', ('i2i16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'),
(('i2i32', ('u2u16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'),
# The extract_X16(a & 0xff) patterns aren't included because the iand will
# already be converted to extract_u8.
(('extract_i16', ('iand', a, 0x00ff0000), 1), ('extract_u8', a, 2), '!options->lower_extract_byte'), # extract_u8 is correct