From 22fd4cf885a3eb2367c4cbdb2602c48237ad25ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 6 May 2026 09:31:49 +0200 Subject: [PATCH 1/2] nir/opt_algebraic: optimize downcast followed by upcast to extract Totals from 217 (0.10% of 208640) affected shaders: (Navi48) Instrs: 283561 -> 282870 (-0.24%) CodeSize: 1604864 -> 1601136 (-0.23%); split: -0.24%, +0.01% Latency: 2992301 -> 2990107 (-0.07%); split: -0.09%, +0.02% InvThroughput: 602722 -> 601316 (-0.23%); split: -0.23%, +0.00% Copies: 26490 -> 26471 (-0.07%); split: -0.10%, +0.03% VALU: 147735 -> 147176 (-0.38%) SALU: 51545 -> 51541 (-0.01%) VOPD: 11140 -> 11204 (+0.57%) --- src/compiler/nir/nir_opt_algebraic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 9d0cd043504..9b431f998d4 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2186,6 +2186,16 @@ optimizations.extend([ (('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)), (('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)), + # Downcast followed by upcast + (('u2u32', ('u2u8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'), + (('u2u32', ('i2i8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'), + (('i2i32', ('i2i8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'), + (('i2i32', ('u2u8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'), + (('u2u32', ('u2u16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'), + (('u2u32', ('i2i16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'), + (('i2i32', ('i2i16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'), + (('i2i32', ('u2u16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'), + # The extract_X16(a & 0xff) patterns aren't included because the iand will # already be converted to 
extract_u8. (('extract_i16', ('iand', a, 0x00ff0000), 1), ('extract_u8', a, 2), '!options->lower_extract_byte'), # extract_u8 is correct From dace9adfe25e1d610ff29dadc38d072f4bfbf38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 6 May 2026 09:40:23 +0200 Subject: [PATCH 2/2] nir/opt_algebraic: extend some extract_u8 patterns to extract_i8 and remove some duplicate extract patterns. --- src/compiler/nir/nir_opt_algebraic.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 9b431f998d4..ec1c53f6087 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2128,13 +2128,6 @@ optimizations.extend([ (('ior', ('bcsel', ('ine', ('iand', a, 0x00800000), 0), ~0xff, 0), ('extract_u8', a, 2)), ('extract_i8', a, 2)), (('ior', ('bcsel', ('ilt', 'a@32', 0), ~0xff, 0), ('extract_u8', a, 3)), ('extract_i8', a, 3)), - (('extract_i8', ('ushr', a, 8), 0), ('extract_i8', a, 1)), - (('extract_i8', ('ushr', a, 8), 1), ('extract_i8', a, 2)), - (('extract_i8', ('ushr', a, 8), 2), ('extract_i8', a, 3)), - (('extract_u8', ('ushr', a, 8), 0), ('extract_u8', a, 1)), - (('extract_u8', ('ushr', a, 8), 1), ('extract_u8', a, 2)), - (('extract_u8', ('ushr', a, 8), 2), ('extract_u8', a, 3)), - (('extract_i8', ('extract_i16', a, 1), 0), ('extract_i8', a, 2)), (('extract_i8', ('extract_i16', a, 1), 1), ('extract_i8', a, 3)), (('extract_i8', ('extract_u16', a, 1), 0), ('extract_i8', a, 2)), @@ -2304,12 +2297,11 @@ optimizations.extend([ # After the ('extract_u8', a, 0) pattern, above, triggers, there will be # patterns like those below. 
-for op in ('ushr', 'ishr'): - optimizations.extend([(('extract_u8', (op, 'a@16', 8), 0), ('extract_u8', a, 1))]) - optimizations.extend([(('extract_u8', (op, 'a@32', 8 * i), 0), ('extract_u8', a, i)) for i in range(1, 4)]) - optimizations.extend([(('extract_u8', (op, 'a@64', 8 * i), 0), ('extract_u8', a, i)) for i in range(1, 8)]) - -optimizations.extend([(('extract_u8', ('extract_u16', a, 1), 0), ('extract_u8', a, 2))]) +for extract_op in ('extract_u8', 'extract_i8'): + for op in ('ushr', 'ishr'): + optimizations.extend([((extract_op, (op, a, 8), i), (extract_op, a, i + 1)) for i in range (0, 3)]) + optimizations.extend([((extract_op, (op, 'a@32', 8 * i), 0), (extract_op, a, i)) for i in range(2, 4)]) + optimizations.extend([((extract_op, (op, 'a@64', 8 * i), 0), (extract_op, a, i)) for i in range(2, 8)]) # After the ('extract_[iu]8', a, 3) patterns, above, trigger, there will be # patterns like those below.