From 22fd4cf885a3eb2367c4cbdb2602c48237ad25ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 6 May 2026 09:31:49 +0200 Subject: [PATCH] nir/opt_algebraic: optimize downcast followed by upcast to extract Totals from 217 (0.10% of 208640) affected shaders: (Navi48) Instrs: 283561 -> 282870 (-0.24%) CodeSize: 1604864 -> 1601136 (-0.23%); split: -0.24%, +0.01% Latency: 2992301 -> 2990107 (-0.07%); split: -0.09%, +0.02% InvThroughput: 602722 -> 601316 (-0.23%); split: -0.23%, +0.00% Copies: 26490 -> 26471 (-0.07%); split: -0.10%, +0.03% VALU: 147735 -> 147176 (-0.38%) SALU: 51545 -> 51541 (-0.01%) VOPD: 11140 -> 11204 (+0.57%) --- src/compiler/nir/nir_opt_algebraic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 9d0cd043504..9b431f998d4 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2186,6 +2186,16 @@ optimizations.extend([ (('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)), (('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)), + # Downcast followed by upcast: the truncation ignores signedness, so only the outer conversion selects extract_u* vs. extract_i* + (('u2u32', ('u2u8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'), + (('u2u32', ('i2i8', 'a@32')), ('extract_u8', a, 0), '!options->lower_extract_byte'), + (('i2i32', ('i2i8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'), + (('i2i32', ('u2u8', 'a@32')), ('extract_i8', a, 0), '!options->lower_extract_byte'), + (('u2u32', ('u2u16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'), + (('u2u32', ('i2i16', 'a@32')), ('extract_u16', a, 0), '!options->lower_extract_word'), + (('i2i32', ('i2i16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'), + (('i2i32', ('u2u16', 'a@32')), ('extract_i16', a, 0), '!options->lower_extract_word'), + # The extract_X16(a & 0xff) patterns aren't included because the iand will # already be converted to 
extract_u8. (('extract_i16', ('iand', a, 0x00ff0000), 1), ('extract_u8', a, 2), '!options->lower_extract_byte'), # extract_u8 is correct