From 3c66bcd25b81b3e364acf5cbbcc234cd76166ab9 Mon Sep 17 00:00:00 2001
From: Emma Anholt
Date: Wed, 29 Apr 2026 17:16:49 -0700
Subject: [PATCH] nir/algebraic: Optimize away re-extracts of packs.

Turns this v2i16 OpSDotKHR code on ir3:

MESA: info: isam.v.base0 (u16)(xy)hr1.x, r0.x, s#0, t#0
MESA: info: isam.v.base0 (u16)(xy)hr1.z, r0.x, s#0, t#2
MESA: info: (sy)(rpt1)cov.u16u32 r48.w, (last)(r)hr1.x
MESA: info: (rpt1)cov.u16u32 r49.y, (last)(r)hr1.z
MESA: info: (ss)shlg r49.x, 16, r49.x, r48.w
MESA: info: shlg r49.z, 16, r49.z, r49.y
MESA: info: shl.b r49.w, r49.x, 16
MESA: info: shl.b r50.x, r49.z, 16
MESA: info: ashr.b r49.w, r49.w, 16
MESA: info: ashr.b r49.x, r49.x, 16
MESA: info: ashr.b r50.x, r50.x, 16
MESA: info: ashr.b r49.z, r49.z, 16
MESA: info: mul.u24 r49.w, r49.w, r50.x
MESA: info: mul.s24 r49.x, r49.x, r49.z
MESA: info: add.u r49.w, r49.w, r49.x

into:

MESA: info: isam.v.base0 (u16)(xy)hr1.x, r0.x, s#0, t#0
MESA: info: isam.v.base0 (u16)(xy)hr1.z, r0.x, s#0, t#2
MESA: info: (sy)(rpt1)cov.s16s32 r48.w, (last)(r)hr1.x
MESA: info: (rpt1)cov.s16s32 r49.y, (last)(r)hr1.z
MESA: info: (ss)mul.s24 r48.w, r48.w, r49.y
MESA: info: mul.s24 r49.x, r49.x, r49.z
MESA: info: add.u r48.w, r48.w, r49.x

No change on shader-db.
---
 src/compiler/nir/nir_opt_algebraic.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 939bc0f79cd..b0e100c9815 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2018,6 +2018,20 @@ for pack, bits, compbits in [('pack_64_2x32', 64, 32), ('pack_32_2x16', 32, 16)]
         ((pack, (unpack, a)), a),
     ]
 
+# No-op extracts from packs.
+for (bits, chans, compbits) in ((32, 4, 8), (32, 2, 16), (64, 4, 16)):
+    for chan in range(chans):
+        chanvar = 'abcd'[chan]
+        optimizations += [((f'extract_i{compbits}', (f'pack_{bits}_{chans}x{compbits}', a), chan), ('i2i', 'a.' + 'xyzw'[chan]))]
+        optimizations += [((f'extract_u{compbits}', (f'pack_{bits}_{chans}x{compbits}', a), chan), ('u2u', 'a.' + 'xyzw'[chan]))]
+        if not (bits == 64 and compbits == 16):
+            if chans == 4:
+                optimizations += [((f'extract_i{compbits}', (f'pack_{bits}_{chans}x{compbits}_split', a, b, c, d), chan), ('i2i', chanvar))]
+                optimizations += [((f'extract_u{compbits}', (f'pack_{bits}_{chans}x{compbits}_split', a, b, c, d), chan), ('u2u', chanvar))]
+            if chans == 2:
+                optimizations += [((f'extract_i{compbits}', (f'pack_{bits}_{chans}x{compbits}_split', a, b), chan), ('i2i', chanvar))]
+                optimizations += [((f'extract_u{compbits}', (f'pack_{bits}_{chans}x{compbits}_split', a, b), chan), ('u2u', chanvar))]
+
 optimizations.extend([
     (('unpack_64_2x32_split_y', ('u2u64', 'a@1')), 0),
     (('unpack_64_2x32_split_y', ('u2u64', 'a@8')), 0),