From 75b1fa9263c21f06c4231bc11fc66adae89f4af9 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 24 Apr 2024 08:51:14 +0200 Subject: [PATCH] nir/opt_algebraic: alternative 8bit pack_[us]norm_4x8 lowering Foz-DB Navi21: Totals from 42 (0.05% of 79395) affected shaders: Instrs: 2709529 -> 2705848 (-0.14%) CodeSize: 14720732 -> 14711384 (-0.06%); split: -0.06%, +0.00% VGPRs: 4096 -> 4104 (+0.20%) Latency: 17907612 -> 17904468 (-0.02%); split: -0.02%, +0.00% InvThroughput: 4723551 -> 4722649 (-0.02%); split: -0.02%, +0.00% Copies: 223516 -> 219819 (-1.65%) Branches: 109578 -> 109594 (+0.01%); split: -0.00%, +0.02% VALU: 1730848 -> 1727151 (-0.21%) Tested-by: Ian Romanick Reviewed-by: Jason Ekstrand Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 3d3be0702d1..73570d66adb 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2240,7 +2240,12 @@ optimizations.extend([ (('pack_unorm_4x8', 'v'), ('pack_uvec4_to_uint', ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), - 'options->lower_pack_unorm_4x8'), + 'options->lower_pack_unorm_4x8 && !options->has_pack_32_4x8'), + + (('pack_unorm_4x8', 'v'), + ('pack_32_4x8', + ('f2u8', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), + 'options->lower_pack_unorm_4x8 && options->has_pack_32_4x8'), (('pack_snorm_2x16', 'v'), ('pack_uvec2_to_uint', @@ -2250,7 +2255,12 @@ optimizations.extend([ (('pack_snorm_4x8', 'v'), ('pack_uvec4_to_uint', ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), - 'options->lower_pack_snorm_4x8'), + 'options->lower_pack_snorm_4x8 && !options->has_pack_32_4x8'), + + (('pack_snorm_4x8', 'v'), + ('pack_32_4x8', + ('f2i8', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), + 'options->lower_pack_snorm_4x8 && options->has_pack_32_4x8'), (('unpack_unorm_2x16', 'v'), ('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0),