From b37804c8de8bf6babefb9a3db65a3485cfcdc79e Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 26 Jan 2024 20:48:32 +0000 Subject: [PATCH] nir/algebraic: optimize 64-bit comparisons with zero'd halves to 32-bit These expect nir_lower_int64 to replace u2u64 to pack_64_2x32_split(, 0). fossil-db (navi31): Totals from 149 (0.19% of 79242) affected shaders: Instrs: 433095 -> 431830 (-0.29%); split: -0.29%, +0.00% CodeSize: 2165980 -> 2160284 (-0.26%); split: -0.27%, +0.00% SpillSGPRs: 689 -> 688 (-0.15%) Latency: 3801497 -> 3799901 (-0.04%); split: -0.05%, +0.01% InvThroughput: 1547916 -> 1546567 (-0.09%); split: -0.09%, +0.01% VClause: 4698 -> 4693 (-0.11%) SClause: 9981 -> 9977 (-0.04%); split: -0.05%, +0.01% Copies: 66148 -> 65431 (-1.08%); split: -1.09%, +0.01% PreSGPRs: 6732 -> 6729 (-0.04%) PreVGPRs: 7976 -> 7945 (-0.39%) VALU: 252936 -> 252336 (-0.24%) SALU: 51794 -> 51274 (-1.00%); split: -1.03%, +0.02% Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 06b4a2111e6..9bb8b1c7dcf 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1811,6 +1811,19 @@ for op in ('extract_u8', 'extract_i8'): optimizations.extend([((op, ('ishl', 'a@32', 24 - 8 * i), 3), (op, a, i)) for i in range(2, -1, -1)]) optimizations.extend([((op, ('ishl', 'a@64', 56 - 8 * i), 7), (op, a, i)) for i in range(6, -1, -1)]) +for op, repl in [('ieq', 'ieq'), ('ine', 'ine'), + ('ult', 'ult'), ('ilt', 'ult'), + ('uge', 'uge'), ('ige', 'uge')]: + optimizations.extend([ + ((op, ('pack_64_2x32_split', a, 0), ('pack_64_2x32_split', b, 0)), (repl, a, b)), + ((op, ('pack_64_2x32_split', a, 0), '#b(is_upper_half_zero)'), (repl, a, ('unpack_64_2x32_split_x', b))), + ((op, '#a(is_upper_half_zero)', ('pack_64_2x32_split', b, 0)), (repl, ('unpack_64_2x32_split_x', a), b)), + + ((op, ('pack_64_2x32_split', 0, a), ('pack_64_2x32_split', 0, b)), (op, a, b)), + ((op, ('pack_64_2x32_split', 0, a), '#b(is_lower_half_zero)'), (op, a, ('unpack_64_2x32_split_y', b))), + ((op, '#a(is_lower_half_zero)', ('pack_64_2x32_split', 0, b)), (op, ('unpack_64_2x32_split_y', a), b)), + ]) + optimizations.extend([ # Subtracts (('ussub_4x8_vc4', a, 0), a),