nir/algebraic: shrink 64-bit bitwise operations with 0/-1 constant half

fossil-db (navi21):
Totals from 457 (0.34% of 135636) affected shaders:
Instrs: 259349 -> 250383 (-3.46%)
CodeSize: 1411976 -> 1369136 (-3.03%)
Latency: 2175961 -> 2148158 (-1.28%)
InvThroughput: 502206 -> 490244 (-2.38%)
Copies: 15238 -> 15232 (-0.04%); split: -0.07%, +0.03%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19748>
This commit is contained in:
Rhys Perry 2022-11-15 12:10:40 +00:00 committed by Marge Bot
parent 8133d5551d
commit 368be87255
2 changed files with 74 additions and 4 deletions

View file

@ -1265,6 +1265,40 @@ optimizations.extend([
(('ior', ('ior', a, b), b), ('ior', a, b)),
(('iand', ('ior', a, b), b), b),
(('iand', ('iand', a, b), b), ('iand', a, b)),
# Shrink 64-bit bitwise operations whose second operand is a constant b with
# one 32-bit half equal to all zeros or all ones (-1).  Only the other half
# needs a real 32-bit ALU op; the known half is either a constant or a plain
# copy of the matching half of a.  In the replacements below the first
# pack_64_2x32_split operand (and unpack_64_2x32_split_x) is the low half and
# the second operand (unpack_64_2x32_split_y) is the high half.
#
# iand: a zero half of b forces that half of the result to 0.
(('iand@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', 0,
('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('iand@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
0)),
# iand: an all-ones half of b passes that half of a through unchanged.
(('iand@64', a, '#b(is_lower_half_negative_one)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('iand@64', a, '#b(is_upper_half_negative_one)'),
('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# ior: a zero half of b passes that half of a through unchanged.
(('ior@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ior@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# ior: an all-ones half of b forces that half of the result to -1.
(('ior@64', a, '#b(is_lower_half_negative_one)'),
('pack_64_2x32_split', -1,
('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ior@64', a, '#b(is_upper_half_negative_one)'),
('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
-1)),
# ixor: a zero half of b passes that half of a through unchanged.  There are
# no -1 variants here: xor with all ones is a bitwise NOT, which would still
# need an instruction for that half.
(('ixor@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('ixor', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ixor@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('ixor', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# DeMorgan's Laws
(('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
(('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),

View file

@ -471,7 +471,7 @@ is_upper_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
for (unsigned i = 0; i < num_components; i++) {
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
if ((nir_src_comp_as_uint(instr->src[src].src,
swizzle[i]) & high_bits) != 0) {
return false;
@ -495,9 +495,45 @@ is_lower_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
return false;
for (unsigned i = 0; i < num_components; i++) {
uint32_t low_bits =
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
uint64_t low_bits = u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != 0)
return false;
}
return true;
}
/**
 * Search-condition helper: reports whether a source is a constant whose upper
 * half has every bit set (i.e. reads as -1) in each selected component.
 *
 * \param ht              unused
 * \param instr           ALU instruction that owns the source
 * \param src             index into instr->src[] of the source to inspect
 * \param num_components  number of entries of \p swizzle to check
 * \param swizzle         component selectors applied to the constant
 *
 * \return false if the source is not a constant or if any selected component
 *         has a cleared bit in its upper half; true otherwise.
 */
static inline bool
is_upper_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
                           unsigned src, unsigned num_components,
                           const uint8_t *swizzle)
{
   if (!nir_src_as_const_value(instr->src[src].src))
      return false;

   /* The mask depends only on the source bit size, so build it once. */
   const unsigned half = nir_src_bit_size(instr->src[src].src) / 2;
   const uint64_t upper_mask = u_bit_consecutive64(half, half);

   for (unsigned c = 0; c < num_components; c++) {
      const uint64_t value =
         nir_src_comp_as_uint(instr->src[src].src, swizzle[c]);

      if ((value & upper_mask) != upper_mask)
         return false;
   }

   return true;
}
static inline bool
is_lower_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
uint64_t low_bits = u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != low_bits)
return false;
}