nir/algebraic: shrink 64-bit bitwise operations with 0/-1 constant half

fossil-db (navi21):
Totals from 457 (0.34% of 135636) affected shaders:
Instrs: 259349 -> 250383 (-3.46%)
CodeSize: 1411976 -> 1369136 (-3.03%)
Latency: 2175961 -> 2148158 (-1.28%)
InvThroughput: 502206 -> 490244 (-2.38%)
Copies: 15238 -> 15232 (-0.04%); split: -0.07%, +0.03%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19748>
This commit is contained in:
Rhys Perry 2022-11-15 12:10:40 +00:00 committed by Marge Bot
parent 8133d5551d
commit 368be87255
2 changed files with 74 additions and 4 deletions

View file

@ -1265,6 +1265,40 @@ optimizations.extend([
(('ior', ('ior', a, b), b), ('ior', a, b)), (('ior', ('ior', a, b), b), ('ior', a, b)),
(('iand', ('ior', a, b), b), b), (('iand', ('ior', a, b), b), b),
(('iand', ('iand', a, b), b), ('iand', a, b)), (('iand', ('iand', a, b), b), ('iand', a, b)),
# Shrink 64-bit bitwise operations whose constant operand b has one half that
# is all zeros or all ones (-1).  That half of the result is then known
# without performing the operation, so only the other half needs a 32-bit op
# and the 64-bit value is rebuilt with pack_64_2x32_split.
# NOTE(review): assumes split_x / the first pack operand is the low 32 bits
# and split_y / the second pack operand is the high 32 bits; this matches how
# the is_lower_half_* / is_upper_half_* conditions are paired with them below.

# iand with a zero half in b: that half of the result is the constant 0.
(('iand@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', 0,
('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('iand@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
0)),
# iand with an all-ones half in b: that half of a is passed through unchanged.
(('iand@64', a, '#b(is_lower_half_negative_one)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('iand@64', a, '#b(is_upper_half_negative_one)'),
('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# ior with a zero half in b: that half of a is passed through unchanged.
(('ior@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ior@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# ior with an all-ones half in b: that half of the result is the constant -1.
(('ior@64', a, '#b(is_lower_half_negative_one)'),
('pack_64_2x32_split', -1,
('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ior@64', a, '#b(is_upper_half_negative_one)'),
('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
-1)),
# ixor with a zero half in b: that half of a is passed through unchanged.
# Only the zero-half cases have rules in this group.
(('ixor@64', a, '#b(is_lower_half_zero)'),
('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('ixor', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
(('ixor@64', a, '#b(is_upper_half_zero)'),
('pack_64_2x32_split', ('ixor', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
('unpack_64_2x32_split_y', a))),
# DeMorgan's Laws # DeMorgan's Laws
(('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))), (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
(('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))), (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),

View file

@ -471,7 +471,7 @@ is_upper_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2; unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size; uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
if ((nir_src_comp_as_uint(instr->src[src].src, if ((nir_src_comp_as_uint(instr->src[src].src,
swizzle[i]) & high_bits) != 0) { swizzle[i]) & high_bits) != 0) {
return false; return false;
@ -495,9 +495,45 @@ is_lower_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
return false; return false;
for (unsigned i = 0; i < num_components; i++) { for (unsigned i = 0; i < num_components; i++) {
uint32_t low_bits = uint64_t low_bits = u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1; if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != 0)
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0) return false;
}
return true;
}
/**
 * Search condition: is the upper half of a constant source all ones?
 *
 * Returns true only when source operand \p src of \p instr is a constant and,
 * for each of the first \p num_components entries of \p swizzle, the selected
 * component has every bit of its upper half set (the upper half being the top
 * bit_size / 2 bits of the source's bit size).
 *
 * \param ht              unused
 * \param instr           ALU instruction whose source is inspected
 * \param src             index of the source operand within \p instr
 * \param num_components  number of swizzle entries to check
 * \param swizzle         component selectors into the constant source
 */
static inline bool
is_upper_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
                           unsigned src, unsigned num_components,
                           const uint8_t *swizzle)
{
   /* A non-constant source can never satisfy the condition. */
   if (!nir_src_as_const_value(instr->src[src].src))
      return false;

   /* Mask of `half` consecutive bits starting at bit `half`, i.e. the upper
    * half of a value of the source's bit size.  It is the same for every
    * component, so build it once.
    */
   const unsigned half = nir_src_bit_size(instr->src[src].src) / 2;
   const uint64_t upper_mask = u_bit_consecutive64(half, half);

   for (unsigned c = 0; c < num_components; c++) {
      /* Every masked bit must be set, not merely some of them. */
      if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[c]) & upper_mask) != upper_mask)
         return false;
   }

   return true;
}
static inline bool
is_lower_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
uint64_t low_bits = u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != low_bits)
return false; return false;
} }