diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 421bb18b3f3..86c01f49ee8 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1265,6 +1265,44 @@ optimizations.extend([
    (('ior', ('ior', a, b), b), ('ior', a, b)),
    (('iand', ('ior', a, b), b), b),
    (('iand', ('iand', a, b), b), ('iand', a, b)),
+
+   # 64-bit bitwise ops with a constant whose lower or upper 32-bit half is all
+   # zeros or all ones.  That half of the result is already known (0, -1, or
+   # the matching half of a), so only the other half needs a 32-bit operation
+   # before the halves are packed back together.
+   (('iand@64', a, '#b(is_lower_half_zero)'),
+    ('pack_64_2x32_split', 0,
+                           ('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
+   (('iand@64', a, '#b(is_upper_half_zero)'),
+    ('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
+                           0)),
+   (('iand@64', a, '#b(is_lower_half_negative_one)'),
+    ('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
+                           ('iand', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
+   (('iand@64', a, '#b(is_upper_half_negative_one)'),
+    ('pack_64_2x32_split', ('iand', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
+                           ('unpack_64_2x32_split_y', a))),
+
+   (('ior@64', a, '#b(is_lower_half_zero)'),
+    ('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
+                           ('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
+   (('ior@64', a, '#b(is_upper_half_zero)'),
+    ('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
+                           ('unpack_64_2x32_split_y', a))),
+   (('ior@64', a, '#b(is_lower_half_negative_one)'),
+    ('pack_64_2x32_split', -1,
+                           ('ior', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
+   (('ior@64', a, '#b(is_upper_half_negative_one)'),
+    ('pack_64_2x32_split', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
+                           -1)),
+
+   (('ixor@64', a, '#b(is_lower_half_zero)'),
+    ('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
+                           ('ixor', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)))),
+   (('ixor@64', a, '#b(is_upper_half_zero)'),
+    ('pack_64_2x32_split', ('ixor', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)),
+                           ('unpack_64_2x32_split_y', a))),
+
    # DeMorgan's Laws
    (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
    (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 5308fc41e40..b0288511210 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -471,7 +471,7 @@ is_upper_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
 
    for (unsigned i = 0; i < num_components; i++) {
       unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
-      uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
+      uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
       if ((nir_src_comp_as_uint(instr->src[src].src,
                                 swizzle[i]) & high_bits) != 0) {
          return false;
@@ -495,9 +495,60 @@ is_lower_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
       return false;
 
    for (unsigned i = 0; i < num_components; i++) {
-      uint32_t low_bits =
-         (1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
-      if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
+      uint64_t low_bits = u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
+      if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != 0)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Returns true if the source is a constant and, in each of the
+ * num_components components selected by swizzle, every bit of the upper half
+ * of the value is set.
+ */
+static inline bool
+is_upper_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+                           unsigned src, unsigned num_components,
+                           const uint8_t *swizzle)
+{
+   if (nir_src_as_const_value(instr->src[src].src) == NULL)
+      return false;
+
+   /* The mask depends only on the source bit size, so compute it once. */
+   const unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
+   const uint64_t high_bits = u_bit_consecutive64(half_bit_size, half_bit_size);
+
+   for (unsigned i = 0; i < num_components; i++) {
+      if ((nir_src_comp_as_uint(instr->src[src].src,
+                                swizzle[i]) & high_bits) != high_bits) {
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Returns true if the source is a constant and, in each of the
+ * num_components components selected by swizzle, every bit of the lower half
+ * of the value is set.
+ */
+static inline bool
+is_lower_half_negative_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+                           unsigned src, unsigned num_components,
+                           const uint8_t *swizzle)
+{
+   if (nir_src_as_const_value(instr->src[src].src) == NULL)
+      return false;
+
+   /* The mask depends only on the source bit size, so compute it once. */
+   const uint64_t low_bits =
+      u_bit_consecutive64(0, nir_src_bit_size(instr->src[src].src) / 2);
+
+   for (unsigned i = 0; i < num_components; i++) {
+      if ((nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) & low_bits) != low_bits)
          return false;
    }
 