diff --git a/src/compiler/nir/nir_opt_generate_bfi.c b/src/compiler/nir/nir_opt_generate_bfi.c index 7966f2ade90..63289d9a05b 100644 --- a/src/compiler/nir/nir_opt_generate_bfi.c +++ b/src/compiler/nir/nir_opt_generate_bfi.c @@ -27,7 +27,7 @@ #include "nir_builder.h" static bool -parse_iand(nir_scalar alu, nir_scalar *value, uint32_t *mask) +parse_iand(nir_scalar alu, nir_scalar *value, uint64_t *mask) { if (nir_scalar_alu_op(alu) == nir_op_iand) { /* If both source are constants, do not perform the conversion. There @@ -59,7 +59,7 @@ parse_iand(nir_scalar alu, nir_scalar *value, uint32_t *mask) return false; if (nir_scalar_as_uint(nir_scalar_chase_alu_src(alu, 1)) == 0) { - *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0x0000ffff : 0x000000ff; + *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0xffff : 0xff; *value = left; return true; } @@ -74,75 +74,90 @@ nir_opt_generate_bfi_instr(nir_builder *b, UNUSED void *cb_data) { /* Since none of the source bits will overlap, these are equvalent. */ - if ((alu->op != nir_op_ior && - alu->op != nir_op_ixor && - alu->op != nir_op_iadd) || - alu->def.num_components != 1 || alu->def.bit_size != 32) + if (alu->op != nir_op_ior && + alu->op != nir_op_ixor && + alu->op != nir_op_iadd) return false; - nir_scalar alu_scalar = nir_get_scalar(&alu->def, 0); - nir_scalar left = nir_scalar_chase_alu_src(alu_scalar, 0); - nir_scalar right = nir_scalar_chase_alu_src(alu_scalar, 1); - - if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right)) + if (alu->def.bit_size == 1) return false; - nir_scalar src1; - nir_scalar src2; - uint32_t mask1; - uint32_t mask2; - - if (!parse_iand(left, &src1, &mask1)) + /* bfi only supports 32bit. */ + if (!b->shader->options->has_bitfield_select && alu->def.bit_size != 32) return false; - if (!parse_iand(right, &src2, &mask2)) - return false; + nir_scalar insert[NIR_MAX_VEC_COMPONENTS]; + nir_scalar base[NIR_MAX_VEC_COMPONENTS]; + nir_const_value mask_cvals[NIR_MAX_VEC_COMPONENTS]; - if (mask1 != ~mask2) - return false; + for (unsigned i = 0; i < alu->def.num_components; i++) { + nir_scalar alu_scalar = nir_get_scalar(&alu->def, i); + nir_scalar left = nir_scalar_chase_alu_src(alu_scalar, 0); + nir_scalar right = nir_scalar_chase_alu_src(alu_scalar, 1); - nir_scalar insert; - nir_scalar base; - uint32_t mask; + if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right)) + return false; - /* The mask used by the bfi instruction must be odd. When the mask is odd, - * the implict shift applied by the bfi is by zero bits. Since one of the - * masks must be odd, the rule can always be applied. - * - * bitfield_select does not have this restriction, but it doesn't hurt. - */ - if ((mask1 & 1) != 0) { - /* Because mask1 == ~mask2. */ - assert((mask2 & 1) == 0); + nir_scalar src1; + nir_scalar src2; + uint64_t mask1; + uint64_t mask2; - mask = mask1; - insert = src1; - base = src2; - } else { - /* Because mask1 == ~mask2. */ - assert((mask2 & 1) != 0); + if (!parse_iand(left, &src1, &mask1)) + return false; - mask = mask2; - insert = src2; - base = src1; + if (!parse_iand(right, &src2, &mask2)) + return false; + + if (mask1 != (~mask2 & BITFIELD64_MASK(alu->def.bit_size))) + return false; + + /* The mask used by the bfi instruction must be odd. When the mask is odd, + * the implict shift applied by the bfi is by zero bits. Since one of the + * masks must be odd, the rule can always be applied. + * + * bitfield_select does not have this restriction, but don't do it for vectors + * because swapping only part of the components would hurt. + */ + uint64_t mask; + if (b->shader->options->has_bitfield_select && alu->def.num_components > 1) { + /* Just pick one. */ + mask = mask1; + insert[i] = src1; + base[i] = src2; + } else if ((mask1 & 1) != 0) { + /* Because mask1 == ~mask2. */ + assert((mask2 & 1) == 0); + + mask = mask1; + insert[i] = src1; + base[i] = src2; + } else { + /* Because mask1 == ~mask2. */ + assert((mask2 & 1) != 0); + + mask = mask2; + insert[i] = src2; + base[i] = src1; + } + + mask_cvals[i] = nir_const_value_for_uint(mask, alu->def.bit_size); } b->cursor = nir_before_instr(&alu->instr); + nir_def *mask_vec = nir_build_imm(b, alu->def.num_components, alu->def.bit_size, mask_cvals); + nir_def *insert_vec = nir_vec_scalars(b, insert, alu->def.num_components); + nir_def *base_vec = nir_vec_scalars(b, base, alu->def.num_components); + nir_def *bfi; - if (b->shader->options->has_bfi) { - bfi = nir_bfi(b, - nir_imm_int(b, mask), - nir_mov_scalar(b, insert), - nir_mov_scalar(b, base)); + if (b->shader->options->has_bitfield_select) { + bfi = nir_bitfield_select(b, mask_vec, insert_vec, base_vec); } else { - assert(b->shader->options->has_bitfield_select); + assert(b->shader->options->has_bfi); - bfi = nir_bitfield_select(b, - nir_imm_int(b, mask), - nir_mov_scalar(b, insert), - nir_mov_scalar(b, base)); + bfi = nir_bfi(b, mask_vec, insert_vec, base_vec); } nir_def_replace(&alu->def, bfi);