diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 399cbf534fe..dbd4295303e 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1138,14 +1138,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) case nir_intrinsic_discard_if: { bi_index src = bi_src_index(&instr->src[0]); - - unsigned sz = nir_src_bit_size(instr->src[0]); - assert(sz == 16 || sz == 32); - - if (sz == 16) - src = bi_half(src, false); - - bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE); + assert(nir_src_bit_size(instr->src[0]) == 1); + bi_discard_f32(b, bi_half(src, false), bi_imm_u16(0), BI_CMPF_NE); break; } @@ -1223,7 +1217,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) case nir_intrinsic_load_front_face: /* r58 == 0 means primitive is front facing */ bi_icmp_i32_to(b, dst, bi_register(58), bi_zero(), BI_CMPF_EQ, - BI_RESULT_TYPE_M1); + BI_RESULT_TYPE_I1); break; case nir_intrinsic_load_point_coord: @@ -1293,6 +1287,10 @@ bi_alu_src_index(nir_alu_src src, unsigned comps) unsigned bitsize = nir_src_bit_size(src.src); + /* TODO: Do we need to do something more clever with 1-bit bools? */ + if (bitsize == 1) + bitsize = 16; + /* the bi_index carries the 32-bit (word) offset separate from the * subword swizzle, first handle the offset */ @@ -1344,33 +1342,6 @@ bi_nir_round(nir_op op) } } -static enum bi_cmpf -bi_cmpf_nir(nir_op op) -{ - switch (op) { - case nir_op_flt32: - case nir_op_ilt32: - case nir_op_ult32: - return BI_CMPF_LT; - - case nir_op_fge32: - case nir_op_ige32: - case nir_op_uge32: - return BI_CMPF_GE; - - case nir_op_feq32: - case nir_op_ieq32: - return BI_CMPF_EQ; - - case nir_op_fneu32: - case nir_op_ine32: - return BI_CMPF_NE; - - default: - unreachable("Invalid compare"); - } -} - /* Convenience for lowered transcendentals */ static bi_index @@ -1523,6 +1494,64 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE); } +static bi_instr * +bi_emit_alu_bool(bi_builder *b, unsigned sz, nir_op op, + bi_index dst, bi_index s0, bi_index s1, bi_index s2) +{ + /* Handle 1-bit bools as zero/nonzero rather than specifically 0/1 or 0/~0. + * This will give the optimizer flexibility. */ + if (sz == 1) sz = 16; + bi_index f = bi_zero(); + bi_index t = bi_imm_uintN(0x1, sz); + + switch (op) { + case nir_op_feq: + return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1); + case nir_op_flt: + return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1); + case nir_op_fge: + return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1); + case nir_op_fneu: + return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1); + + case nir_op_ieq: + return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1); + case nir_op_ine: + return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1); + case nir_op_ilt: + return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1); + case nir_op_ige: + return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1); + case nir_op_ult: + return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1); + case nir_op_uge: + return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1); + + case nir_op_iand: + return bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + case nir_op_ior: + return bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + case nir_op_ixor: + return bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0)); + case nir_op_inot: + return bi_lshift_xor_to(b, sz, dst, s0, t, bi_imm_u8(0)); + + case nir_op_f2b1: + return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ); + case nir_op_i2b1: + return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ); + case nir_op_b2b1: + return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ); + + case nir_op_bcsel: + return bi_csel_to(b, nir_type_int, sz, dst, s0, f, s1, s2, BI_CMPF_NE); + + default: + fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name); + unreachable("Unhandled boolean ALU instruction"); + } +} + static void bi_emit_alu(bi_builder *b, nir_alu_instr *instr) { @@ -1531,6 +1560,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) unsigned sz = nir_dest_bit_size(instr->dest.dest); unsigned comps = nir_dest_num_components(instr->dest.dest); unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0; + unsigned src1_sz = srcs > 1 ? nir_src_bit_size(instr->src[1].src) : 0; /* Indicate scalarness */ if ((sz == 1 || sz == 16) && comps == 1) @@ -1604,6 +1634,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) comps > 3 ? instr->src[0].swizzle[3] : 0, }; + if (sz == 1) sz = 16; bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, sz); return; } @@ -1656,6 +1687,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null(); bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null(); + if (sz == 1) { + bi_emit_alu_bool(b, src_sz, instr->op, dst, s0, s1, s2); + return; + } + switch (instr->op) { case nir_op_ffma: bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE); @@ -1727,13 +1763,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) break; } - case nir_op_b8csel: - case nir_op_b16csel: - case nir_op_b32csel: - if (sz == 8) + case nir_op_bcsel: + if (src1_sz == 8) bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); else - bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), s1, s2, BI_CMPF_NE); + bi_csel_to(b, nir_type_int, src1_sz, + dst, s0, bi_zero(), s1, s2, BI_CMPF_NE); break; case nir_op_ishl: @@ -1747,42 +1782,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0)); break; - case nir_op_flt32: - case nir_op_fge32: - case nir_op_feq32: - case nir_op_fneu32: - bi_fcmp_to(b, sz, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - break; - - case nir_op_ieq32: - case nir_op_ine32: - if (sz == 32) { - bi_icmp_i32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else if (sz == 16) { - bi_icmp_v2i16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else { - bi_icmp_v4i8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } - break; - - case nir_op_ilt32: - case nir_op_ige32: - if (sz == 32) { - bi_icmp_s32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else if (sz == 16) { - bi_icmp_v2s16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else { - bi_icmp_v4s8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } - break; - case nir_op_imin: case nir_op_umin: bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst, @@ -1795,20 +1794,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) s0, s1, s0, s1, BI_CMPF_GT); break; - case nir_op_ult32: - case nir_op_uge32: - if (sz == 32) { - bi_icmp_u32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else if (sz == 16) { - bi_icmp_v2u16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } else { - bi_icmp_v4u8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op), - BI_RESULT_TYPE_M1); - } - break; - case nir_op_fddx: case nir_op_fddy: { bi_index lane1 = bi_lshift_and_i32(b, @@ -1946,6 +1931,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) BI_CMPF_NE); break; + case nir_op_b2b32: + bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), + bi_imm_u32(~0), bi_zero(), BI_CMPF_NE); + break; + case nir_op_b2i8: case nir_op_b2i16: case nir_op_b2i32: @@ -2965,7 +2955,6 @@ bi_optimize_nir(nir_shader *nir, bool is_blend) NIR_PASS(progress, nir, nir_opt_cse); } - NIR_PASS(progress, nir, nir_lower_bool_to_int32); NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late); NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); diff --git a/src/panfrost/bifrost/bifrost_nir_algebraic.py b/src/panfrost/bifrost/bifrost_nir_algebraic.py index 955d914c82b..cad2ffb5021 100644 --- a/src/panfrost/bifrost/bifrost_nir_algebraic.py +++ b/src/panfrost/bifrost/bifrost_nir_algebraic.py @@ -38,9 +38,6 @@ SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos'] for op in SPECIAL: algebraic_late += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))] -algebraic_late += [(('f2b32', a), ('fneu32', a, 0.0)), - (('i2b32', a), ('ine32', a, 0))] - def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', '--import-path', required=True)