pan/bi: Switch to 1-bit bools

In prep for FP16.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10627>
2026-03-17 19:00:33 +01:00 · 2021-05-03 19:00:35 -04:00 · 2021-05-03 19:00:35 -04:00 · 2db8048aaa
commit 2db8048aaa
parent f21248c3c2
2 changed files with 81 additions and 95 deletions
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@ -1138,14 +1138,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)

        case nir_intrinsic_discard_if: {
                bi_index src = bi_src_index(&instr->src[0]);
-
-                unsigned sz = nir_src_bit_size(instr->src[0]);
-                assert(sz == 16 || sz == 32);
-
-                if (sz == 16)
-                        src = bi_half(src, false);
-
-                bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE);
+                assert(nir_src_bit_size(instr->src[0]) == 1);
+                bi_discard_f32(b, bi_half(src, false), bi_imm_u16(0), BI_CMPF_NE);
                break;
        }

@ -1223,7 +1217,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
 	case nir_intrinsic_load_front_face:
                /* r58 == 0 means primitive is front facing */
                bi_icmp_i32_to(b, dst, bi_register(58), bi_zero(), BI_CMPF_EQ,
-                                BI_RESULT_TYPE_M1);
+                                BI_RESULT_TYPE_I1);
                break;

        case nir_intrinsic_load_point_coord:
@ -1293,6 +1287,10 @@ bi_alu_src_index(nir_alu_src src, unsigned comps)

        unsigned bitsize = nir_src_bit_size(src.src);

+        /* TODO: Do we need to do something more clever with 1-bit bools? */
+        if (bitsize == 1)
+                bitsize = 16;
+
        /* the bi_index carries the 32-bit (word) offset separate from the
         * subword swizzle, first handle the offset */

@ -1344,33 +1342,6 @@ bi_nir_round(nir_op op)
        }
 }

-static enum bi_cmpf
-bi_cmpf_nir(nir_op op)
-{
-        switch (op) {
-        case nir_op_flt32:
-        case nir_op_ilt32:
-        case nir_op_ult32:
-                return BI_CMPF_LT;
-
-        case nir_op_fge32:
-        case nir_op_ige32:
-        case nir_op_uge32:
-                return BI_CMPF_GE;
-
-        case nir_op_feq32:
-        case nir_op_ieq32:
-                return BI_CMPF_EQ;
-
-        case nir_op_fneu32:
-        case nir_op_ine32:
-                return BI_CMPF_NE;
-
-        default:
-                unreachable("Invalid compare");
-        }
-}
-
 /* Convenience for lowered transcendentals */

 static bi_index
@ -1523,6 +1494,64 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
        bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
 }

+static bi_instr *
+bi_emit_alu_bool(bi_builder *b, unsigned sz, nir_op op,
+      bi_index dst, bi_index s0, bi_index s1, bi_index s2)
+{
+        /* Emit NIR ALU op `op` into dst and return the bi_instr from the matching bi_*_to builder call; any other op is unreachable().
+         * 1-bit bools are handled as zero/nonzero rather than specifically 0/1 or 0/~0, which gives the optimizer flexibility. */
+        if (sz == 1) sz = 16; /* a 1-bit size is emitted as a 16-bit operation */
+        bi_index f = bi_zero(); /* constant used for "false" */
+        bi_index t = bi_imm_uintN(0x1, sz); /* constant used for "true" */

+        switch (op) {
+        case nir_op_feq:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+        case nir_op_flt:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_fge:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+        case nir_op_fneu:
+                return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);

+        case nir_op_ieq:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
+        case nir_op_ine:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
+        case nir_op_ilt:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_ige:
+                return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
+        case nir_op_ult:
+                return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
+        case nir_op_uge:
+                return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);

+        case nir_op_iand:
+                return bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_ior:
+                return bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_ixor:
+                return bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0));
+        case nir_op_inot:
+                return bi_lshift_xor_to(b, sz, dst, s0, t, bi_imm_u8(0)); /* NOTE(review): XOR with t (0x1) presumably turns nonzero into zero only when s0 is exactly 1 -- confirm bools reaching here are always 0/1 */

+        case nir_op_f2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ); /* NOTE(review): integer compare against zero; a float -0.0 source has nonzero bits -- confirm intended */
+        case nir_op_i2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
+        case nir_op_b2b1:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);

+        case nir_op_bcsel:
+                return bi_csel_to(b, nir_type_int, sz, dst, s0, f, s1, s2, BI_CMPF_NE);

+        default:
+                fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name);
+                unreachable("Unhandled boolean ALU instruction");
+        }
+}
+
 static void
 bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
 {
@ -1531,6 +1560,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
        unsigned sz = nir_dest_bit_size(instr->dest.dest);
        unsigned comps = nir_dest_num_components(instr->dest.dest);
        unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
+        unsigned src1_sz = srcs > 1 ? nir_src_bit_size(instr->src[1].src) : 0;

        /* Indicate scalarness */
        if ((sz == 1 || sz == 16) && comps == 1)
@ -1604,6 +1634,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                        comps > 3 ? instr->src[0].swizzle[3] : 0,
                };

+                if (sz == 1) sz = 16;
                bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, sz);
                return;
        }
@ -1656,6 +1687,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
        bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null();
        bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null();

+        if (sz == 1) {
+                bi_emit_alu_bool(b, src_sz, instr->op, dst, s0, s1, s2);
+                return;
+        }
+
        switch (instr->op) {
        case nir_op_ffma:
                bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE);
@ -1727,13 +1763,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                break;
        }

-        case nir_op_b8csel:
-        case nir_op_b16csel:
-        case nir_op_b32csel:
-                if (sz == 8)
+        case nir_op_bcsel:
+                if (src1_sz == 8)
                        bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
                else
-                        bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
+                        bi_csel_to(b, nir_type_int, src1_sz,
+                                        dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
                break;

        case nir_op_ishl:
@ -1747,42 +1782,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0));
                break;

-        case nir_op_flt32:
-        case nir_op_fge32:
-        case nir_op_feq32:
-        case nir_op_fneu32:
-                bi_fcmp_to(b, sz, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                BI_RESULT_TYPE_M1);
-                break;
-
-        case nir_op_ieq32:
-        case nir_op_ine32:
-                if (sz == 32) {
-                        bi_icmp_i32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2i16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4i8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
-        case nir_op_ilt32:
-        case nir_op_ige32:
-                if (sz == 32) {
-                        bi_icmp_s32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2s16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4s8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
        case nir_op_imin:
        case nir_op_umin:
                bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst,
@ -1795,20 +1794,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                                s0, s1, s0, s1, BI_CMPF_GT);
                break;

-        case nir_op_ult32:
-        case nir_op_uge32:
-                if (sz == 32) {
-                        bi_icmp_u32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else if (sz == 16) {
-                        bi_icmp_v2u16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                } else {
-                        bi_icmp_v4u8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
-                                        BI_RESULT_TYPE_M1);
-                }
-                break;
-
        case nir_op_fddx:
        case nir_op_fddy: {
                bi_index lane1 = bi_lshift_and_i32(b,
@ -1946,6 +1931,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                                BI_CMPF_NE);
                break;

+        case nir_op_b2b32:
+                bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(),
+                                bi_imm_u32(~0), bi_zero(), BI_CMPF_NE);
+                break;
+
        case nir_op_b2i8:
        case nir_op_b2i16:
        case nir_op_b2i32:
@ -2965,7 +2955,6 @@ bi_optimize_nir(nir_shader *nir, bool is_blend)
                NIR_PASS(progress, nir, nir_opt_cse);
        }

-        NIR_PASS(progress, nir, nir_lower_bool_to_int32);
        NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late);
        NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);

--- a/src/panfrost/bifrost/bifrost_nir_algebraic.py
+++ b/src/panfrost/bifrost/bifrost_nir_algebraic.py
@ -38,9 +38,6 @@ SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos']
 for op in SPECIAL:
        algebraic_late += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]

-algebraic_late += [(('f2b32', a), ('fneu32', a, 0.0)),
-             (('i2b32', a), ('ine32', a, 0))]
-
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)