pan/bi: Switch to 1-bit bools

In prep for FP16.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10627>
This commit is contained in:
Alyssa Rosenzweig 2021-05-03 19:00:35 -04:00 committed by Marge Bot
parent f21248c3c2
commit 2db8048aaa
2 changed files with 81 additions and 95 deletions

View file

@ -1138,14 +1138,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_discard_if: {
bi_index src = bi_src_index(&instr->src[0]);
unsigned sz = nir_src_bit_size(instr->src[0]);
assert(sz == 16 || sz == 32);
if (sz == 16)
src = bi_half(src, false);
bi_discard_f32(b, src, bi_zero(), BI_CMPF_NE);
assert(nir_src_bit_size(instr->src[0]) == 1);
bi_discard_f32(b, bi_half(src, false), bi_imm_u16(0), BI_CMPF_NE);
break;
}
@ -1223,7 +1217,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_load_front_face:
/* r58 == 0 means primitive is front facing */
bi_icmp_i32_to(b, dst, bi_register(58), bi_zero(), BI_CMPF_EQ,
BI_RESULT_TYPE_M1);
BI_RESULT_TYPE_I1);
break;
case nir_intrinsic_load_point_coord:
@ -1293,6 +1287,10 @@ bi_alu_src_index(nir_alu_src src, unsigned comps)
unsigned bitsize = nir_src_bit_size(src.src);
/* TODO: Do we need to do something more clever with 1-bit bools? */
if (bitsize == 1)
bitsize = 16;
/* the bi_index carries the 32-bit (word) offset separate from the
* subword swizzle, first handle the offset */
@ -1344,33 +1342,6 @@ bi_nir_round(nir_op op)
}
}
static enum bi_cmpf
bi_cmpf_nir(nir_op op)
{
switch (op) {
case nir_op_flt32:
case nir_op_ilt32:
case nir_op_ult32:
return BI_CMPF_LT;
case nir_op_fge32:
case nir_op_ige32:
case nir_op_uge32:
return BI_CMPF_GE;
case nir_op_feq32:
case nir_op_ieq32:
return BI_CMPF_EQ;
case nir_op_fneu32:
case nir_op_ine32:
return BI_CMPF_NE;
default:
unreachable("Invalid compare");
}
}
/* Convenience for lowered transcendentals */
static bi_index
@ -1523,6 +1494,64 @@ bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
}
static bi_instr *
bi_emit_alu_bool(bi_builder *b, unsigned sz, nir_op op,
bi_index dst, bi_index s0, bi_index s1, bi_index s2)
{
/* Handle 1-bit bools as zero/nonzero rather than specifically 0/1 or 0/~0.
* This will give the optimizer flexibility. */
if (sz == 1) sz = 16;
bi_index f = bi_zero();
bi_index t = bi_imm_uintN(0x1, sz);
switch (op) {
case nir_op_feq:
return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
case nir_op_flt:
return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
case nir_op_fge:
return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
case nir_op_fneu:
return bi_fcmp_to(b, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
case nir_op_ieq:
return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_EQ, BI_RESULT_TYPE_I1);
case nir_op_ine:
return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_NE, BI_RESULT_TYPE_I1);
case nir_op_ilt:
return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
case nir_op_ige:
return bi_icmp_to(b, nir_type_int, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
case nir_op_ult:
return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_LT, BI_RESULT_TYPE_I1);
case nir_op_uge:
return bi_icmp_to(b, nir_type_uint, sz, dst, s0, s1, BI_CMPF_GE, BI_RESULT_TYPE_I1);
case nir_op_iand:
return bi_lshift_and_to(b, sz, dst, s0, s1, bi_imm_u8(0));
case nir_op_ior:
return bi_lshift_or_to(b, sz, dst, s0, s1, bi_imm_u8(0));
case nir_op_ixor:
return bi_lshift_xor_to(b, sz, dst, s0, s1, bi_imm_u8(0));
case nir_op_inot:
return bi_lshift_xor_to(b, sz, dst, s0, t, bi_imm_u8(0));
case nir_op_f2b1:
return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
case nir_op_i2b1:
return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
case nir_op_b2b1:
return bi_csel_to(b, nir_type_int, sz, dst, s0, f, f, t, BI_CMPF_EQ);
case nir_op_bcsel:
return bi_csel_to(b, nir_type_int, sz, dst, s0, f, s1, s2, BI_CMPF_NE);
default:
fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name);
unreachable("Unhandled boolean ALU instruction");
}
}
static void
bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
{
@ -1531,6 +1560,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
unsigned sz = nir_dest_bit_size(instr->dest.dest);
unsigned comps = nir_dest_num_components(instr->dest.dest);
unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
unsigned src1_sz = srcs > 1 ? nir_src_bit_size(instr->src[1].src) : 0;
/* Indicate scalarness */
if ((sz == 1 || sz == 16) && comps == 1)
@ -1604,6 +1634,7 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
comps > 3 ? instr->src[0].swizzle[3] : 0,
};
if (sz == 1) sz = 16;
bi_make_vec_to(b, dst, unoffset_srcs, channels, comps, sz);
return;
}
@ -1656,6 +1687,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null();
bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null();
if (sz == 1) {
bi_emit_alu_bool(b, src_sz, instr->op, dst, s0, s1, s2);
return;
}
switch (instr->op) {
case nir_op_ffma:
bi_fma_to(b, sz, dst, s0, s1, s2, BI_ROUND_NONE);
@ -1727,13 +1763,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break;
}
case nir_op_b8csel:
case nir_op_b16csel:
case nir_op_b32csel:
if (sz == 8)
case nir_op_bcsel:
if (src1_sz == 8)
bi_mux_v4i8_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
else
bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
bi_csel_to(b, nir_type_int, src1_sz,
dst, s0, bi_zero(), s1, s2, BI_CMPF_NE);
break;
case nir_op_ishl:
@ -1747,42 +1782,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_arshift_to(b, sz, dst, s0, bi_null(), bi_byte(s1, 0));
break;
case nir_op_flt32:
case nir_op_fge32:
case nir_op_feq32:
case nir_op_fneu32:
bi_fcmp_to(b, sz, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
break;
case nir_op_ieq32:
case nir_op_ine32:
if (sz == 32) {
bi_icmp_i32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else if (sz == 16) {
bi_icmp_v2i16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else {
bi_icmp_v4i8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
}
break;
case nir_op_ilt32:
case nir_op_ige32:
if (sz == 32) {
bi_icmp_s32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else if (sz == 16) {
bi_icmp_v2s16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else {
bi_icmp_v4s8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
}
break;
case nir_op_imin:
case nir_op_umin:
bi_csel_to(b, nir_op_infos[instr->op].input_types[0], sz, dst,
@ -1795,20 +1794,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
s0, s1, s0, s1, BI_CMPF_GT);
break;
case nir_op_ult32:
case nir_op_uge32:
if (sz == 32) {
bi_icmp_u32_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else if (sz == 16) {
bi_icmp_v2u16_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
} else {
bi_icmp_v4u8_to(b, dst, s0, s1, bi_cmpf_nir(instr->op),
BI_RESULT_TYPE_M1);
}
break;
case nir_op_fddx:
case nir_op_fddy: {
bi_index lane1 = bi_lshift_and_i32(b,
@ -1946,6 +1931,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
BI_CMPF_NE);
break;
case nir_op_b2b32:
bi_csel_to(b, nir_type_int, sz, dst, s0, bi_zero(),
bi_imm_u32(~0), bi_zero(), BI_CMPF_NE);
break;
case nir_op_b2i8:
case nir_op_b2i16:
case nir_op_b2i32:
@ -2965,7 +2955,6 @@ bi_optimize_nir(nir_shader *nir, bool is_blend)
NIR_PASS(progress, nir, nir_opt_cse);
}
NIR_PASS(progress, nir, nir_lower_bool_to_int32);
NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);

View file

@ -38,9 +38,6 @@ SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos']
for op in SPECIAL:
algebraic_late += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]
algebraic_late += [(('f2b32', a), ('fneu32', a, 0.0)),
(('i2b32', a), ('ine32', a, 0))]
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)