diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f7c0e25406f..a07437ffa6d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4180,6 +4180,10 @@ typedef struct nir_shader_compiler_options { /* Backend supports fused comapre against zero and csel */ bool has_fused_comp_and_csel; + /* Backend supports icsel_eqz (fused int eq/ne against zero and csel) at the given bit size. */ + bool has_icsel_eqz64; + bool has_icsel_eqz32; + bool has_icsel_eqz16; /* Backend supports fneo, fequ, fltu, fgeu. */ bool has_fneo_fcmpu; diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 05786b7397b..e315022c49b 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1064,6 +1064,8 @@ opcode("b32csel", 0, tuint, [0, 0, 0], [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2", description = csel_description.format("a 32-bit", "0 vs ~0")) +triop("icsel_eqz", tint, selection, "(src0 == 0) ? src1 : src2") + triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2") triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? 
src1 : src2") diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index e132bc5b210..e8172690d41 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -3677,6 +3677,12 @@ late_optimizations += [ (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), ] +for s in [16, 32, 64]: + late_optimizations.extend([ + (('bcsel@{}'.format(s), ('ieq', 0, 'a@{}'.format(s)), 'b@{}'.format(s), 'c@{}'.format(s)), ('icsel_eqz', a, b, c), "options->has_icsel_eqz{} && !options->no_integers".format(s)), + (('bcsel@{}'.format(s), ('ine', 0, 'a@{}'.format(s)), 'b@{}'.format(s), 'c@{}'.format(s)), ('icsel_eqz', a, c, b), "options->has_icsel_eqz{} && !options->no_integers".format(s)), + ]) + distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index b45b3083e58..7566b9a057b 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -98,6 +98,8 @@ static const nir_shader_compiler_options ir3_base_options = { .lower_pack_split = true, .lower_to_scalar = true, .has_imul24 = true, + .has_icsel_eqz32 = true, + .has_icsel_eqz16 = true, .has_fsub = true, .has_isub = true, .force_indirect_unrolling_sampler = true, diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 4e5578c6edc..6498ae48af0 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -922,6 +922,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) set_cat2_condition(dst.rpts, dst_sz, IR3_COND_GE); break; + case nir_op_icsel_eqz: case nir_op_bcsel: { struct ir3_instruction_rpt conds; @@ -965,12 +966,19 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) conds.rpts[rpt] = cond; } + if (alu->op == 
nir_op_icsel_eqz) { + struct ir3_instruction_rpt tmp = src[1]; + src[1] = src[2]; + src[2] = tmp; + } + if (is_half(src[1].rpts[0])) dst = ir3_SEL_B16_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0); else dst = ir3_SEL_B32_rpt(b, dst_sz, src[1], 0, conds, 0, src[2], 0); break; } + case nir_op_bit_count: { if (ctx->compiler->gen < 5 || (src[0].rpts[0]->dsts[0]->flags & IR3_REG_HALF)) {