diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index d62752f5901..7e03a0a64cd 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -158,6 +158,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->num_predicates = 1; compiler->bitops_can_write_predicates = false; + compiler->has_branch_and_or = false; if (compiler->gen >= 6) { compiler->samgq_workaround = true; @@ -217,6 +218,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->load_inline_uniforms_via_preamble_ldgk = dev_info->a7xx.load_inline_uniforms_via_preamble_ldgk; compiler->num_predicates = 4; compiler->bitops_can_write_predicates = true; + compiler->has_branch_and_or = true; } else { compiler->max_const_pipeline = 512; compiler->max_const_geom = 512; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 271b01fa12b..c48ceda1a8c 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -220,6 +220,9 @@ struct ir3_compiler { /* True if bitops (and.b, or.b, xor.b, not.b) can write to p0.c */ bool bitops_can_write_predicates; + /* True if braa/brao are available. */ + bool has_branch_and_or; + /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */ uint32_t max_variable_workgroup_size; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index b1ed40355a7..9cb73cbc00c 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3797,6 +3797,91 @@ emit_block(struct ir3_context *ctx, nir_block *nblock) static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list); +/* Get the ir3 branch condition for a given nir source. This will strip any inot + * instructions and set *inv when the condition should be inverted. This + * inversion can be directly folded into branches (in the inv1/inv2 fields) + * instead of adding an explicit not.b/sub.u instruction. + */ +static struct ir3_instruction * +get_branch_condition(struct ir3_context *ctx, nir_src *src, bool *inv) +{ + struct ir3_instruction *condition = ir3_get_src(ctx, src)[0]; + + if (src->ssa->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *nir_cond = nir_instr_as_alu(src->ssa->parent_instr); + + if (nir_cond->op == nir_op_inot) { + struct ir3_instruction *inv_cond = + get_branch_condition(ctx, &nir_cond->src[0].src, inv); + *inv = !*inv; + return inv_cond; + } + } + + *inv = false; + return ir3_get_predicate(ctx, condition); +} + +/* Try to fold br (and/or cond1, cond2) into braa/brao cond1, cond2. + */ +static struct ir3_instruction * +fold_conditional_branch(struct ir3_context *ctx, struct nir_src *nir_cond) +{ + if (!ctx->compiler->has_branch_and_or) + return false; + + if (nir_cond->ssa->parent_instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu_cond = nir_instr_as_alu(nir_cond->ssa->parent_instr); + + if ((alu_cond->op != nir_op_iand) && (alu_cond->op != nir_op_ior)) + return NULL; + + /* If the result of the and/or is also used for something else than an if + * condition, the and/or cannot be removed. In that case, we will end-up with + * extra predicate conversions for the conditions without actually removing + * any instructions, resulting in an increase of instructions. Let's not fold + * the conditions in the branch in that case. + */ + if (!nir_def_only_used_by_if(&alu_cond->def)) + return NULL; + + bool inv1, inv2; + struct ir3_instruction *cond1 = + get_branch_condition(ctx, &alu_cond->src[0].src, &inv1); + struct ir3_instruction *cond2 = + get_branch_condition(ctx, &alu_cond->src[1].src, &inv2); + + struct ir3_instruction *branch; + if (alu_cond->op == nir_op_iand) { + branch = ir3_BRAA(ctx->block, cond1, IR3_REG_PREDICATE, cond2, + IR3_REG_PREDICATE); + } else { + branch = ir3_BRAO(ctx->block, cond1, IR3_REG_PREDICATE, cond2, + IR3_REG_PREDICATE); + } + + branch->cat0.inv1 = inv1; + branch->cat0.inv2 = inv2; + return branch; +} + +static struct ir3_instruction * +emit_conditional_branch(struct ir3_context *ctx, struct nir_src *nir_cond) +{ + struct ir3_instruction *folded = fold_conditional_branch(ctx, nir_cond); + if (folded) + return folded; + + bool inv1; + struct ir3_instruction *cond1 = get_branch_condition(ctx, nir_cond, &inv1); + struct ir3_instruction *branch = + ir3_BR(ctx->block, cond1, IR3_REG_PREDICATE); + branch->cat0.inv1 = inv1; + return branch; +} + static void emit_if(struct ir3_context *ctx, nir_if *nif) { @@ -3820,8 +3905,7 @@ emit_if(struct ir3_context *ctx, nir_if *nif) */ ir3_SHPS(ctx->block); } else { - struct ir3_instruction *pred = ir3_get_predicate(ctx, condition); - ir3_BR(ctx->block, pred, IR3_REG_PREDICATE); + emit_conditional_branch(ctx, &nif->condition); } emit_cf_list(ctx, &nif->then_list); diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index fb9bd1d03d5..b4845b6da2e 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -110,6 +110,10 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, NIR_PASS(progress, ctx->s, nir_opt_dce); } + /* This must run after the last nir_opt_algebraic or it gets undone. */ + if (compiler->has_branch_and_or) + NIR_PASS_V(ctx->s, ir3_nir_opt_branch_and_or_not); + /* Enable the texture pre-fetch feature only a4xx onwards. But * only enable it on generations that have been tested: */ diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index a6a8ff70b69..db5b5871c48 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -103,7 +103,8 @@ count_instruction(struct ir3_instruction *n) * earlier so we don't have this constraint. */ return is_alu(n) || - (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR)); + (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR) && + (n->opc != OPC_BRAA) && (n->opc != OPC_BRAO)); } /* Post-RA, we don't have arrays any more, so we have to be a bit careful here diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 69a59b67fe6..f843d78baef 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -714,6 +714,12 @@ invert_branch(struct ir3_instruction *branch) case OPC_BANY: branch->opc = OPC_BALL; break; + case OPC_BRAA: + branch->opc = OPC_BRAO; + break; + case OPC_BRAO: + branch->opc = OPC_BRAA; + break; default: unreachable("can't get here"); } @@ -753,8 +759,6 @@ block_sched(struct ir3 *ir) br2 = ir3_JUMP(block); br2->cat0.target = block->successors[0]; } else { - assert(terminator->srcs_count == 1); - /* create "else" branch first (since "then" block should * frequently/always end up being a fall-thru): */ diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 5320340ea2c..a17e2883c96 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -69,6 +69,7 @@ bool ir3_nir_lower_64b_undef(nir_shader *shader); bool ir3_nir_lower_64b_global(nir_shader *shader); bool ir3_nir_lower_64b_regs(nir_shader *shader); +bool ir3_nir_opt_branch_and_or_not(nir_shader *nir); void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_lower_io_to_temporaries(nir_shader *s); void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s); diff --git a/src/freedreno/ir3/ir3_nir_branch_and_or_not.py b/src/freedreno/ir3/ir3_nir_branch_and_or_not.py new file mode 100644 index 00000000000..02e39548c8a --- /dev/null +++ b/src/freedreno/ir3/ir3_nir_branch_and_or_not.py @@ -0,0 +1,37 @@ +# Copyright (C) 2024 Igalia S.L. +# SPDX-License-Identifier: MIT + + +import argparse +import sys + + +# Inverse DeMorgan's laws to facilitate folding iand/ior into braa/brao. Only +# apply if the inot is only used by branch conditions. Otherwise, it would just +# end-up generating more instructions. +cond_lowering = [ + (('inot(is_only_used_by_if)', ('iand', 'a', 'b')), + ('ior', ('inot', 'a'), ('inot', 'b'))), + (('inot(is_only_used_by_if)', ('ior', 'a', 'b')), + ('iand', ('inot', 'a'), ('inot', 'b'))), +] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "ir3_nir.h"') + print(nir_algebraic.AlgebraicPass("ir3_nir_opt_branch_and_or_not", + cond_lowering).render()) + + +if __name__ == '__main__': + main() diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index e400bd179f3..aacfd1e2fdf 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -40,6 +40,17 @@ ir3_nir_imul_c = custom_target( depend_files : nir_algebraic_depends, ) +ir3_nir_branch_and_or_not_c = custom_target( + 'ir3_nir_branch_and_or_not.c', + input : 'ir3_nir_branch_and_or_not.py', + output : 'ir3_nir_branch_and_or_not.c', + command : [ + prog_python, '@INPUT@', '-p', dir_compiler_nir, + ], + capture : true, + depend_files : nir_algebraic_depends, +) + ir3_parser = custom_target( 'ir3_parser.[ch]', input: 'ir3_parser.y', @@ -123,7 +134,7 @@ libfreedreno_ir3_files = files( libfreedreno_ir3 = static_library( 'freedreno_ir3', - [libfreedreno_ir3_files, ir3_nir_trig_c, ir3_nir_imul_c, ir3_parser[0], ir3_parser[1], ir3_lexer], + [libfreedreno_ir3_files, ir3_nir_trig_c, ir3_nir_imul_c, ir3_nir_branch_and_or_not_c, ir3_parser[0], ir3_parser[1], ir3_lexer], include_directories : [inc_freedreno, inc_include, inc_src], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden',