mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-12 15:30:33 +01:00
ir3: fold and/or and negations into branches
Fold and/or into braa/brao when profitable. Only do this when the and/or is not used for any non-branch instructions as this would increase total instruction count. Add an algebraic nir pass that performs the inverse DeMorgan's laws to try to bring and/or in front of branches. Again, only do this when the original inot in only used for branches. This should always decrease instruction count since the extra inots can be folded into the branch. Fold inot into branches by using the inv1/inv2 cat0 fields. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27411>
This commit is contained in:
parent
96c2fe3e1a
commit
9de628b65c
9 changed files with 153 additions and 6 deletions
|
|
@ -158,6 +158,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
|
||||
compiler->num_predicates = 1;
|
||||
compiler->bitops_can_write_predicates = false;
|
||||
compiler->has_branch_and_or = false;
|
||||
|
||||
if (compiler->gen >= 6) {
|
||||
compiler->samgq_workaround = true;
|
||||
|
|
@ -217,6 +218,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
compiler->load_inline_uniforms_via_preamble_ldgk = dev_info->a7xx.load_inline_uniforms_via_preamble_ldgk;
|
||||
compiler->num_predicates = 4;
|
||||
compiler->bitops_can_write_predicates = true;
|
||||
compiler->has_branch_and_or = true;
|
||||
} else {
|
||||
compiler->max_const_pipeline = 512;
|
||||
compiler->max_const_geom = 512;
|
||||
|
|
|
|||
|
|
@ -220,6 +220,9 @@ struct ir3_compiler {
|
|||
/* True if bitops (and.b, or.b, xor.b, not.b) can write to p0.c */
|
||||
bool bitops_can_write_predicates;
|
||||
|
||||
/* True if braa/brao are available. */
|
||||
bool has_branch_and_or;
|
||||
|
||||
/* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */
|
||||
uint32_t max_variable_workgroup_size;
|
||||
|
||||
|
|
|
|||
|
|
@ -3797,6 +3797,91 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
|
|||
|
||||
static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list);
|
||||
|
||||
/* Get the ir3 branch condition for a given nir source. This will strip any inot
|
||||
* instructions and set *inv when the condition should be inverted. This
|
||||
* inversion can be directly folded into branches (in the inv1/inv2 fields)
|
||||
* instead of adding an explicit not.b/sub.u instruction.
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
get_branch_condition(struct ir3_context *ctx, nir_src *src, bool *inv)
|
||||
{
|
||||
struct ir3_instruction *condition = ir3_get_src(ctx, src)[0];
|
||||
|
||||
if (src->ssa->parent_instr->type == nir_instr_type_alu) {
|
||||
nir_alu_instr *nir_cond = nir_instr_as_alu(src->ssa->parent_instr);
|
||||
|
||||
if (nir_cond->op == nir_op_inot) {
|
||||
struct ir3_instruction *inv_cond =
|
||||
get_branch_condition(ctx, &nir_cond->src[0].src, inv);
|
||||
*inv = !*inv;
|
||||
return inv_cond;
|
||||
}
|
||||
}
|
||||
|
||||
*inv = false;
|
||||
return ir3_get_predicate(ctx, condition);
|
||||
}
|
||||
|
||||
/* Try to fold br (and/or cond1, cond2) into braa/brao cond1, cond2.
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
fold_conditional_branch(struct ir3_context *ctx, struct nir_src *nir_cond)
|
||||
{
|
||||
if (!ctx->compiler->has_branch_and_or)
|
||||
return false;
|
||||
|
||||
if (nir_cond->ssa->parent_instr->type != nir_instr_type_alu)
|
||||
return NULL;
|
||||
|
||||
nir_alu_instr *alu_cond = nir_instr_as_alu(nir_cond->ssa->parent_instr);
|
||||
|
||||
if ((alu_cond->op != nir_op_iand) && (alu_cond->op != nir_op_ior))
|
||||
return NULL;
|
||||
|
||||
/* If the result of the and/or is also used for something else than an if
|
||||
* condition, the and/or cannot be removed. In that case, we will end-up with
|
||||
* extra predicate conversions for the conditions without actually removing
|
||||
* any instructions, resulting in an increase of instructions. Let's not fold
|
||||
* the conditions in the branch in that case.
|
||||
*/
|
||||
if (!nir_def_only_used_by_if(&alu_cond->def))
|
||||
return NULL;
|
||||
|
||||
bool inv1, inv2;
|
||||
struct ir3_instruction *cond1 =
|
||||
get_branch_condition(ctx, &alu_cond->src[0].src, &inv1);
|
||||
struct ir3_instruction *cond2 =
|
||||
get_branch_condition(ctx, &alu_cond->src[1].src, &inv2);
|
||||
|
||||
struct ir3_instruction *branch;
|
||||
if (alu_cond->op == nir_op_iand) {
|
||||
branch = ir3_BRAA(ctx->block, cond1, IR3_REG_PREDICATE, cond2,
|
||||
IR3_REG_PREDICATE);
|
||||
} else {
|
||||
branch = ir3_BRAO(ctx->block, cond1, IR3_REG_PREDICATE, cond2,
|
||||
IR3_REG_PREDICATE);
|
||||
}
|
||||
|
||||
branch->cat0.inv1 = inv1;
|
||||
branch->cat0.inv2 = inv2;
|
||||
return branch;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
emit_conditional_branch(struct ir3_context *ctx, struct nir_src *nir_cond)
|
||||
{
|
||||
struct ir3_instruction *folded = fold_conditional_branch(ctx, nir_cond);
|
||||
if (folded)
|
||||
return folded;
|
||||
|
||||
bool inv1;
|
||||
struct ir3_instruction *cond1 = get_branch_condition(ctx, nir_cond, &inv1);
|
||||
struct ir3_instruction *branch =
|
||||
ir3_BR(ctx->block, cond1, IR3_REG_PREDICATE);
|
||||
branch->cat0.inv1 = inv1;
|
||||
return branch;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_if(struct ir3_context *ctx, nir_if *nif)
|
||||
{
|
||||
|
|
@ -3820,8 +3905,7 @@ emit_if(struct ir3_context *ctx, nir_if *nif)
|
|||
*/
|
||||
ir3_SHPS(ctx->block);
|
||||
} else {
|
||||
struct ir3_instruction *pred = ir3_get_predicate(ctx, condition);
|
||||
ir3_BR(ctx->block, pred, IR3_REG_PREDICATE);
|
||||
emit_conditional_branch(ctx, &nif->condition);
|
||||
}
|
||||
|
||||
emit_cf_list(ctx, &nif->then_list);
|
||||
|
|
|
|||
|
|
@ -110,6 +110,10 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
NIR_PASS(progress, ctx->s, nir_opt_dce);
|
||||
}
|
||||
|
||||
/* This must run after the last nir_opt_algebraic or it gets undone. */
|
||||
if (compiler->has_branch_and_or)
|
||||
NIR_PASS_V(ctx->s, ir3_nir_opt_branch_and_or_not);
|
||||
|
||||
/* Enable the texture pre-fetch feature only a4xx onwards. But
|
||||
* only enable it on generations that have been tested:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -103,7 +103,8 @@ count_instruction(struct ir3_instruction *n)
|
|||
* earlier so we don't have this constraint.
|
||||
*/
|
||||
return is_alu(n) ||
|
||||
(is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR));
|
||||
(is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR) &&
|
||||
(n->opc != OPC_BRAA) && (n->opc != OPC_BRAO));
|
||||
}
|
||||
|
||||
/* Post-RA, we don't have arrays any more, so we have to be a bit careful here
|
||||
|
|
|
|||
|
|
@ -714,6 +714,12 @@ invert_branch(struct ir3_instruction *branch)
|
|||
case OPC_BANY:
|
||||
branch->opc = OPC_BALL;
|
||||
break;
|
||||
case OPC_BRAA:
|
||||
branch->opc = OPC_BRAO;
|
||||
break;
|
||||
case OPC_BRAO:
|
||||
branch->opc = OPC_BRAA;
|
||||
break;
|
||||
default:
|
||||
unreachable("can't get here");
|
||||
}
|
||||
|
|
@ -753,8 +759,6 @@ block_sched(struct ir3 *ir)
|
|||
br2 = ir3_JUMP(block);
|
||||
br2->cat0.target = block->successors[0];
|
||||
} else {
|
||||
assert(terminator->srcs_count == 1);
|
||||
|
||||
/* create "else" branch first (since "then" block should
|
||||
* frequently/always end up being a fall-thru):
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ bool ir3_nir_lower_64b_undef(nir_shader *shader);
|
|||
bool ir3_nir_lower_64b_global(nir_shader *shader);
|
||||
bool ir3_nir_lower_64b_regs(nir_shader *shader);
|
||||
|
||||
bool ir3_nir_opt_branch_and_or_not(nir_shader *nir);
|
||||
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
|
||||
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
|
||||
|
|
|
|||
37
src/freedreno/ir3/ir3_nir_branch_and_or_not.py
Normal file
37
src/freedreno/ir3/ir3_nir_branch_and_or_not.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Copyright (C) 2024 Igalia S.L.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
# Inverse DeMorgan's laws to facilitate folding iand/ior into braa/brao. Only
|
||||
# apply if the inot is only used by branch conditions. Otherwise, it would just
|
||||
# end-up generating more instructions.
|
||||
cond_lowering = [
|
||||
(('inot(is_only_used_by_if)', ('iand', 'a', 'b')),
|
||||
('ior', ('inot', 'a'), ('inot', 'b'))),
|
||||
(('inot(is_only_used_by_if)', ('ior', 'a', 'b')),
|
||||
('iand', ('inot', 'a'), ('inot', 'b'))),
|
||||
]
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-p', '--import-path', required=True)
|
||||
args = parser.parse_args()
|
||||
sys.path.insert(0, args.import_path)
|
||||
run()
|
||||
|
||||
|
||||
def run():
|
||||
import nir_algebraic # pylint: disable=import-error
|
||||
|
||||
print('#include "ir3_nir.h"')
|
||||
print(nir_algebraic.AlgebraicPass("ir3_nir_opt_branch_and_or_not",
|
||||
cond_lowering).render())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -40,6 +40,17 @@ ir3_nir_imul_c = custom_target(
|
|||
depend_files : nir_algebraic_depends,
|
||||
)
|
||||
|
||||
ir3_nir_branch_and_or_not_c = custom_target(
|
||||
'ir3_nir_branch_and_or_not.c',
|
||||
input : 'ir3_nir_branch_and_or_not.py',
|
||||
output : 'ir3_nir_branch_and_or_not.c',
|
||||
command : [
|
||||
prog_python, '@INPUT@', '-p', dir_compiler_nir,
|
||||
],
|
||||
capture : true,
|
||||
depend_files : nir_algebraic_depends,
|
||||
)
|
||||
|
||||
ir3_parser = custom_target(
|
||||
'ir3_parser.[ch]',
|
||||
input: 'ir3_parser.y',
|
||||
|
|
@ -123,7 +134,7 @@ libfreedreno_ir3_files = files(
|
|||
|
||||
libfreedreno_ir3 = static_library(
|
||||
'freedreno_ir3',
|
||||
[libfreedreno_ir3_files, ir3_nir_trig_c, ir3_nir_imul_c, ir3_parser[0], ir3_parser[1], ir3_lexer],
|
||||
[libfreedreno_ir3_files, ir3_nir_trig_c, ir3_nir_imul_c, ir3_nir_branch_and_or_not_c, ir3_parser[0], ir3_parser[1], ir3_lexer],
|
||||
include_directories : [inc_freedreno, inc_include, inc_src],
|
||||
c_args : [no_override_init_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue