diff --git a/src/intel/compiler/brw/brw_opt.cpp b/src/intel/compiler/brw/brw_opt.cpp
index 9ce3a6a8cf0..59e98d88eba 100644
--- a/src/intel/compiler/brw/brw_opt.cpp
+++ b/src/intel/compiler/brw/brw_opt.cpp
@@ -98,6 +98,16 @@ brw_optimize(brw_shader &s)
       OPT(brw_opt_dead_code_eliminate);
    }
 
+   while (OPT(brw_opt_predicate_logic)) {
+      /* The dead code elimination after opt_predicate_logic can cause the
+       * first comparison in the set to have a NULL destination. That can make
+       * it a candidate for additional cmod_propagation and additional
+       * opt_predicate_logic.
+       */
+      if (OPT(brw_opt_dead_code_eliminate) && OPT(brw_opt_cmod_propagation))
+         OPT(brw_opt_dead_code_eliminate);
+   }
+
    if (OPT(brw_lower_pack)) {
       OPT(brw_opt_register_coalesce);
       OPT(brw_opt_dead_code_eliminate);
diff --git a/src/intel/compiler/brw/brw_opt_predicate_logic.cpp b/src/intel/compiler/brw/brw_opt_predicate_logic.cpp
new file mode 100644
index 00000000000..e17f7a22f56
--- /dev/null
+++ b/src/intel/compiler/brw/brw_opt_predicate_logic.cpp
@@ -0,0 +1,574 @@
+/*
+ * Copyright © 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "brw_shader.h"
+#include "brw_analysis.h"
+#include "brw_cfg.h"
+
+/**
+ * The instruction that produces one source of a logic operation.
+ */
+struct logic_source {
+   logic_source() : inst(NULL), distance(0), src(0)
+   {
+      /* empty */
+   }
+
+   logic_source(brw_inst *inst, unsigned distance, unsigned src) :
+      inst(inst), distance(distance), src(src)
+   {
+      /* empty */
+   }
+
+   /** Producing instruction, or NULL when no usable producer was found. */
+   brw_inst *inst;
+
+   /**
+    * Number of instructions stepped backwards from the logic operation to
+    * reach \c inst, or UINT_MAX when \c inst came from the def analysis
+    * instead of the in-block scan.
+    */
+   unsigned distance;
+
+   /** Index of the logic operation source that \c inst produces. */
+   unsigned src;
+};
+
+/**
+ * Determine whether the destination of \c inst is read exactly once.
+ *
+ * The use count from the def analysis is trusted when it is non-zero.
+ * Otherwise the value must not be live out of the block, and the remainder
+ * of the block is scanned for sources that overlap the destination.
+ */
+static bool
+is_used_once(brw_inst *inst, const intel_device_info *devinfo,
+             const brw_live_variables &live_vars,
+             const brw_def_analysis &defs)
+{
+   unsigned use_count = defs.get_use_count(inst->dst);
+   if (use_count != 0)
+      return use_count == 1;
+
+   /* If there are any uses outside the block, fail. */
+   if (BITSET_TEST(live_vars.block_data[inst->block->num].liveout,
+                   live_vars.var_from_reg(inst->dst)))
+      return false;
+
+   foreach_inst_in_block_starting_from(brw_inst, scan_inst, inst) {
+      for (unsigned i = 0; i < scan_inst->sources; i++) {
+         if (regions_overlap(inst->dst, inst->size_written,
+                             scan_inst->src[i], scan_inst->size_read(devinfo, i))) {
+            use_count++;
+         }
+      }
+
+      if (use_count > 1)
+         return false;
+   }
+
+   assert(use_count == 1);
+   return true;
+}
+
+/**
+ * Determine whether \c inst is known to produce a Boolean (0 or ~0) result.
+ *
+ * CMP and CMPN qualify directly.  AND, NOT, OR, SEL, and XOR qualify only
+ * when every source has a def that is itself a CMP or CMPN.
+ */
+static bool
+is_Boolean(brw_inst *inst, const brw_def_analysis &defs)
+{
+   if (inst == NULL)
+      return false;
+
+   switch (inst->opcode) {
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
+      return true;
+
+   case BRW_OPCODE_AND:
+   case BRW_OPCODE_NOT:
+   case BRW_OPCODE_OR:
+   case BRW_OPCODE_SEL:
+   case BRW_OPCODE_XOR: {
+      for (unsigned i = 0; i < inst->sources; i++) {
+         brw_inst *def = defs.get(inst->src[i]);
+
+         if (def == NULL)
+            return false;
+
+         if (def->opcode != BRW_OPCODE_CMP &&
+             def->opcode != BRW_OPCODE_CMPN)
+            return false;
+      }
+
+      return true;
+   }
+
+   default:
+      return false;
+   }
+}
+
+/**
+ * Calculate the flags read between two instructions.
+ *
+ * Flags read by \c begin or \c end are \b not included in the return value.
+ */
+static unsigned
+flags_read_between(brw_inst *begin, brw_inst *end,
+                   const intel_device_info *devinfo)
+{
+   unsigned flags_read = 0;
+
+   foreach_inst_in_block_starting_from(brw_inst, inst, begin) {
+      if (inst == end)
+         return flags_read;
+
+      flags_read |= inst->flags_read(devinfo);
+   }
+
+   if (end == NULL)
+      return flags_read;
+
+   UNREACHABLE("end does not occur after begin in the same block.");
+}
+
+/**
+ * Calculate the flags written between two instructions.
+ *
+ * Flags written by \c begin or \c end are \b not included in the return value.
+ */
+static unsigned
+flags_written_between(brw_inst *begin, brw_inst *end,
+                      const intel_device_info *devinfo)
+{
+   unsigned flags_written = 0;
+
+   foreach_inst_in_block_starting_from(brw_inst, inst, begin) {
+      if (inst == end)
+         return flags_written;
+
+      flags_written |= inst->flags_written(devinfo);
+   }
+
+   if (end == NULL)
+      return flags_written;
+
+   UNREACHABLE("end does not occur after begin in the same block.");
+}
+
+/**
+ * Determine whether the flag result of \c inst can stand in for its value.
+ *
+ * CMP and CMPN always qualify.  Any other instruction must either already
+ * use the NZ conditional modifier (G for BFN) or have no conditional
+ * modifier and, except for BFN, be able to accept NZ.
+ */
+static bool
+is_valid_logic_source(const brw_inst *inst)
+{
+   if (inst->opcode == BRW_OPCODE_CMP ||
+       inst->opcode == BRW_OPCODE_CMPN)
+      return true;
+
+   /* The flags will be used as a proxy for the value produced by the
+    * instruction. At the end, the instruction must have a
+    * conditional modifier of NZ (G for BFN).
+    */
+   if (inst->opcode == BRW_OPCODE_BFN) {
+      if (inst->conditional_mod == BRW_CONDITIONAL_NONE ||
+          inst->conditional_mod == BRW_CONDITIONAL_G) {
+         return true;
+      }
+   } else {
+      if ((inst->conditional_mod == BRW_CONDITIONAL_NONE &&
+           inst->can_do_cmod(BRW_CONDITIONAL_NZ)) ||
+          inst->conditional_mod == BRW_CONDITIONAL_NZ) {
+         return true;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Find the instruction that produces source \c src of \c inst.
+ *
+ * The block is scanned backwards from \c inst for the nearest write that
+ * overlaps the source.  If none is found, the def analysis is consulted.
+ * The producer is rejected if it is a partial write, if its destination
+ * offset or execution size do not match the source being examined, or if
+ * is_valid_logic_source() fails.
+ */
+static logic_source
+find_logic_source(brw_inst *inst, unsigned src, const brw_def_analysis &defs,
+                  const intel_device_info *devinfo)
+{
+   unsigned distance = 0;
+
+   foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
+      distance++;
+
+      if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+                          inst->src[src], inst->size_read(devinfo, src))) {
+         if (scan_inst->is_partial_write() ||
+             scan_inst->dst.offset != inst->src[src].offset ||
+             scan_inst->exec_size != inst->exec_size ||
+             !is_valid_logic_source(scan_inst)) {
+            return logic_source();
+         }
+
+         return logic_source(scan_inst, distance, src);
+      }
+   }
+
+   /* Nothing in this block writes the source.  Fall back to the def
+    * analysis, which can only name a producer in another block.
+    */
+   brw_inst *def = defs.get(inst->src[src]);
+   if (def != NULL) {
+      assert(def->block != inst->block);
+
+      if (def->is_partial_write() ||
+          def->dst.offset != inst->src[src].offset ||
+          def->exec_size != inst->exec_size ||
+          !is_valid_logic_source(def)) {
+         def = NULL;
+      }
+   }
+
+   return logic_source(def, UINT_MAX, src);
+}
+
+/**
+ * Replace the logic operation by predicating the nearer producer on the
+ * flag written by the farther one.
+ *
+ * \c yon is made to write the flag if it does not already, \c hither
+ * becomes a predicated flag write with a null destination, and
+ * \c logic_inst is removed.  Fails unless \c yon is in the same block.
+ *
+ * \return true if the transformation was applied.
+ */
+static bool
+try_predicated_cmp(brw_shader &s, const brw_live_variables &live_vars,
+                   const brw_def_analysis &defs,
+                   brw_inst *logic_inst, logic_source &hither,
+                   logic_source &yon, unsigned hither_flags)
+{
+   /* For this path, the farther instruction must also be in the same block
+    * as the logic operation.
+    */
+   if (yon.inst == NULL || yon.distance == UINT_MAX)
+      return false;
+
+   /* If yon doesn't write any flags yet, determine what flags it would
+    * write.
+    */
+   unsigned yon_flags = yon.inst->flags_written(s.devinfo);
+   if (yon_flags == 0) {
+      assert(yon.inst->conditional_mod == BRW_CONDITIONAL_NONE);
+      assert(yon.inst->flag_subreg == 0);
+
+      yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
+         BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
+      yon.inst->flag_subreg = logic_inst->flag_subreg;
+
+      yon_flags = yon.inst->flags_written(s.devinfo);
+
+      yon.inst->conditional_mod = BRW_CONDITIONAL_NONE;
+      yon.inst->flag_subreg = 0;
+   }
+
+   /* If yon does not already write flags, there must be no readers of the flags
+    * that it will write.
+    *
+    * A similar test for hither is not necessary. It is already required
+    * that there be no uses of the flags produced by hither.
+    */
+   if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
+       (flags_read_between(yon.inst, hither.inst, s.devinfo) &
+        yon_flags) != 0) {
+      return false;
+   }
+
+   /* The flags written by yon must reach hither. */
+   if ((flags_written_between(yon.inst, hither.inst, s.devinfo) &
+        yon_flags) != 0)
+      return false;
+
+   /* The flags and the destination written by hither must not be read by
+    * any instruction other than the logic operation.
+    */
+   if (!is_used_once(hither.inst, s.devinfo, live_vars, defs))
+      return false;
+
+   if ((hither_flags & flags_read_between(hither.inst, logic_inst, s.devinfo)) != 0)
+      return false;
+
+   const unsigned flags_read_after_inst =
+      flags_read_between(logic_inst, NULL, s.devinfo) |
+      live_vars.block_data[logic_inst->block->num].flag_liveout[0];
+
+   if (flags_read_after_inst & (hither_flags &
+                                ~logic_inst->flags_written(s.devinfo)))
+      return false;
+
+   /* It is safe to eliminate the logic operation. Perform the following
+    * steps:
+    *
+    * 1. If yon doesn't already write flags, set a conditional modifier on
+    *    it, and set its flag_subreg.
+    *
+    * 2. If hither doesn't already write flags, set a conditional modifier on
+    *    it, and set its flag_subreg.
+    *
+    * 3. Make hither's destination be the null register.
+    *
+    * 4. Make hither be predicated.
+    *
+    * 5. Remove the logic operation.
+    */
+   if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+      yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
+         BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
+      yon.inst->flag_subreg = logic_inst->flag_subreg;
+
+      assert(yon_flags == yon.inst->flags_written(s.devinfo));
+   }
+
+   if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+      hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
+         BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
+      hither.inst->flag_subreg = logic_inst->flag_subreg;
+
+      assert(hither_flags == hither.inst->flags_written(s.devinfo));
+   }
+
+   hither.inst->dst = retype(brw_null_reg(), hither.inst->dst.type);
+
+   set_predicate_inv(BRW_PREDICATE_NORMAL,
+                     logic_inst->opcode == BRW_OPCODE_OR,
+                     hither.inst);
+
+   assert((hither.inst->flags_read(s.devinfo) &
+           ~yon.inst->flags_written(s.devinfo)) == 0);
+
+   logic_inst->remove();
+   return true;
+}
+
+/**
+ * Replace the logic operation with a MOV or NOT of the farther source,
+ * predicated on the flag written by the nearer producer.
+ *
+ * \return true if the transformation was applied.
+ */
+static bool
+try_predicated_mov(brw_shader &s, const brw_live_variables &live_vars,
+                   const brw_def_analysis &defs,
+                   brw_inst *logic_inst, logic_source &hither,
+                   logic_source &yon, unsigned hither_flags)
+{
+   /* Cases like
+    *
+    *    cmp.g.f0.0(8) v946:F, |v945|:F, 0f
+    *    and.nz.f0.0(8) null:UD, -v869:UD, v946:UD
+    *
+    * can be handled by replacing the AND instruction with a predicated NOT
+    * instead of a predicated MOV.
+    *
+    * NOTE: ~x != 0 is not the same as x == 0 when x is not known to be a
+    * Boolean value. Since yon may not be a CMP/CMPN, this is important.
+    *
+    * However, cases where the other source is negated would require more
+    * complicated surgery. De Morgan's Law would have to be applied, and
+    * all uses of the new predicate would have to be inverted. The
+    * information is available to make that possible (e.g., the flags
+    * liveness), but it's a lot more work.
+    */
+   const enum opcode op = logic_inst->src[yon.src].negate ?
+      BRW_OPCODE_NOT : BRW_OPCODE_MOV;
+
+   if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
+       (flags_read_between(hither.inst, logic_inst, s.devinfo) &
+        hither_flags) != 0) {
+      return false;
+   }
+
+   /* It is safe to eliminate the logic operation. Perform the following
+    * steps:
+    *
+    * 1. If hither doesn't already write flags, set a conditional modifier on
+    *    it, and set its flag_subreg.
+    *
+    * 2. Convert the logic operation to either a MOV or a NOT of the value
+    *    taken from yon.
+    */
+   if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+      hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
+         BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
+      hither.inst->flag_subreg = logic_inst->flag_subreg;
+
+      assert(hither_flags == hither.inst->flags_written(s.devinfo));
+   }
+
+   /* NOTE(review): the predicate added here uses logic_inst's flag_subreg.
+    * This presumes a hither that already has a conditional modifier writes
+    * that same flag subregister -- confirm.
+    */
+   set_predicate_inv(BRW_PREDICATE_NORMAL,
+                     logic_inst->opcode == BRW_OPCODE_OR,
+                     logic_inst);
+   logic_inst->src[0] = logic_inst->src[yon.src];
+   logic_inst->src[0].negate = false;
+
+   brw_transform_inst(s, logic_inst, op);
+   return true;
+}
+
+/**
+ * Try to turn the logic operation \c inst into predication.
+ *
+ * The producers of both sources are located, the nearer one is named
+ * \c hither and the farther one \c yon, and then try_predicated_cmp() and
+ * try_predicated_mov() are attempted in that order.
+ *
+ * \return true if \c inst was removed or rewritten.
+ */
+static bool
+try_predicate_and(brw_shader &s, brw_inst *inst,
+                  const brw_live_variables &live_vars,
+                  const brw_def_analysis &defs)
+{
+   if (inst->conditional_mod != BRW_CONDITIONAL_NZ)
+      return false;
+
+   if (regions_overlap(inst->src[0], inst->size_read(s.devinfo, 0),
+                       inst->src[1], inst->size_read(s.devinfo, 1))) {
+      return false;
+   }
+   /* These names are annoying. Some compilers secretly have "near" and "far"
+    * as reserved words, so those can't be used.
+    */
+   logic_source hither = find_logic_source(inst, 0, defs, s.devinfo);
+   logic_source yon = find_logic_source(inst, 1, defs, s.devinfo);
+
+   assert(hither.inst == NULL || hither.inst != yon.inst);
+
+   if (hither.distance > yon.distance)
+      SWAP(hither, yon);
+
+   /* The closer instruction must be in the same block. */
+   if (hither.inst == NULL || hither.distance == UINT_MAX)
+      return false;
+
+   /* If the logical operation is AND, one of the comparisons must be provably
+    * a Boolean value (i.e., 0 or ~0). This is the only way to be sure A&B !=
+    * 0 is equivalent to (A != 0) && (B != 0).
+    */
+   if (inst->opcode == BRW_OPCODE_AND &&
+       !is_Boolean(hither.inst, defs) && !is_Boolean(yon.inst, defs))
+      return false;
+
+   /* If hither doesn't write any flags yet, determine what flags it would
+    * write.
+    */
+   unsigned hither_flags = hither.inst->flags_written(s.devinfo);
+   if (hither_flags == 0) {
+      assert(hither.inst->conditional_mod == BRW_CONDITIONAL_NONE);
+      assert(hither.inst->flag_subreg == 0);
+
+      hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
+         BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
+      hither.inst->flag_subreg = inst->flag_subreg;
+
+      hither_flags = hither.inst->flags_written(s.devinfo);
+
+      hither.inst->conditional_mod = BRW_CONDITIONAL_NONE;
+      hither.inst->flag_subreg = 0;
+   }
+
+   /* The flags written by hither must reach the logic operation. */
+   if ((flags_written_between(hither.inst, inst, s.devinfo) &
+        hither_flags) != 0)
+      return false;
+
+   if (!inst->src[0].negate && !inst->src[1].negate &&
+       try_predicated_cmp(s, live_vars, defs, inst, hither, yon,
+                          hither_flags)) {
+      return true;
+   }
+
+   if (!inst->src[hither.src].negate &&
+       try_predicated_mov(s, live_vars, defs, inst, hither, yon,
+                          hither_flags)) {
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Run the optimization over one block, visiting instructions in reverse.
+ *
+ * Only unpredicated AND and OR instructions with a null destination, 4-byte
+ * source types, and no source \c abs modifiers are considered.
+ */
+static bool
+opt_predicate_logic_local(brw_shader &s, bblock_t *block,
+                          const brw_live_variables &live_vars,
+                          const brw_def_analysis &defs)
+{
+   bool progress = false;
+
+   foreach_inst_in_block_reverse_safe(brw_inst, inst, block) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_AND:
+      case BRW_OPCODE_OR:
+         if (inst->predicate == BRW_PREDICATE_NONE &&
+             inst->dst.is_null() &&
+             brw_type_size_bytes(inst->src[0].type) == 4 &&
+             brw_type_size_bytes(inst->src[1].type) == 4 &&
+             !inst->src[0].abs && !inst->src[1].abs) {
+            if (try_predicate_and(s, inst, live_vars, defs))
+               progress = true;
+         }
+
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
+/**
+ * Replace flag-writing AND/OR of comparison results with predication.
+ *
+ * \return true if any instruction was changed.
+ */
+bool
+brw_opt_predicate_logic(brw_shader &s)
+{
+   bool progress = false;
+   const brw_live_variables &live_vars = s.live_analysis.require();
+   const brw_def_analysis &defs = s.def_analysis.require();
+
+   foreach_block (block, s.cfg) {
+      if (opt_predicate_logic_local(s, block, live_vars, defs))
+         progress = true;
+   }
+
+   if (progress)
+      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS);
+
+   return progress;
+}
diff --git a/src/intel/compiler/brw/brw_shader.h b/src/intel/compiler/brw/brw_shader.h
index 0cac6233ef2..bfe6a41eeb3 100644
--- a/src/intel/compiler/brw/brw_shader.h
+++ b/src/intel/compiler/brw/brw_shader.h
@@ -372,6 +372,7 @@ bool brw_opt_cse_defs(brw_shader &s);
 bool brw_opt_dead_code_eliminate(brw_shader &s);
 bool brw_opt_eliminate_find_live_channel(brw_shader &s);
 bool brw_opt_fill_and_spill(brw_shader &s);
+bool brw_opt_predicate_logic(brw_shader &s);
 bool brw_opt_register_coalesce(brw_shader &s);
 bool brw_opt_remove_extra_rounding_modes(brw_shader &s);
 bool brw_opt_remove_redundant_halts(brw_shader &s);
diff --git a/src/intel/compiler/brw/meson.build b/src/intel/compiler/brw/meson.build
index dea40949c08..b8a598ea82f 100644
--- a/src/intel/compiler/brw/meson.build
+++ b/src/intel/compiler/brw/meson.build
@@ -84,6 +84,7 @@ libintel_compiler_brw_files = files(
   'brw_opt_cse.cpp',
   'brw_opt_dead_code_eliminate.cpp',
   'brw_opt_fill_spill.cpp',
+  'brw_opt_predicate_logic.cpp',
   'brw_opt_register_coalesce.cpp',
   'brw_opt_saturate_propagation.cpp',
   'brw_opt_txf_combiner.cpp',