From 67f0fc62fdd4c4780b4fa5dc8bed896ca5ba9fc1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 18 Nov 2025 20:09:29 -0800 Subject: [PATCH] brw: Replace logical operations with predication There is more to do here. A few things I have noticed. 1. There are cases where the ideal pass cannot make progress, but the "logic op to predicated move" pass can. Sometimes scheduling can rearrange this to sequences like: cmp.nz.f0.0(16) g99<1>F g98<1,1,0>F 0x3f800000F cmp.g.f0.0(16) null<1>HF g106<16,16,1>HF 0x0000HF (+f0.0) mov.nz.f0.0(16) null<1>UD g99<8,8,1>UD We should be able to detect this after scheduling, and eliminate the mov.nz. 2. We should extend post-scheduling cmod propagation to handle cases where a predicated CMP is the only use of an ALU result. I have observed sequences like and(16) v5200:UD v5048+6.0:UD 134217726u (+f0.0) cmp.z.f0.0(16) null:D v5200:D 0d and or(16) g113<1>UD g112<1,1,0>UD g20<1,1,0>UD (-f0.0) mov.nz.f0.0(16) null<1>UD g113<8,8,1>UD v2: Don't allow SEL or CSEL in is_valid_logic_source. No shader-db or fossil-db changes here, but this prevents problems with (possible) future commits. v3: Replace hither and yon with nearer and farther. Find both logic sources in one loop. Use brw_flags_written. Refactor cmod selection (for BFN vs all other opcodes) to separate function. Suggested by Caio. v4: Actually remove flags_written now. Suggested (twice) by Caio. Require that flags written by the nearer logic source matches the flags written by the logic operation. This fixes cmp_flag_subreg_mismatch and mov_flag_subreg_mismatch (added in the next commit). Also s/inst->src[0]/inst->src[src]/. Noticed by Caio. v5: flags_read was also unused. Noticed by marge. 
shader-db: Lunar Lake total instructions in shared programs: 17083282 -> 17072645 (-0.06%) instructions in affected programs: 2076491 -> 2065854 (-0.51%) helped: 3952 / HURT: 0 total cycles in shared programs: 887823360 -> 889080938 (0.14%) cycles in affected programs: 472236518 -> 473494096 (0.27%) helped: 3156 / HURT: 936 total fills in shared programs: 1778 -> 1778 (0.00%) fills in affected programs: 286 -> 286 (0.00%) helped: 2 / HURT: 2 LOST: 27 GAINED: 14 Meteor Lake and DG2 had similar results. (Meteor Lake shown) total instructions in shared programs: 19980337 -> 19965369 (-0.07%) instructions in affected programs: 2406043 -> 2391075 (-0.62%) helped: 4621 / HURT: 7 total cycles in shared programs: 887416449 -> 887170456 (-0.03%) cycles in affected programs: 457957623 -> 457711630 (-0.05%) helped: 3776 / HURT: 1039 total fills in shared programs: 4371 -> 4375 (0.09%) fills in affected programs: 798 -> 802 (0.50%) helped: 4 / HURT: 6 LOST: 15 GAINED: 1 Tiger Lake total instructions in shared programs: 19904512 -> 19889603 (-0.07%) instructions in affected programs: 2405908 -> 2390999 (-0.62%) helped: 4616 / HURT: 22 total cycles in shared programs: 864580948 -> 863953289 (-0.07%) cycles in affected programs: 459500521 -> 458872862 (-0.14%) helped: 3710 / HURT: 1093 total spills in shared programs: 3467 -> 3472 (0.14%) spills in affected programs: 15 -> 20 (33.33%) helped: 0 / HURT: 1 total fills in shared programs: 2059 -> 2069 (0.49%) fills in affected programs: 47 -> 57 (21.28%) helped: 0 / HURT: 1 LOST: 11 GAINED: 9 Ice Lake total instructions in shared programs: 20821682 -> 20806373 (-0.07%) instructions in affected programs: 2447072 -> 2431763 (-0.63%) helped: 4741 / HURT: 1 total cycles in shared programs: 876811334 -> 876360389 (-0.05%) cycles in affected programs: 438363075 -> 437912130 (-0.10%) helped: 4000 / HURT: 724 total fills in shared programs: 3837 -> 3835 (-0.05%) fills in affected programs: 302 -> 300 (-0.66%) helped: 1 / HURT: 0 LOST: 12 
GAINED: 9 Skylake total instructions in shared programs: 19041784 -> 19026462 (-0.08%) instructions in affected programs: 2397491 -> 2382169 (-0.64%) helped: 4711 / HURT: 0 total cycles in shared programs: 868019298 -> 867790279 (-0.03%) cycles in affected programs: 441110462 -> 440881443 (-0.05%) helped: 3915 / HURT: 788 total fills in shared programs: 3767 -> 3765 (-0.05%) fills in affected programs: 302 -> 300 (-0.66%) helped: 1 / HURT: 0 LOST: 4 GAINED: 3 fossil-db: Lunar Lake Totals: Instrs: 924697067 -> 922488661 (-0.24%); split: -0.25%, +0.01% Subgroup size: 40939424 -> 40939744 (+0.00%) Cycle count: 106291402322 -> 105964111203 (-0.31%); split: -0.66%, +0.35% Spill count: 3423988 -> 3421004 (-0.09%); split: -0.34%, +0.25% Fill count: 4877087 -> 4862981 (-0.29%); split: -1.21%, +0.92% Max live registers: 193812217 -> 193805296 (-0.00%) Max dispatch width: 49089184 -> 49085216 (-0.01%); split: +0.01%, -0.02% Totals from 453746 (22.47% of 2019504) affected shaders: Instrs: 529674876 -> 527466470 (-0.42%); split: -0.43%, +0.02% Subgroup size: 320 -> 640 (+100.00%) Cycle count: 87892218969 -> 87564927850 (-0.37%); split: -0.79%, +0.42% Spill count: 3302695 -> 3299711 (-0.09%); split: -0.35%, +0.26% Fill count: 4778154 -> 4764048 (-0.30%); split: -1.23%, +0.94% Max live registers: 65405449 -> 65398528 (-0.01%) Max dispatch width: 10793104 -> 10789136 (-0.04%); split: +0.04%, -0.08% Meteor Lake and DG2 had similar results. 
(Meteor Lake shown) Totals: Instrs: 998057341 -> 995683321 (-0.24%); split: -0.25%, +0.01% Subgroup size: 27545440 -> 27545656 (+0.00%) Cycle count: 93854696449 -> 93709099572 (-0.16%); split: -0.62%, +0.46% Spill count: 3709547 -> 3701296 (-0.22%); split: -0.50%, +0.28% Fill count: 5032889 -> 5014189 (-0.37%); split: -1.28%, +0.91% Max live registers: 121823974 -> 121810927 (-0.01%) Max dispatch width: 38021936 -> 38020536 (-0.00%); split: +0.06%, -0.07% Totals from 505565 (22.13% of 2284025) affected shaders: Instrs: 549480901 -> 547106881 (-0.43%); split: -0.45%, +0.02% Subgroup size: 216 -> 432 (+100.00%) Cycle count: 76260069937 -> 76114473060 (-0.19%); split: -0.76%, +0.57% Spill count: 3526038 -> 3517787 (-0.23%); split: -0.53%, +0.29% Fill count: 4844826 -> 4826126 (-0.39%); split: -1.33%, +0.94% Max live registers: 38085235 -> 38072188 (-0.03%) Max dispatch width: 8015432 -> 8014032 (-0.02%); split: +0.30%, -0.32% Tiger Lake Totals: Instrs: 1013436935 -> 1011070083 (-0.23%); split: -0.25%, +0.02% Cycle count: 85763486346 -> 85580242131 (-0.21%); split: -0.68%, +0.47% Spill count: 3903905 -> 3902350 (-0.04%); split: -0.36%, +0.32% Fill count: 6801966 -> 6787600 (-0.21%); split: -0.70%, +0.49% Max live registers: 122298352 -> 122284634 (-0.01%) Max dispatch width: 37957184 -> 37964608 (+0.02%); split: +0.10%, -0.08% Totals from 525103 (23.03% of 2280298) affected shaders: Instrs: 570013347 -> 567646495 (-0.42%); split: -0.44%, +0.03% Cycle count: 71392808767 -> 71209564552 (-0.26%); split: -0.82%, +0.56% Spill count: 3757751 -> 3756196 (-0.04%); split: -0.38%, +0.33% Fill count: 6648525 -> 6634159 (-0.22%); split: -0.72%, +0.51% Max live registers: 39876402 -> 39862684 (-0.03%) Max dispatch width: 8453816 -> 8461240 (+0.09%); split: +0.44%, -0.36% Ice Lake Totals: Instrs: 1014312031 -> 1011938992 (-0.23%); split: -0.24%, +0.01% Cycle count: 86550003161 -> 86343662349 (-0.24%); split: -0.39%, +0.15% Spill count: 3039497 -> 3035267 (-0.14%); split: -0.33%, 
+0.19% Fill count: 5376655 -> 5370235 (-0.12%); split: -0.43%, +0.32% Max live registers: 125551684 -> 125537675 (-0.01%) Max dispatch width: 41300016 -> 41301552 (+0.00%); split: +0.02%, -0.02% Totals from 537158 (23.01% of 2334535) affected shaders: Instrs: 555656911 -> 553283872 (-0.43%); split: -0.44%, +0.01% Cycle count: 71869799780 -> 71663458968 (-0.29%); split: -0.47%, +0.19% Spill count: 2844469 -> 2840239 (-0.15%); split: -0.35%, +0.20% Fill count: 5006995 -> 5000575 (-0.13%); split: -0.47%, +0.34% Max live registers: 39809729 -> 39795720 (-0.04%) Max dispatch width: 9226240 -> 9227776 (+0.02%); split: +0.10%, -0.08% Skylake Totals: Instrs: 519584256 -> 518938991 (-0.12%); split: -0.13%, +0.00% Cycle count: 57935410863 -> 57867852550 (-0.12%); split: -0.22%, +0.10% Spill count: 636741 -> 636728 (-0.00%); split: -0.06%, +0.06% Fill count: 860470 -> 860314 (-0.02%); split: -0.19%, +0.17% Max live registers: 87895659 -> 87889485 (-0.01%) Max dispatch width: 32565912 -> 32567080 (+0.00%); split: +0.03%, -0.03% Totals from 235957 (13.59% of 1736653) affected shaders: Instrs: 158020578 -> 157375313 (-0.41%); split: -0.41%, +0.00% Cycle count: 44881056772 -> 44813498459 (-0.15%); split: -0.28%, +0.13% Spill count: 461098 -> 461085 (-0.00%); split: -0.09%, +0.09% Fill count: 601255 -> 601099 (-0.03%); split: -0.27%, +0.24% Max live registers: 16143628 -> 16137454 (-0.04%) Max dispatch width: 4664240 -> 4665408 (+0.03%); split: +0.20%, -0.17% Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw/brw_opt.cpp | 10 + .../compiler/brw/brw_opt_predicate_logic.cpp | 492 ++++++++++++++++++ src/intel/compiler/brw/brw_shader.h | 1 + src/intel/compiler/brw/meson.build | 1 + 4 files changed, 504 insertions(+) create mode 100644 src/intel/compiler/brw/brw_opt_predicate_logic.cpp diff --git a/src/intel/compiler/brw/brw_opt.cpp b/src/intel/compiler/brw/brw_opt.cpp index 5dfdeeb6372..4558082c275 100644 --- a/src/intel/compiler/brw/brw_opt.cpp +++ 
b/src/intel/compiler/brw/brw_opt.cpp @@ -98,6 +98,16 @@ brw_optimize(brw_shader &s) OPT(brw_opt_dead_code_eliminate); } + while (OPT(brw_opt_predicate_logic)) { + /* The dead code elimination after brw_opt_predicate_logic can cause the + * first comparison in the set to have a NULL destination. That can make + * it a candidate for additional brw_opt_cmod_propagation and additional + * brw_opt_predicate_logic. + */ + if (OPT(brw_opt_dead_code_eliminate) && OPT(brw_opt_cmod_propagation)) + OPT(brw_opt_dead_code_eliminate); + } + if (OPT(brw_lower_pack)) { OPT(brw_opt_register_coalesce); OPT(brw_opt_dead_code_eliminate); diff --git a/src/intel/compiler/brw/brw_opt_predicate_logic.cpp b/src/intel/compiler/brw/brw_opt_predicate_logic.cpp new file mode 100644 index 00000000000..fc7ccd9c571 --- /dev/null +++ b/src/intel/compiler/brw/brw_opt_predicate_logic.cpp @@ -0,0 +1,492 @@ +/* + * Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "brw_shader.h" +#include "brw_analysis.h" +#include "brw_cfg.h" +/** + * The producer of one source of a logic operation, as reported by + * find_logic_sources(). + * + * \c inst is NULL when no usable producer was found. \c distance counts the + * instructions scanned backwards from the logic operation to reach the + * producer, or is UINT_MAX when the producer came from defs.get() instead of + * the scan. \c src is the index of the source that the producer writes. + */ +struct logic_source { + brw_inst *inst = NULL; + unsigned distance = 0; + unsigned src = 0; +}; +/** + * Check whether the destination of \c inst has exactly one use. + * + * A non-zero count from defs.get_use_count() is trusted directly. Otherwise + * the destination must not be live out of the block, and the overlapping + * sources of the instructions scanned in this block are counted. + */ +static bool +is_used_once(brw_inst *inst, const intel_device_info *devinfo, + const brw_live_variables &live_vars, + const brw_def_analysis &defs) +{ + unsigned use_count = defs.get_use_count(inst->dst); + if (use_count != 0) + return use_count == 1; + + /* If there are any uses outside the block, fail. Otherwise count the sources in this block that overlap the destination, and give up as soon as a second one is seen. 
*/ + if (BITSET_TEST(live_vars.block_data[inst->block->num].liveout, + live_vars.var_from_reg(inst->dst))) + return false; + + foreach_inst_in_block_starting_from(brw_inst, scan_inst, inst) { + for (unsigned i = 0; i < scan_inst->sources; i++) { + if (regions_overlap(inst->dst, inst->size_written, + scan_inst->src[i], scan_inst->size_read(devinfo, i))) { + use_count++; + } + } + + if (use_count > 1) + return false; + } + + assert(use_count == 1); + return true; +} +/** + * Check whether \c inst is treated as producing a Boolean value. + * + * CMP and CMPN always qualify. AND, NOT, OR, SEL, and XOR qualify only when + * defs.get() finds a CMP or CMPN definition for every source. A NULL + * \c inst and every other opcode do not qualify. + */ +static bool +is_Boolean(brw_inst *inst, const brw_def_analysis &defs) +{ + if (inst == NULL) + return false; + + switch (inst->opcode) { + case BRW_OPCODE_CMP: + case BRW_OPCODE_CMPN: + return true; + + case BRW_OPCODE_AND: + case BRW_OPCODE_NOT: + case BRW_OPCODE_OR: + case BRW_OPCODE_SEL: + case BRW_OPCODE_XOR: { + for (unsigned i = 0; i < inst->sources; i++) { + brw_inst *def = defs.get(inst->src[i]); + + if (def == NULL) + return false; + + if (def->opcode != BRW_OPCODE_CMP && + def->opcode != BRW_OPCODE_CMPN) + return false; + } + + return true; + } + + default: + return false; + } +} + +/** + * Calculate the flags read between two instructions. + * + * Flags read by \c begin or \c end are \b not included in the return value. + * + * If \c end is NULL, the flags read by every instruction scanned after + * \c begin in its block are accumulated. Any other \c end that is not + * reached by that scan hits UNREACHABLE. + */ +static unsigned +flags_read_between(brw_inst *begin, brw_inst *end, + const intel_device_info *devinfo) +{ + unsigned flags_read = 0; + + foreach_inst_in_block_starting_from(brw_inst, inst, begin) { + if (inst == end) + return flags_read; + + flags_read |= inst->flags_read(devinfo); + } + + if (end == NULL) + return flags_read; + + UNREACHABLE("end does not occur after begin in the same block."); +} + +/** + * Calculate the flags written between two instructions. + * + * Flags written by \c begin or \c end are \b not included in the return value. + * + * If \c end is NULL, the flags written by every instruction scanned after + * \c begin in its block are accumulated. Any other \c end that is not + * reached by that scan hits UNREACHABLE. 
+ */ +static unsigned +flags_written_between(brw_inst *begin, brw_inst *end, + const intel_device_info *devinfo) +{ + unsigned flags_written = 0; + + foreach_inst_in_block_starting_from(brw_inst, inst, begin) { + if (inst == end) + return flags_written; + + flags_written |= inst->flags_written(devinfo); + } + + if (end == NULL) + return flags_written; + + UNREACHABLE("end does not occur after begin in the same block."); +} +/** + * Conditional modifier that a logic source must carry: G for BFN, NZ for + * every other opcode. + */ +static enum brw_conditional_mod +required_cmod(enum opcode op) +{ + return op == BRW_OPCODE_BFN ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_NZ; +} +/** + * Check whether \c inst may act as one of the two producers that feed a + * logic operation handled by this pass. + */ +static bool +is_valid_logic_source(const brw_inst *inst) +{ + if (inst->opcode == BRW_OPCODE_CMP || inst->opcode == BRW_OPCODE_CMPN) + return true; + + /* Especially CSEL.NZ can confuse some of the checks below. Rejecting SEL + * and CSEL here keeps that code more clear. + */ + if (inst->opcode == BRW_OPCODE_SEL || inst->opcode == BRW_OPCODE_CSEL) + return false; + + /* The flags will be used as a proxy for the value produced by the + * instruction. At the end, the instruction must have a + * conditional modifier of NZ (G for BFN). An instruction that has no + * conditional modifier yet is accepted only when can_do_cmod() allows the + * required one. 
+ */ + const enum brw_conditional_mod req_cmod = required_cmod(inst->opcode); + + return (inst->conditional_mod == BRW_CONDITIONAL_NONE && + inst->can_do_cmod(req_cmod)) || inst->conditional_mod == req_cmod; +} +/** + * Find the instructions that produce sources 0 and 1 of \c inst. + * + * The block is scanned backwards from \c inst for the first overlapping + * write of each source. A write that is partial, has a different offset or + * exec_size, or fails is_valid_logic_source() leaves that entry + * default-initialized (NULL inst, distance 0). A source with no overlapping + * write in the scan is looked up with defs.get() and recorded with a + * distance of UINT_MAX. + * + * The entry with the smaller distance is returned in \c nearer and the + * other in \c farther. + */ +static void +find_logic_sources(brw_inst *inst, const brw_def_analysis &defs, + const intel_device_info *devinfo, + logic_source *nearer, logic_source *farther) +{ + unsigned distance = 0; + const unsigned size_read[2] = { + inst->size_read(devinfo, 0), + inst->size_read(devinfo, 1), + }; + int lo = 0; + int hi = 1; + logic_source ls[2]; + + foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) { + distance++; + + for (int src = lo; src <= hi; src++) { + if (regions_overlap(scan_inst->dst, scan_inst->size_written, + inst->src[src], size_read[src])) { + if (!(scan_inst->is_partial_write() || + scan_inst->dst.offset != inst->src[src].offset || + scan_inst->exec_size != inst->exec_size || + !is_valid_logic_source(scan_inst))) { + ls[src] = logic_source { scan_inst, distance, (unsigned) src }; + } + + if (src == lo) + lo++; + else + hi--; + } + } + + if (lo > hi) + break; + } + + for (int src = lo; src <= hi; src++) { + brw_inst *def = defs.get(inst->src[src]); + if (def != NULL) { + assert(def->block != inst->block); + + if (def->is_partial_write() || + def->dst.offset != inst->src[src].offset || + def->exec_size != inst->exec_size || + !is_valid_logic_source(def)) { + def = NULL; + } + } + + ls[src] = logic_source { def, UINT_MAX, (unsigned) src }; + } + + assert(ls[0].inst == NULL || ls[0].inst != ls[1].inst); + + if (ls[0].distance > ls[1].distance) + SWAP(ls[0], ls[1]); + + *nearer = ls[0]; + *farther = ls[1]; +} +/** + * Try to remove \c logic_inst by making \c nearer a predicated, flag-writing + * instruction that is predicated on the flags written by \c farther. + * + * \return true if \c logic_inst was removed. + */ +static bool +try_predicated_cmp(brw_shader &s, const brw_live_variables &live_vars, + const brw_def_analysis &defs, + brw_inst *logic_inst, logic_source &nearer, + logic_source &farther, unsigned nearer_flags) +{ + /* For this path, the farther instruction must also be in the same block + * as the logic operation. find_logic_sources() uses a distance of UINT_MAX + * for a producer that was not found by scanning this block. 
+ */ + if (farther.inst == NULL || farther.distance == UINT_MAX) + return false; + + /* If farther doesn't write any flags yet, determine what flags it would + * write. + */ + unsigned farther_flags = farther.inst->flags_written(s.devinfo); + if (farther_flags == 0) { + farther_flags = brw_flags_written(farther.inst->opcode, + required_cmod(farther.inst->opcode), + logic_inst->flag_subreg, + farther.inst->group, + farther.inst->exec_size); + } + + /* If farther does not already write flags, there must be no readers of the flags + * that it will write. + * + * A similar test for nearer is not necessary. It is already required + * that there be no uses of the flags produced by nearer. + */ + if (farther.inst->conditional_mod == BRW_CONDITIONAL_NONE && + (flags_read_between(farther.inst, nearer.inst, s.devinfo) & + farther_flags) != 0) { + return false; + } + + /* The flags written by farther must reach nearer. + * + * NOTE(review): nothing in this function compares farther_flags with the + * flags that nearer will read as its predicate when farther already has a + * conditional modifier; that relationship is only checked by the assert + * just before the logic operation is removed. Confirm that is sufficient + * for builds without asserts. + */ + if ((flags_written_between(farther.inst, nearer.inst, s.devinfo) & + farther_flags) != 0) + return false; + + /* The flags and the destination written by nearer must not be read by + * any instruction other than the logic operation. + */ + if (!is_used_once(nearer.inst, s.devinfo, live_vars, defs)) + return false; + + if ((nearer_flags & flags_read_between(nearer.inst, logic_inst, s.devinfo)) != 0) + return false; + + const unsigned flags_read_after_inst = + flags_read_between(logic_inst, NULL, s.devinfo) | + live_vars.block_data[logic_inst->block->num].flag_liveout[0]; + + if (flags_read_after_inst & (nearer_flags & + ~logic_inst->flags_written(s.devinfo))) + return false; + + /* It is safe to eliminate the logic operation. Perform the following + * steps: + * + * 1. If farther doesn't already write flags, set a conditional modifier on + * it, and set its flag_subreg. + * + * 2. If nearer doesn't already write flags, set a conditional modifier on + * it, and set its flag_subreg. + * + * 3. Make nearer's destination be the null register. + * + * 4. 
Make nearer be predicated. The second argument given to + * set_predicate_inv() is true when the logic operation is OR. + * + * 5. Remove the logic operation. + */ + if (farther.inst->conditional_mod == BRW_CONDITIONAL_NONE) { + farther.inst->conditional_mod = required_cmod(farther.inst->opcode); + farther.inst->flag_subreg = logic_inst->flag_subreg; + + assert(farther_flags == farther.inst->flags_written(s.devinfo)); + } + + if (nearer.inst->conditional_mod == BRW_CONDITIONAL_NONE) { + nearer.inst->conditional_mod = required_cmod(nearer.inst->opcode); + nearer.inst->flag_subreg = logic_inst->flag_subreg; + + assert(nearer_flags == nearer.inst->flags_written(s.devinfo)); + } + + nearer.inst->dst = retype(brw_null_reg(), nearer.inst->dst.type); + + set_predicate_inv(BRW_PREDICATE_NORMAL, + logic_inst->opcode == BRW_OPCODE_OR, + nearer.inst); + + assert((nearer.inst->flags_read(s.devinfo) & + ~farther.inst->flags_written(s.devinfo)) == 0); + + logic_inst->remove(); + return true; +} +/** + * Try to turn \c logic_inst into a predicated MOV or NOT of the source + * recorded in \c farther, predicated on the flags written by \c nearer. + * + * \return true if \c logic_inst was rewritten. + */ +static bool +try_predicated_mov(brw_shader &s, const brw_live_variables &live_vars, + const brw_def_analysis &defs, + brw_inst *logic_inst, logic_source &nearer, + logic_source &farther, unsigned nearer_flags) +{ + /* Cases like + * + * cmp.g.f0.0(8) v946:F, |v945|:F, 0f + * and.nz.f0.0(8) null:UD, -v869:UD, v946:UD + * + * can be handled by replacing the AND instruction with a predicated NOT + * instead of a predicated MOV. + * + * NOTE: ~x != 0 is not the same as x == 0 when x is not known to be a + * Boolean value. Since farther may not be a CMP/CMPN, this is important. + * + * However, cases where the other source is negated would require more + * complicated surgery. De Morgan's Law would have to be applied, and + * all uses of the new predicate would have to be inverted. The + * information is available to make that possible (e.g., the flags + * liveness), but it's a lot more work. + */ + const enum opcode op = logic_inst->src[farther.src].negate ? 
+ BRW_OPCODE_NOT : BRW_OPCODE_MOV; + + if (nearer.inst->conditional_mod == BRW_CONDITIONAL_NONE && + (flags_read_between(nearer.inst, logic_inst, s.devinfo) & + nearer_flags) != 0) { + return false; + } + + /* It is safe to replace the logic operation. Perform the following + * steps: + * + * 1. If nearer doesn't already write flags, set a conditional modifier on + * it, and set its flag_subreg. + * + * 2. Convert the logic operation to either a MOV or a NOT of the value + * taken from farther. The logic operation becomes predicated (the + * second argument given to set_predicate_inv() is true for OR), the + * chosen source is moved to src[0], and its negate modifier is + * cleared because NOT was selected in its place. + */ + if (nearer.inst->conditional_mod == BRW_CONDITIONAL_NONE) { + nearer.inst->conditional_mod = required_cmod(nearer.inst->opcode); + nearer.inst->flag_subreg = logic_inst->flag_subreg; + + assert(nearer_flags == nearer.inst->flags_written(s.devinfo)); + } + + set_predicate_inv(BRW_PREDICATE_NORMAL, + logic_inst->opcode == BRW_OPCODE_OR, + logic_inst); + logic_inst->src[0] = logic_inst->src[farther.src]; + logic_inst->src[0].negate = false; + + brw_transform_inst(s, logic_inst, op); + return true; +} +/** + * Try to replace the flag-writing logic operation \c inst with predication. + * + * try_predicated_cmp() is attempted when neither source is negated, then + * try_predicated_mov() when the source produced by nearer is not negated. + * + * \return true if \c inst was removed or rewritten. + */ +static bool +try_predicate_and(brw_shader &s, brw_inst *inst, + const brw_live_variables &live_vars, + const brw_def_analysis &defs) +{ + if (inst->conditional_mod != BRW_CONDITIONAL_NZ) + return false; + + if (regions_overlap(inst->src[0], inst->size_read(s.devinfo, 0), + inst->src[1], inst->size_read(s.devinfo, 1))) { + return false; + } + /* These names are annoying. Some compilers secretly have "near" and "far" + * as reserved words, so those can't be used. + */ + logic_source nearer; + logic_source farther; + + find_logic_sources(inst, defs, s.devinfo, &nearer, &farther); + + /* The nearer instruction must be in the same block as the logic operation. */ + if (nearer.inst == NULL || nearer.distance == UINT_MAX) + return false; + + /* If the logical operation is AND, one of the sources must be provably + * a Boolean value (i.e., 0 or ~0). This is the only way to be sure A&B != + * 0 is equivalent to (A != 0) && (B != 0). 
+ */ + if (inst->opcode == BRW_OPCODE_AND && + !is_Boolean(nearer.inst, defs) && !is_Boolean(farther.inst, defs)) + return false; + + /* If nearer doesn't write any flags yet, determine what flags it would + * write once it is given the required conditional modifier and the + * flag_subreg of the logic operation. + */ + unsigned nearer_flags = nearer.inst->flags_written(s.devinfo); + if (nearer_flags == 0) { + nearer_flags = brw_flags_written(nearer.inst->opcode, + required_cmod(nearer.inst->opcode), + inst->flag_subreg, + nearer.inst->group, + nearer.inst->exec_size); + } + /* Every flag written by the logic operation must also be written by nearer. */ + unsigned flags_written = inst->flags_written(s.devinfo); + if ((nearer_flags & flags_written) != flags_written) + return false; + + /* The flags written by nearer must reach the logic operation. */ + if ((flags_written_between(nearer.inst, inst, s.devinfo) & + nearer_flags) != 0) + return false; + + if (!inst->src[0].negate && !inst->src[1].negate && + try_predicated_cmp(s, live_vars, defs, inst, nearer, farther, + nearer_flags)) { + return true; + } + + if (!inst->src[nearer.src].negate && + try_predicated_mov(s, live_vars, defs, inst, nearer, farther, + nearer_flags)) { + return true; + } + + return false; +} +/** + * Visit the instructions of \c block in reverse and hand every unpredicated + * AND or OR that has a null destination, 4-byte sources, and no abs source + * modifier to try_predicate_and(). + * + * \return true if any instruction was changed. + */ +static bool +opt_predicate_logic_local(brw_shader &s, bblock_t *block, + const brw_live_variables &live_vars, + const brw_def_analysis &defs) +{ + bool progress = false; + + foreach_inst_in_block_reverse_safe(brw_inst, inst, block) { + switch (inst->opcode) { + case BRW_OPCODE_AND: + case BRW_OPCODE_OR: + if (inst->predicate == BRW_PREDICATE_NONE && + inst->dst.is_null() && + brw_type_size_bytes(inst->src[0].type) == 4 && + brw_type_size_bytes(inst->src[1].type) == 4 && + !inst->src[0].abs && !inst->src[1].abs) { + if (try_predicate_and(s, inst, live_vars, defs)) + progress = true; + } + + break; + + default: + break; + } + } + + return progress; +} +/** + * Run the predicate-logic optimization on every block of the shader. + * + * \return true if any instruction was changed, in which case + * invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS) is called on the shader. + */ +bool +brw_opt_predicate_logic(brw_shader &s) +{ + bool progress = false; + const brw_live_variables &live_vars = s.live_analysis.require(); + const brw_def_analysis &defs = s.def_analysis.require(); + + foreach_block (block, 
s.cfg) { + if (opt_predicate_logic_local(s, block, live_vars, defs)) + progress = true; + } + + if (progress) + s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS); + + return progress; +} diff --git a/src/intel/compiler/brw/brw_shader.h b/src/intel/compiler/brw/brw_shader.h index f5049598354..2fa99e16aba 100644 --- a/src/intel/compiler/brw/brw_shader.h +++ b/src/intel/compiler/brw/brw_shader.h @@ -367,6 +367,7 @@ bool brw_opt_cse_defs(brw_shader &s); bool brw_opt_dead_code_eliminate(brw_shader &s); bool brw_opt_eliminate_find_live_channel(brw_shader &s); bool brw_opt_fill_and_spill(brw_shader &s); +bool brw_opt_predicate_logic(brw_shader &s); bool brw_opt_register_coalesce(brw_shader &s); bool brw_opt_remove_extra_rounding_modes(brw_shader &s); bool brw_opt_remove_redundant_halts(brw_shader &s); diff --git a/src/intel/compiler/brw/meson.build b/src/intel/compiler/brw/meson.build index 4af4e46305b..b3c5a288550 100644 --- a/src/intel/compiler/brw/meson.build +++ b/src/intel/compiler/brw/meson.build @@ -78,6 +78,7 @@ libintel_compiler_brw_files = files( 'brw_opt_cse.cpp', 'brw_opt_dead_code_eliminate.cpp', 'brw_opt_fill_spill.cpp', + 'brw_opt_predicate_logic.cpp', 'brw_opt_register_coalesce.cpp', 'brw_opt_saturate_propagation.cpp', 'brw_opt_txf_combiner.cpp',