brw: Replace logical operations with predication

There is more to do here. A few things I have noticed.

1. There are cases where the ideal pass cannot make progress, but the
   "logic op to predicated move" pass can. Sometimes scheduling can
   rearrange this to sequences like:

            cmp.nz.f0.0(16) g99<1>F       g98<1,1,0>F     0x3f800000F
            cmp.g.f0.0(16)  null<1>HF     g106<16,16,1>HF 0x0000HF
    (+f0.0) mov.nz.f0.0(16) null<1>UD     g99<8,8,1>UD

  We should be able to detect this after scheduling, and eliminate the
  mov.nz.

2. We should extend post-scheduling cmod propagation to handle cases
   where a predicated CMP is the only use of an ALU result. I have
   observed sequences like

            and(16)        v5200:UD       v5048+6.0:UD    134217726u
    (+f0.0) cmp.z.f0.0(16) null:D         v5200:D         0d

   and

            or(16)          g113<1>UD     g112<1,1,0>UD   g20<1,1,0>UD
    (-f0.0) mov.nz.f0.0(16) null<1>UD     g113<8,8,1>UD

v2: Don't allow SEL or CSEL in is_valid_logic_source. No shader-db or
fossil-db changes here, but this prevents problems with (possible)
future commits.

v3: Replace hither and yon with nearer and farther. Find both logic
sources in one loop. Use brw_flags_written. Refactor cmod selection (for
BFN vs all other opcodes) to separate function. Suggested by Caio.

v4: Actually remove flags_written now. Suggested (twice) by Caio.
Require that flags written by the nearer logic source matches the flags
written by the logic operation. This fixes cmp_flag_subreg_mismatch and
mov_flag_subreg_mismatch (added in the next commit). Also
s/inst->src[0]/inst->src[src]/. Noticed by Caio.

v5: flags_read was also unused. Noticed by marge.

shader-db:

Lunar Lake
total instructions in shared programs: 17083282 -> 17072645 (-0.06%)
instructions in affected programs: 2076491 -> 2065854 (-0.51%)
helped: 3952 / HURT: 0

total cycles in shared programs: 887823360 -> 889080938 (0.14%)
cycles in affected programs: 472236518 -> 473494096 (0.27%)
helped: 3156 / HURT: 936

total fills in shared programs: 1778 -> 1778 (0.00%)
fills in affected programs: 286 -> 286 (0.00%)
helped: 2 / HURT: 2

LOST:   27
GAINED: 14

Meteor Lake and DG2 had similar results. (Meteor Lake shown)
total instructions in shared programs: 19980337 -> 19965369 (-0.07%)
instructions in affected programs: 2406043 -> 2391075 (-0.62%)
helped: 4621 / HURT: 7

total cycles in shared programs: 887416449 -> 887170456 (-0.03%)
cycles in affected programs: 457957623 -> 457711630 (-0.05%)
helped: 3776 / HURT: 1039

total fills in shared programs: 4371 -> 4375 (0.09%)
fills in affected programs: 798 -> 802 (0.50%)
helped: 4 / HURT: 6

LOST:   15
GAINED: 1

Tiger Lake
total instructions in shared programs: 19904512 -> 19889603 (-0.07%)
instructions in affected programs: 2405908 -> 2390999 (-0.62%)
helped: 4616 / HURT: 22

total cycles in shared programs: 864580948 -> 863953289 (-0.07%)
cycles in affected programs: 459500521 -> 458872862 (-0.14%)
helped: 3710 / HURT: 1093

total spills in shared programs: 3467 -> 3472 (0.14%)
spills in affected programs: 15 -> 20 (33.33%)
helped: 0 / HURT: 1

total fills in shared programs: 2059 -> 2069 (0.49%)
fills in affected programs: 47 -> 57 (21.28%)
helped: 0 / HURT: 1

LOST:   11
GAINED: 9

Ice Lake
total instructions in shared programs: 20821682 -> 20806373 (-0.07%)
instructions in affected programs: 2447072 -> 2431763 (-0.63%)
helped: 4741 / HURT: 1

total cycles in shared programs: 876811334 -> 876360389 (-0.05%)
cycles in affected programs: 438363075 -> 437912130 (-0.10%)
helped: 4000 / HURT: 724

total fills in shared programs: 3837 -> 3835 (-0.05%)
fills in affected programs: 302 -> 300 (-0.66%)
helped: 1 / HURT: 0

LOST:   12
GAINED: 9

Skylake
total instructions in shared programs: 19041784 -> 19026462 (-0.08%)
instructions in affected programs: 2397491 -> 2382169 (-0.64%)
helped: 4711 / HURT: 0

total cycles in shared programs: 868019298 -> 867790279 (-0.03%)
cycles in affected programs: 441110462 -> 440881443 (-0.05%)
helped: 3915 / HURT: 788

total fills in shared programs: 3767 -> 3765 (-0.05%)
fills in affected programs: 302 -> 300 (-0.66%)
helped: 1 / HURT: 0

LOST:   4
GAINED: 3

fossil-db:

Lunar Lake
Totals:
Instrs: 924697067 -> 922488661 (-0.24%); split: -0.25%, +0.01%
Subgroup size: 40939424 -> 40939744 (+0.00%)
Cycle count: 106291402322 -> 105964111203 (-0.31%); split: -0.66%, +0.35%
Spill count: 3423988 -> 3421004 (-0.09%); split: -0.34%, +0.25%
Fill count: 4877087 -> 4862981 (-0.29%); split: -1.21%, +0.92%
Max live registers: 193812217 -> 193805296 (-0.00%)
Max dispatch width: 49089184 -> 49085216 (-0.01%); split: +0.01%, -0.02%

Totals from 453746 (22.47% of 2019504) affected shaders:
Instrs: 529674876 -> 527466470 (-0.42%); split: -0.43%, +0.02%
Subgroup size: 320 -> 640 (+100.00%)
Cycle count: 87892218969 -> 87564927850 (-0.37%); split: -0.79%, +0.42%
Spill count: 3302695 -> 3299711 (-0.09%); split: -0.35%, +0.26%
Fill count: 4778154 -> 4764048 (-0.30%); split: -1.23%, +0.94%
Max live registers: 65405449 -> 65398528 (-0.01%)
Max dispatch width: 10793104 -> 10789136 (-0.04%); split: +0.04%, -0.08%

Meteor Lake and DG2 had similar results. (Meteor Lake shown)
Totals:
Instrs: 998057341 -> 995683321 (-0.24%); split: -0.25%, +0.01%
Subgroup size: 27545440 -> 27545656 (+0.00%)
Cycle count: 93854696449 -> 93709099572 (-0.16%); split: -0.62%, +0.46%
Spill count: 3709547 -> 3701296 (-0.22%); split: -0.50%, +0.28%
Fill count: 5032889 -> 5014189 (-0.37%); split: -1.28%, +0.91%
Max live registers: 121823974 -> 121810927 (-0.01%)
Max dispatch width: 38021936 -> 38020536 (-0.00%); split: +0.06%, -0.07%

Totals from 505565 (22.13% of 2284025) affected shaders:
Instrs: 549480901 -> 547106881 (-0.43%); split: -0.45%, +0.02%
Subgroup size: 216 -> 432 (+100.00%)
Cycle count: 76260069937 -> 76114473060 (-0.19%); split: -0.76%, +0.57%
Spill count: 3526038 -> 3517787 (-0.23%); split: -0.53%, +0.29%
Fill count: 4844826 -> 4826126 (-0.39%); split: -1.33%, +0.94%
Max live registers: 38085235 -> 38072188 (-0.03%)
Max dispatch width: 8015432 -> 8014032 (-0.02%); split: +0.30%, -0.32%

Tiger Lake
Totals:
Instrs: 1013436935 -> 1011070083 (-0.23%); split: -0.25%, +0.02%
Cycle count: 85763486346 -> 85580242131 (-0.21%); split: -0.68%, +0.47%
Spill count: 3903905 -> 3902350 (-0.04%); split: -0.36%, +0.32%
Fill count: 6801966 -> 6787600 (-0.21%); split: -0.70%, +0.49%
Max live registers: 122298352 -> 122284634 (-0.01%)
Max dispatch width: 37957184 -> 37964608 (+0.02%); split: +0.10%, -0.08%

Totals from 525103 (23.03% of 2280298) affected shaders:
Instrs: 570013347 -> 567646495 (-0.42%); split: -0.44%, +0.03%
Cycle count: 71392808767 -> 71209564552 (-0.26%); split: -0.82%, +0.56%
Spill count: 3757751 -> 3756196 (-0.04%); split: -0.38%, +0.33%
Fill count: 6648525 -> 6634159 (-0.22%); split: -0.72%, +0.51%
Max live registers: 39876402 -> 39862684 (-0.03%)
Max dispatch width: 8453816 -> 8461240 (+0.09%); split: +0.44%, -0.36%

Ice Lake
Totals:
Instrs: 1014312031 -> 1011938992 (-0.23%); split: -0.24%, +0.01%
Cycle count: 86550003161 -> 86343662349 (-0.24%); split: -0.39%, +0.15%
Spill count: 3039497 -> 3035267 (-0.14%); split: -0.33%, +0.19%
Fill count: 5376655 -> 5370235 (-0.12%); split: -0.43%, +0.32%
Max live registers: 125551684 -> 125537675 (-0.01%)
Max dispatch width: 41300016 -> 41301552 (+0.00%); split: +0.02%, -0.02%

Totals from 537158 (23.01% of 2334535) affected shaders:
Instrs: 555656911 -> 553283872 (-0.43%); split: -0.44%, +0.01%
Cycle count: 71869799780 -> 71663458968 (-0.29%); split: -0.47%, +0.19%
Spill count: 2844469 -> 2840239 (-0.15%); split: -0.35%, +0.20%
Fill count: 5006995 -> 5000575 (-0.13%); split: -0.47%, +0.34%
Max live registers: 39809729 -> 39795720 (-0.04%)
Max dispatch width: 9226240 -> 9227776 (+0.02%); split: +0.10%, -0.08%

Skylake
Totals:
Instrs: 519584256 -> 518938991 (-0.12%); split: -0.13%, +0.00%
Cycle count: 57935410863 -> 57867852550 (-0.12%); split: -0.22%, +0.10%
Spill count: 636741 -> 636728 (-0.00%); split: -0.06%, +0.06%
Fill count: 860470 -> 860314 (-0.02%); split: -0.19%, +0.17%
Max live registers: 87895659 -> 87889485 (-0.01%)
Max dispatch width: 32565912 -> 32567080 (+0.00%); split: +0.03%, -0.03%

Totals from 235957 (13.59% of 1736653) affected shaders:
Instrs: 158020578 -> 157375313 (-0.41%); split: -0.41%, +0.00%
Cycle count: 44881056772 -> 44813498459 (-0.15%); split: -0.28%, +0.13%
Spill count: 461098 -> 461085 (-0.00%); split: -0.09%, +0.09%
Fill count: 601255 -> 601099 (-0.03%); split: -0.27%, +0.24%
Max live registers: 16143628 -> 16137454 (-0.04%)
Max dispatch width: 4664240 -> 4665408 (+0.03%); split: +0.20%, -0.17%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39030>
This commit is contained in:
Ian Romanick 2025-11-18 20:09:29 -08:00 committed by Marge Bot
parent 539162936f
commit 67f0fc62fd
4 changed files with 504 additions and 0 deletions

View file

@ -98,6 +98,16 @@ brw_optimize(brw_shader &s)
OPT(brw_opt_dead_code_eliminate);
}
while (OPT(brw_opt_predicate_logic)) {
/* The dead code elimination after brw_opt_predicate_logic can cause the
* first comparison in the set to have a NULL destination. That can make
* it a candidate for additional brw_opt_cmod_propagation and additional
* brw_opt_predicate_logic.
*/
if (OPT(brw_opt_dead_code_eliminate) && OPT(brw_opt_cmod_propagation))
OPT(brw_opt_dead_code_eliminate);
}
if (OPT(brw_lower_pack)) {
OPT(brw_opt_register_coalesce);
OPT(brw_opt_dead_code_eliminate);

View file

@ -0,0 +1,492 @@
/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_shader.h"
#include "brw_analysis.h"
#include "brw_cfg.h"
/**
 * Describes the instruction chosen as the producer of one source of a logic
 * operation (see find_logic_sources).
 */
struct logic_source {
   /** Producing instruction, or NULL when no acceptable producer was found. */
   brw_inst *inst = nullptr;

   /**
    * Number of instructions stepped over while scanning backwards from the
    * logic operation to reach \c inst.  UINT_MAX marks a producer that was
    * not found by the in-block scan.
    */
   unsigned distance = 0u;

   /** Index of the logic operation's source that \c inst produces. */
   unsigned src = 0u;
};
/**
 * Determine whether the destination of \p inst is read exactly once.
 *
 * When the def analysis reports a non-zero use count for the destination,
 * that count alone decides the answer.  Otherwise the destination must not
 * be live out of the block, and the instructions following \p inst in the
 * same block are scanned for sources overlapping the written region.
 */
static bool
is_used_once(brw_inst *inst, const intel_device_info *devinfo,
             const brw_live_variables &live_vars,
             const brw_def_analysis &defs)
{
   const unsigned def_uses = defs.get_use_count(inst->dst);

   if (def_uses != 0)
      return def_uses == 1;

   /* A value that escapes the block may have readers this scan cannot see. */
   if (BITSET_TEST(live_vars.block_data[inst->block->num].liveout,
                   live_vars.var_from_reg(inst->dst)))
      return false;

   unsigned local_uses = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan_inst, inst) {
      for (unsigned i = 0; i < scan_inst->sources; i++) {
         const bool reads_dst =
            regions_overlap(inst->dst, inst->size_written,
                            scan_inst->src[i],
                            scan_inst->size_read(devinfo, i));

         if (reads_dst)
            local_uses++;
      }

      /* No need to keep looking once a second reader has been seen. */
      if (local_uses > 1)
         return false;
   }

   /* Callers only ask about values that have at least one reader. */
   assert(local_uses == 1);
   return true;
}
/**
 * Conservatively decide whether \p inst is known to produce a Boolean value.
 *
 * CMP and CMPN always qualify.  AND, NOT, OR, SEL, and XOR qualify only when
 * the def analysis can show that every one of their sources is produced by a
 * CMP or CMPN.  Everything else, including a NULL \p inst, does not qualify.
 */
static bool
is_Boolean(brw_inst *inst, const brw_def_analysis &defs)
{
   if (inst == NULL)
      return false;

   const enum opcode op = inst->opcode;

   if (op == BRW_OPCODE_CMP || op == BRW_OPCODE_CMPN)
      return true;

   const bool combines_sources = op == BRW_OPCODE_AND ||
                                 op == BRW_OPCODE_NOT ||
                                 op == BRW_OPCODE_OR ||
                                 op == BRW_OPCODE_SEL ||
                                 op == BRW_OPCODE_XOR;
   if (!combines_sources)
      return false;

   /* The result is only as Boolean as the values being combined. */
   for (unsigned i = 0; i < inst->sources; i++) {
      brw_inst *def = defs.get(inst->src[i]);

      if (def == NULL)
         return false;

      const bool from_comparison = def->opcode == BRW_OPCODE_CMP ||
                                   def->opcode == BRW_OPCODE_CMPN;
      if (!from_comparison)
         return false;
   }

   return true;
}
/**
 * Accumulate the flags read by the instructions strictly between two
 * instructions of the same block.
 *
 * Neither \c begin nor \c end contributes to the result.  Passing NULL for
 * \c end accumulates through the end of the block.
 */
static unsigned
flags_read_between(brw_inst *begin, brw_inst *end,
                   const intel_device_info *devinfo)
{
   unsigned mask = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan_inst, begin) {
      if (scan_inst == end)
         return mask;

      mask |= scan_inst->flags_read(devinfo);
   }

   /* Running off the end of the block is only legitimate for a NULL end. */
   if (end != NULL) {
      UNREACHABLE("end does not occur after begin in the same block.");
   }

   return mask;
}
/**
 * Accumulate the flags written by the instructions strictly between two
 * instructions of the same block.
 *
 * Neither \c begin nor \c end contributes to the result.  Passing NULL for
 * \c end accumulates through the end of the block.
 */
static unsigned
flags_written_between(brw_inst *begin, brw_inst *end,
                      const intel_device_info *devinfo)
{
   unsigned mask = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan_inst, begin) {
      if (scan_inst == end)
         return mask;

      mask |= scan_inst->flags_written(devinfo);
   }

   /* Running off the end of the block is only legitimate for a NULL end. */
   if (end != NULL) {
      UNREACHABLE("end does not occur after begin in the same block.");
   }

   return mask;
}
/**
 * Conditional modifier an instruction with opcode \p op must carry for its
 * flag result to stand in for its value: G for BFN, NZ for everything else.
 */
static enum brw_conditional_mod
required_cmod(enum opcode op)
{
   if (op == BRW_OPCODE_BFN)
      return BRW_CONDITIONAL_G;

   return BRW_CONDITIONAL_NZ;
}
static bool
is_valid_logic_source(const brw_inst *inst)
{
if (inst->opcode == BRW_OPCODE_CMP || inst->opcode == BRW_OPCODE_CMPN)
return true;
/* Especially CSEL.NZ can confuse some of the checks below. Rejecting SEL
* and CSEL here keeps that code more clear.
*/
if (inst->opcode == BRW_OPCODE_SEL || inst->opcode == BRW_OPCODE_CSEL)
return false;
/* The flags will be used as a proxy for the value produced by the
* instruction. At the end, the instruction must have a
* conditional modifier of NZ (G for BFN).
*/
const enum brw_conditional_mod req_cmod = required_cmod(inst->opcode);
return (inst->conditional_mod == BRW_CONDITIONAL_NONE &&
inst->can_do_cmod(req_cmod)) || inst->conditional_mod == req_cmod;
}
/**
 * Locate the producers of the two sources of the logic operation \p inst.
 *
 * The block is scanned backwards from \p inst.  The first instruction found
 * whose written region overlaps a source settles that source: it is recorded
 * as the producer only if it is not a partial write, writes at the same
 * offset the source reads, has the same execution size as \p inst, and
 * passes is_valid_logic_source.  A source not settled by the scan is looked
 * up in the def analysis and, subject to the same tests, recorded with a
 * distance of UINT_MAX.
 *
 * \param nearer   Receives the entry with the smaller distance.
 * \param farther  Receives the other entry.
 *
 * Either entry may have a NULL \c inst.  Note that a source settled by a
 * rejected in-block writer keeps the default-constructed entry (NULL
 * \c inst, distance 0, src 0), so it is always reported as \p nearer.
 */
static void
find_logic_sources(brw_inst *inst, const brw_def_analysis &defs,
const intel_device_info *devinfo,
logic_source *nearer, logic_source *farther)
{
unsigned distance = 0;
const unsigned size_read[2] = {
inst->size_read(devinfo, 0),
inst->size_read(devinfo, 1),
};
/* [lo, hi] is the range of source indices not yet settled by the scan. */
int lo = 0;
int hi = 1;
logic_source ls[2];
foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
distance++;
for (int src = lo; src <= hi; src++) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[src], size_read[src])) {
/* Only record the writer if it is usable.  Either way, this is the
 * most recent writer of the source, so stop looking for this source.
 */
if (!(scan_inst->is_partial_write() ||
scan_inst->dst.offset != inst->src[src].offset ||
scan_inst->exec_size != inst->exec_size ||
!is_valid_logic_source(scan_inst))) {
ls[src] = logic_source { scan_inst, distance, (unsigned) src };
}
/* Shrink the unsettled range from whichever end src sits on. */
if (src == lo)
lo++;
else
hi--;
}
}
/* Both sources have been settled. */
if (lo > hi)
break;
}
/* Any source still unsettled has no writer earlier in this block.  Fall
 * back to the def analysis for it.
 */
for (int src = lo; src <= hi; src++) {
brw_inst *def = defs.get(inst->src[src]);
if (def != NULL) {
assert(def->block != inst->block);
if (def->is_partial_write() ||
def->dst.offset != inst->src[src].offset ||
def->exec_size != inst->exec_size ||
!is_valid_logic_source(def)) {
def = NULL;
}
}
ls[src] = logic_source { def, UINT_MAX, (unsigned) src };
}
assert(ls[0].inst == NULL || ls[0].inst != ls[1].inst);
/* Order the results so that ls[0] is the one with the smaller distance. */
if (ls[0].distance > ls[1].distance)
SWAP(ls[0], ls[1]);
*nearer = ls[0];
*farther = ls[1];
}
/**
 * Try to eliminate the logic operation by predicating \p nearer on the flags
 * produced by \p farther.
 *
 * On success, \p farther and \p nearer both write flags, \p nearer has a
 * null destination and is predicated, and \p logic_inst has been removed
 * from the program.  On failure nothing is modified.
 *
 * \param nearer_flags  Flags that \p nearer writes, or would write once given
 *                      a conditional modifier, as computed by the caller.
 *
 * \return true if the transformation was performed.
 */
static bool
try_predicated_cmp(brw_shader &s, const brw_live_variables &live_vars,
const brw_def_analysis &defs,
brw_inst *logic_inst, logic_source &nearer,
logic_source &farther, unsigned nearer_flags)
{
/* For this path, the farther instruction must also be in the same block
* as the logic operation.
*/
if (farther.inst == NULL || farther.distance == UINT_MAX)
return false;
/* If farther doesn't write any flags yet, determine what flags it would
* write.
*/
unsigned farther_flags = farther.inst->flags_written(s.devinfo);
if (farther_flags == 0) {
farther_flags = brw_flags_written(farther.inst->opcode,
required_cmod(farther.inst->opcode),
logic_inst->flag_subreg,
farther.inst->group,
farther.inst->exec_size);
}
/* If farther does not already write flags, there must be no readers of the
* flags that it will write between it and nearer. Such a reader would
* observe the newly written value instead of the value it reads today.
*
* A similar test for nearer is not necessary. It is already required
* that there be no uses of the flags produced by nearer.
*/
if (farther.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
(flags_read_between(farther.inst, nearer.inst, s.devinfo) &
farther_flags) != 0) {
return false;
}
/* The flags written by farther must reach nearer. */
if ((flags_written_between(farther.inst, nearer.inst, s.devinfo) &
farther_flags) != 0)
return false;
/* The flags and the destination written by nearer must not be read by
* any instruction other than the logic operation.
*/
if (!is_used_once(nearer.inst, s.devinfo, live_vars, defs))
return false;
if ((nearer_flags & flags_read_between(nearer.inst, logic_inst, s.devinfo)) != 0)
return false;
/* Flags that nearer writes but the logic operation did not write must not
* be read after the logic operation, either later in this block or (per
* the flag live-out data) after the block.
*/
const unsigned flags_read_after_inst =
flags_read_between(logic_inst, NULL, s.devinfo) |
live_vars.block_data[logic_inst->block->num].flag_liveout[0];
if (flags_read_after_inst & (nearer_flags &
~logic_inst->flags_written(s.devinfo)))
return false;
/* It is safe to eliminate the logic operation. Perform the following
* steps:
*
* 1. If farther doesn't already write flags, set a conditional modifier on
* it, and set its flag_subreg.
*
* 2. If nearer doesn't already write flags, set a conditional modifier on
* it, and set its flag_subreg.
*
* 3. Make nearer's destination be the null register.
*
* 4. Make nearer be predicated.
*
* 5. Remove the logic operation.
*/
if (farther.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
farther.inst->conditional_mod = required_cmod(farther.inst->opcode);
farther.inst->flag_subreg = logic_inst->flag_subreg;
assert(farther_flags == farther.inst->flags_written(s.devinfo));
}
if (nearer.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
nearer.inst->conditional_mod = required_cmod(nearer.inst->opcode);
nearer.inst->flag_subreg = logic_inst->flag_subreg;
assert(nearer_flags == nearer.inst->flags_written(s.devinfo));
}
nearer.inst->dst = retype(brw_null_reg(), nearer.inst->dst.type);
/* The inverse argument is set when the logic operation is an OR. */
set_predicate_inv(BRW_PREDICATE_NORMAL,
logic_inst->opcode == BRW_OPCODE_OR,
nearer.inst);
/* Every flag the now-predicated nearer reads must come from farther. */
assert((nearer.inst->flags_read(s.devinfo) &
~farther.inst->flags_written(s.devinfo)) == 0);
logic_inst->remove();
return true;
}
/**
 * Try to replace the logic operation with a predicated MOV (or NOT) of the
 * value supplied by \p farther, using the flags produced by \p nearer as the
 * predicate.
 *
 * \param nearer_flags  Flags that \p nearer writes, or would write once given
 *                      a conditional modifier, as computed by the caller.
 *
 * \return true if \p logic_inst was rewritten.  Nothing is modified when
 *         false is returned.
 */
static bool
try_predicated_mov(brw_shader &s, const brw_live_variables &live_vars,
                   const brw_def_analysis &defs,
                   brw_inst *logic_inst, logic_source &nearer,
                   logic_source &farther, unsigned nearer_flags)
{
   /* A sequence such as
    *
    *            cmp.g.f0.0(8)   v946:F,  |v945|:F, 0f
    *            and.nz.f0.0(8)  null:UD, -v869:UD, v946:UD
    *
    * is handled by turning the AND into a predicated NOT rather than a
    * predicated MOV.
    *
    * NOTE: ~x != 0 is not the same as x == 0 unless x is known to be a
    * Boolean value.  farther need not be a CMP/CMPN, so this distinction
    * matters.
    *
    * Handling a negation on the other source would take more complicated
    * surgery: De Morgan's Law would have to be applied, and every use of
    * the new predicate would have to be inverted.  The information needed
    * for that (e.g., the flags liveness) is available, but it is a lot
    * more work.
    */
   const bool farther_negated = logic_inst->src[farther.src].negate;
   const enum opcode new_op = farther_negated ? BRW_OPCODE_NOT
                                              : BRW_OPCODE_MOV;

   const bool nearer_needs_cmod =
      nearer.inst->conditional_mod == BRW_CONDITIONAL_NONE;

   if (nearer_needs_cmod) {
      /* Adding a flag write to nearer must not change what any instruction
       * between it and the logic operation reads from those flags.
       */
      const unsigned read_in_between =
         flags_read_between(nearer.inst, logic_inst, s.devinfo);

      if ((read_in_between & nearer_flags) != 0)
         return false;

      /* The rewrite is safe from here on.  First make nearer write the
       * flags by giving it a conditional modifier and the logic operation's
       * flag_subreg.
       */
      nearer.inst->conditional_mod = required_cmod(nearer.inst->opcode);
      nearer.inst->flag_subreg = logic_inst->flag_subreg;
      assert(nearer_flags == nearer.inst->flags_written(s.devinfo));
   }

   /* Then turn the logic operation into a predicated MOV or NOT of the
    * value taken from farther.  The inverse argument is set for an OR.
    */
   set_predicate_inv(BRW_PREDICATE_NORMAL,
                     logic_inst->opcode == BRW_OPCODE_OR,
                     logic_inst);

   logic_inst->src[0] = logic_inst->src[farther.src];
   logic_inst->src[0].negate = false;
   brw_transform_inst(s, logic_inst, new_op);

   return true;
}
/**
 * Try to replace a flag-writing AND or OR with predication of the
 * instructions that produce its sources.
 *
 * The predicated-comparison form is attempted first, and the predicated
 * MOV/NOT form is the fallback.
 *
 * \return true if \p inst was removed or rewritten.
 */
static bool
try_predicate_and(brw_shader &s, brw_inst *inst,
                  const brw_live_variables &live_vars,
                  const brw_def_analysis &defs)
{
   if (inst->conditional_mod != BRW_CONDITIONAL_NZ)
      return false;

   const bool sources_alias =
      regions_overlap(inst->src[0], inst->size_read(s.devinfo, 0),
                      inst->src[1], inst->size_read(s.devinfo, 1));
   if (sources_alias)
      return false;

   /* "near" and "far" would be nicer names, but some compilers quietly
    * treat those as reserved words.
    */
   logic_source nearer;
   logic_source farther;

   find_logic_sources(inst, defs, s.devinfo, &nearer, &farther);

   /* The nearer producer has to exist and live in the same block. */
   if (nearer.inst == NULL || nearer.distance == UINT_MAX)
      return false;

   /* For AND, at least one producer must provably yield a Boolean value
    * (i.e., 0 or ~0).  Nothing else guarantees that A&B != 0 is equivalent
    * to (A != 0) && (B != 0).
    */
   if (inst->opcode == BRW_OPCODE_AND) {
      const bool any_boolean = is_Boolean(nearer.inst, defs) ||
                               is_Boolean(farther.inst, defs);
      if (!any_boolean)
         return false;
   }

   /* Work out which flags nearer writes, or would write if it does not
    * write any yet.
    */
   unsigned nearer_flags = nearer.inst->flags_written(s.devinfo);
   if (nearer_flags == 0) {
      nearer_flags = brw_flags_written(nearer.inst->opcode,
                                       required_cmod(nearer.inst->opcode),
                                       inst->flag_subreg,
                                       nearer.inst->group,
                                       nearer.inst->exec_size);
   }

   /* Every flag the logic operation writes must also be written by
    * nearer.
    */
   const unsigned logic_flags = inst->flags_written(s.devinfo);
   if ((logic_flags & ~nearer_flags) != 0)
      return false;

   /* The flags written by nearer must reach the logic operation. */
   const unsigned clobbered =
      flags_written_between(nearer.inst, inst, s.devinfo);
   if ((clobbered & nearer_flags) != 0)
      return false;

   const bool any_negate = inst->src[0].negate || inst->src[1].negate;
   if (!any_negate &&
       try_predicated_cmp(s, live_vars, defs, inst, nearer, farther,
                          nearer_flags)) {
      return true;
   }

   return !inst->src[nearer.src].negate &&
          try_predicated_mov(s, live_vars, defs, inst, nearer, farther,
                             nearer_flags);
}
/**
 * Run the optimization over one basic block, visiting its instructions in
 * reverse order.
 *
 * Only unpredicated AND/OR instructions with a null destination, 4-byte
 * source types, and no source abs modifiers are considered.
 *
 * \return true if any instruction in \p block was changed.
 */
static bool
opt_predicate_logic_local(brw_shader &s, bblock_t *block,
                          const brw_live_variables &live_vars,
                          const brw_def_analysis &defs)
{
   bool progress = false;

   foreach_inst_in_block_reverse_safe(brw_inst, inst, block) {
      const bool is_logic_op = inst->opcode == BRW_OPCODE_AND ||
                               inst->opcode == BRW_OPCODE_OR;

      const bool is_candidate =
         is_logic_op &&
         inst->predicate == BRW_PREDICATE_NONE &&
         inst->dst.is_null() &&
         brw_type_size_bytes(inst->src[0].type) == 4 &&
         brw_type_size_bytes(inst->src[1].type) == 4 &&
         !inst->src[0].abs && !inst->src[1].abs;

      if (is_candidate && try_predicate_and(s, inst, live_vars, defs))
         progress = true;
   }

   return progress;
}
/**
 * Replace flag-writing logical operations with predicated instructions
 * throughout the shader.
 *
 * \return true if the shader was modified, in which case the analyses that
 *         depend on the instructions have been invalidated.
 */
bool
brw_opt_predicate_logic(brw_shader &s)
{
   const brw_live_variables &live_vars = s.live_analysis.require();
   const brw_def_analysis &defs = s.def_analysis.require();

   bool progress = false;

   foreach_block (block, s.cfg) {
      const bool block_progress =
         opt_predicate_logic_local(s, block, live_vars, defs);

      progress = progress || block_progress;
   }

   if (progress)
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS);

   return progress;
}

View file

@ -367,6 +367,7 @@ bool brw_opt_cse_defs(brw_shader &s);
bool brw_opt_dead_code_eliminate(brw_shader &s);
bool brw_opt_eliminate_find_live_channel(brw_shader &s);
bool brw_opt_fill_and_spill(brw_shader &s);
bool brw_opt_predicate_logic(brw_shader &s);
bool brw_opt_register_coalesce(brw_shader &s);
bool brw_opt_remove_extra_rounding_modes(brw_shader &s);
bool brw_opt_remove_redundant_halts(brw_shader &s);

View file

@ -78,6 +78,7 @@ libintel_compiler_brw_files = files(
'brw_opt_cse.cpp',
'brw_opt_dead_code_eliminate.cpp',
'brw_opt_fill_spill.cpp',
'brw_opt_predicate_logic.cpp',
'brw_opt_register_coalesce.cpp',
'brw_opt_saturate_propagation.cpp',
'brw_opt_txf_combiner.cpp',