Merge branch 'review/predication-1' into 'main'

brw: Replace logical operations with predication

See merge request mesa/mesa!39030
This commit is contained in:
Ian Romanick 2025-12-20 01:49:05 +00:00
commit ef70113fde
4 changed files with 507 additions and 0 deletions

View file

@ -98,6 +98,16 @@ brw_optimize(brw_shader &s)
OPT(brw_opt_dead_code_eliminate);
}
while (OPT(brw_opt_predicate_logic)) {
/* The dead code elimination after opt_predicate_logic can cause the
* first comparison in the set to have a NULL destination. That can make
* it a candidate for additional cmod_propagation and additional
* opt_predicate_logic.
*/
if (OPT(brw_opt_dead_code_eliminate) && OPT(brw_opt_cmod_propagation))
OPT(brw_opt_dead_code_eliminate);
}
if (OPT(brw_lower_pack)) {
OPT(brw_opt_register_coalesce);
OPT(brw_opt_dead_code_eliminate);

View file

@ -0,0 +1,495 @@
/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_shader.h"
#include "brw_analysis.h"
#include "brw_cfg.h"
struct logic_source {
logic_source() : inst(NULL), distance(0), src(0)
{
/* empty */
}
logic_source(brw_inst *inst, unsigned distance, unsigned src) :
inst(inst), distance(distance), src(src)
{
/* empty */
}
brw_inst *inst;
unsigned distance;
unsigned src;
};
/**
 * Determine whether the destination of \c inst is read exactly once.
 *
 * If the def analysis reports a non-zero use count for the destination, that
 * count decides the answer.  Otherwise the destination must not be live out
 * of the block, and the instructions following \c inst in its block are
 * scanned for sources that overlap the written region.
 *
 * \return true if exactly one use was found, false if more than one use was
 * found or the value is live out of the block.
 */
static bool
is_used_once(brw_inst *inst, const intel_device_info *devinfo,
             const brw_live_variables &live_vars,
             const brw_def_analysis &defs)
{
   const unsigned def_uses = defs.get_use_count(inst->dst);

   if (def_uses != 0)
      return def_uses == 1;

   /* Any use outside this block disqualifies the instruction. */
   if (BITSET_TEST(live_vars.block_data[inst->block->num].liveout,
                   live_vars.var_from_reg(inst->dst)))
      return false;

   unsigned local_uses = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan_inst, inst) {
      for (unsigned i = 0; i < scan_inst->sources; i++) {
         const bool reads_dst =
            regions_overlap(inst->dst, inst->size_written,
                            scan_inst->src[i],
                            scan_inst->size_read(devinfo, i));

         if (reads_dst)
            local_uses++;
      }

      /* Checked once per instruction, so a single instruction reading the
       * value through two sources also counts as more than one use.
       */
      if (local_uses > 1)
         return false;
   }

   assert(local_uses == 1);
   return true;
}
/**
 * Conservatively determine whether \c inst is treated as producing a Boolean
 * value.
 *
 * CMP and CMPN always qualify.  AND, NOT, OR, SEL, and XOR qualify only when
 * every source has a definition known to \c defs and that definition is
 * itself a CMP or CMPN.  Everything else, including a NULL \c inst, does not
 * qualify.
 */
static bool
is_Boolean(brw_inst *inst, const brw_def_analysis &defs)
{
   if (inst == NULL)
      return false;

   const enum opcode op = inst->opcode;

   if (op == BRW_OPCODE_CMP || op == BRW_OPCODE_CMPN)
      return true;

   const bool combines_values = op == BRW_OPCODE_AND ||
                                op == BRW_OPCODE_NOT ||
                                op == BRW_OPCODE_OR ||
                                op == BRW_OPCODE_SEL ||
                                op == BRW_OPCODE_XOR;
   if (!combines_values)
      return false;

   for (unsigned i = 0; i < inst->sources; i++) {
      const brw_inst *def = defs.get(inst->src[i]);

      if (def == NULL)
         return false;

      const bool from_compare = def->opcode == BRW_OPCODE_CMP ||
                                def->opcode == BRW_OPCODE_CMPN;
      if (!from_compare)
         return false;
   }

   return true;
}
/**
 * Collect the flags read by the instructions strictly between two
 * instructions of the same block.
 *
 * Reads performed by \c begin or \c end themselves are \b not part of the
 * result.  Passing NULL for \c end scans to the end of the block.
 */
static unsigned
flags_read_between(brw_inst *begin, brw_inst *end,
                   const intel_device_info *devinfo)
{
   unsigned mask = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan, begin) {
      if (scan == end)
         return mask;

      mask |= scan->flags_read(devinfo);
   }

   /* Running off the end of the block is only legitimate when the caller
    * asked for "everything after begin".
    */
   if (end != NULL) {
      UNREACHABLE("end does not occur after begin in the same block.");
   }

   return mask;
}
/**
 * Collect the flags written by the instructions strictly between two
 * instructions of the same block.
 *
 * Writes performed by \c begin or \c end themselves are \b not part of the
 * result.  Passing NULL for \c end scans to the end of the block.
 */
static unsigned
flags_written_between(brw_inst *begin, brw_inst *end,
                      const intel_device_info *devinfo)
{
   unsigned mask = 0;

   foreach_inst_in_block_starting_from(brw_inst, scan, begin) {
      if (scan == end)
         return mask;

      mask |= scan->flags_written(devinfo);
   }

   /* Running off the end of the block is only legitimate when the caller
    * asked for "everything after begin".
    */
   if (end != NULL) {
      UNREACHABLE("end does not occur after begin in the same block.");
   }

   return mask;
}
/**
 * Decide whether \c inst may serve as the producer of a logic operation's
 * source for this optimization.
 *
 * CMP and CMPN are always acceptable.  For any other instruction the flags
 * will stand in for the value it produces, so it must end up with a
 * conditional modifier of NZ (G for BFN): either it already has that
 * modifier, or it has none and (for non-BFN) can accept NZ.
 */
static bool
is_valid_logic_source(const brw_inst *inst)
{
   switch (inst->opcode) {
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
      return true;

   case BRW_OPCODE_BFN:
      return inst->conditional_mod == BRW_CONDITIONAL_NONE ||
             inst->conditional_mod == BRW_CONDITIONAL_G;

   default:
      return (inst->conditional_mod == BRW_CONDITIONAL_NONE &&
              inst->can_do_cmod(BRW_CONDITIONAL_NZ)) ||
             inst->conditional_mod == BRW_CONDITIONAL_NZ;
   }
}
/**
 * Locate the instruction that produces source \c src of \c inst.
 *
 * The block is scanned backwards from \c inst.  The first earlier instruction
 * whose destination overlaps the source ends the search: it is returned if it
 * is a full write at the same offset and execution size as the source and
 * passes is_valid_logic_source(); otherwise an empty logic_source is
 * returned.
 *
 * If nothing in the block overlaps the source, the def analysis is
 * consulted.  A definition found that way is subject to the same checks, and
 * the result carries a distance of UINT_MAX to mark it as not found by the
 * in-block scan.
 *
 * \param inst     Logic instruction whose source is being traced.
 * \param src      Index of the source of \c inst to trace.
 * \param defs     Def analysis used when the producer is not in the block.
 * \param devinfo  Device info, needed to compute the size read by the source.
 *
 * \return A logic_source whose \c inst field is NULL when no usable producer
 * exists.
 */
static logic_source
find_logic_source(brw_inst *inst, unsigned src, const brw_def_analysis &defs,
                  const intel_device_info *devinfo)
{
   unsigned distance = 0;

   foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
      distance++;

      if (regions_overlap(scan_inst->dst, scan_inst->size_written,
                          inst->src[src], inst->size_read(devinfo, src))) {
         /* The offset comparison must be made against the source being
          * traced, not unconditionally against source 0.
          */
         if (scan_inst->is_partial_write() ||
             scan_inst->dst.offset != inst->src[src].offset ||
             scan_inst->exec_size != inst->exec_size ||
             !is_valid_logic_source(scan_inst)) {
            return logic_source();
         }

         return logic_source(scan_inst, distance, src);
      }
   }

   brw_inst *def = defs.get(inst->src[src]);
   if (def != NULL) {
      /* Anything in this block would have been found by the scan above. */
      assert(def->block != inst->block);

      if (def->is_partial_write() ||
          def->dst.offset != inst->src[src].offset ||
          def->exec_size != inst->exec_size ||
          !is_valid_logic_source(def)) {
         def = NULL;
      }
   }

   return logic_source(def, UINT_MAX, src);
}
/**
* Try to eliminate \c logic_inst by predicating \c hither on the flags
* written by \c yon.
*
* On success \c yon and \c hither both write flags, \c hither has a null
* destination and is predicated, and \c logic_inst has been removed from the
* instruction stream.  On failure nothing is left modified (the temporary
* changes made to \c yon while querying its flags are undone before any
* early return).
*
* \param s             Shader being optimized; supplies \c devinfo.
* \param live_vars     Liveness data, used for register and flag live-out.
* \param defs          Def analysis, forwarded to is_used_once().
* \param logic_inst    The AND / OR instruction to eliminate.
* \param hither        Producer closer to \c logic_inst.
* \param yon           Producer farther from \c logic_inst.
* \param hither_flags  Flags \c hither writes, or would write once given a
*                      conditional modifier.
*
* \return true if the transformation was applied.
*/
static bool
try_predicated_cmp(brw_shader &s, const brw_live_variables &live_vars,
const brw_def_analysis &defs,
brw_inst *logic_inst, logic_source &hither,
logic_source &yon, unsigned hither_flags)
{
/* For this path, the farther instruction must also be in the same block
* as the logic operation.
*/
if (yon.inst == NULL || yon.distance == UINT_MAX)
return false;
/* If yon doesn't write any flags yet, determine what flags it would
* write.
*/
unsigned yon_flags = yon.inst->flags_written(s.devinfo);
if (yon_flags == 0) {
assert(yon.inst->conditional_mod == BRW_CONDITIONAL_NONE);
assert(yon.inst->flag_subreg == 0);
/* Temporarily give yon the conditional modifier and flag subregister it
* would receive, query the resulting flag mask, then restore the
* instruction to its original state.
*/
yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
yon.inst->flag_subreg = logic_inst->flag_subreg;
yon_flags = yon.inst->flags_written(s.devinfo);
yon.inst->conditional_mod = BRW_CONDITIONAL_NONE;
yon.inst->flag_subreg = 0;
}
/* If yon does not already write flags, there must be no readers of the flags
* that it will write.
*
* A similar test for hither is not necessary. It is already required
* that there be no uses of the flags produced by hither.
*/
if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
(flags_read_between(yon.inst, hither.inst, s.devinfo) &
yon_flags) != 0) {
return false;
}
/* The flags written by yon must reach hither. */
if ((flags_written_between(yon.inst, hither.inst, s.devinfo) &
yon_flags) != 0)
return false;
/* The flags and the destination written by hither must not be read by
* any instruction other than the logic operation.
*/
if (!is_used_once(hither.inst, s.devinfo, live_vars, defs))
return false;
if ((hither_flags & flags_read_between(hither.inst, logic_inst, s.devinfo)) != 0)
return false;
/* Any flags that hither writes but the logic operation does not must be
* dead after the logic operation: not read later in the block and not
* live out of it.
*/
const unsigned flags_read_after_inst =
flags_read_between(logic_inst, NULL, s.devinfo) |
live_vars.block_data[logic_inst->block->num].flag_liveout[0];
if (flags_read_after_inst & (hither_flags &
~logic_inst->flags_written(s.devinfo)))
return false;
/* It is safe to eliminate the logic operation. Perform the following
* steps:
*
* 1. If yon doesn't already write flags, set a conditional modifier on
* it, and set its flag_subreg.
*
* 2. If hither doesn't already write flags, set a conditional modifier on
* it, and set its flag_subreg.
*
* 3. Make hither's destination be the null register.
*
* 4. Make hither be predicated.
*
* 5. Remove the logic operation.
*/
if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
yon.inst->flag_subreg = logic_inst->flag_subreg;
assert(yon_flags == yon.inst->flags_written(s.devinfo));
}
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
hither.inst->flag_subreg = logic_inst->flag_subreg;
assert(hither_flags == hither.inst->flags_written(s.devinfo));
}
hither.inst->dst = retype(brw_null_reg(), hither.inst->dst.type);
/* The second argument is true only for OR; presumably it selects the
* inverted predicate, so that hither only executes on channels where yon's
* flag is not already set -- confirm against set_predicate_inv().
*/
set_predicate_inv(BRW_PREDICATE_NORMAL,
logic_inst->opcode == BRW_OPCODE_OR,
hither.inst);
/* The predicate read by hither must be covered by what yon writes. */
assert((hither.inst->flags_read(s.devinfo) &
~yon.inst->flags_written(s.devinfo)) == 0);
logic_inst->remove();
return true;
}
/**
* Try to replace \c logic_inst with a predicated MOV (or NOT) of the value
* that \c yon feeds, predicated on the flags written by \c hither.
*
* On success \c hither writes flags and \c logic_inst has been transformed
* in place into a predicated single-source instruction.
*
* \param s             Shader being optimized; supplies \c devinfo.
* \param live_vars     Unused by this function.
* \param defs          Unused by this function.
* \param logic_inst    The AND / OR instruction to rewrite.
* \param hither        Producer closer to \c logic_inst; supplies the
*                      predicate.
* \param yon           Record for the other source; only \c yon.src is read
*                      here.
* \param hither_flags  Flags \c hither writes, or would write once given a
*                      conditional modifier.
*
* \return true if the transformation was applied.
*/
static bool
try_predicated_mov(brw_shader &s, const brw_live_variables &live_vars,
const brw_def_analysis &defs,
brw_inst *logic_inst, logic_source &hither,
logic_source &yon, unsigned hither_flags)
{
/* Cases like
*
* cmp.g.f0.0(8) v946:F, |v945|:F, 0f
* and.nz.f0.0(8) null:UD, -v869:UD, v946:UD
*
* can be handled by replacing the AND instruction with a predicated NOT
* instead of a predicated MOV.
*
* NOTE: ~x != 0 is not the same as x == 0 when x is not known to be a
* Boolean value. Since yon may not be a CMP/CMPN, this is important.
*
* However, cases where the other source is negated would require more
* complicated surgery. De Morgan's Law would have to be applied, and
* all uses of the new predicate would have to be inverted. The
* information is available to make that possible (e.g., the flags
* liveness), but it's a lot more work.
*/
const enum opcode op = logic_inst->src[yon.src].negate ?
BRW_OPCODE_NOT : BRW_OPCODE_MOV;
/* If hither is about to start writing flags, nothing between it and the
* logic operation may read the flags it will clobber.
*/
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
(flags_read_between(hither.inst, logic_inst, s.devinfo) &
hither_flags) != 0) {
return false;
}
/* It is safe to eliminate the logic operation. Perform the following
* steps:
*
* 1. If hither doesn't already write flags, set a conditional modifier on
* it, and set its flag_subreg.
*
* 2. Convert the logic operation to either a MOV or a NOT of the value
* taken from yon.
*/
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
hither.inst->flag_subreg = logic_inst->flag_subreg;
assert(hither_flags == hither.inst->flags_written(s.devinfo));
}
set_predicate_inv(BRW_PREDICATE_NORMAL,
logic_inst->opcode == BRW_OPCODE_OR,
logic_inst);
/* Keep only yon's operand as source 0.  Its negate modifier is cleared
* because the negation was already folded into the choice of NOT above.
*/
logic_inst->src[0] = logic_inst->src[yon.src];
logic_inst->src[0].negate = false;
brw_transform_inst(s, logic_inst, op);
return true;
}
/**
* Try to replace a flag-writing AND / OR with predication.
*
* The producers of both sources are located with find_logic_source() and
* ordered so that \c hither is the one closer to \c inst.  Two rewrites are
* then attempted in order: try_predicated_cmp(), which removes \c inst
* entirely, and try_predicated_mov(), which turns \c inst into a predicated
* MOV / NOT.
*
* Despite the name, the caller in this file passes both AND and OR
* instructions.
*
* \param s          Shader being optimized.
* \param inst       Candidate logic instruction.
* \param live_vars  Liveness data, forwarded to the rewrite helpers.
* \param defs       Def analysis, used to find and classify producers.
*
* \return true if \c inst was removed or rewritten.
*/
static bool
try_predicate_and(brw_shader &s, brw_inst *inst,
const brw_live_variables &live_vars,
const brw_def_analysis &defs)
{
/* Only a logic operation whose flag result means "value != 0" is handled. */
if (inst->conditional_mod != BRW_CONDITIONAL_NZ)
return false;
/* The two sources must be distinct values. */
if (regions_overlap(inst->src[0], inst->size_read(s.devinfo, 0),
inst->src[1], inst->size_read(s.devinfo, 1))) {
return false;
}
/* These names are annoying. Some compilers secretly have "near" and "far"
* as reserved words, so those can't be used.
*/
logic_source hither = find_logic_source(inst, 0, defs, s.devinfo);
logic_source yon = find_logic_source(inst, 1, defs, s.devinfo);
assert(hither.inst == NULL || hither.inst != yon.inst);
if (hither.distance > yon.distance)
SWAP(hither, yon);
/* The closer instruction must be in the same block. */
if (hither.inst == NULL || hither.distance == UINT_MAX)
return false;
/* If the logical operation is AND, one of the comparisons must be provably
* a Boolean value (i.e., 0 or ~0). This is the only way to be sure A&B !=
* 0 is equivalent to (A != 0) && (B != 0).
*/
if (inst->opcode == BRW_OPCODE_AND &&
!is_Boolean(hither.inst, defs) && !is_Boolean(yon.inst, defs))
return false;
/* If hither doesn't write any flags yet, determine what flags it would
* write.
*/
unsigned hither_flags = hither.inst->flags_written(s.devinfo);
if (hither_flags == 0) {
assert(hither.inst->conditional_mod == BRW_CONDITIONAL_NONE);
assert(hither.inst->flag_subreg == 0);
/* Temporarily apply the modifier and flag subregister, query the flag
* mask, then restore the instruction.
*/
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
hither.inst->flag_subreg = inst->flag_subreg;
hither_flags = hither.inst->flags_written(s.devinfo);
hither.inst->conditional_mod = BRW_CONDITIONAL_NONE;
hither.inst->flag_subreg = 0;
}
/* NOTE(review): when hither already has a conditional modifier, nothing
* visible in this function checks that its flag_subreg matches
* inst->flag_subreg.  Both rewrites below appear to assume the flags
* hither writes are the ones inst writes -- confirm that this is
* guaranteed elsewhere, or compare hither_flags against
* inst->flags_written() here.
*/
/* The flags written by hither must reach the logic operation. */
if ((flags_written_between(hither.inst, inst, s.devinfo) &
hither_flags) != 0)
return false;
/* Removing the logic operation outright is only attempted when neither
* source carries a negate modifier.
*/
if (!inst->src[0].negate && !inst->src[1].negate &&
try_predicated_cmp(s, live_vars, defs, inst, hither, yon,
hither_flags)) {
return true;
}
/* The fallback tolerates a negate on yon's source (it becomes a NOT), but
* not on hither's.
*/
if (!inst->src[hither.src].negate &&
try_predicated_mov(s, live_vars, defs, inst, hither, yon,
hither_flags)) {
return true;
}
return false;
}
/**
 * Apply the predicate-logic optimization to one basic block.
 *
 * The block is walked in reverse.  Every unpredicated AND / OR with a null
 * destination, two 4-byte sources, and no \c abs source modifiers is handed
 * to try_predicate_and().
 *
 * \return true if any instruction in the block was changed.
 */
static bool
opt_predicate_logic_local(brw_shader &s, bblock_t *block,
                          const brw_live_variables &live_vars,
                          const brw_def_analysis &defs)
{
   bool progress = false;

   foreach_inst_in_block_reverse_safe(brw_inst, inst, block) {
      const bool is_and_or = inst->opcode == BRW_OPCODE_AND ||
                             inst->opcode == BRW_OPCODE_OR;

      if (is_and_or &&
          inst->predicate == BRW_PREDICATE_NONE &&
          inst->dst.is_null() &&
          brw_type_size_bytes(inst->src[0].type) == 4 &&
          brw_type_size_bytes(inst->src[1].type) == 4 &&
          !inst->src[0].abs && !inst->src[1].abs &&
          try_predicate_and(s, inst, live_vars, defs)) {
         progress = true;
      }
   }

   return progress;
}
/**
 * Replace flag-writing AND / OR logic operations with predication.
 *
 * Runs opt_predicate_logic_local() on every block of the shader's CFG and
 * invalidates instruction-dependent analyses if anything changed.
 *
 * \return true if the shader was modified.
 */
bool
brw_opt_predicate_logic(brw_shader &s)
{
   const brw_live_variables &live_vars = s.live_analysis.require();
   const brw_def_analysis &defs = s.def_analysis.require();

   bool progress = false;

   foreach_block (block, s.cfg) {
      /* The call comes first so that every block is always processed. */
      progress = opt_predicate_logic_local(s, block, live_vars, defs) ||
                 progress;
   }

   if (progress)
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS);

   return progress;
}

View file

@ -372,6 +372,7 @@ bool brw_opt_cse_defs(brw_shader &s);
bool brw_opt_dead_code_eliminate(brw_shader &s);
bool brw_opt_eliminate_find_live_channel(brw_shader &s);
bool brw_opt_fill_and_spill(brw_shader &s);
bool brw_opt_predicate_logic(brw_shader &s);
bool brw_opt_register_coalesce(brw_shader &s);
bool brw_opt_remove_extra_rounding_modes(brw_shader &s);
bool brw_opt_remove_redundant_halts(brw_shader &s);

View file

@ -84,6 +84,7 @@ libintel_compiler_brw_files = files(
'brw_opt_cse.cpp',
'brw_opt_dead_code_eliminate.cpp',
'brw_opt_fill_spill.cpp',
'brw_opt_predicate_logic.cpp',
'brw_opt_register_coalesce.cpp',
'brw_opt_saturate_propagation.cpp',
'brw_opt_txf_combiner.cpp',