mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
Merge branch 'review/predication-1' into 'main'
brw: Replace logical operations with predication See merge request mesa/mesa!39030
This commit is contained in:
commit
ef70113fde
4 changed files with 507 additions and 0 deletions
|
|
@ -98,6 +98,16 @@ brw_optimize(brw_shader &s)
|
|||
OPT(brw_opt_dead_code_eliminate);
|
||||
}
|
||||
|
||||
while (OPT(brw_opt_predicate_logic)) {
|
||||
/* The dead code elimination after opt_predicate_logic can cause the
|
||||
* first comparison in the set to have a NULL destination. That can make
|
||||
* it a candidate for additional cmod_propagation and additional
|
||||
* opt_predicate_logic.
|
||||
*/
|
||||
if (OPT(brw_opt_dead_code_eliminate) && OPT(brw_opt_cmod_propagation))
|
||||
OPT(brw_opt_dead_code_eliminate);
|
||||
}
|
||||
|
||||
if (OPT(brw_lower_pack)) {
|
||||
OPT(brw_opt_register_coalesce);
|
||||
OPT(brw_opt_dead_code_eliminate);
|
||||
|
|
|
|||
495
src/intel/compiler/brw/brw_opt_predicate_logic.cpp
Normal file
495
src/intel/compiler/brw/brw_opt_predicate_logic.cpp
Normal file
|
|
@ -0,0 +1,495 @@
|
|||
/*
|
||||
* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_shader.h"
|
||||
#include "brw_analysis.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
struct logic_source {
|
||||
logic_source() : inst(NULL), distance(0), src(0)
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
||||
logic_source(brw_inst *inst, unsigned distance, unsigned src) :
|
||||
inst(inst), distance(distance), src(src)
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
||||
brw_inst *inst;
|
||||
unsigned distance;
|
||||
unsigned src;
|
||||
};
|
||||
|
||||
static bool
|
||||
is_used_once(brw_inst *inst, const intel_device_info *devinfo,
|
||||
const brw_live_variables &live_vars,
|
||||
const brw_def_analysis &defs)
|
||||
{
|
||||
unsigned use_count = defs.get_use_count(inst->dst);
|
||||
if (use_count != 0)
|
||||
return use_count == 1;
|
||||
|
||||
/* If there are any uses outside the block, fail. */
|
||||
if (BITSET_TEST(live_vars.block_data[inst->block->num].liveout,
|
||||
live_vars.var_from_reg(inst->dst)))
|
||||
return false;
|
||||
|
||||
foreach_inst_in_block_starting_from(brw_inst, scan_inst, inst) {
|
||||
for (unsigned i = 0; i < scan_inst->sources; i++) {
|
||||
if (regions_overlap(inst->dst, inst->size_written,
|
||||
scan_inst->src[i], scan_inst->size_read(devinfo, i))) {
|
||||
use_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_count > 1)
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(use_count == 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_Boolean(brw_inst *inst, const brw_def_analysis &defs)
|
||||
{
|
||||
if (inst == NULL)
|
||||
return false;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_CMPN:
|
||||
return true;
|
||||
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_NOT:
|
||||
case BRW_OPCODE_OR:
|
||||
case BRW_OPCODE_SEL:
|
||||
case BRW_OPCODE_XOR: {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
brw_inst *def = defs.get(inst->src[i]);
|
||||
|
||||
if (def == NULL)
|
||||
return false;
|
||||
|
||||
if (def->opcode != BRW_OPCODE_CMP &&
|
||||
def->opcode != BRW_OPCODE_CMPN)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the flags read between two instructions.
|
||||
*
|
||||
* Flags read by \c begin or \c end are \b not included in the return value.
|
||||
*/
|
||||
static unsigned
|
||||
flags_read_between(brw_inst *begin, brw_inst *end,
|
||||
const intel_device_info *devinfo)
|
||||
{
|
||||
unsigned flags_read = 0;
|
||||
|
||||
foreach_inst_in_block_starting_from(brw_inst, inst, begin) {
|
||||
if (inst == end)
|
||||
return flags_read;
|
||||
|
||||
flags_read |= inst->flags_read(devinfo);
|
||||
}
|
||||
|
||||
if (end == NULL)
|
||||
return flags_read;
|
||||
|
||||
UNREACHABLE("end does not occur after begin in the same block.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the flags written between two instructions.
|
||||
*
|
||||
* Flags written by \c begin or \c end are \b not included in the return value.
|
||||
*/
|
||||
static unsigned
|
||||
flags_written_between(brw_inst *begin, brw_inst *end,
|
||||
const intel_device_info *devinfo)
|
||||
{
|
||||
unsigned flags_written = 0;
|
||||
|
||||
foreach_inst_in_block_starting_from(brw_inst, inst, begin) {
|
||||
if (inst == end)
|
||||
return flags_written;
|
||||
|
||||
flags_written |= inst->flags_written(devinfo);
|
||||
}
|
||||
|
||||
if (end == NULL)
|
||||
return flags_written;
|
||||
|
||||
UNREACHABLE("end does not occur after begin in the same block.");
|
||||
}
|
||||
|
||||
static bool
|
||||
is_valid_logic_source(const brw_inst *inst)
|
||||
{
|
||||
if (inst->opcode == BRW_OPCODE_CMP ||
|
||||
inst->opcode == BRW_OPCODE_CMPN)
|
||||
return true;
|
||||
|
||||
/* The flags will be used as a proxy for the value produced by the
|
||||
* instruction. At the end, the instruction must have a
|
||||
* conditional modifier of NZ (G for BFN).
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_BFN) {
|
||||
if (inst->conditional_mod == BRW_CONDITIONAL_NONE ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_G) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if ((inst->conditional_mod == BRW_CONDITIONAL_NONE &&
|
||||
inst->can_do_cmod(BRW_CONDITIONAL_NZ)) ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NZ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static logic_source
|
||||
find_logic_source(brw_inst *inst, unsigned src, const brw_def_analysis &defs,
|
||||
const intel_device_info *devinfo)
|
||||
{
|
||||
unsigned distance = 0;
|
||||
unsigned flags_read = 0;
|
||||
unsigned flags_written = 0;
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
|
||||
distance++;
|
||||
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[src], inst->size_read(devinfo, src))) {
|
||||
if (scan_inst->is_partial_write() ||
|
||||
scan_inst->dst.offset != inst->src[0].offset ||
|
||||
scan_inst->exec_size != inst->exec_size ||
|
||||
!is_valid_logic_source(scan_inst)) {
|
||||
return logic_source();
|
||||
}
|
||||
|
||||
return logic_source(scan_inst, distance, src);
|
||||
}
|
||||
|
||||
flags_read |= scan_inst->flags_read(devinfo);
|
||||
flags_written |= scan_inst->flags_written(devinfo);
|
||||
}
|
||||
|
||||
brw_inst *def = defs.get(inst->src[src]);
|
||||
if (def != NULL) {
|
||||
assert(def->block != inst->block);
|
||||
|
||||
if (def->is_partial_write() ||
|
||||
def->dst.offset != inst->src[0].offset ||
|
||||
def->exec_size != inst->exec_size ||
|
||||
!is_valid_logic_source(def)) {
|
||||
def = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return logic_source(def, UINT_MAX, src);
|
||||
}
|
||||
|
||||
static bool
|
||||
try_predicated_cmp(brw_shader &s, const brw_live_variables &live_vars,
|
||||
const brw_def_analysis &defs,
|
||||
brw_inst *logic_inst, logic_source &hither,
|
||||
logic_source &yon, unsigned hither_flags)
|
||||
{
|
||||
/* For this path, the farther instruction must also be in the same block
|
||||
* as the logic operation.
|
||||
*/
|
||||
if (yon.inst == NULL || yon.distance == UINT_MAX)
|
||||
return false;
|
||||
|
||||
/* If yon doesn't write any flags yet, determine what flags it would
|
||||
* write.
|
||||
*/
|
||||
unsigned yon_flags = yon.inst->flags_written(s.devinfo);
|
||||
if (yon_flags == 0) {
|
||||
assert(yon.inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||
assert(yon.inst->flag_subreg == 0);
|
||||
|
||||
yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
|
||||
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
|
||||
yon.inst->flag_subreg = logic_inst->flag_subreg;
|
||||
|
||||
yon_flags = yon.inst->flags_written(s.devinfo);
|
||||
|
||||
yon.inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
yon.inst->flag_subreg = 0;
|
||||
}
|
||||
|
||||
/* If yon does not already write flags, there must be no readers of the flags
|
||||
* that it will write.
|
||||
*
|
||||
* A similar test for hither is not necessary. It is already required
|
||||
* that there be no uses of the flags produced by hither.
|
||||
*/
|
||||
if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
|
||||
(flags_read_between(yon.inst, hither.inst, s.devinfo) &
|
||||
yon_flags) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The flags written by yon must reach hither. */
|
||||
if ((flags_written_between(yon.inst, hither.inst, s.devinfo) &
|
||||
yon_flags) != 0)
|
||||
return false;
|
||||
|
||||
/* The flags and the destination written by hither must not be read by
|
||||
* any instruction other than the logic operation.
|
||||
*/
|
||||
if (!is_used_once(hither.inst, s.devinfo, live_vars, defs))
|
||||
return false;
|
||||
|
||||
if ((hither_flags & flags_read_between(hither.inst, logic_inst, s.devinfo)) != 0)
|
||||
return false;
|
||||
|
||||
const unsigned flags_read_after_inst =
|
||||
flags_read_between(logic_inst, NULL, s.devinfo) |
|
||||
live_vars.block_data[logic_inst->block->num].flag_liveout[0];
|
||||
|
||||
if (flags_read_after_inst & (hither_flags &
|
||||
~logic_inst->flags_written(s.devinfo)))
|
||||
return false;
|
||||
|
||||
/* It is safe to eliminate the logic operation. Perform the following
|
||||
* steps:
|
||||
*
|
||||
* 1. If yon doesn't already write flags, set a conditional modifier on
|
||||
* it, and set its flag_subreg.
|
||||
*
|
||||
* 2. If hither doesn't already write flags, set a conditional modifier on
|
||||
* it, and set its flag_subreg.
|
||||
*
|
||||
* 3. Make hither's destination be the null register.
|
||||
*
|
||||
* 4. Make hither be predicated.
|
||||
*
|
||||
* 5. Remove the logic operation.
|
||||
*/
|
||||
if (yon.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
|
||||
yon.inst->conditional_mod = yon.inst->opcode != BRW_OPCODE_BFN ?
|
||||
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
|
||||
yon.inst->flag_subreg = logic_inst->flag_subreg;
|
||||
|
||||
assert(yon_flags == yon.inst->flags_written(s.devinfo));
|
||||
}
|
||||
|
||||
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
|
||||
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
|
||||
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
|
||||
hither.inst->flag_subreg = logic_inst->flag_subreg;
|
||||
|
||||
assert(hither_flags == hither.inst->flags_written(s.devinfo));
|
||||
}
|
||||
|
||||
hither.inst->dst = retype(brw_null_reg(), hither.inst->dst.type);
|
||||
|
||||
set_predicate_inv(BRW_PREDICATE_NORMAL,
|
||||
logic_inst->opcode == BRW_OPCODE_OR,
|
||||
hither.inst);
|
||||
|
||||
assert((hither.inst->flags_read(s.devinfo) &
|
||||
~yon.inst->flags_written(s.devinfo)) == 0);
|
||||
|
||||
logic_inst->remove();
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
try_predicated_mov(brw_shader &s, const brw_live_variables &live_vars,
|
||||
const brw_def_analysis &defs,
|
||||
brw_inst *logic_inst, logic_source &hither,
|
||||
logic_source &yon, unsigned hither_flags)
|
||||
{
|
||||
/* Cases like
|
||||
*
|
||||
* cmp.g.f0.0(8) v946:F, |v945|:F, 0f
|
||||
* and.nz.f0.0(8) null:UD, -v869:UD, v946:UD
|
||||
*
|
||||
* can be handled by replacing the AND instruction with a predicated NOT
|
||||
* instead of a predicated MOV.
|
||||
*
|
||||
* NOTE: ~x != 0 is not the same as x == 0 when x is not known to be a
|
||||
* Boolean value. Since yon may not be a CMP/CMPN, this is important.
|
||||
*
|
||||
* However, cases where the other source is negated would require more
|
||||
* complicated surgery. De Morgan's Law would have to be applied, and
|
||||
* all uses of the new predicate would have to be inverted. The
|
||||
* information is available to make that possible (e.g., the flags
|
||||
* liveness), but it's a lot more work.
|
||||
*/
|
||||
const enum opcode op = logic_inst->src[yon.src].negate ?
|
||||
BRW_OPCODE_NOT : BRW_OPCODE_MOV;
|
||||
|
||||
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE &&
|
||||
(flags_read_between(hither.inst, logic_inst, s.devinfo) &
|
||||
hither_flags) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* It is safe to eliminate the logic operation. Perform the following
|
||||
* steps:
|
||||
*
|
||||
* 1. If hither doesn't already write flags, set a conditional modifier on
|
||||
* it, and set its flag_subreg.
|
||||
*
|
||||
* 2. Convert the logic operation to either a MOV or a NOT of the value
|
||||
* taken from yon.
|
||||
*/
|
||||
if (hither.inst->conditional_mod == BRW_CONDITIONAL_NONE) {
|
||||
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
|
||||
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
|
||||
hither.inst->flag_subreg = logic_inst->flag_subreg;
|
||||
|
||||
assert(hither_flags == hither.inst->flags_written(s.devinfo));
|
||||
}
|
||||
|
||||
set_predicate_inv(BRW_PREDICATE_NORMAL,
|
||||
logic_inst->opcode == BRW_OPCODE_OR,
|
||||
logic_inst);
|
||||
logic_inst->src[0] = logic_inst->src[yon.src];
|
||||
logic_inst->src[0].negate = false;
|
||||
|
||||
brw_transform_inst(s, logic_inst, op);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
try_predicate_and(brw_shader &s, brw_inst *inst,
|
||||
const brw_live_variables &live_vars,
|
||||
const brw_def_analysis &defs)
|
||||
{
|
||||
if (inst->conditional_mod != BRW_CONDITIONAL_NZ)
|
||||
return false;
|
||||
|
||||
if (regions_overlap(inst->src[0], inst->size_read(s.devinfo, 0),
|
||||
inst->src[1], inst->size_read(s.devinfo, 1))) {
|
||||
return false;
|
||||
}
|
||||
/* These names are annoying. Some compilers secretly have "near" and "far"
|
||||
* as reserved words, so those can't be used.
|
||||
*/
|
||||
logic_source hither = find_logic_source(inst, 0, defs, s.devinfo);
|
||||
logic_source yon = find_logic_source(inst, 1, defs, s.devinfo);
|
||||
|
||||
assert(hither.inst == NULL || hither.inst != yon.inst);
|
||||
|
||||
if (hither.distance > yon.distance)
|
||||
SWAP(hither, yon);
|
||||
|
||||
/* The closer instruction must be in the same block. */
|
||||
if (hither.inst == NULL || hither.distance == UINT_MAX)
|
||||
return false;
|
||||
|
||||
/* If the logical operation is AND, one of the comparisons must be provably
|
||||
* a Boolean value (i.e., 0 or ~0). This is the only way to be sure A&B !=
|
||||
* 0 is equivalent to (A != 0) && (B != 0).
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_AND &&
|
||||
!is_Boolean(hither.inst, defs) && !is_Boolean(yon.inst, defs))
|
||||
return false;
|
||||
|
||||
/* If hither doesn't write any flags yet, determine what flags it would
|
||||
* write.
|
||||
*/
|
||||
unsigned hither_flags = hither.inst->flags_written(s.devinfo);
|
||||
if (hither_flags == 0) {
|
||||
assert(hither.inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||
assert(hither.inst->flag_subreg == 0);
|
||||
|
||||
hither.inst->conditional_mod = hither.inst->opcode != BRW_OPCODE_BFN ?
|
||||
BRW_CONDITIONAL_NZ : BRW_CONDITIONAL_G;
|
||||
hither.inst->flag_subreg = inst->flag_subreg;
|
||||
|
||||
hither_flags = hither.inst->flags_written(s.devinfo);
|
||||
|
||||
hither.inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
hither.inst->flag_subreg = 0;
|
||||
}
|
||||
|
||||
/* The flags written by hither must reach the logic operation. */
|
||||
if ((flags_written_between(hither.inst, inst, s.devinfo) &
|
||||
hither_flags) != 0)
|
||||
return false;
|
||||
|
||||
if (!inst->src[0].negate && !inst->src[1].negate &&
|
||||
try_predicated_cmp(s, live_vars, defs, inst, hither, yon,
|
||||
hither_flags)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!inst->src[hither.src].negate &&
|
||||
try_predicated_mov(s, live_vars, defs, inst, hither, yon,
|
||||
hither_flags)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
opt_predicate_logic_local(brw_shader &s, bblock_t *block,
|
||||
const brw_live_variables &live_vars,
|
||||
const brw_def_analysis &defs)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
foreach_inst_in_block_reverse_safe(brw_inst, inst, block) {
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_OR:
|
||||
if (inst->predicate == BRW_PREDICATE_NONE &&
|
||||
inst->dst.is_null() &&
|
||||
brw_type_size_bytes(inst->src[0].type) == 4 &&
|
||||
brw_type_size_bytes(inst->src[1].type) == 4 &&
|
||||
!inst->src[0].abs && !inst->src[1].abs) {
|
||||
if (try_predicate_and(s, inst, live_vars, defs))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_opt_predicate_logic(brw_shader &s)
|
||||
{
|
||||
bool progress = false;
|
||||
const brw_live_variables &live_vars = s.live_analysis.require();
|
||||
const brw_def_analysis &defs = s.def_analysis.require();
|
||||
|
||||
foreach_block (block, s.cfg) {
|
||||
if (opt_predicate_logic_local(s, block, live_vars, defs))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -372,6 +372,7 @@ bool brw_opt_cse_defs(brw_shader &s);
|
|||
bool brw_opt_dead_code_eliminate(brw_shader &s);
|
||||
bool brw_opt_eliminate_find_live_channel(brw_shader &s);
|
||||
bool brw_opt_fill_and_spill(brw_shader &s);
|
||||
bool brw_opt_predicate_logic(brw_shader &s);
|
||||
bool brw_opt_register_coalesce(brw_shader &s);
|
||||
bool brw_opt_remove_extra_rounding_modes(brw_shader &s);
|
||||
bool brw_opt_remove_redundant_halts(brw_shader &s);
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_opt_cse.cpp',
|
||||
'brw_opt_dead_code_eliminate.cpp',
|
||||
'brw_opt_fill_spill.cpp',
|
||||
'brw_opt_predicate_logic.cpp',
|
||||
'brw_opt_register_coalesce.cpp',
|
||||
'brw_opt_saturate_propagation.cpp',
|
||||
'brw_opt_txf_combiner.cpp',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue