mesa/src/compiler/nir/nir_opt_cse.c
Marek Olšák d17d1f53bd
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run
nir/opt_cse: update potential future plans merging copy propagation with CSE
This matches my current understanding of nir_opt_copy_prop, including that
nir_opt_copy_prop always replaces movs with vecN.

Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38916>
2025-12-13 06:41:59 +00:00

115 lines
3.6 KiB
C

/*
* Copyright © 2014 Intel Corporation
* SPDX-License-Identifier: MIT
*/
/* Common Subexpression Elimination
*
* This implementation behaves more like Global Value Numbering (GVN) than
* traditional CSE. While traditional CSE eliminates redundant instructions
* that have identical representations, GVN eliminates redundant instructions
* that have identical behavior.
*
* The pass walks the shader and adds instructions into a set whose equality
* function returns whether the behavior of 2 instructions is identical.
* When we encounter an instruction that is already in the set, the instruction
* is eliminated if the instruction in the set dominates it, else
* the instruction replaces the instruction in the set (see example 4).
*
* Non-reorderable intrinsics are ignored, with the exception of certain
* non-reorderable subgroup ops and intrinsics like demote and terminate, which
* are CSE'd.
*
* Example 1. Identical instructions:
* %2 = iadd %0, %1
* control_flow {
* %3 = iadd %0, %1 // eliminated
* }
*
* Example 2. Commutative instructions:
* %3 = ffma %0, %1, %2
* %4 = ffma %1, %0, %2 // eliminated
*
* Example 3. Non-matching ALU flags are merged:
* %2 = fmul %0, %1 (fp_fast_math) // exact added here
* %3 = fmul %0, %1 (exact) // eliminated
*
* Example 4. Non-dominating situation:
* if {
* %2 = iadd %0, %1
* } else {
* %3 = iadd %0, %1 // keep, but replace %2 in the set
* %4 = iadd %0, %1 // eliminated
* }
* TODO: We could move %2 before "if" in this pass instead. It would also
* reduce register usage when %0 and %1 are no longer live in
* the range between "if" and %3, while only %2 would be live in that
* range.
*
* TODO - everything below is not implemented:
*
* Implementing the following cases could eliminate most of nir_opt_copy_prop:
*
* Case 1. Copy propagation of movs without swizzles:
* 32x4 %2 = (any instruction)
* 32x4 %3 = mov %2.xyzw // eliminated since it's equal to %2
* OR
* 32x4 %3 = vec4 %2.x, %2.y, %2.z, %2.w // eliminated since it's equal to %2
*
* Case 2. Copy propagation of movs with swizzles:
* 32x2 %2 = (any instruction)
* 32x3 %3 = mov %2.yxx // eliminated conditionally
* OR
* 32x3 %3 = vec3 %2.y, %2.x, %2.x // eliminated conditionally
* All %3 uses that are ALU will absorb the swizzle and are changed
* to use %2, and those uses that are not ALU will keep vecN or replace
* mov with equivalent vecN while eliminating components not used by
* the remaining uses (nir_opt_copy_prop always does that).
*/
#include "nir.h"
#include "nir_instr_set.h"
/* Dominance predicate passed to nir_instr_set_add_or_rewrite().
 *
 * Returns true when the block containing old_instr dominates the block
 * containing new_instr. Only the two containing blocks are compared; the
 * position of either instruction inside its block is not examined here.
 */
static bool
dominates(const nir_instr *old_instr, const nir_instr *new_instr)
{
   const bool old_block_dominates_new =
      nir_block_dominates(old_instr->block, new_instr->block);

   return old_block_dominates_new;
}
/* Run CSE over a single function implementation.
 *
 * Every instruction of every block is offered to an instruction set through
 * nir_instr_set_add_or_rewrite(), using dominates() as the dominance test.
 * Whenever that call returns true, the offered instruction is removed from
 * the shader.
 *
 * Returns true if at least one instruction was removed.
 */
static bool
nir_opt_cse_impl(nir_function_impl *impl)
{
   struct set seen;
   bool changed = false;

   nir_instr_set_init(&seen, NULL);

   /* Size the set from impl->ssa_alloc up front (presumably so it does not
    * have to grow repeatedly while the walk below inserts instructions).
    */
   _mesa_set_resize(&seen, impl->ssa_alloc);

   /* dominates() relies on block dominance information. */
   nir_metadata_require(impl, nir_metadata_dominance);

   nir_foreach_block(blk, impl) {
      /* The _safe iterator is used because the current instruction may be
       * removed inside the loop body.
       */
      nir_foreach_instr_safe(candidate, blk) {
         const bool redundant =
            nir_instr_set_add_or_rewrite(&seen, candidate, dominates);

         if (redundant) {
            nir_instr_remove(candidate);
            changed = true;
         }
      }
   }

   /* Report the outcome together with nir_metadata_control_flow. */
   nir_progress(changed, impl, nir_metadata_control_flow);

   nir_instr_set_fini(&seen);
   return changed;
}
/* Public entry point: run CSE on every function implementation in the shader.
 *
 * Each implementation is always processed, regardless of whether an earlier
 * one already made progress.
 *
 * Returns true if any implementation reported progress.
 */
bool
nir_opt_cse(nir_shader *shader)
{
   bool any_progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_opt_cse_impl(impl))
         any_progress = true;
   }

   return any_progress;
}