mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 13:28:09 +02:00
not planning to work on the TODOs immediately Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38385>
114 lines
3.5 KiB
C
114 lines
3.5 KiB
C
/*
|
|
* Copyright © 2014 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
/* Common Subexpression Elimination
|
|
*
|
|
* This implementation behaves more like Global Value Numbering (GVN) than
|
|
* traditional CSE. While traditional CSE eliminates redundant instructions
|
|
* that have identical representations, GVN eliminates redundant instructions
|
|
* that have identical behavior.
|
|
*
|
|
* The pass walks the shader and adds instructions into a set whose equality
|
|
* function returns whether the behavior of 2 instructions is identical.
|
|
* When we encounter an instruction that is already in the set, the instruction
|
|
* is eliminated if the instruction in the set dominates it, else
|
|
* the instruction replaces the instruction in the set (see example 4).
|
|
*
|
|
* Non-reorderable intrinsics are ignored with the exception of certain
|
|
* non-reorderable subgroups ops and intrinsics like demote and terminate that
|
|
* are CSE'd.
|
|
*
|
|
* Example 1. Identical instructions:
|
|
* %2 = iadd %0, %1
|
|
* control_flow {
|
|
* %3 = iadd %0, %1 // eliminated
|
|
* }
|
|
*
|
|
* Example 2. Commutative instructions:
|
|
* %3 = ffma %0, %1, %2
|
|
* %4 = ffma %1, %0, %2 // eliminated
|
|
*
|
|
* Example 3. Non-matching ALU flags are merged:
|
|
* %2 = fmul %0, %1 (fp_fast_math) // exact added here
|
|
* %3 = fmul %0, %1 (exact) // eliminated
|
|
*
|
|
* Example 4. Non-dominating situation:
|
|
* if {
|
|
* %2 = iadd %0, %1
|
|
* } else {
|
|
* %3 = iadd %0, %1 // keep, but replace %2 in the set
|
|
* %4 = iadd %0, %1 // eliminated
|
|
* }
|
|
* TODO: We could move %2 before "if" in this pass instead. It would also
|
|
* reduce register usage when %0 and %1 are no longer live in
|
|
* the range between "if" and %3, while only %2 would be live in that
|
|
* range.
|
|
*
|
|
* TODO - everything below is not implemented:
|
|
*
|
|
* Implementing the following cases could eliminate most of nir_opt_copy_prop:
|
|
*
|
|
* Case 1. Copy propagation of movs without swizzles:
|
|
* 32x4 %2 = (any instruction)
|
|
* 32x4 %3 = mov %2.xyzw // eliminated since it's equal to %2
|
|
*
|
|
* Case 2. Copy propagation of movs with swizzles:
|
|
* 32x2 %2 = (any instruction)
|
|
* 32x3 %3 = mov %2.yxx // eliminated conditionally
|
|
* All %3 uses that are ALU will absorb the swizzle and are changed
|
|
* to use %2, and those uses that are not ALU will keep the mov.
|
|
*
|
|
* While vecN is possible to occur here instead, NIR should always create
|
|
* swizzled mov instead of vecN when all components use the same def, and
|
|
* nir_validate should assert that, so this should never occur:
|
|
* 32x4 %2 = vec4 %1.?, %1.?, %1.?, %1.?
|
|
*/
|
|
|
|
#include "nir.h"
|
|
#include "nir_instr_set.h"
|
|
|
|
static bool
|
|
dominates(const nir_instr *old_instr, const nir_instr *new_instr)
|
|
{
|
|
return nir_block_dominates(old_instr->block, new_instr->block);
|
|
}
|
|
|
|
static bool
|
|
nir_opt_cse_impl(nir_function_impl *impl)
|
|
{
|
|
struct set instr_set;
|
|
nir_instr_set_init(&instr_set, NULL);
|
|
|
|
_mesa_set_resize(&instr_set, impl->ssa_alloc);
|
|
|
|
nir_metadata_require(impl, nir_metadata_dominance);
|
|
|
|
bool progress = false;
|
|
nir_foreach_block(block, impl) {
|
|
nir_foreach_instr_safe(instr, block) {
|
|
if (nir_instr_set_add_or_rewrite(&instr_set, instr, dominates)) {
|
|
progress = true;
|
|
nir_instr_remove(instr);
|
|
}
|
|
}
|
|
}
|
|
|
|
nir_progress(progress, impl, nir_metadata_control_flow);
|
|
|
|
nir_instr_set_fini(&instr_set);
|
|
return progress;
|
|
}
|
|
|
|
bool
|
|
nir_opt_cse(nir_shader *shader)
|
|
{
|
|
bool progress = false;
|
|
|
|
nir_foreach_function_impl(impl, shader) {
|
|
progress |= nir_opt_cse_impl(impl);
|
|
}
|
|
|
|
return progress;
|
|
}
|