2025-05-14 20:12:11 -04:00
|
|
|
// Copyright © 2025 Valve Corporation
|
|
|
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
|
|
|
|
|
|
//! Dataflow analysis
|
|
|
|
|
//!
|
|
|
|
|
//! This module contains helpers for solving dataflow problems. A dataflow
|
|
|
|
|
//! problem is characterized by a "transfer" function, which updates information
|
|
|
|
|
//! based on a single block, and a "join" function, which updates information
|
|
|
|
|
//! along a control flow edge. See the wikipedia article for more information on
|
|
|
|
|
//! this terminology.
|
|
|
|
|
//! https://en.wikipedia.org/wiki/Data-flow_analysis#Basic_principles
|
|
|
|
|
|
2025-09-13 00:13:15 +02:00
|
|
|
use crate::bitset::BitSet;
|
|
|
|
|
use crate::cfg::CFG;
|
2025-05-15 14:30:52 -04:00
|
|
|
use std::collections::VecDeque;
|
|
|
|
|
|
|
|
|
|
/// A FIFO where each item is unique
|
|
|
|
|
#[derive(Default)]
|
|
|
|
|
struct FIFOSet {
|
|
|
|
|
vec_deque: VecDeque<usize>,
|
|
|
|
|
bit_set: BitSet,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl FIFOSet {
|
|
|
|
|
fn push_back(&mut self, x: usize) {
|
|
|
|
|
if self.bit_set.insert(x) {
|
|
|
|
|
self.vec_deque.push_back(x);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn pop_front(&mut self) -> Option<usize> {
|
|
|
|
|
let out = self.vec_deque.pop_front();
|
|
|
|
|
if let Some(x) = out {
|
|
|
|
|
let exists = self.bit_set.remove(x);
|
|
|
|
|
debug_assert!(exists);
|
|
|
|
|
}
|
|
|
|
|
out
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-05-14 20:12:11 -04:00
|
|
|
|
|
|
|
|
pub struct BackwardDataflow<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
where
|
|
|
|
|
Transfer: FnMut(usize, &Block, &mut BlockIn, &BlockOut) -> bool,
|
|
|
|
|
Join: FnMut(&mut BlockOut, &BlockIn),
|
|
|
|
|
{
|
|
|
|
|
pub cfg: &'a CFG<Block>,
|
|
|
|
|
pub block_in: &'a mut [BlockIn],
|
|
|
|
|
pub block_out: &'a mut [BlockOut],
|
|
|
|
|
|
|
|
|
|
/// Generate the block input from the block's output
|
|
|
|
|
///
|
|
|
|
|
/// Returns true if block_in has changed, false otherwise
|
|
|
|
|
pub transfer: Transfer,
|
|
|
|
|
|
|
|
|
|
/// Update the block output based on a successor's input
|
|
|
|
|
pub join: Join,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
BackwardDataflow<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
where
|
|
|
|
|
Transfer: FnMut(usize, &Block, &mut BlockIn, &BlockOut) -> bool,
|
|
|
|
|
Join: FnMut(&mut BlockOut, &BlockIn),
|
|
|
|
|
{
|
2025-05-15 14:30:52 -04:00
|
|
|
fn transfer(&mut self, block_idx: usize) -> bool {
|
|
|
|
|
(self.transfer)(
|
|
|
|
|
block_idx,
|
|
|
|
|
&self.cfg[block_idx],
|
|
|
|
|
&mut self.block_in[block_idx],
|
|
|
|
|
&self.block_out[block_idx],
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn join(&mut self, pred_idx: usize, block_idx: usize) {
|
|
|
|
|
(self.join)(&mut self.block_out[pred_idx], &self.block_in[block_idx]);
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-14 20:12:11 -04:00
|
|
|
/// Solve the dataflow problem and generate output for each block
|
|
|
|
|
pub fn solve(mut self) {
|
2025-05-15 14:30:52 -04:00
|
|
|
let num_blocks = self.cfg.len();
|
|
|
|
|
assert_eq!(num_blocks, self.block_in.len());
|
|
|
|
|
assert_eq!(num_blocks, self.block_out.len());
|
|
|
|
|
|
|
|
|
|
let mut worklist = FIFOSet::default();
|
|
|
|
|
|
|
|
|
|
// Perform an initial pass over the data
|
|
|
|
|
for block_idx in (0..num_blocks).rev() {
|
|
|
|
|
self.transfer(block_idx);
|
|
|
|
|
|
|
|
|
|
for &pred_idx in self.cfg.pred_indices(block_idx) {
|
|
|
|
|
// On the first iteration, we unconditionally join so that the
|
2025-07-05 21:57:37 +02:00
|
|
|
// join operator is called at least once for each edge
|
2025-05-15 14:30:52 -04:00
|
|
|
self.join(pred_idx, block_idx);
|
|
|
|
|
if pred_idx >= block_idx {
|
|
|
|
|
// Otherwise we're about to process it in the first pass
|
|
|
|
|
worklist.push_back(pred_idx);
|
2025-05-14 20:12:11 -04:00
|
|
|
}
|
2025-05-15 14:30:52 -04:00
|
|
|
}
|
|
|
|
|
}
|
2025-05-14 20:12:11 -04:00
|
|
|
|
2025-05-15 14:30:52 -04:00
|
|
|
// Process the worklist
|
|
|
|
|
while let Some(block_idx) = worklist.pop_front() {
|
|
|
|
|
let changed = self.transfer(block_idx);
|
|
|
|
|
if changed {
|
|
|
|
|
for &pred_idx in self.cfg.pred_indices(block_idx) {
|
|
|
|
|
self.join(pred_idx, block_idx);
|
|
|
|
|
worklist.push_back(pred_idx);
|
|
|
|
|
}
|
2025-05-14 20:12:11 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-07-05 21:57:37 +02:00
|
|
|
pub struct ForwardDataflow<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
where
|
|
|
|
|
Transfer: FnMut(usize, &Block, &mut BlockOut, &BlockIn) -> bool,
|
|
|
|
|
Join: FnMut(&mut BlockIn, &BlockOut),
|
|
|
|
|
{
|
|
|
|
|
pub cfg: &'a CFG<Block>,
|
|
|
|
|
pub block_in: &'a mut [BlockIn],
|
|
|
|
|
pub block_out: &'a mut [BlockOut],
|
|
|
|
|
|
|
|
|
|
/// Generate the block output from the block's input
|
|
|
|
|
///
|
|
|
|
|
/// Returns true if block_out has changed, false otherwise
|
|
|
|
|
pub transfer: Transfer,
|
|
|
|
|
|
|
|
|
|
/// Update the block output based on a predecessor's input
|
|
|
|
|
pub join: Join,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
ForwardDataflow<'a, Block, BlockIn, BlockOut, Transfer, Join>
|
|
|
|
|
where
|
|
|
|
|
Transfer: FnMut(usize, &Block, &mut BlockOut, &BlockIn) -> bool,
|
|
|
|
|
Join: FnMut(&mut BlockIn, &BlockOut),
|
|
|
|
|
{
|
|
|
|
|
fn transfer(&mut self, block_idx: usize) -> bool {
|
|
|
|
|
(self.transfer)(
|
|
|
|
|
block_idx,
|
|
|
|
|
&self.cfg[block_idx],
|
|
|
|
|
&mut self.block_out[block_idx],
|
|
|
|
|
&self.block_in[block_idx],
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn join(&mut self, succ_idx: usize, block_idx: usize) {
|
|
|
|
|
(self.join)(&mut self.block_in[succ_idx], &self.block_out[block_idx]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Solve the dataflow problem and generate output for each block
|
|
|
|
|
pub fn solve(mut self) {
|
|
|
|
|
let num_blocks = self.cfg.len();
|
|
|
|
|
assert_eq!(num_blocks, self.block_in.len());
|
|
|
|
|
assert_eq!(num_blocks, self.block_out.len());
|
|
|
|
|
|
|
|
|
|
let mut worklist = FIFOSet::default();
|
|
|
|
|
|
|
|
|
|
// Perform an initial pass over the data
|
|
|
|
|
for block_idx in 0..num_blocks {
|
|
|
|
|
self.transfer(block_idx);
|
|
|
|
|
|
|
|
|
|
for &succ_idx in self.cfg.succ_indices(block_idx) {
|
|
|
|
|
// On the first iteration, we unconditionally join so that the
|
|
|
|
|
// join operator is called at least once for each edge
|
|
|
|
|
self.join(succ_idx, block_idx);
|
|
|
|
|
if succ_idx <= block_idx {
|
|
|
|
|
// Otherwise we're about to process it in the first pass
|
|
|
|
|
worklist.push_back(succ_idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Process the worklist
|
|
|
|
|
while let Some(block_idx) = worklist.pop_front() {
|
|
|
|
|
let changed = self.transfer(block_idx);
|
|
|
|
|
if changed {
|
|
|
|
|
for &succ_idx in self.cfg.succ_indices(block_idx) {
|
|
|
|
|
self.join(succ_idx, block_idx);
|
|
|
|
|
worklist.push_back(succ_idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-14 20:12:11 -04:00
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
2025-09-13 00:13:15 +02:00
|
|
|
use crate::bitset::BitSet;
|
|
|
|
|
use crate::cfg::CFGBuilder;
|
2025-05-14 20:12:11 -04:00
|
|
|
use std::hash::RandomState;
|
|
|
|
|
|
|
|
|
|
fn check_graph_reachability(
|
|
|
|
|
edges: &[(usize, usize)],
|
|
|
|
|
expected: &[&[usize]],
|
|
|
|
|
) {
|
|
|
|
|
let mut builder = CFGBuilder::<_, _, RandomState>::new();
|
|
|
|
|
for (i, expected) in expected.iter().enumerate() {
|
|
|
|
|
builder.add_node(i, (i, expected));
|
|
|
|
|
}
|
|
|
|
|
for &(a, b) in edges {
|
|
|
|
|
builder.add_edge(a, b);
|
|
|
|
|
}
|
|
|
|
|
let cfg = builder.as_cfg();
|
|
|
|
|
|
|
|
|
|
let mut reachable_in: Vec<BitSet> =
|
|
|
|
|
(0..cfg.len()).map(|_| Default::default()).collect();
|
|
|
|
|
let mut reachable_out: Vec<BitSet> =
|
|
|
|
|
(0..cfg.len()).map(|_| Default::default()).collect();
|
|
|
|
|
BackwardDataflow {
|
|
|
|
|
cfg: &cfg,
|
|
|
|
|
block_in: &mut reachable_in[..],
|
|
|
|
|
block_out: &mut reachable_out[..],
|
|
|
|
|
transfer: |_block_idx, block, live_in, live_out| {
|
|
|
|
|
// The dataflow framework guarantees that each block and edge is
|
|
|
|
|
// processed at least once, so we don't need to record whether
|
|
|
|
|
// this first insert changed anything or not
|
|
|
|
|
live_in.insert(block.0);
|
|
|
|
|
|
|
|
|
|
live_in.union_with(live_out.s(..))
|
|
|
|
|
},
|
|
|
|
|
join: |live_out, succ_live_in| {
|
|
|
|
|
*live_out |= succ_live_in.s(..);
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
.solve();
|
|
|
|
|
|
|
|
|
|
for (node, reachable_out) in cfg.iter().zip(reachable_out.iter()) {
|
|
|
|
|
let (_i, expected) = **node;
|
|
|
|
|
let r: Vec<usize> = reachable_out.iter().collect();
|
|
|
|
|
assert_eq!(*expected, &r[..]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_if_else() {
|
|
|
|
|
check_graph_reachability(
|
|
|
|
|
&[(0, 1), (0, 2), (1, 3), (2, 3)],
|
|
|
|
|
&[&[1, 2, 3], &[3], &[3], &[]],
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_loop() {
|
|
|
|
|
check_graph_reachability(
|
|
|
|
|
&[(0, 1), (1, 2), (2, 3), (2, 1)],
|
|
|
|
|
&[&[1, 2, 3], &[1, 2, 3], &[1, 2, 3], &[]],
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_irreducible() {
|
|
|
|
|
check_graph_reachability(
|
|
|
|
|
&[(0, 1), (0, 2), (1, 2), (2, 1), (1, 3), (2, 3)],
|
|
|
|
|
&[&[1, 2, 3], &[1, 2, 3], &[1, 2, 3], &[]],
|
|
|
|
|
);
|
|
|
|
|
}
|
2025-07-05 21:57:37 +02:00
|
|
|
|
|
|
|
|
fn check_graph_origin(edges: &[(usize, usize)], expected: &[&[usize]]) {
|
|
|
|
|
let mut builder = CFGBuilder::<_, _, RandomState>::new();
|
|
|
|
|
for (i, expected) in expected.iter().enumerate() {
|
|
|
|
|
builder.add_node(i, (i, expected));
|
|
|
|
|
}
|
|
|
|
|
for &(a, b) in edges {
|
|
|
|
|
builder.add_edge(a, b);
|
|
|
|
|
}
|
|
|
|
|
let cfg = builder.as_cfg();
|
|
|
|
|
|
|
|
|
|
let mut reachable_in: Vec<BitSet> =
|
|
|
|
|
(0..cfg.len()).map(|_| Default::default()).collect();
|
|
|
|
|
let mut reachable_out: Vec<BitSet> =
|
|
|
|
|
(0..cfg.len()).map(|_| Default::default()).collect();
|
|
|
|
|
ForwardDataflow {
|
|
|
|
|
cfg: &cfg,
|
|
|
|
|
block_in: &mut reachable_in[..],
|
|
|
|
|
block_out: &mut reachable_out[..],
|
|
|
|
|
transfer: |_block_idx, block, live_out, live_in| {
|
|
|
|
|
// The dataflow framework guarantees that each block and edge is
|
|
|
|
|
// processed at least once, so we don't need to record whether
|
|
|
|
|
// this first insert changed anything or not
|
|
|
|
|
live_out.insert(block.0);
|
|
|
|
|
|
|
|
|
|
live_out.union_with(live_in.s(..))
|
|
|
|
|
},
|
|
|
|
|
join: |live_in, pred_live_out| {
|
|
|
|
|
*live_in |= pred_live_out.s(..);
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
.solve();
|
|
|
|
|
|
|
|
|
|
for (node, reachable_in) in cfg.iter().zip(reachable_in.iter()) {
|
|
|
|
|
let (_i, expected) = **node;
|
|
|
|
|
let r: Vec<usize> = reachable_in.iter().collect();
|
|
|
|
|
assert_eq!(*expected, &r[..]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_fw_if_else() {
|
|
|
|
|
check_graph_origin(
|
|
|
|
|
&[(0, 1), (0, 2), (1, 3), (2, 3)],
|
|
|
|
|
&[&[], &[0], &[0], &[0, 1, 2]],
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_fw_loop() {
|
|
|
|
|
check_graph_origin(
|
|
|
|
|
&[(0, 1), (1, 2), (2, 3), (2, 1)],
|
|
|
|
|
&[&[], &[0, 1, 2], &[0, 1, 2], &[0, 1, 2]],
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_fw_irreducible() {
|
|
|
|
|
check_graph_origin(
|
|
|
|
|
&[(0, 1), (0, 2), (1, 2), (2, 1), (1, 3), (2, 3)],
|
|
|
|
|
&[&[], &[0, 1, 2], &[0, 1, 2], &[0, 1, 2]],
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn check_max_iter_count(edges: &[(usize, usize)], expected: &[u32]) {
|
|
|
|
|
let mut builder = CFGBuilder::<_, _, RandomState>::new();
|
|
|
|
|
for (i, expected) in expected.iter().enumerate() {
|
|
|
|
|
builder.add_node(i, (i, expected));
|
|
|
|
|
}
|
|
|
|
|
for &(a, b) in edges {
|
|
|
|
|
builder.add_edge(a, b);
|
|
|
|
|
}
|
|
|
|
|
let cfg = builder.as_cfg();
|
|
|
|
|
|
|
|
|
|
let mut iter_in: Vec<u32> = vec![0; cfg.len()];
|
|
|
|
|
let mut iter_out: Vec<u32> = vec![0; cfg.len()];
|
|
|
|
|
// This unusual data-flow analysis counts how many cfg-blocks
|
|
|
|
|
// are visited, there is a hard limit of 5 to allow for convergence
|
|
|
|
|
ForwardDataflow {
|
|
|
|
|
cfg: &cfg,
|
|
|
|
|
block_in: &mut iter_in[..],
|
|
|
|
|
block_out: &mut iter_out[..],
|
|
|
|
|
transfer: |_block_idx, _block, it_out, it_in| {
|
|
|
|
|
let new_it = (*it_in + 1).min(5);
|
|
|
|
|
|
|
|
|
|
let is_diff = new_it != *it_out;
|
|
|
|
|
*it_out = new_it;
|
|
|
|
|
is_diff
|
|
|
|
|
},
|
|
|
|
|
join: |live_in, pred_live_out| {
|
|
|
|
|
*live_in = (*live_in).max(*pred_live_out)
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
.solve();
|
|
|
|
|
|
|
|
|
|
for (node, iter_in) in cfg.iter().zip(iter_in.iter()) {
|
|
|
|
|
let (_i, expected) = **node;
|
|
|
|
|
assert_eq!(*expected, *iter_in);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_max_iter_while() {
|
|
|
|
|
// This test fails if same-node back-edges are not accounted for
|
|
|
|
|
check_max_iter_count(&[(0, 1), (1, 2), (1, 1)], &[0, 5, 5]);
|
|
|
|
|
}
|
2025-05-14 20:12:11 -04:00
|
|
|
}
|