Skip to main content

problemreductions/rules/
longestcommonsubsequence_maximumindependentset.rs

1//! Reduction from LongestCommonSubsequence to MaximumIndependentSet.
2//!
3//! Constructs a conflict graph where vertices are match-node k-tuples
4//! (positions in each string that share the same character) and edges
5//! connect conflicting tuples that cannot both appear in a valid common
6//! subsequence. A maximum independent set in this graph corresponds to
7//! a longest common subsequence.
8//!
9//! Reference: Santini, Blum, Djukanovic et al. (2021),
10//! "Solving Longest Common Subsequence Problems via a Transformation
11//! to the Maximum Clique Problem," Computers & Operations Research.
12
13use crate::models::graph::MaximumIndependentSet;
14use crate::models::misc::LongestCommonSubsequence;
15use crate::reduction;
16use crate::rules::traits::{ReduceTo, ReductionResult};
17use crate::topology::SimpleGraph;
18use crate::types::One;
19
20/// Result of reducing LongestCommonSubsequence to MaximumIndependentSet.
21///
22/// Each vertex in the target graph corresponds to a match-node k-tuple
23/// `(p_1, ..., p_k)` where all strings have the same character at their
24/// respective positions.
25#[derive(Debug, Clone)]
26pub struct ReductionLCSToIS {
27    /// The target MaximumIndependentSet problem.
28    target: MaximumIndependentSet<SimpleGraph, One>,
29    /// Match-node k-tuples: `match_nodes[v]` gives the position tuple for vertex v.
30    match_nodes: Vec<Vec<usize>>,
31    /// Character for each match node.
32    match_chars: Vec<usize>,
33    /// Maximum possible subsequence length in the source problem.
34    max_length: usize,
35    /// Alphabet size of the source problem (used as the padding symbol).
36    alphabet_size: usize,
37}
38
39impl ReductionResult for ReductionLCSToIS {
40    type Source = LongestCommonSubsequence;
41    type Target = MaximumIndependentSet<SimpleGraph, One>;
42
43    fn target_problem(&self) -> &Self::Target {
44        &self.target
45    }
46
47    /// Extract an LCS solution from a MaximumIndependentSet solution.
48    ///
49    /// Selected vertices correspond to match nodes. Sort by position in
50    /// the first string to get the subsequence order, then pad to `max_length`.
51    fn extract_solution(&self, target_solution: &[usize]) -> Vec<usize> {
52        // Collect selected match nodes with their characters
53        let mut selected: Vec<(usize, usize)> = target_solution
54            .iter()
55            .enumerate()
56            .filter(|(_, &v)| v == 1)
57            .map(|(i, _)| (self.match_nodes[i][0], self.match_chars[i]))
58            .collect();
59        // Sort by position in the first string
60        selected.sort_by_key(|&(pos, _)| pos);
61
62        // Build config: characters followed by padding
63        let mut config = Vec::with_capacity(self.max_length);
64        for &(_, ch) in &selected {
65            config.push(ch);
66        }
67        // Pad with alphabet_size (the padding symbol)
68        while config.len() < self.max_length {
69            config.push(self.alphabet_size);
70        }
71        config
72    }
73}
74
75#[reduction(
76    overhead = {
77        num_vertices = "cross_frequency_product",
78        num_edges = "cross_frequency_product^2",
79    }
80)]
81impl ReduceTo<MaximumIndependentSet<SimpleGraph, One>> for LongestCommonSubsequence {
82    type Result = ReductionLCSToIS;
83
84    fn reduce_to(&self) -> Self::Result {
85        let strings = self.strings();
86        let k = self.num_strings();
87
88        // Step 1: Build match nodes.
89        // For each character c, find all k-tuples of positions where every
90        // string has character c at its respective position.
91        let mut match_nodes: Vec<Vec<usize>> = Vec::new();
92        let mut match_chars: Vec<usize> = Vec::new();
93
94        for c in 0..self.alphabet_size() {
95            // For each string, collect positions where character c appears
96            let positions_per_string: Vec<Vec<usize>> = strings
97                .iter()
98                .map(|s| {
99                    s.iter()
100                        .enumerate()
101                        .filter(|(_, &sym)| sym == c)
102                        .map(|(i, _)| i)
103                        .collect()
104                })
105                .collect();
106
107            // Generate all k-tuples (Cartesian product of position lists)
108            let tuples = cartesian_product(&positions_per_string);
109            for tuple in tuples {
110                match_nodes.push(tuple);
111                match_chars.push(c);
112            }
113        }
114
115        let num_vertices = match_nodes.len();
116
117        // Step 2: Build conflict edges.
118        // Two nodes u = (a_1, ..., a_k) and v = (b_1, ..., b_k) conflict when
119        // they cannot both appear in a valid common subsequence: NOT(all a_i < b_i)
120        // AND NOT(all a_i > b_i).
121        let mut edges: Vec<(usize, usize)> = Vec::new();
122
123        for i in 0..num_vertices {
124            for j in (i + 1)..num_vertices {
125                if nodes_conflict(&match_nodes[i], &match_nodes[j], k) {
126                    edges.push((i, j));
127                }
128            }
129        }
130
131        let target = MaximumIndependentSet::new(
132            SimpleGraph::new(num_vertices, edges),
133            vec![One; num_vertices],
134        );
135
136        ReductionLCSToIS {
137            target,
138            match_nodes,
139            match_chars,
140            max_length: self.max_length(),
141            alphabet_size: self.alphabet_size(),
142        }
143    }
144}
145
146/// Check whether two match nodes conflict (cannot both be in a common subsequence).
147///
148/// Two nodes `u = (a_1, ..., a_k)` and `v = (b_1, ..., b_k)` conflict when
149/// NOT (all a_i < b_i) AND NOT (all a_i > b_i).
150fn nodes_conflict(u: &[usize], v: &[usize], k: usize) -> bool {
151    let mut all_less = true;
152    let mut all_greater = true;
153    for i in 0..k {
154        if u[i] >= v[i] {
155            all_less = false;
156        }
157        if u[i] <= v[i] {
158            all_greater = false;
159        }
160    }
161    !all_less && !all_greater
162}
163
164/// Compute the Cartesian product of a list of position vectors.
165fn cartesian_product(lists: &[Vec<usize>]) -> Vec<Vec<usize>> {
166    if lists.is_empty() {
167        return vec![vec![]];
168    }
169
170    let mut result = vec![vec![]];
171    for list in lists {
172        let mut new_result = Vec::new();
173        for prefix in &result {
174            for &item in list {
175                let mut new_tuple = prefix.clone();
176                new_tuple.push(item);
177                new_result.push(new_tuple);
178            }
179        }
180        result = new_result;
181    }
182    result
183}
184
185#[cfg(feature = "example-db")]
186pub(crate) fn canonical_rule_example_specs() -> Vec<crate::example_db::specs::RuleExampleSpec> {
187    use crate::export::SolutionPair;
188
189    /// Build the example from the issue: k=2, s1="ABAC", s2="BACA", alphabet={A,B,C}.
190    fn lcs_abac_baca() -> LongestCommonSubsequence {
191        // A=0, B=1, C=2
192        LongestCommonSubsequence::new(
193            3,
194            vec![
195                vec![0, 1, 0, 2], // ABAC
196                vec![1, 0, 2, 0], // BACA
197            ],
198        )
199    }
200
201    vec![crate::example_db::specs::RuleExampleSpec {
202        id: "longestcommonsubsequence_to_maximumindependentset",
203        build: || {
204            // Issue example: MIS solution {v2, v4, v5} gives LCS "BAC" (length 3).
205            // Match nodes (ordered by character):
206            //   c=A(0): v0=(0,1), v1=(0,3), v2=(2,1), v3=(2,3)
207            //   c=B(1): v4=(1,0)
208            //   c=C(2): v5=(3,2)
209            // MIS {v2, v4, v5} => positions B@(1,0), A@(2,1), C@(3,2)
210            // source_config = [1, 0, 2, 3] (B, A, C, padding)
211            crate::example_db::specs::rule_example_with_witness::<
212                _,
213                MaximumIndependentSet<SimpleGraph, One>,
214            >(
215                lcs_abac_baca(),
216                SolutionPair {
217                    source_config: vec![1, 0, 2, 3],
218                    target_config: vec![0, 0, 1, 0, 1, 1],
219                },
220            )
221        },
222    }]
223}
224
225#[cfg(test)]
226#[path = "../unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs"]
227mod tests;