Skip to main content

problemreductions/models/misc/
longest_common_subsequence.rs

1//! Longest Common Subsequence (LCS) problem implementation.
2//!
3//! Given a finite alphabet and a set of strings over that alphabet, find a
4//! longest common subsequence. The configuration is a fixed-length vector of
5//! `max_length` positions, where each entry is either a valid symbol or the
6//! padding symbol (`alphabet_size`). Padding must be contiguous at the end.
7
8use crate::registry::{FieldInfo, ProblemSchemaEntry};
9use crate::traits::Problem;
10use crate::types::Max;
11use serde::{Deserialize, Serialize};
12
13inventory::submit! {
14    ProblemSchemaEntry {
15        name: "LongestCommonSubsequence",
16        display_name: "Longest Common Subsequence",
17        aliases: &["LCS"],
18        dimensions: &[],
19        module_path: module_path!(),
20        description: "Find a longest common subsequence for a set of strings",
21        fields: &[
22            FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet" },
23            FieldInfo { name: "strings", type_name: "Vec<Vec<usize>>", description: "Input strings over the alphabet {0, ..., alphabet_size-1}" },
24            FieldInfo { name: "max_length", type_name: "usize", description: "Maximum possible subsequence length (min of string lengths)" },
25        ],
26    }
27}
28
29/// The Longest Common Subsequence problem.
30///
31/// Given an alphabet of size `k` and a set of strings over `{0, ..., k-1}`,
32/// find a longest string `w` that is a subsequence of every input string.
33///
34/// # Representation
35///
36/// The configuration is a vector of length `max_length`, where each entry is a
37/// symbol in `{0, ..., alphabet_size}`. The value `alphabet_size` is the
38/// padding symbol. Padding must be contiguous at the end of the vector. The
39/// effective subsequence consists of all non-padding symbols (the prefix before
40/// padding starts). The objective is to maximize the effective length.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct LongestCommonSubsequence {
43    alphabet_size: usize,
44    strings: Vec<Vec<usize>>,
45    max_length: usize,
46}
47
48impl LongestCommonSubsequence {
49    /// Create a new LongestCommonSubsequence instance.
50    ///
51    /// The `max_length` is computed automatically as the minimum of all string
52    /// lengths (the maximum possible common subsequence length).
53    ///
54    /// # Panics
55    ///
56    /// Panics if `alphabet_size == 0` and any input string is non-empty, or if
57    /// an input symbol is outside the declared alphabet, or if all strings are
58    /// empty (max_length would be 0, requiring at least one non-empty string).
59    pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>) -> Self {
60        let max_length = strings.iter().map(|s| s.len()).min().unwrap_or(0);
61        assert!(
62            alphabet_size > 0 || strings.iter().all(|s| s.is_empty()),
63            "alphabet_size must be > 0 when any input string is non-empty"
64        );
65        assert!(
66            strings
67                .iter()
68                .flat_map(|s| s.iter())
69                .all(|&symbol| symbol < alphabet_size),
70            "input symbols must be less than alphabet_size"
71        );
72        Self {
73            alphabet_size,
74            strings,
75            max_length,
76        }
77    }
78
79    /// Returns the alphabet size.
80    pub fn alphabet_size(&self) -> usize {
81        self.alphabet_size
82    }
83
84    /// Returns the input strings.
85    pub fn strings(&self) -> &[Vec<usize>] {
86        &self.strings
87    }
88
89    /// Returns the `max_length` field.
90    pub fn max_length(&self) -> usize {
91        self.max_length
92    }
93
94    /// Returns the number of input strings.
95    pub fn num_strings(&self) -> usize {
96        self.strings.len()
97    }
98
99    /// Returns the total input length across all strings.
100    pub fn total_length(&self) -> usize {
101        self.strings.iter().map(|s| s.len()).sum()
102    }
103
104    /// Returns the sum of squared string lengths.
105    pub fn sum_squared_lengths(&self) -> usize {
106        self.strings.iter().map(|s| s.len() * s.len()).sum()
107    }
108
109    /// Returns the sum of triangular numbers len * (len + 1) / 2 across strings.
110    pub fn sum_triangular_lengths(&self) -> usize {
111        self.strings
112            .iter()
113            .map(|s| s.len() * (s.len() + 1) / 2)
114            .sum()
115    }
116
117    /// Returns the number of adjacent position transitions.
118    pub fn num_transitions(&self) -> usize {
119        self.max_length.saturating_sub(1)
120    }
121
122    /// Returns the cross-frequency product: the sum over each alphabet symbol
123    /// of the product of that symbol's frequency across all input strings.
124    ///
125    /// Formally: Σ_{c ∈ 0..alphabet_size} Π_{i=1..k} count(c, strings\[i\])
126    /// where count(c, s) is the number of occurrences of symbol c in string s.
127    ///
128    /// This equals the exact number of match-node vertices in the LCS → MaxIS
129    /// reduction graph.
130    pub fn cross_frequency_product(&self) -> usize {
131        (0..self.alphabet_size)
132            .map(|c| {
133                self.strings
134                    .iter()
135                    .map(|s| s.iter().filter(|&&sym| sym == c).count())
136                    .product::<usize>()
137            })
138            .sum()
139    }
140}
141
142/// Check whether `candidate` is a subsequence of `target` using greedy
143/// left-to-right matching.
144fn is_subsequence(candidate: &[usize], target: &[usize]) -> bool {
145    let mut it = target.iter();
146    for &symbol in candidate {
147        loop {
148            match it.next() {
149                Some(&next) if next == symbol => break,
150                Some(_) => continue,
151                None => return false,
152            }
153        }
154    }
155    true
156}
157
158impl Problem for LongestCommonSubsequence {
159    const NAME: &'static str = "LongestCommonSubsequence";
160    type Value = Max<usize>;
161
162    fn variant() -> Vec<(&'static str, &'static str)> {
163        crate::variant_params![]
164    }
165
166    fn dims(&self) -> Vec<usize> {
167        vec![self.alphabet_size + 1; self.max_length]
168    }
169
170    fn evaluate(&self, config: &[usize]) -> Max<usize> {
171        if config.len() != self.max_length {
172            return Max(None);
173        }
174
175        let padding = self.alphabet_size;
176
177        // Find effective length = index of first padding symbol (or max_length if no padding).
178        let effective_length = config
179            .iter()
180            .position(|&s| s == padding)
181            .unwrap_or(self.max_length);
182
183        // Verify all positions after the first padding are also padding (no interleaved padding).
184        if config[effective_length..].iter().any(|&s| s != padding) {
185            return Max(None);
186        }
187
188        // Extract the non-padding prefix as the candidate subsequence.
189        let prefix = &config[..effective_length];
190
191        // Check all symbols in prefix are valid (0..alphabet_size).
192        if prefix.iter().any(|&s| s >= self.alphabet_size) {
193            return Max(None);
194        }
195
196        // Check the prefix is a subsequence of every input string.
197        if !self.strings.iter().all(|s| is_subsequence(prefix, s)) {
198            return Max(None);
199        }
200
201        Max(Some(effective_length))
202    }
203}
204
205crate::declare_variants! {
206    default LongestCommonSubsequence => "(alphabet_size + 1) ^ max_length",
207}
208
209#[cfg(feature = "example-db")]
210pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
211    vec![crate::example_db::specs::ModelExampleSpec {
212        id: "longest_common_subsequence",
213        instance: Box::new(LongestCommonSubsequence::new(
214            2,
215            vec![
216                vec![0, 1, 0, 1, 1, 0],
217                vec![1, 0, 0, 1, 0, 1],
218                vec![0, 0, 1, 0, 1, 1],
219                vec![1, 1, 0, 0, 1, 0],
220                vec![0, 1, 0, 1, 0, 1],
221                vec![1, 0, 1, 0, 1, 0],
222            ],
223        )),
224        optimal_config: vec![0, 0, 1, 0, 2, 2],
225        optimal_value: serde_json::json!(4),
226    }]
227}
228
229#[cfg(test)]
230#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"]
231mod tests;