Skip to main content

problemreductions/models/misc/
shortest_common_supersequence.rs

1//! Shortest Common Supersequence problem implementation.
2//!
3//! Given a set of strings over an alphabet, find the shortest common
4//! supersequence. A string `w` is a supersequence of `s` if `s` is a
5//! subsequence of `w` (i.e., `s` can be obtained by deleting zero or more
6//! characters from `w`).
7//!
8//! The configuration uses a fixed-length representation of `max_length`
9//! symbols from `{0, ..., alphabet_size}`, where `alphabet_size` serves as a
10//! padding/end symbol. The effective supersequence is the prefix before the
11//! first padding symbol. `max_length` equals the sum of all input string
12//! lengths (the worst case where no overlap exists). This problem is NP-hard
13//! (Maier, 1978).
14
15use crate::registry::{FieldInfo, ProblemSchemaEntry};
16use crate::traits::Problem;
17use crate::types::Min;
18use serde::{Deserialize, Serialize};
19
20inventory::submit! {
21    ProblemSchemaEntry {
22        name: "ShortestCommonSupersequence",
23        display_name: "Shortest Common Supersequence",
24        aliases: &["SCS"],
25        dimensions: &[],
26        module_path: module_path!(),
27        description: "Find a shortest common supersequence for a set of strings",
28        fields: &[
29            FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet" },
30            FieldInfo { name: "strings", type_name: "Vec<Vec<usize>>", description: "Input strings over the alphabet {0, ..., alphabet_size-1}" },
31            FieldInfo { name: "max_length", type_name: "usize", description: "Maximum possible supersequence length (sum of all string lengths)" },
32        ],
33    }
34}
35
36/// The Shortest Common Supersequence problem.
37///
38/// Given an alphabet of size `k` and a set of strings over `{0, ..., k-1}`,
39/// find the shortest string `w` such that every input string is a subsequence
40/// of `w`.
41///
42/// # Representation
43///
44/// The configuration is a vector of length `max_length`, where each entry is a
45/// symbol in `{0, ..., alphabet_size}`. The value `alphabet_size` acts as a
46/// padding/end symbol. The effective supersequence is the prefix of
47/// non-padding symbols. Padding must be contiguous at the end.
48///
49/// # Example
50///
51/// ```
52/// use problemreductions::models::misc::ShortestCommonSupersequence;
53/// use problemreductions::{Problem, Solver, BruteForce};
54///
55/// // Alphabet {0, 1}, strings [0,1] and [1,0]
56/// let problem = ShortestCommonSupersequence::new(2, vec![vec![0, 1], vec![1, 0]]);
57/// let solver = BruteForce::new();
58/// let solution = solver.find_witness(&problem);
59/// assert!(solution.is_some());
60/// ```
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct ShortestCommonSupersequence {
63    alphabet_size: usize,
64    strings: Vec<Vec<usize>>,
65    max_length: usize,
66}
67
68impl ShortestCommonSupersequence {
69    /// Create a new ShortestCommonSupersequence instance.
70    ///
71    /// `max_length` is computed automatically as the sum of all input string
72    /// lengths (the worst-case supersequence with no overlap).
73    ///
74    /// # Panics
75    ///
76    /// Panics if `strings` is empty, or if `alphabet_size` is 0 and any input
77    /// string is non-empty.
78    pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>) -> Self {
79        assert!(!strings.is_empty(), "must have at least one string");
80        let max_length: usize = strings.iter().map(|s| s.len()).sum();
81        assert!(
82            alphabet_size > 0 || strings.iter().all(|s| s.is_empty()),
83            "alphabet_size must be > 0 when any input string is non-empty"
84        );
85        Self {
86            alphabet_size,
87            strings,
88            max_length,
89        }
90    }
91
92    /// Returns the alphabet size.
93    pub fn alphabet_size(&self) -> usize {
94        self.alphabet_size
95    }
96
97    /// Returns the input strings.
98    pub fn strings(&self) -> &[Vec<usize>] {
99        &self.strings
100    }
101
102    /// Returns the maximum possible supersequence length.
103    pub fn max_length(&self) -> usize {
104        self.max_length
105    }
106
107    /// Returns the number of input strings.
108    pub fn num_strings(&self) -> usize {
109        self.strings.len()
110    }
111
112    /// Returns the total length of all input strings.
113    pub fn total_length(&self) -> usize {
114        self.strings.iter().map(|s| s.len()).sum()
115    }
116}
117
118/// Check whether `needle` is a subsequence of `haystack` using greedy
119/// left-to-right matching.
120fn is_subsequence(needle: &[usize], haystack: &[usize]) -> bool {
121    let mut it = haystack.iter();
122    for &ch in needle {
123        loop {
124            match it.next() {
125                Some(&c) if c == ch => break,
126                Some(_) => continue,
127                None => return false,
128            }
129        }
130    }
131    true
132}
133
134impl Problem for ShortestCommonSupersequence {
135    const NAME: &'static str = "ShortestCommonSupersequence";
136    type Value = Min<usize>;
137
138    fn variant() -> Vec<(&'static str, &'static str)> {
139        crate::variant_params![]
140    }
141
142    fn dims(&self) -> Vec<usize> {
143        vec![self.alphabet_size + 1; self.max_length]
144    }
145
146    fn evaluate(&self, config: &[usize]) -> Min<usize> {
147        if config.len() != self.max_length {
148            return Min(None);
149        }
150
151        let pad = self.alphabet_size;
152
153        // Find effective length = index of first padding symbol
154        let effective_length = config
155            .iter()
156            .position(|&v| v == pad)
157            .unwrap_or(self.max_length);
158
159        // Verify all positions after first padding are also padding (no interleaved padding)
160        for &v in &config[effective_length..] {
161            if v != pad {
162                return Min(None);
163            }
164        }
165
166        // Check all symbols in the prefix are valid (0..alphabet_size)
167        let prefix = &config[..effective_length];
168        if prefix.iter().any(|&v| v >= self.alphabet_size) {
169            return Min(None);
170        }
171
172        // Check every input string is a subsequence of the prefix
173        if !self.strings.iter().all(|s| is_subsequence(s, prefix)) {
174            return Min(None);
175        }
176
177        Min(Some(effective_length))
178    }
179}
180
181crate::declare_variants! {
182    default ShortestCommonSupersequence => "(alphabet_size + 1) ^ max_length",
183}
184
185#[cfg(feature = "example-db")]
186pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
187    // Alphabet {0, 1}, strings [0,1] and [1,0]
188    // max_length = 2 + 2 = 4, search space = 3^4 = 81
189    // Optimal SCS length = 3, e.g. [0,1,0] padded to [0,1,0,2]
190    vec![crate::example_db::specs::ModelExampleSpec {
191        id: "shortest_common_supersequence",
192        instance: Box::new(ShortestCommonSupersequence::new(
193            2,
194            vec![vec![0, 1], vec![1, 0]],
195        )),
196        optimal_config: vec![0, 1, 0, 2],
197        optimal_value: serde_json::json!(3),
198    }]
199}
200
201#[cfg(test)]
202#[path = "../../unit_tests/models/misc/shortest_common_supersequence.rs"]
203mod tests;