Skip to main content

problemreductions/models/misc/
shortest_common_superstring.rs

1//! Shortest Common Superstring problem implementation.
2//!
3//! Given a set of strings over an alphabet, find the shortest common
4//! superstring. A string `w` is a superstring of `s` if `s` appears as a
5//! _contiguous substring_ of `w` (i.e., there exist `w_0, w_1` in `Sigma^*`
6//! with `w = w_0 s w_1`). This is stricter than the subsequence-based
7//! `ShortestCommonSupersequence`.
8//!
9//! The configuration uses a fixed-length representation of `max_length`
10//! symbols from `{0, ..., alphabet_size}`, where `alphabet_size` serves as a
11//! padding/end symbol. The effective superstring is the prefix before the
12//! first padding symbol. `max_length` equals the sum of all input string
13//! lengths (the worst case where no overlap exists). This problem is
14//! NP-complete (Maier and Storer, 1977).
15//!
16//! Reference: Garey & Johnson, *Computers and Intractability*, problem SR9
17//! (P157).
18
19use crate::registry::{FieldInfo, ProblemSchemaEntry};
20use crate::traits::Problem;
21use crate::types::Min;
22use serde::{Deserialize, Serialize};
23
24inventory::submit! {
25    ProblemSchemaEntry {
26        name: "ShortestCommonSuperstring",
27        display_name: "Shortest Common Superstring",
28        aliases: &["SCSS"],
29        dimensions: &[],
30        module_path: module_path!(),
31        description: "Find a shortest string that contains every input string as a contiguous substring",
32        fields: &[
33            FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet" },
34            FieldInfo { name: "strings", type_name: "Vec<Vec<usize>>", description: "Input strings over the alphabet {0, ..., alphabet_size-1}" },
35            FieldInfo { name: "max_length", type_name: "usize", description: "Maximum possible superstring length (sum of all string lengths)" },
36        ],
37    }
38}
39
40/// The Shortest Common Superstring problem.
41///
42/// Given an alphabet of size `k` and a set of strings over `{0, ..., k-1}`,
43/// find the shortest string `w` such that every input string appears as a
44/// contiguous substring of `w`.
45///
46/// # Representation
47///
48/// The configuration is a vector of length `max_length`, where each entry is a
49/// symbol in `{0, ..., alphabet_size}`. The value `alphabet_size` acts as a
50/// padding/end symbol. The effective superstring is the prefix of
51/// non-padding symbols. Padding must be contiguous at the end.
52///
53/// # Example
54///
55/// ```
56/// use problemreductions::models::misc::ShortestCommonSuperstring;
57/// use problemreductions::{Problem, Solver, BruteForce};
58///
59/// // Alphabet {0, 1}, strings [0,1] and [1,0]
60/// let problem = ShortestCommonSuperstring::new(2, vec![vec![0, 1], vec![1, 0]]);
61/// let solver = BruteForce::new();
62/// let solution = solver.find_witness(&problem);
63/// assert!(solution.is_some());
64/// ```
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct ShortestCommonSuperstring {
67    alphabet_size: usize,
68    strings: Vec<Vec<usize>>,
69    max_length: usize,
70}
71
72impl ShortestCommonSuperstring {
73    /// Create a new ShortestCommonSuperstring instance.
74    ///
75    /// `max_length` is computed automatically as the sum of all input string
76    /// lengths (the trivial upper bound: concatenation with no overlap).
77    ///
78    /// # Panics
79    ///
80    /// Panics if `strings` is empty, or if `alphabet_size` is 0 and any input
81    /// string is non-empty.
82    pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>) -> Self {
83        assert!(!strings.is_empty(), "must have at least one string");
84        let max_length: usize = strings.iter().map(|s| s.len()).sum();
85        assert!(
86            alphabet_size > 0 || strings.iter().all(|s| s.is_empty()),
87            "alphabet_size must be > 0 when any input string is non-empty"
88        );
89        Self {
90            alphabet_size,
91            strings,
92            max_length,
93        }
94    }
95
96    /// Returns the alphabet size.
97    pub fn alphabet_size(&self) -> usize {
98        self.alphabet_size
99    }
100
101    /// Returns the input strings.
102    pub fn strings(&self) -> &[Vec<usize>] {
103        &self.strings
104    }
105
106    /// Returns the maximum possible superstring length.
107    pub fn max_length(&self) -> usize {
108        self.max_length
109    }
110
111    /// Returns the number of input strings.
112    pub fn num_strings(&self) -> usize {
113        self.strings.len()
114    }
115
116    /// Returns the total length of all input strings.
117    pub fn total_length(&self) -> usize {
118        self.strings.iter().map(|s| s.len()).sum()
119    }
120}
121
122/// Check whether `needle` appears as a contiguous substring of `haystack`.
123fn is_substring(needle: &[usize], haystack: &[usize]) -> bool {
124    if needle.is_empty() {
125        return true;
126    }
127    if needle.len() > haystack.len() {
128        return false;
129    }
130    haystack
131        .windows(needle.len())
132        .any(|window| window == needle)
133}
134
135impl Problem for ShortestCommonSuperstring {
136    const NAME: &'static str = "ShortestCommonSuperstring";
137    type Value = Min<usize>;
138
139    fn variant() -> Vec<(&'static str, &'static str)> {
140        crate::variant_params![]
141    }
142
143    fn dims(&self) -> Vec<usize> {
144        vec![self.alphabet_size + 1; self.max_length]
145    }
146
147    fn evaluate(&self, config: &[usize]) -> Min<usize> {
148        if config.len() != self.max_length {
149            return Min(None);
150        }
151
152        let pad = self.alphabet_size;
153
154        // Find effective length = index of first padding symbol
155        let effective_length = config
156            .iter()
157            .position(|&v| v == pad)
158            .unwrap_or(self.max_length);
159
160        // Verify all positions after first padding are also padding (no interleaved padding)
161        for &v in &config[effective_length..] {
162            if v != pad {
163                return Min(None);
164            }
165        }
166
167        // Check all symbols in the prefix are valid (0..alphabet_size)
168        let prefix = &config[..effective_length];
169        if prefix.iter().any(|&v| v >= self.alphabet_size) {
170            return Min(None);
171        }
172
173        // Check every input string appears as a contiguous substring of the prefix
174        if !self.strings.iter().all(|s| is_substring(s, prefix)) {
175            return Min(None);
176        }
177
178        Min(Some(effective_length))
179    }
180}
181
182crate::declare_variants! {
183    default ShortestCommonSuperstring => "num_strings ^ 2 * 2 ^ num_strings",
184}
185
186#[cfg(feature = "example-db")]
187pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
188    // Alphabet {0, 1}, strings [0,1] and [1,0].
189    // max_length = 2 + 2 = 4, search space = 3^4 = 81.
190    // Optimal SCSS length = 3, e.g. [0,1,0] padded to [0,1,0,2] ("010" contains
191    // both "01" and "10" as contiguous substrings).
192    vec![crate::example_db::specs::ModelExampleSpec {
193        id: "shortest_common_superstring",
194        instance: Box::new(ShortestCommonSuperstring::new(
195            2,
196            vec![vec![0, 1], vec![1, 0]],
197        )),
198        optimal_config: vec![0, 1, 0, 2],
199        optimal_value: serde_json::json!(3),
200    }]
201}
202
203#[cfg(test)]
204#[path = "../../unit_tests/models/misc/shortest_common_superstring.rs"]
205mod tests;