problemreductions/models/misc/shortest_common_superstring.rs
1//! Shortest Common Superstring problem implementation.
2//!
3//! Given a set of strings over an alphabet, find the shortest common
4//! superstring. A string `w` is a superstring of `s` if `s` appears as a
5//! _contiguous substring_ of `w` (i.e., there exist `w_0, w_1` in `Sigma^*`
6//! with `w = w_0 s w_1`). This is stricter than the subsequence-based
7//! `ShortestCommonSupersequence`.
8//!
9//! The configuration uses a fixed-length representation of `max_length`
10//! symbols from `{0, ..., alphabet_size}`, where `alphabet_size` serves as a
11//! padding/end symbol. The effective superstring is the prefix before the
12//! first padding symbol. `max_length` equals the sum of all input string
13//! lengths (the worst case where no overlap exists). This problem is
14//! NP-complete (Maier and Storer, 1977).
15//!
16//! Reference: Garey & Johnson, *Computers and Intractability*, problem SR9
17//! (P157).
18
19use crate::registry::{FieldInfo, ProblemSchemaEntry};
20use crate::traits::Problem;
21use crate::types::Min;
22use serde::{Deserialize, Serialize};
23
24inventory::submit! {
25 ProblemSchemaEntry {
26 name: "ShortestCommonSuperstring",
27 display_name: "Shortest Common Superstring",
28 aliases: &["SCSS"],
29 dimensions: &[],
30 module_path: module_path!(),
31 description: "Find a shortest string that contains every input string as a contiguous substring",
32 fields: &[
33 FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet" },
34 FieldInfo { name: "strings", type_name: "Vec<Vec<usize>>", description: "Input strings over the alphabet {0, ..., alphabet_size-1}" },
35 FieldInfo { name: "max_length", type_name: "usize", description: "Maximum possible superstring length (sum of all string lengths)" },
36 ],
37 }
38}
39
40/// The Shortest Common Superstring problem.
41///
42/// Given an alphabet of size `k` and a set of strings over `{0, ..., k-1}`,
43/// find the shortest string `w` such that every input string appears as a
44/// contiguous substring of `w`.
45///
46/// # Representation
47///
48/// The configuration is a vector of length `max_length`, where each entry is a
49/// symbol in `{0, ..., alphabet_size}`. The value `alphabet_size` acts as a
50/// padding/end symbol. The effective superstring is the prefix of
51/// non-padding symbols. Padding must be contiguous at the end.
52///
53/// # Example
54///
55/// ```
56/// use problemreductions::models::misc::ShortestCommonSuperstring;
57/// use problemreductions::{Problem, Solver, BruteForce};
58///
59/// // Alphabet {0, 1}, strings [0,1] and [1,0]
60/// let problem = ShortestCommonSuperstring::new(2, vec![vec![0, 1], vec![1, 0]]);
61/// let solver = BruteForce::new();
62/// let solution = solver.find_witness(&problem);
63/// assert!(solution.is_some());
64/// ```
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct ShortestCommonSuperstring {
67 alphabet_size: usize,
68 strings: Vec<Vec<usize>>,
69 max_length: usize,
70}
71
72impl ShortestCommonSuperstring {
73 /// Create a new ShortestCommonSuperstring instance.
74 ///
75 /// `max_length` is computed automatically as the sum of all input string
76 /// lengths (the trivial upper bound: concatenation with no overlap).
77 ///
78 /// # Panics
79 ///
80 /// Panics if `strings` is empty, or if `alphabet_size` is 0 and any input
81 /// string is non-empty.
82 pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>) -> Self {
83 assert!(!strings.is_empty(), "must have at least one string");
84 let max_length: usize = strings.iter().map(|s| s.len()).sum();
85 assert!(
86 alphabet_size > 0 || strings.iter().all(|s| s.is_empty()),
87 "alphabet_size must be > 0 when any input string is non-empty"
88 );
89 Self {
90 alphabet_size,
91 strings,
92 max_length,
93 }
94 }
95
96 /// Returns the alphabet size.
97 pub fn alphabet_size(&self) -> usize {
98 self.alphabet_size
99 }
100
101 /// Returns the input strings.
102 pub fn strings(&self) -> &[Vec<usize>] {
103 &self.strings
104 }
105
106 /// Returns the maximum possible superstring length.
107 pub fn max_length(&self) -> usize {
108 self.max_length
109 }
110
111 /// Returns the number of input strings.
112 pub fn num_strings(&self) -> usize {
113 self.strings.len()
114 }
115
116 /// Returns the total length of all input strings.
117 pub fn total_length(&self) -> usize {
118 self.strings.iter().map(|s| s.len()).sum()
119 }
120}
121
122/// Check whether `needle` appears as a contiguous substring of `haystack`.
123fn is_substring(needle: &[usize], haystack: &[usize]) -> bool {
124 if needle.is_empty() {
125 return true;
126 }
127 if needle.len() > haystack.len() {
128 return false;
129 }
130 haystack
131 .windows(needle.len())
132 .any(|window| window == needle)
133}
134
135impl Problem for ShortestCommonSuperstring {
136 const NAME: &'static str = "ShortestCommonSuperstring";
137 type Value = Min<usize>;
138
139 fn variant() -> Vec<(&'static str, &'static str)> {
140 crate::variant_params![]
141 }
142
143 fn dims(&self) -> Vec<usize> {
144 vec![self.alphabet_size + 1; self.max_length]
145 }
146
147 fn evaluate(&self, config: &[usize]) -> Min<usize> {
148 if config.len() != self.max_length {
149 return Min(None);
150 }
151
152 let pad = self.alphabet_size;
153
154 // Find effective length = index of first padding symbol
155 let effective_length = config
156 .iter()
157 .position(|&v| v == pad)
158 .unwrap_or(self.max_length);
159
160 // Verify all positions after first padding are also padding (no interleaved padding)
161 for &v in &config[effective_length..] {
162 if v != pad {
163 return Min(None);
164 }
165 }
166
167 // Check all symbols in the prefix are valid (0..alphabet_size)
168 let prefix = &config[..effective_length];
169 if prefix.iter().any(|&v| v >= self.alphabet_size) {
170 return Min(None);
171 }
172
173 // Check every input string appears as a contiguous substring of the prefix
174 if !self.strings.iter().all(|s| is_substring(s, prefix)) {
175 return Min(None);
176 }
177
178 Min(Some(effective_length))
179 }
180}
181
182crate::declare_variants! {
183 default ShortestCommonSuperstring => "num_strings ^ 2 * 2 ^ num_strings",
184}
185
186#[cfg(feature = "example-db")]
187pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
188 // Alphabet {0, 1}, strings [0,1] and [1,0].
189 // max_length = 2 + 2 = 4, search space = 3^4 = 81.
190 // Optimal SCSS length = 3, e.g. [0,1,0] padded to [0,1,0,2] ("010" contains
191 // both "01" and "10" as contiguous substrings).
192 vec![crate::example_db::specs::ModelExampleSpec {
193 id: "shortest_common_superstring",
194 instance: Box::new(ShortestCommonSuperstring::new(
195 2,
196 vec![vec![0, 1], vec![1, 0]],
197 )),
198 optimal_config: vec![0, 1, 0, 2],
199 optimal_value: serde_json::json!(3),
200 }]
201}
202
203#[cfg(test)]
204#[path = "../../unit_tests/models/misc/shortest_common_superstring.rs"]
205mod tests;