problemreductions/models/misc/
closest_substring.rs1use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry};
10use crate::traits::Problem;
11use crate::types::Min;
12use serde::{Deserialize, Serialize};
13
14inventory::submit! {
15 ProblemSchemaEntry {
16 name: "ClosestSubstring",
17 display_name: "Closest Substring",
18 aliases: &[],
19 dimensions: &[],
20 module_path: module_path!(),
21 description: "Find a center string of fixed length and one length-ell window per input string that minimize the maximum Hamming distance between the center and any selected window",
22 fields: &[
23 FieldInfo {
24 name: "alphabet_size",
25 type_name: "usize",
26 description: "Size q of the finite alphabet {0, ..., q-1}",
27 },
28 FieldInfo {
29 name: "strings",
30 type_name: "Vec<Vec<usize>>",
31 description: "Input strings s_1, ..., s_n over the alphabet (possibly of different lengths)",
32 },
33 FieldInfo {
34 name: "substring_length",
35 type_name: "usize",
36 description: "Common window length ell; every input string must have length at least substring_length",
37 },
38 ],
39 }
40}
41
42inventory::submit! {
43 ProblemSizeFieldEntry {
44 name: "ClosestSubstring",
45 fields: &[
46 "alphabet_size",
47 "num_strings",
48 "substring_length",
49 "total_length",
50 "total_num_windows",
51 ],
52 }
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct ClosestSubstring {
69 alphabet_size: usize,
70 strings: Vec<Vec<usize>>,
71 substring_length: usize,
72}
73
74impl ClosestSubstring {
75 pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>, substring_length: usize) -> Self {
85 assert!(
86 !strings.is_empty(),
87 "ClosestSubstring requires at least one input string"
88 );
89 assert!(
90 strings.iter().all(|s| s.len() >= substring_length),
91 "substring_length must be <= |s_i| for every input string"
92 );
93 assert!(
94 alphabet_size > 0 || substring_length == 0,
95 "alphabet_size must be > 0 when substring_length > 0"
96 );
97 assert!(
98 strings
99 .iter()
100 .flat_map(|s| s.iter())
101 .all(|&symbol| symbol < alphabet_size),
102 "input symbols must be less than alphabet_size"
103 );
104 Self {
105 alphabet_size,
106 strings,
107 substring_length,
108 }
109 }
110
111 pub fn alphabet_size(&self) -> usize {
113 self.alphabet_size
114 }
115
116 pub fn strings(&self) -> &[Vec<usize>] {
118 &self.strings
119 }
120
121 pub fn num_strings(&self) -> usize {
123 self.strings.len()
124 }
125
126 pub fn substring_length(&self) -> usize {
128 self.substring_length
129 }
130
131 pub fn total_length(&self) -> usize {
133 self.strings.iter().map(|s| s.len()).sum()
134 }
135
136 pub fn total_num_windows(&self) -> usize {
138 self.strings
139 .iter()
140 .map(|s| s.len() - self.substring_length + 1)
141 .sum()
142 }
143
144 pub fn num_window_choice_product(&self) -> usize {
149 self.strings
150 .iter()
151 .map(|s| s.len() - self.substring_length + 1)
152 .fold(1usize, |acc, w| acc.saturating_mul(w))
153 }
154}
155
156impl Problem for ClosestSubstring {
157 const NAME: &'static str = "ClosestSubstring";
158 type Value = Min<i64>;
159
160 fn variant() -> Vec<(&'static str, &'static str)> {
161 crate::variant_params![]
162 }
163
164 fn dims(&self) -> Vec<usize> {
165 let ell = self.substring_length;
166 let mut dims = vec![self.alphabet_size; ell];
167 dims.extend(self.strings.iter().map(|s| s.len() - ell + 1));
168 dims
169 }
170
171 fn evaluate(&self, config: &[usize]) -> Min<i64> {
172 let ell = self.substring_length;
173 let n = self.num_strings();
174 if config.len() != ell + n {
175 return Min(None);
176 }
177 let (center, window_starts) = config.split_at(ell);
178 if center.iter().any(|&symbol| symbol >= self.alphabet_size) {
179 return Min(None);
180 }
181 for (i, &start) in window_starts.iter().enumerate() {
182 let w_i = self.strings[i].len() - ell + 1;
183 if start >= w_i {
184 return Min(None);
185 }
186 }
187 let max_distance = window_starts
189 .iter()
190 .enumerate()
191 .map(|(i, &start)| {
192 let window = &self.strings[i][start..start + ell];
193 center
194 .iter()
195 .zip(window.iter())
196 .filter(|(c, t)| c != t)
197 .count() as i64
198 })
199 .max()
200 .unwrap_or(0);
201 Min(Some(max_distance))
202 }
203}
204
205crate::declare_variants! {
206 default ClosestSubstring => "alphabet_size ^ substring_length * num_window_choice_product",
207}
208
209#[cfg(feature = "example-db")]
210pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
211 vec![crate::example_db::specs::ModelExampleSpec {
212 id: "closest_substring",
213 instance: Box::new(ClosestSubstring::new(
214 2,
215 vec![
216 vec![0, 0, 0, 1, 1],
217 vec![1, 0, 1, 0, 0],
218 vec![1, 1, 0, 0, 1],
219 ],
220 3,
221 )),
222 optimal_config: vec![0, 1, 0, 0, 1, 0],
225 optimal_value: serde_json::json!(1),
226 }]
227}
228
229#[cfg(test)]
230#[path = "../../unit_tests/models/misc/closest_substring.rs"]
231mod tests;