problemreductions/models/set/consecutive_sets.rs
1//! Consecutive Sets problem implementation.
2//!
3//! Given an alphabet of size n, a collection of subsets of the alphabet, and a
4//! bound K, determine if there exists a string of length at most K over the
5//! alphabet such that the elements of each subset appear consecutively (as a
6//! contiguous block in some order) within the string.
7
8use crate::registry::{FieldInfo, ProblemSchemaEntry};
9use crate::traits::Problem;
10use serde::{Deserialize, Serialize};
11use std::collections::HashSet;
12
13inventory::submit! {
14 ProblemSchemaEntry {
15 name: "ConsecutiveSets",
16 display_name: "Consecutive Sets",
17 aliases: &[],
18 dimensions: &[],
19 module_path: module_path!(),
20 description: "Determine if a string exists where each subset's elements appear consecutively",
21 fields: &[
22 FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet (elements are 0..alphabet_size-1)" },
23 FieldInfo { name: "subsets", type_name: "Vec<Vec<usize>>", description: "Collection of subsets of the alphabet" },
24 FieldInfo { name: "bound_k", type_name: "usize", description: "Maximum string length K" },
25 ],
26 }
27}
28
29/// Consecutive Sets problem.
30///
31/// Given an alphabet {0, 1, ..., n-1}, a collection of subsets, and a bound K,
32/// determine if there exists a string w of length at most K over the alphabet
33/// such that the elements of each subset appear as a contiguous block (in any
34/// order) within w.
35///
36/// Configurations use `bound_k` positions. Values `0..alphabet_size-1`
37/// represent alphabet symbols, and the extra value `alphabet_size` marks
38/// unused positions beyond the end of a shorter string. Only trailing unused
39/// positions are valid.
40///
41/// This problem is NP-complete and arises in physical mapping of DNA and in
42/// consecutive arrangements of hypergraph vertices.
43///
44/// # Example
45///
46/// ```
47/// use problemreductions::models::set::ConsecutiveSets;
48/// use problemreductions::{Problem, Solver, BruteForce};
49///
50/// // Alphabet: {0, 1, 2, 3, 4, 5}, subsets that must appear consecutively
51/// let problem = ConsecutiveSets::new(
52/// 6,
53/// vec![vec![0, 4], vec![2, 4], vec![2, 5], vec![1, 5], vec![1, 3]],
54/// 6,
55/// );
56///
57/// let solver = BruteForce::new();
58/// let solution = solver.find_witness(&problem);
59///
60/// // w = [0, 4, 2, 5, 1, 3] is a valid solution
61/// assert!(solution.is_some());
62/// assert!(problem.evaluate(&solution.unwrap()));
63///
64/// // Shorter strings are encoded with trailing `unused = alphabet_size`.
65/// let shorter = ConsecutiveSets::new(3, vec![vec![0, 1]], 4);
66/// let unused = shorter.alphabet_size();
67/// assert!(shorter.evaluate(&[0, 1, unused, unused]));
68/// assert!(!shorter.evaluate(&[0, unused, 1, unused]));
69/// ```
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct ConsecutiveSets {
72 /// Size of the alphabet (elements are 0..alphabet_size-1).
73 alphabet_size: usize,
74 /// Collection of subsets of the alphabet, each sorted in canonical form.
75 subsets: Vec<Vec<usize>>,
76 /// Maximum string length K.
77 bound_k: usize,
78}
79
80impl ConsecutiveSets {
81 /// Create a new Consecutive Sets problem.
82 ///
83 /// # Panics
84 ///
85 /// Panics if `bound_k` is zero, if any subset contains duplicate elements,
86 /// or if any element is outside the alphabet.
87 pub fn new(alphabet_size: usize, subsets: Vec<Vec<usize>>, bound_k: usize) -> Self {
88 assert!(bound_k > 0, "bound_k must be positive, got 0");
89 let mut subsets = subsets;
90 for (i, subset) in subsets.iter_mut().enumerate() {
91 let mut seen = HashSet::with_capacity(subset.len());
92 for &elem in subset.iter() {
93 assert!(
94 elem < alphabet_size,
95 "Subset {} contains element {} which is outside alphabet of size {}",
96 i,
97 elem,
98 alphabet_size
99 );
100 assert!(
101 seen.insert(elem),
102 "Subset {} contains duplicate elements",
103 i
104 );
105 }
106 subset.sort();
107 }
108 Self {
109 alphabet_size,
110 subsets,
111 bound_k,
112 }
113 }
114
115 /// Get the alphabet size.
116 pub fn alphabet_size(&self) -> usize {
117 self.alphabet_size
118 }
119
120 /// Get the number of subsets in the collection.
121 pub fn num_subsets(&self) -> usize {
122 self.subsets.len()
123 }
124
125 /// Get the bound K.
126 pub fn bound_k(&self) -> usize {
127 self.bound_k
128 }
129
130 /// Get the subsets.
131 pub fn subsets(&self) -> &[Vec<usize>] {
132 &self.subsets
133 }
134}
135
136impl Problem for ConsecutiveSets {
137 const NAME: &'static str = "ConsecutiveSets";
138 type Value = crate::types::Or;
139
140 fn dims(&self) -> Vec<usize> {
141 // Each position can be any symbol (0..alphabet_size-1) or "unused" (alphabet_size)
142 vec![self.alphabet_size + 1; self.bound_k]
143 }
144
145 fn evaluate(&self, config: &[usize]) -> crate::types::Or {
146 crate::types::Or({
147 // 1. Validate config
148 if config.len() != self.bound_k || config.iter().any(|&v| v > self.alphabet_size) {
149 return crate::types::Or(false);
150 }
151
152 // 2. Build string: find the actual string length (strip trailing "unused")
153 let unused = self.alphabet_size;
154 let str_len = config
155 .iter()
156 .rposition(|&v| v != unused)
157 .map_or(0, |p| p + 1);
158
159 // 3. Check no internal "unused" symbols
160 let w = &config[..str_len];
161 if w.contains(&unused) {
162 return crate::types::Or(false);
163 }
164
165 let mut subset_membership = vec![0usize; self.alphabet_size];
166 let mut seen_in_window = vec![0usize; self.alphabet_size];
167 let mut subset_stamp = 1usize;
168 let mut window_stamp = 1usize;
169
170 // 4. Check each subset has a consecutive block
171 for subset in &self.subsets {
172 let subset_len = subset.len();
173 if subset_len == 0 {
174 continue; // empty subset trivially satisfied
175 }
176 if subset_len > str_len {
177 return crate::types::Or(false); // can't fit
178 }
179
180 for &elem in subset {
181 subset_membership[elem] = subset_stamp;
182 }
183
184 let mut found = false;
185 for start in 0..=(str_len - subset_len) {
186 let window = &w[start..start + subset_len];
187 let current_window_stamp = window_stamp;
188 window_stamp += 1;
189
190 // Because subsets are validated to contain unique elements,
191 // a window matches iff every symbol belongs to the subset and
192 // appears at most once.
193 if window.iter().all(|&elem| {
194 let is_member = subset_membership[elem] == subset_stamp;
195 let is_new = seen_in_window[elem] != current_window_stamp;
196 if is_member && is_new {
197 seen_in_window[elem] = current_window_stamp;
198 true
199 } else {
200 false
201 }
202 }) {
203 // subset is already sorted
204 found = true;
205 break;
206 }
207 }
208 if !found {
209 return crate::types::Or(false);
210 }
211
212 subset_stamp += 1;
213 }
214
215 true
216 })
217 }
218
219 fn variant() -> Vec<(&'static str, &'static str)> {
220 crate::variant_params![]
221 }
222}
223
224crate::declare_variants! {
225 default ConsecutiveSets => "alphabet_size^bound_k * num_subsets",
226}
227
228#[cfg(feature = "example-db")]
229pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
230 vec![crate::example_db::specs::ModelExampleSpec {
231 id: "consecutive_sets",
232 // YES instance from issue: w = [0, 4, 2, 5, 1, 3]
233 instance: Box::new(ConsecutiveSets::new(
234 6,
235 vec![vec![0, 4], vec![2, 4], vec![2, 5], vec![1, 5], vec![1, 3]],
236 6,
237 )),
238 optimal_config: vec![0, 4, 2, 5, 1, 3],
239 optimal_value: serde_json::json!(true),
240 }]
241}
242
243#[cfg(test)]
244#[path = "../../unit_tests/models/set/consecutive_sets.rs"]
245mod tests;