Skip to main content

problemreductions/models/set/
consecutive_sets.rs

1//! Consecutive Sets problem implementation.
2//!
3//! Given an alphabet of size n, a collection of subsets of the alphabet, and a
4//! bound K, determine if there exists a string of length at most K over the
5//! alphabet such that the elements of each subset appear consecutively (as a
6//! contiguous block in some order) within the string.
7
8use crate::registry::{FieldInfo, ProblemSchemaEntry};
9use crate::traits::Problem;
10use serde::{Deserialize, Serialize};
11use std::collections::HashSet;
12
13inventory::submit! {
14    ProblemSchemaEntry {
15        name: "ConsecutiveSets",
16        display_name: "Consecutive Sets",
17        aliases: &[],
18        dimensions: &[],
19        module_path: module_path!(),
20        description: "Determine if a string exists where each subset's elements appear consecutively",
21        fields: &[
22            FieldInfo { name: "alphabet_size", type_name: "usize", description: "Size of the alphabet (elements are 0..alphabet_size-1)" },
23            FieldInfo { name: "subsets", type_name: "Vec<Vec<usize>>", description: "Collection of subsets of the alphabet" },
24            FieldInfo { name: "bound_k", type_name: "usize", description: "Maximum string length K" },
25        ],
26    }
27}
28
29/// Consecutive Sets problem.
30///
31/// Given an alphabet {0, 1, ..., n-1}, a collection of subsets, and a bound K,
32/// determine if there exists a string w of length at most K over the alphabet
33/// such that the elements of each subset appear as a contiguous block (in any
34/// order) within w.
35///
36/// Configurations use `bound_k` positions. Values `0..alphabet_size-1`
37/// represent alphabet symbols, and the extra value `alphabet_size` marks
38/// unused positions beyond the end of a shorter string. Only trailing unused
39/// positions are valid.
40///
41/// This problem is NP-complete and arises in physical mapping of DNA and in
42/// consecutive arrangements of hypergraph vertices.
43///
44/// # Example
45///
46/// ```
47/// use problemreductions::models::set::ConsecutiveSets;
48/// use problemreductions::{Problem, Solver, BruteForce};
49///
50/// // Alphabet: {0, 1, 2, 3, 4, 5}, subsets that must appear consecutively
51/// let problem = ConsecutiveSets::new(
52///     6,
53///     vec![vec![0, 4], vec![2, 4], vec![2, 5], vec![1, 5], vec![1, 3]],
54///     6,
55/// );
56///
57/// let solver = BruteForce::new();
58/// let solution = solver.find_witness(&problem);
59///
60/// // w = [0, 4, 2, 5, 1, 3] is a valid solution
61/// assert!(solution.is_some());
62/// assert!(problem.evaluate(&solution.unwrap()));
63///
64/// // Shorter strings are encoded with trailing `unused = alphabet_size`.
65/// let shorter = ConsecutiveSets::new(3, vec![vec![0, 1]], 4);
66/// let unused = shorter.alphabet_size();
67/// assert!(shorter.evaluate(&[0, 1, unused, unused]));
68/// assert!(!shorter.evaluate(&[0, unused, 1, unused]));
69/// ```
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct ConsecutiveSets {
72    /// Size of the alphabet (elements are 0..alphabet_size-1).
73    alphabet_size: usize,
74    /// Collection of subsets of the alphabet, each sorted in canonical form.
75    subsets: Vec<Vec<usize>>,
76    /// Maximum string length K.
77    bound_k: usize,
78}
79
80impl ConsecutiveSets {
81    /// Create a new Consecutive Sets problem.
82    ///
83    /// # Panics
84    ///
85    /// Panics if `bound_k` is zero, if any subset contains duplicate elements,
86    /// or if any element is outside the alphabet.
87    pub fn new(alphabet_size: usize, subsets: Vec<Vec<usize>>, bound_k: usize) -> Self {
88        assert!(bound_k > 0, "bound_k must be positive, got 0");
89        let mut subsets = subsets;
90        for (i, subset) in subsets.iter_mut().enumerate() {
91            let mut seen = HashSet::with_capacity(subset.len());
92            for &elem in subset.iter() {
93                assert!(
94                    elem < alphabet_size,
95                    "Subset {} contains element {} which is outside alphabet of size {}",
96                    i,
97                    elem,
98                    alphabet_size
99                );
100                assert!(
101                    seen.insert(elem),
102                    "Subset {} contains duplicate elements",
103                    i
104                );
105            }
106            subset.sort();
107        }
108        Self {
109            alphabet_size,
110            subsets,
111            bound_k,
112        }
113    }
114
115    /// Get the alphabet size.
116    pub fn alphabet_size(&self) -> usize {
117        self.alphabet_size
118    }
119
120    /// Get the number of subsets in the collection.
121    pub fn num_subsets(&self) -> usize {
122        self.subsets.len()
123    }
124
125    /// Get the bound K.
126    pub fn bound_k(&self) -> usize {
127        self.bound_k
128    }
129
130    /// Get the subsets.
131    pub fn subsets(&self) -> &[Vec<usize>] {
132        &self.subsets
133    }
134}
135
136impl Problem for ConsecutiveSets {
137    const NAME: &'static str = "ConsecutiveSets";
138    type Value = crate::types::Or;
139
140    fn dims(&self) -> Vec<usize> {
141        // Each position can be any symbol (0..alphabet_size-1) or "unused" (alphabet_size)
142        vec![self.alphabet_size + 1; self.bound_k]
143    }
144
145    fn evaluate(&self, config: &[usize]) -> crate::types::Or {
146        crate::types::Or({
147            // 1. Validate config
148            if config.len() != self.bound_k || config.iter().any(|&v| v > self.alphabet_size) {
149                return crate::types::Or(false);
150            }
151
152            // 2. Build string: find the actual string length (strip trailing "unused")
153            let unused = self.alphabet_size;
154            let str_len = config
155                .iter()
156                .rposition(|&v| v != unused)
157                .map_or(0, |p| p + 1);
158
159            // 3. Check no internal "unused" symbols
160            let w = &config[..str_len];
161            if w.contains(&unused) {
162                return crate::types::Or(false);
163            }
164
165            let mut subset_membership = vec![0usize; self.alphabet_size];
166            let mut seen_in_window = vec![0usize; self.alphabet_size];
167            let mut subset_stamp = 1usize;
168            let mut window_stamp = 1usize;
169
170            // 4. Check each subset has a consecutive block
171            for subset in &self.subsets {
172                let subset_len = subset.len();
173                if subset_len == 0 {
174                    continue; // empty subset trivially satisfied
175                }
176                if subset_len > str_len {
177                    return crate::types::Or(false); // can't fit
178                }
179
180                for &elem in subset {
181                    subset_membership[elem] = subset_stamp;
182                }
183
184                let mut found = false;
185                for start in 0..=(str_len - subset_len) {
186                    let window = &w[start..start + subset_len];
187                    let current_window_stamp = window_stamp;
188                    window_stamp += 1;
189
190                    // Because subsets are validated to contain unique elements,
191                    // a window matches iff every symbol belongs to the subset and
192                    // appears at most once.
193                    if window.iter().all(|&elem| {
194                        let is_member = subset_membership[elem] == subset_stamp;
195                        let is_new = seen_in_window[elem] != current_window_stamp;
196                        if is_member && is_new {
197                            seen_in_window[elem] = current_window_stamp;
198                            true
199                        } else {
200                            false
201                        }
202                    }) {
203                        // subset is already sorted
204                        found = true;
205                        break;
206                    }
207                }
208                if !found {
209                    return crate::types::Or(false);
210                }
211
212                subset_stamp += 1;
213            }
214
215            true
216        })
217    }
218
219    fn variant() -> Vec<(&'static str, &'static str)> {
220        crate::variant_params![]
221    }
222}
223
224crate::declare_variants! {
225    default ConsecutiveSets => "alphabet_size^bound_k * num_subsets",
226}
227
228#[cfg(feature = "example-db")]
229pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
230    vec![crate::example_db::specs::ModelExampleSpec {
231        id: "consecutive_sets",
232        // YES instance from issue: w = [0, 4, 2, 5, 1, 3]
233        instance: Box::new(ConsecutiveSets::new(
234            6,
235            vec![vec![0, 4], vec![2, 4], vec![2, 5], vec![1, 5], vec![1, 3]],
236            6,
237        )),
238        optimal_config: vec![0, 4, 2, 5, 1, 3],
239        optimal_value: serde_json::json!(true),
240    }]
241}
242
243#[cfg(test)]
244#[path = "../../unit_tests/models/set/consecutive_sets.rs"]
245mod tests;