Skip to main content

problemreductions/models/misc/
closest_string.rs

1//! Closest String problem implementation.
2//!
3//! Given a finite alphabet `{0, ..., alphabet_size - 1}` and a list of input
4//! strings of equal length `m`, find a center string `c` of length `m` over the
5//! same alphabet that minimizes the maximum Hamming distance from `c` to any
6//! input string.
7
8use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry};
9use crate::traits::Problem;
10use crate::types::Min;
11use serde::{Deserialize, Serialize};
12
13inventory::submit! {
14    ProblemSchemaEntry {
15        name: "ClosestString",
16        display_name: "Closest String",
17        aliases: &[],
18        dimensions: &[],
19        module_path: module_path!(),
20        description: "Find a center string of fixed length that minimizes the maximum Hamming distance to a list of equal-length input strings",
21        fields: &[
22            FieldInfo {
23                name: "alphabet_size",
24                type_name: "usize",
25                description: "Size q of the finite alphabet {0, ..., q-1}",
26            },
27            FieldInfo {
28                name: "strings",
29                type_name: "Vec<Vec<usize>>",
30                description: "Input strings s_1, ..., s_n over the alphabet, all of equal length m",
31            },
32        ],
33    }
34}
35
36inventory::submit! {
37    ProblemSizeFieldEntry {
38        name: "ClosestString",
39        fields: &["alphabet_size", "num_strings", "string_length", "total_length"],
40    }
41}
42
43/// The Closest String problem.
44///
45/// Given a finite alphabet `Sigma = {0, ..., q - 1}` and `n` input strings
46/// `s_1, ..., s_n` in `Sigma^m` (all of common length `m`), find a center
47/// string `c` in `Sigma^m` minimizing
48///
49/// `max_{1 <= i <= n} d_H(c, s_i)`,
50///
51/// where `d_H` is the Hamming distance. Every center in the discrete cube is
52/// syntactically feasible; the objective is its worst-case Hamming distance
53/// to the input strings.
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct ClosestString {
56    alphabet_size: usize,
57    strings: Vec<Vec<usize>>,
58}
59
60impl ClosestString {
61    /// Create a new `ClosestString` instance.
62    ///
63    /// # Panics
64    ///
65    /// Panics if:
66    /// - `strings` is empty (the problem requires at least one input string),
67    /// - input strings do not all have the same length,
68    /// - `alphabet_size == 0` while any input string is non-empty,
69    /// - any symbol in any input string is `>= alphabet_size`.
70    pub fn new(alphabet_size: usize, strings: Vec<Vec<usize>>) -> Self {
71        assert!(
72            !strings.is_empty(),
73            "ClosestString requires at least one input string"
74        );
75        let string_length = strings[0].len();
76        assert!(
77            strings.iter().all(|s| s.len() == string_length),
78            "all input strings must have the same length"
79        );
80        assert!(
81            alphabet_size > 0 || string_length == 0,
82            "alphabet_size must be > 0 when input strings are non-empty"
83        );
84        assert!(
85            strings
86                .iter()
87                .flat_map(|s| s.iter())
88                .all(|&symbol| symbol < alphabet_size),
89            "input symbols must be less than alphabet_size"
90        );
91        Self {
92            alphabet_size,
93            strings,
94        }
95    }
96
97    /// Returns the alphabet size `q`.
98    pub fn alphabet_size(&self) -> usize {
99        self.alphabet_size
100    }
101
102    /// Returns the input strings.
103    pub fn strings(&self) -> &[Vec<usize>] {
104        &self.strings
105    }
106
107    /// Returns the number of input strings `n`.
108    pub fn num_strings(&self) -> usize {
109        self.strings.len()
110    }
111
112    /// Returns the common string length `m`.
113    pub fn string_length(&self) -> usize {
114        self.strings[0].len()
115    }
116
117    /// Returns the total input length `num_strings * string_length`.
118    pub fn total_length(&self) -> usize {
119        self.num_strings() * self.string_length()
120    }
121}
122
123impl Problem for ClosestString {
124    const NAME: &'static str = "ClosestString";
125    type Value = Min<i64>;
126
127    fn variant() -> Vec<(&'static str, &'static str)> {
128        crate::variant_params![]
129    }
130
131    fn dims(&self) -> Vec<usize> {
132        vec![self.alphabet_size; self.string_length()]
133    }
134
135    fn evaluate(&self, config: &[usize]) -> Min<i64> {
136        let m = self.string_length();
137        if config.len() != m {
138            return Min(None);
139        }
140        if config.iter().any(|&symbol| symbol >= self.alphabet_size) {
141            return Min(None);
142        }
143        // Maximum Hamming distance from the center to any input string.
144        let max_distance = self
145            .strings
146            .iter()
147            .map(|s| config.iter().zip(s.iter()).filter(|(c, t)| c != t).count() as i64)
148            .max()
149            .unwrap_or(0);
150        Min(Some(max_distance))
151    }
152}
153
154crate::declare_variants! {
155    default ClosestString => "alphabet_size ^ string_length",
156}
157
158#[cfg(feature = "example-db")]
159pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
160    vec![crate::example_db::specs::ModelExampleSpec {
161        id: "closest_string",
162        instance: Box::new(ClosestString::new(
163            2,
164            vec![vec![0, 0, 0], vec![0, 1, 1], vec![1, 0, 1], vec![1, 1, 0]],
165        )),
166        optimal_config: vec![0, 0, 0],
167        optimal_value: serde_json::json!(2),
168    }]
169}
170
171#[cfg(test)]
172#[path = "../../unit_tests/models/misc/closest_string.rs"]
173mod tests;