problemreductions/rules/closestsubstring_ilp.rs
1//! Reduction from ClosestSubstring to ILP (Integer Linear Programming).
2//!
3//! Given an alphabet of size `q`, `n` input strings `s_1, ..., s_n` (not
4//! necessarily of equal length), and a window length `ell`, the goal is to
5//! pick a center `c in Sigma^ell` and one length-`ell` window from each input
6//! string that together minimize the worst-case Hamming distance between the
7//! center and any chosen window. The ILP encoding combines the
8//! center-selection variables of ClosestString with one-hot window-choice
9//! indicators, plus a radius variable that is active only on each selected
10//! window.
11//!
12//! - Integer `x_{r, a}` for `r in {0, ..., ell - 1}` and
13//! `a in {0, ..., q - 1}`: `x_{r, a} = 1` iff the center has symbol `a` at
14//! position `r`. The non-negativity of ILP variables together with the
15//! assignment constraint forces every `x_{r, a} in {0, 1}`.
16//! - Integer `y_{i, p}` for input string `s_i` and window start
17//! `p in {0, ..., W_i - 1}` where `W_i = |s_i| - ell + 1`: `y_{i, p} = 1` iff
18//! window `p` is selected from string `s_i`.
19//! - Nonnegative integer radius variable `R`.
20//! - Assignment constraint: `sum_a x_{r, a} = 1` for every position `r`.
21//! - Window-choice constraint: `sum_p y_{i, p} = 1` for every input string.
22//! - Conditional radius constraint per `(i, p)`:
23//! `R + sum_{r} x_{r, s_i[p + r]} - ell * y_{i, p} >= 0`.
24//! When `y_{i, p} = 1`, this becomes `R >= d_H(c, s_i[p..p + ell))`; when
25//! `y_{i, p} = 0`, the constraint is automatically satisfied.
26//! - Objective: minimize `R`.
27//!
28//! Reference: Ming Li, Bin Ma, and Lusheng Wang, "On the closest string and
29//! substring problems," Journal of the ACM 49(2):157-171, 2002.
30//! <https://doi.org/10.1145/506147.506150>
31
32use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP};
33use crate::models::misc::ClosestSubstring;
34use crate::reduction;
35use crate::rules::traits::{ReduceTo, ReductionResult};
36
37/// Result of reducing ClosestSubstring to ILP.
38///
39/// Variable layout (`ILP<i32>`, all non-negative):
40/// - `x_{r, a}` at index `r * alphabet_size + a` for `r in [0, ell)` and
41/// `a in [0, q)`, forced into `{0, 1}` by the assignment constraints.
42/// - `y_{i, p}` at index `q * ell + window_offsets[i] + p` for input string
43/// `s_i` and window start `p in [0, W_i)`, forced into `{0, 1}` by the
44/// window-choice constraints.
45/// - `R` (radius) at index `q * ell + total_num_windows`, a non-negative
46/// integer in `[0, ell]`.
47#[derive(Debug, Clone)]
48pub struct ReductionClosestSubstringToILP {
49 target: ILP<i32>,
50 alphabet_size: usize,
51 substring_length: usize,
52 /// Prefix sums of per-string window counts: `window_offsets[i]` is the
53 /// number of `y_{j, p}` variables for `j < i`. Has length `num_strings`.
54 window_offsets: Vec<usize>,
55 /// `window_counts[i] = W_i = |s_i| - ell + 1`.
56 window_counts: Vec<usize>,
57}
58
59impl ReductionResult for ReductionClosestSubstringToILP {
60 type Source = ClosestSubstring;
61 type Target = ILP<i32>;
62
63 fn target_problem(&self) -> &ILP<i32> {
64 &self.target
65 }
66
67 /// Decode the integer ILP assignment into the source config layout.
68 ///
69 /// `ClosestSubstring::evaluate` expects `config` of length `ell + n`: the
70 /// first `ell` entries are the center symbols, the remaining `n` entries
71 /// are per-string window starts. For each center position `r`, we pick the
72 /// unique alphabet symbol `a` with `x_{r, a} = 1`; for each input string
73 /// `s_i`, we pick the unique window start `p` with `y_{i, p} = 1`. When no
74 /// indicator is set to 1 in some block (which only happens on partial /
75 /// infeasible ILP solutions), we fall back to 0 so the returned vector
76 /// still has the expected shape.
77 fn extract_solution(&self, target_solution: &[usize]) -> Vec<usize> {
78 let q = self.alphabet_size;
79 let ell = self.substring_length;
80 let y_base = q * ell;
81
82 let mut out = Vec::with_capacity(ell + self.window_counts.len());
83
84 // Center symbols.
85 for r in 0..ell {
86 let symbol = (0..q)
87 .find(|&a| target_solution.get(r * q + a).copied().unwrap_or(0) == 1)
88 .unwrap_or(0);
89 out.push(symbol);
90 }
91
92 // Window starts.
93 for (i, &w_i) in self.window_counts.iter().enumerate() {
94 let start = (0..w_i)
95 .find(|&p| {
96 target_solution
97 .get(y_base + self.window_offsets[i] + p)
98 .copied()
99 .unwrap_or(0)
100 == 1
101 })
102 .unwrap_or(0);
103 out.push(start);
104 }
105
106 out
107 }
108}
109
110#[reduction(
111 overhead = {
112 num_vars = "alphabet_size * substring_length + total_num_windows + 1",
113 num_constraints = "substring_length + num_strings + total_num_windows + 1",
114 }
115)]
116impl ReduceTo<ILP<i32>> for ClosestSubstring {
117 type Result = ReductionClosestSubstringToILP;
118
119 fn reduce_to(&self) -> Self::Result {
120 let q = self.alphabet_size();
121 let ell = self.substring_length();
122 let strings = self.strings();
123 let n = strings.len();
124
125 let window_counts: Vec<usize> = strings.iter().map(|s| s.len() - ell + 1).collect();
126 let mut window_offsets: Vec<usize> = Vec::with_capacity(n);
127 {
128 let mut acc = 0usize;
129 for &w in &window_counts {
130 window_offsets.push(acc);
131 acc += w;
132 }
133 }
134 let total_windows: usize = window_counts.iter().sum();
135
136 let x_idx = |r: usize, a: usize| -> usize { r * q + a };
137 let y_base = q * ell;
138 let y_idx = |i: usize, p: usize| -> usize { y_base + window_offsets[i] + p };
139 let r_idx = y_base + total_windows;
140 let num_vars = r_idx + 1;
141
142 let mut constraints: Vec<LinearConstraint> =
143 Vec::with_capacity(ell + n + total_windows + 1);
144
145 // Assignment constraints: exactly one symbol per center position.
146 // Together with the non-negativity built into ILP<i32>, this also
147 // forces every x_{r, a} to lie in {0, 1}.
148 for r in 0..ell {
149 let terms: Vec<(usize, f64)> = (0..q).map(|a| (x_idx(r, a), 1.0)).collect();
150 constraints.push(LinearConstraint::eq(terms, 1.0));
151 }
152
153 // Tight upper bound on R: the worst-case Hamming distance over a
154 // length-ell window is at most ell. Added as a single-term `<=`
155 // constraint so the solver's bound-tightening pass (which scans for
156 // exactly this pattern) picks it up. Without this, R defaults to the
157 // full i32 domain, which severely degrades HiGHS performance even on
158 // tiny instances.
159 constraints.push(LinearConstraint::le(vec![(r_idx, 1.0)], ell as f64));
160
161 // Window-choice constraints: exactly one window per input string.
162 // Combined with non-negativity, this forces every y_{i, p} in {0, 1}.
163 for (i, &w_i) in window_counts.iter().enumerate() {
164 let terms: Vec<(usize, f64)> = (0..w_i).map(|p| (y_idx(i, p), 1.0)).collect();
165 constraints.push(LinearConstraint::eq(terms, 1.0));
166 }
167
168 // Conditional radius constraints: for every (input string, window
169 // start) pair, R + sum_r x_{r, s_i[p + r]} - ell * y_{i, p} >= 0.
170 // - If y_{i, p} = 1: R >= ell - sum_r x_{r, s_i[p + r]} = d_H(c, window).
171 // - If y_{i, p} = 0: the LHS is R + (nonneg match count) >= 0,
172 // automatically satisfied because R >= 0.
173 for (i, s) in strings.iter().enumerate() {
174 for p in 0..window_counts[i] {
175 let mut terms: Vec<(usize, f64)> = Vec::with_capacity(ell + 2);
176 terms.push((r_idx, 1.0));
177 for r in 0..ell {
178 terms.push((x_idx(r, s[p + r]), 1.0));
179 }
180 terms.push((y_idx(i, p), -(ell as f64)));
181 constraints.push(LinearConstraint::ge(terms, 0.0));
182 }
183 }
184
185 // Objective: minimize R.
186 let objective = vec![(r_idx, 1.0)];
187
188 let target = ILP::new(num_vars, constraints, objective, ObjectiveSense::Minimize);
189
190 ReductionClosestSubstringToILP {
191 target,
192 alphabet_size: q,
193 substring_length: ell,
194 window_offsets,
195 window_counts,
196 }
197 }
198}
199
200#[cfg(feature = "example-db")]
201pub(crate) fn canonical_rule_example_specs() -> Vec<crate::example_db::specs::RuleExampleSpec> {
202 vec![crate::example_db::specs::RuleExampleSpec {
203 id: "closestsubstring_to_ilp",
204 build: || {
205 // Canonical issue #1033 instance: binary alphabet, length-3
206 // windows on three length-5 strings. Optimum radius is 1; one
207 // optimal center is 010 with windows (0, 1, 0) selecting 000,
208 // 010, 110 from s_1, s_2, s_3 respectively.
209 let source = ClosestSubstring::new(
210 2,
211 vec![
212 vec![0, 0, 0, 1, 1],
213 vec![1, 0, 1, 0, 0],
214 vec![1, 1, 0, 0, 1],
215 ],
216 3,
217 );
218 crate::example_db::specs::rule_example_via_ilp::<_, i32>(source)
219 },
220 }]
221}
222
223#[cfg(test)]
224#[path = "../unit_tests/rules/closestsubstring_ilp.rs"]
225mod tests;