Skip to main content

problemreductions/rules/
minimumexternalmacrodatacompression_ilp.rs

1//! Reduction from MinimumExternalMacroDataCompression to ILP (Integer Linear Programming).
2//!
3//! The EMDC problem is formulated as a binary ILP using a flow-on-DAG partition
4//! model for the compressed string, combined with dictionary assignment variables.
5//!
6//! **Variables (all binary):**
7//! - `d[j][c]`: D-slot j contains symbol c (j=0..n-1, c=0..k-1)
8//! - `d_used[j]`: D-slot j is used (j=0..n-1)
9//! - `lit[i]`: position i is covered by a literal in C (i=0..n-1)
10//! - `ptr[i][l][d_start]`: segment [i, i+l) is a pointer referencing D[d_start..d_start+l]
11//! - Flow conservation ensures positions 0..n are partitioned into segments.
12//!
13//! **Objective:** minimize |D| + |C_literals| + h * |C_pointers|
14//! = sum d_used[j] + sum lit[i] + h * sum ptr[i][l][d_start]
15
16use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP};
17use crate::models::misc::MinimumExternalMacroDataCompression;
18use crate::reduction;
19use crate::rules::traits::{ReduceTo, ReductionResult};
20
21/// Index layout for ILP variables.
22#[derive(Debug, Clone)]
23struct VarLayout {
24    n: usize,
25    k: usize,
26    /// Offset of d[j][c] block: index = d_offset + j * k + c
27    d_offset: usize,
28    /// Offset of d_used[j] block: index = d_used_offset + j
29    d_used_offset: usize,
30    /// Offset of lit[i] block: index = lit_offset + i
31    lit_offset: usize,
32    /// Offset of ptr variables, stored as a flat list.
33    /// Each entry in `ptr_triples` is (i, l, d_start) and its var index = ptr_offset + idx.
34    ptr_offset: usize,
35    /// The (i, l, d_start) triples in order.
36    ptr_triples: Vec<(usize, usize, usize)>,
37    /// Total number of variables.
38    total_vars: usize,
39}
40
41impl VarLayout {
42    fn new(n: usize, k: usize) -> Self {
43        let d_offset = 0;
44        let d_used_offset = d_offset + n * k;
45        let lit_offset = d_used_offset + n;
46        let ptr_offset = lit_offset + n;
47
48        // Enumerate all valid (i, l, d_start) triples
49        let mut ptr_triples = Vec::new();
50        for i in 0..n {
51            for l in 1..=(n - i) {
52                for d_start in 0..=(n - l) {
53                    ptr_triples.push((i, l, d_start));
54                }
55            }
56        }
57
58        let total_vars = ptr_offset + ptr_triples.len();
59        Self {
60            n,
61            k,
62            d_offset,
63            d_used_offset,
64            lit_offset,
65            ptr_offset,
66            ptr_triples,
67            total_vars,
68        }
69    }
70
71    fn d_var(&self, j: usize, c: usize) -> usize {
72        self.d_offset + j * self.k + c
73    }
74
75    fn d_used_var(&self, j: usize) -> usize {
76        self.d_used_offset + j
77    }
78
79    fn lit_var(&self, i: usize) -> usize {
80        self.lit_offset + i
81    }
82
83    fn ptr_var(&self, i: usize, l: usize, d_start: usize) -> usize {
84        // Find the index of (i, l, d_start) in ptr_triples
85        let idx = self
86            .ptr_triples
87            .iter()
88            .position(|&(pi, pl, pd)| pi == i && pl == l && pd == d_start)
89            .expect("invalid ptr triple");
90        self.ptr_offset + idx
91    }
92
93    /// Get all ptr variable indices for segments starting at position i with length l.
94    fn ptr_vars_for_segment(&self, i: usize, l: usize) -> Vec<usize> {
95        self.ptr_triples
96            .iter()
97            .enumerate()
98            .filter(|(_, &(pi, pl, _))| pi == i && pl == l)
99            .map(|(idx, _)| self.ptr_offset + idx)
100            .collect()
101    }
102}
103
104/// Result of reducing MinimumExternalMacroDataCompression to ILP.
105#[derive(Debug, Clone)]
106pub struct ReductionEMDCToILP {
107    target: ILP<bool>,
108    /// Variable layout for solution extraction.
109    layout: VarLayout,
110    /// The source string (needed for extract_solution).
111    source_string: Vec<usize>,
112    /// Alphabet size.
113    alphabet_size: usize,
114}
115
116impl ReductionResult for ReductionEMDCToILP {
117    type Source = MinimumExternalMacroDataCompression;
118    type Target = ILP<bool>;
119
120    fn target_problem(&self) -> &ILP<bool> {
121        &self.target
122    }
123
124    fn extract_solution(&self, target_solution: &[usize]) -> Vec<usize> {
125        let n = self.layout.n;
126        let k = self.alphabet_size;
127        let empty = k; // empty marker
128
129        // Build D-slots
130        let mut d_slots = vec![empty; n];
131        for j in 0..n {
132            if target_solution[self.layout.d_used_var(j)] == 1 {
133                for c in 0..k {
134                    if target_solution[self.layout.d_var(j, c)] == 1 {
135                        d_slots[j] = c;
136                        break;
137                    }
138                }
139            }
140        }
141
142        // Walk through active segments to build C-slots
143        let mut c_slots = vec![empty; n];
144        let mut c_pos = 0;
145        let mut pos = 0;
146        while pos < n {
147            // Check if lit[pos] = 1
148            if target_solution[self.layout.lit_var(pos)] == 1 {
149                // Literal at position pos
150                c_slots[c_pos] = self.source_string[pos];
151                c_pos += 1;
152                pos += 1;
153                continue;
154            }
155            // Check for an active pointer starting at pos
156            let mut found = false;
157            for l in 1..=(n - pos) {
158                for d_start in 0..=(n - l) {
159                    let var_idx = self.layout.ptr_var(pos, l, d_start);
160                    if target_solution[var_idx] == 1 {
161                        // Encode pointer (d_start, l) as EMDC pointer index
162                        let ptr_idx = encode_pointer(n, d_start, l);
163                        c_slots[c_pos] = k + 1 + ptr_idx;
164                        c_pos += 1;
165                        pos += l;
166                        found = true;
167                        break;
168                    }
169                }
170                if found {
171                    break;
172                }
173            }
174            if !found {
175                // Should not happen with a valid ILP solution
176                pos += 1;
177            }
178        }
179
180        // Combine D-slots and C-slots
181        let mut config = d_slots;
182        config.extend(c_slots);
183        config
184    }
185}
186
187/// Encode a pointer (start, len) into the EMDC pointer index.
188/// Pointers enumerate: (0,1),(0,2),...,(0,n), (1,1),(1,2),...,(1,n-1), ...
189fn encode_pointer(n: usize, start: usize, len: usize) -> usize {
190    let mut idx = 0;
191    for s in 0..start {
192        idx += n - s;
193    }
194    idx + len - 1
195}
196
197#[reduction(
198    overhead = {
199        num_vars = "string_length * alphabet_size + 2 * string_length + string_length ^ 3",
200        num_constraints = "string_length + string_length * alphabet_size + string_length + string_length + 1 + string_length ^ 3 * string_length",
201    }
202)]
203impl ReduceTo<ILP<bool>> for MinimumExternalMacroDataCompression {
204    type Result = ReductionEMDCToILP;
205
206    fn reduce_to(&self) -> Self::Result {
207        let n = self.string_length();
208        let k = self.alphabet_size();
209        let h = self.pointer_cost();
210        let s = self.string();
211
212        // Handle empty string
213        if n == 0 {
214            let layout = VarLayout::new(0, k);
215            let target = ILP::new(0, vec![], vec![], ObjectiveSense::Minimize);
216            return ReductionEMDCToILP {
217                target,
218                layout,
219                source_string: vec![],
220                alphabet_size: k,
221            };
222        }
223
224        let layout = VarLayout::new(n, k);
225        let num_vars = layout.total_vars;
226        let mut constraints = Vec::new();
227
228        // 1. Dictionary one-hot: for each j, sum_c d[j][c] <= 1
229        for j in 0..n {
230            let terms: Vec<(usize, f64)> = (0..k).map(|c| (layout.d_var(j, c), 1.0)).collect();
231            constraints.push(LinearConstraint::le(terms, 1.0));
232        }
233
234        // 2. Dictionary linking: d[j][c] <= d_used[j] for all j, c
235        for j in 0..n {
236            for c in 0..k {
237                constraints.push(LinearConstraint::le(
238                    vec![(layout.d_var(j, c), 1.0), (layout.d_used_var(j), -1.0)],
239                    0.0,
240                ));
241            }
242        }
243
244        // 3. Dictionary contiguous: d_used[j+1] <= d_used[j] for j=0..n-2
245        for j in 0..n.saturating_sub(1) {
246            constraints.push(LinearConstraint::le(
247                vec![
248                    (layout.d_used_var(j + 1), 1.0),
249                    (layout.d_used_var(j), -1.0),
250                ],
251                0.0,
252            ));
253        }
254
255        // 4. Flow conservation on DAG: positions 0..n are nodes.
256        // A segment (i, l) contributes to outgoing flow at node i and incoming flow at node i+l.
257        // For segment (i, l):
258        //   - if l == 1: flow = lit[i] + sum_{d_start} ptr[i][1][d_start]
259        //   - if l >= 2: flow = sum_{d_start} ptr[i][l][d_start]
260        //
261        // Flow constraints:
262        // At node 0: sum of outgoing segments = 1
263        // At node i (1..n-1): sum of incoming = sum of outgoing
264        // At node n: sum of incoming = 1
265
266        // Helper: get all terms for "segment flow" at (i, l)
267        // Returns the variable indices with coefficient 1.0
268        let segment_terms = |i: usize, l: usize| -> Vec<(usize, f64)> {
269            let mut terms = Vec::new();
270            if l == 1 {
271                terms.push((layout.lit_var(i), 1.0));
272            }
273            for &var in &layout.ptr_vars_for_segment(i, l) {
274                terms.push((var, 1.0));
275            }
276            terms
277        };
278
279        // For each node, compute outgoing and incoming segment terms
280        for node in 0..=n {
281            let mut all_terms: Vec<(usize, f64)> = Vec::new();
282
283            if node == 0 {
284                // sum of outgoing(0, l) = 1
285                for l in 1..=n {
286                    all_terms.extend(segment_terms(0, l));
287                }
288                constraints.push(LinearConstraint::eq(all_terms, 1.0));
289            } else if node == n {
290                // sum of incoming(n) = 1
291                // incoming at node n: segments (j, l) where j + l = n
292                for j in 0..n {
293                    let l = n - j;
294                    all_terms.extend(segment_terms(j, l));
295                }
296                constraints.push(LinearConstraint::eq(all_terms, 1.0));
297            } else {
298                // node 1..n-1: incoming = outgoing
299                // incoming: segments (j, l) where j + l = node
300                let mut incoming = Vec::new();
301                for j in 0..node {
302                    let l = node - j;
303                    incoming.extend(segment_terms(j, l));
304                }
305                // outgoing: segments (node, l) for valid l
306                let mut outgoing = Vec::new();
307                for l in 1..=(n - node) {
308                    outgoing.extend(segment_terms(node, l));
309                }
310                // incoming - outgoing = 0
311                for (var, coef) in incoming {
312                    all_terms.push((var, coef));
313                }
314                for (var, coef) in outgoing {
315                    all_terms.push((var, -coef));
316                }
317                constraints.push(LinearConstraint::eq(all_terms, 0.0));
318            }
319        }
320
321        // 5. Pointer matching: ptr[i][l][d_start] <= d[d_start+offset][s[i+offset]]
322        // for all offset=0..l-1
323        for (idx, &(i, l, d_start)) in layout.ptr_triples.iter().enumerate() {
324            let ptr_idx = layout.ptr_offset + idx;
325            for offset in 0..l {
326                let symbol = s[i + offset];
327                // ptr[i][l][d_start] <= d[d_start + offset][symbol]
328                constraints.push(LinearConstraint::le(
329                    vec![
330                        (ptr_idx, 1.0),
331                        (layout.d_var(d_start + offset, symbol), -1.0),
332                    ],
333                    0.0,
334                ));
335            }
336        }
337
338        // 6. Literal matching: lit[i] can only be active if position i exists
339        // (this is always true for i < n, so no constraint needed).
340        // But we do need: if lit[i] = 1, the literal is s[i], which is automatic
341        // in the extract_solution. No additional constraint needed because the
342        // objective already penalizes literals.
343
344        // Objective: minimize sum d_used[j] + sum lit[i] + h * sum ptr[i][l][d_start]
345        let mut objective: Vec<(usize, f64)> = Vec::new();
346        for j in 0..n {
347            objective.push((layout.d_used_var(j), 1.0));
348        }
349        for i in 0..n {
350            objective.push((layout.lit_var(i), 1.0));
351        }
352        for (idx, _) in layout.ptr_triples.iter().enumerate() {
353            objective.push((layout.ptr_offset + idx, h as f64));
354        }
355
356        let target = ILP::new(num_vars, constraints, objective, ObjectiveSense::Minimize);
357
358        ReductionEMDCToILP {
359            target,
360            layout,
361            source_string: s.to_vec(),
362            alphabet_size: k,
363        }
364    }
365}
366
367#[cfg(feature = "example-db")]
368pub(crate) fn canonical_rule_example_specs() -> Vec<crate::example_db::specs::RuleExampleSpec> {
369    use crate::export::SolutionPair;
370
371    // s = "ab" (len 2), alphabet {a,b} (size 2), h=2
372    // Optimal: uncompressed, D="" C="ab", cost = 0+2+0 = 2
373    // Config: D-slots=[2,2], C-slots=[0,1]
374    // ILP target_config: all d and d_used = 0, lit[0]=1, lit[1]=1, all ptr = 0
375    vec![crate::example_db::specs::RuleExampleSpec {
376        id: "minimumexternalmacrodatacompression_to_ilp",
377        build: || {
378            let source = MinimumExternalMacroDataCompression::new(2, vec![0, 1], 2);
379            let reduction = ReduceTo::<ILP<bool>>::reduce_to(&source);
380            let layout = &reduction.layout;
381            let n = 2;
382            let k = 2;
383
384            // Build target config: all zeros, then set lit[0]=1, lit[1]=1
385            let mut target_config = vec![0usize; layout.total_vars];
386            target_config[layout.lit_var(0)] = 1;
387            target_config[layout.lit_var(1)] = 1;
388
389            // Verify this is correct
390            let source_config = reduction.extract_solution(&target_config);
391            debug_assert_eq!(source_config[..n], [k, k]); // D empty
392            debug_assert_eq!(source_config[n..], [0, 1]); // C = "ab"
393
394            crate::example_db::specs::rule_example_with_witness::<_, ILP<bool>>(
395                source,
396                SolutionPair {
397                    source_config,
398                    target_config,
399                },
400            )
401        },
402    }]
403}
404
405#[cfg(test)]
406#[path = "../unit_tests/rules/minimumexternalmacrodatacompression_ilp.rs"]
407mod tests;