// problemreductions/rules/minimuminternalmacrodatacompression_ilp.rs
//! Reduction from MinimumInternalMacroDataCompression to ILP (Integer Linear Programming).
//!
//! The IMDC problem is formulated as a binary ILP using a flow-on-DAG partition
//! model for the compressed string C, where pointers reference earlier segments
//! within C itself.
//!
//! **Variables (all binary):**
//! - `lit[i]`: source position i is covered by a literal in C (i = 0..n-1)
//! - `ptr[i][l][r]`: segment [i, i+l) of the source is covered by a pointer in C
//!   that copies from source position r (the first l characters of the decoded
//!   output starting at source position r must equal s[i..i+l])
//! - Flow conservation ensures positions 0..n are partitioned into segments.
//!
//! **Objective:** minimize |C| + (h−1) × pointer_count
//! = (sum lit[i]) + (sum ptr[i][l][r]) + (h−1) × (sum ptr[i][l][r])
//! = (sum lit[i]) + h × (sum ptr[i][l][r])
use crate::models::algebraic::{LinearConstraint, ObjectiveSense, ILP};
use crate::models::misc::MinimumInternalMacroDataCompression;
use crate::reduction;
use crate::rules::traits::{ReduceTo, ReductionResult};
22
/// Index layout for ILP variables.
///
/// All variables live in one flat vector:
/// - `[lit_offset, lit_offset + n)`: literal variables `lit[i]`
/// - `[ptr_offset, ptr_offset + ptr_triples.len())`: pointer variables, one
///   per valid `(i, l, r)` triple, in the order stored in `ptr_triples`.
#[derive(Debug, Clone)]
struct VarLayout {
    /// Length of the source string.
    n: usize,
    /// Offset of lit[i] block: index = lit_offset + i
    lit_offset: usize,
    /// Offset of ptr variables, stored as a flat list.
    /// Each entry in `ptr_triples` is (i, l, r) and its var index = ptr_offset + idx.
    ptr_offset: usize,
    /// The (i, l, r) triples in order.
    ptr_triples: Vec<(usize, usize, usize)>,
    /// Total number of variables.
    total_vars: usize,
}

impl VarLayout {
    /// Builds the layout for a source string of length `n`.
    ///
    /// Enumerates all valid (i, l, r) pointer triples where:
    /// - i is the start position in the source (0..n)
    /// - l is the segment length (1..=n-i)
    /// - r is the reference position: the pointer copies from source[r..r+l],
    ///   which must equal source[i..i+l]. With non-overlapping semantics the
    ///   decoded output has exactly i characters before this pointer, so we
    ///   require r + l <= i (this also implies r + l < n, so no extra bound
    ///   check is needed).
    fn new(n: usize, source_string: &[usize]) -> Self {
        let lit_offset = 0;
        let ptr_offset = lit_offset + n;

        let mut ptr_triples = Vec::new();
        for i in 0..n {
            for l in 1..=(n - i) {
                // r + l <= i has no solutions when l > i; iterating r only up
                // to i - l (inclusive) avoids scanning positions that can
                // never satisfy the non-overlap bound.
                if l > i {
                    continue;
                }
                for r in 0..=(i - l) {
                    if source_string[r..r + l] == source_string[i..i + l] {
                        ptr_triples.push((i, l, r));
                    }
                }
            }
        }

        let total_vars = ptr_offset + ptr_triples.len();
        Self {
            n,
            lit_offset,
            ptr_offset,
            ptr_triples,
            total_vars,
        }
    }

    /// ILP variable index for lit[i].
    fn lit_var(&self, i: usize) -> usize {
        self.lit_offset + i
    }
}
80
/// Result of reducing MinimumInternalMacroDataCompression to ILP.
///
/// Holds the target ILP plus the variable layout and the pieces of the source
/// instance (string and alphabet size) needed to map an ILP solution back to
/// a compressed-string configuration in `extract_solution`.
#[derive(Debug, Clone)]
pub struct ReductionIMDCToILP {
    // The binary ILP produced by the reduction.
    target: ILP<bool>,
    // Variable index layout shared between reduction and solution extraction.
    layout: VarLayout,
    // Copy of the source string symbols.
    source_string: Vec<usize>,
    // Alphabet size k; value k doubles as the end-of-string marker in
    // extracted configurations.
    alphabet_size: usize,
}
89
impl ReductionResult for ReductionIMDCToILP {
    type Source = MinimumInternalMacroDataCompression;
    type Target = ILP<bool>;

    fn target_problem(&self) -> &ILP<bool> {
        &self.target
    }

    /// Maps a binary ILP assignment back to a source configuration of length n.
    ///
    /// Walks the source string left to right, reading which lit/ptr variables
    /// are set to 1 to recover the chosen segments, then encodes segment idx
    /// into `config[idx]`:
    /// - literal: the source symbol (a value in 0..k),
    /// - pointer: `k + 1 + p`, where p is the compressed position of the
    ///   referenced segment,
    /// - trailing unused entries keep the end-of-string marker k.
    fn extract_solution(&self, target_solution: &[usize]) -> Vec<usize> {
        let n = self.layout.n;
        let k = self.alphabet_size;
        let eos = k; // end-of-string marker

        // First pass: collect segments and build source-to-compressed-position map.
        // source_to_c_pos[i] = compressed position that covers source position i.
        let mut source_to_c_pos = vec![0usize; n];
        let mut segments: Vec<(usize, usize, Option<usize>)> = Vec::new(); // (source_start, len, ref_source_pos)
        let mut c_pos = 0;
        let mut pos = 0;

        while pos < n {
            // Literal is checked first; a feasible flow solution sets exactly
            // one segment variable starting at pos.
            if target_solution[self.layout.lit_var(pos)] == 1 {
                source_to_c_pos[pos] = c_pos;
                segments.push((pos, 1, None));
                c_pos += 1;
                pos += 1;
                continue;
            }
            let mut found = false;
            // Linear scan for a set ptr variable whose segment starts at pos.
            for (idx, &(i, l, r)) in self.layout.ptr_triples.iter().enumerate() {
                if i == pos && target_solution[self.layout.ptr_offset + idx] == 1 {
                    // All l source positions of this segment map to one
                    // compressed position.
                    for offset in 0..l {
                        source_to_c_pos[pos + offset] = c_pos;
                    }
                    segments.push((pos, l, Some(r)));
                    c_pos += 1;
                    pos += l;
                    found = true;
                    break;
                }
            }
            if !found {
                // NOTE(review): flow conservation should cover every position in a
                // feasible solution; this skip only guards against malformed input.
                pos += 1;
            }
        }

        // Second pass: build config using source_to_c_pos for pointer references
        let mut config = vec![eos; n];
        for (idx, &(src_start, _len, ref_pos)) in segments.iter().enumerate() {
            match ref_pos {
                None => {
                    config[idx] = self.source_string[src_start];
                }
                Some(r) => {
                    // Pointer references source position r, which is at
                    // compressed position source_to_c_pos[r]
                    config[idx] = k + 1 + source_to_c_pos[r];
                }
            }
        }

        config
    }
}
154
#[reduction(
    overhead = {
        num_vars = "string_len + string_len ^ 3",
        num_constraints = "string_len + 1",
    }
)]
impl ReduceTo<ILP<bool>> for MinimumInternalMacroDataCompression {
    type Result = ReductionIMDCToILP;

    /// Builds the binary ILP: one lit variable per position, one ptr variable
    /// per valid (i, l, r) triple, n+1 flow-conservation equality constraints
    /// (nodes 0..=n), and objective sum(lit) + h * sum(ptr), minimized.
    fn reduce_to(&self) -> Self::Result {
        let n = self.string_len();
        let k = self.alphabet_size();
        let h = self.pointer_cost();
        let s = self.string();

        // Handle empty string: an empty ILP whose solution extracts to the
        // empty configuration.
        if n == 0 {
            let layout = VarLayout::new(0, s);
            let target = ILP::new(0, vec![], vec![], ObjectiveSense::Minimize);
            return ReductionIMDCToILP {
                target,
                layout,
                source_string: vec![],
                alphabet_size: k,
            };
        }

        let layout = VarLayout::new(n, s);
        let num_vars = layout.total_vars;
        let mut constraints = Vec::new();

        // Flow conservation on DAG: positions 0..n are nodes.
        // A segment covers source positions [i, i+l).
        // Segments: lit[i] covers [i, i+1), ptr[i][l][r] covers [i, i+l).
        //
        // Flow constraints:
        // At node 0: sum of outgoing segments = 1
        // At node j (1..n-1): sum of incoming = sum of outgoing
        // At node n: sum of incoming = 1

        // All variables (coefficient 1.0) whose segment covers exactly [i, i+l).
        let segment_terms = |i: usize, l: usize| -> Vec<(usize, f64)> {
            let mut terms = Vec::new();
            if l == 1 {
                terms.push((layout.lit_var(i), 1.0));
            }
            // All ptr variables for segment (i, l, *)
            for (idx, &(pi, pl, _)) in layout.ptr_triples.iter().enumerate() {
                if pi == i && pl == l {
                    terms.push((layout.ptr_offset + idx, 1.0));
                }
            }
            terms
        };

        for node in 0..=n {
            let mut all_terms: Vec<(usize, f64)> = Vec::new();

            if node == 0 {
                // Source node: exactly one segment starts at position 0.
                for l in 1..=n {
                    all_terms.extend(segment_terms(0, l));
                }
                constraints.push(LinearConstraint::eq(all_terms, 1.0));
            } else if node == n {
                // Sink node: exactly one segment ends at position n.
                for j in 0..n {
                    let l = n - j;
                    all_terms.extend(segment_terms(j, l));
                }
                constraints.push(LinearConstraint::eq(all_terms, 1.0));
            } else {
                // Interior node: incoming - outgoing = 0.
                let mut incoming = Vec::new();
                for j in 0..node {
                    let l = node - j;
                    incoming.extend(segment_terms(j, l));
                }
                let mut outgoing = Vec::new();
                for l in 1..=(n - node) {
                    outgoing.extend(segment_terms(node, l));
                }
                for (var, coef) in incoming {
                    all_terms.push((var, coef));
                }
                for (var, coef) in outgoing {
                    all_terms.push((var, -coef));
                }
                constraints.push(LinearConstraint::eq(all_terms, 0.0));
            }
        }

        // Pointer precedence: for ptr[i][l][r], we need r < i (already enforced
        // by the triple enumeration). Additionally, the content at source[r..r+l]
        // must equal source[i..i+l] (also enforced by triple enumeration).
        // No additional constraints needed since we pre-filtered valid triples.

        // Objective: minimize literals + h * pointers
        // = sum lit[i] + h * sum ptr[i][l][r]
        // Since each literal contributes 1 to |C| and each pointer contributes
        // 1 to |C| plus (h-1) to the pointer penalty:
        // cost = |C| + (h-1)*pointers = (lits + ptrs) + (h-1)*ptrs = lits + h*ptrs
        let mut objective: Vec<(usize, f64)> = Vec::new();
        for i in 0..n {
            objective.push((layout.lit_var(i), 1.0));
        }
        for (idx, _) in layout.ptr_triples.iter().enumerate() {
            objective.push((layout.ptr_offset + idx, h as f64));
        }

        let target = ILP::new(num_vars, constraints, objective, ObjectiveSense::Minimize);

        ReductionIMDCToILP {
            target,
            layout,
            source_string: s.to_vec(),
            alphabet_size: k,
        }
    }
}
271
#[cfg(feature = "example-db")]
pub(crate) fn canonical_rule_example_specs() -> Vec<crate::example_db::specs::RuleExampleSpec> {
    use crate::export::SolutionPair;

    // Example instance: s = "ab" (length 2) over alphabet {a, b} (size 2), h = 2.
    // The optimum leaves the string uncompressed: C = "ab", cost = 2.
    // In ILP terms: lit[0] = lit[1] = 1 and every pointer variable is 0.
    let spec = crate::example_db::specs::RuleExampleSpec {
        id: "minimuminternalmacrodatacompression_to_ilp",
        build: || {
            let source = MinimumInternalMacroDataCompression::new(2, vec![0, 1], 2);
            let reduction = ReduceTo::<ILP<bool>>::reduce_to(&source);

            // Start from the all-zero assignment, then switch on both literals.
            let mut target_config = vec![0usize; reduction.layout.total_vars];
            for i in 0..2 {
                target_config[reduction.layout.lit_var(i)] = 1;
            }

            let source_config = reduction.extract_solution(&target_config);

            crate::example_db::specs::rule_example_with_witness::<_, ILP<bool>>(
                source,
                SolutionPair {
                    source_config,
                    target_config,
                },
            )
        },
    };

    vec![spec]
}
302
// Unit tests live out-of-line; the `path` attribute points at the shared
// unit-test tree.
#[cfg(test)]
#[path = "../unit_tests/rules/minimuminternalmacrodatacompression_ilp.rs"]
mod tests;