Skip to main content

problemreductions/models/graph/
maximum_contact_map_overlap.rs

1//! Maximum Contact Map Overlap problem implementation.
2//!
3//! Given two finite ordered contact maps `G_1 = (V_1, E_1)` and
4//! `G_2 = (V_2, E_2)` where each `V_r` is the ordered vertex set
5//! `{0, 1, ..., n_r - 1}` and each `E_r` is a simple undirected contact set,
6//! find an order-preserving partial injective alignment
7//! `f: V_1 -> V_2 union {bot}` that maximizes the number of preserved contacts
8//!
9//! `|{{i, k} in E_1 : i, k both matched and {f(i), f(k)} in E_2}|`.
10//!
11//! The configuration vector has length `|V_1|`. For each source vertex `i`, the
12//! value `config[i] in {0, 1, ..., |V_2|}` records the alignment: `0` denotes
13//! `bot` (unmatched), and `j + 1` denotes "matched to vertex `j` of `G_2`".
14//! Feasibility requires that the non-zero entries are pairwise distinct
15//! (injectivity) and strictly increasing in source order (order-preserving).
16
17use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry};
18use crate::traits::Problem;
19use crate::types::Max;
20use serde::{Deserialize, Serialize};
21use std::collections::HashSet;
22
23inventory::submit! {
24    ProblemSchemaEntry {
25        name: "MaximumContactMapOverlap",
26        display_name: "Maximum Contact Map Overlap",
27        aliases: &["CMO", "MaxCMO"],
28        dimensions: &[],
29        module_path: module_path!(),
30        description: "Maximize the number of preserved contacts under an order-preserving partial injective alignment from G_1 into G_2",
31        fields: &[
32            FieldInfo {
33                name: "num_vertices_1",
34                type_name: "usize",
35                description: "Number of ordered residues/vertices in the first contact map G_1",
36            },
37            FieldInfo {
38                name: "contacts_1",
39                type_name: "Vec<(usize,usize)>",
40                description: "Simple undirected contacts of G_1 as canonicalized (u,v) pairs with u < v",
41            },
42            FieldInfo {
43                name: "num_vertices_2",
44                type_name: "usize",
45                description: "Number of ordered residues/vertices in the second contact map G_2",
46            },
47            FieldInfo {
48                name: "contacts_2",
49                type_name: "Vec<(usize,usize)>",
50                description: "Simple undirected contacts of G_2 as canonicalized (u,v) pairs with u < v",
51            },
52        ],
53    }
54}
55
56inventory::submit! {
57    ProblemSizeFieldEntry {
58        name: "MaximumContactMapOverlap",
59        fields: &["num_vertices_1", "num_vertices_2", "num_contacts_1", "num_contacts_2"],
60    }
61}
62
63/// The Maximum Contact Map Overlap problem.
64///
65/// Given two finite ordered contact maps `G_1 = (V_1, E_1)` and
66/// `G_2 = (V_2, E_2)`, find an order-preserving partial injective alignment
67/// `f: V_1 -> V_2 union {bot}` that maximizes the number of preserved contacts
68///
69/// `|{{i, k} in E_1 : i, k both matched and {f(i), f(k)} in E_2}|`.
70///
71/// # Configuration encoding
72///
73/// `dims()` returns `vec![|V_2| + 1; |V_1|]`. For each source vertex `i`,
74/// `config[i] = 0` denotes `bot` (unmatched) and `config[i] = j + 1` denotes
75/// "matched to vertex `j in V_2`". Feasibility requires that the nonzero
76/// entries are pairwise distinct (injectivity) and strictly increasing along
77/// the index order of `V_1` (order-preserving).
78#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
79pub struct MaximumContactMapOverlap {
80    num_vertices_1: usize,
81    contacts_1: Vec<(usize, usize)>,
82    num_vertices_2: usize,
83    contacts_2: Vec<(usize, usize)>,
84}
85
86/// Canonicalize a contact set: each pair is normalized to `(min, max)`, no
87/// self-loops are allowed, all endpoints must be in range, and duplicates
88/// (after normalization) cause a panic.
89fn canonicalize_contacts(
90    raw: Vec<(usize, usize)>,
91    num_vertices: usize,
92    side: &str,
93) -> Vec<(usize, usize)> {
94    let mut seen: HashSet<(usize, usize)> = HashSet::new();
95    let mut out = Vec::with_capacity(raw.len());
96    for (u, v) in raw {
97        assert!(
98            u < num_vertices && v < num_vertices,
99            "{side} contact endpoint out of range for num_vertices = {num_vertices}: ({u}, {v})"
100        );
101        assert!(u != v, "{side} contact has self-loop: ({u}, {v})");
102        let (a, b) = if u < v { (u, v) } else { (v, u) };
103        assert!(
104            seen.insert((a, b)),
105            "{side} has duplicate contact after normalization: ({a}, {b})"
106        );
107        out.push((a, b));
108    }
109    out
110}
111
112impl MaximumContactMapOverlap {
113    /// Construct a new instance from two ordered contact maps.
114    ///
115    /// Contacts are canonicalized to `(min, max)` pairs. Self-loops, duplicate
116    /// contacts (after normalization), and out-of-range endpoints panic.
117    pub fn new(
118        num_vertices_1: usize,
119        contacts_1: Vec<(usize, usize)>,
120        num_vertices_2: usize,
121        contacts_2: Vec<(usize, usize)>,
122    ) -> Self {
123        let contacts_1 = canonicalize_contacts(contacts_1, num_vertices_1, "G_1");
124        let contacts_2 = canonicalize_contacts(contacts_2, num_vertices_2, "G_2");
125        Self {
126            num_vertices_1,
127            contacts_1,
128            num_vertices_2,
129            contacts_2,
130        }
131    }
132
133    /// Number of ordered residues/vertices in `G_1`.
134    pub fn num_vertices_1(&self) -> usize {
135        self.num_vertices_1
136    }
137
138    /// Number of ordered residues/vertices in `G_2`.
139    pub fn num_vertices_2(&self) -> usize {
140        self.num_vertices_2
141    }
142
143    /// Number of contacts in `G_1`.
144    pub fn num_contacts_1(&self) -> usize {
145        self.contacts_1.len()
146    }
147
148    /// Number of contacts in `G_2`.
149    pub fn num_contacts_2(&self) -> usize {
150        self.contacts_2.len()
151    }
152
153    /// Contacts of `G_1` as canonicalized `(u, v)` pairs with `u < v`.
154    pub fn contacts_1(&self) -> &[(usize, usize)] {
155        &self.contacts_1
156    }
157
158    /// Contacts of `G_2` as canonicalized `(u, v)` pairs with `u < v`.
159    pub fn contacts_2(&self) -> &[(usize, usize)] {
160        &self.contacts_2
161    }
162
163    /// Check that `config` describes an order-preserving partial injective
164    /// alignment.
165    ///
166    /// Validity requires: `config.len() == |V_1|`, every entry lies in
167    /// `0..=|V_2|` (with `0` denoting `bot`), all nonzero entries are
168    /// pairwise distinct (injectivity), and the nonzero entries are strictly
169    /// increasing in source order.
170    pub fn is_valid_solution(&self, config: &[usize]) -> bool {
171        if config.len() != self.num_vertices_1 {
172            return false;
173        }
174        let max_value = self.num_vertices_2; // valid range is 0..=num_vertices_2
175        let mut previous_nonzero: Option<usize> = None;
176        let mut used: HashSet<usize> = HashSet::new();
177        for &value in config {
178            if value > max_value {
179                return false;
180            }
181            if value == 0 {
182                continue;
183            }
184            if !used.insert(value) {
185                return false;
186            }
187            if let Some(prev) = previous_nonzero {
188                if value <= prev {
189                    return false;
190                }
191            }
192            previous_nonzero = Some(value);
193        }
194        true
195    }
196
197    /// Count contacts of `G_1` preserved by the alignment `config`. Returns
198    /// `None` if `config` is infeasible.
199    pub fn preserved_contact_count(&self, config: &[usize]) -> Option<usize> {
200        if !self.is_valid_solution(config) {
201            return None;
202        }
203        let contacts_2_set: HashSet<(usize, usize)> = self.contacts_2.iter().copied().collect();
204        let mut count = 0usize;
205        for &(i, k) in &self.contacts_1 {
206            let fi = config[i];
207            let fk = config[k];
208            if fi == 0 || fk == 0 {
209                continue;
210            }
211            // Encoding: nonzero value v means vertex v - 1 of G_2.
212            let a = fi - 1;
213            let b = fk - 1;
214            let pair = if a < b { (a, b) } else { (b, a) };
215            if contacts_2_set.contains(&pair) {
216                count += 1;
217            }
218        }
219        Some(count)
220    }
221}
222
223impl Problem for MaximumContactMapOverlap {
224    const NAME: &'static str = "MaximumContactMapOverlap";
225    type Value = Max<i64>;
226
227    fn variant() -> Vec<(&'static str, &'static str)> {
228        crate::variant_params![]
229    }
230
231    fn dims(&self) -> Vec<usize> {
232        vec![self.num_vertices_2 + 1; self.num_vertices_1]
233    }
234
235    fn evaluate(&self, config: &[usize]) -> Max<i64> {
236        match self.preserved_contact_count(config) {
237            Some(count) => Max(Some(count as i64)),
238            None => Max(None),
239        }
240    }
241}
242
243crate::declare_variants! {
244    default MaximumContactMapOverlap => "(num_vertices_2 + 1)^num_vertices_1",
245}
246
247#[cfg(feature = "example-db")]
248pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
249    // Canonical example from the issue:
250    //   G_1: n_1 = 4, E_1 = {{0,2}, {1,3}}
251    //   G_2: n_2 = 5, E_2 = {{0,3}, {1,4}, {0,2}}
252    // Optimal alignment: 0->0, 1->1, 2->3, 3->4 (encoded as [1, 2, 4, 5]).
253    //   - order-preserving: 1 < 2 < 4 < 5
254    //   - injectivity: all values distinct
255    //   - contact {0,2}: mapped (0, 3); sorted (0, 3) in E_2
256    //   - contact {1,3}: mapped (1, 4); sorted (1, 4) in E_2
257    //   - value = 2 contacts preserved.
258    vec![crate::example_db::specs::ModelExampleSpec {
259        id: "maximum_contact_map_overlap",
260        instance: Box::new(MaximumContactMapOverlap::new(
261            4,
262            vec![(0, 2), (1, 3)],
263            5,
264            vec![(0, 3), (1, 4), (0, 2)],
265        )),
266        optimal_config: vec![1, 2, 4, 5],
267        optimal_value: serde_json::json!(2),
268    }]
269}
270
271#[cfg(test)]
272#[path = "../../unit_tests/models/graph/maximum_contact_map_overlap.rs"]
273mod tests;