Skip to main content

problemreductions/models/misc/
clustering.rs

1//! Clustering problem implementation.
2//!
3//! Given a distance matrix over n elements, a cluster count bound K,
4//! and a diameter bound B, determine whether the elements can be partitioned
5//! into at most K non-empty clusters such that all intra-cluster pairwise
6//! distances are at most B.
7
8use crate::registry::{FieldInfo, ProblemSchemaEntry};
9use crate::traits::Problem;
10use serde::{Deserialize, Serialize};
11
12inventory::submit! {
13    ProblemSchemaEntry {
14        name: "Clustering",
15        display_name: "Clustering",
16        aliases: &[],
17        dimensions: &[],
18        module_path: module_path!(),
19        description: "Partition elements into at most K clusters where all intra-cluster distances are at most B",
20        fields: &[
21            FieldInfo { name: "distances", type_name: "Vec<Vec<u64>>", description: "Symmetric distance matrix with zero diagonal" },
22            FieldInfo { name: "num_clusters", type_name: "usize", description: "Maximum number of clusters K" },
23            FieldInfo { name: "diameter_bound", type_name: "u64", description: "Maximum allowed intra-cluster pairwise distance B" },
24        ],
25    }
26}
27
28/// The Clustering problem.
29///
30/// Given a set of `n` elements with pairwise distances, a cluster count
31/// bound `K`, and a diameter bound `B`, determine whether there exists
32/// a partition of the elements into at most `K` non-empty clusters such
33/// that for every cluster, all pairwise distances within that cluster
34/// are at most `B`.
35///
36/// # Representation
37///
38/// Each element `i` is assigned a cluster index `config[i] ∈ {0, ..., K-1}`.
39/// The problem is satisfiable iff every non-empty cluster has all pairwise
40/// distances ≤ B.
41///
42/// # Example
43///
44/// ```
45/// use problemreductions::models::misc::Clustering;
46/// use problemreductions::{Problem, Solver, BruteForce};
47///
48/// // 4 elements, 2 clusters, diameter bound 1
49/// let distances = vec![
50///     vec![0, 1, 3, 3],
51///     vec![1, 0, 3, 3],
52///     vec![3, 3, 0, 1],
53///     vec![3, 3, 1, 0],
54/// ];
55/// let problem = Clustering::new(distances, 2, 1);
56/// let solver = BruteForce::new();
57/// let solution = solver.find_witness(&problem);
58/// assert!(solution.is_some());
59/// ```
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct Clustering {
62    /// Symmetric distance matrix with zero diagonal.
63    distances: Vec<Vec<u64>>,
64    /// Maximum number of clusters K.
65    num_clusters: usize,
66    /// Maximum allowed intra-cluster pairwise distance B.
67    diameter_bound: u64,
68}
69
70impl Clustering {
71    /// Create a new Clustering instance.
72    ///
73    /// # Panics
74    ///
75    /// Panics if:
76    /// - `distances` is empty
77    /// - `distances` is not square
78    /// - `distances` is not symmetric
79    /// - diagonal entries are not zero
80    /// - `num_clusters` is zero
81    pub fn new(distances: Vec<Vec<u64>>, num_clusters: usize, diameter_bound: u64) -> Self {
82        let n = distances.len();
83        assert!(n > 0, "Clustering requires at least one element");
84        assert!(num_clusters > 0, "num_clusters must be at least 1");
85        for (i, row) in distances.iter().enumerate() {
86            assert_eq!(
87                row.len(),
88                n,
89                "Distance matrix must be square: row {i} has {} columns, expected {n}",
90                row.len()
91            );
92            assert_eq!(
93                distances[i][i], 0,
94                "Diagonal entry distances[{i}][{i}] must be 0"
95            );
96        }
97        for (i, row_i) in distances.iter().enumerate() {
98            for j in (i + 1)..n {
99                assert_eq!(
100                    row_i[j], distances[j][i],
101                    "Distance matrix must be symmetric: distances[{i}][{j}] = {} != distances[{j}][{i}] = {}",
102                    row_i[j], distances[j][i]
103                );
104            }
105        }
106        Self {
107            distances,
108            num_clusters,
109            diameter_bound,
110        }
111    }
112
113    /// Returns the distance matrix.
114    pub fn distances(&self) -> &[Vec<u64>] {
115        &self.distances
116    }
117
118    /// Returns the number of elements.
119    pub fn num_elements(&self) -> usize {
120        self.distances.len()
121    }
122
123    /// Returns the maximum number of clusters K.
124    pub fn num_clusters(&self) -> usize {
125        self.num_clusters
126    }
127
128    /// Returns the diameter bound B.
129    pub fn diameter_bound(&self) -> u64 {
130        self.diameter_bound
131    }
132
133    /// Check if a configuration is a valid clustering.
134    fn is_valid_partition(&self, config: &[usize]) -> bool {
135        let n = self.num_elements();
136        if config.len() != n {
137            return false;
138        }
139        if config.iter().any(|&c| c >= self.num_clusters) {
140            return false;
141        }
142        // Group elements by cluster in a single pass
143        let mut clusters: Vec<Vec<usize>> = vec![vec![]; self.num_clusters];
144        for (i, &c) in config.iter().enumerate() {
145            clusters[c].push(i);
146        }
147        // Check all intra-cluster pairwise distances ≤ B
148        for members in &clusters {
149            for a in 0..members.len() {
150                for b in (a + 1)..members.len() {
151                    if self.distances[members[a]][members[b]] > self.diameter_bound {
152                        return false;
153                    }
154                }
155            }
156        }
157        true
158    }
159}
160
161impl Problem for Clustering {
162    const NAME: &'static str = "Clustering";
163    type Value = crate::types::Or;
164
165    fn variant() -> Vec<(&'static str, &'static str)> {
166        crate::variant_params![]
167    }
168
169    fn dims(&self) -> Vec<usize> {
170        vec![self.num_clusters; self.num_elements()]
171    }
172
173    fn evaluate(&self, config: &[usize]) -> crate::types::Or {
174        crate::types::Or(self.is_valid_partition(config))
175    }
176}
177
178crate::declare_variants! {
179    default Clustering => "num_clusters^num_elements",
180}
181
182#[cfg(feature = "example-db")]
183pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
184    // 6 elements in two tight groups {0,1,2} and {3,4,5}
185    // Intra-group distance = 1, inter-group distance = 3
186    // K=2, B=1
187    let distances = vec![
188        vec![0, 1, 1, 3, 3, 3],
189        vec![1, 0, 1, 3, 3, 3],
190        vec![1, 1, 0, 3, 3, 3],
191        vec![3, 3, 3, 0, 1, 1],
192        vec![3, 3, 3, 1, 0, 1],
193        vec![3, 3, 3, 1, 1, 0],
194    ];
195    vec![crate::example_db::specs::ModelExampleSpec {
196        id: "clustering",
197        instance: Box::new(Clustering::new(distances, 2, 1)),
198        optimal_config: vec![0, 0, 0, 1, 1, 1],
199        optimal_value: serde_json::json!(true),
200    }]
201}
202
203#[cfg(test)]
204#[path = "../../unit_tests/models/misc/clustering.rs"]
205mod tests;