Skip to main content

problemreductions/models/misc/
consistency_of_database_frequency_tables.rs

1//! Consistency of Database Frequency Tables problem implementation.
2//!
3//! Given a finite set of objects, categorical attributes with finite domains,
4//! pairwise frequency tables for selected attribute pairs, and some known
5//! object-attribute values, determine whether there exists a complete
6//! assignment of attribute values to all objects that matches every published
7//! frequency table and every known value.
8
9use crate::registry::{FieldInfo, ProblemSchemaEntry};
10use crate::traits::Problem;
11use serde::{Deserialize, Serialize};
12use std::collections::BTreeSet;
13
14#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
15pub struct FrequencyTable {
16    attribute_a: usize,
17    attribute_b: usize,
18    counts: Vec<Vec<usize>>,
19}
20
21impl FrequencyTable {
22    /// Create a new pairwise frequency table.
23    pub fn new(attribute_a: usize, attribute_b: usize, counts: Vec<Vec<usize>>) -> Self {
24        Self {
25            attribute_a,
26            attribute_b,
27            counts,
28        }
29    }
30
31    /// Returns the first attribute index.
32    pub fn attribute_a(&self) -> usize {
33        self.attribute_a
34    }
35
36    /// Returns the second attribute index.
37    pub fn attribute_b(&self) -> usize {
38        self.attribute_b
39    }
40
41    /// Returns the table counts.
42    pub fn counts(&self) -> &[Vec<usize>] {
43        &self.counts
44    }
45
46    /// Returns the number of table cells.
47    pub fn num_cells(&self) -> usize {
48        self.counts.iter().map(Vec::len).sum()
49    }
50}
51
52#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct KnownValue {
54    object: usize,
55    attribute: usize,
56    value: usize,
57}
58
59impl KnownValue {
60    /// Create a new known object-attribute value.
61    pub fn new(object: usize, attribute: usize, value: usize) -> Self {
62        Self {
63            object,
64            attribute,
65            value,
66        }
67    }
68
69    /// Returns the object index.
70    pub fn object(&self) -> usize {
71        self.object
72    }
73
74    /// Returns the attribute index.
75    pub fn attribute(&self) -> usize {
76        self.attribute
77    }
78
79    /// Returns the fixed categorical value.
80    pub fn value(&self) -> usize {
81        self.value
82    }
83}
84
85inventory::submit! {
86    ProblemSchemaEntry {
87        name: "ConsistencyOfDatabaseFrequencyTables",
88        display_name: "Consistency of Database Frequency Tables",
89        aliases: &[],
90        dimensions: &[],
91        module_path: module_path!(),
92        description: "Determine whether pairwise frequency tables and known values admit a consistent complete database assignment",
93        fields: &[
94            FieldInfo { name: "num_objects", type_name: "usize", description: "Number of objects in the database" },
95            FieldInfo { name: "attribute_domains", type_name: "Vec<usize>", description: "Domain size for each attribute" },
96            FieldInfo { name: "frequency_tables", type_name: "Vec<FrequencyTable>", description: "Published pairwise frequency tables" },
97            FieldInfo { name: "known_values", type_name: "Vec<KnownValue>", description: "Known object-attribute-value triples" },
98        ],
99    }
100}
101
102/// The Consistency of Database Frequency Tables decision problem.
103#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct ConsistencyOfDatabaseFrequencyTables {
105    num_objects: usize,
106    attribute_domains: Vec<usize>,
107    frequency_tables: Vec<FrequencyTable>,
108    known_values: Vec<KnownValue>,
109}
110
111impl ConsistencyOfDatabaseFrequencyTables {
112    /// Create a new consistency-of-database-frequency-tables instance.
113    pub fn new(
114        num_objects: usize,
115        attribute_domains: Vec<usize>,
116        frequency_tables: Vec<FrequencyTable>,
117        known_values: Vec<KnownValue>,
118    ) -> Self {
119        for (attribute, &domain_size) in attribute_domains.iter().enumerate() {
120            assert!(
121                domain_size > 0,
122                "attribute domain size at index {attribute} must be positive"
123            );
124        }
125
126        let num_attributes = attribute_domains.len();
127        let mut seen_pairs = BTreeSet::new();
128        for table in &frequency_tables {
129            let attribute_a = table.attribute_a();
130            let attribute_b = table.attribute_b();
131            assert!(
132                attribute_a < num_attributes,
133                "frequency table attribute_a {attribute_a} out of range for {num_attributes} attributes"
134            );
135            assert!(
136                attribute_b < num_attributes,
137                "frequency table attribute_b {attribute_b} out of range for {num_attributes} attributes"
138            );
139            assert!(
140                attribute_a != attribute_b,
141                "frequency table attributes must be distinct"
142            );
143
144            let pair = if attribute_a < attribute_b {
145                (attribute_a, attribute_b)
146            } else {
147                (attribute_b, attribute_a)
148            };
149            assert!(
150                seen_pairs.insert(pair),
151                "duplicate frequency table pair ({}, {})",
152                pair.0,
153                pair.1
154            );
155
156            let expected_rows = attribute_domains[attribute_a];
157            assert_eq!(
158                table.counts().len(),
159                expected_rows,
160                "frequency table rows ({}) must equal attribute_domains[{attribute_a}] ({expected_rows})",
161                table.counts().len()
162            );
163
164            let expected_cols = attribute_domains[attribute_b];
165            for (row, row_counts) in table.counts().iter().enumerate() {
166                assert_eq!(
167                    row_counts.len(),
168                    expected_cols,
169                    "frequency table columns ({}) in row {row} must equal attribute_domains[{attribute_b}] ({expected_cols})",
170                    row_counts.len()
171                );
172            }
173
174            let total: usize = table.counts().iter().flatten().copied().sum();
175            assert_eq!(
176                total, num_objects,
177                "frequency table total ({total}) must equal num_objects ({num_objects})"
178            );
179        }
180
181        for known_value in &known_values {
182            assert!(
183                known_value.object() < num_objects,
184                "known value object {} out of range for num_objects {}",
185                known_value.object(),
186                num_objects
187            );
188            assert!(
189                known_value.attribute() < num_attributes,
190                "known value attribute {} out of range for {num_attributes} attributes",
191                known_value.attribute()
192            );
193            let domain_size = attribute_domains[known_value.attribute()];
194            assert!(
195                known_value.value() < domain_size,
196                "known value value {} out of range for attribute {} with domain size {}",
197                known_value.value(),
198                known_value.attribute(),
199                domain_size
200            );
201        }
202
203        Self {
204            num_objects,
205            attribute_domains,
206            frequency_tables,
207            known_values,
208        }
209    }
210
211    /// Returns the number of objects.
212    pub fn num_objects(&self) -> usize {
213        self.num_objects
214    }
215
216    /// Returns the number of attributes.
217    pub fn num_attributes(&self) -> usize {
218        self.attribute_domains.len()
219    }
220
221    /// Returns the attribute-domain sizes.
222    pub fn attribute_domains(&self) -> &[usize] {
223        &self.attribute_domains
224    }
225
226    /// Returns the published frequency tables.
227    pub fn frequency_tables(&self) -> &[FrequencyTable] {
228        &self.frequency_tables
229    }
230
231    /// Returns the known values.
232    pub fn known_values(&self) -> &[KnownValue] {
233        &self.known_values
234    }
235
236    /// Returns the product of attribute domain sizes.
237    pub fn domain_size_product(&self) -> usize {
238        self.attribute_domains.iter().copied().product()
239    }
240
241    /// Returns the number of object-attribute assignment variables in the direct encoding.
242    pub fn num_assignment_variables(&self) -> usize {
243        self.num_objects * self.num_attributes()
244    }
245
246    /// Returns the number of published frequency tables.
247    pub fn num_frequency_tables(&self) -> usize {
248        self.frequency_tables.len()
249    }
250
251    /// Returns the number of known value constraints.
252    pub fn num_known_values(&self) -> usize {
253        self.known_values.len()
254    }
255
256    /// Returns the number of one-hot assignment indicators used by the ILP reduction.
257    pub fn num_assignment_indicators(&self) -> usize {
258        self.num_objects * self.attribute_domains.iter().sum::<usize>()
259    }
260
261    /// Returns the total number of published frequency-table cells.
262    pub fn num_frequency_cells(&self) -> usize {
263        self.frequency_tables
264            .iter()
265            .map(FrequencyTable::num_cells)
266            .sum()
267    }
268
269    /// Returns the number of auxiliary ILP indicators used for frequency-cell counting.
270    pub fn num_auxiliary_frequency_indicators(&self) -> usize {
271        self.num_objects * self.num_frequency_cells()
272    }
273
274    fn config_index(&self, object: usize, attribute: usize) -> usize {
275        object * self.num_attributes() + attribute
276    }
277}
278
279impl Problem for ConsistencyOfDatabaseFrequencyTables {
280    const NAME: &'static str = "ConsistencyOfDatabaseFrequencyTables";
281    type Value = crate::types::Or;
282
283    fn variant() -> Vec<(&'static str, &'static str)> {
284        crate::variant_params![]
285    }
286
287    fn dims(&self) -> Vec<usize> {
288        let mut dims = Vec::with_capacity(self.num_assignment_variables());
289        for _ in 0..self.num_objects {
290            dims.extend(self.attribute_domains.iter().copied());
291        }
292        dims
293    }
294
295    fn evaluate(&self, config: &[usize]) -> crate::types::Or {
296        crate::types::Or({
297            if config.len() != self.num_assignment_variables() {
298                return crate::types::Or(false);
299            }
300
301            for object in 0..self.num_objects {
302                for (attribute, &domain_size) in self.attribute_domains.iter().enumerate() {
303                    if config[self.config_index(object, attribute)] >= domain_size {
304                        return crate::types::Or(false);
305                    }
306                }
307            }
308
309            for known_value in &self.known_values {
310                if config[self.config_index(known_value.object(), known_value.attribute())]
311                    != known_value.value()
312                {
313                    return crate::types::Or(false);
314                }
315            }
316
317            for table in &self.frequency_tables {
318                let rows = self.attribute_domains[table.attribute_a()];
319                let cols = self.attribute_domains[table.attribute_b()];
320                let mut observed = vec![vec![0usize; cols]; rows];
321
322                for object in 0..self.num_objects {
323                    let value_a = config[self.config_index(object, table.attribute_a())];
324                    let value_b = config[self.config_index(object, table.attribute_b())];
325                    observed[value_a][value_b] += 1;
326                }
327
328                if observed != table.counts {
329                    return crate::types::Or(false);
330                }
331            }
332
333            true
334        })
335    }
336}
337
338crate::declare_variants! {
339    default ConsistencyOfDatabaseFrequencyTables => "domain_size_product^num_objects",
340}
341
342#[cfg(feature = "example-db")]
343pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
344    vec![crate::example_db::specs::ModelExampleSpec {
345        id: "consistency_of_database_frequency_tables",
346        instance: Box::new(ConsistencyOfDatabaseFrequencyTables::new(
347            6,
348            vec![2, 3, 2],
349            vec![
350                FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]),
351                FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]),
352            ],
353            vec![
354                KnownValue::new(0, 0, 0),
355                KnownValue::new(3, 0, 1),
356                KnownValue::new(1, 2, 1),
357            ],
358        )),
359        optimal_config: vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0],
360        optimal_value: serde_json::json!(true),
361    }]
362}
363
364#[cfg(test)]
365#[path = "../../unit_tests/models/misc/consistency_of_database_frequency_tables.rs"]
366mod tests;