problemreductions/models/misc/
consistency_of_database_frequency_tables.rs1use crate::registry::{FieldInfo, ProblemSchemaEntry};
10use crate::traits::Problem;
11use serde::{Deserialize, Serialize};
12use std::collections::BTreeSet;
13
14#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
15pub struct FrequencyTable {
16 attribute_a: usize,
17 attribute_b: usize,
18 counts: Vec<Vec<usize>>,
19}
20
21impl FrequencyTable {
22 pub fn new(attribute_a: usize, attribute_b: usize, counts: Vec<Vec<usize>>) -> Self {
24 Self {
25 attribute_a,
26 attribute_b,
27 counts,
28 }
29 }
30
31 pub fn attribute_a(&self) -> usize {
33 self.attribute_a
34 }
35
36 pub fn attribute_b(&self) -> usize {
38 self.attribute_b
39 }
40
41 pub fn counts(&self) -> &[Vec<usize>] {
43 &self.counts
44 }
45
46 pub fn num_cells(&self) -> usize {
48 self.counts.iter().map(Vec::len).sum()
49 }
50}
51
52#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct KnownValue {
54 object: usize,
55 attribute: usize,
56 value: usize,
57}
58
59impl KnownValue {
60 pub fn new(object: usize, attribute: usize, value: usize) -> Self {
62 Self {
63 object,
64 attribute,
65 value,
66 }
67 }
68
69 pub fn object(&self) -> usize {
71 self.object
72 }
73
74 pub fn attribute(&self) -> usize {
76 self.attribute
77 }
78
79 pub fn value(&self) -> usize {
81 self.value
82 }
83}
84
85inventory::submit! {
86 ProblemSchemaEntry {
87 name: "ConsistencyOfDatabaseFrequencyTables",
88 display_name: "Consistency of Database Frequency Tables",
89 aliases: &[],
90 dimensions: &[],
91 module_path: module_path!(),
92 description: "Determine whether pairwise frequency tables and known values admit a consistent complete database assignment",
93 fields: &[
94 FieldInfo { name: "num_objects", type_name: "usize", description: "Number of objects in the database" },
95 FieldInfo { name: "attribute_domains", type_name: "Vec<usize>", description: "Domain size for each attribute" },
96 FieldInfo { name: "frequency_tables", type_name: "Vec<FrequencyTable>", description: "Published pairwise frequency tables" },
97 FieldInfo { name: "known_values", type_name: "Vec<KnownValue>", description: "Known object-attribute-value triples" },
98 ],
99 }
100}
101
102#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct ConsistencyOfDatabaseFrequencyTables {
105 num_objects: usize,
106 attribute_domains: Vec<usize>,
107 frequency_tables: Vec<FrequencyTable>,
108 known_values: Vec<KnownValue>,
109}
110
111impl ConsistencyOfDatabaseFrequencyTables {
112 pub fn new(
114 num_objects: usize,
115 attribute_domains: Vec<usize>,
116 frequency_tables: Vec<FrequencyTable>,
117 known_values: Vec<KnownValue>,
118 ) -> Self {
119 for (attribute, &domain_size) in attribute_domains.iter().enumerate() {
120 assert!(
121 domain_size > 0,
122 "attribute domain size at index {attribute} must be positive"
123 );
124 }
125
126 let num_attributes = attribute_domains.len();
127 let mut seen_pairs = BTreeSet::new();
128 for table in &frequency_tables {
129 let attribute_a = table.attribute_a();
130 let attribute_b = table.attribute_b();
131 assert!(
132 attribute_a < num_attributes,
133 "frequency table attribute_a {attribute_a} out of range for {num_attributes} attributes"
134 );
135 assert!(
136 attribute_b < num_attributes,
137 "frequency table attribute_b {attribute_b} out of range for {num_attributes} attributes"
138 );
139 assert!(
140 attribute_a != attribute_b,
141 "frequency table attributes must be distinct"
142 );
143
144 let pair = if attribute_a < attribute_b {
145 (attribute_a, attribute_b)
146 } else {
147 (attribute_b, attribute_a)
148 };
149 assert!(
150 seen_pairs.insert(pair),
151 "duplicate frequency table pair ({}, {})",
152 pair.0,
153 pair.1
154 );
155
156 let expected_rows = attribute_domains[attribute_a];
157 assert_eq!(
158 table.counts().len(),
159 expected_rows,
160 "frequency table rows ({}) must equal attribute_domains[{attribute_a}] ({expected_rows})",
161 table.counts().len()
162 );
163
164 let expected_cols = attribute_domains[attribute_b];
165 for (row, row_counts) in table.counts().iter().enumerate() {
166 assert_eq!(
167 row_counts.len(),
168 expected_cols,
169 "frequency table columns ({}) in row {row} must equal attribute_domains[{attribute_b}] ({expected_cols})",
170 row_counts.len()
171 );
172 }
173
174 let total: usize = table.counts().iter().flatten().copied().sum();
175 assert_eq!(
176 total, num_objects,
177 "frequency table total ({total}) must equal num_objects ({num_objects})"
178 );
179 }
180
181 for known_value in &known_values {
182 assert!(
183 known_value.object() < num_objects,
184 "known value object {} out of range for num_objects {}",
185 known_value.object(),
186 num_objects
187 );
188 assert!(
189 known_value.attribute() < num_attributes,
190 "known value attribute {} out of range for {num_attributes} attributes",
191 known_value.attribute()
192 );
193 let domain_size = attribute_domains[known_value.attribute()];
194 assert!(
195 known_value.value() < domain_size,
196 "known value value {} out of range for attribute {} with domain size {}",
197 known_value.value(),
198 known_value.attribute(),
199 domain_size
200 );
201 }
202
203 Self {
204 num_objects,
205 attribute_domains,
206 frequency_tables,
207 known_values,
208 }
209 }
210
211 pub fn num_objects(&self) -> usize {
213 self.num_objects
214 }
215
216 pub fn num_attributes(&self) -> usize {
218 self.attribute_domains.len()
219 }
220
221 pub fn attribute_domains(&self) -> &[usize] {
223 &self.attribute_domains
224 }
225
226 pub fn frequency_tables(&self) -> &[FrequencyTable] {
228 &self.frequency_tables
229 }
230
231 pub fn known_values(&self) -> &[KnownValue] {
233 &self.known_values
234 }
235
236 pub fn domain_size_product(&self) -> usize {
238 self.attribute_domains.iter().copied().product()
239 }
240
241 pub fn num_assignment_variables(&self) -> usize {
243 self.num_objects * self.num_attributes()
244 }
245
246 pub fn num_frequency_tables(&self) -> usize {
248 self.frequency_tables.len()
249 }
250
251 pub fn num_known_values(&self) -> usize {
253 self.known_values.len()
254 }
255
256 pub fn num_assignment_indicators(&self) -> usize {
258 self.num_objects * self.attribute_domains.iter().sum::<usize>()
259 }
260
261 pub fn num_frequency_cells(&self) -> usize {
263 self.frequency_tables
264 .iter()
265 .map(FrequencyTable::num_cells)
266 .sum()
267 }
268
269 pub fn num_auxiliary_frequency_indicators(&self) -> usize {
271 self.num_objects * self.num_frequency_cells()
272 }
273
274 fn config_index(&self, object: usize, attribute: usize) -> usize {
275 object * self.num_attributes() + attribute
276 }
277}
278
279impl Problem for ConsistencyOfDatabaseFrequencyTables {
280 const NAME: &'static str = "ConsistencyOfDatabaseFrequencyTables";
281 type Value = crate::types::Or;
282
283 fn variant() -> Vec<(&'static str, &'static str)> {
284 crate::variant_params![]
285 }
286
287 fn dims(&self) -> Vec<usize> {
288 let mut dims = Vec::with_capacity(self.num_assignment_variables());
289 for _ in 0..self.num_objects {
290 dims.extend(self.attribute_domains.iter().copied());
291 }
292 dims
293 }
294
295 fn evaluate(&self, config: &[usize]) -> crate::types::Or {
296 crate::types::Or({
297 if config.len() != self.num_assignment_variables() {
298 return crate::types::Or(false);
299 }
300
301 for object in 0..self.num_objects {
302 for (attribute, &domain_size) in self.attribute_domains.iter().enumerate() {
303 if config[self.config_index(object, attribute)] >= domain_size {
304 return crate::types::Or(false);
305 }
306 }
307 }
308
309 for known_value in &self.known_values {
310 if config[self.config_index(known_value.object(), known_value.attribute())]
311 != known_value.value()
312 {
313 return crate::types::Or(false);
314 }
315 }
316
317 for table in &self.frequency_tables {
318 let rows = self.attribute_domains[table.attribute_a()];
319 let cols = self.attribute_domains[table.attribute_b()];
320 let mut observed = vec![vec![0usize; cols]; rows];
321
322 for object in 0..self.num_objects {
323 let value_a = config[self.config_index(object, table.attribute_a())];
324 let value_b = config[self.config_index(object, table.attribute_b())];
325 observed[value_a][value_b] += 1;
326 }
327
328 if observed != table.counts {
329 return crate::types::Or(false);
330 }
331 }
332
333 true
334 })
335 }
336}
337
338crate::declare_variants! {
339 default ConsistencyOfDatabaseFrequencyTables => "domain_size_product^num_objects",
340}
341
342#[cfg(feature = "example-db")]
343pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
344 vec![crate::example_db::specs::ModelExampleSpec {
345 id: "consistency_of_database_frequency_tables",
346 instance: Box::new(ConsistencyOfDatabaseFrequencyTables::new(
347 6,
348 vec![2, 3, 2],
349 vec![
350 FrequencyTable::new(0, 1, vec![vec![1, 1, 1], vec![1, 1, 1]]),
351 FrequencyTable::new(1, 2, vec![vec![1, 1], vec![0, 2], vec![1, 1]]),
352 ],
353 vec![
354 KnownValue::new(0, 0, 0),
355 KnownValue::new(3, 0, 1),
356 KnownValue::new(1, 2, 1),
357 ],
358 )),
359 optimal_config: vec![0, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 2, 0],
360 optimal_value: serde_json::json!(true),
361 }]
362}
363
364#[cfg(test)]
365#[path = "../../unit_tests/models/misc/consistency_of_database_frequency_tables.rs"]
366mod tests;