rust_data_processing/
privacy.rs1use serde::Serialize;
7
8use crate::types::{DataSet, Value};
9
10#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
12pub struct Utf8ColumnChangeSummary {
13 pub column: String,
14 pub non_null_cells_compared: usize,
15 pub cells_changed: usize,
16}
17
18pub fn summarize_utf8_column_changes(
20 before: &DataSet,
21 after: &DataSet,
22 columns: &[String],
23) -> Vec<Utf8ColumnChangeSummary> {
24 let mut out = Vec::with_capacity(columns.len());
25 for name in columns {
26 let Some(bi) = before.schema.index_of(name) else {
27 continue;
28 };
29 let Some(ai) = after.schema.index_of(name) else {
30 continue;
31 };
32 let mut non_null = 0usize;
33 let mut changed = 0usize;
34 for (br, ar) in before.rows.iter().zip(after.rows.iter()) {
35 if let (Some(Value::Utf8(sb)), Some(Value::Utf8(sa))) = (br.get(bi), ar.get(ai)) {
36 non_null += 1;
37 if sb != sa {
38 changed += 1;
39 }
40 }
41 }
42 out.push(Utf8ColumnChangeSummary {
43 column: name.clone(),
44 non_null_cells_compared: non_null,
45 cells_changed: changed,
46 });
47 }
48 out
49}
50
51pub fn render_privacy_report_json(
53 rows: &[Utf8ColumnChangeSummary],
54) -> crate::error::IngestionResult<String> {
55 serde_json::to_string_pretty(rows).map_err(|e| crate::error::IngestionError::SchemaMismatch {
56 message: format!("privacy report json: {e}"),
57 })
58}
59
60pub fn render_privacy_report_markdown(rows: &[Utf8ColumnChangeSummary]) -> String {
62 let mut s = String::from("## Privacy / masking summary (UTF-8 diffs)\n\n");
63 for r in rows {
64 s.push_str(&format!(
65 "- **{}**: changed **{}** / **{}** non-null cells\n",
66 r.column, r.cells_changed, r.non_null_cells_compared
67 ));
68 }
69 s
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DataType, Field, Schema};

    /// One masked string and one null-to-null row: only the string pair is
    /// compared, and it counts as changed.
    #[test]
    fn summary_counts_changes() {
        let schema = Schema::new(vec![Field::new("email", DataType::Utf8)]);
        let original_rows = vec![vec![Value::Utf8("a@b.c".into())], vec![Value::Null]];
        let masked_rows = vec![vec![Value::Utf8("a@***".into())], vec![Value::Null]];

        let before = DataSet::new(schema.clone(), original_rows);
        let after = DataSet::new(schema, masked_rows);

        let columns: Vec<String> = vec!["email".into()];
        let report = summarize_utf8_column_changes(&before, &after, &columns);

        let email = &report[0];
        assert_eq!(email.cells_changed, 1);
        assert_eq!(email.non_null_cells_compared, 1);
    }
}