Skip to main content

rust_data_processing/
reports.rs

//! Small helpers for **prompt-sized** text derived from structured reports (Phase 2 optional).
2
/// Truncate a UTF-8 string so the retained prefix is at most `max_bytes` **UTF-8 bytes**,
/// never splitting a codepoint.
///
/// If `input.len() <= max_bytes`, the input is returned unchanged. Otherwise the longest
/// prefix ending on a char boundary at or before byte `max_bytes` is kept, followed by the
/// marker `…` (U+2026, a non-ASCII character that is 3 bytes in UTF-8) and the suffix
/// ` [truncated from N bytes]`, where `N` is the original byte length of `input`.
///
/// Note that the byte budget applies to the retained prefix only: the marker and suffix are
/// appended on top of it, so a truncated result is longer than `max_bytes` bytes.
#[must_use]
pub fn truncate_utf8_by_bytes(input: &str, max_bytes: usize) -> String {
    if input.len() <= max_bytes {
        return input.to_owned();
    }
    // Walk back from the budget to the nearest char boundary. Index 0 is always a boundary,
    // so the search always succeeds; `unwrap_or(0)` only avoids a panic path.
    let end = (0..=max_bytes)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0);
    format!("{}… [truncated from {} bytes]", &input[..end], input.len())
}
15
#[cfg(test)]
mod tests {
    use super::*;

    /// A budget that lands in the middle of a codepoint must back up to the previous boundary.
    #[test]
    fn truncate_respects_char_boundary() {
        let s = "ééé"; // 6 bytes: each `é` is 2 bytes in UTF-8
        let t = truncate_utf8_by_bytes(s, 3);
        // Byte 3 falls inside the second `é`, so only the first `é` is kept.
        assert_eq!(t, "é… [truncated from 6 bytes]");
    }

    /// Input that already fits the budget is returned verbatim, with no marker appended.
    #[test]
    fn input_within_budget_is_unchanged() {
        assert_eq!(truncate_utf8_by_bytes("abc", 3), "abc");
        assert_eq!(truncate_utf8_by_bytes("", 0), "");
    }

    /// A zero budget keeps no input bytes; only the marker and suffix remain.
    #[test]
    fn zero_budget_keeps_only_marker() {
        assert_eq!(
            truncate_utf8_by_bytes("hello", 0),
            "… [truncated from 5 bytes]"
        );
    }

    /// ASCII input is cut exactly at the budget, since every byte is a char boundary.
    #[test]
    fn ascii_is_cut_exactly_at_budget() {
        assert_eq!(
            truncate_utf8_by_bytes("hello world", 5),
            "hello… [truncated from 11 bytes]"
        );
    }
}