Skip to main content

brows3r_lib/diagnostics/
redact.rs

1//! Credential and path redaction for diagnostic bundles.
2//!
3//! `Redactor` compiles all patterns once at construction time and applies them
4//! in `redact_text` / `redact_path`.  The trace_id field on `AppError::Internal`
5//! is intentionally NOT redacted — it is the link between a user-visible error
6//! and the corresponding log lines.
7
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10
/// Controls how aggressively account IDs are redacted.
///
/// `Full` is the default and redacts everything.  `Partial` keeps 12-digit
/// account IDs visible (useful for multi-account diagnostic triage).  `None`
/// is a no-op that returns the input unchanged — useful in tests or when the
/// caller has already stripped sensitive data.
///
/// The level is consulted by `Redactor::redact_text` only;
/// `Redactor::redact_path` does not look at it and rewrites home-directory
/// prefixes at every level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum RedactionLevel {
    /// Redact all patterns including AWS account IDs (default).
    #[default]
    Full,
    /// Redact credentials but keep 12-digit account IDs visible.
    Partial,
    /// No-op; return the input unchanged.
    None,
}
27
/// Compiled redaction patterns.
///
/// Construct once with `Redactor::new()` (or `Redactor::with_level`) and
/// reuse across many calls: every regex below is compiled in the constructor,
/// and the home directory is looked up there as well.
pub struct Redactor {
    /// Redaction level chosen at construction time; checked by `redact_text`.
    level: RedactionLevel,
    /// `(AKIA|ASIA|AROA)[A-Z0-9]{16}` — AWS access key IDs.
    re_aws_key_id: Regex,
    /// `(?i)(aws_secret_access_key|secret_access_key|secret)\s*=\s*['"]?([A-Za-z0-9/+=]{40})['"]?`
    re_secret: Regex,
    /// `(?i)(aws_session_token|session_token)\s*=\s*['"]?([A-Za-z0-9/+=]{100,})['"]?`
    re_session: Regex,
    /// Presigned URLs — everything after `?` when `X-Amz-Signature` is present.
    re_presigned: Regex,
    /// 12-digit AWS account IDs (only applied at `RedactionLevel::Full`).
    re_account_id: Regex,
    /// Bearer token values.
    re_bearer: Regex,
    /// Actual home-dir prefix at runtime (e.g. `/Users/alice`), as reported
    /// by `dirs_home()` when the `Redactor` was built.  `None` when it could
    /// not be determined.
    home_dir: Option<String>,
}
48
49impl Redactor {
50    /// Build a `Redactor` with the given `RedactionLevel`.
51    pub fn with_level(level: RedactionLevel) -> Self {
52        // These unwraps are intentional: all patterns are compile-time
53        // constants.  A panic here is a programming error, not a runtime error.
54        let re_aws_key_id = Regex::new(r"(AKIA|ASIA|AROA)[A-Z0-9]{16}").unwrap();
55        let re_secret = Regex::new(
56            r#"(?i)(aws_secret_access_key|secret_access_key|secret)\s*=\s*['"]?([A-Za-z0-9/+=]{40})['"]?"#,
57        )
58        .unwrap();
59        let re_session = Regex::new(
60            r#"(?i)(aws_session_token|session_token)\s*=\s*['"]?([A-Za-z0-9/+=]{100,})['"]?"#,
61        )
62        .unwrap();
63        // Match https?://host/path? followed by any querystring containing X-Amz-Signature.
64        // Capture group 1 is everything up to and including the `?`.
65        let re_presigned =
66            Regex::new(r"(https?://[^?\s]+\?)[^?\s]*X-Amz-Signature[^?\s]*").unwrap();
67        // Match exactly 12 consecutive digits.  The replacement closure
68        // (see `redact_text`) skips matches that are adjacent to a `-` or hex
69        // letter, so UUID segments like `446655440000` are never clobbered.
70        let re_account_id = Regex::new(r"\b\d{12}\b").unwrap();
71        let re_bearer = Regex::new(r"Bearer [A-Za-z0-9\-._~+/]+=*").unwrap();
72
73        let home_dir = dirs_home();
74
75        Redactor {
76            level,
77            re_aws_key_id,
78            re_secret,
79            re_session,
80            re_presigned,
81            re_account_id,
82            re_bearer,
83            home_dir,
84        }
85    }
86
    /// Build a `Redactor` with the default `RedactionLevel::Full`.
    ///
    /// Shorthand for `Redactor::with_level(RedactionLevel::Full)`; like that
    /// constructor it compiles every pattern, so build once and reuse.
    pub fn new() -> Self {
        Self::with_level(RedactionLevel::Full)
    }
91
92    /// Apply all active patterns to `text` and return the redacted string.
93    ///
94    /// Patterns are applied in this order so that more-specific matches are
95    /// replaced before generic ones (e.g. presigned URLs before account IDs):
96    ///
97    /// 1. Presigned URLs (full querystring)
98    /// 2. AWS session tokens (longest values first, before secrets)
99    /// 3. AWS secret access keys
100    /// 4. Bearer tokens
101    /// 5. AWS access key IDs
102    /// 6. AWS account IDs (Full level only)
103    pub fn redact_text(&self, text: &str) -> String {
104        if self.level == RedactionLevel::None {
105            return text.to_owned();
106        }
107
108        let mut out = self
109            .re_presigned
110            .replace_all(text, "${1}<REDACTED_QUERY>")
111            .into_owned();
112
113        // Session tokens first (they can be >100 chars and contain the same
114        // character set as secrets, so match them before the shorter pattern).
115        out = self
116            .re_session
117            .replace_all(&out, "${1}=<REDACTED:AWS_SESSION>")
118            .into_owned();
119
120        out = self
121            .re_secret
122            .replace_all(&out, "${1}=<REDACTED:AWS_SECRET>")
123            .into_owned();
124
125        out = self
126            .re_bearer
127            .replace_all(&out, "Bearer <REDACTED:BEARER>")
128            .into_owned();
129
130        out = self
131            .re_aws_key_id
132            .replace_all(&out, "<REDACTED:AWS_KEY_ID>")
133            .into_owned();
134
135        if self.level == RedactionLevel::Full {
136            // Use a closure so we can skip 12-digit sequences that are part of
137            // a UUID (preceded or followed by `-` or a hex letter a-f/A-F),
138            // which would otherwise clobber `Internal::trace_id` values.
139            let captured = out.clone();
140            out = self
141                .re_account_id
142                .replace_all(&captured, |caps: &regex::Captures<'_>| {
143                    let m = caps.get(0).unwrap();
144                    let bytes = captured.as_bytes();
145                    let before = if m.start() > 0 {
146                        bytes[m.start() - 1]
147                    } else {
148                        b' '
149                    };
150                    let after = if m.end() < bytes.len() {
151                        bytes[m.end()]
152                    } else {
153                        b' '
154                    };
155                    // If the digit block is glued to a UUID separator or hex
156                    // letter, treat it as part of a UUID and leave it alone.
157                    let is_uuid_context = before == b'-'
158                        || after == b'-'
159                        || before.is_ascii_hexdigit() && !before.is_ascii_digit()
160                        || after.is_ascii_hexdigit() && !after.is_ascii_digit();
161                    if is_uuid_context {
162                        m.as_str().to_owned()
163                    } else {
164                        "<REDACTED:ACCOUNT_ID>".to_owned()
165                    }
166                })
167                .into_owned();
168        }
169
170        out
171    }
172
173    /// Replace `$HOME/` and the literal home-dir path prefix with `~/`.
174    ///
175    /// If the home directory cannot be determined at runtime, the input is
176    /// returned unchanged.
177    pub fn redact_path(&self, path: &str) -> String {
178        // Replace `$HOME` literal placeholder first.
179        let mut out = path.replace("$HOME", "~");
180
181        // Replace the real home-dir prefix if we detected it.
182        if let Some(ref home) = self.home_dir {
183            if out.starts_with(home.as_str()) {
184                // e.g. /Users/alice/foo -> ~/foo
185                out = format!("~{}", &out[home.len()..]);
186            }
187        }
188
189        out
190    }
191}
192
// `Redactor::default()` is the same as `Redactor::new()`, i.e. a redactor at
// `RedactionLevel::Full`.
impl Default for Redactor {
    fn default() -> Self {
        Self::new()
    }
}
198
/// Return the current user's home directory as a string, without a trailing
/// slash.  Returns `None` when the home directory cannot be determined.
///
/// `HOME` (Unix) is probed first, then `USERPROFILE` (Windows); probing both
/// keeps us off the `dirs` crate while still working on every CI runner
/// platform.  A variable that is unset, not valid Unicode, or empty once
/// trailing `/` and `\` are trimmed is skipped, so an empty `HOME` falls
/// through to `USERPROFILE` instead of yielding an empty prefix that would
/// match every path.
fn dirs_home() -> Option<String> {
    ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|key| std::env::var(key).ok())
        .map(|raw| raw.trim_end_matches(['/', '\\']).to_owned())
        .find(|home| !home.is_empty())
}
209
210// ---------------------------------------------------------------------------
211// Tests
212// ---------------------------------------------------------------------------
213
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // -------------------------------------------------------------------------
    // Fixture helpers
    // -------------------------------------------------------------------------

    /// Load a fixture file from `tests/fixtures/diagnostics/<name>`.
    ///
    /// Trailing newlines are stripped so fixtures can end with the usual
    /// final newline without it leaking into equality assertions.  Panics
    /// with the offending path when the file cannot be read.
    fn fixture(name: &str) -> String {
        let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        p.push("tests/fixtures/diagnostics");
        p.push(name);
        std::fs::read_to_string(&p)
            .unwrap_or_else(|e| panic!("failed to read fixture {}: {}", p.display(), e))
            .trim_end_matches('\n')
            .to_owned()
    }

    // -------------------------------------------------------------------------
    // AWS access key ID
    // -------------------------------------------------------------------------

    #[test]
    fn aws_key_id_positive() {
        let r = Redactor::new();
        let input = fixture("aws_key_id.positive.txt");
        let got = r.redact_text(&input);
        assert!(
            got.contains("<REDACTED:AWS_KEY_ID>"),
            "expected AWS key ID to be redacted; got: {got}"
        );
        assert!(
            !got.contains("AKIAIOSFODNN7EXAMPLE"),
            "raw key must not appear in output"
        );
    }

    #[test]
    fn aws_key_id_negative() {
        let r = Redactor::new();
        let input = fixture("aws_key_id.negative.txt");
        let got = r.redact_text(&input);
        assert_eq!(input, got, "non-matching input must pass through unchanged");
    }

    // -------------------------------------------------------------------------
    // Secret access key
    // -------------------------------------------------------------------------

    #[test]
    fn secret_positive() {
        let r = Redactor::new();
        let input = fixture("secret.positive.txt");
        let got = r.redact_text(&input);
        assert!(
            got.contains("<REDACTED:AWS_SECRET>"),
            "expected secret to be redacted; got: {got}"
        );
        assert!(
            !got.contains("wJalrXUtnFEMI"),
            "raw secret must not appear in output"
        );
    }

    #[test]
    fn secret_negative() {
        let r = Redactor::new();
        let input = fixture("secret.negative.txt");
        let got = r.redact_text(&input);
        assert_eq!(input, got, "short secret-like value must not be redacted");
    }

    // -------------------------------------------------------------------------
    // Presigned URLs
    // -------------------------------------------------------------------------

    #[test]
    fn presigned_positive() {
        let r = Redactor::new();
        let input = fixture("presigned.positive.txt");
        let got = r.redact_text(&input);
        assert!(
            got.contains("<REDACTED_QUERY>"),
            "expected presigned querystring to be redacted; got: {got}"
        );
        assert!(
            !got.contains("X-Amz-Signature"),
            "signature must not appear after redaction"
        );
    }

    #[test]
    fn presigned_negative() {
        let r = Redactor::new();
        let input = fixture("presigned.negative.txt");
        let got = r.redact_text(&input);
        assert_eq!(input, got, "normal URL must not be changed");
    }

    // -------------------------------------------------------------------------
    // Account ID
    // -------------------------------------------------------------------------

    #[test]
    fn account_id_full_level() {
        let r = Redactor::new(); // Full
        let input = fixture("account_id.positive.txt");
        let got = r.redact_text(&input);
        assert!(
            got.contains("<REDACTED:ACCOUNT_ID>"),
            "Full level must redact account IDs; got: {got}"
        );
    }

    #[test]
    fn account_id_partial_level_keeps_visible() {
        let r = Redactor::with_level(RedactionLevel::Partial);
        let input = fixture("account_id.positive.txt");
        let got = r.redact_text(&input);
        assert!(
            !got.contains("<REDACTED:ACCOUNT_ID>"),
            "Partial level must NOT redact account IDs; got: {got}"
        );
        assert!(
            got.contains("123456789012"),
            "account ID must remain visible in Partial mode"
        );
    }

    #[test]
    fn account_id_negative() {
        let r = Redactor::new();
        let input = fixture("account_id.negative.txt");
        let got = r.redact_text(&input);
        assert_eq!(
            input, got,
            "11-digit number must not be treated as account ID"
        );
    }

    // -------------------------------------------------------------------------
    // Home path
    // -------------------------------------------------------------------------

    #[test]
    fn home_path_positive_dollar_home() {
        let r = Redactor::new();
        let input = "$HOME/projects/brows3r/data.log";
        let got = r.redact_path(input);
        assert_eq!("~/projects/brows3r/data.log", got);
    }

    #[test]
    fn home_path_positive_literal() {
        let r = Redactor::new();
        let input = fixture("home_path.positive.txt");
        // Resolve the home directory in a cross-platform way: $HOME on
        // Unix, %USERPROFILE% on Windows.  On the CI Windows runner `HOME`
        // is *set but empty*, so a plain `var("HOME").or_else(..)` never
        // reaches the fallback; skip empty values explicitly and take the
        // first variable that actually names a directory.
        let home = ["HOME", "USERPROFILE"]
            .iter()
            .filter_map(|key| std::env::var(key).ok())
            .find(|value| !value.is_empty())
            .unwrap_or_default();
        assert!(
            !home.is_empty(),
            "test setup error: neither HOME nor USERPROFILE is set to a non-empty value",
        );
        let input = input.replace("__HOME__", &home);
        let got = r.redact_path(&input);
        assert!(
            got.starts_with("~/"),
            "home prefix should be replaced with ~/; got: {got}"
        );
    }

    #[test]
    fn home_path_negative() {
        let r = Redactor::new();
        let input = fixture("home_path.negative.txt");
        let got = r.redact_path(&input);
        // Non-home-prefixed path should remain unchanged.
        assert_eq!(input, got, "non-home path must pass through unchanged");
    }

    // -------------------------------------------------------------------------
    // trace_id preservation
    // -------------------------------------------------------------------------

    #[test]
    fn trace_id_is_not_redacted() {
        let r = Redactor::new();
        let input = fixture("trace_id.preserve.txt");
        let got = r.redact_text(&input);
        // The UUID trace_id must survive the redactor unchanged.
        assert!(
            got.contains("trace_id"),
            "trace_id field must be preserved; got: {got}"
        );
        // Verify the UUID value itself is still present.
        assert!(
            got.contains("550e8400-e29b-41d4-a716-446655440000"),
            "trace_id UUID must not be clobbered; got: {got}"
        );
    }

    // -------------------------------------------------------------------------
    // RedactionLevel::None
    // -------------------------------------------------------------------------

    #[test]
    fn none_level_is_no_op() {
        let r = Redactor::with_level(RedactionLevel::None);
        let input = "AKIAIOSFODNN7EXAMPLE is a key and aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
        let got = r.redact_text(input);
        assert_eq!(input, got, "None level must return input unchanged");
    }

    // -------------------------------------------------------------------------
    // Bearer token
    // -------------------------------------------------------------------------

    #[test]
    fn bearer_token_redacted() {
        let r = Redactor::new();
        let input = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payload.sig";
        let got = r.redact_text(input);
        assert!(
            got.contains("Bearer <REDACTED:BEARER>"),
            "Bearer token must be redacted; got: {got}"
        );
        assert!(
            !got.contains("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"),
            "Raw bearer value must not appear"
        );
    }

    // -------------------------------------------------------------------------
    // proptest fuzzer for AWS key ID format
    // -------------------------------------------------------------------------

    // No `#[cfg(test)]` needed here: the enclosing `tests` module is already
    // compiled for test builds only.
    mod proptests {
        use super::*;
        use proptest::prelude::*;

        /// Generate strings shaped like AWS access key IDs: one of the three
        /// prefixes the redactor recognises followed by 16 characters from
        /// `[A-Z0-9]`.
        fn aws_key_id_strategy() -> impl Strategy<Value = String> {
            let prefix = prop_oneof![
                Just("AKIA".to_owned()),
                Just("ASIA".to_owned()),
                Just("AROA".to_owned()),
            ];
            let suffix = "[A-Z0-9]{16}";
            (prefix, suffix).prop_map(|(p, s)| format!("{p}{s}"))
        }

        proptest! {
            #[test]
            fn valid_aws_key_ids_are_always_redacted(key in aws_key_id_strategy()) {
                let r = Redactor::new();
                let input = format!("Found {key} in the logs");
                let got = r.redact_text(&input);
                prop_assert!(
                    got.contains("<REDACTED:AWS_KEY_ID>"),
                    "key {key:?} was not redacted; output: {got:?}"
                );
                prop_assert!(
                    !got.contains(&key),
                    "raw key {key:?} still present in output: {got:?}"
                );
            }

            /// Strings of similar length that do NOT start with AKIA/ASIA/AROA
            /// must not be falsely redacted.
            #[test]
            fn non_matching_strings_pass_through(s in "[B-Z][A-Z0-9]{19}") {
                let r = Redactor::new();
                let input = format!("data: {s}");
                let got = r.redact_text(&input);
                prop_assert!(
                    !got.contains("<REDACTED:AWS_KEY_ID>"),
                    "false positive on {s:?}; output: {got:?}"
                );
            }
        }
    }
}