Skip to main content

brows3r_lib/path/
encode.rs

1//! Encoding and decoding between `S3Location` and its various string forms.
2//!
3//! Three distinct output forms:
4//! - **Canonical URI** (`brows3r://<profile_id>/<bucket>/<key>`) — used for
5//!   Copy Path, navigation state, and any IPC surface that must be
6//!   unambiguous even when two profiles share a display name.
7//! - **Display path** — `DisplayPath` for the breadcrumb UI. Human-readable,
8//!   not URL-encoded.
9//! - **Clipboard string** (`s3://<bucket>/<key>`) — the aws-cli-compatible
10//!   form users paste into a terminal.
11//!
12//! # Percent-encoding rules
13//! The canonical URI encodes bucket names and the full key/prefix string using
14//! `percent-encoding`'s `NON_ALPHANUMERIC` set (encodes everything except
15//! `[A-Za-z0-9]`), then un-encodes `/` back to a literal `/` in the key
16//! segment only. This preserves the S3 hierarchy-separator semantics while
17//! ensuring that `?`, `#`, `%`, and other special characters are always
18//! escaped.
19//!
20//! Bucket names are encoded without slash restoration (S3 bucket names cannot
21//! contain `/`).
22
23use percent_encoding::{percent_decode_str, utf8_percent_encode, NON_ALPHANUMERIC};
24
25use crate::{
26    error::AppError,
27    ids::{BucketId, ObjectKey, ProfileId},
28};
29
30use super::{DisplayPath, S3Location};
31
32// ---------------------------------------------------------------------------
33// Canonical URI — brows3r://<profile_id>/<bucket>/<key>
34// ---------------------------------------------------------------------------
35
36/// Encode an `S3Location` into its canonical `brows3r://` URI.
37///
38/// The canonical form uses the stable `profile_id` (not the display name) so
39/// two profiles with identical display names produce different URIs (AC-2).
40///
41/// Key path separators (`/`) are preserved for readability; all other
42/// non-alphanumeric characters (including `?`, `#`, `%`, unicode) are
43/// percent-encoded.
44pub fn to_canonical_uri(loc: &S3Location) -> String {
45    // Bucket encoding — no slash restoration; bucket names never contain `/`.
46    let encoded_bucket = encode_no_slash(loc.bucket.as_str());
47
48    // Key or prefix encoding — slashes are preserved as hierarchy separators.
49    let key_str = loc
50        .key
51        .as_ref()
52        .map(|k| k.as_str())
53        .unwrap_or(loc.prefix.as_str());
54    let encoded_key = encode_preserve_slash(key_str);
55
56    format!(
57        "brows3r://{}/{}/{}",
58        loc.profile_id.as_str(),
59        encoded_bucket,
60        encoded_key
61    )
62}
63
64/// Parse a `brows3r://` URI back into an `S3Location`.
65///
66/// Returns `AppError::Validation` when the URI is malformed (wrong scheme,
67/// missing profile id, missing bucket, etc.). The resulting `S3Location` uses
68/// an empty `prefix` and sets `key` to `Some` with the decoded key/prefix
69/// string (callers that need a directory prefix can normalise accordingly).
70pub fn from_canonical_uri(uri: &str) -> Result<S3Location, AppError> {
71    // Scheme check.
72    let rest = uri
73        .strip_prefix("brows3r://")
74        .ok_or_else(|| AppError::Validation {
75            field: "uri".to_string(),
76            hint: "URI must begin with brows3r://".to_string(),
77        })?;
78
79    // Split into at most 3 parts: profile_id / bucket / key
80    let mut parts = rest.splitn(3, '/');
81
82    let profile_id_raw = parts.next().unwrap_or(""); // always Some from splitn
83    if profile_id_raw.is_empty() {
84        return Err(AppError::Validation {
85            field: "uri".to_string(),
86            hint: "profile_id must not be empty".to_string(),
87        });
88    }
89
90    let bucket_raw = parts.next().unwrap_or("");
91    if bucket_raw.is_empty() {
92        return Err(AppError::Validation {
93            field: "uri".to_string(),
94            hint: "bucket must not be empty".to_string(),
95        });
96    }
97
98    // Key may be empty (bucket root).
99    let key_raw = parts.next().unwrap_or("");
100
101    // Percent-decode each component.
102    let bucket_decoded = decode_component(bucket_raw).map_err(|_| AppError::Validation {
103        field: "uri".to_string(),
104        hint: "bucket segment contains invalid percent-encoding".to_string(),
105    })?;
106
107    let key_decoded = decode_component(key_raw).map_err(|_| AppError::Validation {
108        field: "uri".to_string(),
109        hint: "key segment contains invalid percent-encoding".to_string(),
110    })?;
111
112    let key = if key_decoded.is_empty() {
113        None
114    } else {
115        Some(ObjectKey::new(key_decoded))
116    };
117
118    Ok(S3Location {
119        profile_id: ProfileId::new(profile_id_raw),
120        bucket: BucketId::new(bucket_decoded),
121        prefix: String::new(),
122        key,
123    })
124}
125
126// ---------------------------------------------------------------------------
127// Display path — for the breadcrumb UI
128// ---------------------------------------------------------------------------
129
130/// Build a `DisplayPath` from an `S3Location`.
131///
132/// The profile display name is passed in as a `&str` because it lives in the
133/// profile store, not in `S3Location`. Segments are split on `/` and empty
134/// strings are filtered out so trailing slashes and double-slashes do not
135/// produce empty breadcrumb items.
136pub fn to_display_path(loc: &S3Location, profile_display_name: &str) -> DisplayPath {
137    let raw = loc
138        .key
139        .as_ref()
140        .map(|k| k.as_str())
141        .unwrap_or(loc.prefix.as_str());
142
143    let segments: Vec<String> = raw
144        .split('/')
145        .filter(|s| !s.is_empty())
146        .map(str::to_owned)
147        .collect();
148
149    DisplayPath {
150        profile_display_name: profile_display_name.to_owned(),
151        bucket: loc.bucket.as_str().to_owned(),
152        segments,
153    }
154}
155
156/// Reconstruct an `S3Location` from breadcrumb segments.
157///
158/// The resulting location has an empty `prefix` and the joined segments as the
159/// `key` (or `None` when `segments` is empty, indicating the bucket root).
160pub fn from_display_path(
161    profile_id: ProfileId,
162    bucket: BucketId,
163    segments: &[String],
164) -> S3Location {
165    let key = if segments.is_empty() {
166        None
167    } else {
168        Some(ObjectKey::new(segments.join("/")))
169    };
170
171    S3Location {
172        profile_id,
173        bucket,
174        prefix: String::new(),
175        key,
176    }
177}
178
179// ---------------------------------------------------------------------------
180// Clipboard string — s3://<bucket>/<key>
181// ---------------------------------------------------------------------------
182
183/// Produce the aws-cli-compatible `s3://` string for clipboard use.
184///
185/// Uses the raw (un-encoded) bucket name and key/prefix so users can paste
186/// directly into a terminal command (`aws s3 cp s3://bucket/key ./`).
187pub fn to_clipboard_string(loc: &S3Location, _profile_display_name: &str) -> String {
188    let key_str = loc
189        .key
190        .as_ref()
191        .map(|k| k.as_str())
192        .unwrap_or(loc.prefix.as_str());
193
194    if key_str.is_empty() {
195        format!("s3://{}/", loc.bucket.as_str())
196    } else {
197        format!("s3://{}/{}", loc.bucket.as_str(), key_str)
198    }
199}
200
201// ---------------------------------------------------------------------------
202// Internal helpers
203// ---------------------------------------------------------------------------
204
205/// Percent-encode a string, encoding ALL non-alphanumeric characters
206/// (including `/`). Used for bucket names.
207fn encode_no_slash(input: &str) -> String {
208    utf8_percent_encode(input, NON_ALPHANUMERIC).to_string()
209}
210
211/// Percent-encode a string, encoding all non-alphanumeric characters EXCEPT
212/// `/`. Used for key/prefix segments.
213fn encode_preserve_slash(input: &str) -> String {
214    // Encode the full string with NON_ALPHANUMERIC (which encodes `/` as %2F),
215    // then restore literal `/` by replacing `%2F` (case-insensitive match).
216    let encoded = utf8_percent_encode(input, NON_ALPHANUMERIC).to_string();
217    // Restore both lower-case (%2f) and upper-case (%2F) variants.
218    encoded.replace("%2F", "/").replace("%2f", "/")
219}
220
221/// Percent-decode a URI component, returning a UTF-8 string.
222fn decode_component(input: &str) -> Result<String, ()> {
223    percent_decode_str(input)
224        .decode_utf8()
225        .map(|s| s.into_owned())
226        .map_err(|_| ())
227}
228
229// ---------------------------------------------------------------------------
230// Tests
231// ---------------------------------------------------------------------------
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `S3Location` from plain string parts.
    fn make_loc(profile_id: &str, bucket: &str, prefix: &str, key: Option<&str>) -> S3Location {
        S3Location {
            profile_id: ProfileId::new(profile_id),
            bucket: BucketId::new(bucket),
            prefix: prefix.to_owned(),
            key: key.map(ObjectKey::new),
        }
    }

    /// Borrow the key of a location as a plain `&str` for comparisons.
    fn key_of(loc: &S3Location) -> Option<&str> {
        loc.key.as_ref().map(|k| k.as_str())
    }

    /// Assert that parsing `uri` fails with a `Validation` error.
    fn assert_rejected(uri: &str, why: &str) {
        assert!(
            matches!(from_canonical_uri(uri), Err(AppError::Validation { .. })),
            "{why}: {uri}"
        );
    }

    // -----------------------------------------------------------------------
    // AC-2: duplicate display names → different canonical URIs via profile_id
    // -----------------------------------------------------------------------

    #[test]
    fn duplicate_display_names_produce_distinct_canonical_uris() {
        // Two profiles with the same display name "prod" but different profile_ids.
        let profile_id_a = "11111111-1111-1111-1111-111111111111";
        let profile_id_b = "22222222-2222-2222-2222-222222222222";

        let uri_a = to_canonical_uri(&make_loc(
            profile_id_a,
            "my-bucket",
            "",
            Some("data/file.csv"),
        ));
        let uri_b = to_canonical_uri(&make_loc(
            profile_id_b,
            "my-bucket",
            "",
            Some("data/file.csv"),
        ));

        assert_ne!(
            uri_a, uri_b,
            "duplicate display names must produce distinct URIs"
        );
        assert!(
            uri_a.contains(profile_id_a),
            "URI must embed the profile_id"
        );
        assert!(uri_b.contains(profile_id_b));
    }

    // -----------------------------------------------------------------------
    // Unicode key round-trip
    // -----------------------------------------------------------------------

    #[test]
    fn unicode_key_round_trips() {
        let loc = make_loc("prof-1", "bucket", "", Some("café/menu.pdf"));

        let uri = to_canonical_uri(&loc);
        let restored = from_canonical_uri(&uri).expect("must parse");

        assert_eq!(
            key_of(&restored),
            Some("café/menu.pdf"),
            "unicode key must survive round-trip"
        );
        // Slashes must be preserved as literals in the URI.
        assert!(
            uri.contains('/'),
            "URI must preserve path-separator slashes"
        );
    }

    // -----------------------------------------------------------------------
    // Special chars: ?, #, %, / (path sep preserved)
    // -----------------------------------------------------------------------

    #[test]
    fn special_chars_round_trip() {
        let key = "path/with?query#hash%percent/end";
        let loc = make_loc("prof-1", "my-bucket", "", Some(key));

        let uri = to_canonical_uri(&loc);

        // `?` and `#` must never appear literally.
        assert!(
            !uri.contains('?'),
            "? must be percent-encoded in canonical URI"
        );
        assert!(
            !uri.contains('#'),
            "# must be percent-encoded in canonical URI"
        );
        // A literal `%` cannot be checked by absence (every escape starts
        // with one), so pin its escaped form instead.
        assert!(
            uri.contains("%25"),
            "% must be percent-encoded as %25 in canonical URI: got {uri}"
        );
        // Pin the exact encoding: `-` in the bucket is escaped, slashes in
        // the key are literal, and hex digits are upper-case.
        assert_eq!(
            uri,
            "brows3r://prof-1/my%2Dbucket/path/with%3Fquery%23hash%25percent/end"
        );

        let restored = from_canonical_uri(&uri).expect("must parse");
        assert_eq!(
            key_of(&restored),
            Some(key),
            "special-char key must round-trip losslessly"
        );
    }

    #[test]
    fn slash_preserved_in_key_encoding() {
        let loc = make_loc("prof-1", "bucket", "", Some("a/b/c.txt"));
        let uri = to_canonical_uri(&loc);
        // Slashes must be preserved as literal `/` path separators. The `.` in
        // `c.txt` is encoded (%2E) by the strict NON_ALPHANUMERIC set, so the
        // URI does not literally end with "a/b/c.txt", but round-tripping gives
        // back the original key unchanged.
        let restored = from_canonical_uri(&uri).expect("must parse");
        assert_eq!(
            key_of(&restored),
            Some("a/b/c.txt"),
            "key must round-trip losslessly"
        );
        // The URI must contain literal slashes (not %2F) between a, b, and the filename.
        assert!(
            uri.contains("a/b/"),
            "path slashes must be preserved as literal '/': got {uri}"
        );
    }

    // -----------------------------------------------------------------------
    // Bucket root and prefix fallback
    // -----------------------------------------------------------------------

    #[test]
    fn bucket_root_round_trips() {
        let loc = make_loc("prof-1", "bucket", "", None);

        let uri = to_canonical_uri(&loc);
        assert_eq!(uri, "brows3r://prof-1/bucket/");

        let restored = from_canonical_uri(&uri).expect("must parse");
        assert!(restored.key.is_none(), "empty key segment must parse as None");
        assert!(restored.prefix.is_empty());
    }

    #[test]
    fn prefix_is_encoded_when_key_is_absent_and_parsed_back_as_key() {
        let loc = make_loc("prof-1", "bucket", "logs/2024/", None);

        let uri = to_canonical_uri(&loc);
        assert_eq!(uri, "brows3r://prof-1/bucket/logs/2024/");

        // The parser cannot tell a prefix from a key, so it always fills `key`.
        let restored = from_canonical_uri(&uri).expect("must parse");
        assert_eq!(key_of(&restored), Some("logs/2024/"));
        assert!(restored.prefix.is_empty());
    }

    // -----------------------------------------------------------------------
    // to_clipboard_string
    // -----------------------------------------------------------------------

    #[test]
    fn clipboard_string_formats_s3_uri() {
        let loc = make_loc("prof-1", "my-bucket", "", Some("folder/file.txt"));
        let clip = to_clipboard_string(&loc, "prod");
        assert_eq!(clip, "s3://my-bucket/folder/file.txt");
    }

    #[test]
    fn clipboard_string_bucket_root() {
        let loc = make_loc("prof-1", "my-bucket", "", None);
        let clip = to_clipboard_string(&loc, "prod");
        assert_eq!(clip, "s3://my-bucket/");
    }

    // -----------------------------------------------------------------------
    // from_canonical_uri — malformed inputs
    // -----------------------------------------------------------------------

    #[test]
    fn rejects_wrong_scheme() {
        assert_rejected(
            "https://example.com/bucket/key",
            "wrong scheme must return Validation error",
        );
    }

    #[test]
    fn rejects_empty_profile_id() {
        // brows3r:// with empty profile_id (the string after // before first /)
        assert_rejected("brows3r:///bucket/key", "empty profile_id must be rejected");
    }

    #[test]
    fn rejects_missing_bucket() {
        // profile_id present but no bucket
        assert_rejected("brows3r://prof-1", "missing bucket must be rejected");
    }

    #[test]
    fn rejects_empty_bucket() {
        assert_rejected("brows3r://prof-1//key", "empty bucket must be rejected");
    }

    // -----------------------------------------------------------------------
    // Display path
    // -----------------------------------------------------------------------

    #[test]
    fn to_display_path_splits_segments() {
        let loc = make_loc("prof-1", "my-bucket", "", Some("folder/sub/file.txt"));
        let dp = to_display_path(&loc, "production");
        assert_eq!(dp.profile_display_name, "production");
        assert_eq!(dp.bucket, "my-bucket");
        assert_eq!(dp.segments, vec!["folder", "sub", "file.txt"]);
    }

    #[test]
    fn to_display_path_bucket_root() {
        let loc = make_loc("prof-1", "my-bucket", "", None);
        let dp = to_display_path(&loc, "prod");
        assert_eq!(dp.segments, Vec::<String>::new());
    }

    #[test]
    fn from_display_path_joins_segments() {
        let profile_id = ProfileId::new("prof-1");
        let bucket = BucketId::new("my-bucket");
        let segments = vec!["folder".to_owned(), "sub".to_owned(), "file.txt".to_owned()];
        let loc = from_display_path(profile_id.clone(), bucket.clone(), &segments);

        assert_eq!(loc.profile_id, profile_id);
        assert_eq!(loc.bucket, bucket);
        assert_eq!(key_of(&loc), Some("folder/sub/file.txt"));
    }

    #[test]
    fn from_display_path_empty_segments_is_bucket_root() {
        let loc = from_display_path(ProfileId::new("p"), BucketId::new("b"), &[]);
        assert!(loc.key.is_none());
    }
}