http/uri/
path.rs

1use std::convert::TryFrom;
2use std::str::FromStr;
3use std::{cmp, fmt, hash, str};
4
5use bytes::Bytes;
6
7use super::{ErrorKind, InvalidUri};
8use crate::byte_str::ByteStr;
9
/// Represents the path component of a URI
#[derive(Clone)]
pub struct PathAndQuery {
    /// Text of the path, followed by `?` and the query string when a query is
    /// present. A fragment, if any, is removed before the value is stored.
    pub(super) data: ByteStr,
    /// Byte offset of the `?` separator inside `data`, or `NONE` when there is
    /// no query component.
    pub(super) query: u16,
}
16
/// Sentinel stored in `PathAndQuery::query` when no `?` separator is present.
const NONE: u16 = u16::MAX;
18
19impl PathAndQuery {
20    // Not public while `bytes` is unstable.
21    pub(super) fn from_shared(mut src: Bytes) -> Result<Self, InvalidUri> {
22        let mut query = NONE;
23        let mut fragment = None;
24
25        let mut is_maybe_not_utf8 = false;
26
27        // block for iterator borrow
28        {
29            let mut iter = src.as_ref().iter().enumerate();
30
31            // path ...
32            for (i, &b) in &mut iter {
33                // See https://url.spec.whatwg.org/#path-state
34                match b {
35                    b'?' => {
36                        debug_assert_eq!(query, NONE);
37                        query = i as u16;
38                        break;
39                    }
40                    b'#' => {
41                        fragment = Some(i);
42                        break;
43                    }
44
45                    // This is the range of bytes that don't need to be
46                    // percent-encoded in the path. If it should have been
47                    // percent-encoded, then error.
48                    #[rustfmt::skip]
49                    0x21 |
50                    0x24..=0x3B |
51                    0x3D |
52                    0x40..=0x5F |
53                    0x61..=0x7A |
54                    0x7C |
55                    0x7E => {}
56
57                    // potentially utf8, might not, should check
58                    0x7F..=0xFF => {
59                        is_maybe_not_utf8 = true;
60                    }
61
62                    // These are code points that are supposed to be
63                    // percent-encoded in the path but there are clients
64                    // out there sending them as is and httparse accepts
65                    // to parse those requests, so they are allowed here
66                    // for parity.
67                    //
68                    // For reference, those are code points that are used
69                    // to send requests with JSON directly embedded in
70                    // the URI path. Yes, those things happen for real.
71                    #[rustfmt::skip]
72                    b'"' |
73                    b'{' | b'}' => {}
74
75                    _ => return Err(ErrorKind::InvalidUriChar.into()),
76                }
77            }
78
79            // query ...
80            if query != NONE {
81                for (i, &b) in iter {
82                    match b {
83                        // While queries *should* be percent-encoded, most
84                        // bytes are actually allowed...
85                        // See https://url.spec.whatwg.org/#query-state
86                        //
87                        // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E
88                        #[rustfmt::skip]
89                        0x21 |
90                        0x24..=0x3B |
91                        0x3D |
92                        0x3F..=0x7E => {}
93
94                        0x7F..=0xFF => {
95                            is_maybe_not_utf8 = true;
96                        }
97
98                        b'#' => {
99                            fragment = Some(i);
100                            break;
101                        }
102
103                        _ => return Err(ErrorKind::InvalidUriChar.into()),
104                    }
105                }
106            }
107        }
108
109        if let Some(i) = fragment {
110            src.truncate(i);
111        }
112
113        let data = if is_maybe_not_utf8 {
114            ByteStr::from_utf8(src).map_err(|_| ErrorKind::InvalidUriChar)?
115        } else {
116            unsafe { ByteStr::from_utf8_unchecked(src) }
117        };
118
119        Ok(PathAndQuery { data, query })
120    }
121
122    /// Convert a `PathAndQuery` from a static string.
123    ///
124    /// This function will not perform any copying, however the string is
125    /// checked to ensure that it is valid.
126    ///
127    /// # Panics
128    ///
129    /// This function panics if the argument is an invalid path and query.
130    ///
131    /// # Examples
132    ///
133    /// ```
134    /// # use http::uri::*;
135    /// let v = PathAndQuery::from_static("/hello?world");
136    ///
137    /// assert_eq!(v.path(), "/hello");
138    /// assert_eq!(v.query(), Some("world"));
139    /// ```
140    #[inline]
141    pub fn from_static(src: &'static str) -> Self {
142        let src = Bytes::from_static(src.as_bytes());
143
144        PathAndQuery::from_shared(src).unwrap()
145    }
146
147    /// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
148    ///
149    /// This will try to prevent a copy if the type passed is the type used
150    /// internally, and will copy the data if it is not.
151    pub fn from_maybe_shared<T>(src: T) -> Result<Self, InvalidUri>
152    where
153        T: AsRef<[u8]> + 'static,
154    {
155        if_downcast_into!(T, Bytes, src, {
156            return PathAndQuery::from_shared(src);
157        });
158
159        PathAndQuery::try_from(src.as_ref())
160    }
161
162    pub(super) fn empty() -> Self {
163        PathAndQuery {
164            data: ByteStr::new(),
165            query: NONE,
166        }
167    }
168
169    pub(super) fn slash() -> Self {
170        PathAndQuery {
171            data: ByteStr::from_static("/"),
172            query: NONE,
173        }
174    }
175
176    pub(super) fn star() -> Self {
177        PathAndQuery {
178            data: ByteStr::from_static("*"),
179            query: NONE,
180        }
181    }
182
183    /// Returns the path component
184    ///
185    /// The path component is **case sensitive**.
186    ///
187    /// ```notrust
188    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
189    ///                                        |--------|
190    ///                                             |
191    ///                                           path
192    /// ```
193    ///
194    /// If the URI is `*` then the path component is equal to `*`.
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// # use http::uri::*;
200    ///
201    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
202    ///
203    /// assert_eq!(path_and_query.path(), "/hello/world");
204    /// ```
205    #[inline]
206    pub fn path(&self) -> &str {
207        let ret = if self.query == NONE {
208            &self.data[..]
209        } else {
210            &self.data[..self.query as usize]
211        };
212
213        if ret.is_empty() {
214            return "/";
215        }
216
217        ret
218    }
219
220    /// Returns the query string component
221    ///
222    /// The query component contains non-hierarchical data that, along with data
223    /// in the path component, serves to identify a resource within the scope of
224    /// the URI's scheme and naming authority (if any). The query component is
225    /// indicated by the first question mark ("?") character and terminated by a
226    /// number sign ("#") character or by the end of the URI.
227    ///
228    /// ```notrust
229    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
230    ///                                                   |-------------------|
231    ///                                                             |
232    ///                                                           query
233    /// ```
234    ///
235    /// # Examples
236    ///
237    /// With a query string component
238    ///
239    /// ```
240    /// # use http::uri::*;
241    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
242    ///
243    /// assert_eq!(path_and_query.query(), Some("key=value&foo=bar"));
244    /// ```
245    ///
246    /// Without a query string component
247    ///
248    /// ```
249    /// # use http::uri::*;
250    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
251    ///
252    /// assert!(path_and_query.query().is_none());
253    /// ```
254    #[inline]
255    pub fn query(&self) -> Option<&str> {
256        if self.query == NONE {
257            None
258        } else {
259            let i = self.query + 1;
260            Some(&self.data[i as usize..])
261        }
262    }
263
264    /// Returns the path and query as a string component.
265    ///
266    /// # Examples
267    ///
268    /// With a query string component
269    ///
270    /// ```
271    /// # use http::uri::*;
272    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
273    ///
274    /// assert_eq!(path_and_query.as_str(), "/hello/world?key=value&foo=bar");
275    /// ```
276    ///
277    /// Without a query string component
278    ///
279    /// ```
280    /// # use http::uri::*;
281    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
282    ///
283    /// assert_eq!(path_and_query.as_str(), "/hello/world");
284    /// ```
285    #[inline]
286    pub fn as_str(&self) -> &str {
287        let ret = &self.data[..];
288        if ret.is_empty() {
289            return "/";
290        }
291        ret
292    }
293}
294
295impl<'a> TryFrom<&'a [u8]> for PathAndQuery {
296    type Error = InvalidUri;
297    #[inline]
298    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
299        PathAndQuery::from_shared(Bytes::copy_from_slice(s))
300    }
301}
302
303impl<'a> TryFrom<&'a str> for PathAndQuery {
304    type Error = InvalidUri;
305    #[inline]
306    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
307        TryFrom::try_from(s.as_bytes())
308    }
309}
310
311impl TryFrom<Vec<u8>> for PathAndQuery {
312    type Error = InvalidUri;
313    #[inline]
314    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
315        PathAndQuery::from_shared(vec.into())
316    }
317}
318
319impl TryFrom<String> for PathAndQuery {
320    type Error = InvalidUri;
321    #[inline]
322    fn try_from(s: String) -> Result<Self, Self::Error> {
323        PathAndQuery::from_shared(s.into())
324    }
325}
326
327impl TryFrom<&String> for PathAndQuery {
328    type Error = InvalidUri;
329    #[inline]
330    fn try_from(s: &String) -> Result<Self, Self::Error> {
331        TryFrom::try_from(s.as_bytes())
332    }
333}
334
335impl FromStr for PathAndQuery {
336    type Err = InvalidUri;
337    #[inline]
338    fn from_str(s: &str) -> Result<Self, InvalidUri> {
339        TryFrom::try_from(s)
340    }
341}
342
343impl fmt::Debug for PathAndQuery {
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        fmt::Display::fmt(self, f)
346    }
347}
348
349impl fmt::Display for PathAndQuery {
350    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
351        if !self.data.is_empty() {
352            match self.data.as_bytes()[0] {
353                b'/' | b'*' => write!(fmt, "{}", &self.data[..]),
354                _ => write!(fmt, "/{}", &self.data[..]),
355            }
356        } else {
357            write!(fmt, "/")
358        }
359    }
360}
361
362impl hash::Hash for PathAndQuery {
363    fn hash<H: hash::Hasher>(&self, state: &mut H) {
364        self.data.hash(state);
365    }
366}
367
368// ===== PartialEq / PartialOrd =====
369
370impl PartialEq for PathAndQuery {
371    #[inline]
372    fn eq(&self, other: &PathAndQuery) -> bool {
373        self.data == other.data
374    }
375}
376
377impl Eq for PathAndQuery {}
378
379impl PartialEq<str> for PathAndQuery {
380    #[inline]
381    fn eq(&self, other: &str) -> bool {
382        self.as_str() == other
383    }
384}
385
386impl<'a> PartialEq<PathAndQuery> for &'a str {
387    #[inline]
388    fn eq(&self, other: &PathAndQuery) -> bool {
389        self == &other.as_str()
390    }
391}
392
393impl<'a> PartialEq<&'a str> for PathAndQuery {
394    #[inline]
395    fn eq(&self, other: &&'a str) -> bool {
396        self.as_str() == *other
397    }
398}
399
400impl PartialEq<PathAndQuery> for str {
401    #[inline]
402    fn eq(&self, other: &PathAndQuery) -> bool {
403        self == other.as_str()
404    }
405}
406
407impl PartialEq<String> for PathAndQuery {
408    #[inline]
409    fn eq(&self, other: &String) -> bool {
410        self.as_str() == other.as_str()
411    }
412}
413
414impl PartialEq<PathAndQuery> for String {
415    #[inline]
416    fn eq(&self, other: &PathAndQuery) -> bool {
417        self.as_str() == other.as_str()
418    }
419}
420
421impl PartialOrd for PathAndQuery {
422    #[inline]
423    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
424        self.as_str().partial_cmp(other.as_str())
425    }
426}
427
428impl PartialOrd<str> for PathAndQuery {
429    #[inline]
430    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
431        self.as_str().partial_cmp(other)
432    }
433}
434
435impl PartialOrd<PathAndQuery> for str {
436    #[inline]
437    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
438        self.partial_cmp(other.as_str())
439    }
440}
441
442impl<'a> PartialOrd<&'a str> for PathAndQuery {
443    #[inline]
444    fn partial_cmp(&self, other: &&'a str) -> Option<cmp::Ordering> {
445        self.as_str().partial_cmp(*other)
446    }
447}
448
449impl<'a> PartialOrd<PathAndQuery> for &'a str {
450    #[inline]
451    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
452        self.partial_cmp(&other.as_str())
453    }
454}
455
456impl PartialOrd<String> for PathAndQuery {
457    #[inline]
458    fn partial_cmp(&self, other: &String) -> Option<cmp::Ordering> {
459        self.as_str().partial_cmp(other.as_str())
460    }
461}
462
463impl PartialOrd<PathAndQuery> for String {
464    #[inline]
465    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
466        self.as_str().partial_cmp(other.as_str())
467    }
468}
469
// Unit tests for parsing, accessors, equality and ordering of `PathAndQuery`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equal_to_self_of_same_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(p1, p2);
        assert_eq!(p2, p1);
    }

    #[test]
    fn not_equal_to_self_of_different_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/world&foo=bar".parse().unwrap();
        assert_ne!(p1, p2);
        assert_ne!(p2, p1);
    }

    #[test]
    fn equates_with_a_str() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(&path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", &path_and_query);
        assert_eq!(path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", path_and_query);
    }

    #[test]
    fn not_equal_with_a_str_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        // as a reference
        assert_ne!(&path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", &path_and_query);
        // without reference
        assert_ne!(path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", path_and_query);
    }

    #[test]
    fn equates_with_a_string() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(path_and_query, "/hello/world&foo=bar".to_string());
        assert_eq!("/hello/world&foo=bar".to_string(), path_and_query);
    }

    #[test]
    fn not_equal_with_a_string_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_ne!(path_and_query, "/hello&foo=bar".to_string());
        assert_ne!("/hello&foo=bar".to_string(), path_and_query);
    }

    #[test]
    fn compares_to_self() {
        let p1: PathAndQuery = "/a/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(p1 < p2);
        assert!(p2 > p1);
    }

    #[test]
    fn compares_with_a_str() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        // by ref
        assert!(&path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > &path_and_query);
        assert!(&path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < &path_and_query);

        // by val
        assert!(path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < path_and_query);
    }

    #[test]
    fn compares_with_a_string() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(path_and_query < "/c/world&foo=bar".to_string());
        assert!("/c/world&foo=bar".to_string() > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar".to_string());
        assert!("/a/world&foo=bar".to_string() < path_and_query);
    }

    #[test]
    fn ignores_valid_percent_encodings() {
        assert_eq!("/a%20b", pq("/a%20b?r=1").path());
        assert_eq!("qr=%31", pq("/a/b?qr=%31").query().unwrap());
    }

    #[test]
    fn ignores_invalid_percent_encodings() {
        assert_eq!("/a%%b", pq("/a%%b?r=1").path());
        assert_eq!("/aaa%", pq("/aaa%").path());
        assert_eq!("/aaa%", pq("/aaa%?r=1").path());
        assert_eq!("/aa%2", pq("/aa%2").path());
        assert_eq!("/aa%2", pq("/aa%2?r=1").path());
        assert_eq!("qr=%3", pq("/a/b?qr=%3").query().unwrap());
    }

    #[test]
    fn allow_utf8_in_path() {
        assert_eq!("/🍕", pq("/🍕").path());
    }

    #[test]
    fn allow_utf8_in_query() {
        assert_eq!(Some("pizza=🍕"), pq("/test?pizza=🍕").query());
    }

    #[test]
    fn rejects_invalid_utf8_in_path() {
        PathAndQuery::try_from(&[b'/', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn rejects_invalid_utf8_in_query() {
        PathAndQuery::try_from(&[b'/', b'a', b'?', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn json_is_fine() {
        assert_eq!(
            r#"/{"bread":"baguette"}"#,
            pq(r#"/{"bread":"baguette"}"#).path()
        );
    }

    // Parses `s`, panicking with the offending input on failure. The message
    // is built only on the error path, so successful parses do not allocate a
    // formatted string.
    fn pq(s: &str) -> PathAndQuery {
        s.parse()
            .unwrap_or_else(|err: InvalidUri| panic!("parsing {}: {:?}", s, err))
    }
}