combine/parser/
byte.rs

1//! Module containing parsers specialized on byte streams.
2
3use crate::{
4    error::{self, ParseResult::*},
5    parser::{
6        combinator::no_partial,
7        range::{take_fn, TakeRange},
8        repeat::skip_many,
9        token::{satisfy, token, tokens_cmp, Token},
10    },
11    stream::{RangeStream, Stream},
12    Parser,
13};
14
15/// Parses a byte and succeeds if the byte is equal to `c`.
16///
17/// ```
18/// use combine::Parser;
19/// use combine::parser::byte::byte;
20/// assert_eq!(byte(b'!').parse(&b"!"[..]), Ok((b'!', &b""[..])));
21/// assert!(byte(b'A').parse(&b""[..]).is_err());
22/// assert!(byte(b'A').parse(&b"!"[..]).is_err());
23/// ```
24pub fn byte<Input>(c: u8) -> Token<Input>
25where
26    Input: Stream<Token = u8>,
27{
28    token(c)
29}
30
// Builds a single-byte parser out of a `u8` predicate method.
//
// * `$name` is stringified and used as the "expected" label attached to the parser.
// * `$ty` is accepted for the benefit of existing call sites but is not used by the expansion.
// * `$f` is the name of the `u8` method invoked on each byte; any tokens following it are
//   pasted verbatim after the method name (normally the call parentheses and arguments).
macro_rules! byte_parser {
    // Bare method name: forward to the general arm with an empty argument list.
    ($name:ident, $ty:ident, $f: ident) => {
        byte_parser!($name, $ty, $f ())
    };
    // Method name followed by explicit call tokens, e.g. `is_ascii_digit()`.
    ($name:ident, $ty:ident, $f: ident $($args:tt)+) => {{
        satisfy(|b: u8| b.$f $($args)+)
            .expected(stringify!($name))
    }};
}
41
42/// Parses a base-10 digit (0–9).
43///
44/// ```
45/// use combine::Parser;
46/// use combine::parser::byte::digit;
47/// assert_eq!(digit().parse(&b"9"[..]), Ok((b'9', &b""[..])));
48/// assert!(digit().parse(&b"A"[..]).is_err());
49/// ```
50pub fn digit<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
51where
52    Input: Stream<Token = u8>,
53{
54    byte_parser!(digit, Digit, is_ascii_digit())
55}
56
57/// Parses a `b' '`, `b'\t'`, `b'\n'` or `'b\'r'`.
58///
59/// ```
60/// use combine::Parser;
61/// use combine::parser::byte::space;
62/// assert_eq!(space().parse(&b" "[..]), Ok((b' ', &b""[..])));
63/// assert_eq!(space().parse(&b"  "[..]), Ok((b' ', &b" "[..])));
64/// assert!(space().parse(&b"!"[..]).is_err());
65/// assert!(space().parse(&b""[..]).is_err());
66/// ```
67pub fn space<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
68where
69    Input: Stream<Token = u8>,
70{
71    byte_parser!(space, Space, is_ascii_whitespace)
72}
73
74/// Skips over [`space`] zero or more times
75///
76/// [`space`]: fn.space.html
77///
78/// ```
79/// use combine::Parser;
80/// use combine::parser::byte::spaces;
81/// assert_eq!(spaces().parse(&b""[..]), Ok(((), &b""[..])));
82/// assert_eq!(spaces().parse(&b"   "[..]), Ok(((), &b""[..])));
83/// ```
84pub fn spaces<Input>() -> impl Parser<Input, Output = ()>
85where
86    Input: Stream<Token = u8>,
87{
88    skip_many(space()).expected("whitespaces")
89}
90
91/// Parses a newline byte (`b'\n'`).
92///
93/// ```
94/// use combine::Parser;
95/// use combine::parser::byte::newline;
96/// assert_eq!(newline().parse(&b"\n"[..]), Ok((b'\n', &b""[..])));
97/// assert!(newline().parse(&b"\r"[..]).is_err());
98/// ```
99pub fn newline<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
100where
101    Input: Stream<Token = u8>,
102{
103    satisfy(|ch: u8| ch == b'\n').expected("lf newline")
104}
105
106/// Parses carriage return and newline (`&b"\r\n"`), returning the newline byte.
107///
108/// ```
109/// use combine::Parser;
110/// use combine::parser::byte::crlf;
111/// assert_eq!(crlf().parse(&b"\r\n"[..]), Ok((b'\n', &b""[..])));
112/// assert!(crlf().parse(&b"\r"[..]).is_err());
113/// assert!(crlf().parse(&b"\n"[..]).is_err());
114/// ```
115pub fn crlf<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
116where
117    Input: Stream<Token = u8>,
118{
119    no_partial(satisfy(|ch: u8| ch == b'\r').with(newline())).expected("crlf newline")
120}
121
122/// Parses a tab byte (`b'\t'`).
123///
124/// ```
125/// use combine::Parser;
126/// use combine::parser::byte::tab;
127/// assert_eq!(tab().parse(&b"\t"[..]), Ok((b'\t', &b""[..])));
128/// assert!(tab().parse(&b" "[..]).is_err());
129/// ```
130pub fn tab<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
131where
132    Input: Stream<Token = u8>,
133{
134    satisfy(|ch| ch == b'\t').expected("tab")
135}
136
137/// Parses an uppercase ASCII letter (A–Z).
138///
139/// ```
140/// use combine::Parser;
141/// use combine::parser::byte::upper;
142/// assert_eq!(upper().parse(&b"A"[..]), Ok((b'A', &b""[..])));
143/// assert!(upper().parse(&b"a"[..]).is_err());
144/// ```
145pub fn upper<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
146where
147    Input: Stream<Token = u8>,
148{
149    byte_parser!(upper, Upper, is_ascii_uppercase)
150}
151
152/// Parses an lowercase ASCII letter (a–z).
153///
154/// ```
155/// use combine::Parser;
156/// use combine::parser::byte::lower;
157/// assert_eq!(lower().parse(&b"a"[..]), Ok((b'a', &b""[..])));
158/// assert!(lower().parse(&b"A"[..]).is_err());
159/// ```
160pub fn lower<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
161where
162    Input: Stream<Token = u8>,
163{
164    byte_parser!(lower, Lower, is_ascii_lowercase)
165}
166
167/// Parses either an ASCII alphabet letter or digit (a–z, A–Z, 0–9).
168///
169/// ```
170/// use combine::Parser;
171/// use combine::parser::byte::alpha_num;
172/// assert_eq!(alpha_num().parse(&b"A"[..]), Ok((b'A', &b""[..])));
173/// assert_eq!(alpha_num().parse(&b"1"[..]), Ok((b'1', &b""[..])));
174/// assert!(alpha_num().parse(&b"!"[..]).is_err());
175/// ```
176pub fn alpha_num<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
177where
178    Input: Stream<Token = u8>,
179{
180    byte_parser!(alpha_num, AlphaNum, is_ascii_alphanumeric)
181}
182
183/// Parses an ASCII alphabet letter (a–z, A–Z).
184///
185/// ```
186/// use combine::Parser;
187/// use combine::parser::byte::letter;
188/// assert_eq!(letter().parse(&b"a"[..]), Ok((b'a', &b""[..])));
189/// assert_eq!(letter().parse(&b"A"[..]), Ok((b'A', &b""[..])));
190/// assert!(letter().parse(&b"9"[..]).is_err());
191/// ```
192pub fn letter<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
193where
194    Input: Stream<Token = u8>,
195{
196    byte_parser!(letter, Letter, is_ascii_alphabetic)
197}
198
199/// Parses an octal digit.
200///
201/// ```
202/// use combine::Parser;
203/// use combine::parser::byte::oct_digit;
204/// assert_eq!(oct_digit().parse(&b"7"[..]), Ok((b'7', &b""[..])));
205/// assert!(oct_digit().parse(&b"8"[..]).is_err());
206/// ```
207pub fn oct_digit<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
208where
209    Input: Stream<Token = u8>,
210{
211    satisfy(|ch| (b'0'..=b'7').contains(&ch)).expected("octal digit")
212}
213
214/// Parses an ASCII hexdecimal digit (accepts both uppercase and lowercase).
215///
216/// ```
217/// use combine::Parser;
218/// use combine::parser::byte::hex_digit;
219/// assert_eq!(hex_digit().parse(&b"F"[..]), Ok((b'F', &b""[..])));
220/// assert!(hex_digit().parse(&b"H"[..]).is_err());
221/// ```
222pub fn hex_digit<Input>() -> impl Parser<Input, Output = u8, PartialState = ()>
223where
224    Input: Stream<Token = u8>,
225{
226    byte_parser!(hex_digit, HexDigit, is_ascii_hexdigit())
227}
228
229parser! {
230/// Parses the bytes `s`.
231///
232/// If you have a stream implementing [`RangeStream`] such as `&[u8]` you can also use the
233/// [`range`] parser which may be more efficient.
234///
235/// ```
236/// # extern crate combine;
237/// # use combine::*;
238/// # use combine::parser::byte::bytes;
239/// # fn main() {
240/// let result = bytes(&b"rust"[..])
241///     .parse(&b"rust"[..])
242///     .map(|x| x.0);
243/// assert_eq!(result, Ok(&b"rust"[..]));
244/// # }
245/// ```
246///
247/// [`RangeStream`]: super::super::stream::RangeStream
248/// [`range`]: super::range::range
249pub fn bytes['a, 'b, Input](s: &'static [u8])(Input) -> &'a [u8]
250where [
251    Input: Stream<Token = u8, Range = &'b [u8]>,
252]
253{
254    bytes_cmp(s, |l: u8, r: u8| l == r)
255}
256}
257
258parser! {
259/// Parses the bytes `s` using `cmp` to compare each token.
260///
261/// If you have a stream implementing [`RangeStream`] such as `&[u8]` you can also use the
262/// [`range`] parser which may be more efficient.
263///
264/// ```
265/// # extern crate combine;
266/// # use combine::*;
267/// # use combine::parser::byte::bytes_cmp;
268/// # use combine::stream::easy::Info;
269/// # fn main() {
270/// let result = bytes_cmp(&b"abc"[..], |l, r| l.eq_ignore_ascii_case(&r))
271///     .parse(&b"AbC"[..]);
272/// assert_eq!(result, Ok((&b"abc"[..], &b""[..])));
273/// # }
274/// ```
275///
276/// [`RangeStream`]: super::super::stream::RangeStream
277/// [`range`]: super::range::range
278pub fn bytes_cmp['a, 'b, C, Input](s: &'static [u8], cmp: C)(Input) -> &'a [u8]
279where [
280    C: FnMut(u8, u8) -> bool,
281    Input: Stream<Token = u8, Range = &'b [u8]>,
282]
283{
284    let s = *s;
285    tokens_cmp(s.iter().cloned(), cmp)
286        .map(move |_| s)
287        .expected(error::Range(s))
288}
289}
290
// Generates a range parser that searches for any one of the given bytes.
//
// * the leading attributes (normally doc comments) are attached to the generated function,
// * `$type_name` is the name of the generated parser struct,
// * `$func_name` is the name of the generated constructor function,
// * `$memchr` names the function of the `memchr` crate performing the search,
// * the remaining identifiers become the `u8` parameters of `$func_name` and are passed, in
//   order, to `$memchr` ahead of the haystack.
macro_rules! take_until {
    (
        $(#[$attr:meta])*
        $type_name: ident, $func_name: ident, $memchr: ident, $($param: ident),+
    ) => {
        parser!{
            #[derive(Clone)]
            pub struct $type_name;
            type PartialState = usize;
            $(#[$attr])*
            pub fn $func_name[Input]($($param : u8),*)(Input) -> Input::Range
                where [
                    Input: RangeStream,
                    Input::Range: AsRef<[u8]> + crate::stream::Range,
                ]
            {
                take_fn(move |range: Input::Range| {
                    let searched = range.as_ref();
                    if let Some(index) = ::memchr::$memchr( $(*$param),+ , searched) {
                        TakeRange::Found(index)
                    } else {
                        // Nothing matched: report the whole searched range as examined.
                        TakeRange::NotFound(searched.len())
                    }
                })
            }
        }
    }
}
318
319take_until! {
320    /// Zero-copy parser which reads a range of 0 or more tokens until `a` is found.
321    ///
322    /// If `a` is not found, the parser will return an error.
323    ///
324    /// ```
325    /// # extern crate combine;
326    /// # use combine::parser::byte::take_until_byte;
327    /// # use combine::*;
328    /// # fn main() {
329    /// let mut parser = take_until_byte(b'\r');
330    /// let result = parser.parse("To: user@example.com\r\n");
331    /// assert_eq!(result, Ok(("To: user@example.com", "\r\n")));
332    /// let result = parser.parse("Hello, world\n");
333    /// assert!(result.is_err());
334    /// # }
335    /// ```
336    TakeUntilByte, take_until_byte, memchr, a
337}
338take_until! {
339    /// Zero-copy parser which reads a range of 0 or more tokens until `a` or `b` is found.
340    ///
341    /// If `a` or `b` is not found, the parser will return an error.
342    ///
343    /// ```
344    /// # extern crate combine;
345    /// # use combine::parser::byte::take_until_byte2;
346    /// # use combine::*;
347    /// # fn main() {
348    /// let mut parser = take_until_byte2(b'\r', b'\n');
349    /// let result = parser.parse("To: user@example.com\r\n");
350    /// assert_eq!(result, Ok(("To: user@example.com", "\r\n")));
351    /// let result = parser.parse("Hello, world\n");
352    /// assert_eq!(result, Ok(("Hello, world", "\n")));
353    /// # }
354    /// ```
355    TakeUntilByte2, take_until_byte2, memchr2, a, b
356}
357take_until! {
358    /// Zero-copy parser which reads a range of 0 or more tokens until `a`, 'b' or `c` is found.
359    ///
360    /// If `a`, 'b' or `c` is not found, the parser will return an error.
361    ///
362    /// ```
363    /// # extern crate combine;
364    /// # use combine::parser::byte::take_until_byte3;
365    /// # use combine::*;
366    /// # fn main() {
367    /// let mut parser = take_until_byte3(b'\r', b'\n', b' ');
368    /// let result = parser.parse("To: user@example.com\r\n");
369    /// assert_eq!(result, Ok(("To:", " user@example.com\r\n")));
370    /// let result = parser.parse("Helloworld");
371    /// assert!(result.is_err());
372    /// # }
373    /// ```
374    TakeUntilByte3, take_until_byte3, memchr3, a, b, c
375}
376
377parser! {
378type PartialState = usize;
379/// Zero-copy parser which reads a range of 0 or more tokens until `needle` is found.
380///
381/// If `a`, 'b' or `c` is not found, the parser will return an error.
382///
383/// Optimized variant of [`take_until_range`](../range/fn.take_until_range.html)
384///
385/// ```
386/// use combine::*;
387/// use combine::parser::byte::take_until_bytes;
388/// assert_eq!(
389///     take_until_bytes(&b"\r\n"[..]).easy_parse(&b"abc\r\n"[..]).map(|(x, _)| x),
390///     Ok((&b"abc"[..]))
391/// );
392/// // Also works on strings as long as `needle` is UTF-8
393/// assert_eq!(
394///     take_until_bytes("\r\n".as_bytes()).easy_parse("abc\r\n").map(|(x, _)| x),
395///     Ok(("abc"))
396/// );
397/// ```
398pub fn take_until_bytes['a, Input](needle: &'a [u8])(Input) -> Input::Range
399where [
400    Input: RangeStream,
401    Input::Range: AsRef<[u8]> + crate::stream::Range,
402]
403{
404    take_fn(move |haystack: Input::Range| {
405        let haystack = haystack.as_ref();
406        match memslice(needle, haystack) {
407            Some(i) => TakeRange::Found(i),
408            None => TakeRange::NotFound(haystack.len().saturating_sub(needle.len() - 1)),
409        }
410    })
411}
412
413}
414
415fn memslice(needle: &[u8], haystack: &[u8]) -> Option<usize> {
416    let (&prefix, suffix) = match needle.split_first() {
417        Some(x) => x,
418        None => return Some(0),
419    };
420    for i in memchr::memchr_iter(prefix, haystack) {
421        if haystack[i + 1..].starts_with(suffix) {
422            return Some(i);
423        }
424    }
425    None
426}
427
428/// Parsers for decoding numbers in big-endian or little-endian order.
429pub mod num {
430
431    use crate::{error::ResultExt, lib::mem::size_of, parser::function::parser, stream::uncons};
432
433    use super::*;
434
435    macro_rules! integer_parser {
436        (
437            $(#[$attr:meta])*
438            pub $type_name: ident,
439            $output_type: ident, $be_name: ident, $le_name: ident, $read_name: ident
440        ) => {
441            $(#[$attr])*
442            pub fn $be_name<'a, Input>() -> impl Parser<Input, Output = $output_type, PartialState = ()>
443            where
444                Input: Stream<Token = u8>,
445            {
446                parser(|input: &mut Input| {
447                    let checkpoint = input.checkpoint();
448                    let result = (|input: &mut Input| {
449                        let mut buffer = [0u8; size_of::<$output_type>()];
450                        for elem in &mut buffer[..] {
451                            *elem = ctry!(uncons(input)).0;
452                        }
453                        CommitOk($output_type::from_be_bytes(buffer))
454                    })(input);
455                    if result.is_err() {
456                        input.reset(checkpoint).committed().into_result()?;
457                    }
458                    result.into_result()
459                })
460            }
461
462            $(#[$attr])*
463            pub fn $le_name<'a, Input>() -> impl Parser<Input, Output = $output_type, PartialState = ()>
464            where
465                Input: Stream<Token = u8>,
466            {
467                parser(|input: &mut Input| {
468                    let checkpoint = input.checkpoint();
469                    let result = (|input: &mut Input| {
470                        let mut buffer = [0u8; size_of::<$output_type>()];
471                        for elem in &mut buffer[..] {
472                            *elem = ctry!(uncons(input)).0;
473                        }
474                        CommitOk($output_type::from_le_bytes(buffer))
475                    })(input);
476                    if result.is_err() {
477                        input.reset(checkpoint).committed().into_result()?;
478                    }
479                    result.into_result()
480                })
481            }
482        }
483    }
484
485    integer_parser!(
486        /// Reads a u16 out of the byte stream with the specified endianess
487        ///
488        /// ```
489        /// use combine::Parser;
490        /// use combine::parser::byte::num::le_u16;
491        ///
492        /// assert_eq!(le_u16().parse(&b"\x01\0"[..]), Ok((1, &b""[..])));
493        /// assert!(le_u16().parse(&b"\0"[..]).is_err());
494        /// ```
495        pub U16, u16, be_u16, le_u16, read_u16
496    );
497    integer_parser!(
498        /// Reads a u32 out of the byte stream with the specified endianess
499        ///
500        /// ```
501        /// use combine::Parser;
502        /// use combine::parser::byte::num::le_u32;
503        ///
504        /// assert_eq!(le_u32().parse(&b"\x01\0\0\0"[..]), Ok((1, &b""[..])));
505        /// assert!(le_u32().parse(&b"\x01\0\0"[..]).is_err());
506        /// ```
507        pub U32, u32, be_u32, le_u32, read_u32
508    );
509    integer_parser!(
510        /// Reads a u64 out of the byte stream with the specified endianess
511        ///
512        /// ```
513        /// use combine::Parser;
514        /// use combine::parser::byte::num::le_u64;
515        ///
516        /// assert_eq!(le_u64().parse(&b"\x01\0\0\0\0\0\0\0"[..]), Ok((1, &b""[..])));
517        /// assert!(le_u64().parse(&b"\x01\0\0\0\0\0\0"[..]).is_err());
518        /// ```
519        pub U64, u64, be_u64, le_u64, read_u64
520    );
521
522    integer_parser!(
523        /// Reads a i16 out of the byte stream with the specified endianess
524        ///
525        /// ```
526        /// use combine::Parser;
527        /// use combine::parser::byte::num::le_i16;
528        ///
529        /// assert_eq!(le_i16().parse(&b"\x01\0"[..]), Ok((1, &b""[..])));
530        /// assert!(le_i16().parse(&b"\x01"[..]).is_err());
531        /// ```
532        pub I16, i16, be_i16, le_i16, read_i16
533    );
534
535    integer_parser!(
536        /// Reads a i32 out of the byte stream with the specified endianess
537        ///
538        /// ```
539        /// use combine::Parser;
540        /// use combine::parser::byte::num::le_i32;
541        ///
542        /// assert_eq!(le_i32().parse(&b"\x01\0\0\0"[..]), Ok((1, &b""[..])));
543        /// assert!(le_i32().parse(&b"\x01\0\0"[..]).is_err());
544        /// ```
545        pub I32, i32, be_i32, le_i32, read_i32
546    );
547    integer_parser!(
548        /// Reads a i64 out of the byte stream with the specified endianess
549        ///
550        /// ```
551        /// use combine::Parser;
552        /// use combine::parser::byte::num::le_i64;
553        ///
554        /// assert_eq!(le_i64().parse(&b"\x01\0\0\0\0\0\0\0"[..]), Ok((1, &b""[..])));
555        /// assert!(le_i64().parse(&b"\x01\0\0\0\0\0\0"[..]).is_err());
556        /// ```
557        pub I64, i64, be_i64, le_i64, read_i64
558    );
559
560    integer_parser!(
561        /// Reads a i32 out of the byte stream with the specified endianess
562        ///
563        /// ```
564        /// use combine::Parser;
565        /// use combine::parser::byte::num::le_f32;
566        ///
567        /// let buf = 123.45f32.to_le_bytes();
568        /// assert_eq!(le_f32().parse(&buf[..]), Ok((123.45, &b""[..])));
569        /// assert!(le_f32().parse(&b"\x01\0\0"[..]).is_err());
570        /// ```
571        pub F32, f32, be_f32, le_f32, read_f32
572    );
573    integer_parser!(
574        /// Reads a i64 out of the byte stream with the specified endianess
575        ///
576        /// ```
577        /// use combine::Parser;
578        /// use combine::parser::byte::num::le_f64;
579        ///
580        /// let buf = 123.45f64.to_le_bytes();
581        /// assert_eq!(le_f64().parse(&buf[..]), Ok((123.45, &b""[..])));
582        /// assert!(le_f64().parse(&b"\x01\0\0\0\0\0\0"[..]).is_err());
583        /// ```
584        pub F64, f64, be_f64, le_f64, read_f64
585    );
586
587    #[cfg(all(feature = "std", test))]
588    mod tests {
589
590        use crate::stream::{buffered, position, IteratorStream};
591
592        use super::*;
593
594        #[test]
595        fn no_rangestream() {
596            let buf = 123.45f64.to_le_bytes();
597            assert_eq!(
598                le_f64()
599                    .parse(buffered::Stream::new(
600                        position::Stream::new(IteratorStream::new(buf.iter().cloned())),
601                        1
602                    ))
603                    .map(|(t, _)| t),
604                Ok(123.45)
605            );
606            assert_eq!(
607                le_f64()
608                    .parse(buffered::Stream::new(
609                        position::Stream::new(IteratorStream::new(buf.iter().cloned())),
610                        1
611                    ))
612                    .map(|(t, _)| t),
613                Ok(123.45)
614            );
615            let buf = 123.45f64.to_be_bytes();
616            assert_eq!(
617                be_f64()
618                    .parse(buffered::Stream::new(
619                        position::Stream::new(IteratorStream::new(buf.iter().cloned())),
620                        1
621                    ))
622                    .map(|(t, _)| t),
623                Ok(123.45)
624            );
625        }
626    }
627}
628
#[cfg(all(feature = "std", test))]
mod tests {

    use crate::stream::{buffered, position, read};

    use super::*;

    #[test]
    fn memslice_basic() {
        // (needle, haystack, expected index of the first match)
        let cases: [(&[u8], &[u8], Option<usize>); 7] = [
            (&b""[..], &b"abc123"[..], Some(0)),
            (&b"a"[..], &b"abc123"[..], Some(0)),
            (&b"ab"[..], &b"abc123"[..], Some(0)),
            (&b"c12"[..], &b"abc123"[..], Some(2)),
            // The first candidate position fails, a later one matches.
            (&b"abc"[..], &b"abcab2"[..], Some(0)),
            (&b"ab2"[..], &b"abcab2"[..], Some(3)),
            // Overlapping partial matches before the real one.
            (&b"aaaa"[..], &b"aaabaaaa"[..], Some(4)),
        ];
        for &(needle, haystack, expected) in cases.iter() {
            assert_eq!(memslice(needle, haystack), expected);
        }
    }

    #[test]
    fn bytes_read_stream() {
        // `bytes` must work on a stream that is not a range stream.
        let input = buffered::Stream::new(
            position::Stream::new(read::Stream::new("abc".as_bytes())),
            1,
        );
        assert!(bytes(b"abc").parse(input).is_ok());
    }
}
659            .is_ok());
660    }
661}