Repository: J-F-Liu/pom Branch: master Commit: 0fd011c736ea Files: 30 Total size: 104.0 KB Directory structure: gitextract_7hb7yep3/ ├── .editorconfig ├── .gitignore ├── .travis.yml ├── .vscode/ │ ├── launch.json │ └── tasks.json ├── Cargo.toml ├── LICENSE ├── README.md ├── assets/ │ └── data.json ├── benches/ │ ├── json.rs │ └── json_char.rs ├── doc/ │ └── article.md ├── examples/ │ ├── duration.rs │ ├── json.rs │ ├── json_char.rs │ ├── json_file.rs │ ├── simple.rs │ ├── test.json │ ├── utf8.rs │ ├── utf8_mixed.rs │ └── whitespace.rs ├── rustfmt.toml ├── src/ │ ├── char_class.rs │ ├── lib.rs │ ├── parser.rs │ ├── range.rs │ ├── result.rs │ ├── set.rs │ └── utf8.rs └── tests/ └── list.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ root = true [*] end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true indent_style = tab indent_size = 4 [*.md] trim_trailing_whitespace = false ================================================ FILE: .gitignore ================================================ target Cargo.lock ================================================ FILE: .travis.yml ================================================ language: rust sudo: false # run builds for all the trains (and more) rust: - stable # load travis-cargo before_script: - | pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH # the main build script: - | travis-cargo build && travis-cargo test env: global: # override the default `--features unstable` used for the nightly branch (optional) - TRAVIS_CARGO_NIGHTLY_FEATURE="" ================================================ FILE: .vscode/launch.json ================================================ { "version": "0.2.0", "configurations": [ { "type": "lldb", "request": "launch", "name": "Custom launch", 
"program": "./target/debug/examples/json" }, { "type": "lldb-mi", "request": "launch", "name": "Launch Program", "target": "./target/debug/examples/simple", "cwd": "${workspaceRoot}" } ] } ================================================ FILE: .vscode/tasks.json ================================================ { // See https://go.microsoft.com/fwlink/?LinkId=733558 // for the documentation about the tasks.json format "version": "0.1.0", "command": "cargo", "isShellCommand": true, "showOutput": "always", "echoCommand": true, "suppressTaskName": true, "tasks": [ { "taskName": "json", "args": [ "build", "--example", "json" ] }, { "taskName": "release", "args": [ "build", "--release" ] } ] } ================================================ FILE: Cargo.toml ================================================ [package] name = "pom" version = "3.4.1" edition = "2021" authors = ["Junfeng Liu "] homepage = "https://github.com/J-F-Liu/pom" documentation = "https://docs.rs/crate/pom/" repository = "https://github.com/J-F-Liu/pom.git" license = "MIT" readme = "README.md" description = "PEG parser combinators using operator overloading without macros." categories = ["parsing"] keywords = ["parser", "parser-combinators", "parsing", "PEG"] [badges] travis-ci = { repository = "J-F-Liu/pom" } [dependencies] bstr = { version = "1.1.0", features = [ ] } # Only uses one function, so no features needed. 
[features] default = ["utf8"] utf8 = [] trace = [] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2016 Junfeng Liu Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # pom [![Crates.io](https://img.shields.io/crates/v/pom.svg)](https://crates.io/crates/pom) [![Build Status](https://travis-ci.org/J-F-Liu/pom.png)](https://travis-ci.org/J-F-Liu/pom) [![Docs](https://docs.rs/pom/badge.svg)](https://docs.rs/pom) [![Discord](https://img.shields.io/badge/discord-pom-red.svg)](https://discord.gg/CVy85pg) PEG parser combinators created using operator overloading without macros. ## Document - [Tutorial](https://github.com/J-F-Liu/pom/blob/master/doc/article.md) - [API Reference](https://docs.rs/crate/pom/) - [Learning Parser Combinators With Rust](https://bodil.lol/parser-combinators/) - By Bodil Stokke ## What is PEG? 
PEG stands for parsing expression grammar, is a type of analytic formal grammar, i.e. it describes a formal language in terms of a set of rules for recognizing strings in the language. Unlike CFGs, PEGs cannot be ambiguous; if a string parses, it has exactly one valid parse tree. Each parsing function conceptually takes an input string as its argument, and yields one of the following results: - success, in which the function may optionally move forward or consume one or more characters of the input string supplied to it, or - failure, in which case no input is consumed. Read more on [Wikipedia](https://en.wikipedia.org/wiki/Parsing_expression_grammar). ## What is parser combinator? A parser combinator is a higher-order function that accepts several parsers as input and returns a new parser as its output. Parser combinators enable a recursive descent parsing strategy that facilitates modular piecewise construction and testing. Parsers built using combinators are straightforward to construct, readable, modular, well-structured and easily maintainable. With operator overloading, a parser combinator can take the form of an infix operator, used to glue different parsers to form a complete rule. Parser combinators thereby enable parsers to be defined in an embedded style, in code which is similar in structure to the rules of the formal grammar. And the code is easier to debug than macros. The main advantage is that you don't need to go through any kind of code generation step, you're always using the vanilla language underneath. Aside from build issues (and the usual issues around error messages and debuggability, which in fairness are about as bad with macros as with code generation), it's usually easier to freely intermix grammar expressions and plain code. ## List of predefined parsers and combinators | Basic Parsers | Description | |------------------|-----------------------------------------------------------------| | empty() | Always succeeds, consume no input. 
| | end() | Match end of input. | | any() | Match any symbol and return the symbol. | | sym(t) | Match a single terminal symbol _t_. | | seq(s) | Match sequence of symbols. | | list(p,s) | Match list of _p_, separated by _s_. | | one_of(set) | Success when current input symbol is one of the set. | | none_of(set) | Success when current input symbol is none of the set. | | is_a(predicate) | Success when predicate return true on current input symbol. | | not_a(predicate) | Success when predicate return false on current input symbol. | | take(n) | Read _n_ symbols. | | skip(n) | Skip _n_ symbols. | | call(pf) | Call a parser factory, can be used to create recursive parsers. | | Parser Combinators | Description | |--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | p | q | Match p or q, return result of the first success. | | p + q | Match p and q, if both succeed return a pair of results. | | p - q | Match p and q, if both succeed return result of p. | | p \* q | Match p and q, if both succeed return result of q. | | p >> q | Parse p and get result P, then parse q and return result of q(P). | | -p | Success when p succeeds, doesn't consume input. | | !p | Success when p fails, doesn't consume input. | | p.opt() | Make parser optional. Returns an `Option`. | | p.repeat(m..n) | `p.repeat(0..)` repeat p zero or more times
`p.repeat(1..)` repeat p one or more times
`p.repeat(1..4)` match p at least 1 and at most 3 times
`p.repeat(5)` repeat p exactly 5 times | | p.map(f) | Convert parser result to desired value. | | p.convert(f) | Convert parser result to desired value, fails in case of conversion error. | | p.pos() | Get input position after matching p. | | p.collect() | Collect all matched input symbols. | | p.discard() | Discard parser output. | | p.name(\_) | Give parser a name to identify parsing errors.
If the `trace` feature is enabled then a basic trace for the parse and parse result is made to stderr. | | p.expect(\_) | Mark parser as expected, abort early when failed in ordered choice. | The choice of operators is established by their operator precedence, arity and "meaning". Use `*` to ignore the result of first operand on the start of an expression, `+` and `-` can fulfill the need on the rest of the expression. For example, `A * B * C - D + E - F` will return the results of C and E as a pair. ## Example code ```rust use pom::parser::*; let input = b"abcde"; let parser = sym(b'a') * none_of(b"AB") - sym(b'c') + seq(b"de"); let output = parser.parse(input); assert_eq!(output, Ok( (b'b', vec![b'd', b'e'].as_slice()) ) ); ``` ### Example JSON parser ```rust extern crate pom; use pom::parser::*; use pom::Parser; use std::collections::HashMap; use std::str::{self, FromStr}; #[derive(Debug, PartialEq)] pub enum JsonValue { Null, Bool(bool), Str(String), Num(f64), Array(Vec), Object(HashMap) } fn space() -> Parser { one_of(b" \t\r\n").repeat(0..).discard() } fn number() -> Parser { let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0'); let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); number.collect().convert(str::from_utf8).convert(|s|f64::from_str(&s)) } fn string() -> Parser { let special_char = sym(b'\\') | sym(b'/') | sym(b'"') | sym(b'b').map(|_|b'\x08') | sym(b'f').map(|_|b'\x0C') | sym(b'n').map(|_|b'\n') | sym(b'r').map(|_|b'\r') | sym(b't').map(|_|b'\t'); let escape_sequence = sym(b'\\') * special_char; let string = sym(b'"') * (none_of(b"\\\"") | escape_sequence).repeat(0..) 
- sym(b'"'); string.convert(String::from_utf8) } fn array() -> Parser> { let elems = list(call(value), sym(b',') * space()); sym(b'[') * space() * elems - sym(b']') } fn object() -> Parser> { let member = string() - space() - sym(b':') - space() + call(value); let members = list(member, sym(b',') * space()); let obj = sym(b'{') * space() * members - sym(b'}'); obj.map(|members|members.into_iter().collect::>()) } fn value() -> Parser { ( seq(b"null").map(|_|JsonValue::Null) | seq(b"true").map(|_|JsonValue::Bool(true)) | seq(b"false").map(|_|JsonValue::Bool(false)) | number().map(|num|JsonValue::Num(num)) | string().map(|text|JsonValue::Str(text)) | array().map(|arr|JsonValue::Array(arr)) | object().map(|obj|JsonValue::Object(obj)) ) - space() } pub fn json() -> Parser { space() * value() - end() } fn main() { let input = br#" { "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": 100 }, "Animated" : false, "IDs": [116, 943, 234, 38793] } }"#; println!("{:?}", json().parse(input)); } ``` You can run this example with the following command: ``` cargo run --example json ``` ## Benchmark | Parser | Time to parse the same JSON file | |------------------------------------------------------|----------------------------------| | pom: json_byte | 621,319 ns/iter (+/- 20,318) | | pom: json_char | 627,110 ns/iter (+/- 11,463) | | [pest](https://github.com/dragostis/pest): json_char | 13,359 ns/iter (+/- 811) | ### Lifetimes and files String literals have a static lifetime so they can work with the static version of Parser imported from `pom::Parser`. Input read from a file has a shorter lifetime. In this case you should import `pom::parser::Parser` and declare lifetimes on your parser functions. 
So ```rust fn space() -> Parser { one_of(b" \t\r\n").repeat(0..).discard() } ``` would become ```rust fn space<'a>() -> Parser<'a, u8, ()> { one_of(b" \t\r\n").repeat(0..).discard() } ``` ================================================ FILE: assets/data.json ================================================ [ { "_id": "5741cfe6bf9f447a509a269e", "index": 0, "guid": "642f0c2a-3d87-43ac-8f82-25f004e0c96a", "isActive": false, "balance": "$3,666.68", "picture": "http://placehold.it/32x32", "age": 39, "eyeColor": "blue", "name": "Leonor Herman", "gender": "female", "company": "RODEOMAD", "email": "leonorherman@rodeomad.com", "phone": "+1 (848) 456-2962", "address": "450 Seeley Street, Iberia, North Dakota, 7859", "about": "Reprehenderit in anim laboris labore sint occaecat labore proident ipsum exercitation. Ut ea aliqua duis occaecat consectetur aliqua anim id. Dolor ea fugiat excepteur reprehenderit eiusmod enim non sit nisi. Mollit consequat anim mollit et excepteur qui laborum qui eiusmod. Qui ea amet incididunt cillum quis occaecat excepteur qui duis nisi. Dolore labore eu sunt consequat magna.\r\n", "registered": "2015-03-06T02:49:06 -02:00", "latitude": -29.402032, "longitude": 151.088135, "tags": [ "Lorem", "voluptate", "aute", "ullamco", "elit", "esse", "culpa" ], "friends": [ { "id": 0, "name": "Millicent Norman" }, { "id": 1, "name": "Vincent Cannon" }, { "id": 2, "name": "Gray Berry" } ], "greeting": "Hello, Leonor Herman! You have 4 unread messages.", "favoriteFruit": "apple" }, { "_id": "5741cfe69424f42d4493caa2", "index": 1, "guid": "40ec6b43-e6e6-44e1-92a8-dc80cd5d7179", "isActive": true, "balance": "$2,923.78", "picture": "http://placehold.it/32x32", "age": 36, "eyeColor": "blue", "name": "Barton Barnes", "gender": "male", "company": "BRAINQUIL", "email": "bartonbarnes@brainquil.com", "phone": "+1 (907) 553-3739", "address": "644 Falmouth Street, Sedley, Michigan, 5602", "about": "Et nulla laboris consectetur laborum labore. 
Officia dolor sint do amet excepteur dolore eiusmod. Occaecat pariatur sunt velit sunt ullamco labore commodo mollit sint dolore occaecat.\r\n", "registered": "2014-08-28T01:07:22 -03:00", "latitude": 14.056553, "longitude": -61.911624, "tags": [ "laboris", "sunt", "esse", "tempor", "pariatur", "occaecat", "et" ], "friends": [ { "id": 0, "name": "Tillman Mckay" }, { "id": 1, "name": "Rivera Berg" }, { "id": 2, "name": "Rosetta Erickson" } ], "greeting": "Hello, Barton Barnes! You have 2 unread messages.", "favoriteFruit": "banana" } ] ================================================ FILE: benches/json.rs ================================================ #![feature(test)] extern crate test; use self::test::Bencher; use std::fs::File; use std::io::Read; extern crate pom; #[path = "../examples/json.rs"] mod json; #[bench] fn json_byte(b: &mut Bencher) { let mut file = File::open("assets/data.json").unwrap(); let mut input = Vec::new(); file.read_to_end(&mut input).unwrap(); b.iter(|| { json::json().parse(&input).ok(); }); } ================================================ FILE: benches/json_char.rs ================================================ #![feature(test)] extern crate test; use self::test::Bencher; use std::fs::File; use std::io::Read; extern crate pom; #[path = "../examples/json_char.rs"] mod json; #[bench] fn json_char(b: &mut Bencher) { let mut file = File::open("assets/data.json").unwrap(); let mut input = String::new(); file.read_to_string(&mut input).unwrap(); let chars: Vec = input.chars().collect(); b.iter(|| { json::json().parse(&chars).ok(); }); } ================================================ FILE: doc/article.md ================================================ # PEG Parser Combinators Implemented in Rust This article introduces [pom](https://github.com/J-F-Liu/pom), a PEG parser combinator library implemented in Rust, using operator overloading without macros. ## Why Rust? 
![Rust](rust.png) After I've learned C/C++ and C#, I found that choosing a new programming language can greatly affect a programmer's productivity. On one hand I keep sorting out new languages, there are hundreds of them, I examine and choose what I like best, my favorites are C#, Ruby, TypeScript and Rust. On the other hand I try to design a new language and implement a compiler by myself. I like the syntax provided by C#, but hate the huge .NET runtime. Dependency on CLR makes distribution of an application written in C# very hard. Compiling to native code is always what I longed for in a programming language. In 2003 I thought a compiler could get rid of the garbage collector by generating free-memory instructions in appropriate locations in the target program. But I didn't go deep into the design of the details of this mechanism, I decided to first write a working compiler, then improve the design of the language and implementation of the compiler bit by bit. The first stage of compilation is parsing. I tried some parser generators, but was not satisfied with the result. Then I dug into parsing theory, followed several books, implemented DFA, NFA, NFA-to-DFA conversion, LL(1), LR, and LALR algorithms, then wrote a parser to parse a BNF, EBNF or TBNF grammar file, and generate parser code corresponding to the grammar. The syntax/semantics analysis and code generation parts of a compiler are more difficult. I even tried to define an intermediate assembly language; at that time I didn't know LLVM. My effort of writing a compiler ceased for years, then Rust was born. At first glance, Rust's syntax is a bit strange — why use `fn` instead of `def`, why use `let mut` instead of `var` — and I was not attracted by it. 
After reading a publication on O'Reilly [*Why Rust?*](http://www.oreilly.com/programming/free/files/why-rust.pdf) I suddenly realized that this is the language I'm trying to build; when you actually start using Rust you'll find that `fn` and `let mut` fit Rust's logic well. For me, **Rust is once a dream, now a reality.** Rust has a steep learning curve, more challenging than any of the previous programming languages I learned. All this learning is worthwhile when you finally get your program working and polished. An object-oriented class hierarchy is not good enough for code reuse; Rust's enum, tuple, struct and trait type system is a better solution. I am still wondering whether the Rust compiler can be smart enough to elide all the lifetime parameters; they are mostly noise and an obstacle when reading and writing programs. ## What is PEG? When I discovered [PEG](http://bford.info/packrat/), I knew that all my previous work on LALR could be thrown away. I rewrote my parser generator using and working with PEG. Using this parser generator I created a [YAML parser](https://www.codeproject.com/Articles/28720/YAML-Parser-in-C) and a [Lua Interpreter](https://www.codeproject.com/Articles/228212/Lua-Interpreter). [Parsing Expression Grammars](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (PEGs) are an alternative to [Context-Free Grammars](http://en.wikipedia.org/wiki/Context-free_grammar) (CFGs) for formally specifying syntax. CFGs describe a rule system to generate language strings while PEGs describe a rule system to recognize language strings. ![CFG](cfg.png) ![PEG](peg.png) Unlike CFGs, PEGs cannot be ambiguous; if a string parses, it has exactly one valid parse tree. We normally specify our languages directly by how to recognize them, so PEG is both a closer match to syntax practices and more powerful than nondeterministic CFG. 
### Parsing expressions | Expr | Description | | ---------- | ----------------------------------- | | ε | the empty string | | a | terminal (a ∈ Σ) | | A | non-terminal (A ∈ N) | | e1 e2 | sequence | | e1 / e2 | prioritized choice | | e?, e*, e+ | optional, zero-or-more, one-or-more | | &e, !e | syntactic predicates | ## What is parser combinator? When I heard of Parsec in the Haskell world, I got the concept of parser combinator for my first time. A *parser* is a function which takes a *string* (a series of *symbols*) as input, and returns matching result as *output*. A *combinator* is a higher-order function (a "functional") which takes zero or more functions (each of the same type) as input and returns a new function of the same type as output. A *parser combinator* is a higher-order function which takes parsers as input and returns a new parser as output. Parser combinators allow you write grammar rules and create a parser directly in the host language, without a separated parser generation step, so the whole procedure is more fluent. ## How to implement parser combinators? I thought deeply about how to implement parser combinator using language constructs provided by Rust. In summary, there are four approaches: 1. Parser as closure ```rust pub fn empty() -> impl Fn(&mut Input) -> Result<()> { |_: &mut Input| Ok(()) } pub fn term(t: I) -> impl Fn(&mut Input) -> Result { ... } pub fn seq<'a, I>(tag: &'a [I]) -> impl Fn(&mut Input) -> Result<&'a [I]> { ... } ... // To create a parser for integer let parser = concatenate(optional(one_of("+-")), one_or_more(one_of("0123456789"))); ``` *Pros*: Less implementation code. *Cons*: Cannot overload operators, poor readability. 2. Parser as struct ```rust pub struct Parser { method: Box) -> Result>, } impl Parser { /// Create new parser. pub fn new

(parse: P) -> Parser where P: Fn(&mut Input) -> Result + 'static { Parser { method: Box::new(parse) } } /// Apply the parser to parse input. pub fn parse(&self, input: &mut Input) -> Result { (self.method)(input) } ... } pub fn empty() -> Parser { Parser::new(|_: &mut Input| Ok(())) } pub fn term(t: I) -> Parser { ... } ... impl Add> for Parser { type Output = Parser; fn add(self, other: Parser) -> Self::Output where I: 'static, O: 'static, U: 'static { Parser::new(move |input: &mut Input| { let start = input.position(); let result = self.parse(input) .and_then(|out1| other.parse(input).map(|out2| (out1, out2))); if result.is_err() { input.jump_to(start); } result }) } } ... // To create a parser for integer let parser = one_of("+-").opt() + one_of("0123456789").repeat(1..); ``` *Pros*: Can overload operators, elegant code. *Cons*: Depends on compiler's zero-cost abstractions to optimize runtime performance. Crate [pom](https://github.com/J-F-Liu/pom) is using this approach. 3. Parser as trait ```rust pub trait Parser { type I: ?Sized; type O; fn parse<'a>(&self, data: &'a Self::I) -> ParseResult<&'a Self::I, Self::O>; } pub trait ParserCombinator : Parser + Clone { fn then>(&self, p: P) -> ChainedParser { ChainedParser{first: self.clone(), second: p} } ... } pub fn opt(t: T) -> OptionParser { OptionParser{parser: t} } pub fn recursive Box>>(f: F) -> RecursiveParser { RecursiveParser{parser: Rc::new(f)} } ... pub struct ChainedParser { first: A, second: B, } ... impl, B: Parser> Parser for ChainedParser { type I = C; type O = (A::O,B::O); fn parse<'a>(&self, data: &'a Self::I) -> ParseResult<&'a Self::I, Self::O>{ match self.first.parse(data) { Ok((a, d2)) => match self.second.parse(d2) { Ok((b, remain)) => Ok(((a, b), remain)), Err(err) => Err(err) }, Err(err) => Err(err) } } } impl, B: ParserCombinator> Clone for ChainedParser { ... } ... ``` *Pros*: Can overload operators. *Cons*: Bloated code. 
Crate [peruse](https://github.com/DanSimon/peruse) is using this approach. 4. Parser as macro ```rust #[macro_export] macro_rules! do_parse ( (__impl $i:expr, $consumed:expr, ( $($rest:expr),* )) => ( $crate::IResult::Done($i, ( $($rest),* )) ); (__impl $i:expr, $consumed:expr, $e:ident >> $($rest:tt)*) => ( do_parse!(__impl $i, $consumed, call!($e) >> $($rest)*); ); (__impl $i:expr, $consumed:expr, $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => ( { match $submac!($i, $($args)*) { $crate::IResult::Error(e) => $crate::IResult::Error(e), $crate::IResult::Incomplete($crate::Needed::Unknown) => $crate::IResult::Incomplete($crate::Needed::Unknown), $crate::IResult::Incomplete($crate::Needed::Size(i)) => $crate::IResult::Incomplete($crate::Needed::Size($consumed + i)), $crate::IResult::Done(i,_) => { do_parse!(__impl i, $consumed + ($crate::InputLength::input_len(&($i)) - $crate::InputLength::input_len(&i)), $($rest)*) }, } } ); (__impl $i:expr, $consumed:expr, $field:ident : $e:ident >> $($rest:tt)*) => ( do_parse!(__impl $i, $consumed, $field: call!($e) >> $($rest)*); ); (__impl $i:expr, $consumed:expr, $field:ident : $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => ( { match $submac!($i, $($args)*) { $crate::IResult::Error(e) => $crate::IResult::Error(e), $crate::IResult::Incomplete($crate::Needed::Unknown) => $crate::IResult::Incomplete($crate::Needed::Unknown), $crate::IResult::Incomplete($crate::Needed::Size(i)) => $crate::IResult::Incomplete($crate::Needed::Size($consumed + i)), $crate::IResult::Done(i,o) => { let $field = o; do_parse!(__impl i, $consumed + ($crate::InputLength::input_len(&($i)) - $crate::InputLength::input_len(&i)), $($rest)*) }, } } ); // ending the chain (__impl $i:expr, $consumed:expr, $e:ident >> ( $($rest:tt)* )) => ( do_parse!(__impl $i, $consumed, call!($e) >> ( $($rest)* )); ); (__impl $i:expr, $consumed:expr, $submac:ident!( $($args:tt)* ) >> ( $($rest:tt)* )) => ( match $submac!($i, $($args)*) { $crate::IResult::Error(e) => 
$crate::IResult::Error(e), $crate::IResult::Incomplete($crate::Needed::Unknown) => $crate::IResult::Incomplete($crate::Needed::Unknown), $crate::IResult::Incomplete($crate::Needed::Size(i)) => $crate::IResult::Incomplete($crate::Needed::Size($consumed + i)), $crate::IResult::Done(i,_) => { $crate::IResult::Done(i, ( $($rest)* )) }, } ); (__impl $i:expr, $consumed:expr, $field:ident : $e:ident >> ( $($rest:tt)* )) => ( do_parse!(__impl $i, $consumed, $field: call!($e) >> ( $($rest)* ) ); ); (__impl $i:expr, $consumed:expr, $field:ident : $submac:ident!( $($args:tt)* ) >> ( $($rest:tt)* )) => ( match $submac!($i, $($args)*) { $crate::IResult::Error(e) => $crate::IResult::Error(e), $crate::IResult::Incomplete($crate::Needed::Unknown) => $crate::IResult::Incomplete($crate::Needed::Unknown), $crate::IResult::Incomplete($crate::Needed::Size(i)) => $crate::IResult::Incomplete($crate::Needed::Size($consumed + i)), $crate::IResult::Done(i,o) => { let $field = o; $crate::IResult::Done(i, ( $($rest)* )) }, } ); ($i:expr, $($rest:tt)*) => ( { do_parse!(__impl $i, 0usize, $($rest)*) } ); ); ... // To create a parser for integer named!(integer<&[u8], i64>, map!( pair!( opt!(sign), map_res!(map_res!(digit, str::from_utf8), i64::from_str) ), |(sign, value): (Option, i64)| { sign.unwrap_or(1) * value } )); ``` *Pros*: Can create DSL syntax, high performance. *Cons*: Macros themselves are difficult to read, write and debug. According to above comparison, parser as struct is the best approach. At first I choose to use nom to create a PDF parser, it turns out a special PDF feature blocked me. When parsing a PDF stream object, it's length may be a referenced object, hence the need to get the length from a reader. The `named! ` macro cannot accept extra parameters, there is no obvious way to read a length object inside a stream object parser. This is the primary reason why I started to develop pom. 
## List of predefined parsers and combinators in pom | Basic Parsers | Description | | -------------- | ---------------------------------------- | | empty() | Always succeeds, consume no input. | | end() | Match end of input. | | sym(t) | Match a single terminal symbol *t*. | | seq(s) | Match sequence of symbols. | | list(p,s) | Match list of *p*, separated by *s*. | | one_of(set) | Success when current input symbol is one of the set. | | none_of(set) | Success when current input symbol is none of the set. | | is_a(predicate) | Success when predicate return true on current input symbol. | | not_a(predicate) | Success when predicate return false on current input symbol. | | take(n) | Read *n* symbols. | | skip(n) | Skip *n* symbols. | | call(pf) | Call a parser factory, can used to create recursive parsers. | These are functions to create basic parsers. | Parser Combinators | Description | | ------------------ | ---------------------------------------- | | p | q | Match p or q, return result of the first success. | | p + q | Match p and q, if both success return a pair of results. | | p - q | Match p and q, if both success return result of p. | | p * q | Match p and q, if both success return result of q. | | p >> q | Parse p and get result P, then parse and return result of q(P). | | -p | Success when p success, doen't consume input. | | !p | Success when p fail, doen't consume input. | | p.opt() | Make parser optional. | | p.repeat(m..n) | `p.repeat(0..)` repeat p zero or more times
`p.repeat(1..)` repeat p one or more times
`p.repeat(1..4)` match p at least 1 and at most 3 times
`p.repeat(1..=3)` also match p at least 1 and at most 3 times | | p.map(f) | Convert parser result to desired value. | | p.convert(f) | Convert parser result to desired value, fail in case of conversion error. | | p.pos() | Get input position after matching p. | | p.collect() | Collect all matched input symbols. | | p.discard() | Discard parser output. | | p.name(_) | Give parser a name to identify parsing errors. | These are operations to create new parsers based on other parsers. The choice of operators is established by their operator precedence, arity and "meaning". Use `*` to ignore the result of first operand on the start of an expression, `+` and `-` can fulfill the need on the rest of the expression. For example, `A * B * C - D + E - F` will return the results of C and E as a pair. ## Using the code There are three ways to create a parser: 1. As a variable, normally used to construct another parser. ```rust let integer = one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); ``` 2. As a closure, when referenced several times in constructing another parser. ```rust let integer = || one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let pair = sym(b'(') * integer() - sym(b',') + integer() - sym(b')'); ``` 3. As a function, provides a high level construct. ```rust fn integer() -> Parser, Vec)> { one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..) } ``` ## Example JSON Parser Let me explain the parser combinators in more detail by creating a JSON parser. Syntax diagrams can be found on [json.org](http://www.json.org/). 
```rust extern crate pom; use pom::{Parser, DataInput}; use pom::char_class::hex_digit; use pom::parser::*; use std::str::FromStr; use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; use std::collections::HashMap; #[derive(Debug, PartialEq)] pub enum JsonValue { Null, Bool(bool), Str(String), Num(f64), Array(Vec), Object(HashMap) } ``` Import predefined parser combinators and utility functions, define the JSON parser's output value as an enum. ```rust fn space() -> Parser { one_of(b" \t\r\n").repeat(0..).discard() } ``` Match zero or more space characters, the output is ignored. ```rust fn number() -> Parser { let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0'); let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); number.collect().convert(String::from_utf8).convert(|s|f64::from_str(&s)) } ``` Don't care each output of integer, frac or exp, collect() method get all the match character as a Vec, then it is converted to a string, and further converted to a float number. ```rust fn string() -> Parser { let special_char = sym(b'\\') | sym(b'/') | sym(b'"') | sym(b'b').map(|_|b'\x08') | sym(b'f').map(|_|b'\x0C') | sym(b'n').map(|_|b'\n') | sym(b'r').map(|_|b'\r') | sym(b't').map(|_|b'\t'); let escape_sequence = sym(b'\\') * special_char; let char_string = (none_of(b"\\\"") | escape_sequence).repeat(1..).convert(String::from_utf8); let utf16_char = seq(b"\\u") * is_a(hex_digit).repeat(4).convert(String::from_utf8).convert(|digits|u16::from_str_radix(&digits, 16)); let utf16_string = utf16_char.repeat(1..).map(|chars|decode_utf16(chars).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect::()); let string = sym(b'"') * (char_string | utf16_string).repeat(0..) - sym(b'"'); string.map(|strings|strings.concat()) } ``` The bulk of code is written to parse escape sequences. 
According to [Wikipedia](https://en.wikipedia.org/wiki/JSON#Data_portability_issues), UTF-16 surrogate pairs is a detail missed by some JSON parsers. We implement this easily with Rust's Unicode support. ```rust fn array() -> Parser> { let elems = list(call(value), sym(b',') * space()); sym(b'[') * space() * elems - sym(b']') } fn object() -> Parser> { let member = string() - space() - sym(b':') - space() + call(value); let members = list(member, sym(b',') * space()); let obj = sym(b'{') * space() * members - sym(b'}'); obj.map(|members|members.into_iter().collect::>()) } fn value() -> Parser { ( seq(b"null").map(|_|JsonValue::Null) | seq(b"true").map(|_|JsonValue::Bool(true)) | seq(b"false").map(|_|JsonValue::Bool(false)) | number().map(|num|JsonValue::Num(num)) | string().map(|text|JsonValue::Str(text)) | array().map(|arr|JsonValue::Array(arr)) | object().map(|obj|JsonValue::Object(obj)) ) - space() } ``` array and object are very straight to parse, notice `call(value)`, at the first attempt I write it as `value()`, then an infinite loop is created. Recursive parsing is solved by adding `call()` to `pom`. ```rust pub fn json() -> Parser { space() * value() - end() } ``` The final JSON parser, declared as public. According to [RFC 7159](https://tools.ietf.org/html/rfc7159) a JSON text is a serialized value of any of the six types. `end()` is used to ensure there is no extra text in the input. 
```rust fn main() { let test = br#" { "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": 100 }, "Animated" : false, "IDs": [116, 943, 234, 38793] }, "escaped characters": "\u2192\uD83D\uDE00\"\t\uD834\uDD1E" }"#; let mut input = DataInput::new(test); println!("{:?}", json().parse(&mut input)); } ``` Use the JSON parser to parse JSON text, the output is: ``` cargo run --example json Compiling pom v0.6.0 (file:///work/pom) Finished debug [unoptimized + debuginfo] target(s) in 2.20 secs Running `target/debug/examples/json` Ok(Object({"Image": Object({"Width": Num(800), "Title": Str("View from 15th Floor"), "Height": Num(600), "Animated": Bool(false), "IDs": Array([Num(116), Num(943), Num(234), Num(38793)]), "Thumbnail": Object({"Height ": Num(125), "Url": Str("http://www.example.com/image/481989943"), "Width": Num(100)})}), "escaped characters": Str("→😀\"\t𝄞")})) ``` The above parser assumes that the input bytes is UTF-8 encoded text; otherwise, you can use the [char version of JSON parser](https://github.com/J-F-Liu/pom/blob/master/examples/json_char.rs). `p >> q` is not covered in the JSON example. It is used to pass the output of `p` into parser creation of `p`. ```rust let mut input = DataInput::new(b"5oooooooo"); let parser = one_of(b"0123456789").map(|c|c - b'0') >> |n| { take(n as usize) + sym(b'o').repeat(0..) }; let output = parser.parse(&mut input); assert_eq!(output, Ok( (vec![b'o';5], vec![b'o';3]) )); ``` The first character indicates the number of `o`s to parse, then the number is used in the closure `|n| take(n)`. ## More examples - A [simple PDF parser](https://github.com/J-F-Liu/lopdf/blob/491dece5867a2b81878208bcb5e07ff1007c0d89/src/parser.rs), you can compare it with the equivalent [nom version](https://github.com/J-F-Liu/lopdf/blob/dff82c49fea9ac9ea23edf42ad80e480bd5edb46/src/parser.rs). 
- A [complete PDF parser](https://github.com/J-F-Liu/lopdf/blob/master/src/parser.rs) which can read length object when parsing stream object. ## Conclusion I think I created something really cool, you can use pom to write all kinds of parsers elegantly. I helped pom to evolve version by version into what it is, and pom also helps me to grow my Rust programming skills a lot. Of course there is still room for improvement, any feed back is welcome. ## Points of interest I try to add a `cache()` method to `Parser`. Memorize the result on given input position, return the result directly when called again, effectively implementing the Packrat Parsing algorithm. But there are two problems, 1) save result means mutate a Hashmap, so Parser's method field should be a Box of `FnMut`, 2) Hashmap returns an reference of value for a given key, the value cannot be moved, so need to make the value cloneable. ## Pain points where Rust needs to improve 1. Implement trait for `[T]` should automatically implement `[T; N]`. 2. The standard library should provide a char_at() method return the char and the number of bytes consumed, like: ```rust pub trait Encoding { /// Get char at a byte index, return the char and the number of bytes read. fn char_at(&self, data: &[u8], index: usize) -> Result<(char, u32)>; } ``` 3. Can ellide 'static lifetime parameter, allow `Parser<'static, I, O>` written as `Parser`. 4. Should `impl Copy for closure`, so that FnOnce closure can be passed to map() inside Fn closure. 
```rust pub fn map(self, f: F) -> Parser<'a, I, U> where F: FnOnce(O) -> U + Copy + 'a, I: 'static, O: 'static, U: 'static { Parser::new(move |input: &mut Input| { self.parse(input).map(f) }) } ``` ## More Readings - [The Rust programming language, in the words of its practitioners](https://brson.github.io/fireflowers/) - [PEGs, Packrats and Parser Combinators](http://scg.unibe.ch/download/lectures/cc2011/10PEGs.pptx.pdf) - [An introduction to parsing text in Haskell with Parsec](http://unbui.lt/#!/post/haskell-parsec-basics/) ================================================ FILE: examples/duration.rs ================================================ use pom::parser::*; use pom::Parser; use std::str::{self, FromStr}; #[derive(Debug, PartialEq)] struct Duration { years: Option, months: Option, weeks: Option, days: Option, hours: Option, minutes: Option, seconds: Option, } fn number_separator() -> Parser { // either '.' or ',' can be used as a separator between the whole and decimal part of a number one_of(b".,").discard() } fn number() -> Parser { let integer = one_of(b"0123456789").repeat(0..); let frac = number_separator() + one_of(b"0123456789").repeat(1..); let number = integer + frac.opt(); number .collect() .convert(str::from_utf8) .convert(f32::from_str) } fn date_part() -> Parser, Option, Option, Option)> { ((number() - sym(b'Y')).opt() + (number() - sym(b'M')).opt() + (number() - sym(b'W')).opt() + (number() - sym(b'D')).opt()) .map(|(((years, months), weeks), days)| (years, months, weeks, days)) } fn time_part() -> Parser, Option, Option)> { sym(b'T') * ((number() - sym(b'H')).opt() + (number() - sym(b'M')).opt() + (number() - sym(b'S')).opt()) .map(|((hours, minutes), seconds)| (hours, minutes, seconds)) } fn parser() -> Parser { sym(b'P') * (time_part().map(|(hours, minutes, seconds)| Duration { years: None, months: None, weeks: None, days: None, hours, minutes, seconds, }) | (date_part() + time_part()).map(|(date_elements, time_elements)| { let (years, 
months, weeks, days) = date_elements; let (hours, minutes, seconds) = time_elements; Duration { years, months, weeks, days, hours, minutes, seconds, } })) } /// Parses the ISO 8601 Duration standard /// https://en.wikipedia.org/wiki/ISO_8601#Durations fn main() { let input = "P3Y6M4DT12H30M5S"; let result = parser().parse(input.as_bytes()); assert_eq!( Duration { years: Some(3f32), months: Some(6f32), weeks: None, days: Some(4f32), hours: Some(12f32), minutes: Some(30f32), seconds: Some(5f32) }, result.unwrap() ); } ================================================ FILE: examples/json.rs ================================================ use pom::char_class::hex_digit; use pom::parser::*; use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; use std::collections::HashMap; use std::str::{self, FromStr}; #[derive(Debug, PartialEq)] pub enum JsonValue { Null, Bool(bool), Str(String), Num(f64), Array(Vec), Object(HashMap), } fn space<'a>() -> Parser<'a, u8, ()> { one_of(b" \t\r\n").repeat(0..).discard() } fn number<'a>() -> Parser<'a, u8, f64> { let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0'); let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); number .collect() .convert(str::from_utf8) .convert(f64::from_str) } fn string<'a>() -> Parser<'a, u8, String> { let special_char = sym(b'\\') | sym(b'/') | sym(b'"') | sym(b'b').map(|_| b'\x08') | sym(b'f').map(|_| b'\x0C') | sym(b'n').map(|_| b'\n') | sym(b'r').map(|_| b'\r') | sym(b't').map(|_| b'\t'); let escape_sequence = sym(b'\\') * special_char; let char_string = (none_of(b"\\\"") | escape_sequence) .repeat(1..) 
.convert(String::from_utf8); let utf16_char = seq(b"\\u") * is_a(hex_digit) .repeat(4) .convert(String::from_utf8) .convert(|digits| u16::from_str_radix(&digits, 16)); let utf16_string = utf16_char.repeat(1..).map(|chars| { decode_utf16(chars) .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) .collect::() }); let string = sym(b'"') * (char_string | utf16_string).repeat(0..) - sym(b'"'); string.map(|strings| strings.concat()) } fn array<'a>() -> Parser<'a, u8, Vec> { let elems = list(call(value), sym(b',') * space()); sym(b'[') * space() * elems - sym(b']') } fn object<'a>() -> Parser<'a, u8, HashMap> { let member = string() - space() - sym(b':') - space() + call(value); let members = list(member, sym(b',') * space()); let obj = sym(b'{') * space() * members - sym(b'}'); obj.map(|members| members.into_iter().collect::>()) } fn value<'a>() -> Parser<'a, u8, JsonValue> { (seq(b"null").map(|_| JsonValue::Null) | seq(b"true").map(|_| JsonValue::Bool(true)) | seq(b"false").map(|_| JsonValue::Bool(false)) | number().map(|num| JsonValue::Num(num)) | string().map(|text| JsonValue::Str(text)) | array().map(|arr| JsonValue::Array(arr)) | object().map(|obj| JsonValue::Object(obj))) - space() } pub fn json<'a>() -> Parser<'a, u8, JsonValue> { space() * value() - end() } #[allow(dead_code)] fn main() { let input = br#" { "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": 100 }, "Animated" : false, "IDs": [116, 943, 234, 38793] }, "escaped characters": "\u2192\uD83D\uDE00\"\t\uD834\uDD1E" }"#; println!("{:?}", json().parse(input)); } ================================================ FILE: examples/json_char.rs ================================================ use pom::parser::*; use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; use std::collections::HashMap; use std::iter::FromIterator; use std::str::FromStr; #[derive(Debug, PartialEq)] pub enum JsonValue { Null, Bool(bool), 
Str(String), Num(f64), Array(Vec), Object(HashMap), } fn space<'a>() -> Parser<'a, char, ()> { one_of(" \t\r\n").repeat(0..).discard() } fn number<'a>() -> Parser<'a, char, f64> { let integer = one_of("123456789") - one_of("0123456789").repeat(0..) | sym('0'); let frac = sym('.') + one_of("0123456789").repeat(1..); let exp = one_of("eE") + one_of("+-").opt() + one_of("0123456789").repeat(1..); let number = sym('-').opt() + integer + frac.opt() + exp.opt(); number .collect() .map(String::from_iter) .convert(|s| f64::from_str(&s)) } fn string<'a>() -> Parser<'a, char, String> { let special_char = sym('\\') | sym('/') | sym('"') | sym('b').map(|_| '\x08') | sym('f').map(|_| '\x0C') | sym('n').map(|_| '\n') | sym('r').map(|_| '\r') | sym('t').map(|_| '\t'); let escape_sequence = sym('\\') * special_char; let char_string = (none_of("\\\"") | escape_sequence) .repeat(1..) .map(String::from_iter); let utf16_char = tag("\\u") * is_a(|c: char| c.is_digit(16)) .repeat(4) .map(String::from_iter) .convert(|digits| u16::from_str_radix(&digits, 16)); let utf16_string = utf16_char.repeat(1..).map(|chars| { decode_utf16(chars) .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) .collect::() }); let string = sym('"') * (char_string | utf16_string).repeat(0..) 
- sym('"'); string.map(|strings| strings.concat()) } fn array<'a>() -> Parser<'a, char, Vec> { let elems = list(call(value), sym(',') * space()); sym('[') * space() * elems - sym(']') } fn object<'a>() -> Parser<'a, char, HashMap> { let member = string() - space() - sym(':') - space() + call(value); let members = list(member, sym(',') * space()); let obj = sym('{') * space() * members - sym('}'); obj.map(|members| members.into_iter().collect::>()) } fn value<'a>() -> Parser<'a, char, JsonValue> { (tag("null").map(|_| JsonValue::Null) | tag("true").map(|_| JsonValue::Bool(true)) | tag("false").map(|_| JsonValue::Bool(false)) | number().map(|num| JsonValue::Num(num)) | string().map(|text| JsonValue::Str(text)) | array().map(|arr| JsonValue::Array(arr)) | object().map(|obj| JsonValue::Object(obj))) - space() } pub fn json<'a>() -> Parser<'a, char, JsonValue> { space() * value() - end() } #[allow(dead_code)] fn main() { let test = r#" { "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": 100 }, "Animated" : false, "IDs": [116, 943, 234, 38793] }, "escaped characters": "\u2192\uD83D\uDE00\"\t\uD834\uDD1E" }"#; let input: Vec = test.chars().collect(); println!("{:?}", json().parse(&input)); } ================================================ FILE: examples/json_file.rs ================================================ use pom::char_class::hex_digit; use pom::parser::{call, end, is_a, list, none_of, one_of, seq, sym, Parser}; use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; use std::collections::HashMap; use std::fs::File; use std::io::Read; use std::str::{self, FromStr}; #[derive(Debug, PartialEq)] pub enum JsonValue { Null, Bool(bool), Str(String), Num(f64), Array(Vec), Object(HashMap), } fn space<'a>() -> Parser<'a, u8, ()> { one_of(b" \t\r\n").repeat(0..).discard() } fn number<'a>() -> Parser<'a, u8, f64> { let integer = one_of(b"123456789") - 
one_of(b"0123456789").repeat(0..) | sym(b'0'); let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); number .collect() .convert(str::from_utf8) .convert(f64::from_str) } fn string<'a>() -> Parser<'a, u8, String> { let special_char = sym(b'\\') | sym(b'/') | sym(b'"') | sym(b'b').map(|_| b'\x08') | sym(b'f').map(|_| b'\x0C') | sym(b'n').map(|_| b'\n') | sym(b'r').map(|_| b'\r') | sym(b't').map(|_| b'\t'); let escape_sequence = sym(b'\\') * special_char; let char_string = (none_of(b"\\\"") | escape_sequence) .repeat(1..) .convert(String::from_utf8); let utf16_char = seq(b"\\u") * is_a(hex_digit) .repeat(4) .convert(String::from_utf8) .convert(|digits| u16::from_str_radix(&digits, 16)); let utf16_string = utf16_char.repeat(1..).map(|chars| { decode_utf16(chars) .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) .collect::() }); let string = sym(b'"') * (char_string | utf16_string).repeat(0..) 
- sym(b'"'); string.map(|strings| strings.concat()) } fn array<'a>() -> Parser<'a, u8, Vec> { let elems = list(call(value), sym(b',') * space()); sym(b'[') * space() * elems - sym(b']') } fn object<'a>() -> Parser<'a, u8, HashMap> { let member = string() - space() - sym(b':') - space() + call(value); let members = list(member, sym(b',') * space()); let obj = sym(b'{') * space() * members - sym(b'}'); obj.map(|members| members.into_iter().collect::>()) } fn value<'a>() -> Parser<'a, u8, JsonValue> { (seq(b"null").map(|_| JsonValue::Null) | seq(b"true").map(|_| JsonValue::Bool(true)) | seq(b"false").map(|_| JsonValue::Bool(false)) | number().map(|num| JsonValue::Num(num)) | string().map(|text| JsonValue::Str(text)) | array().map(|arr| JsonValue::Array(arr)) | object().map(|obj| JsonValue::Object(obj))) - space() } pub fn json<'a>() -> Parser<'a, u8, JsonValue> { space() * value() - end() } #[allow(dead_code)] fn main() { let mut file = File::open("examples/test.json").unwrap(); let mut input: Vec = Vec::new(); file.read_to_end(&mut input).expect("read test.json"); println!("{:?}", json().parse(input.as_slice())); } ================================================ FILE: examples/simple.rs ================================================ use pom::parser::*; fn main() { let input = b"abcde"; let parser = sym(b'a') * none_of(b"AB") - sym(b'c') + seq(b"de"); let output = parser.parse(input); // assert_eq!(output, Ok( (b'b', &b"de"[..]) ) ); println!("{:?}", output); } ================================================ FILE: examples/test.json ================================================ { "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": 100 }, "Animated" : false, "IDs": [116, 943, 234, 38793] }, "escaped characters": "\u2192\uD83D\uDE00\"\t\uD834\uDD1E" } ================================================ FILE: examples/utf8.rs 
================================================ // Example shows basic UTF-8 combinators use pom::utf8::*; fn main() { // Informal, Spanish-language movie database format let input = "\ Título: Abre los ojos Año: 1997 Director: Alejandro Amenábar Título: Amores Perros Director: Alejandro González Iñárritu Año: 2000 Título: La montaña sagrada Año: 1973 Director: Alejandro Jodorowsky "; enum DataLine<'a> { Title(&'a str), Director(&'a str), Year(i32), } fn positive<'a>() -> Parser<'a, i32> { // let integer = (one_of("123456789") - one_of("0123456789").repeat(0..)) | sym(b'0'); // TODO let digit = one_of("0123456789"); let integer = digit.discard().repeat(1..); integer.collect().convert(|x| x.parse::()) } fn rest_str<'a>() -> Parser<'a, &'a str> { any().repeat(1..).collect() } fn separator<'a>() -> Parser<'a, ()> { seq(": ").discard() } let parser = (seq("Título") * separator() * rest_str().map(|s| DataLine::Title(s))) | (seq("Director") * separator() * rest_str().map(|s| DataLine::Director(s))) | (seq("Año") * separator() * positive().map(|i| DataLine::Year(i))); { let mut title_opt: Option<&str> = None; let mut year_opt: Option = None; let mut director_opt: Option<&str> = None; for line in input.lines() { if !line.is_empty() { // Skip blank lines without parsing // Parse line match parser.parse_str(line).unwrap() { DataLine::Title(s) => title_opt = Some(s), DataLine::Director(s) => director_opt = Some(s), DataLine::Year(s) => year_opt = Some(s), } // When all three line types have been collected, print them if let (Some(title), Some(year), Some(director)) = (title_opt, year_opt, director_opt) { println!("Title: {}\nDirector: {}\nYear: {}\n", title, director, year); (title_opt, year_opt, director_opt) = (None, None, None); } } } } } ================================================ FILE: examples/utf8_mixed.rs ================================================ // Example shows UTF-8 combinators intermixed with binary combinators use pom::parser::*; use pom::utf8; fn 
main() { // A parser for MsgPack (but only messages encoding a string) let testcases: [Vec; 6] = [ vec![0b10100100, 0b11110000, 0b10011111, 0b10100100, 0b10010100], // 🤔, max-size 31 format vec![0xd9, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // 😮, max-size 255 format vec![0xda, 0, 4, 0b11110000, 0b10011111, 0b10100100, 0b10101111], // 🤯, max-size 2^16-1 format vec![ 0xdb, 0, 0, 0, 4, 0b11110000, 0b10011111, 0b10010010, 0b10100101, ], // 💥, max-size 2^32-1 format vec![0xc4, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // Valid MsgPack, but not a string (binary) vec![0b10100100, 0b10010100, 0b10100100, 0b10011111, 0b11110000], // A MsgPack string, but invalid UTF-8 ]; const MASK: u8 = 0b11100000; // size 31 format is denoted by 3 high bits == 101 const SIZE_31: u8 = 0b10100000; fn rest_as_str<'a>() -> utf8::Parser<'a, &'a str> { utf8::any().repeat(0..).collect() } // Demo parser does not verify that the claimed length matches the actual length (but checking so is simple with >>) let parser = (sym(0xdb) * any().repeat(4) * rest_as_str()) // 2^32-1 format | (sym(0xda) * any().repeat(2) * rest_as_str()) // 2^16-1 format | (sym(0xd9) * any() * rest_as_str()) // 255 format | (is_a(|x| x&MASK == SIZE_31) * rest_as_str()) // 31 format - end(); for testcase in testcases.iter() { println!("{:?}", parser.parse(testcase)); } } ================================================ FILE: examples/whitespace.rs ================================================ use pom::parser::*; #[derive(Clone, Debug, PartialEq)] struct Container { containers: Vec, contents: Vec, } enum TmpContainerOrContent { Container(Container), Content(String), } fn whitespace<'a>() -> Parser<'a, u8, ()> { one_of(b" \t\r\n").repeat(0..).discard() } fn linebreak<'a>() -> Parser<'a, u8, ()> { sym(b'\r').opt() * sym(b'\n').discard() } fn indented<'a>() -> Parser<'a, u8, Vec> { sym(b'\t') * none_of(b"\n\r").repeat(1..) 
- linebreak() } fn empty<'a>() -> Parser<'a, u8, ()> { one_of(b" \t").repeat(0..).discard() - linebreak() } fn content<'a>() -> Parser<'a, u8, String> { none_of(b" \t\r\n").repeat(1..).convert(String::from_utf8) - linebreak() } fn subcontainer<'a>() -> Parser<'a, u8, (Vec, Vec)> { (call(container).map(|ctr| TmpContainerOrContent::Container(ctr)) | content().map(|ctn| TmpContainerOrContent::Content(ctn))) .repeat(1..) .map(|tmp| { tmp.into_iter().fold((vec![], vec![]), |acc, x| match x { TmpContainerOrContent::Container(ct) => ( acc.0.into_iter().chain(vec![ct].into_iter()).collect(), acc.1, ), TmpContainerOrContent::Content(cn) => ( acc.0, acc.1.into_iter().chain(vec![cn].into_iter()).collect(), ), }) }) } fn container<'a>() -> Parser<'a, u8, Container> { seq(b"Container\n") * (indented() | empty().map(|()| vec![])) .repeat(1..) .map(|lines| { lines .into_iter() .filter(|line| line.len() > 0) .fold(vec![], |accum, line| { accum .into_iter() .chain(line.into_iter().chain(vec![b'\n'].into_iter())) .collect() }) }) .map(|deden| subcontainer().parse(&deden).expect("subcont")) .map(|(containers, contents)| Container { containers, contents, }) } fn mylang<'a>() -> Parser<'a, u8, Vec> { whitespace() * list(call(container), whitespace()) } fn main() -> Result<(), ()> { let input = br#" Container Container a b c 1 2 3 Container q Container foo bar Container baz quux "#; assert_eq!( mylang().parse(input), Ok(vec![ Container { containers: vec![ Container { containers: vec![], contents: vec!["a".into(), "b".into(), "c".into(),] }, Container { containers: vec![], contents: vec!["q".into(),] } ], contents: vec!["1".into(), "2".into(), "3".into(),] }, Container { containers: vec![Container { contents: vec!["baz".into(), "quux".into(),], containers: vec![], },], contents: vec!["foo".into(), "bar".into(),] }, ]) ); Ok(()) } ================================================ FILE: rustfmt.toml ================================================ format_strings = false reorder_imports = 
true hard_tabs = true ================================================ FILE: src/char_class.rs ================================================ /// Recognises an alphabetic character, `a-zA-Z`. #[inline] pub fn alpha(term: u8) -> bool { term.is_ascii_alphabetic() } /// Recognises an alphabetic character, `A-Z`. #[inline] pub fn alpha_uppercase(term: u8) -> bool { term.is_ascii_uppercase() } /// Recognises an alphabetic character, `a-z`. #[inline] pub fn alpha_lowercase(term: u8) -> bool { term.is_ascii_lowercase() } /// Recognises a decimal digit, `0-9`. #[inline] pub fn digit(term: u8) -> bool { term.is_ascii_digit() } /// Recognises an alphanumeric character, `a-zA-Z0-9`. #[inline] pub fn alphanum(term: u8) -> bool { term.is_ascii_alphanumeric() } /// Recognises a hexadecimal digit, `0-9a-fA-F`. #[inline] pub fn hex_digit(term: u8) -> bool { matches!(term, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66) } /// Recognises an octal digit, `0-7`. #[inline] pub fn oct_digit(term: u8) -> bool { matches!(term, 0x30..=0x37) } /// Recognises a space or tab. #[inline] pub fn space(term: u8) -> bool { matches!(term, b' ' | b'\t') } /// Recognises a space, tab, line feed, or carriage return. 
#[inline] pub fn multispace(term: u8) -> bool { space(term) || matches!(term, b'\n' | b'\r') } #[cfg(test)] mod test { use super::*; #[test] fn is_an_alpha() { assert!(alpha(b'A')); assert!(alpha(b'Z')); assert!(alpha(b'a')); assert!(alpha(b'z')); } #[test] fn is_an_alpha_uppercase() { assert!(alpha_uppercase(b'A')); assert!(alpha_uppercase(b'Z')); assert!(!alpha_uppercase(b'a')); assert!(!alpha_uppercase(b'z')); } #[test] fn is_an_alpha_lowercase() { assert!(!alpha_lowercase(b'A')); assert!(!alpha_lowercase(b'Z')); assert!(alpha_lowercase(b'a')); assert!(alpha_lowercase(b'z')); } #[test] fn is_a_digit() { assert!(digit(b'0')); assert!(digit(b'9')); assert!(!digit(b'A')); } #[test] fn is_an_alphanum() { assert!(alphanum(b'A')); assert!(alphanum(b'Z')); assert!(alphanum(b'a')); assert!(alphanum(b'z')); assert!(alphanum(b'0')); assert!(alphanum(b'9')); assert!(!alphanum(b'#')); } #[test] fn is_a_hex_digit() { assert!(hex_digit(b'0')); assert!(hex_digit(b'9')); assert!(hex_digit(b'A')); assert!(hex_digit(b'F')); assert!(hex_digit(b'a')); assert!(hex_digit(b'f')); assert!(!hex_digit(b'G')); } #[test] fn is_a_oct_digit() { assert!(oct_digit(b'0')); assert!(oct_digit(b'7')); assert!(!oct_digit(b'8')); assert!(!oct_digit(b'9')); } #[test] fn is_space() { assert!(space(b' ')); assert!(space(b'\t')); assert!(!space(b'\n')); assert!(!space(b'A')); } #[test] fn is_multispace() { assert!(multispace(b' ')); assert!(multispace(b'\t')); assert!(multispace(b'\n')); assert!(!multispace(b'A')); } } ================================================ FILE: src/lib.rs ================================================ pub(crate) mod range; mod result; pub(crate) mod set; /// Contains predefined parsers and combinators. pub mod parser; /// Utility functions to recognize char class of byte value. pub mod char_class; /// Variants of parser functions specialized for matching UTF-8 strings and returning chars. 
/// Method and constructor names/functionality are generally the same as in base parser module. #[cfg(feature = "utf8")] pub mod utf8; pub use crate::result::{Error, Result}; /// Parser type, `Parser` is alias of `parser::Parser<'static, I, O>`. pub type Parser = parser::Parser<'static, I, O>; ================================================ FILE: src/parser.rs ================================================ use super::{Error, Result}; use crate::{range::RangeArgument, set::Set}; use std::{ fmt::{Debug, Display}, ops::Bound::{Excluded, Included, Unbounded}, ops::{Add, BitOr, Mul, Neg, Not, Shr, Sub}, }; type Parse<'a, I, O> = dyn Fn(&'a [I], usize) -> Result<(O, usize)> + 'a; /// Parser combinator. pub struct Parser<'a, I, O> { pub method: Box>, } impl<'a, I, O> Parser<'a, I, O> { /// Create new parser. pub fn new

(parse: P) -> Self where P: Fn(&'a [I], usize) -> Result<(O, usize)> + 'a, { Self { method: Box::new(parse), } } /// Apply the parser to parse input. pub fn parse(&self, input: &'a [I]) -> Result { (self.method)(input, 0).map(|(out, _)| out) } /// Parse input at specified position. pub fn parse_at(&self, input: &'a [I], start: usize) -> Result<(O, usize)> { (self.method)(input, start) } /// Convert parser result to desired value. pub fn map(self, f: F) -> Parser<'a, I, U> where F: Fn(O) -> U + 'a, I: 'a, O: 'a, U: 'a, { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).map(|(out, pos)| (f(out), pos)) }) } /// Convert parser result to desired value, fail in case of conversion error. pub fn convert(self, f: F) -> Parser<'a, I, U> where F: Fn(O) -> ::std::result::Result + 'a, E: Debug, O: 'a, U: 'a, { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).and_then(|(res, pos)| match f(res) { Ok(out) => Ok((out, pos)), Err(err) => Err(Error::Conversion { message: format!("Conversion error: {:?}", err), position: start, }), }) }) } /// Cache parser output result to speed up backtracking. pub fn cache(self) -> Self where O: Clone + 'a, { use std::{cell::RefCell, collections::HashMap}; let results = RefCell::new(HashMap::new()); Self::new(move |input: &'a [I], start: usize| { let key = (start, format!("{:p}", &self.method)); results .borrow_mut() .entry(key) .or_insert_with(|| (self.method)(input, start)) .clone() }) } /// Get input position after matching parser. pub fn pos(self) -> Parser<'a, I, usize> where O: 'a, { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).map(|(_, pos)| (pos, pos)) }) } /// Collect all matched input symbols. pub fn collect(self) -> Parser<'a, I, &'a [I]> where O: 'a, { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).map(|(_, end)| (&input[start..end], end)) }) } /// Discard parser output. 
pub fn discard(self) -> Parser<'a, I, ()> where O: 'a, { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).map(|(_, end)| ((), end)) }) } /// Make parser optional. pub fn opt(self) -> Parser<'a, I, Option> where O: 'a, { Parser::new( move |input: &'a [I], start: usize| match (self.method)(input, start) { Ok((out, pos)) => Ok((Some(out), pos)), Err(_) => Ok((None, start)), }, ) } /// `p.repeat(5)` repeat p exactly 5 times /// `p.repeat(0..)` repeat p zero or more times /// `p.repeat(1..)` repeat p one or more times /// `p.repeat(1..4)` match p at least 1 and at most 3 times pub fn repeat(self, range: R) -> Parser<'a, I, Vec> where R: RangeArgument + Debug + 'a, O: 'a, { Parser::new(move |input: &'a [I], start: usize| { let mut items = vec![]; let mut pos = start; loop { match range.end() { Included(&max_count) => { if items.len() >= max_count { break; } } Excluded(&max_count) => { if items.len() + 1 >= max_count { break; } } Unbounded => (), } let Ok((item, item_pos)) = (self.method)(input, pos) else { break; }; items.push(item); pos = item_pos; } if let Included(&min_count) = range.start() { if items.len() < min_count { return Err(Error::Mismatch { message: format!( "expect repeat at least {} times, found {} times", min_count, items.len() ), position: start, }); } } Ok((items, pos)) }) } #[cfg(not(feature = "trace"))] /// Give parser a name to identify parsing errors. pub fn name(self, name: &'a str) -> Self where O: 'a, { Parser::new( move |input: &'a [I], start: usize| match (self.method)(input, start) { res @ Ok(_) => res, Err(err) => match err { Error::Custom { .. } => Err(err), _ => Err(Error::Custom { message: format!("failed to parse {}", name), position: start, inner: Some(Box::new(err)), }), }, }, ) } #[cfg(feature = "trace")] /// Trace parser calls and results. 
Similar to name pub fn name(self, name: &'a str) -> Self where O: 'a, { Parser::new(move |input: &'a [I], start: usize| { eprintln!("parse: {} ({})", name, start); match (self.method)(input, start) { res @ Ok(_) => { eprintln!(" {} ({}): ok", name, start); res } Err(err) => { eprintln!(" {} ({}): error", name, start); match err { Error::Custom { .. } => Err(err), _ => Err(Error::Custom { message: format!("failed to parse {}", name), position: start, inner: Some(Box::new(err)), }), } } } }) } /// Mark parser as expected, abort early when failed in ordered choice. pub fn expect(self, name: &'a str) -> Self where O: 'a, { Parser::new( move |input: &'a [I], start: usize| match (self.method)(input, start) { res @ Ok(_) => res, Err(err) => Err(Error::Expect { message: format!("Expect {}", name), position: start, inner: Box::new(err), }), }, ) } } /// Always succeeds, consume no input. pub fn empty<'a, I>() -> Parser<'a, I, ()> { Parser::new(|_: &[I], start: usize| Ok(((), start))) } /// Match any symbol. pub fn any<'a, I>() -> Parser<'a, I, I> where I: Clone, { Parser::new(|input: &[I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Mismatch { message: "end of input reached".to_owned(), position: start, }); }; Ok((s.clone(), start + 1)) }) } /// Success when current input symbol equals `t`. pub fn sym<'a, I>(t: I) -> Parser<'a, I, I> where I: Clone + PartialEq + Display, { Parser::new(move |input: &'a [I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Incomplete); }; if t != *s { return Err(Error::Mismatch { message: format!("expect: {}, found: {}", t, s), position: start, }); } Ok((s.clone(), start + 1)) }) } /// Success when sequence of symbols matches current input. 
pub fn seq<'a, 'b: 'a, I>(tag: &'b [I]) -> Parser<'a, I, &'a [I]> where I: PartialEq + Debug, { Parser::new(move |input: &'a [I], start: usize| { let mut index = 0; loop { let pos = start + index; if index == tag.len() { return Ok((tag, pos)); } let Some(s) = input.get(pos) else { return Err(Error::Incomplete); }; if tag[index] != *s { return Err(Error::Mismatch { message: format!("seq {:?} expect: {:?}, found: {:?}", tag, tag[index], s), position: pos, }); } index += 1; } }) } /// Success when tag matches current input. pub fn tag<'a, 'b: 'a>(tag: &'b str) -> Parser<'a, char, &'a str> { Parser::new(move |input: &'a [char], start: usize| { let mut pos = start; for c in tag.chars() { let Some(&s) = input.get(pos) else { return Err(Error::Incomplete); }; if c != s { return Err(Error::Mismatch { message: format!("tag {:?} expect: {:?}, found: {}", tag, c, s), position: pos, }); } pos += 1; } Ok((tag, pos)) }) } /// Parse separated list. pub fn list<'a, I, O, U>( parser: Parser<'a, I, O>, separator: Parser<'a, I, U>, ) -> Parser<'a, I, Vec> where O: 'a, U: 'a, { Parser::new(move |input: &'a [I], start: usize| { let mut items = vec![]; let mut pos = start; if let Ok((first_item, first_pos)) = (parser.method)(input, pos) { items.push(first_item); pos = first_pos; while let Ok((_, sep_pos)) = (separator.method)(input, pos) { match (parser.method)(input, sep_pos) { Ok((more_item, more_pos)) => { items.push(more_item); pos = more_pos; } Err(_) => break, } } } Ok((items, pos)) }) } /// Success when current input symbol is one of the set. 
pub fn one_of<'a, I, S>(set: &'a S) -> Parser<'a, I, I> where I: Clone + PartialEq + Display + Debug, S: Set + ?Sized, { Parser::new(move |input: &'a [I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Incomplete); }; if !set.contains(s) { return Err(Error::Mismatch { message: format!("expect one of: {}, found: {}", set.to_str(), s), position: start, }); }; Ok((s.clone(), start + 1)) }) } /// Success when current input symbol is none of the set. pub fn none_of<'a, I, S>(set: &'static S) -> Parser<'a, I, I> where I: Clone + PartialEq + Display + Debug, S: Set + ?Sized, { Parser::new(move |input: &'a [I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Incomplete); }; if set.contains(s) { return Err(Error::Mismatch { message: format!("expect none of: {}, found: {}", set.to_str(), s), position: start, }); } Ok((s.clone(), start + 1)) }) } /// Success when predicate returns true on current input symbol. pub fn is_a<'a, I, F>(predicate: F) -> Parser<'a, I, I> where I: Clone + PartialEq + Display + Debug, F: Fn(I) -> bool + 'a, { Parser::new(move |input: &'a [I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Incomplete); }; if !predicate(s.clone()) { return Err(Error::Mismatch { message: format!("is_a predicate failed on: {}", s), position: start, }); } Ok((s.clone(), start + 1)) }) } /// Success when predicate returns false on current input symbol. pub fn not_a<'a, I, F>(predicate: F) -> Parser<'a, I, I> where I: Clone + PartialEq + Display + Debug, F: Fn(I) -> bool + 'a, { Parser::new(move |input: &'a [I], start: usize| { let Some(s) = input.get(start) else { return Err(Error::Incomplete); }; if predicate(s.clone()) { return Err(Error::Mismatch { message: format!("not_a predicate failed on: {}", s), position: start, }); } Ok((s.clone(), start + 1)) }) } /// Read n symbols. 
pub fn take<'a, I>(n: usize) -> Parser<'a, I, &'a [I]> { Parser::new(move |input: &'a [I], start: usize| { let pos = start + n; if input.len() < pos { return Err(Error::Incomplete); } Ok((&input[start..pos], pos)) }) } /// Skip n symbols. pub fn skip<'a, I>(n: usize) -> Parser<'a, I, ()> { Parser::new(move |input: &'a [I], start: usize| { let pos = start + n; if input.len() < pos { return Err(Error::Incomplete); } Ok(((), pos)) }) } /// Call a parser factory, can be used to create recursive parsers. pub fn call<'a, I, O, F>(parser_factory: F) -> Parser<'a, I, O> where O: 'a, F: Fn() -> Parser<'a, I, O> + 'a, { Parser::new(move |input: &'a [I], start: usize| { let parser = parser_factory(); (parser.method)(input, start) }) } /// Success when end of input is reached. pub fn end<'a, I>() -> Parser<'a, I, ()> where I: Display, { Parser::new(|input: &'a [I], start: usize| { if let Some(s) = input.get(start) { return Err(Error::Mismatch { message: format!("expect end of input, found: {}", s), position: start, }); } Ok(((), start)) }) } /// Sequence reserve value impl<'a, I, O: 'a, U: 'a> Add> for Parser<'a, I, O> { type Output = Parser<'a, I, (O, U)>; fn add(self, other: Parser<'a, I, U>) -> Self::Output { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).and_then(|(out1, pos1)| { (other.method)(input, pos1).map(|(out2, pos2)| ((out1, out2), pos2)) }) }) } } /// Sequence discard second value impl<'a, I, O: 'a, U: 'a> Sub> for Parser<'a, I, O> { type Output = Parser<'a, I, O>; fn sub(self, other: Parser<'a, I, U>) -> Self::Output { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start) .and_then(|(out1, pos1)| (other.method)(input, pos1).map(|(_, pos2)| (out1, pos2))) }) } } /// Sequence discard first value impl<'a, I: 'a, O: 'a, U: 'a> Mul> for Parser<'a, I, O> { type Output = Parser<'a, I, U>; fn mul(self, other: Parser<'a, I, U>) -> Self::Output { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, 
start).and_then(|(_, pos1)| (other.method)(input, pos1)) }) } } /// Chain two parsers where the second parser depends on the first's result. impl<'a, I, O: 'a, U: 'a, F: Fn(O) -> Parser<'a, I, U> + 'a> Shr for Parser<'a, I, O> { type Output = Parser<'a, I, U>; fn shr(self, other: F) -> Self::Output { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).and_then(|(out, pos)| (other(out).method)(input, pos)) }) } } /// Ordered choice impl<'a, I, O: 'a> BitOr for Parser<'a, I, O> { type Output = Parser<'a, I, O>; fn bitor(self, other: Parser<'a, I, O>) -> Self::Output { Parser::new( move |input: &'a [I], start: usize| match (self.method)(input, start) { Ok(out) => Ok(out), Err(err) => match err { Error::Expect { .. } => Err(err), _ => (other.method)(input, start), }, }, ) } } /// And predicate impl<'a, I, O: 'a> Neg for Parser<'a, I, O> { type Output = Parser<'a, I, bool>; fn neg(self) -> Self::Output { Parser::new(move |input: &'a [I], start: usize| { (self.method)(input, start).map(|_| (true, start)) }) } } /// Not predicate impl<'a, I, O: 'a> Not for Parser<'a, I, O> { type Output = Parser<'a, I, bool>; fn not(self) -> Self::Output { Parser::new( move |input: &'a [I], start: usize| match (self.method)(input, start) { Ok(_) => Err(Error::Mismatch { message: "not predicate failed".to_string(), position: start, }), Err(_) => Ok((true, start)), }, ) } } #[cfg(test)] mod tests { use crate::parser::*; use crate::Error; #[test] fn byte_works() { let input = b"abcde"; let parser = sym(b'a') + one_of(b"ab") - sym(b'C'); let output = parser.parse(input); assert_eq!( output, Err(Error::Mismatch { message: "expect: 67, found: 99".to_string(), position: 2 }) ); let parser = sym(b'a') * none_of(b"AB") - sym(b'c') + seq(b"de"); let output = parser.parse(input); assert_eq!(output, Ok((b'b', &b"de"[..]))); assert_eq!(parser.pos().parse(input), Ok(5)); let parser = sym(b'e') | sym(b'd').expect("d") | empty().map(|_| b'0'); let output = parser.parse(input); 
assert_eq!( output, Err(Error::Expect { message: "Expect d".to_owned(), position: 0, inner: Box::new(Error::Mismatch { message: "expect: 100, found: 97".to_string(), position: 0 }) }) ); } #[test] fn char_works() { let input = "abcd".chars().collect::>(); let parser = tag("ab") + sym('c') | sym('d').map(|_| ("", '0')); let output = parser.parse(&input); assert_eq!(output, Ok(("ab", 'c'))); } #[test] fn recursive_parser() { #[derive(Debug, PartialEq)] enum Expr { Empty, Group(Box), } fn expr() -> Parser<'static, u8, Expr> { (sym(b'(') + call(expr) - sym(b')')).map(|(_, e)| Expr::Group(Box::new(e))) | empty().map(|_| Expr::Empty) } let input = b"(())"; let parser = expr(); let output = parser.parse(input); assert_eq!( output, Ok(Expr::Group(Box::new(Expr::Group(Box::new(Expr::Empty))))) ); } #[test] fn chain_parser() { let input = b"5oooooooo"; { let parser = one_of(b"0123456789").map(|c| c - b'0') >> |n| take(n as usize) + sym(b'o').repeat(0..); assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], vec![b'o'; 3]))); } { let parser = skip(1) * take(3) >> |v: &'static [u8]| take(v.len() + 2).map(move |u| (u, v)); assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], &b"ooo"[..]))); } { let parser = Parser::new(move |input, start| { (skip(1) * take(3)) .parse_at(input, start) .and_then(|(v, pos)| { take(v.len() + 2) .parse_at(input, pos) .map(|(u, end)| ((u, v), end)) }) }); assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], &b"ooo"[..]))); } } #[test] fn repeat_at_least() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(1..2); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 1])) } { let parser = sym(b'x').repeat(1..); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(0..); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'y').repeat(0..); let output = parser.parse(input); assert_eq!(output, Ok(vec![])) } { let parser = sym(b'y').repeat(1..); let 
output = parser.parse(input); assert!(output.is_err()); } { let parser = sym(b'x').repeat(10..); let output = parser.parse(input); assert!(output.is_err()); } } #[test] fn repeat_up_to() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(..2); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 1])) } { let parser = sym(b'x').repeat(..4); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(..); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(..0); let output = parser.parse(input); assert_eq!(output, Ok(vec![])) } { let parser = sym(b'x').repeat(..10); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } } #[test] fn repeat_up_to_inclusive() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(..=2); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 2])) } { let parser = sym(b'x').repeat(..=4); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(..=0); let output = parser.parse(input); assert_eq!(output, Ok(vec![])) } { let parser = sym(b'x').repeat(..=10); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } } #[test] fn repeat_from_to_inclusive() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(1..=2); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 2])) } { let parser = sym(b'x').repeat(1..=4); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(0..=0); let output = parser.parse(input); assert_eq!(output, Ok(vec![])) } { let parser = sym(b'x').repeat(3..=10); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(4..=10); let output = parser.parse(input); assert!(output.is_err()) } } #[test] fn repeat_exactly() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(0); let output = 
parser.parse(input); assert_eq!(output, Ok(vec![])) } { let parser = sym(b'x').repeat(1); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 1])) } { let parser = sym(b'x').repeat(2); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 2])) } { let parser = sym(b'x').repeat(3); let output = parser.parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(4); let output = parser.parse(input); assert!(output.is_err()) } } #[cfg(not(feature = "trace"))] #[test] fn named() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(3); let output = parser.name("name_test_ok").parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(4); let output = parser.name("name_test_err").parse(input); assert_eq!( output, Err(Error::Custom { message: "failed to parse name_test_err".into(), position: 0, inner: Some(Box::new(Error::Mismatch { message: "expect repeat at least 4 times, found 3 times".into(), position: 0 })) }) ) } } #[cfg(feature = "trace")] #[test] // Note: this doesn't test the tracing per se, just that the `name()` method executes // in the same way when the feature is turned on. 
fn named() { let input = b"xxxooo"; { let parser = sym(b'x').repeat(3); let output = parser.name("name_test_ok").parse(input); assert_eq!(output, Ok(vec![b'x'; 3])) } { let parser = sym(b'x').repeat(4); let output = parser.name("name_test_err").parse(input); assert_eq!( output, Err(Error::Custom { message: "failed to parse name_test_err".into(), position: 0, inner: Some(Box::new(Error::Mismatch { message: "expect repeat at least 4 times, found 3 times".into(), position: 0 })) }) ) } } } ================================================ FILE: src/range.rs ================================================ use std::ops::{Bound, RangeBounds, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}; pub trait RangeArgument { fn start(&self) -> Bound<&usize>; fn end(&self) -> Bound<&usize>; } impl RangeArgument for Range { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for RangeFrom { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for RangeFull { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for RangeInclusive { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for RangeTo { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for RangeToInclusive { fn start(&self) -> Bound<&usize> { self.start_bound() } fn end(&self) -> Bound<&usize> { self.end_bound() } } impl RangeArgument for usize { fn start(&self) -> Bound<&usize> { Bound::Included(self) } fn end(&self) -> Bound<&usize> { Bound::Included(self) } } #[cfg(test)] mod test { use super::*; fn accept(ra: R, expected: impl std::ops::RangeBounds) where R: RangeArgument, T: std::fmt::Debug + std::cmp::PartialEq { 
assert_eq!(ra.start(), expected.start_bound()); assert_eq!(ra.end(), expected.end_bound()); } #[test] fn unbounded() { accept::(.., ..) } #[test] fn up_to_inclusive() { accept::(..=2, ..=2) } #[test] fn up_to_exclusive() { accept::(..2, ..2) } #[test] fn from() { accept::(1.., 1..) } #[test] fn from_to_inclusive() { accept::(1..=2, 1..=2) } #[test] fn from_to_exclusive() { accept::(1..3, 1..3) } #[test] fn exactly() { accept::(42, 42..=42) } } ================================================ FILE: src/result.rs ================================================ use std::{ error, fmt::{self, Display}, }; /// Parser error. #[derive(Debug, PartialEq, Clone)] pub enum Error { Incomplete, Mismatch { message: String, position: usize, }, Conversion { message: String, position: usize, }, Expect { message: String, position: usize, inner: Box, }, Custom { message: String, position: usize, inner: Option>, }, } impl error::Error for Error { fn description(&self) -> &'static str { "Parse error" } } impl Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Incomplete => write!(f, "Incomplete"), Self::Mismatch { ref message, ref position, } => write!(f, "Mismatch at {}: {}", position, message), Self::Conversion { ref message, ref position, } => write!(f, "Conversion failed at {}: {}", position, message), Self::Expect { ref message, ref position, ref inner, } => write!(f, "{} at {}: {}", message, position, inner), Self::Custom { ref message, ref position, inner: Some(ref inner), } => write!(f, "{} at {}, (inner: {})", message, position, inner), Self::Custom { ref message, ref position, inner: None, } => write!(f, "{} at {}", message, position), } } } /// Parser result, `Result` ia alias of `Result`. 
pub type Result = ::std::result::Result; ================================================ FILE: src/set.rs ================================================ use std::{ cmp::{PartialEq, PartialOrd}, ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}, str, }; /// Set relationship. pub trait Set { /// Whether a set contains an element or not. fn contains(&self, elem: &T) -> bool; /// Convert to text for display. fn to_str(&self) -> &str { "" } } impl Set for [T] { fn contains(&self, elem: &T) -> bool { (self as &[T]).contains(elem) } } impl Set for str { fn contains(&self, elem: &char) -> bool { (self as &str).contains(*elem) } fn to_str(&self) -> &str { self } } impl Set for Range { fn contains(&self, elem: &T) -> bool { self.start <= *elem && self.end > *elem } } impl Set for RangeFrom { fn contains(&self, elem: &T) -> bool { self.start <= *elem } } impl Set for RangeInclusive { fn contains(&self, elem: &T) -> bool { self.start() <= elem && self.end() >= elem } } impl Set for RangeTo { fn contains(&self, elem: &T) -> bool { self.end > *elem } } impl Set for RangeToInclusive { fn contains(&self, elem: &T) -> bool { self.end >= *elem } } impl Set for RangeFull { fn contains(&self, _: &T) -> bool { true } fn to_str(&self) -> &str { ".." 
} } impl Set for [u8; N] { fn contains(&self, elem: &u8) -> bool { (self as &[u8]).contains(elem) } fn to_str(&self) -> &str { str::from_utf8(self).unwrap_or("") } } #[cfg(test)] mod test { use crate::parser::*; #[test] fn one_of_using_set() { assert!(one_of(b"az").parse(b"a").is_ok()); assert!(one_of(b"az").parse(b"1").is_err()); } #[test] fn one_of_using_range() { assert!(one_of(&(b'a'..b'z')).parse(b"a").is_ok()); assert!(one_of(&(b'a'..b'z')).parse(b"z").is_err()); assert!(one_of(&(b'a'..b'z')).parse(b"1").is_err()); } #[test] fn one_of_using_range_to() { assert!(one_of(&(..b'z')).parse(b"a").is_ok()); assert!(one_of(&(..b'z')).parse(b"z").is_err()); assert!(one_of(&(..b'z')).parse(b"1").is_ok()); } #[test] fn one_of_using_range_inclusive() { assert!(one_of(&(b'a'..=b'z')).parse(b"a").is_ok()); assert!(one_of(&(b'a'..=b'z')).parse(b"z").is_ok()); assert!(one_of(&(b'a'..=b'z')).parse(b"1").is_err()); } #[test] fn one_of_using_range_to_inclusive() { assert!(one_of(&(..=b'z')).parse(b"a").is_ok()); assert!(one_of(&(..=b'z')).parse(b"z").is_ok()); assert!(one_of(&(..=b'z')).parse(b"1").is_ok()); } #[test] fn one_of_using_full_range() { assert!(one_of(&(..)).parse(b"a").is_ok()); assert!(one_of(&(..)).parse(b"z").is_ok()); assert!(one_of(&(..)).parse(b"1").is_ok()); } } ================================================ FILE: src/utf8.rs ================================================ // Variants of parser functions specialized for matching UTF-8 strings and returning chars use super::parser; use super::{Error, Result}; use crate::range::RangeArgument; use crate::set::Set; use bstr::decode_utf8; use std::fmt::Debug; use std::ops::{Add, BitOr, Mul, Neg, Not, Shr, Sub}; use std::str; // / Parser combinator. //type Parse<'a, O> = dyn Fn(&'a [u8], usize) -> Result<(O, usize)> + 'a; /// Being wrapped in this struct guarantees that the parser within will only match valid UTF-8 strings. 
pub struct Parser<'a, O>(parser::Parser<'a, u8, O>); impl<'a, O> Parser<'a, O> { /// Create new parser. pub fn new

(parse: P) -> Self where P: Fn(&'a [u8], usize) -> Result<(O, usize)> + 'a, { Self(parser::Parser::new(parse)) } /// Collect all matched input symbols. // This method is the primary reason utf8::Parser exists at all. pub fn collect(self) -> Parser<'a, &'a str> where O: 'a, { Parser(self.0.collect().map( // UNSAFE: Because we only could have constructed this object from other utf8::Parser objects, the match space must be valid UTF-8 |s| unsafe { str::from_utf8_unchecked(s) }, )) } // Remaining methods in impl only delegate to base parser::Parser /// Apply the parser to parse input. pub fn parse(&self, input: &'a [u8]) -> Result { self.0.parse(input) } /// Parse input at specified byte position. pub fn parse_at(&self, input: &'a [u8], start: usize) -> Result<(O, usize)> { self.0.parse_at(input, start) } /// Apply the parser to parse input. pub fn parse_str(&self, input: &'a str) -> Result { self.0.parse(input.as_bytes()) } /// Convert parser result to desired value. pub fn map(self, f: F) -> Parser<'a, U> where F: Fn(O) -> U + 'a, O: 'a, U: 'a, { Parser(self.0.map(f)) } /// Convert parser result to desired value, fail in case of conversion error. pub fn convert(self, f: F) -> Parser<'a, U> where F: Fn(O) -> ::std::result::Result + 'a, E: Debug, O: 'a, U: 'a, { Parser(self.0.convert(f)) } /// Cache parser output result to speed up backtracking. pub fn cache(self) -> Self where O: Clone + 'a, { Self(self.0.cache()) } /// Get input position after matching parser. pub fn pos(self) -> Parser<'a, usize> where O: 'a, { Parser(self.0.pos()) } /// Discard parser output. pub fn discard(self) -> Parser<'a, ()> where O: 'a, { Parser(self.0.discard()) } /// Make parser optional. 
pub fn opt(self) -> Parser<'a, Option> where O: 'a, { Parser(self.0.opt()) } /// `p.repeat(5)` repeat p exactly 5 times /// `p.repeat(0..)` repeat p zero or more times /// `p.repeat(1..)` repeat p one or more times /// `p.repeat(1..4)` match p at least 1 and at most 3 times pub fn repeat(self, range: R) -> Parser<'a, Vec> where R: RangeArgument + Debug + 'a, O: 'a, { Parser(self.0.repeat(range)) } /// Give parser a name to identify parsing errors. pub fn name(self, name: &'a str) -> Self where O: 'a, { Self(self.0.name(name)) } /// Mark parser as expected, abort early when failed in ordered choice. pub fn expect(self, name: &'a str) -> Self where O: 'a, { Self(self.0.expect(name)) } } impl<'a, O> From> for parser::Parser<'a, u8, O> { fn from(parser: Parser<'a, O>) -> Self { parser.0 // Simply unwrap } } pub fn decode(slice: &[u8], start: usize) -> Result<(char, usize)> { let (ch, size) = decode_utf8(&slice[start..]); let Some(ch) = ch else { return no_utf8(start, size); }; Ok((ch, size)) } // Helper for functions that decode_utf8 and fail fn no_utf8(start: usize, size: usize) -> Result { Err(Error::Mismatch { message: if size == 0 { "end of input reached" } else { "not UTF-8" } .to_owned(), position: start, }) } /// Match any UTF-8 character. pub fn any<'a>() -> Parser<'a, char> { Parser::new(|input: &[u8], start: usize| { let (ch, size) = decode(input, start)?; let pos = start + size; Ok((ch, pos)) }) } /// Match specific UTF-8 character. pub fn sym<'a>(tag: char) -> Parser<'a, char> { Parser::new(move |input: &[u8], start: usize| { let (ch, size) = decode(input, start)?; if ch != tag { return Err(Error::Mismatch { message: format!("expect: {}, found: {}", tag, ch), position: start, }); } let pos = start + size; Ok((ch, pos)) }) } /// Success when sequence of chars matches current input. 
pub fn seq<'a, 'b: 'a>(tag_str: &'b str) -> Parser<'a, &'a str> { let tag = tag_str.as_bytes(); Parser::new(move |input: &'a [u8], start: usize| { let mut index = 0; loop { let pos = start + index; if index == tag.len() { let result = &input[start..pos]; // UNSAFE: Because slice is byte-identical to a str, it is known valid UTF-8 let result_str = unsafe { str::from_utf8_unchecked(result) }; return Ok((result_str, pos)); } let Some(s) = input.get(pos) else { return Err(Error::Incomplete); }; if tag[index] != *s { return Err(Error::Mismatch { message: format!("seq {:?} at byte index: {}", tag, pos), position: pos, }); } index += 1; } }) } /// Success when current input symbol is one of the set. pub fn one_of<'a, S>(set: &'a S) -> Parser<'a, char> where S: Set + ?Sized, { Parser::new(move |input: &'a [u8], start: usize| { let (ch, size) = decode(input, start)?; if !set.contains(&ch) { return Err(Error::Mismatch { message: format!("expect one of: {}, found: {}", set.to_str(), ch), position: start, }); } let pos = start + size; Ok((ch, pos)) }) } /// Success when current input symbol is none of the set. pub fn none_of<'a, S>(set: &'a S) -> Parser<'a, char> where S: Set + ?Sized, { Parser::new(move |input: &'a [u8], start: usize| { let (ch, size) = decode(input, start)?; if set.contains(&ch) { return Err(Error::Mismatch { message: format!("expect one of: {}, found: {}", set.to_str(), ch), position: start, }); } let pos = start + size; Ok((ch, pos)) }) } /// Success when predicate returns true on current input symbol. pub fn is_a<'a, F>(predicate: F) -> Parser<'a, char> where F: Fn(char) -> bool + 'a, { Parser::new(move |input: &'a [u8], start: usize| { let (ch, size) = decode(input, start)?; if !predicate(ch) { return Err(Error::Mismatch { message: format!("is_a predicate failed on: {}", ch), position: start, }); } let pos = start + size; Ok((ch, pos)) }) } /// Success when predicate returns false on current input symbol. 
pub fn not_a<'a, F>(predicate: F) -> Parser<'a, char> where F: Fn(char) -> bool + 'a, { Parser::new(move |input: &'a [u8], start: usize| { let (ch, size) = decode(input, start)?; if predicate(ch) { return Err(Error::Mismatch { message: format!("is_a predicate failed on: {}", ch), position: start, }); } let pos = start + size; Ok((ch, pos)) }) } /// Read n chars. pub fn take<'a>(n: usize) -> Parser<'a, &'a str> { Parser::new(move |input: &'a [u8], start: usize| { let mut byte_pos = start; for _ in 0..n { let (ch, size) = decode_utf8(&input[start..]); if ch.is_none() { return no_utf8(byte_pos, size); } byte_pos += size; } let result = &input[start..byte_pos]; // UNSAFE: Because every char has been checked by decode_utf8, this string is known utf8 let result_str = unsafe { str::from_utf8_unchecked(result) }; Ok((result_str, byte_pos)) }) } /// Skip n symbols. pub fn skip<'a>(n: usize) -> Parser<'a, ()> { Parser::new(move |input: &'a [u8], start: usize| { let mut byte_pos = start; for _ in 0..n { let (ch, size) = decode_utf8(&input[start..]); if ch.is_none() { return no_utf8(byte_pos, size); } byte_pos += size; } Ok(((), byte_pos)) }) } /// Read n bytes exactly. pub fn take_bytes<'a>(n: usize) -> Parser<'a, &'a str> { Parser::new(move |input: &'a [u8], start: usize| { // FIXME: This runs in linear time because it checks each character. // If we could remember which inputs were passed in from parse_str() instead of parse(), // we could assume the characters are valid utf8 and run this in constant time by only checking // the final character using bstr::decode_last_utf8. 
let mut byte_pos = start; loop { let (ch, size) = decode_utf8(&input[start..]); if ch.is_none() { return no_utf8(byte_pos, size); } byte_pos += size; if byte_pos > n { return Err(Error::Mismatch { message: "range splits a UTF-8 character".to_owned(), position: start, }); } if byte_pos == n { let result = &input[start..byte_pos]; // UNSAFE: Because every char has been checked by decode_utf8, this string is known utf8 let result_str = unsafe { str::from_utf8_unchecked(result) }; return Ok((result_str, byte_pos)); } } }) } /// Skip n bytes exactly. pub fn skip_bytes<'a>(n: usize) -> Parser<'a, ()> { Parser::new(move |input: &'a [u8], start: usize| { // FIXME: See note on take_bytes. let mut byte_pos = start; loop { let (ch, size) = decode_utf8(&input[start..]); if ch.is_none() { return no_utf8(byte_pos, size); } byte_pos += size; if byte_pos > n { return Err(Error::Mismatch { message: "range splits a UTF-8 character".to_owned(), position: start, }); } if byte_pos == n { return Ok(((), byte_pos)); } } }) } /// Chain two parsers where the second parser depends on the first's result. impl<'a, O: 'a, U: 'a, F: Fn(O) -> Parser<'a, U> + 'a> Shr for Parser<'a, O> { type Output = Parser<'a, U>; fn shr(self, other: F) -> Self::Output { Parser::new(move |input: &'a [u8], start: usize| { (self.0.method)(input, start).and_then(|(out, pos)| (other(out).0.method)(input, pos)) }) } } // Note: There are no "degrade to parser::Parser" implementations for >> // because Rust cannot tell the difference between an FN(O)->U and an FN(O)->V. // Remaining functions in file only delegate to base parser::Parser /// Always succeeds, consume no input. pub fn empty<'a>() -> Parser<'a, ()> { Parser(parser::empty()) } /// Parse separated list. pub fn list<'a, O, U>(item: Parser<'a, O>, separator: Parser<'a, U>) -> Parser<'a, Vec> where O: 'a, U: 'a, { Parser(parser::list(item.0, separator.0)) } /// Call a parser factory, can be used to create recursive parsers. 
pub fn call<'a, O, F>(parser_factory: F) -> Parser<'a, O> where O: 'a, F: Fn() -> Parser<'a, O> + 'a, { Parser(parser::call(move || parser_factory().0)) } /// Success when end of input is reached. pub fn end<'a>() -> Parser<'a, ()> { Parser(parser::end()) } // And, Sub and Mul are similar enough we can implement them with macros macro_rules! utf_op { ( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => { #[doc=$doc] impl<'a, Left: 'a, Right: 'a> $impl_name> for Parser<'a, Left> { type Output = Parser<'a, $return_type>; fn $fn_name (self, other: Parser<'a, Right>) -> Self::Output { Parser(self.0 $op other.0) } } }; } macro_rules! utf_u8_op { ( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => { #[doc=concat!($doc, " (but degrade to non-utf8 parser)")] impl<'a, Left: 'a, Right: 'a> $impl_name> for Parser<'a, Left> { type Output = parser::Parser<'a, u8, $return_type>; fn $fn_name (self, other: parser::Parser<'a, u8, Right>) -> Self::Output { self.0 $op other } } }; } macro_rules! u8_utf_op { ( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => { #[doc=concat!($doc, " (but degrade to non-utf8 parser)")] impl<'a, Left: 'a, Right: 'a> $impl_name> for parser::Parser<'a, u8, Left> { type Output = parser::Parser<'a, u8, $return_type>; fn $fn_name (self, other: Parser<'a, Right>) -> Self::Output { self $op other.0 } } }; } macro_rules! 
all_op { ( $impl_name:ident, $fn_name:ident, $op:tt, $return_type:ty, $doc:expr ) => { utf_op!($impl_name, $fn_name, $op, $return_type, $doc); utf_u8_op!($impl_name, $fn_name, $op, $return_type, $doc); u8_utf_op!($impl_name, $fn_name, $op, $return_type, $doc); }; } all_op!(Add, add, +, (Left, Right), "Sequence reserve value"); all_op!(Sub, sub, -, Left, "Sequence discard second value"); all_op!(Mul, mul, *, Right, "Sequence discard first value"); /// Ordered choice impl<'a, O: 'a> BitOr for Parser<'a, O> { type Output = Self; fn bitor(self, other: Self) -> Self { Self(self.0 | other.0) } } /// Ordered choice (but degrade to non-utf8 parser) impl<'a, O: 'a> BitOr> for Parser<'a, O> { type Output = parser::Parser<'a, u8, O>; fn bitor(self, other: parser::Parser<'a, u8, O>) -> Self::Output { self.0 | other } } /// Ordered choice (but degrade to non-utf8 parser) impl<'a, O: 'a> BitOr> for parser::Parser<'a, u8, O> { type Output = parser::Parser<'a, u8, O>; fn bitor(self, other: Parser<'a, O>) -> Self::Output { self | other.0 } } /// And predicate impl<'a, O: 'a> Neg for Parser<'a, O> { type Output = Parser<'a, bool>; fn neg(self) -> Self::Output { Parser(-self.0) } } /// Not predicate impl<'a, O: 'a> Not for Parser<'a, O> { type Output = Parser<'a, bool>; fn not(self) -> Self::Output { Parser(!self.0) } } ================================================ FILE: tests/list.rs ================================================ extern crate pom; use pom::parser::*; use pom::Parser; fn spaces() -> Parser { one_of(b" ").repeat(1..).discard() } fn works() -> Parser> { list(one_of(b"abc"), spaces() * seq(b"and") - spaces()) } fn dangle() -> Parser, &'static [u8])> { list(one_of(b"abc"), spaces() * seq(b"and") - spaces()) + seq(b" and") } #[test] fn test_list() { let one = b"a and b and c"; assert_eq!(works().parse(one), Ok(vec![b'a', b'b', b'c'])); let two = b"a and b and c and "; assert_eq!( dangle().parse(two), Ok((vec![b'a', b'b', b'c'], &b" and"[..])) ); }