Repository: TheLartians/PEGParser Branch: master Commit: 99ea2268fa5b Files: 39 Total size: 100.7 KB Directory structure: gitextract_iw089xqt/ ├── .clang-format ├── .cmake-format ├── .github/ │ └── workflows/ │ ├── examples.yml │ ├── install.yml │ ├── macos.yml │ ├── style.yml │ ├── ubuntu.yml │ └── windows.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake/ │ ├── CPM.cmake │ └── tools.cmake ├── codecov.yaml ├── example/ │ ├── CMakeLists.txt │ ├── calculator.cpp │ ├── calculator_sequental.cpp │ ├── calculator_visitor.cpp │ ├── json_parser.cpp │ ├── python_indentation.cpp │ └── type_checker.cpp ├── glue/ │ ├── CMakeLists.txt │ ├── include/ │ │ └── peg_parser/ │ │ └── glue.h │ └── source/ │ └── glue.cpp ├── include/ │ └── peg_parser/ │ ├── generator.h │ ├── grammar.h │ ├── interpreter.h │ ├── parser.h │ └── presets.h ├── source/ │ ├── grammar.cpp │ ├── interpreter.cpp │ ├── parser.cpp │ └── presets.cpp └── test/ ├── CMakeLists.txt └── source/ ├── example.cpp ├── glue.cpp ├── main.cpp └── parser.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- BasedOnStyle: Google AccessModifierOffset: '-2' AlignTrailingComments: 'true' AllowAllParametersOfDeclarationOnNextLine: 'false' AlwaysBreakTemplateDeclarations: 'No' BreakBeforeBraces: Attach ColumnLimit: '100' ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' IncludeBlocks: Regroup IndentPPDirectives: AfterHash IndentWidth: '2' NamespaceIndentation: All BreakBeforeBinaryOperators: All BreakBeforeTernaryOperators: 'true' ... 
================================================ FILE: .cmake-format ================================================ format: tab_size: 2 line_width: 100 dangle_parens: true parse: additional_commands: cpmaddpackage: pargs: nargs: '*' flags: [] spelling: CPMAddPackage kwargs: &cpmaddpackagekwargs NAME: 1 FORCE: 1 VERSION: 1 GIT_TAG: 1 DOWNLOAD_ONLY: 1 GITHUB_REPOSITORY: 1 GITLAB_REPOSITORY: 1 GIT_REPOSITORY: 1 SVN_REPOSITORY: 1 SVN_REVISION: 1 SOURCE_DIR: 1 DOWNLOAD_COMMAND: 1 FIND_PACKAGE_ARGUMENTS: 1 NO_CACHE: 1 GIT_SHALLOW: 1 URL: 1 URL_HASH: 1 URL_MD5: 1 DOWNLOAD_NAME: 1 DOWNLOAD_NO_EXTRACT: 1 HTTP_USERNAME: 1 HTTP_PASSWORD: 1 OPTIONS: + cpmfindpackage: pargs: nargs: '*' flags: [] spelling: CPMFindPackage kwargs: *cpmaddpackagekwargs packageproject: pargs: nargs: '*' flags: [] spelling: packageProject kwargs: NAME: 1 VERSION: 1 NAMESPACE: 1 INCLUDE_DIR: 1 INCLUDE_DESTINATION: 1 BINARY_DIR: 1 COMPATIBILITY: 1 VERSION_HEADER: 1 DEPENDENCIES: + ================================================ FILE: .github/workflows/examples.yml ================================================ name: Examples on: push: branches: - master pull_request: branches: - master env: CTEST_OUTPUT_ON_FAILURE: 1 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: configure run: cmake -Sexample -Bbuild - name: build run: cmake --build build -j4 ================================================ FILE: .github/workflows/install.yml ================================================ name: Install on: push: branches: - master pull_request: branches: - master env: CTEST_OUTPUT_ON_FAILURE: 1 CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: 
actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: build and install library run: | CXX=g++-9 cmake -Sglue -Bbuild -DCMAKE_BUILD_TYPE=Release sudo cmake --build build --target install rm -rf build - name: configure run: CXX=g++-9 cmake -Stest -Bbuild -DTEST_INSTALLED_VERSION=1 - name: build run: cmake --build build --config Debug -j4 - name: test run: | cd build ctest --build-config Debug ================================================ FILE: .github/workflows/macos.yml ================================================ name: MacOS on: push: branches: - master pull_request: branches: - master env: CTEST_OUTPUT_ON_FAILURE: 1 CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: macos-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: configure run: cmake -Stest -Bbuild - name: build run: cmake --build build --config Debug -j4 - name: test run: | cd build ctest --build-config Debug ================================================ FILE: .github/workflows/style.yml ================================================ name: Style on: push: branches: - master pull_request: branches: - master env: CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: macos-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: Install format dependencies run: | brew install clang-format pip3 install cmake_format==0.6.11 pyyaml - name: configure run: cmake -Stest -Bbuild - name: check style run: cmake --build build --target check-format ================================================ FILE: .github/workflows/ubuntu.yml ================================================ name: 
Ubuntu on: push: branches: - master pull_request: branches: - master env: CTEST_OUTPUT_ON_FAILURE: 1 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: install valgrind run: | sudo apt-get update sudo apt install -y valgrind - name: configure run: CXX=g++-8 cmake -Stest -Bbuild -DCMAKE_BUILD_TYPE=Debug - name: build run: cmake --build build -j4 - name: test run: cmake --build build -j4 - name: run tests with valgrind run: valgrind --track-origins=yes --error-exitcode=1 --leak-check=full ./build/PEGParserTests - name: configure with code coverage run: CXX=g++-8 cmake -Stest -Bbuild -DENABLE_TEST_COVERAGE=1 - name: build with code coverage run: cmake --build build -j4 - name: test with code coverage run: | cd build ctest --build-config Debug - name: install code coverage tools run: | wget https://github.com/linux-test-project/lcov/releases/download/v1.14/lcov-1.14.tar.gz tar xvfz lcov-1.14.tar.gz; sudo make install -C lcov-1.14 - name: collect code coverage run: | lcov --gcov-tool $(which gcov-8) --directory . 
--capture --no-external --exclude "*tests*" --exclude "*_deps*" --quiet --output-file coverage.info lcov --gcov-tool $(which gcov-8) --list coverage.info bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports" ================================================ FILE: .github/workflows/windows.yml ================================================ name: Windows on: push: branches: - master pull_request: branches: - master env: CTEST_OUTPUT_ON_FAILURE: 1 CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm_modules jobs: build: runs-on: windows-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: "**/cpm_modules" key: ${{ github.workflow }}-cpm-modules-${{ hashFiles('**/CMakeLists.txt', '**/*.cmake') }} - name: configure run: cmake -Stest -Bbuild - name: build run: cmake --build build --config Debug -j4 - name: test run: | cd build ctest --build-config Debug ================================================ FILE: .gitignore ================================================ build* .vscode ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) # ---- Project ---- project( PEGParser VERSION 2.1 LANGUAGES CXX ) # ---- Include guards ---- if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) message( FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." 
) endif() # ---- Add dependencies via CPM ---- include(cmake/CPM.cmake) # PackageProject.cmake will be used to make our target installable CPMAddPackage( NAME PackageProject.cmake GITHUB_REPOSITORY TheLartians/PackageProject.cmake VERSION 1.4 ) CPMAddPackage( NAME EasyIterator GITHUB_REPOSITORY TheLartians/EasyIterator VERSION 1.4 ) # ---- Add source files ---- file(GLOB_RECURSE headers CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h") file(GLOB_RECURSE sources CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") # ---- Create library ---- add_library(PEGParser ${headers} ${sources}) set_target_properties(PEGParser PROPERTIES CXX_STANDARD 17) target_compile_options(PEGParser PUBLIC "$<$:/permissive->") target_link_libraries(PEGParser PRIVATE EasyIterator) target_include_directories( PEGParser PUBLIC $ $ ) # ---- Create an installable target ---- # this allows users to install and find the library via `find_package()`. packageProject( NAME ${PROJECT_NAME} NAMESPACE ${PROJECT_NAME} VERSION ${PROJECT_VERSION} BINARY_DIR ${PROJECT_BINARY_DIR} INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION} DEPENDENCIES EasyIterator ) ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2018, Lars Melchior All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ [![Actions Status](https://github.com/TheLartians/PEGParser/workflows/MacOS/badge.svg)](https://github.com/TheLartians/PEGParser/actions) [![Actions Status](https://github.com/TheLartians/PEGParser/workflows/Windows/badge.svg)](https://github.com/TheLartians/PEGParser/actions) [![Actions Status](https://github.com/TheLartians/PEGParser/workflows/Ubuntu/badge.svg)](https://github.com/TheLartians/PEGParser/actions) [![Actions Status](https://github.com/TheLartians/PEGParser/workflows/Style/badge.svg)](https://github.com/TheLartians/PEGParser/actions) [![codecov](https://codecov.io/gh/TheLartians/PEGParser/branch/master/graph/badge.svg)](https://codecov.io/gh/TheLartians/PEGParser) # PEGParser A linear-time C++17 PEG parser generator supporting memoization, left-recursion and context-dependent grammars. ## Example The following defines a simple calculator program. 
It is able to parse and evaluate the basic operations `+`, `-`, `*`, `/` while obeying operator and bracket precedence and ignoring whitespace characters between tokens. ```c++ #include <peg_parser/generator.h> #include <iostream> void example() { peg_parser::ParserGenerator<float> g; // Define grammar and evaluation rules g.setSeparator(g["Whitespace"] << "[\t ]"); g["Sum" ] << "Add | Subtract | Product"; g["Product" ] << "Multiply | Divide | Atomic"; g["Atomic" ] << "Number | '(' Sum ')'"; g["Add" ] << "Sum '+' Product" >> [](auto e){ return e[0].evaluate() + e[1].evaluate(); }; g["Subtract"] << "Sum '-' Product" >> [](auto e){ return e[0].evaluate() - e[1].evaluate(); }; g["Multiply"] << "Product '*' Atomic" >> [](auto e){ return e[0].evaluate() * e[1].evaluate(); }; g["Divide" ] << "Product '/' Atomic" >> [](auto e){ return e[0].evaluate() / e[1].evaluate(); }; g["Number" ] << "'-'? [0-9]+ ('.' [0-9]+)?" >> [](auto e){ return stof(e.string()); }; g.setStart(g["Sum"]); // Execute a string auto input = "1 + 2 * (3+4)/2 - 3"; float result = g.run(input); // -> 5 std::cout << input << " = " << result << std::endl; } ``` ## Quickstart PEGParser requires at least CMake 3.14 and the ability to compile C++17 code. The following shows how to compile and run the calculator example. ```bash git clone https://github.com/TheLartians/PegParser cd PegParser cmake -Sexample -Bbuild/example cmake --build build/example -j8 ./build/example/calculator ``` You should familiarize yourself with the syntax of [parsing expression grammars](http://en.wikipedia.org/wiki/Parsing_expression_grammar). The included [examples](example) should help you get started. ## Installation and usage PEGParser can be easily added to your project through [CPM.cmake](https://github.com/TheLartians/CPM.cmake). 
```cmake CPMAddPackage( NAME PEGParser VERSION 2.1.1 GITHUB_REPOSITORY TheLartians/PEGParser ) target_link_libraries(myProject PEGParser::PEGParser) ``` ## Project goals PEGParser is designed for ease-of-use and rapid prototyping of grammars with arbitrary complexity, and builds its parsers at run time. So far no work has been invested in optimizing the library; however, it runs fast enough to be used in several production projects. ## Time complexity PEGParser uses memoization, resulting in linear time complexity (as a function of input string length) for grammars without left-recursion. Left-recursive grammars have quadratic time complexity in the worst case. Memoization can also be disabled on a per-rule basis, reducing the memory footprint and allowing context-dependent rules. ================================================ FILE: cmake/CPM.cmake ================================================ set(CPM_DOWNLOAD_VERSION 0.28.0) if(CPM_SOURCE_CACHE) set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") elseif(DEFINED ENV{CPM_SOURCE_CACHE}) set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") else() set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") endif() if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") file(DOWNLOAD https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake ${CPM_DOWNLOAD_LOCATION} ) endif() include(${CPM_DOWNLOAD_LOCATION}) ================================================ FILE: cmake/tools.cmake ================================================ # this file contains a list of tools that can be activated and downloaded on-demand. Each tool is # enabled during configuration by passing an additional `-DUSE_<TOOL>=<VALUE>` argument to CMake # only activate tools for top level project if(NOT PROJECT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) return() endif() 
include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake) # enables sanitizers support using the `USE_SANITIZER` flag. Available values are: Address, # Memory, MemoryWithOrigins, Undefined, Thread, Leak, 'Address;Undefined' if(USE_SANITIZER OR USE_STATIC_ANALYZER) CPMAddPackage( NAME StableCoder-cmake-scripts GITHUB_REPOSITORY StableCoder/cmake-scripts GIT_TAG 3d2d5a9fb26f0ce24e3e4eaeeff686ec2ecfb3fb ) if(USE_SANITIZER) include(${StableCoder-cmake-scripts_SOURCE_DIR}/sanitizers.cmake) endif() if(USE_STATIC_ANALYZER) if("clang-tidy" IN_LIST USE_STATIC_ANALYZER) set(CLANG_TIDY ON CACHE INTERNAL "" ) else() set(CLANG_TIDY OFF CACHE INTERNAL "" ) endif() if("iwyu" IN_LIST USE_STATIC_ANALYZER) set(IWYU ON CACHE INTERNAL "" ) else() set(IWYU OFF CACHE INTERNAL "" ) endif() if("cppcheck" IN_LIST USE_STATIC_ANALYZER) set(CPPCHECK ON CACHE INTERNAL "" ) else() set(CPPCHECK OFF CACHE INTERNAL "" ) endif() include(${StableCoder-cmake-scripts_SOURCE_DIR}/tools.cmake) clang_tidy(${CLANG_TIDY_ARGS}) include_what_you_use(${IWYU_ARGS}) cppcheck(${CPPCHECK_ARGS}) endif() endif() # enables CCACHE support through the USE_CCACHE flag. Possible values are: YES, NO or equivalent if(USE_CCACHE) CPMAddPackage( NAME Ccache.cmake GITHUB_REPOSITORY TheLartians/Ccache.cmake VERSION 1.2.1 ) endif() ================================================ FILE: codecov.yaml ================================================ ignore: - "test" comment: require_changes: true ================================================ FILE: example/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) # ---- Project ---- project(PEGParserExamples CXX) # --- Import tools ---- include(../cmake/tools.cmake) # ---- Add dependencies ---- include(../cmake/CPM.cmake) CPMAddPackage(NAME PEGParser SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/..) 
# ---- Create binaries ---- file(GLOB example_sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) foreach(example_source_file ${example_sources}) get_filename_component(filename ${example_source_file} NAME) string(REPLACE ".cpp" "" example_name ${filename}) add_executable(${example_name} ${example_source_file}) set_target_properties(${example_name} PROPERTIES CXX_STANDARD 17) target_link_libraries(${example_name} PEGParser::PEGParser) endforeach() ================================================ FILE: example/calculator.cpp ================================================ /** * This example demonstrate how we can use peg_parser::parser to define a * simple command-line calculator. The parser supports the basic operators `+`, * `-`, `*`, `/`, `^` as well as using and assigning variables via `=`. * * Note, that The grammar is defined in a left-recursive way. While this is * easiest to implement, it recomended to rewrite left-recursive grammars * sequentially for optimal performance. */ #include #include #include #include int main() { using namespace std; using VariableMap = unordered_map; peg_parser::ParserGenerator calculator; auto &g = calculator; g.setSeparator(g["Whitespace"] << "[\t ]"); g["Expression"] << "Set | Sum"; g["Set"] << "Name '=' Sum" >> [](auto e, auto &v) { return v[e[0].string()] = e[1].evaluate(v); }; g["Sum"] << "Add | Subtract | Product"; g["Product"] << "Multiply | Divide | Exponent"; g["Exponent"] << "Power | Atomic"; g["Atomic"] << "Number | Brackets | Variable"; g["Brackets"] << "'(' Sum ')'"; g["Add"] << "Sum '+' Product" >> [](auto e, auto &v) { return e[0].evaluate(v) + e[1].evaluate(v); }; g["Subtract"] << "Sum '-' Product" >> [](auto e, auto &v) { return e[0].evaluate(v) - e[1].evaluate(v); }; g["Multiply"] << "Product '*' Exponent" >> [](auto e, auto &v) { return e[0].evaluate(v) * e[1].evaluate(v); }; g["Divide"] << "Product '/' Exponent" >> [](auto e, auto &v) { return e[0].evaluate(v) / e[1].evaluate(v); }; g["Power"] << "Atomic ('^' 
Exponent)" >> [](auto e, auto &v) { return pow(e[0].evaluate(v), e[1].evaluate(v)); }; g["Variable"] << "Name" >> [](auto e, auto &v) { return v[e[0].string()]; }; g["Name"] << "[a-zA-Z] [a-zA-Z0-9]*"; g["Number"] << "'-'? [0-9]+ ('.' [0-9]+)?" >> [](auto e, auto &) { return stod(e.string()); }; g.setStart(g["Expression"]); cout << "Enter an expression to be evaluated.\n"; VariableMap variables; while (true) { string str; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } try { auto result = calculator.run(str, variables); cout << str << " = " << result << endl; } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << string(syntax->begin, ' '); cout << string(syntax->length(), '~'); cout << "^\n"; cout << " " << "Syntax error while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: example/calculator_sequental.cpp ================================================ /** * A command-line calculator that uses a sequental grammar instead of * left-recursion. 
*/ #include #include #include #include #include int main() { using namespace std; using VariableMap = unordered_map; peg_parser::ParserGenerator calculator; auto &g = calculator; g.setSeparator(g["Whitespace"] << "[\t ]"); g["Expression"] << "Assign | Sum"; g["Assign"] << "Name '=' Sum" >> [](auto e, auto &v) { return v[e[0].string()] = e[1].evaluate(v); }; g["Sum"] << "Product Summand*" >> [=](auto e, auto &v) { return std::accumulate(e.begin(), e.end(), 0, [&](auto a, auto b) { return a + b.evaluate(v); }); }; g["PositiveSummand"] << "'+' Product" >> [=](auto e, auto &v) { return e[0].evaluate(v); }; g["NegativeSummand"] << "'-' Product" >> [=](auto e, auto &v) { return -e[0].evaluate(v); }; g["Summand"] << "PositiveSummand | NegativeSummand"; g["Product"] << "Power Term*" >> [](auto e, auto &v) { return std::accumulate(e.begin(), e.end(), 1, [&](auto a, auto b) { return a * b.evaluate(v); }); }; g["NormalTerm"] << "'*' Power" >> [=](auto e, auto &v) { return e[0].evaluate(v); }; g["InverseTerm"] << "'/' Power" >> [=](auto e, auto &v) { return 1 / e[0].evaluate(v); }; g["Term"] << "NormalTerm | InverseTerm"; g["Power"] << "Atomic ('^' Power) | Atomic" >> [](auto e, auto &v) { return e.size() == 2 ? 
pow(e[0].evaluate(v), e[1].evaluate(v)) : e[0].evaluate(v); }; g["Atomic"] << "Number | Brackets | Variable"; g["Brackets"] << "'(' Sum ')'"; g["Variable"] << "Name" >> [](auto e, auto &v) { return v[e[0].string()]; }; g["Name"] << "[a-zA-Z] [a-zA-Z0-9]*"; // We can also use other programs as rules g.setProgramRule("Number", peg_parser::presets::createFloatProgram()); g.setStart(g["Expression"]); cout << "Enter an expression to be evaluated.\n"; VariableMap variables; while (true) { string str; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } try { auto result = calculator.run(str, variables); cout << str << " = " << result << endl; } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << string(syntax->begin, ' '); cout << string(syntax->length(), '~'); cout << "^\n"; cout << " " << "Syntax error while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: example/calculator_visitor.cpp ================================================ /** * This example demonstrate how we can use peg_parser::parser to define a * command-line calculator and use a visitor pattern to evaluate the result. 
*/ #include #include #include #include int main() { using namespace std; struct Visitor; using Expression = peg_parser::Interpreter::Expression; struct Visitor { float result; unordered_map variables; float getValue(Expression e) { e.evaluate(*this); return result; } void visitAddition(Expression l, Expression r) { result = getValue(l) + getValue(r); } void visitSubtraction(Expression l, Expression r) { result = getValue(l) - getValue(r); } void visitMultiplication(Expression l, Expression r) { result = getValue(l) * getValue(r); } void visitDivision(Expression l, Expression r) { result = getValue(l) / getValue(r); } void visitPower(Expression l, Expression r) { result = pow(getValue(l), getValue(r)); } void visitVariable(Expression name) { result = variables[name.string()]; } void visitAssignment(Expression name, Expression value) { result = (variables[name.string()] = getValue(value)); } void visitNumber(Expression value) { result = stod(value.string()); } }; peg_parser::ParserGenerator calculator; auto &g = calculator; g.setSeparator(g["Whitespace"] << "[\t ]"); g["Expression"] << "Assign | Sum"; g["Assign"] << "Name '=' Sum" >> [](auto e, auto &v) { v.visitAssignment(e[0], e[1]); }; g["Sum"] << "Add | Subtract | Product"; g["Product"] << "Multiply | Divide | Exponent"; g["Exponent"] << "Power | Atomic"; g["Atomic"] << "Number | Brackets | Variable"; g["Brackets"] << "'(' Sum ')'"; g["Add"] << "Sum '+' Product" >> [](auto e, auto &v) { v.visitAddition(e[0], e[1]); }; g["Subtract"] << "Sum '-' Product" >> [](auto e, auto &v) { v.visitSubtraction(e[0], e[1]); }; g["Multiply"] << "Product '*' Exponent" >> [](auto e, auto &v) { v.visitMultiplication(e[0], e[1]); }; g["Divide"] << "Product '/' Exponent" >> [](auto e, auto &v) { v.visitDivision(e[0], e[1]); }; g["Power"] << "Atomic ('^' Exponent)" >> [](auto e, auto &v) { v.visitPower(e[0], e[1]); }; g["Variable"] << "Name" >> [](auto e, auto &v) { v.visitVariable(e); }; g["Name"] << "[a-zA-Z] [a-zA-Z0-9]*"; 
g["Number"] << "'-'? [0-9]+ ('.' [0-9]+)?" >> [](auto e, auto &v) { v.visitNumber(e); }; g.setStart(g["Expression"]); cout << "Enter an expression to be evaluated.\n"; while (true) { string str; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } try { Visitor visitor; calculator.run(str, visitor); cout << str << " = " << visitor.result << endl; } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << string(syntax->begin, ' '); cout << string(syntax->length(), '~'); cout << "^\n"; cout << " " << "Syntax error while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: example/json_parser.cpp ================================================ /** * This example demonstrate how we can use peg_parser::parser parse standard * JSON. https://en.wikipedia.org/wiki/JSON#Data_types_and_syntax */ #include #include #include #include #include #include #include #include /** Class to store JSON objects */ struct JSON { enum Type { NUMBER, STRING, BOOLEAN, ARRAY, OBJECT, EMPTY } type; std::variant, std::map> data; explicit JSON(double v) : type(NUMBER), data(v) {} explicit JSON(std::string &&v) : type(STRING), data(v) {} explicit JSON(bool v) : type(BOOLEAN), data(v) {} explicit JSON(std::vector &&v) : type(ARRAY), data(v) {} explicit JSON(std::map &&v) : type(OBJECT), data(v) {} explicit JSON() : type(EMPTY) {} }; /** Print JSON */ std::ostream &operator<<(std::ostream &stream, const JSON &json) { switch (json.type) { case JSON::NUMBER: { stream << std::get(json.data); break; } case JSON::STRING: { stream << '"' << std::get(json.data) << '"'; break; } case JSON::BOOLEAN: { stream << (std::get(json.data) ? 
"true" : "false"); break; } case JSON::ARRAY: { stream << '['; for (auto v : std::get>(json.data)) stream << v << ','; stream << ']'; break; } case JSON::OBJECT: { stream << '{'; for (auto v : std::get>(json.data)) { stream << '"' << v.first << '"' << ':' << v.second << ','; } stream << '}'; break; } case JSON::EMPTY: { stream << "null"; break; } } return stream; } /** Define the grammar */ peg_parser::ParserGenerator createJSONProgram() { peg_parser::ParserGenerator g; g.setSeparator(g["Separators"] << "[\t \n]"); g["JSON"] << "Number | String | Boolean | Array | Object | Empty"; // Number g.setProgramRule("Number", peg_parser::presets::createDoubleProgram(), [](auto e) { return JSON(e.evaluate()); }); // String g.setProgramRule("String", peg_parser::presets::createStringProgram("\"", "\""), [](auto e) { return JSON(e.evaluate()); }); // Boolean g["Boolean"] << "True | False"; g["True"] << "'true'" >> [](auto) { return JSON(true); }; g["False"] << "'false'" >> [](auto) { return JSON(false); }; // Array g["Array"] << "'[' (JSON (',' JSON)*)? ']'" >> [](auto e) { std::vector data(e.size()); std::transform(e.begin(), e.end(), data.begin(), [](auto v) { return v.evaluate(); }); return JSON(std::move(data)); }; // Object g["Object"] << "'{' (Pair (',' Pair)*)? 
'}'" >> [](auto e) { std::map data; for (auto p : e) { data[std::get(p[0].evaluate().data)] = p[1].evaluate(); } return JSON(std::move(data)); }; g["Pair"] << "String ':' JSON"; // Empty g["Empty"] << "'null'" >> [](auto) { return JSON(); }; g.setStart(g["JSON"]); return g; } /** Input */ int main() { using namespace std; auto json = createJSONProgram(); cout << "Enter a valid JSON expression.\n"; while (true) { string str; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } try { auto result = json.run(str); cout << "Parsed JSON: " << result << endl; } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << string(syntax->begin, ' '); cout << string(syntax->length(), '~'); cout << "^\n"; cout << " " << "Syntax error while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: example/python_indentation.cpp ================================================ /** * This is a proof-of-concept example that parses python-like indentation * blocks. 
*/ #include #include #include #include #include int main() { using namespace std; struct Block { size_t begin, length; }; using Blocks = vector; peg_parser::ParserGenerator blockParser; blockParser["Indentation"] << "' '*"; /** storage for indentation depths */ std::vector indentations; /** initializer is necessarry to reset the state after syntax errors */ blockParser["InitBlocks"] << "''" << [&](auto &) -> bool { indentations.resize(0); return true; }; /** * matches the current block intendation * note that this rule is not cacheable as results are context-dependent */ blockParser["SameIndentation"] << "Indentation" << [&](auto &s) -> bool { return s->length() == indentations.back(); }; blockParser["SameIndentation"]->cacheable = false; /** matches a deeper block intendation */ blockParser["DeeperIndentation"] << "Indentation" << [&](auto &s) -> bool { return s->length() > indentations.back(); }; blockParser["DeeperIndentation"]->cacheable = false; // enters a new block and stores the indentation blockParser["EnterBlock"] << "Indentation" << [&](auto &s) -> bool { if (indentations.size() == 0 || s->length() > indentations.back()) { indentations.push_back(s->length()); return true; } else { return false; } }; blockParser["EnterBlock"]->cacheable = false; /** matches a line in the current block */ blockParser["Line"] << "SameIndentation (!'\n' .)+ '\n'"; blockParser.getRule("Line")->cacheable = false; /** matches an empty line */ blockParser["EmptyLine"] << "Indentation '\n'"; /** exits a block and pops the current indentation */ blockParser["ExitBlock"] << "''" << [&](auto &) -> bool { indentations.pop_back(); return true; }; blockParser.getRule("ExitBlock")->cacheable = false; /** store all successfully parsed blocks */ blockParser["Block"] << "&EnterBlock Line (EmptyLine | Block | Line)* &ExitBlock" >> [](auto e, Blocks &blocks) { for (auto a : e) a.evaluate(blocks); blocks.push_back(Block{e.position(), e.length()}); }; blockParser.setStart(blockParser["Start"] 
<< "InitBlocks Block"); while (true) { string str, input; cout << "Enter a python-like indented string. Push enter twice to parse." << endl; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } do { input += str + '\n'; cout << "- "; getline(cin, str); } while (str != ""); try { Blocks blocks; blockParser.run(input, blocks); cout << "matched " << blocks.size() << " blocks." << endl; for (auto b : blocks) { cout << "- from line " << std::count(input.begin(), input.begin() + b.begin, '\n') + 1; cout << " to " << std::count(input.begin(), input.begin() + b.begin + b.length, '\n') << endl; } } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << " " << "Syntax error at character " << syntax->end << " while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: example/type_checker.cpp ================================================ /** * This example shows how the parser behaviour changes with a grammar * ambigouity in a c-like language. It is implemented using a filter callback in * the `Typename` rule. * * Note the different interpretation of `x * y` as either a pointer definition * or a multiplication. 
* * Example input: * `x * y` -> parsed as a multiplication * `type x` -> parsed as a type definition * `x * y` -> now parsed as a variable definition (pointer to `y` of type `x`) */ #include #include #include int main() { using namespace std; peg_parser::ParserGenerator typeChecker; unordered_set types; auto &g = typeChecker; g.setSeparator(g["Whitespace"] << "[\t ]"); g.setStart(g["Expression"] << "Typedef | Vardef | Multiplication"); g["Typedef"] << "'type' Name" >> [&](auto e) { types.emplace(e[0].string()); return "type definition"; }; g["Multiplication"] << "Variable '*' Variable" >> [](auto) { return "multiplication"; }; g["Vardef"] << "Type Name" >> [](auto) { return "variable definition"; }; // this rule only accepts types that have are declared in `types` g["Typename"] << "Name" << [&](auto s) -> bool { auto name = s->inner[0]->string(); return types.find(name) != types.end(); }; g["Type"] << "Typename '*'?"; g["Variable"] << "Name"; g["Atomic"] << "Variable"; g["Name"] << "[a-zA-Z] [a-zA-Z0-9]*"; while (true) { string str; cout << "> "; getline(cin, str); if (str == "q" || str == "quit") { break; } try { auto result = typeChecker.run(str); cout << str << " = " << result << endl; } catch (peg_parser::SyntaxError &error) { auto syntax = error.syntax; cout << " "; cout << string(syntax->begin, ' '); cout << string(syntax->length(), '~'); cout << "^\n"; cout << " " << "Syntax error while parsing " << syntax->rule->name << endl; } } return 0; } ================================================ FILE: glue/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) # ---- Project ---- project( PEGParserGlue VERSION 1.0 LANGUAGES CXX ) # ---- Include guards ---- if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) message( FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there." 
) endif() # ---- Add dependencies via CPM ---- include(../cmake/CPM.cmake) CPMFindPackage(NAME PEGParser SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/..) CPMAddPackage( NAME Glue GITHUB_REPOSITORY TheLartians/Glue VERSION 1.5.1 ) CPMAddPackage( NAME PackageProject.cmake GITHUB_REPOSITORY TheLartians/PackageProject.cmake VERSION 1.4 ) # ---- Add source files ---- file(GLOB_RECURSE headers CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h") file(GLOB_RECURSE sources CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") # ---- Create library ---- add_library(PEGParserGlue ${headers} ${sources}) set_target_properties(PEGParserGlue PROPERTIES CXX_STANDARD 17) target_compile_options(PEGParserGlue PUBLIC "$<$:/permissive->") target_link_libraries(PEGParserGlue PRIVATE PEGParser) target_link_libraries(PEGParserGlue PUBLIC Glue) target_include_directories( PEGParserGlue PUBLIC $ $ ) # ---- Create an installable target ---- packageProject( NAME ${PROJECT_NAME} NAMESPACE ${PROJECT_NAME} VERSION ${PROJECT_VERSION} BINARY_DIR ${PROJECT_BINARY_DIR} INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION} DEPENDENCIES Glue ) ================================================ FILE: glue/include/peg_parser/glue.h ================================================ #pragma once #include namespace peg_parser { glue::MapValue glue(); } ================================================ FILE: glue/source/glue.cpp ================================================ #include #include #include #include glue::MapValue peg_parser::glue() { using Any = glue::Any; using AnyFunction = glue::AnyFunction; using Program = peg_parser::ParserGenerator; using Expression = Program::Expression; auto parser = glue::createAnyMap(); parser["Expression"] = glue::createClass() .addMethod("evaluate", [](Expression &e, const Any &d) { return e.evaluate(d); }) .addMethod("size", [](Expression &e) { return e.size(); }) .addMethod("string", [](Expression &e) { 
return e.string(); }) .addMethod("position", [](Expression &e) { return e.position(); }) .addMethod("length", [](Expression &e) { return e.length(); }) .addMethod("get", [](Expression &e, unsigned i) { if (i < e.size()) { return e[i]; } else { throw std::runtime_error("invalid expression index"); } }); parser["Program"] = glue::createClass() .addConstructor<>() .addMethod("run", [](Program &g, const std::string &str, const Any &arg) { return g.run(str, arg); }) .addMethod("setRule", [](Program &g, const std::string &name, const std::string &grammar) { g.setRule(name, grammar); }) .addMethod("setRuleWithCallback", [](Program &g, const std::string &name, const std::string &grammar, AnyFunction callback) { g.setRule(name, grammar, [callback](auto e, const Any &v) -> Any { return callback(e, v); }); }) .addMethod("setStartRule", [](Program &g, const std::string &name) { g.setStart(g.getRule(name)); }) .addMethod("setSeparatorRule", [](Program &g, const std::string &name) { g.setSeparator(g.getRule(name)); }); return parser; } ================================================ FILE: include/peg_parser/generator.h ================================================ #pragma once #include "presets.h" namespace peg_parser { template class ParserGenerator : public Program { private: presets::GrammarProgram grammarProgram; std::unordered_map> rules; grammar::Node::Shared separatorRule; public: ParserGenerator() { grammarProgram = presets::createPEGProgram(); } std::shared_ptr getRule(const std::string &name) { auto it = rules.find(name); if (it != rules.end()) { return it->second; } auto rule = grammar::makeRule(name, grammar::Node::Error()); rules[name] = rule; return rule; } grammar::Node::Shared getRuleNode(const std::string &name) { auto rule = grammar::Node::WeakRule(getRule(std::string(name))); if (separatorRule) { auto separator = grammar::Node::ZeroOrMore(separatorRule); return grammar::Node::Sequence({separator, rule, separator}); } else { return rule; } } std::shared_ptr 
setRule( const std::string &name, const grammar::Node::Shared &grammar, const typename Interpreter::Callback &callback = typename Interpreter::Callback()) { auto rule = getRule(name); rule->node = grammar; this->interpreter.setEvaluator(rule, callback); return rule; } grammar::Node::Shared parseRule(const std::string_view &grammar) { presets::RuleGetter rg = [this](const auto &name) { return getRuleNode(std::string(name)); }; return grammarProgram.run(grammar, rg); } std::shared_ptr setRule( const std::string &name, const std::string_view &grammar, const typename Interpreter::Callback &callback = typename Interpreter::Callback()) { return setRule(name, parseRule(grammar), callback); } template std::shared_ptr setProgramRule(const std::string &name, Program subprogram, C &&callback) { auto rule = getRule(name); rule->node = grammar::Node::Rule(subprogram.parser.grammar); this->interpreter.setEvaluator( rule, [callback = std::forward(callback), interpreter = subprogram.interpreter]( auto e, Args &&...args) { return callback(interpreter.interpret(e[0].syntax()), std::forward(args)...); }); return rule; } template auto setProgramRule(const std::string &name, Program subprogram) { static_assert(sizeof...(Args2) == 0); static_assert(std::is_convertible::value); auto rule = getRule(name); rule->node = grammar::Node::Rule(subprogram.parser.grammar); this->interpreter.setEvaluator(rule, [interpreter = subprogram.interpreter](auto e, auto &&...) 
{ return R(interpreter.interpret(e[0].syntax()).evaluate()); }); } std::shared_ptr setFilteredRule( const std::string &name, const std::string_view &grammar, const grammar::Node::FilterCallback &filter, const typename Interpreter::Callback &callback = typename Interpreter::Callback()) { return setRule(name, grammar::Node::Sequence({parseRule(grammar), grammar::Node::Filter(filter)}), callback); } void setSeparator(const std::shared_ptr &rule) { rule->hidden = true; separatorRule = grammar::Node::Rule(rule); } std::shared_ptr setSeparatorRule(const std::string &name, const grammar::Node::Shared &grammar) { auto rule = setRule(name, grammar); setSeparator(rule); return rule; } std::shared_ptr setSeparatorRule(const std::string &name, const std::string_view &grammar) { return setSeparatorRule(name, parseRule(grammar)); } void setStart(const std::shared_ptr &rule) { this->parser.grammar = rule; } void unsetSeparatorRule() { separatorRule.reset(); } /** Operator overloads */ struct OperatorDelegate { ParserGenerator *parent; std::string ruleName; std::string grammar; typename Interpreter::Callback callback; grammar::Node::FilterCallback filter; OperatorDelegate(ParserGenerator *p, const std::string &n) : parent(p), ruleName(n) {} OperatorDelegate(const OperatorDelegate &) = delete; OperatorDelegate &operator<<(const std::string_view &gr) { this->grammar = gr; return *this; } OperatorDelegate &operator>>(const typename Interpreter::Callback &cp) { this->callback = cp; return *this; } OperatorDelegate &operator<<(const grammar::Node::FilterCallback &ft) { this->filter = ft; return *this; } operator std::shared_ptr() { return parent->getRule(ruleName); } std::shared_ptr operator->() { return parent->getRule(ruleName); } ~OperatorDelegate() { if (grammar.size() > 0) { if (filter) { parent->setFilteredRule(ruleName, grammar, filter, callback); } else { parent->setRule(ruleName, grammar, callback); } } } }; OperatorDelegate operator[](const std::string &ruleName) { return 
OperatorDelegate(this, ruleName); } }; } // namespace peg_parser ================================================ FILE: include/peg_parser/grammar.h ================================================ #pragma once #include #include #include #include #include #include #include #include namespace peg_parser { struct SyntaxTree; namespace grammar { using Letter = char; struct Node; struct Rule { std::string name; std::shared_ptr node; bool hidden = false; bool cacheable = true; Rule(const std::string_view &n, const std::shared_ptr &t) : name(n), node(t) {} }; inline std::shared_ptr makeRule(const std::string_view &name, const std::shared_ptr &node) { return std::make_shared(name, node); } struct Node { using FilterCallback = std::function &)>; enum class Symbol { WORD, ANY, RANGE, SEQUENCE, CHOICE, ZERO_OR_MORE, ONE_OR_MORE, OPTIONAL, ALSO, NOT, EMPTY, ERROR, RULE, WEAK_RULE, END_OF_FILE, FILTER }; using Shared = std::shared_ptr; Symbol symbol; std::variant, Shared, std::weak_ptr, std::shared_ptr, std::string, std::array, FilterCallback> data; private: Node(Symbol s) : symbol(s) {} template Node(Symbol s, const T &d) : symbol(s), data(d) {} public: static Shared Word(const std::string &word) { return Shared(new Node(Symbol::WORD, word)); } static Shared Any() { return Shared(new Node(Symbol::ANY)); } static Shared Range(Letter a, Letter b) { return Shared(new Node(Symbol::RANGE, std::array({{a, b}}))); } static Shared Sequence(const std::vector &args) { return Shared(new Node(Symbol::SEQUENCE, args)); } static Shared Choice(const std::vector &args) { return Shared(new Node(Symbol::CHOICE, args)); } static Shared ZeroOrMore(const Shared &arg) { return Shared(new Node(Symbol::ZERO_OR_MORE, arg)); } static Shared OneOrMore(const Shared &arg) { return Shared(new Node(Symbol::ONE_OR_MORE, arg)); } static Shared Optional(const Shared &arg) { return Shared(new Node(Symbol::OPTIONAL, arg)); } static Shared Also(const Shared &arg) { return Shared(new Node(Symbol::ALSO, arg)); } 
static Shared Not(const Shared &arg) { return Shared(new Node(Symbol::NOT, arg)); } static Shared Empty() { return Shared(new Node(Symbol::EMPTY)); } static Shared Error() { return Shared(new Node(Symbol::ERROR)); } static Shared Rule(const std::shared_ptr &rule) { return Shared(new Node(Symbol::RULE, rule)); } static Shared WeakRule(const std::weak_ptr &rule) { return Shared(new Node(Symbol::WEAK_RULE, rule)); } static Shared EndOfFile() { return Shared(new Node(Symbol::END_OF_FILE)); } static Shared Filter(const FilterCallback &callback) { return Shared(new Node(Symbol::FILTER, callback)); } }; std::ostream &operator<<(std::ostream &stream, const Node &node); } // namespace grammar } // namespace peg_parser ================================================ FILE: include/peg_parser/interpreter.h ================================================ #pragma once #include #include #include #include "parser.h" namespace peg_parser { struct InterpreterError : public std::exception { std::shared_ptr tree; mutable std::string buffer; InterpreterError(const std::shared_ptr &t) : tree(t) {} const char *what() const noexcept override; }; template class Interpreter { public: class Expression; using Callback = std::function; class Expression { protected: struct iterator { using iterator_category = std::input_iterator_tag; using value_type = Expression; using pointer = Expression *; using reference = Expression &; const Expression &parent; size_t idx; iterator(const Expression &p, size_t i) : parent(p), idx(i) {} iterator &operator++() { idx++; return *this; } Expression operator*() const { return parent[idx]; } bool operator!=(const iterator &other) const { return other.idx != idx || &other.parent != &parent; } }; const Interpreter &interpreter; std::shared_ptr syntaxTree; public: Expression(const Interpreter &i, std::shared_ptr s) : interpreter(i), syntaxTree(s) {} auto size() const { return syntaxTree->inner.size(); } auto view() const { return syntaxTree->view(); } auto 
string() const { return std::string(view()); } auto position() const { return syntaxTree->begin; } auto length() const { return syntaxTree->length(); } auto rule() const { return syntaxTree->rule; } auto syntax() const { return syntaxTree; } Expression operator[](size_t idx) const { return interpreter.interpret(syntaxTree->inner[idx]); } std::optional operator[](std::string_view name) const { auto it = std::find_if(syntaxTree->inner.begin(), syntaxTree->inner.end(), [name](auto st) { return st->rule->name == name; }); if (it != syntaxTree->inner.end()) { return interpreter.interpret(*it); } return {}; } iterator begin() const { return iterator(*this, 0); } iterator end() const { return iterator(*this, size()); } template auto evaluateBy(const Interpreter &interpreter, Args2... args) const { return interpreter.evaluate(syntaxTree, args...); } R evaluate(Args... args) const { auto it = interpreter.evaluators.find(syntaxTree->rule.get()); if (it == interpreter.evaluators.end()) { if (interpreter.defaultEvaluator) { return interpreter.defaultEvaluator(*this, args...); } throw InterpreterError(syntaxTree); } return it->second(*this, args...); } }; private: std::unordered_map evaluators; static R __defaultEvaluator(const Expression &e, Args... 
args) { size_t N = e.size(); if (N > 0) { for (size_t i = 0; i < N - 1; ++i) { e[i].evaluate(std::forward(args)...); } return e[N - 1].evaluate(std::forward(args)...); } if (!std::is_same::value) { throw InterpreterError(e.syntax()); } }; public: Callback defaultEvaluator = __defaultEvaluator; std::shared_ptr makeRule(const std::string_view &name, const grammar::Node::Shared &node, const Callback &callback) { auto rule = std::make_shared(name, node); setEvaluator(rule, callback); return rule; } std::shared_ptr makeRule(const std::string &name, const std::shared_ptr &rule, const Callback &callback) { return makeRule(name, grammar::Node::Rule(rule), callback); } void setEvaluator(const std::shared_ptr &rule, const Callback &callback) { if (callback) { evaluators[rule.get()] = callback; } else { auto it = evaluators.find(rule.get()); if (it != evaluators.end()) { evaluators.erase(it); } } } Expression interpret(const std::shared_ptr &tree) const { return Expression{*this, tree}; } R evaluate(const std::shared_ptr &tree, Args... 
args) const { return interpret(tree).evaluate(args...); } }; class SyntaxError : public std::exception { private: mutable std::string buffer; public: std::shared_ptr syntax; SyntaxError(const std::shared_ptr &t) : syntax(t) {} const char *what() const noexcept override; }; template struct Program { using Expression = typename Interpreter::Expression; Parser parser; Interpreter interpreter; std::shared_ptr parse(const std::string_view &str) const { return parser.parse(str); } Expression interpret(const std::shared_ptr &tree) const { if (!tree->valid) { throw SyntaxError(tree); } return interpreter.interpret(tree); } R run(const std::string_view &str, Args &&...args) const { auto parsed = parser.parseAndGetError(str); if (!parsed.syntax->valid || parsed.syntax->end < str.size()) { throw SyntaxError(parsed.error); } return interpret(parsed.syntax).evaluate(std::forward(args)...); } }; } // namespace peg_parser ================================================ FILE: include/peg_parser/parser.h ================================================ #pragma once #include #include "grammar.h" namespace peg_parser { struct SyntaxTree { std::shared_ptr rule; std::string_view fullString; std::vector> inner; size_t begin, end; bool valid = false; bool active = true; bool recursive = false; SyntaxTree(const std::shared_ptr &r, std::string_view s, size_t p); size_t length() const { return end - begin; } std::string_view view() const { return fullString.substr(begin, length()); } std::string string() const { return std::string(view()); } }; struct Parser { struct Result { std::shared_ptr syntax; std::shared_ptr error; }; struct GrammarError : std::exception { enum Type { UNKNOWN_SYMBOL, INVALID_RULE } type; grammar::Node::Shared node; mutable std::string buffer; GrammarError(Type t, grammar::Node::Shared n) : type(t), node(n) {} const char *what() const noexcept override; }; std::shared_ptr grammar; Parser(const std::shared_ptr &grammar = std::make_shared("undefined", 
grammar::Node::Error())); static Result parseAndGetError(const std::string_view &str, std::shared_ptr grammar); static std::shared_ptr parse(const std::string_view &str, std::shared_ptr grammar); std::shared_ptr parse(const std::string_view &str) const; Result parseAndGetError(const std::string_view &str) const; }; std::ostream &operator<<(std::ostream &stream, const SyntaxTree &tree); } // namespace peg_parser ================================================ FILE: include/peg_parser/presets.h ================================================ #pragma once #include "interpreter.h" namespace peg_parser { namespace presets { Program createIntegerProgram(); Program createFloatProgram(); Program createDoubleProgram(); Program createHexProgram(); std::function defaultEscapeCodeCallback(); Program createCharacterProgram(const std::function escapeCodeCallback = defaultEscapeCodeCallback()); Program createStringProgram(const std::string &open, const std::string &close); using RuleGetter = const std::function &; using GrammarProgram = Program; GrammarProgram createPEGProgram(); } // namespace presets } // namespace peg_parser ================================================ FILE: source/grammar.cpp ================================================ #include #include #include using namespace peg_parser::grammar; namespace { /** alternative to `std::get` that works on iOS < 11 */ template const T &pget(const V &v) { if (auto r = std::get_if(&v)) { return *r; } else { throw std::runtime_error("corrupted grammar node"); } } } // namespace std::ostream &peg_parser::grammar::operator<<(std::ostream &stream, const Node &node) { using Symbol = peg_parser::grammar::Node::Symbol; switch (node.symbol) { case Node::Symbol::WORD: { stream << "'" << pget(node.data) << "'"; break; } case Node::Symbol::ANY: { stream << "."; break; } case Symbol::RANGE: { auto &v = pget>(node.data); stream << "[" << v[0] << "-" << v[1] << "]"; break; } case Symbol::SEQUENCE: { const auto &data = 
pget>(node.data); stream << "("; for (auto [i, n] : easy_iterator::enumerate(data)) { stream << *n << (i + 1 == data.size() ? "" : " "); } stream << ")"; break; } case Symbol::CHOICE: { stream << "("; const auto &data = pget>(node.data); for (auto [i, n] : easy_iterator::enumerate(data)) { stream << *n << (i + 1 == data.size() ? "" : " | "); } stream << ")"; break; } case Symbol::ZERO_OR_MORE: { const auto &data = pget(node.data); stream << *data << "*"; break; } case Symbol::ONE_OR_MORE: { const auto &data = pget(node.data); stream << *data << "+"; break; } case Node::Symbol::OPTIONAL: { stream << *pget(node.data) << "?"; break; } case Node::Symbol::ALSO: { stream << "&" << *pget(node.data); break; } case Node::Symbol::NOT: { stream << "!" << *pget(node.data); break; } case Node::Symbol::EMPTY: { stream << "''"; break; } case Node::Symbol::ERROR: { stream << "[]"; break; } case Node::Symbol::RULE: { auto rule = pget>(node.data); stream << rule->name; break; } case Node::Symbol::WEAK_RULE: { if (auto rule = pget>(node.data).lock()) { stream << rule->name; } else { stream << ""; } break; } case Node::Symbol::END_OF_FILE: { stream << ""; break; } case Node::Symbol::FILTER: { stream << ""; break; } } return stream; } ================================================ FILE: source/interpreter.cpp ================================================ #include #include using namespace peg_parser; const char *InterpreterError::what() const noexcept { if (buffer.size() == 0) { buffer = "no evaluator for rule '" + tree->rule->name + "'"; } return buffer.c_str(); } const char *SyntaxError::what() const noexcept { if (buffer.size() == 0) { buffer = "syntax error at character " + std::to_string(syntax->end + 1) + " while parsing " + syntax->rule->name; } return buffer.c_str(); } ================================================ FILE: source/parser.cpp ================================================ #include #include #include #include #include #include // Macros for debugging parsers 
// #define PEG_PARSER_TRACE #ifdef PEG_PARSER_TRACE # define PEG_PARSER_DEBUG_LOG # define PARSER_TRACE(X) \ LOG("parser[" << state.getPosition() << "," << state.current() << "]: " << __INDENT << X) # define PARSER_ADVANCE(X) \ LOG("parser[" << getPosition() << "," << current() << "]: " << __INDENT << X) #else # define PARSER_TRACE(X) # define PARSER_ADVANCE(X) #endif #ifdef PEG_PARSER_DEBUG_LOG # include # define LOG(X) std::cout << X << std::endl; namespace { std::string __INDENT = ""; } # define INCREASE_INDENT __INDENT = __INDENT + " " # define DECREASE_INDENT __INDENT = __INDENT.substr(0, __INDENT.size() - 2) #else # define INCREASE_INDENT # define DECREASE_INDENT #endif namespace { /** alternative to `std::get` that works on iOS < 11 */ template const T &pget(const V &v) { if (auto r = std::get_if(&v)) { return *r; } else { throw std::runtime_error("corrupted grammar node"); } } } // namespace using namespace peg_parser; namespace { // Code from boost // Reciprocal of the golden ratio helps spread entropy // and handles duplicates. // See Mike Seymour in magic-numbers-in-boosthash-combine: // http://stackoverflow.com/questions/4948780 template inline void hash_combine(std::size_t &seed, T const &v) { seed ^= std::hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } // Recursive template code derived from Matthieu M. 
template ::value - 1> struct HashValueImpl { static void apply(size_t &seed, Tuple const &tuple) { HashValueImpl::apply(seed, tuple); hash_combine(seed, std::get(tuple)); } }; template struct HashValueImpl { static void apply(size_t &seed, Tuple const &tuple) { hash_combine(seed, std::get<0>(tuple)); } }; template struct TupleHasher { size_t operator()(Tuple const &tt) const { size_t seed = 0; HashValueImpl::apply(seed, tt); return seed; } }; template std::string streamToString(T &&v) { std::stringstream stream; stream << v; return stream.str(); } class State { public: std::string_view string; private: size_t position; using CacheKey = std::tuple; using Cache = std::unordered_map, TupleHasher>; Cache cache; std::shared_ptr errorTree; public: size_t maxPosition; State(const std::string_view &s, size_t c = 0) : string(s), position(c), maxPosition(c) {} grammar::Letter current() { return position < string.size() ? string[position] : '\0'; } void advance(size_t amount = 1) { position += amount; if (position > string.size()) { position = string.size(); } if (position > maxPosition) { maxPosition = position; } PARSER_ADVANCE("advancing " << amount << " to " << position << ": '" << current() << "'"); } void setPosition(size_t p) { if (p == position) { return; } position = p; PARSER_ADVANCE("resetting to " << position << ": '" << current() << "'"); } size_t getPosition() { return position; } struct Saved { size_t position; size_t innerCount; }; Saved save() { return Saved{position, stack.size() > 0 ? 
stack.back()->inner.size() : 0}; } void load(const Saved &s) { if (stack.size() > 0) { stack.back()->end = getPosition(); stack.back()->inner.resize(s.innerCount); } setPosition(s.position); } bool isAtEnd() { return position == string.size(); } std::shared_ptr getCached(const std::shared_ptr &rule) { auto it = cache.find(std::make_pair(position, rule.get())); if (it != cache.end()) return it->second; return std::shared_ptr(); } void addToCache(const std::shared_ptr &tree) { cache[std::make_pair(tree->begin, tree->rule.get())] = tree; } const Cache &getCache() { return cache; } void removeFromCache(const std::shared_ptr &tree) { auto it = cache.find(std::make_pair(tree->begin, tree->rule.get())); if (it != cache.end()) { cache.erase(it); } } void addInnerSyntaxTree(const std::shared_ptr &tree) { if (stack.size() > 0 && !tree->rule->hidden) { stack.back()->inner.push_back(tree); } } std::vector> stack; std::shared_ptr getErrorTree() { return errorTree; } void trackError(const std::shared_ptr &tree) { if (!tree) { return; } if (tree->length() > 0 && !tree->rule->hidden) { if (errorTree) { if (tree->end >= errorTree->end) { errorTree = tree; } } else { errorTree = tree; } } } }; bool parse(const std::shared_ptr &node, State &state); std::shared_ptr parseRule(const std::shared_ptr &rule, State &state, bool useCache = true) { PARSER_TRACE("enter rule " << rule->name); INCREASE_INDENT; if (useCache && rule->cacheable) { auto cached = state.getCached(rule); if (cached) { PARSER_TRACE("cached"); if (cached->valid) { state.addInnerSyntaxTree(cached); state.advance(); state.setPosition(cached->end); } else { PARSER_TRACE("failed"); if (cached->active && !cached->recursive) { PARSER_TRACE("found left recursion"); cached->recursive = true; } } DECREASE_INDENT; PARSER_TRACE("exit rule " << rule->name); return cached; } } auto syntaxTree = std::make_shared(rule, state.string, state.getPosition()); if (useCache) { state.addToCache(syntaxTree); } auto saved = state.save(); 
state.stack.push_back(syntaxTree); syntaxTree->valid = parse(rule->node, state); syntaxTree->end = state.getPosition(); syntaxTree->active = false; state.stack.pop_back(); if (syntaxTree->valid) { if (useCache && syntaxTree->recursive) { PARSER_TRACE("enter left recursion: " << rule->name); while (true) { State recursionState(state.string, syntaxTree->begin); recursionState.trackError(state.getErrorTree()); // Copy the cache except the currect position to the recursion state // TODO: keeping the current state and modifying the cache in place is // probably much more efficient. for (auto &cached : state.getCache()) { if (std::get<0>(cached.first) != syntaxTree->begin) { recursionState.addToCache(cached.second); } } recursionState.addToCache(syntaxTree); auto tmp = parseRule(rule, recursionState, false); state.trackError(recursionState.getErrorTree()); if (tmp->valid && tmp->end > syntaxTree->end) { PARSER_TRACE("parsed left recursion"); syntaxTree = tmp; if (useCache) { state.addToCache(syntaxTree); } state.setPosition(tmp->end); } else { break; } } PARSER_TRACE("exit left recursion"); } state.addInnerSyntaxTree(syntaxTree); } else { state.trackError(syntaxTree); state.load(saved); } DECREASE_INDENT; PARSER_TRACE("exit rule " << rule->name); return syntaxTree; } bool parse(const std::shared_ptr &node, State &state) { using Node = peg_parser::grammar::Node; using Symbol = Node::Symbol; PARSER_TRACE("parsing " << *node); auto c = state.current(); switch (node->symbol) { case peg_parser::grammar::Node::Symbol::WORD: { auto saved = state.save(); for (auto c : pget(node->data)) { if (state.current() != c) { state.load(saved); PARSER_TRACE("failed"); return false; } state.advance(); } return true; } case peg_parser::grammar::Node::Symbol::ANY: { if (state.isAtEnd()) { PARSER_TRACE("failed"); return false; } else { state.advance(); return true; } } case Symbol::RANGE: { auto &v = pget>(node->data); if (c >= v[0] && c <= v[1]) { state.advance(); return true; } else { 
PARSER_TRACE("failed"); return false; } } case Symbol::SEQUENCE: { auto saved = state.save(); for (auto n : pget>(node->data)) { if (!parse(n, state)) { state.load(saved); return false; } } return true; } case Symbol::CHOICE: { for (auto n : pget>(node->data)) { if (parse(n, state)) { return true; } } return false; } case Symbol::ZERO_OR_MORE: { auto data = pget(node->data); while (parse(data, state)) { } return true; } case peg_parser::grammar::Node::Symbol::ONE_OR_MORE: { const auto &data = pget(node->data); if (!parse(data, state)) { return false; } while (parse(data, state)) { } return true; } case peg_parser::grammar::Node::Symbol::OPTIONAL: { const auto &data = pget(node->data); parse(data, state); return true; } case peg_parser::grammar::Node::Symbol::ALSO: { const auto &data = pget(node->data); auto saved = state.save(); auto result = parse(data, state); state.load(saved); return result; } case peg_parser::grammar::Node::Symbol::NOT: { const auto &data = pget(node->data); auto saved = state.save(); auto result = parse(data, state); state.load(saved); return !result; } case peg_parser::grammar::Node::Symbol::ERROR: { return false; } case peg_parser::grammar::Node::Symbol::EMPTY: { return true; } case peg_parser::grammar::Node::Symbol::RULE: { const auto &rule = pget>(node->data); return parseRule(rule, state)->valid; } case peg_parser::grammar::Node::Symbol::WEAK_RULE: { const auto &data = pget>(node->data); if (auto rule = data.lock()) { return parseRule(rule, state)->valid; } else { throw Parser::GrammarError(Parser::GrammarError::INVALID_RULE, node); } } case peg_parser::grammar::Node::Symbol::END_OF_FILE: { auto res = state.isAtEnd(); if (!res) { PARSER_TRACE("failed"); } return res; } case peg_parser::grammar::Node::Symbol::FILTER: { const auto &callback = pget(node->data); bool res; if (state.stack.size() > 0) { auto tree = state.stack.back(); tree->end = state.getPosition(); res = callback(tree); state.setPosition(tree->end); } else { res = false; } 
if (!res) { PARSER_TRACE("failed"); } return res; } } throw Parser::GrammarError(Parser::GrammarError::UNKNOWN_SYMBOL, node); } } // namespace /* Creates a tree node for rule r over the input s: begin and end both start at p, and the node starts out invalid and active. */ SyntaxTree::SyntaxTree(const std::shared_ptr &r, std::string_view s, size_t p) : rule(r), fullString(s), begin(p), end(p), valid(false), active(true) {} /* Returns the error description. The text is built on the first call and cached in buffer: the error type name in parentheses followed by the streamed grammar node. */ const char *peg_parser::Parser::GrammarError::what() const noexcept { if (buffer.size() == 0) { std::string typeName; switch (type) { case UNKNOWN_SYMBOL: typeName = "UNKNOWN_SYMBOL"; break; case INVALID_RULE: typeName = "INVALID_RULE"; break; } buffer = "internal error in grammar node (" + typeName + "): " + streamToString(*node); } return buffer.c_str(); } /* Stores g as the grammar used by the single-argument parse and parseAndGetError overloads. */ Parser::Parser(const std::shared_ptr &g) : grammar(g) {} /* Parses str starting at the rule grammar. Returns the resulting tree together with the error tree reported by the parser state; when the state reports none, the result tree is used as the error tree. */ Parser::Result Parser::parseAndGetError(const std::string_view &str, std::shared_ptr grammar) { State state(str); PARSER_TRACE("Begin parsing of: '" << str << "'"); auto result = parseRule(grammar, state); auto error = state.getErrorTree(); if (!error) { error = result; } return Parser::Result{result, error}; } /* Parses str starting at the rule grammar and returns only the syntax tree. */ std::shared_ptr Parser::parse(const std::string_view &str, std::shared_ptr grammar) { return parseAndGetError(str, grammar).syntax; } /* Parses str with the grammar stored in this parser. */ std::shared_ptr Parser::parse(const std::string_view &str) const { return parse(str, grammar); } /* Like the two-argument overload, using the grammar stored in this parser. */ Parser::Result Parser::parseAndGetError(const std::string_view &str) const { return parseAndGetError(str, grammar); } /* Streams a tree as RuleName(...): a node without inner trees prints its view() in single quotes; otherwise the inner trees are streamed recursively, separated by a comma and a space. */ std::ostream &peg_parser::operator<<(std::ostream &stream, const SyntaxTree &tree) { stream << tree.rule->name << '('; if (tree.inner.size() == 0) { stream << '\'' << tree.view() << '\''; } else { for (auto &&[i, arg] : easy_iterator::enumerate(tree.inner)) { stream << (*arg) << (i + 1 == tree.inner.size() ? 
"" : ", "); } } stream << ')'; return stream; }
================================================ FILE: source/presets.cpp ================================================
#include #include
using namespace peg_parser;
using namespace peg_parser::presets;
using GN = grammar::Node;

// Builds a program whose "Number" rule matches an optional '-' followed by one or more decimal
// digits and converts the matched text with std::stoi.
Program presets::createIntegerProgram() {
  Program program;
  auto pattern = GN::Sequence({GN::Optional(GN::Word("-")), GN::OneOrMore(GN::Range('0', '9'))});
  program.parser.grammar = program.interpreter.makeRule(
      "Number", pattern, [](auto e) { return std::stoi(e.string()); });
  return program;
}

namespace {
  // Grammar shared by createFloatProgram and createDoubleProgram: an optional '-', one or more
  // digits, an optional fraction ('.' and digits) and an optional exponent ('e' or 'E', an
  // optional '-', digits). The exponent part does not accept a '+' sign.
  grammar::Node::Shared createFloatGrammar() {
    return GN::Sequence(
        {GN::Optional(GN::Word("-")), GN::OneOrMore(GN::Range('0', '9')),
         GN::Optional(GN::Sequence({GN::Word("."), GN::OneOrMore(GN::Range('0', '9'))})),
         GN::Optional(
             GN::Sequence({GN::Choice({GN::Word("e"), GN::Word("E")}), GN::Optional(GN::Word("-")),
                           GN::OneOrMore(GN::Range('0', '9'))}))});
  }
} // namespace /* Builds a program whose rule uses createFloatGrammar() and converts the matched text with std::stof. */ Program presets::createFloatProgram() { Program program; program.parser.grammar = program.interpreter.makeRule( "Float", createFloatGrammar(), [](auto e) { return std::stof(e.string()); }); return program; } /* Same grammar and rule name as createFloatProgram, but the matched text is converted with std::stod. */ Program presets::createDoubleProgram() { Program program; program.parser.grammar = program.interpreter.makeRule( "Float", createFloatGrammar(), [](auto e) { return std::stod(e.string()); }); return program; } /* Builds a program whose rule matches one or more hex digits (0-9, a-f, A-F, no prefix) and converts them with std::stoi in base 16. */ Program presets::createHexProgram() { Program program; auto pattern = GN::Sequence( {GN::OneOrMore(GN::Choice({GN::Range('0', '9'), GN::Range('a', 'f'), GN::Range('A', 'F')}))}); program.parser.grammar = program.interpreter.makeRule( "Hex", pattern, [](auto e) { return std::stoi(e.string(), 0, 16); }); return program; } /* Returns a callback that maps the escape letters n, t and 0 to newline, tab and NUL; every other character is returned unchanged. */ std::function presets::defaultEscapeCodeCallback() { std::unordered_map codes{{'n', '\n'}, {'t', '\t'}, {'0', '\0'}}; return [codes](char c) { auto it = codes.find(c); if (it != codes.end()) { return it->second; } else { return c; } }; } /* Builds a program that parses one character, optionally written as a backslash escape; escapeCodeCallback translates the character that follows a backslash. */ Program presets::createCharacterProgram(const std::function 
escapeCodeCallback) {
  Program program;
  auto backslash = GN::Word("\\");
  // A backslash followed by any single character; the callback decides what it maps to.
  auto escaped = GN::Rule(program.interpreter.makeRule(
      "Escaped", GN::Sequence({backslash, GN::Any()}),
      [escapeCodeCallback](auto e) { return escapeCodeCallback(e.view()[1]); }));
  // A backslash followed by hex digits: the digits are evaluated by the hex preset and the
  // resulting number is converted to a char.
  auto numberParser = createHexProgram();
  auto escapedCode = GN::Rule(program.interpreter.makeRule(
      "escapedCode", GN::Sequence({backslash, GN::Rule(numberParser.parser.grammar)}),
      [interpreter = numberParser.interpreter](auto e) {
        return char(0 + e[0].evaluateBy(interpreter));
      }));
  // Any other single character evaluates to itself.
  auto character = GN::Rule(program.interpreter.makeRule("SingleCharacter", GN::Any(),
                                                         [](auto e) { return e.view()[0]; }));
  // The hex form is listed first in the choice, so a backslash followed by hex digits is
  // handled by "escapedCode" before the callback-based "Escaped" rule is tried.
  program.parser.grammar =
      program.interpreter.makeRule("Character", GN::Choice({escapedCode, escaped, character}),
                                   [](auto e) { return e[0].evaluate(); });
  return program;
}

// Builds a program that parses a string delimited by `open` and `close`. Characters are parsed
// with the character preset for as long as `close` is not next in the input. Evaluates to a
// std::string made of the evaluated characters (delimiters excluded).
Program presets::createStringProgram(const std::string &open, const std::string &close) {
  Program program;
  auto characterProgram = createCharacterProgram();
  auto pattern = GN::Sequence({GN::Word(open),
                               GN::ZeroOrMore(GN::Sequence(
                                   {GN::Not(GN::Word(close)),
                                    GN::Rule(characterProgram.parser.grammar)})),
                               GN::Word(close)});
  program.parser.grammar = program.interpreter.makeRule(
      "String", pattern, [interpreter = characterProgram.interpreter](auto e) {
        std::string res;
        for (auto c : e) {
          res += c.evaluateBy(interpreter);
        }
        return res;
      });
  return program;
}

// Builds the program that parses a PEG expression and evaluates it to a grammar node.
GrammarProgram presets::createPEGProgram() {
  GrammarProgram program;
  // Rule matching any run of spaces and tabs, marked hidden; withWhitespace() wraps a node so
  // that this rule is tried before and after it.
  auto whitespaceRule =
      makeRule("Whitespace", GN::ZeroOrMore(GN::Choice({GN::Word(" "), GN::Word("\t")})));
  whitespaceRule->hidden = true;
  auto whitespace = GN::Rule(whitespaceRule);
  auto withWhitespace = [whitespace](GN::Shared node) {
    return GN::Sequence({whitespace, node, whitespace});
  };
  auto stringProgram = createStringProgram("'", "'");
  // The expression rule is created with an empty placeholder node and referenced through a
  // weak rule node; its real node is assigned further down in this function.
  auto expressionRule = program.interpreter.makeRule(
      "Expression", GN::Empty(), [](auto e, auto &g) { return e[0].evaluate(g); });
  auto expression = GN::WeakRule(expressionRule);
  auto atomicRule = 
program.interpreter.makeRule("Atomic", GN::Empty(),
                             [](auto e, auto &g) { return e[0].evaluate(g); });
  auto atomic = GN::WeakRule(atomicRule);

  // Rules that evaluate to the end-of-file node and to the any-character node.
  auto endOfFile = GN::Rule(program.interpreter.makeRule(
      "EndOfFile", GN::Word(""), [](auto, auto &) { return GN::EndOfFile(); }));
  auto any = GN::Rule(
      program.interpreter.makeRule("Any", GN::Word("."), [](auto, auto &) { return GN::Any(); }));

  // A character inside a [...] selection: whatever the character preset accepts, provided the
  // input at that point does not start with '-' or ']'.
  auto selectCharacterProgram = createCharacterProgram();
  auto selectCharacter = GN::Sequence({GN::Not(GN::Choice({GN::Word("-"), GN::Word("]")})),
                                       GN::Rule(selectCharacterProgram.parser.grammar)});
  // Two selection characters joined by '-' evaluate to a range node.
  auto range = GN::Rule(program.interpreter.makeRule(
      "Range", GN::Sequence({selectCharacter, GN::Word("-"), selectCharacter}),
      [interpreter = selectCharacterProgram.interpreter](auto e, auto &) {
        return GN::Range(e[0].evaluateBy(interpreter), e[1].evaluateBy(interpreter));
      }));
  // A lone selection character evaluates to a one-character word node.
  auto singeCharacter = GN::Rule(program.interpreter.makeRule(
      "Character", selectCharacter,
      [interpreter = selectCharacterProgram.interpreter](auto e, auto &) {
        return GN::Word(std::string(1, e[0].evaluateBy(interpreter)));
      }));
  auto selectSequence = GN::Sequence(
      {GN::Word("["), GN::ZeroOrMore(GN::Choice({range, singeCharacter})), GN::Word("]")});
  // A selection evaluates to an error node when it has no entries, to the single entry when
  // there is exactly one, and to a choice over all entries otherwise.
  auto select = GN::Rule(program.interpreter.makeRule("Select", selectSequence, [](auto e, auto &g) {
    if (e.size() == 0) {
      return GN::Error();
    }
    if (e.size() == 1) {
      return e[0].evaluate(g);
    }
    std::vector args;
    for (auto c : e) {
      args.push_back(c.evaluate(g));
    }
    return GN::Choice(args);
  }));
  // A quoted string evaluates to a word node, or to the empty node when the string is empty.
  // NOTE(review): the string is evaluated twice; the second call could reuse `word`.
  auto word = GN::Rule(
      program.interpreter.makeRule("Word", stringProgram.parser.grammar,
                                   [interpreter = stringProgram.interpreter](auto e, auto &) {
                                     auto word = e[0].evaluateBy(interpreter);
                                     if (word.size() == 0) {
                                       return GN::Empty();
                                     } else {
                                       return GN::Word(e[0].evaluateBy(interpreter));
                                     }
                                   }));
  // Rule names consist of letters, digits and '_' and must not start with a digit.
  auto ruleName = GN::Sequence({GN::Not(GN::Range('0', '9')),
                                GN::OneOrMore(GN::Choice({GN::Range('a', 'z'), GN::Range('A', 'Z'),
                                                          GN::Range('0', '9'), GN::Word("_")}))});
  // A rule name is resolved by calling the callback argument g with the matched text.
  auto rule = GN::Rule(
      program.interpreter.makeRule("Rule", ruleName, [](auto e, auto &g) { return g(e.view()); }));
  auto brackets = GN::Sequence({GN::Word("("), expression, GN::Word(")")});
  // '&' and '!' prefixes wrap the evaluated operand in an Also / Not node.
  auto andPredicate = GN::Rule(
      program.interpreter.makeRule("AndPredicate", GN::Sequence({GN::Word("&"), atomic}),
                                   [](auto e, auto &g) { return GN::Also(e[0].evaluate(g)); }));
  auto notPredicate = GN::Rule(
      program.interpreter.makeRule("NotPredicate", GN::Sequence({GN::Word("!"), atomic}),
                                   [](auto e, auto &g) { return GN::Not(e[0].evaluate(g)); }));
  // Replace the empty placeholder of the "Atomic" rule now that all alternatives exist.
  atomicRule->node = withWhitespace(
      GN::Choice({andPredicate, notPredicate, word, brackets, endOfFile, any, select, rule}));
  // Postfix operator. This rule is created with the plain makeRule (no evaluation callback);
  // the "Unary" callback below only inspects its matched text.
  auto predicate =
      GN::Rule(makeRule("Predicate", GN::Choice({GN::Word("+"), GN::Word("*"), GN::Word("?")})));
  // An atomic with an optional postfix operator: '*' -> ZeroOrMore, '+' -> OneOrMore,
  // '?' -> Optional; without an operator the atomic's node is returned unchanged.
  auto unary = withWhitespace(GN::Rule(program.interpreter.makeRule(
      "Unary", GN::Sequence({GN::Rule(atomicRule), GN::Optional(predicate)}),
      [](auto e, auto &g) {
        auto inner = e[0].evaluate(g);
        if (e.size() == 1) {
          return inner;
        }
        auto op = e[1].view()[0];
        if (op == '*') {
          return GN::ZeroOrMore(inner);
        }
        if (op == '+') {
          return GN::OneOrMore(inner);
        }
        if (op == '?') {
          return GN::Optional(inner);
        }
        throw std::runtime_error("unexpected unary operator");
      })));
  // One or more unary expressions; a single element is returned as is instead of being
  // wrapped in a one-element sequence.
  auto sequence = GN::Rule(program.interpreter.makeRule(
      "Sequence", GN::Sequence({unary, GN::ZeroOrMore(unary)}), [](auto e, auto &g) {
        if (e.size() == 1) {
          return e[0].evaluate(g);
        }
        std::vector args;
        for (auto c : e) {
          args.push_back(c.evaluate(g));
        }
        return GN::Sequence(args);
      }));
  // Sequences separated by '|'; a single alternative is returned as is.
  auto choice = GN::Rule(program.interpreter.makeRule(
      "Choice",
      GN::Sequence({sequence, GN::ZeroOrMore(GN::Sequence({GN::Word("|"), sequence}))}),
      [](auto e, auto &g) {
        if (e.size() == 1) {
          return e[0].evaluate(g);
        }
        std::vector args;
        for (auto c : e) {
          args.push_back(c.evaluate(g));
        }
        return GN::Choice(args);
      }));
  // Replace the empty placeholder of the "Expression" rule.
  expressionRule->node = withWhitespace(choice);
  // Top-level rule: one expression followed by end of file.
  auto fullExpression = program.interpreter.makeRule(
      "FullExpression", GN::Sequence({GN::Rule(expressionRule), GN::EndOfFile()}),
      [](auto e, 
auto &g) { return e[0].evaluate(g); });
  // The full-expression rule becomes the start rule of the returned program.
  program.parser.grammar = fullExpression;
  return program;
}
================================================ FILE: test/CMakeLists.txt ================================================
cmake_minimum_required(VERSION 3.5 FATAL_ERROR) project(PEGParserTests LANGUAGES CXX) # ---- Options ---- option(ENABLE_TEST_COVERAGE "Enable test coverage" OFF) option(TEST_INSTALLED_VERSION "Test the version found by find_package" OFF) # --- Import tools ---- include(../cmake/tools.cmake) # ---- Dependencies ---- include(../cmake/CPM.cmake) CPMAddPackage( NAME Catch2 GITHUB_REPOSITORY catchorg/Catch2 VERSION 2.13.4 ) if(TEST_INSTALLED_VERSION) find_package(PEGParser REQUIRED) find_package(PEGParserGlue REQUIRED) else() CPMAddPackage(NAME PEGParser SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/..) CPMAddPackage(NAME PEGParserGlue SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/../glue) endif() CPMAddPackage( NAME Format.cmake GITHUB_REPOSITORY TheLartians/Format.cmake VERSION 1.6 OPTIONS "FORMAT_CHECK_CMAKE ON" ) # ---- Create binary ---- file(GLOB sources CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp) add_executable(PEGParserTests ${sources}) target_link_libraries(PEGParserTests Catch2 PEGParser::PEGParser PEGParserGlue::PEGParserGlue) set_target_properties(PEGParserTests PROPERTIES CXX_STANDARD 17) # enable compiler warnings if(NOT TEST_INSTALLED_VERSION) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") target_compile_options(PEGParser PUBLIC -Wall -pedantic -Wextra -Werror) elseif(MSVC) target_compile_options(PEGParser PUBLIC /W4 /WX /wd4456) target_compile_definitions(PEGParserTests PUBLIC DOCTEST_CONFIG_USE_STD_HEADERS) endif() endif() # ---- Add PEGParserTests ---- enable_testing() include(${Catch2_SOURCE_DIR}/contrib/Catch.cmake) catch_discover_tests(PEGParserTests) # ---- code coverage ---- if(ENABLE_TEST_COVERAGE) target_compile_options(PEGParser PUBLIC -O0 -g -fprofile-arcs -ftest-coverage) 
target_link_options(PEGParser PUBLIC -fprofile-arcs -ftest-coverage) endif()
================================================ FILE: test/source/example.cpp ================================================
#include // clang-format off #include #include /* Example program: builds a calculator grammar in which Sum and Product are left-recursive through Add/Subtract and Multiply/Divide, attaches an evaluation callback to each operator rule and to Number, runs the grammar on one input and prints the result to std::cout. */ void example() { peg_parser::ParserGenerator g; // Define grammar and evaluation rules g.setSeparator(g["Whitespace"] << "[\t ]"); g["Sum" ] << "Add | Subtract | Product"; g["Product" ] << "Multiply | Divide | Atomic"; g["Atomic" ] << "Number | '(' Sum ')'"; g["Add" ] << "Sum '+' Product" >> [](auto e){ return e[0].evaluate() + e[1].evaluate(); }; g["Subtract"] << "Sum '-' Product" >> [](auto e){ return e[0].evaluate() - e[1].evaluate(); }; g["Multiply"] << "Product '*' Atomic" >> [](auto e){ return e[0].evaluate() * e[1].evaluate(); }; g["Divide" ] << "Product '/' Atomic" >> [](auto e){ return e[0].evaluate() / e[1].evaluate(); }; g["Number" ] << "'-'? [0-9]+ ('.' [0-9]+)?" >> [](auto e){ return stof(e.string()); }; g.setStart(g["Sum"]); // Execute a string auto input = "1 + 2 * (3+4)/2 - 3"; float result = g.run(input); // -> 5 std::cout << input << " = " << result << std::endl; } // clang-format on /* Runs example() while std::cout has no stream buffer attached, then restores the original buffer; the only check is that example() does not throw. */ TEST_CASE("Example") { auto orig_buf = std::cout.rdbuf(); std::cout.rdbuf(NULL); CHECK_NOTHROW(example()); std::cout.rdbuf(orig_buf); } ================================================ FILE: test/source/glue.cpp ================================================ #include #include #include #include #include /* Exercises the library through its glue bindings: the members of the Program and Expression maps are looked up by name and then called like functions to build and run a calculator. */ TEST_CASE("Extension") { using namespace peg_parser; auto parserGlue = peg_parser::glue(); glue::Context context; context.addRootMap(parserGlue); auto programGlue = parserGlue["Program"]; auto expressionGlue = parserGlue["Expression"]; auto createProgram = programGlue[glue::keys::constructorKey]; auto setRule = programGlue["setRule"]; auto setSeparator = programGlue["setSeparatorRule"]; auto setStart = programGlue["setStartRule"]; auto setRuleWithCallback = programGlue["setRuleWithCallback"]; auto run = 
programGlue["run"];
  auto evaluate = expressionGlue["evaluate"];
  auto get = expressionGlue["get"];
  auto string = expressionGlue["string"];
  auto size = expressionGlue["size"];
  auto position = expressionGlue["position"];
  auto length = expressionGlue["length"];

  using VariableMap = std::unordered_map;

  auto program = createProgram();
  // The start rule is set by name before "Sum" itself is defined.
  REQUIRE_NOTHROW(setRule(program, "Whitespace", "[\t ]"));
  REQUIRE_NOTHROW(setSeparator(program, "Whitespace"));
  REQUIRE_NOTHROW(setStart(program, "Sum"));
  REQUIRE_NOTHROW(setRule(program, "Sum", "Add | Subtract | Product"));
  REQUIRE_NOTHROW(setRule(program, "Product", "Multiply | Divide | Atomic"));
  REQUIRE_NOTHROW(setRule(program, "Atomic", "Number | '(' Sum ')'"));
  // Binary operators: each callback evaluates sub-expressions 0 and 1 through the glue, passing
  // the variable map along, and combines the two results.
  REQUIRE_NOTHROW(setRuleWithCallback(
      program, "Add", "Sum '+' Product",
      glue::AnyFunction([=](const glue::Any &e, VariableMap &d) {
        return evaluate(get(e, 0), d)->get() + evaluate(get(e, 1), d)->get();
      })));
  REQUIRE_NOTHROW(setRuleWithCallback(program, "Subtract", "Sum '-' Product",
                                      glue::AnyFunction([=](const glue::Any &e, VariableMap &d) {
                                        return evaluate(get(e, 0), d)->get()
                                               - evaluate(get(e, 1), d)->get();
                                      })));
  REQUIRE_NOTHROW(setRuleWithCallback(program, "Multiply", "Product '*' Atomic",
                                      glue::AnyFunction([=](const glue::Any &e, VariableMap &d) {
                                        return evaluate(get(e, 0), d)->get()
                                               * evaluate(get(e, 1), d)->get();
                                      })));
  REQUIRE_NOTHROW(setRuleWithCallback(program, "Divide", "Product '/' Atomic",
                                      glue::AnyFunction([=](const glue::Any &e, VariableMap &d) {
                                        return evaluate(get(e, 0), d)->get()
                                               / evaluate(get(e, 1), d)->get();
                                      })));
  REQUIRE_NOTHROW(setRuleWithCallback(program, "Number", "'-'? [0-9]+ ('.' 
[0-9]+)?",
                                      glue::AnyFunction([=](const glue::Any &e, VariableMap &) {
                                        // A Number expression has no sub-expressions, and its
                                        // length equals the size of its matched string.
                                        REQUIRE(size(e)->get() == 0);
                                        REQUIRE(position(e)->get() >= 0);
                                        REQUIRE(length(e)->get() == string(e)->get().size());
                                        return float(stof(string(e)->get()));
                                      })));
  // Looks the matched name up in the variable map passed to run. Note that none of the rules
  // defined in this test refers to "Variable".
  REQUIRE_NOTHROW(setRuleWithCallback(program, "Variable", "[a-zA-Z]+",
                                      glue::AnyFunction([=](const glue::Any &e, VariableMap &d) {
                                        auto &vars = d;
                                        return vars[string(e)->get()];
                                      })));

  VariableMap variables;
  REQUIRE(run(program, "42", variables)->get() == Approx(42));
  REQUIRE(run(program, "2+3", variables)->get() == Approx(5));
  REQUIRE(run(program, "2*3", variables)->get() == Approx(6));
  REQUIRE(run(program, "1+2+3", variables)->get() == Approx(6));
  REQUIRE(run(program, "1+2*3", variables)->get() == Approx(7));
  REQUIRE(run(program, "1+2-3", variables)->get() == Approx(0));
  REQUIRE(run(program, "2*2/4*3", variables)->get() == Approx(3));
  REQUIRE(run(program, "1 - 2*3/2 + 4", variables)->get() == Approx(2));
  REQUIRE(run(program, "1 + 2 * (3+4)/ 2 - 3", variables)->get() == Approx(5));
}
================================================ FILE: test/source/main.cpp ================================================
#define CATCH_CONFIG_MAIN #include
================================================ FILE: test/source/parser.cpp ================================================
#include #include #include #include #include #include

// Writes obj to a std::stringstream with operator<< and returns the resulting text.
template std::string stream_to_string(const T &obj) {
  std::stringstream stream;
  stream << obj;
  return stream.str();
}

using namespace peg_parser;

// The integer preset accepts an optional sign and digits; input with trailing or non-numeric
// text makes run() throw.
TEST_CASE("Number Program") {
  auto program = presets::createIntegerProgram();
  REQUIRE(program.run("42") == 42);
  REQUIRE(program.run("-3") == -3);
  REQUIRE_THROWS(program.run("42r"));
  REQUIRE_THROWS(program.run("not a number"));
}

// The same inputs are checked against both the float and the double preset: integer, fraction
// and exponent forms.
TEST_CASE("Float Program") {
  auto testFloatProgram = [](auto p) {
    REQUIRE(p.run("42") == Approx(42));
    REQUIRE(p.run("3.1412") == Approx(3.1412));
    REQUIRE(p.run("2E10") == Approx(2E10));
    REQUIRE(p.run("1.4e-3") == Approx(1.4e-3));
  };
testFloatProgram(presets::createFloatProgram());
  testFloatProgram(presets::createDoubleProgram());
}

// The hex preset parses hex digits written without a prefix.
TEST_CASE("Hex Program") {
  auto parser = presets::createHexProgram();
  REQUIRE(parser.run("42") == 0x42);
  REQUIRE(parser.run("FA34ABC") == 0xFA34ABC);
}

// Plain characters evaluate to themselves; backslash escapes are translated.
TEST_CASE("Character Program") {
  auto program = presets::createCharacterProgram();
  REQUIRE(program.run("a") == 'a');
  REQUIRE(program.run("5") == '5');
  REQUIRE(program.run("\\\\") == '\\');
  REQUIRE(program.run("\\n") == '\n');
  REQUIRE(program.run("\\t") == '\t');
  REQUIRE(program.run("\\0") == '\0');
}

// Runs once per (open, close) delimiter pair, including multi-character delimiters. The second
// check puts a backslash in front of a close delimiter inside the string and expects the
// delimiter text to be part of the result.
TEST_CASE("String Program") {
  auto [open, close] = GENERATE(as>(), std::make_tuple("'", "'"), std::make_tuple("``", "''"),
                                std::make_tuple("begin ", " end"));
  auto program = presets::createStringProgram(open, close);
  REQUIRE(program.run(open + "Hello World!" + close) == "Hello World!");
  REQUIRE(program.run(open + "Hello\\nEscaped \\" + close + "!" + close)
          == "Hello\nEscaped " + close + "!");
}

// Parses PEG expressions and compares the streamed form of the resulting grammar node. rc is
// passed to run() as the rule lookup: it returns a rule node for a new, empty rule that carries
// the requested name.
TEST_CASE("PEG Parser") {
  auto rc = [](std::string_view name) {
    return grammar::Node::Rule(grammar::makeRule(name, grammar::Node::Empty()));
  };
  auto parser = presets::createPEGProgram();
  REQUIRE(stream_to_string(*parser.run("rule", rc)) == "rule");
  REQUIRE(stream_to_string(*parser.run("rule_2", rc)) == "rule_2");
  REQUIRE(stream_to_string(*parser.run("!rule", rc)) == "!rule");
  REQUIRE(stream_to_string(*parser.run("&rule", rc)) == "&rule");
  REQUIRE(stream_to_string(*parser.run("rule+", rc)) == "rule+");
  REQUIRE(stream_to_string(*parser.run("rule*", rc)) == "rule*");
  REQUIRE(stream_to_string(*parser.run("rule?", rc)) == "rule?");
  REQUIRE(stream_to_string(*parser.run("'word'", rc)) == "'word'");
  // Selections: a lone range stays a range, several entries become a choice, and an escaped
  // '-' is an ordinary character.
  REQUIRE(stream_to_string(*parser.run("[a-z]", rc)) == "[a-z]");
  REQUIRE(stream_to_string(*parser.run("[abc]", rc)) == "('a' | 'b' | 'c')");
  REQUIRE(stream_to_string(*parser.run("[abc-de]", rc)) == "('a' | 'b' | [c-d] | 'e')");
  REQUIRE(stream_to_string(*parser.run("[abc\\-d]", rc)) == "('a' | 'b' | 'c' | '-' | 'd')");
REQUIRE(stream_to_string(*parser.run("", rc)) == "");
  // An empty string literal evaluates to the EMPTY symbol, an empty selection to ERROR.
  REQUIRE(parser.run("''", rc)->symbol == grammar::Node::Symbol::EMPTY);
  REQUIRE(stream_to_string(*parser.run("''", rc)) == "''");
  REQUIRE(parser.run("[]", rc)->symbol == grammar::Node::Symbol::ERROR);
  REQUIRE(stream_to_string(*parser.run("[]", rc)) == "[]");
  REQUIRE(stream_to_string(*parser.run(".", rc)) == ".");
  // Sequences, choices and whitespace (including a tab) between elements.
  REQUIRE(stream_to_string(*parser.run("a b c", rc)) == "(a b c)");
  REQUIRE(stream_to_string(*parser.run("a | b |\tc", rc)) == "(a | b | c)");
  REQUIRE(stream_to_string(*parser.run("'hello' | world '!'", rc)) == "('hello' | (world '!'))");
  REQUIRE(stream_to_string(*parser.run("('a'+ (.? | b | '')* [0-9] &)", rc))
          == "('a'+ (.? | b | '')* [0-9] &)");
  // Malformed expressions make run() throw.
  REQUIRE_THROWS(parser.run("a | b | ", rc));
  REQUIRE_THROWS(parser.run("a b @", rc));
  REQUIRE_THROWS(parser.run("42", rc));
}

// Running before any rule is defined throws a SyntaxError. Once the grammar parses, running
// still throws an InterpreterError until rule A gets an evaluator.
TEST_CASE("Program with return value") {
  ParserGenerator program;
  REQUIRE_THROWS_AS(program.run("aa"), SyntaxError);
  REQUIRE_THROWS_WITH(program.run(""), "syntax error at character 1 while parsing undefined");
  program.setRule("A", "'a'");
  program.setStart(program.setRule("B", "A+"));
  REQUIRE(program.parser.parse("aa")->valid);
  REQUIRE_THROWS_AS(program.run("aa"), InterpreterError);
  REQUIRE_THROWS_WITH(program.run("aa"), "no evaluator for rule 'A'");
  auto count = 0;
  // The evaluator runs once per matched A.
  program.setRule("A", "'a'", [&](auto) { return ++count; });
  REQUIRE(program.run("aaa") == 3);
  REQUIRE(count == 3);
}

// Same grammar, but the program takes an extra argument that is passed to the evaluators. Here
// running "aa" before A has an evaluator is expected not to throw.
TEST_CASE("Program with argument") {
  ParserGenerator program;
  int count = 0;
  REQUIRE_THROWS(program.run("", count));
  REQUIRE_THROWS(program.run("aa", count));
  program.setRule("A", "'a'");
  program.setStart(program.setRule("B", "A+"));
  REQUIRE(program.parser.parse("aa")->valid);
  REQUIRE_NOTHROW(program.run("aa", count));
  program.setRule("A", "'a'", [&](auto, int &count) { ++count; });
  REQUIRE_NOTHROW(program.run("aaa", count));
  REQUIRE(count == 3);
}

// Builds a number program, then a calculator that embeds it as its "Number" rule.
TEST_CASE("Evaluation") {
  ParserGenerator numberProgram;
  numberProgram.setStart(numberProgram.setRule("Number", "'-'? 
[0-9] [0-9]*", [](auto e) { return std::stoi(e.string()); }));
  REQUIRE(numberProgram.run("3") == 3);
  REQUIRE(numberProgram.run("-42") == -42);

  ParserGenerator calculator;
  calculator.setSeparatorRule("Whitespace", "[\t ]");
  calculator.setStart(calculator.setRule("Expression", "Sum"));
  // Sum and Product fold over all of their sub-expressions, so no recursion is needed here.
  calculator.setRule("Sum", "Product ('+' Product)*", [](auto e) {
    float res = 0;
    for (auto t : e) {
      res += t.evaluate();
    }
    return res;
  });
  calculator.setRule("Product", "Number ('*' Number)*", [](auto e) {
    float res = 1;
    for (auto t : e) {
      res *= t.evaluate();
    }
    return res;
  });
  // Reuse the number program defined at the top of this test as the "Number" rule.
  calculator.setProgramRule("Number", numberProgram);
  REQUIRE(calculator.run("42") == 42);
  REQUIRE(calculator.run("1+2") == 3);
  REQUIRE(calculator.run("2 * 3") == 6);
  REQUIRE(calculator.run("1 + 2*3") == 7);
  REQUIRE(calculator.run(" 1 + 2*3*1 +4 * 5 ") == 27);
  REQUIRE_THROWS(calculator.run("1+2*"));
}

#include

// Calculator whose Addition and Multiplication rules begin with a reference back to the rule
// that contains them (Sum -> Addition -> Sum, Product -> Multiplication -> Product).
TEST_CASE("Left recursion") {
  ParserGenerator calculator;
  calculator.setSeparatorRule("Whitespace", "[\t ]");
  calculator.setStart(calculator.setRule("Expression", "Sum | Atomic"));
  calculator.setRule("Sum", "Addition | Product");
  // Subtraction is expressed as adding a negated product.
  calculator.setRule("NegativeSummand", "'-' Product", [](auto e) { return -e[0].evaluate(); });
  calculator.setRule("Addition", "Sum ('+' Product | NegativeSummand)",
                     [](auto e) { return e[0].evaluate() + e[1].evaluate(); });
  calculator.setRule("Product", "Multiplication | Atomic");
  calculator.setRule("Multiplication", "Product '*' Atomic",
                     [](auto e) { return e[0].evaluate() * e[1].evaluate(); });
  calculator.setRule("Atomic", "Number | Negative | Brackets");
  calculator.setProgramRule("Number", presets::createFloatProgram());
  calculator.setRule("Negative", "'-' Atomic", [](auto e) { return -e[0].evaluate(); });
  calculator.setRule("Brackets", "'(' Expression ')'");
  REQUIRE(calculator.run("42") == 42);
  REQUIRE(calculator.run("1+2") == 3);
  REQUIRE(calculator.run("1+2-3-5") == -5);
  REQUIRE(calculator.run("2 * 3") == 6);
  REQUIRE(calculator.run("1 + 2*3") == 7);
  REQUIRE(calculator.run(" 1 + 
2*3*1 +4 * 5 ") == 27);
  // Repeated unary minus signs.
  REQUIRE(calculator.run("-42") == -42);
  REQUIRE(calculator.run("--42") == 42);
  REQUIRE(calculator.run("---42") == -42);
  REQUIRE(calculator.run("----------------------------------------------------42") == 42);
  REQUIRE_THROWS(calculator.run("1+2*"));
}

// A filtered rule is only valid when its filter callback returns true: here B matches a run of
// 'a' characters only when the number of inner A nodes is a multiple of three.
TEST_CASE("Filter") {
  ParserGenerator<> program;
  program.setStart(program.setFilteredRule("B", "A+", [](auto tree) {
    REQUIRE_THAT(tree->string(), Catch::Matchers::Matches("a+"));
    return tree->inner.size() % 3 == 0;
  }));
  program.setRule("A", "'a'");
  auto N = GENERATE(range(1, 10));
  REQUIRE(program.parse(std::string(N, 'a'))->valid == (N % 3 == 0));
}

// Deliberately corrupted grammar nodes must produce errors instead of undefined behaviour.
TEST_CASE("Broken Grammar") {
  // The node's data no longer matches its symbol.
  SECTION("Wrong type") {
    auto node = grammar::Node::Range('1', '9');
    node->data = std::string("nope");
    auto rule = makeRule("Invalid", node);
    REQUIRE_THROWS_WITH(Parser::parse("1", rule), "corrupted grammar node");
  }
  // The node's symbol is not one of the enumerators.
  SECTION("Illegal type") {
    auto node = grammar::Node::Any();
    node->symbol = grammar::Node::Symbol(-1);
    auto rule = makeRule("Invalid", node);
    REQUIRE_THROWS_WITH(Parser::parse("", rule), Catch::Matchers::Contains("UNKNOWN_SYMBOL"));
  }
  // A weak rule node whose rule has been destroyed.
  SECTION("Deleted Rule") {
    auto deletedRule = makeRule("deletedRule", grammar::Node::Any());
    auto rule = makeRule("Rule", grammar::Node::WeakRule(deletedRule));
    REQUIRE_NOTHROW(Parser::parse("x", rule));
    deletedRule.reset();
    REQUIRE_THROWS_AS(Parser::parse("x", rule), Parser::GrammarError);
    REQUIRE_THROWS_WITH(Parser::parse("x", rule), Catch::Matchers::Contains(""));
  }
}

// Checks the streamed representation of a parsed syntax tree.
TEST_CASE("Syntax Tree") {
  ParserGenerator<> program;
  program.setStart(program.setRule("B", "A+"));
  program.setRule("A", ".");
  auto tree = program.parse("abc");
  REQUIRE(stream_to_string(*tree) == "B(A('a'), A('b'), A('c'))");
}

// Rule definition through operators: the first << sets the grammar string, the second <<
// attaches a filter callback, and >> attaches the evaluator.
TEST_CASE("C++ Operators") {
  ParserGenerator program;
  program["B"] << "A+" << [](auto tree) { return tree->inner.size() % 3 == 0; } >> [](auto e) {
    std::string res;
    for (auto arg : e) {
      res += arg.evaluate();
    }
    return res;
  };
  // Each character evaluates to the character that follows it ('a' -> 'b').
  program["A"] << "." >> [](auto e) { return std::string(1, e.view()[0] + 1); };
  program.setStart(program["B"]);
  REQUIRE_THROWS(program.run("ab"));
  REQUIRE(program.run("abc") == "bcd");
}

// A evaluates to the sum of its sub-expressions and every B evaluates to 1, so the result is
// the number of space-separated 'b' characters in the input.
TEST_CASE("Parsing") {
  ParserGenerator program;
  program.setStart(program["A"]);
  program["A"] << "B (' ' A) | B" >> [](auto e) {
    return std::accumulate(e.begin(), e.end(), 0, [](auto a, auto b) { return a + b.evaluate(); });
  };
  program["B"] << "&'b' . ''" >> [](auto) { return 1; };
  REQUIRE_THROWS(program.run("a"));
  REQUIRE(program.run("b") == 1);
  REQUIRE(program.run("b b") == 2);
  REQUIRE(program.run("b b b") == 3);
}

// Calculator grammar written with the operator syntax, checked for precedence,
// left-to-right evaluation, brackets and whitespace handling.
TEST_CASE("Documentation Example") {
  ParserGenerator g;
  g.setSeparator(g["Whitespace"] << "[\t ]");
  g["Sum"] << "Add | Subtract | Product";
  g["Product"] << "Multiply | Divide | Atomic";
  g["Atomic"] << "Number | '(' Sum ')'";
  g["Add"] << "Sum '+' Product" >> [](auto e) { return e[0].evaluate() + e[1].evaluate(); };
  g["Subtract"] << "Sum '-' Product" >> [](auto e) { return e[0].evaluate() - e[1].evaluate(); };
  g["Multiply"] << "Product '*' Atomic" >> [](auto e) { return e[0].evaluate() * e[1].evaluate(); };
  g["Divide"] << "Product '/' Atomic" >> [](auto e) { return e[0].evaluate() / e[1].evaluate(); };
  g["Number"] << "'-'? [0-9]+ ('.' [0-9]+)?" >> [](auto e) { return stof(e.string()); };
  g.setStart(g["Sum"]);
  REQUIRE(g.run("42") == Approx(42));
  REQUIRE(g.run("2+3") == Approx(5));
  REQUIRE(g.run("2*3") == Approx(6));
  REQUIRE(g.run("1+2+3") == Approx(6));
  REQUIRE(g.run("1+2*3") == Approx(7));
  REQUIRE(g.run("1+2-3") == Approx(0));
  REQUIRE(g.run("2*2/4*3") == Approx(3));
  REQUIRE(g.run("1 - 2*3/2 + 4") == Approx(2));
  REQUIRE(g.run("1 + 2 * (3+4)/ 2 - 3") == Approx(5));
}

// Indexing an expression by rule name converts to true only when a sub-expression of that rule
// was matched.
TEST_CASE("Subscript Operators") {
  ParserGenerator program;
  program["Word"] << "[a-z]+";
  program["Yell"] << "[A-Z]+";
  program["Start"] << "Word | Yell" >> [](auto e) { return bool(e["Yell"]); };
  program.setStart(program["Start"]);
  REQUIRE(!program.run("hello"));
  REQUIRE(program.run("HELLO"));
}