Repository: stephan-tolksdorf/fparsec Branch: master Commit: 156cbd751fac Files: 176 Total size: 4.3 MB Directory structure: gitextract_d9t2bjvu/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── ci.yml ├── .gitignore ├── .vscode/ │ └── tasks.json ├── Build/ │ ├── FParsec.Common.targets │ ├── fparsec-license.txt │ └── fparsec.snk ├── Directory.Build.props ├── Doc/ │ ├── html/ │ │ ├── about/ │ │ │ ├── changelog.html │ │ │ ├── contact.html │ │ │ ├── fparsec-vs-alternatives.html │ │ │ ├── index.html │ │ │ └── status-and-roadmap.html │ │ ├── css/ │ │ │ ├── print.css │ │ │ ├── screen-sidebar.css │ │ │ ├── style-ie.css │ │ │ ├── style-ie6.css │ │ │ └── style.css │ │ ├── download-and-installation.html │ │ ├── index.html │ │ ├── license.html │ │ ├── reference/ │ │ │ ├── charparsers.html │ │ │ ├── charstream.html │ │ │ ├── error.html │ │ │ ├── errormessage.html │ │ │ ├── errormessagelist.html │ │ │ ├── index.html │ │ │ ├── operatorprecedenceparser.html │ │ │ ├── parser-overview.html │ │ │ ├── position.html │ │ │ ├── primitives.html │ │ │ ├── reply.html │ │ │ ├── staticmapping.html │ │ │ └── text.html │ │ ├── tutorial.html │ │ └── users-guide/ │ │ ├── applying-parsers-in-sequence.html │ │ ├── customizing-error-messages.html │ │ ├── debugging-a-parser.html │ │ ├── index.html │ │ ├── internals-of-a-simple-parser-function.html │ │ ├── looking-ahead-and-backtracking.html │ │ ├── parser-functions.html │ │ ├── parsing-alternatives.html │ │ ├── parsing-sequences.html │ │ ├── parsing-with-user-state.html │ │ ├── performance-optimizations.html │ │ ├── running-parsers-on-input.html │ │ ├── tips-and-tricks.html │ │ └── where-is-the-monad.html │ ├── misc/ │ │ └── removed-many-variants.fs │ └── src/ │ ├── changelog.txt │ ├── contact.txt │ ├── documentation.txt │ ├── download-and-installation.txt │ ├── fparsec-vs-alternatives.txt │ ├── license.txt │ ├── reference-charparsers.txt │ ├── reference-charstream.txt │ ├── reference-error.txt │ ├── reference-errormessage.txt │ ├── 
reference-errormessagelist.txt │ ├── reference-operatorprecedenceparser.txt │ ├── reference-overview.txt │ ├── reference-position.txt │ ├── reference-primitives.txt │ ├── reference-reply.txt │ ├── reference-staticmapping.txt │ ├── reference-text.txt │ ├── reference.txt │ ├── status-and-roadmap.txt │ ├── template.html │ ├── tutorial.txt │ └── users-guide.txt ├── FParsec/ │ ├── AssemblyInfo.fs │ ├── CharParsers.fs │ ├── CharParsers.fsi │ ├── Emit.fs │ ├── Error.fs │ ├── Error.fsi │ ├── FParsec-LowTrust.fsproj │ ├── FParsec.fsproj │ ├── FParsec.targets │ ├── Internals.fs │ ├── Primitives.fs │ ├── Primitives.fsi │ ├── Range.fs │ ├── StaticMapping.fs │ └── StaticMapping.fsi ├── FParsec-LowTrust.sln ├── FParsec.sln ├── FParsecCS/ │ ├── Buffer.cs │ ├── CaseFoldTable.cs │ ├── CharSet.cs │ ├── CharStream.cs │ ├── CharStreamLT.cs │ ├── Cloning.cs │ ├── ErrorMessage.cs │ ├── ErrorMessageList.cs │ ├── Errors.cs │ ├── FParsecCS-LowTrust.csproj │ ├── FParsecCS.csproj │ ├── FParsecCS.targets │ ├── FastGenericEqualityERComparer.cs │ ├── HexFloat.cs │ ├── IdentifierValidator.cs │ ├── ManyChars.cs │ ├── OperatorPrecedenceParser.cs │ ├── Position.cs │ ├── Properties/ │ │ └── AssemblyInfo.cs │ ├── Reply.cs │ ├── StringBuffer.cs │ ├── Strings.cs │ └── Text.cs ├── NuGet.config ├── Samples/ │ ├── Calculator/ │ │ ├── Calculator-LowTrust.fsproj │ │ ├── Calculator.fsproj │ │ ├── Calculator.targets │ │ ├── InterpLexYacc-LowTrust.fsproj │ │ └── calculator.fs │ ├── FSharpParsingSample/ │ │ ├── FParsecVersion/ │ │ │ ├── InterpFParsec-LowTrust.fsproj │ │ │ ├── InterpFParsec.fsproj │ │ │ ├── InterpFParsec.targets │ │ │ ├── main.fs │ │ │ └── parser.fs │ │ ├── LexYaccVersion/ │ │ │ ├── Doc.html │ │ │ ├── InterpLexYacc.fsproj │ │ │ ├── ast.fs │ │ │ ├── interp.fs │ │ │ ├── lex.fs │ │ │ ├── lex.fsl │ │ │ ├── main.fs │ │ │ ├── pars.fs │ │ │ ├── pars.fsi │ │ │ ├── pars.fsy │ │ │ └── test.lang │ │ └── readme.txt │ ├── JSON/ │ │ ├── JsonParser-LowTrust.fsproj │ │ ├── JsonParser.fsproj │ │ ├── 
JsonParser.targets │ │ ├── PegParser-LowTrust.fsproj │ │ ├── ast.fs │ │ ├── main.fs │ │ ├── parser.fs │ │ └── test_json.txt │ ├── PEG/ │ │ ├── PegParser-LowTrust.fsproj │ │ ├── PegParser.fsproj │ │ ├── PegParser.targets │ │ ├── ast.fs │ │ ├── main.fs │ │ ├── parser.fs │ │ └── test_peg.txt │ └── Tutorial/ │ ├── Tutorial-LowTrust.fsproj │ ├── Tutorial.fsproj │ ├── Tutorial.targets │ └── tutorial.fs ├── Test/ │ ├── AllTests.fs │ ├── BufferTests.fs │ ├── CharParsersTests.fs │ ├── CharSetTests.fs │ ├── CharStreamTests.fs │ ├── CloningTests.fs │ ├── HexFloatTests.fs │ ├── IdentifierValidatorTests.fs │ ├── OperatorPrecedenceParserTests.fs │ ├── PrimitivesTests.fs │ ├── RangeTests.fs │ ├── StaticMappingTests.fs │ ├── StringBufferTests.fs │ ├── Test-LowTrust.fsproj │ ├── Test.fs │ ├── Test.fsproj │ ├── Test.targets │ └── TextTests.fs ├── global.json ├── pack.ps1 └── readme.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ * text=auto *.cs text diff=csharp *.fs text diff=csharp *.fsi text diff=csharp *.fsx text diff=csharp *.sln text eol=crlf *.csproj text *.fsproj text *.config text *.json text *.txt text *.html text linguist-documentation *.css text linguist-documentation *.jpg binary *.png binary *.gif binary ================================================ FILE: .github/workflows/ci.yml ================================================ name: Build and test on: push: branches: [master] pull_request: jobs: build-and-test-low-trust: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup .NET uses: actions/setup-dotnet@v3 with: global-json-file: global.json - name: Build Low-Trust version run: dotnet build Test/Test-LowTrust.fsproj -c Release-LowTrust -p:Platform=AnyCPU - name: Test Low-Trust version run: dotnet run --no-build --project Test/Test-LowTrust.fsproj -c 
Release-LowTrust -p:Platform=AnyCPU - name: Build samples for Low-Trust version run: dotnet build FParsec-LowTrust.sln -c Release-LowTrust -p:Platform=AnyCPU build-and-test-non-low-trust: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup .NET uses: actions/setup-dotnet@v3 - name: Build non-Low-Trust version run: dotnet build Test/Test.fsproj -c Release -p:Platform=AnyCPU - name: Test non-Low-Trust version run: dotnet run --no-build --project Test/Test.fsproj -c Release -p:Platform=AnyCPU - name: Build samples for non-Low-Trust version run: dotnet build FParsec.sln -c Release -p:Platform=AnyCPU ================================================ FILE: .gitignore ================================================ Thumbs.db Build/bin/ Unused/* bin/ obj/ *.user *.suo *.pyc *~ *.asv *.swp *.pdb packages/ version.props /.vs/ .idea .DS_Store ================================================ FILE: .vscode/tasks.json ================================================ { // See https://go.microsoft.com/fwlink/?LinkId=733558 // for the documentation about the tasks.json format "version": "2.0.0", "tasks": [ { "label": "Build Test (Config: Debug-LowTrust)", "command": "dotnet build Test/Test.fsproj -c Debug-LowTrust -p:Platform=AnyCPU", "type": "shell", "group": "build", "presentation": { "reveal": "silent" }, "problemMatcher": "$msCompile" }, { "label": "Build All (Config: Debug-LowTrust)", "command": "dotnet build FParsec.sln -c Debug-LowTrust -p:Platform=AnyCPU", "type": "shell", "group": "build", "presentation": { "reveal": "silent" }, "problemMatcher": "$msCompile" }, { "label": "Test (Config: Debug-LowTrust)", "dependsOn": ["Build Test (Config: Debug-LowTrust)"], "command": "'Test/bin/Any CPU/Debug-LowTrust/net6/Test'", "type": "shell", "group": "test", "presentation": { "reveal": "always" }, "problemMatcher": "$msCompile" } ] } ================================================ FILE: Build/FParsec.Common.targets 
================================================ AnyCPU $(DefineConstants);SMALL_STATETAG $(DefineConstants);AGGRESSIVE_INLINING $(DefineConstants);UNALIGNED_READS LICENSE.txt true $(MSBuildThisFileDirectory)\fparsec.snk $(DefineConstants);STRONG_NAME ================================================ FILE: Build/fparsec-license.txt ================================================ The FParsec library in source and binary form is distributed under the Simplified BSD License. The Simplified BSD License (a.k.a. “2‐clause BSD License”) is a simple, permissive license that is OSI‐compliant. FParsec incorporates data derived from the Unicode Character Database v. 8.0.0, Copyright (c) 1991‒2015 Unicode, Inc., which is distributed under the following terms: http://www.unicode.org/terms_of_use.html#Exhibit1 -- FParsec Simplified BSD License Copyright (c) 2007‒2022, Stephan Tolksdorf. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. This software is provided by the copyright holders “as is” and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. 
In no event shall the copyright holders be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. ================================================ FILE: Directory.Build.props ================================================ Debug;Release;Debug-LowTrust;Release-LowTrust false true true $(DefineConstants);LOW_TRUST false true false $(DefineConstants);DEBUG true $(DefineConstants);RELEASE $(AllowedOutputExtensionsInPackageBuildOutputFolder);.pdb 2.0.0 Stephan Tolksdorf Copyright © Stephan Tolksdorf FParsec http://www.quanttec.com/fparsec/ https://github.com/stephan-tolksdorf/fparsec ================================================ FILE: Doc/html/about/changelog.html ================================================ Changelog

Changelog

Version 2.0, 2022‒11‒01

  • Dropped .NET Framework 4.5 support and switched to using .NET 6.
  • Changed NuGet build to always enable code signing. Contributed by Radek Krahl – thanks Radek!
  • Norman Krämer fixed an error in the CharStream.Skip documentation – thanks Norman!
  • Nathan Adams fixed a typo in the User’s Guide – thanks Nathan!

Version 1.1.1, 2020‒02‒01

  • Fixed NuGet build to target the AnyCPU platform instead of the default platform of the build machine. Vadim Slynko and tpisciotta reported this issue – thanks Vadim and tpisciotta!

Version 1.1.0, 2020‒01‒05

  • Behaviour change: pfloat now parses out‐of‐range finite values as plus or minus infinity instead of returning an error. This unifies the pfloat behaviour on all platforms after the behaviour change of System.Double.Parse on .NET Core 3.
  • Enrico Sada modernized the F# and C# project files and the build script for the NuGet packages – thanks Enrico!
  • Added SourceLink support, which was prepared and championed by Cameron Taggart – thanks Cameron!
  • Maxime Didier fixed a bug in the Low‐Trust version of the CharStream constructors that accept a file path argument: The stream’s Name property wasn’t initialized. Thanks Maxime!
  • Fixed missing parser definitions in the Parsing JSON section of the tutorial spotted by Josh Quintus – thanks Josh!
  • Andre Wesseling fixed a parser definition in the Where is the monad section of the User’s Guide – thanks Andre!
  • Frederik K. fixed an error in the ErrorMessage documentation – thanks Frederik!
  • Jonathan Roeber fixed an error in the previousCharSatisfiesNot – thanks Jonathan!
  • Vegard Løkken fixed an error in the unicodeSpaces documentation – thanks Vegard!

Version 1.0.3, 2017‒08‒20

  • Modern solution and project files for .NET Core and VS 2017 were added to the source folders. Huge thanks to Marcus Griep for spearheading the effort to make FParsec .NET Standard compatible and contributing the new project and solution files!
  • The old build script for the NuGet packages was replaced by a PowerShell script that uses the new project files.
  • The FParsec NuGet package now contains assemblies for .NET Standard 1.6.
  • The non‐netstandard assemblies of FParsec now reference the FSharp.Core 4.0.0.1 NuGet package, which should maximize compatibility when binding redirects aren’t available.
  • A .vscode/tasks.json file with some task definitions for Visual Studio Code was added.
  • The source repository was moved to GitHub.
  • Added a stringsSepBy1 parser (contributed by Robin Munn – thanks Robin!).
  • Added a link to the Russian translation of the tutorial by Dmitry Vlasov – thanks Dmitry!
  • Fixed documentation typos. One was spotted by Brandon Dimperio, another by ZelteHonor – thanks Brandon and ZelteHonor!
  • Renamed CLR45 to AGGRESSIVE_INLINING to better match its purpose.

Version 1.0.2, 2015‒09‒27

  • replaced all uses of Char.GetUnicodeCategory with CharCodeInfo.GetUnicodeCategory, since the former may or may not track the current Unicode standard and the latter is the only one supported by the PCL API subset
  • updated the case folding, whitespace and XID property data tables to Unicode 8.0.0
  • added a PCL Profile 259 version to the FParsec NuGet package
  • removed the Silverlight, VS9 and VS10 solution files and the Mono Makefile
  • updated the Lex & Yacc version of the FSharpParsingSample to use the FsLexYacc NuGet packages
  • fixed documentation typos (two were spotted by Francois Nardon and Patrick McDonald – thanks Francois and Patrick!)

Version 1.0.1, 2013‒06‒25

  • The maintainership of the FParsec NuGet package(s) was handed over from Ryan Riley, Huw Simpson, Cameron Taggart and Khan Thompson to Stephan Tolksdorf. Thanks Ryan, Huw, Cameron and Khan for creating and maintaining the previous versions of the NuGet package!
  • FParsec now has two NuGet packages, built with a new fsx script
  • fixed a bug in one of the CharStream constructors (reported and patched by Andrew Smith – thanks Andrew!)
  • added USE_STATIC_MAPPING_FOR_IS_ANY_OF and UNALIGNED_READS as default compilation options in the Visual Studio projects (the default options now match the ones used by the “Big Data edition” NuGet package)
  • some minor code tweaking / micro‐optimizations
  • fixed some minor documentation issues

Version 1.0.0, 2012‒07‒19

  • disabled code generation in isAnyOf, isNoneOf, anyOf, skipAnyOf, noneOf and skipNoneOf by default (you can reenable it using the new USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option)
  • annotated some CharStream methods with the .NET 4.5 AggressiveInlining option (see the new CLR45 compilation option)
  • updated case folding and XID property tables to Unicode 6.1.0
  • fixed two documentation typos (spotted by Rasmus Meldgaard and Kurt Schelfthout – thanks Rasmus and Kurt!)

Version 0.9.2, 2012‒03‒09

  • fixed compilation in Visual Studio 11 Beta
  • added missing ReturnFrom member to parse builder object (reported by Kurt Schelfthout and Tomas Petricek – thanks Kurt and Tomas!)
  • added workaround for .NET ConsoleStream issue (reported by Alexander Kahl – thanks Alexander!)
  • set AllowPartiallyTrustedCallers and SecurityTransparent assembly attributes in LOW_TRUST NET4 build (as suggested by hammett – thanks hammett!)
  • changed encoding of FParsecCS/Strings.cs to UTF‐8 (with signature) to fix Visual Studio build on machines with Japanese locale (the encoding issue was reported on http://d.hatena.ne.jp/ZOETROPE – thank you!)
  • fixed some documentation issues (incorporating feedback from Alexander Gelkin, Antoine Latter and Stephen Swensen – thanks Alexander, Antoine and Stephen!)
  • added a link to the Japanese translation of the tutorial by Gab_km (thanks Gab_km!)

Version 0.9.1, 2011‒05‒22

  • added /nooptimizationdata compiler flag as a workaround for an F# compiler issue (reported by Michael Giagnocavo – thanks Michael!)
  • fixed an issue in the JSON sample (reported by Ryan Riley – thanks Ryan!)
  • fixed the error message formatting when an error line contains unaccounted newlines or ends with a combining character sequence
  • added warning to installation notes that the regex parser doesn’t work on Mono (reported by Laurent Le Brun – thanks Laurent!)
  • fixed some documentation issues (one of which was reported by Michael Giagnocavo – thanks Michael!)

Version 0.9.0, 2011‒04‒26

Highlights
  • a new tutorial and user’s guide
  • 2x performance improvements due to a refactored low‐level API
  • new identifier parser for parsing identifiers based on Unicode XID syntax
  • new StaticMapping module for compiling static key to value mappings into optimized functions (supports char, int and string as key types)
Changes to high‐level API
Removed variants of many, sepBy, sepEndBy and manyTill

The ...Rev, ...Fold and ...Reduce variants of many, sepBy, sepEndBy and manyTill have been removed.

If you previously used these variants, you can easily define them in your own code using the |>> combinator, as documented in the reference documentation for the previous version. For example:

let manyRev p = many p |>> List.rev
let manyFold acc0 f p = many p |>> List.fold f acc0
let manyReduce f defVal p = (many1 p |>> List.reduce f) <|>% defVal

If you need optimized implementations, you can define them using the new Inline helper class. The file Doc/misc/removed‐many‐variants.fs contains optimized definitions for all removed variants.

Details on changes to manyChars, manyCharsTill and their variants

The behaviour of all variants of manyChars and manyCharsTill has slightly changed. Now manyChars cp is equivalent to many cp, except that it returns a string instead of a char list. Previously, manyChars cp behaved like many (attempt cp), i.e. it automatically backtracked if the char parser had failed after consuming input. The same change has been made to the behaviour of all other variants of manyChars and manyCharsTill. The new behaviour is more consistent with the rest of the library and allows a faster implementation with the new low‐level API.

There probably aren’t many parsers that relied on the old behaviour.

The behaviour change made the skip variants of manyChars and manyCharsTill obsolete, since e.g. skipManyChars cp would do exactly the same as skipMany cp. Hence, the skip variants have been removed.

Changes to low‐level API
  • The old CharStream and State classes have been merged into a single CharStream class with a mutable interface.
  • Parser functions now take a CharStream<'u> instance as the input argument.
  • The Reply type has been moved to the main FParsec namespace and no longer has a State member.
  • Parser state comparisons are now done with the help of the CharStream’s StateTag.
  • Various methods from the old CharStream.Iterator and State types have been renamed in the new CharStream class and have new signatures:

  • New CharStream methods:

  • The ErrorMessage and ErrorMessageList types are now defined in the C# library part. This allows us to implement full parsers in C#. The FParsec.Error module contains type abbreviations and active patterns that provide the familiar interface to F# clients.
  • All error messages used by built‐in FParsec parsers are now defined in the C# classes FParsec.Strings and FParsec.Errors. This should simplify customization and internationalization efforts.
Background on low‐level API changes

Previously parsers were implemented as functions operating on an immutable parser state in the form of a State instance. A parser function received a State instance as the input and returned a State instance as part of its return value. Since State instances were immutable, a parser function had to create a new State instance to advance the input stream, e.g. by calling state.Advance(2).

This architecture was motivated by the desire to provide an API as “functional” as possible, an API that shields users from the underlying imperative/mutable nature of input streams. When FParsec originally started as a relatively close port of Haskell’s Parsec library, this design felt like a natural fit for a functional parser library. However, later, when FParsec moved away from its Parsec roots (to improve performance and provide more features), it became increasingly clear that the immutable CharStream‐State‐design was the main obstacle preventing FParsec from reaching the performance of hand‐optimized recursive‐descent parsers.

Initial tests with some quick prototypes revealed that the allocation and garbage collection of temporary State instances took up to 50% or more of the run time of typical parsers – even though the State class was already heavily optimized. These tests also indicated that consolidating the stream and state classes into a classical imperative stream class simplified the overall library implementation and made the library source code more accessible to new users.

The main drawback of the API change is that it requires modifications to practically all low‐level parser code. Another drawback is that backtracking is slightly less convenient with the new low‐level API (as the parser state has to be explicitly saved and restored, while previously one could just continue with an old state instance).

Since FParsec’s high‐level API is only minimally affected by the change, the advantages seem to outweigh the costs.

Version 0.8.x, no release

New features/ improvements
  • case‐insensitive matching with pstringCI, charsTillStringCI, etc. (using the Unicode 1‐to‐1 case folding mappings for chars in the BMP)
  • various new parsers and combinators, including restOfLine, skipToString, manySatisfyMinMax, manyStrings, withSkippedString
  • new functions runParserOnSubstring and runParserOnSubstream
  • various performance improvements
  • Silverlight support
  • F# 1.9.6.16 compatibility
Design changes
  • standardized on a single input stream type (FParsec.CharStream) and a single concrete parser state type (FParsec.State)
  • refactored the Reply<_,_>, ErrorMessage and ParserError types:

    • error replies now also contain a complete State
    • whether a parser has changed the state is now determined by checking the input and the output state for equality, instead of testing the Consumed flag
    • replaced the Reply<_,_>.Flags with a Status field
    • replaced the various helper functions for constructing a Reply with three overloaded Reply<_,_> constructors (with different arities)
  • all char parsers are now “newline aware”, i.e. they normalize any of the three standard newline representations ("\n", "\r\n", "\r") to "\n" and they properly increment the line count whenever they parse a newline; this means that the behaviour of almost all char parsers has changed with regard to how newline chars are handled
Bug fixes
  • The CharStream class now uses the serialization API to persist the decoder state for backtracking purposes. Previously it relied on the decoder losing its state at block boundaries after a certain sequence of method calls. The previous approach works in practice for the .NET decoders of the standard unicode encodings and for simple stateless encodings like ASCII and ANSI, but it relies on undocumented behaviour and it does not work reliably for encodings like GB18030, ISO‐2022 or ISCII.
  • In previous FParsec versions the CharStream file path/System.IO.Stream constructors failed with an IndexOutOfRange exception when the file/stream was empty and encoding detection was not turned off (reported by Vesa Karvonen ‐ thanks Vesa!).
  • In previous FParsec versions the NumberLiteral.String returned by the numberLiteral parser included parsed suffix chars despite the documentation claiming the opposite. (The testing code was buggy too.) Applications that rely on this behaviour can now use the new NumberLiteralOptions.IncludeSuffixCharsInString to force the numberLiteral parser to include any suffix chars in the returned string.
  • Fixed behaviour of >>=?, >>? and .>>? when second parser fails with fatal error without changing the parser state.
  • Fixed behaviour of nextCharSatisfies[Not] when current “char” is a "\r\n" newline.
Other breaking changes
  • renamed the module CharParser to CharParsers
  • moved CharParser.OperatorPrecedenceParser into separate module
  • FParsec.Primitives:

    • subtle change: renamed message to fail and fail to failFatally
    • renamed pair, triple and quad to tuple2, tuple3 and tuple4
    • renamed manyFoldLeft to manyFold and changed the argument order of the accumulator and function argument
    • removed manyFoldRight
    • renamed count to parray and changed the return type, renamed skipCount to skipArray
    • renamed followedBy and notFollowedBy to followedByL and notFollowedByL and introduced followedBy and notFollowedBy functions that take no second argument
    • moved ParserResult<_> to CharParsers and changed constructor arguments
    • removed applyParser
    • removed |>>=, now >>= automatically uses an optimized branch for uncurried functions
    • removed endBy and endBy1 (endBy p sep can be replaced with many (p .>> sep) and endBy1 p sep with many1 (p .>> sep))
  • FParsec.CharParsers:

    • renamed manyTillString to charsTillString
    • removed applyParser from the public interface
    • removed getIndex, skip, registerNL, extract, regexp (these low‐level operations should be done directly through the State<_>/CharStream.Iterator interface)
    • removed anyCharOrNL (no longer needed, see design changes above)
    • removed nSatisfy (can be replaced with manySatisfyMinMax)
    • removed unicodeDigit and unicodeNumber (can be replaced with satisfy System.Char.IsDigit and satisfy System.Char.IsNumber)
    • moved the helper functions expectedError, unexpectedError etc. into the Error module
  • FParsec.CharStream:

    • string constructor takes more arguments
    • Iterator.Peek(i) now returns the EndOfStreamChar char instead of throwing an exception if the char peeked at lies before the beginning of the stream

Version 0.7.3.1, 2009‒02‒26

  • Fixed a bug in CharParser.normalizeNewlines/CharStream.NormalizeNewlines. This bug also affected the skipped and manyTillString parsers, which internally call normalizeNewlines to normalize the returned string.

    The bug was reported by Greg Chapman ‐ thanks Greg!

    When given a multi‐line string in which the lines are delimited by "\r\n" but the last line does not end in a newline, the buggy normalizeNewlines replaced the chars on the last line with '\n' chars.

  • Changed the signature of Helper.SkipOverWhitespace.

Version 0.7.3, 2008‒12‒08

Breaking changes (all of which should have little or no impact on existing code bases):

  • CharStream.Iterator instances now compare equal if and only if they belong to the same CharStream and point to the same index (previously they only compared equal if their internal representations were identical)
  • the constructor argument of Error.otherError is now expected to be comparable with F#’s structural comparison function compare, see http://research.microsoft.com/fsharp/manual/spec2.aspx#_Toc207785725
  • the signature of the second ParserError.ToString overload has changed
  • CharParser.errorToString and printErrorLine have been deprecated

New features:

  • reimplemented the error formatting code in FParsec.Error
  • added new State<_>.AdvanceTo and CharStream.Iterator.Advance overloads
  • slightly modified the error reporting in Primitives.sepEndBy
  • some documentation fixes

Version 0.7.2, 2008‒11‒17

  • added CharParser.OperatorPrecedenceParser
  • changed the overflow checking in pint32 such that it will not be affected by an expected future change in F#’s int32 -> uint64 conversion behaviour
  • added CharParser.pint16, puint16, pint8, puint8
  • changed the signatures in CharParser.fsi to use the Parser<_,_> type abbreviation
  • fixed outdated documentation of CharParser.expectedError
  • some minor optimizations

Version 0.7.1, 2008‒09‒29

Breaking changes:

  • renamed Primitives.Reply._tag member to Flags and gave it a proper enumeration type
  • CharParser.State is now a reference type
  • Removed CharParser.State.Flags member
  • deprecated Primitives.reconstructError

Version 0.7.0.1, 2008‒09‒23

Breaking change:

  • changed the case of the FParsec.Error.Pos members (This wasn’t already done in 0.7 because of an oversight.)

Version 0.7.0, 2008‒09‒13

Bugfixes:

  • made FParsec.Error.Pos IComparable to prevent ParserError.ToString from throwing an exception under rare circumstances
  • corrected the argument checking for some CharStream.Iterator methods for very large arguments

New features:

  • compatibility with the F# CTP release
  • a configurable parser for number literals: CharParser.numberLiteral
  • CharParser.pfloat now also parses NaN, Infinity and hexadecimal floating point literals as supported by IEEE754r, C99 and Java (but different from the hex representation supported by F#)
  • new helper functions CharParser.floatToHexString, floatOfHexString, float32ToHexString and float32OfHexString
  • integer parsers: CharParser.pint32, pint64, puint32, puint64
  • new sample: a JSON parser
  • various optimizations and some code cleanup
  • new CharStream.Iterator members ReadUntil, Increment and Decrement
  • new State member AdvanceTo
  • new function Primitives.createParserForwardedToRef
  • new combinator |>>= in Primitives

Breaking changes:

  • renamed the parsers char and string to pchar and pstring (This is in deference to the built‐in F# functions char and string, which weren’t yet around when the first version of FParsec was released.)
  • changed the case of the properties of the Reply and State types (This reflects the emerging consensus in the F# community that all public members of types should be named in PascalCase.)
  • deprecated State.AdvanceNL (use the 3 parameter Advance overload instead)
  • deprecated the Primitives helper functions isOk, isEmpty, … (the Reply properties IsOk, IsEmpty,… should be used instead)
  • deprecated the CharParser helper functions matchChar, readChar, … (the State.Iter methods Match, Read, … should be used instead)
  • deprecated Primitives.option, <|>$ should be used instead
  • made CharParser.CharList internal (If you need this helper class for your code, just copy the implementation to your source.)
  • State.Flags() now has more bits (and fewer bits are reset on a position change)

Version 0.6.0, 2008‒05‒20

  • fixed a bug in manyTillString (the code keeping track of newlines was buggy)
  • fixed a bug in CharParser.<?> (the error reporting was inconsistent with Primitives.<?> in the rare case where <?> is applied inside an attempt (...) <?> label clause to a parser that returns an EmptyOk reply)
  • various changes for F# 1.9.4.15
  • added skipped parser to CharParser
  • added nextCharSatisfiesNot, prevCharSatisfiesNot, currCharSatisfies, currCharSatisfiesNot to CharParser module; the behaviours of the existing nextCharSatisfies and prevCharSatisfies were slightly changed (see fparsec.html for more details)
  • added TryWith and TryFinally members to Primitives.ParserCombinator
  • added triple and quad parsers to Primitives module
  • set CompilationRepresentationFlags.PermitNull for Error.ParserError
  • various optimizations
  • some documentation fixes, including corrections for the docs of the CharParser error generation helper functions (expectedError etc.)

Version 0.5.1, 2008‒01‒20

  • added pipe2, pipe3 and pipe4 primitives
  • replaced count and skipCount primitives with optimized versions
  • minor optimizations in spaces and spaces1
  • added pfloat char parser
  • minor documentation fixes

Version 0.5.0, 2008‒01‒15

  • Major design change: all lazy computations were removed and the types Output and Reply unified. The new implementation is considerably simpler and also compiles with F# 1.9.3.7.
  • Fixed a bug in build.bat (reported by Santosh Zachariah ‐ thanks Santosh!)

Version 0.4.4, 2008‒01‒13

  • fixed a minor issue in CharParser.attempt
  • added .>>! and >>.! primitives
  • added skipManySatisfy and skipMany1Satisfy char parsers

Version 0.4.3, 2008‒01‒12

  • fixed bugs in the CharParser versions of <?> and attempt.
  • added >>? primitive
  • added skipSatisfy and skipSatisfyL char parsers
  • minor documentation fixes

Version 0.4.2, 2008‒01‒04

  • performance improvements in CharStream.Iterator
  • minor documentation fixes

Version 0.4.1, 2008‒01‒02

  • documentation fixes
  • new sample application: a parser for Parsing Expression Grammars
  • newline and unicodeNewline now return '\n', instead of 1 or 2
  • added whitespace parser and changed unicodeWhitespace
  • added spaces parser (equivalent to skipManyChars whitespace)
  • removed newlineRepl parameter from manyTillString
  • added skipManyTill and skipManyCharsTill
  • generalized types of skipManyChars and skipManyChars1

Version 0.4.0, 2007‒12‒30

Initial public release

================================================ FILE: Doc/html/about/contact.html ================================================ Contact

1.4 Contact

1.4.1 Contact

FParsec currently doesn’t have its own discussion forum or mailing list.
(Please let me know if you’d like that to be changed.)

Currently the best place to get a quick answer to any FParsec‐related question is: StackOverflow.com.

You can also email me (Stephan) directly at: fparsec [at] quanttec.com. Please don’t hesitate to contact me with any feedback or question regarding FParsec. I’m always happy to hear from FParsec users.

1.4.2 Impressum

Author: Stephan Tolksdorf

Address:
Geschwister‐Scholl‐Allee 253
25524 Itzehoe
Germany

================================================ FILE: Doc/html/about/fparsec-vs-alternatives.html ================================================ FParsec vs alternatives

1.1 FParsec vs alternatives

The following tables contain a bullet‐point comparison between FParsec and the two main alternatives for parsing with F#: parser generator tools (e.g. fslex & fsyacc) and “hand‐written” recursive descent parsers.

Table 1.1.1: Relative advantages
Parser‐generator tools FParsec Hand‐written
recursive‐descent parser
  • Declarative and easy‐to‐read syntax
  • Ensures adherence to grammar formalism
  • Can check for certain kinds of ambiguity in grammar
  • You don’t have to think about performance. Either the generated parser is fast enough, or not. There’s not much you can do about it.
  • Implemented as F# library, so no extra tools or build steps
  • Parsers are first‐class values within the language
  • Succinct and expressive syntax
  • Modular and easily extensible
  • Extensive set of predefined parsers and combinators
  • Semi‐automatically generated, highly readable error messages
  • Supports arbitrary lookahead and backtracking
  • Runtime‐configurable operator‐precedence parser component
  • Does not require a pre‐parsing tokenization phase
  • Comprehensive documentation
  • Extensively unit‐tested
  • No extra tools or build steps
  • Most amenable to individual requirements
  • Potentially as fast as technically possible
  • Parsers are relatively portable if you stick to simple language features and keep library dependencies to a minimum
Table 1.1.2: Relative disadvantages
Parser‐generator tools FParsec Hand‐written
recursive‐descent parser
  • Restricted to features of grammar formalism
  • Extra tools and compilation steps
  • Reliance on an opaque generator tool that is often hard to debug, optimize or extend
  • Static grammar that can’t be changed at runtime
  • Often hard to generate good error messages
  • Many tools generate comparatively slow parsers
  • Some tools have only limited Unicode support
  • Portability problems
  • Tradeoff between declarativeness and performance
  • Syntax less readable than PEG or Regular Expression syntax
  • Left‐recursive grammar rules have to be rewritten
  • Does not support a pre‐parsing tokenization phase
  • You have to learn the API
  • Limited to F#
  • Code‐dependence on FParsec
  • Aggressive performance optimizations add complexity to parts of the lower‐level FParsec source code
  • You have to write everything yourself, which can take a lot of effort
  • Implementing (fast) parsers requires some experience
  • Expression (sub)grammars with infix operators can be ugly and inefficient to parse with a pure recursive‐descent parser, so you might also have to write some kind of embedded operator precedence parser
================================================ FILE: Doc/html/about/index.html ================================================ About FParsec
================================================ FILE: Doc/html/about/status-and-roadmap.html ================================================ Status and roadmap

1.2 Status and roadmap

1.2.1 Status

FParsec has been in development for several years and can now be considered “stable”.

Version 1.0 of FParsec was released on 19 July 2012.

Note

Although FParsec has rather comprehensive unit tests (with code coverage close to 100% for many components), it likely still contains bugs. If you want to use FParsec in a production environment, you need to test your parsers thoroughly.

1.2.2 Future development

There are no firm plans for any major new features yet.

One goal for the future development of FParsec is to support a more declarative parser definition syntax without compromising on FParsec’s performance or language‐integrated nature.

For example, it would be nice if FParsec provided a way to automatically create optimized lexer functions from a series of typed regular expressions and associated mapping functions, ideally at compile time. Using such a feature could maybe look similar to

let lexer : Parser<AstNode, 'u> = 
    lex ["regex-with-1-capture-group", (fun x -> AstNode1(x))
         "regex-with-2-capture-groups", (fun x y -> AstNode2(x, y))
         (* ... *)]
================================================ FILE: Doc/html/css/print.css ================================================ /* #wrapper { max-width: none; } */ a, a code { color: #000000; text-decoration: underline; } code, .code { background-color: transparent; } code a, .code a { color: #000000; text-decoration: none; border: 0; } span.ck /* keyword */ /*, span.cb, span.cnu */ { color: #000000; } span.cr /* right arrow */ { color: #000000; } span.cc /* char literal */, span.cs /* string literal */ { color: #000000; } span.ce /* escaped char */ { color: #000000; } span.clc /* line comment*/, span.cbc /* block comment */ { color: #000000; } .interface-member-code pre { overflow: visible; white-space: pre; word-wrap: normal; } .interface-member-description { padding-left: 1em; } .interface-member-backlink { display: none; } ================================================ FILE: Doc/html/css/screen-sidebar.css ================================================ body { background-color: #e9e9e9; } html { height: 100%; overflow-y: scroll; } body { height: 100%; } #fixed-layer { display: block; position: fixed; left: 0px; top: 0px; width: 100%; height: 100%; overflow: hidden; } #wrapper { height: 100%; min-height: 100%; max-width: 73em; margin-left: auto; margin-right: auto; } #fixed-wrapper { position: relative; max-width: 73em; width: 100%; height: 100%; margin-left: auto; margin-right: auto; background-color: #ffffff; border: solid 1px #d9d9d9; } #sidebar { position: absolute; z-index: 2; left: 0; top: 0; width: 22.5em; height: 100%; overflow: auto; } #main { position: relative; z-index: 3; left: 0; top: 0; min-height: 100%; min-width: 40em; margin-left: 22.5em; background-color: #ffffff; border-left: solid 1px #d9d9d9; } #main-content { padding: 0 1.5em 1.5em 1.5em; } #top-links { margin-left: 1.5em; padding-top: 0.5em; margin-bottom: 0.5em; margin-top: -0.07143em; } #top-links span { font-size: 0.9286em; color: #333; } #top-links a { color: #333; } #nav-tree { 
margin-left: 1.5em; margin-right: 1.5em; margin-top: 1.5em; } #nav-tree > table { margin-top: -0.07143em; } #nav-tree table, #nav-tree td.nav-title { width: 100%; } .nav-entry td.n1 { font-size: 1.5em; padding-bottom: 0.6667em; line-height: 1em; } .nav-entry td.n1 a { color: #000000; } .nav-number { white-space: nowrap; min-width: 1em; } .nav-number.n1 { min-width: 0; } .nav-number a { padding-right: 0.5em; } .nav-number { text-align: right; /* must be right, even for left aligned number (for a continuous underline between number and title) */ } .nav-space { display: none; /*uncomment for left-aligned numbers*/ /* display: inline-block; width: 100%; height: 0px; vertical-align: middle; */ } tbody.selected { background-color: #f8f8f8; } .nav-entry a { outline: none; } .nav-entry .selected a { color: #000000; } .nav-entry:hover a { text-decoration: none; border-bottom: 1px solid; } #copyright { margin-left: 1.5em; margin-top: 0.8em; } #copyright span { font-size: 0.9286em; margin-left: 0.1em; color: #333; } #copyright a { color: #333; } ================================================ FILE: Doc/html/css/style-ie.css ================================================ code { white-space: nowrap; /* pre breaks the layout, even in ie8 */ } ================================================ FILE: Doc/html/css/style-ie6.css ================================================ #fixed-layer { display: none; } #main { margin-left: 0; border-right: solid 1px #d9d9d9; } ================================================ FILE: Doc/html/css/style.css ================================================ /* reset */ html, body, div, span, h1, h2, h3, h4, h5, h6, p, pre, a, code, em, strong, img, dl, dt, dd, ol, ul, li, table, caption, thead, tbody, tfoot, th, tr, td { margin: 0; padding: 0; border: 0; font-family: inherit; font-size: 100%; font-weight: inherit; font-style: inherit; vertical-align: baseline; text-align:left; text-decoration: none; } table { border-collapse: collapse; 
border-spacing: 0; } /* layout */ #fixed-layer { display: none; } #wrapper { max-width: 47.5em; margin-left: auto; margin-right: auto; } pre { overflow:auto; } .interface-member-code pre { overflow: visible; white-space: pre-wrap; /* css-3 */ white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ white-space: -pre-wrap; /* Opera 4-6 */ white-space: -o-pre-wrap; /* Opera 7 */ word-wrap: break-word; /* Internet Explorer 5.5+ */ } /* typography */ body { font-family: "Cambria", "Georgia", "Palatino", "Palatino Linotype", "Times", "Times New Roman", serif; font-size: 87.5%; /* 14px */ line-height: 1.429em; } pre, code, .tt { font-family: Consolas, "DejaVu Sans Mono", "Bitstream Vera Sans Mono", "Lucida Sans Typewriter", "Courier New", "Courier"; /* can't put monospace in there because otherwise WebKit browsers would change the default size */ font-size: 0.9286em; /* 13px */ font-size-adjust: 0.461; /* at the same font size Consolas and Courier New have a considerably smaller x-height than the other fonts*/ line-height: 1.25em; } .title .section-number { /* Georgia's old-style figures don't look good in section numbers like 1.2.3.4 */ font-family: "Cambria", /* "Georgia", */ "Palatino", "Palatino Linotype", "Times", "Times New Roman", serif; } h1, h2, h3, h4, h5, h6 { font-weight: normal; /* for IE */ } h1 { font-size: 1.7143em; line-height: 1em; margin-top: 0.75em; margin-bottom: 0.5833em; } h2, h3 { font-size: 1.5em; line-height: 1em; margin-top: 1em; margin-bottom: 0.6666em; } .para { margin-bottom: 1em; } ul, ol { padding-left: 2em; } ul { list-style-type: disc; } ol { list-style-type: decimal; } ul.l2 { list-style-type: circle; } ul.l3 { list-style-type: square; } ul.l4 { list-style-type: circle; } ul.l5 { list-style-type: square; } div.dl { margin-top: 1em; margin-bottom: 1em; } .dl-title { font-size: 1.3em; line-height: 1em; } dt { margin-top: 1em; font-style: italic; } dd { padding-left: 2em; } dd > .para > ul:first-child { padding-left: 0em; } .fn-title { 
font-weight: bold; margin-top: 3em; margin-bottom: 0.5em; } th.fn { padding-right: 1em; } th.fn, td.fn { padding-bottom: 1em; } em, .i { font-style: italic; } strong, .b { font-weight: bold; } .s { text-decoration: line-through; } .small { font-size: 0.9em; } sup { font-size: 0.85em; line-height: 1em; } sup.fn-mark { font-size: 0.8em; line-height: 1em; } /* design */ #breadcrumbs { padding-top: 0.5em; margin-bottom: 0.5em; } .toc-toc-title { font-size: 1.5em; line-height: 1em; margin-top: 1em; margin-bottom: 0.3333em; } .local-toc ol { list-style-type:none; padding-left: 0; } td.toc-title { width: 100%; } .toc-number { white-space: nowrap; min-width: 1em; } .toc-number a { padding-right: 0.5em; } .toc-number { text-align: right; /* must be right, even for left aligned number (for a continuous underline between number and title) */ } .toc-space { display: none; /*uncomment for left-aligned numbers*/ /* display: inline-block; width: 100%; height: 0px; vertical-align: middle; */ } .toc-entry .selected a { color: #000000; } .toc-entry a { outline: none; } .toc-entry:hover a { text-decoration: none; border-bottom: 1px solid; } a { color: #003399; } a:hover { border-bottom-style: solid; border-bottom-width: 1px; } a:target, span:target, a:target code, span:target code, .interface-member:target > .interface-member-code .interface-member-marker, .table:target > table > caption .table-title { background-color: #dddddd; } div:target > .title > span, div:target > .title > span > code { background-color: #e4e4e4; } .para.lcinp + .para>.admonition { margin-top: 1.36em; } .admonition { border-top: 1px solid #999999; border-bottom: 1px solid #999999; padding-top: 0.2em; padding-bottom: 0.3em; margin-top: 1em; margin-bottom: 1em; } .admonition-title { font-weight: bold; font-style: italic; } .admonition .para { margin-top: 1em; margin-bottom: 0em; } .admonition .para._1 { margin-top: 0em; } code { white-space: pre; margin-left: 1px; margin-right: 1px; background-color: #f8f8f8; } 
.code { background-color : #f8f8f8; padding-bottom: 4px; } .code + .code { border-top: 1px solid #dcdcdc; padding-top: 3px; } p + .code { margin-top: 1px; } .code + p { margin-top: 1px; } .interface-member-code .code, .interface-code .code { padding: 2px; border-top: 1px solid #dcdcdc; border-bottom: 1px solid #dcdcdc; } code a, .code a { color: #000000; text-decoration: none; border-bottom: 1px dashed #c8c8c8; } code a:hover, .code a:hover { text-decoration: none; border-bottom-style: solid; } a code, .title code { color: inherit; background-color: transparent; } .title code a { border-bottom-style:none; } .title code a:hover { border-bottom-style:solid; } a code span, .title code span { color: inherit !important; } /* .cb: boolean literal .cbc: block comment .cc: char literal .ce: escaped char .cei: escaped identifier .ci: identifier .cin: invalid .ck: keyword .clc: line comment .cm: member access/scope resolution operator .cn: number literal .cnu: null literal .co: operator .col: other literal .cp: punctuation .cpr: preprocessor .cr: right arrow (in functional languages) .cra: range operator/ ellipsis .cre: reserved .cs: string literal .ctv: type variable .cv: void */ span.ci /* identifier */, span.tv /* type variable */, span.cb /* boolean literal */, span.cn /* number literal */, span.cnu /* null literal */, span.cp /* punctuation */, span.cra /* range operator/ ellipsis */, span.cm /* member access/scope resolution operator */, span.co /* operator */ { color: #000000; } span.ck /* keyword */, span.cbt, span.cv { color: #0048a0; } span.cr /* right arrow */ { color: #0048a0; } span.cc /* char literal */, span.cs /* string literal */ { color: #940c00; } span.ce /* escaped char */ { color: #d0532c; } span.clc /* line comment*/, span.cbc /* block comment */ { color: #007e1e; } span.cin /* invalid */ { color: White; background-color: red; } span.cpr /* preprocessor */ { color: #009293; } /* the following definitions are for debugging the syntax highlighter */ /* 
span.cp { color: red; } span.co { color: Fuchsia; } */ /* interface reference */ .interface-member { margin-bottom: 2em; } .interface-member-code { margin-bottom: 0.5em; } .interface-member-backlink { float: right; color: #000000; font-family: "Lucida Sans Unicode", "Lucida Grande", "Lucida Sans", "DejaVu Sans", sans-serif; text-decoration: none; font-size: 16px; line-height: 20px; margin-bottom: -2px; } .interface-member-backlink:hover { border-bottom: 0; } .interface-member-description { padding-left: 2.075em; } .interface-member-marker { font-weight: bold; } .interface-member-marker .ck { color: #003C85; } /* tables */ .table { margin-top: 0.2em; } .table table { width: 100%; border-top: 1px solid #000000; border-bottom: 1px solid #000000; } .table thead th { font-size: 1.1em; font-weight: bold; border-top: 1px solid #000000; border-bottom: 1px solid #000000; } .table .table-caption-prefix { font-style: normal; margin-right: 0.2em; font-weight: bold; } .table caption { font-style: italic; padding-bottom: 0.3em; } .table td, .api-table th { padding-top: 0.3em; padding-bottom: 0.2em; } .table tbody td { border-top: 1px solid #dddddd; } .table td._1 { padding-right: 1em; } .api-table { margin-top: 2em; } #v0_9\.renamings caption { display: none; } .table td > ul { padding-left: 1em; } #relative-advantages thead th { padding-left: 1em; } #relative-advantages td._1 { width: 34%; padding-right: 1em; } #relative-advantages td._2 { width: 35%; padding-right: 1em; } #relative-disadvantages { margin-top: 1.5em; } #relative-disadvantages thead th { padding-left: 1em; } #relative-disadvantages td._1 { width: 34%; padding-right: 1em; } #relative-disadvantages td._2 { width: 35%; padding-right: 1em; } /* parser overview tables */ #parsing-single-chars td._1 { width: 35%; } #parsing-strings-directly td._1 { width: 38.5%; } #parsing-strings-with-the-help-of-other-parsers td._1 { width: 36%; } #parsing-whitespace td._1 { width: 35%; } #chaining-and-piping-parsers td._1 { width: 
27%; } #parsing-sequences td._1 { width: 23%; } #parsing-sequences td._2 { width: 22%; } #parsing-alternatives-and-recovering-from-errors td._1 { width: 28%; } #conditional-parsing-and-looking-ahead td._1 { width: 35%; } #customizing-error-messages td._1 { width: 28%; } #user-state-handling-and-getting-the-input-stream-position td._1 { width: 24%; } ================================================ FILE: Doc/html/download-and-installation.html ================================================ Download and installation

3 Download and installation

FParsec is distributed in source code form and as NuGet packages.

If you’re new to FParsec, I’d recommend starting by downloading the source code package and experimenting a bit with the included sample projects. With the project and solution files, building the library and the samples is as easy as clicking a button.

The source package also includes a complete copy of the HTML documentation for offline viewing.

3.1 NuGet packages

There are two NuGet packages of FParsec, which are built with different configuration options.

The basic package uses the Low‐Trust version of FParsec, which uses no unverifiable code and is optimized for maximum portability. The main limitation of this version is that any input stream is completely read into a string before parsing, which limits the maximum practical input size. This package also contains assemblies for .NET Standard 2.0.

The “Big Data edition” package uses the non‐Low‐Trust version of FParsec that is optimized for maximum performance and supports extremely large input streams. Since this configuration is also the default configuration of the solution files included with the source code, it is sometimes referred to as the “normal” version of FParsec. This version of FParsec does use “unsafe” (i.e. unverifiable) code involving unmanaged pointers. It also uses code generation in the implementation of isAnyOf, isNoneOf, anyOf, skipAnyOf, noneOf and skipNoneOf. Unfortunately, this version is currently not compatible with .NET Standard/.NET Core.

Should you measure a significant performance degradation when switching to the Big Data edition, you’re probably inadvertently recreating the same isAnyOf‐ or isNoneOf‐based parsers again and again, as explained here and here.

The .NET Framework assemblies in the NuGet packages are strongly signed. Their assembly version numbers will only be incremented for breaking changes. The .NET Standard assembly in the FParsec package is not signed.

The NuGet packages include PDBs and SourceLink support, which should allow you to step through FParsec code in the debugger of your IDE.

3.2 Getting the source

FParsec’s source code repository is hosted on GitHub at: github.com/stephan‐tolksdorf/fparsec

You can clone the source code using Git or you can download it as a zip‐file.

It’s an FParsec project policy to check only stable and tested code into the master branch of the GitHub repository, so you can normally just work with the master version of FParsec.

Tip

Fork is a great free GUI for Git for Windows and MacOS.

3.3 FParsec is built as two DLLs

FParsec’s source code is written in both C# and F#. Since neither the C# nor the F# compiler directly supports the other language, the respective components need to be built separately.

Hence, FParsec is built as two DLLs. The C# bits are compiled into the FParsecCS.dll and the F# bits (which depend on the C# bits) are compiled into FParsec.dll.

Projects that use FParsec thus have to reference both DLLs.

If you reference the DLLs in the F# Interactive console, you need to reference FParsecCS.dll before you reference FParsec.dll.

Note

If you don’t want to distribute the FParsec DLLs together with the assembly of your project, you can use the staticlink command‐line option of the F# compiler to merge the FParsec DLLs into your assembly.

Unfortunately, the same option cannot be used to merge FParsecCS.dll into the FParsec.dll, as the public definitions in FParsecCS.dll wouldn’t be reexported by FParsec.dll. For similar reasons it also doesn’t seem to be possible to use tools like ILMerge or il‐repack to obtain a merged FParsec.dll that can be properly consumed by F# programs.

3.4 Building FParsec from source

The solution file FParsec.sln in the root source folder and the associated project files in the subfolders can be used to build FParsec from the command line or with IDEs such as Visual Studio 2019 or JetBrains Rider.

To build the Low‐Trust version of FParsec, you have to specify either Debug-LowTrust or Release-LowTrust as the configuration. The Debug and Release configurations build the non‐Low‐Trust version of FParsec, which currently is not compatible with the .NET Core runtime.

Note

In contrast to JetBrains Rider, Visual Studio 2019 currently does not support setting the supported target frameworks depending on the configuration. Due to this issue one currently has to use the separate FParsec-LowTrust.sln solution for building the Low‐Trust version of FParsec in VS 2019.

The Test project in the solution files contains the unit tests for FParsec.

The file .vscode/tasks.json contains some convenient task definitions for Visual Studio Code.

The NuGet packages are built with the pack.ps1 PowerShell script.

3.5 The Low‐Trust version of FParsec

For optimization reasons the normal implementation (the “Big Data edition”) of FParsec involves unverifiable code using unmanaged pointers and runtime code generation.

If you compile FParsec with the LOW_TRUST conditional compiler symbol, the unverifiable code is replaced with a “safe” alternative. This allows FParsec to be run in environments with “reduced trust”, such as medium trust ASP.NET applications, and it also allows FParsec to be compiled against reduced subsets of the .NET API.

In the Debug-LowTrust and Release-LowTrust configurations of the FParsec.sln solution file in the root source folder, LOW_TRUST is automatically defined as true.

The Low‐Trust version of FParsec has the following two major limitations:

  • A CharStream that is constructed from a System.IO.Stream or a file path reads the complete file into a single string during construction. This severely limits the maximum practical input stream size.
  • The StaticMapping module is not supported.

3.6 Configuration options

You can configure FParsec’s source code with a number of conditional compilation symbols (a.k.a. preprocessor defines). Besides the Low‐Trust option, these symbols mostly serve tuning purposes.

Options for FParsecCS.dll
LOW_TRUST

See above.

AGGRESSIVE_INLINING

Requires a version of .NET ≥ 4.5.

Annotates some functions with the MethodImplOptions.AggressiveInlining attribute.

PCL

Compile for a PCL subset of the .NET API.

SMALL_STATETAG

Use a 32‐bit StateTag in the CharStream class instead of the default 64‐bit one.

This is an optimization for 32‐bit runtimes. You can find more information about the state tag in section 5.4.3 of the user’s guide.

UNALIGNED_READS

This option does not affect the Low‐Trust version of FParsec.

Optimize for CPUs that support fast unaligned memory reads, i.e. any modern x86‐based CPU.

This option only makes a noticeable difference in some specific situations.

Options for FParsec.dll
LOW_TRUST

See above.

UNALIGNED_READS

See above.

NOINLINE

Do not force inlining of certain parser combinators.

This option enables you to step through the respective combinators during debugging.

USE_STATIC_MAPPING_FOR_IS_ANY_OF

This option does not affect the Low‐Trust version of FParsec.

Use StaticMapping.createStaticCharIndicatorFunction for the implementation of isAnyOf, isNoneOf, anyOf, skipAnyOf, noneOf and skipNoneOf for generating optimized char predicate functions using runtime code generation.

Runtime code generation is a relatively expensive operation, so this optimization is primarily meant for parsers that are applied to large (or lots of) input streams. Please see the remarks for the StaticMapping module for more information.

If you run into noticeable performance problems or memory leaks when enabling this option, you’re probably inadvertently recreating the same isAnyOf‐ or isNoneOf‐based parser again and again, as explained here and here.

DEBUG_STATIC_MAPPING

This option does not affect the Low‐Trust version of FParsec.

================================================ FILE: Doc/html/index.html ================================================ FParsec Documentation

FParsec Documentation

FParsec is a parser combinator library for F#.

With FParsec you can implement recursive‐descent text parsers for formal grammars.

FParsec’s features include:

  • support for context‐sensitive, infinite look‐ahead grammars,
  • automatically generated, highly readable error messages,
  • Unicode support,
  • efficient support for very large files,
  • an embeddable, runtime‐configurable operator‐precedence parser component,
  • a simple, efficient and easily extensible API,
  • an implementation thoroughly optimized for performance,
  • comprehensive documentation,
  • a permissive open source license.

FParsec is an F# adaptation of Parsec, the popular parser combinator library for Haskell by Daan Leijen. While the implementations of Parsec and FParsec are completely different, they share a similar top‐level API.

Latest release: FParsec 2.0.0, 2022‒11‒01, Download, NuGet packages, Changes

================================================ FILE: Doc/html/license.html ================================================ License

2 License

Except where noted otherwise, the FParsec library in source and binary form is distributed under the Simplified BSD License. The Simplified BSD License (a.k.a. “2‐clause BSD License”) is a simple, permissive license that is OSI‐compliant.

FParsec incorporates data derived from the Unicode Character Database v. 8.0.0, Copyright (c) 1991‒2015 Unicode, Inc., which is distributed under the following terms:
http://www.unicode.org/terms_of_use.html#Exhibit1

The documentation in the Doc folder is licensed under the Creative Commons Attribution‐NonCommercial 3.0 Unported License. This Creative Commons license does not allow you to use the documentation for commercial purposes without permission. This means, for example, that you cannot sell the documentation in book form for profit or put it on a web content farm in order to earn money with ads. However, you can of course use the documentation in a commercial context (e.g. put it on the intranet of a commercial corporation), as long as you’re not trying to directly earn money from the text of the documentation.

2.1 Simplified BSD License

Copyright (c) 2007‒2022, Stephan Tolksdorf. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

  • Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
  • Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

This software is provided by the copyright holders “as is” and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the copyright holders be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.

2.2 Creative Commons Attribution‐NonCommercial 3.0 Unported License

================================================ FILE: Doc/html/reference/charparsers.html ================================================ FParsec.CharParsers

6.3 FParsec.CharParsers

6.3.1 Interface

// FParsec.dll

[<AutoOpen>] // module is automatically opened when FParsec namespace is opened
module FParsec.CharParsers

open FParsec.Error
open FParsec.Primitives

// Running parsers on input
// ========================
type ParserResult<'Result,'UserState>=
     | Success of 'Result * 'UserState * Position
     | Failure of string * ParserError * 'UserState

val runParserOnString:
         Parser<'a,'u> -> 'u -> streamName: string -> string
      -> ParserResult<'a,'u>

val runParserOnSubstring:
        Parser<'a,'u> -> 'u -> streamName: string -> string -> int -> int
     -> ParserResult<'a,'u>

val runParserOnStream:
        Parser<'a,'u> -> 'u -> streamName: string
     -> System.IO.Stream -> System.Text.Encoding
     -> ParserResult<'a,'u>

val runParserOnFile:
        Parser<'a,'u> -> 'u -> path: string -> System.Text.Encoding
     -> ParserResult<'a,'u>

val run: Parser<'a, unit> -> string -> ParserResult<'a,unit>

// Reading the input stream position and handling the user state
// =============================================================
val getPosition: Parser<Position,'u>

val getUserState: Parser<'u,'u>
val setUserState: 'u -> Parser<unit,'u>
val updateUserState: ('u -> 'u) -> Parser<unit,'u>

val userStateSatisfies: ('u -> bool) -> Parser<unit,'u>

// Parsing single chars
// ====================
val pchar:      char ->       Parser<char,'u>
val skipChar:   char ->       Parser<unit,'u>
val charReturn: char -> 'a -> Parser<'a,'u>

val anyChar:     Parser<char,'u>
val skipAnyChar: Parser<unit,'u>

val satisfy:      (char -> bool)           -> Parser<char,'u>
val skipSatisfy:  (char -> bool)           -> Parser<unit,'u>
val satisfyL:     (char -> bool) -> string -> Parser<char,'u>
val skipSatisfyL: (char -> bool) -> string -> Parser<unit,'u>

val anyOf:      seq<char> -> Parser<char,'u>
val skipAnyOf:  seq<char> -> Parser<unit,'u>
val noneOf:     seq<char> -> Parser<char,'u>
val skipNoneOf: seq<char> -> Parser<unit,'u>

val asciiLower:  Parser<char,'u>
val asciiUpper:  Parser<char,'u>
val asciiLetter: Parser<char,'u>

val lower:  Parser<char,'u>
val upper:  Parser<char,'u>
val letter: Parser<char,'u>

val digit: Parser<char,'u> // parses '0'-'9'
val hex:   Parser<char,'u> // parses '0'-'9', 'a'-'f', 'A'-'F'
val octal: Parser<char,'u> // parses '0'-'7'

// predicate functions corresponding to the above parsers
val isAnyOf:  seq<char> ->  (char -> bool)
val isNoneOf: seq<char> ->  (char -> bool)
val inline isAsciiUpper:  char -> bool
val inline isAsciiLower:  char -> bool
val inline isAsciiLetter: char -> bool
val inline isUpper:       char -> bool
val inline isLower:       char -> bool
val inline isLetter:      char -> bool
val inline isDigit:       char -> bool
val inline isHex:         char -> bool
val inline isOctal:       char -> bool

// Parsing whitespace
// ==================
val tab:                 Parser<char,'U>

val newline:             Parser<char,'u>
val skipNewline:         Parser<unit,'u>
val newlineReturn: 'a -> Parser<'a,'u>

val unicodeNewline:             Parser<char,'u>
val skipUnicodeNewline:         Parser<unit,'u>
val unicodeNewlineReturn: 'a -> Parser<'a,'u>

val spaces:  Parser<unit,'u>
val spaces1: Parser<unit,'u>

val unicodeSpaces:  Parser<unit,'u>
val unicodeSpaces1: Parser<unit,'u>

val eof: Parser<unit,'u>

// Parsing strings directly
// ========================
val pstring:      string ->       Parser<string,'u>
val skipString:   string ->       Parser<unit,'u>
val stringReturn: string -> 'a -> Parser<'a,'u>

val pstringCI:      string ->       Parser<string,'u>
val skipStringCI:   string ->       Parser<unit,'u>
val stringCIReturn: string -> 'a -> Parser<'a,'u>

val anyString:     int32 -> Parser<string,'u>
val skipAnyString: int32 -> Parser<unit,'u>

val restOfLine:     skipNewline: bool -> Parser<string,'u>
val skipRestOfLine: skipNewline: bool -> Parser<unit,'u>

val charsTillString:
    string -> skipString: bool -> maxCount: int -> Parser<string,'u>
val skipCharsTillString:
    string -> skipString: bool -> maxCount: int -> Parser<unit,'u>

val charsTillStringCI:
    string -> skipString: bool -> maxCount: int -> Parser<string,'u>
val skipCharsTillStringCI:
    string -> skipString: bool -> maxCount: int -> Parser<unit,'u>

val manySatisfy:       (char -> bool)                   -> Parser<string,'u>
val manySatisfy2:      (char -> bool) -> (char -> bool) -> Parser<string,'u>
val skipManySatisfy:   (char -> bool)                   -> Parser<unit,'u>
val skipManySatisfy2:  (char -> bool) -> (char -> bool) -> Parser<unit,'u>

val many1Satisfy:      (char -> bool)                   -> Parser<string,'u>
val many1Satisfy2:     (char -> bool) -> (char -> bool) -> Parser<string,'u>
val skipMany1Satisfy:  (char -> bool)                   -> Parser<unit,'u>
val skipMany1Satisfy2: (char -> bool) -> (char -> bool) -> Parser<unit,'u>

val many1SatisfyL:
    (char -> bool)                   -> string -> Parser<string,'u>
val many1Satisfy2L:
    (char -> bool) -> (char -> bool) -> string -> Parser<string,'u>
val skipMany1SatisfyL:
    (char -> bool)                   -> string -> Parser<unit,'u>
val skipMany1Satisfy2L:
    (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u>

val manyMinMaxSatisfy:
    int -> int -> (char -> bool)                   -> Parser<string,'u>
val manyMinMaxSatisfy2:
    int -> int -> (char -> bool) -> (char -> bool) -> Parser<string,'u>
val skipManyMinMaxSatisfy:
    int -> int -> (char -> bool)                   -> Parser<unit,'u>
val skipManyMinMaxSatisfy2:
    int -> int -> (char -> bool) -> (char -> bool) -> Parser<unit,'u>
val manyMinMaxSatisfyL:
    int -> int -> (char -> bool)                   -> string -> Parser<string,'u>
val manyMinMaxSatisfy2L:
    int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<string,'u>
val skipManyMinMaxSatisfyL:
    int -> int -> (char -> bool)                   -> string -> Parser<unit,'u>
val skipManyMinMaxSatisfy2L:
    int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u>

val regex:  string -> Parser<string,'u>

type IdentifierOptions =
    new: ?isAsciiIdStart: (char -> bool) *
         ?isAsciiIdContinue: (char -> bool) *
         ?normalization: System.Text.NormalizationForm *
         ?normalizeBeforeValidation: bool *
         ?allowJoinControlChars: bool *
         ?preCheckStart: (char -> bool) *
         ?preCheckContinue: (char -> bool) *
         ?allowAllNonAsciiCharsInPreCheck: bool *
         ?label: string *
         ?invalidCharMessage: string -> IdentifierOptions

val identifier: IdentifierOptions -> Parser<string, 'u>

// Parsing strings with the help of other parsers
// ==============================================

val manyChars:   Parser<char,'u>                    -> Parser<string,'u>
val manyChars2:  Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u>

val many1Chars:  Parser<char,'u>                    -> Parser<string,'u>
val many1Chars2: Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u>

val manyCharsTill:
       Parser<char,'u>                    -> Parser<'b,'u> -> Parser<string,'u>
val manyCharsTill2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>
val manyCharsTillApply:
       Parser<char,'u>                    -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>
val manyCharsTillApply2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

val many1CharsTill:
       Parser<char,'u>                    -> Parser<'b,'u> -> Parser<string,'u>
val many1CharsTill2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>
val many1CharsTillApply:
       Parser<char,'u>                    -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>
val many1CharsTillApply2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

val manyStrings:   Parser<string,'u>                      -> Parser<string,'u>
val manyStrings2:  Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>
val many1Strings:  Parser<string,'u>                      -> Parser<string,'u>
val many1Strings2: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

val stringsSepBy:  Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>
val stringsSepBy1: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

val skipped: Parser<unit,'u> -> Parser<string,'u>

val withSkippedString: (string -> 'a -> 'b) -> Parser<'a,'u> -> Parser<'b,'u>

// Parsing numbers
// ===============
type NumberLiteralOptions = //...

type NumberLiteral = //...

val numberLiteral:  NumberLiteralOptions -> string -> Parser<NumberLiteral,'u>
val numberLiteralE:
       NumberLiteralOptions -> errorInCaseNoLiteralFound: ErrorMessageList
    -> CharStream<'u> -> Reply<NumberLiteral>

val pfloat: Parser<float,'u>

val pint64: Parser<int64,'u>
val pint32: Parser<int32,'u>
val pint16: Parser<int16,'u>
val pint8:  Parser<int8,'u>

val puint64: Parser<uint64,'u>
val puint32: Parser<uint32,'u>
val puint16: Parser<uint16,'u>
val puint8:  Parser<uint8,'u>

// Conditional parsing
// ===================

val notFollowedByEof: Parser<unit,'u>

val followedByNewline: Parser<unit,'u>
val notFollowedByNewline: Parser<unit,'u>

val followedByString:      string -> Parser<unit,'u>
val followedByStringCI:    string -> Parser<unit,'u>
val notFollowedByString:   string -> Parser<unit,'u>
val notFollowedByStringCI: string -> Parser<unit,'u>

val nextCharSatisfies:        (char -> bool)         -> Parser<unit,'u>
val nextCharSatisfiesNot:     (char -> bool)         -> Parser<unit,'u>
val next2CharsSatisfy:        (char -> char -> bool) -> Parser<unit,'u>
val next2CharsSatisfyNot:     (char -> char -> bool) -> Parser<unit,'u>
val previousCharSatisfies:    (char -> bool)         -> Parser<unit,'u>
val previousCharSatisfiesNot: (char -> bool)         -> Parser<unit,'u>

// Helper functions
// ================
[<Literal>]
val EOS: char = CharStream.Iterator.EndOfStreamChar

val foldCase: string -> string

val normalizeNewlines: string -> string

val floatToHexString:   float -> string
val floatOfHexString:   string -> float
val float32ToHexString: float32 -> string
val float32OfHexString: string -> float32

6.3.2 Members

type ParserResult<'Result,'UserState>

Values of this union type are returned by the runParser functions (not by Parser<_,_> functions).

| Success of 'Result * 'UserState * Position

Success(result, userState, endPos) holds the result and the user state returned by a successful parser, together with the position where the parser stopped.

| Failure of string * ParserError * 'UserState

Failure(errorAsString, error, userState) holds the parser error and the user state returned by a failing parser, together with the string representation of the parser error. The ParserError value error contains an ErrorMessageList and the position and user state value associated with the error.

val runParserOnString:
         Parser<'a,'u> -> 'u -> streamName: string -> string
      -> ParserResult<'a,'u>

runParserOnString p ustate streamName str runs the parser p on the content of the string str, starting with the initial user state ustate. The streamName is used in error messages to describe the source of the input (e.g. a file path) and may be empty. The parser’s Reply is captured and returned as a ParserResult value.

val runParserOnSubstring:
        Parser<'a,'u> -> 'u -> streamName: string -> string -> int -> int
     -> ParserResult<'a,'u>

runParserOnSubstring p ustate streamName str index count runs the parser p directly on the content of the string str between the indices index (inclusive) and index + count (exclusive), starting with the initial user state ustate. The streamName is used in error messages to describe the source of the input (e.g. a file path) and may be empty. The parser’s Reply is captured and returned as a ParserResult value.

val runParserOnStream:
        Parser<'a,'u> -> 'u -> streamName: string
     -> System.IO.Stream -> System.Text.Encoding
     -> ParserResult<'a,'u>

runParserOnStream p ustate streamName stream encoding runs the parser p on the content of the System.IO.Stream stream, starting with the initial user state ustate. The streamName is used in error messages to describe the source of the input (e.g. a file path) and may be empty. In case no Unicode byte order mark is found, the stream data is assumed to be encoded with the given encoding. The parser’s Reply is captured and returned as a ParserResult value.

val runParserOnFile:
        Parser<'a,'u> -> 'u -> path: string -> System.Text.Encoding
     -> ParserResult<'a,'u>

runParserOnFile p ustate path encoding runs the parser p on the content of the file at the given path, starting with the initial user state ustate. In case no Unicode byte order mark is found, the file data is assumed to be encoded with the given encoding. The parser’s Reply is captured and returned as a ParserResult value.

val run: Parser<'a, unit> -> string -> ParserResult<'a,unit>

run parser str is a convenient abbreviation for runParserOnString parser () "" str.

val getPosition: Parser<Position,'u>

The parser getPosition returns the current position in the input stream.

getPosition is defined as fun stream -> Reply(stream.Position).

val getUserState: Parser<'u,'u>

The parser getUserState returns the current user state.

getUserState is defined as fun stream -> Reply(stream.UserState).

val setUserState: 'u -> Parser<unit,'u>

The parser setUserState u sets the user state to u.

setUserState u is defined as

fun stream ->
    stream.UserState <- u
    Reply(())
val updateUserState: ('u -> 'u) -> Parser<unit,'u>

updateUserState f is defined as

fun stream ->
    stream.UserState <- f stream.UserState
    Reply(())
val userStateSatisfies: ('u -> bool) -> Parser<unit,'u>

The parser userStateSatisfies f succeeds if the predicate function f returns true when applied to the current UserState, otherwise it fails.

Note

If the parser userStateSatisfies f fails, it returns no descriptive error message; hence it should only be used together with other parsers that take care of a potential error.

val pchar: char -> Parser<char,'u>

pchar c parses the char c and returns c. If c = '\r' or c = '\n' then pchar c will parse any one newline ("\n", "\r\n" or "\r") and return c.

val skipChar: char -> Parser<unit,'u>

skipChar c is an optimized implementation of pchar c |>> ignore.

val charReturn: char -> 'a -> Parser<'a,'u>

charReturn c result is an optimized implementation of pchar c >>% result.

val anyChar: Parser<char,'u>

anyChar parses any single char or newline ("\n", "\r\n" or "\r"). Returns the parsed char, or '\n' in case a newline was parsed.

val skipAnyChar: Parser<unit,'u>

skipAnyChar is an optimized implementation of anyChar |>> ignore.

val satisfy: (char -> bool) -> Parser<char,'u>

satisfy f parses any one char or newline for which the predicate function f returns true. It returns the parsed char. Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. Thus, to accept a newline f '\n' must return true. f will never be called with '\r' and satisfy f will never return the result '\r'.

For example, satisfy (fun c -> '0' <= c && c <= '9') parses any decimal digit.

Note

If the parser satisfy f fails, it returns no descriptive error message (because it does not know what chars f accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, satisfyL f label can be used to ensure a more descriptive error message.

val skipSatisfy: (char -> bool) -> Parser<unit,'u>

skipSatisfy f is an optimized implementation of satisfy f |>> ignore.

val satisfyL: (char -> bool) -> string -> Parser<char,'u>

satisfyL f label is an optimized implementation of satisfy f <?> label.

val skipSatisfyL: (char -> bool) -> string -> Parser<unit,'u>

skipSatisfyL f label is an optimized implementation of skipSatisfy f <?> label.

val anyOf: seq<char> -> Parser<char,'u>

anyOf chars parses any char contained in the char sequence chars. It returns the parsed char. If chars contains the char '\n', anyOf chars parses any newline ("\n", "\r\n" or "\r") and returns it as '\n'. (Note that it does not make a difference whether or not chars contains '\r' and that anyOf chars will never return '\r'.)

For example, anyOf ". \t\n" will parse any of the chars '.', ' ', '\t' or any newline.

anyOf chars is defined as satisfy (isAnyOf chars).

For performance critical parsers it might be worth replacing instances of anyOf in loops with a manySatisfy‐based parser. For example, manyChars (anyOf ". \t\n") could be replaced with manySatisfy (function '.'|' '|'\t'|'\n' -> true | _ -> false).

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val skipAnyOf: seq<char> -> Parser<unit,'u>

skipAnyOf chars is an optimized implementation of anyOf chars |>> ignore.

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val noneOf: seq<char> -> Parser<char,'u>

noneOf chars parses any char not contained in the char sequence chars. It returns the parsed char. If chars does not contain the char '\n', noneOf chars parses any newline ("\n", "\r\n" or "\r") and returns it as '\n'. (Note that it does not make a difference whether or not chars contains '\r' and that noneOf chars will never return '\r'.)

For example, noneOf ". \t\n" will parse any char other than '.', ' ', '\t', '\r' or '\n'.

noneOf chars is defined as satisfy (isNoneOf chars).

For performance critical parsers it might be worth replacing instances of noneOf in loops with a manySatisfy‐based parser. For example, manyChars (noneOf ". \t\n") could be replaced with manySatisfy (function '.'|' '|'\t'|'\n' -> false | _ -> true).

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val skipNoneOf: seq<char> -> Parser<unit,'u>

skipNoneOf chars is an optimized implementation of noneOf chars |>> ignore.

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val asciiLower: Parser<char,'u>

Parses any char in the range 'a'-'z'. Returns the parsed char.

val asciiUpper: Parser<char,'u>

Parses any char in the range 'A'-'Z'. Returns the parsed char.

val asciiLetter: Parser<char,'u>

Parses any char in the range 'a'-'z' and 'A'-'Z'. Returns the parsed char.

val lower: Parser<char,'u>

Parses any UTF‐16 lowercase letter char identified by System.Char.IsLower. Returns the parsed char.

val upper: Parser<char,'u>

Parses any UTF‐16 uppercase letter char identified by System.Char.IsUpper. Returns the parsed char.

val letter: Parser<char,'u>

Parses any UTF‐16 letter char identified by System.Char.IsLetter. Returns the parsed char.

val digit: Parser<char,'u>

Parses any char in the range '0'-'9'. Returns the parsed char.

val hex: Parser<char,'u>

Parses any char in the range '0'-'9', 'a'-'f' and 'A'-'F'. Returns the parsed char.

val octal: Parser<char,'u>

Parses any char in the range '0'-'7'. Returns the parsed char.

val isAnyOf: seq<char> -> (char -> bool)

isAnyOf chars returns a predicate function. When this predicate function is applied to a char, it returns true if and only if the char is contained in the char sequence chars.

For example, the function isAnyOf ".,;" returns true when applied to the chars '.', ',' or ';', and false for all other chars.

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val isNoneOf: seq<char> -> (char -> bool)

isNoneOf chars returns a predicate function. When this predicate function is applied to a char, it returns true if and only if the char is not contained in the char sequence chars.

For example, the function isNoneOf ".,;" returns false when applied to the chars '.', ',' or ';', and true for all other chars.

This function is affected by the USE_STATIC_MAPPING_FOR_IS_ANY_OF compilation option.

val inline isAsciiUpper: char -> bool

Returns true for any char in the range 'A'-'Z' and false for all other chars.

val inline isAsciiLower: char -> bool

Returns true for any char in the range 'a'-'z' and false for all other chars.

val inline isAsciiLetter: char -> bool

Returns true for any char in the range 'a'-'z', 'A'-'Z' and false for all other chars.

val inline isUpper: char -> bool

isUpper is equivalent to System.Char.IsUpper.

val inline isLower: char -> bool

isLower is equivalent to System.Char.IsLower.

val inline isLetter: char -> bool

isLetter is equivalent to System.Char.IsLetter.

val inline isDigit: char -> bool

Returns true for any char in the range '0'-'9' and false for all other chars.

val inline isHex: char -> bool

Returns true for any char in the range '0'-'9', 'a'-'f', 'A'-'F' and false for all other chars.

val inline isOctal: char -> bool

Returns true for any char in the range '0'-'7' and false for all other chars.

val tab: Parser<char,'U>

Parses the tab char '\t' and returns '\t'.

Note

A tab char is treated like any other non‐newline char: the column number is incremented by (only) 1.

val newline: Parser<char,'u>

Parses a newline ("\n", "\r\n" or "\r"). Returns '\n'. Is equivalent to pchar '\n'.

val skipNewline: Parser<unit,'u>

skipNewline is an optimized implementation of newline |>> ignore.

val newlineReturn: 'a -> Parser<'a,'u>

newlineReturn result is an optimized implementation of newline >>% result.

val unicodeNewline: Parser<char,'u>

Parses a Unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028", or "\u2029"). Returns '\n'. In contrast to all other parsers in FParsec except unicodeSpaces and unicodeSpaces1, this parser also increments the internal line count for Unicode newline characters other than '\n' and '\r'.

Note

This method does not recognize the form feed char '\f' ('\u000C') as a newline character.

Note

This parser is included only for the sake of completeness. If you design your own parser grammar, we recommend not to accept any character sequence other than "\n", "\r\n" or "\r" for a newline. The three usual newline representations already make text parsing complicated enough.

val skipUnicodeNewline: Parser<unit,'u>

skipUnicodeNewline is an optimized implementation of unicodeNewline |>> ignore.

val unicodeNewlineReturn: 'a -> Parser<'a,'u>

unicodeNewlineReturn result is an optimized implementation of unicodeNewline >>% result.

val spaces: Parser<unit,'u>

Skips over any sequence of zero or more whitespaces (space (' '), tab ('\t') or newline ("\n", "\r\n" or "\r")).

val spaces1: Parser<unit,'u>

Skips over any sequence of one or more whitespaces (space (' '), tab ('\t') or newline ("\n", "\r\n" or "\r")).

val unicodeSpaces: Parser<unit,'u>

Skips over any sequence of zero or more Unicode whitespace chars and registers any Unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028" or "\u2029") as a newline.

Note

This method does not recognize the form feed char '\f' ('\u000C') as a newline character.

Note

This parser is included only for the sake of completeness. If you design your own parser grammar, we recommend not to accept any whitespace character other than ' ', '\t', '\r' and '\n'. There is no need to make whitespace parsing unnecessarily complicated and slow.

val unicodeSpaces1: Parser<unit,'u>

Skips over any sequence of one or more Unicode whitespace chars and registers any Unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028" or "\u2029") as a newline.

See also the notes above for unicodeSpaces.

val eof: Parser<unit,'u>

The parser eof only succeeds at the end of the input. It never consumes input.

val pstring: string -> Parser<string,'u>

pstring str parses the string str and returns str. It is an atomic parser: either it succeeds or it fails without consuming any input.

str may not contain newline chars ('\n' or '\r'), otherwise pstring str raises an ArgumentException.

val skipString: string -> Parser<unit,'u>

skipString str is an optimized implementation of pstring str |>> ignore.

val stringReturn: string -> 'a -> Parser<'a,'u>

stringReturn str result is an optimized implementation of pstring str >>% result.

val pstringCI: string -> Parser<string,'u>

pstringCI str parses any string that case‐insensitively matches the string str. It returns the parsed string. pstringCI str is an atomic parser: either it succeeds or it fails without consuming any input.

str may not contain newline chars ('\n' or '\r'), otherwise pstringCI str raises an ArgumentException.

val skipStringCI: string -> Parser<unit,'u>

skipStringCI str is an optimized implementation of pstringCI str |>> ignore.

val stringCIReturn: string -> 'a -> Parser<'a,'u>

stringCIReturn str result is an optimized implementation of pstringCI str >>% result.

val anyString: int32 -> Parser<string,'u>

anyString n parses any sequence of n chars or newlines ("\n", "\r\n" or "\r"). It returns the parsed string. In the returned string all newlines are normalized to "\n". anyString n is an atomic parser: either it succeeds or it fails without consuming any input.

val skipAnyString: int32 -> Parser<unit,'u>

skipAnyString n is an optimized implementation of anyString n |>> ignore.

val restOfLine: skipNewline: bool -> Parser<string,'u>

restOfLine skipNewline parses any chars before the end of the line and, if skipNewline is true, skips to the beginning of the next line (if there is one). It returns the parsed chars before the end of the line as a string (without a newline). A line is terminated by a newline ("\n", "\r\n" or "\r") or the end of the input stream.

For example, sepBy (restOfLine false) newline will parse an input file and split it into lines:

> run (sepBy (restOfLine false) newline) "line1\nline2\n";;
val it : ParserResult<string list,unit> = Success: ["line1"; "line2"; ""]

Note that you could not use many (restOfLine true) in this example, because at the end of the input restOfLine succeeds without consuming input, which would cause many to throw an exception.

val skipRestOfLine: skipNewline: bool -> Parser<unit,'u>

skipRestOfLine skipNewline is an optimized implementation of restOfLine skipNewline |>> ignore.

val charsTillString:
    string -> skipString: bool -> maxCount: int -> Parser<string,'u>

charsTillString str skipString maxCount parses all chars before the first occurrence of the string str and, if skipString is true, skips over str. It returns the parsed chars before the string. If more than maxCount chars come before the first occurrence of str, the parser fails after consuming maxCount chars.

Newlines ("\n", "\r\n" or "\r") are counted as single chars and in the returned string all newlines are normalized to "\n", but str may not contain any newline.

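charsTillString str skipString maxCount raises an ArgumentException if str contains a newline char ('\n' or '\r'), and an ArgumentOutOfRangeException if maxCount is negative.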

val skipCharsTillString:
    string -> skipString: bool -> maxCount: int -> Parser<unit,'u>

skipCharsTillString str skipString maxCount is an optimized implementation of charsTillString str skipString maxCount |>> ignore.

val charsTillStringCI:
    string -> skipString: bool -> maxCount: int -> Parser<string,'u>

charsTillStringCI str skipString maxCount parses all chars before the first case‐insensitive occurrence of the string str and, if skipString is true, skips over it. It returns the parsed chars before the string. If more than maxCount chars come before the first case‐insensitive occurrence of str, the parser fails after consuming maxCount chars.

Newlines ("\n", "\r\n" or "\r") are counted as single chars, but str may not contain any newline.

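charsTillStringCI str skipString maxCount raises an ArgumentException if str contains a newline char ('\n' or '\r'), and an ArgumentOutOfRangeException if maxCount is negative.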

val skipCharsTillStringCI:
    string -> skipString: bool -> maxCount: int -> Parser<unit,'u>

skipCharsTillStringCI str skipString maxCount is an optimized implementation of charsTillStringCI str skipString maxCount |>> ignore.

val manySatisfy: (char -> bool) -> Parser<string,'u>

manySatisfy f parses a sequence of zero or more chars that satisfy the predicate function f (i.e. chars for which f returns true). It returns the parsed chars as a string.

Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. Thus, to accept a newline f '\n' must return true. f will never be called with '\r' and the string returned by manySatisfy f will never contain an '\r'.

For example, manySatisfy (function ' '|'\t'|'\n' -> true | _ -> false) parses zero or more whitespaces and returns them as a string.

Caution

The function predicate f must not access the currently used CharStream itself, because manySatisfy relies on f not having any side‐effect on the internal state of the stream.

val manySatisfy2: (char -> bool) -> (char -> bool) -> Parser<string,'u>

manySatisfy2 f1 f behaves like manySatisfy f, except that the first char of the parsed string must satisfy f1 instead of f.

For example, manySatisfy2 ((=) '.') isDigit will parse a dot followed by zero or more decimal digits. If there is no dot, the parser succeeds with an empty string.

val skipManySatisfy: (char -> bool) -> Parser<unit,'u>

skipManySatisfy f is an optimized implementation of manySatisfy f |>> ignore.

val skipManySatisfy2: (char -> bool) -> (char -> bool) -> Parser<unit,'u>

skipManySatisfy2 f1 f is an optimized implementation of manySatisfy2 f1 f |>> ignore.

val many1Satisfy: (char -> bool) -> Parser<string,'u>

many1Satisfy f parses a sequence of one or more chars that satisfy the predicate function f (i.e. chars for which f returns true). It returns the parsed chars as a string. If the first char does not satisfy f, this parser fails without consuming input.

Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. Thus, to accept a newline f '\n' must return true. f will never be called with '\r' and the string returned by many1Satisfy f will never contain an '\r'.

For example, many1Satisfy isDigit parses a number consisting of one or more decimal digits and returns it as a string.

Caution

The function predicate f must not access the currently used CharStream itself, because many1Satisfy relies on f not having any side‐effect on the internal state of the stream.

Note

If the parser many1Satisfy f fails, it returns no descriptive error message (because it does not know what chars f accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, many1SatisfyL f label can be used to ensure a more descriptive error message.

val many1Satisfy2: (char -> bool) -> (char -> bool) -> Parser<string,'u>

many1Satisfy2 f1 f behaves like many1Satisfy f, except that the first char of the parsed string must satisfy f1 instead of f.

For example, many1Satisfy2 isLetter (fun c -> isLetter c || isDigit c) will parse any string consisting of one letter followed by zero or more letters or digits.

val skipMany1Satisfy: (char -> bool) -> Parser<unit,'u>

skipMany1Satisfy f is an optimized implementation of many1Satisfy f |>> ignore.

val skipMany1Satisfy2: (char -> bool) -> (char -> bool) -> Parser<unit,'u>

skipMany1Satisfy2 f1 f is an optimized implementation of many1Satisfy2 f1 f |>> ignore.

val many1SatisfyL:
    (char -> bool) -> string -> Parser<string,'u>

many1SatisfyL f label is an optimized implementation of many1Satisfy f <?> label.

val many1Satisfy2L:
    (char -> bool) -> (char -> bool) -> string -> Parser<string,'u>

many1Satisfy2L f1 f label is an optimized implementation of many1Satisfy2 f1 f <?> label.

val skipMany1SatisfyL:
    (char -> bool)     -> string -> Parser<unit,'u>

skipMany1SatisfyL f label is an optimized implementation of skipMany1Satisfy f <?> label.

val skipMany1Satisfy2L:
    (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u>

skipMany1Satisfy2L f1 f label is an optimized implementation of skipMany1Satisfy2 f1 f <?> label.

val manyMinMaxSatisfy:
    int -> int -> (char -> bool) -> Parser<string,'u>

manyMinMaxSatisfy minCount maxCount f parses a sequence of minCount or more chars that satisfy the predicate function f (i.e. chars for which f returns true), but not more than maxCount chars. It returns the parsed chars as a string. This parser is atomic, i.e. if the first minCount chars do not all satisfy f, the parser fails without consuming any input.

Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. Thus, to accept a newline f '\n' must return true. f will never be called with '\r' and the string returned by manyMinMaxSatisfy minCount maxCount f will never contain an '\r'.

manyMinMaxSatisfy minCount maxCount f raises an ArgumentOutOfRangeException if maxCount is negative.

For example, manyMinMaxSatisfy 4 8 isHex parses a string that consists of at least 4 hexadecimal digits. If there are 8 or more hex chars, this parser stops after the 8th.

Caution

The function predicate f must not access the currently used CharStream itself, because manyMinMaxSatisfy relies on f not having any side‐effect on the internal state of the stream.

Note

If the parser manyMinMaxSatisfy minCount maxCount f fails, it returns no descriptive error message (because it does not know what chars f accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, manyMinMaxSatisfyL minCount maxCount f label can be used to ensure a more descriptive error message.

val manyMinMaxSatisfy2:
    int -> int -> (char -> bool) -> (char -> bool) -> Parser<string,'u>

manyMinMaxSatisfy2 minCount maxCount f1 f behaves like manyMinMaxSatisfy minCount maxCount f, except that the first char of the parsed string must satisfy f1 instead of f.

For example, manyMinMaxSatisfy2 3 5 ((=) '.') isDigit parses a dot followed by 2‒4 decimal digits.

val skipManyMinMaxSatisfy:
    int -> int -> (char -> bool) -> Parser<unit,'u>

skipManyMinMaxSatisfy minCount maxCount f is an optimized implementation of manyMinMaxSatisfy minCount maxCount f |>> ignore.

val skipManyMinMaxSatisfy2:
    int -> int -> (char -> bool) -> (char -> bool) -> Parser<unit,'u>

skipManyMinMaxSatisfy2 minCount maxCount f1 f is an optimized implementation of manyMinMaxSatisfy2 minCount maxCount f1 f |>> ignore.

val manyMinMaxSatisfyL:
    int -> int -> (char -> bool) -> string -> Parser<string,'u>

manyMinMaxSatisfyL minCount maxCount f label is an optimized implementation of manyMinMaxSatisfy minCount maxCount f <?> label.

val manyMinMaxSatisfy2L:
    int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<string,'u>

manyMinMaxSatisfy2L minCount maxCount f1 f label is an optimized implementation of manyMinMaxSatisfy2 minCount maxCount f1 f <?> label.

val skipManyMinMaxSatisfyL:
    int -> int -> (char -> bool) -> string -> Parser<unit,'u>

skipManyMinMaxSatisfyL minCount maxCount f label is an optimized implementation of skipManyMinMaxSatisfy minCount maxCount f <?> label.

val skipManyMinMaxSatisfy2L:
    int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u>

skipManyMinMaxSatisfy2L minCount maxCount f1 f label is an optimized implementation of skipManyMinMaxSatisfy2 minCount maxCount f1 f <?> label.

val regex: string -> Parser<string,'u>

regex pattern matches the .NET regular expression given by the string pattern on the chars beginning at the current index in the input stream. If the regular expression matches, the parser skips the matched chars and returns them as a string. If the regular expression does not match, the parser fails without consuming input.

The System.Text.RegularExpressions.Regex object that is internally used to match the pattern is constructed with the RegexOptions Multiline and ExplicitCapture. In order to ensure that the regular expression can only match at the beginning of a string, "\\A" is automatically prepended to the pattern. You should avoid the use of greedy expressions like ".*", because these might trigger a scan of the complete input every time the regex is matched.

Newline chars ('\r' and '\n') in the pattern are interpreted literally. For example, an '\n' char in the pattern will only match "\n", not "\r" or "\r\n". However, in the returned string all newlines ("\n", "\r\n" or "\r") are normalized to "\n".

For large files the regular expression is not applied to a string containing all the remaining chars in the stream. The number of chars that are guaranteed to be visible to the regular expression is specified during construction of the CharStream. If one of the runParser functions is used to run the parser, this number is 43690.

type IdentifierOptions =
    new: ?isAsciiIdStart: (char -> bool) *
         ?isAsciiIdContinue: (char -> bool) *
         ?normalization: System.Text.NormalizationForm *
         ?normalizeBeforeValidation: bool *
         ?allowJoinControlChars: bool *
         ?preCheckStart: (char -> bool) *
         ?preCheckContinue: (char -> bool) *
         ?allowAllNonAsciiCharsInPreCheck: bool *
         ?label: string *
         ?invalidCharMessage: string -> IdentifierOptions

The configuration options for the identifier parser.

isAsciiIdStart

Specifies the ASCII characters that are valid as the first character of an identifier. This predicate function is called once for each char in the range '\u0001'‒'\u007f' during construction of the IdentifierOptions object. By default, the ASCII chars 'A'‒'Z' and 'a'‒'z' can start an identifier.

isAsciiIdContinue

Specifies the ASCII characters that are valid as non‐first characters of an identifier. This predicate function is called once for each char in the range '\u0001'‒'\u007f' during construction of the IdentifierOptions object. Normally the chars for which isAsciiIdContinue returns true should include all chars for which isAsciiIdStart returns true. By default, the ASCII chars 'A'‒'Z', 'a'‒'z', '0'‒'9' and '_' are accepted at non‐start positions.

normalization

This option is not supported in the Silverlight version of FParsec.
The normalization form to which identifier strings are normalized. The value must be one of the four enum values of System.Text.NormalizationForm. If no normalization parameter is given, no normalization is performed.

The normalization is performed with the System.String.Normalize method provided by the Base Class Library.

normalizeBeforeValidation

This option is not supported in the Silverlight version of FParsec.
Indicates whether the identifier string should be normalized before validation (but after the pre‐check). By default, identifiers are normalized after they have been validated. Normalization before validation will only work properly with non‐default pre‐check options.

allowJoinControlChars

Indicates whether the two join control characters (zero‐width non‐joiner and zero‐width joiner) are allowed at any non‐start character position in the identifier.

preCheckStart, preCheckContinue

These two char predicates are used to identify potential identifier strings in the input. The first UTF‐16 char of an identifier must satisfy preCheckStart, the following chars must satisfy preCheckContinue. Input chars that don’t pass the pre‐check aren’t included in the identifier string, while characters that pass the pre‐check but not the identifier validation trigger a parser error. For the identifier parser to work properly, the pre‐check functions must accept a superset of valid identifier characters.

If you specify no preCheckStart (preCheckContinue) parameter, a default function will be used that accepts all chars that satisfy isAsciiIdStart (isAsciiIdContinue) as well as all non‐ASCII characters in the Basic Multilingual Plane with the XID_Start (XID_Continue) property and all surrogate chars. preCheckContinue by default also accepts the two join control characters.

If you pass the option allowAllNonAsciiCharsInPreCheck = true, the pre‐check predicates are only called once for each char in the range '\u0001'‒'\u007f' during construction of the IdentifierOptions object (in order to construct a lookup table).

allowAllNonAsciiCharsInPreCheck

Indicates whether all non‐ASCII chars should be accepted in the pre‐check, irrespective of whether the (default) pre‐check functions return true for these chars.

label

The string label that is used in error messages if no identifier is found. The default is "identifier".

invalidCharMessage

The error message that is reported when an invalid char is found during validation of an identifier (after the pre‐check). The default is "The identifier contains an invalid character at the indicated position.".

The following example implements a parser for Python identifiers as described in PEP‐3131:

let pythonIdentifier =
    let isAsciiIdStart    = fun c -> isAsciiLetter c || c = '_'
    let isAsciiIdContinue = fun c -> isAsciiLetter c || isDigit c || c = '_'

    identifier (IdentifierOptions(
                    isAsciiIdStart = isAsciiIdStart,
                    isAsciiIdContinue = isAsciiIdContinue,
                    normalization = System.Text.NormalizationForm.FormKC,
                    normalizeBeforeValidation = true,
                    allowAllNonAsciiCharsInPreCheck = true))
val identifier: IdentifierOptions -> Parser<string, 'u>

The identifier parser is a configurable parser for the XID identifier syntax specified in the Unicode Standard Annex #31.

By default, a valid identifier string must begin with a Unicode character with the XID_Start property and continue with zero or more characters with the XID_Continue property. The specification of which characters have these properties can be found in the DerivedCoreProperties file in the Unicode Character Database. Currently FParsec implements the XID specification of Unicode 8.0.0.

Within the ASCII character range '\u0001'‒'\u007f' you can customize the set of accepted characters through the isAsciiIdStart and isAsciiIdContinue parameters (the XID default allows 'a'‒'z' and 'A'‒'Z' at any position and '_' and '0'‒'9' only in non‐start positions). For example, to accept the same ASCII characters that are valid in F# identifiers, you could use the following IdentifierOptions:

let isAsciiIdStart c =
    isAsciiLetter c || c = '_'

let isAsciiIdContinue c =
    isAsciiLetter c || isDigit c || c = '_' || c = '\''

identifier (IdentifierOptions(isAsciiIdStart    = isAsciiIdStart,
                              isAsciiIdContinue = isAsciiIdContinue))

By default, identifiers cannot contain the two join control characters zero‐width non‐joiner and zero‐width joiner. While these characters can be abused to create distinct identifiers that look confusingly similar or even identical, they are also necessary to create identifiers with the correct visual appearance for common words or phrases in certain languages. Section 2.3 of the Unicode Standard Annex #31 recommends to accept join control characters if the identifier system is supposed to support “natural representations of terms in modern, customary use”. However, in order to minimize the potential for abuse it also recommends accepting these characters only in some very specific contexts.

Unfortunately, the proposed rules describing the contexts in which join control characters should be allowed are rather difficult to implement, especially with the limited Unicode support in .NET. For this reason the identifier parser currently only supports a simpler option: if you set the parameter allowJoinControlChars to true, the parser accepts the two join control characters in any non‐start position. Whether this setting is a reasonable compromise between not supporting join control characters at all and implementing the complicated rules proposed in Annex #31 obviously depends on the individual requirements of your project. An example of a programming language that adopted the same compromise is ECMAScript 5.

Apart from the join control characters, no layout or format control characters are allowed in identifiers. This is in accordance with the recommendation of the Unicode Standard Annex #31, but contrary to what Annex #15 recommended prior to Unicode version 4.1. Programming languages whose identifier syntax is based on the recommendations of earlier versions of the Unicode standard may require that layout and format control characters are ignored or filtered out, as for example is the case for C#. However, since the identifier syntax of these languages isn’t based on the XID properties, one can’t parse their identifiers with this parser anyway.

By providing a value for the normalization parameter, you can ensure that identifiers are returned in a particular Unicode normalization form. By default, an identifier is normalized after it has been validated. Since XID identifiers are “closed under normalization”, a valid identifier is guaranteed to stay valid after normalization. The reverse, however, is not true, since not all identifier strings that are valid after normalization are also valid prior to normalization. If you want the identifier string to be normalized before validation, you have to set the normalizeBeforeValidation parameter to true and specify appropriate preCheckStart and preCheckContinue parameters.

Silverlight does not support Unicode normalization, so the Silverlight version of FParsec does not support the normalization and normalizeBeforeValidation parameters.

The identifier parser uses the preCheckStart and preCheckContinue predicate functions to identify potential identifier strings in the input. The first UTF‐16 char of the identifier must satisfy preCheckStart, the following chars must satisfy preCheckContinue. Input chars that don’t pass the pre‐check aren’t included in the identifier string, while characters that pass the pre‐check but not the identifier validation trigger a parser error (FatalError). For the identifier parser to work properly, the preCheck functions must accept a superset of valid identifier characters.

If you specify no preCheckStart (preCheckContinue) parameter, a default function will be used that accepts all chars that satisfy isAsciiIdStart (isAsciiIdContinue) as well as all non‐ASCII characters in the Basic Multilingual Plane with the XID_Start (XID_Continue) property and all surrogate chars. preCheckContinue by default also accepts the two join control characters. If you set the parameter allowAllNonAsciiCharsInPreCheck to true, all non‐ASCII chars will be accepted in the pre‐check, irrespective of whether the (default) pre‐check functions return true for these chars.

By passing custom preCheckStart and preCheckContinue functions you can modify the error reporting behaviour and support identifier strings that are only valid after normalization. You can also exclude specific UTF‐16 chars that would otherwise be valid in identifiers, though you’d have to be careful to cover all (pre‐)normalization forms.

In the following examples we will demonstrate the effect of custom pre‐check functions on identifier parsing. For this we first set up two identifier parsers, ident and identP, with differing sets of options. Both parsers accept the same ASCII chars in identifiers. In particular, both do not accept the underscore char '_' in identifiers. However, only identP lets underscores through the pre‐check.

// we don't allow underscores in identifiers ...
let isAsciiIdStart c    = isAsciiLetter c
let isAsciiIdContinue c = isAsciiLetter c || isDigit c

// ... but accept them in the pre-check
let preCheckStart c    = isAsciiLetter c || c = '_'
let preCheckContinue c = isAsciiLetter c || isDigit c || c = '_'

type NF = System.Text.NormalizationForm

let opts = IdentifierOptions(isAsciiIdStart    = isAsciiIdStart,
                             isAsciiIdContinue = isAsciiIdContinue,
                             normalization = NF.FormKC,
                             // The following option isn't really useful without
                             // modified pre-check options. We only set the
                             // option here to prove this point in an example below.
                             normalizeBeforeValidation = true)

let optsWithPreCheck = IdentifierOptions(isAsciiIdStart = isAsciiIdStart,
                                         isAsciiIdContinue = isAsciiIdContinue,
                                         preCheckStart = preCheckStart,
                                         preCheckContinue = preCheckContinue,
                                         allowAllNonAsciiCharsInPreCheck = true,
                                         normalization = NF.FormKC,
                                         normalizeBeforeValidation = true)

let ident  : Parser<string, unit> = identifier opts
let identP : Parser<string, unit> = identifier optsWithPreCheck

Both ident and identP parse simple identifiers without a problem:

> run (ident .>> eof) "täst1";;
val it : ParserResult<string,unit> = Success: "täst1"
> run (identP .>> eof) "täst2";;
val it : ParserResult<string,unit> = Success: "täst2"

The identifier parser with the default pre‐check functions will treat underscores just like whitespace or any other non‐identifier character:

> run (ident .>> eof) "test_id";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 5
test_id
    ^
Expecting: end of input

Since ident only consumed the "test" part of the input string, the eof parser complained that it was expecting to be applied at the end of the input.

When we use identP instead, we get a different error message:

> run (identP .>> eof) "test_id";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 5
test_id
    ^
The identifier contains an invalid character at the indicated position.

This time the underscore passed the pre‐check, but not the identifier validation.

As mentioned above, a custom pre‐check is also necessary to make the normalizeBeforeValidation option work properly. With the default pre‐check options the identifier parser doesn’t accept "MC²" as an identifier, even with the normalization set to NFKC:

> run (ident .>> eof) "MC²";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 3
MC²
  ^
Expecting: end of input

identP on the other hand doesn’t have this issue, because it accepts all non‐ASCII chars in the pre‐check:

> run (identP .>> eof) "MC²";;
val it : ParserResult<string,unit> = Success: "MC2"
val manyChars: Parser<char,'u> -> Parser<string,'u>

manyChars cp parses a sequence of zero or more chars with the char parser cp. It returns the parsed chars as a string.

manyChars cp is an optimized implementation of many cp that returns the chars as a string instead of a char list.

Many string parsers can be conveniently implemented with both manyChars and manySatisfy. In these cases you should generally prefer the faster manySatisfy. For example, the parser manySatisfyL isHex "hex integer" is more efficient than manyChars hex.

If you are using manyChars for a parser similar to manyChars (notFollowedBy endp >>. p), you should check whether this use of manyChars can be replaced with the more specialized manyCharsTill parser.

val manyChars2: Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u>

manyChars2 cp1 cp behaves like manyChars cp, except that it parses the first char with cp1 instead of cp.

For example, manyChars2 letter (letter <|> digit) will parse a letter followed by letters or digits and return the chars as a string. If the first char is not a letter, the parser succeeds with an empty string. Note, however, that this parser could be more efficiently implemented using manySatisfy2L.

val many1Chars: Parser<char,'u> -> Parser<string,'u>

many1Chars cp parses a sequence of one or more chars with the char parser cp. It returns the parsed chars as a string.

many1Chars cp is an optimized implementation of many1 cp that returns the chars as a string instead of a char list.

Many string parsers can be conveniently implemented with both many1Chars and many1Satisfy. In these cases you should generally prefer the faster many1Satisfy. For example, the parser many1SatisfyL isHex "hex integer" is more efficient than many1Chars hex.

val many1Chars2: Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u>

many1Chars2 cp1 cp behaves like many1Chars cp, except that it parses the first char with cp1 instead of cp.

For example, many1Chars2 letter (letter <|> digit) will parse a letter followed by letters or digits and return the chars as a string. Note, however, that this parser could be more efficiently implemented using many1Satisfy2L.

val manyCharsTill:
       Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>

manyCharsTill cp endp parses chars with the char parser cp until the parser endp succeeds. It stops after endp and returns the parsed chars as a string.

manyCharsTill cp endp is an optimized implementation of manyTill cp endp that returns the chars as a string instead of a char list.

val manyCharsTill2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>

manyCharsTill2 cp1 cp endp behaves like manyCharsTill cp endp, except that it parses the first char with cp1 instead of cp.

val manyCharsTillApply:
       Parser<char,'u>  -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

manyCharsTillApply cp endp f behaves like manyCharsTill cp endp, except that it returns the result of the function application f str b, where str is the parsed string and b is the result returned by endp.

val manyCharsTillApply2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

manyCharsTillApply2 cp1 cp endp f behaves like manyCharsTillApply cp endp f, except that it parses the first char with cp1 instead of cp.

val many1CharsTill:
       Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>

many1CharsTill cp endp parses one char with the char parser cp. Then it parses more chars with cp until the parser endp succeeds. It stops after endp and returns the parsed chars as a string.

many1CharsTill cp endp is an optimized implementation of many1Till cp endp that returns the chars as a string instead of a char list.

val many1CharsTill2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u>

many1CharsTill2 cp1 cp endp behaves like many1CharsTill cp endp, except that it parses the first char with cp1 instead of cp.

val many1CharsTillApply:
       Parser<char,'u>   -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

many1CharsTillApply cp endp f behaves like many1CharsTill cp endp, except that it returns the result of the function application f str b, where str is the parsed string and b is the result returned by endp.

val many1CharsTillApply2:
       Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c)
    -> Parser<'c,'u>

many1CharsTillApply2 cp1 cp endp f behaves like many1CharsTillApply cp endp f, except that it parses the first char with cp1 instead of cp.

val manyStrings: Parser<string,'u> -> Parser<string,'u>

manyStrings sp parses a sequence of zero or more strings with the string parser sp. It returns the strings in concatenated form.

manyStrings sp is an optimized implementation of many sp |>> List.fold (fun acc s -> acc + s) "".

val manyStrings2: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

manyStrings2 sp1 sp behaves like manyStrings sp, except that it parses the first string with sp1 instead of sp.

val many1Strings: Parser<string,'u> -> Parser<string,'u>

many1Strings sp parses a sequence of one or more strings with the string parser sp. It returns the strings in concatenated form. Note that many1Strings sp does not require the first string to be non‐empty.

many1Strings sp is an optimized implementation of many1 sp |>> List.reduce (+).

val many1Strings2: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

many1Strings2 sp1 sp behaves like many1Strings sp, except that it parses the first string with sp1 instead of sp.

val stringsSepBy: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

stringsSepBy sp sep parses zero or more occurrences of the string parser sp separated by sep (in EBNF: (sp (sep sp)*)?). It returns the strings parsed by sp and sep in concatenated form.

stringsSepBy behaves like sepBy, except that instead of returning a list of the results of only the first argument parser it returns a concatenated string of all strings returned by both argument parsers (in the sequence they occurred).

With stringsSepBy you can for example implement an efficient parser for the following string literal format:

  stringLiteral: '"' (normalChar|escapedChar)* '"'
  normalChar:    any char except '\' and '"'
  escapedChar:   '\\' ('\\'|'"'|'n'|'r'|'t')

The parser implementation exploits the fact that two (possibly empty) normal char snippets must be separated by an escaped char:

let stringLiteral =
    let str s = pstring s
    let normalCharSnippet = manySatisfy (fun c -> c <> '\\' && c <> '"')
    let escapedChar = str "\\" >>. (anyOf "\\\"nrt" |>> function
                                                        | 'n' -> "\n"
                                                        | 'r' -> "\r"
                                                        | 't' -> "\t"
                                                        | c   -> string c)
    between (str "\"") (str "\"")
            (stringsSepBy normalCharSnippet escapedChar)
val stringsSepBy1: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u>

stringsSepBy1 sp sep parses one or more occurrences of the string parser sp separated by sep (in EBNF: (sp (sep sp)*)). It returns the strings parsed by sp and sep in concatenated form.

stringsSepBy1 behaves like stringsSepBy, except that it fails without consuming input if sp does not succeed at least once.

val skipped: Parser<unit,'u> -> Parser<string,'u>

skipped p applies the parser p and returns the chars skipped over by p as a string. All newlines ("\r\n", "\r" or "\n") are normalized to "\n".

val withSkippedString: (string -> 'a -> 'b) -> Parser<'a,'u> -> Parser<'b,'u>

p |> withSkippedString f applies the parser p and returns the result f str x, where str is the string skipped over by p and x is the result returned by p.

type NumberLiteralOptions

An enum type that encodes the various options of the numberLiteral parser:

type NumberLiteralOptions =
| None                       = 0
| AllowSuffix                = 0b000000000001
| AllowMinusSign             = 0b000000000010
| AllowPlusSign              = 0b000000000100
| AllowFraction              = 0b000000001000
| AllowFractionWOIntegerPart = 0b000000010000
| AllowExponent              = 0b000000100000
| AllowHexadecimal           = 0b000001000000
| AllowBinary                = 0b000010000000
| AllowOctal                 = 0b000100000000
| AllowInfinity              = 0b001000000000
| AllowNaN                   = 0b010000000000

| IncludeSuffixCharsInString = 0b100000000000

| DefaultInteger             = 0b000111000110
| DefaultUnsignedInteger     = 0b000111000000
| DefaultFloat               = 0b011001101110

If all flags are set, any literal matching the following regular expression is accepted:

[+-]?((([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?
      |0[xX]([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)?
      |0[oO][0-7]+
      |0[bB][01]+
      )[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?
     |[iI][nN][fF]([iI][nN][iI][tT][yY])?
     |[nN][aA][nN]
     )

Hexadecimal literals must begin with 0x or 0X, octal literals with 0o or 0O and binary literals with 0b or 0B. If the respective flags are set, hexadecimal floating‐point literals as supported by IEEE 754r, C99 and Java are accepted.

Some remarks on the individual flags:

AllowSuffix

Allows up to 4 suffix chars. Such chars are used in many programming languages to determine the type of a number. For example, in F# the literal "123UL" represents the unsigned 64‐bit integer 123.

AllowFraction

Allows a fraction in decimal and hexadecimal literals.

AllowFractionWOIntegerPart

Allows number literals with a fraction but no integer part, e.g. ".123" or "0x.abc". This flag can only be used together with AllowFraction.

AllowExponent

Allows exponents in decimal literals (beginning with an "e" or "E") and in hexadecimal literals (beginning with a "p" or "P").

AllowInfinity

Allows "Inf" or "Infinity" literals (case‐insensitive).

AllowNaN

Allows "NaN" literals (case‐insensitive).

IncludeSuffixCharsInString

Instructs the numberLiteral parser to include any parsed suffix chars in the NumberLiteral.String member.

type NumberLiteral

The return type of the numberLiteral parser. An instance contains the parsed number literal and various bits of information about it. Note that the String member contains the string literal without the suffix chars, except if the NumberLiteralOptions passed to the numberLiteral parser have the IncludeSuffixCharsInString flag set. Any parsed suffix chars are always available through the SuffixChar1‒4 members.

type NumberLiteral =
    member String: string

    member SuffixLength: int
    member SuffixChar1: char // EOS if no suffix char was parsed
    member SuffixChar2: char // EOS if less than 2 suffix chars were parsed
    member SuffixChar3: char ...
    member SuffixChar4: char

    member Info: NumberLiteralResultFlags
    member HasMinusSign: bool
    member HasPlusSign: bool
    member HasIntegerPart: bool
    member HasFraction: bool
    member HasExponent: bool
    member IsInteger: bool // not (HasFraction || HasExponent)
    member IsDecimal: bool
    member IsHexadecimal: bool
    member IsBinary: bool
    member IsOctal: bool
    member IsNaN: bool
    member IsInfinity: bool

and NumberLiteralResultFlags =
    | None             = 0
    | SuffixLengthMask = 0b0000000000001111
    | HasMinusSign     = 0b0000000000010000
    | HasPlusSign      = 0b0000000000100000
    | HasIntegerPart   = 0b0000000001000000
    | HasFraction      = 0b0000000010000000
    | HasExponent      = 0b0000000100000000
    | IsDecimal        = 0b0000001000000000
    | IsHexadecimal    = 0b0000010000000000
    | IsBinary         = 0b0000100000000000
    | IsOctal          = 0b0001000000000000
    | BaseMask         = 0b0001111000000000
    | IsInfinity       = 0b0010000000000000
    | IsNaN            = 0b0100000000000000
val numberLiteral: NumberLiteralOptions -> string -> Parser<NumberLiteral,'u>

numberLiteral options label parses a number literal and returns the result in form of a NumberLiteral value. The given NumberLiteralOptions argument determines the kind of number literals accepted. The string label is used in the Expected error message that is generated when the parser fails without consuming input.

The parser fails without consuming input unless at least one digit (including the 0 in the format specifiers "0x" etc.) can be parsed. It fails after consuming input, if no decimal digit comes after an exponent marker or no valid digit comes after a format specifier.

The parser in the following example employs numberLiteral to parse decimal numbers as either integer or float values:

open FParsec
open FParsec.Primitives
open FParsec.CharParsers

type Number = Int   of int64
            | Float of float

                    // -?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?
let numberFormat =     NumberLiteralOptions.AllowMinusSign
                   ||| NumberLiteralOptions.AllowFraction
                   ||| NumberLiteralOptions.AllowExponent

let pnumber : Parser<Number, unit> =
    numberLiteral numberFormat "number"
    |>> fun nl ->
            if nl.IsInteger then Int (int64 nl.String)
            else Float (float nl.String)

Some test runs:

> run pnumber "123";;
val it : ParserResult<Number,unit> = Success: Int 123L

> run pnumber "-123.456E-7";;
val it : ParserResult<Number,unit> = Success: Float -1.23456e-05

> run pnumber "-";;
val it : ParserResult<Number,unit> = Failure:
Error in Ln: 1 Col: 1
-
^
Expecting: number

> run pnumber "123.456E-a";;
val it : ParserResult<Number,unit> = Failure:
Error in Ln: 1 Col: 10
123.456E-a
         ^
Expecting: decimal digit

> run pnumber "1E9999";;
System.OverflowException:
   Value was either too large or too small for a Double.
   at (... stack trace ...)
stopped due to error

The next example improves on the error reporting in case of overflows. It also demonstrates how to support hexadecimal numbers and a suffix to indicate the integer format:

open FParsec
open FParsec.Error
open FParsec.Primitives
open FParsec.CharParsers

type Number = Int32 of int32
            | Int64 of int64
            | Float of float

// We want to support decimal or hexadecimal numbers with an optional minus
// sign. Integers may have an 'L' suffix to indicate that the number should
// be parsed as a 64-bit integer.
let numberFormat =     NumberLiteralOptions.AllowMinusSign
                   ||| NumberLiteralOptions.AllowFraction
                   ||| NumberLiteralOptions.AllowExponent
                   ||| NumberLiteralOptions.AllowHexadecimal
                   ||| NumberLiteralOptions.AllowSuffix

// Parses a number literal in the format configured by numberFormat and
// converts it to a Number. Unlike a parser built with |>>, this one is written
// as an explicit parser function (CharStream -> Reply) so that it can validate
// the suffix, move the stream back and report conversion failures as parser
// errors instead of letting exceptions escape.
let pnumber : Parser<Number, unit> =
    // Construct the numberLiteral parser once, outside of the parser function.
    let parser = numberLiteral numberFormat "number"
    fun stream ->
        let reply = parser stream
        if reply.Status = Ok then
            let nl = reply.Result // the parsed NumberLiteral
            // Accept a literal without suffix, or an integer literal whose
            // suffix is exactly the single char 'L'.
            if nl.SuffixLength = 0
               || (   nl.IsInteger
                   && nl.SuffixLength = 1 && nl.SuffixChar1 = 'L')
            then
                try
                    // Choose the conversion: no suffix -> Int32, 'L' suffix -> Int64,
                    // hexadecimal non-integer -> floatOfHexString, otherwise float.
                    let result = if nl.IsInteger then
                                     if nl.SuffixLength = 0 then
                                         Int32 (int32 nl.String)
                                     else
                                         Int64 (int64 nl.String)
                                 else
                                     if nl.IsHexadecimal then
                                         Float (floatOfHexString nl.String)
                                     else
                                         Float (float nl.String)
                    Reply(result)
                with
                | :? System.OverflowException as e ->
                    // Move the stream back by the length of the literal string, so
                    // that the error is reported at the number rather than after it
                    // (cf. the "1E9999" test run below, which reports Col: 1), and
                    // return the exception message as a FatalError.
                    stream.Skip(-nl.String.Length)
                    Reply(FatalError, messageError e.Message)
            else
                // Move the stream back over the suffix chars, so that the error
                // position points at the suffix (cf. the "0x123UL" test run below).
                stream.Skip(-nl.SuffixLength)
                Reply(Error, messageError "invalid number suffix")
        // The failed reply is rebuilt from its status and error messages only,
        // presumably because its result type (NumberLiteral) differs from the
        // Number result type this parser has to return.
        else // reconstruct error reply
            Reply(reply.Status, reply.Error)

Some test runs:

> run pnumber "123";;
val it : ParserResult<Number,unit> = Success: Int32 123

> run pnumber "-0xffL";;
val it : ParserResult<Number,unit> = Success: Int64 -255L

> run pnumber "123.123";;
val it : ParserResult<Number,unit> = Success: Float 123.123

> run pnumber "0xabc.defP-4";;
val it : ParserResult<Number,unit> = Success: Float 171.8044281

> run pnumber "-0x";;
val it : ParserResult<Number,unit> = Failure:
Error in Ln: 1 Col: 4
-0x
   ^
Note: The error occurred at the end of the input stream.
Expecting: hexadecimal digit

> run pnumber "0x123UL";;
val it : ParserResult<Number,unit> = Failure:
Error in Ln: 1 Col: 6
0x123UL
     ^
invalid number suffix

> run pnumber "1E9999";;
val it : ParserResult<Number,unit> = Failure:
Error in Ln: 1 Col: 1
1E9999
^
Value was either too large or too small for a Double.
val numberLiteralE:
       NumberLiteralOptions -> errorInCaseNoLiteralFound: ErrorMessageList
    -> CharStream<'u> -> Reply<NumberLiteral>

numberLiteralE is an uncurried version of numberLiteral that can be used to implement number parsers without having to construct a numberLiteral closure.

val pfloat: Parser<float,'u>

Parses a floating point number in the decimal format (in regular expression notation)

[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?

or the hexadecimal format

0[xX][0-9a-fA-F]+(\.[0-9a-fA-F]*)?([pP][+-]?[0-9]+)?

(as supported by IEEE 754r, C99 and Java, where e.g. 0x1f.cP-5 represents 31.75 * 2^(-5)).

The special values NaN and Inf(inity)? (case‐insensitive) are also recognized. All recognized numbers may be prefixed with a plus or minus sign.

Fractions without a leading digit, as for example “.5”, are not supported.

The parser fails

  • without consuming input, if not at least one digit (including the 0 in 0x) can be parsed,
  • after consuming input, if no digit comes after an exponent marker or no hex digit comes after 0x.
Note

Values that can’t be represented as a finite float after rounding are parsed as plus or minus infinity. This behaviour changed between FParsec versions 1.0.3 and 1.0.10, following the respective behaviour change of System.Double.Parse on .NET Core 3.

Note

The pfloat parser is based on the configurable numberLiteral parser. If you’d like to support a different floating‐point format, there’s a good chance you can implement a parser for that format by some simple changes to a copy of the pfloat source.

val pint64: Parser<int64,'u>

Parses a 64‐bit signed integer number in the decimal, hexadecimal (0[xX]), octal (0[oO]) and binary (0[bB]) formats (in regular expression notation):

[+-]?([0-9]+
     |0[xX][0-9a-fA-F]+
     |0[oO][0-7]+
     |0[bB][01]+
     )

The parser fails

  • without consuming input, if not at least one digit (including the 0 in the format specifiers 0x etc.) can be parsed,
  • after consuming input, if no digit comes after a format specifier,
  • after consuming input, if the value represented by the input string is greater than System.Int64.MaxValue or less than System.Int64.MinValue.
val pint32: Parser<int32,'u>

pint32 parses a 32‐bit signed integer and behaves like pint64, except for the different return type and smaller integer range.

val pint16: Parser<int16,'u>

pint16 parses a 16‐bit signed integer and behaves like pint64, except for the different return type and smaller integer range.

val pint8: Parser<int8,'u>

pint8 parses an 8‐bit signed integer and behaves like pint64, except for the different return type and smaller integer range.

val puint64: Parser<uint64,'u>

Parses numbers in the decimal, hexadecimal (0[xX]), octal (0[oO]) and binary (0[bB]) formats (in regular expression notation):

[0-9]+
|0[xX][0-9a-fA-F]+
|0[oO][0-7]+
|0[bB][01]+

Note that the parser does not accept a leading plus sign.

The parser fails

  • without consuming input, if not at least one digit (including the 0 in the format specifiers 0x etc.) can be parsed,
  • after consuming input, if no digit comes after a format specifier,
  • after consuming input, if the value represented by the input string is greater than System.UInt64.MaxValue.
val puint32: Parser<uint32,'u>

puint32 parses a 32‐bit unsigned integer and behaves like puint64, except for the different return type and smaller integer range.

val puint16: Parser<uint16,'u>

puint16 parses a 16‐bit unsigned integer and behaves like puint64, except for the different return type and smaller integer range.

val puint8: Parser<uint8,'u>

puint8 parses an 8‐bit unsigned integer and behaves like puint64, except for the different return type and smaller integer range.

val notFollowedByEof: Parser<unit,'u>

notFollowedByEof is an optimized implementation of notFollowedByL eof "end of input".

val followedByNewline: Parser<unit,'u>

followedByNewline is an optimized implementation of followedByL newline "newline".

val notFollowedByNewline: Parser<unit,'u>

notFollowedByNewline is an optimized implementation of notFollowedByL newline "newline".

val followedByString: string -> Parser<unit,'u>

followedByString str is an optimized implementation of followedByL (pstring str) ("'" + str + "'").

val followedByStringCI: string -> Parser<unit,'u>

followedByStringCI str is an optimized implementation of followedByL (pstringCI str) ("'" + str + "'").

val notFollowedByString: string -> Parser<unit,'u>

notFollowedByString str is an optimized implementation of notFollowedByL (pstring str) ("'" + str + "'").

val notFollowedByStringCI: string -> Parser<unit,'u>

notFollowedByStringCI str is an optimized implementation of notFollowedByL (pstringCI str) ("'" + str + "'").

val nextCharSatisfies: (char -> bool) -> Parser<unit,'u>

nextCharSatisfies f is an optimized implementation of followedBy (satisfy f).

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val nextCharSatisfiesNot: (char -> bool) -> Parser<unit,'u>

nextCharSatisfiesNot f is an optimized implementation of notFollowedBy (satisfy f).

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val next2CharsSatisfy: (char -> char -> bool) -> Parser<unit,'u>

next2CharsSatisfy f succeeds if the predicate function f returns true when applied to the next 2 chars in the input stream, otherwise it fails. If there aren’t 2 chars remaining in the input stream, this parser fails (as opposed to next2CharsSatisfyNot). This parser never changes the parser state. Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'.

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val next2CharsSatisfyNot: (char -> char -> bool) -> Parser<unit,'u>

next2CharsSatisfyNot f succeeds if the predicate function f returns false when applied to the next 2 chars in the input stream, otherwise it fails. If there aren’t 2 chars remaining in the input stream, this parser succeeds (as opposed to next2CharsSatisfy). This parser never changes the parser state. Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'.

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val previousCharSatisfies: (char -> bool) -> Parser<unit,'u>

previousCharSatisfies f succeeds if the predicate function f returns true when applied to the previous char in the stream, otherwise it fails. If there is no previous char (because the input stream is at the beginning), this parser fails (as opposed to previousCharSatisfiesNot). This parser never changes the parser state. Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'.

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val previousCharSatisfiesNot: (char -> bool) -> Parser<unit,'u>

previousCharSatisfiesNot f succeeds if the predicate function f returns false when applied to the previous char in the stream, otherwise it fails. If there is no previous char (because the stream is at the beginning), this parser succeeds (as opposed to previousCharSatisfies). This parser never changes the parser state. Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'.

Note

If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error.

val foldCase: string -> string

Forwards all calls to FParsec.Text.FoldCase.

val normalizeNewlines: string -> string

Forwards all calls to FParsec.Text.NormalizeNewlines.

val floatToHexString: float -> string

Returns a hexadecimal string representation of the float argument. The hexadecimal format is the one supported by IEEE 754r, C99 and Java. This function produces the same output as the Double.toHexString method in Java.

val floatOfHexString: string -> float

Returns the float value represented by the given string in hexadecimal format. The supported input format is (in regular expression notation):

[+-]?((0[xX])?([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)?
     |[iI][nN][fF]([iI][nN][iI][tT][yY])?
     |[nN][aA][nN]
     )

Note that no leading or trailing whitespace is allowed, neither are trailing format specifiers such as f or d.

For example, a valid input string is 0x1f.cP-5, which represents the value 31.75 * 2^(-5).

The numerical value represented by the input string is conceptually converted to an “infinitely precise” binary value that is then rounded to type float by the usual round‐to‐nearest (and ties‐to‐even) rule of IEEE 754 floating‐point arithmetic. The special values NaN and Inf(inity)? (case insensitive) are also recognized. Signs of zero and Infinity values are preserved.

A System.FormatException is raised if the string representation is invalid. A System.OverflowException is raised, if the value represented by the input string (after rounding) is greater than System.Double.MaxValue or less than System.Double.MinValue.

val float32ToHexString: float32 -> string

Returns a hexadecimal string representation of the float32 argument. The hexadecimal format is the one supported by IEEE 754r, C99 and Java. This function produces the same output as the Float.toHexString method in Java.

val float32OfHexString: string -> float32

Returns the float32 value represented by the given string in hexadecimal format. The supported input format is (in regular expression notation):

[+-]?((0[xX])?([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)?
     |[iI][nN][fF]([iI][nN][iI][tT][yY])?
     |[nN][aA][nN]
     )

Note that no leading or trailing whitespace is allowed, neither are trailing format specifiers such as f or d.

For example, a valid input string is 0x1f.cP-5, which represents the value 31.75 * 2^(-5).

The numerical value represented by the input string is conceptually converted to an “infinitely precise” binary value that is then rounded to type float32 by the usual round‐to‐nearest (and ties‐to‐even) rule of IEEE 754 floating‐point arithmetic. The special values NaN and Inf(inity)? (case insensitive) are also recognized. Signs of zero and Infinity values are preserved.

Note that in general float32OfHexString(str) is not equivalent to float32 (floatOfHexString(str)), because the latter version rounds twice.

A System.FormatException is raised if the string representation is invalid. A System.OverflowException is raised, if the value represented by the input string (after rounding) is greater than System.Single.MaxValue or less than System.Single.MinValue.

================================================ FILE: Doc/html/reference/charstream.html ================================================ FParsec.CharStream

6.11 FParsec.CharStream

6.11.1 CharStream

Provides read‐access to a sequence of UTF‐16 chars.

6.11.1.1 Interface

// FParsecCS.dll

namespace FParsec

type CharStream =
  interface System.IDisposable

  new:    chars: string * index: int * length: int
       -> CharStream
  new:    chars: string * index: int * length: int * streamBeginIndex: int64
       -> CharStream

  new:    chars: char[] * index: int * length: int
       -> CharStream
  new:    chars: char[] * index: int * length: int * streamBeginIndex: int64
       -> CharStream

  new:    chars: NativePtr<char> * length: int
       -> CharStream
  new:    chars: NativePtr<char> * length: int * streamBeginIndex: int64
       -> CharStream

  new:    path: string * encoding: System.Text.Encoding
       -> CharStream
  new:    path: string
        * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
       -> CharStream
  new:    path: string
        * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
        * blockSize: int * blockOverlap: int * minRegexSpace: int
        * byteBufferLength: int
       -> CharStream

  new:    stream: System.IO.Stream * encoding: System.Text.Encoding
       -> CharStream
  new:    stream: System.IO.Stream * leaveOpen: bool
        * encoding: System.Text.Encoding
       -> CharStream
  new:    stream: System.IO.Stream * leaveOpen: bool
        * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
       -> CharStream
  new:    stream: System.IO.Stream * leaveOpen: bool
        * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
        * blockSize: int * blockOverlap: int * minRegexSpace: int
        * byteBufferLength: int
       -> CharStream

  member Dispose: unit -> unit

  member BlockOverlap: int

  member IndexOfFirstChar: int64
  member IndexOfLastCharPlus1: int64

  member IsBeginOfStream: bool
  member IsEndOfStream: bool

  member Index: int64
  member IndexToken: CharStreamIndexToken
  member Line: int64
  member LineBegin: int64
  member Column: int64
  member Name: string with get, set
  member Position: Position

  val mutable StateTag: uint64

  member Seek: index: int64 -> unit
  member Seek: indexToken: CharStreamIndexToken -> unit

  static val EndOfStreamChar: char

  member Peek:  unit -> char
  member Peek2: unit -> TwoChars
  member Peek:  utf16Offset: int    -> char
  member Peek:  utf16Offset: uint32 -> char

  member PeekString: length: int -> string
  member PeekString: buffer: char[] * bufferIndex: int * length: int -> int
  member PeekString: buffer: NativePtr<char> * length: int -> int

  member Match: char -> bool
  member Match: chars: string -> bool
  member Match: chars: char[] * charsIndex: int * length: int -> bool
  member Match: chars: NativePtr<char> * length: int -> bool

  member MatchCaseFolded: caseFoldedChar: char -> bool
  member MatchCaseFolded: caseFoldedChars: string -> bool
  member MatchCaseFolded: caseFoldedChars: NativePtr<char> * length:int -> bool

  member Match: System.Text.RegularExpressions.Regex
                -> System.Text.RegularExpressions.Match
  member MinRegexSpace: int with get, set

  member RegisterNewline: unit -> bool
  member RegisterNewlines: lineOffset: int   -> newColumnMinus1: int   -> bool
  member RegisterNewlines: lineOffset: int64 -> newColumnMinus1: int64 -> bool

  // The following methods require manual registration of skipped newlines

  member Skip: unit -> unit
  member Skip: utf16Offset: int    -> unit
  member Skip: utf16Offset: uint32 -> unit
  member Skip: utf16Offset: int64  -> unit

  member SkipAndPeek: unit   -> char
  member SkipAndPeek: utf16Offset: int    -> char
  member SkipAndPeek: utf16Offset: uint32 -> char

  member Skip: char -> bool
  member Skip: TwoChars -> bool
  member Skip: chars: string -> bool
  member Skip: chars: char[] * charsIndex: int * length: int -> bool
  member Skip: chars: NativePtr<char> * length: int -> bool

  member SkipCaseFolded: caseFoldedChar: char -> bool
  member SkipCaseFolded: caseFoldedChars: string -> bool
  member SkipCaseFolded: caseFoldedChars: NativePtr<char> * length:int -> bool

  member Read: unit -> char
  member Read: length: int -> string
  member Read: buffer: char[] * bufferIndex: int * length: int -> int
  member Read: buffer: NativePtr<char> * length: int -> int

  member ReadFrom: indexOfFirstChar: CharStreamIndexToken -> string

  // The following methods automatically register skipped newlines

  member SkipWhitespace: unit -> bool
  member SkipUnicodeWhitespace: unit -> bool

  member SkipNewline: unit -> bool
  member SkipUnicodeNewline: unit -> bool

  member SkipNewlineThenWhitespace:
      powerOf2TabStopDistance: int * allowFormFeed: bool -> int

  member SkipRestOfLine: skipNewline: bool -> unit
  member ReadRestOfLine: skipNewline: bool -> string

  member ReadCharOrNewline: unit -> char

  member SkipCharsOrNewlines: maxCount: int -> int
  member ReadCharsOrNewlines: maxCount: int * normalizeNewlines: bool -> string

  member SkipCharsOrNewlinesWhile:
      predicate: (char -> bool) -> int
  member SkipCharsOrNewlinesWhile:
      predicateForFirstChar: (char -> bool) * predicate: (char -> bool) -> int
  member SkipCharsOrNewlinesWhile:
      predicate: (char -> bool) * minCount: int * maxCount: int -> int
  member SkipCharsOrNewlinesWhile:
      predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
    * minCount: int * maxCount: int -> int

  member ReadCharsOrNewlinesWhile:
      predicate: (char -> bool)
    * normalizeNewlines: bool -> string
  member ReadCharsOrNewlinesWhile:
      predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
    * normalizeNewlines: bool -> string
  member ReadCharsOrNewlinesWhile:
      predicate: (char -> bool)
    * minCount: int * maxCount: int * normalizeNewlines: bool -> string
  member ReadCharsOrNewlinesWhile:
      predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
    * minCount: int * maxCount: int * normalizeNewlines: bool -> string

  member SkipCharsOrNewlinesUntilString:
      str: string * maxCount: int
    * foundString: out<bool> -> int
  member SkipCharsOrNewlinesUntilString:
      str: string * maxCount: int * normalizeNewlines: bool
    * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int

  member SkipCharsOrNewlinesUntilCaseFoldedString:
      caseFoldedString: string * maxCount: int
    * foundString: out<bool> -> int
  member SkipCharsOrNewlinesUntilCaseFoldedString:
      caseFoldedString: string * maxCount: int * normalizeNewlines: bool
    * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int

6.11.1.2 Remarks

The CharStream class provides a unified interface for efficiently reading UTF‐16 chars from a binary stream or an in‐memory char buffer (e.g. a string). It is optimized for the use in backtracking parser applications and supports arbitrary char‐based seeking, even for streams larger than the addressable memory (on 32‐bit platforms).

The CharStream class is the base class of CharStream<'TUserState>, which adds a user‐definable state component and some convenience methods for working with the state of a CharStream instance.

A CharStream constructed from a System.IO.Stream or a file path reads the stream block‐wise and only holds the most recently accessed block in memory. The blocks overlap in order to provide efficient access on the boundary between blocks.

If the char content is already available as a string or a char array, a CharStream can be directly constructed from the char buffer (without needing to copy the buffer). The overhead of accessing an in‐memory char buffer through a CharStream is minimal.

Position information

The position of the next char in the stream is described by the following 4 properties:

  • Index, the index of the UTF‐16 char in the stream,
  • Line, the line number for the next char,
  • LineBegin, the index of the first char of the line that also contains the next char,
  • Name, a description or identifier for the stream.

The LineBegin can be combined with the Index to calculate a Column number.

Among these properties the char index is the most important one, as the CharStream uses it to uniquely identify a UTF‐16 char in the stream.

The other 3 properties further describe the text location of the char identified by the index, but they are not necessary for the core functionality of the CharStream class. The CharStream class keeps track of this additional position information to provide a more convenient interface to higher‐level library functions, in particular to assist debugging and error reporting purposes.

Newlines

For performance reasons the most basic stream operations do not automatically recognize newlines (end‐of‐line markers) in the stream content. If you skip any newline with these methods, you have to manually register the newline afterwards with one of the RegisterNewline methods (otherwise the line and column count becomes incorrect).

In order to provide a convenient interface for parser routines, the CharStream class also provides some more advanced methods that automatically register any skipped standard newline ("\n", "\r\n" and "\r"). Additionally, it provides two methods that automatically register any Unicode newline (SkipUnicodeWhitespace and SkipUnicodeNewline).

It should be obvious from the method names which methods automatically register newlines and which don’t.

Case‐insensitive matching

The MatchCaseFolded and SkipCaseFolded members match the content of the stream “case‐insensitively” with a reference string. In this instance “case‐insensitive” means that before the chars are matched with the reference string they are mapped to a canonical form where case differences are erased. For performance reasons MatchCaseFolded only applies the (non‐Turkic) 1‐to‐1 case folding mappings (v. 8.0.0) for Unicode code points in the Basic Multilingual Plane, i.e. code points below 0x10000. These mappings are sufficient for many case‐insensitive parser grammars encountered in practice, but they are not appropriate for matching arbitrary natural language content. Please also note that the CharStream class performs no Unicode normalization.

Non‐sequential access

This note does not apply to the Low‐Trust version of FParsec.
If you construct a CharStream from a System.IO.Stream or a file path and you backtrack over a distance long enough to require the CharStream to reread a previous block, then the underlying byte stream needs to support seeking, otherwise a NotSupportedException is thrown. Furthermore, the Decoder for the input Encoding must be serializable if you backtrack to a block other than the first in the stream. Note that file streams created for regular disk files are always seekable and all the .NET standard decoders are serializable. In order to support non‐seekable streams for applications which don’t require extensive backtracking, no exception will be thrown before an operation actually requires backtracking and the necessary capabilities of the stream or decoder are not available.

Decoder errors

A CharStream constructed from a binary input stream decodes the input data with the help of a Decoder instance obtained via the Encoding’s GetDecoder method. Depending on the configuration of the encoding the decoder might throw an exception if it encounters invalid byte sequences, usually a System.Text.DecoderFallbackException or a System.ArgumentException. [1]

Disposable interface

This note does not apply to the Low‐Trust version of FParsec.
A CharStream holds managed and unmanaged resources that need to be explicitly released. Hence, it is very important that CharStream objects are promptly disposed after use. Where possible CharStream objects should only be used within a “using” block (C#), a “use” expression (F#) or similar constructs in other languages.

Thread safety

CharStream members are not thread‐safe.

Low‐Trust version

If you compile FParsec with the LOW_TRUST conditional compiler symbol, the CharStream class differs from the normal version as follows:

  • No unverifiable code involving pointers is used. This allows FParsec to be executed in an environment with reduced trust, such as medium trust ASP.NET applications or Silverlight applications.
  • A CharStream that is constructed from a System.IO.Stream or a file path reads the complete file into a single string during construction. This severely limits the maximum practical stream size.
  • Although the CharStream class still supports the IDisposable interface, disposing the CharStream instances is no longer necessary, since no resources are held that need to be explicitly released.

See also section 3.5.

6.11.1.3 I/O exceptions

If you construct a CharStream from a System.IO.Stream or a file path, the constructor and any CharStream operation that requires reading chars from the underlying byte stream may throw one of the following exceptions.

In the Low‐Trust version, the constructor decodes the complete byte stream and hence only the constructor may throw one of these exceptions.

Note

Doing actual work in a constructor and potentially throwing exceptions seems to be a somewhat controversial design. We think it’s the right choice for the CharStream class, because this way you can have a reasonable expectation that the CharStream actually works after you’ve successfully constructed it.

In general it is not safe to continue to use a CharStream instance after one of these exceptions was thrown, though calling Dispose() is always safe.

NotSupportedException

Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding’s Decoder is not serializable. See also the remarks above on non‐sequential access.

IOException

An I/O error occurred while reading data from the underlying byte stream.

ArgumentException

The underlying byte stream contains invalid bytes and the Encoding was constructed with the throwOnInvalidBytes option.

DecoderFallbackException

The underlying byte stream contains invalid bytes for which the decoder fallback threw this exception.

The byte index of the invalid bytes in the stream is stored as a boxed System.Int64 in the "Stream.Position" entry of the Data member of the exception instance. The precision of the index depends on the precision of the DecoderFallbackException’s Index member. If the underlying System.IO.Stream is not seekable, the byte index only takes into account the bytes read by the CharStream, but not any bytes read before the CharStream was constructed.

6.11.1.4 Members

new:    chars: string * index: int * length: int
     -> CharStream

Is equivalent to new CharStream(chars, index, length, 0L).

new:    chars: string * index: int * length: int * streamBeginIndex: int64
     -> CharStream

Constructs a CharStream from the chars in the string argument between the indices index (inclusive) and index + length (exclusive). By directly referencing the chars in the string this constructor avoids any copy of the string content.

The first char in the stream is assigned the index streamBeginIndex. A positive streamBeginIndex allows you for example to create a substream of another CharStream, i.e. a CharStream instance that only contains a sub‐segment of another char stream but is accessible through the same char indices.

chars must not be null. An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions:

  • index ≥ 0, length ≥ 0, index + length ≤ chars.Length and
  • 0 ≤ streamBeginIndex < 2^60.
Important

This note does not apply to the Low‐Trust version of FParsec.
The given string is “pinned” until the CharStream is disposed. Pinning the string prevents the GC from moving it around in memory during garbage collection. On .NET (at least in versions up to and including 4.0) the pinning has no effect if the string is large enough to be allocated on the Large Object Heap, i.e. has a length of about 42500 chars or more. However, pinning smaller strings does constrain the normal operations of the GC. Thus, to minimize the negative impact on the GC, you should dispose CharStream instances constructed from small strings as soon as you’re done parsing it. If you keep a large number of CharStream instances constructed from small strings around for an extended period of time, you risk fragmenting the heap.

new:    chars: char[] * index: int * length: int
     -> CharStream

This constructor is not available in the Low‐Trust version of FParsec.

Is equivalent to new CharStream(chars, index, length, 0L).

new:    chars: char[] * index: int * length: int * streamBeginIndex: int64
     -> CharStream

This constructor is not available in the Low‐Trust version of FParsec.

Constructs a CharStream from the chars in the char array argument between the indices index (inclusive) and index + length (exclusive). By directly referencing the chars in the char array this constructor avoids any copy of the char array content.

The first char in the stream is assigned the index streamBeginIndex. A positive streamBeginIndex allows you for example to create a substream of another CharStream, i.e. a CharStream instance that only contains a sub‐segment of another char stream but is accessible through the same char indices.

chars must not be null. An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions:

  • index ≥ 0, length ≥ 0, index + length ≤ chars.Length and
  • 0 ≤ streamBeginIndex < 2^60.
Note

A CharStream constructed from a char array does not support .NET regex matching via the Match method.

Important

The given char array is “pinned” until the CharStream is disposed. Pinning the char array prevents the GC from moving it around in memory during garbage collection. On .NET (at least in versions up to and including 4.0) the pinning has no effect if the char array is large enough to be allocated on the Large Object Heap, i.e. has a length of about 42500 chars or more. However, pinning smaller char arrays does constrain the normal operations of the GC. Thus, to minimize the negative impact on the GC, you should dispose CharStream instances constructed from small char arrays as soon as you’re done parsing it. If you keep a large number of CharStream instances constructed from small char arrays around for an extended period of time, you risk fragmenting the heap.

new:    chars: NativePtr<char> * length: int
     -> CharStream

This constructor is not available in the Low‐Trust version of FParsec.

Is equivalent to new CharStream(chars, length, 0L).

new:    chars: NativePtr<char> * length: int * streamBeginIndex: int64
     -> CharStream

This constructor is not available in the Low‐Trust version of FParsec.

Constructs a CharStream from the length chars at the pointer address. By directly referencing the chars at the pointer address this constructor avoids any copy of the char buffer.

The first char in the stream is assigned the index streamBeginIndex. A positive streamBeginIndex allows you for example to create a substream of another CharStream, i.e. a CharStream instance that only contains a sub‐segment of another char stream but is accessible through the same char indices.

chars must not be null. An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions:

  • length ≥ 0, chars + length must not overflow and
  • 0 ≤ streamBeginIndex < 2^60.
Note

A CharStream constructed from a pointer does not support .NET regex matching via the Match method.

new:    path: string * encoding: System.Text.Encoding
     -> CharStream

Is equivalent to new CharStream(path, encoding, true).

new:    path: string
      * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
     -> CharStream

Is equivalent to

new CharStream(
    path, encoding, detectEncodingFromByteOrderMarks,
    blockSize = DefaultBlockSize (* = 3*2^16 ≈ 200k *),
    blockOverlap = DefaultBlockSize/3,
    minRegexSpace = ((DefaultBlockSize/3)*2)/3,
    byteBufferLength = DefaultByteBufferLength
)
new:    path: string
      * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
      * blockSize: int * blockOverlap: int * minRegexSpace: int
      * byteBufferLength: int
     -> CharStream

Constructs a CharStream from a FileStream as if by calling

new CharStream(
    new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096,
                   FileOptions.SequentialScan),
    leaveOpen = false,
    encoding = encoding,
    detectEncodingFromByteOrderMarks = detectEncodingFromByteOrderMarks,
    blockSize = blockSize,
    blockOverlap = blockOverlap,
    minRegexSpace = minRegexSpace,
    byteBufferLength = byteBufferLength
)

If an exception occurs after the FileStream is constructed but before the CharStream constructor is finished, the FileStream is disposed.

Note

The FileStream constructor might throw an exception, too.

new:    stream: System.IO.Stream * encoding: System.Text.Encoding
     -> CharStream

Is equivalent to new CharStream(stream, false, encoding, true).

new:    stream: System.IO.Stream * leaveOpen: bool
      * encoding: System.Text.Encoding
     -> CharStream

Is equivalent to new CharStream(stream, leaveOpen, encoding, true).

new:    stream: System.IO.Stream * leaveOpen: bool
      * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
     -> CharStream

Is equivalent to

new CharStream(
    stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks,
    blockSize = DefaultBlockSize (* = 3*2^16 ≈ 200k *),
    blockOverlap = DefaultBlockSize/3,
    minRegexSpace = ((DefaultBlockSize/3)*2)/3,
    byteBufferLength = DefaultByteBufferLength
)
new:    stream: System.IO.Stream * leaveOpen: bool
      * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool
      * blockSize: int * blockOverlap: int * minRegexSpace: int
      * byteBufferLength: int
     -> CharStream

Constructs a CharStream from a System.IO.Stream.

The normal version of the CharStream class supports stream sizes up to approximately (2^31/p)×(blockSize - blockOverlap) chars, where p is 4 on a 32‐bit CLR and 8 on a 64‐bit CLR.
The Low‐Trust version only supports streams small enough that the complete content can be read into a single string.

Note

This constructor reads the first block of chars from the input stream and hence can throw any of the I/O related exceptions detailed in the exceptions section above.

Arguments:

stream

The byte stream providing the input. If stream.CanRead returns false, an ArgumentException is thrown.

leaveOpen

Indicates whether the stream should be left open when the CharStream has finished reading it.

encoding

The default Encoding used for decoding the byte stream into chars.

If the preamble returned by encoding.GetPreamble() is present at the beginning of the stream, the CharStream will skip over it.

detectEncodingFromByteOrderMarks

Indicates whether the constructor should detect the encoding from a unicode byte‐order mark at the beginning of the stream. An encoding detected from a byte‐order mark overrides the default encoding. The standard byte‐order marks for the following encodings are supported: UTF‐8, UTF‐16 LE/BE and UTF‐32 LE/BE.

blockSize

The number of chars per block. The value is rounded up to the first positive multiple of 1536. The default is 3×2^16 ≈ 200k.

blockOverlap

The number of chars at the end of a block that are preserved when reading the next block into its internal char buffer. If this value is less than encoding.GetMaxCharCount(1) or not less than blockSize/2, the default value is used instead. The default is blockSize/3.

byteBufferLength

The size of the byte buffer used for decoding purposes. The default is 2^12 = 4KB.

member Dispose: unit -> unit

Releases all resources used by the CharStream. If the CharStream was constructed from a System.IO.Stream or a file path and the constructor was not called with leaveOpen = true, the byte stream is closed.

member BlockOverlap: int

The number of chars at the end of a block that are preserved when the CharStream reads the next block into its internal char buffer.

This value is only relevant for optimization purposes and as the maximum value for MinRegexSpace.

This value can only be set at construction time with the respective constructor parameter.

If the CharStream is constructed from a string, char array or char pointer or only contains 1 block, then this value is 0. In the Low‐Trust version this value is always 0.

member IndexOfFirstChar: int64

The index of the first char in the stream. This value is determined by the streamBeginIndex argument of some of the CharStream constructors. By default this value is 0.

member IndexOfLastCharPlus1: int64

The index of the last char of the stream plus 1, or Int64.MaxValue if the end of the stream has not yet been detected.

member IsBeginOfStream: bool

Indicates whether the next char in the stream is the first char, i.e. whether Index equals IndexOfFirstChar.

If the stream is empty, this value is always true.

member IsEndOfStream: bool

Indicates whether there is no char remaining in the stream, i.e. whether Index equals IndexOfLastCharPlus1.

If the stream is empty, this value is always true.

member Index: int64

The stream index of the next char.

member IndexToken: CharStreamIndexToken

A CharStreamIndexToken value representing the current Index value.

member Line: int64

The line number for the next char. (The line count starts with 1.)

member LineBegin: int64

The stream index of the first char of the line that also contains the next char.

member Column: int64

The UTF‐16 column number of the next char, i.e. Index - LineBegin + 1.

member Name: string with get, set

This string is used in error messages to describe the input stream.

If the CharStream is constructed from a file path, the constructor initializes the Name value with the file path value. Otherwise, Name is initialized to null.

If the stream content is the concatenated content of multiple input files, you can improve error messages and help debugging by setting the name and resetting the line and column count at the transitions between the different content pieces.

Setting the Name value increments the StateTag by 1, independent of whether the new value is different from the previous one.

member Position: Position

Returns new Position(Name, Index, Line, Column).

val mutable StateTag: uint64

The StateTag’s purpose is to provide an efficient way to determine whether the publicly visible state of the CharStream has changed after a series of method calls. For the purpose of this property, the state is defined as the aggregate of the Index, Line, LineBegin and Name values. The UserState value of CharStream<'UserState> instances is also part of the CharStream state. If a method or property setter changes one or more of these state values it increments the StateTag by 1. Thus, to determine whether a series of method calls has changed the CharStream, it is often enough to compare the StateTag values from before and after the method calls.

The StateTag property is primarily meant for use in the implementation of parser combinators. If you directly call CharStream methods, you normally don’t need the StateTag to determine whether the state has changed, because that is usually obvious from either the method’s return value or the context in which it was called. Please see section 5.4.3 for more details on the design rationale behind the StateTag.

member Seek: index: int64 -> unit

Seeks the CharStream to the char with the specified index in the stream.

If you pass an index larger than the index of the last char in the stream, this method seeks the stream to the end of the stream, i.e. to one char past the last char in the stream.

The index is zero‐based, except if the CharStream was constructed with a positive streamBeginIndex argument, in which case the index of the first char equals the value of the streamBeginIndex argument (and the IndexOfFirstChar value).

When this method changes the stream position, it increments the StateTag by 1. When it does not change the position, it may or may not increment the StateTag by 1.

An ArgumentOutOfRangeException is thrown if the index is less than the IndexOfFirstChar. This method may also throw any of the I/O related exceptions detailed above.

member Seek: indexToken: CharStreamIndexToken -> unit

This method is an optimized implementation of Seek(GetIndex(indexToken)).

static val EndOfStreamChar: char

The char returned by Peek and Read at the end of the stream.

The value is '\uFFFF'.

member Peek: unit -> char

Returns the next char without changing the state of the CharStream.

At the end of the CharStream the EndOfStreamChar ('\uFFFF') is returned.

member Peek2: unit -> TwoChars

Peek2() is an optimized implementation of new TwoChars(Peek(), Peek(1)).

member Peek: utf16Offset: int -> char

Returns the char at the stream index Index + utf16Offset, without changing the state of the CharStream.

If Index + utf16Offset is smaller than the index of the first char in the stream or larger than the index of the last char in the stream, the EndOfStreamChar ('\uFFFF') is returned.

This method may throw any of the I/O related exceptions detailed above.

member Peek: utf16Offset: uint32 -> char

This method is an optimized implementation of Peek(int) for uint32 arguments.

member PeekString: length: int -> string

Returns a string with the next length stream chars, without changing the state of the CharStream.

If less than length chars are remaining in the stream, only the remaining chars are returned.

Note

This note does not apply to the Low‐Trust version of FParsec.
If length is greater than the number of remaining chars in the stream, a temporary string with length chars may be allocated. For very large length values this might lead to an OutOfMemoryException even though a string with only the remaining chars in the stream would comfortably fit into memory.

Please also note that the maximum length of a string on .NET is less than 2^30. Allocating a string larger than the maximum length will always yield an OutOfMemoryException, even on 64‐bit systems with enough physical memory.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member PeekString: buffer: char[] * bufferIndex: int * length: int -> int

Copies the next length stream chars into buffer, without changing the state of the CharStream. Returns the number of chars copied.

The chars are written into buffer beginning at the index bufferIndex. If less than length chars are remaining in the stream, only the remaining chars are copied.

An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions: bufferIndex ≥ 0, length ≥ 0 and bufferIndex + length ≤ buffer.Length. This method may also throw any of the I/O related exceptions detailed above.

member PeekString: buffer: NativePtr<char> * length: int -> int

This method is not available in the Low‐Trust version of FParsec.

Copies the next length stream chars into the buffer at the specified pointer address, without changing the state of the CharStream. Returns the number of chars copied.

If less than length chars are remaining in the stream, only the remaining chars are copied.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member Match: char -> bool

Returns true if the next char in the stream matches the specified char. At the end of the stream Match always returns false.

This method does not change the state of the CharStream.

This method may throw any of the I/O related exceptions detailed above.

member Match: chars: string -> bool

Returns true if the passed string chars matches the next chars.Length stream chars.

If not all the chars match or if there are not enough chars remaining in the stream, false is returned. If chars is empty, true is returned. chars must not be null.

This method does not change the state of the CharStream.

This method may throw any of the I/O related exceptions detailed above.

member Match: chars: char[] * charsIndex: int * length: int -> bool

Returns true if the next length stream chars match the chars in the array chars at the indices charsIndex to charsIndex + length - 1.

If not all the chars match or if there are not enough chars remaining in the stream, false is returned. If length is 0, true is returned. chars must not be null.

This method does not change the state of the CharStream.

An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions: charsIndex ≥ 0, length ≥ 0 and charsIndex + length ≤ chars.Length. This method may also throw any of the I/O related exceptions detailed above.

member Match: chars: NativePtr<char> * length: int -> bool

This method is not available in the Low‐Trust version of FParsec.

Returns true if the next length stream chars match the chars at the specified pointer address.

If not all the chars match or if there are not enough chars remaining in the stream, false is returned. If length is 0, true is returned.

This method does not change the state of the CharStream.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member MatchCaseFolded: caseFoldedChar: char -> bool

Behaves like Match(caseFoldedChar), except that the next char in the stream is case‐folded before it is compared with caseFoldedChar.

Note

While the char in the stream is case‐folded before it is matched, the char caseFoldedChar is assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member MatchCaseFolded: caseFoldedChars: string -> bool

Behaves like Match(caseFoldedChars), except that the chars in the stream are case‐folded before they are compared with caseFoldedChars.

Note

While the chars in the CharStream are case‐folded before they are matched, the chars in the string argument caseFoldedChars are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member MatchCaseFolded: caseFoldedChars: NativePtr<char> * length:int -> bool

This method is not available in the Low‐Trust version of FParsec.

Behaves like Match(caseFoldedChars, length), except that the chars in the stream are case‐folded before they are compared with the chars at the pointer address caseFoldedChars.

Note

While the chars in the CharStream are case‐folded before they are matched, the chars at the pointer address caseFoldedChars are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member Match: System.Text.RegularExpressions.Regex -> System.Text.RegularExpressions.Match

Applies the given regular expression to the stream chars beginning with the next char. Returns the resulting Match object.

For performance reasons you should specify the regular expression such that it can only match at the beginning of a string, for example by prepending "\\A".

For CharStream instances constructed from strings the regular expression is applied to a string containing all the remaining chars in the stream.

For CharStream instances constructed from large binary streams (with more than 1 block) the regular expression is not applied to a string containing all the remaining chars in the stream. Here the MinRegexSpace value determines the minimum number of chars that are guaranteed to be visible to the regular expression (assuming there are still enough chars remaining in the stream). The exact number of chars visible to the regular expression may be affected even by calls to CharStream methods like Peek or Match that otherwise guarantee to not change the (outwardly visible) state of the CharStream.

This method may throw any of the I/O related exceptions detailed above.

Important

This note does not apply to the Low‐Trust version of FParsec.
This method is not supported by CharStream instances constructed directly from char arrays or pointers. A NotSupportedException is thrown if this method is called on such a CharStream instance.

Important

This note does not apply to the Low‐Trust version of FParsec.
If the CharStream was constructed from a System.IO.Stream or a file path, the regular expression is applied to an internal mutable buffer. Since the Match object may work lazily, i.e. compute return values not before they are needed, you need to retrieve all the required information from the Match object before you continue to access the CharStream, otherwise you might get back invalid match results. Note that all strings returned by the Match object are, of course, immutable.

member MinRegexSpace: int with get, set

The number of chars that are guaranteed to be visible to a regular expression when it is matched by Match (assuming there are enough chars remaining in the stream).

The value must be non‐negative and not greater than BlockOverlap. The default value is 2/3 of BlockOverlap.

If the CharStream is constructed from a string, char array or char pointer or has only 1 block, then this value has no relevance and calling the property setter has no effect. (No Low‐Trust version CharStream instance has more than 1 block.)

The MinRegexSpace value is not recorded in CharStreamState instances and setting its value does not affect the StateTag.

An ArgumentOutOfRangeException is thrown if you try to set the property on a multi‐block CharStream instance to a negative value or a value larger than the BlockOverlap.

member RegisterNewline: unit -> bool

Registers a newline (an end‐of‐line character) at the previous stream char, i.e. increments the Line value by 1 and sets the LineBegin to Index.

The previous LineBegin value must not equal Index. (For performance reasons this condition is only checked by an assert check in the debug build).

This method also increments the StateTag by 1.

member RegisterNewlines: lineOffset: int -> newColumnMinus1: int -> bool

Increments the Line value by lineOffset and sets the LineBegin value to Index - newColumnMinus1 (so that the Column value becomes newColumnMinus1 + 1).

The lineOffset must not be 0, the new Line value must be greater than 0 and the new LineBegin value must be different from the previous one. (For performance reasons these conditions are only checked by assert checks in the debug build).

This method also increments the StateTag by 1.

member RegisterNewlines: lineOffset: int64 -> newColumnMinus1: int64 -> bool

This method is a variant of RegisterNewlines for int64 arguments.

member Skip: unit -> unit

Advances the position within the stream by 1 char, except at the end of the stream, where it does nothing.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member Skip: utf16Offset: int -> unit

Advances the position within the stream by utf16Offset chars.

The new position within the stream will be min(Index + utf16Offset, IndexOfLastCharPlus1). This means you can’t move past the end of the stream, because any position beyond the last char in the stream is interpreted as precisely one char beyond the last char.

An ArgumentOutOfRangeException is thrown if the new position would lie before the beginning of the CharStream, i.e. if the new index would be less than IndexOfFirstChar. This method may also throw any of the I/O related exceptions detailed above.

When this method changes the stream position, it increments the StateTag by 1. When it does not change the position (because the given offset is 0 or because the stream has already reached the end and the offset is positive), it may or may not increment the StateTag by 1.

member Skip: utf16Offset: uint32 -> unit

This method is an optimized implementation of Skip for uint32 offsets.

member Skip: utf16Offset: int64 -> unit

This method is a variant of Skip for int64 offsets.

member SkipAndPeek: unit -> char

c <- SkipAndPeek() is an optimized implementation of Skip(); c <- Peek().

member SkipAndPeek: utf16Offset: int -> char

c <- SkipAndPeek(utf16Offset) is an optimized implementation of Skip(utf16Offset); c <- Peek(), with the following exception for negative offsets n:
If the new position would lie before the beginning of the CharStream, i.e. if the new index would be less than IndexOfFirstChar, then SkipAndPeek(n) does not throw an exception like stream.Skip(n) would do. Instead it sets the position of the stream to IndexOfFirstChar and returns the EndOfStreamChar ('\uFFFF').

member SkipAndPeek: utf16Offset: uint32 -> char

c <- SkipAndPeek(utf16Offset) is an optimized implementation of Skip(utf16Offset); c <- Peek().

member Skip: char -> bool

Skips over the next char in the stream if this char matches the passed argument char. Returns true if the chars match; otherwise, false. At the end of the stream this method always returns false.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member Skip: TwoChars -> bool

Skips over the next two chars in the stream if these chars match the two chars in the passed TwoChars value. Returns true if the chars match.

If not both chars match or if there are less than 2 chars remaining in the stream, no char is skipped and false is returned.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member Skip: chars: string -> bool

Skips over the next chars.Length chars in the stream if these chars match the passed string chars. Returns true if the chars match.

If not all the chars match or if there are not enough chars remaining in the stream, no char is skipped and false is returned. If chars is empty, true is returned. chars must not be null.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if chars is empty, in which case it may or may not increment the StateTag by 1.

This method may throw any of the I/O related exceptions detailed above.

member Skip: chars: char[] * charsIndex: int * length: int -> bool

Skips over the next length chars in the stream if these chars match the chars in the passed array chars at the indices charsIndex to charsIndex + length - 1. Returns true if the chars match.

If not all the chars match or if there are not enough chars remaining in the stream, false is returned and the position within the CharStream is not changed. If length is 0, true is returned. chars must not be null.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if length is 0, in which case it may or may not increment the StateTag by 1.

An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions: charsIndex ≥ 0, length ≥ 0 and charsIndex + length ≤ chars.Length. This method may also throw any of the I/O related exceptions detailed above.

member Skip: chars: NativePtr<char> * length: int -> bool

This method is not available in the Low‐Trust version of FParsec.

Skips over the next length chars in the stream if these chars match the chars at the pointer address chars. Returns true if the chars match.

If not all the chars match or if there are not enough chars remaining in the stream, false is returned and the position within the CharStream is not changed. If length is 0, true is returned.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if length is 0, in which case it may or may not increment the StateTag by 1.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member SkipCaseFolded: caseFoldedChar: char -> bool

Behaves like Skip(caseFoldedChar), except that the next char in the stream is case‐folded before it is compared with caseFoldedChar.

Note

While the char in the stream is case‐folded before it is matched, the char caseFoldedChar is assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member SkipCaseFolded: caseFoldedChars: string -> bool

Behaves like Skip(caseFoldedChars), except that the chars in the stream are case‐folded before they are compared with caseFoldedChars.

Note

While the chars in the CharStream are case‐folded before they are matched, the chars in the string argument caseFoldedChars are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member SkipCaseFolded: caseFoldedChars: NativePtr<char> * length:int -> bool

This method is not available in the Low‐Trust version of FParsec.

Behaves like Skip(caseFoldedChars, length), except that the chars in the stream are case‐folded before they are compared with the chars at the pointer address caseFoldedChars.

Note

While the chars in the CharStream are case‐folded before they are matched, the chars at the pointer address caseFoldedChars are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member Read: unit -> char

Skips over the next char in the stream. Returns the skipped char.

At the end of the stream Read() does not change the stream position and returns the EndOfStreamChar ('\uFFFF').

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member Read: length: int -> string

Skips over the next length chars in the stream. Returns the skipped chars as a string.

If less than length chars are remaining in the stream, only the remaining chars are skipped and returned.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if length is 0, in which case it may or may not increment the StateTag by 1.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member Read: buffer: char[] * bufferIndex: int * length: int -> int

Skips over the next length stream chars and copies the skipped chars into buffer. Returns the number of copied and skipped chars.

The chars are written into buffer beginning at the index bufferIndex. If less than length chars are remaining in the stream, only the remaining chars are copied and skipped.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if length is 0, in which case it may or may not increment the StateTag by 1.

An ArgumentOutOfRangeException is thrown if the arguments do not satisfy the following conditions: bufferIndex ≥ 0, length ≥ 0 and bufferIndex + length ≤ buffer.Length. This method may also throw any of the I/O related exceptions detailed above.

member Read: buffer: NativePtr<char> * length: int -> int

This method is not available in the Low‐Trust version of FParsec.

Skips over the next length stream chars and copies the skipped chars into the buffer at the given pointer address. Returns the number of copied and skipped chars.

If less than length chars are remaining in the stream, only the remaining chars are copied and skipped.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag, except if length is 0, in which case it may or may not increment the StateTag by 1.

If length is negative, an ArgumentOutOfRangeException is thrown. This method may also throw any of the I/O related exceptions detailed above.

member ReadFrom: indexOfFirstChar: CharStreamIndexToken -> string

Returns a string with the chars between the stream index indexOfFirstChar (inclusive) and the current Index of the stream (exclusive).

This method throws

It may also throw any of the I/O related exceptions detailed above.

Note

You may only pass CharStreamIndexToken values that were retrieved from the CharStream instance on which you’re calling ReadFrom. Passing a CharStreamIndexToken value that was created for another CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

member SkipWhitespace: unit -> bool

Skips over any sequence of space (' '), tab ('\t') or newline ('\r', '\n') chars. Returns true if it skips at least one char, otherwise false.

This method registers any skipped standard newline ("\n", "\r\n" or "\r").

When this method skips at least one char, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipUnicodeWhitespace: unit -> bool

Skips over any sequence of unicode whitespace chars (as identified by System.Char.IsWhiteSpace). Returns true if it skips at least one char, otherwise false.

This method registers any skipped unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028" or "\u2029").

Note

This method recognizes the form feed char '\f' ('\u000C') as a Unicode whitespace character, but not as a newline character.

When this method skips at least one char, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipNewline: unit -> bool

Skips over a standard newline ("\n", "\r\n" or "\r"). Returns true if a newline is skipped, otherwise false.

When this method skips a newline, it also registers it.

When this method skips a newline, it increments the StateTag by 1, otherwise it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipUnicodeNewline: unit -> bool

Skips over a unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028", or "\u2029"). Returns true if a newline is skipped, otherwise false.

Note

This method does not recognize the form feed char '\f' ('\u000C') as a newline character.

When this method skips a newline, it also registers it.

When this method skips a newline, it increments the StateTag by 1, otherwise it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipNewlineThenWhitespace:
    powerOf2TabStopDistance: int * allowFormFeed: bool -> int

Skips over a newline ("\n", "\r\n" or "\r") followed by any (possibly empty) sequence of whitespace chars (' ', '\t', '\r', '\n' and optionally '\f').

If this method skips no chars because the next stream char is not a newline char, it returns ‒1. Otherwise it returns the indentation of the first line with non‐whitespace characters.

The indentation is calculated as follows:

  • Any newline char ('\r' or '\n') or form feed char ('\f') resets the indentation to 0.
  • Any space char (' ') increments the indentation by 1.
  • Any tab char ('\t') increments the indentation by
    powerOf2TabStopDistance ‐ (indentation modulo powerOf2TabStopDistance).

The maximum indentation is 2^31 ‐ 1 (System.Int32.MaxValue). If skipping a whitespace char would cause the indentation to overflow, the char is not skipped and the method returns the indentation up to that char.

An ArgumentOutOfRangeException is thrown if powerOf2TabStopDistance is not a positive power of 2.

The value of the allowFormFeed argument determines whether this method accepts the form feed char '\f' as a whitespace char.

This method registers all skipped standard newlines ("\n", "\r\n" or "\r").

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipRestOfLine: skipNewline: bool -> unit

Skips over any chars before the next newline ("\n", "\r\n" or "\r") or the end of the stream. If skipNewline is true and a newline is present, the newline is also skipped.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member ReadRestOfLine: skipNewline: bool -> string

ReadRestOfLine(skipNewline) behaves like SkipRestOfLine(skipNewline), except that it returns a string with the skipped chars (without a newline).

member ReadCharOrNewline: unit -> char

Skips over any single char or standard newline ("\n", "\r\n" or "\r").

This method returns '\n' when it skips a newline. Otherwise, it returns the skipped char, except at the end of the stream, where it returns the EndOfStreamChar ('\uffff').

When this method skips a newline, it also registers it.

When this method skips a char or newline, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method may throw any of the I/O related exceptions detailed above.

member SkipCharsOrNewlines: maxCount: int -> int

Skips over up to maxCount chars. Returns the number of skipped chars.

The number of actually skipped chars is less than maxCount if the end of the stream is reached after less than maxCount chars.

This method counts standard newlines ("\n", "\r\n" or "\r") as single chars. When this method skips a newline, it also registers it.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

An ArgumentOutOfRangeException is thrown if maxCount is negative. This method may also throw any of the I/O related exceptions detailed above.

member ReadCharsOrNewlines: maxCount: int * normalizeNewlines: bool -> string

Behaves like SkipCharsOrNewlines(maxCount), except that it returns a string with the skipped chars.

The normalizeNewlines parameter determines whether all newlines ("\n", "\r\n" or "\r") in the returned string are normalized to '\n' or whether they are preserved in the original form they are encountered in the input.

member SkipCharsOrNewlinesWhile:
    predicate: (char -> bool) -> int

Skips over a sequence of chars that satisfy the predicate function. Stops at the first char for which predicate returns false. Returns the number of skipped chars.

This method counts standard newlines ("\n", "\r\n" or "\r") as single chars and passes them to the predicate function as single '\n' chars. When this method skips a newline, it also registers it.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

Caution

The predicate function must not access the CharStream instance itself, because SkipCharsOrNewlinesWhile relies on predicate not having any side‐effect on the internal state of the stream.

This method may throw any of the I/O related exceptions detailed above.

member SkipCharsOrNewlinesWhile:
    predicateForFirstChar: (char -> bool) * predicate: (char -> bool) -> int

Behaves like SkipCharsOrNewlinesWhile(predicate), except that the first char to be skipped must satisfy predicateForFirstChar instead of predicate.

member SkipCharsOrNewlinesWhile:
    predicate: (char -> bool) * minCount: int * maxCount: int -> int

Skips over a sequence of up to maxCount chars that satisfy the predicate function, but backtracks to the start if it can only skip less than minCount chars. Returns the number of skipped chars.

This method counts standard newlines ("\n", "\r\n" or "\r") as single chars and passes them to the predicate function as single '\n' chars. When this method skips a newline, it also registers it.

An ArgumentOutOfRangeException is thrown if maxCount is negative. This method may also throw any of the I/O related exceptions detailed above.

Caution

The predicate function must not access the CharStream instance itself, because SkipCharsOrNewlinesWhile relies on predicate not having any side‐effect on the internal state of the stream.

member SkipCharsOrNewlinesWhile:
    predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
  * minCount: int * maxCount: int -> int

Behaves like SkipCharsOrNewlinesWhile(predicate, minCount, maxCount), except that the first char to be skipped must satisfy predicateForFirstChar instead of predicate.

member ReadCharsOrNewlinesWhile:
    predicate: (char -> bool)
  * normalizeNewlines: bool -> string

Behaves like SkipCharsOrNewlinesWhile(predicate), except that it returns a string with the skipped chars.

The normalizeNewlines parameter determines whether all newlines ("\n", "\r\n" or "\r") in the returned string are normalized to '\n' or whether they are preserved in the original form they are encountered in the input.

member ReadCharsOrNewlinesWhile:
    predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
  * normalizeNewlines: bool -> string

Behaves like ReadCharsOrNewlinesWhile(predicate, normalizeNewlines), except that the first char to be skipped must satisfy predicateForFirstChar instead of predicate.

member ReadCharsOrNewlinesWhile:
    predicate: (char -> bool)
  * minCount: int * maxCount: int * normalizeNewlines: bool -> string

Behaves like SkipCharsOrNewlinesWhile(predicate, minCount, maxCount), except that it returns a string with the skipped chars.

The normalizeNewlines parameter determines whether all newlines ("\n", "\r\n" or "\r") in the returned string are normalized to '\n' or whether they are preserved in the original form they are encountered in the input.

member ReadCharsOrNewlinesWhile:
    predicateForFirstChar: (char -> bool) * predicate: (char -> bool)
  * minCount: int * maxCount: int * normalizeNewlines: bool -> string

Behaves like ReadCharsOrNewlinesWhile(predicate, minCount, maxCount, normalizeNewlines), except that the first char to be skipped must satisfy predicateForFirstChar instead of predicate.

member SkipCharsOrNewlinesUntilString:
    str: string * maxCount: int
  * foundString: out<bool> -> int

Skips over all stream chars before the first occurrence of the specified string or the end of the stream, but not over more than maxCount chars. Assigns true to the output parameter if the string is found, otherwise false.

This method registers skipped newlines ("\n", "\r\n" or "\r") and counts them as single chars. However, no newline normalization takes place when the argument string str is matched with the stream chars. Hence, str should either contain no newlines or only in the form they occur in the stream. If str starts with '\n', then SkipCharsOrNewlinesUntilString will not find occurrences of str in the stream that start in the middle of an "\r\n" newline.

When this method changes the stream position, it increments the StateTag by 1; otherwise, it does not change the StateTag.

This method throws

  • an ArgumentException, if the string argument is empty, and
  • an ArgumentOutOfRangeException, if maxCount is negative.

It may also throw any of the I/O related exceptions detailed above.

member SkipCharsOrNewlinesUntilString:
    str: string * maxCount: int * normalizeNewlines: bool
  * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int

Behaves like SkipCharsOrNewlinesUntilString(str, maxCount, foundString), except that its output parameter is a string instead of a boolean. If str is found, a string with the skipped chars is assigned to this output parameter; otherwise, null is assigned to the output parameter.

The normalizeNewlines parameter determines whether all newlines ("\n", "\r\n" or "\r") in the output string are normalized to '\n' or are preserved in the original form they are encountered in the input.

member SkipCharsOrNewlinesUntilCaseFoldedString:
    caseFoldedString: string * maxCount: int
  * foundString: out<bool> -> int

Behaves like SkipCharsOrNewlinesUntilString(caseFoldedString, maxCount, foundString), except that the chars in the stream are case‐folded before they are compared with caseFoldedString.

Note

While the chars in the CharStream are case‐folded before they are matched, the chars in the string argument caseFoldedString are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

member SkipCharsOrNewlinesUntilCaseFoldedString:
    caseFoldedString: string * maxCount: int * normalizeNewlines: bool
  * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int

Behaves like SkipCharsOrNewlinesUntilString(caseFoldedString, maxCount, normalizeNewlines, skippedCharsIfStringFoundOtherwiseNull), except that the chars in the stream are case‐folded before they are compared with caseFoldedString.

Note

While the chars in the stream are case‐folded before they are matched, the chars in the string argument caseFoldedString are assumed to already be case‐folded (e.g. with the help of FParsec.Text.FoldCase). Please also see the above remarks on case‐insensitive matching.

6.11.2 CharStream<TUserState>

Provides read‐access to a sequence of UTF‐16 chars.

6.11.2.1 Interface

[<Sealed>]
type CharStream<'TUserState> =
  inherit CharStream

  // has the same constructors as CharStream

  member UserState: 'TUserState with get, set

  member State: CharStreamState<'TUserState>

  member BacktrackTo: CharStreamState<'TUserState> -> unit

  member ReadFrom:
      stateWhereStringBegins: CharStreamState<'TUserState>
    * normalizeNewlines: bool
   -> string

  member CreateSubstream<'TSubStreamUserState>:
      stateWhereSubstreamBegins: CharStreamState<'TUserState>
   -> CharStream<'TSubStreamUserState>

6.11.2.2 Remarks

The CharStream<'TUserState> class adds a user definable state component to its base class CharStream.

The user state is accessible through the property UserState. It has the type 'TUserState.

You can retrieve a snapshot of the complete stream state, including the user state, from the State property. The value returned from the State property has the type CharStreamState<'TUserState>. You can pass a CharStreamState value to the BacktrackTo method in order to restore a previous state of the CharStream.

Important

'TUserState must be an immutable type or at least be treated as an immutable type if you want BacktrackTo to completely restore old values of the user state. Hence, when you need to change the user state, you should set a new 'TUserState value to the UserState property of the CharStream instance, not mutate the existing 'TUserState value.

6.11.2.3 Members

member UserState: 'TUserState with get, set

The current user state value.

Setting the UserState value increments the StateTag by 1, independent of whether the new value is different from the previous one.

member State: CharStreamState<'TUserState>

Returns a snapshot of the current StateTag, Index, Line, LineBegin, Name, and UserState values in the form of an immutable CharStreamState value.

member BacktrackTo: CharStreamState<'TUserState> -> unit

Restores the stream to the state represented by the given CharStreamState value.

For example:

fun (stream: CharStream<'u>) ->
    let state = stream.State
    // ... (do something with stream that might change the state)
    stream.BacktrackTo(state) // restores stream to previous state
    // ...

This method throws an ArgumentException if the CharStreamState instance is zero‐initialized (i.e. constructed with the default value type constructor). It may also throw any of the I/O related exceptions detailed above.

Note

You may only pass CharStreamState values that were retrieved from the CharStream instance on which you’re calling BacktrackTo. Passing a CharStreamState value that was created for another CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

member ReadFrom:
    stateWhereStringBegins: CharStreamState<'TUserState>
  * normalizeNewlines: bool
 -> string

Returns a string with the chars between the index of the stateWhereStringBegins (inclusive) and the current Index of the stream (exclusive).

The normalizeNewlines parameter determines whether all newlines ("\n", "\r\n" or "\r") in the returned string are normalized to '\n' or whether they are preserved in the original form they are encountered in the input. (If stateWhereStringBegins.Line equals the current Line, this method will never normalize any newlines in the returned string.)

This method throws

It may also throw any of the I/O related exceptions detailed above.

Note

You may only pass CharStreamState values that were retrieved from the CharStream instance on which you’re calling ReadFrom. Passing a CharStreamState value that was created for another CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

member CreateSubstream<'TSubStreamUserState>:
    stateWhereSubstreamBegins: CharStreamState<'TUserState>
 -> CharStream<'TSubStreamUserState>

Creates a new CharStream<'TSubStreamUserState> instance with the stream chars between the index of the stateWhereSubstreamBegins (inclusive) and the current Index of the stream (exclusive).

The state of the substream is initialized to stateWhereSubstreamBegins, so that the stream and the substream will report the same position (Index, Line, LineBegin and Name) for corresponding chars. However, the beginning and end will normally differ between stream and substream, in particular the IndexOfFirstChar and IndexOfLastCharPlus1 values will normally differ between stream and substream.

An example:

open FParsec
open FParsec.Primitives
open FParsec.CharParsers
open FParsec.Error

let embeddedBlock (beginDelim: string) (endDelim: string) : Parser<_,_> =
  let expectedEmbeddedBlock = expected "embedded block"
  fun stream ->
    if stream.Skip(beginDelim) then
      let stateAtBegin = stream.State
      let mutable foundString = false
      let maxChars = System.Int32.MaxValue
      stream.SkipCharsOrNewlinesUntilString(endDelim, maxChars, &foundString)
      |> ignore
      if foundString then
        // create substream with content between beginDelim and endDelim
        use substream = stream.CreateSubstream<unit>(stateAtBegin)
        // here we would normally work with the substream,
        // in this example we will just extract the string content
        let str = substream.ReadCharsOrNewlines(System.Int32.MaxValue, true)
        Reply(str)
      else
        Reply(Error, expectedString endDelim)
    else
      Reply(Error, expectedEmbeddedBlock)
> run (embeddedBlock "/*" "*/") "/*substream content*/";;
val it : ParserResult<string,unit> = Success: "substream content"
Note

This note does not apply to the Low‐Trust version of FParsec.
If you create a substream for a CharStream instance with more than one block, the content of the substream needs to be copied. Thus, you can minimize the overhead associated with creating a substream by ensuring that the CharStream has only one block, either by choosing a sufficiently large blockSize, or by creating the CharStream from a string or char buffer.

You may use a stream and its substreams concurrently. However, notice the following warning:

Caution

This note does not apply to the Low‐Trust version of FParsec.
You may not dispose a stream before all of its substreams are disposed. Disposing a stream before all its substreams are disposed triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

This method throws

It may also throw any of the I/O related exceptions detailed above.

Note

You may only pass CharStreamState values that were retrieved from the CharStream instance on which you’re calling CreateSubstream. Passing a CharStreamState value that was created for another CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

6.11.3 CharStreamIndexToken

An opaque representation of a CharStream char index.

type CharStreamIndexToken = struct
    member GetIndex: CharStream -> int64
end

CharStream methods can handle CharStreamIndexToken values more efficiently than integer char indices.

You can retrieve CharStreamIndexToken values from the CharStream.IndexToken and CharStreamState<_>.IndexToken properties.

You can get the char index corresponding to a given CharStreamIndexToken value by calling its GetIndex method with the CharStream instance from which the token was retrieved.

Zero‐initialized CharStreamIndexToken values constructed with the default value type constructor are not valid and trying to call a CharStream method with such an instance will trigger an exception.

Note

A CharStreamIndexToken instance may only be used together with the CharStream instance it was created for.

member GetIndex: CharStream -> int64

Returns the stream index represented by the CharStreamIndexToken instance.

The CharStream instance passed as the argument must be the CharStream instance from which the CharStreamIndexToken was retrieved. Passing a different CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

An InvalidOperationException is thrown if the CharStreamIndexToken value is zero‐initialized (i.e. constructed with the default value type constructor).

6.11.4 CharStreamState

An immutable value type representation of the state of a CharStream.

type CharStreamState<'TUserState> = struct
    member Tag: int64
    member IndexToken: CharStreamIndexToken
    member Line: int64
    member LineBegin: int64
    member Name: string
    member UserState: 'TUserState

    member GetIndex:    CharStream<'TUserState> -> int64
    member GetPosition: CharStream<'TUserState> -> Position
end

You can retrieve CharStreamState values from the CharStream<_>.State property. By passing a CharStreamState value to the BacktrackTo method of a CharStream<_> instance, you can restore the stream to the state represented by the CharStreamState value.

Zero‐initialized CharStreamState values constructed with the default value type constructor are not valid and trying to call a CharStream method with such an instance will trigger an exception.

Note

A CharStreamState instance may only be used together with the CharStream instance it was created for.

member GetIndex: CharStream<'TUserState> -> int64

state.GetIndex(stream) is an optimized implementation of state.IndexToken.GetIndex(stream).

The CharStream<'TUserState> instance passed as the argument must be the CharStream instance from which the CharStreamState was retrieved. Passing a different CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

An InvalidOperationException is thrown if the CharStreamState instance is zero‐initialized (i.e. constructed with the default value type constructor).

member GetPosition: CharStream<'TUserState> -> Position

state.GetPosition(stream) is an optimized implementation of new Position(state.Name, state.GetIndex(stream), state.Line, state.Column).

The CharStream<'TUserState> instance passed as the argument must be the CharStream instance from which the CharStreamState was retrieved. Passing a different CharStream instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.

An InvalidOperationException is thrown if the CharStreamState instance is zero‐initialized (i.e. constructed with the default value type constructor).

6.11.5 TwoChars

An immutable value type representation of two chars:

type TwoChars = struct
    new: char0: char * char1: char -> TwoChars
    val Char0: char
    val Char1: char
end
Footnotes:
[1] The detection of invalid byte sequences by the .NET decoders is not entirely reliable. For example, System.Text.UnicodeEncoding (UTF‐16) has an alignment related bug in .NET versions prior to 4.0 that sometimes leads to invalid surrogate pairs not being detected. The implementations of more complicated encodings, like GB18030, ISO‐2022 and ISCII, also have several issues with regard to the detection of invalid input data.
================================================ FILE: Doc/html/reference/error.html ================================================ FParsec.Error

6.7 FParsec.Error

6.7.1 Interface

// FParsec.dll

[<AutoOpen>] // module is automatically opened when FParsec namespace is opened
module FParsec.Error

// The following type abbreviations and active patterns allow you to
// treat the ErrorMessage type almost as if it was defined as:
//
// [<CustomEquality; NoComparison>]
// type ErrorMessage =
//      | Expected           of string
//      | ExpectedString     of string
//      | ExpectedStringCI   of string
//      | Unexpected         of string
//      | UnexpectedString   of string
//      | UnexpectedStringCI of string
//      | Message            of string
//      | NestedError        of Position * obj * ErrorMessageList
//      | CompoundError      of string * Position * obj * ErrorMessageList
//      | OtherErrorMessage  of obj

type Expected           = ErrorMessage.Expected
type ExpectedString     = ErrorMessage.ExpectedString
type ExpectedStringCI   = ErrorMessage.ExpectedCaseInsensitiveString
type Unexpected         = ErrorMessage.Unexpected
type UnexpectedString   = ErrorMessage.UnexpectedString
type UnexpectedStringCI = ErrorMessage.UnexpectedCaseInsensitiveString
type Message            = ErrorMessage.Message
type NestedError        = ErrorMessage.NestedError
type CompoundError      = ErrorMessage.CompoundError
type OtherErrorMessage  = ErrorMessage.Other

// Unfortunately, F# currently doesn't support active patterns with more
// than 7 cases, so we have to use partial patterns.

val (|Expected|_|):           ErrorMessage -> string option
val (|ExpectedString|_|):     ErrorMessage -> string option
val (|ExpectedStringCI|_|):   ErrorMessage -> string option
val (|Unexpected|_|):         ErrorMessage -> string option
val (|UnexpectedString|_|):   ErrorMessage -> string option
val (|UnexpectedStringCI|_|): ErrorMessage -> string option
val (|Message|_|):            ErrorMessage -> string option
val (|NestedError|_|):        ErrorMessage
                           -> (Position * obj * ErrorMessageList) option
val (|CompoundError|_|):      ErrorMessage
                           -> (string * Position * obj * ErrorMessageList) option
val (|OtherErrorMessage|_|):  ErrorMessage -> obj option


// The following literal definition and active pattern allow you to
// treat the ErrorMessageList type as if it was defined as:
//
// [<CompilationRepresentation(CompilationRepresentationFlags.UseNullAsTrueValue);
//   CustomEquality; NoComparison>]
// type ErrorMessageList =
//     | AddErrorMessage of ErrorMessage * ErrorMessageList
//     | NoErrorMessages
// with
//   static member Merge: ErrorMessageList * ErrorMessageList -> ErrorMessageList
//   static member ToHashSet: ErrorMessageList -> HashSet<ErrorMessage>
//   static member ToSortedArray: ErrorMessageList -> ErrorMessage[]

[<Literal>]
val NoErrorMessages: ErrorMessageList = null

val (|ErrorMessageList|NoErrorMessages|):
    ErrorMessageList -> Choice<ErrorMessage*ErrorMessageList,unit>


// Helper functions for creating an ErrorMessageList with a single ErrorMessage
val expected:                string -> ErrorMessageList
val expectedStringError:     string -> ErrorMessageList
val expectedStringCIError:   string -> ErrorMessageList
val unexpected:              string -> ErrorMessageList
val unexpectedStringError:   string -> ErrorMessageList
val unexpectedStringCIError: string -> ErrorMessageList
val messageError:            string -> ErrorMessageList
val otherError:              obj    -> ErrorMessageList
val nestedError:
              CharStream<_> -> ErrorMessageList -> ErrorMessageList
val compoundError:
    string -> CharStream<_> -> ErrorMessageList -> ErrorMessageList


// Two convenient helper functions
val mergeErrors: ErrorMessageList -> ErrorMessageList -> ErrorMessageList
val isSingleErrorMessageOfType: ErrorMessageType -> ErrorMessageList -> bool


// A simple container type for holding an ErrorMessageList
// together with its associated input stream position and user state
[<Sealed>]
type ParserError =
  new:   position: Position
       * userState: obj
       * messages: ErrorMessageList
      -> ParserError

  member Position:  Position
  member UserState: obj
  member Messages:  ErrorMessageList

  override ToString: unit -> string
  member   ToString: streamWhereErrorOccurred: CharStream -> string

  member WriteTo:
           textWriter: System.IO.TextWriter
         * streamWhereErrorOccurred: CharStream
         * ?tabSize: int
         * ?columnWidth: int
         * ?initialIndentation: string * ?indentationIncrement: string
         -> unit

  member WriteTo:
           textWriter: System.IO.TextWriter
         * getStream: (Position -> CharStream)
         * ?tabSize: int
         * ?columnWidth: int
         * ?initialIndentation: string * ?indentationIncrement: string
         -> unit

  member WriteTo:
           textWriter: System.IO.TextWriter
         * ?positionPrinter:
             (System.IO.TextWriter -> Position -> string -> int -> unit)
         * ?columnWidth: int
         * ?initialIndentation: string * ?indentationIncrement: string
         -> unit

  override Equals: obj -> bool
  override GetHashCode: unit -> int

6.7.2 Members

val expected: string -> ErrorMessageList

expected label creates an ErrorMessageList with a single Expected label message.

val expectedStringError: string -> ErrorMessageList

expectedStringError str creates an ErrorMessageList with a single ExpectedString str message.

val expectedStringCIError: string -> ErrorMessageList

expectedStringCIError str creates an ErrorMessageList with a single ExpectedStringCI str message.

val unexpected: string -> ErrorMessageList

unexpected label creates an ErrorMessageList with a single Unexpected label message.

val unexpectedStringError: string -> ErrorMessageList

unexpectedStringError str creates an ErrorMessageList with a single UnexpectedString str message.

val unexpectedStringCIError: string -> ErrorMessageList

unexpectedStringCIError str creates an ErrorMessageList with a single UnexpectedStringCI str message.

val messageError: string -> ErrorMessageList

messageError msg creates an ErrorMessageList with a single Message msg message.

val otherError: obj -> ErrorMessageList

otherError o creates an ErrorMessageList with a single OtherErrorMessage o message.

val nestedError:
              CharStream<_> -> ErrorMessageList -> ErrorMessageList

nestedError stream msgs creates an ErrorMessageList with a single NestedError(stream.Position, stream.UserState, msgs) message, except if msgs is already an ErrorMessageList with a single NestedError message, in which case msgs is returned instead.

val compoundError:
    string -> CharStream<_> -> ErrorMessageList -> ErrorMessageList

compoundError label stream msgs creates an ErrorMessageList with a single CompoundError(label, stream.Position, stream.UserState, msgs) message, except if msgs is an ErrorMessageList with a single NestedError(pos2, ustate2, msgs2) message, in which case an ErrorMessageList with a single CompoundError(label, pos2, ustate2, msgs2) message is returned instead.

val mergeErrors: ErrorMessageList -> ErrorMessageList -> ErrorMessageList

mergeErrors error1 error2 is an abbreviation for ErrorMessageList.Merge(error1, error2).

val isSingleErrorMessageOfType: ErrorMessageType -> ErrorMessageList -> bool

isSingleErrorMessageOfType ty msgs returns true if and only if msgs is an ErrorMessageList with a single ErrorMessage with the ErrorMessageType ty.

[<Sealed>]
type ParserError

ParserError is a simple container type for holding an ErrorMessageList together with its associated input stream position and user state.

The ParserError class has the following members:

new:   position: Position
     * userState: obj
     * messages: ErrorMessageList
    -> ParserError

Constructs a ParserError from an ErrorMessageList and its associated position and user state.

member Position: Position

The input stream position of the parser error.

member UserState: obj

The user state associated with the parser error.

member Messages: ErrorMessageList

The error messages of the parser error.

override ToString: unit -> string

Is equivalent to

use sw = new System.IO.StringWriter()
WriteTo(sw)
sw.ToString()
member ToString: streamWhereErrorOccurred: CharStream -> string

Is equivalent to

use sw = new System.IO.StringWriter()
WriteTo(sw, streamWhereErrorOccurred)
sw.ToString()
member WriteTo:
         textWriter: System.IO.TextWriter
       * streamWhereErrorOccurred: CharStream
       * ?tabSize: int
       * ?columnWidth: int
       * ?initialIndentation: string * ?indentationIncrement: string
       -> unit

Is equivalent to

let getStream (pos: Position) =
    if pos.StreamName = Position.StreamName then streamWhereErrorOccurred
    else null

WriteTo(textWriter,
        getStream,
        ?tabSize = tabSize,
        ?columnWidth = columnWidth,
        ?initialIndentation = initialIndentation,
        ?indentationIncrement = indentationIncrement)
member WriteTo:
         textWriter: System.IO.TextWriter
       * getStream: (Position -> CharStream)
       * ?tabSize: int
       * ?columnWidth: int
       * ?initialIndentation: string * ?indentationIncrement: string
       -> unit

Writes a string representation of the ParserError to the given TextWriter value.

For each error, getStream is called with the error position. The returned CharStream must be null or contain the content of the CharStream for which the error was generated (at the original indices).

If getStream returns a non‐null CharStream, the printed error position information is augmented with the line of text surrounding the error position, together with a ‘^’‐marker pointing to the exact location of the error in the input stream.

The tabSize parameter (default value: 8) specifies the tab stop distance that this method assumes when counting text columns. This parameter only has an effect for error positions where getStream returns a non‐null CharStream.

The columnWidth parameter (default value: 79) specifies the number of char columns that this method should try to fit its output to.

member WriteTo:
         textWriter: System.IO.TextWriter
       * ?positionPrinter:
           (System.IO.TextWriter -> Position -> string -> int -> unit)
       * ?columnWidth: int
       * ?initialIndentation: string * ?indentationIncrement: string
       -> unit

Writes a string representation of the ParserError to the given TextWriter value.

The format of the position information can be customized by specifying the positionPrinter argument. The given function is expected to print a representation of the passed Position value to the passed TextWriter value. If possible, it should indent text lines with the passed string and take into account the maximum column count (including indentation) passed as the last argument.

================================================ FILE: Doc/html/reference/errormessage.html ================================================ FParsec.ErrorMessage

6.8 FParsec.ErrorMessage

6.8.1 Interface

// FParsecCS.dll

namespace FParsec

type ErrorMessageType = Expected                        = 0
                      | ExpectedString                  = 1
                      | ExpectedCaseInsensitiveString   = 2
                      | Unexpected                      = 3
                      | UnexpectedString                = 4
                      | UnexpectedCaseInsensitiveString = 5
                      | Message                         = 6
                      | NestedError                     = 7
                      | CompoundError                   = 8
                      | Other                           = 9

type ErrorMessage =
  member Type: ErrorMessageType

  override Equals: obj -> bool
  override GetHashCode: unit -> int
  interface System.IEquatable<ErrorMessage>

// nested types
type ErrorMessage.Expected =
  inherit ErrorMessage
  new: label: string -> ErrorMessage.Expected
  member Label: string

type ErrorMessage.ExpectedString =
  inherit ErrorMessage
  new: string -> ErrorMessage.ExpectedString
  member String: string

type ErrorMessage.ExpectedCaseInsensitiveString =
  inherit ErrorMessage
  new: string -> ErrorMessage.ExpectedCaseInsensitiveString
  member CaseInsensitiveString: string

type ErrorMessage.Unexpected =
  inherit ErrorMessage
  new: label: string -> ErrorMessage.Unexpected
  member Label: string

type ErrorMessage.UnexpectedString =
  inherit ErrorMessage
  new: string -> ErrorMessage.UnexpectedString
  member String: string

type ErrorMessage.UnexpectedCaseInsensitiveString =
  inherit ErrorMessage
  new: string -> ErrorMessage.UnexpectedCaseInsensitiveString
  member CaseInsensitiveString: string

type ErrorMessage.Message =
  inherit ErrorMessage
  new: string -> ErrorMessage.Message
  member String: string

type ErrorMessage.NestedError =
  inherit ErrorMessage

  new:   position: Position * userState: obj * messages: ErrorMessageList
      -> ErrorMessage.NestedError

  member Position:  Position
  member UserState: obj
  member Messages:  ErrorMessageList

type ErrorMessage.CompoundError =
  inherit ErrorMessage

  new:   labelOfCompound: string
       * nestedErrorPosition: Position
       * nestedErrorUserState: obj
       * nestedErrorMessages: ErrorMessageList
      -> ErrorMessage.CompoundError

  member LabelOfCompound: string
  member NestedErrorPosition: Position
  member NestedErrorUserState: obj
  member NestedErrorMessages: ErrorMessageList

type ErrorMessage.Other =
  inherit ErrorMessage
  new: data: obj -> ErrorMessage.Other
  member Data: obj

6.8.2 Remarks

ErrorMessage is the abstract base class for FParsec error messages. Parser functions return ErrorMessage values within an ErrorMessageList.

There are several subtypes of ErrorMessage that represent specific kinds of error messages. These subtypes are defined as nested classes within ErrorMessage.

The active patterns and type abbreviations in the FParsec.Error module allow you to treat the ErrorMessage type almost as if it was defined as an F# discriminated union type.

6.8.3 Members

type ErrorMessage

ErrorMessage is the abstract base class for FParsec error messages.

type ErrorMessage =
  member Type: ErrorMessageType

  override Equals: obj -> bool
  override GetHashCode: unit -> int
  interface System.IEquatable<ErrorMessage>

Please also see the remarks above.

type ErrorMessage.Expected

Parsers report this ErrorMessage when the input does not match the expected input.

type ErrorMessage.Expected =
  inherit ErrorMessage
  new: label: string -> ErrorMessage.Expected
  member Label: string

The string label describes the expected input.

This error message can be generated with the labeling operator <?>.

type ErrorMessage.ExpectedString

Parsers report this ErrorMessage when the input does not match an expected string constant.

type ErrorMessage.ExpectedString =
  inherit ErrorMessage
  new: string -> ErrorMessage.ExpectedString
  member String: string

This ErrorMessage is mainly generated by the pstring parser and its variants.

type ErrorMessage.ExpectedCaseInsensitiveString

Parsers report this ErrorMessage when the input does not match an expected case‐insensitive string constant.

type ErrorMessage.ExpectedCaseInsensitiveString =
  inherit ErrorMessage
  new: string -> ErrorMessage.ExpectedCaseInsensitiveString
  member CaseInsensitiveString: string

This ErrorMessage is mainly generated by the pstringCI parser and its variants.

type ErrorMessage.Unexpected

Parsers report this ErrorMessage when they encounter some unexpected input.

type ErrorMessage.Unexpected =
  inherit ErrorMessage
  new: label: string -> ErrorMessage.Unexpected
  member Label: string

The string label describes the unexpected input.

This ErrorMessage is mainly generated by the notFollowedByL primitive.

type ErrorMessage.UnexpectedString

Parsers report this ErrorMessage when they encounter an unexpected string constant.

type ErrorMessage.UnexpectedString =
  inherit ErrorMessage
  new: string -> ErrorMessage.UnexpectedString
  member String: string

This ErrorMessage is mainly generated by the notFollowedByString parser.

type ErrorMessage.UnexpectedCaseInsensitiveString

Parsers report this ErrorMessage when they encounter an unexpected case‐insensitive string constant.

type ErrorMessage.UnexpectedCaseInsensitiveString =
  inherit ErrorMessage
  new: string -> ErrorMessage.UnexpectedCaseInsensitiveString
  member CaseInsensitiveString: string

This ErrorMessage is mainly generated by the notFollowedByStringCI parser.

type ErrorMessage.Message

Parsers report this ErrorMessage when the error does not fit the other ErrorMessage types.

type ErrorMessage.Message =
  inherit ErrorMessage
  new: string -> ErrorMessage.Message
  member String: string

This error message can be generated with the fail and failFatally primitives.

type ErrorMessage.NestedError

Parsers report this ErrorMessage when they backtracked after an error occurred.

type ErrorMessage.NestedError =
  inherit ErrorMessage

  new:   position: Position * userState: obj * messages: ErrorMessageList
      -> ErrorMessage.NestedError

  member Position:  Position
  member UserState: obj
  member Messages:  ErrorMessageList

The Position property describes the stream position where the original error occurred that triggered the backtracking. The UserState property contains the user state value from before the backtracking (upcasted to obj). The Messages property contains the error messages of the original error.

This error message is mainly generated by the attempt, >>? and .>>? primitives.

type ErrorMessage.CompoundError

Parsers report this ErrorMessage when a “compound” failed to parse.

type ErrorMessage.CompoundError =
  inherit ErrorMessage

  new:   labelOfCompound: string
       * nestedErrorPosition: Position
       * nestedErrorUserState: obj
       * nestedErrorMessages: ErrorMessageList
      -> ErrorMessage.CompoundError

  member LabelOfCompound: string
  member NestedErrorPosition: Position
  member NestedErrorUserState: obj
  member NestedErrorMessages: ErrorMessageList

This error message is mainly generated by the compound‐labelling operator <??>.

type ErrorMessage.Other

User‐defined parsers can return this ErrorMessage to report application‐specific error data.

type ErrorMessage.Other =
  inherit ErrorMessage
  new: data: obj -> ErrorMessage.Other
  member Data: obj

To display OtherError values in error messages, you will have to define your own error printer, as ParserError.ToString/WriteTo ignores them.

================================================ FILE: Doc/html/reference/errormessagelist.html ================================================ FParsec.ErrorMessageList

6.9 FParsec.ErrorMessageList

Represents a list of error messages.

6.9.1 Interface

// FParsecCS.dll

namespace FParsec

[<Sealed; AllowNullLiteral>]
type ErrorMessageList =
  member Head: ErrorMessage
  member Tail: ErrorMessageList

  new: head: ErrorMessage -> ErrorMessageList
  new: head: ErrorMessage * tail: ErrorMessageList -> ErrorMessageList
  new: head: ErrorMessage * tailMessage: ErrorMessage -> ErrorMessageList

  static member Merge: ErrorMessageList * ErrorMessageList -> ErrorMessageList
  static member ToHashSet: ErrorMessageList -> HashSet<ErrorMessage>
  static member ToSortedArray: ErrorMessageList -> ErrorMessage[]

  override Equals: obj -> bool
  override GetHashCode: unit -> int
  interface System.IEquatable<ErrorMessageList>

6.9.2 Remarks

The ErrorMessageList represents a list of error messages in which the order of the messages carries no meaning and any duplicates and empty messages are ignored. Essentially, an ErrorMessageList is constructed as a singly‐linked list, but used as a set.

A null value represents an empty ErrorMessageList.

The ErrorMessage values in an ErrorMessageList are usually all associated with the same input stream position and user state. For example, the error messages returned by a parser in a Reply value describe an error at the CharStream position that is current when the parser returns.

In order to enforce set semantics in comparison operations, the ErrorMessageList overrides the Equals and GetHashCode methods.

6.9.3 Members

member Head: ErrorMessage

The first ErrorMessage in this list. This property is never null.

member Tail: ErrorMessageList

The remaining ErrorMessage values in this list after the first ErrorMessage.

If there are no remaining ErrorMessage values, this property is null.

new: head: ErrorMessage -> ErrorMessageList

Constructs a new ErrorMessageList with a single ErrorMessage value.

This constructor throws a NullReferenceException if head is null.

new: head: ErrorMessage * tail: ErrorMessageList -> ErrorMessageList

Constructs a new ErrorMessageList with Head set to head and Tail set to tail.

This constructor throws a NullReferenceException if head is null.

new: head: ErrorMessage * tailMessage: ErrorMessage -> ErrorMessageList

new ErrorMessageList(head, tailMessage) is equivalent to new ErrorMessageList(head, new ErrorMessageList(tailMessage)).

static member Merge: ErrorMessageList * ErrorMessageList -> ErrorMessageList

Creates a new ErrorMessageList that contains the ErrorMessage values from both argument lists.

The order of the ErrorMessage values in the newly created list is an implementation detail that you should not depend on.

static member ToHashSet: ErrorMessageList -> HashSet<ErrorMessage>

Converts the ErrorMessageList to a HashSet<ErrorMessage>. Duplicate error messages and empty Expected..., Unexpected... and Message messages are filtered out when the list is converted to a set.

static member ToSortedArray: ErrorMessageList -> ErrorMessage[]

Converts the ErrorMessageList to an array that is sorted by a total order. Duplicate error messages and empty Expected..., Unexpected... and Message messages are filtered out when the list is converted to the array.

The order of the sorted array is an implementation detail and may change in the future.

================================================ FILE: Doc/html/reference/index.html ================================================ Reference
================================================ FILE: Doc/html/reference/operatorprecedenceparser.html ================================================ FParsec.OperatorPrecedenceParser

6.4 FParsec.OperatorPrecedenceParser

6.4.1 Interface

// FParsecCS.dll

namespace FParsec

type Associativity  = None  = 0
                    | Left  = 1
                    | Right = 2

type OperatorType  = Infix   = 0
                   | Prefix  = 1
                   | Postfix = 2


type Operator<'TTerm, 'TAfterString, 'TUserState>=
  member Type: OperatorType
  member Associativity: Associativity
  member Precedence: int

  member IsAssociative: bool
  member IsTernary: bool

  member String: string
  member TernaryRightString: string // null for non-ternary operators

// the following four types inherit from Operator<_,_,_>
type InfixOperator<'TTerm, 'TAfterString, 'TUserState> = // ...
type PrefixOperator<'TTerm, 'TAfterString, 'TUserState> = // ...
type PostfixOperator<'TTerm, 'TAfterString, 'TUserState> = // ...
type TernaryOperator<'TTerm, 'TAfterString, 'TUserState> = // ...


type OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState> =
  member ExpressionParser: Parser<'TTerm,'TUserState>
  member TermParser: Parser<'TTerm,'TUserState> with get, set

  member AddOperator: Operator<'TTerm, 'TAfterString, 'TUserState> -> unit

  member RemoveOperator: Operator<'TTerm, 'TAfterString, 'TUserState> -> bool
  member RemoveInfixOperator: string -> bool
  member RemovePrefixOperator: string -> bool
  member RemovePostfixOperator: string -> bool
  member RemoveTernaryOperator: string * string -> bool
  member Operators: seq<Operator<'TTerm, 'TAfterString, 'TUserState>>

  member OperatorConflictErrorFormatter:
    (   Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
     -> Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
     -> ErrorMessageList)
    with get, set

  member MissingTernary2ndStringErrorFormatter:
    (   Position * Position
      * TernaryOperator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
     -> ErrorMessageList)
    with get, set

6.4.2 Members

type Operator<'TTerm, 'TAfterString, 'TUserState>

The Operator type represents an immutable operator definition for the OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState> (OPP) class.

[<ReferenceEquality>]
type Operator<'TTerm, 'TAfterString, 'TUserState> =
  member Type: OperatorType
  member Associativity: Associativity
  member Precedence: int

  member IsAssociative: bool
  member IsTernary: bool

  member String: string
  member TernaryRightString: string // null for non-ternary operators

The Operator class is the abstract base class of the InfixOperator, PrefixOperator, PostfixOperator and TernaryOperator classes. With these four concrete classes you can define binary infix (e.g. “1 + 1”), unary prefix (e.g. “‒1”), unary postfix (e.g. “1++”) and C‐style ternary operators (e.g. “a ? b : c”) for the OperatorPrecedenceParser (OPP) class.

If you have a look at the constructors for the concrete operator classes, you’ll see that operators are constructed from an operator string, an “after‐string‐parser”, a precedence level, an associativity value and a mapping function that is applied after the expression is parsed.

Ternary operators are treated as special infix operators and require a string and an associated after‐string‐parser for each of the two operator parts.

Associativity and precedence

While infix operators can be left‐, right‐ and non‐associative (see the Associativity type), prefix and postfix operators can only be associative (true) or non‐associative (false). See below for details on how precedence and associativity influence the operator precedence parser.

Textual representation of operators

The operator string and the after‐string‐parser determine the textual representation of an operator. Usually, the after‐string‐parser is used for parsing the whitespace after an operator string.

OPP instances have separate “namespaces” for prefix operators on the one hand and infix, postfix or ternary operators on the other hand. Hence, you can configure an OPP instance to recognize a prefix operator with the same string as the (first) string of an infix, postfix or ternary operator. However, no two prefix operators and no two infix, postfix or ternary operators can have the same (first) string. The second string of a ternary operator cannot be used for any other operator at the same time.

The OPP class parses operator strings greedily. This means, for example, that if you define a prefix operator with the string "-" and another prefix operator with the string "--", then the input -- in a prefix location will always be parsed as a -- operator, never as two successive - operators.

How the OPP applies the after‐string‐parser

If the OPP encounters the operator string in the input, it will apply the after‐string‐parser directly after the operator string. If the after‐string‐parser succeeds, the operator will be accepted. If the after‐string‐parser fails without consuming input (or changing the parser state in any other way), the OPP will backtrack to before the operator string and will not try to parse any other operator at this location. If the after‐string‐parser fails after consuming input, the OPP will itself fail with this error.

This backtracking behaviour can be exploited to conditionally accept an operator depending on the input following the operator string. For example, the after‐string‐parser definition in PrefixOperator("not", notFollowedBy letter >>. spaces, 1, true, (* ... *)) will ensure that the "not" in "notAnOperator" cannot be parsed as an operator.

The mapping function argument of the operator constructors

When an OPP instance has finished parsing a sub‐expression involving an operator, it uses the mapping function supplied as the last argument to the operator constructor to map the parsed term(s) to a new term. Usually this mapping function constructs an AST node or directly transforms the terminal values.

The operator classes InfixOperator, PrefixOperator, etc. all support two alternative types of mapping functions. The simpler type of mapping function only gets passed the parsed term(s). The other type of mapping function also gets passed the result(s) of the after‐string‐parser(s).

More uses of the after‐string‐parser

The combination of individually configurable after‐string‐parsers and mapping functions make the OPP class quite flexible in addressing various practical parsing needs.

One use of the after‐string‐parser is discussed in the user’s guide section on parsing F# infix operators.

Another use is demonstrated in the following example. It shows how you can use the after‐string‐parser to get hold of the precise text location of the parsed operator (which is often useful for diagnostic purposes in your application):

open FParsec
open FParsec.Primitives
open FParsec.CharParsers

let opp = new OperatorPrecedenceParser<_,_,_>()

let ws = spaces

type Assoc = Associativity

let adjustPosition offset (pos: Position) =
    Position(pos.StreamName, pos.Index + int64 offset,
             pos.Line, pos.Column + int64 offset)

// To simplify infix operator definitions, we define a helper function.
let addInfixOperator str prec assoc mapping =
    let op = InfixOperator(str, getPosition .>> ws, prec, assoc, (),
                           fun opPos leftTerm rightTerm ->
                               mapping
                                   (adjustPosition -str.Length opPos)
                                   leftTerm rightTerm)
    opp.AddOperator(op)

// Of course, you can define similar functions for other operator types.

// With the helper function in place, you can define an operator with
// a mapping function that gets passed the text location of the
// parsed operator as the first argument.
addInfixOperator "+" 1 Assoc.Left (fun opPos leftTerm rightTerm -> (* ... *))


Members of Operator<'TTerm, 'TAfterString, 'TUserState>:

member Type: OperatorType

The operator’s type: Infix, Prefix or Postfix.

Ternary operators are treated as special infix operators.

member Associativity: Associativity

The operator’s associativity: None, Left or Right.

For associative prefix operators this value is Associativity.Right, for associative postfix operators this value is Associativity.Left.

member Precedence: int

The operator’s precedence value. The value is always greater than zero. Operators with a numerically higher precedence value take precedence over operators with lower precedence values.

member IsAssociative: bool

Is equivalent to Associativity != Associativity.None.

member IsTernary: bool

Indicates whether the operator is a TernaryOperator.

member String: string

The operator’s string specified during construction.

For ternary operators this property returns the left string.

member TernaryRightString: string

The right string of a TernaryOperator.

For non‐ternary operators this property is null.

type InfixOperator<'TTerm, 'TAfterString, 'TUserState>

The InfixOperator<'TTerm, 'TAfterString, 'TUserState> type represents a binary infix operator definition (e.g. the + in 1 + 1) for the OperatorPrecedenceParser class.

type InfixOperator<'TTerm, 'TAfterString, 'TUserState> =
  inherit Operator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * associativity: Associativity
       * mapping: 'TTerm -> 'TTerm -> 'TTerm
      -> InfixOperator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * associativity: Associativity
       * dummy: unit // disambiguates overloads in F#
       * mapping: 'TAfterString -> 'TTerm -> 'TTerm -> 'TTerm
      -> InfixOperator<'TTerm, 'TAfterString, 'TUserState>

The two constructors only differ in the type of the mapping they accept. To help F#’s type inference discern both constructors, the second constructor accepts an additional dummy argument.

Please see the documentation for the Operator base class for more information.

type PrefixOperator<'TTerm, 'TAfterString, 'TUserState>

The PrefixOperator<'TTerm, 'TAfterString, 'TUserState> type represents a unary prefix operator definition (e.g. the - in -1) for the OperatorPrecedenceParser class.

type PrefixOperator<'TTerm, 'TAfterString, 'TUserState> =
  inherit Operator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * isAssociative: bool
       * mapping: 'TTerm -> 'TTerm
      -> PrefixOperator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * isAssociative: bool
       * dummy: unit // disambiguates overloads in F#
       * mapping: 'TAfterString -> 'TTerm -> 'TTerm
      -> PrefixOperator<'TTerm, 'TAfterString, 'TUserState>

The two constructors only differ in the type of the mapping they accept. To help F#’s type inference discern both constructors, the second constructor accepts an additional dummy argument.

Please see the documentation for the Operator base class for more information.

type PostfixOperator<'TTerm, 'TAfterString, 'TUserState>

The PostfixOperator<'TTerm, 'TAfterString, 'TUserState> type represents a unary postfix operator definition (e.g. the ++ in 1++) for the OperatorPrecedenceParser class.

type PostfixOperator<'TTerm, 'TAfterString, 'TUserState> =
  inherit Operator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * isAssociative: bool
       * mapping: 'TTerm -> 'TTerm
      -> PostfixOperator<'TTerm, 'TAfterString, 'TUserState>

  new:   operatorString: string
       * afterStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * isAssociative: bool
       * dummy: unit // disambiguates overloads in F#
       * mapping: 'TAfterString -> 'TTerm -> 'TTerm
      -> PostfixOperator<'TTerm, 'TAfterString, 'TUserState>

The two constructors only differ in the type of the mapping they accept. To help F#’s type inference discern both constructors, the second constructor accepts an additional dummy argument.

Please see the documentation for the Operator base class for more information.

type TernaryOperator<'TTerm, 'TAfterString, 'TUserState>

The TernaryOperator<'TTerm, 'TAfterString, 'TUserState> type represents a C‐style ternary operator definition (e.g. the ? : in a ? b : c) for the OperatorPrecedenceParser class.

type TernaryOperator<'TTerm, 'TAfterString, 'TUserState> =
  inherit Operator<'TTerm, 'TAfterString, 'TUserState>

  new:   leftString: string
       * afterLeftStringParser: Parser<'TAfterString,'TUserState>
       * rightString: string
       * afterRightStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * associativity: Associativity
       * mapping: 'TTerm -> 'TTerm -> 'TTerm -> 'TTerm
      -> TernaryOperator<'TTerm, 'TAfterString, 'TUserState>

  new:   leftString: string
       * afterLeftStringParser: Parser<'TAfterString,'TUserState>
       * rightString: string
       * afterRightStringParser: Parser<'TAfterString,'TUserState>
       * precedence: int
       * associativity: Associativity
       * dummy: unit // disambiguates overloads in F#
       * mapping:   'TAfterString -> 'TAfterString -> 'TTerm -> 'TTerm -> 'TTerm
                 -> 'TTerm
      -> TernaryOperator<'TTerm, 'TAfterString, 'TUserState>

The two constructors only differ in the type of the mapping they accept. To help F#’s type inference discern both constructors, the second constructor accepts an additional dummy argument.

Please see the documentation for the Operator base class for more information.

type OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState>

The OperatorPrecedenceParser class (OPP) represents a dynamically configurable parser for parsing expression grammars involving binary infix (e.g. 1 + 1), unary prefix (e.g. -1), unary postfix (e.g. 1++) and C‐style ternary operators (e.g. a ? b : c).

You can configure an OPP instance by adding and removing operator definitions in the form of Operator values. If you add an operator that conflicts with a previous operator definition, AddOperator will raise an ArgumentException. The Operators property returns a snapshot of the currently defined set of operators. The RemoveInfixOperator, RemovePrefixOperator, etc. members remove operator definitions based only on their text representation. All Remove... members return false if no matching operator was previously defined, otherwise true.

The actual expression parser of the OPP is exposed through the ExpressionParser property. The ExpressionParser value is a constant closure that forwards all work to internal instance methods. This ensures that the behaviour of the expression parser always reflects the latest configuration of the OPP instance. You can safely call the ExpressionParser concurrently from multiple threads, as long as the configuration of the OPP instance is not changed at the same time.

Before you can call the ExpressionParser you first need to set the TermParser. The OPP instance uses the TermParser to parse the terms in between the operators. Often the TermParser will not just parse terminal values but will also recursively call the ExpressionParser, for example to parse an expression between parentheses. Note that the TermParser also needs to consume any trailing whitespace.

This example shows how to define a parser for very simple arithmetic expressions:

open FParsec
open FParsec.Primitives
open FParsec.CharParsers

let ws = spaces
let str_ws s = pstring s >>. ws

let opp = new OperatorPrecedenceParser<float,unit,unit>()
let expr = opp.ExpressionParser
let term = (pfloat .>> ws) <|> between (str_ws "(") (str_ws ")") expr
opp.TermParser <- term

type Assoc = Associativity

opp.AddOperator(InfixOperator("+", ws, 1, Assoc.Left, fun x y -> x + y))
opp.AddOperator(InfixOperator("*", ws, 2, Assoc.Left, fun x y -> x * y))
> run expr "1 + 2*(3 + 4)";;
val it : ParserResult<float,unit> = Success: 15.0

The following points explain how expressions are parsed depending on precedence and associativity of the involved operators:

  • Operators with higher precedence bind tighter. For example, if the prefix operator “~” has a lower precedence than the infix operator “&” then “~x&y” will be parsed as “~(x&y)”.
  • Ternary operators are treated as special infix operators. The middle expression (e.g. “expr2” in “expr1 ? expr2 : expr3”) is parsed as a “fresh” expression that is not influenced by the precedence of the surrounding operators.
  • Operators with identical precedence are parsed as follows:

    Here o1,   o2   are two infix operators,
         pre1, pre2 are two prefix operators,
         po1,  po2  are two postfix operators
    and all operators have identical precedence.
    
    pre1 x o1 y  ==>  (pre1 x) o1 y
    x o1 y po1   ==>  x o1 (y po1)
    x o1 y o2 z  ==>  (x o1 y) o2 z  if o1 and o2 are left-associative
    x o1 y o2 z  ==>  x o1 (y o2 z)  if o1 and o2 are right-associative
    pre1 x po1   ==>  (pre1 x) po1   if pre1 or po1  is associative
    pre1 pre2 x  ==>  pre1 (pre2 x)  if pre1 or pre2 is associative
    x po1 po2    ==>  (x po1) po2    if po1  or po2  is associative
      
  • If the parser encounters conflicting operators, e.g. if a right‐associative infix operator follows a left‐associative operator with the same precedence level, the OPP fails and returns with an error generated with the help of the OperatorConflictErrorFormatter.

    In the following situations the OPP will fail with an operator conflict error:

    [Same notation as above, all operators have identical precedence.]
    
    x o1 y o2 z  if o1 and o2 have different associativity
                 or o1 and o2 are non-associative
    pre1 pre2 x  if pre1 and pre2 are non-associative
    pre1 x po1   if pre1 and po1  are non-associative
    x po1 po2    if po1  and po2  are non-associative
    

    By giving all operators different precedence levels and making all operators associative, you can exclude any possible operator conflict. A practical reason for defining operators that can lead to conflicts in the inputs (e.g. non‐associative operators) is to force the user to explicitly parenthesize an expression involving such operators.


Members of OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState>:

member ExpressionParser: Parser<'TTerm,'TUserState>

The expression parser. This is a constant closure that forwards all work to internal instance methods, so that the behaviour of the expression parser always reflects the latest configuration of the OPP instance.

You can safely call the ExpressionParser concurrently from multiple threads, as long as the configuration of the OperatorPrecedenceParser instance is not changed at the same time.

member TermParser: Parser<'TTerm,'TUserState> with get, set

This parser is called to parse the terms in between the operators. There is no default, so you must set this parser before you can call the ExpressionParser. Note that the term parser is also expected to parse any whitespace after a term.

member AddOperator: Operator<'TTerm, 'TAfterString, 'TUserState> -> unit

Adds an operator to the grammar. Raises an ArgumentException if the operator definition conflicts with a previous definition.

member RemoveOperator: Operator<'TTerm, 'TAfterString, 'TUserState> -> bool

Removes the given Operator instance from the grammar. Returns false if the Operator instance was not previously registered, otherwise true.

member RemoveInfixOperator: string -> bool

Removes the InfixOperator with the given string from the grammar. Returns false if no infix operator with that string was previously registered, otherwise true.

member RemovePrefixOperator: string -> bool

Removes the PrefixOperator with the given string from the grammar. Returns false if no prefix operator with that string was previously registered, otherwise true.

member RemovePostfixOperator: string -> bool

Removes the PostfixOperator with the given string from the grammar. Returns false if no postfix operator with that string was previously registered, otherwise true.

member RemoveTernaryOperator: string * string -> bool

Removes the TernaryOperator with the given left and right strings from the grammar. Returns false if no ternary operator with these strings was previously registered, otherwise true.

member Operators: seq<Operator<'TTerm, 'TAfterString, 'TUserState>>

Returns a sequence with a snapshot of the operators currently registered with the OperatorPrecedenceParser.

member OperatorConflictErrorFormatter:
  (   Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
   -> Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
   -> ErrorMessageList)
  with get, set

The OperatorConflictErrorFormatter function is called by the OPP instance when it encounters conflicting operators in the input. The two passed tuples contain the stream positions, operator definitions and the after‐string‐parser values for the two conflicting operators. The returned ErrorMessageList will become part of the error messages returned by the OPP’s ExpressionParser.

You can set this formatter to customize the error messages generated when the OPP instance encounters conflicting operators in the inputs. Of course, if your operator grammar doesn’t allow for conflicting operators in the input, the OperatorConflictErrorFormatter will never be called and there’s no need to customize it. The user’s guide section on parsing F# infix operators contains an example with a custom OperatorConflictErrorFormatter.

member MissingTernary2ndStringErrorFormatter:
  (   Position * Position
    * TernaryOperator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString
   -> ErrorMessageList)
  with get, set

The MissingTernary2ndStringErrorFormatter function is called by the OPP instance when it can’t parse the second operator string of a C‐style ternary operator (e.g. the : in a ? b : c). The passed tuple contains (in order) the position of the first operator string, the position where the second string was expected, the operator definition and the after‐string‐parser value for the left operator part. The returned ErrorMessageList will become part of the error messages returned by the OPP’s ExpressionParser.

================================================ FILE: Doc/html/reference/parser-overview.html ================================================ Parser overview

6.1 Parser overview

Table 6.1.1: Parsing single chars
Parser Description
pchar c
(variants: skipChar, charReturn)
Parses the char c.
anyChar
(variant: skipAnyChar)
Parses any one char.
satisfy f
(variants: (skipS|s)atisfy[L])
Parses any one char for which the predicate function f returns true.
anyOf str
(variant: skipAnyOf)
Parses any one char in the string str.
noneOf str
(variant: skipNoneOf)
Parses any one char not in the string str.
letter
(variants: lower, upper)
Parses any one unicode letter char identified by System.Char.IsLetter.
asciiLetter
(variants: asciiLower, asciiUpper)
Parses any one char in the range 'a'–'z' and 'A'–'Z'.
digit
(variants: hex, octal)
Parses any one char in the range '0'–'9'.
Table 6.1.2: Parsing strings directly
Parser Description
pstring str
(variants: skipString, stringReturn)
Parses the string str.
pstringCI str
(variants: skipStringCI, stringCIReturn)
Parses any string that case‐insensitively matches the string str.
anyString n
(variants: skipAnyString)
Parses any sequence of n chars.
restOfLine skipNewline
(variant: skipRestOfLine)
Parses any chars before the end of the line and, if skipNewline is true, skips to the beginning of the next line (if there is one).
charsTillString str skipString nMax
(variants: charsTillStringCI, skipCharsTillString[CI])
Parses all chars before the first occurrence of the string str and, if skipString is true, skips over str. Fails if more than nMax chars come before str.
manySatisfy f
(variant: skipManySatisfy)
Parses a sequence of zero or more chars that satisfy the predicate function f (i.e. chars for which f returns true).
manySatisfy2 f1 f
(variant: skipManySatisfy2)
Parses a sequence of zero or more chars, where the first char must satisfy the predicate function f1 and the remaining chars must satisfy f.
many1Satisfy f
(variants: (skipM|m)any1Satisfy[2][L])
Parses a sequence of one or more chars that satisfy the predicate function f.
manyMinMaxSatisfy nMin nMax f
(variants: (skipM|m)anyMinMaxSatisfy[2][L])
Parses a sequence of nMin or more chars that satisfy the predicate function f, but not more than nMax chars.
regex pattern Parses a sequence of one or more chars matched by the .NET regular expression string pattern.
identifier options Parses a Unicode identifier.
Table 6.1.3: Parsing strings with the help of other parsers
Parser Description
manyChars cp
(variants: manyChars2)
Parses a sequence of zero or more chars with the char parser cp.
many1Chars cp
(variants: many1Chars2)
Parses a sequence of one or more chars with the char parser cp.
manyCharsTill cp endp
(variants: manyCharsTill[Apply][2])
Parses chars with the char parser cp until the parser endp succeeds. Stops after endp.
manyStrings sp
(variants: many[1]Strings[2])
Parses a sequence of zero or more strings with the parser sp. Returns the parsed strings in concatenated form.
stringsSepBy sp sep Parses a sequence of zero or more occurrences of sp separated by sep. Returns the strings parsed with sp and sep in concatenated form.
skipped p Applies the parser p. Returns the chars skipped over by p as a string.
p |> withSkippedString f Applies the parser p. Returns f str x, where str is the string skipped over by p and x is the result returned by p.
Table 6.1.4: Parsing numbers
Parser Description
pfloat Parses a double‐precision floating‐point number.
pint64
(variants: pint(8|16|32))
Parses a 64‐bit signed integer.
puint64
(variants: puint(8|16|32))
Parses a 64‐bit unsigned integer.
numberLiteral options label Parses a number literal and returns the result in form of a NumberLiteral value.
Table 6.1.5: Parsing whitespace
Parser Description
newline
(variants: skipNewline, newlineReturn, unicodeNewline)
Parses a newline ("\n", "\r\n" or "\r"). Returns '\n'.
unicodeNewline
(variants: skipUnicodeNewline, unicodeNewlineReturn)
Parses a Unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028" or "\u2029"). Returns '\n'.
spaces
(variant: spaces1)
Skips over any sequence of whitespace chars (' ', '\t' or a newline).
unicodeSpaces
(variant: unicodeSpaces1)
Skips over any sequence of Unicode whitespace chars and recognizes ("\n", "\r\n", "\r", "\u0085", "\u2028" and "\u2029") as newlines.
eof Only succeeds at the end of the input.
Table 6.1.6: Chaining and piping parsers
Parser Description
preturn x Returns x.
p >>% x Applies the parser p. Returns x.
p |>> f Applies the parser p. Returns f x, where x is the result returned by p.
p1 >>. p2 Applies the parsers p1 and p2 in sequence. Returns the result of p2.
p1 .>> p2 Applies the parsers p1 and p2 in sequence. Returns the result of p1.
p1 .>>. p2 Applies the parsers p1 and p2 in sequence. Returns the results in a tuple.
between pBegin pEnd p Applies the parsers pBegin, p and pEnd in sequence. Returns the result of p.
pipe2 p1 p2 f
(variants: pipe(3|4|5))
Applies the parsers p1 and p2 in sequence. Returns f x1 x2, where x1 and x2 are the results returned by p1 and p2.
p >>= f First applies the parser p, then applies the function f to the result returned by p and finally applies the parser returned by f.
Table 6.1.7: Parsing sequences
Parser PEG Description
tuple2 p1 p2
(variants: tuple(3|4|5))
p1 p2 Applies the parsers p1 and p2 in sequence. Returns the results in a tuple.
parray n p
(variants: skipArray)
Parses n occurrences of p. Returns the results in an array.
many p
(variant: skipMany)
p* Parses zero or more occurrences of p. Returns the results in a list.
many1 p
(variant: skipMany1)
p+ Parses one or more occurrences of p. Returns the results in a list.
sepBy p sep
(variants: sepBy1, skipSepBy[1])
(p (sep p)*)? Parses zero or more occurrences of p, separated by sep. Returns the results in a list.
sepEndBy p sep
(variants: sepEndBy1, skipSepEndBy[1])
(p (sep p)* sep?)? Parses zero or more occurrences of p, separated and optionally ended by sep. Returns the results in a list.
manyTill p endp
(variants: many1Till, skipMany[1]Till)
(!endp p)* endp Parses zero or more occurrences of p for as long as endp does not succeed. Stops after endp succeeded. Returns the results returned by p in a list.
chainl1 p op
(variants: chain(l|r)[1])
p (op p)* Parses one or more occurrences of p, separated by op. Returns f_n (... (f_2 (f_1 x_1 x_2) x_3) ...) x_n+1, where f_1 to f_n are the functions returned by the parser op and x_1 to x_n+1 are the values returned by p.
Table 6.1.8: Parsing alternatives and recovering from errors
Parser Description
p1 <|> p2 Parses p1 or p2. The parser p2 is only tried if p1 fails with a non‐fatal error and without changing the parser state. The stream position is part of the parser state, so if p1 fails after consuming input, p2 will not be tried.
choice ps
(variant: choiceL)
Is equivalent to p1 <|> p2 <|> ... <|> pn <|> pzero, where p1 … pn are the parsers in the sequence ps.
p <|>% x Parses p or returns x. Is equivalent to p <|> preturn x.
opt p
(variant: optional)
Parses an optional occurrence of p as an option value. Is equivalent to (p |>> Some) <|>% None
attempt p Parses p. If p fails after changing the parser state, attempt p will backtrack to the original parser state before reporting a (non‐fatal) error. Thus, attempt p1 <|> p2 will continue to try to parse p2 even if p1 fails after consuming input.
p1 >>? p2
(variants: .>>?, .>>.?, >>=?)
Behaves like p1 >>. p2, but will backtrack to the beginning if p2 fails with a non‐fatal error and with an unchanged parser state, even if p1 has changed the parser state.
Table 6.1.9: Conditional parsing and looking ahead
Parser Description
notEmpty p Behaves like p, but fails when p succeeds without consuming input or changing the parser state in any other way.
followedBy p
(variant: notFollowedBy)
Succeeds without changing the parser state if the parser p succeeds at the current position.
followedByL p label
(variant: notFollowedByL)
Behaves like followedBy p, but uses the string label to generate a more descriptive error message in case p fails. The string label should describe p.
notFollowedByEof Is an optimized version of notFollowedByL eof "end of input".
followedByString str
(variants: (notF|f)ollowedByString[CI])
Is an optimized version of followedByL (pstring str) ("'" + str + "'").
nextCharSatisfies f
(variants: next2CharsSatisfy, previousCharSatisfies)
Is an optimized version of followedBy (satisfy f).
nextCharSatisfiesNot f
(variants: next2CharsSatisfyNot, previousCharSatisfiesNot)
Is an optimized version of notFollowedBy (satisfy f).
lookAhead p Parses p and restores the original parser state afterwards.
Table 6.1.10: Customizing error messages
Parser Description
p <?> label Applies the parser p. If p does not change the parser state (usually because p failed), the error messages are replaced with expectedError label. The string label should describe p.
p <??> label Behaves like p <?> label, but when p fails after changing the parser state, a CompoundError message is generated with both the given label and the error messages generated by p.
fail msg Always fails with a messageError msg. The string msg will be displayed together with other error messages generated for the same input position.
failFatally msg Always fails with a messageError msg. Returns with a FatalError, so that no error recovery is attempted (except via backtracking constructs).
Table 6.1.11: User state handling and getting the input stream position
Parser Description
getUserState Returns the current user state.
setUserState u Sets the user state to u.
updateUserState f Sets the user state to f u, where u is the current user state.
userStateSatisfies f Succeeds if the current user state satisfies the predicate function f.
getPosition Returns the current position in the input stream.
================================================ FILE: Doc/html/reference/position.html ================================================ FParsec.Position

6.10 FParsec.Position

// FParsecCS.dll

namespace FParsec

type Position =
  new: streamName: string * index: int64 * line: int64 * column: int64 -> Position

  member StreamName: string
  member Index: int64
  member Line: int64
  member Column: int64

  override ToString: unit -> string

  interface System.IEquatable<Position>
  interface System.IComparable<Position>
  interface System.IComparable
================================================ FILE: Doc/html/reference/primitives.html ================================================ FParsec.Primitives

6.2 FParsec.Primitives

6.2.1 Interface

// FParsec.dll

[<AutoOpen>] // module is automatically opened when FParsec namespace is opened
module FParsec.Primitives

[<Literal>] val Ok:         ReplyStatus = ReplyStatus.Ok
[<Literal>] val Error:      ReplyStatus = ReplyStatus.Error
[<Literal>] val FatalError: ReplyStatus = ReplyStatus.FatalError


type Parser<'TResult, 'TUserState> = CharStream<'TUserState> -> Reply<'TResult>


// Two basic primitives that are seldom used directly in user code:
val preturn: 'a -> Parser<'a,'u>
val pzero: Parser<'a,'u>


// Chaining and piping parsers
// ==============================
val (>>=):  Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

val (>>%):   Parser<'a,'u> -> 'b -> Parser<'b,'u>
val (>>.):   Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>
val (.>>):   Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>
val (.>>.):  Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>
val between: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'c,'u>

val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b,'u>

val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

val pipe3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u>
           -> ('a -> 'b -> 'c -> 'd) -> Parser<'d,'u>

val pipe4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
           -> ('a -> 'b -> 'c -> 'd -> 'e) -> Parser<'e,'u>

val pipe5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
           -> Parser<'e,'u>
           -> ('a -> 'b -> 'c -> 'd -> 'e -> 'f) -> Parser<'f,'u>


// Parsing alternatives and recovering from errors
// ===============================================
val (<|>):       Parser<'a,'u>  -> Parser<'a,'u> -> Parser<'a,'u>
val choice:  seq<Parser<'a,'u>> -> Parser<'a,'u>
val choiceL: seq<Parser<'a,'u>> -> string -> Parser<'a,'u>

val (<|>%):   Parser<'a,'u> -> 'a -> Parser<'a,'u>
val opt:      Parser<'a,'u> -> Parser<'a option,'u>
val optional: Parser<'a,'u> -> Parser<unit,'u>

val attempt: Parser<'a,'u> -> Parser<'a,'u>

val (>>=?): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>
val (>>?):  Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>
val (.>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>
val (.>>.?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>


// Conditional parsing and looking ahead
// =====================================
val notEmpty: Parser<'a,'u> -> Parser<'a,'u>

val followedBy:     Parser<'a,'u> -> Parser<unit,'u>
val followedByL:    Parser<'a,'u> -> string -> Parser<unit,'u>
val notFollowedBy:  Parser<'a,'u> -> Parser<unit,'u>
val notFollowedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

val lookAhead: Parser<'a,'u> -> Parser<'a,'u>


// Customizing error messages
// ==========================
val (<?>):  Parser<'a,'u> -> string -> Parser<'a,'u>
val (<??>): Parser<'a,'u> -> string -> Parser<'a,'u>

val fail:        string -> Parser<'a,'u>
val failFatally: string -> Parser<'a,'u>


// Parsing sequences
// =================
val tuple2: Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>
val tuple3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u>
            -> Parser<('a * 'b * 'c),'u>
val tuple4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
            -> Parser<('a * 'b * 'c * 'd),'u>
val tuple5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
            -> Parser<'e,'u> -> Parser<('a * 'b * 'c * 'd * 'e),'u>

val parray:    int -> Parser<'a,'u> -> Parser<'a[],'u>
val skipArray: int -> Parser<'a,'u> -> Parser<unit,'u>

val many:      Parser<'a,'u> -> Parser<'a list,'u>
val many1:     Parser<'a,'u> -> Parser<'a list,'u>
val skipMany:  Parser<'a,'u> -> Parser<unit,'u>
val skipMany1: Parser<'a,'u> -> Parser<unit,'u>

val sepBy:         Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val sepBy1:        Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val skipSepBy:     Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>
val skipSepBy1:    Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

val sepEndBy:      Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val sepEndBy1:     Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val skipSepEndBy:  Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>
val skipSepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

val manyTill:      Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val many1Till:     Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>
val skipManyTill:  Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>
val skipMany1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

[<CompilationRepresentationFlags.Static>]
type Inline =
  static member inline Many: stateFromFirstElement: ('T -> 'State)
                           * foldState: ('State -> 'T -> 'State)
                           * resultFromState: ('State -> 'Result)
                           * elementParser: Parser<'T,'U>
                           * ?firstElementParser: Parser<'T,'U>
                           * ?resultForEmptySequence: (unit -> 'Result)
                          -> Parser<'Result,'U>

  static member inline SepBy: stateFromFirstElement: ('T -> 'State)
                            * foldState: ('State -> 'Separator -> 'T -> 'State)
                            * resultFromState: ('State -> 'Result)
                            * elementParser: Parser<'T,'U>
                            * separatorParser: Parser<'Separator,'U>
                            * ?firstElementParser: Parser<'T,'U>
                            * ?resultForEmptySequence: (unit -> 'Result)
                            * ?separatorMayEndSequence: bool
                           -> Parser<'Result,'U>

  static member inline ManyTill: stateFromFirstElement: ('T -> 'State)
                               * foldState: ('State -> 'T -> 'State)
                               * resultFromStateAndEnd: ('State -> 'E -> 'Result)
                               * elementParser: Parser<'T,'U>
                               * endParser: Parser<'E,'U>
                               * ?firstElementParser: Parser<'T,'U>
                               * ?resultForEmptySequence: ('E -> 'Result)
                              -> Parser<'Result,'U>

val chainl1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u>       -> Parser<'a,'u>
val chainl:  Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>
val chainr1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u>       -> Parser<'a,'u>
val chainr:  Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>


// Building parsers using F#'s computation expression syntax
type ParserCombinator = // ...
val parse: ParserCombinator

// Building mutually recursive parser values
val createParserForwardedToRef: unit -> Parser<'a,'u> * Parser<'a,'u> ref

6.2.2 Members

[<Literal>] val Ok: ReplyStatus = ReplyStatus.Ok

This ReplyStatus value indicates that a parser succeeded.

[<Literal>] val Error: ReplyStatus = ReplyStatus.Error

This ReplyStatus value indicates that a parser failed.

[<Literal>] val FatalError: ReplyStatus = ReplyStatus.FatalError

This ReplyStatus value indicates that a parser failed and no error recovery (except after backtracking) should be tried.

type Parser<'TResult, 'TUserState> = CharStream<'TUserState> -> Reply<'TResult>

The type of the parser functions supported throughout the FParsec library.

val preturn: 'a -> Parser<'a,'u>

The parser preturn x always succeeds with the result x (without changing the parser state).

preturn x is defined as fun stream -> Reply(x).

val pzero: Parser<'a,'u>

The parser pzero always fails with an empty error message list, i.e. an unspecified error.

pzero is defined as fun stream -> Reply(Error, NoErrorMessage).

val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

The parser p >>= f first applies the parser p to the input, then applies the function f to the result returned by p and finally applies the parser returned by f to the input.

Note

Please see the user’s guide chapter Applying parsers in sequence for an in‐depth discussion of the behaviour of this and other sequencing combinators.

The >>= combinator is the conceptual foundation for all combinators that consecutively apply multiple parsers to the input. In order to precisely define its behaviour we give an equivalent definition:

let (>>=) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) =
    fun stream ->
        let reply1 = p stream
        if reply1.Status = Ok then
            let p2 = f reply1.Result
            let stateTag = stream.StateTag
            let mutable reply2 = p2 stream
            if stateTag = stream.StateTag then
                reply2.Error <- mergeErrors reply1.Error reply2.Error
            reply2
        else
            Reply(reply1.Status, reply1.Error)
val (>>%): Parser<'a,'u> -> 'b -> Parser<'b,'u>

The parser p >>% x applies the parser p and returns the result x.

p >>% x is an optimized implementation of p >>= fun _ -> preturn x.

val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

The parser p1 >>. p2 applies the parsers p1 and p2 in sequence and returns the result of p2.

p1 >>. p2 is an optimized implementation of p1 >>= fun _ -> p2.

val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

The parser p1 .>> p2 applies the parsers p1 and p2 in sequence and returns the result of p1.

p1 .>> p2 is an optimized implementation of p1 >>= fun x -> p2 >>% x.

val (.>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

The parser p1 .>>. p2 applies the parsers p1 and p2 in sequence and returns the results in a tuple.

p1 .>>. p2 is an optimized implementation of

p1 >>= fun a ->
p2 >>= fun b -> preturn (a, b)
val between: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'c,'u>

The parser between popen pclose p applies the parsers popen, p and pclose in sequence. It returns the result of p.

between popen pclose p is an optimized implementation of popen >>. p .>> pclose.

val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b,'u>

The parser p |>> f applies the parser p and returns the result of the function application f x, where x is the result returned by p.

p |>> f is an optimized implementation of p >>= fun x -> preturn (f x).

val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

The parser pipe2 p1 p2 f applies the parsers p1 and p2 in sequence. It returns the result of the function application f a b, where a and b are the results returned by p1 and p2.

pipe2 p1 p2 f is an optimized implementation of

p1 >>= fun a ->
p2 >>= fun b -> preturn (f a b)
val pipe3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u>
           -> ('a -> 'b -> 'c -> 'd) -> Parser<'d,'u>

The parser pipe3 p1 p2 p3 f applies the parsers p1, p2 and p3 in sequence. It returns the result of the function application f a b c, where a, b and c are the results returned by p1, p2 and p3.

pipe3 p1 p2 p3 f is an optimized implementation of

p1 >>= fun a ->
p2 >>= fun b ->
p3 >>= fun c -> preturn (f a b c)
val pipe4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
           -> ('a -> 'b -> 'c -> 'd -> 'e) -> Parser<'e,'u>

The parser pipe4 p1 p2 p3 p4 f applies the parsers p1, p2, p3 and p4 in sequence. It returns the result of the function application f a b c d, where a, b, c and d are the results returned by p1, p2, p3 and p4.

pipe4 p1 p2 p3 p4 f is an optimized implementation of

p1 >>= fun a ->
p2 >>= fun b ->
p3 >>= fun c ->
p4 >>= fun d -> preturn (f a b c d)
val pipe5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
           -> Parser<'e,'u>
           -> ('a -> 'b -> 'c -> 'd -> 'e -> 'f) -> Parser<'f,'u>

The parser pipe5 p1 p2 p3 p4 p5 f applies the parsers p1, p2, p3, p4 and p5 in sequence. It returns the result of the function application f a b c d e, where a, b, c, d and e are the results returned by p1, p2, p3, p4 and p5.

pipe5 p1 p2 p3 p4 p5 f is an optimized implementation of

p1 >>= fun a ->
p2 >>= fun b ->
p3 >>= fun c ->
p4 >>= fun d ->
p5 >>= fun e -> preturn (f a b c d e)
val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u>

The parser p1 <|> p2 first applies the parser p1. If p1 succeeds, the result of p1 is returned. If p1 fails with a non‐fatal error and without changing the parser state, the parser p2 is applied. Note: The stream position is part of the parser state, so if p1 fails after consuming input, p2 will not be applied.

The choice combinator is a generalization of <|> to more than two parsers.

Note

Please see the user’s guide chapter on Parsing alternatives for an in‐depth discussion of the behaviour of this combinator.

val choice: seq<Parser<'a,'u>> -> Parser<'a,'u>

The parser choice ps is an optimized implementation of p1 <|> p2 <|> ... <|> pn, where p1 … pn are the parsers in the sequence ps.

choice [p] is equivalent to p.
choice [] is equivalent to pzero.

val choiceL: seq<Parser<'a,'u>> -> string -> Parser<'a,'u>

The parser choiceL ps label is an optimized implementation of choice ps <?> label.

choiceL is slightly faster than choice, because it doesn’t have to aggregate error messages.

val (<|>%): Parser<'a,'u> -> 'a -> Parser<'a,'u>

The parser p <|>% x is an optimized implementation of p <|> preturn x.

val opt: Parser<'a,'u> -> Parser<'a option,'u>

The parser opt p parses an optional occurrence of p as an option value.

opt p is an optimized implementation of (p |>> Some) <|>% None.

val optional: Parser<'a,'u> -> Parser<unit,'u>

The parser optional p skips over an optional occurrence of p.

optional p is an optimized implementation of (p >>% ()) <|>% ().

val attempt: Parser<'a,'u> -> Parser<'a,'u>

The parser attempt p applies the parser p. If p fails after changing the parser state or with a fatal error, attempt p will backtrack to the original parser state and report a non‐fatal error.

val (>>=?): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

The parser p >>=? f behaves like p >>= f, except that it will backtrack to the beginning if the parser returned by f fails with a non‐fatal error and without changing the parser state, even if p has changed the parser state.

val (>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

The parser p1 >>? p2 behaves like p1 >>. p2, except that it will backtrack to the beginning if p2 fails with a non‐fatal error and without changing the parser state, even if p1 has changed the parser state.

val (.>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

The parser p1 .>>? p2 behaves like p1 .>> p2, except that it will backtrack to the beginning if p2 fails with a non‐fatal error and without changing the parser state, even if p1 has changed the parser state.

val (.>>.?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

The parser p1 .>>.? p2 behaves like p1 .>>. p2, except that it will backtrack to the beginning if p2 fails with a non‐fatal error and without changing the parser state, even if p1 has changed the parser state.

val notEmpty: Parser<'a,'u> -> Parser<'a,'u>

The parser notEmpty p behaves like p, except that it fails when p succeeds without consuming input or changing the parser state in any other way.

notEmpty is useful for forcing sequence parsers to consume input. For example, notEmpty (manySatisfy f) behaves like many1Satisfy f.

val followedBy: Parser<'a,'u> -> Parser<unit,'u>

The parser followedBy p succeeds if the parser p succeeds at the current position. Otherwise it fails with a non‐fatal error. This parser never changes the parser state.

If the parser followedBy p fails, it returns no descriptive error message. Hence it should only be used together with other parsers that take care of a potential error. Alternatively, followedByL p label can be used to ensure a more descriptive error message.

val followedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

The parser followedByL p label behaves like followedBy p, except that it returns an Expected label error message when the parser p fails.

val notFollowedBy: Parser<'a,'u> -> Parser<unit,'u>

The parser notFollowedBy p succeeds if the parser p fails to parse at the current position. Otherwise it fails with a non‐fatal error. This parser never changes the parser state.

If the parser notFollowedBy p fails, it returns no descriptive error message. Hence it should only be used together with other parsers that take care of a potential error. Alternatively, notFollowedByL p label can be used to ensure a more descriptive error message.

val notFollowedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

The parser notFollowedByL p label behaves like notFollowedBy p, except that it returns an Unexpected label error message when the parser p succeeds.

val lookAhead: Parser<'a,'u> -> Parser<'a,'u>

The parser lookAhead p parses p and restores the original parser state afterwards. If p fails after changing the parser state, the error messages are wrapped in a NestedError. If it succeeds, any error messages are discarded. Fatal errors are turned into normal errors.

val (<?>): Parser<'a,'u> -> string -> Parser<'a,'u>

The parser p <?> label applies the parser p. If p does not change the parser state (usually because p failed), the error messages are replaced with expected label.

Please also see the user’s guide chapter on customizing error messages.

val (<??>): Parser<'a,'u> -> string -> Parser<'a,'u>

The parser p <??> label behaves like p <?> label, except that when p fails after changing the parser state (for example, because p consumes input before it fails), a CompoundError message is generated with both the given string label and the error messages generated by p.

Please also see the user’s guide chapter on customizing error messages.

val fail: string -> Parser<'a,'u>

The parser fail msg always fails with a messageError msg. The string msg will be displayed together with other error messages generated for the same input position.

fail msg is equivalent to fun stream -> Reply(Error, messageError msg).

val failFatally: string -> Parser<'a,'u>

The parser failFatally msg always fails with a messageError msg. It returns with a FatalError, so that no error recovery is attempted (except via backtracking constructs).

failFatally msg is equivalent to fun stream -> Reply(FatalError, messageError msg).

val tuple2: Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

The parser tuple2 p1 p2 applies the parsers p1 and p2 in sequence and returns the results in a tuple.

tuple2 p1 p2 is defined as p1 .>>. p2 and is equivalent to pipe2 p1 p2 (fun a b -> (a, b)).

val tuple3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u>
            -> Parser<('a * 'b * 'c),'u>

The parser tuple3 p1 p2 p3 applies the parsers p1, p2 and p3 in sequence and returns the results in a tuple.

tuple3 p1 p2 p3 is equivalent to pipe3 p1 p2 p3 (fun a b c -> (a, b, c)).

val tuple4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
            -> Parser<('a * 'b * 'c * 'd),'u>

The parser tuple4 p1 p2 p3 p4 applies the parsers p1, p2, p3 and p4 in sequence and returns the results in a tuple.

tuple4 p1 p2 p3 p4 is equivalent to pipe4 p1 p2 p3 p4 (fun a b c d -> (a, b, c, d)).

val tuple5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u>
            -> Parser<'e,'u> -> Parser<('a * 'b * 'c * 'd * 'e),'u>

The parser tuple5 p1 p2 p3 p4 p5 applies the parsers p1, p2, p3, p4 and p5 in sequence and returns the results in a tuple.

tuple5 p1 p2 p3 p4 p5 is equivalent to pipe5 p1 p2 p3 p4 p5 (fun a b c d e -> (a, b, c, d, e)).

val parray: int -> Parser<'a,'u> -> Parser<'a[],'u>

The parser parray n p parses n occurrences of p and returns the results in an array.

For example, parray 3 p is equivalent to pipe3 p p p (fun a b c -> [|a;b;c|]).

val skipArray: int -> Parser<'a,'u> -> Parser<unit,'u>

The parser skipArray n p is an optimized implementation of parray n p |>> ignore.

val many: Parser<'a,'u> -> Parser<'a list,'u>

The parser many p repeatedly applies the parser p until p fails. It returns a list of the results returned by p. At the end of the sequence p must fail without changing the parser state and without signalling a FatalError, otherwise many p will fail with the error reported by p.

many p tries to guard against an infinite loop by raising an exception if p succeeds without changing the parser state.

Ignoring efficiency issues, stack space usage and the infinite recursion case, many could be defined as follows:

let rec many p =
    (p >>= fun hd ->
               many p
               |>> fun tl -> hd::tl)
    <|>% []
val many1: Parser<'a,'u> -> Parser<'a list,'u>

The parser many1 p behaves like many p, except that it requires p to succeed at least one time.

many1 p is equivalent to pipe2 p (many p) (fun hd tl -> hd::tl).

val skipMany: Parser<'a,'u> -> Parser<unit,'u>

The parser skipMany p is an optimized implementation of many p |>> ignore.

val skipMany1: Parser<'a,'u> -> Parser<unit,'u>

The parser skipMany1 p is an optimized implementation of many1 p |>> ignore.

val sepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser sepBy p sep parses zero or more occurrences of p separated by sep (in EBNF: (p (sep p)*)?). It returns a list of the results returned by p.

sepBy p sep is almost equivalent to pipe2 p (many (sep >>. p)) (fun hd tl -> hd::tl) <|>% [], except with regard to a case rarely encountered in practice: If sep succeeds without changing the parser state and p then fails without changing the state, then sepBy p sep fails too, while the parser given by the almost equivalent definition would succeed.

val sepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser sepBy1 p sep parses one or more occurrences of p separated by sep (in EBNF: p (sep p)*).

The parser sepBy1 p sep behaves like sepBy p sep, except that it requires p to succeed at least one time. Hence, if sepBy1 p sep succeeds, the returned list always contains at least one value.

val skipSepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipSepBy p sep is an optimized implementation of sepBy p sep |>> ignore.

val skipSepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipSepBy1 p sep is an optimized implementation of sepBy1 p sep |>> ignore.

val sepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser sepEndBy p sep parses zero or more occurrences of p separated and optionally ended by sep (in EBNF: (p (sep p)* sep?)?). It returns a list of the results returned by p.

sepEndBy p sep tries to guard against an infinite loop by raising an exception if p and sep succeed without changing the parser state.

Ignoring efficiency issues, stack space usage and the infinite recursion case, sepEndBy could be defined as follows:

let rec sepEndBy p sep =
    (p >>= fun hd ->
               sep >>. sepEndBy p sep <|>% []
               |>> fun tl -> hd::tl)
    <|>% []
val sepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser sepEndBy1 p sep parses one or more occurrences of p separated and optionally ended by sep (in EBNF: p (sep p)* sep?). It returns a list of the results returned by p.

The parser sepEndBy1 p sep behaves like sepEndBy p sep, except that it requires p to succeed at least one time. Hence, if sepEndBy1 p sep succeeds, the returned list always contains at least one value.

val skipSepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipSepEndBy p sep is an optimized implementation of sepEndBy p sep |>> ignore.

val skipSepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipSepEndBy1 p sep is an optimized implementation of sepEndBy1 p sep |>> ignore.

val manyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser manyTill p endp repeatedly applies the parser p for as long as endp fails (without changing the parser state). It returns a list of the results returned by p.

manyTill p endp is an optimized variant of many (notFollowedBy endp >>. p) .>> endp that doesn’t have to apply endp twice at the end of the sequence and that fails with the error reported by endp if endp fails after changing the parser state.

val many1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

The parser many1Till p endp behaves like manyTill p endp, except that it requires p to succeed at least one time.

many1Till p endp is an optimized implementation of pipe2 p (manyTill p endp) (fun hd tl -> hd::tl).

val skipManyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipManyTill p endp is an optimized implementation of manyTill p endp |>> ignore.

val skipMany1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

The parser skipMany1Till p endp is an optimized implementation of many1Till p endp |>> ignore.

[<CompilationRepresentationFlags.Static>]
type Inline =

Inline is a static class that contains the following inline helper methods for defining optimized sequence parsers:

static member inline Many: stateFromFirstElement: ('T -> 'State)
                         * foldState: ('State -> 'T -> 'State)
                         * resultFromState: ('State -> 'Result)
                         * elementParser: Parser<'T,'U>
                         * ?firstElementParser: Parser<'T,'U>
                         * ?resultForEmptySequence: (unit -> 'Result)
                        -> Parser<'Result,'U>

Inline.Many is an inline helper method for defining optimized sequence parsers.

Inline.Many(stateFromFirstElement, foldState, resultFromState, elementParser) expands to an optimized implementation of

many1 elementParser // requires at least 1 element
|>> function hd::tl ->
    resultFromState (List.fold foldState (stateFromFirstElement hd) tl)
Note

The 'State argument to the foldState function is completely independent of FParsec’s usual parser state. The term “accumulator” would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.

If you pass a value for the optional argument resultForEmptySequence, the parser expands to an optimized implementation of

many elementParser // accepts empty sequence
|>> function
    | [] -> resultForEmptySequence()
    | hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl)

If you pass a value for the optional argument firstElementParser, the first element of the sequence will be parsed with firstElementParser instead of elementParser.

The following example shows how you can use Inline.Many to define an optimized parser that behaves like many1 p |>> List.reduce f but avoids the temporary allocation of a list:

let many1Reduce p f =
    Inline.Many(elementParser = p,
                stateFromFirstElement = (fun x0 -> x0),
                foldState = (fun acc y -> f acc y),
                resultFromState = (fun acc -> acc))

A simple test run:

> run (many1Reduce (pint32 .>> spaces) (+)) "1 2 3";;
val it : ParserResult<int32,unit> = Success: 6

The following example shows how you can use Inline.Many to create an optimized sequence parser that returns an array instead of a list:

let manyA2 p1 p =
    Inline.Many(firstElementParser = p1,
                elementParser = p,
                stateFromFirstElement = (fun x0 ->
                                             let ra = ResizeArray<_>()
                                             ra.Add(x0)
                                             ra),
                foldState = (fun ra x -> ra.Add(x); ra),
                resultFromState = (fun ra -> ra.ToArray()),
                resultForEmptySequence = (fun () -> [||]))

let manyA p = manyA2 p p

Two simple test runs:

> run (manyA (pint32 .>> spaces)) "";;
val it : ParserResult<int32 [],unit> = Success: [||]
> run (manyA (pint32 .>> spaces)) "1 2 3";;
val it : ParserResult<int32 [],unit> = Success: [|1; 2; 3|]
static member inline SepBy: stateFromFirstElement: ('T -> 'State)
                          * foldState: ('State -> 'Separator -> 'T -> 'State)
                          * resultFromState: ('State -> 'Result)
                          * elementParser: Parser<'T,'U>
                          * separatorParser: Parser<'Separator,'U>
                          * ?firstElementParser: Parser<'T,'U>
                          * ?resultForEmptySequence: (unit -> 'Result)
                          * ?separatorMayEndSequence: bool
                         -> Parser<'Result,'U>

Inline.SepBy is an inline helper method for defining optimized sequence parsers. By default, parsers defined with Inline.SepBy parse sequences of the form (in EBNF): element (separator element)*

Inline.SepBy(stateFromFirstElement, foldState, resultFromState, elementParser, separatorParser) expands to an optimized implementation of

pipe2 elementParser (many (separatorParser .>>. elementParser))
      (fun elem0 sepsAndElems ->
          sepsAndElems
          |> List.fold (fun acc (sep, e) -> foldState acc sep e)
                       (stateFromFirstElement elem0)
          |> resultFromState)

For most practical purposes the behaviour of the expanded Inline.SepBy parser and the above definition based on many can be considered equivalent, but there is a fringe case where the behaviour differs: If separatorParser succeeds without changing the parser state and elementParser then fails without changing the parser state, then the Inline.SepBy parser fails too, while the parser given by the definition based on many would succeed.

Note

The 'State argument to the foldState function is completely independent of FParsec’s usual parser state. The term “accumulator” would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.

If you pass true as the value for the optional argument separatorMayEndSequence, a separator may also end the sequence, i.e. the parser will accept sequences of the following form (in EBNF):

element (separator element)* separator?

Note that foldState is not called with the value of an ending separator.

If you pass a value for the optional argument resultForEmptySequence, the parser returned by Inline.SepBy will call resultForEmptySequence to create the parser result when it encounters an empty sequence. If you don’t pass a resultForEmptySequence function, the parser will fail for an empty sequence.

If you pass a value for the optional argument firstElementParser, the first element of a sequence will be parsed with firstElementParser instead of elementParser.

The following example shows how you can use Inline.SepBy to define an optimized parser that behaves like sepBy1 p sep |>> List.reduce f but avoids the temporary allocation of a list:

let sepBy1Reduce p sep f =
    Inline.SepBy(elementParser = p,
                 separatorParser = sep,
                 stateFromFirstElement = (fun x0 -> x0),
                 foldState = (fun acc _ y -> f acc y),
                 resultFromState = (fun acc -> acc))

A simple test run:

> run (sepBy1Reduce pint32 (pstring "," >>. spaces) (+)) "1, 2, 3";;
val it : ParserResult<int32,unit> = Success: 6

The following example shows how one could define CharParsers.stringsSepBy using Inline.SepBy:

let stringsSepBy p sep =
    Inline.SepBy(elementParser = p,
                 separatorParser = sep,
                 stateFromFirstElement =
                     (fun str -> let sb = System.Text.StringBuilder()
                                 sb.Append(str : string)),
                                 // sb.Append returns sb
                 foldState =
                     (fun sb sep str -> sb.Append(sep : string)
                                          .Append(str : string)),
                 resultFromState = (fun sb -> sb.ToString()))

let testParser : Parser<string,unit> =
    stringsSepBy (manySatisfy isLetter) (pstring @"\\" >>% @"\")
> run testParser "";;
val it : ParserResult<string,unit> = Success: ""
> run testParser @"abc\\def\\\\";;
val it : ParserResult<string,unit> = Success: "abc\def\\"
static member inline ManyTill: stateFromFirstElement: ('T -> 'State)
                             * foldState: ('State -> 'T -> 'State)
                             * resultFromStateAndEnd: ('State -> 'E -> 'Result)
                             * elementParser: Parser<'T,'U>
                             * endParser: Parser<'E,'U>
                             * ?firstElementParser: Parser<'T,'U>
                             * ?resultForEmptySequence: ('E -> 'Result)
                            -> Parser<'Result,'U>

Inline.ManyTill is an inline helper method for defining optimized sequence parsers.

Inline.ManyTill(stateFromFirstElement, foldState, resultFromState, elementParser, endParser) expands to an optimized implementation of

many1Till elementParser endParser // requires at least 1 element
|>> function hd::tl ->
    resultFromState (List.fold foldState (stateFromFirstElement hd) tl)
Note

The 'State argument to the foldState function is completely independent of FParsec’s usual parser state. The term “accumulator” would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.

If you pass a value for the optional argument resultForEmptySequence, the parser expands to an optimized implementation of

manyTill elementParser endParser // accepts empty sequence
|>> function
    | [] -> resultForEmptySequence()
    | hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl)

If you pass a value for the optional argument firstElementParser, the first element of the sequence will be parsed with firstElementParser instead of elementParser.

The following example shows how one could define CharParsers.manyCharsTill2 using Inline.ManyTill:

let myManyCharsTillApply2 cp1 cp endp f =
    Inline.ManyTill(firstElementParser = cp1,
                    elementParser = cp,
                    endParser = endp,
                    stateFromFirstElement =
                        (fun c -> let sb = System.Text.StringBuilder()
                                  sb.Append(c : char)),
                                  // sb.Append returns sb
                    foldState = (fun sb c -> sb.Append(c : char)),
                    resultFromStateAndEnd = (fun sb e -> f (sb.ToString()) e),
                    resultForEmptySequence = (fun e -> f "" e))

let myManyCharsTillApply cp endp f = myManyCharsTillApply2 cp cp endp f

let myRestOfLine : Parser<string,unit> =
    myManyCharsTillApply anyChar ((newline >>% "\n") <|> (eof >>% ""))
                         (fun str nl -> str + nl)
> run myRestOfLine "";;
val it : ParserResult<string,unit> = Success: ""
> run myRestOfLine "abc\r\ndef";;
val it : ParserResult<string,unit> = Success: "abc\n"
val chainl1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

The parser chainl1 p op parses one or more occurrences of p separated by op (in EBNF: p (op p)*). It returns the value obtained by left associative application of all functions returned by op to the results returned by p, i.e. f_n (...(f_2 (f_1 x_1 x_2) x_3) ...) x_n+1, where f_1 to f_n are the functions returned by the parser op and x_1 to x_n+1 are the values returned by p. If only a single occurrence of p and no occurrence of op is parsed, the result of p is returned directly.

The chainl1 implementation uses constant stack space.

val chainl: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>

The parser chainl p op defVal is equivalent to chainl1 p op <|>% defVal.

val chainr1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

The parser chainr1 p op parses one or more occurrences of p separated by op (in EBNF: p (op p)*). It returns the value obtained by right associative application of all functions returned by op to the results returned by p, i.e. f_1 x_1 (f_2 x_2 (... (f_n x_n x_n+1) ...)), where f_1 to f_n are the functions returned by the parser op and x_1 to x_n+1 are the values returned by p. If only a single occurrence of p and no occurrence of op is parsed, the result of p is returned directly.

The chainr1 implementation uses constant stack space.

val chainr: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>

The parser chainr p op defVal is equivalent to chainr1 p op <|>% defVal.

type ParserCombinator =

This class is defined as

[<Sealed>]
type ParserCombinator() =
  member t.Delay(f)   = fun state -> (f ()) state
  member t.Return(x)  = preturn x
  member t.Bind(p, f) = p >>= f
  member t.Zero()     = pzero
  member t.ReturnFrom(p) = p
  member t.TryWith(p, cf) = fun state -> try p state
                                         with e -> (cf e) state
  member t.TryFinally(p, ff) = fun state -> try p state
                                            finally ff ()

Instances of this class can be used to build parsers using F#’s computation expression syntax. The default instance for this purpose is parse.

Please see the user’s guide chapter “Where is the monad?” for an introduction to the parse {...} syntax.

Some constructs supported by parse and their translations are

let! pat = expr in pexpr   ==>   expr >>= (fun pat -> pexpr)

let pat = expr in pexpr    ==>   let pat = expr in pexpr

do! expr in pexpr          ==>   expr >>= (fun  () -> pexpr)

do expr in pexpr           ==>   expr; pexpr

if expr then pexpr1        ==>   if expr then pexpr1
else pexpr2                      else pexpr2

if expr then pexpr         ==>   if expr then pexpr else pzero

return expr                ==>   preturn expr

return! expr               ==>   expr

where expr is any F# expression and pexpr is an expression of type Parser<_,_>. You need to use the !‐constructs whenever you have a right hand side expression that evaluates to a parser.

val parse: ParserCombinator

A builder object of type ParserCombinator for building parsers using F#’s computation expression syntax.

val createParserForwardedToRef: unit -> Parser<'a,'u> * Parser<'a,'u> ref

let p, pRef = createParserForwardedToRef() creates a parser p that forwards all calls to the parser in the reference cell pRef. Initially, pRef holds a reference to a dummy parser that raises an exception on any invocation.

The JSON parser example in the tutorial shows how you can use createParserForwardedToRef to define a parser for a recursive grammar.

================================================ FILE: Doc/html/reference/reply.html ================================================ FParsec.Reply

6.6 FParsec.Reply

Represents the return value of a Parser function.

6.6.1 Interface

// FParsecCS.dll

namespace FParsec

type ReplyStatus = Ok         =  1
                 | Error      =  0
                 | FatalError = -1


type Reply<'TResult> = struct
  new: 'TResult -> Reply<'TResult>
  new: ReplyStatus * ErrorMessageList -> Reply<'TResult>
  new: ReplyStatus * 'TResult * ErrorMessageList -> Reply<'TResult>

  val mutable Status: ReplyStatus
  /// If Status <> Ok then the Result value is undefined and may be null.
  val mutable Result: 'TResult
  val mutable Error: ErrorMessageList

  override Equals: obj -> bool
  override GetHashCode: unit -> int
  interface System.IEquatable<Reply<'TResult>>
end

6.6.2 Remarks

The Reply type is the return type of Parser functions. Similar to a tuple, a Reply value can be viewed as a simple aggregate of its three fields Status, Result and Error.

The value of the Status field indicates whether the parser returning the reply succeeded (ReplyStatus.Ok) or failed (ReplyStatus.Error or ReplyStatus.FatalError). If the value of the Status field is Ok, the Result field contains a parser result value; otherwise, its value is undefined.

The Equals override ignores the Result value when it compares two Reply values with Status <> Ok.

Note

The Reply fields are mutable because that allows us to implement library primitives with more compact code, for which the .NET JIT produces faster machine code.

Of course, if you object to mutable structs on religious grounds or if you’re not familiar with the somewhat subtle behaviour of mutable structs in certain situations, you can always treat the Reply type as if it were immutable.

6.6.3 Members

new: 'TResult -> Reply<'TResult>

Constructs a Reply instance with the Status field set to Ok, the Result field set to the argument value and the Error field set to null.

new: ReplyStatus * ErrorMessageList -> Reply<'TResult>

Constructs a Reply instance with the Status and Error fields set to the respective argument values and the Result field initialized to Unchecked.defaultof<'TResult>.

This constructor is usually used for constructing an error reply, like in Reply(Error, expected "something").

new: ReplyStatus * 'TResult * ErrorMessageList -> Reply<'TResult>

Constructs a Reply instance with the Status, Result and Error fields set to the respective argument values.

val mutable Status: ReplyStatus

The Status field contains a ReplyStatus enum value indicating whether a parser succeeded (Ok) or failed (Error or FatalError). By returning a FatalError instead of an Error a parser can signal that no error recovery should be tried (except through backtracking mechanisms).

val mutable Result: 'TResult

If the value of the Status field is Ok, the Result field contains a parser result value; otherwise, its value is undefined and may be equal to Unchecked.defaultof<'TResult>. (The result value in a Reply returned by an unsuccessful parser is generally an implementation detail of the parser that you should not depend on.)

val mutable Error: ErrorMessageList

The Error field holds a list of error messages in the form of an ErrorMessageList value. An empty ErrorMessageList is represented as a null value.

The error messages returned by a parser in a Reply value implicitly refer to the state of the CharStream as it is when the parser returns. Since the ErrorMessage values stored in the ErrorMessageList do not themselves contain an error position, they can only be interpreted together with the position of the CharStream as it is when the parser returns.

================================================ FILE: Doc/html/reference/staticmapping.html ================================================ FParsec.StaticMapping

6.5 FParsec.StaticMapping

This module defines functions for creating optimized mapping functions between keys and values.

This module is not available in the Low‐Trust version of FParsec.

6.5.1 Interface

// FParsec.dll

namespace FParsec

type Range = struct
  new: min: int * max: int -> Range

  val Min: int
  val Max: int
end

module StaticMapping =

  val createStaticCharIndicatorFunction:
          invert: bool -> charsInSet: seq<char>   -> (char -> bool)

  val createStaticCharRangeIndicatorFunction:
          invert: bool -> rangesInSet: seq<Range> -> (char -> bool)

  val createStaticIntIndicatorFunction:
          invert: bool -> valuesInSet: seq<int>   -> (int -> bool)

  val createStaticIntRangeIndicatorFunction:
          invert: bool -> rangesInSet: seq<Range> -> (int -> bool)


  val createStaticIntMapping:
          defaultValue: 'T -> keyValues: seq<int*'T>   -> (int -> 'T)

  val createStaticIntRangeMapping:
          defaultValue: 'T -> keyValues: seq<Range*'T> -> (int -> 'T)


  val createStaticStringMapping:
          defaultValue: 'T -> keyValues: seq<string*'T> -> (string -> 'T)

6.5.2 Remarks

The functions in the StaticMapping module use runtime code generation via System.Reflection.Emit to create optimized mapping functions between keys and values.

Note

Runtime code generation is relatively expensive, so the functions in this module should only be used for optimizing static mappings that are potentially called a (very) large number of times.

Note

The code generated for the optimized mapping functions will occupy memory until the associated AppDomain is unloaded.

The performance of the generated functions depends a lot on the individual key‐value mapping and the application‐specific call pattern. Ignoring the overhead of the function call, the generated mapping functions should generally be as fast as an equivalent statically compiled switch‐statement in C# or F#. In certain cases they will even be faster.

The code size of the generated functions increases about linearly with the number of key ranges (i.e. continuous sequences of keys with the same value). Hence, you should only use the StaticMapping module for small mappings. If you try to turn arbitrarily large key‐value mappings into static mapping functions, you’ll likely hit upon certain implementation limitations (of this module’s code, of Reflection.Emit or of the CLR’s JIT).

If the conditional compilation symbol DEBUG_STATIC_MAPPING is defined when compiling FParsec, the generated mapping functions will compute each result with two different methods and check the results against each other. Of course, this means that they will take more than twice as long as without the DEBUG_STATIC_MAPPING symbol and will also consume more memory. In Debug builds of FParsec DEBUG_STATIC_MAPPING is switched on by default, since the StaticMapping module is still relatively new.

Note

Measuring and comparing the performance of the generated mapping functions only makes sense in Release builds.

6.5.3 Members

type Range

Represents an immutable range between the integer values Min and Max (inclusive).

type Range = struct
  new: min: int * max: int -> Range

  val Min: int
  val Max: int
end

The Min value must not be larger than the Max value. In a Debug build this condition is checked by an assert‐check in the Range constructor.

val createStaticCharIndicatorFunction:
        invert: bool -> charsInSet: seq<char> -> (char -> bool)

Creates an optimized indicator function for the chars specified by the charsInSet sequence.

If invert is false (true), the returned indicator function will return true (false) if and only if it is called with a char contained in charsInSet.

charsInSet may contain duplicate char values.

Internally, this function collects continuous ranges of chars into Range values and then uses the same compilation strategy as createStaticCharRangeIndicatorFunction.

Please also see the remarks at the beginning of this section.

val createStaticCharRangeIndicatorFunction:
        invert: bool -> rangesInSet: seq<Range> -> (char -> bool)

Creates an optimized indicator function for the chars in the ranges specified by the rangesInSet sequence.

If invert is false (true), the returned indicator function will return true (false) if and only if it is called with a char contained in at least one of the ranges of rangesInSet.

rangesInSet may contain overlapping or duplicate ranges. However, the ranges must not contain values less than 0 or greater than 0xffff (the minimum and maximum UTF‐16 char values), otherwise an ArgumentException is thrown.

Please also see the remarks at the beginning of this section.

val createStaticIntIndicatorFunction:
        invert: bool -> valuesInSet: seq<int> -> (int -> bool)

Creates an optimized indicator function for the integers specified by the valuesInSet sequence.

If invert is false (true), the returned indicator function will return true (false) if and only if it is called with an integer contained in valuesInSet.

valuesInSet may contain duplicate integer values.

Internally, this function collects continuous ranges of integers into Range values and then uses the same compilation strategy as createStaticIntRangeIndicatorFunction.

Please also see the remarks at the beginning of this section.

val createStaticIntRangeIndicatorFunction:
        invert: bool -> rangesInSet: seq<Range> -> (int -> bool)

Creates an optimized indicator function for the integers in the ranges specified by the rangesInSet sequence.

If invert is false (true), the returned indicator function will return true (false) if and only if it is called with an int contained in at least one of the ranges of rangesInSet.

rangesInSet may contain overlapping or duplicate ranges.

Please also see the remarks at the beginning of this section.

val createStaticIntMapping:
        defaultValue: 'T -> keyValues: seq<int*'T> -> (int -> 'T)

Creates an optimized mapping function that maps integer keys to values.

The keyValues sequence specifies the key‐value pairs for the mapping. All keys not specified in keyValues are mapped to defaultValue.

This function throws an ArgumentException if keyValues contains a duplicate key.

Internally, this function collects continuous ranges of integer keys with equal values[1] into Range values and then uses the same compilation strategy as createStaticIntRangeMapping.

Please also see the remarks at the beginning of this section.

val createStaticIntRangeMapping:
        defaultValue: 'T -> keyValues: seq<Range*'T> -> (int -> 'T)

Creates an optimized mapping function that maps integer key ranges to values.

The keyValues sequence specifies the range‐value pairs for the mapping. All keys not contained in one of the ranges in keyValues are mapped to defaultValue.

This function throws an ArgumentException if keyValues contains an overlapping or duplicate key range.

Please also see the remarks at the beginning of this section.

val createStaticStringMapping:
        defaultValue: 'T -> keyValues: seq<string*'T> -> (string -> 'T)

Creates an optimized mapping function that maps string keys to values.

The keyValues sequence specifies the key‐value pairs for the mapping. All keys not specified in keyValues are mapped to defaultValue. A null key is not supported.

createStaticStringMapping throws an ArgumentException if keyValues contains a duplicate key or a null key. If the generated mapping function is called with a null string, it throws a NullReferenceException.

Note

The compilation strategy employed by createStaticStringMapping does not handle all mappings equally well. It is optimized for mapping a relatively small set of string symbols to constants. If you want to use createStaticStringMapping to optimize a frequently used mapping in your program, you should test how well createStaticStringMapping handles your situation (in a Release build!) and see whether the performance is worth the compilation costs and the additional code dependency.

Please also see the remarks at the beginning of this section.

Footnotes:
[1] In the case of a reference type the values are only compared for reference‐equality. In the case of a value type the values are only compared if the type implements System.IEquatable<_> or is an int enum type.
================================================ FILE: Doc/html/reference/text.html ================================================ FParsec.Text

6.12 FParsec.Text

6.12.1 Interface

// FParsecCS.dll

namespace FParsec

type Text =
    static member CountTextElements: string -> int
    static member FoldCase: char -> char
    static member FoldCase: string -> string
    static member IsWhitespace: char -> bool
    static member NormalizeNewlines: string -> string

6.12.2 Members

static member CountTextElements: string -> int

FParsec.Text.CountTextElements(str) is equivalent to System.Globalization.StringInfo(str).LengthInTextElements.

static member FoldCase: char -> char

FParsec.Text.FoldCase(chr) is an optimized implementation of FParsec.Text.FoldCase(string chr).

static member FoldCase: string -> string

Returns a case‐folded copy of the string argument. All chars are mapped using the (non‐Turkic) 1‐to‐1 case folding mappings (v. 8.0.0) for Unicode code points in the Basic Multilingual Plane, i.e. code points below 0x10000. If the case‐folded string equals the argument string, the original argument is returned (to preserve its reference identity). If the argument is null, null is returned.

static member IsWhitespace: char -> bool

FParsec.Text.IsWhitespace(chr) is a faster implementation of System.Char.IsWhiteSpace(chr).

In the LOW_TRUST‐version of FParsec this method simply forwards all calls to System.Char.IsWhiteSpace(chr).

static member NormalizeNewlines: string -> string

Returns the passed string with all occurrences of "\r\n" and "\r" replaced by "\n". If the normalized string equals the argument string, the original argument is returned (to preserve its reference identity). If the argument is null, null is returned.

================================================ FILE: Doc/html/tutorial.html ================================================ Tutorial

4 Tutorial

This tutorial introduces you to the basic concepts of FParsec. Our goal is to give you an intuition for how you can build parser applications using the FParsec library. We will only cover the basic ideas and only cursorily explore FParsec’s API, but hopefully we will cover enough ground to enable you to further explore FParsec with the help of the user’s guide, the API reference and the sample parsers in the Samples folder.

A Japanese translation of this tutorial by Gab_km is available here.
A Russian translation of this tutorial by Dmitry Vlasov is available here.

4.1 Preliminaries

FParsec is built as two DLLs: FParsec.dll and FParsecCS.dll. To use FParsec in your project, you can either let NuGet install one of the NuGet packages, or you can build the two FParsec DLLs from source. The easiest way to build FParsec from source is using the Visual Studio solution files in the Build/VS11 folder of the source code package. Any project that uses FParsec has to reference both DLLs. See Download and Installation for more details.

All FParsec types and modules are declared in the FParsec namespace. This namespace contains some basic classes (such as CharStream and Reply) and four F# modules, namely

  • Primitives, containing basic type definitions and parser combinators,
  • CharParsers, containing parsers for chars, strings and numbers, and functions for applying parsers to input streams,
  • Error, containing types and helper functions for creating, processing and formatting parser error messages,
  • StaticMapping, containing functions for compiling static key‐value mappings into optimized functions.

All code snippets in this tutorial assume that you’ve opened the FParsec namespace:

open FParsec

Opening the FParsec namespace also automatically opens the Primitives, CharParsers and Error modules.

Note

All code snippets in this tutorial are contained in the Samples/Tutorial project. Having this project open while reading the tutorial can be quite helpful. For example, you can hover the mouse over an identifier to get an Intellisense popup with the inferred type. And if you’re curious how a library function is implemented, you can click the Go to definition context menu option to view its source code.

4.2 Parsing a single float

Parsing input with FParsec involves two steps:

  1. building a parser and
  2. applying the parser to the input.

Let’s start with a simple example: parsing a single floating‐point number in a string.

In this case the first step, building the parser, is trivial, because the CharParsers module already comes with a built‐in float parser:

val pfloat: Parser<float,'u>

The generic type Parser<'Result,'UserState> is the type of all parsers in FParsec. If you follow the hyperlink into the reference, you’ll see that Parser is a type abbreviation for a function type. However, at this point we don’t need to go into the details of the Parser type. It’s enough to note that the first type argument represents the type of the parser result. Thus, in the case of pfloat the type tells us that if the parser succeeds it returns a floating‐point number of type float. We won’t use a “user state” in this tutorial, so you can just ignore the second type argument for the time being.

To apply the pfloat parser to a string, we can use the run function from the CharParsers module:

val run: Parser<'Result, unit> -> string -> ParserResult<'Result,unit>

run is the simplest function out of several provided by the CharParsers module for running parsers on input. Other functions allow you, for example, to run parsers directly on the contents of a file or a System.IO.Stream.

run applies the parser passed as the first argument to the string passed as the second argument and returns the return value of the parser in the form of a ParserResult value. The ParserResult type is a discriminated union type with the two cases: Success and Failure. In case the parser succeeds, the ParserResult value contains the result value, otherwise it contains an error message.

To simplify testing we write a little helper function that prints the result value or error message:

let test p str =
    match run p str with
    | Success(result, _, _)   -> printfn "Success: %A" result
    | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

With this helper function in place, we can test pfloat by executing

test pfloat "1.25"

which produces the output

Success: 1.25

Testing pfloat with a number literal that has an invalid exponent

test pfloat "1.25E 3"

yields the error message

Failure: Error in Ln: 1 Col: 6
1.25E 3
     ^
Expecting: decimal digit

4.3 Parsing a float between brackets

Implementing parsers with FParsec typically means combining higher‐level parsers from lower‐level ones. You start with the parser primitives provided by the library and then successively combine these into higher‐level parsers until you finally have a single parser for the complete input.

In the following sections we will illustrate this approach by discussing various sample parsers that build on each other. In this section we will begin with a very simple parser for a floating‐point number between brackets:

let str s = pstring s
let floatBetweenBrackets = str "[" >>. pfloat .>> str "]"
Note

If you’re trying to compile this or another code snippet and you get a compiler error mentioning F#’s “value restriction”, please see section 4.10.

The definition of str and floatBetweenBrackets involves three library functions that we haven’t yet introduced: pstring, >>. and .>>.

The function

val pstring: string -> Parser<string,'u>

takes a string as the argument and returns a parser for that string. When this parser is applied to an input stream it checks whether the following chars in the input stream match the given string. If the chars match the complete string, the parser consumes them, i.e. skips over them. Otherwise it fails without consuming any input. When the parser succeeds, it also returns the given string as the parser result, but since the string is a constant, you’ll rarely make use of the result.

The pstring function isn’t named string because otherwise it would hide the built‐in F# function string. In general, parser names in FParsec that would otherwise conflict with built‐in F# function names are prefixed by a single p char. pfloat is another example of this naming convention.

To save a few keystrokes we abbreviate pstring as str. So, for instance, str "[" is a parser that skips over the char '['.

The binary operators >>. and .>> have the following types:

val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>
val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

As you can see from these signatures, both operators are parser combinators that construct a new parser from the two argument parsers. The parser p1 >>. p2 parses p1 and p2 in sequence and returns the result of p2. The parser p1 .>> p2 also parses p1 and p2 in sequence, but it returns the result of p1 instead of p2. In each case the point points to the side of the parser whose result is returned. By combining both operators in p1 >>. p2 .>> p3 we obtain a parser that parses p1, p2 and p3 in sequence and returns the result from p2.

Note

With the somewhat imprecise wording “parses p1 and p2 in sequence” we actually mean: The parser p1 is applied to the input and if p1 succeeds then p2 is applied to the remaining input; in case any of the two element parsers fails, the aggregate parser immediately propagates the error message.

In the documentation for FParsec we often use expressions such as “parses p” or “parses an occurrence of p” instead of the technically more accurate “applies the parser p to the remaining input and if p succeeds …”, hoping that the exact meaning is obvious from the context.

The following tests show that floatBetweenBrackets parses valid input as expected and produces informative error messages when it encounters invalid input:

> test floatBetweenBrackets "[1.0]";;
Success: 1.0

> test floatBetweenBrackets "[]";;
Failure: Error in Ln: 1 Col: 2
[]
 ^
Expecting: floating-point number

> test floatBetweenBrackets "[1.0";;
Failure: Error in Ln: 1 Col: 5
[1.0
    ^
Note: The error occurred at the end of the input stream.
Expecting: ']'

4.4 Abstracting parsers

One of FParsec’s greatest strengths is the ease with which you can define your own parser abstractions.

Take for instance the floatBetweenBrackets from the previous section. If you intend to also parse other elements between strings, you could define your own specialized combinator for this purpose:

let betweenStrings s1 s2 p = str s1 >>. p .>> str s2

You could then define floatBetweenBrackets and other parsers with the help of this combinator:

let floatBetweenBrackets = pfloat |> betweenStrings "[" "]"
let floatBetweenDoubleBrackets = pfloat |> betweenStrings "[[" "]]"
Note

In case you’re new to F#:
pfloat |> betweenStrings "[" "]" is just another way to write betweenStrings "[" "]" pfloat.

Once you notice that you frequently need to apply a parser between two others, you could go a step further and factor betweenStrings as follows:

let between pBegin pEnd p  = pBegin >>. p .>> pEnd
let betweenStrings s1 s2 p = p |> between (str s1) (str s2)

Actually, you don’t need to define between, because this is already a built‐in FParsec combinator.

These are all trivial examples, of course. But since FParsec is merely an F# library and not some external parser generator tool, there are no limits to the abstractions you can define. You can write functions that take whatever input you need, do some arbitrarily complex computations on the input and then return a special purpose parser or parser combinator.

For example, you could write a function that takes a regular‐expression pattern as the input and returns a Parser for parsing input conforming to that pattern. This function could use another parser to parse the pattern into an AST and then compile this AST into a special‐purpose parser function. Alternatively, it could construct a .NET regular expression from the pattern and then return a parser function that uses FParsec’s CharStream API to directly apply the regex to the input stream (which is what the built‐in regex parser actually does).

Another example is extensible parser applications. By storing parser functions in dictionaries or other data structures and defining an appropriate extension protocol, you could allow plugins to dynamically register new parsers or modify existing ones.

The possibilities are really endless. But before you can fully exploit these possibilities, you first need to be familiar with the fundamentals of FParsec.

4.5 Parsing a list of floats

We’ve already spent three sections on discussing how to parse a single floating‐point number, so it’s about time we try something more ambitious: parsing a list of floating‐point numbers.

Let us first assume that we need to parse a sequence of floating‐point numbers in brackets, i.e. text in the following EBNF format: ("[" float "]")*. Valid input strings in this format are for example: "", "[1.0]", "[2][3][4]".

Since we already have a parser for a float between brackets, we only need a way to repeatedly apply this parser to parse a sequence. This is what the many combinator is for:

val many: Parser<'a,'u> -> Parser<'a list,'u>

The parser many p repeatedly applies the parser p until p fails, i.e. it “greedily” parses as many occurrences of p as possible. The results of p are returned as a list in the order of occurrence.

Some simple tests show that many floatBetweenBrackets works as expected:

> test (many floatBetweenBrackets) "";;
Success: []
> test (many floatBetweenBrackets) "[1.0]";;
Success: [1.0]
> test (many floatBetweenBrackets) "[2][3][4]";;
Success: [2.0; 3.0; 4.0]

If floatBetweenBrackets fails after consuming input, then the combined parser fails too:

> test (many floatBetweenBrackets) "[1][2.0E]";;
Failure: Error in Ln: 1 Col: 9
[1][2.0E]
        ^
Expecting: decimal digit

Note that many also succeeds for an empty sequence. If you want to require at least one element, you can use many1 instead:

> test (many1 floatBetweenBrackets) "(1)";;
Failure: Error in Ln: 1 Col: 1
(1)
^
Expecting: '['
Tip

If you’d prefer the last error message to be worded in terms of the higher level floatBetweenBrackets parser instead of the lower level str "[" parser, you could use the <?> operator as in the following example:

> test (many1 (floatBetweenBrackets <?> "float between brackets")) "(1)";;
Failure: Error in Ln: 1 Col: 1
(1)
^
Expecting: float between brackets

Please see section 5.8 of the user’s guide to learn more about customizing error messages.

If you just want to skip over a sequence and don’t need the list of parser results, you could use the optimized combinators skipMany or skipMany1 instead of many and many1.

Another frequently used combinator for parsing sequences is sepBy:

val sepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list, 'u>

sepBy takes an “element” parser and a “separator” parser as the arguments and returns a parser for a list of elements separated by separators. In EBNF notation sepBy p pSep could be written as (p (pSep p)*)?. Similar to many, there are several variants of sepBy.

With the help of sepBy we can parse a more readable list format, where floating‐point numbers are separated by a comma:

floatList: "[" (float ("," float)*)? "]"

Valid input strings in this format are for example: "[]", "[1.0]", "[2,3,4]".

The straightforward implementation of this format is

let floatList = str "[" >>. sepBy pfloat (str ",") .>> str "]"

Testing floatList with valid test strings yields the expected result:

> test floatList "[]";;
Success: []
> test floatList "[1.0]";;
Success: [1.0]
> test floatList "[4,5,6]";;
Success: [4.0; 5.0; 6.0]

Testing with invalid input shows that floatList produces helpful error messages:

> test floatList "[1.0,]";;
Failure: Error in Ln: 1 Col: 6
[1.0,]
     ^
Expecting: floating-point number

> test floatList "[1.0,2.0";;
Failure: Error in Ln: 1 Col: 9
[1.0,2.0
        ^
Note: The error occurred at the end of the input stream.
Expecting: ',' or ']'

4.6 Handling whitespace

FParsec treats whitespace (spaces, tabs, newlines, etc) just as any other input, so our floatList parser can’t yet deal with whitespace:

> test floatList "[1.0, 2.0]";;
Failure: Error in Ln: 1 Col: 6
[1.0, 2.0]
     ^
Expecting: floating-point number

If we want the parser to ignore whitespace, we need to make this explicit in the parser definition.

First, we need to define what we want to accept as whitespace. For simplicity we will just use the built‐in spaces parser, which skips over any (possibly empty) sequence of ' ', '\t', '\r' or '\n' chars.

let ws = spaces

Next, we need to insert the ws parser at every point where we want to ignore whitespace. In general it’s best to skip whitespace after one parses elements, i.e. skip trailing instead of leading whitespace, because that reduces the need for backtracking (which will be explained below). Hence, we insert ws at two places to skip over any whitespace after brackets or numbers:

let str_ws s = pstring s .>> ws
let float_ws = pfloat .>> ws
let numberList = str_ws "[" >>. sepBy float_ws (str_ws ",") .>> str_ws "]"

A simple test shows that numberList ignores whitespace:

> test numberList @"[ 1 ,
                          2 ] ";;
Success: [1.0; 2.0]

If we introduce an error on the second line, we see that FParsec automatically keeps track of the line count:

> test numberList @"[ 1,
                         2; 3]";;

Failure: Error in Ln: 2 Col: 27
                         2; 3]
                          ^
Expecting: ',' or ']'

Our numberList parser still doesn’t skip leading whitespace, because that’s not necessary when we put it together with other parsers that skip all trailing whitespace. If we wanted to parse a whole input stream with only a list of floating‐point numbers, we could use the following parser:

let numberListFile = ws >>. numberList .>> eof

The end‐of‐file parser eof will generate an error if the end of the stream hasn’t been reached. This is useful for making sure that the complete input gets consumed. Without the eof parser the following test wouldn’t produce an error:

> test numberListFile " [1, 2, 3] [4]";;
Failure: Error in Ln: 1 Col: 12
 [1, 2, 3] [4]
           ^
Expecting: end of input

4.7 Parsing string data

FParsec contains various built‐in parsers for chars, strings, numbers and whitespace. In this section we will introduce a few of the char and string parsers. For an overview of all available parsers please refer to the parser overview in the reference.

You’ve already seen several applications of the pstring parser (abbreviated as str), which simply skips over a constant string in the input. When the pstring parser succeeds, it also returns the skipped string as the parser result. The following example demonstrates this:

> test (many (str "a" <|> str "b")) "abba";;
Success: ["a"; "b"; "b"; "a"]

In this example we also used the <|> combinator to combine two alternative parsers. We’ll discuss this combinator in more detail below.

Note

We refer to both pstring and pstring "a" as “parsers”. Strictly speaking, pstring is a function taking a string argument and returning a Parser, but it’s more convenient to just refer to it as a (parametric) parser.

When you don’t need the result of the pstring parser, you can alternatively use the skipString parser, which returns the unit value () instead of the argument string. In this case it doesn’t make any difference to performance whether you use pstring or skipString, since the returned string is a constant. However, for most other built‐in parsers and combinators you should prefer the variants with the “skip” name prefix when you don’t need the parser result values, because these will generally be faster. If you look at the parser overview, you’ll see “skip” variants for many of the built‐in parsers and combinators.

If you want to parse a case insensitive string constant you can use pstringCI and skipStringCI. For example:

> test (skipStringCI "<float>" >>. pfloat) "<FLOAT>1.0";;
Success: 1.0

Frequently one needs to parse string variables whose chars have to satisfy certain criteria. For instance, identifiers in programming languages often need to start with a letter or underscore and then need to continue with letters, digits or underscores. To parse such an identifier you could use the following parser:

let identifier =
    let isIdentifierFirstChar c = isLetter c || c = '_'
    let isIdentifierChar c = isLetter c || isDigit c || c = '_'

    many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier"
    .>> ws // skips trailing whitespace

Here we have used the many1Satisfy2L string parser, which is one of several primitives for parsing strings based on char predicates (i.e. functions that take a char as input and return a boolean value). It parses any sequence of one or more chars (hence the “many1” in the name) whose first char satisfies the first predicate function and whose remaining chars satisfy the second predicate (hence the “Satisfy2”). The string label given as the third argument (hence the “L”) is used in error messages to describe the expected input.

The following tests show how this parser works:

> test identifier "_";;
Success: "_"
> test identifier "_test1=";;
Success: "_test1"
> test identifier "1";;
Failure: Error in Ln: 1 Col: 1
1
^
Expecting: identifier
Tip

If you want to parse identifiers based on the Unicode XID syntax, consider using the built‐in identifier parser.

Many string formats are complicated enough that you need to combine several char and string parser primitives. For example, consider the following string literal format:

  stringLiteral: '"' (normalChar|escapedChar)* '"'
  normalChar:    any char except '\' and '"'
  escapedChar:   '\\' ('\\'|'"'|'n'|'r'|'t')

A straightforward translation of this grammar to FParsec looks like:

let stringLiteral =
    let normalChar = satisfy (fun c -> c <> '\\' && c <> '"')
    let unescape c = match c with
                     | 'n' -> '\n'
                     | 'r' -> '\r'
                     | 't' -> '\t'
                     | c   -> c
    let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> unescape)
    between (pstring "\"") (pstring "\"")
            (manyChars (normalChar <|> escapedChar))

In this example we use several library functions that we haven’t yet introduced:

  • satisfy parses any char that satisfies the given predicate function.
  • anyOf parses any char contained in the argument string.
  • The pipeline combinator |>> applies the function on the right side (unescape) to the result of the parser on the left side (anyOf "\\nrt\"").
  • The choice combinator <|> applies the parser on the right side if the parser on the left side fails, so that normalChar <|> escapedChar can parse both normal and escaped chars. (We will discuss this operator in more detail two sections below.)
  • manyChars parses a sequence of chars with the given char parser and returns it as a string.

Let’s test the stringLiteral parser with a few test inputs:

> test stringLiteral "\"abc\"";;
Success: "abc"
> test stringLiteral "\"abc\\\"def\\\\ghi\"";;
Success: "abc"def\ghi"
> test stringLiteral "\"abc\\def\"";;
Failure: Error in Ln: 1 Col: 6
"abc\def"
     ^
Expecting: any char in ‘\nrt"’

Instead of parsing the string literal char‐by‐char we could also parse it “snippet‐by‐snippet”:

let stringLiteral2 =
    let normalCharSnippet = many1Satisfy (fun c -> c <> '\\' && c <> '"')
    let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> function
                                                            | 'n' -> "\n"
                                                            | 'r' -> "\r"
                                                            | 't' -> "\t"
                                                            | c   -> string c)
    between (pstring "\"") (pstring "\"")
            (manyStrings (normalCharSnippet <|> escapedChar))

Here we have used the manyStrings combinator, which parses a sequence of strings with the given string parser and returns the strings in concatenated form.

Note

We have to require normalCharSnippet to consume at least one char, i.e. use many1Satisfy instead of manySatisfy. Otherwise normalCharSnippet would succeed even if it doesn’t consume input, escapedChar would never be called and manyStrings would eventually throw an exception to prevent an infinite loop.

Parsing a string chunk‐wise using an optimized parser like many1Satisfy is usually a bit faster than parsing it char‐wise using manyChars and satisfy. In this case we can optimize our parser even a bit further – once we realize that two normal char snippets must be separated by at least one escaped char:

let stringLiteral3 =
    let normalCharSnippet = manySatisfy (fun c -> c <> '\\' && c <> '"')
    let escapedChar = (* like in stringLiteral2 *)
    between (pstring "\"") (pstring "\"")
            (stringsSepBy normalCharSnippet escapedChar)

The stringsSepBy combinator parses a sequence of strings (with the first argument parser) separated by other strings (parsed with the second argument parser). It returns all parsed strings, including the separator strings, as a single, concatenated string.

Note that stringLiteral3 uses manySatisfy instead of many1Satisfy in its normalCharSnippet definition, so that it can parse escaped chars that are not separated by normal chars. This can’t lead to an infinite loop because escapedChar can’t succeed without consuming input.

4.8 Sequentially applying parsers

Whenever you need to apply multiple parsers in sequence and only need the result of one of them, a suitable combination of >>. and .>> operators will do the job. However, these combinators won’t suffice if you need the result of more than one of the involved parsers. In that case you can use the pipe2, …, pipe5 combinators, which apply multiple parsers in sequence and pass all the individual results to a function that computes the aggregate result.

For instance, with the pipe2 combinator

val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

you can construct a parser pipe2 p1 p2 f that sequentially applies the two parsers p1 and p2 and then returns the result of the function application f x1 x2, where x1 and x2 are the results returned by p1 and p2.

In the following example we use pipe2 to parse a product of two numbers:

let product = pipe2 float_ws (str_ws "*" >>. float_ws)
                    (fun x y -> x * y)
> test product "3 * 5";;
Success: 15.0

The pipe2-5 combinators are particularly useful for constructing AST objects. In the following example we use pipe3 to parse a string constant definition into a StringConstant object:

type StringConstant = StringConstant of string * string

let stringConstant = pipe3 identifier (str_ws "=") stringLiteral
                           (fun id _ str -> StringConstant(id, str))
> test stringConstant "myString = \"stringValue\"";;
Success: StringConstant ("myString","stringValue")

If you just want to return the parsed values as a tuple, you can use the predefined tuple2-5 parsers. For instance, tuple2 p1 p2 is equivalent to pipe2 p1 p2 (fun x1 x2 -> (x1, x2)).

The tuple2 parser is also available under the operator name .>>., so that you can write p1 .>>. p2 instead of tuple2 p1 p2. In the following example we parse a pair of comma separated numbers with this operator:

> test (float_ws .>>. (str_ws "," >>. float_ws)) "123, 456";;
Success: (123.0, 456.0)

Hopefully you find the >>‐with‐1‐or‐2‐dots‐notation intuitive by now.

If you need a pipe or tuple parser with more than 5 arguments, you can easily construct one using the existing ones. For example, do you have an idea how to define a pipe7 parser? This footnote gives a possible solution: [1]

4.9 Parsing alternatives

In the section on Parsing string data we already briefly introduced the choice combinator <|>:

val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u>

This combinator allows you to support multiple alternative input formats at a given input position. For example, in the above section we used <|> to combine a parser for unescaped chars and a parser for escaped chars into a parser that supports both: normalChar <|> escapedChar.

Another example that shows how <|> works is the following parser for boolean variables:

let boolean =     (stringReturn "true"  true)
              <|> (stringReturn "false" false)

Here we have also used the stringReturn parser, which skips the string constant given as the first argument and, if successful, returns the value given as the second argument.

Testing the boolean parser with some inputs yields:

> test boolean "false";;
Success: false
> test boolean "true";;
Success: true
> test boolean "tru";;
Failure: Error in Ln: 1 Col: 1
tru
^
Expecting: 'false' or 'true'

The behaviour of the <|> combinator has two important characteristics:

  • <|> only tries the parser on the right side if the parser on the left side fails. It does not implement a longest match rule.
  • However, it only tries the right parser if the left parser fails without consuming input.

A consequence of the second point is that the following test fails because the parser on the left side of <|> consumes whitespace before it fails:

> test ((ws >>. str "a") <|> (ws >>. str "b")) " b";;
Failure: Error in Ln: 1 Col: 2
 b
 ^
Expecting: 'a'

Fortunately, we can easily fix this parser by factoring out ws:

> test (ws >>. (str "a" <|> str "b")) " b";;
Success: "b"

If you’re curious why <|> behaves this way and how you can handle situations where you need <|> to try the alternative parser even if the first parser fails after consuming input, please see section 5.6 and section 5.7 in the user’s guide.

If you want to try more than two alternative parsers, you can chain multiple <|> operators, like in p1 <|> p2 <|> p3 <|> ..., or you can use the choice combinator, which accepts a sequence of parsers as the argument, like in choice [p1; p2; p3; ...].

4.10 F#’s value restriction

When you start writing your own parsers with FParsec or try to compile some individual code snippets from above, you’ll come across a compiler issue that often causes some head‐scratching among new users of F# and FParsec: the value restriction. In this section we’ll explain the value restriction and how you can handle it in your FParsec programs.

Note

If you find the discussion in this section too technical for the moment, just skip to the next section and come back later when you actually see a compiler message mentioning “value restriction” for the first time.

F#’s value restriction is the reason that the following code snippet does not compile

open FParsec
let p = pstring "test"

even though the following snippet compiles without a problem[2]:

open FParsec
let p = pstring "test"
run p "input"

The compiler error generated for the first sample is the following:

error FS0030: Value restriction.
The value 'p' has been inferred to have generic type
    val p : Parser<string,'_a>
Either make the arguments to 'p' explicit or,
if you do not intend for it to be generic, add a type annotation.

When you work with FParsec you’ll sooner or later see this or similar error messages, in particular if you work with the interactive console prompt. Fortunately, this kind of error is usually easy to work around.

The problem with the first snippet above is that the F# compiler infers the p value to have an unresolved generic type, although F# doesn’t permit a generic value in this situation. The return type of the pstring function is Parser<string,'u>, where the type parameter 'u represents the type of the CharStream user state. Since there is nothing in the first snippet that constrains this type parameter, the compiler infers the type Parser<string,'_a> for the parser value p, with '_a representing an unresolved type parameter.

In the second snippet this problem doesn’t occur, because the use of p as the first argument to the run function constrains the user state type. Since run only accepts parsers of type Parser<'t,unit>, the compiler infers the non‐generic type Parser<string,unit> for p.

This example suggests two ways to handle the value restriction in FParsec programs:

  • Either make sure that the type of a parser value is constrained to a non‐generic type by subsequent uses of this parser value in the same compilation unit,
  • or provide an explicit type annotation to manually constrain the type of the parser value (usually, a few type annotations in key locations are enough for a whole parser module).

Often it is convenient to define some type abbreviations like the following

type UserState = unit // doesn't have to be unit, of course
type Parser<'t> = Parser<'t, UserState>

With such an abbreviation in place, type annotations become as simple as

let p : Parser<_> = pstring "test"

Of course, constraining the type of a parser value to a non‐generic type is only a solution if you don’t actually need a generic type. If you do need a generic value, you’ll have to apply other techniques, as they are for example explained in the F# reference or in a blog entry by Dmitry Lomov. However, FParsec Parser values (not parametric parser functions) are usually only used in the context of a specific parser application with a fixed user state type. In that situation constraining the type is indeed the appropriate measure to avoid a value restriction error.

4.11 Parsing JSON

Now that we have discussed the basics of FParsec we are well prepared to work through a real world parser example: a JSON parser.

JSON (JavaScript Object Notation) is a text‐based data interchange format with a simple and lightweight syntax. You can find descriptions of the syntax on json.org and in RFC 4627.

In many applications one only has to deal with JSON files describing one particular kind of object. In such a context it sometimes can be appropriate to write a specialized parser just for that specific kind of JSON file. In this tutorial, however, we will follow a more general approach. We will implement a parser that can parse any general JSON file into an AST, i.e. an intermediate data structure describing the contents of the file. Applications can then conveniently query this data structure and extract the information they need. This is an approach comparable to that of XML parsers which build a data structure describing the document tree of an XML document. The great advantage of this approach is that the JSON parser itself becomes reusable and the document specific parsing logic can be expressed in the form of simple functions processing the AST of the JSON document.

The natural way to implement an AST in F# is with the help of a discriminated union type. If you look at the JSON specification, you can see that a JSON value can be a string, a number, a boolean, null, a comma‐separated list of values in square brackets, or an object with a sequence of key‐value pairs in curly brackets.

In our parser we will use the following union type to represent JSON values:

type Json = JString of string
          | JNumber of float
          | JBool   of bool
          | JNull
          | JList   of Json list
          | JObject of Map<string, Json>

Here we’ve chosen the F# list type to represent a sequence of values and the Map type to represent a sequence of key‐value pairs, because these types are particularly convenient to process in F#.[3] Note that the Json type is recursive, since both JList and JObject values can themselves contain Json values. Our parser will have to reflect this recursive structure.

Tip

If you’re new to FParsec and have a little time, it would be a good exercise to try to implement the JSON parser on your own (with the help of the reference documentation). This tutorial already covered almost everything you need and the JSON grammar is simple enough that this shouldn’t take too much time. Of course, you can always peek at the implementation below if you get stuck.

We start the actual parser implementation by covering the simple null and boolean cases:

let jnull  = stringReturn "null" JNull
let jtrue  = stringReturn "true"  (JBool true)
let jfalse = stringReturn "false" (JBool false)

Handling the number case is just as simple, because the JSON number format is based on the typical floating‐point number format used in many programming languages and hence can be parsed with FParsec’s built‐in pfloat parser:

let jnumber = pfloat |>> JNumber

(Note that F# allows us to pass the object constructor JNumber as a function argument.)

If you compare the precise number format supported by pfloat with that in the JSON spec, you’ll see that pfloat supports a superset of the JSON format. In contrast to the JSON format the pfloat parser also recognizes NaN and Infinity values, accepts a leading plus sign, accepts leading zeros and even supports the hexadecimal float format of Java and C99. Depending on the context this behaviour can be considered a feature or a limitation of the parser. For most applications it probably doesn’t matter, and the JSON RFC clearly states that a JSON parser may support a superset of the JSON syntax. However, if you’d rather only support the exact JSON number format, you can implement such a float parser rather easily based on the configurable numberLiteral parser (just have a look at how this is currently done in the pfloat source).

The JSON string format takes a little more effort to implement, but we’ve already parsed a similar format with the stringLiteral parsers in section 4.7, so we can just adapt one of those parsers for our purpose:

let str s = pstring s

// Parses a JSON string literal in double quotes and returns its content with
// all escape sequences replaced by the chars they denote.
let stringLiteral =
    // a single-char escape (the char following the backslash)
    let escape =  anyOf "\"\\/bfnrt"
                  |>> function
                      | 'b' -> "\b"
                      | 'f' -> "\u000C"
                      | 'n' -> "\n"
                      | 'r' -> "\r"
                      | 't' -> "\t"
                      | c   -> string c // every other char is mapped to itself

    // a Unicode escape: 'u' followed by four hex digits
    let unicodeEscape =
        /// converts a hex char ([0-9a-fA-F]) to its integer number (0-15)
        let hex2int c = (int c &&& 15) + (int c >>> 6)*9

        str "u" >>. pipe4 hex hex hex hex (fun h3 h2 h1 h0 ->
            (hex2int h3)*4096 + (hex2int h2)*256 + (hex2int h1)*16 + hex2int h0
            |> char |> string
        )

    let escapedCharSnippet = str "\\" >>. (escape <|> unicodeEscape)
    let normalCharSnippet  = manySatisfy (fun c -> c <> '"' && c <> '\\')

    between (str "\"") (str "\"")
            (stringsSepBy normalCharSnippet escapedCharSnippet)

let jstring = stringLiteral |>> JString

stringLiteral parses string literals as a sequence of normal char snippets separated by escaped char snippets. A normal char snippet is any sequence of chars that does not contain the chars '"' and '\\'. An escaped char snippet consists of a backslash followed by any of the chars '\\', '\"', '/', 'b', 'f', 'n', 'r', 't', or a Unicode escape. A Unicode escape consists of a 'u' followed by four hex chars representing a UTF‐16 code unit.

The grammar rules for JSON lists and objects are recursive, because any list or object can itself contain any kind of JSON value. Hence, in order to write parsers for the list and object grammar rules, we need a way to refer to the parser for any kind of JSON value, even though we haven’t yet constructed this parser. As is so often the case in computing, we can solve this problem by introducing an extra indirection:

let jvalue, jvalueRef = createParserForwardedToRef<Json, unit>()

As you might have guessed from the name, createParserForwardedToRef creates a parser (jvalue) that forwards all invocations to the parser in a reference cell (jvalueRef). Initially, the reference cell holds a dummy parser, but since the reference cell is mutable, we can later replace the dummy parser with the actual value parser, once we have finished constructing it.

The JSON RFC sensibly only permits spaces, (horizontal) tabs, line feeds and carriage returns as whitespace characters, which allows us to use the built‐in spaces parser for parsing whitespace:

let ws = spaces

Both JSON lists and objects are syntactically represented as comma‐separated lists of “elements” between brackets, where whitespace is allowed before and after any bracket, comma and list element. We can conveniently parse such lists with the following helper function:

let listBetweenStrings sOpen sClose pElement f =
    between (str sOpen) (str sClose)
            (ws >>. sepBy (pElement .>> ws) (str "," >>. ws) |>> f)

This function takes four arguments: an opening string, a closing string, an element parser and a function that is applied to the parsed list of elements.

With the help of this function we can define the parser for a JSON list as follows:

let jlist   = listBetweenStrings "[" "]" jvalue JList

JSON objects are lists of key‐value pairs, so we need a parser for a key‐value pair:

let keyValue = stringLiteral .>>. (ws >>. str ":" >>. ws >>. jvalue)

(Remember, the points on both sides of .>>. indicate that the results of the two parsers on both sides are returned as a tuple.)

By passing the keyValue parser to listBetweenStrings we obtain a parser for JSON objects:

let jobject = listBetweenStrings "{" "}" keyValue (Map.ofList >> JObject)

Having defined parsers for all the possible kinds of JSON values, we can combine the different cases with a choice parser to obtain the finished parser for JSON values:

do jvalueRef := choice [jobject
                        jlist
                        jstring
                        jnumber
                        jtrue
                        jfalse
                        jnull]

The jvalue parser doesn’t accept leading or trailing whitespace, so we need to define our parser for complete JSON documents as follows:

let json = ws >>. jvalue .>> ws .>> eof

This parser will try to consume a complete JSON input stream and, if successful, will return a Json AST of the input as the parser result.

And that’s it, we’re finished with our JSON parser. If you want to try this parser out on some sample input, please take a look at the JSON project in the Samples folder.

4.12 What now?

If this tutorial has whetted your appetite for a more in‐depth introduction to FParsec, just head over to the user’s guide. If you can’t wait to write your own parser, then bookmark the parser overview page, maybe take a short look at the example parsers in the Samples folder and just start hacking. You can always consult the user’s guide at a later point should you get stuck somewhere.

Footnotes:
[1]
let pipe7 p1 p2 p3 p4 p5 p6 p7 f =
    pipe4 p1 p2 p3 (tuple4 p4 p5 p6 p7)
          (fun x1 x2 x3 (x4, x5, x6, x7) -> f x1 x2 x3 x4 x5 x6 x7)
[2] Assuming you referenced the two FParsec DLLs.
[3] If you need to parse huge sequences and objects, it might be more appropriate to use an array and dictionary for JList and JObject respectively.
================================================ FILE: Doc/html/users-guide/applying-parsers-in-sequence.html ================================================ Applying parsers in sequence

5.4 Applying parsers in sequence

Now that we have discussed how Parser functions work, we can start explaining how FParsec’s parser combinators work.

In this chapter we will discuss combinators that allow you to apply multiple parsers in sequence, i.e. parse the beginning of the input with the first parser, then parse the following input with the second parser, and so on.

5.4.1 The definition of >>.

The simplest combinators for sequentially applying two parsers are

val (>>.):   Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>
val (.>>):   Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

Both operators take two parsers as arguments and return a combined parser that applies the two parsers in sequence. As you can infer from the type signatures, p1 >>. p2 returns the result of p2 and p1 .>> p2 the result of p1. In each case the point points to the parser whose result is returned.

In order to explain exactly what it means to apply two parsers in sequence, we give a full definition of the >>. operator:

let (>>.) (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) =
    fun stream ->
        let reply1 = p1 stream
        if reply1.Status = Ok then
            let stateTag = stream.StateTag
            let mutable reply2 = p2 stream
            if stateTag = stream.StateTag then                        // (1)
                reply2.Error <- mergeErrors reply1.Error reply2.Error // (2)
            reply2
        else // reconstruct error reply with new result type
            Reply(reply1.Status, reply1.Error)

The implementation of p1 >>. p2 should be quite self‐explanatory: First p1 is applied to the input stream. If p1 succeeds, i.e. if the status of reply1 is Ok, p2 is applied to stream and the reply of p2 then becomes the reply of the combined parser. However, if p1 fails, its reply is immediately propagated as the reply of the combined parser. Since reply1 has type Reply<'a,'u> but p1 >>. p2 needs to return a Reply<'b,'u>, the error reply needs to be reconstructed with a new result type before it can be returned.

5.4.2 Merging error messages

We mentioned earlier that the error messages returned in the Reply.Error field implicitly refer to the state of the CharStream at the time the parser returns. In particular the error messages refer to the then current stream position. Since the messages do not themselves contain a separate record of the error position, they can only be interpreted together with the CharStream state.

When p2 does not change the parser state, the error messages from both replies refer to the state of the CharStream as it is when p1 >>. p2 returns. Thus, the combinator needs to merge the (immutable) ErrorMessageLists from both replies, so that the returned list contains all the relevant error messages (see the line marked with (2)).

In order to check whether the CharStream state has changed, the combinator does not compare the full states from before and after p2 is invoked. Instead it only compares the StateTag values (see line (1)). This improves performance and — for most practical purposes — is almost equivalent to comparing the full state, as we will discuss below.

Note

The way (>>.) handles errors and merges error messages is a template for all combinators in FParsec that perform multiple sequential parser invocations.

You may wonder why the error messages get merged even though p1 succeeded. The somewhat counterintuitive reason is that parsers can return nonempty error message lists even when they don’t fail. For example, a parser that skips over the optional string "str" will return Reply(Ok, (), expectedString "str") if it doesn’t find the string in the input. In this case the error message describes what further input the parser could have parsed at the current stream position. If subsequently a parser fails at the same position, all error messages for the same position can be aggregated to give the user as much information as possible about what went wrong and what alternative inputs could have been parsed at the given position.

The following sample demonstrates the helpful effect of this error handling behaviour:

let str s = pstring s

let oneOrTwoInts =
    str "(" >>. tuple2 pint32 (opt (str "," >>. spaces >>. pint32)) .>> str ")"
> run oneOrTwoInts "(1 2)";;
val it : ParserResult<(int32 * int32 option),unit> = Failure:
Error in Ln: 1 Col: 3
(1 2)
  ^
Expecting: ')' or ','

This error message wouldn’t mention the possibility of a missing comma if the .>> combinator did not merge error messages for the same position when the left‐hand side parser succeeds.

5.4.3 The StateTag

Parser combinators often need to check whether a parser has changed the CharStream state. In a typical FParsec application these checks are performed so frequently that an efficient implementation is important for the overall parser performance. Since a straightforward comparison of the complete CharStream states can be quite expensive, the CharStream class provides a shortcut for this purpose: the StateTag.

The StateTag is a simple integer counter that is incremented every time a CharStream method changes the state. Thus, if the StateTag hasn’t changed, you can safely infer that the state hasn’t changed either.[1] Except for some special cases, the opposite is also true: if the StateTag has changed, the state has changed too.

In the following special cases checking whether the StateTag has changed is not equivalent to checking whether the CharStream state has changed, because the tag may change even though the state doesn’t:

  • A parser calls the basic Skip or Read methods with a 0 offset or an empty argument string.
  • A parser seeks the CharStream to the current position or replaces the user state with the current value.
  • A parser makes several calls to CharStream methods and in later calls undoes the changes it made in earlier calls.

The first and second cases only have practical relevance for generic or parameterized parsers and can be simply avoided by checking the arguments before calling the respective CharStream methods. The third case only arises in the context of backtracking and it too can be easily avoided, either by using the BacktrackTo method for backtracking or by manually restoring the StateTag after the backtracking.

In practice these special cases are extremely rare, usually without consequences for the parser behaviour and always easily avoidable. Hence, FParsec combinators make free use of the StateTag to check whether a parser has changed the CharStream state.

5.4.4 Generalizing >>.

The parsers p1 .>> p2 and p1 >>. p2 only return the results of p1 and p2 respectively. If you want to combine the results from both p1 and p2, you could use the pipe2 combinator instead:

val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

The parser pipe2 p1 p2 f will apply p1 and p2 in sequence, exactly like >>., but instead of returning one of the result values of p1 and p2 it will return the result of the function application f x1 x2, where x1 and x2 are the results returned by p1 and p2.

There are also pipe3, pipe4 and pipe5 combinators, in case you need more than two arguments. Often these combinators are used to pass arguments to object constructors, like in the following example of a parser for a comma‐separated list of XYZ coordinates:

type Data = Point of float*float*float

let ws = spaces
let str s = pstring s .>> ws
let number = pfloat .>> ws
let point = pipe3 number (str "," >>. number) (str "," >>. number)
                  (fun x y z -> Point(x, y, z))
> run point "1, 2, 3";;
val it : ParserResult<Data,unit> = Success: Point (1.0,2.0,3.0)

If you just want to return the parsed values as a tuple, you can use the predefined tuple2-5 parsers. For example, tuple2 p1 p2 is equivalent to pipe2 p1 p2 (fun x1 x2 -> (x1, x2)).

tuple2 is also available under the operator name .>>., so that you can write p1 .>>. p2 instead of tuple2 p1 p2.

There is no pipe1 combinator, but there is an operator for the same purpose:

val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b, 'u>

This operator is used similarly to F#’s ubiquitous pipeline operator |>:

type Expression = Number of int
                | Identifier of string

let number = pint32 |>> Number
> run number "123";;
val it : ParserResult<Expression,unit> = Success: Number 123

5.4.5 The >>= combinator

All the sequencing and piping combinators we have discussed so far could be implemented with the help of the “bind” combinator:

val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

Instead of two parsers this combinator takes a parser and a function producing a parser as arguments. The combined parser p >>= f first applies the parser p to the input, then it applies the function f to the result returned by p and finally it applies the parser returned by f to the input. If we knew in advance that p returns x then p >>= f would be equivalent to p >>. (f x).

The >>= combinator is quite versatile. For example, the following code implements five of the previously discussed combinators in terms of >>= and the trivial preturn primitive:

let preturn x = fun stream -> Reply(x)

let (|>>) p  f    = p  >>= fun x -> preturn (f x)
let (.>>) p1 p2   = p1 >>= fun x -> p2 >>= fun _ -> preturn x
let (>>.) p1 p2   = p1 >>= fun _ -> p2 >>= fun y -> preturn y
let (.>>.) p1 p2  = p1 >>= fun x -> p2 >>= fun y -> preturn (x, y)
let pipe2 p1 p2 f = p1 >>= fun x -> p2 >>= fun y -> preturn (f x y)

In typical FParsec code >>= is only seldom used, because in many situations where >>= could in principle be used one of the other specialized operators is more convenient to use and faster. However, on a conceptual level this combinator is important, because its generality allows us to define and test many combinators through their equivalence with a parser defined in terms of >>=. This combinator is also significant for the role it plays in the monadic parser construction syntax, see section 5.10.

Footnotes:
[1] Of course, this doesn’t apply if you manually set back the StateTag to the old value. There is also the purely theoretical possibility that the StateTag has overflowed and was incremented exactly 2^64 times (or 2^32 if you define the SMALL_STATETAG conditional compiler symbol).
================================================ FILE: Doc/html/users-guide/customizing-error-messages.html ================================================ Customizing error messages

5.8 Customizing error messages

Generating relevant and informative parser error messages is one of FParsec’s greatest strengths. The top‐down approach of recursive‐descent parsing guarantees that there is always enough context to describe the exact cause of a parser error and how it could be avoided. FParsec exploits this context to automatically generate descriptive error messages whenever possible. This chapter explains how you can ensure with minimal effort that your parser always produces understandable error messages.

As we already described in detail in section 5.4.2, error reporting in FParsec is based on the following two principles:

  • Parsers that fail or could have consumed more input return as part of their Reply an ErrorMessageList describing the input they expected or the reason they failed.
  • Parser combinators aggregate all error messages that apply to the same input position and then propagate these error messages as appropriate.

The various error messages in the previous chapters demonstrate that the built‐in error reporting usually works quite well even without any intervention by the parser author. However, sometimes FParsec lacks the information necessary to produce an informative error message by itself.

Consider for example the many1Satisfy f parser, which parses a string consisting of one or more chars satisfying the predicate function f. If this parser fails to parse at least one char, the generated error is not very helpful:

> run (many1Satisfy isLetter) "123";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
123
^
Unknown Error(s)

The problem here is that many1Satisfy can’t describe what chars the function predicate accepts. Hence, when you don’t use many1Satisfy as part of a combined parser that takes care of a potential error, you’d better replace it with many1SatisfyL, which allows you to describe the accepted input with a label (hence the “L”):

> run (many1SatisfyL isLetter "identifier") "123";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
123
^
Expecting: identifier

There are also labelled variants of other parsers and combinators, for example choiceL and notFollowedByL.

If there is no labelled parser variant or you want to replace a predefined error message, you can always use the labelling operator

val (<?>): Parser<'a,'u> -> string -> Parser<'a,'u>

The parser p <?> label behaves like p, except that the error messages are replaced with expected label if p does not change the parser state (usually because p failed).

For example, if FParsec didn’t provide many1SatisfyL, you could define it yourself as

let many1SatisfyL f label = many1Satisfy f <?> label

The labelling operator is particularly useful for producing error messages in terms of higher‐level grammar productions instead of error messages in terms of lower‐level component parsers. Suppose you want to parse a string literal with the following parser

let literal_ = between (pstring "\"") (pstring "\"")
                       (manySatisfy ((<>) '"'))

If this parser encounters input that doesn’t start with a double quote it will fail with the error message produced by the parser for the opening quote:

> run literal_ "123";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
123
^
Expecting: '"'

In situations like these an error message that mentions the aggregate thing you’re trying to parse will often be more helpful:

let literal = literal_ <?> "string literal in double quotes"
> run literal "123";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
123
^
Expecting: string literal in double quotes

Note that <?> only replaces the error message if the parser doesn’t consume input. For example, our literal parser won’t mention that we’re trying to parse a string literal if it fails after the initial double quote:

> run literal "\"abc def";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 9
"abc def
        ^
Note: The error occurred at the end of the input stream.
Expecting: '"'

With the compound labelling operator <??> you can make sure that the compound gets mentioned even if the parser fails after consuming input:

let literal = literal_ <??> "string literal in double quotes"
> run literal "\"abc def";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
"abc def
^
Expecting: string literal in double quotes

string literal in double quotes could not be parsed because:
  Error in Ln: 1 Col: 9
  "abc def
          ^
  Note: The error occurred at the end of the input stream.
  Expecting: '"'
Tip

If you don’t like the formatting of these error messages, you can write a custom formatter for your application. The data structure in which error messages are stored is easy to query and process. See the reference for the Error module.

The parsers we discussed so far in this chapter only generated Expected error messages, but FParsec also supports other types of error messages. For example, the notFollowedByL parser generates Unexpected error messages:

> run (notFollowedByL spaces "whitespace") " ";;
val it : ParserResult<unit,unit> = Failure:
Error in Ln: 1 Col: 1

^
Unexpected: whitespace

Error messages that don’t fit into the Expected and Unexpected categories can be produced with the fail and failFatally primitives:

let theory =
    charsTillString "3) " true System.Int32.MaxValue
     >>. (pstring "profit" <|> fail "So much about that theory ... ;-)")

let practice = "1) Write open source library 2) ??? 3) lot's of unpaid work"

> run theory practice;;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 40
1) Write open source library 2) ??? 3) lot's of unpaid work
                                       ^
Expecting: 'profit'
Other error messages:
  So much about that theory ... ;-)

If you can’t get the built‐in operators and parsers to produce the error message you need, you can always drop down one API level and write a special‐purpose parser combinator.

The following example shows how you can define a custom between combinator that includes the position of the opening delimiter as part of the error message that gets generated when the closing delimiter cannot be parsed.

// A variant of `between` that applies popen, p and pclose in sequence and
// returns the result of p.
//   popen, pclose - parsers for the opening and closing delimiters
//   p             - parser for the content between the delimiters
//   label         - description of the compound, used in the error messages
// If popen fails without changing the parser state, the error is replaced
// with `expected label`. If pclose fails, a message naming the position at
// which the compound was opened is added to the error.
let betweenL (popen: Parser<_,_>) (pclose: Parser<_,_>) (p: Parser<_,_>) label =
  let expectedLabel = expected label
  let notClosedError (pos: Position) =
     messageError (sprintf "The %s opened at %s was not closed."
                           label (pos.ToString()))
  fun (stream: CharStream<_>) ->
    // The following code might look a bit complicated, but that's mainly
    // because we manually apply three parsers in sequence and have to merge
    // the errors when they refer to the same parser state.
    let state0 = stream.State // remembered for the position of the opening delimiter
    let reply1 = popen stream
    if reply1.Status = Ok then
      let stateTag1 = stream.StateTag
      let reply2 = p stream
      let error2 = if stateTag1 <> stream.StateTag then reply2.Error
                   else mergeErrors reply1.Error reply2.Error
      if reply2.Status = Ok then
        let stateTag2 = stream.StateTag
        let reply3 = pclose stream
        let error3 = if stateTag2 <> stream.StateTag then reply3.Error
                     else mergeErrors error2 reply3.Error
        if reply3.Status = Ok then
          Reply(Ok, reply2.Result, error3)
        else
          Reply(reply3.Status,
                mergeErrors error3 (notClosedError (state0.GetPosition(stream))))
      else
        // p failed: return the merged errors, so that the messages popen
        // returned for the same parser state are not dropped
        Reply(reply2.Status, error2)
    else
      let error = if state0.Tag <> stream.StateTag then reply1.Error
                  else expectedLabel
      Reply(reply1.Status, error)

The behaviour of the betweenL combinator differs from that of the standard between combinator in two ways:

  • If popen fails without changing the parser state, betweenL popen p pclose label fails with expected label, just like between popen p pclose <?> label would have.
  • If pclose fails without changing the parser state, betweenL additionally prints the opening position of the compound.

The following tests demonstrate this behaviour:

let stringLiteral = betweenL (str "\"") (str "\"")
                             (manySatisfy ((<>) '"'))
                             "string literal in double quotes"
> run stringLiteral "\"test\"";;
val it : ParserResult<string,unit> = Success: "test"

> run stringLiteral "\"test";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 6
"test
     ^
Note: The error occurred at the end of the input stream.
Expecting: '"'
Other error messages:
  The string literal in double quotes opened at (Ln: 1, Col: 1) was not closed.

> run stringLiteral "test";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
test
^
Expecting: string literal in double quotes
================================================ FILE: Doc/html/users-guide/debugging-a-parser.html ================================================ Debugging a parser

5.11 Debugging a parser

Debugging a parser implemented with the help of a combinator library has its special challenges. In particular, setting a breakpoint and stepping through the code is not as straightforward as in a regular recursive descent parser. Furthermore, stack traces can be difficult to decipher because of the ubiquitous use of anonymous functions.[1] However, with the help of the techniques we explain in this chapter, working around these issues should be easy.

5.11.1 Setting a breakpoint

Suppose you have a combined parser like

let buggyParser = pipe2 parserA parserB (fun a b -> ...)

and you would like to break into the debugger whenever buggyParser calls parserB. One thing you could try is to set a breakpoint at the beginning of parserB. However, that’s only possible if parserB is not itself a combined parser, and even then you still have the problem that your breakpoint is also triggered whenever parserB is called from any other place in your source. Similarly, a breakpoint you set in pipe2 will probably be triggered by many other parsers besides buggyParser.

Fortunately there’s a simple workaround if you can modify and recompile the code. Just define a wrapper function like the following

let BP (p: Parser<_,_>) stream =
    p stream // set a breakpoint here

Then redefine the buggy parser as

let buggyParser = pipe2 parserA (BP parserB) (fun a b -> ...)

If you now set a breakpoint at the body of the BP function, it will be triggered whenever parserB is called from buggyParser.

With such a wrapper it’s also easy to define a precise conditional breakpoint. For example, if you only want to break once the parser has reached line 100 of the input file, you could use the breakpoint condition stream.Line >= 100.

By the way, you don’t need to set the breakpoint in the debugger. You can also write it directly into the code:

let BP (p: Parser<_,_>) (stream: CharStream<_>) =
    // this will execute much faster than a
    // conditional breakpoint set in the debugger
    if stream.Line >= 100L then
        System.Diagnostics.Debugger.Break()
    p stream
Note

There are some issues with setting breakpoints in or stepping into anonymous or curried F# functions in Visual Studio 2008. In Visual Studio 2010 many of these issues have been fixed.

If you’re using Visual Studio, don’t forget to switch on the “Suppress JIT optimization on module load” option in the Tools – Options – Debugging – General dialog. And, when possible, use a debug build (of FParsec) for debugging.

5.11.2 Tracing a parser

Occasionally you have a parser that doesn’t work as expected and playing around with the input or staring at the code long enough just isn’t enough for figuring out what’s wrong. In such cases the best way to proceed usually is to trace the execution of the parser. Unfortunately, stepping through the parser under a debugger can be quite tedious, because it involves stepping through long sequences of nested invocations of parser combinators. A more convenient approach often is to output tracing information to the console or a logging service.

A simple helper function for printing trace information to the console could look like the following example:

let (<!>) (p: Parser<_,_>) label : Parser<_,_> =
    fun stream ->
        printfn "%A: Entering %s" stream.Position label
        let reply = p stream
        printfn "%A: Leaving %s (%A)" stream.Position label reply.Status
        reply

To demonstrate how you could use such a tracing operator, let’s try to debug the following buggy (and completely silly) parser:

let number = many1Satisfy isDigit

let emptyElement = pstring "[]" : Parser<_,unit>
let numberElement = pstring "[" >>. number .>> pstring "]"
let nanElement = pstring "[NaN]"

let element = choice [emptyElement
                      numberElement
                      nanElement] .>> spaces

let elements : Parser<_,unit> = many element

The following test run shows that the above parser is indeed buggy:

> run elements "[] [123] [NaN]";;
val it : ParserResult<string list,unit> = Failure:
Error in Ln: 1 Col: 11
[] [123] [NaN]
          ^
Unknown Error(s)

You probably don’t need trace information to figure out why the "NaN" bit of the string doesn’t get parsed, but let’s pretend you do. Obviously, there’s something wrong with the element parser. To find out what’s wrong, let’s decorate the element parser and all subparsers with the <!> operator and an appropriate label:

let number = many1Satisfy isDigit <!> "number"

let emptyElement  = pstring "[]"                           <!> "emptyElement"
let numberElement = pstring "[" >>. number .>> pstring "]" <!> "numberElement"
let nanElement    = pstring "[NaN]"                        <!> "nanElement"

let element = choice [emptyElement
                      numberElement
                      nanElement] .>> spaces <!> "element"

let elements  : Parser<_,unit> = many element

If you now run the parser on the same input as before, you get the following output:

> run elements "[] [123] [NaN]";;
(Ln: 1, Col: 1): Entering element
(Ln: 1, Col: 1): Entering emptyElement
(Ln: 1, Col: 3): Leaving emptyElement (Ok)
(Ln: 1, Col: 4): Leaving element (Ok)
(Ln: 1, Col: 4): Entering element
(Ln: 1, Col: 4): Entering emptyElement
(Ln: 1, Col: 4): Leaving emptyElement (Error)
(Ln: 1, Col: 4): Entering numberElement
(Ln: 1, Col: 5): Entering number
(Ln: 1, Col: 8): Leaving number (Ok)
(Ln: 1, Col: 9): Leaving numberElement (Ok)
(Ln: 1, Col: 10): Leaving element (Ok)
(Ln: 1, Col: 10): Entering element
(Ln: 1, Col: 10): Entering emptyElement
(Ln: 1, Col: 10): Leaving emptyElement (Error)
(Ln: 1, Col: 10): Entering numberElement
(Ln: 1, Col: 11): Entering number
(Ln: 1, Col: 11): Leaving number (Error)
(Ln: 1, Col: 11): Leaving numberElement (Error)
(Ln: 1, Col: 11): Leaving element (Error)
val it : ParserResult<string list,unit> = Failure:
Error in Ln: 1 Col: 11
[] [123] [NaN]
          ^
Unknown Error(s)

This trace log clearly reveals that the element parser failed because the numberElement parser failed after consuming the left bracket and thus the choice parser never got to try the nanElement parser. Of course, this issue could be easily avoided by factoring out the bracket parsers from the emptyElement, numberElement and nanElement parsers. Also, if we had used many1SatisfyL instead of many1Satisfy for the number parser, we would have gotten an error message more descriptive than “Unknown Error(s)” (see the chapter on customizing error messages).

Footnotes:
[1] Although, debugging a parser written with a combinator library is often still easier than debugging one generated by an opaque parser generator tool.
================================================ FILE: Doc/html/users-guide/index.html ================================================ User’s Guide

5 User’s Guide

This user’s guide is an in‐depth introduction to parsing with FParsec. It explains how Parser functions work, covers the most important parser combinators in detail, explains how you can customize error messages, and discusses some important practical aspects of parser writing, such as debugging and performance optimizations.

The aim of this user’s guide is to prepare you for writing “real world” parser applications with FParsec. It doesn’t try to cover every feature of the library, but focuses on covering the core concepts such that you can gain a deep understanding of the library design.

Although there is some overlap between the tutorial and this user’s guide, it’s probably a good idea to read the tutorial first, since it will give you a quick overview of the library that will later help you put things into perspective. You might also want to experiment with some small parsers before you start reading the user’s guide, or maybe in parallel to reading it, so that it becomes easier for you to relate the dry technical discussions to exciting practical applications ☺

The first seven chapters of this user’s guide build on each other. The remaining chapters are rather independent and can be read in any order.

================================================ FILE: Doc/html/users-guide/internals-of-a-simple-parser-function.html ================================================ Internals of a simple Parser function

5.3 Internals of a simple Parser function

In the beginning of this user’s guide we noted that asciiLower “parses” a lower case ASCII char and that skipString “skips” over a string, but we haven’t yet explained what it actually means for a Parser function to “parse” a letter or “skip” a string. That’s what we will do in this chapter. To explain how Parser functions work, we will discuss the implementation of a simple string parser. This also gives us the opportunity to explain some important details about the Reply and CharStream types.

5.3.1 The code

The parser whose implementation we will discuss in this chapter is

val stringReturn: string -> 'a -> Parser<'a,'u>

Like skipString str the parser stringReturn str result skips over the string str, but it returns result as part of its reply value, instead of the unit value () that skipString str returns. This makes stringReturn a bit more general than skipString. Indeed, the two library parsers pstring and skipString are actually implemented with the help of stringReturn. For example, skipString str is defined as stringReturn str ().

A simplified version[1] of the actual implementation of stringReturn in the library is

let stringReturn str result : Parser<_,_> =                                // 1
    checkStringContainsNoNewlineChar str "pstring/skipString/stringReturn" // 2
    let error = expectedString str                                         // 3
    fun stream ->                                                          // 4
        if stream.Skip(str) then                                           // 5
            Reply(result)                                                  // 6
        else                                                               // 7
            Reply(Error, error)                                            // 8

Let’s start with the general structure of this implementation: We define a function stringReturn with two parameters that returns a function closure. The type annotation : Parser<_,_> on line 1 fixes the type of the returned function closure to Parser<'a,'u> and in particular constrains the type of its argument to CharStream<'u>. Remember, the type Parser<'a,'u> is simply an abbreviation for CharStream<'u> -> Reply<'a>, where 'a represents the result type and 'u the user state type.

Implementing our parameterized parser as a function returning a parser closure allows us to factor out common setup work that only needs to be done once for every parser.[2] In this case we only need to check once (line 2) whether the string contains a newline char, i.e. '\r' or '\n', (we’ll explain below why this is necessary) and in line 3 we preconstruct the error message that is later used whenever the parser is applied and doesn’t find str in the input (we’ll write more about error messages in later chapters).

The actual parsing logic is completely straightforward: On line 5 the parser calls the CharStream’s Skip method with the argument str. If the next chars in the stream match str, Skip advances the stream’s position by the length of the passed string and returns true; otherwise, it doesn’t change the position of the stream and returns false. Thus, if the string is skipped, the parser returns with a Reply value containing the result (line 6). Otherwise, it returns a Reply with the preconstructed error message (line 8).

5.3.2 The Reply type

This is a good time to discuss the Reply type in a little more detail.

type Reply<'TResult> = struct
  new: 'TResult -> Reply<'TResult>
  new: ReplyStatus * ErrorMessageList -> Reply<'TResult>
  new: ReplyStatus * 'TResult * ErrorMessageList -> Reply<'TResult>

  val mutable Status: ReplyStatus
  /// If Status <> Ok then the Result value is undefined and may be null.
  val mutable Result: 'TResult
  val mutable Error: ErrorMessageList
end

Similar to a tuple, a Reply can be seen as an aggregate of its three fields: Status, Result and Error. The Status field contains a ReplyStatus enum value indicating whether the parser succeeded (Ok) or failed (Error or FatalError). By returning a FatalError instead of an Error a parser can signal that no error recovery should be tried (except through backtracking mechanisms, which we explain later). If the Status is Ok, the Result field contains the parser result; otherwise, its value is undefined (and may be null). The Error field holds a list of error messages in the form of an ErrorMessageList value. An empty ErrorMessageList is represented as a null value.

The 1‐argument constructor we use in line 6 sets the status to Ok and the result value to result. The 2‐argument constructor we use in line 8 sets the status to Error and the error message to error. The Reply type also defines a 3‐argument constructor, which simply sets the fields to the respective argument values. The default valuetype constructor with 0 arguments initializes the Reply value to Reply(Error, null).

The error messages returned in the Reply value implicitly refer to the current stream position. Since the ErrorMessage values stored in the ErrorMessageList do not themselves contain an error position, they can only be interpreted together with the position of the CharStream as it is when the parser returns.

5.3.3 The parser state and the line and column count

Usually one CharStream<'u> instance is created per input file and all parser functions involved in parsing elements of the same file are passed the same CharStream instance. Since calling the methods of a CharStream may change its state, parser functions have to be careful about when and how they change the CharStream state, because it obviously may affect all parsers subsequently called.

In the example above, stringReturn only advances the stream position when it succeeds. This makes it an atomic string parser, because it does not consume input if only the beginning of the argument string matches the input. Whether or not a parser consumes input before it fails has important implications for the error handling, as we will discuss later in this user’s guide.

Except for the freely customizable UserState, all the mutable state information in the CharStream<'u> instance pertains to the location of the next char in the text stream. The most important element of the state is the char Index, which uniquely identifies the UTF‐16 char in the stream. In addition to the index of the next char, the CharStream also keeps track of that char’s Line number and the index of the first char in the line, the LineBegin. By combining the Index and LineBegin we can calculate a Column. The CharStream’s Name serves as a description or identifier for the stream.

Only the char index is strictly necessary for the core stream functionality. We also store the other pieces of state information in a CharStream<'u> instance because having all parser state information in one place reduces complexity and allows us to expose a more convenient API to Parser functions.

Note

The CharStream<'u>.State property returns a snapshot of all the mutable state components in the form of a CharStreamState<'u> value.

The state information that is exposed through the CharStream<'u>.State property is all the state that is tracked by FParsec parsers, which is why we also refer to it as the parser state.[3]

Ideally, the CharStream class would keep track of the column and line count in a completely automated fashion. Ideally, the CharStream class would give the user a way to freely specify the recognized set of newline character sequences and all CharStream methods then would automatically detect such newlines in the input. Unfortunately, such a configuration option would be difficult to implement efficiently and would likely have a severe impact on performance (at least in comparison to the hard‐coded alternative, and with the current language and compiler support).

Since the CharStream can’t provide automatic support for all possible notions of a newline, it exposes two sets of methods in its interface. One set provides the basic stream operations, such as skipping a certain number of UTF‐16 chars or matching a string with the stream content. These methods come without any automatic newline detection, but they offer optimal performance and give the user complete freedom to manually register any kind of newline. The other set of methods provides some frequently needed higher‐level text operations, such as skipping over a sequence of whitespace chars or reading a sequence of chars satisfying a given predicate function. These other methods automatically detect any of the 3 standard newline char sequences "\n", "\r\n" and "\r", because that’s the notion of a newline used by most text applications. In combination both sets of methods cover the needs of a majority of text parsers in a convenient and efficient manner.

Note

Maybe you wonder why we don’t just leave the line and column count completely to the user instead of complicating the CharStream API. The reason we keep track of a line count in the CharStream class is that most non‐trivial text‐parsing applications require a line count for error reporting purposes. Implementing it at a relatively low API level brings significant performance advantages and relieves higher‐level API users from constantly having to code around the special case of newline chars.

If you have a look at the reference documentation for CharStream, you’ll see that the CharStream methods that automatically detect newlines are easily discernible by their name. The Skip method we used in the above example does not belong to these methods, which is why we have to make sure in line 2 that the string doesn’t contain any newlines. In practice one hardly ever uses a parser like stringReturn with a string containing a newline, hence lifting this restriction wouldn’t be worth the effort, especially since simple workarounds are available.[4]

Footnotes:
[1] The library version is a bit more complicated because it contains optimized paths for argument strings with only 1 or 2 chars.
[2] Even parsers without a parameter, like e.g. asciiLower, are actually compiled as properties returning a new function object every time they are called. This is because the user state type variable makes asciiLower generic, while function values can only have a non‐generic type.
[3] Strictly speaking, a CharStream<'a> instance has a little more publicly observable mutable state than the one that is also exposed through the State property. For example, the MinRegexSpace configuration parameter is not tracked in the State property. Another example is the value of the IndexOfLastCharPlus1 property which changes once the last char of the stream is detected. However, there shouldn’t be a reason that a parser needs to restore the old values of these properties upon backtracking, so we just treat these properties as constant and ignore them when we discuss the mutable CharStream state.
[4] For example, stringReturn "str1\nstr2" result can be replaced with attempt (skipString "str1" >>. newline >>. stringReturn "str2" result).
================================================ FILE: Doc/html/users-guide/looking-ahead-and-backtracking.html ================================================ Looking ahead and backtracking

5.7 Looking ahead and backtracking

5.7.1 Backtracking

Sometimes you need more than the default one token look‐ahead of <|>, either because it really can’t be avoided or because avoiding it would be too inconvenient. In those instances you can use one of the combinators attempt, >>?, .>>?, .>>.? or >>=? to force a parser to backtrack after an error.

The attempt combinator

val attempt: Parser<'a,'u> -> Parser<'a,'u>

takes a parser as the argument and returns a wrapped parser that behaves exactly like the argument, except that if the argument parser fails with an output state different from the input state or with a fatal error, the wrapped parser will backtrack to the original input state and report a non‐fatal error.

You can observe the effect of the attempt combinator in the following error message:

> run (attempt (pstring "a" >>. pstring "b")) "ac";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 1
ac
^

The parser backtracked after:
  Error in Ln: 1 Col: 2
  ac
   ^
  Expecting: 'b'

The next example demonstrates the effect of attempt on the choice combinator.

let str s = pstring s
let ab = str "a" .>>. str "b"
let ac = str "a" .>>. str "c"

Without attempt the following test produces an error:

> run (ab <|> ac) "ac";;
val it : ParserResult<(string * string),unit> = Failure:
Error in Ln: 1 Col: 2
ac
 ^
Expecting: 'b'

By introducing attempt we allow the <|> combinator to recover from the error in the first branch:

> run ((attempt ab) <|> ac) "ac";;
val it : ParserResult<(string * string),unit> = Success: ("a", "c")

Sometimes it can be a disadvantage that attempt will trigger backtracking after any error returned by the argument parser, no matter how much content the parser has consumed. Consider for example a parser like prefix >>. expr, where expr is a parser for a potentially large and deeply nested expression. If you wrap this parser with attempt then the wrapped parser will not only backtrack if an error occurs within the prefix or directly after the prefix, but also if it occurs anywhere in the expression. However, in most cases you only want the parser to backtrack if the error occurs directly after the prefix, not if the error occurs deeply inside the expression parser. For situations like this FParsec defines the >>?, .>>?, .>>.? and >>=? operators.

The >>? combinator

val (>>?):  Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

behaves like the >>. operator, except that p1 >>? p2 will backtrack to the beginning if p2 fails with a non‐fatal error and without changing the parser state, even if p1 has changed the parser state. Similarly, .>>?, .>>.? and >>=? behave like .>>, .>>. and >>=, except that they will backtrack to the beginning if the second parser fails with a non‐fatal error and without changing the parser state.

The following tests illustrate the differences between backtracking implemented via attempt and .>>.?.

let bInBrackets = str "[" >>. str "b" .>> str "]"

A test with attempt on the left side of <|>:

> run ((attempt (str "a" .>>. bInBrackets)) <|> ac) "a[B]";;
val it : ParserResult<(string * string),unit> = Failure:
Error in Ln: 1 Col: 2
a[B]
 ^
Expecting: 'c'

A test with attempt on both sides of <|>:

> run ((attempt (str "a" .>>. bInBrackets)) <|> attempt ac) "a[B]";;
val it : ParserResult<(string * string),unit> = Failure:
Error in Ln: 1 Col: 1
a[B]
^

The parser backtracked after:
  Error in Ln: 1 Col: 2
  a[B]
   ^
  Expecting: 'c'

The parser backtracked after:
  Error in Ln: 1 Col: 3
  a[B]
    ^
  Expecting: 'b'

A test with .>>.? instead of attempt on the left side of <|>:

> run (str "a" .>>.? bInBrackets <|> ac) "a[B]";;
val it : ParserResult<(string * string),unit> = Failure:
Error in Ln: 1 Col: 3
a[B]
  ^
Expecting: 'b'

You can of course chain multiple of the >>? and .>>? operators to backtrack longer distances, like in prefix1 >>? prefix2 >>? p .>>? postfix.

Note

When implementing backtracking parsers you should generally prefer the >>?, .>>? and .>>.? combinators to the attempt combinator, because the former combinators offer finer control over the exact backtracking behaviour and hence will often lead to better error reporting. Note however that neither can completely replace the other.

Backtracking combinators can also be useful when parsing sequences. In the chapter “Parsing sequences” we briefly discussed the following example:

let ws = spaces
let str s = pstring s
let numberInBrackets = str "[" >>. pint32 .>> str "]" .>> ws
> run (many numberInBrackets >>. str "[c]") "[1] [2] [c]";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 10
[1] [2] [c]
         ^
Expecting: integer number (32-bit, signed)

The problem here is that the argument parser to many fails after consuming input if it encounters a bracket that is not followed by a digit. If we decided that this is a defect of the parser as opposed to the grammar, we could fix it by simply replacing a >>. with >>?.

let numberInBrackets = str "[" >>? pint32 .>> str "]" .>> ws
> run (many numberInBrackets .>> str "[c]") "[1] [2] [c]";;
val it : ParserResult<int32 list,unit> = Success: [1; 2]

A similar example is the sepEndBy1 combinator for parsing a sequence of one or more elements separated and optionally ended by a separator. If FParsec didn’t provide this combinator, you could define it yourself using many and >>?:

let sepEndBy1_ p sep =
    pipe2 p (many (sep >>? p)) (fun hd tl -> hd::tl) .>> opt sep

The following tests show that our sepEndBy1 replacement works as expected:

> run (sepEndBy1_ pint32 (str ";")) "1;2;3";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3]
> run (sepEndBy1_ pint32 (str ";")) "1;2;3;";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3]

Note however that in contrast to sepEndBy1_ the version of sepEndBy1 provided by FParsec doesn’t need to parse the separator twice when it terminates a sequence.

5.7.2 Parser predicates

The backtracking combinators allow you to “look ahead” by tentatively parsing input and then backtracking if an error occurs. However, they don’t allow you to conditionally parse the input with one parser depending on the success or failure of another parser. This is what the following two combinators are for:

val followedBy:    Parser<'a,'u> -> Parser<unit,'u>
val notFollowedBy: Parser<'a,'u> -> Parser<unit,'u>

The parser followedBy p (notFollowedBy p) succeeds without changing the parser state if p succeeds (fails) when applied at the current position.

For example, both the following parser definitions only parse positive integer literals without a leading zero:

let p1 = followedBy    (satisfy ((<>) '0')) >>. pint32
let p2 = notFollowedBy (pstring "0")        >>. pint32

Both definitions will correctly parse "123" and fail to parse "01":

> run p1 "123";;
val it : ParserResult<int32,unit> = Success: 123
> run p1 "01";;
val it : ParserResult<int32,unit> =  Failure:
Error in Ln: 1 Col: 1
01
^
Unknown Error(s)
> run p2 "123";;
val it : ParserResult<int32,unit> = Success: 123
> run p2 "01";;
val it : ParserResult<int32,unit> = Failure:
Error in Ln: 1 Col: 1
01
^
Unknown Error(s)

While both parsers work as expected, the generated error messages aren’t very helpful. The problem is that followedBy and notFollowedBy can’t generate better error messages, because they don’t know what kind of input their argument parsers accept.[1] To improve the error messages you can either use the “labeled” combinator variants followedByL and notFollowedByL or you could use the labelling operator <?> that we will discuss in the next chapter.

For example:

> run (followedByL (satisfy ((<>) '0')) "positive int w/o leading 0" >>. pint32)
      "01";;
val it : ParserResult<int32,unit> =  Failure:
Error in Ln: 1 Col: 1
01
^
Expecting: positive int w/o leading 0

> run (followedBy (satisfy ((<>) '0')) >>. pint32 <?> "positive int w/o leading 0")
      "01";;
val it : ParserResult<int32,unit> = Failure:
Error in Ln: 1 Col: 1
01
^
Expecting: positive int w/o leading 0

> run (notFollowedByL (pstring "0") "'0'" >>. pint32) "01";;
val it : ParserResult<int32,unit> = Failure:
Error in Ln: 1 Col: 1
01
^
Unexpected: '0'

The parser notFollowedByL (pstring "0") "'0'" from the last example could actually be simplified to notFollowedByString "0", which uses the specialized parser predicate notFollowedByString. In table 6.1.9 you’ll find an overview of all available parser predicates.

A frequent application of the notFollowedBy predicate is in sequence parsers similar to many (notFollowedBy pEnd >>. p) .>> pEnd. If you are writing such a parser, you should check whether you can replace it with an application of one of the manyTill parsers. Please consult the reference for more details.

Before we conclude this chapter we want to emphasize that you’re not limited to the built‐in (backtracking) combinators of FParsec. A great advantage of FParsec is the simplicity with which you can write custom combinators using the low‐level API.

For example, you could define a combinator that backtracks if the result of the argument parser doesn’t satisfy a predicate function:

let resultSatisfies predicate msg (p: Parser<_,_>) : Parser<_,_> =
    let error = messageError msg
    fun stream ->
      let state = stream.State
      let reply = p stream
      if reply.Status <> Ok || predicate reply.Result then reply
      else
          stream.BacktrackTo(state) // backtrack to beginning
          Reply(Error, error)

With this combinator you could conveniently define a parser for positive ints:

let positiveInt = pint32 |> resultSatisfies (fun x -> x > 0)
                                            "The integer must be positive."
> run positiveInt "1";;
val it : ParserResult<int32,unit> = Success: 1
> run positiveInt "-1";;
val it : ParserResult<int32,unit> = Failure:
Error in Ln: 1 Col: 1
-1
^
The integer must be positive.
Footnotes:
[1] In the case of notFollowedBy p the problem is clear: notFollowedBy p fails if p succeeds and when p succeeds, p doesn’t generate an error message that notFollowedBy could reuse. In the case of followedBy p the situation is different: followedBy p fails if p fails, so followedBy could try to reuse the error messages generated by p. However, the error messages generated by the argument parser will in practice often not suffice to explain what kind of input is expected. So, for reasons of consistency and performance, followedBy doesn’t even try to reuse the error messages generated by the argument parser.
================================================ FILE: Doc/html/users-guide/parser-functions.html ================================================ Parser functions

5.1 Parser functions

An FParsec parser is a function that reads input from a text stream. When it succeeds, it returns a result value (e.g. a parsed number or an AST node); when it fails, it returns error messages describing what went wrong.

The following type abbreviation from the Primitives module defines the basic type of parser function supported throughout the FParsec library:

type Parser<'Result,'UserState> = CharStream<'UserState> -> Reply<'Result>

As you can see from this definition, parser functions only accept a single argument: a CharStream<'UserState> instance. The CharStream class is FParsec’s specialized stream type for “text” streams, i.e. streams of Unicode chars. A CharStream can either be created directly from a string or it can be created from a file path or System.IO.Stream. In the latter cases the CharStream will take care of decoding the binary input into UTF‐16 chars, similar to what a System.IO.StreamReader does. What separates CharStream from the StreamReader and similar classes is that it comes with some advanced features that make it especially suitable for backtracking parser applications.

We will discuss the purpose of the 'UserState type in more detail in later chapters. For now it’s enough to note that the user state is a user‐definable component of the CharStream state. If you don’t need a user state, you will normally define 'UserState to be unit. To save some key strokes and screen real estate, we usually abbreviate 'UserState as 'u.

The Reply<'Result> value returned from a parser function is a simple value type container for the parser result and possible error messages. It contains a status field indicating whether the parser succeeded or not, a field for the result value (of type 'Result) and a field with a possibly empty list of error messages. We will explain these fields in more detail in section 5.3.

A very basic example of a parser is the asciiLower parser from the CharParsers module:

val asciiLower: Parser<char,'u>

It parses any lower case ASCII char, i.e. any char in the range 'a'–'z', and, if successful, returns the parsed char as part of its reply.

Many predefined parsers expect one or more parameter values as arguments. Take for instance the skipString function:

val skipString: string -> Parser<unit,'u>

It takes a string as an argument and returns a parser that skips over this (and only this) string in the input.

Note

Implementing parser grammars with FParsec usually means composing parsers for higher‐level grammar rules from parsers for lower‐level rules. You start with simple parsers for the leaf nodes of your grammar and then work your way up step‐by‐step until you eventually obtain a parser for the complete grammar. The simple representation of parsers as functions makes this composition particularly easy and allows for a straightforward and intuitive implementation of the library primitives.

================================================ FILE: Doc/html/users-guide/parsing-alternatives.html ================================================ Parsing alternatives

5.6 Parsing alternatives

FParsec’s main operator for trying to parse input with alternative parsers is

val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u>

This operator implements a form of prioritized choice: it only tries to parse input with the second parser if the first parser fails.

The following example illustrates this behaviour:

type Char = AsciiChar of char
          | Char of char

let asciiLetter = asciiLetter |>> AsciiChar
let letter = letter |>> Char
> run (asciiLetter <|> letter) "a";;
val it : ParserResult<Char,unit> = Success: AsciiChar 'a'
> run (letter <|> asciiLetter) "a";;
val it : ParserResult<Char,unit> = Success: Char 'a'
> run (asciiLetter <|> letter) "ä";;
val it : ParserResult<Char,unit> = Success: Char 'ä'

The prioritized choice also implies that FParsec doesn’t enforce a longest‐match rule like in regular expressions:

> run (pstring "a" <|> pstring "ab") "ab";;
val it : ParserResult<string,unit> = Success: "a"

If you want to accept more than two alternatives, you can either chain multiple <|> operators, like in p1 <|> p2 <|> p3, or you can use the choice combinator, which accepts a sequence of parsers as the argument, like in choice [p1; p2; p3]. In both cases the argument parsers are tried from left to right until a parser succeeds.

A good understanding of the <|> operator is important for productively working with FParsec, so let’s have a look at its implementation:

let (<|>) (p1: Parser<'a,'u>) (p2: Parser<'a,'u>) : Parser<'a,'u> =
    fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p1 stream
        if reply.Status = Error && stateTag = stream.StateTag then
            let error1 = reply.Error
            reply <- p2 stream
            if stateTag = stream.StateTag then
                reply.Error <- mergeErrors reply.Error error1
        reply

As you can see, the parser p1 <|> p2 works as follows: First, it applies the parser p1 to the input stream. If p1 succeeds, the reply of p1 is returned. If p1 fails with a non‐fatal error (i.e. with the status Error, not FatalError) and without changing the parser state, the parser p2 is applied. If p2 does not change the parser state, the error messages from both parsers are merged. (We compare the StateTag values instead of the actual parser states for optimization reasons, see section 5.4.3.)

The most important point to note here is that p1 <|> p2 will always return with the reply of p1 if p1 changes the parser state, even if p1 eventually fails. Remember that the stream position is part of the parser state, so if p1 fails after consuming input, p2 will not be applied. Since a parser usually consumes input as soon as it can accept at least one atomic token from the input, this means that p1 <|> p2 by default implements backtracking with only a “one token look‐ahead”.

Consider the following example:

let parserA = spaces >>. pstring "a"
let parserB = spaces >>. pstring "b"
run (parserA <|> parserB) " b";;
> run (parserA <|> parserB) " b";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 2
 b
 ^
Expecting: 'a'

The combined parser fails because parserA fails after consuming the whitespace, so that parserB never gets tried.

Of course, this simple parser could be easily fixed by factoring out the common prefix:

> run (spaces >>. (pstring "a" <|> pstring "b")) " b";;
val it : ParserResult<string,unit> = Success: "b"

The restriction of the look‐ahead in p1 <|> p2 may strike you as odd at first, but it has two big advantages:

  1. The error reporting is simplified and error messages are easier to understand because terminal errors can only occur at one position at a time.
  2. Parser developers are guided towards more efficient grammar implementations because parsers requiring more than a one token look‐ahead need to be explicitly annotated with the attempt or >>? combinators (see the next chapter).[1]
Footnotes:
[1] In case you’re wondering: No, we’re not trying to sell a design limitation as a feature here. In Parsec, the Haskell library on which FParsec’s design was originally based, the limited look‐ahead is essential for the library design, because it allows Parsec to exploit Haskell’s laziness in order to ensure space efficiency. FParsec has a different implementation in which the limited look‐ahead has no effect on space efficiency. We stick to the limited look‐ahead because we think it’s the appropriate default behaviour for a parser combinator library like FParsec. Now, admittedly, if FParsec could automatically optimize the implementation of a parser in a way that minimized backtracking, e.g. by automatically left‐factoring grammars, then backtracking would be less of a problem and a different default behaviour might become more attractive.
================================================ FILE: Doc/html/users-guide/parsing-sequences.html ================================================ Parsing sequences

5.5 Parsing sequences

In the previous chapter we discussed various ways to sequentially apply two or more parsers. In this section we will explain how to repeatedly apply the same parser in order to parse a sequence with an arbitrary number of elements.

5.5.1 The many parser

In regular expressions and many grammar formalisms a Kleene Star marks a parser rule that can be repeatedly applied. For example, number* could represent a sequence of zero or more numbers.

In FParsec the many combinator takes the place of the Kleene Star:

val many: Parser<'a,'u> -> Parser<'a list, 'u>

With many the number example could be translated into the following FParsec code:

let ws = spaces
let number = pint32 .>> ws
> run (many number) "1 2 3 4";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3; 4]

The parser many p repeatedly applies the parser p until p fails, i.e. it “greedily” parses as many occurrences of p as possible. The results of p are returned as a list in the order of occurrence.

At the end of a sequence parsed with many p the argument parser p must fail without consuming input (or changing the parser state in any other way). When p fails after consuming input, many p fails with the error returned by p.

The following example illustrates this behaviour:

let ws = spaces
let str s = pstring s
let numberInBrackets = str "[" >>. pint32 .>> str "]" .>> ws

The many numberInBrackets parser successfully parses the first two numbers in this test run:

> run (many numberInBrackets .>> str "(c)") "[1] [2] (c)";;
val it : ParserResult<int32 list,unit> = Success: [1; 2]

However, the same parser fails while trying to parse the 3rd number in this test run:

> run (many numberInBrackets >>. str "[c]") "[1] [2] [c]";;
val it : ParserResult<string,unit> = Failure:
Error in Ln: 1 Col: 10
[1] [2] [c]
         ^
Expecting: integer number (32-bit, signed)

The many parser failed here because the numberInBrackets parser failed after consuming input. In the chapter on looking ahead and backtracking we’ll come back to this example and discuss how you can modify the numberInBrackets parser such that it fails without consuming input if an opening bracket is not followed by a number.[1]

Since many p continues until p fails, you have to be a little careful not to supply an argument parser p that can succeed without consuming input. The following example shows what happens if you accidentally supply such an argument parser:

> run (many (many digit .>> ws)) "123 456";;
System.InvalidOperationException: (Ln: 1, Col: 8): The combinator 'many' was
applied to a parser that succeeds without consuming input and without changing
the parser state in any other way. (If no exception had been raised, the
combinator likely would have entered an infinite loop.)
   (... stack trace ...)
Stopped due to error

The problem here is that many digit .>> ws will succeed without changing the parser state if it can’t parse any digits or trailing whitespace. Thus, if the combined parser hadn’t thrown an exception, it would have entered an infinite loop at the end of the input.

We can easily avoid the error in the last example by requiring the inner parser to consume at least one digit. Instead of many digit, which succeeds with an empty list if it can’t parse any digits, we can use many1 digit, which fails if it can’t parse at least one digit:

> run (many (many1 digit .>> ws)) "123 456";;
val it : ParserResult<char list list,unit> =
  Success: [['1'; '2'; '3']; ['4'; '5'; '6']]

Before we continue, we should point out that an example like many1 digit is somewhat artificial, because you hardly ever want to parse digit chars into a list. If you want to parse numbers, one of the number parsers is usually the best way forward. If you actually need the individual chars, you normally need them as a string, not as a list.

Tip

If you want to parse a sequence of chars, you should generally prefer one of the specialized string parsers.

If you just want to skip over a sequence and don’t need the list of parser results, you can use the optimized combinators skipMany or skipMany1.

5.5.2 sepBy and sepEndBy

Often the elements of a sequence are separated by some separator. A convenient way to parse such sequences is to use the sepBy and sepEndBy combinators.

sepBy p sep parses a sequence of p separated by sep and returns the results in a list. sepEndBy parses a sequence of p separated and optionally ended by sep.

With these combinators you could for example define the following two parsers for a semicolon‐separated list of numbers in brackets:

let str s = pstring s
let sepList    = between (str "[") (str "]") (sepBy    pint32 (str ";"))
let sepEndList = between (str "[") (str "]") (sepEndBy pint32 (str ";"))

The sepList parser only accepts lists where the semicolons only occur between two numbers:

> run sepList "[]";;
val it : ParserResult<int32 list,unit> = Success: []
> run sepList "[1;2;3]";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3]
> run sepList "[1;2;3;]";;
val it : ParserResult<int32 list,unit> = Failure:
Error in Ln: 1 Col: 8
[1;2;3;]
       ^
Expecting: integer number (32-bit, signed)

The sepEndList parser also accepts a terminating semicolon:

> run sepEndList "[1;2;3]";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3]
> run sepEndList "[1;2;3;]";;
val it : ParserResult<int32 list,unit> = Success: [1; 2; 3]

Like for the many combinator, there are also variants of the sepBy and sepEndBy parsers that require at least one element in the sequence and/or skip over a sequence without returning the results. Have a look at the parser overview.

5.5.3 Parsing a sequence without creating a list

If you want to parse a sequence and you don’t need the results as an F# list, you can avoid the allocation of a temporary list by defining a custom sequence parser using the inline helper methods Inline.Many and Inline.SepBy.

For example, if you wanted to define a variant of many that parses the elements directly into a ResizeArray, i.e. a System.Collections.Generic.List, you could use the following definition:

let manyRA p =
  // the compiler expands the call to Inline.Many to an optimized sequence parser
  Inline.Many(elementParser = p,
              stateFromFirstElement = (fun x0 ->
                                         let ra = ResizeArray<_>()
                                         ra.Add(x0)
                                         ra),
              foldState = (fun ra x -> ra.Add(x); ra),
              resultFromState = (fun ra -> ra),
              resultForEmptySequence = (fun () -> ResizeArray<_>()))

A test run:

> run (manyRA (pint32 .>> spaces)) "1 2 3";;
val it : ParserResult<System.Collections.Generic.List<int32>,unit> =
  Success: seq [1; 2; 3]

The reference documentation for the Inline class contains some more examples.

Footnotes:
[1]

many doesn’t automatically backtrack when the argument parser fails after changing the parser state for two reasons:

  • In most situations automatic backtracking would only obscure error messages, because the reported input error was indeed severe and backtracking would only trigger secondary error messages that detract from the main error.
  • In the few instances where you rely on backtracking behaviour you can easily introduce it using the combinators detailed in section 5.7. Marking the occasions where you rely on backtracking with these combinators makes your parser implementations easier to debug and optimize.
================================================ FILE: Doc/html/users-guide/parsing-with-user-state.html ================================================ Parsing with user state

5.9 Parsing with user state

Each CharStream<'u> holds a value of the freely definable user state type 'u. In previous chapters we just ignored the user state and always assumed 'u to be unit. In this section we finally get to discuss the purpose of the user state and how you can use it in your parsers.

5.9.1 Overview

The user state allows you to introduce additional variables into the state tracked by FParsec parsers. It has the following two important properties:

  • The user state is stored in the CharStream<'u> instance and hence associated with the input. It is not shared globally and not associated with particular parser instances. The same parser instances can be concurrently applied to different CharStream<'u> instances with different user state instances.
  • The user state is tracked by FParsec parsers together with the input stream position. This means in particular that a parser restores the previous user state value when it backtracks.
Important

If you want changes to the user state to be undone during backtracking, you must change the user state by assigning a new value to the user state, not by mutating an existing user state value.

With the help of the user state you can implement context sensitive parsers, i.e. parsers whose behaviour not only depends on the immediate input but also on the context of the input. In general this works as follows:

  1. You establish a context by defining variables in the user state.
  2. You update the context depending on the input by letting parsers update the user state.
  3. You parse input depending on the context by making the parser behaviour dependent on the user state variables.

The user state is exposed through the UserState property of the CharStream<'u>. You can implement parsers using the low‐level API that directly access this property, or you can use the following parser primitives from the CharParsers module: getUserState, setUserState, updateUserState and userStateSatisfies.

The next section contains an example employing updateUserState to change the user state and userStateSatisfies to check for parser preconditions.

5.9.2 Recursive grammars with nesting restrictions

An important area of application for context sensitive parsers are recursive grammars where certain grammar elements cannot nest within others or where grammar elements need to be parsed differently depending on the nesting context.

Consider for example a textual markup language like HTML. Many such markup languages support various “inline tags” to annotate text in a paragraph. Usually these inline tags can nest arbitrarily, except for a few tags with special restrictions. One of these restrictions often is that hyperlinks must not contain hyperlinks, even though they can contain any other inline content. Other restrictions may apply to elements allowed in superscript text or footnotes. A convenient way to enforce such restrictions during parsing is to introduce variables into the user state that keep track of the nesting context. The following example demonstrates this approach.[1]

The following parser for a tiny markup‐language employs the user state

  1. to ensure that nested hyperlinks are not accepted and
  2. to parse potentially nested quotations between matching pairs of '\'' or '\"' chars.
open FParsec

type Element = Text of string
             | Bold of Element list
             | Italic of Element list
             | Url of string * Element list
             | Quote of char * Element list

type UserState =
    {InLink: bool
     QuoteStack: char list}
    with
       static member Default = {InLink = false; QuoteStack = []}

let ws    = spaces
let ws1   = spaces1
let str s = pstring s

let elements, elementsR = createParserForwardedToRef()

let text = many1Satisfy (isNoneOf "<>'\"\\") |>> Text
let escape = str "\\" >>. (anyChar |>> (string >> Text))

let quote (q: char) =
  let pq = str (string q)

  let pushQuote =
      updateUserState (fun us -> {us with QuoteStack = q::us.QuoteStack})

  let popQuote =
      updateUserState (fun us -> {us with QuoteStack = List.tail us.QuoteStack})

  let isNotInQuote =
      userStateSatisfies (fun us -> match us.QuoteStack with
                                    | c::_ when c = q -> false
                                    | _ -> true)

  isNotInQuote >>. between pq pq
                           (between pushQuote popQuote
                                    (elements |>> fun ps -> Quote(q, ps)))

// helper functions for defining tags

let tagOpenBegin tag =
    str ("<" + tag)
    >>? nextCharSatisfiesNot isLetter // make sure tag name is complete
    <?> "<" + tag + "> tag"

let tagOpen tag = tagOpenBegin tag >>. str ">"
let tagClose tag = str ("</" + tag + ">")

let tag t p f =
    between (tagOpen t) (tagClose t)
            (p |>> f)

let attributeValue =
    ws >>. str "=" >>. ws
    >>. between (str "\"") (str "\"")
                (manySatisfy (isNoneOf "\n\""))

let attribute s = str s >>. attributeValue

let nonNestedTag tag pAttributesAndClose pBody f
                 isInTag setInTag setNotInTag =
    tagOpenBegin tag
    >>. ((fun stream ->
            if not (isInTag stream.UserState) then
                stream.UserState <- setInTag stream.UserState
                Reply(())
            else // generate error at start of tag
                stream.Skip(-tag.Length - 1)
                Reply(FatalError,
                      messageError ("Nested <" + tag + "> tags are not allowed.")))
         >>. pipe2 pAttributesAndClose pBody f
             .>> (tagClose tag >>. updateUserState setNotInTag))

// the tags

let bold   = tag "b" elements Bold
let italic = tag "i" elements Italic

let url = nonNestedTag "a" (ws >>. attribute "href" .>> (ws >>. str ">"))
                       elements
                       (fun url phrases -> Url(url, phrases))
                       (fun us -> us.InLink)
                       (fun us -> {us with InLink = true})
                       (fun us -> {us with InLink = false})


let element = choice [text
                      escape
                      quote '\''
                      quote '\"'
                      bold
                      italic
                      url]

do elementsR:= many element

let document = elements .>> eof

> runParserOnString document UserState.Default ""
    "A \"'text' with 'nested \"<b>quotes</b>\"'.\"";;
val it : ParserResult<Element list,UserState> = Success:
[Text "A ";
 Quote
   ('"',
    [Quote ('\'',[Text "text"]); Text " with ";
     Quote ('\'',[Text "nested "; Quote ('"',[Bold [Text "quotes"]])]); Text "."])]

> runParserOnString document UserState.Default ""
    @"<b>Text <i></i>with</b> <a href=""url"">'link' but no \<blink\></a>";;
val it : ParserResult<Element list,UserState> = Success:
[Bold [Text "Text "; Italic []; Text "with"]; Text " ";
 Url("url",
     [Quote ('\'',[Text "link"]); Text " but no "; Text "<"; Text "blink";
      Text ">"])]

> runParserOnString document UserState.Default ""
    "<a href=\"url\"><a href=\"nested\">test</a></a>";;
val it : ParserResult<Element list,UserState> = Failure:
Error in Ln: 1 Col: 15
<a href="url"><a href="nested">test</a></a>
              ^
Nested <a> tags are not allowed.

5.9.3 Parameterizing a parser through the user state

The user state is also a good place to store parser configuration data that is specific to a “parser job”. For example, a compiler that processes multiple compilation units could put configuration data that is specific to the compilation unit, e.g. include paths, into the user state and then parse different compilation units with the same Parser instance, like in the following code:

type CompilationUnitAST = (* ... *)

type UserState = {
    IncludePaths: string list
    (* ... *)
}

let parser : Parser<CompilationUnitAST, UserState> = (* ... *)

let parseCompilationUnit file encoding includePaths (* ... *) =
    let initialUserState = {IncludePaths = includePaths; (* ... *)}
    runParserOnFile parser initialUserState file encoding
Footnotes:
[1] An alternative way to handle such restrictions at the parser level would be to define separate instances of the parser for each possible combination of restrictions, e.g. separate parsers for inline elements at the top level, for inline elements within hyperlinks, for elements within hyperlinks within superscript text and so on. However, with an increasing number of restrictions this approach quickly falls victim to the combinatorial explosion caused by the recursive nature of the involved parsers.
================================================ FILE: Doc/html/users-guide/performance-optimizations.html ================================================ Performance optimizations

5.12 Performance optimizations

In the past, the relatively poor performance of parser combinator libraries has often been cited as the primary impediment to their more widespread adoption. For this reason optimal performance stood front and center as a design goal during the development of FParsec and a lot of effort has been spent on optimizing parsing speed. As a result, FParsec has become so fast that parsers implemented with FParsec often significantly outperform parsers created by parser generator tools like fslex & fsyacc.

In general, a parser implemented in FParsec can get close to the performance of a hand‐optimized recursive‐descent parser written in C#. Due to the multi‐layered architecture of the FParsec API, you always have the option to fall back to the lower‐level API should a particular parser component implemented with the high‐level API turn out to be too slow. Hence, if you choose FParsec for implementing your parsers, you don’t have to worry that performance will become a reason for switching away from FParsec.

5.12.1 Performance guidelines

If you strive for optimal performance in your parser applications, try to adhere to the following guidelines:

Avoid backtracking

Try to avoid backtracking where possible. Sometimes it’s already enough to factor out a common prefix from a parser expression to avoid backtracking, e.g. by transforming (prefix >>? p1) <|> (prefix >>? p2) to prefix >>. (p1 <|> p2). Some simple backtracking can also be avoided by parsing whitespace as trailing whitespace instead of leading whitespace.

If you’re designing a programming or markup language, you should try to minimize the need for backtracking, both to simplify parsing and to avoid exponential worst‐case behaviour.

Prefer specialized parsers

FParsec provides a number of specialized parsers and combinators for various purposes. Using more specialized primitives instead of reimplementing them with generic combinators will often save you time and improve parsing speed.

In particular:

  • Prefer the skip... variants of parsers and combinators if you don’t need the parser results.
  • Parse whitespace with the built‐in whitespace parsers.
  • Parse numbers with the built‐in number parsers.
  • Prefer to parse strings with the many[1]Satisfy[2][L] parsers.
  • Consider parsing unicode identifiers with the identifier parser.
Construct parsers once

Constructing a parser can be relatively expensive in comparison to a single invocation of the parser. Hence, if you repeatedly apply the same parser, you should make sure that you construct the parser only once, either by preconstructing it at the beginning or by lazily constructing the parser and then caching it.

Usually the place where parsers get inadvertently constructed more than once is inside closures.

For example, if you have a local function like

fun stream ->
    let reply = (parser1 >>. parser2) stream
    if reply.Status = Ok then // ...
    else // ...

you should avoid the repeated construction of parser1 >>. parser2 every time the closure is called by moving the construction outside of the closure, as in

let parser = parser1 >>. parser2
fun stream ->
    let reply = parser stream
    if reply.Status = Ok then //...
    else // ...

Also, you shouldn’t wrap a parser expression inside a function just to avoid F#’s value restriction if you can achieve the same goal with a type annotation. For example, you should not try to fix the compiler error in the first example of the tutorial chapter on F#’s value restriction by replacing

let p = pstring "test"

with

let p stream = pstring "test" stream
Avoid parse {...} expressions
Avoid regex parsers

The regex parser parses a string by applying a .NET regular expression to the input. Since .NET regular expressions are relatively slow, you should reserve the use of the regex parser for patterns that you can’t easily express with other FParsec parsers and combinators.

Consider optimizing large choice parsers

Formal grammars for programming languages or DSLs often have one or two grammar rules at their core that essentially just enumerate a long list of possible ways to form a statement or expression in that language. A straightforward FParsec implementation of such a grammar rule typically uses the choice combinator to combine a list of parsers for all the alternatives.

Usually such an implementation with a large choice‐based parser will do just fine. However, if parsing performance is critical for your application, replacing a large choice parser with a custom‐made combinator can be an optimization with a high benefit‐cost ratio. The next section explains this optimization in more detail.

5.12.2 Low‐level parser implementations

FParsec’s high‐level API consists of its built‐in parsers and combinators in the Primitives and CharParsers module. The high‐level API allows you to easily construct parsers in a concise and rather declarative way. Usually you will author most of your parsers using the high‐level API, because that’s the most productive way to do it.

However, sometimes you might find that a specific piece of parser functionality is a bit inconvenient to express through the high‐level API or that the high‐level implementation isn’t as fast as you had hoped for. In those situations it’s a great advantage that FParsec allows you to drop down to the low‐level API, so that you can implement your own special‐purpose parser and combinator primitives.

We have already covered the basics of the low‐level API in the chapters on the internals of a simple parser function and applying parsers in sequence. In this section we will discuss some examples that demonstrate how you can use low‐level parser implementations for optimization purposes.

One example of a parser implemented using the low‐level API is contained in the samples folder of the FParsec distribution in samples/FSharpParsingSample/FParsecVersion/parser.fs. It is a parser for an identifier string that is not identical with a keyword.

The low‐level implementation uses another parser, identifierString, to parse an identifier string and then backtracks when the parsed string is a keyword:

let identifier : Parser<string, unit> =
    let expectedIdentifier = expected "identifier"
    fun stream ->
        let state = stream.State
        let reply = identifierString stream
        if reply.Status <> Ok || not (isKeyword reply.Result) then reply
        else // result is keyword, so backtrack to before the string
            stream.BacktrackTo(state)
            Reply(Error, expectedIdentifier)

The same parser could also be implemented with the high‐level API:

let identifier =
    attempt (identifierString
             >>= fun str ->
                     if not (isKeyword str) then preturn str
                     else pzero) <?> "identifier"

The high‐level version is a bit more concise, but whether it is also easier to understand is debatable. The low‐level version seems at least a bit more self‐explanatory and hence is probably more accessible to new FParsec users. Since the low‐level implementation is also significantly faster than the high‐level one, this is a good example for a parser that can be improved through a low‐level implementation.

If you wanted to optimize the performance of the identifier parser even more, you could replace the identifierString parser invocation with direct calls to CharStream methods. However, whether the potential performance gain would be worth the loss in code modularity and maintainability is questionable. A more promising optimization often is to integrate the identifier parser into a higher‐level choice‐based parser, like it is done below in the last example of this section.

choice parsers with a long list of argument parsers are performance‐wise one of the weakest spots of FParsec’s high‐level API. As we noted in the previous section, formal grammars for programming languages or DSLs often have one or two grammar rules at their core that essentially just enumerate a long list of possible ways to form a statement or expression in that language. A straightforward implementation of such a grammar rule using the choice combinator yields only sub‐optimal performance, since the choice parser has no knowledge about its argument parsers and has to try one parser after another.

This makes large choice‐based parsers an excellent optimization opportunity. With your knowledge about the parser grammar you can often narrow down the set of possible parsers just by peeking at the following one or two chars in the input. Having identified the set of possible parsers (often only consisting of one parser), you can then considerably speed up the dispatch to the right subparser.

For example, take a look at the JSON‐value parser from the tutorial:

choice [jobject
        jlist
        jstring
        jnumber
        jtrue
        jfalse
        jnull]

If you look at the definitions for the argument parsers, you’ll see that in almost all cases one can decide which parser should handle the input just based on the next char in the input. Hence, we could replace the choice‐based parser with the following low‐level implementation:

let error = expected "JSON value"
fun (stream: CharStream<_>) ->
    match stream.Peek() with
    | '{' -> jobject stream
    | '[' -> jlist stream
    | '"' -> jstring stream
    | 't' when stream.Skip("true")  -> Reply(JBool true)
    | 'f' when stream.Skip("false") -> Reply(JBool false)
    | 'n' when stream.Skip("null")  -> Reply(JNull)
    | _ ->
        let stateTag = stream.StateTag
        let mutable reply = jnumber stream
        if reply.Status = Error && stateTag = stream.StateTag then
           reply.Error <- error
        reply

A drawback of such a low‐level implementation is that you have to be a bit careful not to overlook any of the possible grammar cases. This is why we applied the jnumber parser in the “catch‐all” case, so that we don’t depend on the precise grammar rules for numbers.

You also need to consider how the low‐level implementation affects error messages. When a choice parser fails, it will generate an error message with the error messages from all the argument parsers it tried. This gives a human reader usually enough context to understand the error. For a low‐level implementation it can take a little more effort to ensure that the error messages for every case contain enough information about the grammar context. For example, in our implementation above we had to replace the default error message by jnumber with a custom one, so that the error message generated by the catch‐all case doesn’t create the impression that a JSON value can only be a number.

By now it is probably obvious that a low‐level parser implementation can actually be quite simple to write, but that it also comes at a certain cost in terms of code modularity and maintainability. Having the option of a low‐level implementation can certainly be what saves a project in certain situations and should give you some peace of mind with regard to parser performance, but generally you should only consider it as a backup option for those cases where you really need it.

The following example shows again how you can replace a choice‐based parser with a low‐level implementation, this time with a grammar that is a bit more representative of a typical programming language:

// The AST of the example language (only sketched here; the elided parts
// are marked with "...").
type Expr = Number of float
          | LetBinding ...
          | IfThenElse ...
          | ...

// The user state type used by the parsers in this example (elided).
type UserState = //...

// Abbreviation that fixes the user state type parameter to UserState.
type Parser<'result> = Parser<'result, UserState>

// The keywords of the example language. The `expr` parser treats
// Keyword.None as "not a keyword".
type Keyword = None = 0
             | If   = 1
             | Let  = 2
             // ...

// Maps keyword strings to their Keyword values. Keyword.None is passed as
// the first argument to createStaticStringMapping.
let stringToKeyword = createStaticStringMapping
                          Keyword.None
                          ["if", Keyword.If
                           "let", Keyword.Let
                           // ...
                          ]

let str s = pstring s

// Parses an identifier string; keywords are not excluded here, `expr`
// separates them from ordinary identifiers afterwards.
let identifierString : Parser<string> = // ...

// Continues parsing after the identifier string `id` has been parsed.
let identifierRest (id: string) : Parser<Expr> = ...

let number : Parser<Expr> = // ... (parser for floating-point number)

// The "...Rest" parsers parse what follows the respective leading
// keyword or char, which is consumed by `expr` before they are applied.
let ifThenElseRest   : Parser<Expr> = // ...
let letBindingRest   : Parser<Expr> = // ...
let exprInParensRest : Parser<Expr> = // ...

// The parser after this comment is a replacement for
//     let identifierStringButNoKeyword =
//         (* implementation like identifier parser in the first example above *)
//
//     let identifier   : Parser<Expr> = identifierStringButNoKeyword
//                                       >>= identifierRest
//
//     let ifThenElse   : Parser<Expr> = str "if"  >>. ifThenElseRest
//     let letBinding   : Parser<Expr> = str "let" >>. letBindingRest
//     let exprInParens : Parser<Expr> = str "("   >>. exprInParensRest
//
//     let expr = choice [identifier
//                        number
//                        ifThenElse
//                        exprInParens
//                        // ...
//                       ]
//
let expr : Parser<Expr> =
  fun stream ->
    // Remember the state tag, so that we can tell below whether
    // identifierString changed the stream state before it failed.
    let stateTag = stream.StateTag
    let reply = identifierString stream
    if reply.Status = Ok then
      // Dispatch on the parsed string: a keyword selects its dedicated
      // "rest" parser, any other string is handled as an identifier.
      match stringToKeyword reply.Result with
      | Keyword.None -> identifierRest reply.Result stream
      | Keyword.If   -> ifThenElseRest stream
      | Keyword.Let  -> letBindingRest stream
      // ...
    elif reply.Status = Error && stateTag = stream.StateTag then // no identifier
      // The stream state is unchanged, so we can select one of the
      // remaining alternatives by looking at the next char.
      match stream.Peek() with
      | '(' -> stream.Skip(); exprInParensRest stream
      | c when isDigit c -> number stream
      // ...
    else // error within identifier string
      // Propagate the status and error messages with the Expr result type.
      Reply(reply.Status, reply.Error)

================================================ FILE: Doc/html/users-guide/running-parsers-on-input.html ================================================ Running parsers on input

5.2 Running parsers on input

While it is not difficult to construct a CharStream instance yourself, then apply a parser function to the CharStream, then interpret the returned Reply value and finally dispose the CharStream again, it takes less effort to instead use one of the several runParser... functions from the CharParsers module.

Among the runParser... functions run is the most convenient for simple testing purposes:

// Applies a parser with a unit user state to the given input string.
val run: Parser<'a, unit> -> string -> ParserResult<'a,unit>

run applies the parser given as the first argument to a CharStream constructed from the string argument and then captures the return value as a ParserResult value. The ParserResult type is a simple discriminated union that is a bit more convenient to interpret than the Reply values returned by Parser functions.

For example:

> run pint32 "0xff";;
val it : ParserResult<int32,unit> = Success: 255

> run pint32 "0xgf";;
val it : ParserResult<int32,unit> = Failure:
Error in Ln: 1 Col: 3
0xgf
  ^
Expecting: hexadecimal digit

The text messages displayed in these examples after the = signs are the default string representations of the returned ParserResult values, just like they are printed in the F# Interactive console. The reference documentation describes the two union cases Success and Failure of the ParserResult type in more detail.

run only supports parser functions with no user state, i.e. with a unit user state. If you want to test parsers that depend on a user state, you will need to use one of the other runParser... functions, e.g. runParserOnString. Please see the reference for more details on the runParser... functions.

Note that the runParser... functions are primarily meant for the “end‐users” of parsers, i.e. those users that apply an aggregate parser on the content of a complete input stream. This is a situation different from the one where you implement a Parser function yourself. In the latter case you typically work directly with the input CharStream and output Reply values.

================================================ FILE: Doc/html/users-guide/tips-and-tricks.html ================================================ Tips and tricks

5.13 Tips and tricks

5.13.1 Parallel parsing

If your parser grammar is suitable for parallel parsing, parallelizing the parser has the potential to dramatically accelerate parsing on multi‐core machines. In the following we will briefly discuss requirements and strategies for parallelizing an FParsec parser.

For a parser grammar to be well suited for parallel parsing, the grammar and the typical input must satisfy the following two criteria:

  • Parts of the input must be independently parseable, i.e. parts must be parseable without knowledge about the other parts.
  • These parts must be large enough and easily enough identifiable within the total input.

Often, the easiest and most beneficial way to parallelize the parsing stage of an application is to parse multiple input files in parallel. In the simplest case you have multiple independent “compilation units” that can be parsed in parallel. This works even for C/C++, where a badly designed preprocessor generally makes efficient parsing quite hard. In many programming languages and markup languages you can also parse in parallel files that are “included”, “opened” or “imported” within source files. However, this usually only works if the language allows such includes only at well‐defined points in the grammar. In languages like C/C++, where the unstructured text content of other files can be included at essentially arbitrary positions in the source, parsing the included files in parallel is generally quite hard. (In C/C++ it’s even hard to avoid parsing the same file multiple times when it is included multiple times).

If you’re dealing with large input files or very slow parsers, it might also be worth trying to parse multiple sections within a single file in parallel. For this to be efficient there must be a fast way to find the start and end points of such sections. For example, if you are parsing a large serialized data structure, the format might allow you to easily skip over segments within the file, so that you can chop up the input into multiple independent parts that can be parsed in parallel. Another example could be a programming language whose grammar makes it easy to skip over a complete class or function definition, e.g. by finding the closing brace or by interpreting the indentation. In this case it might be worth not to parse the definitions directly when they are encountered, but instead to skip over them, push their text content into a queue and then to process that queue in parallel.

Here are some tips for parallel parsing with FParsec:

  • All FParsec parsers are thread‐safe and can be safely applied concurrently to different CharStream instances, as long as you don’t introduce mutable shared state yourself.
  • CharStream instances are not thread‐safe and a single instance must not be accessed concurrently.
  • However, you can call the CreateSubstream method to create a substream for a CharStream. A CharStream and its substreams can be safely accessed concurrently.
  • If you want to parse multiple files in parallel, you should also create the CharStream instances in parallel, because the CharStream constructors that accept file paths or binary streams perform I/O operations that benefit from parallelization.
  • If you parallelize your parser, consider introducing an option for switching off parallel execution, since debugging a multi‐threaded parser is harder than debugging a single‐threaded one.

5.13.2 Dispatching parsers through a dictionary

A technique that is often useful for making a parser modular and easily extensible is to store Parser functions in dictionaries and then to delegate parsing to one of the Parser functions in the dictionary based on the input.

For example, a parser for a markup language could be implemented by defining a generic tag parser that delegates the parsing of the tagged content to a specific parser for the respective tag name. The following code shows how this could be done:

open FParsec
open System.Collections.Generic

// For simplicity we don't define a full-blown markup language here,
// just a parser for two simple non-recursive "tags" in square brackets.
// The chapter on "parsing with user state" contains a slightly more developed
// sample for a markup language, though without a dictionary-based tag parser.

// The result type of the tag parsers: one union case per supported tag.
type Tag = Bold of string
         | Url of string * string

// We store the tag parser dictionary in the user state, so that we can
// concurrently parse multiple input streams with the same parser instance
// but different tag dictionaries.

type TagParserMap = Dictionary<string,Parser<Tag,UserState>>

and UserState = {
        TagParsers: TagParserMap
     }

// The dictionary that the tag parsers defined below are added to.
let defaultTagParsers = TagParserMap()

// The two predicates that `tag` passes (in this order) to
// ReadCharsOrNewlinesWhile for reading the tag name.
let isTagNameChar1 = fun c -> isLetter c || c = '_'
let isTagNameChar = fun c -> isTagNameChar1 c || isDigit c
// The error message that `tag` returns when the input doesn't start with '['.
let expectedTag = expected "tag starting with '['"

/// Skips an opening '[', reads a tag name and then delegates to the parser
/// that is registered for this name in the TagParsers dictionary of the
/// user state. Fails if there is no '[', no tag name or no registered parser.
let tag : Parser<Tag, UserState> =
  fun stream ->
    if not (stream.Skip('[')) then
        Reply(Error, expectedTag)
    else
        let tagName =
            stream.ReadCharsOrNewlinesWhile(isTagNameChar1, isTagNameChar, false)
        if tagName.Length = 0 then
            Reply(Error, expected "tag name")
        else
            match stream.UserState.TagParsers.TryGetValue(tagName) with
            | true, tagParser -> tagParser stream
            | _ ->
                // Move back to the beginning of the tag name, so that the
                // error is reported at that position.
                stream.Skip(-tagName.Length)
                Reply(Error, messageError ("unknown tag name '" + tagName + "'"))

let str s = pstring s
let ws = spaces
// The text content of a tag: a run of chars other than '[' and ']'.
let text = manySatisfy (function '['|']' -> false | _ -> true)

// The registered parsers are applied by `tag` directly after the tag name,
// so they start with the remainder of the opening tag.

// "[b]text[/b]"
defaultTagParsers.Add("b", str "]" >>. text .>> str "[/b]" |>> Bold)

// "[url=address]text[/url]"
defaultTagParsers.Add("url",      (str "=" >>. manySatisfy ((<>)']') .>> str "]")
                             .>>. (text .>> str "[/url]")
                             |>> Url)

// Runs `tag` on the given string with a user state that holds its own copy
// of the default tag parser dictionary.
let parseTagString str =
    runParserOnString tag {TagParsers = TagParserMap(defaultTagParsers)} "" str

> parseTagString "[b]bold text[/b]";;
val it : ParserResult<Tag,UserState> = Success: Bold "bold text"

> parseTagString "[url=http://tryfsharp.org]try F#[/url]";;
val it : ParserResult<Tag,UserState> =
  Success: Url ("http://tryfsharp.org","try F#")

> parseTagString "[bold]test[/bold]";;
val it : ParserResult<Tag,UserState> = Failure:
Error in Ln: 1 Col: 2
[bold]test[/bold]
 ^
unknown tag name 'bold'

5.13.3 Memoizing parsers

If your parser implementation backtracks a lot when parsing typical inputs and as a result repeatedly applies some Parser functions at the same input position, it can be beneficial to memoize these Parser functions, i.e. cache their results for each input position.

In the extreme case, memoization can mean the difference between linear and exponential execution times. In practice, FParsec is typically used for formal grammars that hardly require any extensive backtracking, so that memoization would usually only have a negative effect on performance.

In situations where you really do need to memoize parsers, you can work with a generic memoize combinator like the one in the following example:

open FParsec
open System.Collections.Generic

// We need a place to store the cached parser results. Since we want parser
// instances to be able to concurrently access different caches for different
// input streams, we will use a user state variable for this purpose. Since we
// don't want the backtracking to undo changes to the cache,  we will use a
// mutable dictionary for this purpose.

type UserState = {
        // Maps a MemoKey to the boxed Reply that `memoize` stored for it.
        MemoCache: Dictionary<MemoKey, obj>
        // ...
    }

// An entry in the MemoCache must be uniquely identified by its MemoKey. In this
// example the MemoKey includes the stream index value and a reference to the
// memoized parser instance. Should the result of a memoized Parser function in
// your implementation also depend on the UserState value, you will have to
// extend the MemoKey with a UserState member. Similarly, if you want to cache
// results for more than one stream in the MemoCache, you'll have to extend the
// MemoKey with an identifier for the stream.

and [<CustomEquality; NoComparison>]
    MemoKey = struct
        // Captures the parser reference and the current index of the stream.
        new (parser: obj, stream: CharStream) =
            {Parser = parser; Index = stream.Index}

        val Parser: obj
        val Index: int64

        // Two keys are equal if both the index and the parser are equal.
        interface System.IEquatable<MemoKey> with
            member t.Equals(other: MemoKey) =
                t.Index = other.Index && t.Parser = other.Parser

        override t.Equals(otherObj: obj) =
            match otherObj with
            | :? MemoKey as other ->
                t.Index = other.Index && t.Parser = other.Parser
            | _ -> false

        // Only the index contributes to the hash code, so keys for
        // different parsers at the same index share a hash code.
        override t.GetHashCode() = int32 t.Index
end

/// Returns a version of the argument parser that caches its replies in the
/// MemoCache of the user state. A cache entry is identified by the parser
/// instance and the stream index at which the parser is applied.
// NOTE(review): on a cache hit only the stored reply is returned; the stream
// is not moved to the position where `p` left it on the first run. Confirm
// whether the stream state after the first run needs to be cached and
// restored as well.
let memoize (p: Parser<'a,UserState>) : Parser<'a,UserState> =
    fun stream ->
        let cache = stream.UserState.MemoCache
        let cacheKey = MemoKey(p, stream)
        match cache.TryGetValue(cacheKey) with
        | true, cachedReply -> cachedReply :?> Reply<'a>
        | _ ->
            // first application at this index: run the parser and remember the reply
            let freshReply = p stream
            cache.Add(cacheKey, box freshReply)
            freshReply

5.13.4 Parsing F# infix operators

F# supports user‐definable infix operators whose precedence and associativity depend on the first chars of the operator name. For example, the F# spec states that operators that start with * are left‐associative, while operators that start with ** are right‐associative and have a higher precedence, so that 1*2*.3**4**.5 is parsed as ((1*2)*.(3**(4**.5))).

Since the precedence and associativity rules are fixed, you can parse F# expressions with a static operator precedence grammar, i.e. without having to reconfigure the parser when a new operator is defined in the parsed source code. However, it’s probably not immediately obvious how to do this with FParsec’s OperatorPrecedenceParser class (OPP), since the OPP normally expects all possible operators to be (individually) specified before they are used.

The trick to supporting whole classes of operator names without having to reconfigure the OPP at run‐time is to shift part of the operator parsing to the after‐string‐parser, like in the following example:

open FParsec

// The AST produced by the example parser.
type Expr = InfixOpExpr of string * Expr * Expr
          | Number of int

let ws  = spaces  // whitespace parser

// The chars accepted after the operator prefixes that are registered below.
let isSymbolicOperatorChar = isAnyOf "!%&*+-./<=>@^|~?"
// The after-string-parser: reads the remaining operator chars, then applies ws.
let remainingOpChars_ws = manySatisfy isSymbolicOperatorChar .>> ws

// The after-string-parser result type is string, so that the remaining
// operator chars are passed on to the operator's mapping function.
let opp = new OperatorPrecedenceParser<Expr, string, unit>()
opp.TermParser <- pint32 .>> ws |>> Number

// Registers with opp an infix operator whose name starts with `prefix`.
// The operator chars following the prefix are read by the
// after-string-parser and appended to the prefix in the resulting AST node.
let addSymbolicInfixOperators prefix precedence associativity =
    let infixOp =
        InfixOperator(prefix, remainingOpChars_ws, precedence, associativity, (),
                      fun opRest lhs rhs -> InfixOpExpr(prefix + opRest, lhs, rhs))
    opp.AddOperator(infixOp)

// the operator definitions:
addSymbolicInfixOperators "*"  10 Associativity.Left
addSymbolicInfixOperators "**" 20 Associativity.Right
// ...

> run opp.ExpressionParser "1*2*.3**4**.5";;
val it : ParserResult<Expr,unit> = Success
InfixOpExpr
  ("*.", InfixOpExpr ("*", Number 1, Number 2),
         InfixOpExpr ("**", Number 3, InfixOpExpr ("**.", Number 4, Number 5)))

If you use the after‐string‐parser in this manner for operators that can lead to operator conflicts in the input, e.g. non‐associative operators, then you also need to replace the default OperatorConflictErrorFormatter, since otherwise the default formatter may print truncated operator names:

addSymbolicInfixOperators "<"  1 Associativity.None
> run opp.ExpressionParser "1 <= 2 <=. 3";;
val it : ParserResult<Expr,unit> = Failure:
Error in Ln: 1 Col: 9
1 <= 2 <=. 3
        ^
The infix operator '<' (precedence: 1, non-associative) conflicts with the
infix operator '<' (precedence: 1, non-associative) on the same line at column 3.

An error formatter that prints the full operator names could look like the following:

// Builds the conflict message from the full operator names, i.e. the
// operator string plus the chars read by the after-string-parser.
opp.OperatorConflictErrorFormatter <-
  fun (previousPos, previousOp, previousAfterString) (_, currentOp, currentAfterString) ->
    let currentName  = currentOp.String + currentAfterString
    let previousName = previousOp.String + previousAfterString
    sprintf "The operator '%s' conflicts with the previous operator '%s' at %A."
            currentName previousName previousPos
    |> messageError
> run opp.ExpressionParser "1 <= 2 <=. 3";;
val it : ParserResult<Expr,unit> = Failure:
Error in Ln: 1 Col: 9
1 <= 2 <=. 3
        ^
The operator '<=.' conflicts with the previous operator '<=' at (Ln: 1, Col: 3).
================================================ FILE: Doc/html/users-guide/where-is-the-monad.html ================================================ Where is the monad?

5.10 Where is the monad?

If you have previously used Haskell’s Parsec library or an early version of FParsec you’re probably wondering by now where the “monadic syntax” has gone. There’s also a chance that you’ve stumbled upon FParsec while searching for a “monadic parser library” for F#/.Net and you’re now wondering whether FParsec actually is one.

To answer these questions right away: FParsec supports a monadic parser construction syntax, but this syntax is only an optional feature, not the foundation of the library design. FParsec doesn’t use the monadic syntax internally and we no longer recommend using it for new parser projects when performance is a concern.

5.10.1 An example using the monadic syntax

With the monadic syntax you can, for example, write a parser for a pair of floating‐point numbers as follows:

open FParsec

let ws = spaces // whitespace parser

// Skips the given string and then applies ws.
let str_ws str = parse {do! skipString str
                        do! ws
                        return ()}

// Parses a floating-point number, applies ws and returns the number.
let number_ws = parse {let! number = pfloat
                       do! ws
                       return number}

// Parses two numbers between parentheses and returns them as a tuple.
let pairOfNumbers = parse {do! str_ws "("
                           let! number1 = number_ws
                           let! number2 = number_ws
                           do! str_ws ")"
                           return (number1, number2)}

We’ll explain how the F# compiler handles the parse {...} expressions in the next section. For now, just compare the previous implementation with the following one using the usual FParsec combinators:

open FParsec

let ws = spaces // whitespace parser

// Skips the given string and then applies ws.
let str_ws str = skipString str >>. ws

// Parses a floating-point number, applies ws and returns the number.
let number_ws = pfloat .>> ws

// Parses two numbers between parentheses and returns them as a tuple.
let pairOfNumbers = between (str_ws "(") (str_ws ")")
                            (tuple2 number_ws number_ws)

The latter implementation is obviously more concise, but – at least for users without prior exposure to FParsec – the first implementation is probably a bit more intuitive and self‐explanatory. What makes the first implementation so intuitive is that the syntax of the parse {...} expressions is a) very close to what developers are used to from their normal work with F# and b) expressive enough that it obviates the need for many of FParsec’s basic combinators. Unfortunately, the intuitiveness of the monadic syntax comes at the price of a large performance penalty.

5.10.2 How the monadic syntax works

To explain how the monadic syntax works, we need to take a look at how the F# compiler translates the parse {...} expressions.

The foundation for the monadic syntax is the >>= combinator introduced in section 5.4.5:

val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

This operator takes a parser and a function returning a parser as arguments. The combined parser p >>= f first applies the parser p to the input, then it applies the function f to the result returned by p and finally it applies the parser returned by f to the input. As we explained in section 5.4.5, this way to combine parsers is powerful enough that we can express many other sequencing combinators in terms of >>= and preturn.

For example, we could implement the pipe3 combinator for sequentially applying three parsers as follows:

// Applies p1, p2 and p3 in sequence and returns f applied to their results.
// Each result is bound to the argument of the following closure.
let pipe3 p1 p2 p3 f =
    p1 >>= fun x1 ->
             p2 >>= fun x2 ->
                      p3 >>= fun x3 ->
                               preturn (f x1 x2 x3)

Directly using the >>= and preturn combinators obviously leads to somewhat unwieldy and unreadable expressions. Fortunately, F#’s computation expressions allow us to rewrite this expression in a more intuitive way:

// The same combinator written as a computation expression: every let!
// binds the result of a parser, return yields the combined result.
let pipe3 p1 p2 p3 f =
    parse {let! x1 = p1
           let! x2 = p2
           let! x3 = p3
           return f x1 x2 x3}

The parse object that we reference in this and other code snippets of this chapter is a so‐called “builder” object for computation expressions. It is defined in FParsec’s Primitives module. Using the methods of this object, the F# compiler translates the computation expression in the curly braces to the following equivalent expression:

// The desugared form of the computation expression: every let! becomes a
// call to parse.Bind, return becomes a call to parse.Return and the whole
// expression is wrapped in a call to parse.Delay.
let pipe3 p1 p2 p3 f =
    parse.Delay(fun () ->
      parse.Bind(p1, fun x1 ->
        parse.Bind(p2, fun x2 ->
          parse.Bind(p3, fun x3 ->
            parse.Return(f x1 x2 x3)))))

When we replace the parse object method calls with the respective method bodies, we will see that this definition is equivalent to our original definition using >>= and preturn.

The Bind, Return and Delay methods of the parse object are defined as:

    // Bind combines the parser p with the parser-returning function f via >>=.
    member t.Bind(p, f) = p >>= f
    // Return turns a value into a parser via preturn.
    member t.Return(x) = preturn x
    // Delay defers the call to f until the returned parser is applied to a stream.
    member t.Delay(f:(unit -> Parser<'a,'u>)) = fun stream -> (f ()) stream

Substituting these method bodies into the previous expression yields an expression that is very similar to the original one (except for the additional indirection introduced by the Delay method[1]):

// The definition after substituting the Bind, Return and Delay method
// bodies; the outer `fun stream -> ... stream` stems from Delay.
let pipe3 p1 p2 p3 f =
    fun stream ->
      (p1 >>= fun x1 ->
                p2 >>= fun x2 ->
                         p3 >>= fun x3 ->
                                  preturn (f x1 x2 x3)) stream

In summary, the parse {...} syntax is syntactic sugar for defining parsers with the >>= operator. The expressiveness of this syntax stems from the power of the >>= operator.

5.10.3 The term “monad”

A function with a signature like the one of the >>= operator is often called “bind”. The above examples make it obvious why: the >>= combinator binds the result of the parser on the left‐hand side to the function argument on the right‐hand side.

The Parser type together with the >>= and preturn operations constitute a monad, which is an abstraction in type theory that denotes this kind of combination of a generic type with associated bind and return operations.

Discussing the theoretical background of monads would be outside the scope of this user’s guide. For our purposes it is enough to note that the monad abstraction is so useful for certain applications that F# comes with built‐in syntax support for monadic expressions. FParsec utilizes this language feature (computation expressions) to enable parse {...} expressions.

Be assured that you don’t need to know anything about monads in general in order to use FParsec’s parse {...} expressions. To fully understand this feature all you need to know is how the F# compiler translates parse {...} expressions into normal code.

Besides let!, do! and return there are some more language constructs that are supported inside parse {...} expressions. Please refer to the reference documentation for more information.

5.10.4 Why the monadic syntax is slow

Compared to parsers implemented with only the usual FParsec operators and functions, parsers implemented with parse {...} expressions can be up to several times slower.

The relatively bad performance can be directly attributed to the way parse {...} expressions are compiled. As you have seen above, a parse {...} expression is simply translated into a series of nested closures that are chained through calls to the >>= operator. With the current compiler technology and the current implementation of FParsec this introduces some significant overhead.

Every time a Parser function constructed with the parse {...} syntax is called:

  • Two function closures get newly instantiated for each invocation of the >>= operator: the closure that is passed as the second argument to >>= and the closure that is returned by >>=.
  • Any parser created inside a parse {...} expression gets (re‐)created every time execution reaches that point in the expression.

In principle, you can avoid the overhead described in the second point by moving the construction of parser functions out of the parse {...} expression.

For example, you can avoid the repeated construction of the skipString parsers in

// Both skipString parsers are constructed inside the parse {...} expression.
let numberInParens = parse {do! skipString "("
                            let! number = pfloat
                            do! skipString ")"
                            return number}

by rewriting the code as

// The skipString parsers are constructed once, outside the parse {...}
// expression, and are only referenced inside it.
let parenOpen = skipString "("
let parenClose = skipString ")"
let numberInParens = parse {do! parenOpen
                            let! number = pfloat
                            do! parenClose
                            return number}

However, if you wanted to factor out any parser construction from a parse {...} expression, you’d also have to factor out any use of parser combinators, which would take away a lot from the attractiveness of the syntax.

If performance is not that important for your application, you can just ignore that a parser like skipString "(" is repeatedly constructed, since its construction is relatively cheap. But if you do the same for parsers based on regex or anyOf, where the construction potentially involves some relatively expensive compilation or runtime code generation, you might be surprised just how slow your parsers can become.

Because of the described performance issues, we recommend not to use parse {...} expressions and instead work with FParsec’s rich set of operators and other combinators. Not only does the operator‐based notation (which is used everywhere else in FParsec’s documentation) lead to faster parsers, it also allows for more concise parser code with a higher signal‐to‐noise ratio.

Footnotes:
[1] The computation expression specification does not require a Delay method. So, we could avoid the overhead associated with the additional indirection by removing the Delay method from the ParserCombinator class. However, this would make the behaviour of parse expressions somewhat counter‐intuitive, as the behaviour would differ from the behaviour of F#’s seq and async expressions.
================================================ FILE: Doc/misc/removed-many-variants.fs ================================================
open FParsec

// The following optimized variants of the `many`, `sepBy`, `sepEndBy` and
// `manyTill` combinators were removed in the update to 0.9 of FParsec.
// Each one is expressed here through the `Inline` helper class.
//
// Naming scheme shared by all groups:
//   ...Rev    - conses every parsed result onto a list, so the list is in reverse parse order
//   ...Fold   - threads an accumulator through `f`, starting from `acc0`
//   ...Reduce - combines the parsed results with `f`, starting from the first result
// The variants without a `1` in their name additionally pass `resultForEmptySequence`.
//
// In the Fold/Reduce variants the curried `f` is adapted once up front and the
// lambdas call it through `Invoke` with both arguments.

// ---------------------------------------------------------------------------
// many
// ---------------------------------------------------------------------------

let manyRev p =
    Inline.Many((fun first -> [first]),
                (fun reversed item -> item::reversed),
                (fun reversed -> reversed),
                p,
                resultForEmptySequence = fun () -> [])

let manyFold acc0 f p =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.Many((fun first -> folder.Invoke(acc0, first)),
                (fun state item -> folder.Invoke(state, item)),
                (fun state -> state),
                p,
                resultForEmptySequence = fun () -> acc0)

let manyReduce f altX p =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.Many((fun first -> first),
                (fun reduced item -> combine.Invoke(reduced, item)),
                (fun reduced -> reduced),
                p,
                resultForEmptySequence = fun () -> altX)

// ---------------------------------------------------------------------------
// many1 (no `resultForEmptySequence` is passed)
// ---------------------------------------------------------------------------

let many1Rev p =
    Inline.Many((fun first -> [first]),
                (fun reversed item -> item::reversed),
                (fun reversed -> reversed),
                p)

let many1Fold acc0 f p =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.Many((fun first -> folder.Invoke(acc0, first)),
                (fun state item -> folder.Invoke(state, item)),
                (fun state -> state),
                p)

let many1Reduce f p =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.Many((fun first -> first),
                (fun reduced item -> combine.Invoke(reduced, item)),
                (fun reduced -> reduced),
                p)

// ---------------------------------------------------------------------------
// sepBy (the separator result is ignored by the fold step)
// ---------------------------------------------------------------------------

let sepByRev p sep =
    Inline.SepBy((fun first -> [first]),
                 (fun reversed _ item -> item::reversed),
                 (fun reversed -> reversed),
                 p, sep,
                 resultForEmptySequence = fun () -> [])

let sepByFold acc0 f p sep =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> folder.Invoke(acc0, first)),
                 (fun state _ item -> folder.Invoke(state, item)),
                 (fun state -> state),
                 p, sep,
                 resultForEmptySequence = fun () -> acc0)

let sepByReduce f altX p sep =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> first),
                 (fun reduced _ item -> combine.Invoke(reduced, item)),
                 (fun reduced -> reduced),
                 p, sep,
                 resultForEmptySequence = fun () -> altX)

// ---------------------------------------------------------------------------
// sepBy1 (no `resultForEmptySequence` is passed)
// ---------------------------------------------------------------------------

let sepBy1Rev p sep =
    Inline.SepBy((fun first -> [first]),
                 (fun reversed _ item -> item::reversed),
                 (fun reversed -> reversed),
                 p, sep)

let sepBy1Fold acc0 f p sep =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> folder.Invoke(acc0, first)),
                 (fun state _ item -> folder.Invoke(state, item)),
                 (fun state -> state),
                 p, sep)

let sepBy1Reduce f p sep =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> first),
                 (fun reduced _ item -> combine.Invoke(reduced, item)),
                 (fun reduced -> reduced),
                 p, sep)

// ---------------------------------------------------------------------------
// sepEndBy (same as sepBy, plus `separatorMayEndSequence = true`)
// ---------------------------------------------------------------------------

let sepEndByRev p sep =
    Inline.SepBy((fun first -> [first]),
                 (fun reversed _ item -> item::reversed),
                 (fun reversed -> reversed),
                 p, sep,
                 separatorMayEndSequence = true,
                 resultForEmptySequence = fun () -> [])

let sepEndByFold acc0 f p sep =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> folder.Invoke(acc0, first)),
                 (fun state _ item -> folder.Invoke(state, item)),
                 (fun state -> state),
                 p, sep,
                 separatorMayEndSequence = true,
                 resultForEmptySequence = fun () -> acc0)

let sepEndByReduce f altX p sep =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> first),
                 (fun reduced _ item -> combine.Invoke(reduced, item)),
                 (fun reduced -> reduced),
                 p, sep,
                 separatorMayEndSequence = true,
                 resultForEmptySequence = fun () -> altX)

// ---------------------------------------------------------------------------
// sepEndBy1 (no `resultForEmptySequence` is passed)
// ---------------------------------------------------------------------------

let sepEndBy1Rev p sep =
    Inline.SepBy((fun first -> [first]),
                 (fun reversed _ item -> item::reversed),
                 (fun reversed -> reversed),
                 p, sep,
                 separatorMayEndSequence = true)

let sepEndBy1Fold acc0 f p sep =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> folder.Invoke(acc0, first)),
                 (fun state _ item -> folder.Invoke(state, item)),
                 (fun state -> state),
                 p, sep,
                 separatorMayEndSequence = true)

let sepEndBy1Reduce f p sep =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.SepBy((fun first -> first),
                 (fun reduced _ item -> combine.Invoke(reduced, item)),
                 (fun reduced -> reduced),
                 p, sep,
                 separatorMayEndSequence = true)

// ---------------------------------------------------------------------------
// manyTill (the result of `endp` is ignored when building the final value)
// ---------------------------------------------------------------------------

let manyTillRev p endp =
    Inline.ManyTill((fun first -> [first]),
                    (fun reversed item -> item::reversed),
                    (fun reversed _ -> reversed),
                    p, endp,
                    resultForEmptySequence = fun _ -> [])

let manyTillFold acc0 f p endp =
    let folder = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.ManyTill((fun first -> folder.Invoke(acc0, first)),
                    (fun state item -> folder.Invoke(state, item)),
                    (fun state _ -> state),
                    p, endp,
                    resultForEmptySequence = fun _ -> acc0)

let manyTillReduce f altX p endp =
    let combine = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    Inline.ManyTill((fun first -> first),
                    (fun reduced item -> combine.Invoke(reduced, item)),
                    (fun reduced _ -> reduced),
                    p, endp,
                    resultForEmptySequence = fun _ -> altX)
================================================ FILE: Doc/src/changelog.txt ================================================ [section Changelog] [no-subsection-numbers] [section#v2_0 Version 2.0, 2022-11-01] - Dropped support for .NET Framework 4.5 and PCL and switched to using .NET 6. - Changed NuGet build to always enable code signing. [url "https://github.com/stephan-tolksdorf/fparsec/pull/55" Contributed] by Radek Krahl -- thanks Radek! - Norman Krämer [url "https://github.com/stephan-tolksdorf/fparsec/pull/54" fixed] an error in the `CharStream.Skip` documentation -- thanks Norman! - Nathan Adams [url "https://github.com/stephan-tolksdorf/fparsec/pull/56" fixed] a typo in the User's Guide -- thanks Nathan! - Theodore Tsirpanis [url "https://github.com/stephan-tolksdorf/fparsec/pull/92" optimized and cleaned-up] the codebase, taking advantage of new framework and langauge features and removing PCL support -- thanks Theodore! [/section] [section#v1_1_1 Version 1.1.1, 2020-02-01] - Fixed NuGet build to target the AnyCPU platform instead of the default platform of the build machine. Vadim Slynko and tpisciotta reported this issue -- thanks Vadim and tpisciotta! [/section] [section#v1_1 Version 1.1.0, 2020-01-05] - **Behaviour change**: `pfloat` now parses out-of-range finite values as plus or minus infinity instead of returning an error. This unifies the `pfloat` behaviour on all platforms after [url "https://docs.microsoft.com/en-us/dotnet/core/compatibility/2.2-3.0#floating-point-parsing-operations-no-longer-fail-or-throw-an-overflowexception" the behaviour change of `System.Double.Parse` on .NET Core 3]. 
- Enrico Sada [url "https://github.com/stephan-tolksdorf/fparsec/pull/38" modernized] the F# and C# project files and the build script for the NuGet packages -- thanks Enrico! - Added SourceLink support, which was prepared and championed by Cameron Taggart -- thanks Cameron! - Maxime Didier [url "https://github.com/stephan-tolksdorf/fparsec/pull/20" fixed] a bug in the Low-Trust version of the `CharStream` constructors that accept a file path argument: The stream's `Name` property wasn't initialized. Thanks Maxime! - Fixed missing parser definitions in the @Parsing JSON@ section of the tutorial spotted by Josh Quintus -- thanks Josh! - Andre Wesseling [url "https://github.com/stephan-tolksdorf/fparsec/pull/18" fixed] a parser definition in the @Where is the monad@ section of the User's Guide -- thanks Andre! - Frederik K. [url "https://github.com/stephan-tolksdorf/fparsec/pull/28" fixed] an error in the `ErrorMessage` documentation -- thanks Frederik! - Jonathan Roeber [url "https://github.com/stephan-tolksdorf/fparsec/pull/40" fixed] an error in the `previousCharSatisfiesNot` -- thanks Jonathan! - Vegard Løkken [url "https://github.com/stephan-tolksdorf/fparsec/pull/42" fixed] an error in the `unicodeSpaces` documentation -- thanks Vegard! [/section] [section#v1_0_3 Version 1.0.3, 2017-08-20] - Modern solution and project files for .NET Core and VS 2017 were added to the source folders. Huge thanks to [url "https://github.com/neoeinstein" Marcus Griep] for spearheading the effort to make FParsec .NET Standard compatible and contributing the new project and solution files! - The old build script for the NuGet packages was replaced by a PowerShell script that uses the new project files. - The FParsec NuGet package now contains assemblies for .NET Standard 1.6. - The non-netstandard assemblies of FParsec now reference the FSharp.Core 4.0.0.1 NuGet package, which should maximize compatibility when binding redirects aren't available. 
- A [url "https://github.com/stephan-tolksdorf/fparsec/blob/master/.vscode/tasks.json" [= .vscode/tasks.json]] file with some task definitions for Visual Studio Code was added. - The source repository was moved to GitHub. - Added a `stringsSepBy1` parser ([url "https://github.com/stephan-tolksdorf/fparsec/pull/4" contributed by Robin Munn] -- thanks Robin!). - Added a link to the [url "http://dmitriyvlasov.ru/publication/fparsec-tutorial/" Russian translation of the tutorial] by Dmitry Vlasov -- thanks Dmitry! - Fixed documentation typos. One was spotted by Brandon Dimperio, another by ZelteHonor -- thanks Brandon and ZelteHonor! - Renamed `CLR45` to `AGGRESSIVE_INLINING` to better match its purpose. [/section] [section#v1_0_2 Version 1.0.2, 2015-09-27] - replaced all uses of `[url "https://msdn.microsoft.com/en-us/library/hz49h034.aspx" Char.GetUnicodeCategory]` with `[url "https://msdn.microsoft.com/en-us/library/h6sx68ke.aspx" CharCodeInfo.GetUnicodeCategory]`, since the former may or may not track the current Unicode standard and the latter is the only one supported by the PCL API subset - updated the case folding, whitespace and XID property data tables to Unicode 8.0.0 - added a PCL Profile 259 version to the FParsec NuGet package - removed the Silverlight, VS9 and VS10 solution files and the Mono Makefile - updated the Lex & Yacc version of the =FSharpParsingSample= to use the [url "http://fsprojects.github.io/FsLexYacc/" `FsLexYacc`] NuGet packages - fixed documentation typos (two were spotted by Francois Nardon and Patrick McDonald -- thanks Francois and Patrick!) [/section] [section#v1_0_1 Version 1.0.1, 2013-06-25] - The maintainership of the FParsec NuGet package(s) was handed over from Ryan Riley, Huw Simpson, Cameron Taggart and Khan Thompson to Stephan Tolksdorf. Thanks Ryan, Huw, Cameron and Khan for creating and maintaining the previous versions of the NuGet package! 
- FParsec now has two @NuGet packages@, built with a new fsx script - fixed a [url "https://bitbucket.org/fparsec/main/pull-request/3/bug-fix-use-the-leaveopen-parameter-passed/diff" bug in one of the `CharStream` constructors] (reported and patched by Andrew Smith -- thanks Andrew!) - added `USE_STATIC_MAPPING_FOR_IS_ANY_OF` and `UNALIGNED_READS` as default compilation options in the Visual Studio projects (the default options now match the ones used by the "Big Data edition" NuGet package) - some minor code tweaking / micro-optimizations - fixed some minor documentation issues [/section] [section#v1_0 Version 1.0.0, 2012-07-19] - disabled code generation in `isAnyOf`, `isNoneOf`, `anyOf`, `skipAnyOf`, `noneOf` and `skipNoneOf` by default (you can reenable it using the new `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option) - annotated some `CharStream` methods with the .NET 4.5 `AggressiveInlining` option (see the new `CLR45` compilation option) - updated case folding and XID property tables to Unicode 6.1.0 - fixed two documentation typos (spotted by Rasmus Meldgaard and Kurt Schelfthout -- thanks Rasmus and Kurt!) [/section] [section#v0_9_2 Version 0.9.2, 2012-03-09] - fixed compilation in Visual Studio 11 Beta - added missing `ReturnFrom` member to `parse` builder object (reported by Kurt Schelfthout and Tomas Petricek -- thanks Kurt and Tomas!) - added workaround for .NET `ConsoleStream` [url "https://bitbucket.org/fparsec/main/issue/23/reading-from-systemio__consolestream-hangs" issue] (reported by Alexander Kahl -- thanks Alexander!) - set `AllowPartiallyTrustedCallers` and `SecurityTransparent` assembly attributes in LOW_TRUST NET4 build (as suggested by hammett -- thanks hammett!) - changed encoding of [= FParsecCS/Strings.cs] to UTF-8 (with signature) to fix Visual Studio build on machines with Japanese locale (the encoding issue was reported on [url "http://d.hatena.ne.jp/ZOETROPE"] -- thank you!) 
- fixed some documentation issues (incorporating feedback from Alexander Gelkin, Antoine Latter and Stephen Swensen -- thanks Alexander, Antoine and Stephen!) - added link to the [url "http://blog.livedoor.jp/gab_km/archives/1437534.html" Japanese translation of the tutorial] by Gab_km (thanks Gab_km!) [/section] [section#v0_9_1 Version 0.9.1, 2011-05-22] - added [= /nooptimizationdata] compiler flag as a workaround for an F# compiler [url "https://bitbucket.org/fparsec/main/issue/16/fparsec-needs-nooptimizationdata-to-avoid" issue] (reported by Michael Giagnocavo -- thanks Michael!) - fixed an [url "https://bitbucket.org/fparsec/main/issue/17/json-parser-fails-to-build" issue] in the JSON sample (reported by Ryan Riley -- thanks Ryan!) - fixed the error message formatting when an error line contains unaccounted newlines or ends with a combining character sequence - added warning to [^building-fparsec-with-mono installation notes] that the `regex` parser doesn't work on Mono (reported by Laurent Le Brun -- thanks Laurent!) - fixed some documentation issues (one of which was reported by Michael Giagnocavo -- thanks Michael!) 
[/section] [section#v0_9 Version 0.9.0, 2011-04-26] - @Highlights@ - @Changes to high-level API@ - [@ Removed variants of `many`, `sepBy`, `sepEndBy` and `manyTill`] - [@ Details on changes to `manyChars`, `manyCharsTill` and their variants] - @Changes to low-level API@ - @Background on low-level API changes@ [dl [ #Highlights#] [ - a new @tutorial@ and @user's guide@ - 2x performance improvements due to a refactored low-level API - new `identifier` parser for parsing identifiers based on Unicode XID syntax - new `StaticMapping` module for compiling static key to value mappings into optimized functions (supports `char`, `int` and `string` as key types) ] [[# Changes to high-level API]] [ - the modules `FParsec.Primitives`, `FParsec.CharParsers` and `FParsec.Error` are now automatically opened when the `FParsec` namespace is opened - new combinators `.>>.`, `.>>.?`, `notEmpty`, `stringsSepBy` - new parsers `identifier`, `[^unicodeSpaces-parsers unicodeSpaces[1]]`, `notFollowedByEof` - `whitespace` and `unicodeWhitespace` have been removed - `unicodeNewline` no longer recognizes the form feed char `'\f'` (`'\u000C'`) as a newline character - some variants of `many`, `sepBy`, `sepEndBy` and `manyTill` [^removed-variants-of-many-sepby-sependby-and-manytill have been removed] - the `...FoldApply` inline variants of `many`, `sepBy`, `sepEndBy` and `manyTill` have been consolidated in the `[^reference.Primitives.members.Inline FParsec.Primitives.Inline]` helper class - sequence parsers now throw a `System.InvalidOperationException` instead of a `System.Exception` to prevent an infinite loop - `anyOf`, `noneOf`, `isAnyOf` and `isNoneOf` now use runtime code generation (except in the [^low-trust-version Low-Trust version]). *If you run into performance issues after upgrading to version 0.9*, make sure that you don't unnecessarily recreate `anyOf` or `noneOf` parsers, see [^construct-parsers-once here] and [^why-the-monadic-syntax-is-slow here]. 
- `pstring`, `notFollowedByString` and similar parsers now have optimized code paths for argument strings with only 1 char - the behaviour of `manyChars` and `manyCharsTill` and their variants [^details-on-changes-to-manychars-manycharstill-and-their-variants has slightly changed] - the skip variants of `manyChars` and `manyCharsTill` [^removed-skip-variants-of-manyChars have been removed] - Some renamings and function signature changes: [table#renamings [[Old] [New]] [[`[no-auto-link restOfLine]`] [`restOfLine true`]] [[`[no-auto-link skipRestOfLine]`] [`skipRestOfLine true`]] [[`skipToEndOfLine`] [`skipRestOfLine false`]] [[`skipToString[CI] str n`] [`skipCharsTillString[CI] str false n`]] [[`[no-auto-link charsTillString][CI] str n`] [`charsTillString[CI] str true n`]] [[`[no-auto-link skipCharsTillString][CI] str n`] [`skipCharsTillString[CI] str true n`]] [[`followedByChar chr`] [`` if chr = '\r' || chr = '\n' then followedByNewline else followedByString (string chr) ``]] [[`notFollowedByChar chr`] [`` if chr = '\r' || chr = '\n' then notFollowedByNewline else notFollowedByString (string chr) ``]] [[`currentCharSatisfies f`] [`nextCharSatisfies f`]] [[`[no-auto-link nextCharSatisfies] f`] [`next2CharsSatisfy (fun _ c1 -> f c1)`]] ] - `OperatorPrecedenceParser` has changed: - all types have been moved from the `[no-auto-link FParsec.OperatorPrecedenceParser]` module into the main `FParsec` namespace - the operator types `InfixOp`, `PrefixOp`, ... classes have been renamed to `InfixOperator`, `PrefixOperator`, ... - `Assoc` has been renamed to `Associativity` - the "whitespace-parser" argument of the operator types has been [^Operator generalized into an "after-string-parser" argument] - if you previously used the `InfixOp'`, `PrefixOp'`, ... 
constructors to supply a mapping that read the text position of the parsed operator from the passed `[no-auto-link State]` instance, read [^get-position-with-after-string-parser this] - the `AddOperators` method has been removed, call `AddOperator` instead - the `OperatorConflictErrorFormatter` replaced the `OperatorConflictHandler` ] [[# Removed variants of `many`, `sepBy`, `sepEndBy` and `manyTill`]] [ The `...Rev`, `...Fold` and `...Reduce` variants of `many`, `sepBy`, `sepEndBy` and `manyTill` have been removed. If you previously used these variants, you can easily define them in your own code using the `|>>` combinator, as documented in the reference documentation for the previous version. For example: `` let manyRev p = many p |>> List.rev let manyFold acc0 f p = many p |>> List.fold f acc0 let manyReduce f defVal p = (many1 p |>> List.reduce f) <|>% defVal `` If you need *optimized* implementations, you can define them using the new `Inline` helper class. The file [url "https://github.com/stephan-tolksdorf/fparsec/blob/master/Doc/misc/removed-many-variants.fs" [= Doc/misc/removed-many-variants.fs]] contains optimized definitions for all removed variants. ] [[# Details on changes to `manyChars`, `manyCharsTill` and their variants]] [ The behaviour of all variants of `manyChars` and `manyCharsTill` has slightly changed. Now `manyChars cp` is equivalent to `many cp`, except that it returns a string instead of a char list. Previously, `manyChars cp` behaved like `many (attempt cp)`, i.e. it automatically backtracked if the char parser had failed after consuming input. The same change has been made to the behaviour of all other variants of `manyChars` and `manyCharsTill`. The new behaviour is more consistent with the rest of the library and allows a faster implementation with the new low-level API. There probably aren't many parsers that relied on the old behaviour. The behaviour change made the `skip` variants of `manyChars` and `manyCharsTill` obsolete, since e.g. 
`skipManyChars` would do exactly the same as `skipMany cp`. [#removed-skip-variants-of-manyChars Hence, the `skip` variants have been removed]. ] [[# Changes to low-level API]] [ - The old `[no-auto-link CharStream]` and `[no-auto-link State]` classes have been merged into a single `CharStream` class with a mutable interface. - `Parser` functions now take a `[^CharStream_1 CharStream<'u>\ ]` instance as the input argument. - The `Reply` type has been moved to the main `FParsec` namespace and no longer has a `[no-auto-link State]` member. - Parser state comparisons are now done with the help of the `CharStream`'s `StateTag`. - Various methods from the old `[no-auto-link CharStream.Iterator]` and `[no-auto-link State]` types have been renamed in the new `CharStream` class and have new signatures: - When you adapt old code, the following changes **require particular attention**: - the old `[no-auto-link Iterator.Read]` methods *did not* advance the input stream position, but the new `CharStream.Read` methods now *do* (as is the expected behaviour in traditional stream classes) - the old `[no-auto-link Read]()` is equivalent to the new `Peek()` - the old `[no-auto-link Read](int)` is equivalent to the new `PeekString(int)` - the old `[no-auto-link Peek]()` is equivalent to the new (and old) `[^Peek_int Peek](1)` - More renamings: - `Next`, `Advance` → `[^Skip-members Skip]` - `_Increment`, `_Decrement` → `[^SkipAndPeek-members SkipAndPeek]` - `[no-auto-link SkipRestOfLine]` with string output → `ReadRestOfLine` - `SkipCharOrNewlines` with string output → `ReadCharsOrNewlines` - `SkipToString` → `[^SkipCharsOrNewlinesUntilString-members SkipCharsOrNewlinesUntilString]` - `SkipToStringCI` → `[^SkipCharsOrNewlinesUntilCaseFoldedString-members SkipCharsOrNewlinesUntilCaseFoldedString]` - `ReadUntil` → `ReadFrom` - `[no-auto-link CharStream.FoldCase]` → `Text.FoldCase` - `[no-auto-link CharStream.NormalizeNewlines]` → 
`Text.NormalizeNewlines` - New `CharStream` methods: - `[^Skip-members Skip]` methods with char and string arguments - `SkipUnicodeWhitespace` - `SkipNewlineThenWhitespace` - The `ErrorMessage` and `ErrorMessageList` types are now defined in the C# library part. This allows us to implement full parsers in C#. The `FParsec.Error` module contains type abbreviations and active patterns that provide the familiar interface to F# clients. - All error messages used by built-in FParsec parsers are now defined in the C# classes `FParsec.Strings` and `FParsec.Errors`. This should simplify customization and internationalization efforts. ] [[# Background on low-level API changes]] [ Previously parsers were implemented as functions operating on an immutable parser state in the form of a `State` instance. A parser function received a `State` instance as the input and returned a `State` instance as part of its return value. Since `State` instances were immutable, a parser function had to create a new `State` instance to advance the input stream, e.g. by calling `state.Advance(2)`. This architecture was motivated by the desire to provide an API as "functional" as possible, an API that shields users from the underlying imperative/mutable nature of input streams. When FParsec originally started as a relatively close port of Haskell's Parsec library, this design felt like a natural fit for a functional parser library. However, later, when FParsec moved away from its Parsec roots (to improve performance and provide more features), it became increasingly clear that the immutable `CharStream`-`State`-design was the main obstacle preventing FParsec from reaching the performance of hand-optimized recursive-descent parsers. Initial tests with some quick prototypes revealed that the allocation and garbage collection of temporary `State` instances took up to 50% or more of the run time of typical parsers -- even though the `State` class was already heavily optimized. 
These tests also indicated that consolidating the stream and state classes into a classical imperative stream class simplified the overall library implementation and made the library source code more accessible to new users. The main drawback of the API change is that it requires modifications to practically all low-level parser code. Another drawback is that backtracking is slightly less convenient with the new low-level API (as the parser state has to be explicitly saved and restored, while previously one could just continue with an old state instance). Since FParsec's high-level API is only minimally affected by the change, the advantages seem to outweigh the costs. ] ] [/section] [no-auto-link] [section#v0_8_x Version 0.8.x, no release] [dl [New features/ improvements] [ - case-insensitive matching with `pstringCI`, `charsTillStringCI`, etc. (using the Unicode 1-to-1 case folding mappings for chars in the BMP) - various new parsers and combinators, including `restOfLine`, `skipToString`, `manySatisfyMinMax`, `manyStrings`, `withSkippedString` - new functions `runParserOnSubstring` and `runParserOnSubstream` - various performance improvements - Silverlight support - F# 1.9.6.16 compatibility ] [Design changes] [ - standardized on a single input stream type (`FParsec.CharStream`) and a single concrete parser state type (`FParsec.State`) - refactored the `Reply<_,_>`, `ErrorMessage` and `ParserError` types: * error replies now also contain a complete `State` * whether a parser has changed the state is now determined by checking the input and the output state for equality, instead of testing the `Consumed` flag * replaced the `Reply<_,_>.Flags` with a `Status` field * replaced the various helper functions for constructing a `Reply` with three overloaded `Reply<_,_>` constructors (with different arities) - all char parsers are now "newline aware", i.e. 
they normalize any of the three standard newline representations (`"\n"`, `"\r\n"`, `"\r"`) to "\n" and they properly increment the line count whenever they parse a newline; **this means that the behaviour of almost all char parsers has changed with regard to how newline chars are handled** ] [Bug fixes] [ - The `CharStream` class now uses the serialization API to persist the decoder state for backtracking purposes. Previously it relied on the decoder losing its state at block boundaries after a certain sequence of method calls. The previous approach works in practice for the .NET decoders of the standard unicode encodings and for simple stateless encodings like ASCII and ANSI, but it relies on undocumented behaviour and it does not work reliably for encodings like GB18030, ISO-2022 or ISCII. - In previous FParsec versions the `CharStream` file path/System.IO.Stream constructors failed with an `IndexOutOfRange` exception when the file/stream was empty and encoding detection was not turned off (reported by Vesa Karvonen - thanks Vesa!). - In previous FParsec versions the `NumberLiteral.String` returned by the `numberLiteral` parser included parsed suffix chars despite the documentation claiming the opposite. (The testing code was buggy too.) Applications that rely on this behaviour can now use the new `NumberLiteralOptions.IncludeSuffixCharsInString` to force the `numberLiteral` parser to include any suffix chars in the returned string. - Fixed behaviour of `>>=?`, `>>?` and `.>>?` when second parser fails with fatal error without changing the parser state. - Fixed behaviour of `nextCharSatisfies[Not]` when current "char" is a `"\r\n"` newline. 
] [Other breaking changes] [ - renamed the module `CharParser` to `CharParsers` - moved `CharParser.OperatorPrecedenceParser` into separate module - `FParsec.Primitives`: * **subtle change:** renamed `message` to `fail` and `fail` to `failFatally` * renamed `pair`, `triple` and `quad` to `tuple2`, `tuple3` and `tuple4` * renamed `manyFoldLeft` to `manyFold` and changed the argument order of the accumulator and function argument * removed `manyFoldRight` * renamed `count` to `parray` and changed the return type, renamed `skipCount` to `skipArray` * renamed `followedBy` and `notFollowedBy` to `followedByL` and `notFollowedByL` and introduced `followedBy` and `notFollowedBy` functions that take no second argument * moved `ParserResult<_>` to `CharParsers` and changed constructor arguments * removed applyParser * removed `|>>=`, now `>>=` automatically uses an optimized branch for uncurried functions * removed `endBy` and `endBy1` (`endBy p sep` can be replaced with `many (p .>> sep)` and `endBy1 p sep` with `many1 (p .>> sep)`) - `FParsec.CharParsers`: * renamed `manyTillString` to `charsTillString` * removed `applyParser` from the public interface * removed `getIndex`, `skip`, `registerNL`, `extract`, `regexp` (these low-level operations should be done directly through the `State<_>`/`CharStream.Iterator` interface) * removed `anyCharOrNL` (no longer needed, see design changes above) * removed `nSatisfy` (can be replaced with `manySatisfyMinMax`) * removed `unicodeDigit` and `unicodeNumber` (can be replaced with `satisfy System.Char.IsDigit` and `satisfy System.Char.IsNumber`) * moved the helper functions `expectedError`, `unexpectedError` etc. 
into the `Error` module - `FParsec.CharStream`: * string constructor takes more arguments * `Iterator.Peek(i)` now returns the `EndOfStreamChar` char instead of throwing an exception if the char peeked at lies before the beginning of the stream ] ] [/section] [section#v0_7_3_1 Version 0.7.3.1, 2009-02-26] - Fixed a bug in `CharParser.normalizeNewlines`/`CharStream.NormalizeNewlines`. This bug also affected the `skipped` and `manyTillString` parsers, which internally call `normalizeNewlines` to normalize the returned string. The bug was reported by Greg Chapman - thanks Greg! When given a multi-line string in which the lines are delimited by `"\r\n"` but the last line does not end in a newline, the buggy `normalizeNewlines` replaced the chars on the last line with `'\n'` chars. - Changed the signature of `Helper.SkipOverWhitespace`. [/section] [section#v0_7_3 Version 0.7.3, 2008-12-08] Breaking changes (all of which should have little or no impact on existing code bases): - `CharStream.Iterator` instances now compare equal if and only if they belong to the same `CharStream` and point to the same index (previously they only compared equal if their internal representations were identical) - the constructor argument of `Error.otherError` is now expected to be comparable with F#'s structural comparison function `compare`, see http://research.microsoft.com/fsharp/manual/spec2.aspx#_Toc207785725 - the signature of the second `ParserError.ToString` overload has changed - `CharParser.errorToString` and `printErrorLine` have been deprecated New features: - reimplemented the error formatting code in `FParsec.Error` - added new `State<_>.AdvanceTo` and `CharStream.Iterator.Advance` overloads - slightly modified the error reporting in `Primitives.sepEndBy` - some documentation fixes [/section] [section#v0_7_2 Version 0.7.2, 2008-11-17] - added `CharParser.OperatorPrecedenceParser` - changed the overflow checking in `pint32` such that it will not be affected by an expected future 
change in F#'s `int32 -> uint64` conversion behaviour - added `CharParser.pint16`, `puint16`, `pint8`, `puint8` - changed the signatures in CharParser.fsi to use the `Parser<_,_>` type abbreviation - fixed outdated documentation of `CharParser.expectedError` - some minor optimizations [/section] [section#v0_7_1 Version 0.7.1, 2008-09-29] Breaking changes: - renamed `Primitives.Reply._tag` member to `Flags` and gave it a proper enumeration type - `CharParser.State` is now a reference type - Removed `CharParser.State.Flags` member - deprecated `Primitives.reconstructError` [/section] [section#v0_7_0_1 Version 0.7.0.1, 2008-09-23] Breaking change: - changed the case of the `FParsec.Error.Pos` members (This wasn't already done in 0.7 because of an oversight.) [/section] [section#v0_7 Version 0.7.0, 2008-09-13] Bugfixes: - made `FParsec.Error.Pos` IComparable to prevent `ParserError.ToString` from throwing an exception under rare circumstances - corrected the argument checking for some `CharStream.Iterator` methods for very large arguments New features: - compatibility with the F# CTP release - a configurable parser for number literals: `CharParser.numberLiteral` - `CharParser.pfloat` now also parses `NaN`, `Infinity` and hexadecimal floating point literals as supported by IEEE754r, C99 and Java (but different from the hex representation supported by F#) - new helper functions `CharParser.floatToHexString`, `floatOfHexString`, `float32ToHexString` and `float32OfHexString` - integer parsers: `Charparser.pint32`, `puint64`, `puint32`, `puint64` - new sample: a JSON parser - various optimizations and some code cleanup - new `CharStream.Iterator` members `ReadUntil`, `Increment` and `Decrement` - new `State` member `AdvanceTo` - new function `Primitives.createParserForwardedToRef` - new combinator `|>>=` in `Primitives` Breaking changes: - renamed the parsers `char` and `string` to `pchar` and `pstring` (This is in deference to the built-in F# functions `char` and `string`, 
which weren't yet around when the first version of FParsec was released.) - changed the case of the properties of the `Reply` and `State` types (This reflects the emerging consensus in the F# community that all public members of types should be named in PascalCase.) - deprecated `State.AdvanceNL` (use the 3 parameter Advance overload instead) - deprecated the `Primitives` helper functions `isOk`, `isEmpty`, ... (the `Reply` properties `IsOk`, `IsEmpty`,... should be used instead) - deprecated the `CharParser` helper functions `matchChar`, `readChar`, ... (the `State.Iter` methods `Match`, `Read`, ... should be used instead) - deprecated `Primitives.option`, `<|>$` should be used instead - made `CharParser.CharList` internal (If you need this helper class for your code, just copy the implementation to your source.) - `State.Flags()` now has more bits (and fewer bits are reset on a position change) [/section] [section#v0_6 Version 0.6.0, 2008-05-20] - fixed a bug in `manyTillString` (the code keeping track of newlines was buggy) - fixed a bug in `CharParser.<?>` (the error reporting was inconsistent with `Primitives.<?>` in the rare case where `<?>` is applied inside an `attempt (...) <?> label` clause to a parser that returns an `EmptyOk` reply) - various changes for F# 1.9.4.15 - added `skipped` parser to `CharParser` - added `nextCharSatisfiesNot`, `prevCharSatisfiesNot`, `currCharSatisfies`, `currCharSatisfiesNot` to `CharParser` module; the behaviours of the existing `nextCharSatisfies` and `prevCharSatisfies` were slightly changed (see =fparsec.html= for more details) - added `TryWith` and `TryFinally` members to `Primitives.ParserCombinator` - added `triple` and `quad` parsers to `Primitives` module - set `CompilationRepresentationFlags.PermitNull` for `Error.ParserError` - various optimizations - some documentation fixes, including corrections for the docs of the `CharParser` error generation helper functions (`expectedError` etc.) 
[/section] [section#v0_5_1 Version 0.5.1, 2008-01-20] - added `pipe2`, `pipe3` and `pipe4` primitives - replaced `count` and `skipCount` primitives with optimized versions - minor optimizations in `spaces` and `spaces1` - added `pfloat` char parser - minor documentation fixes [/section] [section#v0_5 Version 0.5.0, 2008-01-15] - Major design change: all lazy computations were removed and the types `Output` and `Reply` unified. The new implementation is considerably simpler and also compiles with F# 1.9.3.7. - Fixed a bug in =build.bat= (reported by Santosh Zachariah - thanks Santosh!) [/section] [section#v0_4_4 Version 0.4.4, 2008-01-13] - fixed a minor issue in CharParser.attempt - added `.>>!` and `>>.!` primitives - added `skipManySatisfy` and `skipMany1Satisfy` char parsers [/section] [section#v0_4_3 Version 0.4.3, 2008-01-12] - fixed bugs in the CharParser versions of `<?>` and `attempt`. - added `>>?` primitive - added `skipSatisfy` and `skipSatisfyL` char parsers - minor documentation fixes [/section] [section#v0_4_2 Version 0.4.2, 2008-01-04] - performance improvements in `CharStream.Iterator` - minor documentation fixes [/section] [section#v0_4_1 Version 0.4.1, 2008-01-02] - documentation fixes - new sample application: a parser for Parsing Expression Grammars - `newline` and `unicodeNewline` now return `'\n'`, instead of 1 or 2 - added `whitespace` parser and changed `unicodeWhitespace` - added `spaces` parser (equivalent to `skipManyChars whitespace`) - removed `newlineRepl` parameter from `manyTillString` - added `skipManyTill` and `skipManyCharsTill` - generalized types of skipManyChars and skipManyChars1 [/section] [section#v0_4 Version 0.4.0, 2007-12-30] Initial public release [/section] [/no-auto-link] [/section] ================================================ FILE: Doc/src/contact.txt ================================================ [section Contact] [section Contact] FParsec currently doesn't have its own discussion forum or mailing list.[br] 
(Please let me know if you'd like that to be changed.) Currently the best place to get a quick answer to any FParsec-related question is: [url "https://stackoverflow.com/search?q=fparsec" StackOverflow.com]. You can also email me (Stephan) directly at: [url "mailto:fparsec [at] quanttec.com" fparsec \[at\] quanttec.com]. Please don't hesitate to contact me with any feedback or question regarding FParsec. I'm always happy to hear from FParsec users. [/section] [section Impressum] *Author:* Stephan Tolksdorf *Address:*[br] Geschwister-Scholl-Allee 253[br] 25524 Itzehoe[br] Germany[br] [url "mailto:fparsec [at] quanttec.com" fparsec \[at\] quanttec.com] [/section] [/section] ================================================ FILE: Doc/src/documentation.txt ================================================ [section FParsec Documentation] [html-template "template.html"] [(* [link-to-dir-for-index] *)] [default-code-language "f#"] [split-section] FParsec is a [url "https://en.wikipedia.org/wiki/Parser_combinator" parser combinator] library for [url "http://fsharp.org" F#]. With FParsec you can implement [url "https://en.wikipedia.org/wiki/Recursive_descent_parser" recursive-descent] text parsers for [url "https://en.wikipedia.org/wiki/Formal_grammar" formal grammars]. FParsec's features include: - support for context-sensitive, infinite look-ahead grammars, - automatically generated, highly readable error messages, - Unicode support, - efficient support for very large files, - an embeddable, runtime-configurable [url "https://en.wikipedia.org/wiki/Operator-precedence_parser" operator-precedence parser] component, - a simple, efficient and easily extensible API, - an implementation thoroughly optimized for performance, - comprehensive documentation, - a permissive open source @license@. 
FParsec is an F# adaptation of [url "https://www.haskell.org/haskellwiki/Parsec" Parsec], the popular parser combinator library for Haskell by [url "https://www.microsoft.com/en-us/research/people/daan" Daan Leijen]. While the implementations of Parsec and FParsec are completely different, they share a similar top-level API. [/ **Latest release:**] FParsec 2.0.0, 2022-11-01, [url "https://github.com/stephan-tolksdorf/fparsec/archive/master.zip" Download], [^download-and-installation.nuget-packages NuGet packages], [^about.changelog Changes] [auto-link{ do-not-pick-up-as-link-targets = [ "ToString"], only-link-if-used-as-member = ["UserState", "Tag"], only-link-if-not-used-as-member = ["Error"], only-link-if-reference = ["minRegexSpace", "normalizeNewlines", "skipString", "skipNewline", "String"], links = ["Error" : reference.Primitives.members.Error, "FParsec.Primitives" : reference.Primitives, "FParsec.CharParsers" : reference.CharParsers, "FParsec.Error" : reference.Error , "CharParsers.normalizeNewlines" : reference.CharParsers.members.normalizeNewlines, "Text.FoldCase" : reference.Text.members.FoldCase, "FParsec.Text.FoldCase" : reference.Text.members.FoldCase, "Text.NormalizeNewlines" : reference.Text.members.NormalizeNewlines, "FParsec.Text.NormalizeNewlines" : reference.Text.members.NormalizeNewlines, "RError" : reference.Reply.members.Error, "case-insensitively" : reference.CharStream.CharStream.remarks.case-insensitive-matching, "CharStream" : reference.CharStream.CharStream_1, "CharStream<_>" : reference.CharStream.CharStream_1, "ErrorMessageList.Merge" : reference.ErrorMessageList.members.Merge, "api-reference": reference ], urls = ["FileStream" : "https://msdn.microsoft.com/en-us/library/system.io.filestream.aspx", "System.IO.Stream" : "https://msdn.microsoft.com/en-us/library/system.io.stream.aspx", "Decoder" : "https://msdn.microsoft.com/en-us/library/system.text.decoder.aspx", "System.Text.Encoding" : 
"https://msdn.microsoft.com/en-us/library/system.text.encoding.aspx", "Encoding" : "https://msdn.microsoft.com/en-us/library/system.text.encoding.aspx", "System.IEquatable" : "https://msdn.microsoft.com/en-us/library/ms131187.aspx", "System.IComparable" : "https://msdn.microsoft.com/en-us/library/system.icomparable.aspx", "System.Char.IsWhiteSpace" : "https://msdn.microsoft.com/en-us/library/t809ektx.aspx", "System.Collections.IStructuralEquatable" : "https://msdn.microsoft.com/en-us/library/system.collections.istructuralequatable(VS.100).aspx", "HashSet" : "https://msdn.microsoft.com/en-us/library/bb359438.aspx", "System.Globalization.StringInfo" : "https://msdn.microsoft.com/en-us/library/system.globalization.stringinfo.aspx", "LengthInTextElements" : "https://msdn.microsoft.com/en-us/library/system.globalization.stringinfo.lengthintextelements.aspx", "System.Text.RegularExpressions" : "https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.aspx", "System.Text.RegularExpressions.Regex" : "https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex.aspx", "System.Text.NormalizationForm" : "https://msdn.microsoft.com/en-us/library/system.text.normalizationform.aspx", "Regex" : "https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex.aspx", "System.Text.RegularExpressions.Match" : "https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.match.aspx", "StringWriter" : "https://msdn.microsoft.com/en-us/library/system.io.stringwriter.aspx", "System.IO.StringWriter" : "https://msdn.microsoft.com/en-us/library/system.io.stringwriter.aspx", "TextWriter" : "https://msdn.microsoft.com/en-us/library/system.io.textwriter.aspx", "System.IO.TextWriter" : "https://msdn.microsoft.com/en-us/library/system.io.textwriter.aspx", "ArgumentException" : "https://msdn.microsoft.com/en-us/library/system.argumentexception.aspx", "ArgumentOutOfRangeException" : 
"https://msdn.microsoft.com/en-us/library/system.argumentoutofrangeexception.aspx", "NotSupportedException" : "https://msdn.microsoft.com/en-us/library/system.notsupportedexception.aspx", "NullReferenceException" : "https://msdn.microsoft.com/en-us/library/system.nullreferenceexception.aspx", "IOException" : "https://msdn.microsoft.com/en-us/library/system.io.ioexception.aspx", "DecoderFallbackException" : "https://msdn.microsoft.com/en-us/library/system.text.decoderfallbackexception.aspx", "computation-expressions": "https://msdn.microsoft.com/en-us/library/dd233182.aspx", "computation-expression": "https://msdn.microsoft.com/en-us/library/dd233182.aspx", "monad": "https://en.wikipedia.org/wiki/Monad_%28functional_programming%29", "f-interactive": "https://msdn.microsoft.com/en-us/library/dd233175.aspx", "MethodImplOptions": "https://msdn.microsoft.com/en-us/library/system.runtime.compilerservices.methodimploptions(v=vs.110).aspx" ] }] [section#about About FParsec] [split-section] [output-in-subdirectory] [include "fparsec-vs-alternatives.txt"] [include "status-and-roadmap.txt"] [include "changelog.txt"] [include "contact.txt"] [/section] [include "license.txt"] [include "download-and-installation.txt"] [include "tutorial.txt"] [include "users-guide.txt"] [include "reference.txt"] [/auto-link] [/section] ================================================ FILE: Doc/src/download-and-installation.txt ================================================ [section Download and installation] FParsec is distributed in source code form and as NuGet packages. If you're new to FParsec, I'd recommend to start by downloading the [url "https://github.com/stephan-tolksdorf/fparsec/archive/master.zip" source code package] and experimenting a bit with the included sample projects. With the project and solution files building the library and the samples is as easy as clicking a button. The source package also includes a complete copy of the HTML documentation for offline viewing. 
[toc] [section NuGet packages] There are two NuGet packages of FParsec, which are built with different configuration options. The [url "https://nuget.org/packages/FParsec" basic package] uses the @Low-Trust version@ of FParsec, which uses no [url "https://msdn.microsoft.com/en-us/library/t2yzs44b.aspx" unverifiable code] and is optimized for maximum portability. The main limitation of this version is that any input stream is completely read into a string before parsing, which limits the maximum practical input size. This package also contains assemblies for .NET Standard 2.0. The [url "https://nuget.org/packages/FParsec-Big-Data-Edition/" "Big Data edition" package] uses the non-Low-Trust version of FParsec that is optimized for maximum performance and supports extremely large input streams. Since this configuration is also the default configuration of the solution files included with the source code, it is sometimes referred to as the "normal" version of FParsec. This version of FParsec does use "unsafe" (i.e. unverifiable) code involving unmanaged pointers. It also uses code generation in the implementation of `isAnyOf`, `isNoneOf`, `anyOf`, `skipAnyOf`, `noneOf` and `skipNoneOf`. [* Unfortunately, this version is currently not compatible with .NET Standard/.NET Core.] Should you measure a significant performance /degradation/ when switching to the Big Data edition, you're probably inadvertently recreating the same `isAnyOf`- or `isNoneOf`-based parsers again and again, as explained [^construct-parsers-once here] and [^why-the-monadic-syntax-is-slow here]. The .NET Framework assemblies in the NuGet packages are strongly signed. Their assembly version numbers will only be incremented for breaking changes. The .NET Standard assembly in the `FParsec` package is not signed. The NuGet packages include PDBs and SourceLink support, which should allow you to step through FParsec code in the debugger of your IDE. 
[/section] [section Getting the source] FParsec's source code repository is hosted on GitHub at: [url "https://github.com/stephan-tolksdorf/fparsec" github.com/stephan-tolksdorf/fparsec] You can clone the source code using Git or you can [url "https://github.com/stephan-tolksdorf/fparsec/archive/master.zip" download it as a zip-file]. It's an FParsec project policy to check only stable and tested code into the master branch of the GitHub repository, so you can normally just work with the master version of FParsec. [tip [url "https://git-fork.com/" Fork] is a great free GUI for Git for Windows and MacOS. ] [/section] [section FParsec is built as two DLLs] FParsec's source code is written in both C# and F#. Since neither the C# nor the F# compiler directly support the other language, the respective components need to be built separately. Hence, FParsec is built as two DLLs. The C# bits are compiled into the =FParsecCS.dll= and the F# bits (which depend on the C# bits) are compiled into =FParsec.dll=. *Projects that use FParsec thus have to reference both DLLs.* If you reference the DLLs in the [@ F# Interactive] console, you need to reference `FParsecCS.dll` before you reference `FParsec.dll`. [note If you don't want to distribute the FParsec DLLs together with the assembly of your project, you can use the [url "https://msdn.microsoft.com/en-us/library/dd233171.aspx" =staticlink= command-line option] of the F# compiler to merge the FParsec DLLs into your assembly. Unfortunately, the same option cannot be used to merge =FParsecCS.dll= into the =FParsec.dll=, as the public definitions in =FParsecCS.dll= wouldn't be reexported by =FParsec.dll=. For similar reasons it also doesn't seem to be possible to use tools like [url "http://research.microsoft.com/en-us/people/mbarnett/ILMerge.aspx" ILMerge] or [url "http://code.google.com/p/il-repack/" il-repack] to obtain a merged =FParsec.dll= that can be properly consumed by F# programs. 
] [/section] [section Building FParsec from source] The solution file [= FParsec.sln] in the root source folder and the associated project files in the subfolders can be used to build FParsec from the command line or with IDEs such as Visual Studio 2019 or JetBrains Rider. To build the Low-Trust version of FParsec, you have to specify either `Debug-LowTrust` or `Release-LowTrust` as the configuration. The `Debug` and `Release` configurations build the non-Low-Trust version of FParsec, which currently is not compatible with the .NET Core runtime. [note In contrast to JetBrains Rider, Visual Studio 2019 currently does not support setting the supported target frameworks depending on the configuration. Due to this issue one currently has to use the separate `FParsec-LowTrust.sln` solution for building the Low-Trust version of FParsec in VS 2019.] The =Test= project in the solution files contains the unit tests for FParsec. The file [url "https://github.com/stephan-tolksdorf/fparsec/blob/master/.vscode/tasks.json" [= .vscode/tasks.json]] contains some convenient task definitions for Visual Studio Code. The NuGet packages are built with the [url "https://github.com/stephan-tolksdorf/fparsec/blob/master/pack.ps1" [= pack.ps1]] PowerShell script. [/section] [section#low-trust-version The Low-Trust version of FParsec] For optimization reasons the normal implementation (the "Big Data edition") of FParsec involves [url "https://msdn.microsoft.com/en-us/library/t2yzs44b.aspx" unverifiable code] using unmanaged pointers and runtime code generation. If you compile FParsec with the `LOW_TRUST` conditional compiler symbol, the unverifiable code is replaced with a "safe" alternative. This allows FParsec to be run in environments with "reduced trust", such as medium trust ASP.NET applications, and it also allows FParsec to be compiled against reduced subsets of the .NET API. 
In the `Debug-LowTrust` and `Release-LowTrust` configurations of the [= FParsec.sln] solution file in the root source folder, `LOW_TRUST` is automatically defined as `true`. The Low-Trust version of FParsec has the following two major limitations: - A `CharStream` that is constructed from a `System.IO.Stream` or a file path reads the complete file into a single string during construction. *This severely limits the maximum practical input stream size.* - The `StaticMapping` module is not supported. [/section] [section Configuration options] You can configure FParsec's source code with a number of conditional compilation symbols (a.k.a. preprocessor defines). Besides the [^low-trust-version Low-Trust option], these symbols mostly serve tuning purposes. [dl Options for =FParsecCS.dll= [`LOW_TRUST`] [See [^low-trust-version above].] [`#AGGRESSIVE_INLINING#`] [ Requires a version of .NET ≥ 4.5. Annotates some functions with the `MethodImplOptions.AggressiveInlining` attribute. ] [`PCL`] [Compile for a PCL subset of the .NET API. Removed in version 2.0.0.] [`#SMALL_STATETAG#`] [ Use a 32-bit `StateTag` in the `CharStream` class instead of the default 64-bit one. This is an optimization for 32-bit runtimes. You can find more information about the state tag in [^ users-guide.applying-parsers-in-sequence.the-statetag] of the user's guide. ] [`#UNALIGNED_READS#`] [ [small [/ This option does not affect the @Low-Trust version@ of FParsec.]][br] Optimize for CPUs that support fast unaligned memory reads, i.e. any modern x86-based CPU. This option only makes a noticeable difference in some specific situations. ] ] [dl Options for =FParsec.dll= [`LOW_TRUST`] [See [^low-trust-version above].] [`[no-auto-link UNALIGNED_READS]`] [ See [^UNALIGNED_READS above]. ] [`NOINLINE`] [ Do not force inlining of certain parser combinators. This option enables you to step through the respective combinators during debugging. 
] [`#USE_STATIC_MAPPING_FOR_IS_ANY_OF#`] [ [small [/ This option does not affect the @Low-Trust version@ of FParsec.]][br] Use `StaticMapping.createStaticCharIndicatorFunction` for the implementation of `isAnyOf`, `isNoneOf`, `anyOf`, `skipAnyOf`, `noneOf` and `skipNoneOf` for generating optimized char predicate functions using runtime code generation. Runtime code generation is a relatively expensive operation, so this optimization is primarily meant for parsers that are applied to large (or lots of) input streams. Please see the remarks for the `StaticMapping` module for more information. If you run into noticeable performance problems or memory leaks when enabling this option, you're probably inadvertently recreating the same `isAnyOf`- or `isNoneOf`-based parser again and again, as explained [^construct-parsers-once here] and [^why-the-monadic-syntax-is-slow here]. ] [`DEBUG_STATIC_MAPPING` ] [ [small [/ This option does not affect the @Low-Trust version@ of FParsec.]][br] See [^DEBUG_STATIC_MAPPING `StaticMapping` documentation]. ] ] [/section] [/section] ================================================ FILE: Doc/src/fparsec-vs-alternatives.txt ================================================ [section FParsec vs alternatives] The following tables contain a bullet-point comparison between FParsec and the two main alternatives for parsing with F#: parser generator tools (e.g. fslex & fsyacc) and "hand-written" recursive descent parsers. [table Relative advantages [[Parser-generator tools] [FParsec] [Hand-written[br] recursive-descent parser]] [ [ - Declarative and easy-to-read syntax - Ensures adherence to grammar formalism - Can check for certain kinds of ambiguity in grammar - You don't have to think about performance. Either the generated parser is fast enough, or not. There's not much you can do about it. 
] [ - Implemented as F# library, so no extra tools or build steps - Parsers are first-class values within the language - Succinct and expressive syntax - Modular and easily extensible - Extensive set of predefined parsers and combinators - Semi-automatically generated, highly readable error messages - Supports arbitrary lookahead and backtracking - Runtime-configurable operator-precedence parser component - Does not require a pre-parsing tokenization phase - Comprehensive documentation - Extensively unit-tested ] [ - No extra tools or build steps - Most amenable to individual requirements - Potentially as fast as technically possible - Parsers are relatively portable if you stick to simple language features and keep library dependencies to a minimum ] ] ] [table Relative disadvantages [[Parser-generator tools] [FParsec] [Hand-written[br] recursive-descent parser]] [ [ - Restricted to features of grammar formalism - Extra tools and compilation steps - Reliance on opaque generator tool, that is often hard to debug, optimize or extend - Static grammar that can't be changed at runtime - Often hard to generate good error messages - Many tools generate comparatively slow parsers - Some tools have only limited Unicode support - Portability problems ] [ - Tradeoff between declarativeness and performance - Syntax less readable than PEG or Regular Expression syntax - [url "https://en.wikipedia.org/wiki/Left_recursion" Left-recursive] grammar rules have to be rewritten - Does not support a pre-parsing tokenization phase - You have to learn the API - Limited to F# - Code-dependence on FParsec - Aggressive performance optimizations add complexity to parts of the lower-level FParsec source code ] [ - You have to write everything yourself, which can take a lot of effort - Implementing (fast) parsers requires some experience - Expression (sub)grammars with infix operators can be ugly and inefficient to parse with a pure recursive-descent parser, so you might also have to write 
some kind of embedded operator precedence parser ] ] ] [/section] ================================================ FILE: Doc/src/license.txt ================================================ [section License] Except where noted otherwise, the FParsec library in source and binary form is distributed under the @Simplified BSD License@. The Simplified BSD License (a.k.a. "2-clause BSD License") is a simple, permissive license that is [url "http://www.opensource.org/licenses/bsd-license.php" OSI-compliant]. FParsec incorporates data derived from the [url "http://unicode.org/ucd/" Unicode Character Database] v. 8.0.0, Copyright (c) 1991-2015 Unicode, Inc., which is distributed under the following terms:[br][url "http://www.unicode.org/terms_of_use.html#Exhibit1"] The documentation in the =Doc= folder is licensed under the [@ Creative Commons Attribution-NonCommercial 3.0 Unported License]. This Creative Commons license does not allow you to use the documentation for commercial purposes without permission. This means, for example, that you cannot sell the documentation in book form for profit or put it on a web content farm in order to earn money with ads. However, *you can of course use the documentation in a commercial context* (e.g. put it on the intranet of a commercial corporation), as long as you're not trying to directly earn money from the text of the documentation. [section Simplified BSD License] Copyright (c) 2007-2022, Stephan Tolksdorf. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
[* This software is provided by the copyright holders "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the copyright holders be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.] [/section] [section Creative Commons Attribution-NonCommercial 3.0 Unported License] Summary: [url "https://creativecommons.org/licenses/by-nc/3.0/"] The full license text: [url "https://creativecommons.org/licenses/by-nc/3.0/legalcode"] [/section] [/section] ================================================ FILE: Doc/src/reference-charparsers.txt ================================================  [section#CharParsers FParsec.CharParsers] [interface-reference] [section Interface] [$$interface] [/section] [section Members] [interface-members] [`` // FParsec.dll [] // module is automatically opened when FParsec namespace is opened module [no-auto-link FParsec.CharParsers] open FParsec.Error open FParsec.Primitives ``] [`` // Running parsers on input // ======================== ``] [`` type @ParserResult@<'Result,'UserState>``] [ Values of this union type are returned by the @runParser functions@ (not by `Parser<_,_>` functions). [interface-members] [``= ``] [`` | @Success@ of 'Result * 'UserState * Position ``] [`Success(result, userState, endPos)` holds the result and the user state returned by a successful parser, together with the position where the parser stopped.] 
[`` | @Failure@ of string * ParserError * 'UserState ``] [ `Failure(errorAsString, error, userState)` holds the parser error and the user state returned by a failing parser, together with the string representation of the parser error. The `ParserError` value `error` contains an `ErrorMessageList` and the position and user state value associated with the error. ] [/interface-members] ] [`` [#runparser-functions] ``] [`` val @runParserOnString@: Parser<'a,'u> -> 'u -> streamName: string -> string -> ParserResult<'a,'u> ``] [ `runParserOnString p ustate streamName str` runs the parser `p` on the content of the string `str`, starting with the initial user state `ustate`. The `streamName` is used in error messages to describe the source of the input (e.g. a file path) and may be empty. The parser's `Reply` is captured and returned as a `ParserResult` value. ] [`` val @runParserOnSubstring@: Parser<'a,'u> -> 'u -> streamName: string -> string -> int -> int -> ParserResult<'a,'u> ``] [ `runParserOnSubstring p ustate streamName str index count` runs the parser `p` directly on the content of the string `str` between the indices `index` (inclusive) and `index + count` (exclusive), starting with the initial user state `ustate`. The `streamName` is used in error messages to describe the source of the input (e.g. a file path) and may be empty. The parser's `Reply` is captured and returned as a `ParserResult` value. ] [`` val @runParserOnStream@: Parser<'a,'u> -> 'u -> streamName: string -> System.IO.Stream -> System.Text.Encoding -> ParserResult<'a,'u> ``] [ `runParserOnStream p ustate streamName stream encoding` runs the parser `p` on the content of the `System.IO.Stream` `stream`, starting with the initial user state `ustate`. The `streamName` is used in error messages to describe the source of the input (e.g. a file path) and may be empty. In case no Unicode byte order mark is found, the stream data is assumed to be encoded with the given `encoding`. 
The parser's `Reply` is captured and returned as a `ParserResult` value. ] [`` val @runParserOnFile@: Parser<'a,'u> -> 'u -> path: string -> System.Text.Encoding -> ParserResult<'a,'u> ``] [ `runParserOnFile p ustate path encoding` runs the parser `p` on the content of the file at the given `path`, starting with the initial user state `ustate`. In case no Unicode byte order mark is found, the file data is assumed to be encoded with the given `encoding`. The parser's `Reply` is captured and returned as a `ParserResult` value. ] [`` val @run@: Parser<'a, unit> -> string -> ParserResult<'a,unit> ``] [ `run parser str` is a convenient abbreviation for `runParserOnString parser () "" str`. ] [`` // Reading the input stream position and handling the user state // ============================================================= ``] [`` val @getPosition@: Parser<Position,'u> ``] [ The parser `getPosition` returns the current position in the input stream. `getPosition` is defined as `fun stream -> Reply(stream.Position)`. ] [`` val @getUserState@: Parser<'u,'u> ``] [ The parser `getUserState` returns the current user state. `getUserState` is defined as `fun stream -> Reply(stream.UserState)`. ] [``val @setUserState@: 'u -> Parser<unit,'u> ``] [ The parser `setUserState u` sets the user state to `u`. `setUserState u` is defined as `` fun stream -> stream.UserState <- u Reply(()) `` ] [`` val @updateUserState@: ('u -> 'u) -> Parser<unit,'u> ``] [ `updateUserState f` is defined as `` fun stream -> stream.UserState <- f stream.UserState Reply(()) `` ] [`` val @userStateSatisfies@: ('u -> bool) -> Parser<unit,'u> ``] [ The parser `userStateSatisfies f` succeeds if the predicate function `f` returns `true` when applied to the current `UserState`, otherwise it fails. [note If the parser `userStateSatisfies f` fails, it returns no descriptive error message; hence it should only be used together with other parsers that take care of a potential error. 
] ] [`` // Parsing single chars // ==================== ``] [`` val @pchar@: char -> Parser<char,'u> ``] [`pchar c` parses the char `c` and returns `c`. If `c = '\r'` or `c = '\n'` then `pchar c` will parse any one newline (`"\n"`, `"\r\n"` or `"\r"`) and return `c`. ] [`` val @skipChar@: char -> Parser<unit,'u> ``] [`skipChar c` is an optimized implementation of `pchar c |>> ignore`.] [`` val @charReturn@: char -> 'a -> Parser<'a,'u> ``] [`charReturn c result` is an optimized implementation of `pchar c >>% result`.] [`` val @anyChar@: Parser<char,'u> ``] [`anyChar` parses any single char or newline (`"\n"`, `"\r\n"` or `"\r"`). Returns the parsed char, or `'\n'` in case a newline was parsed. ] [`` val @skipAnyChar@: Parser<unit,'u> ``] [`skipAnyChar` is an optimized implementation of `anyChar |>> ignore`. ] [`` [#satisfy-parsers] val @satisfy@: (char -> bool) -> Parser<char,'u> ``] [`satisfy f` parses any one char or newline for which the predicate function `f` returns `true`. It returns the parsed char. Any newline (`"\n"`, `"\r\n"` or `"\r"`) is converted to the single char `'\n'`. Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called with `'\r'` and `satisfy f` will never return the result `'\r'`. For example, `satisfy (fun c -> '0' <= c && c <= '9')` parses any decimal digit. [note If the parser `satisfy f` fails, it returns no descriptive error message (because it does not know what chars `f` accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, `satisfyL f label` can be used to ensure a more descriptive error message. ] ] [`` val @skipSatisfy@: (char -> bool) -> Parser<unit,'u> ``] [`skipSatisfy f` is an optimized implementation of `satisfy f |>> ignore`.] [`` val @satisfyL@: (char -> bool) -> string -> Parser<char,'u> ``] [`satisfyL f label` is an optimized implementation of `satisfy f <?> label`.] 
[`` val @skipSatisfyL@: (char -> bool) -> string -> Parser<unit,'u> ``] [`skipSatisfyL f label` is an optimized implementation of `skipSatisfy f <?> label`.] [`` val @anyOf@: seq<char> -> Parser<char,'u> ``] [`anyOf chars` parses any char contained in the char sequence `chars`. It returns the parsed char. If `chars` contains the char `'\n'`, `anyOf chars` parses any newline (`"\n"`, `"\r\n"` or `"\r"`) and returns it as `'\n'`. (Note that it does not make a difference whether or not `chars` contains `'\r'` and that `anyOf chars` will never return `'\r'`.) For example, `anyOf ". \t\n"` will parse any of the chars `'.'`, `' '`, `'\t'` or any newline. `anyOf chars` is defined as `satisfy (isAnyOf chars)`. For performance critical parsers it might be worth replacing instances of `anyOf` in loops with a `manySatisfy`-based parser. For example, `manyChars (anyOf ". \t\n")` could be replaced with `manySatisfy (function '.'|' '|'\t'|'\n' -> true | _ -> false)`. This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val @skipAnyOf@: seq<char> -> Parser<unit,'u> ``] [ `skipAnyOf chars` is an optimized implementation of `anyOf chars |>> ignore`. This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val @noneOf@: seq<char> -> Parser<char,'u> ``] [`noneOf chars` parses any char not contained in the char sequence `chars`. It returns the parsed char. If `chars` does not contain the char `'\n'`, `noneOf chars` parses any newline (`"\n"`, `"\r\n"` or `"\r"`) and returns it as `'\n'`. (Note that it does not make a difference whether or not `chars` contains `'\r'` and that `noneOf chars` will never return `'\r'`.) For example, `noneOf ". \t\n"` will parse any char other than `'.'`, `' '`, `'\t'`, `'\r'` or `'\n'`. `noneOf chars` is defined as `satisfy (isNoneOf chars)`. For performance critical parsers it might be worth replacing instances of `noneOf` in loops with a `manySatisfy`-based parser. For example, `manyChars (noneOf ". 
\t\n")` could be replaced with `manySatisfy (function '.'|' '|'\t'|'\n' -> false | _ -> true)`. This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val @skipNoneOf@: seq<char> -> Parser<unit,'u> ``] [ `skipNoneOf chars` is an optimized implementation of `noneOf chars |>> ignore`. This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val @asciiLower@: Parser<char,'u> ``] [Parses any char in the range `'a'` - `'z'`. Returns the parsed char.] [`` val @asciiUpper@: Parser<char,'u> ``] [Parses any char in the range `'A'` - `'Z'`. Returns the parsed char.] [`` val @asciiLetter@: Parser<char,'u> ``] [Parses any char in the range `'a'` - `'z'` and `'A'` - `'Z'`. Returns the parsed char.] [`` val @lower@: Parser<char,'u> ``] [Parses any UTF-16 lowercase letter char identified by `System.Char.IsLower`. Returns the parsed char.] [`` val @upper@: Parser<char,'u> ``] [Parses any UTF-16 uppercase letter char identified by `System.Char.IsUpper`. Returns the parsed char.] [`` val @letter@: Parser<char,'u> ``] [Parses any UTF-16 letter char identified by `System.Char.IsLetter`. Returns the parsed char.] [`` val @digit@: Parser<char,'u>``] [Parses any char in the range `'0'` - `'9'`. Returns the parsed char.] [`` // parses '0'-'9' ``] [`` val @hex@: Parser<char,'u>``] [Parses any char in the range `'0'` - `'9'`, `'a'` - `'f'` and `'A'` - `'F'`. Returns the parsed char.] [`` // parses '0'-'9', 'a'-'f', 'A'-'F' ``] [`` val @octal@: Parser<char,'u>``] [Parses any char in the range `'0'` - `'7'`. Returns the parsed char.] [`` // parses '0'-'7' ``] [`` // predicate functions corresponding to the above parsers ``] [`` val @isAnyOf@: seq<char> -> (char -> bool) ``] [`isAnyOf chars` returns a predicate function. When this predicate function is applied to a char, it returns `true` if and only if the char is contained in the char sequence `chars`. For example, the function `isAnyOf ".,;"` returns `true` when applied to the chars `'.'`, `','` or `';'`, and `false` for all other chars. 
This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val @isNoneOf@: seq<char> -> (char -> bool) ``] [`isNoneOf chars` returns a predicate function. When this predicate function is applied to a char, it returns `true` if and only if the char is not contained in the char sequence `chars`. For example, the function `isNoneOf ".,;"` returns `false` when applied to the chars `'.'`, `','` or `';'`, and `true` for all other chars. This function is affected by the `USE_STATIC_MAPPING_FOR_IS_ANY_OF` compilation option. ] [`` val inline @isAsciiUpper@: char -> bool ``] [Returns `true` for any char in the range `'A'` - `'Z'` and `false` for all other chars.] [`` val inline @isAsciiLower@: char -> bool ``] [Returns `true` for any char in the range `'a'` - `'z'` and `false` for all other chars.] [`` val inline @isAsciiLetter@: char -> bool ``] [Returns `true` for any char in the range `'a'` - `'z'`, `'A'` - `'Z'` and `false` for all other chars.] [`` val inline @isUpper@: char -> bool ``] [`isUpper` is equivalent to `System.Char.IsUpper`.] [`` val inline @isLower@: char -> bool ``] [`isLower` is equivalent to `System.Char.IsLower`.] [`` val inline @isLetter@: char -> bool ``] [`isLetter` is equivalent to `System.Char.IsLetter`.] [`` val inline @isDigit@: char -> bool ``] [Returns `true` for any char in the range `'0'` - `'9'` and `false` for all other chars.] [`` val inline @isHex@: char -> bool ``] [Returns `true` for any char in the range `'0'` - `'9'`, `'a'` - `'f'`, `'A'` - `'F'` and `false` for all other chars.] [`` val inline @isOctal@: char -> bool ``] [Returns `true` for any char in the range `'0'` - `'7'` and `false` for all other chars.] [`` // Parsing whitespace // ================== ``] [`` val @tab@: Parser<char,'u> ``] [Parses the tab char `'\t'` and returns `'\t'`. [note A tab char is treated like any other non-newline char: the column number is incremented by (only) 1.] 
] [`` val @newline@: Parser<char,'u> ``] [Parses a newline (`"\n"`, `"\r\n"` or `"\r"`). Returns `'\n'`. Is equivalent to `pchar '\n'`.] [`` val @skipNewline@: Parser<unit,'u> ``] [`skipNewline` is an optimized implementation of `newline |>> ignore`.] [`` val @newlineReturn@: 'a -> Parser<'a,'u> ``] [`newlineReturn result` is an optimized implementation of `newline >>% result`.] [`` val @unicodeNewline@: Parser<char,'u> ``] [ Parses a Unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"`, or `"\u2029"`). Returns `'\n'`. In contrast to all other parsers in FParsec except `unicodeSpaces` and `unicodeSpaces1` this parser also increments the internal line count for Unicode newline characters other than `'\n'` and `'\r'`. [note This method does not recognize the form feed char `'\f'` (`'\u000C'`) as a newline character.] [note This parser is included only for the sake of completeness. If you design your own parser grammar, we recommend not to accept any character sequence other than `"\n"`, `"\r\n"` or `"\r"` for a newline. The three usual newline representations already make text parsing complicated enough. ] ] [`` val @skipUnicodeNewline@: Parser<unit,'u> ``] [`skipUnicodeNewline` is an optimized implementation of `unicodeNewline |>> ignore`.] [`` val @unicodeNewlineReturn@: 'a -> Parser<'a,'u> ``] [`unicodeNewlineReturn result` is an optimized implementation of `unicodeNewline >>% result`.] [`` val @spaces@: Parser<unit,'u> ``] [Skips over any sequence of *zero* or more whitespaces (space (`' '`), tab (`'\t'`) or newline (`"\n"`, `"\r\n"` or `"\r"`)).] [`` val @spaces1@: Parser<unit,'u> ``] [Skips over any sequence of *one* or more whitespaces (space (`' '`), tab (`'\t'`) or newline (`"\n"`, `"\r\n"` or `"\r"`)).] [`` [#unicodeSpaces-parsers] val @unicodeSpaces@: Parser<unit,'u> ``] [ Skips over any sequence of *zero* or more Unicode whitespace chars and registers any Unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"` or `"\u2029"`) as a newline. 
[note This method does not recognize the form feed char `'\f'` (`'\u000C'`) as a newline character.] [note This parser is included only for the sake of completeness. If you design your own parser grammar, we recommend not to accept any whitespace character other than `' '`, `'\t'`, `'\r'` and `'\n'`. There is no need to make whitespace parsing unnecessarily complicated and slow. ] ] [`` val @unicodeSpaces1@: Parser<unit,'u> ``] [ Skips over any sequence of *one* or more Unicode whitespace chars and registers any Unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"` or `"\u2029"`) as a newline. See also the notes above for `unicodeSpaces`. ] [`` val @eof@: Parser<unit,'u> ``] [The parser `eof` only succeeds at the end of the input. It never consumes input.] [`` // Parsing strings directly // ======================== ``] [`` val @pstring@: string -> Parser<string,'u> ``] [`pstring str` parses the string `str` and returns `str`. It is an atomic parser: either it succeeds or it fails without consuming any input. `str` may not contain newline chars (`'\n'` or `'\r'`), otherwise `pstring str` raises an `ArgumentException`. ] [`` val @skipString@: string -> Parser<unit,'u> ``] [`skipString str` is an optimized implementation of `pstring str |>> ignore`.] [`` val @stringReturn@: string -> 'a -> Parser<'a,'u> ``] [`stringReturn str result` is an optimized implementation of `pstring str >>% result`.] [`` val @pstringCI@: string -> Parser<string,'u> ``] [`pstringCI str` parses any string that case-insensitively matches the string `str`. It returns the *parsed* string. `pstringCI str` is an atomic parser: either it succeeds or it fails without consuming any input. `str` may not contain newline chars (`'\n'` or `'\r'`), otherwise `pstringCI str` raises an `ArgumentException`. ] [`` val @skipStringCI@: string -> Parser<unit,'u> ``] [`skipStringCI str` is an optimized implementation of `pstringCI str |>> ignore`.] 
[`` val @stringCIReturn@: string -> 'a -> Parser<'a,'u> ``] [`stringCIReturn str result` is an optimized implementation of `pstringCI str >>% result`.] [`` val @anyString@: int32 -> Parser<string,'u> ``] [`anyString n` parses any sequence of `n` chars or newlines (`"\n"`, `"\r\n"` or `"\r"`). It returns the parsed string. In the returned string all newlines are normalized to `"\n"`. `anyString n` is an atomic parser: either it succeeds or it fails without consuming any input. ] [`` val @skipAnyString@: int32 -> Parser<unit,'u> ``] [`skipAnyString n` is an optimized implementation of `anyString n |>> ignore`.] [`` val @restOfLine@: skipNewline: bool -> Parser<string,'u> ``] [`restOfLine skipNewline` parses any chars before the end of the line and, if `skipNewline` is `true`, skips to the beginning of the next line (if there is one). It returns the parsed chars before the end of the line as a string (without a newline). A line is terminated by a newline (`"\n"`, `"\r\n"` or `"\r"`) or the end of the input stream. For example, `sepBy (restOfLine false) newline` will parse an input file and split it into lines: ``{fsi} > run (sepBy (restOfLine false) newline) "line1\nline2\n";; val it : ParserResult<string list,unit> = Success: ["line1"; "line2"; ""] `` Note that you could not use `many (restOfLine true)` in this example, because at the end of the input `restOfLine` succeeds without consuming input, which would cause `many` to throw an exception. ] [`` val @skipRestOfLine@: skipNewline: bool -> Parser<unit,'u> ``] [`skipRestOfLine skipNewline` is an optimized implementation of `restOfLine skipNewline |>> ignore`.] [`` [#charsTillString-parsers] val @charsTillString@: string -> skipString: bool -> maxCount: int -> Parser<string,'u> ``] [`charsTillString str skipString maxCount` parses all chars before the first occurrence of the string `str` and, if `skipString` is `true`, skips over `str`. It returns the parsed chars before the string. 
If more than `maxCount` chars come before the first occurrence of `str`, the parser *fails after consuming* `maxCount` chars. Newlines (`"\n"`, `"\r\n"` or `"\r"`) are counted as single chars and in the returned string all newlines are normalized to `"\n"`, but `str` may not contain any newline. `charsTillString str skipString maxCount` raises - an `ArgumentException`, if `str` contains a newline char (`'\n'` or `'\r'`), - an `ArgumentOutOfRangeException`, if `maxCount` is negative. ] [`` val @skipCharsTillString@: string -> skipString: bool -> maxCount: int -> Parser<unit,'u> ``] [`skipCharsTillString str skipString maxCount` is an optimized implementation of `charsTillString str skipString maxCount |>> ignore`.] [`` val @charsTillStringCI@: string -> skipString: bool -> maxCount: int -> Parser<string,'u> ``] [ `charsTillStringCI str skipString maxCount` parses all chars before the first case-insensitive occurrence of the string `str` and, if `skipString` is `true`, skips over it. It returns the parsed chars before the string. If more than `maxCount` chars come before the first case-insensitive occurrence of `str`, the parser *fails after consuming* `maxCount` chars. Newlines (`"\n"`, `"\r\n"` or `"\r"`) are counted as single chars, but `str` may not contain any newline. `charsTillStringCI str skipString maxCount` raises - an `ArgumentException`, if `str` contains a newline char (`'\n'` or `'\r'`), - an `ArgumentOutOfRangeException`, if `maxCount` is negative. ] [`` val @skipCharsTillStringCI@: string -> skipString: bool -> maxCount: int -> Parser<unit,'u> ``] [`skipCharsTillStringCI str skipString maxCount` is an optimized implementation of `charsTillStringCI str skipString maxCount |>> ignore`.] [`` val @manySatisfy@: (char -> bool) -> Parser<string,'u> ``] [ `manySatisfy f` parses a sequence of *zero* or more chars that satisfy the predicate function `f` (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string. Any newline (`"\n"`, `"\r\n"` or `"\r"`) is converted to the single char `'\n'`. Thus, to accept a newline `f '\n'` must return `true`. 
`f` will never be called with `'\r'` and the string returned by `manySatisfy f` will never contain an `'\r'`. For example, `manySatisfy (function ' '|'\t'|'\n' -> true | _ -> false)` parses zero or more whitespaces and returns them as a string. [caution The function predicate `f` must not access the currently used `CharStream` itself, because `manySatisfy` relies on `f` not having any side-effect on the internal state of the stream. ] ] [`` val @manySatisfy2@: (char -> bool) -> (char -> bool) -> Parser<string,'u> ``] [`manySatisfy2 f1 f` behaves like `manySatisfy f`, except that the first char of the parsed string must satisfy `f1` instead of `f`. For example, `manySatisfy2 ((=) '.') isDigit` will parse a dot followed by zero or more decimal digits. If there is no dot, the parser succeeds with an empty string. ] [`` val @skipManySatisfy@: (char -> bool) -> Parser<unit,'u> ``] [`skipManySatisfy f` is an optimized implementation of `manySatisfy f |>> ignore`.] [`` val @skipManySatisfy2@: (char -> bool) -> (char -> bool) -> Parser<unit,'u> ``] [`skipManySatisfy2 f1 f` is an optimized implementation of `manySatisfy2 f1 f |>> ignore`.] [`` [#many1Satisfy-parsers] val @many1Satisfy@: (char -> bool) -> Parser<string,'u> ``] [`many1Satisfy f` parses a sequence of *one* or more chars that satisfy the predicate function `f` (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string. If the first char does not satisfy `f`, this parser fails without consuming input. Any newline (`"\n"`, `"\r\n"` or `"\r"`) is converted to the single char `'\n'`. Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called with `'\r'` and the string returned by `many1Satisfy f` will never contain an `'\r'`. For example, `many1Satisfy isDigit` parses a number consisting of one or more decimal digits and returns it as a string. 
[caution The function predicate `f` must not access the currently used `CharStream` itself, because `many1Satisfy` relies on `f` not having any side-effect on the internal state of the stream. ] [note If the parser `many1Satisfy f` fails, it returns no descriptive error message (because it does not know what chars `f` accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, `many1SatisfyL f label` can be used to ensure a more descriptive error message. ] ] [`` val @many1Satisfy2@: (char -> bool) -> (char -> bool) -> Parser<string,'u> ``] [`many1Satisfy2 f1 f` behaves like `many1Satisfy f`, except that the first char of the parsed string must satisfy `f1` instead of `f`. For example, `many1Satisfy2 isLetter (fun c -> isLetter c || isDigit c)` will parse any string consisting of one letter followed by zero or more letters or digits. ] [`` val @skipMany1Satisfy@: (char -> bool) -> Parser<unit,'u> ``] [`skipMany1Satisfy f` is an optimized implementation of `many1Satisfy f |>> ignore`.] [`` val @skipMany1Satisfy2@: (char -> bool) -> (char -> bool) -> Parser<unit,'u> ``] [`skipMany1Satisfy2 f1 f` is an optimized implementation of `many1Satisfy2 f1 f |>> ignore`.] [`` val @many1SatisfyL@: (char -> bool) -> string -> Parser<string,'u> ``] [`many1SatisfyL f label` is an optimized implementation of `many1Satisfy f <?> label`.] [`` val @many1Satisfy2L@: (char -> bool) -> (char -> bool) -> string -> Parser<string,'u> ``] [`many1Satisfy2L f1 f label` is an optimized implementation of `many1Satisfy2 f1 f <?> label`.] [`` val @skipMany1SatisfyL@: (char -> bool) -> string -> Parser<unit,'u> ``] [`skipMany1SatisfyL f label` is an optimized implementation of `skipMany1Satisfy f <?> label`.] [`` val @skipMany1Satisfy2L@: (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u> ``] [`skipMany1Satisfy2L f1 f label` is an optimized implementation of `skipMany1Satisfy2 f1 f <?> label`.] 
[`` [#manyMinMaxSatisfy-parsers] val @manyMinMaxSatisfy@: int -> int -> (char -> bool) -> Parser<string,'u> ``] [`manyMinMaxSatisfy minCount maxCount f` parses a sequence of `minCount` or more chars that satisfy the predicate function `f` (i.e. chars for which `f` returns `true`), but not more than `maxCount` chars. It returns the parsed chars as a string. This parser is atomic, i.e. if the first `minCount` chars do not all satisfy `f`, the parser fails without consuming any input. Any newline (`"\n"`, `"\r\n"` or `"\r"`) is converted to the single char `'\n'`. Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called with `'\r'` and the string returned by `manyMinMaxSatisfy minCount maxCount f` will never contain an `'\r'`. `manyMinMaxSatisfy minCount maxCount f` raises an `ArgumentOutOfRangeException` if `maxCount` is negative. For example, `manyMinMaxSatisfy 4 8 isHex` parses a string that consists of at least 4 hexadecimal digits. If there are 8 or more hex chars, this parser stops after the 8th. [caution The function predicate `f` must not access the currently used `CharStream` itself, because `manyMinMaxSatisfy` relies on `f` not having any side-effect on the internal state of the stream. ] [note If the parser `manyMinMaxSatisfy minCount maxCount f` fails, it returns no descriptive error message (because it does not know what chars `f` accepts); hence it should only be used together with other parsers that take care of a potential error. Alternatively, `manyMinMaxSatisfyL minCount maxCount f label` can be used to ensure a more descriptive error message. ] ] [`` val @manyMinMaxSatisfy2@: int -> int -> (char -> bool) -> (char -> bool) -> Parser<string,'u> ``] [`manyMinMaxSatisfy2 minCount maxCount f1 f` behaves like `manyMinMaxSatisfy minCount maxCount f`, except that the first char of the parsed string must satisfy `f1` instead of `f`. For example, `manyMinMaxSatisfy2 3 5 ((=) '.') isDigit` parses a dot followed by 2-4 decimal digits. 
] [`` val @skipManyMinMaxSatisfy@: int -> int -> (char -> bool) -> Parser<unit,'u> ``] [`skipManyMinMaxSatisfy minCount maxCount f` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f |>> ignore`.] [`` val @skipManyMinMaxSatisfy2@: int -> int -> (char -> bool) -> (char -> bool) -> Parser<unit,'u> ``] [`skipManyMinMaxSatisfy2 minCount maxCount f1 f` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f |>> ignore`.] [`` val @manyMinMaxSatisfyL@: int -> int -> (char -> bool) -> string -> Parser<string,'u> ``] [`manyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f <?> label`.] [`` val @manyMinMaxSatisfy2L@: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<string,'u> ``] [`manyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f <?> label`.] [`` val @skipManyMinMaxSatisfyL@: int -> int -> (char -> bool) -> string -> Parser<unit,'u> ``] [`skipManyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `skipManyMinMaxSatisfy minCount maxCount f <?> label`.] [`` val @skipManyMinMaxSatisfy2L@: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser<unit,'u> ``] [`skipManyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `skipManyMinMaxSatisfy2 minCount maxCount f1 f <?> label`.] [`` val @regex@: string -> Parser<string,'u> ``] [ `regex pattern` matches the .NET [url "https://msdn.microsoft.com/en-us/library/az24scfc.aspx" regular expression] given by the string `pattern` on the chars beginning at the current index in the input stream. If the regular expression matches, the parser skips the matched chars and returns them as a string. If the regular expression does not match, the parser fails without consuming input. 
The `System.Text.RegularExpressions.Regex` object that is internally used to match the pattern is constructed with the `[url "https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regexoptions.aspx" RegexOptions]` `MultiLine` and `ExplicitCapture`. In order to ensure that the regular expression can only match at the beginning of a string, `"\\A"` is automatically prepended to the pattern. You should avoid the use of greedy expressions like `".*"`, because these might trigger a scan of the complete input every time the regex is matched. Newline chars (`'\r'` and `'\n'`) in the pattern are interpreted literally. For example, an `'\n'` char in the pattern will only match `"\n"`, not `"\r"` or `"\r\n"`. However, in the returned string all newlines (`"\n"`, `"\r\n"` or `"\r"`) are normalized to `"\n"`. For large files the regular expression is *not* applied to a string containing *all* the remaining chars in the stream. The number of chars that are guaranteed to be visible to the regular expression is specified during construction of the `CharStream`. If one of the @runParser functions@ is used to run the parser, this number is 43690. ] [`` type @IdentifierOptions@ = new: ?isAsciiIdStart: (char -> bool) * ?isAsciiIdContinue: (char -> bool) * ?normalization: System.Text.NormalizationForm * ?normalizeBeforeValidation: bool * ?allowJoinControlChars: bool * ?preCheckStart: (char -> bool) * ?preCheckContinue: (char -> bool) * ?allowAllNonAsciiCharsInPreCheck: bool * ?label: string * ?invalidCharMessage: string -> IdentifierOptions ``] [ The configuration options for the `identifier` parser. [dl [`isAsciiIdStart`] [ Specifies the ASCII characters that are valid as the first character of an identifier. This predicate function is called once for each char in the range `'\u0001'`--`'\u007f'` during construction of the `IdentifierOptions` object. By default, the ASCII chars `'A'`--`'Z'` and `'a'`--`'z'` can start an identifier. 
] [`isAsciiIdContinue`] [ Specifies the ASCII characters that are valid as non-first characters of an identifier. This predicate function is called once for each char in the range `'\u0001'`--`'\u007f'` during construction of the `IdentifierOptions` object. Normally the chars for which `isAsciiIdContinue` returns `true` should include all chars for which `isAsciiIdStart` returns `true`. By default, the ASCII chars `'A'`--`'Z'`, `'a'`--`'z'`, `'0'`--`'9'` and `'_'` are accepted at non-start positions. ] [`normalization`] [ [small [/ This option is not supported in the Silverlight version of FParsec.]][br] The [url "https://www.Unicode.org/reports/tr15/" normalization form] to which identifier strings are normalized. The value must be one of the four enum values of `System.Text.NormalizationForm`. If no `normalization` parameter is given, no normalization is performed. The normalization is performed with the [url "https://msdn.microsoft.com/en-us/library/ebza6ck1.aspx" `System.String.Normalize`] method provided by the Base Class Library. ] [`normalizeBeforeValidation`] [ [small [/ This option is not supported in the Silverlight version of FParsec.]][br] Indicates whether the identifier string should be normalized before validation (but after the pre-check). By default, identifiers are normalized after they have been validated. Normalization before validation will only work properly with non-default pre-check options. ] [`allowJoinControlChars`] [ Indicates whether the two join control characters ([url "https://en.wikipedia.org/wiki/Zero-width_non-joiner" zero-width non-joiner] and [url "https://en.wikipedia.org/wiki/Zero-width_joiner" zero-width joiner]) are allowed at any non-start character position in the identifier. ] [`preCheckStart`, `preCheckContinue`] [ These two char predicates are used to identify potential identifier strings in the input. 
The first UTF-16 char of an identifier must satisfy `preCheckStart`, the following chars must satisfy `preCheckContinue`. Input chars that don't pass the pre-check aren't included in the identifier string, while characters that pass the pre-check but not the identifier validation trigger a parser error. For the `identifier` parser to work properly, the pre-check functions must accept a superset of valid identifier characters. If you specify no `preCheckStart` (`preCheckContinue`) parameter, a default function will be used that accepts all chars that satisfy `isAsciiIdStart` (`isAsciiIdContinue`) as well as all non-ASCII characters in the Basic Multilingual Plane with the =XID_Start= (=XID_Continue=) property and all surrogate chars. `preCheckContinue` by default also accepts the two join control characters. If you pass the option `allowAllNonAsciiCharsInPreCheck = true`, the pre-check predicates are only called once for each char in the range `'\u0001'` - `'\u007f'` during construction of the `IdentifierOptions` object (in order to construct a lookup table). ] [`allowAllNonAsciiCharsInPreCheck`] [ Indicates whether all non-ASCII chars should be accepted in the pre-check, irrespective of whether the (default) pre-check functions return `true` for these chars. ] [`label`] [ The string label that is used in error messages if no identifier is found. The default is `"identifier"`. ] [`invalidCharMessage`] [ The error message that is reported when an invalid char is found during validation of an identifier (after the pre-check). The default is `"The identifier contains an invalid character at the indicated position."`. 
] ] The following example implements a parser for [url "https://docs.python.org/3/index.html" Python] identifiers as described in [url "https://www.python.org/dev/peps/pep-3131/" PEP-3131]: `` let pythonIdentifier = let isAsciiIdStart = fun c -> isAsciiLetter c || c = '_' let isAsciiIdContinue = fun c -> isAsciiLetter c || isDigit c || c = '_' identifier (IdentifierOptions( isAsciiIdStart = isAsciiIdStart, isAsciiIdContinue = isAsciiIdContinue, normalization = [url "https://msdn.microsoft.com/en-us/library/system.text.normalizationform.aspx" System.Text.NormalizationForm].FormKC, normalizeBeforeValidation = true, allowAllNonAsciiCharsInPreCheck = true)) `` ] [`` val @identifier@: IdentifierOptions -> Parser ``] [The `identifier` parser is a configurable parser for the XID identifier syntax specified in the [url "http://www.Unicode.org/reports/tr31/" Unicode Standard Annex #31]. By default, a valid identifier string must begin with a Unicode character with the =XID_Start= property and continue with zero or more characters with the =XID_Continue= property. The specification of which characters have these properties can be found in the [url "http://www.Unicode.org/Public/8.0.0/ucd/DerivedCoreProperties.txt" DerivedCoreProperties] file in the [url "http://www.Unicode.org/usc" Unicode Character Database]. Currently FParsec implements the XID specification of Unicode 8.0.0. Within the ASCII character range `'\u0001'`--`'\u007f'` you can customize the set of accepted characters through the `isAsciiIdStart` and `isAsciiIdContinue` parameters (the XID default allows `'a'`--`'z'` and `'A'`--`'Z'` at any position and `'_'` and `'0'`--`'9'` only in non-start positions). 
For example, to accept the same ASCII characters that are valid in F# identifiers, you could use the following `IdentifierOptions`: `` let isAsciiIdStart c = isAsciiLetter c || c = '_' let isAsciiIdContinue c = isAsciiLetter c || isDigit c || c = '_' || c = '\'' identifier (IdentifierOptions(isAsciiIdStart = isAsciiIdStart, isAsciiIdContinue = isAsciiIdContinue)) `` By default, identifiers cannot contain the two join control characters [url "https://en.wikipedia.org/wiki/Zero-width_non-joiner" zero-width non-joiner] and [url "https://en.wikipedia.org/wiki/Zero-width_joiner" zero-width joiner]. While these characters can be abused to create distinct identifiers that look confusingly similar or even identical, they are also necessary to create identifiers with the correct visual appearance for common words or phrases in certain languages. [url "http://www.Unicode.org/reports/tr31/#Layout_and_Format_Control_Characters" Section 2.3] of the Unicode Standard Annex #31 recommends to accept join control characters if the identifier system is supposed to support "natural representations of terms in modern, customary use". However, in order to minimize the potential for abuse it also recommends accepting these characters only in some very specific contexts. Unfortunately, the proposed rules describing the contexts in which join control character should be allowed are rather difficult to implement, especially with the limited Unicode support in .NET. For this reason the `identifier` parser currently only supports a simpler option: if you set the parameter `allowJoinControlChars` to `true`, the parser accepts the two join control characters in any non-start position. Whether this setting is a reasonable compromise between not supporting join control characters at all and implementing the complicated rules proposed in Annex #31 obviously depends on the individual requirements of your project. 
An example of a programming language that [url "https://mail.mozilla.org/pipermail/es5-discuss/2009-June/002832.html" adopted] the same compromise is [url "http://www.ecma-international.org/publications/standards/Ecma-262.htm" ECMAScript 5]. Apart from the join control characters, no layout or format control characters are allowed in identifiers. This is in accordance with the recommendation of the Unicode Standard Annex #31, but contrary to what Annex #15 [url "http://www.Unicode.org/reports/tr15/tr15-23.html#Programming_Language_Identifiers" recommended] prior to Unicode version 4.1. Programming languages whose identifier syntax is based on the recommendations of earlier versions of the Unicode standard may require that layout and format control characters are ignored or filtered out, as for example is the case for C\#. However, since the identifier syntax of these languages isn't based on the XID properties, one can't parse their identifiers with this parser anyway. By providing a value for the `normalization` parameter, you can ensure that identifiers are returned in a particular Unicode [url "http://www.Unicode.org/reports/tr15/" normalization form]. By default, an identifier is normalized *after* it has been validated. Since XID identifiers are "closed under normalization", a valid identifier is guaranteed to stay valid after normalization. The reverse, however, is not true, since not all identifier strings that are valid after normalization are also valid prior to normalization. If you want the identifier string to be normalized before validation, you have to set the `normalizeBeforeValidation` parameter to `true` and specify appropriate `preCheckStart` and `preCheckContinue` parameters. Silverlight does not support Unicode normalization, so the Silverlight version of FParsec does not support the `normalization` and `normalizeBeforeValidation` parameters. 
The `identifier` parser uses the `preCheckStart` and `preCheckContinue` predicate functions to identify potential identifier strings in the input. The first UTF-16 char of the identifier must satisfy `preCheckStart`, the following chars must satisfy `preCheckContinue`. Input chars that don't pass the pre-check aren't included in the identifier string, while characters that pass the pre-check but not the identifier validation trigger a parser error (`FatalError`). For the `identifier` parser to work properly, the `preCheck` functions must accept a superset of valid identifier characters. If you specify no `preCheckStart` (`preCheckContinue`) parameter, a default function will be used that accepts all chars that satisfy `isAsciiIdStart` (`isAsciiIdContinue`) as well as all non-ASCII characters in the Basic Multilingual Plane with the =XID_Start= (=XID_Continue=) property and all surrogate chars. `preCheckContinue` by default also accepts the two join control characters. If you set the parameter `allowAllNonAsciiCharsInPreCheck` to `true`, all non-ASCII chars will be accepted in the pre-check, irrespective of whether the (default) pre-check functions return `true` for these chars. By passing custom `preCheckStart` and `preCheckContinue` functions you can modify the error reporting behaviour and support identifier strings that are only valid after normalization. You can also exclude specific UTF-16 chars that would otherwise be valid in identifiers, though you'd have to be careful to cover all (pre-)normalization forms. In the following examples we will demonstrate the effect of custom pre-check functions on identifier parsing. For this we first set up two identifier parsers, `ident` and `identP`, with differing sets of options. Both parsers accept the same ASCII chars in identifiers. In particular, both do not accept the underscore char `'_'` in identifiers. However, only `identP` lets underscores through the pre-check. 
`` // we don't allow underscores in identifiers ... let isAsciiIdStart c = isAsciiLetter c let isAsciiIdContinue c = isAsciiLetter c || isDigit c // ... but accept them in the pre-check let preCheckStart c = isAsciiLetter c || c = '_' let preCheckContinue c = isAsciiLetter c || isDigit c || c = '_' type NF = System.Text.NormalizationForm let opts = IdentifierOptions(isAsciiIdStart = isAsciiIdStart, isAsciiIdContinue = isAsciiIdContinue, normalization = NF.FormKC, // The following option isn't really useful without // modified pre-check options. We only set the // option here to prove this point in an example below. normalizeBeforeValidation = true) let optsWithPreCheck = IdentifierOptions(isAsciiIdStart = isAsciiIdStart, isAsciiIdContinue = isAsciiIdContinue, preCheckStart = preCheckStart, preCheckContinue = preCheckContinue, allowAllNonAsciiCharsInPreCheck = true, normalization = NF.FormKC, normalizeBeforeValidation = true) let ident : Parser<string, unit> = identifier opts let identP : Parser<string, unit> = identifier optsWithPreCheck `` Both `ident` and `identP` parse simple identifiers without a problem: ``{fsi} > run (ident .>> eof) "täst1";; val it : ParserResult<string,unit> = Success: "täst1" > run (identP .>> eof) "täst2";; val it : ParserResult<string,unit> = Success: "täst2" `` The identifier parser with the default pre-check functions will treat underscores just like whitespace or any other non-identifier character: ``{fsi} > run (ident .>> eof) "test_id";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 5 test_id ^ Expecting: end of input `` Since `ident` only consumed the `"test"` part of the input string, the `eof` parser complained that it was expecting to be applied at the end of the input. 
When we use `identP` instead, we get a different error message: ``{fsi} > run (identP .>> eof) "test_id";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 5 test_id ^ The identifier contains an invalid character at the indicated position. 
`` This time the underscore passed the pre-check, but not the identifier validation. As mentioned above, a custom pre-check is also necessary to make the `normalizeBeforeValidation` option work properly. With the default pre-check options the identifier parser doesn't accept `"MC²"` as an identifier, even with the normalization set to NFKC: ``{fsi} > run (ident .>> eof) "MC²";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 3 MC² ^ Expecting: end of input `` `identP` on the other hand doesn't have this issue, because it accepts all non-ASCII chars in the pre-check: ``{fsi} > run (identP .>> eof) "MC²";; val it : ParserResult<string,unit> = Success: "MC2" `` ] [`` // Parsing strings with the help of other parsers // ============================================== ``] [`` val @manyChars@: Parser<char,'u> -> Parser<string,'u> ``] [ `manyChars cp` parses a sequence of *zero* or more chars with the char parser `cp`. It returns the parsed chars as a string. `manyChars cp` is an optimized implementation of `many cp` that returns the chars as a string instead of a char list. Many string parsers can be conveniently implemented with both `manyChars` and `manySatisfy`. In these cases you should generally prefer the faster `manySatisfy`. For example, the parser `manySatisfyL isHex "hex integer"` is more efficient than `manyChars hex`. If you are using `manyChars` for a parser similar to `manyChars (notFollowedBy endp >>. p)`, you should check whether this use of `manyChars` can be replaced with the more specialized `manyCharsTill` parser. ] [`` val @manyChars2@: Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u> ``] [ `manyChars2 cp1 cp` behaves like `manyChars cp`, except that it parses the first char with `cp1` instead of `cp`. For example, `manyChars2 letter (letter <|> digit)` will parse a letter followed by letters or digits and return the chars as a string. If the first char is not a letter, the parser succeeds with an empty string. 
Note, however, that this parser could be more efficiently implemented using `manySatisfy2L`. ] [`` val @many1Chars@: Parser<char,'u> -> Parser<string,'u> ``] [ `many1Chars cp` parses a sequence of *one* or more chars with the char parser `cp`. It returns the parsed chars as a string. `many1Chars cp` is an optimized implementation of `many1 cp` that returns the chars as a string instead of a char list. Many string parsers can be conveniently implemented with both `many1Chars` and `many1Satisfy`. In these cases you should generally prefer the faster `many1Satisfy`. For example, the parser `many1SatisfyL isHex "hex integer"` is more efficient than `many1Chars hex`. ] [`` val @many1Chars2@: Parser<char,'u> -> Parser<char,'u> -> Parser<string,'u> ``] [ `many1Chars2 cp1 cp` behaves like `many1Chars cp`, except that it parses the first char with `cp1` instead of `cp`. For example, `many1Chars2 letter (letter <|> digit)` will parse a letter followed by letters or digits and return the chars as a string. Note, however, that this parser could be more efficiently implemented using `many1Satisfy2L`. ] [`` [#manyCharsTill-parsers] val @manyCharsTill@: Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u> ``] [ `manyCharsTill cp endp` parses chars with the char parser `cp` until the parser `endp` succeeds. It stops after `endp` and returns the parsed chars as a string. `manyCharsTill cp endp` is an optimized implementation of `manyTill cp endp` that returns the chars as a string instead of a char list. ] [`` val @manyCharsTill2@: Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u> ``] [ `manyCharsTill2 cp1 cp endp` behaves like `manyCharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. 
] [`` val @manyCharsTillApply@: Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> ``] [ `manyCharsTillApply cp endp f` behaves like `manyCharsTill cp endp`, except that it returns the result of the function application `f str b`, where `str` is the parsed string and `b` is the result returned by `endp`. 
] [`` val @manyCharsTillApply2@: Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> ``] [ `manyCharsTillApply2 cp1 cp endp f` behaves like `manyCharsTillApply cp endp f`, except that it parses the first char with `cp1` instead of `cp`. ] [`` val @many1CharsTill@: Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u> ``] [ `many1CharsTill cp endp` parses one char with the char parser `cp`. Then it parses more chars with `cp` until the parser `endp` succeeds. It stops after `endp` and returns the parsed chars as a string. `many1CharsTill cp endp` is an optimized implementation of `many1Till cp endp` that returns the chars as a string instead of a char list. ] [`` val @many1CharsTill2@: Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> Parser<string,'u> ``] [ `many1CharsTill2 cp1 cp endp` behaves like `many1CharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. ] [`` val @many1CharsTillApply@: Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> ``] [ `many1CharsTillApply cp endp f` behaves like `many1CharsTill cp endp`, except that it returns the result of the function application `f str b`, where `str` is the parsed string and `b` is the result returned by `endp`. ] [`` val @many1CharsTillApply2@: Parser<char,'u> -> Parser<char,'u> -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> ``] [ `many1CharsTillApply2 cp1 cp endp f` behaves like `many1CharsTillApply cp endp f`, except that it parses the first char with `cp1` instead of `cp`. ] [`` [#manyStrings-parsers] val @manyStrings@: Parser<string,'u> -> Parser<string,'u> ``] [ `manyStrings sp` parses a sequence of *zero* or more strings with the string parser `sp`. It returns the strings in concatenated form. `manyStrings sp` is an optimized implementation of `many sp |>> List.fold (fun acc s -> acc + s) ""`. 
] [`` val @manyStrings2@: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u> ``] [ `manyStrings2 sp1 sp` behaves like `manyStrings sp`, except that it parses the first string with `sp1` instead of `sp`. 
] [`` val @many1Strings@: Parser<string,'u> -> Parser<string,'u> ``] [ `many1Strings sp` parses a sequence of *one* or more strings with the string parser `sp`. It returns the strings in concatenated form. Note that `many1Strings sp` does not require the first string to be non-empty. `many1Strings sp` is an optimized implementation of `many1 sp |>> List.reduce (+)`. ] [`` val @many1Strings2@: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u> ``] [ `many1Strings2 sp1 sp` behaves like `many1Strings sp`, except that it parses the first string with `sp1` instead of `sp`. ] [`` val @stringsSepBy@: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u> ``] [ `stringsSepBy sp sep` parses *zero* or more occurrences of the string parser `sp` separated by `sep` (in EBNF: `{EBNF}(sp (sep sp)*)?`). It returns the strings parsed by `sp` *and* `sep` in concatenated form. `stringsSepBy` behaves like `sepBy`, except that instead of returning a list of the results of only the first argument parser it returns a concatenated string of all strings returned by both argument parsers (in the sequence they occurred). With `stringsSepBy` you can for example implement an efficient parser for the following string literal format: ``{other} stringLiteral: '"' (normalChar|escapedChar)* '"' normalChar: any char except '\' and '"' escapedChar: '\\' ('\\'|'"'|'n'|'r'|'t') `` The parser implementation exploits the fact that two (possibly empty) normal char snippets must be separated by an escaped char: `` let stringLiteral = let str s = pstring s let normalCharSnippet = manySatisfy (fun c -> c <> '\\' && c <> '"') let escapedChar = str "\\" >>. 
(anyOf "\\\"nrt" |>> function | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c) between (str "\"") (str "\"") (stringsSepBy normalCharSnippet escapedChar) `` ] [`` val @stringsSepBy1@: Parser<string,'u> -> Parser<string,'u> -> Parser<string,'u> ``] [ `stringsSepBy1 sp sep` parses *one* or more occurrences of the string parser `sp` separated by `sep` (in EBNF: `{EBNF}(sp (sep sp)*)`). 
It returns the strings parsed by `sp` *and* `sep` in concatenated form. `stringsSepBy1` behaves like `stringsSepBy`, except that it fails without consuming input if `sp` does not succeed at least once. ] [`` val @skipped@: Parser -> Parser ``] [`skipped p` applies the parser `p` and returns the chars skipped over by `p` as a string. All newlines (`"\r\n"`, `"\r"` or `"\n"`) are normalized to `"\n"`.] [`` val @withSkippedString@: (string -> 'a -> 'b) -> Parser<'a,'u> -> Parser<'b,'u> ``] [`p |> withSkippedString f` applies the parser `p` and returns the result `f str x`, where `str` is the string skipped over by `p` and `x` is the result returned by `p`.] [`` // Parsing numbers // =============== ``] [`` type @NumberLiteralOptions@``] [ An enum type that encodes the various options of the `numberLiteral` parser: `` type NumberLiteralOptions = | None = 0 | AllowSuffix = 0b000000000001 | AllowMinusSign = 0b000000000010 | AllowPlusSign = 0b000000000100 | AllowFraction = 0b000000001000 | AllowFractionWOIntegerPart = 0b000000010000 | AllowExponent = 0b000000100000 | AllowHexadecimal = 0b000001000000 | AllowBinary = 0b000010000000 | AllowOctal = 0b000100000000 | AllowInfinity = 0b001000000000 | AllowNaN = 0b010000000000 | IncludeSuffixCharsInString = 0b100000000000 | DefaultInteger = 0b000111000110 | DefaultUnsignedInteger = 0b000111000000 | DefaultFloat = 0b011001101110 `` If all flags are set any literal matching the following regular expression is accepted: ``{regex} [+-]?((([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)? |0[xX]([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)? |0[oO][0-7]+ |0[bB][01]+ )[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]? |[iI][nN][fF]([iI][nN][iI][tT][yY])? |[nN][aA][nN] ) `` Hexadecimal literals must begin with `{regex}0x` or `{regex}0X`, octal literals with `{regex}0o` or `{regex}0O` and binary literals with `{regex}0b` or `{regex}0B`. 
If the respective flags are set, hexadecimal *floating-point* literals as supported by IEEE 754r, C99 and Java are accepted. Some remarks on the individual flags: [dl [`AllowSuffix`] [Allows up to 4 suffix chars. Such chars are used in many programming languages to determine the type of a number. For example, in F# the literal `"123UL"` represents the unsigned 64-bit integer 123.] [`AllowFraction`] [Allows a fraction in decimal and hexadecimal literals.] [`AllowFractionWOIntegerPart`] [Allows number literals with a fraction but no integer part, e.g. `".123"` or `"0x.abc"`. This flag can only be used together with `AllowFraction`.] [`AllowExponent`] [Allows exponents in decimal literals (beginning with an `"e"` or `"E"`) and in hexadecimal literals (beginning with a `"p"` or `"P"`).] [`AllowInfinity`] [Allows `"Inf"` or `"Infinity"` literals (case-insensitive).] [`AllowNaN`] [Allows `"NaN"` literals (case-insensitive).] [`IncludeSuffixCharsInString`] [Instructs the `numberLiteral` parser to include any parsed suffix chars in the `NumberLiteral.String` member.] ] ] [`` = //... ``] [`` type @NumberLiteral@``] [ The return type of the `numberLiteral` parser. An instance contains the parsed number literal and various bits of information about it. Note that the `String` member contains the string literal *without* the suffix chars, except if the `NumberLiteralOptions` passed to the `numberLiteral` parser have the `IncludeSuffixCharsInString` flag set. Any parsed suffix chars are always available through the `SuffixChar1` - `4` members. `` type NumberLiteral = member String: string member #SuffixLength#: int member #SuffixChar1#: char // EOS if no suffix char was parsed member #SuffixChar2#: char // EOS if less than 2 suffix chars were parsed member #SuffixChar3#: char ... 
member #SuffixChar4#: char member Info: NumberLiteralResultFlags member #HasMinusSign#: bool member #HasPlusSign#: bool member #HasIntegerPart#: bool member #HasFraction#: bool member #HasExponent#: bool member #IsInteger#: bool // not (HasFraction || HasExponent) member #IsDecimal#: bool member #IsHexadecimal#: bool member #IsBinary#: bool member #IsOctal#: bool member #IsNaN#: bool member #IsInfinity#: bool and NumberLiteralResultFlags = | None = 0 | SuffixLengthMask = 0b0000000000001111 | HasMinusSign = 0b0000000000010000 | HasPlusSign = 0b0000000000100000 | HasIntegerPart = 0b0000000001000000 | HasFraction = 0b0000000010000000 | HasExponent = 0b0000000100000000 | IsDecimal = 0b0000001000000000 | IsHexadecimal = 0b0000010000000000 | IsBinary = 0b0000100000000000 | IsOctal = 0b0001000000000000 | BaseMask = 0b0001111000000000 | IsInfinity = 0b0010000000000000 | IsNaN = 0b0100000000000000 `` ] [`` = //... ``] [`` val @numberLiteral@: NumberLiteralOptions -> string -> Parser ``] [ `numberLiteral options label` parses a number literal and returns the result in form of a `NumberLiteral` value. The given `NumberLiteralOptions` argument determines the kind of number literals accepted. The string `label` is used in the `Expected` error message that is generated when the parser fails without consuming input. The parser fails without consuming input if not at least one digit (including the =0= in the format specifiers `"0x"` etc.) can be parsed. It fails after consuming input, if no decimal digit comes after an exponent marker or no valid digit comes after a format specifier. The parser in the following example employs `numberLiteral` to parse decimal numbers as either `integer` or `float` values: `` open FParsec open FParsec.Primitives open FParsec.CharParsers type Number = Int of int64 | Float of float // -?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)? 
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction ||| NumberLiteralOptions.AllowExponent let pnumber : Parser = numberLiteral numberFormat "number" |>> fun nl -> if nl.IsInteger then Int (int64 nl.String) else Float (float nl.String) `` Some test runs: ``{fsi} > run pnumber "123";; val it : ParserResult = Success: Int 123L > run pnumber "-123.456E-7";; val it : ParserResult = Success: Float -1.23456e-05 > run pnumber "-";; val it : ParserResult = Failure: Error in Ln: 1 Col: 1 - ^ Expecting: number > run pnumber "123.456E-a";; val it : ParserResult = Failure: Error in Ln: 1 Col: 10 123.456E-a ^ Expecting: decimal digit > run pnumber "1E9999";; System.OverflowException: Value was either too large or too small for a Double. at (... stack trace ...) stopped due to error `` The next example improves on the error reporting in case of overflows. It also demonstrates how to support hexadecimal numbers and a suffix to indicate the integer format: `` open FParsec open FParsec.Error open FParsec.Primitives open FParsec.CharParsers type Number = Int32 of int32 | Int64 of int64 | Float of float // We want to support decimal or hexadecimal numbers with an optional minus // sign. Integers may have an 'L' suffix to indicate that the number should // be parsed as a 64-bit integer. 
let numberFormat = NumberLiteralOptions.AllowMinusSign ||| NumberLiteralOptions.AllowFraction ||| NumberLiteralOptions.AllowExponent ||| NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowSuffix let pnumber : Parser = let parser = numberLiteral numberFormat "number" fun stream -> let reply = parser stream if reply.Status = Ok then let nl = reply.Result // the parsed NumberLiteral if nl.SuffixLength = 0 || ( nl.IsInteger && nl.SuffixLength = 1 && nl.SuffixChar1 = 'L') then try let result = if nl.IsInteger then if nl.SuffixLength = 0 then Int32 (int32 nl.String) else Int64 (int64 nl.String) else if nl.IsHexadecimal then Float (floatOfHexString nl.String) else Float (float nl.String) Reply(result) with | :? System.OverflowException as e -> stream.Skip(-nl.String.Length) Reply(FatalError, messageError e.Message) else stream.Skip(-nl.SuffixLength) Reply(Error, messageError "invalid number suffix") else // reconstruct error reply Reply(reply.Status, reply.Error) `` Some test runs: ``{fsi} > run pnumber "123";; val it : ParserResult = Success: Int32 123 > run pnumber "-0xffL";; val it : ParserResult = Success: Int64 -255L > run pnumber "123.123";; val it : ParserResult = Success: Float 123.123 > run pnumber "0xabc.defP-4";; val it : ParserResult = Success: Float 171.8044281 > run pnumber "-0x";; val it : ParserResult = Failure: Error in Ln: 1 Col: 4 -0x ^ Note: The error occurred at the end of the input stream. Expecting: hexadecimal digit > run pnumber "0x123UL";; val it : ParserResult = Failure: Error in Ln: 1 Col: 6 0x123UL ^ invalid number suffix > run pnumber "1E9999";; val it : ParserResult = Failure: Error in Ln: 1 Col: 1 1E9999 ^ Value was either too large or too small for a Double. 
`` ] [`` val @numberLiteralE@: NumberLiteralOptions -> errorInCaseNoLiteralFound: ErrorMessageList -> CharStream<'u> -> Reply ``] [`numberLiteralE` is an uncurried version of `numberLiteral` that can be used to implement number parsers without having to construct a `numberLiteral` closure.] [`` val @pfloat@: Parser ``] [ Parses a floating point number in the decimal format (in regular expression notation) ``{regex} [0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)? `` or the hexadecimal format ``{regex} 0[xX][0-9a-fA-F]+(\.[0-9a-fA-F]*)?([pP][+-]?[0-9]+)? `` (as supported by IEEE 754r, C99 and Java, where e.g. `{regex}0x1f.cP-5` represents 31.75 * 2[sup -5]). The special values `{regex}NaN` and `{regex}Inf(inity)?` (case-insensitive) are also recognized. All recognized numbers may be prefixed with a plus or minus sign. Fractions without a leading digit, as for example ".5", are *not* supported. The parser fails - without consuming input, if not at least one digit (including the `{regex}0` in `{regex}0x`) can be parsed, - after consuming input, if no digit comes after an exponent marker or no hex digit comes after `{regex}0x`. [note Values that can't be represented as a finite `float` after rounding are parsed as plus or minus infinity. This behaviour changed between FParsec versions 1.0.3 and 1.0.10, following the [url "https://docs.microsoft.com/en-us/dotnet/core/compatibility/2.2-3.0#floating-point-parsing-operations-no-longer-fail-or-throw-an-overflowexception" respective behaviour change of `System.Double.Parse` on .NET Core 3]. ] [note The `pfloat` parser is based on the configurable `numberLiteral` parser. If you'd like to support a different floating-point format, there's a good chance you can implement a parser for that format by some simple changes to a copy of the `pfloat` source.] 
] [`` [#pint-parsers] val @pint64@: Parser ``] [ Parses a 64-bit signed integer number in the decimal, hexadecimal (`{regex}0[xX]`), octal (`{regex}0[oO]`) and binary (`{regex}0[bB]`) formats (in regular expression notation): ``{regex} [+-]?([0-9]+ |0[xX][0-9a-fA-F]+ |0[oO][0-7]+ |0[bB][01]+ ) `` The parser fails - without consuming input, if not at least one digit (including the `{regex}0` in the format specifiers `{regex}0x` etc.) can be parsed, - after consuming input, if no digit comes after an exponent marker or no digit comes after a format specifier, - after consuming input, if the value represented by the input string is greater than `System.Int64.MaxValue` or less than `System.Int64.MinValue`. ] [`` val @pint32@: Parser ``] [ `pint32` parses a 32-bit signed integer and behaves like `pint64`, except for the different return type and smaller integer range. ] [`` val @pint16@: Parser ``] [ `pint16` parses a 16-bit signed integer and behaves like `pint64`, except for the different return type and smaller integer range. ] [`` val @pint8@: Parser ``] [ `pint8` parses an 8-bit signed integer and behaves like `pint64`, except for the different return type and smaller integer range.] [`` [#puint-parsers] val @puint64@: Parser ``] [ Parses numbers in the decimal, hexadecimal (`{regex}0[xX]`), octal (`{regex}0[oO]`) and binary (`{regex}0[bB]`) formats (in regular expression notation): ``{regex} [0-9]+ |0[xX][0-9a-fA-F]+ |0[oO][0-7]+ |0[bB][01]+ `` Note that the parser does not accept a leading plus sign. The parser fails - without consuming input, if not at least one digit (including the `{regex}0` in the format specifiers `{regex}0x` etc.) can be parsed, - after consuming input, if no digit comes after an exponent marker or no digit comes after a format specifier, - after consuming input, if the value represented by the input string is greater than `System.UInt64.MaxValue`. 
] [`` val @puint32@: Parser ``] [ `puint32` parses a 32-bit unsigned integer and behaves like `puint64`, except for the different return type and smaller integer range. ] [`` val @puint16@: Parser ``] [ `puint16` parses a 16-bit unsigned integer and behaves like `puint64`, except for the different return type and smaller integer range. ] [`` val @puint8@: Parser ``] [ `puint8` parses an 8-bit unsigned integer and behaves like `puint64`, except for the different return type and smaller integer range. ] [`` // Conditional parsing // =================== ``] [`` val @notFollowedByEof@: Parser ``] [ `notFollowedByEof` is an optimized implementation of `notFollowedByL eof "end of input"`. ] [`` val @followedByNewline@: Parser ``] [`followedByNewline` is an optimized implementation of `followedByL newline "newline"`.] [`` val @notFollowedByNewline@: Parser ``] [`notFollowedByNewline` is an optimized implementation of `notFollowedByL newline "newline"`.] [`` [#followedByString-parsers] val @followedByString@: string -> Parser ``] [`followedByString str` is an optimized implementation of `followedByL (pstring str) ("'" + str + "'")`.] [`` val @followedByStringCI@: string -> Parser ``] [`followedByStringCI str` is an optimized implementation of `followedByL (pstringCI str) ("'" + str + "'")`.] [`` val @notFollowedByString@: string -> Parser ``] [`notFollowedByString str` is an optimized implementation of `notFollowedByL (pstring str) ("'" + str + "'")`.] [`` val @notFollowedByStringCI@: string -> Parser ``] [`notFollowedByStringCI str` is an optimized implementation of `notFollowedByL (pstringCI str) ("'" + str + "'")`.] [`` [#charSatisfies-parsers] val @nextCharSatisfies@: (char -> bool) -> Parser ``] [ `nextCharSatisfies f` is an optimized implementation of `followedBy (satisfy f)`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. 
] ] [`` val @nextCharSatisfiesNot@: (char -> bool) -> Parser ``] [ `nextCharSatisfiesNot f` is an optimized implementation of `notFollowedBy (satisfy f)`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. ] ] [`` val @next2CharsSatisfy@: (char -> char -> bool) -> Parser ``] [ `next2CharsSatisfy f` succeeds if the predicate function `f` returns `true` when applied to the next 2 chars in the input stream, otherwise it fails. If there aren't 2 chars remaining in the input stream, this parser fails (as opposed to `next2CharsSatisfyNot`). This parser never changes the parser state. Any newline (`"\n"`, `"\r\n"` or `"\r"`) in the input is interpreted as a single char `'\n'`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. ] ] [`` val @next2CharsSatisfyNot@: (char -> char -> bool) -> Parser ``] [ `next2CharsSatisfyNot f` succeeds if the predicate function `f` returns `false` when applied to the next 2 chars in the input stream, otherwise it fails. If there aren't 2 chars remaining in the input stream, this parser succeeds (as opposed to `next2CharsSatisfy`). This parser never changes the parser state. Any newline (`"\n"`, `"\r\n"` or `"\r"`) in the input is interpreted as a single char `'\n'`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. ] ] [`` val @previousCharSatisfies@: (char -> bool) -> Parser ``] [ `previousCharSatisfies f` succeeds if the predicate function `f` returns `true` when applied to the previous char in the stream, otherwise it fails. If there is no previous char (because the input stream is at the beginning), this parser fails (as opposed to `previousCharSatisfiesNot`). This parser never changes the parser state.
Any newline (`"\n"`, `"\r\n"` or `"\r"`) in the input is interpreted as a single char `'\n'`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. ] ] [`` val @previousCharSatisfiesNot@: (char -> bool) -> Parser ``] [ `previousCharSatisfiesNot f` succeeds if the predicate function `f` returns `false` when applied to the previous char in the stream, otherwise it fails. If there is no previous char (because the stream is at the beginning), this parser succeeds (as opposed to `previousCharSatisfies`). This parser never changes the parser state. Any newline (`"\n"`, `"\r\n"` or `"\r"`) in the input is interpreted as a single char `'\n'`. [note If this parser fails, it returns no descriptive error message; hence it should only be used together with parsers that take care of a potential error. ] ] [`` // Helper functions // ================ ``] [`` [] val #EOS#: char = CharStream.Iterator.EndOfStreamChar ``] [`` val @foldCase@: string -> string ``] [ Forwards all calls to `FParsec.Text.FoldCase`. ] [`` val @normalizeNewlines@: string -> string ``] [ Forwards all calls to `FParsec.Text.NormalizeNewlines`. ] [`` val @floatToHexString@: float -> string ``] [ Returns a hexadecimal string representation of the `float` argument. The hexadecimal format is the one supported by IEEE 754r, C99 and Java. This function produces the same output as the [url "http://java.sun.com/javase/6/docs/api/java/lang/Double.html#toHexString(double)" `Double.toHexString`] method in Java. ] [`` val @floatOfHexString@: string -> float ``] [ Returns the float value represented by the given string in hexadecimal format. The supported input format is (in regular expression notation): ``{regex} [+-]?((0[xX])?([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)? |[iI][nN][fF]([iI][nN][iI][tT][yY])?
|[nN][aA][nN] ) `` Note that no leading or trailing whitespace is allowed, neither are trailing format specifiers such as `{regex}f` or `{regex}d`. For example, a valid input string is `{regex}0x1f.cP-5`, which represents the value 31.75 * 2[sup -5]. The numerical value represented by the input string is conceptually converted to an "infinitely precise" binary value that is then rounded to type `float` by the usual round-to-nearest (and ties-to-even) rule of IEEE 754 floating-point arithmetic. The special values `{regex}NaN` and `{regex}Inf(inity)?` (case insensitive) are also recognized. Signs of zero and Infinity values are preserved. A `System.FormatException` is raised if the string representation is invalid. A `System.OverflowException` is raised, if the value represented by the input string (after rounding) is greater than `System.Double.MaxValue` or less than `System.Double.MinValue`. ] [`` val @float32ToHexString@: float32 -> string ``] [ Returns a hexadecimal string representation of the `float32` argument. The hexadecimal format is the one supported by IEEE 754r, C99 and Java. This function produces the same output as the `[url "http://java.sun.com/javase/6/docs/api/java/lang/Float.html#toHexString(float)" Float.toHexString] ` method in Java. ] [`` val @float32OfHexString@: string -> float32 ``] [ Returns the `float32` value represented by the given string in hexadecimal format. The supported input format is (in regular expression notation): ``{regex} [+-]?((0[xX])?([0-9a-fA-F]+(\.[0-9a-fA-F]*)?|\.[0-9a-fA-F]+)([pP][+-]?[0-9]+)? |[iI][nN][fF]([iI][nN][iI][tT][yY])? |[nN][aA][nN] ) `` Note that no leading or trailing whitespace is allowed, neither are trailing format specifiers such as `{regex}f` or `{regex}d`. For example, a valid input string is `{regex}0x1f.cP-5`, which represents the value 31.75 * 2[sup -5]. 
The numerical value represented by the input string is conceptually converted to an "infinitely precise" binary value that is then rounded to type `float32` by the usual round-to-nearest (and ties-to-even) rule of IEEE 754 floating-point arithmetic. The special values `NaN` and `Inf(inity)?` (case insensitive) are also recognized. Signs of zero and Infinity values are preserved. Note that in general `float32OfHexString(str)` is *not* equivalent to `float32 (floatOfHexString(str))`, because the latter version rounds twice. A `System.FormatException` is raised if the string representation is invalid. A `System.OverflowException` is raised, if the value represented by the input string (after rounding) is greater than `System.Float.MaxValue` or less than `System.Float.MinValue`. ] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-charstream.txt ================================================  [section#CharStream FParsec.CharStream] [toc] [auto-link{hide-outer-auto-links = ["UserState"], links = ["UserState" : CharStream_1.members.UserState]}] [section#CharStream CharStream] Provides read-access to a sequence of UTF-16 chars. [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] The `CharStream` class provides a unified interface for efficiently reading UTF-16 chars from a binary stream or an in-memory char buffer (e.g. a string). It is optimized for the use in backtracking parser applications and supports arbitrary *char-based* seeking, even for streams larger than the addressable memory (on 32-bit platforms). [** The `CharStream` class is the base class of `[^CharStream_1 CharStream<'TUserState>\ ]`], which adds a user-definable state component and some convenience methods for working with the state of a `CharStream` instance. 
[#block-wise A `CharStream` constructed from a `[url "https://msdn.microsoft.com/en-us/library/system.io.stream.aspx" System.IO.Stream]` or a file path reads the stream block-wise and only holds the most recently accessed block in memory.] The blocks overlap in order to provide efficient access on the boundary between blocks. If the char content is already available as a string or a char array, a `CharStream` can be directly constructed from the char buffer (without needing to copy the buffer). The overhead of accessing an in-memory char buffer through a `CharStream` is minimal. [dl [Position information] [ The position of the next char in the stream is described by the following 4 properties: - `Index`, the index of the UTF-16 char in the stream, - `Line`, the line number for the next char, - `LineBegin`, the index of the first char of the line that also contains the next char, - `Name`, a description or identifier for the stream. The `LineBegin` can be combined with the `Index` to calculate a `Column` number. Among these properties the char index is the most important one, as the `CharStream` uses it to uniquely identify a UTF-16 char in the stream. The other 3 properties further describe the text location of the char identified by the index, but they are not necessary for the core functionality of the `CharStream` class. The `CharStream` class keeps track of this additional position information to provide a more convenient interface to higher-level library functions, in particular to assist debugging and error reporting purposes. ] [Newlines] [ For performance reasons the most basic stream operations do *not* automatically recognize [url "https://en.wikipedia.org/wiki/Newline" newlines] (end-of-line markers) in the stream content. If you skip any newline with these methods, you have to manually register the newline afterwards with one of the [^RegisterNewline-methods `RegisterNewline` methods] (otherwise the line and column count becomes incorrect). 
In order to provide a convenient interface for parser routines, the `CharStream` class also provides [^methods-that-register-newlines some more advanced methods] that automatically register any skipped standard newline (`"\n"`, `"\r\n"` and `"\r"`). Additionally, it provides two methods that automatically register any Unicode newline (`SkipUnicodeWhitespace` and `SkipUnicodeNewline`). It should be obvious from the method names which methods automatically register newlines and which don't. ] [#Case-insensitive matching#] [ The `MatchCaseFolded` and `SkipCaseFolded` members match the content of the stream "case-insensitively" with a reference string. In this instance "case-insensitive" means that before the chars are matched with the reference string they are [url "http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching" mapped to a canonical form where case differences are erased]. For performance reasons `MatchCaseFolded` only applies the (non-Turkic) 1-to-1 [url "http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt" case folding mappings] (v. 8.0.0) for Unicode code points in the Basic Multilingual Plane, i.e. code points below 0x10000. These mappings are sufficient for many case-insensitive parser grammars encountered in practice, but they are not appropriate for matching arbitrary natural language content. Please also note that the `CharStream` class performs no Unicode [url "http://unicode.org/reports/tr15/" normalization]. ] [ #Non-sequential access#] [ [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] If you construct a `CharStream` from a `System.IO.Stream` or a file path and you backtrack over a distance long enough to require the `CharStream` to reread a previous block, then the underlying **byte stream needs to support seeking**, otherwise a `NotSupportedException` is thrown. 
Furthermore, the [** [^Decoder Decoder] for the input [^Encoding Encoding] must be serializable] if you backtrack to a block other than the first in the stream. Note that *file streams created for regular disk files are always seekable and all the .NET standard decoders are serializable*. In order to support non-seekable streams for applications which don't require extensive backtracking, no exception will be thrown before an operation actually requires backtracking and the necessary capabilities of the stream or decoder are not available. ] [Decoder errors] [ A `CharStream` constructed from a binary input stream decodes the input data with the help of a `Decoder` instance obtained via the `Encoding`'s `GetDecoder` method. Depending on the configuration of the encoding the decoder might throw an exception if it encounters invalid byte sequences, usually a `System.Text.DecoderFallbackException` or a `System.ArgumentException`. [fn The detection of invalid byte sequences by the .NET decoders is not entirely reliable. For example, `System.Text.UnicodeEncoding` (UTF-16) has an alignment related bug in .NET versions prior to 4.0 that sometimes leads to invalid surrogate pairs not being detected. The implementations of more complicated encodings, like GB18030, ISO-2022 and ISCII, also have [url "https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=432648" several issues] with regard to the detection of invalid input data.] ] [Disposable interface] [ [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] A `CharStream` holds managed and unmanaged resources that need to be explicitly released. Hence, it is very important that `CharStream` objects are promptly disposed after use. Where possible `CharStream` objects should only be used within a "using" block (C#), a "use" expression (F#) or similar constructs in other languages. ] [Thread safety] [ `CharStream` members are not thread-safe.
] [[#low-trust Low-Trust version]] [ If you compile FParsec with the `LOW_TRUST` conditional compiler symbol, the `CharStream` class differs from the normal version as follows: - No [url "https://msdn.microsoft.com/en-us/library/t2yzs44b.aspx" unverifiable code] involving pointers is used. *This allows FParsec to be executed in an environment with reduced trust*, such as medium trust ASP.NET applications or Silverlight applications. - A `CharStream` that is constructed from a `System.IO.Stream` or a file path reads the complete file into a single string during construction. *This severely limits the maximum practical stream size.* - Although the `CharStream` class still supports the `IDisposable` interface, disposing the `CharStream` instances is no longer necessary, since no resources are held that need to be explicitly released. See also [^download-and-installation.low-trust-version]. ] ] [/section] [section#exceptions I/O exceptions] If you construct a `CharStream` from a `System.IO.Stream` or a file path, the constructor and any `CharStream` operation that requires reading chars from the underlying byte stream may throw one of the following exceptions. In the [^low-trust Low-Trust version], the constructor decodes the complete byte stream and hence only the constructor may throw one of these exceptions. [note Doing actual work in a constructor and potentially throwing exceptions seems to be a somewhat controversial design. We think it's the right choice for the `CharStream` class, because this way you can have a reasonable expectation that the `CharStream` actually works after you've successfully constructed it.] In general it is *not* safe to continue to use a `CharStream` instance after one of these exceptions was thrown, though calling `Dispose()` is always safe. [dl [`[no-auto-link NotSupportedException]`] [Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the `Encoding`'s `Decoder` is not serializable.
See also the remarks above on @non-sequential access@.] [`[no-auto-link IOException]`] [An I/O error occurred while reading data from the underlying byte stream.] [`[no-auto-link ArgumentException]`] [The underlying byte stream contains invalid bytes and the `Encoding` was constructed with the `throwOnInvalidBytes` option.] [`[no-auto-link DecoderFallbackException]`] [The underlying byte stream contains invalid bytes for which the decoder fallback threw this exception. The byte index of the invalid bytes in the stream is stored as a boxed `System.Int64` in the `"Stream.Position"` entry of the `[url "https://msdn.microsoft.com/en-us/library/system.exception.data.aspx" Data]` member of the exception instance. The precision of the index depends on the precision of the `DecoderFallbackException`'s `[url "http://msdn.microsoft.com/en-us/library/system.text.decoderfallbackexception.index.aspx" Index]` member. If the underlying `System.IO.Stream` is not seekable, the byte index only takes into account the bytes read by the `CharStream`, but not any bytes read before the `CharStream` was constructed. ] ] [/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec type [+CharStream] = interface System.IDisposable ``] [`` @new@: chars: string * index: int * length: int -> CharStream ``] [#new_string Is equivalent to `[^ new_string_offset new CharStream](chars, index, length, 0L)`. ] [`` @new@: chars: string * index: int * length: int * streamBeginIndex: int64 -> CharStream ``] [#new_string_offset Constructs a `CharStream` from the chars in the string argument between the indices `index` (inclusive) and `index + length` (exclusive). By directly referencing the chars in the string this constructor avoids any copy of the string content. The first char in the stream is assigned the index `streamBeginIndex`. A positive `streamBeginIndex` allows you for example to create a substream of another `CharStream`, i.e.
a `CharStream` instance that only contains a sub-segment of another char stream but is accessible through the same char indices. `chars` must not be null. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: - `index` ≥ 0, `length` ≥ 0, `index` + `length` ≤ `chars.Length` and - 0 ≤ `streamBeginIndex` < 2[sup 60]. [important [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] The given string is "[url "https://msdn.microsoft.com/en-us/library/83y4ak54.aspx" pinned]" until the `CharStream` is disposed. Pinning the string prevents the GC from moving it around in memory during garbage collection. On .NET (at least in versions up to and including 4.0) the pinning has no effect if the string is large enough to be allocated on the Large Object Heap, i.e. has a length of about 42500 chars or more. However, pinning smaller strings does constrain the normal operations of the GC. Thus, **to minimize the negative impact on the GC, you should dispose `CharStream` instances constructed from small strings as soon as you're done parsing it**. If you keep a large number of `CharStream` instances constructed from small strings around for an extended period of time, you risk fragmenting the heap. ] ] [`` @new@: chars: char[] * index: int * length: int -> CharStream ``] [#new_char-array [small [/ This constructor is not available in the [^low-trust Low-Trust version] of FParsec.]] Is equivalent to `[^ new_char-array_offset new CharStream](chars, index, length, 0L)`. ] [`` @new@: chars: char[] * index: int * length: int * streamBeginIndex: int64 -> CharStream ``] [#new_char-array_offset [small [/ This constructor is not available in the [^low-trust Low-Trust version] of FParsec.]] Constructs a `CharStream` from the chars in the char array argument between the indices `index` (inclusive) and `index + length` (exclusive). 
By directly referencing the chars in the char array this constructor avoids any copy of the char array content. The first char in the stream is assigned the index `streamBeginIndex`. A positive `streamBeginIndex` allows you for example to create a substream of another `CharStream`, i.e. a `CharStream` instance that only contains a sub-segment of another char stream but is accessible through the same char indices. `chars` must not be null. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: - `index` ≥ 0, `length` ≥ 0, `index` + `length` ≤ `chars.Length` and - 0 ≤ `streamBeginIndex` < 2[sup 60]. [note A `CharStream` constructed from a char array does not support .NET regex matching via the `[^Match_Regex Match]` method.] [important The given char array is "[url "https://msdn.microsoft.com/en-us/library/83y4ak54.aspx" pinned]" until the `CharStream` is disposed. Pinning the char array prevents the GC from moving it around in memory during garbage collection. On .NET (at least in versions up to and including 4.0) the pinning has no effect if the char array is large enough to be allocated on the Large Object Heap, i.e. has a length of about 42500 chars or more. However, pinning smaller char arrays does constrain the normal operations of the GC. Thus, **to minimize the negative impact on the GC, you should dispose `CharStream` instances constructed from small char arrays as soon as you're done parsing it**. If you keep a large number of `CharStream` instances constructed from small char arrays around for an extended period of time, you risk fragmenting the heap. ] ] [`` @new@: chars: NativePtr * length: int -> CharStream ``] [#new_char-pointer [small [/ This constructor is not available in the [^low-trust Low-Trust version] of FParsec.]][br] Is equivalent to `[^ new_char-pointer_offset new CharStream](chars, length, 0L)`. 
] [`` @new@: chars: NativePtr * length: int * streamBeginIndex: int64 -> CharStream ``] [#new_char-pointer_offset [small [/ This constructor is not available in the [^low-trust Low-Trust version] of FParsec.]] Constructs a `CharStream` from the `length` chars at the pointer address. By directly referencing the chars at the pointer address this constructor avoids any copy of the char buffer. The first char in the stream is assigned the index `streamBeginIndex`. A positive `streamBeginIndex` allows you for example to create a substream of another `CharStream`, i.e. a `CharStream` instance that only contains a sub-segment of another char stream but is accessible through the same char indices. `chars` must not be null. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: - `length` ≥ 0, `chars + length` must not overflow and - 0 ≤ `streamBeginIndex` < 2[sup 60]. [note A `CharStream` constructed from a pointer does not support .NET regex matching via the `[^Match_Regex Match]` method.] ] [`` @new@: path: string * encoding: System.Text.Encoding -> CharStream ``] [#new_file-path Is equivalent to `[^ new_file-path_2 new CharStream](path, encoding, true)`. 
] [`` @new@: path: string * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool -> CharStream ``] [#new_file-path_2 Is equivalent to `` [^ new_file-path_3 new CharStream]( path, encoding, detectEncodingFromByteOrderMarks, blockSize = DefaultBlockSize (* = 3*2^16 ≈ 200k *), blockOverlap = DefaultBlockSize/3, minRegexSpace = ((DefaultBlockSize/3)*2)/3, byteBufferLength = DefaultByteBufferLength ) `` ] [`` @new@: path: string * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool * blockSize: int * blockOverlap: int * minRegexSpace: int * byteBufferLength: int -> CharStream ``] [#new_file-path_3 Constructs a `CharStream` from a `FileStream` as if by calling `` [^ new_stream_4 new CharStream]( new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan), leaveOpen = false, encoding = encoding, detectEncoding = true, blockSize = DefaultBlockSize (* = 3*2^16 ≈ 200k *), blockOverlap = DefaultBlockSize/3, minRegexSpace = ((DefaultBlockSize/3)*2)/3, byteBufferLength = DefaultByteBufferLength ) `` If an exception occurs after the `FileStream` is constructed but before the `CharStream` constructor is finished, the `FileStream` is disposed. [note The `FileStream` constructor might throw an exception, too.] ] [`` @new@: stream: System.IO.Stream * encoding: System.Text.Encoding -> CharStream ``] [#new_stream Is equivalent to `[^ new_stream_3 new CharStream](stream, false, encoding, true)`. ] [`` @new@: stream: System.IO.Stream * leaveOpen: bool * encoding: System.Text.Encoding -> CharStream ``] [#new_stream_2 Is equivalent to `[^ new_stream_3 new CharStream](stream, leaveOpen, encoding, true)`. 
] [`` @new@: stream: System.IO.Stream * leaveOpen: bool * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool -> CharStream ``] [#new_stream_3 Is equivalent to `` [^ new_stream_4 new CharStream]( stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, blockSize = DefaultBlockSize (* = 3*2^16 ≈ 200k *), blockOverlap = DefaultBlockSize/3, minRegexSpace = ((DefaultBlockSize/3)*2)/3, byteBufferLength = DefaultByteBufferLength ) `` ] [`` @new@: stream: System.IO.Stream * leaveOpen: bool * encoding: System.Text.Encoding * detectEncodingFromByteOrderMarks: bool * blockSize: int * blockOverlap: int * minRegexSpace: int * byteBufferLength: int -> CharStream ``] [#new_stream_4 Constructs a `CharStream` from a `System.IO.Stream`. The normal version of the `CharStream` class supports stream sizes up to approximately (2[sup 31]/p)×(`blockSize` - `blockOverlap`) chars, where p is 4 on a 32-bit CLR and 8 on a 64-bit CLR.[br]The [^low-trust Low-Trust version] only supports streams small enough that the complete content can be read into a single string. [note This constructor reads the first block of chars from the input stream and hence can throw any of the I/O related exceptions detailed in the @exceptions@ section above.] Arguments: [dl [`stream`] [The byte stream providing the input. If `stream.[url "https://msdn.microsoft.com/en-us/library/system.io.stream.canread.aspx" CanRead]` returns `false`, an `ArgumentException` is thrown.] [`leaveOpen`] [Indicates whether the `stream` should be left open when the `CharStream` has finished reading it.] [`encoding`] [The default `Encoding` used for decoding the byte stream into chars. If the preamble returned by `encoding.[url "https://msdn.microsoft.com/en-us/library/system.text.encoding.getpreamble.aspx" GetPreamble]()` is present at the beginning of the stream, the `CharStream` will skip over it. 
] [`detectEncodingFromByteOrderMarks`] [Indicates whether the constructor should detect the encoding from a unicode [url "https://en.wikipedia.org/wiki/Byte-order_mark" byte-order mark] at the beginning of the stream. An encoding detected from a byte-order mark overrides the default `encoding`. The standard byte-order marks for the following encodings are supported: UTF-8, UTF-16 LE/BE and UTF-32 LE/BE.] [`blockSize`] [The number of chars per block. The value is rounded up to the first positive multiple of 1536. The default is 3×2[sup 16] ≈ 200k.] [`[#blockOverlapParameter]blockOverlap`] [The number of chars at the end of a block that are preserved when reading the next block into its internal char buffer. If this value is less than `encoding.GetMaxCharCount(1)` or not less than `blockSize/2`, the default value is used instead. The default is `blockSize/3`.] [`byteBufferLength`] [The size of the byte buffer used for decoding purposes. The default is 2[sup 12] = 4KB.] ] ] [`` member @Dispose@: unit -> unit ``] [Releases all resources used by the `CharStream`. If the `CharStream` was constructed from a `System.IO.Stream` or a file path and the constructor was not called with `leaveOpen = true`, the byte stream is closed. ] [`` member @BlockOverlap@: int ``] [ The number of chars at the end of a block that are preserved when the `CharStream` reads the next block into its internal char buffer. This value is only relevant for optimization purposes and as the maximum value for `MinRegexSpace`. This value can only be set at construction time with the respective [^blockOverlapParameter constructor parameter]. If the `CharStream` is constructed from a string, char array or char pointer or only contains 1 block, then this value is 0. In the [^low-trust Low-Trust version] this value is always 0. ] [`` member @IndexOfFirstChar@: int64 ``] [ The index of the first char in the stream.
This value is determined by the `streamIndexOffset` argument of some of the `CharStream` constructors. By default this value is 0. ] [`` member @IndexOfLastCharPlus1@: int64 ``] [ The index of the last char of the stream plus 1, or `Int64.MaxValue` if the end of the stream has not yet been detected. ] [`` member @IsBeginOfStream@: bool ``] [ Indicates whether the next char in the stream is the first char, i.e. whether `Index` equals `IndexOfFirstChar`. If the stream is empty, this value is always `true`. ] [`` member @IsEndOfStream@: bool ``] [ Indicates whether there is no char remaining in the stream, i.e. whether `Index` equals `IndexOfLastCharPlus1`. If the stream is empty, this value is always `true`. ] [`` member @Index@: int64 ``] [ The stream index of the next char. ] [`` member @IndexToken@: CharStreamIndexToken ``] [ A `CharStreamIndexToken` value representing the current `Index` value. ] [`` member @Line@: int64 ``] [ The line number for the next char. (The line count starts with 1.) ] [`` member @LineBegin@: int64 ``] [ The stream index of the first char of the line that also contains the next char. ] [`` member @Column@: int64 ``] [The UTF-16 column number of the next char, i.e. `Index` - `LineBegin` + 1. ] [`` member @Name@: string with get, set ``] [ This string is used in error messages to describe the input stream. If the `CharStream` is constructed from a file path, the constructor initializes the `Name` value with the file path value. Otherwise, `Name` is initialized to `null`. If the stream content is the concatenated content of multiple input files, you can improve error messages and help debugging by setting the name and resetting the line and column count at the transitions between the different content pieces. Setting the `Name` value increments the `StateTag` by 1, independent of whether the new value is different from the previous one. 
] [`` member @Position@: [^ reference.Position Position] ``] [ Returns `new [^ reference.Position Position](Name, Index, Line, Column)`. ] [`` val mutable @StateTag@: uint64 ``] [ The `StateTag`'s purpose is to provide an efficient way to determine whether the publicly visible state of the `CharStream` has changed after a series of method calls. For the purpose of this property, the state is defined as the aggregate of the `Index`, `Line`, `LineBegin` and `Name` values. The `UserState` value of `CharStream<'UserState>` instances is also part of the `CharStream` state. If a method or property setter changes one or more of these state values it increments the `StateTag` by 1. Thus, to determine whether a series of method calls has changed the `CharStream`, it is often enough to compare the `StateTag` values from before and after the method calls. The `StateTag` property is primarily meant for use in the implementation of parser combinators. If you directly call `CharStream` methods, you normally don't need the `StateTag` to determine whether the state has changed, because that is usually obvious from either the method's return value or the context in which it was called. Please see [^ users-guide.applying-parsers-in-sequence.the-statetag] for more details on the design rationale behind the `StateTag`. ] [`` member @Seek@: index: int64 -> unit ``] [ Seeks the `CharStream` to the char with the specified index in the stream. If you pass an index larger than the index of the last char in the stream, this method seeks the stream to the end of the stream, i.e. to one char past the last char in the stream. The index is zero-based, except if the `CharStream` was constructed with a positive `streamIndexOffset` argument, in which case the index of the first char equals the value of the `streamIndexOffset` argument (and the `IndexOfFirstChar` value). When this method changes the stream position, it increments the `StateTag` by 1.
When it does not change the position, it may or may not increment the `StateTag` by 1. An `ArgumentOutOfRangeException` is thrown if the index is less than the `IndexOfFirstChar`. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Seek@: indexToken: CharStreamIndexToken -> unit ``] [#Seek_CharStreamIndexToken This method is an optimized implementation of `@Seek@(GetIndex(indexToken))`. ] [`` static val @EndOfStreamChar@: char ``] [ The char returned by `Peek` and `Read` at the end of the stream. The value is `'\uFFFF'`. ] [`` member @Peek@: unit -> char ``] [Returns the next char without changing the state of the `CharStream`. At the end of the `CharStream` the `EndOfStreamChar` (`'\uFFFF'`) is returned.] [`` member @Peek2@: unit -> TwoChars ``] [ `Peek2()` is an optimized implementation of `new TwoChars(Peek(), [^Peek_int Peek](1))`. ] [`` member @Peek@: utf16Offset: int -> char ``] [#Peek_int Returns the char at the stream index `Index + utf16Offset`, without changing the state of the `CharStream`. If `Index + utf16Offset` is smaller than the index of the first char in the stream or larger than the index of the last char in the stream, the `EndOfStreamChar` (`'\uFFFF'`) is returned. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Peek@: utf16Offset: uint32 -> char ``] [#Peek_uint32 This method is an optimized implementation of `[^Peek_int Peek](int)` for `uint32` arguments. ] [`` member @PeekString@: length: int -> string ``] [#PeekString Returns a string with the next `length` stream chars, without changing the state of the `CharStream`. If less than `length` chars are remaining in the stream, only the remaining chars are returned. [note [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] If `length` is greater than the number of remaining chars in the stream, a temporary string with `length` chars may be allocated. 
For very large `length` values this might lead to an `OutOfMemoryException` even though a string with only the remaining chars in the stream would comfortably fit into memory. Please also note that the maximum length of a string on .NET is less than 2[sup 30]. Allocating a string larger than the maximum length will always yield an `OutOfMemoryException`, even on 64-bit systems with enough physical memory. ] If `length` is negative, an `ArgumentOutOfRangeException` is thrown. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @PeekString@: buffer: char[] * bufferIndex: int * length: int -> int ``] [#PeekString_char-array Copies the next `length` stream chars into `buffer`, without changing the state of the `CharStream`. Returns the number of chars copied. The chars are written into `buffer` beginning at the index `bufferIndex`. If less than `length` chars are remaining in the stream, only the remaining chars are copied. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: `bufferIndex` ≥ 0, `length` ≥ 0 and `bufferIndex` + `length` ≤ `buffer.Length`. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @PeekString@: buffer: NativePtr * length: int -> int ``] [#PeekString_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Copies the next `length` stream chars into the buffer at the specified pointer address, without changing the state of the `CharStream`. Returns the number of chars copied. If less than `length` chars are remaining in the stream, only the remaining chars are copied. If `length` is negative, an `ArgumentOutOfRangeException` is thrown. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Match@: char -> bool ``] [ Returns `true` if the next char in the stream matches the specified char. 
At the end of the stream `Match` always returns `false`. This method does not change the state of the `CharStream`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Match@: chars: string -> bool ``] [#Match_string Returns `true` if the passed string `chars` matches the next `chars.Length` stream chars. If not all the chars match or if there are not enough chars remaining in the stream, `false` is returned. If `chars` is empty, `true` is returned. `chars` must not be `null`. This method does not change the state of the `CharStream`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Match@: chars: char[] * charsIndex: int * length: int -> bool ``] [#Match_char-array Returns `true` if the next `length` stream chars match the chars in the array `chars` at the indices `charsIndex` to `charsIndex + length - 1`. If not all the chars match or if there are not enough chars remaining in the stream, `false` is returned. If `length` is 0, `true` is returned. `chars` must not be `null`. This method does not change the state of the `CharStream`. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: `charsIndex` ≥ 0, `length` ≥ 0 and `charsIndex` + `length` ≤ `chars.Length`. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Match@: chars: NativePtr * length: int -> bool ``] [#Match_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Returns `true` if the next `length` stream chars match the chars at the specified pointer address. If not all the chars match or if there are not enough chars remaining in the stream, `false` is returned. If `length` is 0, `true` is returned. This method does not change the state of the `CharStream`. If `length` is negative, an `ArgumentOutOfRangeException` is thrown.
This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @MatchCaseFolded@: caseFoldedChar: char -> bool ``] [#MatchCaseFolded_char Behaves like `[^Match Match](caseFoldedChar)`, except that the next char in the stream is case-folded before it is compared with `caseFoldedChar`. [note While the char in the stream is case‐folded before it is matched, the char `caseFoldedChar` is assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @MatchCaseFolded@: caseFoldedChars: string -> bool ``] [ Behaves like `[^Match_string Match](caseFoldedChars)`, except that the chars in the stream are case-folded before they are compared with `caseFoldedChars`. [note While the chars in the `CharStream` are case‐folded before they are matched, the chars in the string argument `caseFoldedChars` are assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @MatchCaseFolded@: caseFoldedChars: NativePtr * length:int -> bool ``] [#MatchCaseFolded_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Behaves like `[^Match_char-pointer Match](caseFoldedChars, length)`, except that the chars in the stream are case-folded before they are compared with the chars at the pointer address `caseFoldedChars`. [note While the chars in the `CharStream` are case‐folded before they are matched, the chars at the pointer address `caseFoldedChars` are assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @Match@: System.Text.RegularExpressions.Regex -> System.Text.RegularExpressions.Match ``] [#Match_Regex Applies the given regular expression to the stream chars beginning with the next char. 
Returns the resulting `Match` object. For performance reasons you should specify the regular expression such that it can only match at the beginning of a string, for example by prepending `"\\A"`. For `CharStream` instances constructed from strings the regular expression is applied to a string containing *all* the remaining chars in the stream. For `CharStream` instances constructed from large binary streams (with more than 1 block) the regular expression is not applied to a string containing all the remaining chars in the stream. Here the `MinRegexSpace` value determines the *minimum* number of chars that are guaranteed to be visible to the regular expression (assuming there are still enough chars remaining in the stream). The exact number of chars visible to the regular expression may be affected even by calls to `CharStream` methods like `[^Peek_int Peek]` or `[^Match_string Match]` that otherwise guarantee to not change the (outwardly visible) state of the `CharStream`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. [important [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] This method is not supported by `CharStream` instances constructed directly from char arrays or pointers. A `NotSupportedException` is thrown if this method is called on such a `CharStream` instance. ] [important [small [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.]][br] If the `CharStream` was constructed from a `System.IO.Stream` or a file path, the regular expression is applied to an internal *mutable* buffer. Since the `Match` object may work lazily, i.e. compute return values not before they are needed, you need to *retrieve all the required information from the `Match` object before you continue to access the `CharStream`*, otherwise you might get back invalid match results. Note that all strings returned by the `Match` object are, of course, immutable.] 
] [`` member @MinRegexSpace@: int with get, set ``] [ The number of chars that are guaranteed to be visible to a regular expression when it is matched by `[^Match_Regex Match]` (assuming there are enough chars remaining in the stream). The value must be non-negative and not greater than `BlockOverlap`. The default value is 2/3 of `BlockOverlap`. If the `CharStream` is constructed from a string, char array or char pointer or has only 1 block, then this value has no relevance and calling the property setter has no effect. (No [^low-trust Low-Trust version] `CharStream` instance has more than 1 block.) The `MinRegexSpace` value is not recorded in `CharStreamState` instances and setting its value does not affect the `StateTag`. An `ArgumentOutOfRangeException` is thrown if you try to set the property on a multi-block `CharStream` instance to a negative value or a value larger than the `BlockOverlap`. ] [`` [#RegisterNewline-methods] member @RegisterNewline@: unit -> bool ``] [ Registers a newline (an end-of-line character) at the previous stream char, i.e. increments the `Line` value by 1 and sets the `LineBegin` to `Index`. The previous `LineBegin` value must not equal `Index`. (For performance reasons this condition is only checked by an assert check in the debug build). This method also increments the `StateTag` by 1. ] [`` member @RegisterNewlines@: lineOffset: int -> newColumnMinus1: int -> bool ``] [ Increments the `Line` value by `lineOffset` and sets the `LineBegin` value to `Index - newColumnMinus1` (so that the `Column` value becomes `newColumnMinus1` + 1). The `lineOffset` must not be 0, the new `Line` value must be greater than 0 and the new `LineBegin` value must be different from the previous one. (For performance reasons these conditions are only checked by assert checks in the debug build). This method also increments the `StateTag` by 1.
] [`` member @RegisterNewlines@: lineOffset: int64 -> newColumnMinus1: int64 -> bool ``] [#RegisterNewlines_int64 This method is a variant of `@RegisterNewlines@` for `int64` arguments. ] [`` // The following methods require manual registration of skipped newlines ``] [`` [#Skip-members]member @Skip@: unit -> unit ``] [ Advances the position within the stream by 1 char, except at the end of the stream, where it does nothing. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Skip@: utf16Offset: int -> unit ``] [#Skip_int Advances the position within the stream by `utf16Offset` chars. The new position within the stream will be `min(Index + utf16Offset, IndexOfLastCharPlus1)`. This means you can't move past the end of the stream, because any position beyond the last char in the stream is interpreted as precisely one char beyond the last char. An `ArgumentOutOfRangeException` is thrown if the new position would lie before the beginning of the `CharStream`, i.e. if the new index would be less than `IndexOfFirstChar`. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. When this method changes the stream position, it increments the `StateTag` by 1. When it does not change the position (because the given offset is 0 or because the stream has already reached the end and the offset is positive), it may or may not increment the `StateTag` by 1. ] [`` member @Skip@: utf16Offset: uint32 -> unit ``] [#Skip_uint32 This method is an optimized implementation of `Skip` for `uint32` offsets. ] [`` member @Skip@: utf16Offset: int64 -> unit ``] [#Skip_int64 This method is a variant of `Skip` for `int64` offsets. ] [`` [#SkipAndPeek-members]member @SkipAndPeek@: unit -> char ``] [ `c <- SkipAndPeek()` is an optimized implementation of `Skip(); c <- Peek()`. 
] [`` member @SkipAndPeek@: utf16Offset: int -> char ``] [#SkipAndPeek_int `c <- SkipAndPeek(utf16Offset)` is an optimized implementation of `Skip(utf16Offset); c <- Peek()`, with the following *exception for negative offsets* `n`:[br] If the new position would lie before the beginning of the `CharStream`, i.e. if the new index would be less than `IndexOfFirstChar`, then `SkipAndPeek(n)` does not throw an exception like `stream.Skip(n)` would do. Instead it sets the position of the stream to `IndexOfFirstChar` and returns the `EndOfStreamChar` (`'\uFFFF'`). ] [`` member @SkipAndPeek@: utf16Offset: uint32 -> char ``] [#SkipAndPeek_uint32 `c <- SkipAndPeek(utf16Offset)` is an optimized implementation of `[^Skip_uint32 Skip](utf16Offset); c <- Peek()`. ] [`` member @Skip@: char -> bool ``] [#Skip_char Skips over the next char in the stream if this char matches the passed argument char. Returns `true` if the chars match; otherwise, `false`. At the end of the stream this method always returns `false`. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Skip@: TwoChars -> bool ``] [#Skip_TwoChars Skips over the next two chars in the stream if these chars match the two chars in the passed `TwoChars` value. Returns `true` if the chars match. If not both chars match or if there are less than 2 chars remaining in the stream, no char is skipped and `false` is returned. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Skip@: chars: string -> bool ``] [#Skip_string Skips over the next `chars.Length` chars in the stream if these chars match the passed string `chars`. Returns `true` if the chars match. 
If not all the chars match or if there are not enough chars remaining in the stream, no char is skipped and `false` is returned. If `chars` is empty, `true` is returned. `chars` must not be `null`. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `chars` is empty, in which case it may or may not increment the `StateTag` by 1. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Skip@: chars: char[] * charsIndex: int * length: int -> bool ``] [#Skip_char-array Skips over the next `length` chars in the stream if these chars match the chars in the passed array `chars` at the indices `charsIndex` to `charsIndex + length - 1`. Returns `true` if the chars match. If not all the chars match or if there are not enough chars remaining in the stream, `false` is returned and the position within the `CharStream` is not changed. If `length` is 0, `true` is returned. `chars` must not be `null`. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `length` is 0, in which case it may or may not increment the `StateTag` by 1. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: `charsIndex` ≥ 0, `length` ≥ 0 and `charsIndex` + `length` ≤ `chars.Length`. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Skip@: chars: NativePtr * length: int -> bool ``] [#Skip_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Skips over the next `length` chars in the stream if these chars match the chars at the pointer address `chars`. Returns `true` if the chars match. If not all the chars match or if there are not enough chars remaining in the stream, `false` is returned and the position within the `CharStream` is not changed.
If `length` is 0, `true` is returned. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `length` is 0, in which case it may or may not increment the `StateTag` by 1. If `length` is negative, an `ArgumentOutOfRangeException` is thrown. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipCaseFolded@: caseFoldedChar: char -> bool ``] [#SkipCaseFolded_char Behaves like `[^Skip_char Skip](caseFoldedChar)`, except that the next char in the stream is case-folded before it is compared with `caseFoldedChar`. [note While the char in the stream is case‐folded before it is matched, the char `caseFoldedChar` is assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @SkipCaseFolded@: caseFoldedChars: string -> bool ``] [ Behaves like `[^Skip_string Skip](caseFoldedChars)`, except that the chars in the stream are case-folded before they are compared with `caseFoldedChars`. [note While the chars in the `CharStream` are case‐folded before they are matched, the chars in the string argument `caseFoldedChars` are assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @SkipCaseFolded@: caseFoldedChars: NativePtr * length:int -> bool ``] [#SkipCaseFolded_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Behaves like `[^Skip_char-pointer Skip](caseFoldedChars, length)`, except that the chars in the stream are case-folded before they are compared with the chars at the pointer address `caseFoldedChars`. [note While the chars in the `CharStream` are case‐folded before they are matched, the chars at the pointer address `caseFoldedChars` are assumed to already be case-folded (e.g.
with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@. ] ] [`` member @Read@: unit -> char ``] [Skips over the next char in the stream. Returns the skipped char. At the end of the stream `Read()` does not change the stream position and returns the `EndOfStreamChar` (`'\uFFFF'`). When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Read@: length: int -> string ``] [#Read_int Skips over the next `length` chars in the stream. Returns the skipped chars as a string. If less than `length` chars are remaining in the stream, only the remaining chars are skipped and returned. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `length` is 0, in which case it may or may not increment the `StateTag` by 1. If `length` is negative, an `ArgumentOutOfRangeException` is thrown. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Read@: buffer: char[] * bufferIndex: int * length: int -> int ``] [#Read_char-array Skips over the next `length` stream chars and copies the skipped chars into `buffer`. Returns the number of copied and skipped chars. The chars are written into `buffer` beginning at the index `bufferIndex`. If less than `length` chars are remaining in the stream, only the remaining chars are copied and skipped. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `length` is 0, in which case it may or may not increment the `StateTag` by 1. An `ArgumentOutOfRangeException` is thrown if the arguments do not satisfy the following conditions: `bufferIndex` ≥ 0, `length` ≥ 0 and `bufferIndex` + `length` ≤ `buffer.Length`. 
This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @Read@: buffer: NativePtr * length: int -> int ``] [#Read_char-pointer [small [/ This method is not available in the [^low-trust Low-Trust version] of FParsec.]] Skips over the next `length` stream chars and copies the skipped chars into the buffer at the given pointer address. Returns the number of copied and skipped chars. If less than `length` chars are remaining in the stream, only the remaining chars are copied and skipped. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`, except if `length` is 0, in which case it may or may not increment the `StateTag` by 1. If `length` is negative, an `ArgumentOutOfRangeException` is thrown. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @ReadFrom@: indexOfFirstChar: CharStreamIndexToken -> string ``] [ Returns a string with the chars between the stream index `indexOfFirstChar` (inclusive) and the current `Index` of the stream (exclusive). This method throws - an `ArgumentOutOfRangeException`, if `Index < indexOfFirstChar`, and - an `ArgumentException`, if the `CharStreamIndexToken` is a zero-initialized instance (i.e. constructed with the default value type constructor). It may also throw any of the [^exceptions I/O related exceptions] detailed above. [note You may only pass `CharStreamIndexToken` values that were retrieved from the `CharStream` instance on which you're calling `ReadFrom`. Passing a `CharStreamIndexToken` value that was created for another `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.]
] [`` [#methods-that-register-newlines] // The following methods automatically register skipped newlines ``] [`` member @SkipWhitespace@: unit -> bool ``] [ Skips over any sequence of space (`' '`), tab (`'\t'`) or newline (`'\r'`, `'\n'`) chars. Returns `true` if it skips at least one char, otherwise `false`. This method registers any skipped standard newline (`"\n"`, `"\r\n"` or `"\r"`). When this method skips at least one char, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipUnicodeWhitespace@: unit -> bool ``] [ Skips over any sequence of unicode whitespace chars (as identified by `System.Char.IsWhiteSpace`). Returns `true` if it skips at least one char, otherwise `false`. This method registers any skipped unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"` or `"\u2029"`). [note This method recognizes the form feed char `'\f'` (`'\u000C'`) as a Unicode whitespace character, but not as a newline character.] When this method skips at least one char, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipNewline@: unit -> bool ``] [ Skips over a standard newline (`"\n"`, `"\r\n"` or `"\r"`). Returns `true` if a newline is skipped, otherwise `false`. When this method skips a newline, it also registers it. When this method skips a newline, it increments the `StateTag` by 1, otherwise it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipUnicodeNewline@: unit -> bool ``] [ Skips over a unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"`, or `"\u2029"`). Returns `true` if a newline is skipped, otherwise `false`.
[note This method does not recognize the form feed char `'\f'` (`'\u000C'`) as a newline character.] When this method skips a newline, it also registers it. When this method skips a newline, it increments the `StateTag` by 1, otherwise it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipNewlineThenWhitespace@: powerOf2TabStopDistance: int * allowFormFeed: bool -> int ``] [ Skips over a newline (`"\n"`, `"\r\n"` or `"\r"`) followed by any (possibly empty) sequence of whitespace chars (`' '`, `'\t'`, `'\r'`, `'\n'` and optionally `'\f'`). If this method skips no chars because the next stream char is no newline char, it returns -1. Otherwise it returns the indentation of the first line with non-whitespace characters. The *indentation* is calculated as follows: - Any newline char (`'\r'` or `'\n'`) or form feed char (`'\f'`) resets the *indentation* to 0. - Any space char (`' '`) increments the *indentation* by 1. - Any tab char (`'\t'`) increments the *indentation* by[br] `powerOf2TabStopDistance` - (*indentation* modulo `powerOf2TabStopDistance`). The maximum indentation is 2[sup 31] - 1. If skipping a whitespace char would cause the indentation to overflow, the char is not skipped and the method returns the indentation up to that char. An `ArgumentOutOfRangeException` is thrown if `powerOf2TabStopDistance` is not a positive power of 2. The value of the `allowFormFeed` argument determines whether this method accepts the form feed char `'\f'` as a whitespace char. This method registers all skipped standard newlines (`"\n"`, `"\r\n"` or `"\r"`). When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. 
] [`` member @SkipRestOfLine@: skipNewline: bool -> unit ``] [ Skips over any chars before the next newline (`"\n"`, `"\r\n"` or `"\r"`) or the end of the stream. If `skipNewline` is `true` and a newline is present, the newline is also skipped. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @ReadRestOfLine@: skipNewline: bool -> string ``] [ `ReadRestOfLine(skipNewline)` behaves like `SkipRestOfLine(skipNewline)`, except that it returns a string with the skipped chars (without a newline). ] [`` member @ReadCharOrNewline@: unit -> char ``] [ Skips over any single char or standard newline (`"\n"`, `"\r\n"` or `"\r"`). This method returns `'\n'` when it skips a newline. Otherwise, it returns the skipped char, except at the end of the stream, where it returns the `EndOfStreamChar` (`'\uffff'`). When this method skips a newline, it also registers it. When this method skips a char or newline, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipCharsOrNewlines@: maxCount: int -> int ``] [ Skips over up to `maxCount` chars. Returns the number of skipped chars. The number of actually skipped chars is less than `maxCount` if the end of the stream is reached after less than `maxCount` chars. This method counts standard newlines (`"\n"`, `"\r\n"` or `"\r"`) as single chars. When this method skips a newline, it also registers it. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. An `ArgumentOutOfRangeException` is thrown if `maxCount` is negative. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. 
] [`` member @ReadCharsOrNewlines@: maxCount: int * normalizeNewlines: bool -> string ``] [ Behaves like `@SkipCharsOrNewlines@(maxCount)`, except that it returns a string with the skipped chars. The `normalizeNewlines` parameter determines whether all newlines (`"\n"`, `"\r\n"` or `"\r"`) in the returned string are normalized to `'\n'` or whether they are preserved in the original form they are encountered in the input. ] [`` member @SkipCharsOrNewlinesWhile@: predicate: (char -> bool) -> int ``] [ Skips over a sequence of chars that satisfy the `predicate` function. Stops at the first char for which `predicate` returns `false`. Returns the number of skipped chars. This method counts standard newlines (`"\n"`, `"\r\n"` or `"\r"`) as single chars and passes them to the predicate function as single `'\n'` chars. When this method skips a newline, it also registers it. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`. [caution The `predicate` function must not access the `CharStream` instance itself, because `SkipCharsOrNewlinesWhile` relies on `predicate` not having any side-effect on the internal state of the stream. ] This method may throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipCharsOrNewlinesWhile@: predicateForFirstChar: (char -> bool) * predicate: (char -> bool) -> int ``] [#SkipCharsOrNewlinesWhile2 Behaves like `@SkipCharsOrNewlinesWhile@(predicate)`, except that the first char to be skipped must satisfy `predicateForFirstChar` instead of `predicate`. ] [`` member @SkipCharsOrNewlinesWhile@: predicate: (char -> bool) * minCount: int * maxCount: int -> int ``] [#SkipCharsOrNewlinesWhile_int_int Skips over a sequence of up to `maxCount` chars that satisfy the `predicate` function, but backtracks to the start if it can only skip less than `minCount` chars. Returns the number of skipped chars. 
This method counts standard newlines (`"\n"`, `"\r\n"` or `"\r"`) as single chars and passes them to the predicate function as single `'\n'` chars. When this method skips a newline, it also registers it. An `ArgumentOutOfRangeException` is thrown if `maxCount` is negative. This method may also throw any of the [^exceptions I/O related exceptions] detailed above. [caution The `predicate` function must not access the `CharStream` instance itself, because `SkipCharsOrNewlinesWhile` relies on `predicate` not having any side-effect on the internal state of the stream. ] ] [`` member @SkipCharsOrNewlinesWhile@: predicateForFirstChar: (char -> bool) * predicate: (char -> bool) * minCount: int * maxCount: int -> int ``] [#SkipCharsOrNewlinesWhile2_int_int Behaves like `[^SkipCharsOrNewlinesWhile_int_int SkipCharsOrNewlinesWhile](predicate, minCount, maxCount)`, except that the first char to be skipped must satisfy `predicateForFirstChar` instead of `predicate`. ] [`` member @ReadCharsOrNewlinesWhile@: predicate: (char -> bool) * normalizeNewlines: bool -> string ``] [#ReadCharsOrNewlinesWhile Behaves like `@SkipCharsOrNewlinesWhile@(predicate)`, except that it returns a string with the skipped chars. The `normalizeNewlines` parameter determines whether all newlines (`"\n"`, `"\r\n"` or `"\r"`) in the returned string are normalized to `'\n'` or whether they are preserved in the original form they are encountered in the input. ] [`` member @ReadCharsOrNewlinesWhile@: predicateForFirstChar: (char -> bool) * predicate: (char -> bool) * normalizeNewlines: bool -> string ``] [#ReadCharsOrNewlinesWhile2 Behaves like `@ReadCharsOrNewlinesWhile@(predicate, normalizeNewlines)`, except that the first char to be skipped must satisfy `predicateForFirstChar` instead of `predicate`.
] [`` member @ReadCharsOrNewlinesWhile@: predicate: (char -> bool) * minCount: int * maxCount: int * normalizeNewlines: bool -> string ``] [#ReadCharsOrNewlinesWhile_int_int Behaves like `[^SkipCharsOrNewlinesWhile_int_int SkipCharsOrNewlinesWhile](predicate, minCount, maxCount)`, except that it returns a string with the skipped chars. The `normalizeNewlines` parameter determines whether all newlines (`"\n"`, `"\r\n"` or `"\r"`) in the returned string are normalized to `'\n'` or whether they are preserved in the original form they are encountered in the input. ] [`` member @ReadCharsOrNewlinesWhile@: predicateForFirstChar: (char -> bool) * predicate: (char -> bool) * minCount: int * maxCount: int * normalizeNewlines: bool -> string ``] [#ReadCharsOrNewlinesWhile2_int_int Behaves like `[^ReadCharsOrNewlinesWhile_int_int ReadCharsOrNewlinesWhile](predicate, minCount, maxCount, normalizeNewlines)`, except that the first char to be skipped must satisfy `predicateForFirstChar` instead of `predicate`. ] [`` [#SkipCharsOrNewlinesUntilString-members] member @SkipCharsOrNewlinesUntilString@: str: string * maxCount: int * foundString: out<bool> -> int ``] [ Skips over all stream chars before the first occurrence of the specified string or the end of the stream, but not over more than `maxCount` chars. Assigns `true` to the output parameter if the string is found, otherwise `false`. This method registers skipped newlines (`"\n"`, `"\r\n"` or `"\r"`) and counts them as single chars. However, no newline normalization takes place when the argument string `str` is matched with the stream chars. Hence, `str` should either contain no newlines or only in the form they occur in the stream. If `str` starts with `'\n'`, then `SkipCharsOrNewlinesUntilString` will not find occurrences of `str` in the stream that start in the middle of an `"\r\n"` newline. When this method changes the stream position, it increments the `StateTag` by 1; otherwise, it does not change the `StateTag`.
This method throws - an `ArgumentException`, if the string argument is empty, and - an `ArgumentOutOfRangeException`, if `maxCount` is negative. It may also throw any of the [^exceptions I/O related exceptions] detailed above. ] [`` member @SkipCharsOrNewlinesUntilString@: str: string * maxCount: int * normalizeNewlines: bool * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int ``] [#SkipCharsOrNewlinesUntilString_string Behaves like `@SkipCharsOrNewlinesUntilString@(str, maxCount, outBool)`, except that its output parameter is a string instead of a boolean. If `str` is found, a string with the skipped chars is assigned to this output parameter; otherwise, `null` is assigned to the output parameter. The `normalizeNewlines` parameter determines whether all newlines (`"\n"`, `"\r\n"` or `"\r"`) in the output string are normalized to `'\n'` or are preserved in the original form they are encountered in the input. ] [`` [#SkipCharsOrNewlinesUntilCaseFoldedString-members] member @SkipCharsOrNewlinesUntilCaseFoldedString@: caseFoldedString: string * maxCount: int * foundString: out<bool> -> int ``] [ Behaves like `@SkipCharsOrNewlinesUntilString@(caseFoldedString, maxCount, foundString)`, except that the chars in the stream are case-folded before they are compared with `caseFoldedString`. [note While the chars in the `CharStream` are case‐folded before they are matched, the chars in the string argument `caseFoldedString` are assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@.]
] [`` member @SkipCharsOrNewlinesUntilCaseFoldedString@: caseFoldedString: string * maxCount: int * normalizeNewlines: bool * skippedCharsIfStringFoundOtherwiseNull: out<string> -> int ``] [#SkipCharsOrNewlinesUntilCaseFoldedString_string Behaves like `[^SkipCharsOrNewlinesUntilString_string SkipCharsOrNewlinesUntilString](caseFoldedString, maxCount,` `normalizeNewlines, skippedCharsIfStringFoundOtherwiseNull)`, except that the chars in the stream are case-folded before they are compared with `caseFoldedString`. [note While the chars in the stream are case‐folded before they are matched, the chars in the string argument `caseFoldedString` are assumed to already be case-folded (e.g. with the help of `FParsec.Text.FoldCase`). Please also see the above remarks on @case-insensitive matching@.] ] [/interface-members] [/section] [/interface-reference] [/section] [section#CharStream_1 CharStream] Provides read-access to a sequence of UTF-16 chars. [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] The `CharStream<'TUserState>` class adds a user definable state component to its base class `CharStream`. The user state is accessible through the property `UserState`. It has the type `'TUserState`. You can retrieve a snapshot of the complete stream state, including the user state, from the `State` property. The value returned from the `State` property has the type `CharStreamState<'TUserState>`. You can pass a `CharStreamState` value to the `BacktrackTo` method in order to restore a previous state of the `CharStream`. [important `'TUserState` must be an immutable type or at least be treated as an immutable type if you want `BacktrackTo` to completely restore old values of the user state. Hence, when you need to change the user state, you should set a new `'TUserState` value to the `UserState` property of the `CharStream` instance, *not* mutate the existing `'TUserState` value.
] [/section] [section Members] [interface-members] [`` [] type CharStream<'TUserState> = inherit @CharStream@ // has the same constructors as CharStream ``] [`` member @UserState@: 'TUserState with get, set ``] [ The current user state value. Setting the `UserState` value increments the `StateTag` by 1, independent of whether the new value is different from the previous one. ] [`` member @State@: CharStreamState<'TUserState> ``] [ Returns a snapshot of the current `StateTag`, `Index`, `Line`, `LineBegin`, `Name`, and `UserState` values in the form of an immutable `CharStreamState` value. ] [`` member @BacktrackTo@: CharStreamState<'TUserState> -> unit ``] [ Restores the stream to the state represented by the given `CharStreamState` value. For example: `` fun (stream: CharStream<'u>) -> let state = stream.State // ... (do something with stream that might change the state) stream.BacktrackTo(state) // restores stream to previous state // ... `` This method throws an `ArgumentException` if the `CharStreamState` instance is zero-initialized (i.e. constructed with the default value type constructor). It may also throw any of the [^exceptions I/O related exceptions] detailed above. [note You may only pass `CharStreamState` values that were retrieved from the `CharStream` instance on which you're calling `BacktrackTo`. Passing a `CharStreamState` value that was created for another `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.] ] [`` member @ReadFrom@: stateWhereStringBegins: CharStreamState<'TUserState> * normalizeNewlines: bool -> string ``] [ Returns a string with the chars between the index of the `stateWhereStringBegins` (inclusive) and the current `Index` of the stream (exclusive). 
The `normalizeNewlines` parameter determines whether all newlines (`"\n"`, `"\r\n"` or `"\r"`) in the returned string are normalized to `'\n'` or whether they are preserved in the original form they are encountered in the input. (If `stateWhereStringBegins.[^CharStreamState.Line Line]` equals the current `Line`, this method will never normalize any newlines in the returned string.) This method throws - an `ArgumentOutOfRangeException`, if `Index < [^ GetIndex_state GetIndex](stateWhereStringBegins)`, and - an `ArgumentException`, if the `CharStreamState` instance is zero-initialized (i.e. constructed with the default value type constructor). It may also throw any of the [^exceptions I/O related exceptions] detailed above. [note You may only pass `CharStreamState` values that were retrieved from the `CharStream` instance on which you're calling `ReadFrom`. Passing a `CharStreamState` value that was created for another `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.] ] [`` member @CreateSubstream@<'TSubStreamUserState>: stateWhereSubstreamBegins: CharStreamState<'TUserState> -> CharStream<'TSubStreamUserState> ``] [ Creates a new `CharStream<'TSubStreamUserState>` instance with the stream chars between the index of the `stateWhereSubstreamBegins` (inclusive) and the current `Index` of the stream (exclusive). The state of the substream is initialized to `stateWhereSubstreamBegins`, so that the stream and the substream will report the same position (`Index`, `Line`, `LineBegin` and `Name`) for corresponding chars. However, the beginning and end will normally differ between stream and substream, in particular the `IndexOfFirstChar` and `IndexOfLastCharPlus1` values will normally differ between stream and substream.
An example: `` open FParsec open FParsec.Primitives open FParsec.CharParsers open FParsec.Error let embeddedBlock (beginDelim: string) (endDelim: string) : Parser<_,_> = let expectedEmbeddedBlock = expected "embedded block" fun stream -> if stream.Skip(beginDelim) then let stateAtBegin = stream.State let mutable foundString = false let maxChars = System.Int32.MaxValue stream.SkipCharsOrNewlinesUntilString(endDelim, maxChars, &foundString) |> ignore if foundString then // create substream with content between beginDelim and endDelim use substream = stream.CreateSubstream(stateAtBegin) // here we would normally work with the substream, // in this example we will just extract the string content let str = substream.ReadCharsOrNewlines(System.Int32.MaxValue, true) Reply(str) else Reply(Error, expectedString endDelim) else Reply(Error, expectedEmbeddedBlock) `` ``{fsi} > run (embeddedBlock "/*" "*/") "/*substream content*/";; val it : ParserResult<string,unit> = Success: "substream content" `` [note [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.][br] If you create a substream for a `CharStream` instance with more than one block, the content of the substream needs to be copied. Thus, you can minimize the overhead associated with creating a substream by ensuring that the `CharStream` has only one block, either by choosing a sufficiently large `blockSize`, or by creating the `CharStream` from a string or char buffer.] You may use a stream and its substreams concurrently. However, notice the following warning: [caution [/ This note does not apply to the [^low-trust Low-Trust version] of FParsec.][br] You may not dispose a stream before all of its substreams are disposed. Disposing a stream before all its substreams are disposed triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.]
This method throws - an `ArgumentOutOfRangeException`, if `Index < [^ GetIndex_state GetIndex](stateWhereSubstreamBegins)`, and - an `ArgumentException`, if the `CharStreamState` instance is zero-initialized (i.e. constructed with the default value type constructor). It may also throw any of the [^exceptions I/O related exceptions] detailed above. [note You may only pass `CharStreamState` values that were retrieved from the `CharStream` instance on which you're calling `CreateSubstream`. Passing a `CharStreamState` value that was created for another `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour.] ] [/interface-members] [/section] [/interface-reference] [/section] [interface-reference] [section#CharStreamIndexToken CharStreamIndexToken] An opaque representation of a `CharStream` char index. [$$interface] `CharStream` methods can handle `CharStreamIndexToken` values more efficiently than integer char indices. You can retrieve `CharStreamIndexToken` values from the `CharStream.IndexToken` and `CharStreamState<_>.IndexToken` properties. You can get the char index corresponding to a given `CharStreamIndexToken` value by calling its `GetIndex` method with the `CharStream` instance from which the token was retrieved. Zero-initialized `CharStreamIndexToken` values constructed with the default value type constructor are *not* valid and trying to call a `CharStream` method with such an instance will trigger an exception. [note A `CharStreamIndexToken` instance *may only be used together with the `CharStream` instance it was created for*. ] [interface-members] [`` type CharStreamIndexToken = struct ``] [`` member @GetIndex@: CharStream -> int64 ``] [ Returns the stream index represented by the `CharStreamIndexToken` instance. The `CharStream` instance passed as the argument must be the `CharStream` instance from which the `CharStreamIndexToken` was retrieved.
Passing a different `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour. An `InvalidOperationException` is thrown if the `CharStreamIndexToken` value is zero-initialized (i.e. constructed with the default value type constructor). ] [`` end ``] [/interface-members] [/section] [/interface-reference] [auto-link{hide-outer-auto-links = ["IndexToken", "Line", "LineBegin", "Name", "UserState"], do-not-pick-up-as-link-targets = ["IndexToken", "Line", "LineBegin", "Name", "UserState" ]}] [interface-reference] [section#CharStreamState CharStreamState] An immutable value type representation of the state of a `CharStream`. [$$interface] You can retrieve `CharStreamState` values from the `[^ State CharStream<_>.State]` property. By passing a `CharStreamState` value to the `BacktrackTo` method of a `@CharStream<_>\ @` instance, you can restore the stream to the state represented by the `CharStreamState` value. Zero-initialized `CharStreamState` values constructed with the default value type constructor are *not* valid and trying to call a `CharStream` method with such an instance will trigger an exception. [note A `CharStreamState` instance *may only be used together with the `CharStream` instance it was created for*. ] [interface-members] [`` type CharStreamState<'TUserState> = struct member #Tag#: int64 member #IndexToken#: CharStreamIndexToken member #Line#: int64 member #LineBegin#: int64 member #Name#: string member #UserState#: 'TUserState ``] [`` member @GetIndex@: CharStream<'TUserState> -> int64 ``] [#GetIndex_state `state.GetIndex(stream)` is an optimized implementation of `state.[no-auto-link IndexToken].[^CharStreamIndexToken.GetIndex GetIndex](stream)`. The `[^CharStream_1 CharStream<'TUserState>\ ]` instance passed as the argument must be the `CharStream` instance from which the `CharStreamState` was retrieved.
Passing a different `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour. An `InvalidOperationException` is thrown if the `CharStreamState` instance is zero-initialized (i.e. constructed with the default value type constructor). ] [`` member @GetPosition@: CharStream<'TUserState> -> Position ``] [ `state.GetPosition(stream)` is an optimized implementation of `new [^ reference.Position Position]([no-auto-link state.Name, state.[^GetIndex_state GetIndex](stream), state.Line, state.Column])`. The `[^CharStream_1 CharStream<'TUserState>\ ]` instance passed as the argument must be the `CharStream` instance from which the `CharStreamState` was retrieved. Passing a different `CharStream` instance triggers an assert exception in debug builds and will otherwise lead to undefined behaviour. An `InvalidOperationException` is thrown if the `CharStreamState` instance is zero-initialized (i.e. constructed with the default value type constructor). 
] [`` end ``] [/interface-members] [/section] [/interface-reference] [interface-reference] [section#TwoChars TwoChars] An immutable value type representation of two chars: [$$interface] [interface-members] [`` type TwoChars = struct new: char0: char * char1: char -> [no-auto-link TwoChars] val #Char0#: char val #Char1#: char end ``] [/interface-members] [/section] [/interface-reference] [/auto-link] [/auto-link] [/section] ================================================ FILE: Doc/src/reference-error.txt ================================================ [auto-link{do-not-pick-up-as-link-targets = ["Error"], hide-outer-auto-links = ["Position"]}] [section#Error FParsec.Error] [interface-reference] [section Interface] [$$interface] [/section] [section Members] [interface-members] [`` // FParsec.dll [] // module is automatically opened when FParsec namespace is opened module [no-auto-link FParsec.Error] ``] [`` [#discriminated-union-type] // The following type abbreviations and active patterns allow you to // treat the ErrorMessage type almost as if it was defined as: // // [] // type ErrorMessage = // | Expected of string // | ExpectedString of string // | ExpectedStringCI of string // | Unexpected of string // | UnexpectedString of string // | UnexpectedStringCI of string // | Message of string // | NestedError of Position * obj * ErrorMessageList // | CompoundError of string * Position * obj * ErrorMessageList // | OtherErrorMessage of obj type #Expected# = ErrorMessage.Expected type #ExpectedString# = ErrorMessage.ExpectedString type #ExpectedStringCI# = ErrorMessage.ExpectedCaseInsensitiveString type #Unexpected# = ErrorMessage.Unexpected type #UnexpectedString# = ErrorMessage.UnexpectedString type #UnexpectedStringCI# = ErrorMessage.UnexpectedCaseInsensitiveString type #Message# = ErrorMessage.Message type #NestedError# = ErrorMessage.NestedError type #CompoundError# = ErrorMessage.CompoundError type #OtherErrorMessage# = ErrorMessage.Other // Unfortunately, F# 
currently doesn't support active patterns with more // than 7 cases, so we have to use partial patterns. val (|[no-auto-link Expected]|_|): ErrorMessage -> string option val (|[no-auto-link ExpectedString]|_|): ErrorMessage -> string option val (|[no-auto-link ExpectedStringCI]|_|): ErrorMessage -> string option val (|[no-auto-link Unexpected]|_|): ErrorMessage -> string option val (|[no-auto-link UnexpectedString]|_|): ErrorMessage -> string option val (|[no-auto-link UnexpectedStringCI]|_|): ErrorMessage -> string option val (|[no-auto-link Message]|_|): ErrorMessage -> string option val (|[no-auto-link NestedError]|_|): ErrorMessage -> (Position * obj * ErrorMessageList) option val (|[no-auto-link CompoundError]|_|): ErrorMessage -> (string * Position * obj * ErrorMessageList) option val (|[no-auto-link OtherErrorMessage]|_|): ErrorMessage -> obj option // The following literal definition and active pattern allow you to // treat the ErrorMessageList type as if it was defined as: // // [] // type ErrorMessageList = // | AddErrorMessage of ErrorMessage * ErrorMessageList // | NoErrorMessages // with // static member Merge: ErrorMessageList * ErrorMessageList -> ErrorMessageList // static member ToHashSet: ErrorMessageList -> HashSet // static member ToSortedArray: ErrorMessageList -> ErrorMessage[] [] val #NoErrorMessages#: ErrorMessageList = null val (|[no-auto-link ErrorMessageList|NoErrorMessages]|): ErrorMessageList -> Choice ``] [`` // Helper functions for creating an ErrorMessageList with a single ErrorMessage val @expected@: string -> ErrorMessageList ``] [`expected label` creates an `ErrorMessageList` with a single `Expected label` message.] [`` val @expectedStringError@: string -> ErrorMessageList ``] [`expectedStringError str` creates an `ErrorMessageList` with a single `ExpectedString str` message.] 
[`` val @expectedStringCIError@: string -> ErrorMessageList ``] [`expectedStringCIError str` creates an `ErrorMessageList` with a single `ExpectedStringCI str` message.] [`` val @unexpected@: string -> ErrorMessageList ``] [`unexpected label` creates an `ErrorMessageList` with a single `Unexpected label` message.] [`` val @unexpectedStringError@: string -> ErrorMessageList ``] [`unexpectedStringError str` creates an `ErrorMessageList` with a single `UnexpectedString str` message.] [`` val @unexpectedStringCIError@: string -> ErrorMessageList ``] [`unexpectedStringCIError str` creates an `ErrorMessageList` with a single `UnexpectedStringCI str` message.] [`` val @messageError@: string -> ErrorMessageList ``] [`messageError msg` creates an `ErrorMessageList` with a single `Message msg` message.] [`` val @otherError@: obj -> ErrorMessageList ``] [`otherError o` creates an `ErrorMessageList` with a single `OtherErrorMessage o` message.] [`` val @nestedError@: CharStream<_> -> ErrorMessageList -> ErrorMessageList ``] [`nestedError stream msgs` creates an `ErrorMessageList` with a single `NestedError(stream.Position, stream.UserState, msgs)` message, except if `msgs` is already an `ErrorMessageList` with a single `NestedError` message, in which case `msgs` is returned instead.] [`` val @compoundError@: string -> CharStream<_> -> ErrorMessageList -> ErrorMessageList ``] [ `compoundError label stream msgs` creates an `ErrorMessageList` with a single `CompoundError(label, stream.Position, stream.UserState, msgs)` message, except if `msgs` is an `ErrorMessageList` with a single `NestedError(pos2, ustate2, msgs2)` message, in which case an `ErrorMessageList` with a single `CompoundError(label, pos2, ustate2, msgs2)` message is returned instead.] [`` // Two convenient helper functions ``] [`` val @mergeErrors@: ErrorMessageList -> ErrorMessageList -> ErrorMessageList ``] [`mergeErrors error1 error2` is an abbreviation for `ErrorMessageList.Merge(error1, error2)`.]
[`` val @isSingleErrorMessageOfType@: ErrorMessageType -> ErrorMessageList -> bool ``] [ `isSingleErrorMessageOfType ty msgs` returns `true` if and only if `msgs` is an `ErrorMessageList` with a single `ErrorMessage` with the `ErrorMessageType` `ty`. ] [`` // A simple container type for holding an ErrorMessageList // together with its associated input stream position and user state ``] [`` [] type @ParserError@``] [ `ParserError` is a simple container type for holding an `ErrorMessageList` together with its associated input stream position and user state. The `ParserError` class has the following members: [interface-members] [`` = ``] [`` @new@: position: [^reference.Position Position] * userState: obj * messages: ErrorMessageList -> ParserError ``] [Constructs a `ParserError` from an `ErrorMessageList` and its associated position.] [`` member @Position@: [^reference.Position Position] ``] [ The input stream position of the parser error. ] [`` member @UserState@: obj ``] [ The user state associated with the parser error. ] [`` member @Messages@: ErrorMessageList ``] [#member-ErrorMessageList The error messages of the parser error.
] [`` [#ToString/WriteTo] override @ToString@: unit -> string ``] [ Is equivalent to `` use sw = new System.IO.StringWriter() WriteTo(sw) sw.ToString() `` ] [`` member @ToString@: streamWhereErrorOccurred: CharStream -> string ``] [#ToString_CharStream Is equivalent to `` use sw = new System.IO.StringWriter() [^WriteTo_stream WriteTo](sw, streamWhereErrorOccurred) sw.ToString() `` ] [`` member @WriteTo@: textWriter: System.IO.TextWriter * streamWhereErrorOccurred: CharStream * ?tabSize: int * ?columnWidth: int * ?initialIndentation: string * ?indentationIncrement: string -> unit ``] [#WriteTo_stream Is equivalent to `` let getStream (pos: Position) = if pos.StreamName = @Position@.StreamName then streamWhereErrorOccurred else null [^WriteTo_getStream WriteTo](textWriter, [* getStream], ?tabSize = tabSize, ?columnWidth = columnWidth, ?initialIndentation = initialIndentation, ?indentationIncrement = indentationIncrement) `` ] [`` member @WriteTo@: textWriter: System.IO.TextWriter * getStream: (Position -> CharStream) * ?tabSize: int * ?columnWidth: int * ?initialIndentation: string * ?indentationIncrement: string -> unit ``] [#WriteTo_getStream Writes a string representation of the `ParserError` to the given `TextWriter` value. For each error `getStream` is called with the error position. The returned `CharStream` must be `null` or contain the content of the `CharStream` for which the error was generated (at the original indices). If `getStream` returns a non-null `CharStream`, the printed error position information is augmented with the line of text surrounding the error position, together with a '^'-marker pointing to the exact location of the error in the input stream. The `tabSize` parameter (default value: 8) specifies the tab stop distance that this method assumes when counting text columns. This parameter only has an effect for error positions where `getStream` returns a non-null `CharStream`.
The `columnWidth` parameter (default value: 79) specifies the number of char columns that this method should try to fit its output to. ] [`` member @WriteTo@: textWriter: System.IO.TextWriter * ?positionPrinter: (System.IO.TextWriter -> [^reference.Position Position] -> string -> int -> unit) * ?columnWidth: int * ?initialIndentation: string * ?indentationIncrement: string -> unit ``] [ Writes a string representation of the `ParserError` to the given `TextWriter` value. The format of the position information can be customized by specifying the `positionPrinter` argument. The given function is expected to print a representation of the passed `[^reference.Position Position]` value to the passed `TextWriter` value. If possible, it should indent text lines with the passed string and take into account the maximum column count (including indentation) passed as the last argument. ] [`` override Equals: obj -> bool override GetHashCode: unit -> int ``] [/interface-members] ] [/interface-members] [/section] [/interface-reference] [/section] [/auto-link] ================================================ FILE: Doc/src/reference-errormessage.txt ================================================  [section#ErrorMessage FParsec.ErrorMessage] [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] `ErrorMessage` is the abstract base class for FParsec error messages. `Parser` functions return `ErrorMessage` values within an `ErrorMessageList`. There are [^members.nested-types several subtypes] of `ErrorMessage` that represent specific kind of error messages. These subtypes are defined as nested classes within `ErrorMessage`. The [^Error.members.discriminated-union-type active patterns and type abbreviations] in the `FParsec.Error` module allow you to treat the `ErrorMessage` type almost as if it was defined as an F# discriminated union type. 
[/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec type #ErrorMessageType# = [#ErrorMessageType..Expected Expected] = 0 | [#ErrorMessageType..ExpectedString ExpectedString] = 1 | [#ErrorMessageType..ExpectedCaseInsensitiveString ExpectedCaseInsensitiveString] = 2 | [#ErrorMessageType..Unexpected Unexpected] = 3 | [#ErrorMessageType..UnexpectedString UnexpectedString] = 4 | [#ErrorMessageType..UnexpectedCaseInsensitiveString UnexpectedCaseInsensitiveString] = 5 | [#ErrorMessageType..Message Message] = 6 | [#ErrorMessageType..NestedError NestedError] = 7 | [#ErrorMessageType..CompoundError CompoundError] = 8 | [#ErrorMessageType..Other Other] = 9 ``] [`` type @ErrorMessage@``] [ `ErrorMessage` is the abstract base class for FParsec error messages. `` type ErrorMessage = member [no-auto-link Type]: ErrorMessageType override Equals: obj -> bool override GetHashCode: unit -> int interface System.IEquatable<ErrorMessage> `` Please also see the @remarks@ above. ] [`` = member [no-auto-link Type]: ErrorMessageType override Equals: obj -> bool override GetHashCode: unit -> int interface System.IEquatable<ErrorMessage> [#nested-types]// nested types ``] [`` type @ErrorMessage.Expected@``] [ Parsers report this `ErrorMessage` when the input does not match the expected input. `` type ErrorMessage.Expected = inherit ErrorMessage new: label: string -> ErrorMessage.Expected member Label: string `` The string label describes the expected input. This error message can be generated with the labeling operator `<?>`. ] [`` = inherit ErrorMessage new: label: string -> ErrorMessage.Expected member Label: string ``] [`` type @ErrorMessage.ExpectedString@``] [ Parsers report this `ErrorMessage` when the input does not match an expected string constant. `` type ErrorMessage.ExpectedString = inherit ErrorMessage new: string -> ErrorMessage.ExpectedString member String: string `` This `ErrorMessage` is mainly generated by the `pstring` parser and its variants.
] [`` = inherit ErrorMessage new: string -> ErrorMessage.ExpectedString member String: string ``] [`` type @ErrorMessage.ExpectedCaseInsensitiveString@``] [ Parsers report this `ErrorMessage` when the input does not match an expected case-insensitive string constant. `` type ErrorMessage.ExpectedCaseInsensitiveString = inherit ErrorMessage new: string -> ErrorMessage.ExpectedCaseInsensitiveString member CaseInsensitiveString: string `` This `ErrorMessage` is mainly generated by the `pstringCI` parser and its variants. ] [`` = inherit ErrorMessage new: string -> ErrorMessage.ExpectedCaseInsensitiveString member CaseInsensitiveString: string ``] [`` type @ErrorMessage.Unexpected@``] [ Parsers report this `ErrorMessage` when they encounter some unexpected input. `` type ErrorMessage.Unexpected = inherit ErrorMessage new: label: string -> ErrorMessage.Unexpected member Label: string `` The string label describes the unexpected input. This `ErrorMessage` is mainly generated by the `notFollowedByL` primitive. ] [`` = inherit ErrorMessage new: label: string -> ErrorMessage.Unexpected member Label: string ``] [`` type @ErrorMessage.UnexpectedString@``] [ Parsers report this `ErrorMessage` when they encounter an unexpected string constant. `` type ErrorMessage.UnexpectedString = inherit ErrorMessage new: string -> ErrorMessage.UnexpectedString member String: string `` This `ErrorMessage` is mainly generated by the `notFollowedByString` parser. ] [`` = inherit ErrorMessage new: string -> ErrorMessage.UnexpectedString member String: string ``] [`` type @ErrorMessage.UnexpectedCaseInsensitiveString@``] [ Parsers report this `ErrorMessage` when they encounter an unexpected case-insensitive string constant. `` type ErrorMessage.UnexpectedCaseInsensitiveString = inherit ErrorMessage new: string -> ErrorMessage.UnexpectedCaseInsensitiveString member CaseInsensitiveString: string `` This `ErrorMessage` is mainly generated by the `notFollowedByStringCI` parser.
] [`` = inherit ErrorMessage new: string -> ErrorMessage.UnexpectedCaseInsensitiveString member CaseInsensitiveString: string ``] [`` type @ErrorMessage.Message@``] [ Parsers report this `ErrorMessage` when the error does not fit the other `ErrorMessage` types. `` type ErrorMessage.Message = inherit ErrorMessage new: string -> ErrorMessage.Message member String: string `` This error message can be generated with the `fail` and `failFatally` primitives. ] [`` = inherit ErrorMessage new: string -> ErrorMessage.Message member String: string ``] [`` type @ErrorMessage.NestedError@``] [ Parsers report this `ErrorMessage` when they backtracked after an error occurred. `` type ErrorMessage.NestedError = inherit ErrorMessage new: position: Position * userState: obj * messages: ErrorMessageList -> ErrorMessage.NestedError member Position: Position member UserState: obj member Messages: ErrorMessageList `` The `Position` property describes the stream position where the original error occurred that triggered the backtracking. The `UserState` property contains the user state value from before the backtracking (upcasted to `obj`). The `Messages` property contains the error messages of the original error. This error message is mainly generated by the `attempt`, `>>?` and `.>>?` primitives. ] [`` = inherit ErrorMessage new: position: Position * userState: obj * messages: ErrorMessageList -> ErrorMessage.NestedError member Position: Position member UserState: obj member Messages: ErrorMessageList ``] [`` type @ErrorMessage.CompoundError@``] [ Parsers report this `ErrorMessage` when a "compound" failed to parse.
`` type ErrorMessage.CompoundError = inherit ErrorMessage new: labelOfCompound: string * nestedErrorPosition: Position * nestedErrorUserState: obj * nestedErrorMessages: ErrorMessageList -> ErrorMessage.CompoundError member LabelOfCompound: string member NestedErrorPosition: Position member NestedErrorUserState: obj member NestedErrorMessages: ErrorMessageList `` This error message is mainly generated by the compound-labelling operator ``. ] [`` = inherit ErrorMessage new: labelOfCompound: string * nestedErrorPosition: Position * nestedErrorUserState: obj * nestedErrorMessages: ErrorMessageList -> ErrorMessage.CompoundError member LabelOfCompound: string member NestedErrorPosition: Position member NestedErrorUserState: obj member NestedErrorMessages: ErrorMessageList ``] [`` type @ErrorMessage.Other@``] [ User-defined parsers can return this `ErrorMessage` to report application-specific error data. `` type ErrorMessage.Other = inherit ErrorMessage new: data: obj -> ErrorMessage.Other member Data: obj `` To display `OtherError` values in error messages, you will have to define your own error printer, as `ParserError.\ @ToString/WriteTo@` ignores them. ] [`` = inherit ErrorMessage new: data: obj -> ErrorMessage.Other member Data: obj ``] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-errormessagelist.txt ================================================  [section#ErrorMessageList FParsec.ErrorMessageList] Represents a list of error messages. [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] The `ErrorMessageList` represents a list of error messages in which the order of the messages carries no meaning and any duplicates and empty messages are ignored. Essentially, an `ErrorMessageList` is *constructed as a singly-linked list, but used as a set*. A `null` value represents an empty `ErrorMessageList`. 
The `ErrorMessage` values in an `ErrorMessageList` are usually all associated with the same input stream position and user state. For example, the error messages returned by a parser in a `Reply` value describe an error at the `CharStream` position that is current when the parser returns. In order to enforce set semantics in comparison operations, the `ErrorMessageList` overrides the `Equals` and `GetHashCode` methods. [/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec [] type ErrorMessageList``] [`` = ``] [`` member @Head@: ErrorMessage ``] [ The first `ErrorMessage` in this list. This property is never `null`. ] [`` member @Tail@: ErrorMessageList ``] [ The remaining `ErrorMessage` values in this list after the first `ErrorMessage`. If there are no remaining `ErrorMessage` values, this property is `null`. ] [`` @new@: head: ErrorMessage -> ErrorMessageList ``] [#new-1 Constructs a new `ErrorMessageList` with a single `ErrorMessage` value. This constructor throws a `NullReferenceException` if `head` is null. ] [`` @new@: head: ErrorMessage * tail: ErrorMessageList -> ErrorMessageList ``] [#new-2 Constructs a new `ErrorMessageList` with `Head` set to `head` and `Tail` set to `tail`. This constructor throws a `NullReferenceException` if `head` is null. ] [`` @new@: head: ErrorMessage * tailMessage: ErrorMessage -> ErrorMessageList ``] [#new-3 `new ErrorMessageList(head, tailMessage)` is equivalent to `new [^new-2 ErrorMessageList](head, new [^new-1 ErrorMessageList](tailMessage))`. ] [`` static member @Merge@: ErrorMessageList * ErrorMessageList -> ErrorMessageList ``] [ Creates a new `ErrorMessageList` that contains the `ErrorMessage` values from both argument lists. The order of the `ErrorMessage` values in the newly created list is an implementation detail that you should not depend on. ] [`` static member @ToHashSet@: ErrorMessageList -> HashSet<ErrorMessage> ``] [ Converts the `ErrorMessageList` to a `HashSet<ErrorMessage>`.
Duplicate error messages and empty `Expected...`, `Unexpected...` and `Message` messages are filtered out when the list is converted to a set. ] [`` static member @ToSortedArray@: ErrorMessageList -> ErrorMessage[] ``] [ Converts the `ErrorMessageList` to a array that is sorted by a total order. Duplicate error messages and empty `Expected...`, `Unexpected...` and `Message` messages are filtered out when the list is converted to the array. The order of the sorted array is an implementation detail and may change in the future. ] [`` override Equals: obj -> bool override GetHashCode: unit -> int interface System.IEquatable ``] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-operatorprecedenceparser.txt ================================================  [interface-reference] [section#OperatorPrecedenceParser FParsec.OperatorPrecedenceParser] [section Interface] [$$interface] [/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec type #Associativity# = [#Associativity..None None] = 0 | [#Associativity..Left Left] = 1 | [#Associativity..Right Right] = 2 type #OperatorType# = [#OperatorType..Infix Infix] = 0 | [#OperatorType..Prefix Prefix] = 1 | [#OperatorType..Postfix Postfix] = 2 ``] [`` type @Operator@<'TTerm, 'TAfterString, 'TUserState>``] [ The `Operator` type represents an immutable operator definition for the `OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState>` (OPP) class. `` [] type Operator<'TTerm, 'TAfterString, 'TUserState> = member Type: OperatorType member Associativity: Associativity member Precedence: int member IsAssociative: bool member IsTernary: bool member @String@: string member TernaryRightString: string // null for non-ternary operators `` The `Operator` class is the abstract base class of the `InfixOperator`, `PrefixOperator`, `PostfixOperator` and `TernaryOperator` classes. 
With these four concrete classes you can define binary infix (e.g. "1 + 1"), unary prefix (e.g. "-1"), unary postfix (e.g. "1++") and C-style ternary operators (e.g. "a ? b : c") for the `OperatorPrecedenceParser` (OPP) class. If you have a look at the constructors for the concrete operator classes, you'll see that operators are constructed from an operator string, an "after-string-parser", a precedence level, an associativity value and a mapping function that is applied after the expression is parsed. Ternary operators are treated as special infix operators and require a string and associated after-string-parser parser for each of the two operator parts. [dl [Associativity and precedence] [ While infix operators can be left-, right- and non-associative (see the `Associativity` type), prefix and postfix operators can only be associative (`true`) or non-associative (`false`). See below for details on [^precedence-associativity how precedence and associativity influence the operator precedence parser]. ] [Textual representation of operators] [ The operator string and the after-string-parser determine the textual representation of an operator. Usually, the after-string-parser is used for parsing the whitespace after an operator string. OPP instances have separate "namespaces" for prefix operators on the one hand and infix, postfix or ternary operators on the other hand. Hence, you can configure an OPP instance to recognize a prefix operator with the same string as the (first) string of an infix, postfix or ternary operator. However, no two prefix operators and no two infix, postfix or ternary operators can have the same (first) string. The second string of a ternary operator cannot be used for any other operator at the same time. The OPP class parses operator strings greedily.
This means, for example, that if you define a prefix operator with the string `"-"` and another prefix operator with the string `"--"`, then the input `--` in a prefix location will always be parsed as a `--` operator, never as two successive `-` operators. ] [How the OPP applies the after-string-parser] [ If the OPP encounters the operator string in the input, it will apply the after-string-parser directly after the operator string. If the after-string-parser succeeds, the operator will be accepted. If the after-string-parser fails without consuming input (or changing the parser state in any other way), the OPP will backtrack to before the operator string and will not try to parse any other operator at this location. If the after-string-parser parser fails after consuming input, the OPP will itself fail with this error. This backtracking behaviour can be exploited to conditionally accept an operator depending on the input following the operator string. For example, the after-string-parser definition in `PrefixOperator("not", notFollowedBy letter >>. spaces, 1, true, (* ... *))` will ensure that the `"not"` in `"notAnOperator"` cannot be parsed as an operator. ] [The mapping function argument of the operator constructors] [ When an OPP instance has finished parsing a sub-expression involving an operator, it uses the mapping function supplied as the last argument to the operator constructor to map the parsed term(s) to a new term. Usually this mapping function constructs an AST node or directly transforms the terminal values. The operator classes `InfixOperator`, `PrefixOperator`, etc. all support two alternative types of mapping functions. The simpler type of mapping function only gets passed the parsed term(s). The other type of mapping function also gets passed the result(s) of the after-string-parser(s).
] [More uses of the after-string-parser] [ The combination of individually configurable after-string-parsers and mapping functions make the OPP class quite flexible in addressing various practical parsing needs. One use of the after-string-parser is discussed in the user's guide section on [@ parsing F# infix operators]. Another use is demonstrated in the following example. It shows [#get-position-with-after-string-parser how you can use the after-string-parser to get hold of the precise text location of the parsed operator] (which is often useful for diagnostic purposes in your application): `` open FParsec open FParsec.Primitives open FParsec.CharParsers let opp = new OperatorPrecedenceParser<_,_,_>() let ws = spaces type Assoc = Associativity let adjustPosition offset (pos: Position) = Position(pos.[^Position..StreamName StreamName], pos.[^Position..Index Index] + int64 offset, pos.[^Position..Line Line], pos.[^Position..Column Column] + int64 offset) // To simplify infix operator definitions, we define a helper function. let addInfixOperator str prec assoc mapping = let op = InfixOperator(str, getPosition .>> ws, prec, assoc, (), fun opPos leftTerm rightTerm -> mapping (adjustPosition -str.Length opPos) leftTerm rightTerm) opp.AddOperator(op) // Of course, you can define similar functions for other operator types. // With the helper function in place, you can define an operator with // a mapping function that gets passed the text location of the // parsed operator as the first argument. addInfixOperator "+" 1 Assoc.Left (fun opPos leftTerm rightTerm -> (* ... *)) `` ] ] [br]Members of `Operator<'TTerm, 'TAfterString, 'TUserState>`: [interface-members] [``= ``] [`` member @Type@: OperatorType ``] [ The operator's type: `[^OperatorType..Infix Infix]`, `[^OperatorType..Prefix Prefix]` or `[^OperatorType..Postfix Postfix]`. Ternary operators are treated as special infix operators. 
] [`` member @`Associativity@: @Associativity@ ``] [#Operator..Associativity The operator's associativity: `[^Associativity..None None]`, `[^Associativity..Left Left]` or `[^Associativity..Right Right]`. For associative prefix operators this value is `Associativity.Right`, for associative postfix operators this value is `Associativity.Left`. ] [`` member @Precedence@: int ``] [ The operator's precedence value. The value is always greater than zero. Operators with a numerically higher precedence value take precedence over operators with lower precedence values. ] [`` member @IsAssociative@: bool ``] [Is equivalent to `[^Operator..Associativity Associativity] != Associativity.None`.] [`` member @IsTernary@: bool ``] [Indicates whether the operator is a `TernaryOperator`.] [`` member @String@: string ``] [The operator's string specified during construction. For ternary operators this property returns the left string. ] [`` member @TernaryRightString@: string``] [ The right string of a `TernaryOperator`. For non-ternary operators this property is null. ] [`` // null for non-ternary operators ``] [/interface-members] ] [`` // the following four types inherit from Operator<_,_,_> ``] [`` type @InfixOperator@<'TTerm, 'TAfterString, 'TUserState>``] [ The `InfixOperator<'TTerm, 'TAfterString, 'TUserState>` type represents a binary infix operator definition (e.g. the `+` in `1 + 1`) for the `OperatorPrecedenceParser` class.
`` type InfixOperator<'TTerm, 'TAfterString, 'TUserState> = inherit Operator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * associativity: Associativity * mapping: 'TTerm -> 'TTerm -> 'TTerm -> InfixOperator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * associativity: Associativity * dummy: unit // disambiguates overloads in F# * mapping: 'TAfterString -> 'TTerm -> 'TTerm -> 'TTerm -> InfixOperator<'TTerm, 'TAfterString, 'TUserState> `` [* The two constructors only differ in the type of the mapping they accept. To help F#'s type inference discern both constructors, the second constructor accepts an additional dummy argument.] Please see the documentation for the `Operator` base class for more information. ] [`` = // ... ``] [`` type @PrefixOperator@<'TTerm, 'TAfterString, 'TUserState>``] [ The `PrefixOperator<'TTerm, 'TAfterString, 'TUserState>` type represents a unary prefix operator definition (e.g. the `-` in `-1`) for the `OperatorPrecedenceParser` class. `` type PrefixOperator<'TTerm, 'TAfterString, 'TUserState> = inherit Operator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * isAssociative: bool * mapping: 'TTerm -> 'TTerm -> PrefixOperator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * isAssociative: bool * dummy: unit // disambiguates overloads in F# * mapping: 'TAfterString -> 'TTerm -> 'TTerm -> PrefixOperator<'TTerm, 'TAfterString, 'TUserState> `` [* The two constructors only differ in the type of the mapping they accept. To help F#'s type inference discern both constructors, the second constructor accepts an additional dummy argument.] 
Please see the documentation for the `Operator` base class for more information. ] [`` = // ... ``] [`` type @PostfixOperator@<'TTerm, 'TAfterString, 'TUserState>``] [ The `PostfixOperator<'TTerm, 'TAfterString, 'TUserState>` type represents a unary postfix operator definition (e.g. the `++` in `1++`) for the `OperatorPrecedenceParser` class. `` type PostfixOperator<'TTerm, 'TAfterString, 'TUserState> = inherit Operator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * isAssociative: bool * mapping: 'TTerm -> 'TTerm -> PostfixOperator<'TTerm, 'TAfterString, 'TUserState> new: operatorString: string * afterStringParser: Parser<'TAfterString,'TUserState> * precedence: int * isAssociative: bool * dummy: unit // disambiguates overloads in F# * mapping: 'TAfterString -> 'TTerm -> 'TTerm -> PostfixOperator<'TTerm, 'TAfterString, 'TUserState> `` [* The two constructors only differ in the type of the mapping they accept. To help F#'s type inference discern both constructors, the second constructor accepts an additional dummy argument.] Please see the documentation for the `Operator` base class for more information. ] [`` = // ... ``] [`` type @TernaryOperator@<'TTerm, 'TAfterString, 'TUserState>``] [ The `TernaryOperator<'TTerm, 'TAfterString, 'TUserState>` type represents a C-style ternary operator definition (e.g. the `{none}? :` in `{c++}a ? b : c`) for the `OperatorPrecedenceParser` class. 
`` type TernaryOperator<'TTerm, 'TAfterString, 'TUserState> = inherit Operator<'TTerm, 'TAfterString, 'TUserState> new: leftString: string * afterLeftStringParser: Parser<'TAfterString,'TUserState> * rightString: string * afterRightStringParser: Parser<'TAfterString,'TUserState> * precedence: int * associativity: Associativity * mapping: 'TTerm -> 'TTerm -> 'TTerm -> 'TTerm -> TernaryOperator<'TTerm, 'TAfterString, 'TUserState> new: leftString: string * afterLeftStringParser: Parser<'TAfterString,'TUserState> * rightString: string * afterRightStringParser: Parser<'TAfterString,'TUserState> * precedence: int * associativity: Associativity * dummy: unit // disambiguates overloads in F# * mapping: 'TAfterString -> 'TAfterString -> 'TTerm -> 'TTerm -> 'TTerm -> 'TTerm -> TernaryOperator<'TTerm, 'TAfterString, 'TUserState> `` [* The two constructors only differ in the type of the mapping they accept. To help F#'s type inference discern both constructors, the second constructor accepts an additional dummy argument.] Please see the documentation for the `Operator` base class for more information. ] [`` = // ... ``] [`` type @OperatorPrecedenceParser@<'TTerm, 'TAfterString, 'TUserState>``] [ The `OperatorPrecedenceParser` class (OPP) represents a dynamically configurable parser for parsing expression grammars involving binary infix (e.g. `1 + 1`), unary prefix (e.g. `-1`), unary postfix (e.g. `1++`) and C-style ternary operators (e.g. `{c++}a ? b : c`). You can configure an OPP instance by adding and removing operator definitions in the form of [** `Operator`] values. If you add an operator that conflicts with a previous operator definition, `AddOperator` will raise an `ArgumentException`. The `Operators` property returns a snapshot of the currently defined set of operators. The `RemoveInfixOperator`, `RemovePrefixOperator`, etc. members remove operator definitions based only on their text representation. All `Remove...` members return `false` if no matching operator was previously defined, otherwise `true`.
The actual expression parser of the OPP is exposed through the `[** ExpressionParser]` property. The `ExpressionParser` value is a constant closure that forwards all work to internal instance methods. This ensures that the behaviour of the expression parser always reflects the latest configuration of the OPP instance. You can safely call the `ExpressionParser` concurrently from multiple threads, as long as the configuration of the OPP instance is not changed at the same time. Before you can call the `ExpressionParser` you first need to set the `[** TermParser]`. The OPP instance uses the `TermParser` to parse the terms in between the operators. Often the `TermParser` will not just parse terminal values but will also recursively call the `ExpressionParser`, for example to parse an expression between parentheses. Note that the `TermParser` also needs to consume any trailing whitespace. This example shows how to define a parser for very simple arithmetic expressions: `` open FParsec open FParsec.Primitives open FParsec.CharParsers let ws = spaces let str_ws s = pstring s >>. ws let opp = new OperatorPrecedenceParser() let expr = opp.ExpressionParser let term = (pfloat .>> ws) <|> between (str_ws "(") (str_ws ")") expr opp.TermParser <- term type Assoc = Associativity opp.AddOperator(InfixOperator("+", ws, 1, Assoc.Left, fun x y -> x + y)) opp.AddOperator(InfixOperator("*", ws, 2, Assoc.Left, fun x y -> x * y)) `` ``{fsi} > run expr "1 + 2*(3 + 4)";; val it : ParserResult = Success: 15.0 `` [#precedence-associativity]The following points explain *how expressions are parsed depending on precedence and associativity* of the involved operators: - Operators with higher precedence bind tighter. For example, if the prefix operator "~" has a lower precedence than the infix operator "&" then "~x&y" will be parsed as "~(x&y)". - Ternary operators are treated as special infix operators. The middle expression (e.g. "expr2" in "expr1 ? 
expr2 : expr3") is parsed as a "fresh" expression that is not influenced by the precedence of the surrounding operators. - Operators with identical precedence are parsed as follows: ``{none} Here o1, o2 are two infix operators, pre1, pre2 are two prefix operators, po1, po2 are two postfix operators and all operators have identical precedence. pre1 x o1 y ==> (pre1 x) o1 y x o1 y po1 ==> x o1 (y po1) x o1 y o2 z ==> (x o1 y) o2 z if o1 and o2 are left-associative x o1 y o2 z ==> x o1 (y o2 z) if o1 and o2 are right-associative pre1 x po1 ==> (pre1 x) po1 if pre1 or po1 is associative pre1 pre2 x ==> pre1 (pre2 x) if pre1 or pre2 is associative x po1 po2 ==> (x po1) po2 if po1 or po2 is associative `` - If the parser encounters #conflicting operators#, e.g. if a right-associative infix operator follows a left-associative operator with the same precedence level, the OPP fails and returns with an error generated with the help of the `OperatorConflictErrorFormatter`. In the following situations the OPP will fail with an operator conflict error: ``{none} [Same notation as above, all operators have identical precedence.] x o1 y o2 z if o1 and o2 have different associativity or o1 and o2 are non-associative pre1 pre2 x if pre1 and pre2 are non-associative pre1 x po1 if pre1 and po1 are non-associative x po1 po2 if po1 and po2 are non-associative `` By giving all operators different precedence levels and making all operators associative, you can exclude any possible operator conflict. A practical reason for defining operators that can lead to conflicts in the inputs (e.g. non-associative operators) is to force the user to explicitly parenthesize an expression involving such operators. [br]Members of `OperatorPrecedenceParser<'TTerm, 'TAfterString, 'TUserState>`: [interface-members] [`` = ``] [`` member @ExpressionParser@: Parser<'TTerm,'TUserState> ``] [ The expression parser.
This is a constant closure that forwards all work to internal instance methods, so that the behaviour of the expression parser always reflects the latest configuration of the OPP instance. You can safely call the `ExpressionParser` concurrently from multiple threads, as long as the configuration of the `OperatorPrecedenceParser` instance is not changed at the same time. ] [`` member @TermParser@: Parser<'TTerm,'TUserState> with get, set ``] [ This parser is called to parse the terms in between the operators. There is no default, so you must set this parser before you can call the `ExpressionParser`. Note that the term parser is also expected to parse any whitespace after a term. ] [`` member @AddOperator@: Operator<'TTerm, 'TAfterString, 'TUserState> -> unit ``] [ Adds an operator to the grammar. Raises an `ArgumentException` if the operator definition conflicts with a previous definition. ] [`` member @RemoveOperator@: Operator<'TTerm, 'TAfterString, 'TUserState> -> bool ``] [ Removes the given `Operator` instance from the grammar. Returns `false` if the `Operator` instance was not previously registered, otherwise `true`. ] [`` member @RemoveInfixOperator@: string -> bool ``] [ Removes the `InfixOperator` with the given string from the grammar. Returns `false` if no infix operator with that string was previously registered, otherwise `true`. ] [`` member @RemovePrefixOperator@: string -> bool ``] [ Removes the `PrefixOperator` with the given string from the grammar. Returns `false` if no prefix operator with that string was previously registered, otherwise `true`. ] [`` member @RemovePostfixOperator@: string -> bool ``] [ Removes the `PostfixOperator` with the given string from the grammar. Returns `false` if no postfix operator with that string was previously registered, otherwise `true`. ] [`` member @RemoveTernaryOperator@: string * string -> bool ``] [ Removes the `TernaryOperator` with the given left and right strings from the grammar. 
Returns `false` if no ternary operator with these strings was previously registered, otherwise `true`. ] [`` member @Operators@: seq<Operator<'TTerm, 'TAfterString, 'TUserState>> ``] [ Returns a sequence with a snapshot of the operators currently registered with the `OperatorPrecedenceParser`. ] [`` member @OperatorConflictErrorFormatter@: ( Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString -> Position * Operator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString -> ErrorMessageList) with get, set ``] [ The `OperatorConflictErrorFormatter` function is called by the OPP instance when it encounters @conflicting operators@ in the input. The two passed tuples contain the stream positions, operator definitions and the after-string-parser values for the two conflicting operators. The returned `ErrorMessageList` will become part of the error messages returned by the OPP's `ExpressionParser`. You can set this formatter to customize the error messages generated when the OPP instance encounters conflicting operators in the inputs. Of course, if your operator grammar doesn't allow for conflicting operators in the input, the `OperatorConflictErrorFormatter` will never be called and there's no need to customize it. The user's guide section on [@ parsing F# infix operators] contains an example with a custom `OperatorConflictErrorFormatter`. ] [`` member @MissingTernary2ndStringErrorFormatter@: ( Position * Position * TernaryOperator<'TTerm, 'TAfterString, 'TUserState> * 'TAfterString -> ErrorMessageList) with get, set ``] [ The `MissingTernary2ndStringErrorFormatter` function is called by the OPP instance when it can't parse the second operator string of a C-style ternary operator (e.g. the `{c++}:` in `{c++}a ? b : c`). The passed tuple contains (in order) the position of the first operator string, the position where the second string was expected, the operator definition and the after-string-parser value for the left operator part.
The returned `ErrorMessageList` will become part of the error messages returned by the OPP's `ExpressionParser`. ] [/interface-members] ] [/interface-members] [/section] [/section] [/interface-reference] ================================================ FILE: Doc/src/reference-overview.txt ================================================ [section Parser overview] - @Parsing single chars@ - @Parsing strings directly@ - @Parsing strings with the help of other parsers@ - @Parsing numbers@ - @Parsing whitespace@ - @Chaining and piping parsers@ - @Parsing sequences@ - @Parsing alternatives and recovering from errors@ - @Conditional parsing and looking ahead@ - [^customizing-error-messages-table Customizing error messages] - @User state handling and getting the input stream position@ [table{format="api-table"} Parsing single chars [[Parser] [Description]] [[[** `pchar c`] [br][small (variants: `skipChar`, `charReturn`)]] [Parses the char `c`.]] [[[** `anyChar`] [br][small (variant: `skipAnyChar`)]] [Parses any one char.]] [[[** `satisfy f`] [br][small (variants: `[^satisfy-parsers (skipS|s)atisfy[L]]`)]] [Parses any one char for which the predicate function `f` returns `true`.]] [[[** `anyOf str`] [br][small (variant: `skipAnyOf`)]] [Parses any one char in the string `str`.]] [[[** `noneOf str`] [br][small (variant: `skipNoneOf`)]] [Parses any one char not in the string `str`.]] [[[** `letter`] [br][small (variants: `lower`, `upper`)]] [Parses any one unicode letter char identified by `System.Char.IsLetter`.]] [[[** `asciiLetter`] [br][small (variants: `asciiLower`, `asciiUpper`)]] [Parses any one char in the range `'a'` - `'z'` and `'A'` - `'Z'`.]] [[[** `digit`] [br][small (variants: `hex`, `octal`)]] [Parses any one char in the range `'0'` - `'9'`.]] ] [table{format="api-table"} Parsing strings directly [[Parser] [Description]] [[[** `pstring str`] [br][small (variants: `@skipString@`, `stringReturn`)]] [Parses the string `str`.]] [[[** `pstringCI str`] [br][small 
(variants: `skipStringCI`, `stringCIReturn`)]] [Parses any string that @case-insensitively@ matches the string `str`.]] [[[** `anyString n`] [br][small (variant: `skipAnyString`)]] [Parses any sequence of `n` chars.]] [[[** `restOfLine skipNewline`] [br][small (variant: `skipRestOfLine`)]] [Parses any chars before the end of the line and, if `skipNewline` is `true`, skips to the beginning of the next line (if there is one).]] [[[** `charsTillString str skipString nMax`] [br][small (variants: `charsTillStringCI`, `[^charsTillString-parsers skipCharsTillString[CI]]`)]] [Parses all chars before the first occurrence of the string `str` and, if `skipString` is `true`, skips over `str`. Fails if more than `nMax` chars come before `str`.]] [[[** `manySatisfy f`] [br][small (variant: `skipManySatisfy`)]] [Parses a sequence of *zero* or more chars that satisfy the predicate function `f` (i.e. chars for which `f` returns `true`).]] [[[** `manySatisfy2 f1 f`] [br][small (variant: `skipManySatisfy2`)]] [Parses a sequence of *zero* or more chars, where the first char must satisfy the predicate function `f1` and the remaining chars must satisfy `f`.]] [[[** `many1Satisfy f`] [br][small (variants: `[^many1Satisfy-parsers (skipM|m)any1Satisfy[2][L]]`)]] [Parses a sequence of *one* or more chars that satisfy the predicate function `f`.]] [[[** `manyMinMaxSatisfy nMin nMax f`] [br][small (variants: `[^manyMinMaxSatisfy-parsers (skipM|m)anyMinMaxSatisfy[2][L]]`)]] [Parses a sequence of `nMin` or more chars that satisfy the predicate function `f`, but not more than `nMax` chars.]] [[[** `regex pattern`]] [Parses a sequence of *one* or more chars matched by the .NET regular expression string `pattern`.]] [[[** `identifier options`]] [Parses a Unicode identifier.]] ] [table{format="api-table"} Parsing strings with the help of other parsers [[Parser] [Description]] [[[** `manyChars cp`] [br][small (variants: `[^manyChars-parsers manyChars2]`)]] [Parses a sequence of *zero* or more chars
with the char parser `cp`.]] [[[** `many1Chars cp`] [br][small (variants: `[^many1Chars-parsers many1Chars2]`)]] [Parses a sequence of *one* or more chars with the char parser `cp`.]] [[[** `manyCharsTill cp endp`] [br][small (variants: `[^manyCharsTill-parsers manyCharsTill[Apply][2]]`)]] [Parses chars with the char parser `cp` until the parser `endp` succeeds. Stops after `endp`.]] [[[** `manyStrings sp`] [br][small (variant: `[^manyStrings-parsers many[1]Strings[2]]`)]] [Parses a sequence of *zero* or more strings with the parser `sp`. Returns the parsed strings in concatenated form.]] [[[** `stringsSepBy sp sep`]] [Parses a sequence of *zero* or more occurrences of `sp` separated by `sep`. Returns the strings parsed with `sp` *and* `sep` in concatenated form.]] [[[** `skipped p`]] [Applies the parser `p`. Returns the chars skipped over by `p` as a string.]] [[[** `p |> withSkippedString f`]] [Applies the parser `p`. Returns `f str x`, where `str` is the string skipped over by `p` and `x` is the result returned by `p`.]] ] [table{format="api-table"} Parsing numbers [[Parser] [Description]] [[[** `pfloat`]] [Parses a double-precision floating-point number.]] [[[** `pint64`][br][small (variants: `[^pint-parsers pint(8|16|32)]`)]] [Parses a 64-bit signed integer.]] [[[** `puint64`][br][small (variants: `[^puint-parsers puint(8|16|32)]`)]] [Parses a 64-bit unsigned integer.]] [[[** `numberLiteral options label`]] [Parses a number literal and returns the result in form of a `NumberLiteral` value.]] ] [table{format="api-table"} Parsing whitespace [[Parser] [Description]] [[[** `newline`] [br][small (variants: `@skipNewline@`, `newlineReturn`, `unicodeNewline`)]] [Parses a newline (`"\n"`, `"\r\n"` or `"\r"`). Returns `'\n'`.]] [[[** `unicodeNewline`] [br][small (variants: `@skipUnicodeNewline@`, `unicodeNewlineReturn`)]] [Parses a Unicode newline (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"` or `"\u2029"`). 
Returns `'\n'`.]] [[[** `spaces`] [br][small (variant: `spaces1`)]] [Skips over any sequence of whitespace chars (`' '`, `'\t'` or a newline).]] [[[** `unicodeSpaces`] [br][small (variant: `unicodeSpaces1`)]] [Skips over any sequence of Unicode whitespace chars and recognizes (`"\n"`, `"\r\n"`, `"\r"`, `"\u0085"`, `"\u2028"` and `"\u2029"`) as newlines.]] [[[** `eof`]] [Only succeeds at the end of the input.]] ] [table{format="api-table"} Chaining and piping parsers [[Parser] [Description]] [[[** `preturn x`]] [Returns `x`.]] [[[** `p >>% x`]] [Applies the parser `p`. Returns `x`.]] [[[** `p |>> f`]] [Applies the parser `p`. Returns `f x`, where `x` is the result returned by `p`.]] [[[** `p1 >>. p2`]] [Applies the parsers `p1` and `p2` in sequence. Returns the result of `p2`.]] [[[** `p1 .>> p2`]] [Applies the parsers `p1` and `p2` in sequence. Returns the result of `p1`.]] [[[** `p1 .>>. p2`]] [Applies the parsers `p1` and `p2` in sequence. Returns the results in a tuple.]] [[[** `between pBegin pEnd p`]] [Applies the parsers `pBegin`, `p` and `pEnd` in sequence. Returns the result of `p`.]] [[[** `pipe2 p1 p2 f`] [br][small (variants: `[^pipe-parsers pipe(3|4|5)]`)]] [Applies the parsers `p1` and `p2` in sequence. Returns `f x1 x2`, where `x1` and `x2` are the results returned by `p1` and `p2`.]] [[[** `p >>= f`]] [First applies the parser `p`, then applies the function `f` to the result returned by `p` and finally applies the parser returned by `f`.]] ] [table{format="api-table"} Parsing sequences [[Parser] [PEG] [Description]] [[[** `tuple2 p1 p2`] [br][small (variants: `[^tuple-parsers tuple(3|4|5)]`)]] [`{PEG}p1 p2`] [Applies the parsers `p1` and `p2` in sequence. Returns the results in a tuple.]] [[[** `parray n p`] [br][small (variant: `skipArray`)]] [] [Parses `n` occurrences of `p`. Returns the results in an array.]] [[[** `many p`] [br][small (variant: `skipMany`)]] [`{PEG}p*`] [Parses *zero* or more occurrences of `p`.
Returns the results in a list.]] [[[** `many1 p`] [br][small (variant: `skipMany1`)]] [`{PEG}p+`] [Parses *one* or more occurrences of `p`. Returns the results in a list.]] [[[** `sepBy p sep`] [br][small (variants: `sepBy1`, `[^sepBy-parsers skipSepBy[1]]`)]] [`{PEG}(p (sep p)*)?`] [Parses *zero* or more occurrences of `p`, separated by `sep`. Returns the results in a list.]] [[[** `sepEndBy p sep`] [br][small (variants: `sepEndBy1`, `[^sepEndBy-parsers skipSepEndBy[1]]`)]] [`{PEG}(p (sep p)* sep?)?`] [Parses *zero* or more occurrences of `p`, separated and optionally ended by `sep`. Returns the results in a list.]] [[[** `manyTill p endp`] [br][small (variants: `many1Till`, `[^manyTill-parsers skipMany[1]Till]`)]] [`{PEG}(!endp p)* endp`] [Parses *zero* or more occurrences of `p` for as long as `endp` does not succeed. Stops after `endp` succeeded. Returns the results returned by `p` in a list.]] [[[** `chainl1 p op`] [br][small (variants: `[^chain-parsers chain(l|r)[1]]`)]] [`{EBNF}p (op p)*`] [Parses *one* or more occurrences of `p`, separated by `op`. Returns [small `f_n (... (f_2 (f_1 x_1 x_2) x_3) ...) x_n+1`], where `f_1` to `f_n` are the functions returned by the parser `op` and `x_1` to `x_n+1` are the values returned by `p`.]] ] [table{format="api-table"} Parsing alternatives and recovering from errors [[Parser] [Description]] [[[** `p1 <|> p2`]] [Parses `p1` or `p2`. The parser `p2` is only tried if `p1` fails with a non-fatal error and *without changing the parser state*. The stream position is part of the parser state, so *if `p1` fails after consuming input, `p2` will not be tried*.]] [[[** `choice ps`] [br][small (variant: `choiceL`)]] [Is equivalent to `p1 <|> p2 <|> ... <|> pn <|> pzero`, where `p1` ... `pn` are the parsers in the sequence `ps`.]] [[[** `p <|>% x`]] [Parses `p` or returns `x`. Is equivalent to `p <|> preturn x`.]] [[[** `opt p`] [br][small (variant: `optional`)]] [Parses an optional occurrence of `p` as an option value.
Is equivalent to `(p |>> Some) <|>% None`]] [[[** `attempt p`]] [ Parses `p`. If `p` fails after changing the parser state, `attempt p` will *backtrack* to the original parser state before reporting a (non-fatal) error. Thus, `attempt p1 <|> p2` will continue to try to parse `p2` even if `p1` fails after consuming input.]] [[[** `p1 >>? p2`] [br][small (variants: `.>>?`, `.>>.?`, `>>=?`)]] [Behaves like `p1 >>. p2`, but will *backtrack* to the beginning if `p2` fails with a non-fatal error and with an unchanged parser state, even if `p1` has changed the parser state.]] ] [table{format="api-table"} Conditional parsing and looking ahead [[Parser] [Description]] [[[** `notEmpty p`]] [Behaves like `p`, but fails when `p` succeeds without consuming input or changing the parser state in any other way.]] [[[** `followedBy p`] [br][small (variant: `notFollowedBy`)]] [Succeeds without changing the parser state if the parser `p` succeeds at the current position.]] [[[** `followedByL p label`] [br][small (variant: `notFollowedByL`)]] [Behaves like `followedBy p`, but uses the string `label` to generate a more descriptive error message in case `p` fails. 
The string `label` should describe `p`.]] [[[** `notFollowedByEof`]] [Is an optimized version of `notFollowedByL eof "end of input"`.]] [[[** `followedByString str`] [br][small (variants: `[^followedByString-parsers (notF|f)ollowedByString[CI]]`)]] [Is an optimized version of `followedByL (pstring str) ("'" + str + "'")`.]] [[[** `nextCharSatisfies f`] [br][small (variants: `next2CharsSatisfy`, `previousCharSatisfies`)]] [Is an optimized version of `followedBy (satisfy f)`.]] [[[** `nextCharSatisfiesNot f`] [br][small (variants: `next2CharsSatisfyNot`, `previousCharSatisfiesNot`)]] [Is an optimized version of `notFollowedBy (satisfy f)`.]] [[[** `lookAhead p`]] [Parses `p` and restores the original parser state afterwards.]] ] [table#customizing-error-messages-table{format="api-table"} Customizing error messages [[Parser] [Description]] [[[** `p <?> label`]] [Applies the parser `p`. If `p` does not change the parser state (usually because `p` failed), the error messages are replaced with `expectedError label`. The string `label` should describe `p`.]] [[[** `p <??> label`]] [Behaves like `p <?> label`, but when `p` fails after changing the parser state, a `CompoundError` message is generated with both the given `label` and the error messages generated by `p`.]] [[[** `fail msg`]] [Always fails with a `messageError msg`. The string `msg` will be displayed together with other error messages generated for the same input position.]] [[[** `failFatally msg`]] [Always fails with a `messageError msg`.
Returns with a `FatalError`, so that no error recovery is attempted (except via backtracking constructs).]] ] [table{format="api-table"} User state handling and getting the input stream position [[Parser] [Description]] [[[** `getUserState`]] [Returns the current user state.]] [[[** `setUserState u`]] [Sets the user state to `u`.]] [[[** `updateUserState f`]] [Sets the user state to `f u`, where `u` is the current user state.]] [[[** `userStateSatisfies f`]] [Succeeds if the current user state satisfies the predicate function `f`.]] [[[** `getPosition`]] [Returns the current position in the input stream.]] ] [/section] ================================================ FILE: Doc/src/reference-position.txt ================================================  [auto-link{hide-outer-auto-links = ["Position", "Index", "Line", "Column"]}] [interface-reference] [section#Position FParsec.Position] [$$interface] [interface-members] [`` // FParsecCS.dll namespace FParsec type Position = new: streamName: string * index: int64 * line: int64 * column: int64 -> Position member [#Position..StreamName StreamName]: string member [#Position..Index Index]: int64 member [#Position..Line Line]: int64 member [#Position..Column Column]: int64 override ToString: unit -> string interface System.IEquatable interface [url "https://msdn.microsoft.com/en-us/library/4d7sx9hd.aspx" System.IComparable] interface System.IComparable ``] [/interface-members] [/section] [/interface-reference] [/auto-link] ================================================ FILE: Doc/src/reference-primitives.txt ================================================ [section#Primitives FParsec.Primitives] [interface-reference] [section Interface] [$$interface] [/section] [section Members] [interface-members] [`` // FParsec.dll [] // module is automatically opened when FParsec namespace is opened module [no-auto-link FParsec.Primitives] ``] [``[] val @Ok@: ReplyStatus = ReplyStatus.Ok ``] [This `ReplyStatus` value indicates that a 
parser succeeded.] [``[<Literal>] val @Error@: ReplyStatus = ReplyStatus.Error ``] [This `ReplyStatus` value indicates that a parser failed.] [``[<Literal>] val @FatalError@: ReplyStatus = ReplyStatus.FatalError ``] [This `ReplyStatus` value indicates that a parser failed and no error recovery (except after backtracking) should be tried.] [`` type @Parser@<'TResult, 'TUserState> = CharStream<'TUserState> -> Reply<'TResult> ``] [The type of the parser functions supported throughout the FParsec library.] [`` // Two basic primitives that are only seldomly directly used in user code: ``] [`` val @preturn@: 'a -> Parser<'a,'u> ``] [The parser `preturn x` always succeeds with the result `x` (without changing the parser state). `preturn x` is defined as `fun stream -> Reply(x)`. ] [``val @pzero@: Parser<'a,'u> ``] [The parser `pzero` always fails with an empty error message list, i.e. an unspecified error. `pzero` is defined as `fun stream -> Reply(Error, NoErrorMessage)`. ] [`` // Chaining and piping parsers // ============================== ``] [``val (@\ >>=\ @): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u> ``] [The parser `p >>= f` first applies the parser `p` to the input, then applies the function `f` to the result returned by `p` and finally applies the parser returned by `f` to the input. [note Please see the user's guide chapter @Applying parsers in sequence@ for an in-depth discussion of the behaviour of this and other sequencing combinators. ] The `>>=` combinator is the conceptual foundation for all combinators that consecutively apply multiple parsers to the input.
In order to precisely define its behaviour we give an equivalent definition: `` let (>>=) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) = fun stream -> let reply1 = p stream if reply1.Status = Ok then let p2 = f reply1.Result let stateTag = stream.StateTag let mutable reply2 = p2 stream if stateTag = stream.StateTag then reply2.[^RError Error] <- mergeErrors reply1.[^RError Error] reply2.[^RError Error] reply2 else Reply(reply1.Status, reply1.[^RError Error]) `` ] [`` val (@\ >>%\ @): Parser<'a,'u> -> 'b -> Parser<'b,'u> ``] [ The parser `p >>% x` applies the parser `p` and returns the result `x`. `p >>% x` is an optimized implementation of `p >>= fun _ -> preturn x`. ] [``val (@\ >>.\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u> ``] [ The parser `p1 >>. p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p2`. `p1 >>. p2` is an optimized implementation of `p1 >>= fun _ -> p2`. ] [``val (@\ .>>\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u> ``] [ The parser `p1 .>> p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p1`. `p1 .>> p2` is an optimized implementation of `p1 >>= fun x -> p2 >>% x`. ] [``val (@\ .>>.\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u> ``] [ The parser `p1 .>>. p2` applies the parsers `p1` and `p2` in sequence and returns the results in a tuple. `p1 .>>. p2` is an optimized implementation of `` p1 >>= fun a -> p2 >>= fun b -> preturn (a, b) `` ] [``val @between@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'c,'u> ``] [ The parser `between popen pclose p` applies the parsers `popen`, `p` and `pclose` in sequence. It returns the result of `p`. `between popen pclose p` is an optimized implementation of `popen >>. p .>> pclose`. ] [`` val (@\ |>>\ @): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b,'u> ``] [ The parser `p |>> f` applies the parser `p` and returns the result of the function application `f x`, where `x` is the result returned by `p`.
`p |>> f` is an optimized implementation of `p >>= fun x -> preturn (f x)`. ] [`` [#pipe-parsers] val @pipe2@: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u> ``] [ The parser `pipe2 p1 p2 f` applies the parsers `p1` and `p2` in sequence. It returns the result of the function application `f a b`, where `a` and `b` are the results returned by `p1` and `p2`. `pipe2 p1 p2 f` is an optimized implementation of `` p1 >>= fun a -> p2 >>= fun b -> preturn (f a b) `` ] [`` val @pipe3@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> ('a -> 'b -> 'c -> 'd) -> Parser<'d,'u> ``] [ The parser `pipe3 p1 p2 p3 f` applies the parsers `p1`, `p2` and `p3` in sequence. It returns the result of the function application `f a b c`, where `a`, `b` and `c` are the results returned by `p1`, `p2` and `p3`. `pipe3 p1 p2 p3 f` is an optimized implementation of `` p1 >>= fun a -> p2 >>= fun b -> p3 >>= fun c -> preturn (f a b c) `` ] [`` val @pipe4@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> ('a -> 'b -> 'c -> 'd -> 'e) -> Parser<'e,'u> ``] [ The parser `pipe4 p1 p2 p3 p4 f` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence. It returns the result of the function application `f a b c d`, where `a`, `b`, `c` and `d` are the results returned by `p1`, `p2`, `p3` and `p4`. `pipe4 p1 p2 p3 p4 f` is an optimized implementation of `` p1 >>= fun a -> p2 >>= fun b -> p3 >>= fun c -> p4 >>= fun d -> preturn (f a b c d) `` ] [`` val @pipe5@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> ('a -> 'b -> 'c -> 'd -> 'e -> 'f) -> Parser<'f,'u> ``] [ The parser `pipe5 p1 p2 p3 p4 p5 f` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence. It returns the result of the function application `f a b c d e`, where `a`, `b`, `c`, `d` and `e` are the results returned by `p1`, `p2`, `p3`, `p4` and `p5`. 
`pipe5 p1 p2 p3 p4 p5 f` is an optimized implementation of `` p1 >>= fun a -> p2 >>= fun b -> p3 >>= fun c -> p4 >>= fun d -> p5 >>= fun e -> preturn (f a b c d e) `` ] [`` // Parsing alternatives and recovering from errors // =============================================== ``] [``val (@\ <|>\ @): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u> ``] [The parser `p1 <|> p2` first applies the parser `p1`. If `p1` succeeds, the result of `p1` is returned. If `p1` fails with a non-fatal error and *without changing the parser state*, the parser `p2` is applied. Note: The stream position is part of the parser state, so if `p1` fails after consuming input, `p2` will not be applied. The `choice` combinator is a generalization of `<|>` to more than two parsers. [note Please see the user's guide chapter on @Parsing alternatives@ for an in-depth discussion of the behaviour of this combinator. ] ] [``val @choice@: seq<Parser<'a,'u>> -> Parser<'a,'u> ``] [The parser `choice ps` is an optimized implementation of `p1 <|> p2 <|> ... <|> pn `, where `p1` ... `pn` are the parsers in the sequence `ps`. `choice [p]` is equivalent to `p`.[br] `choice []` is equivalent to `pzero`. ] [``val @choiceL@: seq<Parser<'a,'u>> -> string -> Parser<'a,'u> ``] [ The parser `choiceL ps label` is an optimized implementation of `choice ps <?> label`. `choiceL` is slightly faster than `choice`, because it doesn't have to aggregate error messages. ] [`` val (@\ <|>%\ @): Parser<'a,'u> -> 'a -> Parser<'a,'u> ``] [ The parser `p <|>% x` is an optimized implementation of `p <|> preturn x`. ] [``val @opt@: Parser<'a,'u> -> Parser<'a option,'u> ``] [ The parser `opt p` parses an optional occurrence of `p` as an option value. `opt p` is an optimized implementation of `(p |>> Some) <|>% None`. ] [``val @optional@: Parser<'a,'u> -> Parser<unit,'u> ``] [ The parser `optional p` skips over an optional occurrence of `p`. `optional p` is an optimized implementation of `(p >>% ()) <|>% ()`.
] [`` val @attempt@: Parser<'a,'u> -> Parser<'a,'u> ``] [ The parser `attempt p` applies the parser `p`. If `p` fails after changing the parser state or with a fatal error, `attempt p` will backtrack to the original parser state and report a non-fatal error. ] [`` val (@\ >>=?\ @): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u> ``] [ The parser `p >>=? f` behaves like `p >>= f`, except that it will backtrack to the beginning if the parser returned by `f` fails with a non-fatal error and without changing the parser state, even if `p` has changed the parser state. ] [``val (@\ >>?\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u> ``] [ The parser `p1 >>? p2` behaves like `p1 >>. p2`, except that it will backtrack to the beginning if `p2` fails with a non-fatal error and without changing the parser state, even if `p1` has changed the parser state. ] [``val (@\ .>>?\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u> ``] [ The parser `p1 .>>? p2` behaves like `p1 .>> p2`, except that it will backtrack to the beginning if `p2` fails with a non-fatal error and without changing the parser state, even if `p1` has changed the parser state. ] [``val (@\ .>>.?\ @): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u> ``] [ The parser `p1 .>>.? p2` behaves like `p1 .>>. p2`, except that it will backtrack to the beginning if `p2` fails with a non-fatal error and without changing the parser state, even if `p1` has changed the parser state. ] [`` // Conditional parsing and looking ahead // ===================================== ``] [``val @notEmpty@: Parser<'a,'u> -> Parser<'a,'u> ``] [The parser `notEmpty p` behaves like `p`, except that it fails when `p` succeeds without consuming input or changing the parser state in any other way. `notEmpty` is useful for forcing sequence parsers to consume input. For example, `notEmpty (manySatisfy f)` behaves like `many1Satisfy f`.
] [`` val @followedBy@: Parser<'a,'u> -> Parser<unit,'u> ``] [The parser `followedBy p` succeeds if the parser `p` succeeds at the current position. Otherwise it fails with a non-fatal error. This parser never changes the parser state. If the parser `followedBy p` fails, it returns no descriptive error message. Hence it should only be used together with other parsers that take care of a potential error. Alternatively, `followedByL p label` can be used to ensure a more descriptive error message.] [``val @followedByL@: Parser<'a,'u> -> string -> Parser<unit,'u> ``] [The parser `followedByL p label` behaves like `followedBy p`, except that it returns an `Expected label` error message when the parser `p` fails.] [``val @notFollowedBy@: Parser<'a,'u> -> Parser<unit,'u> ``] [The parser `notFollowedBy p` succeeds if the parser `p` fails to parse at the current position. Otherwise it fails with a non-fatal error. This parser never changes the parser state. If the parser `notFollowedBy p` fails, it returns no descriptive error message. Hence it should only be used together with other parsers that take care of a potential error. Alternatively, `notFollowedByL p label` can be used to ensure a more descriptive error message.] [``val @notFollowedByL@: Parser<'a,'u> -> string -> Parser<unit,'u> ``] [The parser `notFollowedByL p label` behaves like `notFollowedBy p`, except that it returns an `Unexpected label` error message when the parser `p` succeeds.] [`` val @lookAhead@: Parser<'a,'u> -> Parser<'a,'u> ``] [ The parser `lookAhead p` parses `p` and restores the original parser state afterwards. If `p` fails after changing the parser state, the error messages are wrapped in a `NestedError`. If it succeeds, any error messages are discarded. Fatal errors are turned into normal errors. ] [`` // Customizing error messages // ========================== ``] [`` val (@\ <?>\ @): Parser<'a,'u> -> string -> Parser<'a,'u> ``] [ The parser `p <?> label` applies the parser `p`.
If `p` does not change the parser state (usually because `p` failed), the error messages are replaced with `expected label`. Please also see the user's guide chapter on @customizing error messages@. ] [``val (@\ <??>\ @): Parser<'a,'u> -> string -> Parser<'a,'u> ``] [The parser `p <??> label` behaves like `p <?> label`, except that when `p` fails after changing the parser state (for example, because `p` consumes input before it fails), a `CompoundError` message is generated with both the given string `label` and the error messages generated by `p`. Please also see the user's guide chapter on @customizing error messages@. ] [`` val @fail@: string -> Parser<'a,'u> ``] [ The parser `fail msg` always fails with a `messageError msg`. The string `msg` will be displayed together with other error messages generated for the same input position. `fail msg` is equivalent to `fun stream -> Reply(Error, messageError msg)`. ] [``val @failFatally@: string -> Parser<'a,'u> ``] [ The parser `failFatally msg` always fails with a `messageError msg`. It returns with a `FatalError`, so that no error recovery is attempted (except via backtracking constructs). `failFatally msg` is equivalent to `fun stream -> Reply(FatalError, messageError msg)`. ] [`` // Parsing sequences // ================= ``] [`` [#tuple-parsers]val @tuple2@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u> ``] [ The parser `tuple2 p1 p2` applies the parsers `p1` and `p2` in sequence and returns the results in a tuple. `tuple2 p1 p2` is defined as `p1 .>>. p2` and is equivalent to `pipe2 p1 p2 (fun a b -> (a, b))`. ] [`` val @tuple3@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<('a * 'b * 'c),'u> ``] [ The parser `tuple3 p1 p2 p3` applies the parsers `p1`, `p2` and `p3` in sequence and returns the results in a tuple. `tuple3 p1 p2 p3` is equivalent to `pipe3 p1 p2 p3 (fun a b c -> (a, b, c))`.
] [`` val @tuple4@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<('a * 'b * 'c * 'd),'u> ``] [ The parser `tuple4 p1 p2 p3 p4` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence and returns the results in a tuple. `tuple4 p1 p2 p3 p4` is equivalent to `pipe4 p1 p2 p3 p4 (fun a b c d -> (a, b, c, d))`. ] [`` val @tuple5@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> Parser<('a * 'b * 'c * 'd * 'e),'u> ``] [ The parser `tuple5 p1 p2 p3 p4 p5` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence and returns the results in a tuple. `tuple5 p1 p2 p3 p4 p5` is equivalent to `pipe5 p1 p2 p3 p4 p5 (fun a b c d e -> (a, b, c, d, e))`. ] [`` val @parray@: int -> Parser<'a,'u> -> Parser<'a[],'u> ``] [The parser `parray n p` parses `n` occurrences of `p` and returns the results in an array. For example, `parray 3 p` is equivalent to `pipe3 p p p (fun a b c -> [|a;b;c|])`.] [``val @skipArray@: int -> Parser<'a,'u> -> Parser ``] [The parser `skipArray n p` is an optimized implementation of `parray n p |>> ignore`.] [`` [#many-parsers] val @many@: Parser<'a,'u> -> Parser<'a list,'u> ``] [The parser `many p` repeatedly applies the parser `p` until `p` fails. It returns a list of the results returned by `p`. At the end of the sequence `p` must fail without changing the parser state and without signalling a `FatalError`, otherwise `many p` will fail with the error reported by `p`. `many p` tries to guard against an infinite loop by raising an exception if `p` succeeds without changing the parser state. Ignoring efficiency issues, stack space usage and the infinite recursion case, `many` could be defined as follows: `` let rec many p = (p >>= fun hd -> many p |>> fun tl -> hd::tl) <|>% [] `` ] [`` val @many1@: Parser<'a,'u> -> Parser<'a list,'u> ``] [ The parser `many1 p` behaves like `many p`, except that it requires `p` to succeed at least one time. 
`many1 p` is equivalent to `pipe2 p (many p) (fun hd tl -> hd::tl)`. ] [`` val @skipMany@: Parser<'a,'u> -> Parser ``] [ The parser `skipMany p` is an optimized implementation of `many p |>> ignore`. ] [`` val @skipMany1@: Parser<'a,'u> -> Parser ``] [ The parser `skipMany1 p` is an optimized implementation of `many1 p |>> ignore`. ] [`` [#sepBy-parsers] val @sepBy@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `sepBy p sep` parses *zero* or more occurrences of `p` separated by `sep` (in EBNF: `{EBNF}(p (sep p)*)?`). It returns a list of the results returned by `p`. `sepBy p sep` is almost equivalent to `pipe2 p (many (sep >>. p)) (fun hd tl -> hd::tl) <|>% []`, except with regard to a case rarely encountered in practice: If `sep` succeeds without changing the parser state and `p` then fails without changing the state, then `sepBy p sep` fails too, while the parser given by the almost equivalent definition would succeed. ] [`` val @sepBy1@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `sepBy1 p sep` parses *one* or more occurrences of `p` separated by `sep` (in EBNF: `{EBNF}p (sep p)*`). The parser `sepBy1 p` behaves like `sepBy p`, except that it requires `p` to succeed at least one time. Hence, if `sepBy1` succeeds, the returned list always contains at least one value. ] [`` val @skipSepBy@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipSepBy p sep` is an optimized implementation of `sepBy p sep |>> ignore`. ] [`` val @skipSepBy1@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipSepBy1 p sep` is an optimized implementation of `sepBy1 p sep |>> ignore`. ] [`` [#sepEndBy-parsers] val @sepEndBy@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `sepEndBy p sep` parses *zero* or more occurrences of `p` separated and optionally ended by `sep` (in EBNF: `{EBNF}(p (sep p)* sep?)?`). It returns a list of the results returned by `p`. 
`sepEndBy p sep` tries to guard against an infinite loop by raising an exception if `p` and `sep` succeed without changing the parser state. Ignoring efficiency issues, stack space usage and the infinite recursion case, `sepEndBy` could be defined as follows: `` let rec sepEndBy p sep = (p >>= fun hd -> sep >>. sepEndBy p sep <|>% [] |>> fun tl -> hd::tl) <|>% [] `` ] [`` val @sepEndBy1@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `sepEndBy1 p sep` parses *one* or more occurrences of `p` separated and optionally ended by `sep` (in EBNF: `{EBNF}p (sep p)* sep?`). It returns a list of the results returned by `p`. The parser `sepEndBy1 p` behaves like `sepEndBy p`, except that it requires `p` to succeed at least one time. Hence, if `sepEndBy1` succeeds, the returned list always contains at least one value. ] [`` val @skipSepEndBy@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipSepEndBy p sep` is an optimized implementation of `sepEndBy p sep |>> ignore`. ] [`` val @skipSepEndBy1@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipSepEndBy1 p sep` is an optimized implementation of `sepEndBy1 p sep |>> ignore`. ] [`` [#manyTill-parsers] val @manyTill@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `manyTill p endp` repeatedly applies the parser `p` for as long as `endp` fails (without changing the parser state). It returns a list of the results returned by `p`. `manyTill p endp` is an optimized variant of `many (notFollowedBy endp >>. p) .>> endp` that doesn't have to apply `endp` twice at the end of the sequence and that fails with the error reported by `endp` if `endp` fails after changing the parser state. ] [`` val @many1Till@: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u> ``] [ The parser `many1Till p endp` behaves like `manyTill p endp`, except that it requires `p` to succeed at least one time. 
`many1Till p endp` is an optimized implementation of `pipe2 p (manyTill p endp) (fun hd tl -> hd::tl)`. ] [`` val @skipManyTill@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipManyTill p endp` is an optimized implementation of `manyTill p endp |>> ignore`. ] [`` val @skipMany1Till@: Parser<'a,'u> -> Parser<'b,'u> -> Parser ``] [ The parser `skipMany1Till p endp` is an optimized implementation of `many1Till p endp |>> ignore`. ] [`` [] type @Inline@ = ``] [ `Inline` is a static class that contains the following inline helper methods for defining optimized sequence parsers: - `Inline.Many` - `Inline.SepBy` - `Inline.ManyTill` ] [`` static member inline @Many@: stateFromFirstElement: ('T -> 'State) * foldState: ('State -> 'T -> 'State) * resultFromState: ('State -> 'Result) * elementParser: Parser<'T,'U> * ?firstElementParser: Parser<'T,'U> * ?resultForEmptySequence: (unit -> 'Result) -> Parser<'Result,'U> ``] [#Inline..Many{no-auto-link = ["Inline", "Inline.Many"]} `Inline.Many` is an inline helper method for defining optimized sequence parsers. `Inline.Many(stateFromFirstElement, foldState, resultFromState, elementParser)` expands to an optimized implementation of `` many1 elementParser // requires at least 1 element |>> function hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl) `` [note The `'State` argument to the `foldState` function is completely independent of FParsec's usual parser state. The term "accumulator" would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.] 
If you pass a value for the optional argument [** `resultForEmptySequence`], the parser expands to an optimized implementation of `` many elementParser // accepts empty sequence |>> function | [] -> resultForEmptySequence() | hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl) `` If you pass a value for the optional argument [** `firstElementParser`], the first element of the sequence will be parsed with `firstElementParser` instead of `elementParser`. The following example shows how you can use `Inline.Many` to define an optimized parser that behaves like `many1 p |>> List.reduce f` but avoids the temporary allocation of a list: `` let many1Reduce p f = Inline.Many(elementParser = p, stateFromFirstElement = (fun x0 -> x0), foldState = (fun acc y -> f acc y), resultFromState = (fun acc -> acc)) `` A simple test run: ``{fsi} > run (many1Reduce (pint32 .>> spaces) (+)) "1 2 3";; val it : ParserResult = Success: 6 `` The following example shows how you can use `Inline.Many` to create an optimized sequence parser that returns an array instead of a list: `` let manyA2 p1 p = Inline.Many(firstElementParser = p1, elementParser = p, stateFromFirstElement = (fun x0 -> let ra = ResizeArray<_>() ra.Add(x0) ra), foldState = (fun ra x -> ra.Add(x); ra), resultFromState = (fun ra -> ra.ToArray()), resultForEmptySequence = (fun () -> [||])) let manyA p = manyA2 p p `` Two simple test runs: ``{fsi} > run (manyA (pint32 .>> spaces)) "";; val it : ParserResult = Success: [||] > run (manyA (pint32 .>> spaces)) "1 2 3";; val it : ParserResult = Success: [|1; 2; 3|] `` ] [`` static member inline @SepBy@: stateFromFirstElement: ('T -> 'State) * foldState: ('State -> 'Separator -> 'T -> 'State) * resultFromState: ('State -> 'Result) * elementParser: Parser<'T,'U> * separatorParser: Parser<'Separator,'U> * ?firstElementParser: Parser<'T,'U> * ?resultForEmptySequence: (unit -> 'Result) * ?separatorMayEndSequence: bool -> Parser<'Result,'U> ``] 
[#Inline..SepBy{no-auto-link = ["Inline", "Inline.SepBy"]} `Inline.SepBy` is an inline helper method for defining optimized sequence parsers. By default, parsers defined with `Inline.SepBy` parse sequences of the form (in EBNF): `{EBNF}element (separator element)*` `Inline.SepBy(stateFromFirstElement, foldState, resultFromState, elementParser,` `separatorParser)` expands to an optimized implementation of `` pipe2 elementParser (many (separatorParser .>>. elementParser)) (fun elem0 sepsAndElems -> sepsAndElems |> List.fold (fun acc (sep, e) -> foldState acc sep e) (stateFromFirstElement elem0) |> resultFromState) `` [small For most practical purposes the behaviour of the expanded `Inline.SepBy` parser and the above definition based on `many` can be considered equivalent, but there is a fringe case where the behaviour differs: If `separatorParser` succeeds without changing the parser state and `elementParser` then fails without changing the parser state, then the `Inline.SepBy` parser fails too, while the parser given by the definition based on `many` would succeed.] [note The `'State` argument to the `foldState` function is completely independent of FParsec's usual parser state. The term "accumulator" would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.] If you pass `true` as the value for the optional argument `separatorMayEndSequence`, a separator may also end the sequence, i.e. the parser will accept sequences of the following form (in EBNF): ``{EBNF}element (separator element)* separator?`` Note that `foldState` is not called with the value of an ending separator. If you pass a value for the optional argument `resultForEmptySequence`, the parser returned by `Inline.SepBy` will call `resultForEmptySequence` to create the parser result when it encounters an empty sequence. If you don't pass a `resultForEmptySequence` function, the parser will fail for an empty sequence. 
If you pass a value for the optional argument `firstElementParser`, the first element of a sequence will be parsed with `firstElementParser` instead of `elementParser`. The following example shows how you can use `Inline.SepBy` to define an optimized parser that behaves like `sepBy1 p sep |>> List.reduce f` but avoids the temporary allocation of a list: `` let sepBy1Reduce p sep f = Inline.SepBy(elementParser = p, separatorParser = sep, stateFromFirstElement = (fun x0 -> x0), foldState = (fun acc _ y -> f acc y), resultFromState = (fun acc -> acc)) `` A simple test run: ``{fsi} > run (sepBy1Reduce pint32 (pstring "," >>. spaces) (+)) "1, 2, 3";; val it : ParserResult = Success: 6 `` The following example shows how one could define `CharParsers.stringsSepBy` using `Inline.SepBy`: `` let stringsSepBy p sep = Inline.SepBy(elementParser = p, separatorParser = sep, stateFromFirstElement = (fun str -> let sb = System.Text.StringBuilder() sb.Append(str : string)), // sb.Append returns sb foldState = (fun sb sep str -> sb.Append(sep : string) .Append(str : string)), resultFromState = (fun sb -> sb.ToString())) let testParser : Parser = stringsSepBy (manySatisfy isLetter) (pstring @"\\" >>% @"\") `` ``{fsi} > run testParser "";; val it : ParserResult = Success: "" > run testParser @"abc\\def\\\\";; val it : ParserResult = Success: "abc\def\\" `` ] [`` static member inline @ManyTill@: stateFromFirstElement: ('T -> 'State) * foldState: ('State -> 'T -> 'State) * resultFromStateAndEnd: ('State -> 'E -> 'Result) * elementParser: Parser<'T,'U> * endParser: Parser<'E,'U> * ?firstElementParser: Parser<'T,'U> * ?resultForEmptySequence: ('E -> 'Result) -> Parser<'Result,'U> ``] [#Inline..ManyTill{no-auto-link = ["Inline", "Inline.ManyTill"]} `Inline.ManyTill` is an inline helper method for defining optimized sequence parsers. 
`Inline.ManyTill(stateFromFirstElement, foldState, resultFromState, elementParser,` `endParser)` expands to an optimized implementation of `` many1Till elementParser endParser // requires at least 1 element |>> function hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl) `` [note The `'State` argument to the `foldState` function is completely independent of FParsec's usual parser state. The term "accumulator" would be a more accurate name for the argument, but that is just too unwieldy to use in the method signature.] If you pass a value for the optional argument [** `resultForEmptySequence`], the parser expands to an optimized implementation of `` manyTill elementParser endParser // accepts empty sequence |>> function | [] -> resultForEmptySequence() | hd::tl -> resultFromState (List.fold foldState (stateFromFirstElement hd) tl) `` If you pass a value for the optional argument [** `firstElementParser`], the first element of the sequence will be parsed with `firstElementParser` instead of `elementParser`. 
The following example shows how one could define `CharParsers.manyCharsTill2` using `Inline.ManyTill`: `` let myManyCharsTillApply2 cp1 cp endp f = Inline.ManyTill(firstElementParser = cp1, elementParser = cp, endParser = endp, stateFromFirstElement = (fun c -> let sb = System.Text.StringBuilder() sb.Append(c : char)), // sb.Append returns sb foldState = (fun sb c -> sb.Append(c : char)), resultFromStateAndEnd = (fun sb e -> f (sb.ToString()) e), resultForEmptySequence = (fun e -> f "" e)) let myManyCharsTillApply cp endp f = myManyCharsTillApply2 cp cp endp f let myRestOfLine : Parser<string,unit> = myManyCharsTillApply anyChar ((newline >>% "\\n") <|> (eof >>% "")) (fun str nl -> str + nl) `` ``{fsi} > run myRestOfLine "";; val it : ParserResult<string,unit> = Success: "" > run myRestOfLine "abc\r\ndef";; val it : ParserResult<string,unit> = Success: "abc\n" `` ] [`` [#chain-parsers] val @chainl1@: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u> ``] [The parser `chainl1 p op` parses one or more occurrences of `p` separated by `op` (in EBNF: `p (op p)*`). It returns the value obtained by *left* associative application of all functions returned by `op` to the results returned by `p`, i.e. `f_n (...(f_2 (f_1 x_1 x_2) x_3) ...) x_n+1`, where `f_1` to `f_n` are the functions returned by the parser `op` and `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly. The `chainl1` implementation uses constant stack space. ] [`` val @chainl@: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u> ``] [The parser `chainl p op defVal` is equivalent to `chainl1 p op <|>% defVal`.] [`` val @chainr1@: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u> ``] [The parser `chainr1 p op` parses one or more occurrences of `p` separated by `op` (in EBNF: `p (op p)*`). 
It returns the value obtained by *right* associative application of all functions returned by `op` to the results returned by `p`, i.e. `f_1 x_1 (f_2 x_2 (... (f_n x_n x_n+1) ...))`, where `f_1` to `f_n` are the functions returned by the parser `op` and `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly. The `chainr1` implementation uses constant stack space. ] [`` val @chainr@: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u> ``] [The parser `chainr p op defVal` is equivalent to `chainr1 p op <|>% defVal`.] [`` // Building parsers using F#'s computation expression syntax type @ParserCombinator@ = // ... ``] [ This class is defined as `` [<Sealed>] type ParserCombinator() = member t.Delay(f) = fun state -> (f ()) state member t.Return(x) = preturn x member t.Bind(p, f) = p >>= f member t.Zero() = pzero member t.ReturnFrom(p) = p member t.TryWith(p, cf) = fun state -> try p state with e -> (cf e) state member t.TryFinally(p, ff) = fun state -> try p state finally ff () `` Instances of this class can be used to build parsers using F#'s @computation expression@ syntax. The default instance for this purpose is `parse`. Please see the user's guide chapter "@Where is the monad?@" for an introduction to the `parse {...}` syntax. Some constructs supported by `parse` and their translations are `` let! pat = expr in pexpr ==> expr >>= (fun pat -> pexpr) let pat = expr in pexpr ==> let pat = expr in pexpr do! expr in pexpr ==> expr >>= (fun () -> pexpr) do expr in pexpr ==> expr; pexpr if expr then pexpr1 ==> if expr then pexpr1 else pexpr2 else pexpr2 if expr then pexpr ==> if expr then pexpr else pzero return expr ==> preturn expr return! expr ==> expr `` where `expr` is any F# expression and `pexpr` is an expression of type `Parser<_,_>`. You need to use the `!`-constructs whenever you have a right hand side expression that evaluates to a parser. 
] [`` val @parse@: ParserCombinator ``] [ A builder object of type `ParserCombinator` for building parsers using F#'s @computation expression@ syntax. ] [`` // Building mutually recursive parser values ``] [`` val @createParserForwardedToRef@: unit -> Parser<'a,'u> * Parser<'a,'u> ref ``] [ `let p, pRef = createParserForwardedToRef()` creates a parser `p` that forwards all calls to the parser in the reference cell `pRef`. Initially, `pRef` holds a reference to a dummy parser that raises an exception on any invocation. The JSON parser example in the tutorial [^createParserForwardedToRef-example shows] how you can use `createParserForwardedToRef` to define a parser for a recursive grammar. ] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-reply.txt ================================================  [section#Reply FParsec.Reply] Represents the return value of a `Parser` function. [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] The `Reply` type is the return type of `Parser` functions. Similar to a tuple, a `Reply` value can be viewed as a simple aggregate of its three fields `Status`, `Result` and `@Error@`. The value of the `Status` field indicates whether the parser returning the reply succeeded (`ReplyStatus.Ok`) or failed (`ReplyStatus.Error` or `ReplyStatus.FatalError`). If the value of the `Status` field is `Ok`, the `Result` field contains a parser result value; otherwise, its value is undefined. The `Equals` override ignores the `Result` value when it compares two `Reply` values with `Status <> Ok`. [note The `Reply` fields are mutable because that allows us to implement library primitives with more compact code, for which the .NET JIT produces faster machine code. 
Of course, if you object to mutable structs [url "https://stackoverflow.com/questions/441309/why-are-mutable-structs-evil" on religious grounds] or if you're not familiar with the somewhat subtle behaviour of mutable structs in certain situations, you can always treat the `Reply` type as if it was immutable. ] [/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec type #ReplyStatus# = [#ReplyStatus..Ok Ok] = 1 | [#ReplyStatus..Error Error] = 0 | [#ReplyStatus..FatalError FatalError] = -1 type Reply<'TResult> = struct ``] [`` @new@: 'TResult -> Reply<'TResult> ``] [#new-1 Constructs a `Reply` instance with the `Status` field set to `Ok`, the `Result` field set to the argument value and the `@Error@` field set to `null`. ] [`` @new@: ReplyStatus * ErrorMessageList -> Reply<'TResult> ``] [#new-2 Constructs a `Reply` instance with the `Status` and `@Error@` fields set to the respective argument values and the `Result` field initialized to `Unchecked.defaultof<'TResult>`. This constructor is usually used for constructing an error reply, like in `Reply(Error, expected "something")`. ] [`` @new@: ReplyStatus * 'TResult * ErrorMessageList -> Reply<'TResult> ``] [#new-3 Constructs a `Reply` instance with the `Status`, `Result` and `@Error@` fields set to the respective argument values. ] [`` val mutable @Status@: ReplyStatus ``] [ The `Status` field contains a `ReplyStatus` enum value indicating whether a parser succeeded (`Ok`) or failed (`Error` or `FatalError`). By returning a `FatalError` instead of an `Error` a parser can signal that no error recovery should be tried (except through backtracking mechanisms). ] [`` /// If Status <> Ok then the Result value is undefined and may be null. ``] [`` val mutable @Result@: 'TResult ``] [ If the value of the `Status` field is `Ok`, the `Result` field contains a parser result value; otherwise, its value is undefined and may be equal to `Unchecked.defaultof<'TResult>`. 
(The result value in a `Reply` returned by an unsuccessful parser is generally an implementation detail of the parser that you should not depend on.) ] [`` val mutable @Error@: ErrorMessageList ``] [ The `Error` field holds a list of error messages in the form of an `ErrorMessageList` value. An empty `ErrorMessageList` is represented as a `null` value. The error messages returned by a parser in a `Reply` value implicitly refer to the state of the `CharStream` as it is when the parser returns. Since the `ErrorMessage` values stored in the `ErrorMessageList` do not themselves contain an error position, they can only be interpreted together with the position of the `CharStream` as it is when the parser returns. ] [`` override Equals: obj -> bool override GetHashCode: unit -> int interface System.IEquatable<Reply<'TResult>> end ``] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-staticmapping.txt ================================================  [section#StaticMapping FParsec.StaticMapping] This module defines functions for creating optimized mapping functions between keys and values. This module is not available in the @Low-Trust version@ of FParsec. [interface-reference] [section Interface] [$$interface] [/section] [section Remarks] The functions in the `StaticMapping` module use runtime code generation via `System.Reflection.Emit` to create optimized mapping functions between keys and values. [note Runtime code generation is relatively expensive, so the functions in this module should only be used for optimizing static mappings that are potentially called a (very) large number of times.] [note The code generated for the optimized mapping functions will occupy memory until the associated AppDomain is unloaded. ] The performance of the generated functions depends a lot on the individual key-value mapping and the application-specific call pattern. 
Ignoring the overhead of the function call, the generated mapping functions should generally be as fast as an equivalent statically compiled switch-statement in C# or F#. In certain cases they will even be faster. The code size of the generated functions increases about linearly with the number of key ranges (i.e. continuous sequences of keys with the same value). Hence, you should only use the `StaticMapping` module for small mappings. If you try to turn arbitrarily large key-value mappings into static mapping functions, you'll likely hit upon certain implementation limitations (of this module's code, of `Reflection.Emit` or of the CLR's JIT). [#DEBUG_STATIC_MAPPING] If the conditional compilation symbol `DEBUG_STATIC_MAPPING` is defined when compiling FParsec, the generated mapping functions will compute each result with two different methods and check the results against each other. Of course, this means that they will take more than twice the time than without the `DEBUG_STATIC_MAPPING` symbol and will also consume more memory. In Debug builds of FParsec `DEBUG_STATIC_MAPPING` is switched on by default, since the `StaticMapping` module is still relatively new. [note Measuring and comparing the performance of the generated mapping functions only makes sense in Release builds.] [/section] [section Members] [interface-members] [`` // FParsec.dll namespace FParsec ``] [`` type @Range@``] [ Represents an immutable range between the integer values `Min` and `Max` (inclusive). `` type Range = struct new: min: int * max: int -> Range val Min: int val Max: int end `` The `Min` value must not be larger than the `Max` value. In a Debug build this condition is checked by an assert-check in the `Range` constructor. 
] [`` = struct new: min: int * max: int -> Range val Min: int val Max: int end module [no-auto-link StaticMapping] = ``] [`` val @createStaticCharIndicatorFunction@: invert: bool -> charsInSet: seq<char> -> (char -> bool) ``] [ Creates an optimized indicator function for the chars specified by the `charsInSet` sequence. If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is called with a char contained in `charsInSet`. `charsInSet` may contain duplicate char values. Internally, this function collects continuous ranges of chars into `Range` values and then uses the same compilation strategy as `createStaticCharRangeIndicatorFunction`. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticCharRangeIndicatorFunction@: invert: bool -> rangesInSet: seq<Range> -> (char -> bool) ``] [ Creates an optimized indicator function for the chars in the ranges specified by the `rangesInSet` sequence. If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is called with a char contained in at least one of the ranges of `rangesInSet`. `rangesInSet` may contain overlapping or duplicate ranges. However, the ranges must not contain values less than `0` or greater than `0xffff` (the minimum and maximum UTF-16 char values), otherwise an `ArgumentException` is thrown. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticIntIndicatorFunction@: invert: bool -> valuesInSet: seq<int> -> (int -> bool) ``] [ Creates an optimized indicator function for the integers specified by the `valuesInSet` sequence. If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is called with an integer contained in `valuesInSet`. `valuesInSet` may contain duplicate integer values. 
Internally, this function collects continuous ranges of integers into `Range` values and then uses the same compilation strategy as `createStaticIntRangeIndicatorFunction`. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticIntRangeIndicatorFunction@: invert: bool -> rangesInSet: seq<Range> -> (int -> bool) ``] [ Creates an optimized indicator function for the integers in the ranges specified by the `rangesInSet` sequence. If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is called with an `int` contained in at least one of the ranges of `rangesInSet`. `rangesInSet` may contain overlapping or duplicate ranges. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticIntMapping@: defaultValue: 'T -> keyValues: seq<int*'T> -> (int -> 'T) ``] [ Creates an optimized mapping function that maps integer keys to values. The `keyValues` sequence specifies the key-value pairs for the mapping. All keys not specified in `keyValues` are mapped to `defaultValue`. This function throws an `ArgumentException` if `keyValues` contains a duplicate key. Internally, this function collects continuous ranges of integer keys with equal values[fn In the case of a reference type the values are only compared for reference-equality. In the case of a value type the values are only compared if the type implements `System.IEquatable<_>` or is an `int` enum type.] into `Range` values and then uses the same compilation strategy as `createStaticIntRangeMapping`. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticIntRangeMapping@: defaultValue: 'T -> keyValues: seq<Range*'T> -> (int -> 'T) ``] [ Creates an optimized mapping function that maps integer key ranges to values. The `keyValues` sequence specifies the range-value pairs for the mapping. All keys not contained in one of the ranges in `keyValues` are mapped to `defaultValue`. 
This function throws an `ArgumentException` if `keyValues` contains an overlapping or duplicate key range. Please also see the @remarks@ at the beginning of this section. ] [`` val @createStaticStringMapping@: defaultValue: 'T -> keyValues: seq<string*'T> -> (string -> 'T) ``] [ Creates an optimized mapping function that maps string keys to values. The `keyValues` sequence specifies the key-value pairs for the mapping. All keys not specified in `keyValues` are mapped to `defaultValue`. A `null` key is not supported. `createStaticStringMapping` throws an `ArgumentException` if `keyValues` contains a duplicate key or a `null` key. If the generated mapping function is called with a `null` string, it throws a `NullReferenceException`. [note The compilation strategy employed by `createStaticStringMapping` does not handle all mappings equally well. It is optimized for mapping a relatively small set of string symbols to constants. If you want to use `createStaticStringMapping` to optimize a frequently used mapping in your program, you should test how well `createStaticStringMapping` handles your situation (in a Release build!) and see whether the performance is worth the compilation costs and the additional code dependency.] Please also see the @remarks@ at the beginning of this section. ] [`` ``] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference-text.txt ================================================  [section#Text FParsec.Text] [interface-reference] [section Interface] [$$interface] [/section] [section Members] [interface-members] [`` // FParsecCS.dll namespace FParsec type Text = ``] [`` static member @CountTextElements@: string -> int ``] [ `FParsec.Text.CountTextElements(str)` is equivalent to `System.Globalization.StringInfo(str).LengthInTextElements`. 
] [`` static member @FoldCase@: char -> char ``] [#FoldCase_char `FParsec.Text.FoldCase(chr)` is an optimized implementation of `FParsec.Text.FoldCase(string chr)`. ] [`` static member @FoldCase@: string -> string ``] [ Returns a case-folded copy of the string argument. All chars are mapped using the (non-Turkic) 1-to-1 [url "http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt" case folding mappings] (v. 8.0.0) for Unicode code points in the Basic Multilingual Plane, i.e. code points below 0x10000. If the case-folded string equals the argument string, the original argument is returned (to preserve its reference identity). If the argument is `null`, `null` is returned. ] [`` static member @IsWhitespace@: char -> bool ``] [ `FParsec.Text.IsWhitespace(chr)` is a faster implementation of `System.Char.IsWhiteSpace(chr)`. In the `LOW_TRUST`-version of FParsec this method simply forwards all calls to `System.Char.IsWhiteSpace(chr)`. ] [`` static member @NormalizeNewlines@: string -> string ``] [ Returns the passed string with all occurrences of `"\r\n"` and `"\r"` replaced by `"\n"`. If the normalized string equals the argument string, the original argument is returned (to preserve its reference identity). If the argument is `null`, `null` is returned. 
] [`` ``] [/interface-members] [/section] [/interface-reference] [/section] ================================================ FILE: Doc/src/reference.txt ================================================  [section Reference] [split-section] [output-in-subdirectory] [include "reference-overview.txt"] [include "reference-primitives.txt"] [include "reference-charparsers.txt"] [include "reference-operatorprecedenceparser.txt"] [include "reference-staticmapping.txt"] [include "reference-reply.txt"] [include "reference-error.txt"] [include "reference-errormessage.txt"] [include "reference-errormessagelist.txt"] [include "reference-position.txt"] [include "reference-charstream.txt"] [include "reference-text.txt"] [/section] ================================================ FILE: Doc/src/status-and-roadmap.txt ================================================ [section Status and roadmap] [section Status] FParsec has been in development for several years and can now be considered "stable". Version 1.0 of FParsec was released on 19 July 2012. [note Although FParsec has rather comprehensive unit tests (with code coverage close to 100% for many components), it likely still contains bugs. If you want to use FParsec in a production environment, you need to test your parsers thoroughly. ] [/section] [section Future development] There are no firm plans for any major new features yet. One goal for the future development of FParsec is to support a more declarative parser definition syntax without compromising on FParsec's performance or language-integrated nature. For example, it would be nice if FParsec provided a way to automatically create optimized lexer functions from a series of typed regular expressions and associated mapping functions, ideally at compile time. Using such a feature could maybe look similar to `` let lexer : Parser = lex ["regex-with-1-capture-group", (fun x -> AstNode1(x)) "regex-with-2-capture-groups", (fun x y -> AstNode2(x, y)) (* ... 
*)] `` [/section] [/section] ================================================ FILE: Doc/src/template.html ================================================  {title; text-only, no-number}
{content}
================================================ FILE: Doc/src/tutorial.txt ================================================ [section Tutorial] This tutorial introduces you to the basic concepts of FParsec. Our goal is to give you an intuition for how you can build parser applications using the FParsec library. We will only cover the basic ideas and only cursorily explore FParsec's API, but hopefully we will cover enough ground to enable you to further explore FParsec with the help of the @user's guide@, the @API reference@ and the sample parsers in the =Samples= folder. [*A Japanese translation of this tutorial by Gab_km is available [url "http://blog.livedoor.jp/gab_km/archives/1437534.html" here].][br] [*A Russian translation of this tutorial by Dmitry Vlasov is available [url "http://dmitriyvlasov.ru/publication/fparsec-tutorial/" here].] [toc] [section Preliminaries] FParsec is built as two DLLs: =FParsec.dll= and =FParsecCS.dll=. To use FParsec in your project, you can either let [url "http://nuget.org" NuGet] install one of the [^nuget-packages NuGet packages], or you can build the two FParsec DLLs from source. The easiest way to build FParsec from source is using the Visual Studio solution files in the [=Build/VS11] folder of the [url "https://github.com/stephan-tolksdorf/fparsec/archive/master.zip" source code package]. Any project that uses FParsec has to reference both DLLs. See @Download and Installation@ for more details. All FParsec types and modules are declared in the `FParsec` namespace. 
This namespace contains some basic classes (such as `CharStream` and `Reply`) and four F# modules, namely - `Primitives`, containing basic type definitions and parser combinators, - `CharParsers`, containing parsers for chars, strings and numbers, and functions for applying parsers to input streams, - `Error`, containing types and helper functions for creating, processing and formatting parser error messages, - `StaticMapping`, containing functions for compiling static key-value mappings into optimized functions. All code snippets in this tutorial assume that you've opened the `FParsec` namespace: `` open FParsec `` Opening the `FParsec` namespace also automatically opens the `Primitives`, `CharParsers` and `[^FParsec..Error Error]` modules. [note All code snippets in this tutorial are contained in the [=Samples/Tutorial] project. Having this project open while reading the tutorial can be quite helpful. For example, you can hover the mouse over an identifier to get an Intellisense popup with the inferred type. And if you're curious how a library function is implemented, you can click the /Go to definition/ context menu option to view its source code. ] [/section] [section Parsing a single float] Parsing input with FParsec involves two steps: 1) building a parser and 2) applying the parser to the input. Let's start with a simple example: parsing a single floating-point number in a string. In this case the first step, building the parser, is trivial, because the `CharParsers` module already comes with a built-in float parser: ``val pfloat: Parser`` The generic type `Parser<'Result,'UserState>` is the type of all parsers in FParsec. If you follow the hyperlink into the reference, you'll see that `Parser` is a type abbreviation for a function type. However, at this point we don't need to go into the details of the `Parser` type. It's enough to note that the first type argument represents the type of the parser result. 
Thus, in the case of `pfloat` the type tells us that if the parser succeeds it returns a floating-point number of type `float`. We won't use a "user state" in this tutorial, so you can just ignore the second type argument for the time being. To apply the `pfloat` parser to a string, we can use the `run` function from the `CharParsers` module: ``val run: Parser<'Result, unit> -> string -> ParserResult<'Result,unit>`` `run` is the simplest function out of [^runparser-functions several] provided by the `CharParsers` module for running parsers on input. Other functions allow you, for example, to run parsers directly on the contents of a file or a `System.IO.Stream`. `run` applies the parser passed as the first argument to the string passed as the second argument and returns the return value of the parser in form of a `ParserResult` value. The `ParserResult` type is a discriminated union type with the two cases: `Success` and `Failure`. In case the parser succeeds, the `ParserResult` value contains the result value, otherwise it contains an error message. To simplify testing we write a little helper function that prints the result value or error message: `` let test p str = match run p str with | Success(result, _, _) -> printfn "Success: %A" result | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg `` With this helper function in place, we can test `pfloat` by executing ``test pfloat "1.25"`` which produces the output ``{fsi}Success: 1.25`` Testing `pfloat` with a number literal that has an invalid exponent ``test pfloat "1.25E 3"`` yields the error message ``{fsi} Failure: Error in Ln: 1 Col: 6 1.25E 3 ^ Expecting: decimal digit `` [/section] [section Parsing a float between brackets] Implementing parsers with FParsec typically means combining higher-level parsers from lower-level ones. 
You start with the parser primitives provided by the library and then successively combine these into higher-level parsers until you finally have a single parser for the complete input. In the following sections we will illustrate this approach by discussing various sample parsers that build on each other. In this section we will begin with a very simple parser for a floating-point number between brackets: `` let str s = pstring s let floatBetweenBrackets = str "[" >>. pfloat .>> str "]" `` [note If you're trying to compile this or another code snippet and you get a compiler error mentioning F#'s "value restriction", please see [^fs-value-restriction]. ] The definition of `str` and `floatBetweenBrackets` involves three library functions that we haven't yet introduced: `pstring`, `>>.` and `.>>`. The function ``val pstring: string -> Parser<string,'u>`` takes a string as the argument and returns a parser for that string. When this parser is applied to an input stream it checks whether the following chars in the input stream match the given string. If the chars match the complete string, the parser consumes them, i.e. skips over them. Otherwise it fails without consuming any input. When the parser succeeds, it also returns the given string as the parser result, but since the string is a constant, you'll rarely make use of the result. The `pstring` function isn't named `string` because otherwise it would hide the built-in F# function `string`. In general, parser names in FParsec that would otherwise conflict with built-in F# function names are prefixed by a single p char. `pfloat` is another example of this naming convention. To save a few keystrokes we abbreviate `pstring` as `str`. So, for instance, `str "["` is a parser that skips over the char `'['`.
The binary operators `>>.` and `.>>` have the following types: `` val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u> val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u> `` As you can see from these signatures, both operators are parser combinators that construct a new parser from the two argument parsers. The parser `p1 >>. p2` parses `p1` and `p2` in sequence and returns the result of `p2`. The parser `p1 .>> p2` also parses `p1` and `p2` in sequence, but it returns the result of `p1` instead of `p2`. In each case the point points to the side of the parser whose result is returned. By combining both operators in `p1 >>. p2 .>> p3` we obtain a parser that parses `p1`, `p2` and `p3` in sequence and returns the result from `p2`. [note With the somewhat imprecise wording "parses `p1` and `p2` in sequence" we actually mean: The parser `p1` is applied to the input and if `p1` succeeds then `p2` is applied to the remaining input; in case any of the two element parsers fails, the aggregate parser immediately propagates the error message. In the documentation for FParsec we often use expressions such as "parses `p`" or "parses an occurrence of `p`" instead of the technically more accurate "applies the parser `p` to the remaining input and if `p` succeeds ...", hoping that the exact meaning is obvious from the context. ] The following tests show that `floatBetweenBrackets` parses valid input as expected and produces informative error messages when it encounters invalid input: ``{fsi} > test floatBetweenBrackets "[1.0]";; Success: 1.0 > test floatBetweenBrackets "[]";; Failure: Error in Ln: 1 Col: 2 [] ^ Expecting: floating-point number > test floatBetweenBrackets "[1.0";; Failure: Error in Ln: 1 Col: 5 [1.0 ^ Note: The error occurred at the end of the input stream. Expecting: ']' `` [(* [ Note that all infix F# operators that begin with `<` or '>' (leading '.' characters are ignored) are left-associative. Hence, `p1 >>. p2 .>> p3` is equivalent with `(p1 >>. 
p2) .>> p3`. However, in this case the associativity has no effect on the aggregate parser behaviour. ] *)] [/section] [section Abstracting parsers] One of FParsec's greatest strengths is the ease with which you can define your own parser abstractions. Take for instance the `floatBetweenBrackets` from the previous section. If you intend to also parse other elements between strings, you could define your own specialized combinator for this purpose: `` let betweenStrings s1 s2 p = str s1 >>. p .>> str s2 `` You could then define `floatBetweenBrackets` and other parsers with the help of this combinator: `` let floatBetweenBrackets = pfloat |> betweenStrings "[" "]" let floatBetweenDoubleBrackets = pfloat |> betweenStrings "[[" "]]" `` [note In case you're new to F#:[br] `pfloat |> betweenStrings "[" "]"` is just another way to write `betweenStrings "[" "]" pfloat`.] Once you notice that you frequently need to apply a parser between two others, you could go a step further and factor `betweenStrings` as follows: `` let [no-auto-link between] pBegin pEnd p = pBegin >>. p .>> pEnd let betweenStrings s1 s2 p = p |> [no-auto-link between] (str s1) (str s2) `` Actually, you don't need to define `between`, because this is already a built-in FParsec combinator. These are all trivial examples, of course. But since FParsec is merely an F# library and not some external parser generator tool, there are no limits to the abstractions you can define. You can write functions that take whatever input you need, do some arbitrarily complex computations on the input and then return a special purpose parser or parser combinator. For example, you could write a function that takes a regular-expression pattern as the input and returns a `Parser` for parsing input conforming to that pattern. This function could use another parser to parse the pattern into an AST and then compile this AST into a special-purpose parser function.
Alternatively, it could construct a .NET regular expression from the pattern and then return a parser function that uses FParsec's `CharStream` API to directly apply the regex to the input stream (which is what the built-in `regex` parser actually does). Another example are extensible parser applications. By storing parser functions in dictionaries or other data structures and defining an appropriate extension protocol, you could allow plugins to dynamically register new parsers or modify existing ones. The possibilities are really endless. But before you can fully exploit these possibilities, you first need to be familiar with the fundamentals of FParsec. [/section] [section Parsing a list of floats] We've already spent three sections on discussing how to parse a single floating-point number, so it's about time we try something more ambitious: parsing a list of floating-point numbers. Let us first assume that we need to parse a sequence of floating-point numbers in brackets, i.e. text in the following EBNF format: `{EBNF}("[" float "]")*`. Valid input strings in this format are for example: `""`, `"[1.0]"`, `"[2][3][4]"`. Since we already have a parser for a float between brackets, we only need a way to repeatedly apply this parser to parse a sequence. This is what the `many` combinator is for: ``val many: Parser<'a,'u> -> Parser<'a list,'u>`` The parser `many p` repeatedly applies the parser `p` until `p` fails, i.e. it "greedily" parses as many occurrences of `p` as possible. The results of `p` are returned as a list in the order of occurrence. 
Some simple tests show that `many floatBetweenBrackets` works as expected: ``{fsi} > test (many floatBetweenBrackets) "";; Success: [] > test (many floatBetweenBrackets) "[1.0]";; Success: [1.0] > test (many floatBetweenBrackets) "[2][3][4]";; Success: [2.0; 3.0; 4.0] `` If `floatBetweenBrackets` fails *after consuming input*, then the combined parser fails too: ``{fsi} > test (many floatBetweenBrackets) "[1][2.0E]";; Failure: Error in Ln: 1 Col: 9 [1][2.0E] ^ Expecting: decimal digit `` Note that `many` also succeeds for an empty sequence. If you want to require at least one element, you can use `many1` instead: ``{fsi} > test (many1 floatBetweenBrackets) "(1)";; Failure: Error in Ln: 1 Col: 1 (1) ^ Expecting: '[' `` [tip If you'd prefer the last error message to be worded in terms of the higher level `floatBetweenBrackets` parser instead of the lower level `str "["` parser, you could use the `<?>` operator as in the following example: ``{fsi} > test (many1 (floatBetweenBrackets <?> "float between brackets")) "(1)";; Failure: Error in Ln: 1 Col: 1 (1) ^ Expecting: float between brackets `` Please see [^users-guide.customizing-error-messages] of the user's guide to learn more about customizing error messages. ] If you just want to skip over a sequence and don't need the list of parser results, you could use the optimized combinators `skipMany` or `skipMany1` instead of `many` and `many1`. Another frequently used combinator for parsing sequences is `sepBy`: ``val sepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list, 'u>`` `sepBy` takes an "element" parser and a "separator" parser as the arguments and returns a parser for a list of elements separated by separators. In EBNF notation `sepBy p pSep` could be written as `{EBNF}(p (pSep p)*)?`. Similar to `many`, there are [^sepBy-parsers several variants] of `sepBy`.
With the help of `sepBy` we can parse a more readable list format, where floating-point numbers are separated by a comma: ``{EBNF}floatList: "[" (float ("," float)*)? "]"`` Valid input strings in this format are for example: `"[]"`, `"[1.0]"`, `"[2,3,4]"`. The straightforward implementation of this format is `` let floatList = str "[" >>. sepBy pfloat (str ",") .>> str "]" `` Testing `floatList` with valid test strings yields the expected result: ``{fsi} > test floatList "[]";; Success: [] > test floatList "[1.0]";; Success: [1.0] > test floatList "[4,5,6]";; Success: [4.0; 5.0; 6.0] `` Testing with invalid input shows that `floatList` produces helpful error messages: ``{fsi} > test floatList "[1.0,]";; Failure: Error in Ln: 1 Col: 6 [1.0,] ^ Expecting: floating-point number > test floatList "[1.0,2.0";; Failure: Error in Ln: 1 Col: 9 [1.0,2.0 ^ Note: The error occurred at the end of the input stream. Expecting: ',' or ']' `` [/section] [section Handling whitespace] FParsec treats whitespace (spaces, tabs, newlines, etc) just as any other input, so our `floatList` parser can't yet deal with whitespace: ``{fsi} > test floatBetweenBrackets "[1.0, 2.0]";; Failure: Error in Ln: 1 Col: 5 [1.0, 2.0] ^ Expecting: ']' `` If we want the parser to ignore whitespace, we need to make this explicit in the parser definition. First, we need to define what we want to accept as whitespace. For simplicity we will just use the built-in `spaces` parser, which skips over any (possibly empty) sequence of `' '`, `'\t'`, `'\r'` or `'\n'` chars. `` let ws = spaces `` Next, we need to insert the `ws` parser at every point where we want to ignore whitespace. In general it's best to skip whitespace *after* one parses elements, i.e. skip trailing instead of leading whitespace, because that reduces the need for backtracking (which will be explained below). 
Hence, we insert `ws` at two places to skip over any whitespace after brackets or numbers: `` let str_ws s = pstring s .>> ws let float_ws = pfloat .>> ws let numberList = str_ws "[" >>. sepBy float_ws (str_ws ",") .>> str_ws "]" `` A simple test shows that `numberList` ignores whitespace: ``{fsi} > test numberList @"[ 1 , 2 ] ";; Success: [1.0; 2.0] `` If we introduce an error on the second line, we see that `FParsec` automatically keeps track of the line count: ``{fsi} > test numberList @"[ 1, 2; 3]";; Failure: Error in Ln: 2 Col: 27 2; 3] ^ Expecting: ',' or ']' `` Our `numberList` parser still doesn't skip leading whitespace, because that's not necessary when we put it together with other parsers that skip all trailing whitespace. If we wanted to parse a whole input stream with only a list of floating-point numbers, we could use the following parser: `` let numberListFile = ws >>. numberList .>> eof `` The end-of-file parser `eof` will generate an error if the end of the stream hasn't been reached. This is useful for making sure that the complete input gets consumed. Without the `eof` parser the following test wouldn't produce an error: ``{fsi} > test numberListFile " [1, 2, 3] [4]";; Failure: Error in Ln: 1 Col: 12 [1, 2, 3] [4] ^ Expecting: end of input `` [/section] [section Parsing string data] FParsec contains various built-in parsers for chars, strings, numbers and whitespace. In this section we will introduce a few of the char and string parsers. For an overview of all available parsers please refer to the @parser overview@ in the reference. You've already seen several applications of the `pstring` parser (abbreviated as `str`), which simply skips over a constant string in the input. When the `pstring` parser succeeds, it also returns the skipped string as the parser result. 
The following example demonstrates this: ``{fsi} > test (many (str "a" <|> str "b")) "abba";; Success: ["a"; "b"; "b"; "a"] `` In this example we also used the `<|>` combinator to combine two alternative parsers. We'll discuss this combinator in more detail below. [note We refer to both `pstring` and `pstring "a"` as "parsers". Strictly speaking, `pstring` is a function taking a string argument and returning a `Parser`, but it's more convenient to just refer to it as a (parametric) parser. ] When you don't need the result of the `pstring` parser, you can alternatively use the `skipString` parser, which returns the `unit` value `()` instead of the argument string. In this case it doesn't make any difference to performance whether you use `pstring` or `skipString`, since the returned string is a constant. However, for most other built-in parsers and combinators you should prefer the variants with the "skip" name prefix when you don't need the parser result values, because these will generally be faster. If you look at the @parser overview@, you'll see "skip" variants for many of the built-in parsers and combinators. If you want to parse a [+ c]ase [+ i]nsensitive string constant you can use `pstringCI` and `skipStringCI`. For example: ``{fsi} > test (skipStringCI "<float>" >>. pfloat) "<FLOAT>1.0";; Success: 1.0 `` Frequently one needs to parse string variables whose chars have to satisfy certain criteria. For instance, identifiers in programming languages often need to start with a letter or underscore and then need to continue with letters, digits or underscores.
To parse such an identifier you could use the following parser: `` let identifier = let isIdentifierFirstChar c = isLetter c || c = '_' let isIdentifierChar c = isLetter c || isDigit c || c = '_' many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier" .>> ws // skips trailing whitespace `` Here we have used the `many1Satisfy2L` string parser, which is one of several primitives for parsing strings based on char predicates (i.e. functions that take a char as input and return a boolean value). It parses any sequence of one or more chars (hence the "many1" in the name) whose first char satisfies the first predicate function and whose remaining chars satisfy the second predicate (hence the "Satisfy2"). The string label given as the third argument (hence the "L") is used in error message to describe the expected input. The following tests show how this parser works: ``{fsi} > test identifier "_";; Success: "_" > test identifier "_test1=";; Success: "_test1" > test identifier "1";; Failure: Error in Ln: 1 Col: 1 1 ^ Expecting: identifier `` [tip If you want to parse identifiers based on the Unicode XID syntax, consider using the built-in `identifier` parser.] Many string formats are complicated enough that you need to combine several char and string parser primitives. For example, consider the following string literal format: ``{EBNF} stringLiteral: '"' (normalChar|escapedChar)* '"' normalChar: any char except '\' and '"' escapedChar: '\\' ('\\'|'"'|'n'|'r'|'t') `` A straightforward translation of this grammar to FParsec looks like: `` let stringLiteral = let normalChar = satisfy (fun c -> c <> '\\' && c <> '"') let unescape c = match c with | 'n' -> '\n' | 'r' -> '\r' | 't' -> '\t' | c -> c let escapedChar = pstring "\\" >>. 
(anyOf "\\nrt\"" |>> unescape) between (pstring "\"") (pstring "\"") (manyChars (normalChar <|> escapedChar)) `` In this example we use several library functions that we haven't yet introduced: - `satisfy` parses any char that satisfies the given predicate function. - `anyOf` parses any char contained in the argument string. - The pipeline combinator `|>>` applies the function on the right side (`unescape`) to the result of the parser on the left side (`anyOf "\\nrt\""`). - The choice combinator `<|>` applies the parser on the right side if the parser on the left side fails, so that `normalChar <|> escapedChar` can parse both normal and escaped chars. (We will discuss this operator in more detail two sections below.) - `manyChars` parses a sequence of chars with the given char parser and returns it as a string. Let's test the `stringLiteral` parser with a few test inputs: ``{fsi} > test stringLiteral "\"abc\"";; Success: "abc" > test stringLiteral "\"abc\\\"def\\\\ghi\"";; Success: "abc"def\ghi" > test stringLiteral "\"abc\\def\"";; Failure: Error in Ln: 1 Col: 6 "abc\def" ^ Expecting: any char in ‘\nrt"’ `` Instead of parsing the string literal char-by-char we could also parse it "snippet-by-snippet": `` let stringLiteral2 = let normalCharSnippet = many1Satisfy (fun c -> c <> '\\' && c <> '"') let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> function | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c) between (pstring "\"") (pstring "\"") (manyStrings (normalCharSnippet <|> escapedChar)) `` Here we have used the `manyStrings` combinator, which parses a sequence of strings with the given string parser and returns the strings in concatenated form. [note We have to require `normalCharSnippet` to consume at least one char, i.e. use `many1Satisfy` instead of `manySatisfy`. 
Otherwise `normalCharSnippet` would succeed even if it doesn't consume input, `escapedChar` would never be called and `manyStrings` would eventually throw an exception to prevent an infinite loop. ] Parsing a string chunk-wise using an optimized parser like `many1Satisfy` is usually a bit faster than parsing it char-wise using `manyChars` and `satisfy`. In this case we can optimize our parser even a bit further -- once we realize that two normal char snippets must be separated by at least one escaped char: `` let stringLiteral3 = let normalCharSnippet = manySatisfy (fun c -> c <> '\\' && c <> '"') let escapedChar = (* like in stringLiteral2 *) between (pstring "\"") (pstring "\"") (stringsSepBy normalCharSnippet escapedChar) `` The `stringsSepBy` combinator parses a sequence of strings (with the first argument parser) separated by other strings (parsed with the second argument parser). It returns all parsed strings, including the separator strings, as a single, concatenated string. Note that `stringLiteral3` uses `manySatisfy` instead of `many1Satisfy` in its `normalCharSnippet` definition, so that it can parse escaped chars that are not separated by normal chars. This can't lead to an infinite loop because `escapedChar` can't succeed without consuming input. [/section] [section Sequentially applying parsers] Whenever you need to apply multiple parsers in sequence and only need the result of one of them, a suitable combination of `>>.` and `.>>` operators will do the job. However, these combinators won't suffice if you need the result of more than one of the involved parsers. In that case you can use the `pipe2`, ..., `pipe5` combinators, which apply multiple parsers in sequence and pass all the individual results to a function that computes the aggregate result.
For instance, with the `pipe2` combinator ``val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>`` you can construct a parser `pipe2 p1 p2 f` that sequentially applies the two parsers `p1` and `p2` and then returns the result of the function application `f x1 x2`, where `x1` and `x2` are the results returned by `p1` and `p2`. In the following example we use `pipe2` to parse a product of two numbers: `` let product = pipe2 float_ws (str_ws "*" >>. float_ws) (fun x y -> x * y) `` ``{fsi} > test product "3 * 5";; Success: 15.0 `` The `pipe2-5` combinators are particularly useful for constructing AST objects. In the following example we use `pipe3` to parse a string constant definition into a `StringConstant` object: `` type StringConstant = StringConstant of string * string let stringConstant = pipe3 identifier (str_ws "=") stringLiteral (fun id _ str -> StringConstant(id, str)) `` ``{fsi} > test stringConstant "myString = \"stringValue\"";; Success: StringConstant ("myString","stringValue") `` If you just want to return the parsed values as a tuple, you can use the predefined `tuple2-5` parsers. For instance, `tuple2 p1 p2` is equivalent to `pipe2 p1 p2 (fun x1 x2 -> (x1, x2))`. The `tuple2` parser is also available under the operator name `.>>.`, so that you can write `p1 .>>. p2` instead of `tuple2 p1 p2`. In the following example we parse a pair of comma separated numbers with this operator: ``{fsi} > test (float_ws .>>. (str_ws "," >>. float_ws)) "123, 456";; Success: (123.0, 456.0) `` Hopefully you find the [/ `>>`-with-1-or-2-dots-notation] intuitive by now. If you need a `pipe` or `tuple` parser with more than 5 arguments, you can easily construct one using the existing ones. For example, do you have an idea how to define a `pipe7` parser?
This footnote gives a possible solution: [fn `` let pipe7 p1 p2 p3 p4 p5 p6 p7 f = pipe4 p1 p2 p3 (tuple4 p4 p5 p6 p7) (fun x1 x2 x3 (x4, x5, x6, x7) -> f x1 x2 x3 x4 x5 x6 x7) `` ] [/section] [section Parsing alternatives] In the section on `Parsing string data` we already briefly introduced the choice combinator `<|>`: `` val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u> `` This combinator allows you to support multiple alternative input formats at a given input position. For example, in the above section we used `<|>` to combine a parser for unescaped chars and a parser for escaped chars into a parser that supports both: `normalChar <|> escapedChar`. Another example that shows how `<|>` works is the following parser for boolean variables: `` let boolean = (stringReturn "true" true) <|> (stringReturn "false" false) `` Here we have also used the `stringReturn` parser, which skips the string constant given as the first argument and, if successful, returns the value given as the second argument. Testing the `boolean` parser with some inputs yields: ``{fsi} > test boolean "false";; Success: false > test boolean "true";; Success: true > test boolean "tru";; Failure: Error in Ln: 1 Col: 1 tru ^ Expecting: 'false' or 'true' `` The behaviour of the `<|>` combinator has two important characteristics: - `<|>` only tries the parser on the right side if the parser on the left side fails. It does *not* implement a longest match rule. - However, it only tries the right parser if the left parser fails *without consuming input*. A consequence of the second point is that the following test fails because the parser on the left side of `<|>` consumes whitespace before it fails: ``{fsi} > test ((ws >>. str "a") <|> (ws >>. str "b")) " b";; Failure: Error in Ln: 1 Col: 2 b ^ Expecting: 'a' `` Fortunately, we can easily fix this parser by factoring out `ws`: ``{fsi} > test (ws >>.
(str "a" <|> str "b")) " b";; Success: "b" `` If you're curious why `<|>` behaves this way and how you can handle situations where you need `<|>` to try the alternative parser even if the first parser fails after consuming input, please see [^users-guide.parsing-alternatives] and [^users-guide.looking-ahead-and-backtracking] in the user's guide. If you want to try more than two alternative parsers, you can chain multiple `<|>` operators, like in `p1 <|> p2 <|> p3 <|> ...`, or you can use the `choice` combinator, which accepts a sequence of parsers as the argument, like in `choice [p1; p2; p3; ...]`. [/section] [section F#'s value restriction] When you start writing your own parsers with FParsec or try to compile some individual code snippets from above, you'll come across a compiler issue that often causes some head-scratching among new users of F# and FParsec: the /value restriction/. In this section we'll explain the value restriction and how you can handle it in your FParsec programs. [note If you find the discussion in this section too technical for the moment, just skip to the next section and come back later when you actually see a compiler message mentioning "value restriction" for the first time.] F#'s value restriction is the reason that the following code snippet does not compile `` open FParsec let p = pstring "test" `` even though the following snippet compiles without a problem[fn Assuming you referenced the two FParsec DLLs.]: `` open FParsec let p = pstring "test" run p "input" `` The compiler error generated for the first sample is the following: ``{fsi} error FS0030: Value restriction. The value 'p' has been inferred to have generic type val p : Parser<string,'_a> Either make the arguments to 'p' explicit or, if you do not intend for it to be generic, add a type annotation. `` When you work with FParsec you'll sooner or later see this or similar error messages, in particular if you work with the interactive console prompt.
Fortunately, this kind of error is usually easy to work around. The problem with the first snippet above is that the F# compiler infers the `p` value to have an unresolved generic type, although F# doesn't permit a generic value in this situation. The return type of the `pstring` function is `Parser<string,'u>`, where the type parameter `'u` represents the type of the `CharStream` user state. Since there is nothing in the first snippet that constrains this type parameter, the compiler infers the type `Parser<string,'_a>` for the parser value `p`, with `'_a` representing an unresolved type parameter. In the second snippet this problem doesn't occur, because the use of `p` as the first argument to the `run` function constrains the user state type. Since `run` only accepts parsers of type `Parser<'t,unit>`, the compiler infers the non-generic type `Parser<string,unit>` for `p`. This example suggests two ways to handle the value restriction in FParsec programs: - Either make sure that the type of a parser value is constrained to a non-generic type by subsequent uses of this parser value *in the same compilation unit*, - or provide an explicit type annotation to manually constrain the type of the parser value (usually, a few type annotations in key locations are enough for a whole parser module). Often it is convenient to define some type abbreviations like the following `` type UserState = unit // doesn't have to be unit, of course type Parser<'t> = Parser<'t, UserState> `` With such an abbreviation in place, type annotations become as simple as `` let p : Parser<_> = pstring "test" `` Of course, constraining the type of a parser value to a non-generic type is only a solution if you don't actually need a generic type.
If you do need a generic value, you'll have to apply other techniques, as they are for example explained in the [url "http://msdn.microsoft.com/en-us/library/dd233183.aspx" F# reference] or in a [url "http://blogs.msdn.com/b/mulambda/archive/2010/05/01/value-restriction-in-f.aspx" blog entry] by Dmitry Lomov. However, FParsec `Parser` values (not parametric parser functions) are usually only used in the context of a specific parser application with a fixed user state type. In that situation constraining the type is indeed the appropriate measure to avoid a value restriction error. [/section] [section Parsing JSON] Now that we have discussed the basics of FParsec we are well prepared to work through a real world parser example: a JSON parser. JSON (JavaScript Object Notation) is a text-based data interchange format with a simple and lightweight syntax. You can find descriptions of the syntax on [url "http://json.org" json.org] and in [url "http://www.ietf.org/rfc/rfc4627.txt" RFC 4627]. In many applications one only has to deal with JSON files describing one particular kind of object. In such a context it sometimes can be appropriate to write a specialized parser just for that specific kind of JSON file. In this tutorial, however, we will follow a more general approach. We will implement a parser that can parse any general JSON file into an AST, i.e. an intermediate data structure describing the contents of the file. Applications can then conveniently query this data structure and extract the information they need. This is an approach comparable to that of XML parsers which build a data structure describing the document tree of an XML document. The great advantage of this approach is that the JSON parser itself becomes reusable and the document specific parsing logic can be expressed in the form of simple functions processing the AST of the JSON document. The natural way to implement an AST in F# is with the help of a discriminated union type.
If you look at the [url "http://json.org" JSON specification], you can see that a JSON value can be a string, a number, a boolean, null, a comma-separated list of values in square brackets, or an object with a sequence of key-value pairs in curly brackets. In our parser we will use the following union type to represent JSON values: `` type Json = JString of string | JNumber of float | JBool of bool | JNull | JList of Json list | JObject of Map<string, Json> `` Here we've chosen the F# `list` type to represent a sequence of values and the `Map` type to represent a sequence of key-value pairs, because these types are particularly convenient to process in F#.[fn If you need to parse huge sequences and objects, it might be more appropriate to use an array and dictionary for JList and JObject respectively.] Note that the `Json` type is recursive, since both `JList` and `JObject` values can themselves contain `Json` values. Our parser will have to reflect this recursive structure. [tip If you're new to FParsec and have a little time, it would be a good exercise to try to implement the JSON parser on your own (with the help of the reference documentation). This tutorial already covered almost everything you need and the JSON grammar is simple enough that this shouldn't take too much time. Of course, you can always peek at the implementation below if you get stuck.] We start the actual parser implementation by covering the simple `null` and boolean cases: `` let jnull = stringReturn "null" JNull let jtrue = stringReturn "true" (JBool true) let jfalse = stringReturn "false" (JBool false) `` Handling the number case is just as simple, because the JSON number format is based on the typical floating-point number format used in many programming languages and hence can be parsed with FParsec's built-in `pfloat` parser: `` let jnumber = pfloat |>> JNumber `` (Note that F# allows us to pass the object constructor `JNumber` as a function argument.)
If you compare the precise number format supported by `pfloat` with that in the JSON spec, you'll see that `pfloat` supports a superset of the JSON format. In contrast to the JSON format the `pfloat` parser also recognizes `NaN` and `Infinity` values, accepts a leading plus sign, accepts leading zeros and even supports the hexadecimal float format of Java and C99. Depending on the context this behaviour can be considered a feature or a limitation of the parser. For most applications it probably doesn't matter, and the JSON RFC clearly states that a JSON parser may support a superset of the JSON syntax. However, if you'd rather only support the exact JSON number format, you can implement such a float parser rather easily based on the configurable `numberLiteral` parser (just have a look at how this is currently done in the `pfloat` source). The JSON string format takes a little more effort to implement, but we've already parsed a similar format with the `stringLiteral` parsers in [^parsing-string-data], so we can just adapt one of those parsers for our purpose: `` let str s = pstring s let stringLiteral = let escape = anyOf "\"\\/bfnrt" |>> function | 'b' -> "\b" | 'f' -> "\u000C" | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c // every other char is mapped to itself let unicodeEscape = /// converts a hex char ([0-9a-fA-F]) to its integer number (0-15) let hex2int c = (int c &&& 15) + (int c >>> 6)*9 str "u" >>. pipe4 hex hex hex hex (fun h3 h2 h1 h0 -> (hex2int h3)*4096 + (hex2int h2)*256 + (hex2int h1)*16 + hex2int h0 |> char |> string ) let escapedCharSnippet = str "\\" >>. (escape <|> unicodeEscape) let normalCharSnippet = manySatisfy (fun c -> c <> '"' && c <> '\\') between (str "\"") (str "\"") (stringsSepBy normalCharSnippet escapedCharSnippet) let jstring = stringLiteral |>> JString `` `stringLiteral` parses string literals as a sequence of normal char snippets separated by escaped char snippets. 
A normal char snippet is any sequence of chars that does not contain the chars `'"'` and `'\\'`. An escaped char snippet consists of a backslash followed by any of the chars `'\\'`, `'\"'`, `'/'`, `'b'`, `'f'`, `'n'`, `'r'`, `'t'`, or a Unicode escape. A Unicode escape consists of a `'u'` followed by four hex chars representing a UTF-16 code unit. [#createParserForwardedToRef-example] The grammar rules for JSON lists and objects are recursive, because any list or object can itself contain any kind of JSON value. Hence, in order to write parsers for the list and object grammar rules, we need a way to refer to the parser for any kind of JSON value, even though we haven't yet constructed this parser. As is so often the case in computing, we can solve this problem by introducing an extra indirection: `` let jvalue, jvalueRef = createParserForwardedToRef() `` As you might have guessed from the name, `createParserForwardedToRef` creates a parser (`jvalue`) that forwards all invocations to the parser in a reference cell (`jvalueRef`). Initially, the reference cell holds a dummy parser, but since the reference cell is mutable, we can later replace the dummy parser with the actual value parser, once we have finished constructing it. The JSON RFC sensibly only permits spaces, (horizontal) tabs, line feeds and carriage returns as whitespace characters, which allows us to use the built-in `spaces` parser for parsing whitespace: `` let ws = spaces `` Both JSON lists and objects are syntactically represented as comma-separated lists of "elements" between brackets, where whitespace is allowed before and after any bracket, comma and list element. We can conveniently parse such lists with the following helper function: `` let listBetweenStrings sOpen sClose pElement f = between (str sOpen) (str sClose) (ws >>. sepBy (pElement .>> ws) (str "," >>. 
ws) |>> f) `` This function takes four arguments: an opening string, a closing string, an element parser and a function that is applied to the parsed list of elements. With the help of this function we can define the parser for a JSON list as follows: `` let jlist = listBetweenStrings "[" "]" jvalue JList `` JSON objects are lists of key-value pairs, so we need a parser for a key-value pair: `` let keyValue = stringLiteral .>>. (ws >>. str ":" >>. ws >>. jvalue) `` (Remember, the points on both sides of `.>>.` indicate that the results of the two parsers on both sides are returned as a tuple.) By passing the `keyValue` parser to `listBetweenStrings` we obtain a parser for JSON objects: `` let jobject = listBetweenStrings "{" "}" keyValue (Map.ofList >> JObject) `` [#json-value-parser] Having defined parsers for all the possible kinds of JSON values, we can combine the different cases with a `choice` parser to obtain the finished parser for JSON values: `` do jvalueRef := choice [jobject jlist jstring jnumber jtrue jfalse jnull] `` The `jvalue` parser doesn't accept leading or trailing whitespace, so we need to define our parser for complete JSON documents as follows: `` let json = ws >>. jvalue .>> ws .>> eof `` This parser will try to consume a complete JSON input stream and, if successful, will return a `Json` AST of the input as the parser result. And that's it, we're finished with our JSON parser. If you want to try this parser out on some sample input, please take a look at the JSON project in the =Samples= folder. [/section] [section What now?] If this tutorial has whet your appetite for a more in-depth introduction to FParsec, just head over to the @user's guide@. If you can't wait to write your own parser, then bookmark the @parser overview@ page, maybe take a short look at the example parsers in the =Samples= folder and just start hacking. You can always consult the user's guide at a later point should you get stuck somewhere. 
[/section] [/section] ================================================ FILE: Doc/src/users-guide.txt ================================================  [section User's Guide] [split-section] [output-in-subdirectory] [#^ RError reference.Reply.members.Error] This user's guide is an in-depth introduction to parsing with FParsec. It explains how `Parser` functions work, covers the most important parser combinators in detail, explains how you can customize error messages, and discusses some important practical aspects of parser writing, such as debugging and performance optimizations. The aim of this user's guide is to prepare you for writing "real world" parser applications with FParsec. It doesn't try to cover every feature of the library, but focuses on covering the core concepts such that you can gain a deep understanding of the library design. Although there is some overlap between the @tutorial@ and this user's guide, it's probably a good idea to read the tutorial first, since it will give you a quick overview of the library that will later help you put things into perspective. You might also want to experiment with some small parsers before you start reading the user's guide, or maybe in parallel to reading it, so that it becomes easier for you to relate the dry technical discussions to exciting practical applications ☺ The first seven chapters of this user's guide build on each other. The remaining chapters are rather independent and can be read in any order. [section Parser functions] An FParsec parser is a function that reads input from a text stream. When it succeeds, it returns a result value (e.g. a parsed number or an [url "https://en.wikipedia.org/wiki/Abstract_syntax_tree" AST] node); when it fails, it returns error messages describing what went wrong. 
The following type abbreviation from the `Primitives` module defines the basic type of parser function supported throughout the FParsec library: ``type Parser<'Result,'UserState> = CharStream<'UserState> -> Reply<'Result>`` As you can see from this definition, parser functions only accept a single argument: a `CharStream<'UserState>` instance. The `CharStream` class is FParsec's specialized stream type for "text" streams, i.e. streams of Unicode chars. A `CharStream` can either be created directly from a string or it can be created from a file path or `System.IO.Stream`. In the latter cases the `CharStream` will take care of decoding the binary input into UTF-16 chars, similar to what a `System.IO.StreamReader` does. What separates `CharStream` from the `StreamReader` and similar classes is that it comes with some advanced features that make it especially suitable for backtracking parser applications. We will discuss the purpose of the `'UserState` type in more detail in later chapters. For now it's enough to note that the user state is a user-definable component of the `CharStream` state. If you don't need a user state, you will normally define `'UserState` to be `unit`. To save some key strokes and screen real estate, we usually abbreviate `'UserState` as `'u`. The `Reply<'Result>` value returned from a parser function is a simple value type container for the parser result and possible error messages. It contains a status field indicating whether the parser succeeded or not, a field for the result value (of type `'Result`) and a field with a possibly empty list of error messages. We will explain these fields in more detail in [^internals-of-a-simple-parser-function]. A very basic example of a parser is the `asciiLower` parser from the `CharParsers` module: ``val asciiLower: Parser<char,'u>`` It parses any lower case ASCII char, i.e. any char in the range `'a'` - `'z'`, and, if successful, returns the parsed char as part of its reply. 
Many predefined parsers expect one or more parameter values as arguments. Take for instance the `skipString` function: ``val skipString: string -> Parser<unit,'u>`` It takes a string as an argument and returns a parser that skips over this (and only this) string in the input. [note Implementing parser grammars with FParsec usually means composing parsers for higher-level grammar rules from parsers for lower-level rules. You start with simple parsers for the leaf nodes of your grammar and then work your way up step-by-step until you eventually obtain a parser for the complete grammar. The simple representation of parsers as functions makes this composition particularly easy and allows for a straightforward and intuitive implementation of the library primitives. ] [/section] [section Running parsers on input] While it is not difficult to construct a `CharStream` instance yourself, then apply a parser function to the `CharStream`, then interpret the returned `Reply` value and finally dispose the `CharStream` again, it takes less effort to instead use one of the several @`runParser...` functions@ from the `CharParsers` module. Among the `runParser...` functions `run` is the most convenient for simple testing purposes: ``val @run@: Parser<'a, unit> -> string -> ParserResult<'a,unit>`` `run` applies the parser given as the first argument to a `CharStream` constructed from the string argument and then captures the return value as a `ParserResult` value. The `ParserResult` type is a simple discriminated union that is a bit more convenient to interpret than the `Reply` values returned by `Parser` functions. 
For example: ``{fsi} > run pint32 "0xff";; val it : ParserResult = Success: 255 > run pint32 "0xgf";; val it : ParserResult = Failure: Error in Ln: 1 Col: 3 0xgf ^ Expecting: hexadecimal digit `` The text messages displayed in these examples after the `=` signs are the default string representations of the returned `ParserResult` values, just like they are printed in the [@ F# Interactive] console. The reference documentation describes the two union cases `Success` and `Failure` of the `ParserResult` type in more detail. `run` only supports parser functions with no user state, i.e. with a `unit` user state. If you want to test parsers that depend on a user state, you will need to use one of the other `runParser...` functions, e.g. `runParserOnString`. Please see the reference for more details on the @`runParser...` functions@. Note that the `runParser...` functions are primarily meant for the "end-users" of parsers, i.e. those users that apply an aggregate parser on the content of a complete input stream. This is a situation different from the one where you implement a `Parser` function yourself. In the latter case you typically work directly with the input `CharStream` and output `Reply` values. [/section] [section Internals of a simple `Parser` function] In the beginning of this user's guide we noted that `asciiLower` "parses" a lower case ASCII char and that `skipString` "skips" over a string, but we haven't yet explained what it actually means for a `Parser` function to "parse" a letter or "skip" a string. That's what we will do in this chapter. To explain how `Parser` functions work, we will discuss the implementation of a simple string parser. This also gives us the opportunity to explain some important details about the `Reply` and `CharStream` types. 
[section The code] The parser whose implementation we will discuss in this chapter is `` val stringReturn: string -> 'a -> Parser<'a,'u> `` Like `skipString str` the parser `stringReturn str result` skips over the string `str`, but it returns `result` as part of its reply value, instead of the `unit` value `()` that `skipString str` returns. This makes `stringReturn` a bit more general than `skipString`. Indeed, the two library parsers `pstring` and `skipString` are actually implemented with the help of `stringReturn`. For example, `skipString str` is defined as `stringReturn str ()`. A simplified version[fn The library version is a bit more complicated because it contains optimized paths for argument strings with only 1 or 2 chars.] of the actual implementation of `stringReturn` in the library is `` let [no-auto-link stringReturn] str result : Parser<_,_> = // 1 checkStringContainsNoNewlineChar str "pstring/skipString/stringReturn" // 2 let error = expectedString str // 3 fun stream -> // 4 if stream.Skip(str) then // 5 Reply(result) // 6 else // 7 Reply(Error, error) // 8 `` Let's start with the general structure of this implementation: We define a function [no-auto-link `stringReturn`] with two parameters that returns a function closure. The type annotation `: Parser<_,_>` on /line 1/ fixes the type of the returned function closure to `Parser<'a,'u>` and in particular constrains the type of its argument to `CharStream<'u>`. Remember, the type `Parser<'a,'u>` is simply an abbreviation for `CharStream<'u> -> Reply<'a>`, where `'a` represents the result type and `'u` the user state type. Implementing our parameterized parser as a function returning a parser closure allows us to factor out common setup work that only needs to be done once for every parser.[fn Even parsers without a parameter, like e.g. `asciiLower`, are actually compiled as properties returning a new function object every time they are called. 
This is because the user state type variable makes `asciiLower` generic, while function values can only have a non-generic type.] In this case we only need to check once (/line 2/) whether the string contains a newline char, i.e. `'\r'` or `'\n'`, (we'll explain below why this is necessary) and in /line 3/ we preconstruct the error message that is later used whenever the parser is applied and doesn't find `str` in the input (we'll write more about error messages in later chapters). The actual parsing logic is completely straightforward: On /line 5/ the parser calls the CharStream's `Skip` method with the argument `str`. If the next chars in the stream match `str`, `Skip` advances the stream's position by the length of the passed string and returns `true`; otherwise, it doesn't change the position of the stream and returns `false`. Thus, if the string is skipped, the parser returns with a `Reply` value containing the result (/line 6/). Otherwise, it returns a `Reply` with the preconstructed error message (/line 8/). [/section] [section The `Reply` type] This is a good time to discuss the `Reply` type in a little more detail. `` type Reply<'TResult> = struct new: 'TResult -> Reply<'TResult> new: ReplyStatus * ErrorMessageList -> Reply<'TResult> new: ReplyStatus * 'TResult * ErrorMessageList -> Reply<'TResult> val mutable Status: ReplyStatus /// If Status <> Ok then the Result value is undefined and may be null. val mutable Result: 'TResult val mutable Error: ErrorMessageList end `` Similar to a tuple, a `Reply` can be seen as an aggregate of its three fields: `Status`, `Result` and [no-auto-link `Error`]. The `Status` field contains a `ReplyStatus` enum value indicating whether the parser succeeded (`Ok`) or failed (`Error` or `FatalError`). By returning a `FatalError` instead of an `Error` a parser can signal that no error recovery should be tried (except through backtracking mechanisms, which we explain later). 
If the `Status` is `Ok`, the `Result` field contains the parser result; otherwise, its value is undefined (and `null`). The `Error` field holds a list of error messages in the form of an `ErrorMessageList` value. An empty `ErrorMessageList` is represented as a `null` value. The 1-argument constructor we use in /line 6/ sets the status to `Ok` and the result value to `result`. The 2-argument constructor we use in /line 8/ sets the status to `Error` and the error message to `error`. The `Reply` type also defines a 3-argument constructor, which simply sets the fields to the respective argument values. The default valuetype constructor with 0 arguments initializes the `Reply` value to `Reply(Error, null)`. The error messages returned in the `Reply` value implicitly refer to the current stream position. Since the `ErrorMessage` values stored in the `ErrorMessageList` do not themselves contain an error position, they can only be interpreted together with the position of the `CharStream` as it is when the parser returns. [/section] [section The parser state and the line and column count] Usually one `CharStream<'u>` instance is created per input file and all parser functions involved in parsing elements of the same file are passed the same `CharStream` instance. Since calling the methods of a `CharStream` may change its state, parser functions have to be careful about when and how they change the `CharStream` state, because it obviously may affect all parsers subsequently called. In the example above, `stringReturn` only advances the stream position when it succeeds. This makes it an *atomic* string parser, because it does not consume input if only the beginning of the argument string matches the input. Whether or not a parser consumes input before it fails has important implications for the error handling, as we will discuss later in this user's guide. 
Except for the freely customizable `UserState`, all the mutable state information in the `CharStream<'u>` instance pertains to the location of the next char in the text stream. The most important element of the state is the char `Index`, which uniquely identifies the UTF-16 char in the stream. In addition to the index of the next char, the `CharStream` also keeps track of that char's `Line` number and the index of the first char in the line, the `LineBegin`. By combining the `Index` and `LineBegin` we can calculate a `Column`. The `CharStream`'s `Name` serves as a description or identifier for the stream. Only the char index is strictly necessary for the core stream functionality. We also store the other pieces of state information in a `CharStream<'u>` instance because having all parser state information in one place reduces complexity and allows us to expose a more convenient API to `Parser` functions. [note The `CharStream<'u>.State` property returns a snapshot of all the mutable state components in the form of a `CharStreamState<'u>` value. The state information that is exposed through the `CharStream<'u>.State` property is *all* the state that is tracked by `FParsec` parsers, which is why we also refer to it as *the parser state*.[fn Strictly speaking, a `CharStream<'u>` instance has a little more publicly observable mutable state than the one that is also exposed through the `State` property. For example, the `MinRegexSpace` configuration parameter is not tracked in the `State` property. Another example is the value of the `IndexOfLastCharPlus1` property which changes once the last char of the stream is detected. However, there shouldn't be a reason that a parser needs to restore the old values of these properties upon backtracking, so we just treat these properties as constant and ignore them when we discuss the mutable `CharStream` state.] ] Ideally, the `CharStream` class would keep track of the column and line count in a completely automated fashion. 
Ideally, the `CharStream` class would give the user a way to freely specify the recognized set of newline character sequences and all `CharStream` methods then would automatically detect such newlines in the input. Unfortunately, such a configuration option would be difficult to implement efficiently and would likely have a severe impact on performance (at least in comparison to the hard-coded alternative, and with the current language and compiler support). Since the `CharStream` can't provide automatic support for all possible notions of a newline, it exposes two sets of methods in its interface. One set provides the basic stream operations, such as skipping a certain number of UTF-16 chars or matching a string with the stream content. These methods come without any automatic newline detection, but they offer optimal performance and give the user complete freedom to manually register any kind of newline. The other set of methods provides some frequently needed higher-level text operations, such as skipping over a sequence of whitespace chars or reading a sequence of chars satisfying a given predicate function. These other methods automatically detect any of the 3 standard newline char sequences `"\n"`, `"\r\n"` and `"\r"`, because that's the notion of a newline used by most text applications. In combination both sets of methods cover the needs of a majority of text parsers in a convenient and efficient manner. [note Maybe you wonder why we don't just leave the line and column count completely to the user instead of complicating the `CharStream` API. The reason we keep track of a line count in the `CharStream` class is that most non-trivial text-parsing applications require a line count for error reporting purposes. Implementing it at a relatively low API level brings significant performance advantages and relieves higher-level API users from constantly having to code around the special case of newline chars.] 
If you have a look at the reference documentation for `CharStream`, you'll see that the `CharStream` methods that automatically detect newlines are easily discernible by their name. The `Skip` method we used in the above example does *not* belong to these methods, which is why we have to make sure in /line 2/ that the string doesn't contain any newlines. In practice one hardly ever uses a parser like `stringReturn` with a string containing a newline, hence lifting this restriction wouldn't be worth the effort, especially since simple workarounds are available.[fn For example, `stringReturn "str1\nstr2" result` can be replaced with `attempt (skipString "str1" >>. newline >>. stringReturn "str2" result)`.] [/section] [/section] [section Applying parsers in sequence] Now that we have discussed how `Parser` functions work, we can start explaining how FParsec's parser combinators work. In this chapter we will discuss combinators that allow you to apply multiple parsers in sequence, i.e. parse the beginning of the input with the first parser, then parse the following input with the second parser, and so on. [section The definition of `>>.`] The simplest combinators for sequentially applying two parsers are `` val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u> val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u> `` Both operators take two parsers as arguments and return a combined parser that applies the two parsers in sequence. As you can infer from the type signatures, `p1 >>. p2` returns the result of `p2` and `p1 .>> p2` the result of `p1`. In each case the point points to the parser whose result is returned. 
In order to explain exactly what it means to apply two parsers in sequence, we give a full definition of the `>>.` operator: `` let [no-auto-link (>>.)] (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) = fun stream -> let reply1 = p1 stream if reply1.Status = Ok then let stateTag = stream.StateTag let mutable reply2 = p2 stream if stateTag = stream.StateTag then // (1) reply2.[^RError Error] <- mergeErrors reply1.[^RError Error] reply2.[^RError Error] // (2) reply2 else // reconstruct error reply with new result type Reply(reply1.Status, reply1.[^RError Error]) `` The implementation of `p1 >>. p2` should be quite self-explanatory: First `p1` is applied to the input `stream`. If `p1` succeeds, i.e. if the status of `reply1` is `Ok`, `p2` is applied to the stream and the reply of `p2` then becomes the reply of the combined parser. However, if `p1` fails, its reply is immediately propagated as the reply of the combined parser. Since `reply1` has type `Reply<'a>` but `p1 >>. p2` needs to return a `Reply<'b>`, the error reply needs to be reconstructed with a new result type before it can be returned. [/section] [section Merging error messages] We mentioned earlier that the error messages returned in the `Reply.[^RError Error]` field implicitly refer to the state of the `CharStream` at the time the parser returns. In particular the error messages refer to the then current *stream position*. Since the messages do not themselves contain a separate record of the error position they can only be interpreted together with the `CharStream` state. When `p2` does not change the parser state, the error messages from both replies refer to the state of the `CharStream` as it is when `p1 >>. p2` returns. Thus, the combinator needs to merge the (immutable) `ErrorMessageList`s from both replies, so that the returned list contains all the relevant error messages (see the line marked with `(2)`). 
In order to check whether the `CharStream` state has changed, the combinator does not compare the full states from before and after `p2` is invoked. Instead it only compares the `StateTag` values (see line `(1)`). This improves performance and --- for most practical purposes --- is almost equivalent to comparing the full state, as we will discuss below. [note The way `[no-auto-link (>>.)]` handles errors and merges error messages is a template for all combinators in FParsec that perform multiple sequential parser invocations.] You may wonder why the error messages get merged even though `p1` succeeded. The somewhat counterintuitive reason is that parsers can return nonempty error message lists even when they don't fail. For example, a parser that skips over the *optional* string `"str"` will return `Reply(Ok, (), expectedString "str")` if it doesn't find the string in the input. In this case the error message describes what further input the parser could have parsed at the current stream position. If subsequently a parser fails at the same position, all error messages for the same position can be aggregated to give the user as much information as possible about what went wrong and what alternative inputs could have been parsed at the given position. The following sample demonstrates the helpful effect of this error handling behaviour: `` let str s = pstring s let oneOrTwoInts = str "(" >>. tuple2 pint32 (opt (str "," >>. spaces >>. pint32)) .>> str ")" `` ``{fsi} > run oneOrTwoInts "(1 2)";; val it : ParserResult<(int32 * int32 option),unit> = Failure: Error in Ln: 1 Col: 3 (1 2) ^ Expecting: ')' or ',' `` This error message wouldn't mention the possibility of a missing comma if the `.>>` combinator did not merge error messages for the same position when the left-hand side parser succeeds. [/section] [section The `StateTag`] Parser combinators often need to check whether a parser has changed the `CharStream` state. 
In a typical FParsec application these checks are performed so frequently that an efficient implementation is important for the overall parser performance. Since a straightforward comparison of the complete `CharStream` states can be quite expensive, the `CharStream` class provides a shortcut for this purpose: the `StateTag`. The `StateTag` is a simple integer counter that is incremented every time a `CharStream` method changes the state. Thus, if the `StateTag` hasn't changed, you can safely infer that the state hasn't changed either.[fn Of course, this doesn't apply if you manually set back the `StateTag` to the old value. There is also the purely theoretical possibility that the `StateTag` has overflown and was incremented exactly 2[sup 64] times (or 2[sup 32] if you define the `SMALL_STATETAG` conditional compiler symbol).] Except for some special cases, the opposite is also true: if the `StateTag` has changed, the state has changed too. In the following special cases checking whether the `StateTag` has changed is not equivalent to checking whether the `CharStream` state has changed, because the tag may change even though the state doesn't: - A parser calls the basic `Skip` or `Read` methods with a 0 offset or an empty argument string. - A parser seeks the `CharStream` to the current position or replaces the user state with the current value. - A parser makes several calls to `CharStream` methods and in later calls undoes the changes it made in earlier calls. The first and second cases only have practical relevance for generic or parameterized parsers and can be simply avoided by checking the arguments before calling the respective `CharStream` methods. The third case only arises in the context of backtracking and it too can be easily avoided, either by using the `BacktrackTo` method for backtracking or by manually restoring the `StateTag` after the backtracking. 
In practice these special cases are extremely rare, usually without consequences for the parser behaviour and always easily avoidable. Hence, FParsec combinators make free use of the `StateTag` to check whether a parser has changed the `CharStream` state. [/section] [section Generalizing `>>.`] The parsers `p1 .>> p2` and `p1 >>. p2` only return the results of `p1` and `p2` respectively. If you want to combine the results from both `p1` and `p2`, you could use the `pipe2` combinator instead: ``val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>`` The parser `pipe2 p1 p2 f` will apply `p1` and `p2` in sequence, exactly like `>>.`, but instead of returning one of the result values of `p1` and `p2` it will return the result of the function application `f x1 x2`, where `x1` and `x2` are the results returned by `p1` and `p2`. There are also `pipe3`, `pipe4` and `pipe5` combinators, in case you need more than two arguments. Often these combinators are used to pass arguments to object constructors, like in the following example of a parser for a comma-separated list of XYZ coordinates: `` type Data = Point of float*float*float let ws = spaces let str s = pstring s .>> ws let number = pfloat .>> ws let point = pipe3 number (str "," >>. number) (str "," >>. number) (fun x y z -> Point(x, y, z)) `` ``{fsi} > run point "1, 2, 3";; val it : ParserResult<Data,unit> = Success: Point (1.0,2.0,3.0) `` If you just want to return the parsed values as a tuple, you can use the predefined `tuple2-5` parsers. For example, `tuple2 p1 p2` is equivalent to `pipe2 p1 p2 (fun x1 x2 -> (x1, x2))`. `tuple2` is also available under the operator name `.>>.`, so that you can write `p1 .>>. p2` instead of `tuple2 p1 p2`. 
There is no `pipe1` combinator, but there is an operator for the same purpose: `` val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b, 'u> `` This operator is used similarly to F#'s ubiquitous pipeline operator `|>`: `` type Expression = Number of int | Identifier of string let number = pint32 |>> Number `` ``{fsi} > run number "123";; val it : ParserResult<Expression,unit> = Success: Number 123 `` [/section] [section The `>>=` combinator] All the sequencing and piping combinators we have discussed so far could be implemented with the help of the "bind" combinator: ``val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>`` Instead of two parsers this combinator takes a parser and a *function producing a parser* as arguments. The combined parser `p >>= f` first applies the parser `p` to the input, then it applies the function `f` to the result returned by `p` and finally it applies the parser returned by `f` to the input. If we knew in advance that `p` returns `x` then `p >>= f` would be equivalent to `p >>. (f x)`. The `>>=` combinator is quite versatile. For example, the following code implements five of the previously discussed combinators in terms of `>>=` and the trivial `preturn` primitive: `` let preturn x = fun stream -> Reply(x) let (|>>) p f = p >>= fun x -> preturn (f x) let (.>>) p1 p2 = p1 >>= fun x -> p2 >>= fun _ -> preturn x let (>>.) p1 p2 = p1 >>= fun _ -> p2 >>= fun y -> preturn y let (.>>.) p1 p2 = p1 >>= fun x -> p2 >>= fun y -> preturn (x, y) let pipe2 p1 p2 f = p1 >>= fun x -> p2 >>= fun y -> preturn (f x y) `` In typical FParsec code `>>=` is only seldom used, because in many situations where `>>=` could in principle be used one of the other specialized operators is more convenient to use and faster. However, on a conceptual level this combinator is important, because its generality allows us to define and test many combinators through their equivalence with a parser defined in terms of `>>=`. 
This combinator is also significant for the role it plays in the monadic parser construction syntax, see [^where-is-the-monad]. [/section] [/section] [section Parsing sequences] In the previous chapter we discussed various ways to sequentially apply two or more parsers. In this section we will explain how to repeatedly apply the same parser in order to parse a sequence with an arbitrary number of elements. [section The `many` parser] In regular expressions and many grammar formalisms a [url "https://en.wikipedia.org/wiki/Kleene_star" Kleene Star] marks a parser rule that can be repeatedly applied. For example, `number*` could represent a sequence of zero or more numbers. In FParsec the `many` combinator takes the place of the Kleene Star: ``val many: Parser<'a,'u> -> Parser<'a list, 'u>`` With `many` the number example could be translated into the following FParsec code: `` let ws = spaces let number = pint32 .>> ws `` ``{fsi} > run (many number) "1 2 3 4";; val it : ParserResult = Success: [1; 2; 3; 4] `` The parser `many p` repeatedly applies the parser `p` until `p` fails, i.e. it "greedily" parses as many occurrences of `p` as possible. The results of `p` are returned as a list in the order of occurrence. At the end of a sequence parsed with `many p` the argument parser `p` must fail without consuming input (or changing the parser state in any other way). When `p` fails after consuming input, `many p` fails with the error returned by `p`. The following example illustrates this behaviour: `` let ws = spaces let str s = pstring s let numberInBrackets = str "[" >>. pint32 .>> str "]" .>> ws `` The `many numberInBrackets` parser successfully parses the first two numbers in this test run: ``{fsi} > run (many numberInBrackets .>> str "(c)") "[1] [2] (c)";; val it : ParserResult = Success: [1; 2] `` However, the same parser fails while trying to parse the 3rd number in this test run: ``{fsi} > run (many numberInBrackets >>. 
str "[c]") "[1] [2] [c]";; val it : ParserResult = Failure: Error in Ln: 1 Col: 10 [1] [2] [c] ^ Expecting: integer number (32-bit, signed) `` The `many` parser failed here because the `numberInBrackets` parser failed *after consuming input*. In the chapter on @looking ahead and backtracking@ we'll come back to this example and discuss how you can modify the `numberInBrackets` parser such that it fails without consuming input if an opening bracket is not followed by a number.[fn `many` doesn't automatically backtrack when the argument parser fails after changing the parser state for two reasons: - In most situations automatic backtracking would only obscure error messages, because the reported input error was indeed severe and backtracking would only trigger secondary error messages that detract from the main error. - In the few instances where you rely on backtracking behaviour you can easily introduce it using the combinators detailed in [^looking-ahead-and-backtracking]. Marking the occasions where you rely on backtracking with these combinators makes your parser implementations easier to debug and optimize. ] Since `many p` continues until `p` fails, you have to be a little careful not to supply an argument parser `p` that can succeed without consuming input. The following example shows what happens if you accidentally supply such an argument parser: ``{fsi} > run (many (many digit .>> ws)) "123 456";; System.InvalidOperationException: (Ln: 1, Col: 8): The combinator 'many' was applied to a parser that succeeds without consuming input and without changing the parser state in any other way. (If no exception had been raised, the combinator likely would have entered an infinite loop.) (... stack trace ...) Stopped due to error `` The problem here is that `many digit .>> ws` will succeed without changing the parser state if it can't parse any digits or trailing whitespace. 
Thus, if the combined parser hadn't thrown an exception, it would have entered an infinite loop at the end of the input. We can easily avoid the error in the last example by requiring the inner parser to consume at least one digit. Instead of `many digit`, which succeeds with an empty list if it can't parse any digits, we can use `many1 digit`, which fails if it can't parse at least one digit: ``{fsi} > run (many (many1 digit .>> ws)) "123 456";; val it : ParserResult<char list list,unit> = Success: [['1'; '2'; '3']; ['4'; '5'; '6']] `` Before we continue, we should point out that an example like `many1 digit` is somewhat artificial, because you hardly ever want to parse digit chars into a list. If you want to parse numbers, one of the [^parsing-numbers number parsers] is usually the best way forward. If you actually need the individual chars, you normally need them as a string, not as a list. [tip If you want to parse a sequence of chars, you should generally prefer one of the specialized [^parsing-strings-directly string parsers]. ] If you just want to skip over a sequence and don't need the list of parser results, you can use the optimized combinators `skipMany` or `skipMany1`. [/section] [section `sepBy` and `sepEndBy`] Often the elements of a sequence are separated by some separator. A convenient way to parse such sequences are the `sepBy` and `sepEndBy` combinators. `sepBy p sep` parses a sequence of `p` separated by `sep` and returns the results in a list. `sepEndBy` parses a sequence of `p` separated *and optionally ended* by `sep`.
With these combinators you could for example define the following two parsers for a semicolon-separated list of numbers in brackets: `` let str s = pstring s let sepList = between (str "[") (str "]") (sepBy pint32 (str ";")) let sepEndList = between (str "[") (str "]") (sepEndBy pint32 (str ";")) `` The `sepList` parser only accepts lists where the semicolons only occur between two numbers: ``{fsi} > run sepList "[]";; val it : ParserResult<int32 list,unit> = Success: [] > run sepList "[1;2;3]";; val it : ParserResult<int32 list,unit> = Success: [1; 2; 3] > run sepList "[1;2;3;]";; val it : ParserResult<int32 list,unit> = Failure: Error in Ln: 1 Col: 8 [1;2;3;] ^ Expecting: integer number (32-bit, signed) `` The `sepEndList` parser also accepts a terminating semicolon: ``{fsi} > run sepEndList "[1;2;3]";; val it : ParserResult<int32 list,unit> = Success: [1; 2; 3] > run sepEndList "[1;2;3;]";; val it : ParserResult<int32 list,unit> = Success: [1; 2; 3] `` Like for the `many` combinator, there are also variants of the `sepBy` and `sepEndBy` parsers that require at least one element in the sequence and/or skip over a sequence without returning the results. Have a look at the [^parsing-sequences parser overview]. [/section] [section Parsing a sequence without creating a list] If you want to parse a sequence and you don't need the results as an F# list, you can avoid the allocation of a temporary list by defining a custom sequence parser using the inline helper methods `Inline.Many` and `Inline.SepBy`. For example, if you wanted to define a variant of `many` that parses the elements directly into a `ResizeArray`, i.e.
a `System.Collections.Generic.List`, you could use the following definition: `` let manyRA p = // the compiler expands the call to Inline.Many to an optimized sequence parser Inline.Many(elementParser = p, stateFromFirstElement = (fun x0 -> let ra = ResizeArray<_>() ra.Add(x0) ra), foldState = (fun ra x -> ra.Add(x); ra), resultFromState = (fun ra -> ra), resultForEmptySequence = (fun () -> ResizeArray<_>())) `` A test run: ``{fsi} > run (manyRA (pint32 .>> spaces)) "1 2 3";; val it : ParserResult<System.Collections.Generic.List<int32>,unit> = Success: seq [1; 2; 3] `` The reference documentation for the `Inline` class contains some more examples. [/section] [/section] [section Parsing alternatives] FParsec's main operator for trying to parse input with alternative parsers is `` val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u> `` This operator implements a form of *prioritized choice*: it only tries to parse input with the second parser if the first parser fails. The following example illustrates this behaviour: `` type Char = AsciiChar of char | Char of char let asciiLetter = asciiLetter |>> AsciiChar let letter = letter |>> Char `` ``{fsi} > run (asciiLetter <|> letter) "a";; val it : ParserResult<Char,unit> = Success: AsciiChar 'a' > run (letter <|> asciiLetter) "a";; val it : ParserResult<Char,unit> = Success: Char 'a' > run (asciiLetter <|> letter) "ä";; val it : ParserResult<Char,unit> = Success: Char 'ä' `` The prioritized choice also implies that FParsec doesn't enforce a longest-match rule like in regular expressions: ``{fsi} > run (pstring "a" <|> pstring "ab") "ab";; val it : ParserResult<string,unit> = Success: "a" `` If you want to accept more than two alternatives, you can either chain multiple `<|>` operators, like in `p1 <|> p2 <|> p3`, or you can use the `choice` combinator, which accepts a sequence of parsers as the argument, like in `choice [p1; p2; p3]`. In both cases the argument parsers are tried from left to right until a parser succeeds.
A good understanding of the `<|>` operator is important for productively working with FParsec, so let's have a look at its implementation: `` let (<|>) (p1: Parser<'a,'u>) (p2: Parser<'a,'u>) : Parser<'a,'u> = fun stream -> let stateTag = stream.StateTag let mutable reply = p1 stream if reply.Status = Error && stateTag = stream.StateTag then let error1 = reply.[^RError Error] reply <- p2 stream if stateTag = stream.StateTag then reply.[^RError Error] <- mergeErrors reply.[^RError Error] error1 reply `` As you can see, the parser `p1 <|> p2` works as follows: First, it applies the parser `p1` to the input stream. If `p1` succeeds, the reply of `p1` is returned. If `p1` fails with a non-fatal error (i.e. with the status `Error`, not `FatalError`) and *without changing the parser state*, the parser `p2` is applied. If `p2` does not change the parser state, the error messages from both parsers are merged. (We compare the `StateTag` values instead of the actual parser states for optimization reasons, see [^applying-parsers-in-sequence.the-statetag].) The most important point to note here is that `p1 <|> p2` will always return with the reply of `p1` if `p1` changes the parser state, even if `p1` eventually fails. Remember that the stream position is part of the parser state, so if `p1` fails after consuming input, `p2` will not be applied. Since a parser usually consumes input as soon as it can accept at least one atomic token from the input, this means that `p1 <|> p2` by default implements backtracking with only a "one token look-ahead". Consider the following example: `` let parserA = spaces >>. pstring "a" let parserB = spaces >>. pstring "b" run (parserA <|> parserB) " b";; `` ``{fsi} > run (parserA <|> parserB) " b";; val it : ParserResult = Failure: Error in Ln: 1 Col: 2 b ^ Expecting: 'a' `` The combined parser fails because `parserA` fails after consuming the whitespace, so that `parserB` never gets tried. 
Of course, this simple parser could be easily fixed by factoring out the common prefix: ``{fsi} > run (spaces >>. (pstring "a" <|> pstring "b")) " b";; val it : ParserResult = Success: "b" `` The restriction of the look-ahead in `p1 <|> p2` may strike you as odd at first, but it has two big advantages: 1) The error reporting is simplified and error messages are easier to understand because terminal errors can only occur at one position at a time. 2) Parser developers are guided towards more efficient grammar implementations because parsers requiring more than a one token look-ahead need to be explicitly annotated with the `attempt` or `>>?` combinators (see the [^looking-ahead-and-backtracking next chapter]).[fn In case you're wondering: No, we're not trying to sell a design limitation as a feature here. In Parsec, the Haskell library on which FParsec's design was originally based, the limited look-ahead is essential for the library design, because it allows Parsec to exploit Haskell's laziness in order to ensure space efficiency. FParsec has a different implementation in which the limited look-ahead has [^block-wise no effect on space efficiency]. We stick to the limited look-ahead because we think it's the appropriate default behaviour for a parser combinator library like FParsec. Now, admittedly, if FParsec could automatically optimize the implementation of a parser in a way that minimized backtracking, e.g. by automatically left-factoring grammars, then backtracking would be less of a problem and a different default behaviour might become more attractive.] [/section] [section Looking ahead and backtracking] [section Backtracking] Sometimes you need more than the default one token look-ahead of `<|>`, either because it really can't be avoided or because avoiding it would be too inconvenient. In those instances you can use one of the combinators `attempt`, `>>?`, `.>>?` or `>>=?` to force a parser to backtrack after an error. 
The `attempt` combinator `` val attempt: Parser<'a,'u> -> Parser<'a,'u> `` takes a parser as the argument and returns a wrapped parser that behaves exactly like the argument, except that if the argument parser fails with an output state different from the input state or with a fatal error, the wrapped parser will backtrack to the original input state and report a non-fatal error. You can observe the effect of the `attempt` combinator in the following error message: ``{fsi} > run (attempt (pstring "a" >>. pstring "b")) "ac";; val it : ParserResult = Failure: Error in Ln: 1 Col: 1 ac ^ The parser backtracked after: Error in Ln: 1 Col: 2 ac ^ Expecting: 'b' `` The next example demonstrates the effect of `attempt` on the choice combinator. `` let str s = pstring s let ab = str "a" .>>. str "b" let ac = str "a" .>>. str "c" `` Without `attempt` the following test produces an error: ``{fsi} > run (ab <|> ac) "ac";; val it : ParserResult<(string * string),unit> = Failure: Error in Ln: 1 Col: 2 ac ^ Expecting: 'b' `` By introducing `attempt` we allow the `<|>` combinator to recover from the error in the first branch: ``{fsi} > run ((attempt ab) <|> ac) "ac";; val it : ParserResult<(string * string),unit> = Success: ("a", "c") `` Sometimes it can be a disadvantage that `attempt` will trigger backtracking after any error returned by the argument parser, no matter how much content the parser has consumed. Consider for example a parser like `prefix >>. expr`, where `expr` is a parser for a potentially large and deeply nested expression. If you wrap this parser with `attempt` then the wrapped parser will not only backtrack if an error occurs within the prefix or directly after the prefix, but also if it occurs anywhere in the expression. However, in most cases you only want the parser to backtrack if the error occurs directly after the prefix, not if the error occurs deeply inside the expression parser. 
For situations like this FParsec defines the `>>?`, `.>>?`, `.>>.?` and `>>=?` operators. The `>>?` combinator `` val (>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u> `` behaves like the `>>.` operator, except that `p1 >>? p2` will backtrack to the beginning if `p2` fails with a non-fatal error and without changing the parser state, even if `p1` has changed the parser state. Similarly, `.>>?`, `.>>.?` and `>>=?` behave like `.>>`, `.>>.` and `>>=`, except that they will backtrack to the beginning if the second parser fails with a non-fatal error and without changing the parser state. The following tests illustrate the differences between backtracking implemented via `attempt` and `.>>.?`. `` let bInBrackets = str "[" >>. str "b" .>> str "]" `` A test with `attempt` on the left side of `<|>`: ``{fsi} > run ((attempt (str "a" .>>. bInBrackets)) <|> ac) "a[B]";; val it : ParserResult<(string * string),unit> = Failure: Error in Ln: 1 Col: 2 a[B] ^ Expecting: 'c' `` A test with `attempt` on both sides of `<|>`: ``{fsi} > run ((attempt (str "a" .>>. bInBrackets)) <|> attempt ac) "a[B]";; val it : ParserResult<(string * string),unit> = Failure: Error in Ln: 1 Col: 1 a[B] ^ The parser backtracked after: Error in Ln: 1 Col: 2 a[B] ^ Expecting: 'c' The parser backtracked after: Error in Ln: 1 Col: 3 a[B] ^ Expecting: 'b' `` A test with `.>>.?` instead of `attempt` on the left side of `<|>`: ``{fsi} > run (str "a" .>>.? bInBrackets <|> ac) "a[B]";; val it : ParserResult<(string * string),unit> = Failure: Error in Ln: 1 Col: 3 a[B] ^ Expecting: 'b' `` You can of course chain multiple of the `>>?` and `.>>?` operators to backtrack longer distances, like in `prefix1 >>? prefix2 >>? p .>>? postfix`.
[note When implementing backtracking parsers you should generally prefer the `>>?`, `.>>?` and `.>>.?` combinators to the `attempt` combinator, because the former combinators offer finer control over the exact backtracking behaviour and hence will often lead to better error reporting. Note however that neither can completely replace the other. ] Backtracking combinators can also be useful when parsing sequences. In the chapter "Parsing sequences" we briefly discussed the following example: `` let ws = spaces let str s = pstring s let numberInBrackets = str "[" >>. pint32 .>> str "]" .>> ws `` ``{fsi} > run (many numberInBrackets >>. str "[c]") "[1] [2] [c]";; val it : ParserResult = Failure: Error in Ln: 1 Col: 10 [1] [2] [c] ^ Expecting: integer number (32-bit, signed) `` The problem here is that the argument parser to `many` fails after consuming input if it encounters a bracket that is not followed by a digit. If we decided that this is a defect of the parser as opposed to the grammar, we could fix it by simply replacing a `>>.` with `>>?`. `` let numberInBrackets = str "[" >>? pint32 .>> str "]" .>> ws `` ``{fsi} > run (many numberInBrackets .>> str "[c]") "[1] [2] [c]";; val it : ParserResult = Success: [1; 2] `` A similar example is the `sepEndBy1` combinator for parsing a sequence of one or more elements separated and optionally ended by a separator. If FParsec didn't provide this combinator, you could define it yourself using `many` and `>>?`: `` let sepEndBy1_ p sep = pipe2 p (many (sep >>? p)) (fun hd tl -> hd::tl) .>> opt sep `` The following tests show that our `sepEndBy1` replacement works as expected: ``{fsi} > run (sepEndBy1_ pint32 (str ";")) "1;2;3";; val it : ParserResult = Success: [1; 2; 3] > run (sepEndBy1_ pint32 (str ";")) "1;2;3;";; val it : ParserResult = Success: [1; 2; 3] `` Note however that in contrast to `sepEndBy1_` the version of `sepEndBy1` provided by FParsec doesn't need to parse the separator twice when it terminates a sequence. 
[/section] [section Parser predicates] The backtracking combinators allow you to "look ahead" by tentatively parsing input and then backtracking if an error occurs. However, they don't allow you to conditionally parse the input with one parser depending on the success or failure of another parser. This is what the following two combinators are for: `` val followedBy: Parser<'a,'u> -> Parser<unit,'u> val notFollowedBy: Parser<'a,'u> -> Parser<unit,'u> `` The parser `followedBy p` (`notFollowedBy p`) succeeds *without changing the parser state* if `p` succeeds (fails) when applied at the current position. For example, both the following parser definitions only parse positive integer literals without a leading zero: `` let p1 = followedBy (satisfy ((<>) '0')) >>. pint32 let p2 = notFollowedBy (pstring "0") >>. pint32 `` Both definitions will correctly parse `"123"` and fail to parse `"01"`: ``{fsi} > run p1 "123";; val it : ParserResult<int32,unit> = Success: 123 > run p1 "01";; val it : ParserResult<int32,unit> = Failure: Error in Ln: 1 Col: 1 01 ^ Unknown Error(s) > run p2 "123";; val it : ParserResult<int32,unit> = Success: 123 > run p2 "01";; val it : ParserResult<int32,unit> = Failure: Error in Ln: 1 Col: 1 01 ^ Unknown Error(s) `` While both parsers work as expected, the generated error messages aren't very helpful. The problem is that `followedBy` and `notFollowedBy` can't generate better error messages, because they don't know what kind of input their argument parsers accept.[fn In the case of `notFollowedBy p` the problem is clear: `notFollowedBy p` fails if `p` succeeds and when `p` succeeds, `p` doesn't generate an error message that `notFollowedBy` could reuse. In the case of `followedBy p` the situation is different: `followedBy p` fails if `p` fails, so `followedBy` could try to reuse the error messages generated by `p`. However, the error messages generated by the argument parser will in practice often not suffice to explain what kind of input is expected.
So, for reasons of consistency and performance, `followedBy` doesn't even try to reuse the error messages generated by the argument parser.] To improve the error messages you can either use the "labeled" combinator variants `followedByL` and `notFollowedByL` or you could use the labelling operator `<?>` that we will discuss in the next chapter. For example: `` > run (followedByL (satisfy ((<>) '0')) "positive int w/o leading 0" >>. pint32) "01";; val it : ParserResult<int32,unit> = Failure: Error in Ln: 1 Col: 1 01 ^ Expecting: positive int w/o leading 0 > run (followedBy (satisfy ((<>) '0')) >>. pint32 <?> "positive int w/o leading 0") "01";; val it : ParserResult<int32,unit> = Failure: Error in Ln: 1 Col: 1 01 ^ Expecting: positive int w/o leading 0 > run (notFollowedByL (pstring "0") "'0'" >>. pint32) "01";; val it : ParserResult<int32,unit> = Failure: Error in Ln: 1 Col: 1 01 ^ Unexpected: '0' `` The parser `notFollowedByL (pstring "0") "'0'"` from the last example could actually be simplified to `notFollowedByString "0"`, which uses the specialized parser predicate `notFollowedByString`. In [^reference.parser-overview.conditional-parsing-and-looking-ahead] you'll find an overview of all available parser predicates. A frequent application for the `notFollowedBy` predicate are sequence parsers similar to `many (notFollowedBy pEnd >>. p) .>> pEnd`. If you are writing such a parser, you should check whether you can replace it with an application of one of the [^manyTill-parsers `manyTill` parsers]. Please consult the reference for more details. Before we conclude this chapter we want to emphasize that you're not limited to the built-in (backtracking) combinators of FParsec. A great advantage of FParsec is the simplicity with which you can write custom combinators using the low-level API.
For example, you could define a combinator that backtracks if the result of the argument parser doesn't satisfy a predicate function: `` let resultSatisfies predicate msg (p: Parser<_,_>) : Parser<_,_> = let error = messageError msg fun stream -> let state = stream.State let reply = p stream if reply.Status <> Ok || predicate reply.Result then reply else stream.BacktrackTo(state) // backtrack to beginning Reply(Error, error) `` With this combinator you could conveniently define a parser for positive ints: `` let positiveInt = pint32 |> resultSatisfies (fun x -> x > 0) "The integer must be positive." `` ``{fsi} > run positiveInt "1";; val it : ParserResult = Success: 1 > run positiveInt "-1";; Error in Ln: 1 Col: 1 -1 ^ The integer must be positive. `` [/section] [/section] [section Customizing error messages] Generating relevant and informative parser error messages is one of FParsec's greatest strengths. The top-down approach of recursive-descent parsing guarantees that there is always enough context to describe the exact cause of a parser error and how it could be avoided. FParsec exploits this context to automatically generate descriptive error messages whenever possible. This chapter explains how you can ensure with minimal efforts that your parser always produces understandable error messages. As we already described in detail in [^applying-parsers-in-sequence.merging-error-messages], error reporting in FParsec is based on the following two principles: - Parsers that fail or could have consumed more input return as part of their `Reply` an `ErrorMessageList` describing the input they expected or the reason they failed. - Parser combinators aggregate all error messages that apply to the same input position and then propagate these error messages as appropriate. The various error messages in the previous chapters demonstrate that the built-in error reporting usually works quite well even without any intervention by the parser author. 
However, sometimes FParsec lacks the information necessary to produce an informative error message by itself. Consider for example the `many1Satisfy f` parser, which parses a string consisting of one or more chars satisfying the predicate function `f`. If this parser fails to parse at least one char, the generated error is not very helpful: ``{fsi} > run (many1Satisfy isLetter) "123";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 123 ^ Unknown Error(s) `` The problem here is that `many1Satisfy` can't describe what chars the function predicate accepts. Hence, when you don't use `many1Satisfy` as part of a combined parser that takes care of a potential error, you better replace it with `many1SatisfyL`, which allows you to describe the accepted input with a label (hence the "L"): ``{fsi} > run (many1SatisfyL isLetter "identifier") "123";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 123 ^ Expecting: identifier `` There are also labelled variants of other parsers and combinators, for example `choiceL` and `notFollowedByL`. If there is no labelled parser variant or you want to replace a predefined error message, you can always use the labelling operator `` val (<?>): Parser<'a,'u> -> string -> Parser<'a,'u> `` The parser `p <?> label` behaves like `p`, except that the error messages are replaced with `expectedError label` if `p` does not change the parser state (usually because `p` failed). For example, if FParsec didn't provide `many1SatisfyL`, you could define it yourself as `` let many1SatisfyL f label = many1Satisfy f <?> label `` The labelling operator is particularly useful for producing error messages in terms of higher-level grammar productions instead of error messages in terms of lower-level component parsers.
Suppose you want to parse a string literal with the following parser `` let literal_ = between (pstring "\"") (pstring "\"") (manySatisfy ((<>) '"')) `` If this parser encounters input that doesn't start with a double quote it will fail with the error message produced by the parser for the opening quote: ``{fsi} > run literal_ "123";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 123 ^ Expecting: '"' `` In situations like these an error message that mentions the aggregate thing you're trying to parse will often be more helpful: `` let literal = literal_ <?> "string literal in double quotes" `` ``{fsi} > run literal "123";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 123 ^ Expecting: string literal in double quotes `` Note that `<?>` only replaces the error message if the parser doesn't consume input. For example, our `literal` parser won't mention that we're trying to parse a string literal if it fails after the initial double quote: ``{fsi} > run literal "\"abc def";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 9 "abc def ^ Note: The error occurred at the end of the input stream. Expecting: '"' `` With the compound labelling operator `<??>` you can make sure that the compound gets mentioned even if the parser fails after consuming input: `` let literal = literal_ <??> "string literal in double quotes" `` ``{fsi} > run literal "\"abc def";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 "abc def ^ Expecting: string literal in double quotes string literal in double quotes could not be parsed because: Error in Ln: 1 Col: 9 "abc def ^ Note: The error occurred at the end of the input stream. Expecting: '"' `` [tip If you don't like the formatting of these error messages, you can write a custom formatter for your application. The data structure in which error messages are stored is easy to query and process. See the reference for the [^FParsec..Error `Error` module].
] The parsers we discussed so far in this chapter only generated `Expected` error messages, but FParsec also supports other types of error messages. For example, the `notFollowedByL` parser generates `Unexpected` error messages: ``{fsi} > run (notFollowedByL spaces "whitespace") " ";; val it : ParserResult<unit,unit> = Failure: Error in Ln: 1 Col: 1 ^ Unexpected: whitespace `` Error messages that don't fit into the `Expected` and `Unexpected` categories can be produced with the `fail` and `failFatally` primitives: `` let theory = charsTillString "3) " true System.Int32.MaxValue >>. (pstring "profit" <|> fail "So much about that theory ... ;-)") let practice = "1) Write open source library 2) ??? 3) lot's of unpaid work" `` ``{fsi} > run theory practice;; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 40 1) Write open source library 2) ??? 3) lot's of unpaid work ^ Expecting: 'profit' Other error messages: So much about that theory ... ;-) `` If you can't get the built-in operators and parsers to produce the error message you need, you can always drop down one API level and write a special-purpose parser combinator. The following example shows how you can define a custom `between` combinator that includes the position of the opening delimiter as part of the error message that gets generated when the closing delimiter cannot be parsed. `` let betweenL (popen: Parser<_,_>) (pclose: Parser<_,_>) (p: Parser<_,_>) label = let expectedLabel = expected label let notClosedError (pos: Position) = messageError (sprintf "The %s opened at %s was not closed." label (pos.ToString())) fun (stream: CharStream<_>) -> // The following code might look a bit complicated, but that's mainly // because we manually apply three parsers in sequence and have to merge // the errors when they refer to the same parser state.
let state0 = stream.State let reply1 = popen stream if reply1.Status = Ok then let stateTag1 = stream.StateTag let reply2 = p stream let error2 = if stateTag1 <> stream.StateTag then reply2.[^RError Error] else mergeErrors reply1.[^RError Error] reply2.[^RError Error] if reply2.Status = Ok then let stateTag2 = stream.StateTag let reply3 = pclose stream let error3 = if stateTag2 <> stream.StateTag then reply3.[^RError Error] else mergeErrors error2 reply3.[^RError Error] if reply3.Status = Ok then Reply(Ok, reply2.Result, error3) else Reply(reply3.Status, mergeErrors error3 (notClosedError (state0.GetPosition(stream)))) else Reply(reply2.Status, reply2.[^RError Error]) else let error = if state0.Tag <> stream.StateTag then reply1.[^RError Error] else expectedLabel Reply(reply1.Status, error) `` The behaviour of the `betweenL` combinator differs from that of the standard `between` combinator in two ways: - If `popen` fails without changing the parser state, `betweenL popen p pclose label` fails with `expected label`, just like `between popen p pclose <?> label` would have. - If `pclose` fails without changing the parser state, `betweenL` additionally prints the opening position of the compound. The following tests demonstrate this behaviour: `` let stringLiteral = betweenL (str "\"") (str "\"") (manySatisfy ((<>) '"')) "string literal in double quotes" `` ``{fsi} > run stringLiteral "\"test\"";; val it : ParserResult<string,unit> = Success: "test" > run stringLiteral "\"test";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 6 "test ^ Note: The error occurred at the end of the input stream. Expecting: '"' Other error messages: The string literal in double quotes opened at (Ln: 1, Col: 1) was not closed. > run stringLiteral "test";; val it : ParserResult<string,unit> = Failure: Error in Ln: 1 Col: 1 test ^ Expecting: string literal in double quotes `` [/section] [section Parsing with user state] Each `[^CharStream_1 CharStream<'u>\ ]` holds a value of the freely definable user state type `'u`. 
In previous chapters we just ignored the user state and always assumed `'u` to be `unit`. In this section we finally get to discuss the purpose of the user state and how you can use it in your parsers. [section Overview] The user state allows you to introduce additional variables into the state tracked by FParsec parsers. It has the following two important properties: - The user state is stored in the `[^CharStream_1 CharStream<'u>\ ]` instance and hence associated with the input. It is not shared globally and not associated with particular parser instances. The same parser instances can be concurrently applied to different `[^CharStream_1 CharStream<'u>\ ]` instances with different user state instances. - The user state is tracked by FParsec parsers together with the input stream position. This means in particular that a parser restores the previous user state value when it backtracks. [important If you want changes to the user state to be undone during backtracking, you must change the user state by assigning a new value to the user state, not by mutating an existing user state value. ] With the help of the user state you can implement context sensitive parsers, i.e. parsers whose behaviour not only depends on the immediate input but also on the context of the input. In general this works as follows: # You establish a context by defining variables in the user state. # You update the context depending on the input by letting parsers update the user state. # You parse input depending on the context by making the parser behaviour dependent on the user state variables. The user state is exposed through the `UserState` property of the `[^CharStream_1 CharStream<'u>\ ]`. You can implement parsers using the low-level API that directly access this property, or you can use the following parser primitives from the `CharParsers` module: - `getUserState`, - `setUserState`, - `updateUserState`, - `userStateSatisfies`. 
The next section contains an example employing `updateUserState` to change the user state and `userStateSatisfies` to check for parser preconditions. [/section] [section Recursive grammars with nesting restrictions] An important area of application for context sensitive parsers are recursive grammars where certain grammar elements cannot nest within others or where grammar elements need to be parsed differently depending on the nesting context. Consider for example a textual markup language like HTML. Many such markup languages support various "inline tags" to annotate text in a paragraph. Usually these inline tags can nest arbitrarily, except for a few tags with special restrictions. One of these restrictions often is that hyperlinks must not contain hyperlinks, even though they can contain any other inline content. Other restrictions may apply to elements allowed in superscript text or footnotes. A convenient way to enforce such restrictions during parsing is to introduce variables into the user state that keep track of the nesting context. The following example demonstrates this approach.[fn An alternative way to handle such restrictions at the parser level would be to define separate instances of the parser for each possible combination of restrictions, e.g. separate parsers for inline elements at the top level, for inline elements within hyperlinks, for elements within hyperlinks within superscript text and so on. However, with an increasing number of restrictions this approach quickly falls victim to the combinatorial explosion caused by the recursive nature of the involved parsers.] The following parser for a tiny markup-language employs the user state # to ensure that nested hyperlinks are not accepted and # to parse potentially nested quotations between matching pairs of `'\''` or `'\"'` chars. 
`` open FParsec type Element = Text of string | Bold of Element list | Italic of Element list | Url of string * Element list | Quote of char * Element list type UserState = {InLink: bool QuoteStack: char list} with static member Default = {InLink = false; QuoteStack = []} let ws = spaces let ws1 = spaces1 let str s = pstring s let elements, elementsR = createParserForwardedToRef() let text = many1Satisfy (isNoneOf "<>'\"\\") |>> Text let escape = str "\\" >>. (anyChar |>> (string >> Text)) let quote (q: char) = let pq = str (string q) let pushQuote = updateUserState (fun us -> {us with QuoteStack = q::us.QuoteStack}) let popQuote = updateUserState (fun us -> {us with QuoteStack = List.tail us.QuoteStack}) let isNotInQuote = userStateSatisfies (fun us -> match us.QuoteStack with | c::_ when c = q -> false | _ -> true) isNotInQuote >>. between pq pq (between pushQuote popQuote (elements |>> fun ps -> Quote(q, ps))) // helper functions for defining tags let tagOpenBegin tag = str ("<" + tag) >>? nextCharSatisfiesNot isLetter // make sure tag name is complete <?> "<" + tag + "> tag" let tagOpen tag = tagOpenBegin tag >>. str ">" let tagClose tag = str ("</" + tag + ">") let tag t p f = between (tagOpen t) (tagClose t) (p |>> f) let attributeValue = ws >>. str "=" >>. ws >>. between (str "\"") (str "\"") (manySatisfy (isNoneOf "\n\"")) let attribute s = str s >>. attributeValue let nonNestedTag tag pAttributesAndClose pBody f isInTag setInTag setNotInTag = tagOpenBegin tag >>. ((fun stream -> if not (isInTag stream.UserState) then stream.UserState <- setInTag stream.UserState Reply(()) else // generate error at start of tag stream.Skip(-tag.Length - 1) Reply(FatalError, messageError ("Nested <" + tag + "> tags are not allowed."))) >>. pipe2 pAttributesAndClose pBody f .>> (tagClose tag >>. updateUserState setNotInTag)) // the tags let bold = tag "b" elements Bold let italic = tag "i" elements Italic let url = nonNestedTag "a" (ws >>. attribute "href" .>> (ws >>. 
str ">")) elements (fun url phrases -> Url(url, phrases)) (fun us -> us.InLink) (fun us -> {us with InLink = true}) (fun us -> {us with InLink = false}) let element = choice [text escape quote '\'' quote '\"' bold italic url] do elementsR:= many element let document = elements .>> eof `` ``{fsi} > runParserOnString document UserState.Default "" "A \"'text' with 'nested \"<b>quotes</b>\"'.\"";; val it : ParserResult<Element list,UserState> = Success: [Text "A "; Quote ('"', [Quote ('\'',[Text "text"]); Text " with "; Quote ('\'',[Text "nested "; Quote ('"',[Bold [Text "quotes"]])]); Text "."])] > runParserOnString document UserState.Default "" @"<b>Text <i></i>with</b> <a href=""url"">'link' but no \<blink\></a>";; val it : ParserResult<Element list,UserState> = Success: [Bold [Text "Text "; Italic []; Text "with"]; Text " "; Url("url", [Quote ('\'',[Text "link"]); Text " but no "; Text "<"; Text "blink"; Text ">"])] > runParserOnString document UserState.Default "" "<a href=\"url\"><a href=\"nested\">test</a></a>";; val it : ParserResult<Element list,UserState> = Failure: Error in Ln: 1 Col: 15 <a href="url"><a href="nested">test</a></a> ^ Nested <a> tags are not allowed. `` [/section] [section Parameterizing a parser through the user state] The user state is also a good place to store parser configuration data that is specific to a "parser job". For example, a compiler that processes multiple compilation units could put configuration data that is specific to the compilation unit, e.g. include paths, into the user state and then parse different compilation units with the same `Parser` instance, like in the following code: `` type CompilationUnitAST = (* ... *) type UserState = { IncludePaths: string list (* ... *) } let parser : Parser<CompilationUnitAST,UserState> = (* ... *) let parseCompilationUnit file encoding includePaths (* ... *) = let initialUserState = {IncludePaths = includePaths; (* ... *)} runParserOnFile parser initialUserState file encoding `` [/section] [/section] [section Where is the monad?] If you have previously used Haskell's Parsec library or an early version of FParsec you're probably wondering by now where the "monadic syntax" has gone. 
There's also a chance that you've stumbled upon FParsec while searching for a "monadic parser library" for F#/.Net and you're now wondering whether FParsec actually is one. To answer these questions right away: FParsec supports a monadic parser construction syntax, but this syntax is only an optional feature, not the foundation of the library design. FParsec doesn't use the monadic syntax internally and we no longer recommend using it for new parser projects when performance is a concern. [section An example using the monadic syntax] With the monadic syntax you can, for example, write a parser for a pair of floating-point numbers as follows: `` open FParsec let ws = spaces // whitespace parser let str_ws str = parse {do! skipString str do! ws return ()} let number_ws = parse {let! number = pfloat do! ws return number} let pairOfNumbers = parse {do! str_ws "(" let! number1 = number_ws let! number2 = number_ws do! str_ws ")" return (number1, number2)} `` We'll explain how the F# compiler handles the `parse {...}` expressions in the next section. For now, just compare the previous implementation with the following one using the usual FParsec combinators: `` open FParsec let ws = spaces // whitespace parser let str_ws str = skipString str >>. ws let number_ws = pfloat .>> ws let pairOfNumbers = between (str_ws "(") (str_ws ")") (tuple2 number_ws number_ws) `` The latter implementation is obviously more concise, but -- at least for users without prior exposure to FParsec -- the first implementation is probably a bit more intuitive and self-explanatory. What makes the first implementation so intuitive is that the syntax of the `parse {...}` expressions is a) very close to what developers are used to from their normal work with F# and b) expressive enough that it obviates the need for many of FParsec's basic combinators. Unfortunately, the intuitiveness of the monadic syntax comes at the price of a large performance penalty. 
[/section] [section How the monadic syntax works] To explain how the monadic syntax works, we need to take a look at how the F# compiler translates the `parse {...}` expressions. The foundation for the monadic syntax is the `>>=` combinator introduced in [^applying-parsers-in-sequence.the-combinator]: ``val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>`` This operator takes a parser and a function returning a parser as arguments. The combined parser `p >>= f` first applies the parser `p` to the input, then it applies the function `f` to the result returned by `p` and finally it applies the parser returned by `f` to the input. As we explained in [^applying-parsers-in-sequence.the-combinator], this way to combine parsers is powerful enough that we can express many other sequencing combinators in terms of `>>=` and `preturn`. For example, we could implement the `pipe3` combinator for sequentially applying three parsers as follows: `` let pipe3 p1 p2 p3 f = p1 >>= fun x1 -> p2 >>= fun x2 -> p3 >>= fun x3 -> preturn (f x1 x2 x3) `` Directly using the `>>=` and `preturn` combinators obviously leads to somewhat unwieldy and unreadable expressions. Fortunately, F#'s @computation expressions@ allow us to rewrite this expression in a more intuitive way: `` let pipe3 p1 p2 p3 f = parse {let! x1 = p1 let! x2 = p2 let! x3 = p3 return f x1 x2 x3} `` The `parse` object that we reference in this and other code snippets of this chapter is a so-called "builder" object for computation expressions. It is defined in FParsec's `Primitives` module. 
Using the methods of this object, the F# compiler translates the computation expression in the curly braces to the following equivalent expression: `` let pipe3 p1 p2 p3 f = parse.Delay(fun () -> parse.Bind(p1, fun x1 -> parse.Bind(p2, fun x2 -> parse.Bind(p3, fun x3 -> parse.Return(f x1 x2 x3))))) `` When we replace the `parse` object method calls with the respective method bodies, we will see that this definition is equivalent to our original definition using `>>=` and `preturn`. The `Bind`, `Return` and `Delay` methods of the `parse` object are defined as: `` member t.Bind(p, f) = p >>= f member t.Return(x) = preturn x member t.Delay(f:(unit -> Parser<'a,'u>)) = fun stream -> (f ()) stream `` Substituting these method bodies into the previous expression yields an expression that is very similar to the original one (except for the additional indirection introduced by the `Delay` method[fn The computation expression specification does not require a `Delay` method. So, we could avoid the overhead associated with the additional indirection by removing the `Delay` method from the `ParserCombinator` class. However, this would make the behaviour of `parse` expressions somewhat counter-intuitive, as the behaviour would differ from the behaviour of F#'s `seq` and `async` expressions.]): `` let pipe3 p1 p2 p3 f = fun stream -> (p1 >>= fun x1 -> p2 >>= fun x2 -> p3 >>= fun x3 -> preturn (f x1 x2 x3)) stream `` In summary, the `parse {...}` syntax is syntactic sugar for defining parsers with the `>>=` operator. The expressiveness of this syntax stems from the power of the `>>=` operator. [/section] [section The term "monad"] A function with a signature like the one of the `>>=` operator is often called "bind". The above examples make it obvious why: the `>>=` combinator binds the result of the parser on the left-hand side to the function argument on the right-hand side. 
The `Parser` type together with the `>>=` and `preturn` operations constitute a @monad@, which is an abstraction in type theory that denotes this kind of combination of a generic type with associated bind and return operations. Discussing the theoretical background of monads would be outside the scope of this user's guide. For our purposes it is enough to note that the monad abstraction is so useful for certain applications that F# comes with built-in syntax support for monadic expressions. FParsec utilizes this language feature (@computation expressions@) to enable `parse {...}` expressions. Be assured that you don't need to know anything about monads in general in order to use FParsec's `parse {...}` expressions. To fully understand this feature all you need to know is how the F# compiler translates `parse {...}` expressions into normal code. Besides `let!`, `do!` and `return` there are some more language constructs that are supported inside `parse {...}` expressions. Please refer to the [^reference.Primitives.members.parse reference documentation] for more information. [/section] [section Why the monadic syntax is slow] Compared to parsers implemented with only the usual FParsec operators and functions, parsers implemented with `parse {...}` expressions can be up to several times slower. The relatively bad performance can be directly attributed to the way `parse {...}` expressions are compiled. As you have seen above, a `parse {...}` expression is simply translated into a series of nested closures that are chained through calls to the `>>=` operator. *With the current compiler technology and the current implementation of FParsec* this introduces some significant overhead. *Every time* a `Parser` function constructed with the `parse {...}` syntax is called: - Two function closures get newly instantiated for each invocation of the `>>=` operator: the closure that is passed as the second argument to `>>=` and the closure that is returned by `>>=`. 
- Any parser created inside a `parse {...}` expression gets (re-)created every time execution reaches that point in the expression. In principle, you can avoid the overhead described in the second point by moving the construction of parser functions out of the `parse {...}` expression. For example, you can avoid the repeated construction of the `skipString` parsers in `` let numberInParens = parse {do! skipString "(" let! number = pfloat do! skipString ")" return number} `` by rewriting the code as `` let parenOpen = skipString "(" let parenClose = skipString ")" let numberInParens = parse {do! parenOpen let! number = pfloat do! parenClose return number} `` However, if you wanted to factor out any parser construction from a `parse {...}` expression, you'd also have to factor out any use of parser combinators, which would take away a lot from the attractiveness of the syntax. If performance is not that important for your application, you can just ignore that a parser like `skipString "("` is repeatedly constructed, since its construction is relatively cheap. But if you do the same for parsers based on `regex` or `anyOf`, where the construction potentially involves some relatively expensive compilation or runtime code generation, you might be surprised just how slow your parsers can become. Because of the described performance issues, we recommend not to use `parse {...}` expressions and instead work with FParsec's rich set of operators and other combinators. Not only does the operator-based notation (which is used everywhere else in FParsec's documentation) lead to faster parsers, it also allows for more concise parser code with a higher signal-to-noise ratio. [/section] [/section] [section Debugging a parser] Debugging a parser implemented with the help of a combinator library has its special challenges. In particular, setting a breakpoint and stepping through the code is not as straightforward as in a regular recursive descent parser. 
Furthermore, stack traces can be difficult to decipher because of the ubiquitous use of anonymous functions.[fn Although, debugging a parser written with a combinator library is often still easier than debugging one generated by an opaque parser generator tool.] However, with the help of the techniques we explain in this chapter, working around these issues should be easy. [section Setting a breakpoint] Suppose you have a combined parser like ``let buggyParser = pipe2 parserA parserB (fun a b -> ...)`` and you would like to break into the debugger whenever `buggyParser` calls `parserB`. One thing you could try is to set a breakpoint at the beginning of `parserB`. However, that's only possible if `parserB` is not itself a combined parser, and even then you still have the problem that your breakpoint is also triggered whenever `parserB` is called from any other place in your source. Similarly, a breakpoint you set in `pipe2` will probably be triggered by many other parsers besides `buggyParser`. Fortunately there's a simple workaround if you can modify and recompile the code. Just define a wrapper function like the following `` let BP (p: Parser<_,_>) stream = p stream // set a breakpoint here `` Then redefine the buggy parser as `` let buggyParser = pipe2 parserA (BP parserB) (fun a b -> ...) `` If you now set a breakpoint at the body of the BP function, it will be triggered whenever `parserB` is called from `buggyParser`. With such a wrapper it's also easy to define a precise conditional breakpoint. For example, if you only want to break once the parser has reached line 100 of the input file, you could use the breakpoint condition `stream.Line >= 100`. By the way, you don't need to set the breakpoint in the debugger. 
You can also write it directly into the code: `` let BP (p: Parser<_,_>) (stream: CharStream<_>) = // this will execute much faster than a // conditional breakpoint set in the debugger if stream.Line >= 100L then System.Diagnostics.Debugger.Break() p stream `` [note There are some issues with setting breakpoints in or stepping into anonymous or curried F# functions in Visual Studio 2008. In Visual Studio 2010 many of these issues have been fixed. If you're using Visual Studio, don't forget to switch on the "Suppress JIT optimization on module load" option in the Tools -- Options -- Debugging -- General dialog. And, when possible, use a debug build (of FParsec) for debugging. ] [/section] [section Tracing a parser] Occasionally you have a parser that doesn't work as expected and playing around with the input or staring at the code long enough just isn't enough for figuring out what's wrong. In such cases the best way to proceed usually is to trace the execution of the parser. Unfortunately, stepping through the parser under a debugger can be quite tedious, because it involves stepping through long sequences of nested invocations of parser combinators. A more convenient approach often is to output tracing information to the console or a logging service. A simple helper function for printing trace information to the console could look like the following example: `` let (<!>) (p: Parser<_,_>) label : Parser<_,_> = fun stream -> printfn "%A: Entering %s" stream.Position label let reply = p stream printfn "%A: Leaving %s (%A)" stream.Position label reply.Status reply `` To demonstrate how you could use such a tracing operator, let's try to debug the following buggy (and completely silly) parser: `` let number = many1Satisfy isDigit let emptyElement = pstring "[]" : Parser<_,unit> let numberElement = pstring "[" >>. 
number .>> pstring "]" let nanElement = pstring "[NaN]" let element = choice [emptyElement numberElement nanElement] .>> spaces let elements : Parser<_,unit> = many element `` The following test run shows that the above parser is indeed buggy: ``{fsi} > run elements "[] [123] [NaN]";; val it : ParserResult<string list,unit> = Failure: Error in Ln: 1 Col: 11 [] [123] [NaN] ^ Unknown Error(s) `` You probably don't need trace information to figure out why the `"NaN"` bit of the string doesn't get parsed, but let's pretend you do. Obviously, there's something wrong with the `element` parser. To find out what's wrong, let's decorate the `element` parser and all subparsers with the `<!>` operator and an appropriate label: `` let number = many1Satisfy isDigit <!> "number" let emptyElement = pstring "[]" <!> "emptyElement" let numberElement = pstring "[" >>. number .>> pstring "]" <!> "numberElement" let nanElement = pstring "[NaN]" <!> "nanElement" let element = choice [emptyElement numberElement nanElement] .>> spaces <!> "element" let elements : Parser<_,unit> = many element `` If you now run the parser on the same input as before, you get the following output: ``{fsi} > run elements "[] [123] [NaN]";; (Ln: 1, Col: 1): Entering element (Ln: 1, Col: 1): Entering emptyElement (Ln: 1, Col: 3): Leaving emptyElement (Ok) (Ln: 1, Col: 4): Leaving element (Ok) (Ln: 1, Col: 4): Entering element (Ln: 1, Col: 4): Entering emptyElement (Ln: 1, Col: 4): Leaving emptyElement (Error) (Ln: 1, Col: 4): Entering numberElement (Ln: 1, Col: 5): Entering number (Ln: 1, Col: 8): Leaving number (Ok) (Ln: 1, Col: 9): Leaving numberElement (Ok) (Ln: 1, Col: 10): Leaving element (Ok) (Ln: 1, Col: 10): Entering element (Ln: 1, Col: 10): Entering emptyElement (Ln: 1, Col: 10): Leaving emptyElement (Error) (Ln: 1, Col: 10): Entering numberElement (Ln: 1, Col: 11): Entering number (Ln: 1, Col: 11): Leaving number (Error) (Ln: 1, Col: 11): Leaving numberElement (Error) (Ln: 1, Col: 11): Leaving element (Error) val it : ParserResult<string list,unit> = 
Failure: Error in Ln: 1 Col: 11 [] [123] [NaN] ^ Unknown Error(s) `` This trace log clearly reveals that the `element` parser failed because the `numberElement` parser failed after consuming the left bracket and thus the `choice` parser never got to try the `nanElement` parser. Of course, this issue could be easily avoided by factoring out the bracket parsers from the `emptyElement`, `numberElement` and `nanElement` parsers. Also, if we had used `many1SatisfyL` instead of `many1Satisfy` for the `number` parser, we would have gotten an error message more descriptive than "Unknown Error(s)" (see the chapter on @customizing error messages@). [/section] [/section] [section Performance optimizations] In the past, the relatively poor performance of parser combinator libraries has often been cited as the primary impediment to their more widespread adoption. For this reason optimal performance stood front and center as a design goal during the development of FParsec and a lot of effort has been spent on optimizing parsing speed. As a result, FParsec has become so fast that parsers implemented with FParsec often significantly outperform parsers created by parser generator tools like fslex & fsyacc. In general, a parser implemented in FParsec can get close to the performance of a hand-optimized recursive-descent parser written in C#. Due to the multi-layered architecture of the FParsec API, you always have the option to fall back to the lower-level API should a particular parser component implemented with the high-level API turn out to be too slow. Hence, if you choose FParsec for implementing your parsers, you don't have to worry that performance will become a reason for switching away from FParsec. [section Performance guidelines] If you strive for optimal performance in your parser applications, try to adhere to the following guidelines: [dl [Avoid backtracking] [ Try to avoid backtracking where possible. 
Sometimes it's already enough to factor out a common prefix from a parser expression to avoid backtracking, e.g. by transforming `(prefix >>? p1) <|> (prefix >>? p2)` to `prefix >>. (p1 <|> p2)`. Some simple backtracking can also be avoided by parsing whitespace as trailing whitespace instead of leading whitespace. If you're designing a programming or markup language, you should try to minimize the need for backtracking, both to simplify parsing and to avoid exponential worst-case behaviour. ] [Prefer specialized parsers] [ FParsec provides a number of specialized parsers and combinators for various purposes. Using more specialized primitives instead of reimplementing them with generic combinators will often save you time and improve parsing speed. In particular: - Prefer the `skip...` variants of parsers and combinators if you don't need the parser results. - Parse whitespace with the built-in whitespace parsers. - Parse numbers with the built-in number parsers. - Prefer to parse strings with the `many[1]Satisfy[2][L]` parsers. - Consider parsing unicode identifiers with the `identifier` parser. ] [[# Construct parsers once]] [ Constructing a parser can be relatively expensive in comparison to a single invocation of the parser. Hence, if you repeatedly apply the same parser, you should make sure that you construct the parser only once, either by preconstructing it at the beginning or by lazily constructing the parser and then caching it. Usually the place where parsers get inadvertently constructed more than once is inside closures. For example, if you have a local function like `` fun stream -> let reply = (parser1 >>. parser2) stream if reply.Status = Ok then // ... else // ... `` you should avoid the repeated construction of `parser1 >>. parser2` every time the closure is called by moving the construction outside of the closure, as in `` let parser = parser1 >>. parser2 fun stream -> let reply = parser stream if reply.Status = Ok then //... else // ... 
`` Also, you shouldn't wrap a parser expression inside a function just to avoid F#'s value restriction if you can achieve the same goal with a type annotation. For example, you should **not** try to fix the compiler error in the first example of the [^fs-value-restriction tutorial chapter on F#'s value restriction] by replacing ``let p = pstring "test"`` with ``let p stream = pstring "test" stream`` ] [Avoid `parse {...}` expressions] [ See @Why the monadic syntax is slow@. ] [Avoid `regex` parsers] [ The `regex` parser parses a string by applying a .NET regular expression to the input. Since .NET regular expressions are relatively slow, you should reserve the use of the `regex` parser for patterns that you can't easily express with other FParsec parsers and combinators. ] [Consider optimizing large `choice` parsers] [ Formal grammars for programming languages or DSLs often have one or two grammar rules at their core that essentially just enumerate a long list of possible ways to form a statement or expression in that language. A straightforward FParsec implementation of such a grammar rule typically uses the `choice` combinator to combine a list of parsers for all the alternatives. Usually such an implementation with a large `choice`-based parser will do just fine. However, if parsing performance is critical for your application, replacing a large `choice` parser with a custom-made combinator can be an optimization with a high benefit-cost ratio. The next section explains this optimization in more detail. ] ] [/section] [section Low-level parser implementations] FParsec's high-level API consists of its built-in parsers and combinators in the `Primitives` and `CharParsers` module. The high-level API allows you to easily construct parsers in a concise and rather declarative way. Usually you will author most of your parsers using the high-level API, because that's the most productive way to do it. 
However, sometimes you might find that a specific piece of parser functionality is a bit inconvenient to express through the high-level API or that the high-level implementation isn't as fast as you had hoped for. In those situations it's a great advantage that FParsec allows you to drop down to the low-level API, so that you can implement your own special-purpose parser and combinator primitives. We have already covered the basics of the low-level API in the chapters on the @internals of a simple parser function@ and [@ applying parsers in sequence]. In this section we will discuss some examples that demonstrate how you can use low-level parser implementations for optimization purposes. One example of a parser implemented using the low-level API is contained in the samples folder of the FParsec distribution in [= samples/FSharpParsingSample/FParsecVersion/parser.fs]. It is a parser for an identifier string that is not identical with a keyword. The low-level implementation uses another parser, `identifierString`, to parse an identifier string and then backtracks when the parsed string is a keyword: `` let identifier : Parser<string,_> = let expectedIdentifier = expected "identifier" fun stream -> let state = stream.State let reply = identifierString stream if reply.Status <> Ok || not (isKeyword reply.Result) then reply else // result is keyword, so backtrack to before the string stream.BacktrackTo(state) Reply(Error, expectedIdentifier) `` The same parser could also be implemented with the high-level API: `` let identifier = attempt (identifierString >>= fun str -> if not (isKeyword str) then preturn str else pzero) <?> "identifier" `` The high-level version is a bit more concise, but whether it is also easier to understand is debatable. The low-level version seems at least a bit more self-explanatory and hence is probably more accessible to new FParsec users. 
Since the low-level implementation is also significantly faster than the high-level one, this is a good example for a parser that can be improved through a low-level implementation. If you wanted to optimize the performance of the identifier parser even more, you could replace the `identifierString` parser invocation with direct calls to `CharStream` methods. However, whether the potential performance gain would be worth the loss in code modularity and maintainability is questionable. A more promising optimization often is to integrate the identifier parser into a higher-level `choice`-based parser, like it is done below in the last example of this section. `choice` parsers with long list of argument parsers are performance-wise one of the weakest spots of FParsec's high-level API. As we noted in the previous section, formal grammars for programming languages or DSLs often have one or two grammar rules at their core that essentially just enumerate a long list of possible ways to form a statement or expression in that language. A straightforward implementation of such a grammar rule using the `choice` combinator yields only sub-optimal performance, since the `choice` parser has no knowledge about its argument parsers and has to try one parser after another. This makes large `choice`-based parsers an excellent optimization opportunity. With your knowledge about the parser grammar you can often narrow down the set of possible parsers just by peeking at the following one or two chars in the input. Having identified the set of possible parsers (often only consisting of one parser), you can then considerably speed up the dispatch to the right subparser. For example, take a look at the @JSON-value parser@ from the tutorial: `` choice [jobject jlist jstring jnumber jtrue jfalse jnull] `` If you look at the definitions for the argument parsers, you'll see that in almost all cases one can decide which parser should handle the input just based on the next char in the input. 
Hence, we could replace the `choice`-based parser with the following low-level implementation: `` let error = expected "JSON value" fun (stream: CharStream<_>) -> match stream.Peek() with | '{' -> jobject stream | '[' -> jlist stream | '"' -> jstring stream | 't' when stream.Skip("true") -> Reply(JBool true) | 'f' when stream.Skip("false") -> Reply(JBool false) | 'n' when stream.Skip("null") -> Reply(JNull) | _ -> let stateTag = stream.StateTag let mutable reply = jnumber stream if reply.Status = Error && stateTag = stream.StateTag then reply.[^RError Error] <- error reply `` A drawback of such a low-level implementation is that you have to be a bit careful not to overlook any of the possible grammar cases. This is why we applied the `jnumber` parser in the "catch-all" case, so that we don't depend on the precise grammar rules for numbers. You also need to consider how the low-level implementation affects error messages. When a `choice` parser fails, it will generate an error message with the error messages from all the argument parsers it tried. This gives a human reader usually enough context to understand the error. For a low-level implementation it can take a little more effort to ensure that the error messages for every case contain enough information about the grammar context. For example, in our implementation above we had to replace the default error message by `jnumber` with a custom one, so that the error message generated by the catch-all case doesn't create the impression that a JSON value can only be a number. By now it is probably obvious that a low-level parser implementation can actually be quite simple to write, but that it also comes at a certain cost in terms of code modularity and maintainability. 
Having the option of a low-level implementation can certainly be what saves a project in certain situations and should give you some peace of mind with regard to parser performance, but generally you should only consider it as a backup option for those cases where you really need it. The following example shows again how you can replace a `choice`-based parser with a low-level implementation, this time with a grammar that is a bit more representative of a typical programming language: `` type Expr = Number float | LetBinding ... | IfThenElse ... | ... type UserState = //... type Parser<'result> = Parser<'result, UserState> type Keyword = None = 0 | If = 1 | Let = 2 // ... let stringToKeyword = createStaticStringMapping Keyword.None ["if", Keyword.If "let", Keyword.Let // ... ] let str s = pstring s let identifierString : Parser = // ... let identifierRest (id: string) : Parser = ... let number : Parser = // ... (parser for floating-point number) let ifThenElseRest : Parser = // ... let letBindingRest : Parser = // ... let exprInParensRest : Parser = // ... // The parser after this comment is a replacement for // let identifierStringButNoKeyword = // (* implementation like identifier parser in the first example above *) // // let identifier : Parser = identifierStringButNoKeyword // >>= identifierRest // // let ifThenElse : Parser = str "if" >>. ifThenElseRest // let letBinding : Parser = str "let" >>. letBindingRest // let exprInParens : Parser = str "(" >>. exprInParensRest // // let expr = choice [identifierStringNoKeyword // number // ifThenElse // exprInParens // // ... // ] // let expr : Parser = fun stream -> let stateTag = stream.StateTag let reply = identifierString stream if reply.Status = Ok then match stringToKeyword reply.Result with | Keyword.None -> identifierRest reply.Result stream | Keyword.If -> ifThenElseRest stream | Keyword.Let -> letBindingRest stream // ... 
elif reply.Status = Error && stateTag = stream.StateTag then // no identifier match stream.Peek() with | '(' -> stream.Skip(); exprInParensRest stream | c when isDigit c -> number stream // ... else // error within identifier string Reply(reply.Status, reply.[^RError Error]) `` [/section] [/section] [section Tips and tricks] [toc] [section Parallel parsing] If your parser grammar is suitable for parallel parsing, parallelizing the parser has the potential to dramatically accelerate parsing on multi-core machines. In the following we will briefly discuss requirements and strategies for parallelizing an FParsec parser. For a parser grammar to be well suited for parallel parsing, the grammar and the typical input must satisfy the following two criteria: - Parts of the input must be independently parseable, i.e. parts must be parseable without knowledge about the other parts. - These parts must be large enough and easily enough identifiable within the total input. Often, the easiest and most beneficial way to parallelize the parsing stage of an application is to parse multiple input files in parallel. In the simplest case you have multiple independent "compilation units" that can be parsed in parallel. This works even for C/C++, where a badly designed preprocessor generally makes efficient parsing quite hard. In many programming languages and markup languages you can also parse in parallel files that are "included", "opened" or "imported" within source files. However, this usually only works if the language allows such includes only at well-defined points in the grammar. In languages like C/C++, where the unstructured text content of other files can be included at essentially arbitrary positions in the source, parsing the included files in parallel is generally quite hard. (In C/C++ it's even hard to avoid parsing the same file multiple times when it is included multiple times).
If you're dealing with large input files or very slow parsers, it might also be worth trying to parse multiple sections within a single file in parallel. For this to be efficient there must be a fast way to find the start and end points of such sections. For example, if you are parsing a large serialized data structure, the format might allow you to easily skip over segments within the file, so that you can chop up the input into multiple independent parts that can be parsed in parallel. Another example could be a programming language whose grammar makes it easy to skip over a complete class or function definition, e.g. by finding the closing brace or by interpreting the indentation. In this case it *might* be worth not to parse the definitions directly when they are encountered, but instead to skip over them, push their text content into a queue and then to process that queue in parallel. Here are some tips for parallel parsing with FParsec: - All FParsec parsers are thread-safe and can be safely applied concurrently to different `CharStream` instances, as long as you don't introduce mutable shared state yourself. - `CharStream` instances are not thread-safe and a single instance must not be accessed concurrently. - However, you can call the `CreateSubstream` method to create a substream for a `CharStream`. A `CharStream` and its substreams can be safely accessed concurrently. - If you want to parse multiple files in parallel, you should also create the `CharStream` instances in parallel, because the `CharStream` constructors that accept file paths or binary streams perform I/O operations that benefit from parallelization. - If you parallelize your parser, consider introducing an option for switching off parallel execution, since debugging a multi-threaded parser is harder than debugging a single-threaded one.
[/section] [section Dispatching parsers through a dictionary] A technique that is often useful for making a parser modular and easily extensible is to store `Parser` functions in dictionaries and then to delegate parsing to one of the `Parser` functions in the dictionary based on the input. For example, a parser for a markup language could be implemented by defining a generic tag parser that delegates the parsing of the tagged content to a specific parser for the respective tag name. The following code shows how this could be done: `` open FParsec open System.Collections.Generic // For simplicity we don't define a full-blown markup language here, // just a parser for two simple non-recursive "tags" in square brackets. // The chapter on "parsing with user state" contains a slightly more developed // sample for a markup language, though without a dictionary-based tag parser. type Tag = Bold of string | Url of string * string // We store the tag parser dictionary in the user state, so that we can // concurrently parse multiple input streams with the same parser instance // but different tag dictionaries. type TagParserMap = Dictionary<string, Parser<Tag,UserState>> and UserState = { TagParsers: TagParserMap } let defaultTagParsers = TagParserMap() let isTagNameChar1 = fun c -> isLetter c || c = '_' let isTagNameChar = fun c -> isTagNameChar1 c || isDigit c let expectedTag = expected "tag starting with '['" let tag : Parser<Tag,UserState> = fun stream -> if stream.Skip('[') then let name = stream.ReadCharsOrNewlinesWhile(isTagNameChar1, isTagNameChar, false) if name.Length <> 0 then let mutable p = Unchecked.defaultof<_> if stream.UserState.TagParsers.TryGetValue(name, &p) then p stream else stream.Skip(-name.Length) Reply(Error, messageError ("unknown tag name '" + name + "'")) else Reply(Error, expected "tag name") else Reply(Error, expectedTag) let str s = pstring s let ws = spaces let text = manySatisfy (function '['|']' -> false | _ -> true) defaultTagParsers.Add("b", str "]" >>.
text .>> str "[/b]" |>> Bold) defaultTagParsers.Add("url", (str "=" >>. manySatisfy ((<>)']') .>> str "]") .>>. (text .>> str "[/url]") |>> Url) let parseTagString str = runParserOnString tag {TagParsers = TagParserMap(defaultTagParsers)} "" str `` ``{fsi} > parseTagString "[b]bold text[/b]";; val it : ParserResult = Success: Bold "bold text" > parseTagString "[url=http://tryfsharp.org]try F#[/url]";; val it : ParserResult = Success: Url ("http://tryfsharp.org","try F#") > parseTagString "[bold]test[/bold]";; val it : ParserResult = Failure: Error in Ln: 1 Col: 2 [bold]test[/bold] ^ unknown tag name 'bold' `` [/section] [section Memoizing parsers] If your parser implementation backtracks a lot when parsing typical inputs and as a result repeatedly applies some `Parser` functions at the same input position, it can be beneficial to memoize these `Parser` functions, i.e. cache their results for each input position. In the extreme case, [url "https://en.wikipedia.org/wiki/Memoization" memoization] can mean the difference between linear and exponential execution times. In practice, FParsec is typically used for formal grammars that hardly require any extensive backtracking, so that memoization would usually only have a negative effect on performance. In situations where you really do need to memoize parsers, you can work with a generic `memoize` combinator like the one in the following example: `` open FParsec open System.Collections.Generic // We need a place to store the cached parser results. Since we want parser // instances to be able to concurrently access different caches for different // input streams, we will use a user state variable for this purpose. Since we // don't want the backtracking to undo changes to the cache, we will use a // mutable dictionary for this purpose. type UserState = { MemoCache: Dictionary // ... } // An entry in the MemoCache must be uniquely identified by its MemoKey.
In this // example the MemoKey includes the stream index value and a reference to the // memoized parser instance. Should the result of a memoized Parser function in // your implementation also depend on the UserState value, you will have to // extend the MemoKey with a UserState member. Similarly, if you want to cache // results for more than one stream in the MemoCache, you'll have to extend the // MemoKey with an identifier for the stream. and [] MemoKey = struct new (parser: obj, stream: CharStream) = {[no-auto-link Parser] = parser; [no-auto-link Index] = stream.Index} val [no-auto-link Parser]: obj val [no-auto-link Index]: int64 interface System.IEquatable with member t.Equals(other: MemoKey) = t.[no-auto-link Index] = other.[no-auto-link Index] && t.[no-auto-link Parser] = other.[no-auto-link Parser] override t.Equals(otherObj: obj) = match otherObj with | :? MemoKey as other -> t.[no-auto-link Index] = other.[no-auto-link Index] && t.[no-auto-link Parser] = other.[no-auto-link Parser] | _ -> false override t.GetHashCode() = int32 t.[no-auto-link Index] end /// Returns a memoized version of the argument parser let memoize (p: Parser<'a,UserState>) : Parser<'a,UserState> = fun stream -> let key = MemoKey(p, stream) let memoCache = stream.UserState.MemoCache let mutable boxedReply = null if memoCache.TryGetValue(key, &boxedReply) then boxedReply :?> Reply<'a> else let reply = p stream memoCache.Add(key, box reply) reply `` [/section] [section Parsing F# infix operators] F# supports user-definable infix operators whose precedence and associativity depend on the first chars of the operator name. For example, the [url "http://fsharp.org/specs/language-spec/4.0/FSharpSpec-4.0-latest.pdf" F# spec] states that operators that start with `*` are left-associative, while operators that start with `**` are right associative and have a higher precedence, so that `1*2*.3**4**.5` is parsed as `((1*2)*.(3**(4 **.5)))`. 
Since the precedence and associativity rules are fixed, you can parse F# expressions with a static operator precedence grammar, i.e. without having to reconfigure the parser when a new operator is defined in the parsed source code. However, it's probably not immediately obvious how to do this with FParsec's `OperatorPrecedenceParser` class (OPP), since the OPP normally expects all possible operators to be (individually) specified before they are used. The trick to supporting whole classes of operator names without having to reconfigure the OPP at run-time is to shift part of the operator parsing to the [^Operator after-string-parser], like in the following example: `` open FParsec type Expr = InfixOpExpr of string * Expr * Expr | Number of int let ws = spaces // whitespace parser let isSymbolicOperatorChar = isAnyOf "!%&*+-./<=>@^|~?" let remainingOpChars_ws = manySatisfy isSymbolicOperatorChar .>> ws let opp = new OperatorPrecedenceParser() opp.TermParser <- pint32 .>> ws |>> Number // a helper function for adding infix operators to opp let addSymbolicInfixOperators prefix precedence associativity = let op = InfixOperator(prefix, remainingOpChars_ws, precedence, associativity, (), fun remOpChars expr1 expr2 -> InfixOpExpr(prefix + remOpChars, expr1, expr2)) opp.AddOperator(op) // the operator definitions: addSymbolicInfixOperators "*" 10 Associativity.Left addSymbolicInfixOperators "**" 20 Associativity.Right // ... `` ``{fsi} > run opp.ExpressionParser "1*2*.3**4**.5";; val it : ParserResult = Success InfixOpExpr ("*.", InfixOpExpr ("*", Number 1, Number 2), InfixOpExpr ("**", Number 3, InfixOpExpr ("**.", Number 4, Number 5))) `` If you use the after-string-parser in this manner for operators that can lead to operator conflicts in the input, e.g. 
non-associative operators, then you also need to replace the default `OperatorConflictErrorFormatter`, since otherwise the default formatter may print truncated operator names: `` addSymbolicInfixOperators "<" 1 Associativity.None `` ``{fsi} > run opp.ExpressionParser "1 <= 2 <=. 3";; val it : ParserResult = Failure: Error in Ln: 1 Col: 9 1 <= 2 <=. 3 ^ The infix operator '<' (precedence: 1, non-associative) conflicts with the infix operator '<' (precedence: 1, non-associative) on the same line at column 3. `` An error formatter that prints the full operator names could look like the following: `` opp.OperatorConflictErrorFormatter <- fun (pos1, op1, afterString1) (pos2, op2, afterString2) -> let msg = sprintf "The operator '%s' conflicts with the previous operator '%s' at %A." (op2.[@ String] + afterString2) (op1.[@ String] + afterString1) pos1 messageError msg `` ``{fsi} > run opp.ExpressionParser "1 <= 2 <=. 3";; val it : ParserResult = Failure: Error in Ln: 1 Col: 9 1 <= 2 <=. 3 ^ The operator '<=.' conflicts with the previous operator '<=' at (Ln: 1, Col: 3). `` [/section] [/section] [/section] ================================================ FILE: FParsec/AssemblyInfo.fs ================================================ namespace FParsec open System.Reflection open System.Runtime.CompilerServices open System.Runtime.InteropServices [] #if LOW_TRUST [] [] #endif [] do () ================================================ FILE: FParsec/CharParsers.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2019 // License: Simplified BSD License. See accompanying documentation. 
[<AutoOpen>]
module FParsec.CharParsers

open System.Diagnostics
open System.Text
open System.Text.RegularExpressions
open System.Runtime.CompilerServices // for MethodImplAttribute

#if !LOW_TRUST
open Microsoft.FSharp.NativeInterop
#endif

open FParsec
open FParsec.Internals
open FParsec.Error
open FParsec.Primitives

#nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code."
#nowarn "51" // "The address-of operator may result in non-verifiable code."

// ================
// Helper functions
// ================

/// The char value the parsers in this module treat as the end-of-stream marker.
/// It must be a literal because it is used as a pattern in `match` expressions below.
[<Literal>]
let EOS = '\uffff'

/// Alias for `Text.FoldCase` (string overload).
let foldCase = Text.FoldCase : string -> string
/// Alias for `Text.NormalizeNewlines`.
let normalizeNewlines = Text.NormalizeNewlines

/// Alias for `HexFloat.DoubleToHexString`.
let floatToHexString = HexFloat.DoubleToHexString
/// Alias for `HexFloat.DoubleFromHexString`.
let floatOfHexString = HexFloat.DoubleFromHexString

/// Alias for `HexFloat.SingleToHexString`.
let float32ToHexString = HexFloat.SingleToHexString
/// Alias for `HexFloat.SingleFromHexString`.
let float32OfHexString = HexFloat.SingleFromHexString

// ========================
// Running parsers on input
// ========================

/// The outcome of running a parser with one of the `runParserOn...` functions.
/// `Success` carries the result, the final user state and the final stream position.
/// `Failure` carries the formatted error message, the `ParserError` and the final user state.
[<StructuredFormatDisplay("{StructuredFormatDisplay}")>]
type ParserResult<'Result,'UserState> =
     | Success of 'Result * 'UserState * Position
     | Failure of string * ParserError * 'UserState
     with
        /// Text used when the value is formatted with `%A`.
        member private t.StructuredFormatDisplay =
            match t with
            | Success(r,_,_) ->
                // `sprintf "%A" ()` would not print "()", so unit results are special-cased.
                if typeof<'Result> = typeof<unit> then "Success: ()"
                else sprintf "Success: %A" r
            | Failure(msg,_,_) ->
                sprintf "Failure:\n%s" msg

/// Applies `parser` to `stream` and converts the returned `Reply` into a `ParserResult`.
let internal applyParser (parser: Parser<'Result,'UserState>) (stream: CharStream<'UserState>) =
    let reply = parser stream
    if reply.Status = Ok then
        Success(reply.Result, stream.UserState, stream.Position)
    else
        let error = ParserError(stream.Position, stream.UserState, reply.Error)
        Failure(error.ToString(stream), error, stream.UserState)

/// Runs `parser` on the complete string `chars` with the initial user state `ustate`.
/// `streamName` is passed on to `CharStream.ParseString` as the stream name.
let runParserOnString (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (chars: string) =
    CharStream.ParseString(chars, 0, chars.Length, applyParser parser, ustate, streamName)

/// Runs `parser` on the part of `chars` that starts at `index` and has the given `length`.
let runParserOnSubstring (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (chars: string) (index: int) length =
    CharStream.ParseString(chars, index, length, applyParser parser, ustate, streamName)

/// Runs `parser` on a `CharStream` constructed from `byteStream` and `encoding`.
/// The char stream is bound with `use` except in LOW_TRUST builds, where it is bound with `let`.
let runParserOnStream (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (streamName: string) (byteStream: System.IO.Stream) (encoding: System.Text.Encoding) =
#if LOW_TRUST
    let
#else
    use
#endif
      stream = new CharStream<'UserState>(byteStream, encoding)
    stream.UserState <- ustate
    stream.Name <- streamName
    applyParser parser stream

/// Runs `parser` on a `CharStream` constructed from the file `path` and `encoding`.
/// The char stream is bound with `use` except in LOW_TRUST builds, where it is bound with `let`.
let runParserOnFile (parser: Parser<'Result,'UserState>) (ustate: 'UserState) (path: string) (encoding: System.Text.Encoding) =
#if LOW_TRUST
    let
#else
    use
#endif
      stream = new CharStream<'UserState>(path, encoding)
    stream.UserState <- ustate
    applyParser parser stream

/// Shorthand for `runParserOnString parser () "" string`.
let run parser (string: string) =
    runParserOnString parser () "" string

// =======
// Parsers
// =======

// -------------------------------------------------------------
// Reading the input stream position and handling the user state
// -------------------------------------------------------------

/// Returns the current `stream.Position`. Does not touch the stream.
let getPosition : Parser<Position,'u> =
    fun stream -> Reply(stream.Position)

/// Returns the current `stream.UserState`.
let getUserState : Parser<'u,'u> =
    fun stream -> Reply(stream.UserState)

/// Replaces the user state with `newUserState`.
let setUserState (newUserState: 'u) : Parser<unit,'u> =
    fun stream ->
        stream.UserState <- newUserState
        Reply(())

/// Replaces the user state with `f` applied to the current user state.
let updateUserState (f: 'u -> 'u) : Parser<unit,'u> =
    fun stream ->
        stream.UserState <- f stream.UserState
        Reply(())

/// Succeeds if `f` returns true for the current user state; otherwise fails
/// with `Error` and no error messages.
let userStateSatisfies f : Parser<unit,'u> =
    fun stream ->
        let status = if f stream.UserState then Ok else Error
        Reply(status, (), NoErrorMessages)

// --------------------
// Parsing single chars
// --------------------

/// Returns `result` if `stream.SkipNewline()` skips a newline;
/// otherwise fails with `Errors.ExpectedNewline`.
let newlineReturn result : Parser<_,'u> =
    fun stream ->
        if stream.SkipNewline() then Reply(result)
        else Reply(Error, Errors.ExpectedNewline)

/// `newlineReturn '\n'`
let newline<'u>     = newlineReturn '\n' : Parser<_,'u>
/// `newlineReturn ()`
let skipNewline<'u> = newlineReturn  ()  : Parser<_,'u>

/// Like `newlineReturn`, but uses `stream.SkipUnicodeNewline()`.
let unicodeNewlineReturn result : Parser<_,'u> =
    fun stream ->
        if stream.SkipUnicodeNewline() then Reply(result)
        else Reply(Error, Errors.ExpectedNewline)

/// `unicodeNewlineReturn '\n'`
let unicodeNewline<'u>     = unicodeNewlineReturn '\n' : Parser<_,'u>
/// `unicodeNewlineReturn ()`
let skipUnicodeNewline<'u> = unicodeNewlineReturn  ()  : Parser<_,'u>

/// Returns `result` if `stream.Skip(c)` succeeds; otherwise fails with `error`.
/// Performs no argument validation.
let internal charReturnE (c: char) result error : Parser<'a,'u> =
    fun stream ->
        if stream.Skip(c) then Reply(result)
        else Reply(Error, error)

/// Skips the char `c` and returns `result`.
/// '\r' and '\n' are delegated to `newlineReturn`.
/// Raises an ArgumentException (via `invalidArg`) when `c` is EOS.
let charReturn c result : Parser<'a,'u> =
    match c with
    | '\r' | '\n' -> newlineReturn result
    // The backslash is escaped so that the message shows the escape sequence
    // instead of embedding the raw U+FFFF char.
    | EOS -> invalidArg "c" "The char '\\uffff' (EOS) is not a valid argument for the pchar/skipChar/charReturn parser. If you want to check for the end of the stream, consider using the `eof` parser."
    | _   -> charReturnE c result (expectedString (string c))

/// `charReturn c c`
let pchar c = charReturn c c
/// `charReturn c ()`
let skipChar c = charReturn c ()

/// returns true for chars '\u000E' - '\ufffe'
let inline internal isCertainlyNoNLOrEOS (c: char) =
    // '\n' = '\u000A', '\r' = '\u000D'
    // Single unsigned range check: chars below '\u000E' wrap around to large values.
    unativeint c - 0xEun < unativeint EOS - 0xEun

/// Reads one char or newline with `stream.ReadCharOrNewline()`.
/// Fails with `Errors.ExpectedAnyChar` if that returns EOS.
let anyChar : Parser<char,'u> =
    fun stream ->
        let c = stream.ReadCharOrNewline()
        if c <> EOS then Reply(c)
        else Reply(Error, Errors.ExpectedAnyChar)

/// Like `anyChar`, but returns unit.
let skipAnyChar : Parser<unit,'u> =
    fun stream ->
        if stream.ReadCharOrNewline() <> EOS then Reply(())
        else Reply(Error, Errors.ExpectedAnyChar)

/// Peeks the next char and, if `f` accepts it, skips and returns it;
/// otherwise fails with `error`.
// doesn't check for newlines or EOS
let
#if !NOINLINE
  inline
#endif
         internal fastInlineSatisfyE f error : Parser<char,'u> =
    fun stream ->
        let c = stream.Peek()
        if f c then
            stream.Skip()
            Reply(c)
        else
            Reply(Error, error)

/// Parses one char accepted by `f` and returns it; fails with `error` otherwise.
/// '\r' and '\n' are tested as '\n' and consumed with `SkipNewline`; EOS is never accepted.
let internal satisfyE f error : Parser<char,'u> =
    fun stream ->
        // NOTE(review): the success branches set Status explicitly, so the default
        // Reply() presumably does not have status Ok - confirm in Reply.cs.
        let mutable reply = Reply()
        match stream.Peek() with
        | c when isCertainlyNoNLOrEOS c ->
            // fast path: c is neither a newline char nor EOS
            if f c then
                stream.Skip()
                reply.Status <- Ok
                reply.Result <- c
            else
                reply.Error <- error
        | '\r' | '\n' ->
            if f '\n' then
                stream.SkipNewline() |> ignore
                reply.Status <- Ok
                reply.Result <- '\n'
            else
                reply.Error <- error
        | c ->
            // chars below '\u000E' (other than '\r' and '\n') and EOS
            if c <> EOS && f c then
                stream.Skip()
                reply.Status <- Ok
                reply.Result <- c
            else
                reply.Error <- error
        reply

/// Like `satisfyE`, but returns unit instead of the parsed char.
let internal skipSatisfyE f error : Parser<unit,'u> =
    fun stream ->
        let mutable reply = Reply()
        match stream.Peek() with
        | c when isCertainlyNoNLOrEOS c ->
            if f c then
                stream.Skip()
                reply.Status <- Ok
            else
                reply.Error <- error
        | '\r' | '\n' ->
            if f '\n' then
                stream.SkipNewline() |> ignore
                reply.Status <- Ok
            else
                reply.Error <- error
        | c ->
            if c <> EOS && f c then
                stream.Skip()
                reply.Status <- Ok
            else
                reply.Error <- error
        reply

/// `satisfyE f NoErrorMessages`
let satisfy f        = satisfyE f NoErrorMessages
/// `satisfyE f (expected label)`
let satisfyL f label = satisfyE f (expected label)

/// `skipSatisfyE f NoErrorMessages`
let skipSatisfy f        = skipSatisfyE f NoErrorMessages
/// `skipSatisfyE f (expected label)`
let skipSatisfyL f label = skipSatisfyE f (expected label)

/// Returns `chars` itself if it already is a string; otherwise copies the chars into a new string.
let private charsToString (chars: seq<char>) =
    match chars with
    | :? string as str -> str
    | _ -> new string(Array.ofSeq chars)

/// Returns a predicate that is true for exactly the chars contained in `chars`.
let isAnyOf (chars: seq<char>) =
#if LOW_TRUST
    let cs = new CharSet(charsToString chars)
    fun c -> cs.Contains(c)
#else
  #if USE_STATIC_MAPPING_FOR_IS_ANY_OF
    StaticMapping.createStaticCharIndicatorFunction false chars
  #else
    let cs = new CharSet(charsToString chars)
    fun c -> cs.Contains(c)
  #endif
#endif

/// Returns a predicate that is true for exactly the chars not contained in `chars`.
let isNoneOf (chars: seq<char>) =
#if LOW_TRUST
    let cs = new CharSet(charsToString chars)
    fun c -> not (cs.Contains(c))
#else
  #if USE_STATIC_MAPPING_FOR_IS_ANY_OF
    StaticMapping.createStaticCharIndicatorFunction true chars
  #else
    let cs = new CharSet(charsToString chars)
    fun c -> not (cs.Contains(c))
  #endif
#endif

/// Parses any char contained in `chars` and returns it.
let anyOf (chars: seq<char>) =
    let str = charsToString chars
    satisfyE (isAnyOf str) (Errors.ExpectedAnyCharIn(str))

/// Like `anyOf`, but returns unit.
let skipAnyOf (chars: seq<char>) =
    let str = charsToString chars
    skipSatisfyE (isAnyOf str) (Errors.ExpectedAnyCharIn(str))

/// Parses any char not contained in `chars` and returns it.
let noneOf (chars: seq<char>) =
    let str = charsToString chars
    satisfyE (isNoneOf str) (Errors.ExpectedAnyCharNotIn(str))

/// Like `noneOf`, but returns unit.
let skipNoneOf (chars: seq<char>) =
    let str = charsToString chars
    skipSatisfyE (isNoneOf str) (Errors.ExpectedAnyCharNotIn(str))

// The ASCII predicates below use a single unsigned subtraction and comparison
// as a range check; values below the lower bound wrap around and fail the test.

/// True for 'A' - 'Z'.
let inline isAsciiUpper (c: char) =
    uint32 c - uint32 'A' <= uint32 'Z' - uint32 'A'

/// True for 'a' - 'z'.
let inline isAsciiLower (c: char) =
    uint32 c - uint32 'a' <= uint32 'z' - uint32 'a'

/// True for 'A' - 'Z' and 'a' - 'z'.
let inline isAsciiLetter (c: char) =
    // OR-ing with ' ' (0x20) maps ASCII uppercase letters onto lowercase ones
    let cc = uint32 c ||| uint32 ' '
    cc - uint32 'a' <= uint32 'z' - uint32 'a'

/// True for ASCII uppercase letters and for chars above '\u007F' accepted by `System.Char.IsUpper`.
let inline isUpper (c: char) =
    isAsciiUpper c || (c > '\u007F' && System.Char.IsUpper(c))

/// True for ASCII lowercase letters and for chars above '\u007F' accepted by `System.Char.IsLower`.
let inline isLower (c: char) =
    isAsciiLower c || (c > '\u007F' && System.Char.IsLower(c))

/// True for ASCII letters and for chars above '\u007F' accepted by `System.Char.IsLetter`.
let inline isLetter (c: char) =
    isAsciiLetter c || (c > '\u007F' && System.Char.IsLetter(c))

/// True for '0' - '9'.
let inline isDigit (c: char) =
    uint32 c - uint32 '0' <= uint32 '9' - uint32 '0'

/// True for '0' - '9', 'a' - 'f' and 'A' - 'F'.
let inline isHex (c: char) =
    let cc = uint32 c ||| uint32 ' '
    isDigit c || cc - uint32 'a' <= uint32 'f' - uint32 'a'

/// True for '0' - '7'.
let inline isOctal (c: char) =
    uint32 c - uint32 '0' <= uint32 '7' - uint32 '0'

/// Parses a char accepted by `isAsciiUpper`.
let asciiUpper stream  = fastInlineSatisfyE isAsciiUpper  Errors.ExpectedAsciiUppercaseLetter stream
/// Parses a char accepted by `isAsciiLower`.
let asciiLower stream  = fastInlineSatisfyE isAsciiLower  Errors.ExpectedAsciiLowercaseLetter stream
/// Parses a char accepted by `isAsciiLetter`.
let asciiLetter stream = fastInlineSatisfyE isAsciiLetter Errors.ExpectedAsciiLetter stream

// unicode is the default for letters and ascii the default for numbers
/// Parses a char accepted by `isUpper`.
let upper stream  = fastInlineSatisfyE isUpper  Errors.ExpectedUppercaseLetter stream
/// Parses a char accepted by `isLower`.
let lower stream  = fastInlineSatisfyE isLower  Errors.ExpectedLowercaseLetter stream
/// Parses a char accepted by `isLetter`.
let letter stream = fastInlineSatisfyE isLetter Errors.ExpectedLetter stream

/// Parses a char accepted by `isDigit`.
let digit stream = fastInlineSatisfyE isDigit Errors.ExpectedDecimalDigit stream
/// Parses a char accepted by `isHex`.
let hex stream   = fastInlineSatisfyE isHex   Errors.ExpectedHexadecimalDigit stream
/// Parses a char accepted by `isOctal`.
let octal stream = fastInlineSatisfyE isOctal Errors.ExpectedOctalDigit stream

/// Parses the tab char '\t'.
let tab stream = fastInlineSatisfyE ((=) '\t') Errors.ExpectedTab stream

/// Calls `stream.SkipWhitespace()` and always succeeds.
let spaces : Parser<unit,'u> =
    fun stream ->
        stream.SkipWhitespace() |> ignore
        Reply(())

/// Succeeds if `stream.SkipWhitespace()` returns true; otherwise fails with `Errors.ExpectedWhitespace`.
let spaces1 : Parser<unit,'u> =
    fun stream ->
        if stream.SkipWhitespace() then Reply(())
        else Reply(Error, Errors.ExpectedWhitespace)

/// Calls `stream.SkipUnicodeWhitespace()` and always succeeds.
let unicodeSpaces : Parser<unit,'u> =
    fun stream ->
        stream.SkipUnicodeWhitespace() |> ignore
        Reply(())

/// Succeeds if `stream.SkipUnicodeWhitespace()` returns true; otherwise fails with `Errors.ExpectedWhitespace`.
let unicodeSpaces1 : Parser<unit,'u> =
    fun stream ->
        if stream.SkipUnicodeWhitespace() then Reply(())
        else Reply(Error, Errors.ExpectedWhitespace)

/// Succeeds if `stream.IsEndOfStream` is true; otherwise fails with `Errors.ExpectedEndOfInput`.
let eof : Parser<unit,'u> =
    fun stream ->
        if stream.IsEndOfStream then Reply(())
        else Reply(Error, Errors.ExpectedEndOfInput)

// ------------------------
// Parsing strings directly
// ------------------------

/// Raises the ArgumentException for a string argument `arg` of the parser(s) `name`
/// that contains a newline or EOS char at index `i`.
let internal newlineOrEOSCharInStringArg name (arg: string) i =
    // Backslashes are escaped so that the message shows the escape sequences
    // instead of embedding raw CR, LF and U+FFFF chars.
    let msg2 = match arg[i] with
               |'\r'|'\n' -> " may not contain newline chars ('\\r' or '\\n')."
               | EOS      -> " may not contain the char '\\uffff' (EOS)."
               | _        -> failwith "newlineOrEOSCharInStringArg"
    raise (System.ArgumentException(concat3 "The string argument to " name msg2))

/// Raises an ArgumentException if `findNewlineOrEOSChar s` reports a newline or EOS char in `s`.
let internal checkStringContainsNoNewlineOrEOSChar s name =
    let i = findNewlineOrEOSChar s
    if i >= 0 then newlineOrEOSCharInStringArg name s i

/// Skips the string `s` and returns `result`; fails with `expectedString s` otherwise.
/// Strings of length 0, 1 and 2 are dispatched to specialized implementations.
/// Raises an ArgumentException if `s` contains a newline or EOS char.
let stringReturn s result : Parser<'a,'u> =
    let inline checkNoNewlineOrEOSChar c i =
        if not (isCertainlyNoNLOrEOS c) then
            match c with
            |'\r'|'\n'|EOS -> newlineOrEOSCharInStringArg "pstring/skipString/stringReturn" s i
            | _ -> ()

    let error = expectedString s
    match s.Length with
    | 0 -> preturn result
    | 1 ->
        let c = s[0]
        checkNoNewlineOrEOSChar c 0
        charReturnE c result error
    | 2 ->
        let c0, c1 = s[0], s[1]
        checkNoNewlineOrEOSChar c0 0
        checkNoNewlineOrEOSChar c1 1
        let cs = TwoChars(c0, c1)
        fun stream ->
            if stream.Skip(cs) then Reply(result)
            else Reply(Error, error)
    | _ ->
        checkStringContainsNoNewlineOrEOSChar s "pstring/skipString/stringReturn"
        fun stream ->
            if stream.Skip(s) then Reply(result)
            else Reply(Error, error)

/// `stringReturn s s`
let pstring s    = stringReturn s s
/// `stringReturn s ()`
let skipString s = stringReturn s ()

/// Skips the case-folded form of `s` with `stream.SkipCaseFolded` and returns the
/// chars that were actually skipped in the input.
/// Raises an ArgumentException if `s` contains a newline or EOS char.
let pstringCI s : Parser<string,'u> =
    checkStringContainsNoNewlineOrEOSChar s "pstringCI"
    let error = expectedStringCI s
    let cfs = foldCase s
    fun stream ->
        let index0 = stream.IndexToken
        if stream.SkipCaseFolded(cfs) then
            Reply(stream.ReadFrom(index0))
        else
            Reply(Error, error)

/// Skips the case-folded form of `s` with `stream.SkipCaseFolded` and returns `result`.
/// Single-char strings use the char overload of `SkipCaseFolded`.
/// Raises an ArgumentException if `s` contains a newline or EOS char.
let stringCIReturn (s: string) result : Parser<'a,'u> =
    let error = expectedStringCI s
    if s.Length = 1 then
        let c = s[0]
        if not (isCertainlyNoNLOrEOS c) then
            match c with
            |'\r'|'\n'|EOS -> newlineOrEOSCharInStringArg "skipStringCI/stringCIReturn" s 0
            | _ -> ()
        let cfc = Text.FoldCase(c)
        fun stream ->
            if stream.SkipCaseFolded(cfc) then Reply(result)
            else Reply(Error, error)
    else
        checkStringContainsNoNewlineOrEOSChar s "skipStringCI/stringCIReturn"
        let cfs = foldCase s
        fun stream ->
            if stream.SkipCaseFolded(cfs) then Reply(result)
            else Reply(Error, error)

/// `stringCIReturn s ()`
let skipStringCI s = stringCIReturn s ()

/// Reads `n` chars or newlines and returns them as a string.
/// If fewer than `n` could be read, backtracks to the initial state and fails.
let anyString n : Parser<string,'u> =
    let error = Errors.ExpectedAnySequenceOfNChars(n)
    fun stream ->
        let state = stream.State
        let str = stream.ReadCharsOrNewlines(n, true)
        if str.Length = n then Reply(str)
        else
            stream.BacktrackTo(state)
            Reply(Error, error)

/// Like `anyString`, but skips the chars instead of returning them.
let skipAnyString n : Parser<unit,'u> =
    let error = Errors.ExpectedAnySequenceOfNChars(n)
    fun stream ->
        let state = stream.State
        if stream.SkipCharsOrNewlines(n) = n then Reply(())
        else
            stream.BacktrackTo(state)
            Reply(Error, error)

/// Returns the result of `stream.ReadRestOfLine(skipNewline)`. Always succeeds.
let restOfLine skipNewline : Parser<_,_> =
    fun stream ->
        Reply(stream.ReadRestOfLine(skipNewline))

/// Calls `stream.SkipRestOfLine(skipNewline)`. Always succeeds.
let skipRestOfLine skipNewline : Parser<_,_> =
    fun stream ->
        stream.SkipRestOfLine(skipNewline)
        Reply(())

/// Returns the chars before the next occurrence of `s`, searching with
/// `stream.SkipCharsOrNewlinesUntilString` and the limit `maxCount`.
/// If `skipString` is true, `s` is skipped too. Fails with `Errors.CouldNotFindString(s)`
/// when the search does not yield a string.
/// Raises an ArgumentException if `s` contains a newline or EOS char and an
/// ArgumentOutOfRangeException if `maxCount` is negative.
let charsTillString (s: string) skipString maxCount : Parser<string,'u> =
    checkStringContainsNoNewlineOrEOSChar s "charsTillString"
    if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative."))
    let error = Errors.CouldNotFindString(s)
    fun stream ->
        let mutable charsBeforeString = null
        stream.SkipCharsOrNewlinesUntilString(s, maxCount, true, &charsBeforeString) |> ignore
        // a non-null out value signals that the string was found
        if isNotNull charsBeforeString then
            if skipString then stream.Skip(s.Length)
            Reply(charsBeforeString)
        else
            Reply(Error, error)

/// Case-insensitive variant of `charsTillString`; searches for the case-folded form of `s`
/// with `stream.SkipCharsOrNewlinesUntilCaseFoldedString`.
let charsTillStringCI (s: string) skipString maxCount : Parser<string,'u> =
    checkStringContainsNoNewlineOrEOSChar s "charsTillStringCI"
    if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative."))
    let cfs = foldCase s
    let error = Errors.CouldNotFindCaseInsensitiveString(s)
    fun stream ->
        let mutable charsBeforeString = null
        stream.SkipCharsOrNewlinesUntilCaseFoldedString(cfs, maxCount, true, &charsBeforeString) |> ignore
        if isNotNull charsBeforeString then
            if skipString then stream.Skip(s.Length)
            Reply(charsBeforeString)
        else
            Reply(Error, error)

/// Like `charsTillString`, but does not return the skipped chars.
let skipCharsTillString (s: string) skipString maxCount : Parser<unit,'u> =
    checkStringContainsNoNewlineOrEOSChar s "skipCharsTillString"
    if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative."))
    let error = Errors.CouldNotFindString(s)
    fun stream ->
        let mutable foundString = false
        stream.SkipCharsOrNewlinesUntilString(s, maxCount, &foundString) |> ignore
        if foundString then
            if skipString then stream.Skip(s.Length)
            Reply(())
        else
            Reply(Error, error)

/// Like `charsTillStringCI`, but does not return the skipped chars.
let skipCharsTillStringCI (s: string) skipString maxCount : Parser<unit,'u> =
    checkStringContainsNoNewlineOrEOSChar s "skipCharsTillStringCI"
    if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative."))
    let cfs = foldCase s
    let error = Errors.CouldNotFindCaseInsensitiveString(s)
    fun stream ->
        let mutable foundString = false
        stream.SkipCharsOrNewlinesUntilCaseFoldedString(cfs, maxCount, &foundString) |> ignore
        if foundString then
            if skipString then stream.Skip(s.Length)
            Reply(())
        else
            Reply(Error, error)

/// Reads chars with `stream.ReadCharsOrNewlinesWhile(f1, f, true)` and returns them.
/// When `require1` is true and nothing was read, fails with `error`.
let
#if !NOINLINE
  inline
#endif
         internal manySatisfyImpl require1 (f1: char -> bool) (f: char -> bool) error : Parser<string,'u> =
    fun stream ->
        let str = stream.ReadCharsOrNewlinesWhile(f1, f, true)
        if not require1 || str.Length <> 0 then Reply(str)
        else Reply(Error, error)

/// Skips chars with `stream.SkipCharsOrNewlinesWhile(f1, f)`.
/// When `require1` is true and nothing was skipped, fails with `error`.
let
#if !NOINLINE
  inline
#endif
         internal skipManySatisfyImpl require1 (f1: char -> bool) (f: char -> bool) error : Parser<unit,'u> =
    fun stream ->
        let n = stream.SkipCharsOrNewlinesWhile(f1, f)
        if not require1 || n <> 0 then Reply(())
        else Reply(Error, error)

/// Parses chars where the first is tested with `f1` and the rest with `f`; may return an empty string.
let manySatisfy2   f1 f       = manySatisfyImpl false f1 f NoErrorMessages
/// Like `manySatisfy2`, but requires at least one char.
let many1Satisfy2  f1 f       = manySatisfyImpl true  f1 f NoErrorMessages
/// Like `many1Satisfy2`, but fails with `expected label`.
let many1Satisfy2L f1 f label = manySatisfyImpl true  f1 f (expected label)

/// Like `manySatisfy2`, but returns unit.
let skipManySatisfy2   f1 f       = skipManySatisfyImpl false f1 f NoErrorMessages
/// Like `many1Satisfy2`, but returns unit.
let skipMany1Satisfy2  f1 f       = skipManySatisfyImpl true  f1 f NoErrorMessages
/// Like `many1Satisfy2L`, but returns unit.
let skipMany1Satisfy2L f1 f label = skipManySatisfyImpl true  f1 f (expected label)

/// `manySatisfy2 f f`
let manySatisfy   f       = manySatisfy2   f f
/// `many1Satisfy2 f f`
let many1Satisfy  f       = many1Satisfy2  f f
// The `let` below opens a binding whose remainder lies beyond this chunk; it is kept verbatim.
let
many1SatisfyL f label = many1Satisfy2L f f label let skipManySatisfy f = skipManySatisfy2 f f let skipMany1Satisfy f = skipMany1Satisfy2 f f let skipMany1SatisfyL f label = skipMany1Satisfy2L f f label let internal manyMinMaxSatisfy2E minCount maxCount f1 f error : Parser = if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) if minCount > 0 then fun stream -> let str = stream.ReadCharsOrNewlinesWhile(f1, f, minCount, maxCount, true) if str.Length <> 0 then Reply(str) else Reply(Error, error) else fun stream -> Reply(stream.ReadCharsOrNewlinesWhile(f1, f, 0, maxCount, true)) let internal skipManyMinMaxSatisfy2E minCount maxCount f1 f error : Parser = if maxCount < 0 then raise (System.ArgumentOutOfRangeException("maxCount", "maxCount is negative.")) if minCount > 0 then fun stream -> let n = stream.SkipCharsOrNewlinesWhile(f1, f, minCount, maxCount) if n <> 0 then Reply(()) else Reply(Error, error) else fun stream -> stream.SkipCharsOrNewlinesWhile(f1, f, 0, maxCount) |> ignore Reply(()) let manyMinMaxSatisfy minCount maxCount f = manyMinMaxSatisfy2E minCount maxCount f f NoErrorMessages let manyMinMaxSatisfyL minCount maxCount f label = manyMinMaxSatisfy2E minCount maxCount f f (expected label) let manyMinMaxSatisfy2 minCount maxCount f1 f = manyMinMaxSatisfy2E minCount maxCount f1 f NoErrorMessages let manyMinMaxSatisfy2L minCount maxCount f1 f label = manyMinMaxSatisfy2E minCount maxCount f1 f (expected label) let skipManyMinMaxSatisfy minCount maxCount f = skipManyMinMaxSatisfy2E minCount maxCount f f NoErrorMessages let skipManyMinMaxSatisfyL minCount maxCount f label = skipManyMinMaxSatisfy2E minCount maxCount f f (expected label) let skipManyMinMaxSatisfy2 minCount maxCount f1 f = skipManyMinMaxSatisfy2E minCount maxCount f1 f NoErrorMessages let skipManyMinMaxSatisfy2L minCount maxCount f1 f label = skipManyMinMaxSatisfy2E minCount maxCount f1 f (expected label) let internal regexE pattern error : Parser = let 
regex = new Regex("\\A" + pattern, RegexOptions.Multiline ||| RegexOptions.ExplicitCapture) fun stream -> let m = stream.Match(regex) if m.Success then let str = m.Value if findNewlineOrEOSChar str < 0 then if str.Length <> 0 then stream.Skip(str.Length) Reply(str) else let nStr = normalizeNewlines str let mutable nSkippedChars = 0 let n = stream.SkipCharsOrNewlines(nStr.Length) if n = nStr.Length then Reply(nStr) else Reply(FatalError, messageError "Internal error in the regex parser. Please report this error to fparsec@quanttec.com.") else Reply(Error, error) let regex pattern = regexE pattern (Errors.ExpectedStringMatchingRegex(pattern)) let regexL pattern label = regexE pattern (expected label) type private IdFlags = IdentifierValidator.IdentifierCharFlags type IdentifierOptions(?isAsciiIdStart, ?isAsciiIdContinue, ?normalization, ?normalizeBeforeValidation, ?allowJoinControlChars, ?preCheckStart, ?preCheckContinue, ?allowAllNonAsciiCharsInPreCheck, ?label, ?invalidCharMessage) = // we use match instead of defaultArg here, so that the function wrapper objects only get constructed when needed let isAsciiIdStart = match isAsciiIdStart with Some v -> v | _ -> IdentifierValidator.IsXIdStartOrSurrogate let isAsciiIdContinue = match isAsciiIdContinue with Some v -> v | _ -> IdentifierValidator.IsXIdContinueOrSurrogate let normalizationForm = defaultArg normalization (enum 0) let normalizeBeforeValidation = defaultArg normalizeBeforeValidation false let allowJoinControlChars = defaultArg allowJoinControlChars false let expectedIdentifierError = expected (defaultArg label Strings.Identifier) let invalidCharError = messageError (defaultArg invalidCharMessage Strings.IdentifierContainsInvalidCharacterAtIndicatedPosition) let allowAllNonAsciiCharsInPreCheck = defaultArg allowAllNonAsciiCharsInPreCheck false let preCheckStart = if preCheckStart.IsSome then preCheckStart.Value elif allowAllNonAsciiCharsInPreCheck then isAsciiIdStart else Unchecked.defaultof<_> let 
preCheckContinue = if preCheckContinue.IsSome then preCheckContinue.Value elif allowAllNonAsciiCharsInPreCheck then isAsciiIdContinue else Unchecked.defaultof<_> let asciiOptions = Array.zeroCreate 128 do for i = 1 to 127 do let c = char i let mutable v = IdFlags.None if isAsciiIdStart c then v <- v ||| IdFlags.NonContinue if isAsciiIdContinue c then v <- v ||| IdFlags.Continue if allowAllNonAsciiCharsInPreCheck then if preCheckStart c then v <- v ||| IdFlags.PreCheckNonContinue if preCheckContinue c then v <- v ||| IdFlags.PreCheckContinue asciiOptions[i] <- v let iv = new IdentifierValidator(asciiOptions) do iv.NormalizationForm <- normalizationForm iv.NormalizeBeforeValidation <- normalizeBeforeValidation iv.AllowJoinControlCharsAsIdContinueChars <- allowJoinControlChars let preCheck1 = if allowAllNonAsciiCharsInPreCheck then fun c -> let i = int c if i <= 0x7f then // not (x = y) currently yields better code here than (x <> y) not (asciiOptions[int c] &&& IdFlags.PreCheckNonContinue = IdFlags.None) else true elif isNotNull preCheckStart then preCheckStart else iv.IsIdStartOrSurrogateFunc let preCheck = if allowAllNonAsciiCharsInPreCheck then fun c -> let i = int c if i <= 0x7f then not (asciiOptions[i] &&& IdFlags.PreCheckContinue = IdFlags.None) else true elif isNotNull preCheckContinue then preCheckContinue else iv.IsIdContinueOrJoinControlOrSurrogateFunc member internal t.IdentifierValidator = iv member internal t.PreCheck1 = preCheck1 member internal t.PreCheck = preCheck member internal t.ExpectedIdentifierError = expectedIdentifierError member internal t.InvalidCharError = invalidCharError let identifier (identifierOptions: IdentifierOptions) : Parser = let validator = identifierOptions.IdentifierValidator let preCheck1 = identifierOptions.PreCheck1 let preCheck = identifierOptions.PreCheck let expectedIdentifierError = identifierOptions.ExpectedIdentifierError let invalidCharError = identifierOptions.InvalidCharError fun stream -> let str = 
stream.ReadCharsOrNewlinesWhile(preCheck1, preCheck, true) if str.Length <> 0 then let mutable errorPos = 0 let nstr = validator.ValidateAndNormalize(str, &errorPos) if isNotNull nstr then Reply(nstr) else stream.Skip(errorPos - str.Length) Reply(FatalError, invalidCharError) else Reply(Error, expectedIdentifierError) // ---------------------------------------------- // Parsing strings with the help of other parsers // ---------------------------------------------- let manyChars2 p1 p = ManyChars(p1, p).AsFSharpFunc let manyChars p = manyChars2 p p let many1Chars2 p1 p = Many1Chars(p1, p).AsFSharpFunc let many1Chars p = many1Chars2 p p let manyCharsTillApply2 p1 p endp f = ManyCharsTill(p1, p, endp, f).AsFSharpFunc let manyCharsTillApply p endp f = manyCharsTillApply2 p p endp f let manyCharsTill2 p1 p endp = manyCharsTillApply2 p1 p endp (fun str _ -> str) let manyCharsTill p endp = manyCharsTill2 p p endp let many1CharsTillApply2 p1 p endp f = Many1CharsTill(p1, p, endp, f).AsFSharpFunc let many1CharsTillApply p endp f = many1CharsTillApply2 p p endp f let many1CharsTill2 p1 p endp = many1CharsTillApply2 p1 p endp (fun str _ -> str) let many1CharsTill p endp = many1CharsTill2 p p endp let #if !NOINLINE inline #endif internal manyStringsImpl require1 (p1: Parser) (p: Parser) : Parser = fun stream -> let mutable stateTag = stream.StateTag let mutable reply = p1 stream if reply.Status = Ok then let result1 = reply.Result let mutable error = reply.Error stateTag <- stream.StateTag reply <- p stream if reply.Status <> Ok then reply.Result <- result1 else let result2 = reply.Result error <- reply.Error stateTag <- stream.StateTag reply <- p stream if reply.Status <> Ok then reply.Result <- result1 + result2 else let result3 = reply.Result error <- reply.Error stateTag <- stream.StateTag reply <- p stream if reply.Status <> Ok then reply.Result <- concat3 result1 result2 result3 else let result4 = reply.Result error <- reply.Error stateTag <- stream.StateTag reply <- p 
stream if reply.Status <> Ok then reply.Result <- concat4 result1 result2 result3 result4 else let n = 2*(result1.Length + result2.Length + result3.Length + result4.Length) + reply.Result.Length let sb = new StringBuilder(n) sb.Append(result1).Append(result2).Append(result3).Append(result4).Append(reply.Result) |> ignore error <- reply.Error stateTag <- stream.StateTag reply <- p stream while reply.Status = Ok do if stateTag = stream.StateTag then raiseInfiniteLoopException "manyStrings" stream error <- reply.Error sb.Append(reply.Result) |> ignore stateTag <- stream.StateTag reply <- p stream reply.Result <- sb.ToString() // We assume that the string parser changes the state when it succeeds, // so we don't need to merge more than 2 error message lists. if stateTag = stream.StateTag then if reply.Status = Error then reply.Status <- Ok if isNotNull error then reply.Error <- mergeErrors error reply.Error elif not require1 && reply.Status = Error && stateTag = stream.StateTag then reply.Status <- Ok reply.Result <- "" reply let manyStrings2 p1 p = manyStringsImpl false p1 p let manyStrings p = manyStrings2 p p let many1Strings2 p1 p = manyStringsImpl true p1 p let many1Strings p = many1Strings2 p p let #if !NOINLINE inline #endif internal stringsSepByImpl require1 (p: Parser) (sep: Parser) : Parser = fun stream -> let mutable stateTag = stream.StateTag let mutable reply = p stream if reply.Status = Ok then let result1 = reply.Result let mutable error = reply.Error stateTag <- stream.StateTag reply <- sep stream if reply.Status <> Ok then if stateTag = stream.StateTag then if reply.Status = Error then reply.Status <- Ok reply.Result <- result1 if isNotNull error then reply.Error <- mergeErrors error reply.Error else // We assume that at least one of the parsers sep and p consume // input when both are called consecutively and succeed. This // way we only have to merge a maximum of 3 error message lists. 
let mutable result = null let mutable error0 = error let mutable stateTag0 = stateTag let result2 = reply.Result error <- reply.Error stateTag <- stream.StateTag reply <- p stream if reply.Status = Ok then let result3 = reply.Result error0 <- error stateTag0 <- stateTag error <- reply.Error stateTag <- stream.StateTag reply <- sep stream if reply.Status <> Ok then result <- concat3 result1 result2 result3 else let result4 = reply.Result error0 <- error stateTag0 <- stateTag error <- reply.Error stateTag <- stream.StateTag reply <- p stream if reply.Status = Ok then let n = 2*(result1.Length + result2.Length + result3.Length + result4.Length) + reply.Result.Length let sb = new StringBuilder(n) sb.Append(result1).Append(result2).Append(result3).Append(result4) |> ignore while reply.Status = Ok do sb.Append(reply.Result) |> ignore error0 <- error stateTag0 <- stateTag error <- reply.Error stateTag <- stream.StateTag reply <- sep stream if reply.Status <> Ok then result <- sb.ToString() else sb.Append(reply.Result) |> ignore if stateTag0 = stream.StateTag then raiseInfiniteLoopException "stringsSepBy" stream error0 <- error stateTag0 <- stateTag error <- reply.Error stateTag <- stream.StateTag reply <- p stream if stateTag = stream.StateTag then if isNotNull result && reply.Status = Error then reply.Status <- Ok reply.Result <- result error <- mergeErrors error reply.Error if stateTag0 = stateTag then error <- mergeErrors error0 error reply.Error <- error elif not require1 && reply.Status = Error && stateTag = stream.StateTag then reply.Status <- Ok reply.Result <- "" reply let stringsSepBy p sep = stringsSepByImpl false p sep let stringsSepBy1 p sep = stringsSepByImpl true p sep let skipped (p: Parser) : Parser = fun stream -> let index0 = stream.IndexToken let line0 = stream.Line let reply = p stream if reply.Status = Ok then let str = stream.ReadFrom(index0) let nstr = if line0 = stream.Line then str else Text.NormalizeNewlines(str) Reply(Ok, nstr, reply.Error) else 
Reply(reply.Status, reply.Error) let withSkippedString (f: string -> 'a -> 'b) (p: Parser<'a,'u>) : Parser<'b,'u> = let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f) fun stream -> let index0 = stream.IndexToken let line0 = stream.Line let reply = p stream if reply.Status = Ok then let str = stream.ReadFrom(index0) let nstr = if line0 = stream.Line then str else Text.NormalizeNewlines(str) let result = optF.Invoke(nstr, reply.Result) Reply(Ok, result, reply.Error) else Reply(reply.Status, reply.Error) // --------------- // Parsing numbers // --------------- [] type NumberLiteralOptions = | None = 0 | AllowSuffix = 0b000000000001 | AllowMinusSign = 0b000000000010 | AllowPlusSign = 0b000000000100 | AllowFraction = 0b000000001000 | AllowFractionWOIntegerPart = 0b000000010000 | AllowExponent = 0b000000100000 | AllowHexadecimal = 0b000001000000 | AllowBinary = 0b000010000000 | AllowOctal = 0b000100000000 | AllowInfinity = 0b001000000000 | AllowNaN = 0b010000000000 | IncludeSuffixCharsInString = 0b100000000000 | DefaultInteger = 0b000111000110 | DefaultUnsignedInteger = 0b000111000000 | DefaultFloat = 0b011001101110 type internal NLO = NumberLiteralOptions [] type NumberLiteralResultFlags = | None = 0 | SuffixLengthMask = 0b0000000000001111 | HasMinusSign = 0b0000000000010000 | HasPlusSign = 0b0000000000100000 | HasIntegerPart = 0b0000000001000000 | HasFraction = 0b0000000010000000 | HasExponent = 0b0000000100000000 | IsDecimal = 0b0000001000000000 | IsHexadecimal = 0b0000010000000000 | IsBinary = 0b0000100000000000 | IsOctal = 0b0001000000000000 | BaseMask = 0b0001111000000000 | IsInfinity = 0b0010000000000000 | IsNaN = 0b0100000000000000 type internal NLF = NumberLiteralResultFlags type NumberLiteral(string, info, suffixChar1, suffixChar2, suffixChar3, suffixChar4) = member t.String = string member t.SuffixLength = int (info &&& NLF.SuffixLengthMask) member t.SuffixChar1 = suffixChar1 member t.SuffixChar2 = suffixChar2 member t.SuffixChar3 = suffixChar3 member 
t.SuffixChar4 = suffixChar4 member t.Info = info member t.HasMinusSign = int (info &&& NLF.HasMinusSign) <> 0 member t.HasPlusSign = int (info &&& NLF.HasPlusSign) <> 0 member t.HasIntegerPart = int (info &&& NLF.HasIntegerPart) <> 0 member t.HasFraction = int (info &&& NLF.HasFraction) <> 0 member t.HasExponent = int (info &&& NLF.HasExponent) <> 0 member t.IsInteger = int (info &&& (NLF.HasFraction ||| NLF.HasExponent)) = 0 // HasIntegerPart must be set if HasFraction and HasExponent both aren't member t.IsDecimal = int (info &&& NLF.IsDecimal) <> 0 member t.IsHexadecimal = int (info &&& NLF.IsHexadecimal) <> 0 member t.IsBinary = int (info &&& NLF.IsBinary) <> 0 member t.IsOctal = int (info &&& NLF.IsOctal) <> 0 member t.IsNaN = int (info &&& NLF.IsNaN) <> 0 member t.IsInfinity = int (info &&& NLF.IsInfinity) <> 0 override t.Equals(other: obj) = match other with | :? NumberLiteral as other -> t.String = other.String && t.Info = other.Info && t.SuffixChar1 = other.SuffixChar1 && t.SuffixChar2 = other.SuffixChar2 && t.SuffixChar3 = other.SuffixChar3 && t.SuffixChar4 = other.SuffixChar4 | _ -> false override t.GetHashCode() = if isNotNull string then string.GetHashCode() else 0 let numberLiteralE (opt: NumberLiteralOptions) (errorInCaseNoLiteralFound: ErrorMessageList) (stream: CharStream<'u>) = let index0 = stream.IndexToken let stateTag = stream.StateTag let mutable c = stream.Peek() let mutable error = NoErrorMessages let mutable flags = NLF.None if c = '-' && (opt &&& NLO.AllowMinusSign) <> NLO.None then flags <- NLF.HasMinusSign c <- stream.SkipAndPeek() elif c = '+' && (opt &&& NLO.AllowPlusSign) <> NLO.None then flags <- NLF.HasPlusSign c <- stream.SkipAndPeek() let allowStartingPoint = NLO.AllowFraction ||| NLO.AllowFractionWOIntegerPart // for starting point both flags are required if isDigit c || (c = '.' 
&& (opt &&& allowStartingPoint) = allowStartingPoint) then let mutable c1 = '\u0000' if c <> '0' || (c1 <- stream.SkipAndPeek(); c1 <= '9' || (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None || ((int c1 ||| int ' ') = int 'e')) then flags <- flags ||| NLF.IsDecimal if c <> '.' then flags <- flags ||| NLF.HasIntegerPart if c <> '0' then c <- stream.SkipAndPeek() else c <- c1 while isDigit c do c <- stream.SkipAndPeek() if c = '.' && (opt &&& NLO.AllowFraction) <> NLO.None then flags <- flags ||| NLF.HasFraction c <- stream.SkipAndPeek() if isDigit c then c <- stream.SkipAndPeek() elif (flags &&& NLF.HasIntegerPart) = NLF.None then // at least one digit before or after the . is required error <- Errors.ExpectedDecimalDigit while isDigit c do c <- stream.SkipAndPeek() if (int c ||| int ' ') = int 'e' && isNull error && (opt &&& NLO.AllowExponent) <> NLO.None then flags <- flags ||| NLF.HasExponent c <- stream.SkipAndPeek() if c = '-' || c = '+' then c <- stream.SkipAndPeek() if not (isDigit c) then error <- Errors.ExpectedDecimalDigit while isDigit c do c <- stream.SkipAndPeek() else match int c1 ||| int ' ' with | 0x78 (* 'x' *) when (opt &&& NLO.AllowHexadecimal) <> NLO.None -> flags <- flags ||| NLF.IsHexadecimal c <- stream.SkipAndPeek() if isHex c then flags <- flags ||| NLF.HasIntegerPart c <- stream.SkipAndPeek() elif (opt &&& NLO.AllowFractionWOIntegerPart) = NLO.None then // integer part required error <- Errors.ExpectedHexadecimalDigit while isHex c do c <- stream.SkipAndPeek() if c = '.' && isNull error && (opt &&& NLO.AllowFraction) <> NLO.None then flags <- flags ||| NLF.HasFraction c <- stream.SkipAndPeek() if isHex c then c <- stream.SkipAndPeek() elif (flags &&& NLF.HasIntegerPart) = NLF.None then // at least one digit before or after the . 
is required error <- Errors.ExpectedHexadecimalDigit while isHex c do c <- stream.SkipAndPeek() elif (flags &&& NLF.HasIntegerPart) = NLF.None then // we neither have an integer part nor a fraction error <- Errors.ExpectedHexadecimalDigit if (int c ||| int ' ') = int 'p' && isNull error && (opt &&& NLO.AllowExponent) <> NLO.None then flags <- flags ||| NLF.HasExponent c <- stream.SkipAndPeek() if c = '-' || c = '+' then c <- stream.SkipAndPeek() if not (isDigit c) then error <- Errors.ExpectedDecimalDigit while isDigit c do c <- stream.SkipAndPeek() | 0x6f (* 'o' *) when (opt &&& NLO.AllowOctal) <> NLO.None -> flags <- flags ||| NLF.IsOctal c <- stream.SkipAndPeek() if isOctal c then flags <- flags ||| NLF.HasIntegerPart c <- stream.SkipAndPeek() else error <- Errors.ExpectedOctalDigit while isOctal c do c <- stream.SkipAndPeek() | 0x62 (* 'b' *) when (opt &&& NLO.AllowBinary) <> NLO.None -> flags <- flags ||| NLF.IsBinary c <- stream.SkipAndPeek() if c = '0' || c = '1' then flags <- flags ||| NLF.HasIntegerPart c <- stream.SkipAndPeek() else error <- Errors.ExpectedBinaryDigit while c = '0' || c = '1' do c <- stream.SkipAndPeek() | _ -> flags <- flags ||| (NLF.IsDecimal ||| NLF.HasIntegerPart) c <- c1 if isNull error then if (opt &&& NLO.AllowSuffix) = NLO.None || not (isAsciiLetter c) then let str = stream.ReadFrom(index0) Reply(NumberLiteral(str, flags, EOS, EOS, EOS, EOS)) else let mutable str = if (opt &&& NLO.IncludeSuffixCharsInString) <> NLO.None then null else stream.ReadFrom(index0) let mutable nSuffix = 1 let mutable s1 = c let mutable s2 = EOS let mutable s3 = EOS let mutable s4 = EOS c <- stream.SkipAndPeek() if isAsciiLetter c then nSuffix <- 2 s2 <- c c <- stream.SkipAndPeek() if isAsciiLetter c then nSuffix <- 3 s3 <- c c <- stream.SkipAndPeek() if isAsciiLetter c then nSuffix <- 4 s4 <- c c <- stream.SkipAndPeek() flags <- flags ||| (enum) nSuffix if (opt &&& NLO.IncludeSuffixCharsInString) <> NLO.None then str <- stream.ReadFrom(index0) 
Reply(NumberLiteral(str, flags, s1, s2, s3, s4)) else Reply(Error, error) else let cc = int c ||| int ' ' if if cc = int 'i' then (opt &&& NLO.AllowInfinity) <> NLO.None && stream.SkipCaseFolded("inf") && (flags <- flags ||| NLF.IsInfinity stream.SkipCaseFolded("inity") |> ignore true) elif cc = int 'n' then (opt &&& NLO.AllowNaN) <> NLO.None && stream.SkipCaseFolded("nan") && (flags <- flags ||| NLF.IsNaN true) else false then let str = stream.ReadFrom(index0) Reply(NumberLiteral(str, flags, EOS, EOS, EOS, EOS)) else if flags &&& (NLF.HasMinusSign ||| NLF.HasPlusSign) <> NLF.None then stream.Seek(index0) stream.StateTag <- stateTag Reply(Error, errorInCaseNoLiteralFound) let numberLiteral opt label = numberLiteralE opt (expected label) let pfloat : Parser = fun stream -> let reply = numberLiteralE NLO.DefaultFloat Errors.ExpectedFloatingPointNumber stream if reply.Status = Ok then let nl = reply.Result try let d = if nl.IsDecimal then System.Double.Parse(nl.String, System.Globalization.CultureInfo.InvariantCulture) elif nl.IsHexadecimal then floatOfHexString nl.String elif nl.IsInfinity then if nl.HasMinusSign then System.Double.NegativeInfinity else System.Double.PositiveInfinity else System.Double.NaN Reply(d) with | :? System.OverflowException -> Reply(if nl.HasMinusSign then System.Double.NegativeInfinity else System.Double.PositiveInfinity) | :? 
System.FormatException -> stream.Skip(-nl.String.Length) Reply(FatalError, messageError "The floating-point number has an invalid format (this error is unexpected, please report this error message to fparsec@quanttec.com).") else Reply(reply.Status, reply.Error) let internal parseUInt64 (c0: char) (stream: CharStream<'u>) (status: ReplyStatus byref) (error: ErrorMessageList byref) = Debug.Assert(isDigit c0 && (status = Ok)) // we rely on the compiler eliminating inactive branches let opt = NumberLiteralOptions.DefaultUnsignedInteger let limit10 = 1844674407370955160UL //(System.UInt64.MaxValue - 9UL)/10UL let maxDiv10 = 1844674407370955161UL //System.UInt64.MaxValue/10UL let maxMod10 = 5u //System.UInt64.MaxValue%10UL let limit16 = 1152921504606846975UL //(System.UInt64.MaxValue - 15UL)/16UL let maxDiv16 = 1152921504606846975UL //System.UInt64.MaxValue/16UL let maxMod16 = 15u //System.UInt64.MaxValue%16UL let limit8 = 2305843009213693951UL //(System.UInt64.MaxValue - 7UL)/8UL let maxDiv8 = 2305843009213693951UL //System.UInt64.MaxValue/8UL let maxMod8 = 7u //System.UInt64.MaxValue%8UL let limit2 = 9223372036854775807UL //(System.UInt64.MaxValue - 1UL)/2UL let maxDiv2 = 9223372036854775807UL //System.UInt64.MaxValue/2UL let maxMod2 = 1u //System.UInt64.MaxValue%2UL let mutable n = 0UL let mutable c = c0 let c1 = stream.SkipAndPeek() if (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None || c <> '0' || c1 <= '9' then n <- uint64 (uint32 c - uint32 '0') c <- c1 while c >= '0' && c <= '9' do let nc = uint32 c - uint32 '0' if n <= limit10 || (maxMod10 < 9u && n = maxDiv10 && nc <= maxMod10) then n <- 10UL*n + uint64 nc c <- stream.SkipAndPeek() else status <- FatalError c <- '!' 
// break else let cc1 = uint32 c1 ||| uint32 ' ' if (opt &&& NLO.AllowHexadecimal) <> NLO.None && cc1 = uint32 'x' then c <- stream.SkipAndPeek() let mutable nc = uint32 0 if (let cc = uint32 c ||| uint32 ' ' if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) // 0x57u = uint32 'a' - 10u then n <- uint64 nc c <- stream.SkipAndPeek() while (let cc = uint32 c ||| uint32 ' ' if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) do if n <= limit16 || (maxMod16 < 15u && n = maxDiv16 && nc <= maxMod16) then n <- 16UL*n + uint64 nc c <- stream.SkipAndPeek() else status <- FatalError c <- '!' // break else status <- Error error <- Errors.ExpectedHexadecimalDigit elif (opt &&& NLO.AllowOctal) <> NLO.None && cc1 = uint32 'o' then c <- stream.SkipAndPeek() let mutable nc = uint32 c - uint32 '0' if nc = (nc &&& 7u) then n <- uint64 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' while nc = (nc &&& 7u) do if n <= limit8 || (maxMod8 < 7u && n = maxDiv8 && nc <= maxMod8) then n <- 8UL*n + uint64 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' else status <- FatalError nc <- 11u // break else status <- Error error <- Errors.ExpectedOctalDigit elif (opt &&& NLO.AllowBinary) <> NLO.None && cc1 = uint32 'b' then c <- stream.SkipAndPeek() let mutable nc = uint32 c - uint32 '0' if nc = (nc &&& 1u) then n <- uint64 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' while nc = (nc &&& 1u) do if n <= limit2 || (maxMod2 = 0u && n = maxDiv2 && nc = 0u) then n <- 2UL*n + uint64 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' else status <- FatalError nc <- 11u // break else status <- Error error <- Errors.ExpectedBinaryDigit // else c = 0 && not (isDigit c1) n let internal parseUInt32 (c0: char) (stream: CharStream<'u>) (status: ReplyStatus byref) (error: ErrorMessageList byref) = Debug.Assert(isDigit c0 && (status = Ok)) // we 
rely on the compiler eliminating inactive branches let opt = NumberLiteralOptions.DefaultUnsignedInteger let limit10 = 429496728u //(System.UInt32.MaxValue - 9u)/10u let maxDiv10 = 429496729u //System.UInt32.MaxValue/10u let maxMod10 = 5u //System.UInt32.MaxValue%10u let limit16 = 268435455u //(System.UInt32.MaxValue - 15u)/16u let maxDiv16 = 268435455u //System.UInt32.MaxValue/16u let maxMod16 = 15u //System.UInt32.MaxValue%16u let limit8 = 536870911u //(System.UInt32.MaxValue - 7u)/8u let maxDiv8 = 536870911u //System.UInt32.MaxValue/8u let maxMod8 = 7u //System.UInt32.MaxValue%8u let limit2 = 2147483647u //(System.UInt32.MaxValue - 1u)/2u let maxDiv2 = 2147483647u //System.UInt32.MaxValue/2u let maxMod2 = 1u //System.UInt32.MaxValue%2u let mutable n = 0u let mutable c = c0 let c1 = stream.SkipAndPeek() if (opt &&& (NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowHexadecimal)) = NLO.None || c <> '0' || c1 <= '9' then n <- uint32 c - uint32 '0' c <- c1 while c >= '0' && c <= '9' do let nc = uint32 c - uint32 '0' if n <= limit10 || (maxMod10 < 9u && n = maxDiv10 && nc <= maxMod10) then n <- 10u*n + nc c <- stream.SkipAndPeek() else status <- FatalError c <- '!' // break else let cc1 = uint32 c1 ||| uint32 ' ' if (opt &&& NLO.AllowHexadecimal) <> NLO.None && cc1 = uint32 'x' then c <- stream.SkipAndPeek() let mutable nc = uint32 0 if (let cc = uint32 c ||| uint32 ' ' if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) // 0x57u = uint32 'a' - 10u then n <- uint32 nc c <- stream.SkipAndPeek() while (let cc = uint32 c ||| uint32 ' ' if c <= '9' then nc <- uint32 c - uint32 '0'; c >= '0' else cc <= uint32 'f' && (nc <- cc - 0x57u; cc >= uint32 'a')) do if n <= limit16 || (maxMod16 < 15u && n = maxDiv16 && nc <= maxMod16) then n <- 16u*n + nc c <- stream.SkipAndPeek() else status <- FatalError c <- '!' 
// break else status <- Error error <- Errors.ExpectedHexadecimalDigit elif (opt &&& NLO.AllowOctal) <> NLO.None && cc1 = uint32 'o' then c <- stream.SkipAndPeek() let mutable nc = uint32 c - uint32 '0' if nc = (nc &&& 7u) then n <- uint32 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' while nc = (nc &&& 7u) do if n <= limit8 || (maxMod8 < 7u && n = maxDiv8 && nc <= maxMod8) then n <- 8u*n + nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' else status <- FatalError nc <- 11u // break else status <- Error error <- Errors.ExpectedOctalDigit elif (opt &&& NLO.AllowBinary) <> NLO.None && cc1 = uint32 'b' then c <- stream.SkipAndPeek() let mutable nc = uint32 c - uint32 '0' if nc = (nc &&& 1u) then n <- uint32 nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' while nc = (nc &&& 1u) do if n <= limit2 || (maxMod2 = 0u && n = maxDiv2 && nc = 0u) then n <- 2u*n + nc c <- stream.SkipAndPeek() nc <- uint32 c - uint32 '0' else status <- FatalError nc <- 11u // break else status <- Error error <- Errors.ExpectedBinaryDigit // else c = 0 && not (isDigit c1) n (* overflowError returns messageError message for a non-null message and NoErrorMessages otherwise. *) [] let internal overflowError message = if isNotNull message then messageError message // isNotNull prevents fsc from inlining the function else NoErrorMessages (* pint is the inline implementation shared by the fixed-width integer parsers. It accepts an optional minus or plus sign when the corresponding flag is set in opt, lets parseUInt32 or parseUInt64 read the digits depending on whether max fits into 32 bits, and checks the magnitude against max unless max already is UInt32.MaxValue or UInt64.MaxValue, additionally allowing max + 1 for negated values. Overflow and out-of-range numbers yield FatalError with outOfRangeError after seeking back to the start index and restoring the state tag. If no digit follows the optional sign, the reply is Error with errorInCaseNoLiteralFound and an already skipped sign is backtracked. *) let inline internal pint (opt: NumberLiteralOptions) (max: 'uint) (uint64_: 'uint -> uint64) (uint: int -> 'uint) (uint_: uint32 -> 'uint) (uint__: uint64 -> 'uint) (int: 'uint -> 'int) (int_: int -> 'int) (errorInCaseNoLiteralFound: ErrorMessageList) (outOfRangeError: ErrorMessageList) (stream: CharStream<'u>) = // we rely on the compiler eliminating inactive branches after inlining let minusIsAllowed = (opt &&& NLO.AllowMinusSign) <> NLO.None let index = stream.IndexToken let stateTag = stream.StateTag let mutable c = stream.Peek() let mutable plusMinus1 = 1 let mutable signPresent = false if minusIsAllowed && c = '-' then plusMinus1 <- -1 signPresent <- true c <- stream.SkipAndPeek() elif (opt &&& NLO.AllowPlusSign) <> NLO.None && c = '+'
then signPresent <- true c <- stream.SkipAndPeek() let mutable status = Ok let mutable error = NoErrorMessages let mutable result = Unchecked.defaultof<_> if c >= '0' && c <= '9' then let n = if uint64_ max <= uint64 System.UInt32.MaxValue then uint_ (parseUInt32 c stream (&status) (&error)) else uint__ (parseUInt64 c stream (&status) (&error)) let isUInt32Or64 = uint64_ max = uint64 System.UInt32.MaxValue || uint64_ max = System.UInt64.MaxValue if status = Ok && (isUInt32Or64 || (n <= max || (minusIsAllowed && plusMinus1 = -1 && n = max + uint 1))) then result <- if minusIsAllowed then int_ plusMinus1 * int n else int n elif status <> Error then status <- FatalError stream.Seek(index) stream.StateTag <- stateTag error <- outOfRangeError else status <- Error error <- errorInCaseNoLiteralFound if signPresent then stream.Seek(index) stream.StateTag <- stateTag Reply(status, result, error) (* Concrete parsers: each one instantiates pint with the literal options, the maximum magnitude, the integer conversion functions and the two error values of its target type. *) let pint64 stream = pint NumberLiteralOptions.DefaultInteger (uint64 System.Int64.MaxValue) uint64 uint64 uint64 uint64 int64 int64 Errors.ExpectedInt64 Errors.NumberOutsideOfInt64Range stream let pint32 stream = pint NumberLiteralOptions.DefaultInteger (uint32 System.Int32.MaxValue) uint64 uint32 uint32 uint32 int32 int32 Errors.ExpectedInt32 Errors.NumberOutsideOfInt32Range stream // fsc's optimizer seems to have problems with literals of small int types let pint16 stream = pint NumberLiteralOptions.DefaultInteger ((*uint32 System.Int16.MaxValue*)0x7fffu) uint64 uint32 uint32 uint32 int16 int16 Errors.ExpectedInt16 Errors.NumberOutsideOfInt16Range stream let pint8 stream = pint NumberLiteralOptions.DefaultInteger ((*uint32 System.SByte.MaxValue*)0x7fu) uint64 uint32 uint32 uint32 sbyte sbyte Errors.ExpectedInt8 Errors.NumberOutsideOfInt8Range stream let puint64 stream = pint NumberLiteralOptions.DefaultUnsignedInteger System.UInt64.MaxValue uint64 uint64 uint64 uint64 uint64 uint64 Errors.ExpectedUInt64 Errors.NumberOutsideOfUInt64Range stream let puint32 stream = pint
NumberLiteralOptions.DefaultUnsignedInteger System.UInt32.MaxValue uint64 uint32 uint32 uint32 uint32 uint32 Errors.ExpectedUInt32 Errors.NumberOutsideOfUInt32Range stream let puint16 stream = pint NumberLiteralOptions.DefaultUnsignedInteger 0xffffu uint64 uint32 uint32 uint32 uint16 uint16 Errors.ExpectedUInt16 Errors.NumberOutsideOfUInt16Range stream let puint8 stream = pint NumberLiteralOptions.DefaultUnsignedInteger 0xffu uint64 uint32 uint32 uint32 byte byte Errors.ExpectedUInt8 Errors.NumberOutsideOfUInt8Range stream // ------------------- // Conditional parsing // ------------------- let notFollowedByEof : Parser = fun stream -> if not (stream.IsEndOfStream) then Reply(()) else Reply(Error, Errors.UnexpectedEndOfInput) let followedByNewline : Parser = fun stream -> match stream.Peek() with |'\r' | '\n' -> Reply(()) | _ -> Reply(Error, Errors.ExpectedNewline) let notFollowedByNewline : Parser = fun stream -> match stream.Peek() with |'\r' | '\n' -> Reply(Error, Errors.UnexpectedNewline) | _ -> Reply(()) (* The string-based parsers below validate str once with checkStringContainsNoNewlineOrEOSChar when the parser is constructed, build the error value up front, and use the char overload of Match or MatchCaseFolded for single-char strings. *) let followedByString (str: string) : Parser = checkStringContainsNoNewlineOrEOSChar str "followedByString" let error = expectedString str if str.Length = 1 then let chr = str[0] fun stream -> if stream.Match(chr) then Reply(()) else Reply(Error, error) else fun stream -> if stream.Match(str) then Reply(()) else Reply(Error, error) let followedByStringCI str : Parser = checkStringContainsNoNewlineOrEOSChar str "followedByStringCI" let error = expectedStringCI str if str.Length = 1 then let cfChr = Text.FoldCase(str[0]) fun stream -> if stream.MatchCaseFolded(cfChr) then Reply(()) else Reply(Error, error) else let cfStr = foldCase str fun stream -> if stream.MatchCaseFolded(cfStr) then Reply(()) else Reply(Error, error) let notFollowedByString str : Parser = checkStringContainsNoNewlineOrEOSChar str "notFollowedByString" let error = unexpectedString str if str.Length = 1 then let chr = str[0] fun stream -> if not (stream.Match(chr)) then Reply(()) else
Reply(Error, error) else fun stream -> if not (stream.Match(str)) then Reply(()) else Reply(Error, error) let notFollowedByStringCI str : Parser = checkStringContainsNoNewlineOrEOSChar str "notFollowedByStringCI" let error = unexpectedStringCI str if str.Length = 1 then let cfChr = Text.FoldCase(str[0]) fun stream -> if not (stream.MatchCaseFolded(cfChr)) then Reply(()) else Reply(Error, error) else let cfStr = foldCase str fun stream -> if not (stream.MatchCaseFolded(cfStr)) then Reply(()) else Reply(Error, error) (* charDoesSatisfy yields Error when c matches EOS and otherwise Ok exactly when f accepts c, with a carriage return passed to f as a newline char. charDoesSatisfyNot is the negated variant and yields Ok when c matches EOS. *) let inline private charDoesSatisfy f c = match c with | EOS -> Error | _ -> if f (if c <> '\r' then c else '\n') then Ok else Error let inline private charDoesSatisfyNot f c = match c with | EOS -> Ok | _ -> if not (f (if c <> '\r' then c else '\n')) then Ok else Error let previousCharSatisfies f : Parser = fun stream -> let status = charDoesSatisfy f (stream.Peek(-1)) Reply(status, (), NoErrorMessages) let previousCharSatisfiesNot f : Parser = fun stream -> let status = charDoesSatisfyNot f (stream.Peek(-1)) Reply(status, (), NoErrorMessages) let nextCharSatisfies f : Parser = fun stream -> let status = charDoesSatisfy f (stream.Peek()) Reply(status, (), NoErrorMessages) let nextCharSatisfiesNot f : Parser = fun stream -> let status = charDoesSatisfyNot f (stream.Peek()) Reply(status, (), NoErrorMessages) (* next2CharsSatisfy treats a carriage return followed by a newline char as one newline and then peeks one char further to obtain the second argument of f. It yields Error if either of the two chars matches EOS. *) let next2CharsSatisfy f : Parser = let optF = OptimizedClosures.FSharpFunc.Adapt(f) fun stream -> let cs = stream.Peek2() let status = match cs.Char0, cs.Char1 with | _, EOS | EOS, _ -> Error | '\r', '\n' -> match stream.Peek(2u) with | EOS -> Error | c1 -> if optF.Invoke('\n', if c1 <> '\r' then c1 else '\n') then Ok else Error | c0, c1 -> if optF.Invoke((if c0 <> '\r' then c0 else '\n'), (if c1 <> '\r' then c1 else '\n')) then Ok else Error Reply(status, (), NoErrorMessages) let next2CharsSatisfyNot f : Parser = let optF = OptimizedClosures.FSharpFunc.Adapt(f) fun stream -> let cs = stream.Peek2() let status = match cs.Char0, cs.Char1 with | _, EOS |
EOS, _ -> Ok | '\r', '\n' -> match stream.Peek(2u) with | EOS -> Ok | c1 -> if not (optF.Invoke('\n', if c1 <> '\r' then c1 else '\n')) then Ok else Error | c0, c1 -> if not (optF.Invoke((if c0 <> '\r' then c0 else '\n'), (if c1 <> '\r' then c1 else '\n'))) then Ok else Error Reply(status, (), NoErrorMessages) ================================================ FILE: FParsec/CharParsers.fsi ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. [] module FParsec.CharParsers open Error open Primitives // ======================== // Running parsers on input // ======================== /// Values of this type are returned by the runParser functions (not by `Parser<_,_>` functions). type ParserResult<'Result,'UserState> = /// Success(result, userState, endPos) holds the result and the user state returned by a successful parser, /// together with the position where the parser stopped. | Success of 'Result * 'UserState * Position /// Failure(errorAsString, error, userState) holds the parser error and the user state returned by a failing parser, /// together with a string representation of the parser error. | Failure of string * ParserError * 'UserState /// `runParserOnString p ustate streamName str` runs the parser `p` directly on the content of the string `str`, /// starting with the initial user state `ustate`. The `streamName` is used in error messages to describe /// the source of the input (e.g. a file path) and may be empty. /// The parser's `Reply` is captured and returned as a `ParserResult` value. val runParserOnString: Parser<'a,'u> -> 'u -> streamName: string -> string -> ParserResult<'a,'u> /// `runParserOnSubstring p ustate streamName str index count` runs the parser `p` directly on the content /// of the string `str` between the indices `index` (inclusive) and `index + count` (exclusive), /// starting with the initial user state `ustate`.
The `streamName` is used in error messages to describe /// the source of the input (e.g. a file path) and may be empty. /// The parser's `Reply` is captured and returned as a `ParserResult` value. val runParserOnSubstring: Parser<'a,'u> -> 'u -> streamName: string -> string -> int -> int -> ParserResult<'a,'u> /// `runParserOnStream p ustate streamName stream encoding` runs the parser `p` on the content of /// the `System.IO.Stream` `stream`, starting with the initial user state `ustate`. The `streamName` /// is used in error messages to describe the source of the input (e.g. a file path) and may be empty. /// In case no unicode byte order mark is found, the stream data is assumed to be encoded with the given `encoding`. /// The parser's `Reply` is captured and returned as a `ParserResult` value. val runParserOnStream: Parser<'a,'u> -> 'u -> streamName: string -> System.IO.Stream -> System.Text.Encoding -> ParserResult<'a,'u> /// `runParserOnFile p ustate path encoding` runs the parser `p` on the content of the file /// at the given `path`, starting with the initial user state `ustate`. /// In case no unicode byte order mark is found, the file data is assumed to be encoded with the given `encoding`. /// The parser's `Reply` is captured and returned as a `ParserResult` value. val runParserOnFile: Parser<'a,'u> -> 'u -> path: string -> System.Text.Encoding -> ParserResult<'a,'u> /// `run parser str` is a convenient abbreviation for `runParserOnString parser () "" str`. val run: Parser<'Result, unit> -> string -> ParserResult<'Result,unit> // ======= // Parsers // ======= // ------------------------------------------------------------- // Reading the input stream position and handling the user state // ------------------------------------------------------------- /// The parser `getPosition` returns the current position in the input Stream. /// `getPosition` is equivalent to `fun stream -> Reply(stream.Position)`. 
val getPosition: Parser /// The parser `getUserState` returns the current user state. /// `getUserState` is equivalent to `fun stream -> Reply(stream.UserState)`. val getUserState: Parser<'u,'u> /// The parser `setUserState u` sets the user state to `u`. /// `setUserState u` is equivalent to `fun stream -> stream.UserState <- u; Reply(())`. val setUserState: 'u -> Parser /// `updateUserState f` is equivalent to `fun stream -> stream.UserState <- f stream.UserState; Reply(())`. val updateUserState: ('u -> 'u) -> Parser /// The parser `userStateSatisfies f` succeeds if `f` returns `true` /// when applied to the current user state, otherwise it fails. val userStateSatisfies: ('u -> bool) -> Parser // -------------------- // Parsing single chars // -------------------- /// `pchar c` parses the char `c` and returns `c`. /// If `c = '\r'` or `c = '\n'` then `pchar c` will parse any one newline ("\n", "\r\n" or "\r") and return `c`. val pchar: char -> Parser /// `skipChar c` is an optimized implementation of `pchar c |>> ignore`. val skipChar: char -> Parser /// `charReturn c x` is an optimized implementation of `pchar c >>% x`. val charReturn: char -> 'a -> Parser<'a,'u> /// `anyChar` parses any single char or newline ("\n", "\r\n" or "\r"). /// Returns the parsed char, or '\n' in case a newline was parsed. val anyChar: Parser /// `skipAnyChar` is an optimized implementation of `anyChar |>> ignore`. val skipAnyChar: Parser /// `satisfy f` parses any one char or newline for which the predicate function `f` returns `true`. /// It returns the parsed char. /// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. /// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called /// with '\r' and `satisfy f` will never return the result '\r'. val satisfy: (char -> bool) -> Parser /// `skipSatisfy f` is an optimized implementation of `satisfy f |>> ignore`. 
val skipSatisfy: (char -> bool) -> Parser /// `satisfyL f label` is an optimized implementation of `satisfy f <?> label`. val satisfyL: (char -> bool) -> string -> Parser /// `skipSatisfyL f label` is an optimized implementation of `skipSatisfy f <?> label`. val skipSatisfyL: (char -> bool) -> string -> Parser /// `anyOf str` parses any char contained in the string `str`. It returns the parsed char. /// If `str` contains the char '\n', `anyOf str` parses any newline ("\n", "\r\n" or "\r") /// and returns it as '\n'. (Note that it does not make a difference whether or not /// `str` contains '\r'; `anyOf str` will never return '\r'.) val anyOf: seq -> Parser /// `skipAnyOf str` is an optimized implementation of `anyOf str |>> ignore`. val skipAnyOf: seq -> Parser /// `noneOf str` parses any char not contained in the string `str`. It returns the parsed char. /// If `str` does not contain the char '\n', `noneOf str` parses any newline ("\n", "\r\n" or "\r") /// and returns it as '\n'. (Note that it does not make a difference whether or not /// `str` contains '\r'; `noneOf str` will never return '\r'.) val noneOf: seq -> Parser /// `skipNoneOf str` is an optimized implementation of `noneOf str |>> ignore`. val skipNoneOf: seq -> Parser /// Parses any char in the range 'A' - 'Z'. Returns the parsed char. val asciiUpper: Parser /// Parses any char in the range 'a' - 'z'. Returns the parsed char. val asciiLower: Parser /// Parses any char in the range 'a' - 'z' and 'A' - 'Z'. Returns the parsed char. val asciiLetter: Parser /// Parses any UTF-16 uppercase letter char identified by `System.Char.IsUpper`. /// Returns the parsed char. val upper: Parser /// Parses any UTF-16 lowercase letter char identified by `System.Char.IsLower`. /// Returns the parsed char. val lower: Parser /// Parses any UTF-16 letter char identified by `System.Char.IsLetter`. /// Returns the parsed char. val letter: Parser /// Parses any char in the range '0' - '9'. Returns the parsed char.
val digit: Parser /// Parses any char in the range '0' - '9', 'a' - 'f' and 'A' - 'F'. Returns the parsed char. val hex: Parser /// Parses any char in the range '0' - '7'. Returns the parsed char. val octal: Parser // predicate functions corresponding to the above parsers /// `isAnyOf str` returns a predicate function. /// When this predicate function is applied to a char, it returns `true` if and only if the char is contained in `str`. val isAnyOf: seq -> (char -> bool) /// `isNoneOf str` returns a predicate function. /// When this predicate function is applied to a char, it returns `true` if and only if the char is not contained in `str`. val isNoneOf: seq -> (char -> bool) /// Returns `true` for any char in the range 'A' - 'Z' and `false` for all other chars. val inline isAsciiUpper: char -> bool /// Returns `true` for any char in the range 'a' - 'z' and `false` for all other chars. val inline isAsciiLower: char -> bool /// Returns `true` for any char in the range 'a' - 'z', 'A' - 'Z' and `false` for all other chars. val inline isAsciiLetter: char -> bool /// `isUpper` is equivalent to `System.Char.IsUpper`. val inline isUpper: char -> bool /// `isLower` is equivalent to `System.Char.IsLower`. val inline isLower: char -> bool /// `isLetter` is equivalent to `System.Char.IsLetter`. val inline isLetter: char -> bool /// Returns `true` for any char in the range '0' - '9' and `false` for all other chars. val inline isDigit: char -> bool /// Returns `true` for any char in the range '0' - '9', 'a' - 'f', 'A' - 'F' and `false` for all other chars. val inline isHex: char -> bool /// Returns `true` for any char in the range '0' - '7' and `false` for all other chars. val inline isOctal: char -> bool // ------------------ // Parsing whitespace // ------------------ /// Parses the tab char '\t' and returns '\t'. Note that a tab char is treated like any other non-newline char: /// the column number is incremented by (only) 1. 
val tab: Parser /// Parses a newline ("\n", "\r\n" or "\r"). Returns '\n'. /// Is equivalent to `pchar '\n'`. val newline<'u> : Parser /// `skipNewline` is an optimized implementation of `newline |>> ignore`. val skipNewline<'u> : Parser /// `newlineReturn x` is an optimized implementation of `newline >>% x`. val newlineReturn: 'a -> Parser<'a,'u> /// Parses a unicode newline ("\n", "\r\n", "\r", "\u0085", "\u2028", or "\u2029"). /// Returns '\n'. Note that this parser does not accept the formfeed char '\f' as a newline. /// In contrast to most other parsers in FParsec this parser also increments /// the internal line count for unicode newline characters other than '\n' and '\r'. val unicodeNewline<'u> : Parser /// `skipUnicodeNewline` is an optimized implementation of `unicodeNewline |>> ignore`. val skipUnicodeNewline<'u> : Parser /// `unicodeNewlineReturn x` is an optimized implementation of `unicodeNewline >>% x`. val unicodeNewlineReturn: 'a -> Parser<'a,'u> /// Skips over any sequence of *zero* or more whitespaces (space (' '), tab ('\t') /// or newline ("\n", "\r\n" or "\r")). val spaces: Parser /// Skips over any sequence of *one* or more whitespaces (space (' '), tab ('\t') /// or newline ("\n", "\r\n" or "\r")). val spaces1: Parser /// Skips over any sequence of *zero* or more unicode whitespaces and /// registers any unicode newline ("\n", "\r\n", "\r", "\u0085", "\u000C", /// "\u2028" or "\u2029") as a newline. val unicodeSpaces: Parser /// Skips over any sequence of *one* or more unicode whitespaces and /// registers any unicode newline ("\n", "\r\n", "\r", "\u0085", "\u000C", /// "\u2028" or "\u2029") as a newline. val unicodeSpaces1: Parser /// The parser `eof` only succeeds at the end of the input. It never consumes input. val eof: Parser // ------------------------ // Parsing strings directly // ------------------------ /// `pstring str` parses the string `str` and returns `str`. /// It is an atomic parser: either it succeeds or it fails without consuming any input.
/// `str` may not contain newline chars ('\n' or '\r'). val pstring: string -> Parser /// `skipString str` is an optimized implementation of `pstring str |>> ignore`. val skipString: string -> Parser /// `stringReturn str x` is an optimized implementation of `pstring str >>% x`. val stringReturn: string -> 'a -> Parser<'a,'u> /// `pstringCI str` parses any string that case-insensitively matches the string `str`. /// It returns the *parsed* string. /// `str` may not contain newline chars ('\n' or '\r'). val pstringCI: string -> Parser /// `skipStringCI str` is an optimized implementation of `pstringCI str |>> ignore`. val skipStringCI: string -> Parser /// `stringCIReturn str x` is an optimized implementation of `pstringCI str >>% x`. val stringCIReturn: string -> 'a -> Parser<'a,'u> /// `anyString n` parses any sequence of `n` chars or newlines ("\n", "\r\n" or "\r"). /// It returns the parsed string. In the returned string all newlines are normalized to "\n". /// `anyString n` is an atomic parser: either it succeeds or it fails without consuming any input. val anyString: int32 -> Parser /// `skipAnyString n` is an optimized implementation of `anyString n |>> ignore`. val skipAnyString: int32 -> Parser /// `restOfLine skipNewline` parses any chars before the end of the line /// and, if `skipNewline` is `true`, skips to the beginning of the next line (if there is one). /// It returns the parsed chars before the end of the line as a string (without a newline). /// A line is terminated by a newline ("\n", "\r\n" or "\r") or the end of the input stream. val restOfLine: bool -> Parser /// `skipRestOfLine skipNewline` is an optimized implementation of `restOfLine skipNewline |>> ignore`. val skipRestOfLine: bool -> Parser /// `charsTillString str skipString maxCount` parses all chars before the first occurrence of the string `str` and, /// if `skipString` is `true`, skips over `str`. It returns the parsed chars before the string.
/// If more than `maxCount` chars come before the first occurrence of `str`, the parser *fails after consuming* `maxCount` chars. /// Newlines ("\n", "\r\n" or "\r") are counted as single chars and /// in the returned string all newlines are normalized to "\n". /// `charsTillString str skipString maxCount` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. val charsTillString: string -> skipString: bool -> maxCount: int -> Parser /// `skipCharsTillString str skipString maxCount` is an optimized implementation of `charsTillString str skipString maxCount |>> ignore`. val skipCharsTillString: string -> skipString: bool -> maxCount: int -> Parser /// `charsTillStringCI str skipString maxCount` parses all chars before the first case-insensitive occurrence of the string `str` and, /// if `skipString` is `true`, skips over it. It returns the parsed chars before the string. /// If more than `maxCount` chars come before the first case-insensitive occurrence of `str`, /// the parser *fails after consuming* `maxCount` chars. /// Newlines ("\n", "\r\n" or "\r") are counted as single chars and /// in the returned string all newlines are normalized to "\n". /// `charsTillStringCI str skipString maxCount` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. val charsTillStringCI: string -> skipString: bool -> maxCount: int -> Parser /// `skipCharsTillStringCI str skipString maxCount` is an optimized implementation of `charsTillStringCI str skipString maxCount |>> ignore`. val skipCharsTillStringCI: string -> skipString: bool -> maxCount: int -> Parser /// `manySatisfy f` parses a sequence of *zero* or more chars that satisfy the predicate function `f` /// (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string. /// /// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. /// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called /// with '\r' and the string returned by `manySatisfy f` will never contain an '\r'.
val manySatisfy: (char -> bool) -> Parser /// `manySatisfy2 f1 f` behaves like `manySatisfy f`, except that the /// first char of the parsed string must satisfy `f1` instead of `f`. val manySatisfy2: (char -> bool) -> (char -> bool) -> Parser /// `skipManySatisfy f` is an optimized implementation of `manySatisfy f |>> ignore`. val skipManySatisfy: (char -> bool) -> Parser /// `skipManySatisfy2 f1 f` is an optimized implementation of `manySatisfy2 f1 f |>> ignore`. val skipManySatisfy2: (char -> bool) -> (char -> bool) -> Parser /// `many1Satisfy f` parses a sequence of *one* or more chars that satisfy the predicate function `f` /// (i.e. chars for which `f` returns `true`). It returns the parsed chars as a string. /// If the first char does not satisfy `f`, this parser fails without consuming input. /// /// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. /// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called /// with '\r' and the string returned by `many1Satisfy f` will never contain an '\r'. val many1Satisfy: (char -> bool) -> Parser /// `many1Satisfy2 f1 f` behaves like `many1Satisfy f`, except that the /// first char of the parsed string must satisfy `f1` instead of `f`. val many1Satisfy2: (char -> bool) -> (char -> bool) -> Parser /// `skipMany1Satisfy f` is an optimized implementation of `many1Satisfy f |>> ignore`. val skipMany1Satisfy: (char -> bool) -> Parser /// `skipMany1Satisfy2 f1 f` is an optimized implementation of `many1Satisfy2 f1 f |>> ignore`. val skipMany1Satisfy2: (char -> bool) -> (char -> bool) -> Parser /// `many1SatisfyL f label` is an optimized implementation of `many1Satisfy f <?> label`. val many1SatisfyL: (char -> bool) -> string -> Parser /// `many1Satisfy2L f1 f label` is an optimized implementation of `many1Satisfy2 f1 f <?> label`.
val many1Satisfy2L: (char -> bool) -> (char -> bool) -> string -> Parser /// `skipMany1SatisfyL f label` is an optimized implementation of `skipMany1Satisfy f <?> label`. val skipMany1SatisfyL: (char -> bool) -> string -> Parser /// `skipMany1Satisfy2L f1 f label` is an optimized implementation of `skipMany1Satisfy2 f1 f <?> label`. val skipMany1Satisfy2L: (char -> bool) -> (char -> bool) -> string -> Parser /// `manyMinMaxSatisfy minCount maxCount f` parses a sequence of `minCount` or more chars that satisfy the /// predicate function `f` (i.e. chars for which `f` returns `true`), but not more than `maxCount` chars. /// It returns the parsed chars as a string. This parser is atomic, i.e. if the first `minCount` chars /// do not all satisfy `f`, the parser fails without consuming any input. /// /// Any newline ("\n", "\r\n" or "\r") is converted to the single char '\n'. /// Thus, to accept a newline `f '\n'` must return `true`. `f` will never be called with '\r' /// and the string returned by `manyMinMaxSatisfy minCount maxCount f` will never contain an '\r'. /// /// `manyMinMaxSatisfy` throws an `ArgumentOutOfRangeException` if `maxCount` is negative. val manyMinMaxSatisfy: int -> int -> (char -> bool) -> Parser /// `manyMinMaxSatisfy2 minCount maxCount f1 f` behaves like `manyMinMaxSatisfy minCount maxCount f`, except that the first char of the parsed string must satisfy `f1` instead of `f`. val manyMinMaxSatisfy2: int -> int -> (char -> bool) -> (char -> bool) -> Parser /// `skipManyMinMaxSatisfy minCount maxCount f` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f |>> ignore`. val skipManyMinMaxSatisfy: int -> int -> (char -> bool) -> Parser /// `skipManyMinMaxSatisfy2 minCount maxCount f1 f` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f |>> ignore`.
val skipManyMinMaxSatisfy2: int -> int -> (char -> bool) -> (char -> bool) -> Parser /// `manyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `manyMinMaxSatisfy minCount maxCount f <?> label`. val manyMinMaxSatisfyL: int -> int -> (char -> bool) -> string -> Parser /// `manyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `manyMinMaxSatisfy2 minCount maxCount f1 f <?> label`. val manyMinMaxSatisfy2L: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser /// `skipManyMinMaxSatisfyL minCount maxCount f label` is an optimized implementation of `skipManyMinMaxSatisfy minCount maxCount f <?> label`. val skipManyMinMaxSatisfyL: int -> int -> (char -> bool) -> string -> Parser /// `skipManyMinMaxSatisfy2L minCount maxCount f1 f label` is an optimized implementation of `skipManyMinMaxSatisfy2 minCount maxCount f1 f <?> label`. val skipManyMinMaxSatisfy2L: int -> int -> (char -> bool) -> (char -> bool) -> string -> Parser /// `regex pattern` matches the .NET regular expression given by the string `pattern` on the chars /// beginning at the current index in the input stream. It returns the string matched by the regular expression. /// If the regular expression does not match, the parser fails without consuming input. /// /// The `System.Text.RegularExpressions.Regex` object that is internally used to match the pattern is constructed /// with the `RegexOptions` `Multiline` and `ExplicitCapture`. In order to ensure that the regular expression /// can only match at the beginning of a string, "\\A" is automatically prepended to the pattern. /// /// Newline chars ('\r' and '\n') in the pattern are interpreted literally. /// For example, an '\n' char in the pattern will only match "\n", not "\r" or "\r\n". /// However, in the returned string all newlines ("\n", "\r\n" or "\r") are normalized to "\n".
/// /// For large files the regular expression is *not* applied to a string containing *all* the remaining chars /// in the stream. The number of chars that are guaranteed to be visible to the regular expression is specified /// during construction of the `CharStream`. If one of the `runParser` functions is used to run the parser, /// this number is 43690. val regex: string -> Parser /// `regexL pattern label` is an optimized implementation of `regex pattern <?> label`. val regexL: string -> string -> Parser type IdentifierOptions = new: ?isAsciiIdStart: (char -> bool) * ?isAsciiIdContinue: (char -> bool) * ?normalization: System.Text.NormalizationForm * ?normalizeBeforeValidation: bool * ?allowJoinControlChars: bool * ?preCheckStart: (char -> bool) * ?preCheckContinue: (char -> bool) * ?allowAllNonAsciiCharsInPreCheck: bool * ?label: string * ?invalidCharMessage: string -> IdentifierOptions /// The `identifier` parser is a configurable parser for the XID identifier syntax /// specified in Unicode Standard Annex #31. val identifier: IdentifierOptions -> Parser // ---------------------------------------------- // Parsing strings with the help of other parsers // ---------------------------------------------- /// `manyChars cp` parses a sequence of *zero* or more chars with the char parser `cp`. /// It returns the parsed chars as a string. /// /// `manyChars cp` is an optimized implementation of `many (attempt cp)` that returns /// the chars as a string instead of a char list. The equivalence to `many (attempt cp)` /// instead of `many cp` implies that `manyChars` never fails. val manyChars: Parser -> Parser /// `manyChars2 cp1 cp` behaves like `manyChars cp`, except that it parses the first char with `cp1` instead of `cp`. val manyChars2: Parser -> Parser -> Parser /// `many1Chars cp` parses a sequence of *one* or more chars with the char parser `cp`. /// It returns the parsed chars as a string.
/// /// `many1Chars cp` is an optimized implementation of `many1 (attempt cp)` that returns /// the chars as a string instead of a char list. The equivalence to `many1 (attempt cp)` /// instead of `many1 cp` implies that `many1Chars` never fails after consuming input. val many1Chars: Parser -> Parser /// `many1Chars2 cp1 cp` behaves like `many1Chars cp`, except that it parses the first char with `cp1` instead of `cp`. val many1Chars2: Parser -> Parser -> Parser /// `manyCharsTill cp endp` parses chars with the char parser `cp` until the parser `endp` succeeds. /// It stops after `endp` and returns the parsed chars as a string. val manyCharsTill: Parser -> Parser<'b,'u> -> Parser /// `manyCharsTill2 cp1 cp endp` behaves like `manyCharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. val manyCharsTill2: Parser -> Parser -> Parser<'b,'u> -> Parser /// `manyCharsTillApply cp endp f` parses chars with the char parser `cp` until the parser `endp` succeeds. /// It stops after `endp` and returns the result of the function application `f str b`, /// where `str` is the parsed string and `b` is the result returned by `endp`. val manyCharsTillApply: Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> /// `manyCharsTillApply2 cp1 cp endp f` behaves like `manyCharsTillApply cp endp f`, except that it parses the first char with `cp1` instead of `cp`. val manyCharsTillApply2: Parser -> Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> /// `many1CharsTill cp endp` parses one char with the char parser `cp`. /// Then it parses more chars with `cp` until the parser `endp` succeeds. /// It stops after `endp` and returns the parsed chars as a string.
/// /// `many1CharsTill cp endp` is an optimized implementation of `pipe2 cp (manyCharsTill cp endp) (fun c1 str -> c1.ToString() + str)` val many1CharsTill: Parser -> Parser<'b,'u> -> Parser /// `many1CharsTill2 cp1 cp endp` behaves like `many1CharsTill cp endp`, except that it parses the first char with `cp1` instead of `cp`. val many1CharsTill2: Parser -> Parser -> Parser<'b,'u> -> Parser /// `many1CharsTillApply cp endp f` parses one char with the char parser `cp`. /// Then it parses more chars with `cp` until the parser `endp` succeeds. /// It stops after `endp` and returns the result of the function application `f str b`, /// where `str` is the parsed string and `b` is the result returned by `endp`. val many1CharsTillApply: Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> /// `many1CharsTillApply2 cp1 cp endp f` behaves like `many1CharsTillApply cp endp f`, except that it parses the first char with `cp1` instead of `cp`. val many1CharsTillApply2: Parser -> Parser -> Parser<'b,'u> -> (string -> 'b -> 'c) -> Parser<'c,'u> /// `manyStrings sp` parses a sequence of *zero* or more strings with the string parser `sp`. /// It returns the strings in concatenated form. /// `manyStrings sp` is an optimized implementation of `manyReduce (+) "" sp`. val manyStrings: Parser -> Parser /// `manyStrings2 sp1 sp` behaves like `manyStrings sp`, except that it parses the first string with `sp1` instead of `sp`. val manyStrings2: Parser -> Parser -> Parser /// `many1Strings sp` parses a sequence of *one* or more strings with the string parser `sp`. /// It returns the strings in concatenated form. /// Note that `many1Strings sp` does not require the first string to be non-empty. val many1Strings: Parser -> Parser /// `many1Strings2 sp1 sp` behaves like `many1Strings sp`, except that it parses the first string with `sp1` instead of `sp`. val many1Strings2: Parser -> Parser -> Parser /// `stringsSepBy sp sep` parses *zero* or more occurrences of `sp` separated by `sep`. 
/// It returns the strings parsed by `sp` *and* `sep` in concatenated form. val stringsSepBy: Parser -> Parser -> Parser /// `stringsSepBy1 sp sep` parses *one* or more occurrences of `sp` separated by `sep`. /// It returns the strings parsed by `sp` *and* `sep` in concatenated form. val stringsSepBy1: Parser -> Parser -> Parser /// `skipped p` applies the parser `p` and returns the chars skipped over by `p` as a string. /// All newlines ("\r\n", "\r" or "\n") are normalized to "\n". val skipped: Parser -> Parser /// `p |> withSkippedString f` applies the parser `p` and returns the result of `f str x`, /// where `str` is the string skipped over by `p` and `x` is the result returned by `p`. val withSkippedString: (string -> 'a -> 'b) -> Parser<'a,'u> -> Parser<'b,'u> // --------------- // Parsing numbers // --------------- /// Encodes the various options of the `numberLiteral` parser. [] type NumberLiteralOptions = | None = 0 | AllowSuffix = 0b000000000001 | AllowMinusSign = 0b000000000010 | AllowPlusSign = 0b000000000100 | AllowFraction = 0b000000001000 | AllowFractionWOIntegerPart = 0b000000010000 | AllowExponent = 0b000000100000 | AllowHexadecimal = 0b000001000000 | AllowBinary = 0b000010000000 | AllowOctal = 0b000100000000 | AllowInfinity = 0b001000000000 | AllowNaN = 0b010000000000 | IncludeSuffixCharsInString = 0b100000000000 | DefaultInteger = 0b000111000110 | DefaultUnsignedInteger = 0b000111000000 | DefaultFloat = 0b011001101110 /// The return type of the `numberLiteral` parser. An instance contains the parsed /// number literal and various bits of information about it. /// Note that the `String` member contains the string literal without the suffix chars, /// except if the `NumberLiteralOptions` passed to the `numberLiteral` parser have the /// `IncludeSuffixCharsInString` flag set. /// Any parsed suffix chars are always available through the `SuffixChar1` - `4` members. 
type NumberLiteral = new: string:string * info:NumberLiteralResultFlags * suffixChar1: char * suffixChar2: char * suffixChar3: char * suffixChar4: char -> NumberLiteral /// The parsed number literal string. Only includes the parsed suffix chars if the /// `NumberLiteralOptions` passed to the `numberLiteral` parser have the `IncludeSuffixCharsInString` flag set. member String: string /// Encodes various bits of information about the parsed number literal. member Info: NumberLiteralResultFlags member SuffixLength: int /// Returns the first suffix char, or EOS if no suffix char was parsed. member SuffixChar1: char /// Returns the second suffix char, or EOS if less than two suffix chars were parsed. member SuffixChar2: char /// Returns the third suffix char, or EOS if less than three suffix chars were parsed. member SuffixChar3: char /// Returns the fourth suffix char, or EOS if less than four suffix chars were parsed. member SuffixChar4: char member HasMinusSign: bool member HasPlusSign: bool member HasIntegerPart: bool member HasFraction: bool member HasExponent: bool member IsInteger: bool member IsDecimal: bool member IsHexadecimal: bool member IsBinary: bool member IsOctal: bool member IsNaN: bool member IsInfinity: bool override Equals: obj -> bool override GetHashCode: unit -> int and /// Encodes various bits of information about a parsed number literal. [] NumberLiteralResultFlags = | None = 0 | SuffixLengthMask = 0b0000000000001111 | HasMinusSign = 0b0000000000010000 | HasPlusSign = 0b0000000000100000 | HasIntegerPart = 0b0000000001000000 | HasFraction = 0b0000000010000000 | HasExponent = 0b0000000100000000 | IsDecimal = 0b0000001000000000 | IsHexadecimal = 0b0000010000000000 | IsBinary = 0b0000100000000000 | IsOctal = 0b0001000000000000 | BaseMask = 0b0001111000000000 | IsInfinity = 0b0010000000000000 | IsNaN = 0b0100000000000000 /// `numberLiteral options label` parses a number literal and returns the result in form /// of a `NumberLiteral` value. 
The given `NumberLiteralOptions` argument determines the kind /// of number literals accepted. The string `label` is used in the `Expected` error message /// that is generated when the parser fails without consuming input. /// /// The parser fails without consuming input, if not at least one digit (including the 0 in the /// format specifiers "0x" etc.) can be parsed. It fails after consuming input, if no decimal /// digit comes after an exponent marker or no valid digit comes after a format specifier. val numberLiteral: NumberLiteralOptions -> string -> Parser /// `numberLiteralE` is an uncurried version of `numberLiteral` that can be used to /// implement number parsers without having to construct a `numberLiteral` closure. val numberLiteralE: NumberLiteralOptions -> errorInCaseNoLiteralFound: ErrorMessageList -> CharStream<'u> -> Reply /// Parses a floating-point number in decimal or hexadecimal format. /// The special values NaN and Inf(inity)? (case insensitive) are also recognized. /// /// The parser fails /// without consuming input, if not at least one digit (including the '0' in "0x") can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after "0x", /// after consuming input, if the value represented by the input string (after rounding) is greater than `System.Double.MaxValue` or less than `System.Double.MinValue`. val pfloat: Parser /// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.Int64.MaxValue` or less than `System.Int64.MinValue`. 
val pint64: Parser /// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.Int32.MaxValue` or less than `System.Int32.MinValue`. val pint32: Parser /// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.Int16.MaxValue` or less than `System.Int16.MinValue`. val pint16: Parser /// Parses an integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than 127 or less than -128. val pint8: Parser /// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) 
can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.UInt64.MaxValue`. val puint64: Parser /// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.UInt32.MaxValue`. val puint32: Parser /// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than `System.UInt16.MaxValue`. val puint16: Parser /// Parses an unsigned integer in decimal, hexadecimal ("0x" prefix), octal ("0o") or binary ("0b") format. /// The parser fails /// without consuming input, if not at least one digit (including the '0' in the format specifiers "0x" etc.) can be parsed, /// after consuming input, if no digit comes after an exponent marker or no hex digit comes after a format specifier, /// after consuming input, if the value represented by the input string is greater than 255. val puint8: Parser // ------------------- // Conditional parsing // ------------------- /// `notFollowedByEOF` is an optimized implementation of `notFollowedByL eof "end of input"`. 
val notFollowedByEof: Parser /// `followedByNewline` is an optimized implementation of `followedByL newline "newline"`. val followedByNewline: Parser /// `notFollowedByNewline` is an optimized implementation of `notFollowedByL newline "newline"`. val notFollowedByNewline: Parser /// `followedByString str` is an optimized implementation of `followedByL (pstring str) ("'" + str + "'")`. val followedByString: string -> Parser /// `followedByStringCI str` is an optimized implementation of `followedByL (pstringCI str) ("'" + str + "'")`. val followedByStringCI: string -> Parser /// `notFollowedByString str` is an optimized implementation of `notFollowedByL (pstring str) ("'" + str + "'")`. val notFollowedByString: string -> Parser /// `notFollowedByStringCI str` is an optimized implementation of `notFollowedByL (pstringCI str) ("'" + str + "'")`. val notFollowedByStringCI: string -> Parser /// `nextCharSatisfies f` is an optimized implementation of `followedBy (satisfy f)`. val nextCharSatisfies: (char -> bool) -> Parser /// `nextCharSatisfiesNot f` is an optimized implementation of `notFollowedBy (satisfy f)`. val nextCharSatisfiesNot: (char -> bool) -> Parser /// `next2CharsSatisfy f` succeeds if the predicate function `f` returns `true` /// when applied to the next 2 chars in the input stream, otherwise it fails. /// If there aren't 2 chars remaining in the input stream, this parser fails (as opposed to `next2CharsSatisfyNot`). /// This parser never changes the parser state. /// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. /// If this parser fails, it returns no descriptive error message; hence it should only be /// used together with parsers that take care of a potential error. val next2CharsSatisfy: (char -> char -> bool) -> Parser /// `next2CharsSatisfyNot f` succeeds if the predicate function `f` returns `false` /// when applied to the next 2 chars in the input stream, otherwise it fails. 
/// If there aren't 2 chars remaining in the input stream, this parser succeeds (as opposed to `next2CharsSatisfy`). /// This parser never changes the parser state. /// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. /// If this parser fails, it returns no descriptive error message; hence it should only be /// used together with parsers that take care of a potential error. val next2CharsSatisfyNot: (char -> char -> bool) -> Parser /// `previousCharSatisfies f` succeeds if the predicate function `f` returns `true` /// when applied to the previous char in the stream, otherwise it fails. /// If there is no previous char (because the stream is at the beginning), /// this parser fails (as opposed to `previousCharSatisfiesNot`). /// This parser never changes the parser state. /// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. /// If this parser fails, it returns no descriptive error message; hence it should only be /// used together with parsers that take care of a potential error. val previousCharSatisfies: (char -> bool) -> Parser /// `previousCharSatisfiesNot f` succeeds if the predicate function `f` returns `false` /// when applied to the previous char in the stream, otherwise it fails. /// If there is no previous char (because the stream is at the beginning), /// this parser succeeds (as opposed to `previousCharSatisfies`). /// This parser never changes the parser state. /// Any newline ("\n", "\r\n" or "\r") in the input is interpreted as a single char '\n'. /// If this parser fails, it returns no descriptive error message; hence it should only be /// used together with parsers that take care of a potential error. val previousCharSatisfiesNot: (char -> bool) -> Parser // ================ // Helper functions // ================ /// `EOS` is equal to `CharStream<'u>.EndOfStreamChar`. 
[] val EOS: char = '\uffff';; /// `foldCase str` returns a case-folded version of `str` /// with all chars mapped using the (non-Turkic) Unicode 1-to-1 case folding mappings /// for chars below 0x10000. If the argument is `null`, `null` is returned. val foldCase: string -> string /// `normalizeNewlines str` returns a version of `str` /// with all occurrences of "\r\n" and "\r" replaced by "\n". /// If the argument is `null`, `null` is returned. val normalizeNewlines: string -> string /// Returns a hexadecimal string representation of the `float` argument. val floatToHexString: float -> string /// Returns the `float` value represented by the given string in hexadecimal format. /// Raises a `System.FormatException` in case the string representation is invalid. /// Raises a `System.OverflowException` if the (absolute) value is too large to be represented by a `float`. val floatOfHexString: string -> float /// Returns a hexadecimal string representation of the `float32` argument. val float32ToHexString: float32 -> string /// Returns the `float32` value represented by the given string in hexadecimal format. /// Raises a `System.FormatException` in case the string representation is invalid. /// Raises a `System.OverflowException` if the (absolute) value is too large to be represented by a `float32`. val float32OfHexString: string -> float32 ================================================ FILE: FParsec/Emit.fs ================================================ // Copyright (c) Stephan Tolksdorf 2010-2011 // License: Simplified BSD License. See accompanying documentation. module internal FParsec.Emit #if !LOW_TRUST open System.Diagnostics open System.Reflection open System.Reflection.Emit open System.Runtime.CompilerServices open Microsoft.FSharp.NativeInterop open FParsec.Internals open FParsec.Range #nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code." 
#nowarn "51" // "The use of native pointers may result in unverifiable .NET IL code" (* Name of the dummy type that allocTemporaryMem associates its native memory with. *) let nativeMemoryAssociatedType = "NativeMemoryAssociatedType" (* Lazily created dynamic module "FParsec.Emitted" (in a run-only dynamic assembly) in which all emitted types are defined. *) let moduleBuilder = lazy ( let assemblyName = AssemblyName("FParsec.Emitted") let assemblyBuilder = AssemblyBuilder.DefineDynamicAssembly(assemblyName, AssemblyBuilderAccess.Run) let moduleBuilder = assemblyBuilder.DefineDynamicModule("FParsec.Emitted") // We create a dummy static class that will be used to associate the native memory with this dynamic assembly. moduleBuilder .DefineType(nativeMemoryAssociatedType, TypeAttributes.Abstract ||| TypeAttributes.Sealed ||| TypeAttributes.Public) .CreateType() |> ignore moduleBuilder ) // Allocates memory that can be used by the generated code. let allocTemporaryMem size = let associatedType = moduleBuilder.Value.GetType(nativeMemoryAssociatedType, true, false) RuntimeHelpers.AllocateTypeAssociatedMemory(associatedType, size) (* Defines a new type named "FParsec.Emitted." + name in the dynamic module and returns the result of DefineType. *) let createTypeBuilder name args parent (interfaces : System.Type[]) = moduleBuilder.Value.DefineType("FParsec.Emitted." 
+ name, args, parent, interfaces) (* Emits ldc.i4 with the constant i. *) let loadI4 (ilg: ILGenerator) (i: int32) = ilg.Emit(OpCodes.Ldc_I4, i) (* Emits code that loads the int64 constant i: ldc.i8 if i does not round-trip through int32, otherwise ldc.i4 followed by conv.i8. *) let loadI8 (ilg: ILGenerator) (i: int64) = let i4 = int32 i if i <> int64 i4 then ilg.Emit(OpCodes.Ldc_I8, i) else loadI4 ilg i4 ilg.Emit(OpCodes.Conv_I8) (* Emits code that loads the uint64 constant i: ldc.i8 if i is greater than UInt32.MaxValue, otherwise ldc.i4 followed by conv.u8. *) let loadU8 (ilg: ILGenerator) (i: uint64) = if i > uint64 System.UInt32.MaxValue then ilg.Emit(OpCodes.Ldc_I8, int64 i) else loadI4 ilg (int32 i) ilg.Emit(OpCodes.Conv_U8) (* Emits code that loads the nativeint constant i, using ldc.i4 or ldc.i8 depending on the result of the sizeof test, together with conv.i. *) let loadI (ilg: ILGenerator) (i: nativeint) = if sizeof = 4 then ilg.Emit(OpCodes.Ldc_I4, int32 i) else ilg.Emit(OpCodes.Ldc_I8, int64 i) ilg.Emit(OpCodes.Conv_I) (* Emits code that loads the unativeint constant i, using ldc.i4 or ldc.i8 depending on the result of the sizeof test, together with conv.u. *) let loadU (ilg: ILGenerator) (i: unativeint) = if sizeof = 4 then ilg.Emit(OpCodes.Ldc_I4, int32 i) else ilg.Emit(OpCodes.Ldc_I8, int64 i) ilg.Emit(OpCodes.Conv_U) (* Returns a function that emits the IL for loading a constant of the primitive type ty; enum types are mapped to their underlying type first. The type check is done once, when the loader is created; each returned closure unboxes its argument to the matching primitive type. Raises an ArgumentException (via invalidArg) for any type not listed below. 
*) let private createLoaderForPrimitiveConstantsImpl (ty: System.Type) (ilg: ILGenerator) : ('T -> unit) = let ty = if ty.IsEnum then System.Enum.GetUnderlyingType(ty) else ty if ty = typeof< int32> then fun x -> loadI4 ilg (box x :?> int32) elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint32)) elif ty = typeof then fun x -> loadI8 ilg (box x :?> int64) elif ty = typeof then fun x -> loadU8 ilg (box x :?> uint64) elif ty = typeof< int16> then fun x -> loadI4 ilg (int32 (box x :?> int16)) elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint16)) elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> char)) elif ty = typeof< int8> then fun x -> loadI4 ilg (int32 (box x :?> int8)) elif ty = typeof then fun x -> loadI4 ilg (int32 (box x :?> uint8)) elif ty = typeof then fun x -> ilg.Emit(if box x :?> bool then OpCodes.Ldc_I4_1 else OpCodes.Ldc_I4_0) elif ty = typeof then fun x -> ilg.Emit(OpCodes.Ldc_R8, (box x :?> double)) elif ty = typeof then fun x -> ilg.Emit(OpCodes.Ldc_R4, (box x :?> float32)) elif ty = typeof then fun x -> loadI ilg (box x :?> nativeint) elif ty = typeof then fun x -> loadU ilg (box x :?> unativeint) else invalidArg "ty" "Invalid type argument." 
let createLoaderForPrimitiveConstants<'T> ilg : ('T -> unit) = createLoaderForPrimitiveConstantsImpl typeof<'T> ilg let createLoaderForBoxedPrimitiveConstants (ty: System.Type) ilg : (obj -> unit) = createLoaderForPrimitiveConstantsImpl ty ilg let emitRangeCheck branchIfInRange (ilg: ILGenerator) (label: Label) minValue maxValue (range: Range) = Debug.Assert(minValue <= range.Min && range.Max <= maxValue) if minValue = range.Min && range.Max = maxValue then ilg.Emit(OpCodes.Pop) if branchIfInRange then ilg.Emit(OpCodes.Br, label) elif range.Min = range.Max then loadI4 ilg range.Min if branchIfInRange then ilg.Emit(OpCodes.Beq, label) else ilg.Emit(OpCodes.Bne_Un, label) elif minValue = range.Min then // we only have to check the right bound loadI4 ilg range.Max if branchIfInRange then ilg.Emit(OpCodes.Ble, label) else ilg.Emit(OpCodes.Bgt, label) elif range.Max = maxValue then // we only have to check the left bound loadI4 ilg range.Min if branchIfInRange then ilg.Emit(OpCodes.Bge, label) else ilg.Emit(OpCodes.Blt, label) else // we have to check both bounds if range.Min <> 0 then loadI4 ilg range.Min ilg.Emit(OpCodes.Sub) loadI4 ilg (range.Max - range.Min) if branchIfInRange then ilg.Emit(OpCodes.Ble_Un, label) // unsigned comparison else ilg.Emit(OpCodes.Bgt_Un, label) // unsigned comparison let emitBranchIfOutOfRange ilg label minValue maxValue range = emitRangeCheck false ilg label minValue maxValue range let emitBranchIfInRange ilg label minValue maxValue range = emitRangeCheck true ilg label minValue maxValue range let emitRangeTest pushFalseIfInRange (ilg: ILGenerator) minValue maxValue (range: Range) = Debug.Assert(minValue <= range.Min && range.Max <= maxValue) let emitNot() = ilg.Emit(OpCodes.Ldc_I4_0) ilg.Emit(OpCodes.Ceq) if minValue = range.Min && range.Max = maxValue then ilg.Emit(OpCodes.Pop) if pushFalseIfInRange then ilg.Emit(OpCodes.Ldc_I4_0) else ilg.Emit(OpCodes.Ldc_I4_1) elif range.Min = range.Max then loadI4 ilg range.Min ilg.Emit(OpCodes.Ceq) 
if pushFalseIfInRange then emitNot() elif minValue = range.Min then // we only have to check the right bound loadI4 ilg range.Max ilg.Emit(OpCodes.Cgt) if not pushFalseIfInRange then emitNot() elif range.Max = maxValue then // we only have to check the left bound loadI4 ilg range.Min ilg.Emit(OpCodes.Clt) if not pushFalseIfInRange then emitNot() else // we have to check both bounds if range.Min <> 0 then loadI4 ilg range.Min ilg.Emit(OpCodes.Sub) loadI4 ilg (range.Max - range.Min) ilg.Emit(OpCodes.Cgt_Un) // unsigned comparison if not pushFalseIfInRange then emitNot() (* emitTwoRangeTest emits IL that leaves on the evaluation stack the result of testing whether the value loaded by loadVar lies in range1 or range2 (the negated result if inverse is true). The ranges must be sorted and must not be adjacent, see the assert. minValue and maxValue bound the values the variable can take. *) let emitTwoRangeTest (ilg: ILGenerator) (loadVar: ILGenerator -> unit) inverse minValue maxValue (range1: Range) (range2: Range) = assert (range1.Max < range2.Min && range1.Max + 1 < range2.Min) let needOuterRangeCheck = minValue < range1.Min || range2.Max < maxValue let w = sizeof*8 (* The width of the value domain is compared as uint32: the signed difference maxValue - minValue wraps around to a negative number when the domain is wider than Int32.MaxValue, which would wrongly select the single-word bit vector test. *) if needOuterRangeCheck && (uint32 (maxValue - minValue) < uint32 w) then // use a simple bit vector test: // (bits >> (var - off)) & 1 let off = if minValue > 0 && maxValue < w then 0 else minValue let mutable bits = if inverse then unativeint -1n else 0un for r in [range1; range2] do for i in r.Min .. 
r.Max do let b = i - off if inverse then bits <- bits ^^^ (1un <<< b) else bits <- bits ||| (1un <<< b) loadU ilg bits loadVar ilg if off <> 0 then loadI4 ilg off ilg.Emit(OpCodes.Sub) ilg.Emit(OpCodes.Shr_Un) ilg.Emit(OpCodes.Ldc_I4_1) ilg.Emit(OpCodes.And) elif not needOuterRangeCheck || (range1.Max + 2 = range2.Min && range1.Min <> range1.Max && range2.Min <> range2.Max) then if needOuterRangeCheck then loadVar ilg emitRangeTest inverse ilg minValue maxValue (Range(range1.Min, range2.Max)) loadVar ilg emitRangeTest (not inverse) ilg minValue maxValue (Range(range1.Max + 1, range2.Min - 1)) if needOuterRangeCheck then if inverse then ilg.Emit(OpCodes.Or) else ilg.Emit(OpCodes.And) else loadVar ilg emitRangeTest inverse ilg minValue maxValue range1 loadVar ilg emitRangeTest inverse ilg minValue maxValue range2 if inverse then ilg.Emit(OpCodes.And) else ilg.Emit(OpCodes.Or) type TempLocals(ilg: ILGenerator) = let mutable intLocal = null let mutable boolLocal = null /// used by emitSetMembershipTest (and indirectly by emitSwitch) member t.GetIntLocal() = if isNull intLocal then intLocal <- ilg.DeclareLocal(typeof) intLocal /// used by emitSwitch member t.GetBoolLocal() = if isNull boolLocal then boolLocal <- ilg.DeclareLocal(typeof) boolLocal /// flag used for testing purposes let mutable noBitVectorTests = false let emitSetMembershipTest (ilg: ILGenerator) (loadVar: ILGenerator -> unit) (storeResult: ILGenerator -> unit) (temps: TempLocals) lengthCap densityThreshold minValue maxValue inverse (ranges: Range[]) = checkRangesAreValidSortedAndUnconnected ranges let endLabel = ilg.DefineLabel() let outOfRangeLabel = ilg.DefineLabel() let emitBitVectorTest minValue maxValue iBegin iEnd = let first, last = ranges[iBegin].Min, ranges[iEnd - 1].Max // set up bit vector in unmanaged memory let w = sizeof*8 // save a subtraction if it doesn't cost too much memory let off = if first > 0 && (last < w || (first < 3*w && (last >= first + w))) then 0 else first let lastMinusOff = 
uint32 (last - off) if lastMinusOff > uint32 System.Int32.MaxValue then raise (System.ArgumentException("The ranges span width is too large.")) let length = int (lastMinusOff/uint32 w + 1u) if uint32 length * uint32 w > uint32 System.Int32.MaxValue then raise (System.ArgumentException("The ranges span width is too large.")) let mutable stackVar = 0un let ptr = if length = 1 then NativePtr.ofNativeInt (NativePtr.toNativeInt &&stackVar) else NativePtr.ofNativeInt (allocTemporaryMem (length*sizeof)) // fill bit vector ptr[0..length - 1] let r = ranges[iBegin] let mutable rMin, rMax = r.Min - off, r.Max - off let mutable i = iBegin + 1 if not inverse then for j = 0 to length - 1 do let mutable n = 0un let j1w = (j + 1)*w while rMin < j1w do n <- n ||| (1un <<< rMin%w) if rMin < rMax then rMin <- rMin + 1 elif i < iEnd then let r = ranges[i] rMin <- r.Min - off; rMax <- r.Max - off i <- i + 1 else rMin <- System.Int32.MaxValue // break NativePtr.set ptr j n else for j = 0 to length - 1 do let mutable n = unativeint -1n let j1w = (j + 1)*w while rMin < j1w do n <- n ^^^ (1un <<< rMin%w) if rMin < rMax then rMin <- rMin + 1 elif i < iEnd then let r = ranges[i] rMin <- r.Min - off; rMax <- r.Max - off i <- i + 1 else rMin <- System.Int32.MaxValue // break NativePtr.set ptr j n let intTemp = temps.GetIntLocal() // t = (uint32)(x - off) loadVar ilg if off <> 0 then loadI4 ilg off ilg.Emit(OpCodes.Sub) ilg.Emit(OpCodes.Stloc, intTemp) // if (t > (uint32)(last - off)) goto outOfRangeLabel if minValue < off || length*w <= maxValue - off then ilg.Emit(OpCodes.Ldloc, intTemp) loadI4 ilg (last - off) ilg.Emit(OpCodes.Bgt_Un, outOfRangeLabel) if length = 1 then // x = *ptr loadU ilg stackVar else // x = *(ptr + t/w) loadU ilg (unativeint (NativePtr.toNativeInt ptr)) ilg.Emit(OpCodes.Ldloc, intTemp) loadI4 ilg w ilg.Emit(OpCodes.Div_Un) loadI4 ilg sizeof ilg.Emit(OpCodes.Mul) ilg.Emit(OpCodes.Add) ilg.Emit(OpCodes.Ldind_I) // result = (x >> t%w) & 1 ilg.Emit(OpCodes.Ldloc, intTemp) 
if length > 1 then loadI4 ilg w ilg.Emit(OpCodes.Rem_Un) ilg.Emit(OpCodes.Shr_Un) ilg.Emit(OpCodes.Ldc_I4_1) ilg.Emit(OpCodes.And) storeResult ilg ilg.Emit(OpCodes.Br, endLabel) let emitRangeTest inverse minValue maxValue (range: Range) = loadVar ilg emitRangeTest inverse ilg minValue maxValue range storeResult ilg ilg.Emit(OpCodes.Br, endLabel) let emitTwoRangeTest inverse minValue maxValue range1 range2 = emitTwoRangeTest ilg loadVar inverse minValue maxValue range1 range2 storeResult ilg ilg.Emit(OpCodes.Br, endLabel) let rec emitRegion minValue maxValue iBegin iEnd = Debug.Assert(iBegin < iEnd && minValue <= ranges[iBegin].Min && ranges[iEnd - 1].Max <= maxValue) match iEnd - iBegin with | 0 -> failwith "emitSetMembershipTest.emitRegion" | 1 -> emitRangeTest inverse minValue maxValue ranges[iBegin] | 2 -> emitTwoRangeTest inverse minValue maxValue ranges[iBegin] ranges[iBegin + 1] | _ -> // at least 3 ranges if not noBitVectorTests && density lengthCap ranges iBegin iEnd >= densityThreshold then emitBitVectorTest minValue maxValue iBegin iEnd else let i, pivotAroundRangeMax = findPivot ranges iBegin iEnd let label = ilg.DefineLabel() let r = ranges[i] loadVar ilg if pivotAroundRangeMax then loadI4 ilg r.Max ilg.Emit(OpCodes.Bgt, label) emitRegion minValue r.Max iBegin (i + 1) ilg.MarkLabel(label) emitRegion (r.Max + 1) maxValue (i + 1) iEnd else loadI4 ilg r.Min ilg.Emit(OpCodes.Blt, label) emitRegion r.Min maxValue i iEnd ilg.MarkLabel(label) emitRegion minValue (r.Min - 1) iBegin i if ranges.Length <> 0 then emitRegion minValue maxValue 0 ranges.Length ilg.MarkLabel(outOfRangeLabel) if inverse then ilg.Emit(OpCodes.Ldc_I4_1) else ilg.Emit(OpCodes.Ldc_I4_0) storeResult ilg ilg.MarkLabel(endLabel) let emitSwitch (ilg: ILGenerator) (loadVar: ILGenerator -> unit) (temps: TempLocals) lengthCap densityThreshold minValue maxValue (defaultLabel: Label) (ranges: Range[]) (labels: Label[]) = Debug.Assert(ranges.Length = labels.Length) 
checkLabelRangesAreValidSortedAndUnconnected ranges labels let emitJumpTable (* minValue maxValue *) iBegin iEnd = // We can't optimize the range check of the switch statement, // so we have no use for minValue and maxValue arguments. // (In LLVM we could use the 'unreachable' instruction for optimizing the range check.) Debug.Assert(iBegin + 2 <= iEnd) let first = ranges[iBegin].Min let off = first let length = let last = ranges[iEnd - 1].Max let lastMinusOff = last - off if uint32 lastMinusOff >= uint32 System.Int32.MaxValue then raise (System.ArgumentException("The ranges span width is too large.")) lastMinusOff + 1 // length <= Int32.MaxValue let jt = Array.zeroCreate length let mutable j = 0 for i = iBegin to iEnd - 1 do let r = ranges[i] let rMin, rMax = r.Min - off, r.Max - off while j < rMin do jt[j] <- defaultLabel j <- j + 1 let label = labels[i] while j <= rMax do jt[j] <- label j <- j + 1 loadVar ilg if off <> 0 then loadI4 ilg off ilg.Emit(OpCodes.Sub) ilg.Emit(OpCodes.Switch, jt) ilg.Emit(OpCodes.Br, defaultLabel) let emitBranchIfInRange2 label (defaultLabel: Label) minValue maxValue (range: Range) = if minValue < range.Min || range.Max < maxValue then loadVar ilg emitBranchIfInRange ilg label minValue maxValue range ilg.Emit(OpCodes.Br, defaultLabel) else ilg.Emit(OpCodes.Br, label) let emitBranchIfInRange label minValue maxValue (range: Range) = loadVar ilg emitBranchIfInRange ilg label minValue maxValue range let emitBranchIfOutOfRange label minValue maxValue (range: Range) = if minValue < range.Min || range.Max < maxValue then loadVar ilg emitBranchIfOutOfRange ilg label minValue maxValue range let rec emitRegion minValue maxValue iBegin iEnd = Debug.Assert(iBegin < iEnd && minValue <= ranges[iBegin].Min && ranges[iEnd - 1].Max <= maxValue) let pivotAroundRange i pivotAroundRangeMax = let label = ilg.DefineLabel() let r = ranges[i] loadVar ilg if pivotAroundRangeMax then loadI4 ilg r.Max ilg.Emit(OpCodes.Bgt, label) emitRegion minValue r.Max 
iBegin (i + 1) ilg.MarkLabel(label) emitRegion (r.Max + 1) maxValue (i + 1) iEnd else loadI4 ilg r.Min ilg.Emit(OpCodes.Blt, label) emitRegion r.Min maxValue i iEnd ilg.MarkLabel(label) emitRegion minValue (r.Min - 1) iBegin i match iEnd - iBegin with | 0 -> failwith "emitSwitch.emitRegion" | 1 -> emitBranchIfInRange2 labels[iBegin] defaultLabel minValue maxValue ranges[iBegin] | 2 -> let r1, r2 = ranges[iBegin], ranges[iBegin + 1] let l1, l2 = labels[iBegin], labels[iBegin + 1] if l1 = l2 then Debug.Assert(r1.Max + 1 < r2.Min) //emitBranchIfOutOfRange defaultLabel minValue maxValue (Range(r1.Min, r2.Max)) //emitBranchIfInRange2 defaultLabel l1 r1.Min r2.Max (Range(r1.Max + 1, r2.Min - 1)) emitTwoRangeTest ilg loadVar false minValue maxValue r1 r2 ilg.Emit(OpCodes.Brtrue, l1) ilg.Emit(OpCodes.Br, defaultLabel) else let rangesAreConnected = r1.Max + 1 = r2.Min let checkLeft, checkRight = minValue < r1.Min, r2.Max < maxValue if rangesAreConnected && ((checkLeft && checkRight) || (not checkLeft && not checkRight)) then emitBranchIfOutOfRange defaultLabel minValue maxValue (Range(r1.Min, r2.Max)) // If 64-bit .NET JIT can substitute both of the branches emitted below with // the code at the destination, it chooses to substitute the first. Hence, // we put the more likely case first (assuming that values are // uniformly distributed on {minValue ... maxValue}). // (The 32-bit .NET JIT (version 4) doesn't yet seem to seriously attempt // a code block reordering optimization.) 
if uint32 (r1.Max - r1.Min) >= uint32 (r2.Max - r2.Min) then emitBranchIfInRange2 l1 l2 r1.Min r2.Max r1 else emitBranchIfInRange2 l2 l1 r1.Min r2.Max r2 else if (if rangesAreConnected then checkRight (* not checkLeft *) else uint32 (r1.Max - r1.Min) >= uint32 (r2.Max - r2.Min)) then emitBranchIfInRange l1 minValue maxValue r1 let minRightValue = if checkLeft then minValue else r1.Max + 1 emitBranchIfInRange2 l2 defaultLabel minRightValue maxValue r2 else emitBranchIfInRange l2 minValue maxValue r2 let maxLeftValue = if checkRight then maxValue else r2.Min - 1 emitBranchIfInRange2 l1 defaultLabel minValue maxLeftValue r1 | _ -> // at least 3 ranges let allLabelsAreIdentical = let label = labels[iBegin] let mutable i = iBegin + 1 while i < iEnd && label.Equals(labels[i]) do i <- i + 1 i = iEnd if allLabelsAreIdentical then let bl = temps.GetBoolLocal() // emitSetMembershipTest doesn't use GetBoolLocal itself emitSetMembershipTest ilg loadVar (fun ilg -> ilg.Emit(OpCodes.Stloc, bl)) temps (lengthCap*8) (densityThreshold/32.) minValue maxValue false ranges[iBegin..(iEnd - 1)] ilg.Emit(OpCodes.Ldloc, bl) ilg.Emit(OpCodes.Brtrue, labels[iBegin]) ilg.Emit(OpCodes.Br, defaultLabel) elif density lengthCap ranges iBegin iEnd >= densityThreshold then emitJumpTable iBegin iEnd else let i, pivotAroundRangeMax = findPivot ranges iBegin iEnd pivotAroundRange i pivotAroundRangeMax if ranges.Length <> 0 then emitRegion minValue maxValue 0 ranges.Length else ilg.Emit(OpCodes.Br, defaultLabel) #endif ================================================ FILE: FParsec/Error.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: BSD-style. See accompanying documentation. [] module FParsec.Error //open FParsec open System.Diagnostics open System.Globalization open System.IO open FParsec.Internals // Unfortunately, F# currently doesn't support active patterns with more than 7 // cases, so we have to use partial patterns. 
// Short aliases for the concrete ErrorMessage subclasses.
type Expected = ErrorMessage.Expected
type ExpectedString = ErrorMessage.ExpectedString
type ExpectedStringCI = ErrorMessage.ExpectedCaseInsensitiveString
type Unexpected = ErrorMessage.Unexpected
type UnexpectedString = ErrorMessage.UnexpectedString
type UnexpectedStringCI = ErrorMessage.UnexpectedCaseInsensitiveString
type Message = ErrorMessage.Message
type NestedError = ErrorMessage.NestedError
type CompoundError = ErrorMessage.CompoundError
type OtherErrorMessage = ErrorMessage.Other

// Partial active patterns, one per ErrorMessageType. The string-carrying
// message kinds yield `msg.String`; the remaining kinds yield the members
// of the downcast message object.

let (|Expected|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.Expected then Some msg.String else None

let (|ExpectedString|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.ExpectedString then Some msg.String else None

let (|ExpectedStringCI|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.ExpectedCaseInsensitiveString then Some msg.String else None

let (|Unexpected|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.Unexpected then Some msg.String else None

let (|UnexpectedString|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.UnexpectedString then Some msg.String else None

let (|UnexpectedStringCI|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.UnexpectedCaseInsensitiveString then Some msg.String else None

let (|Message|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.Message then Some msg.String else None

let (|NestedError|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.NestedError then
        let ne = msg :?> ErrorMessage.NestedError
        Some((ne.Position, ne.UserState, ne.Messages))
    else None

let (|CompoundError|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.CompoundError then
        let ce = msg :?> ErrorMessage.CompoundError
        Some((ce.LabelOfCompound, ce.NestedErrorPosition, ce.NestedErrorUserState, ce.NestedErrorMessages))
    else None

let (|OtherErrorMessage|_|) (msg: ErrorMessage) =
    if msg.Type = ErrorMessageType.Other then
        let om = msg :?> ErrorMessage.Other
        Some om.Data
    else None

/// The empty error message list, which is represented by `null`.
[<Literal>]
let NoErrorMessages = null : ErrorMessageList

/// Decomposes a non-null list into its head message and tail list;
/// a null list matches `NoErrorMessages`.
let (|ErrorMessageList|NoErrorMessages|) (error: ErrorMessageList) =
    if isNotNull error then ErrorMessageList(error.Head, error.Tail)
    else NoErrorMessages

/// Returns true if `error` is a list with exactly one message and that message has the type `ty`.
let inline isSingleErrorMessageOfType (ty: ErrorMessageType) (error: ErrorMessageList) =
    isNotNull error && error.Head.Type = ty && isNull error.Tail

// Each of the following helpers creates an ErrorMessageList with a single message of the respective kind.
let expected           label = ErrorMessageList(ErrorMessage.Expected(label))
let expectedString     str   = ErrorMessageList(ErrorMessage.ExpectedString(str))
let expectedStringCI   str   = ErrorMessageList(ErrorMessage.ExpectedCaseInsensitiveString(str))
let unexpected         label = ErrorMessageList(ErrorMessage.Unexpected(label))
let unexpectedString   str   = ErrorMessageList(ErrorMessage.UnexpectedString(str))
let unexpectedStringCI str   = ErrorMessageList(ErrorMessage.UnexpectedCaseInsensitiveString(str))
let messageError       msg   = ErrorMessageList(ErrorMessage.Message(msg))
let otherError         obj   = ErrorMessageList(ErrorMessage.Other(obj : obj))

/// Wraps `error` in a single `NestedError` message that records the current
/// position and user state of `stream`. If `error` already consists of a
/// single `NestedError` message, it is returned unchanged.
let nestedError (stream: CharStream<'u>) (error: ErrorMessageList) =
    (*
    // manually inlined:
    match error with
    | ErrorMessageList(NestedError _, NoErrorMessages) -> error
    | _ -> ErrorMessageList(NestedError(stream.Position, stream.UserState, error), NoErrorMessages)
    *)
    if error |> isSingleErrorMessageOfType ErrorMessageType.NestedError
    then error
    else ErrorMessageList(ErrorMessage.NestedError(stream.Position, stream.UserState, error))

/// Wraps `error` in a single `CompoundError` message with the given label.
/// If `error` consists of a single `NestedError` message, the position, user
/// state and messages of that nested error are used; otherwise the current
/// position and user state of `stream` are used together with `error`.
let compoundError label (stream: CharStream<'u>) (error: ErrorMessageList) =
    // manually inlined:
    (*
    match error with
    | ErrorMessageList(NestedError(pos, ustate, msgs), NoErrorMessages) ->
           ErrorMessageList(CompoundError(label, pos, ustate, msgs), NoErrorMessages)
    | _ -> ErrorMessageList(CompoundError(label, stream.Position, stream.UserState, error), NoErrorMessages)
    *)
    if error |> isSingleErrorMessageOfType ErrorMessageType.NestedError then
        let ne = error.Head :?> ErrorMessage.NestedError
        ErrorMessageList(ErrorMessage.CompoundError(label, ne.Position, ne.UserState, ne.Messages))
    else
        ErrorMessageList(ErrorMessage.CompoundError(label, stream.Position, stream.UserState, error))

/// `mergeErrors error1 error2` is equivalent to `ErrorMessageList.Merge(error1, error2)`.
let
#if !NOINLINE
    inline
#endif
           mergeErrors errorMessages1 errorMessages2 = ErrorMessageList.Merge(errorMessages1, errorMessages2)

/// the default position printer
/// (writes `indent` followed by a one-line description of the position; `columnWidth` is not used)
let internal printPosition (tw: System.IO.TextWriter) (p: Position) (indent: string) (columnWidth: int) =
    tw.Write(indent)
    tw.WriteLine(Strings.ErrorPosition(p))

/// Prints the error position, then the snippet of the input line around the
/// position with a '^' marker underneath, and finally any notes that help
/// to interpret the printed position.
let internal printErrorPosition (tabSize: int) (lw: LineWrapper) (stream: CharStream<'u>) (p: Position) =
    /// writes the string with all whitespace chars replaced with ' '
    let writeStringWithSimplifiedWhitespace (tw: TextWriter) (s: string) =
        let mutable i0 = 0
        for i = 0 to s.Length - 1 do
            let c = s[i]
            if Text.IsWhitespace(c) then
                if i0 < i then
                    tw.Write(s.Substring(i0, i - i0))
                tw.Write(' ')
                i0 <- i + 1
        if i0 < s.Length then
            if i0 = 0 then tw.Write(s)
            else tw.Write(s.Substring(i0, s.Length - i0))

    let sn = getLineSnippet stream p (lw.ColumnWidth - lw.Indentation.Length) tabSize lw.WriterIsMultiCharGraphemeSafe
    let str = sn.String

    lw.PrintLine(Strings.ErrorPosition(p, sn.UnaccountedNewlines, sn.Column, sn.Utf16Column))

    let msgs = ResizeArray<_>()
    if sn.LineContainsTabsBeforeIndex then
        let mutable msg = Strings.ColumnCountAssumesTabStopDistanceOfNChars(tabSize)
        // The remark about the UTF-16 column count is only meaningful when that
        // count differs from the regular column count.
        // NOTE(review): presumably Strings.ErrorPosition only prints the UTF-16
        // column in that case -- confirm against Strings.cs.
        if sn.Column <> sn.Utf16Column then
            msg <- msg + Strings.Utf16ColumnCountOnlyCountsEachTabAs1Char
        msgs.Add(msg)

    if str.Length > 0 then
        let tw = lw.TextWriter
        tw.Write(lw.Indentation)
        writeStringWithSimplifiedWhitespace tw str
        tw.WriteLine()
        // marker line
        tw.Write(lw.Indentation)
        if sn.TextElementIndex > 0 then
            tw.Write(new string(' ', sn.TextElementIndex))
        tw.Write('^')
        let d = sn.Index - sn.TextElementIndex
        if d <> 0 && not lw.WriterIsMultiCharGraphemeSafe then
            // print a second caret at the UTF-16 index, connected by dashes
            if d > 1 then
                tw.Write(new string('-', d - 1))
            tw.Write('^')
            msgs.Add(Strings.ExactPositionBetweenCaretsDependsOnDisplayUnicodeCapabilities)
        tw.WriteLine()

    if sn.Index < str.Length then
        let i = sn.Index
        let c = str[i]
        if System.Char.IsSurrogate(c) then
            if System.Char.IsHighSurrogate(c) then
                if i + 1 < str.Length && System.Char.IsLowSurrogate(str[i + 1]) then
                    msgs.Add(Strings.ErrorOccurredAtBeginningOfSurrogatePair(str.Substring(i, 2)))
                else
                    msgs.Add(Strings.CharAtErrorPositionIsIsolatedHighSurrogate(c))
            else // low surrogate
                if i > 0 && System.Char.IsHighSurrogate(str[i - 1]) then
                    msgs.Add(Strings.ErrorOccurredAtSecondCharInSurrogatePair(str.Substring(i - 1, 2)))
                else
                    msgs.Add(Strings.CharAtErrorPositionIsIsolatedLowSurrogate(c))
        elif i > 0 then
            let c1 = str[i - 1]
            if System.Char.IsHighSurrogate(c1) then
                msgs.Add(Strings.CharBeforeErrorPositionIsIsolatedHighSurrogate(c1))
            elif System.Char.IsLowSurrogate(c1) then
                msgs.Add(Strings.CharBeforeErrorPositionIsIsolatedLowSurrogate(c1))
    else
        if p.Index = stream.IndexOfLastCharPlus1 then msgs.Add(Strings.ErrorOccurredAtEndOfInputStream)
        elif str.Length = 0 then msgs.Add(Strings.ErrorOccurredOnAnEmptyLine)
        else msgs.Add(Strings.ErrorOccurredAtEndOfLine)

    if sn.LengthOfTextElement > 1 && (sn.LengthOfTextElement > 2 || not (System.Char.IsSurrogate(str[sn.Index]))) then
        let n = sn.Index - sn.IndexOfTextElement + 1
        let te = str.Substring(sn.IndexOfTextElement, sn.LengthOfTextElement)
        msgs.Add(Strings.ErrorOccurredAtNthCharInCombiningCharacterSequence(n, te))
    elif sn.IsBetweenCRAndLF then
        msgs.Add(Strings.ErrorOccurredAtSecondCharInNewline)

    if sn.UnaccountedNewlines > 0 then
        let n = sn.UnaccountedNewlines
        msgs.Add(Strings.InputContainsAtLeastNUnaccountedNewlines(n))

    // a single note is printed inline, multiple notes are printed as a bullet list
    if msgs.Count = 1 then lw.PrintLine(Strings.Note, msgs[0])
    elif msgs.Count > 1 then
        let ind  = lw.Indentation
        // NOTE(review): whitespace runs are collapsed in this copy of the file;
        // confirm the exact contents of this literal against the repository.
        let ind2 = ind + " "
        lw.PrintLine(Strings.Note)
        for msg in msgs do
            lw.Print("* ")
            lw.Indentation <- ind2
            lw.PrintLine(msg)
            lw.Indentation <- ind

/// A container for the position, user state and error messages of a parser error.
[<Sealed>]
type ParserError(position: Position, userState: obj, messages: ErrorMessageList) =
    do if isNull position then nullArg "pos"

    let defaultColumnWidth = 79
    let defaultIndentation = ""
    // NOTE(review): whitespace runs are collapsed in this copy of the file;
    // confirm the exact contents of this literal against the repository.
    let defaultIndentationIncrement = " "
    let defaultTabSize = 8

    member t.Position = position
    member t.UserState = userState
    member t.Messages = messages

    /// Returns a string representation of the error without any excerpt from the input.
    override t.ToString() =
        use sw = new System.IO.StringWriter()
        t.WriteTo(sw)
        sw.ToString()

    /// Returns a string representation of the error in which error positions
    /// are printed together with the respective line of the given stream.
    member t.ToString(streamWhereErrorOccurred: CharStream<'u>) =
        use sw = new System.IO.StringWriter()
        t.WriteTo(sw, streamWhereErrorOccurred)
        sw.ToString()

    /// Writes the error to `textWriter`, printing positions with `positionPrinter`
    /// (by default only a one-line description of the position).
    member t.WriteTo(textWriter: System.IO.TextWriter,
                     ?positionPrinter: (System.IO.TextWriter -> Position -> string -> int -> unit),
                     ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) =
        let positionPrinter = defaultArg positionPrinter printPosition
        let columnWidth = defaultArg columnWidth defaultColumnWidth
        let ind = defaultArg initialIndentation defaultIndentation
        let indIncrement = defaultArg indentationIncrement defaultIndentationIncrement
        let lw = new LineWrapper(textWriter, columnWidth, Indentation = ind)
        t.WriteTo(lw, positionPrinter, indIncrement)

    /// Writes the error to `textWriter`. The given stream is used for all
    /// positions that have the same stream name as the position of this error.
    member t.WriteTo(textWriter: System.IO.TextWriter,
                     streamWhereErrorOccurred: CharStream<'u>,
                     ?tabSize: int, ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) =
        let originalStreamName = t.Position.StreamName
        let getStream =
            fun (pos: Position) ->
                if pos.StreamName = originalStreamName then streamWhereErrorOccurred
                else null
        t.WriteTo(textWriter, getStream,
                  ?tabSize = tabSize, ?columnWidth = columnWidth,
                  ?initialIndentation = initialIndentation, ?indentationIncrement = indentationIncrement)

    /// Writes the error to `textWriter`. `getStream` is called for every printed
    /// position; if it returns a non-null stream, the position is printed
    /// together with the respective line of that stream.
    member t.WriteTo(textWriter: System.IO.TextWriter,
                     getStream: (Position -> CharStream<'u>),
                     ?tabSize: int, ?columnWidth: int, ?initialIndentation: string, ?indentationIncrement: string) =
        let columnWidth = defaultArg columnWidth defaultColumnWidth
        let ind = defaultArg initialIndentation defaultIndentation
        let indIncrement = defaultArg indentationIncrement defaultIndentationIncrement
        let tabSize = defaultArg tabSize defaultTabSize
        let lw = new LineWrapper(textWriter, columnWidth, Indentation = ind)
        let positionPrinter =
            fun tw position indent columnWidth ->
                let stream = getStream position
                if isNotNull stream then
                    printErrorPosition tabSize lw stream position
                else
                    printPosition lw.TextWriter position indent columnWidth
        t.WriteTo(lw, positionPrinter, indIncrement)

    /// Shared implementation of the public WriteTo overloads.
    member private t.WriteTo(lw: LineWrapper,
                             positionPrinter: System.IO.TextWriter -> Position -> string -> int -> unit,
                             indentationIncrement: string) =

        let rec printMessages (position: Position) (msgs: ErrorMessageList) =
            positionPrinter lw.TextWriter position lw.Indentation lw.ColumnWidth
            // sort the messages into one bucket per printed section
            let nra() = new ResizeArray<_>()
            let expectedA, unexpectedA, messageA, nestedA, compoundA = nra(), nra(), nra(), nra(), nra()
            let mutable otherCount = 0
            for msg in ErrorMessageList.ToSortedArray(msgs) do
                match msg.Type with
                | ErrorMessageType.Expected ->
                    expectedA.Add(msg.String)
                | ErrorMessageType.ExpectedString ->
                    expectedA.Add(Strings.Quote(msg.String))
                | ErrorMessageType.ExpectedCaseInsensitiveString ->
                    expectedA.Add(Strings.QuoteCaseInsensitive(msg.String))
                | ErrorMessageType.Unexpected ->
                    unexpectedA.Add(msg.String)
                | ErrorMessageType.UnexpectedString ->
                    unexpectedA.Add(Strings.Quote(msg.String))
                | ErrorMessageType.UnexpectedCaseInsensitiveString ->
                    unexpectedA.Add(Strings.QuoteCaseInsensitive(msg.String))
                | ErrorMessageType.Message ->
                    messageA.Add(msg.String)
                | ErrorMessageType.NestedError ->
                    let ne = msg :?> ErrorMessage.NestedError
                    nestedA.Add((ne.Position, ne.Messages))
                | ErrorMessageType.CompoundError ->
                    // a compound error with a label also counts as an "expected" message
                    if not (isNullOrEmpty msg.String) then
                        expectedA.Add(msg.String)
                    let ce = msg :?> ErrorMessage.CompoundError
                    compoundA.Add((ce.String, ce.NestedErrorPosition, ce.NestedErrorMessages))
                | ErrorMessageType.Other ->
                    otherCount <- otherCount + 1
                | _ ->
                    failwith "printMessages"

            // prints "title a, b, c<sep>d" followed by a newline
            let printArray title (a: ResizeArray<string>) (sep: string) =
                lw.Print(title, " ")
                let n = a.Count
                for i = 0 to n - 3 do
                    lw.Print(a[i], ", ")
                if n > 1 then lw.Print(a[n - 2], sep)
                if n > 0 then lw.Print(a[n - 1])
                lw.Newline()

            if expectedA.Count > 0 then
                printArray Strings.Expecting expectedA Strings.Or
            if unexpectedA.Count > 0 then
                printArray Strings.Unexpected unexpectedA Strings.And
            let ind = lw.Indentation
            let indInd = ind + indentationIncrement
            if messageA.Count > 0 then
                // the messages are only introduced by a header line (and indented)
                // if something has been printed before them
                if expectedA.Count > 0 || unexpectedA.Count > 0 then
                    lw.PrintLine(Strings.OtherErrors)
                    lw.Indentation <- indInd
                for m in messageA do
                    lw.PrintLine(m)
                if expectedA.Count > 0 || unexpectedA.Count > 0 then
                    lw.Indentation <- ind
            for label, pos2, msgs2 in compoundA do
                lw.Newline()
                lw.PrintLine(Strings.CompoundCouldNotBeParsedBecause(label))
                lw.Indentation <- indInd
                printMessages pos2 msgs2
                lw.Indentation <- ind
            for pos2, msgs2 in nestedA do
                lw.Newline()
                lw.PrintLine(Strings.ParserBacktrackedAfter)
                lw.Indentation <- indInd
                printMessages pos2 msgs2
                lw.Indentation <- ind
            if expectedA.Count = 0 && unexpectedA.Count = 0 && messageA.Count = 0
               && compoundA.Count = 0 && nestedA.Count = 0
            then
                lw.PrintLine(Strings.UnknownErrors)

        printMessages position messages

    override t.Equals(value: obj) =
        referenceEquals (t :> obj) value
        || (match value with
            | null -> false
            | :? ParserError as other ->
                t.Position = other.Position
                && t.Messages = other.Messages
                && t.UserState = other.UserState
            | _ -> false)

    override t.GetHashCode() = t.Position.GetHashCode() ^^^ hash t.Messages

let inline internal raiseInfiniteLoopException name stream =
    raise (FParsec.Internal.ParserCombinatorInInfiniteLoopHelper.CreateException(name, stream))

================================================ FILE: FParsec/Error.fsi ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation.
[<AutoOpen>]
module FParsec.Error

type Expected = ErrorMessage.Expected
type ExpectedString = ErrorMessage.ExpectedString
type ExpectedStringCI = ErrorMessage.ExpectedCaseInsensitiveString
type Unexpected = ErrorMessage.Unexpected
type UnexpectedString = ErrorMessage.UnexpectedString
type UnexpectedStringCI = ErrorMessage.UnexpectedCaseInsensitiveString
type Message = ErrorMessage.Message
type NestedError = ErrorMessage.NestedError
type CompoundError = ErrorMessage.CompoundError
type OtherErrorMessage = ErrorMessage.Other

val (|Expected|_|): ErrorMessage -> string option
val (|ExpectedString|_|): ErrorMessage -> string option
val (|ExpectedStringCI|_|): ErrorMessage -> string option
val (|Unexpected|_|): ErrorMessage -> string option
val (|UnexpectedString|_|): ErrorMessage -> string option
val (|UnexpectedStringCI|_|): ErrorMessage -> string option
val (|Message|_|): ErrorMessage -> string option
val (|NestedError|_|): ErrorMessage -> (Position * obj * ErrorMessageList) option
val (|CompoundError|_|): ErrorMessage -> (string * Position * obj * ErrorMessageList) option
val (|OtherErrorMessage|_|): ErrorMessage -> obj option

[<Literal>]
val NoErrorMessages: ErrorMessageList = null;;
val (|ErrorMessageList|NoErrorMessages|): ErrorMessageList -> Choice<(ErrorMessage*ErrorMessageList),unit>

val inline isSingleErrorMessageOfType: ErrorMessageType -> ErrorMessageList -> bool

/// `expected label` creates an `ErrorMessageList` with a single `Expected label` message.
val expected: string -> ErrorMessageList
/// `expectedString str` creates an `ErrorMessageList` with a single `ExpectedString str` message.
val expectedString: string -> ErrorMessageList
/// `expectedStringCI str` creates an `ErrorMessageList` with a single `ExpectedStringCI str` message.
val expectedStringCI: string -> ErrorMessageList

/// `unexpected label` creates an `ErrorMessageList` with a single `Unexpected label` message.
val unexpected: string -> ErrorMessageList
/// `unexpectedString str` creates an `ErrorMessageList` with a single `UnexpectedString str` message.
val unexpectedString: string -> ErrorMessageList
/// `unexpectedStringCI str` creates an `ErrorMessageList` with a single `UnexpectedStringCI str` message.
val unexpectedStringCI: string -> ErrorMessageList

/// `messageError msg` creates an `ErrorMessageList` with a single `Message msg` message.
val messageError: string -> ErrorMessageList

/// `otherError o` creates an `ErrorMessageList` with a single `OtherErrorMessage o` message.
val otherError: obj -> ErrorMessageList

/// `nestedError stream msgs` creates an `ErrorMessageList` with a single
/// `NestedError(stream.Position, stream.UserState, msgs)` message,
/// except if `msgs` is already an `ErrorMessageList` with a single `NestedError` message,
/// in which case `msgs` is returned instead.
val nestedError: CharStream<'u> -> ErrorMessageList -> ErrorMessageList

/// `compoundError label stream msgs` creates an `ErrorMessageList` with a single
/// `CompoundError(label, stream.Position, stream.UserState, msgs)` message,
/// except if `msgs` is an `ErrorMessageList` with a single `NestedError(pos2, ustate2, msgs2)` message,
/// in which case an `ErrorMessageList` with a single `CompoundError(label, pos2, ustate2, msgs2)`
/// message is returned instead.
val compoundError: string -> CharStream<'u> -> ErrorMessageList -> ErrorMessageList

/// `mergeErrors error1 error2` is equivalent to `ErrorMessageList.Merge(error1, error2)`.
val
#if !NOINLINE
    inline
#endif
           mergeErrors: ErrorMessageList -> ErrorMessageList -> ErrorMessageList

/// Represents a simple container type that brings together the position, user state and error messages of a parser error.
[<Sealed>]
type ParserError =
    new: Position * userState:obj * ErrorMessageList -> ParserError

    member Position: Position
    member UserState: obj
    member Messages: ErrorMessageList

    /// Returns a string representation of the `ParserError`.
    override ToString: unit -> string

    /// Returns a string representation of the `ParserError`.
    ///
    /// The given `CharStream` must contain the content of the original `CharStream`
    /// for which this `ParserError` was generated (at the original indices).
    ///
    /// For each error location the printed position information is augmented
    /// with the line of text surrounding the error position, together with a '^'-marker
    /// pointing to the exact location of the error in the input stream.
    member ToString: streamWhereErrorOccurred: CharStream<'u> -> string

    /// Writes a string representation of the `ParserError` to the given `TextWriter` value.
    ///
    /// The given `CharStream` must contain the content of the original `CharStream`
    /// for which this `ParserError` was generated (at the original indices).
    ///
    /// For each error location the printed position information is augmented
    /// with the line of text surrounding the error position, together with a '^'-marker
    /// pointing to the exact location of the error in the input stream.
    member WriteTo:   textWriter: System.IO.TextWriter
                    * streamWhereErrorOccurred: CharStream<'u>
                    * ?tabSize: int
                    * ?columnWidth: int
                    * ?initialIndentation: string * ?indentationIncrement: string
                    -> unit

    /// Writes a string representation of the `ParserError` to the given `TextWriter` value.
    ///
    /// For each error position `getStream` is called with the `Position`.
    /// The returned `CharStream` must be `null` or contain the content of the `CharStream` for which
    /// the error was generated (at the original indices).
    ///
    /// If `getStream` returns a non-null `CharStream`, the printed error position information is
    /// augmented with the line of text surrounding the error position, together with a '^'-marker
    /// pointing to the exact location of the error in the input stream.
    member WriteTo:   textWriter: System.IO.TextWriter
                    * getStream: (Position -> CharStream<'u>)
                    * ?tabSize: int
                    * ?columnWidth: int
                    * ?initialIndentation: string * ?indentationIncrement: string
                    -> unit

    /// Writes a string representation of the `ParserError` to the given `TextWriter` value.
    ///
    /// The format of the position information can be customized by specifying the `positionPrinter`
    /// argument. The given function is expected to print a representation of the passed `Position` value
    /// to the passed `TextWriter` value. If possible, it should indent text lines with the passed string
    /// and take into account the maximum column count (including indentation) passed as the last argument.
    member WriteTo:   textWriter: System.IO.TextWriter
                    * ?positionPrinter: (System.IO.TextWriter -> Position -> string -> int -> unit)
                    * ?columnWidth: int
                    * ?initialIndentation: string * ?indentationIncrement: string
                    -> unit

    override Equals: obj -> bool
    override GetHashCode: unit -> int

val inline internal raiseInfiniteLoopException: string -> CharStream -> 'a

================================================ FILE: FParsec/FParsec-LowTrust.fsproj ================================================ ﻿ netstandard2.0;netstandard2.1 ================================================ FILE: FParsec/FParsec.fsproj ================================================ net6.0 ================================================ FILE: FParsec/FParsec.targets ================================================ FParsec FParsec true false /nooptimizationdata FParsec is a parser combinator library for F#. $(PackageTags);parser;combinator;f#;fsharp;c#;csharp;parsec;fparsec $(Summary) You can find comprehensive documentation for FParsec at http://www.quanttec.com/fparsec. The documentation includes a feature list, a tutorial, a user’s guide and an API reference. $(Description) This package uses the basic “low-trust” configuration of FParsec, which does not use any unverifiable code and is optimized for maximum portability.
If you need to parse very large files or if you employ FParsec for performance-critical jobs, consider using the alternate “Big Data Edition” NuGet package (see https://nuget.org/packages/fparsec-big-data-edition). $(Description) This package uses a configuration of FParsec that supports very large input streams and is optimized for maximum performance in longer running processes. See http://www.quanttec.com/fparsec/download-and-installation.html for more information. This version of FParsec is currently not compatible with .NET Core. If you want to use .NET Core, please choose the other FParsec NuGet package instead (see https://nuget.org/packages/fparsec). FParsec-Big-Data-Edition $(DefineConstants);USE_STATIC_MAPPING_FOR_IS_ANY_OF $(TargetsForTfmSpecificBuildOutput);IncludeFParsecCSOutput true ================================================ FILE: FParsec/Internals.fs ================================================ // Copyright (c) Stephan Tolksdorf 2009-2011 // License: Simplified BSD License. See accompanying documentation. 
// NOTE(review): the attribute list below is empty in this copy of the file;
// the attribute name appears to have been lost -- confirm against the repository.
[]
module FParsec.Internals

open System.Diagnostics

/// Reference equality for reference types (calls obj.ReferenceEquals).
let inline referenceEquals<'a when 'a : not struct> (x: 'a) (y: 'a) =
    obj.ReferenceEquals(x, y)

/// Tests whether `x` is the null reference, using reference equality.
let inline isNull<'a when 'a : not struct> (x: 'a) =
    referenceEquals (box x) null

/// The negation of `isNull`.
let inline isNotNull<'a when 'a : not struct> (x: 'a) =
    not (isNull x)

/// Shorthand for System.String.IsNullOrEmpty.
let inline isNullOrEmpty (s: string) = System.String.IsNullOrEmpty(s)

// the F# compiler doesn't yet "fuse" multiple '+' string concatenations into one, as the C# compiler does
// (each concatN helper below joins N strings with a single call to System.String.Concat)
let inline concat3 (a: string) (b: string) (c: string) = System.String.Concat(a, b, c)
let inline concat4 (a: string) (b: string) (c: string) (d: string) = System.String.Concat(a, b, c, d)
let inline concat5 (a: string) (b: string) (c: string) (d: string) (e: string) = System.String.Concat([|a;b;c;d;e|])
let inline concat6 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) = System.String.Concat([|a;b;c;d;e;f|])
let inline concat7 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) (g: string) = System.String.Concat([|a;b;c;d;e;f;g|])

let findNewlineOrEOSChar = Text.FindNewlineOrEOSChar

/// Copies the sequence into an array, sorts the array and returns an array
/// with the distinct values in sorted order. If the sorted array contains no
/// duplicates, it is returned directly.
let getSortedUniqueValues (s: seq<_>) =
    let a = Array.ofSeq s
    if a.Length = 0 then a
    else
        Array.sortInPlace a
        // first pass: count the number of distinct values
        let mutable previous = a[0]
        let mutable n = 1
        for i = 1 to a.Length - 1 do
            let c = a[i]
            if c <> previous then n <- n + 1
            previous <- c
        if n = a.Length then a
        else
            // second pass: copy one value of each run of equal values
            let b = Array.zeroCreate n
            let mutable i = 0
            for j = 0 to b.Length - 1 do
                let c = a[i]
                b[j] <- c
                i <- i + 1
                while i < a.Length && a[i] = c do i <- i + 1
            b

/// A primitive pretty printer.
///
/// Text passed to the Print methods is split at whitespace chars into words.
/// The words are written to the TextWriter separated by single spaces, and a
/// line break is inserted before a word that doesn't fit into the remaining
/// space of the current line. Every line starts with the current `Indentation`.
type LineWrapper(tw: System.IO.TextWriter, columnWidth: int, writerIsMultiCharGraphemeSafe: bool) =
    do if columnWidth < 1 then invalidArg "columnWidth" "columnWidth must be positive."

    let mutable indentation = ""
    // the space available in a line after the indentation
    let mutable maxSpace = columnWidth
    // the space remaining in the current line
    let mutable space = columnWidth
    // true if nothing (not even the indentation) has been written to the current line yet
    let mutable afterNewline = true
    // true if a space needs to be written before the next word
    let mutable afterSpace = false

    /// Derives `writerIsMultiCharGraphemeSafe` from the encoding of the
    /// writer: it is true unless the encoding is a single-byte encoding.
    new (tw: System.IO.TextWriter, columnWidth: int) =
        LineWrapper(tw, columnWidth, not tw.Encoding.IsSingleByte)

    member t.TextWriter = tw
    member t.ColumnWidth = columnWidth
    member t.WriterIsMultiCharGraphemeSafe = writerIsMultiCharGraphemeSafe

    /// The string written at the beginning of each line. A value that is set
    /// is truncated to at most `columnWidth - 1` chars.
    member t.Indentation
        with get() = indentation
        and set (s: string) =
            let s = if s.Length <= columnWidth - 1 then s
                    else s.Substring(0, columnWidth - 1) // guarantee maxSpace >= 1
            indentation <- s
            maxSpace <- columnWidth - s.Length
            // a line that has already been started keeps its remaining space
            if afterNewline then space <- maxSpace

    /// Ends the current line and resets the line state.
    member t.Newline() =
        tw.WriteLine()
        afterNewline <- true
        afterSpace <- false
        space <- maxSpace

    /// Records that a space has to precede the next word; nothing is written yet.
    member t.Space() =
        afterSpace <- true

    /// Prints the words in `s`. Each whitespace char in `s` acts as a word
    /// separator. A null string is ignored.
    member t.Print(s: string) =
        if isNotNull s then
            let mutable start = 0
            for i = 0 to s.Length - 1 do
                let c = s[i]
                if (if c <= ' ' then c = ' ' || (c >= '\t' && c <= '\r')
                    else c >= '\u0085' && (c = '\u0085' || c = '\u2028' || c = '\u2029'))
                then // any ' ', tab or newlines
                    if start < i then
                        t.Write(s.Substring(start, i - start))
                    t.Space()
                    start <- i + 1
            if start < s.Length then
                // avoid the Substring call if the whole string is a single word
                if start = 0 then t.Write(s)
                else t.Write(s.Substring(start, s.Length - start))

    member t.Print(s1, s2) = t.Print(s1); t.Print(s2)
    member t.Print(s1, s2, s3) = t.Print(s1); t.Print(s2); t.Print(s3)
    member t.PrintLine(s: string) = t.Print(s); t.Newline()
    member t.PrintLine(s1: string, s2: string) = t.Print(s1); t.Print(s2); t.Newline()
    member t.PrintLine(s1: string, s2: string, s3: string) = t.Print(s1); t.Print(s2); t.Print(s3); t.Newline()

    /// Writes a single non-empty word, preceded by the indentation, a space
    /// or a line break as required.
    member private t.Write(s: string) =
        Debug.Assert(s.Length > 0)
        if afterNewline then
            tw.Write(indentation)
            afterNewline <- false
        // the width of the word: text elements if the writer can handle
        // multi-char graphemes, otherwise UTF-16 chars
        let n = if writerIsMultiCharGraphemeSafe then Text.CountTextElements(s) else s.Length
        match afterSpace with
        | true when n + 1 <= space ->
            // the space and the word fit into the current line
            tw.Write(' ')
            tw.Write(s)
            space <- space - 1 - n
            afterSpace <- false
        | false when n <= space ->
            // the word fits into the current line
            tw.Write(s)
            space <- space - n
        | _ when s.Length <= maxSpace ->
            // continue with the word on a new line
            tw.WriteLine()
            tw.Write(indentation)
            tw.Write(s)
            space <- maxSpace - n
            afterSpace <- false
        | _ ->
            t.Break(s)

    /// breaks a string into multiple lines along text element boundaries.
    member private t.Break(s: string) =
        Debug.Assert(s.Length > 0 && not afterNewline)
        // write a pending space, or start a new line if there isn't room for
        // the space and at least one more char
        if afterSpace then
            afterSpace <- false
            if space > 1 then
                tw.Write(' ')
                space <- space - 1
            else
                tw.WriteLine()
                tw.Write(indentation)
                space <- maxSpace
        elif space = 0 then
            tw.WriteLine()
            tw.Write(indentation)
            space <- maxSpace
        let te = System.Globalization.StringInfo.GetTextElementEnumerator(s)
        te.MoveNext() |> ignore
        Debug.Assert(te.ElementIndex = 0)
        if writerIsMultiCharGraphemeSafe then
            // each text element counts as one column
            let mutable startIndex = 0
            while te.MoveNext() do
                space <- space - 1
                if space = 0 then
                    let index = te.ElementIndex
                    tw.WriteLine(s.Substring(startIndex, index - startIndex))
                    tw.Write(indentation)
                    space <- maxSpace
                    startIndex <- index
            space <- space - 1
            tw.Write(s.Substring(startIndex, s.Length - startIndex))
        else
            // We don't break up text elements, but when we fit string pieces into lines we
            // use UTF-16 lengths instead of text element counts (in order to support displays
            // that have problems with combining character sequences).
            let mutable startIndex = 0
            // the last text element boundary at which the piece starting at startIndex still fits
            let mutable lastIndex = 0
            while te.MoveNext() do
                let index = te.ElementIndex
                let count = index - startIndex
                if count < space then
                    lastIndex <- index
                elif count = space || lastIndex <= startIndex then
                    // break at the current boundary: either the piece fills the line exactly
                    // or there is no earlier boundary after startIndex to break at
                    tw.WriteLine(s.Substring(startIndex, count))
                    tw.Write(indentation)
                    space <- maxSpace
                    startIndex <- index
                else
                    // break at the last boundary that fit
                    tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
                    tw.Write(indentation)
                    space <- maxSpace
                    startIndex <- lastIndex
            // write the rest of the string
            let index = s.Length
            let count = index - startIndex
            if count <= space then
                tw.Write(s.Substring(startIndex, count))
                space <- space - count
            elif lastIndex <= startIndex then
                tw.WriteLine(s.Substring(startIndex, index - startIndex))
                space <- maxSpace
                afterNewline <- true
            else
                tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
                tw.Write(indentation)
                tw.Write(s.Substring(lastIndex, index - lastIndex))
                space <- maxSpace - (index - lastIndex)
                if space < 0 then
                    tw.WriteLine()
                    space <- maxSpace
                    afterNewline <- true

/// The value returned by `getLineSnippet`.
// NOTE(review): the code that fills in these fields is not part of this
// section; the meaning of the individual fields should be confirmed there.
type LineSnippet = {
    String: string
    TextElementIndex: int
    Index: int
    IndexOfTextElement: int
    LengthOfTextElement: int
    UnaccountedNewlines: int
    Column: int64
    Utf16Column: int64 // the UTF16 tabs are only counted as 1 char
    LineContainsTabsBeforeIndex: bool
    IsBetweenCRAndLF: bool
}

let getLineSnippet (stream: CharStream<'u>) (p: Position) (space: int) (tabSize: int) multiCharGraphemeSafe = Debug.Assert(space > 0 && tabSize > 0) Debug.Assert(p.Index >= stream.IndexOfFirstChar && p.Index <= stream.IndexOfLastCharPlus1) let isCombiningChar (s: string) = match System.Globalization.CharUnicodeInfo.GetUnicodeCategory(s, 0) with | System.Globalization.UnicodeCategory.NonSpacingMark | System.Globalization.UnicodeCategory.SpacingCombiningMark | System.Globalization.UnicodeCategory.EnclosingMark | System.Globalization.UnicodeCategory.Surrogate -> true | _ -> false let isUnicodeNewlineOrEos c = match c with | '\n' | '\r'| '\u0085'| '\u2028'| '\u2029' | '\uffff' -> true | _ -> false // we restrict the maximum
column count, so that we don't accidentally // completely reread a multi-gigabyte file when it has no newlines let maxColForColCount = 1000 let maxExtraChars = 32 let colTooLarge = p.Column > int64 maxColForColCount let oldState = stream.State let mutable index = p.Index stream.Seek(index) // throws if index is too small if index <> stream.Index then raise (System.ArgumentException("The error position lies beyond the end of the stream.")) let isBetweenCRAndLF = stream.Peek() = '\n' && stream.Peek(-1) = '\r' if isBetweenCRAndLF then stream.Skip(-1) index <- index - 1L else let mutable c = stream.Peek() let mutable n = 2*space + maxExtraChars // skip to end of line, but not over more than n chars while not (isUnicodeNewlineOrEos c) && n <> 0 do c <- stream.SkipAndPeek() n <- n - 1 if not (isUnicodeNewlineOrEos c) then n <- maxExtraChars while isCombiningChar (stream.PeekString(2)) && n <> 0 do stream.Skip() |> ignore n <- n - 1 let endIndexToken = stream.IndexToken stream.Seek(index) let lineBegin = index - p.Column + 1L // use SkipAndPeek instead of Skip, so that we can't move past the beginning of the stream stream.SkipAndPeek(if not colTooLarge then -(int32 p.Column - 1) else -(maxColForColCount - 1)) |> ignore if colTooLarge then let mutable n = if p.Column > int64 System.Int32.MaxValue then maxExtraChars else min maxExtraChars (int32 p.Column - maxColForColCount) while isCombiningChar (stream.PeekString(2)) && n <> 0 do stream.SkipAndPeek(-1) |> ignore n <- n - 1 let mutable beginIndex = stream.Index let mutable columnOffset = beginIndex - lineBegin let mutable idx = int (index - beginIndex) let beginIndexToken = stream.IndexToken stream.Seek(endIndexToken) let mutable str = stream.ReadFrom(beginIndexToken) // we're done with the stream now stream.BacktrackTo(oldState) let mutable lastLineBeginIdx = 0 let mutable unaccountedNLs = 0 let mutable mayContainMultiCharGraphemes = false let mutable nTabs = 0 for i = 0 to str.Length - 1 do let c = str[i] if c >= ' ' 
then if c >= '\u0300' then mayContainMultiCharGraphemes <- true elif c = '\t' then nTabs <- nTabs + 1 elif c = '\n' || (c = '\r' && (i + 1 >= str.Length || str[i + 1] <> '\n')) then // there can be no newline after idx lastLineBeginIdx <- i + 1 unaccountedNLs <- unaccountedNLs + 1 mayContainMultiCharGraphemes <- false nTabs <- 0 if unaccountedNLs <> 0 then str <- str.Substring(lastLineBeginIdx) idx <- idx - lastLineBeginIdx columnOffset <- 0L let utf16Column = columnOffset + int64 (idx + 1) let mutable lineContainsTabsBeforeIndex = false if nTabs > 0 then // replace tabs with spaces let mutable off = if columnOffset = 0L then 0 else int32 (columnOffset%(int64 tabSize)) let sb = new System.Text.StringBuilder(str.Length + nTabs*tabSize) let mutable i0 = 0 let mutable idxIncr = 0 for i = 0 to str.Length - 1 do if str[i] = '\t' then if i > i0 then sb.Append(str, i0, i - i0) |> ignore let n = tabSize - (off + i)%tabSize sb.Append(' ', n) |> ignore off <- off + (n - 1) if i < idx then lineContainsTabsBeforeIndex <- true idxIncr <- idxIncr + (n - 1) i0 <- i + 1 if i0 < str.Length then sb.Append(str, i0, str.Length - i0) |> ignore str <- sb.ToString() idx <- idx + idxIncr let clip nBefore nAfter = let mutable nBefore, nAfter = nBefore, nAfter let mutable diff = nBefore + nAfter + 1 - space if diff > 0 then let d = nBefore - nAfter if d > 0 then let dd = min diff d nBefore <- nBefore - dd diff <- diff - dd elif d < 0 then let dd = min diff -d nAfter <- nAfter - dd diff <- diff - dd if diff <> 0 then if diff%2 = 0 then nBefore <- nBefore - diff/2 nAfter <- nAfter - diff/2 else nBefore <- nBefore - diff/2 nAfter <- nAfter - diff/2 - 1 nBefore, nAfter if not mayContainMultiCharGraphemes then let nBefore, nAfter = clip idx (if idx < str.Length then str.Length - idx - 1 else 0) {String = str.Substring(idx - nBefore, nBefore + nAfter + (if idx < str.Length then 1 else 0)) Index = nBefore TextElementIndex = nBefore IndexOfTextElement = nBefore LengthOfTextElement = 1 
UnaccountedNewlines = unaccountedNLs Column = columnOffset + int64 (idx + 1) Utf16Column = utf16Column LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex IsBetweenCRAndLF = isBetweenCRAndLF} else let indices = System.Globalization.StringInfo.ParseCombiningCharacters(str) let mutable idxIdx = 0 // the indices index of the text element containing the str char at idx while idxIdx < indices.Length && indices[idxIdx] < idx do idxIdx <- idxIdx + 1 if (if idxIdx < indices.Length then indices[idxIdx] > idx else idx < str.Length) then idxIdx <- idxIdx - 1 let col = columnOffset + int64 (idxIdx + 1) let teIdx = if idxIdx < indices.Length then indices[idxIdx] else str.Length let teLength = (if idxIdx + 1 < indices.Length then indices[idxIdx + 1] else str.Length) - teIdx let mutable nBefore, nAfter = clip idxIdx (if idxIdx = indices.Length then 0 else indices.Length - idxIdx - 1) let mutable strBegin = let ii = idxIdx - nBefore in if ii < indices.Length then indices[ii] else str.Length let mutable strEnd = let ii = idxIdx + nAfter + 1 in if ii < indices.Length then indices[ii] else str.Length if not multiCharGraphemeSafe then while strEnd - strBegin > space && (nBefore > 0 || nAfter > 0) do if nBefore > nAfter then nBefore <- nBefore - 1 strBegin <- indices[idxIdx - nBefore] else nAfter <- nAfter - 1 strEnd <- indices[idxIdx + nAfter + 1] {String = str.Substring(strBegin, strEnd - strBegin) Index = idx - strBegin TextElementIndex = nBefore IndexOfTextElement = teIdx - strBegin LengthOfTextElement = teLength UnaccountedNewlines = unaccountedNLs Column = col Utf16Column = utf16Column LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex IsBetweenCRAndLF = isBetweenCRAndLF} ================================================ FILE: FParsec/Primitives.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. 
[<AutoOpen>]
module FParsec.Primitives

open FParsec.Internals
open FParsec.Error

[<Literal>]
let Ok = ReplyStatus.Ok
[<Literal>]
let Error = ReplyStatus.Error
[<Literal>]
let FatalError = ReplyStatus.FatalError

/// A parser is a function from a `CharStream` to a `Reply`.
type Parser<'a, 'u> = CharStream<'u> -> Reply<'a>

// The `PrimitiveTests.Reference` module contains simple (but inefficient)
// reference implementations of most of the functions below.

// A note on the pattern used throughout this module: a parser's error messages are
// only merged with those of the previous parser when `stream.StateTag` did not change
// while the later parser ran; otherwise only the later parser's messages are kept.

// =================================
// Parser primitives and combinators
// =================================

/// `preturn x` ignores the stream and returns `Reply(x)`.
let preturn x : Parser<_,_> = fun stream -> Reply(x)

/// `pzero` ignores the stream and returns the default-constructed `Reply()`.
let pzero : Parser<_,_> = fun stream -> Reply()

// ---------------------------
// Chaining and piping parsers
// ---------------------------

/// `p >>= f` applies `p`; if `p` returns `Ok`, it applies the parser `f x` (where `x`
/// is the result of `p`) and returns its reply, otherwise it returns `p`'s status and
/// error messages.
let (>>=) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) =
    match box f with
    // optimization for uncurried functions
    | :? OptimizedClosures.FSharpFunc<'a, CharStream<'u>, Reply<'b>> as optF ->
        fun stream ->
            let reply1 = p stream
            if reply1.Status = Ok then
                if isNull reply1.Error then
                    // in separate branch because the JIT can produce better code for a tail call
                    optF.Invoke(reply1.Result, stream)
                else
                    let stateTag1 = stream.StateTag
                    let mutable reply2 = optF.Invoke(reply1.Result, stream)
                    if stateTag1 = stream.StateTag then
                        reply2.Error <- mergeErrors reply2.Error reply1.Error
                    reply2
            else
                Reply(reply1.Status, reply1.Error)
    | _ ->
        fun stream ->
            let reply1 = p stream
            if reply1.Status = Ok then
                let p2 = f reply1.Result
                if isNull reply1.Error then
                    // in separate branch because the JIT can produce better code for a tail call
                    p2 stream
                else
                    let stateTag1 = stream.StateTag
                    let mutable reply2 = p2 stream
                    if stateTag1 = stream.StateTag then
                        reply2.Error <- mergeErrors reply2.Error reply1.Error
                    reply2
            else
                Reply(reply1.Status, reply1.Error)

/// `p >>% x` applies `p` and returns a reply with `p`'s status and error messages
/// but with `x` as the result.
let (>>%) (p: Parser<'a,'u>) x =
    fun stream ->
        let reply = p stream
        Reply(reply.Status, x, reply.Error)

/// `p >>. q` applies `p` and, if `p` returns `Ok`, `q`; the reply is that of `q`.
let (>>.) (p: Parser<'a,'u>) (q: Parser<'b,'u>) =
    fun stream ->
        let mutable reply1 = p stream
        if reply1.Status = Ok then
            if isNull reply1.Error then
                // in separate branch because the JIT can produce better code for a tail call
                q stream
            else
                let stateTag1 = stream.StateTag
                let mutable reply2 = q stream
                if stateTag1 = stream.StateTag then
                    reply2.Error <- mergeErrors reply2.Error reply1.Error
                reply2
        else
            Reply(reply1.Status, reply1.Error)

/// `p .>> q` applies `p` and, if `p` returns `Ok`, `q`; the reply carries the
/// result of `p` and the status of `q`.
let (.>>) (p: Parser<'a,'u>) (q: Parser<'b,'u>) =
    fun stream ->
        let mutable reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = q stream
            let error = if isNull reply1.Error then reply2.Error
                        elif stateTag1 <> stream.StateTag then reply2.Error
                        else mergeErrors reply2.Error reply1.Error
            reply1.Error  <- error
            reply1.Status <- reply2.Status
        reply1

/// `p .>>. q` applies `p` and `q` in sequence and returns both results as a tuple.
let (.>>.) (p: Parser<'a,'u>) (q: Parser<'b,'u>) =
    fun stream ->
        let reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = q stream
            let error = if stateTag1 <> stream.StateTag then reply2.Error
                        else mergeErrors reply1.Error reply2.Error
            let result = if reply2.Status = Ok then (reply1.Result, reply2.Result)
                         else Unchecked.defaultof<_>
            Reply(reply2.Status, result, error)
        else
            Reply(reply1.Status, reply1.Error)

/// `between popen pclose p` applies `popen`, `p` and `pclose` in sequence and
/// returns the result of `p`.
let between (popen: Parser<_,'u>) (pclose: Parser<_,'u>) (p: Parser<_,'u>) =
    fun stream ->
        let reply1 = popen stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let mutable reply2 = p stream
            if reply2.Status = Ok then
                let stateTag2 = stream.StateTag
                let reply3 = pclose stream
                let error =
                    if stateTag2 <> stream.StateTag then reply3.Error
                    else
                        let error2 = mergeErrors reply2.Error reply3.Error
                        if stateTag1 <> stateTag2 then error2
                        else mergeErrors reply1.Error error2
                reply2.Error  <- error
                reply2.Status <- reply3.Status
                reply2
            else
                let error = if stateTag1 <> stream.StateTag then reply2.Error
                            else mergeErrors reply1.Error reply2.Error
                reply2.Error <- error
                reply2
        else
            Reply(reply1.Status, reply1.Error)

/// `p |>> f` applies `p` and maps its result with `f`; `f` is only called when
/// `p` returns `Ok`.
let (|>>) (p: Parser<'a,'u>) f =
    fun stream ->
        let reply = p stream
        Reply(reply.Status,
              (if reply.Status = Ok then f reply.Result
               else Unchecked.defaultof<_>),
              reply.Error)

/// `pipe2 p1 p2 f` applies `p1` and `p2` in sequence and returns `f a b`.
/// Parsing stops at the first parser that does not return `Ok`.
let pipe2 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) f =
    let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    fun stream ->
        let mutable reply = Reply()
        let reply1 = p1 stream
        let mutable error = reply1.Error
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = p2 stream
            error <- if stateTag1 <> stream.StateTag then reply2.Error
                     else mergeErrors error reply2.Error
            if reply2.Status = Ok then
                reply.Result <- optF.Invoke(reply1.Result, reply2.Result)
                reply.Status <- Ok
            else reply.Status <- reply2.Status
        else reply.Status <- reply1.Status
        reply.Error <- error
        reply

/// `pipe3 p1 p2 p3 f` applies the three parsers in sequence and returns `f a b c`.
let pipe3 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) f =
    let optF = OptimizedClosures.FSharpFunc<_,_,_,_>.Adapt(f)
    fun stream ->
        let mutable reply = Reply()
        let reply1 = p1 stream
        let mutable error = reply1.Error
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = p2 stream
            error <- if stateTag1 <> stream.StateTag then reply2.Error
                     else mergeErrors error reply2.Error
            if reply2.Status = Ok then
                let stateTag2 = stream.StateTag
                let reply3 = p3 stream
                error <- if stateTag2 <> stream.StateTag then reply3.Error
                         else mergeErrors error reply3.Error
                if reply3.Status = Ok then
                    reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result)
                    reply.Status <- Ok
                else reply.Status <- reply3.Status
            else reply.Status <- reply2.Status
        else reply.Status <- reply1.Status
        reply.Error <- error
        reply

/// `pipe4 p1 p2 p3 p4 f` applies the four parsers in sequence and returns `f a b c d`.
let pipe4 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) (p4: Parser<'d,'u>) f =
    let optF = OptimizedClosures.FSharpFunc<_,_,_,_,_>.Adapt(f)
    fun stream ->
        let mutable reply = Reply()
        let reply1 = p1 stream
        let mutable error = reply1.Error
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = p2 stream
            error <- if stateTag1 <> stream.StateTag then reply2.Error
                     else mergeErrors error reply2.Error
            if reply2.Status = Ok then
                let stateTag2 = stream.StateTag
                let reply3 = p3 stream
                error <- if stateTag2 <> stream.StateTag then reply3.Error
                         else mergeErrors error reply3.Error
                if reply3.Status = Ok then
                    let stateTag3 = stream.StateTag
                    let reply4 = p4 stream
                    error <- if stateTag3 <> stream.StateTag then reply4.Error
                             else mergeErrors error reply4.Error
                    if reply4.Status = Ok then
                        reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result, reply4.Result)
                        reply.Status <- Ok
                    else reply.Status <- reply4.Status
                else reply.Status <- reply3.Status
            else reply.Status <- reply2.Status
        else reply.Status <- reply1.Status
        reply.Error <- error
        reply

/// `pipe5 p1 p2 p3 p4 p5 f` applies the five parsers in sequence and returns `f a b c d e`.
let pipe5 (p1: Parser<'a,'u>) (p2: Parser<'b,'u>) (p3: Parser<'c,'u>) (p4: Parser<'d,'u>) (p5: Parser<'e,'u>) f =
    let optF = OptimizedClosures.FSharpFunc<_,_,_,_,_,_>.Adapt(f)
    fun stream ->
        let mutable reply = Reply()
        let reply1 = p1 stream
        let mutable error = reply1.Error
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = p2 stream
            error <- if stateTag1 <> stream.StateTag then reply2.Error
                     else mergeErrors error reply2.Error
            if reply2.Status = Ok then
                let stateTag2 = stream.StateTag
                let reply3 = p3 stream
                error <- if stateTag2 <> stream.StateTag then reply3.Error
                         else mergeErrors error reply3.Error
                if reply3.Status = Ok then
                    let stateTag3 = stream.StateTag
                    let reply4 = p4 stream
                    error <- if stateTag3 <> stream.StateTag then reply4.Error
                             else mergeErrors error reply4.Error
                    if reply4.Status = Ok then
                        let stateTag4 = stream.StateTag
                        let reply5 = p5 stream
                        error <- if stateTag4 <> stream.StateTag then reply5.Error
                                 else mergeErrors error reply5.Error
                        if reply5.Status = Ok then
                            reply.Result <- optF.Invoke(reply1.Result, reply2.Result, reply3.Result, reply4.Result, reply5.Result)
                            reply.Status <- Ok
                        else reply.Status <- reply5.Status
                    else reply.Status <- reply4.Status
                else reply.Status <- reply3.Status
            else reply.Status <- reply2.Status
        else reply.Status <- reply1.Status
        reply.Error <- error
        reply

// -----------------------------------------------
// Parsing alternatives and recovering from errors
// -----------------------------------------------

/// `p1 <|> p2` applies `p1`; if `p1` returns `Error` without having changed the
/// stream state, it applies `p2` instead.
let (<|>) (p1: Parser<'a,'u>) (p2: Parser<'a,'u>) : Parser<'a,'u> =
    fun stream ->
        let mutable stateTag = stream.StateTag
        let mutable reply = p1 stream
        if reply.Status = Error && stateTag = stream.StateTag then
            let error = reply.Error
            reply <- p2 stream
            if stateTag = stream.StateTag then
                reply.Error <- mergeErrors reply.Error error
        reply

/// `choice ps` tries the parsers in `ps` in order until one does not return `Error`
/// or changes the stream state. Has specialised code paths for arrays and lists;
/// an empty array or list yields `pzero`, an empty sequence yields `Reply()`.
let choice (ps: seq<Parser<'a,'u>>) =
    match ps with
    | :? (Parser<'a,'u>[]) as ps ->
        if ps.Length = 0 then pzero
        else
            fun stream ->
                let stateTag = stream.StateTag
                let mutable error = NoErrorMessages
                let mutable reply = ps[0] stream
                let mutable i = 1
                while reply.Status = Error && stateTag = stream.StateTag && i < ps.Length do
                    error <- mergeErrors error reply.Error
                    reply <- ps[i] stream
                    i <- i + 1
                if stateTag = stream.StateTag then
                    error <- mergeErrors error reply.Error
                    reply.Error <- error
                reply
    | :? (Parser<'a,'u> list) as ps ->
        match ps with
        | [] -> pzero
        | hd::tl ->
            fun stream ->
                let stateTag = stream.StateTag
                let mutable error = NoErrorMessages
                let mutable hd, tl = hd, tl
                let mutable reply = hd stream
                // the match in the loop condition advances to the next list element
                while reply.Status = Error && stateTag = stream.StateTag
                      && (match tl with
                          | h::t -> hd <- h; tl <- t; true
                          | _ -> false)
                   do
                    error <- mergeErrors error reply.Error
                    reply <- hd stream
                if stateTag = stream.StateTag then
                    error <- mergeErrors error reply.Error
                    reply.Error <- error
                reply
    | _ ->
        fun stream ->
            use iter = ps.GetEnumerator()
            if iter.MoveNext() then
                let stateTag = stream.StateTag
                let mutable error = NoErrorMessages
                let mutable reply = iter.Current stream
                while reply.Status = Error && stateTag = stream.StateTag && iter.MoveNext() do
                    error <- mergeErrors error reply.Error
                    reply <- iter.Current stream
                if stateTag = stream.StateTag then
                    error <- mergeErrors error reply.Error
                    reply.Error <- error
                reply
            else
                Reply()

/// `choiceL ps label` works like `choice ps`, but instead of collecting the error
/// messages of the alternatives it reports `expected label` whenever the stream
/// state is unchanged.
let choiceL (ps: seq<Parser<'a,'u>>) label : Parser<_,_> =
    let error = expected label
    match ps with
    | :? (Parser<'a,'u>[]) as ps ->
        if ps.Length = 0 then
            fun stream -> Reply(Error, error)
        else
            fun stream ->
                let stateTag = stream.StateTag
                let mutable reply = ps[0] stream
                let mutable i = 1
                while reply.Status = Error && stateTag = stream.StateTag && i < ps.Length do
                    reply <- ps[i] stream
                    i <- i + 1
                if stateTag = stream.StateTag then
                    reply.Error <- error
                reply
    | :? (Parser<'a,'u> list) as ps ->
        match ps with
        | [] -> fun stream -> Reply(Error, error)
        | hd::tl ->
            fun stream ->
                let stateTag = stream.StateTag
                let mutable hd, tl = hd, tl
                let mutable reply = hd stream
                // the match in the loop condition advances to the next list element
                while reply.Status = Error && stateTag = stream.StateTag
                      && (match tl with
                          | h::t -> hd <- h; tl <- t; true
                          | _ -> false)
                   do
                    reply <- hd stream
                if stateTag = stream.StateTag then
                    reply.Error <- error
                reply
    | _ ->
        fun stream ->
            use iter = ps.GetEnumerator()
            if iter.MoveNext() then
                let stateTag = stream.StateTag
                let mutable reply = iter.Current stream
                while reply.Status = Error && stateTag = stream.StateTag && iter.MoveNext() do
                    reply <- iter.Current stream
                if stateTag = stream.StateTag then
                    reply.Error <- error
                reply
            else
                Reply(Error, error)

/// `p <|>% x` applies `p`; if `p` returns `Error` without having changed the stream
/// state, the reply is turned into an `Ok` reply with result `x`.
let (<|>%) (p: Parser<'a,'u>) x : Parser<'a,'u> =
    fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p stream
        if reply.Status = Error && stateTag = stream.StateTag then
            reply.Result <- x
            reply.Status <- Ok
        reply

/// `opt p` returns `Some x` if `p` succeeds with `x`, and succeeds without a result
/// value (i.e. `None`) if `p` returns `Error` without having changed the stream state.
let opt (p: Parser<'a,'u>) : Parser<'a option,'u> =
    fun stream ->
        let stateTag = stream.StateTag
        let reply = p stream
        if reply.Status = Ok then
            Reply(Ok, Some reply.Result, reply.Error)
        else
            // None is represented as null
            let status = if reply.Status = Error && stateTag = stream.StateTag then Ok else reply.Status
            Reply(status, reply.Error)

/// `optional p` applies `p` and discards its result; an `Error` without a stream
/// state change is turned into `Ok`.
let optional (p: Parser<'a,'u>) : Parser<unit,'u> =
    fun stream ->
        let stateTag = stream.StateTag
        let reply = p stream
        let status = if reply.Status = Error && stateTag = stream.StateTag then Ok else reply.Status
        Reply(status, (), reply.Error)

/// `attempt p` applies `p`; if `p` does not return `Ok`, the stream is backtracked
/// to the state before `p` was applied (wrapping the errors in a nested error when
/// the state had changed) and the status is set to `Error`.
let attempt (p: Parser<'a,'u>) : Parser<'a,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let mutable reply = p stream
        if reply.Status <> Ok then
            if state.Tag <> stream.StateTag then
                reply.Error  <- nestedError stream reply.Error
                reply.Status <- Error // turns FatalErrors into Errors
                stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
            elif reply.Status = FatalError then
                reply.Status <- Error
        reply

/// `p >>=? f` behaves like `p >>= f`, except that the stream is backtracked to the
/// beginning if the second parser returns `Error` without having changed the state
/// after `p` had changed it.
let (>>=?) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) : Parser<'b,'u> =
    let optF = OptimizedClosures.FSharpFunc<_,_,_>.Adapt(f)
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let mutable reply2 = optF.Invoke(reply1.Result, stream)
            if stateTag1 = stream.StateTag then
                let error = mergeErrors reply1.Error reply2.Error
                if reply2.Status <> Error || stateTag1 = state.Tag then
                    reply2.Error <- error
                else
                    reply2.Error <- nestedError stream error
                    stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
            reply2
        else
            Reply(reply1.Status, reply1.Error)

/// `p >>? q` behaves like `p >>. q`, with the same backtracking rule as `>>=?`.
let (>>?) (p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'b,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let mutable reply2 = q stream
            if stateTag1 = stream.StateTag then
                let error = mergeErrors reply1.Error reply2.Error
                if reply2.Status <> Error || stateTag1 = state.Tag then
                    reply2.Error <- error
                else
                    reply2.Error <- nestedError stream error
                    stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
            reply2
        else
            Reply(reply1.Status, reply1.Error)

/// `p .>>.? q` behaves like `p .>>. q`, with the same backtracking rule as `>>=?`.
let (.>>.?) (p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'a*'b,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let mutable reply2 = q stream
            if stateTag1 = stream.StateTag then
                let error = mergeErrors reply1.Error reply2.Error
                if reply2.Status <> Error || stateTag1 = state.Tag then
                    reply2.Error <- error
                else
                    reply2.Error <- nestedError stream error
                    stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
            let result = if reply2.Status = Ok then (reply1.Result, reply2.Result)
                         else Unchecked.defaultof<_>
            Reply(reply2.Status, result, reply2.Error)
        else
            Reply(reply1.Status, reply1.Error)

/// `p .>>? q` behaves like `p .>> q`, with the same backtracking rule as `>>=?`.
let (.>>?) (p: Parser<'a,'u>) (q: Parser<'b,'u>) : Parser<'a,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let mutable reply1 = p stream
        if reply1.Status = Ok then
            let stateTag1 = stream.StateTag
            let reply2 = q stream
            if stateTag1 = stream.StateTag then
                let error = mergeErrors reply1.Error reply2.Error
                if reply2.Status <> Error || stateTag1 = state.Tag then
                    reply1.Error  <- error
                    reply1.Status <- reply2.Status
                else
                    reply1.Error  <- nestedError stream error
                    stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
                    reply1.Status <- Error
            else
                reply1.Error  <- reply2.Error
                reply1.Status <- reply2.Status
        reply1

// -------------------------------------
// Conditional parsing and looking ahead
// -------------------------------------

/// `notEmpty p` behaves like `p`, except that an `Ok` reply is turned into an
/// `Error` reply when `p` did not change the stream state.
let notEmpty (p: Parser<'a,'u>) : Parser<'a,'u> =
    fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p stream
        if stateTag = stream.StateTag && reply.Status = Ok then
            reply.Status <- Error
        reply

// REVIEW: should `followedBy` use the error messages generated by `p`?
/// Shared implementation of `followedBy`/`followedByL`: applies `p`, always restores
/// the stream state, and succeeds with `()` iff `p` returned `Ok`; otherwise it
/// returns `Error` with the given `error` messages.
let internal followedByE (p: Parser<'a,'u>) error : Parser<unit,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let reply = p stream
        if state.Tag <> stream.StateTag then
            stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
        if reply.Status = Ok then Reply(())
        else Reply(Error, error)

let followedBy    p       = followedByE p NoErrorMessages
let followedByL   p label = followedByE p (expected label)

/// Shared implementation of `notFollowedBy`/`notFollowedByL`: applies `p`, always
/// restores the stream state, and succeeds with `()` iff `p` did not return `Ok`.
let internal notFollowedByE (p: Parser<'a,'u>) error : Parser<unit,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let reply = p stream
        if state.Tag <> stream.StateTag then
            stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
        if reply.Status <> Ok then Reply(())
        else Reply(Error, error)

let notFollowedBy  p       = notFollowedByE p NoErrorMessages
let notFollowedByL p label = notFollowedByE p (unexpected label)

/// `lookAhead p` applies `p` and restores the stream state afterwards. On success
/// the error messages are cleared; on failure the status is `Error` and, if the
/// state had changed, the messages are wrapped in a nested error.
let lookAhead (p: Parser<'a,'u>) : Parser<'a,'u> =
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let mutable reply = p stream
        if reply.Status = Ok then
            reply.Error <- NoErrorMessages
            if state.Tag <> stream.StateTag then
                stream.BacktrackTo(&state) // passed by ref as a (slight) optimization
        else
            if state.Tag <> stream.StateTag then
                reply.Error <- nestedError stream reply.Error
                stream.BacktrackTo(&state)
            reply.Status <- Error // turn FatalErrors into normal Errors
        reply

// --------------------------
// Customizing error messages
// --------------------------

/// `p <?> label` applies `p`; if `p` did not change the stream state, the error
/// messages of the reply are replaced with `expected label`.
let (<?>) (p: Parser<'a,'u>) label : Parser<'a,'u> =
    let error = expected label
    fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p stream
        if stateTag = stream.StateTag then
            reply.Error <- error
        reply

/// `p <??> label` behaves like `p <?> label` when `p` does not change the stream
/// state (except that a single nested error is turned into a compound error).
/// If `p` fails after changing the state, the stream is backtracked and a
/// `FatalError` with a compound error is returned.
let (<??>) (p: Parser<'a,'u>) label : Parser<'a,'u> =
    let expErr = expected label
    fun stream ->
        // state is only declared mutable so it can be passed by ref, it won't be mutated
        let mutable state = CharStreamState(stream) // = stream.State (manually inlined)
        let mutable reply = p stream
        if reply.Status = Ok then
            if state.Tag = stream.StateTag then
                reply.Error <- expErr
        else
            if state.Tag = stream.StateTag then
                (*
                // manually inlined:
                let error = match reply.Error with
                            | ErrorMessageList(NestedError(pos, userState, msgs), NoErrorMessages)
                                -> ErrorMessageList(CompoundError(label, pos, userState, msgs), NoErrorMessages)
                            | _ -> expErr
                *)
                let error = if reply.Error |> isSingleErrorMessageOfType ErrorMessageType.NestedError then
                                let ne = reply.Error.Head :?> NestedError
                                ErrorMessageList(CompoundError(label, ne.Position, ne.UserState, ne.Messages))
                            else expErr
                reply.Error <- error
            else
                reply.Error  <- compoundError label stream reply.Error
                stream.BacktrackTo(&state) // we backtrack ...
                reply.Status <- FatalError // ... so we need to make sure normal parsing doesn't continue
        reply

/// `fail msg` always returns `Error` with the message error `msg`.
let fail msg : Parser<'a,'u> =
    let error = messageError msg
    fun stream -> Reply(Error, error)

/// `failFatally msg` always returns `FatalError` with the message error `msg`.
let failFatally msg : Parser<'a,'u> =
    let error = messageError msg
    fun stream -> Reply(FatalError, error)

// -----------------
// Parsing sequences
// -----------------

let tuple2 p1 p2          = p1 .>>. p2
let tuple3 p1 p2 p3       = pipe3 p1 p2 p3       (fun a b c     -> (a, b, c))
let tuple4 p1 p2 p3 p4    = pipe4 p1 p2 p3 p4    (fun a b c d   -> (a, b, c, d))
let tuple5 p1 p2 p3 p4 p5 = pipe5 p1 p2 p3 p4 p5 (fun a b c d e -> (a, b, c, d, e))

/// `parray n p` applies `p` `n` times and collects the results in an array.
/// If an application fails, the reply carries the failing status; the partially
/// filled array is still set as the result unless the very first application failed.
let parray n (p: Parser<'a,'u>) =
    if n = 0 then preturn [||]
    else
        fun stream ->
            let mutable reply = p stream
            let mutable error = reply.Error
            let mutable newReply = Reply()
            if reply.Status = Ok then
                let mutable xs = Array.zeroCreate n
                xs[0] <- reply.Result
                let mutable i = 1
                while i < n do
                    let mutable stateTag = stream.StateTag
                    reply <- p stream
                    error <- if stateTag <> stream.StateTag then reply.Error
                             else mergeErrors error reply.Error
                    if reply.Status = Ok then
                        xs[i] <- reply.Result
                        i <- i + 1
                    else
                        i <- n // break
                newReply.Result <- xs // we set the result even if there was an error
            newReply.Error  <- error
            newReply.Status <- reply.Status
            newReply

/// `skipArray n p` applies `p` `n` times and discards the results.
let skipArray n (p: Parser<'a,'u>) =
    if n = 0 then preturn ()
    else
        fun stream ->
            let mutable reply = p stream
            let mutable error = reply.Error
            let mutable newReply = Reply()
            if reply.Status = Ok then
                let mutable i = 1
                while i < n do
                    let mutable stateTag = stream.StateTag
                    reply <- p stream
                    error <- if stateTag <> stream.StateTag then reply.Error
                             else mergeErrors error reply.Error
                    if reply.Status = Ok then
                        i <- i + 1
                    else
                        i <- n // break
           // () is represented as null
            newReply.Error  <- error
            newReply.Status <- reply.Status
            newReply

// Inlinable building blocks for sequence parsers. Each member folds the element
// results into a caller-defined state:
//   stateFromFirstElement  creates the state from the first result,
//   foldState              adds a further result to the state,
//   resultFromState        turns the final state into the parser result,
//   resultForEmptySequence (optional) supplies the result when no element could be
//                          parsed; when it is omitted at least one element is required.
[<Sealed>]
type Inline =

// Many: applies `elementParser` (or `firstElementParser` for the first element)
// until it fails. Raises the infinite-loop exception if an element parser
// succeeds without changing the stream state.
#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       Many(stateFromFirstElement,
                            foldState,
                            resultFromState,
                            elementParser: Parser<_,_>,
                            ?firstElementParser: Parser<_,_>,
                            ?resultForEmptySequence) : Parser<_,_> =
      fun stream ->
        let mutable stateTag = stream.StateTag
        let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser
        let mutable reply = firstElementParser stream
        if reply.Status = Ok then
            let mutable xs    = stateFromFirstElement reply.Result
            let mutable error = reply.Error
            stateTag <- stream.StateTag
            reply <- elementParser stream
            while reply.Status = Ok do
                if stateTag = stream.StateTag then
                    raiseInfiniteLoopException "many" stream
                xs       <- foldState xs reply.Result
                error    <- reply.Error
                stateTag <- stream.StateTag
                reply    <- elementParser stream
            if reply.Status = Error && stateTag = stream.StateTag then
                error <- mergeErrors error reply.Error
                Reply(Ok, resultFromState xs, error)
            else
                error <- if stateTag <> stream.StateTag then reply.Error
                         else mergeErrors error reply.Error
                Reply(reply.Status, error)
        else
            match resultForEmptySequence with
            | Some _ (* if we bind f here, fsc won't be able to inline it *)
              when reply.Status = Error && stateTag = stream.StateTag ->
                Reply(Ok, (match resultForEmptySequence with Some f -> f() | _ -> Unchecked.defaultof<_>), reply.Error)
            | _ ->
                Reply(reply.Status, reply.Error)

// SepBy: parses elements separated by `separatorParser`; `foldState` receives the
// state, the separator result and the element result. With
// `separatorMayEndSequence = true` a trailing separator is accepted when the
// following element parser fails without changing the stream state.
#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       SepBy(stateFromFirstElement,
                             foldState,
                             resultFromState,
                             elementParser: Parser<_,_>,
                             separatorParser: Parser<_,_>,
                             ?firstElementParser: Parser<_,'u>,
                             ?resultForEmptySequence,
                             ?separatorMayEndSequence) : Parser<_,'u> =
      fun stream ->
        let mutable stateTag = stream.StateTag
        let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser
        let mutable reply = firstElementParser stream
        if reply.Status = Ok then
            let mutable xs    = stateFromFirstElement reply.Result
            let mutable error = reply.Error
            stateTag <- stream.StateTag
            let mutable sepReply = separatorParser stream
            let mutable sepStateTag = stream.StateTag
            while sepReply.Status = Ok && (reply <- elementParser stream; reply.Status = Ok) do
                xs <- foldState xs sepReply.Result reply.Result
                if sepStateTag <> stream.StateTag then
                    error <- reply.Error
                elif stateTag <> sepStateTag then
                    error <- mergeErrors sepReply.Error reply.Error
                else
                    // neither the separator nor the element changed the stream state
                    raiseInfiniteLoopException "sep(End)By" stream
                stateTag <- stream.StateTag
                sepReply <- separatorParser stream
                sepStateTag <- stream.StateTag
            if sepReply.Status = Error && stateTag = sepStateTag then
                Reply(Ok, resultFromState xs, mergeErrors error sepReply.Error)
            else
                match separatorMayEndSequence with
                | Some true when reply.Status = Error && sepStateTag = stream.StateTag ->
                    error <- mergeErrors (if stateTag <> sepStateTag then sepReply.Error
                                          else mergeErrors error sepReply.Error) reply.Error
                    Reply(Ok, resultFromState xs, error)
                | _ when reply.Status <> Ok ->
                    error <-
                        if sepStateTag <> stream.StateTag then reply.Error
                        else
                            let error2 = mergeErrors sepReply.Error reply.Error
                            if stateTag <> sepStateTag then error2
                            else mergeErrors error error2
                    Reply(reply.Status, error)
                | _ ->
                    let error = if stateTag <> sepStateTag then sepReply.Error
                                else mergeErrors error sepReply.Error
                    Reply(sepReply.Status, error)
        else
            match resultForEmptySequence with
            | Some _ (* if we bind f here, fsc won't be able to inline it *)
              when reply.Status = Error && stateTag = stream.StateTag ->
                Reply(Ok, (match resultForEmptySequence with Some f -> f() | _ -> Unchecked.defaultof<_>), reply.Error)
            | _ ->
                Reply(reply.Status, reply.Error)

// ManyTill: applies `elementParser` repeatedly until `endParser` succeeds;
// `resultFromStateAndEnd` combines the folded state with the end parser's result.
// Inside the loops `endReply.Status` is temporarily set to the sentinel value
// `enum System.Int32.MinValue`; if it still holds that value after the loop, the
// element parser (not the end parser) was the one that failed.
#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       ManyTill(stateFromFirstElement,
                                foldState,
                                resultFromStateAndEnd,
                                elementParser: Parser<_,_>,
                                endParser: Parser<_,_>,
                                ?firstElementParser: Parser<_,_>,
                                ?resultForEmptySequence) : Parser<_,_> =
      fun stream ->
        // This is really, really ugly, but it does the job,
        // and it does it about as efficient as it can be done here.
        let firstElementParser = match firstElementParser with Some p -> p | _ -> elementParser
        match resultForEmptySequence with
        | None -> // require at least one element
            let mutable reply = firstElementParser stream
            if reply.Status = Ok then
                // ------------------------------------------------------------------
                // the following code is duplicated in the match branch below
                let mutable xs = stateFromFirstElement reply.Result
                let mutable error = reply.Error
                let mutable stateTag = stream.StateTag
                let mutable endReply = endParser stream
                while endReply.Status = Error && stateTag = stream.StateTag do
                    endReply.Status <- enum System.Int32.MinValue
                    reply <- elementParser stream
                    if reply.Status = Ok then
                        if stateTag = stream.StateTag then
                            raiseInfiniteLoopException "manyTill" stream
                        xs       <- foldState xs reply.Result
                        error    <- reply.Error
                        stateTag <- stream.StateTag
                        endReply <- endParser stream
                if endReply.Status = Ok then
                    error <- if stateTag <> stream.StateTag then endReply.Error
                             else mergeErrors error endReply.Error
                    Reply(Ok, resultFromStateAndEnd xs endReply.Result, error)
                elif endReply.Status = enum System.Int32.MinValue then
                    error <- if stateTag <> stream.StateTag then reply.Error
                             else mergeErrors (mergeErrors error endReply.Error) reply.Error
                    Reply(reply.Status, error)
                else
                    error <- if stateTag <> stream.StateTag then endReply.Error
                             else mergeErrors error endReply.Error
                    Reply(endReply.Status, error)
                // ------------------------------------------------------------------
            else
                Reply(reply.Status, reply.Error)
        | Some _ ->
            let mutable stateTag = stream.StateTag
            let mutable endReply = endParser stream
            if endReply.Status = Error && stateTag = stream.StateTag then
                let mutable reply = firstElementParser stream
                if reply.Status = Ok then
                    // ------------------------------------------------------------------
                    // the following code is duplicated in the match branch above
                    let mutable xs = stateFromFirstElement reply.Result
                    let mutable error = reply.Error
                    stateTag <- stream.StateTag
                    endReply <- endParser stream
                    while endReply.Status = Error && stateTag = stream.StateTag do
                        endReply.Status <- enum System.Int32.MinValue
                        reply <- elementParser stream
                        if reply.Status = Ok then
                            if stateTag = stream.StateTag then
                                raiseInfiniteLoopException "manyTill" stream
                            xs       <- foldState xs reply.Result
                            error    <- reply.Error
                            stateTag <- stream.StateTag
                            endReply <- endParser stream
                    if endReply.Status = Ok then
                        error <- if stateTag <> stream.StateTag then endReply.Error
                                 else mergeErrors error endReply.Error
                        Reply(Ok, resultFromStateAndEnd xs endReply.Result, error)
                    elif endReply.Status = enum System.Int32.MinValue then
                        error <- if stateTag <> stream.StateTag then reply.Error
                                 else mergeErrors (mergeErrors error endReply.Error) reply.Error
                        Reply(reply.Status, error)
                    else
                        error <- if stateTag <> stream.StateTag then endReply.Error
                                 else mergeErrors error endReply.Error
                        Reply(endReply.Status, error)
                    // ------------------------------------------------------------------
                else
                    let error = if stateTag <> stream.StateTag then reply.Error
                                else mergeErrors endReply.Error reply.Error
                    Reply(reply.Status, error)
            elif endReply.Status = Ok then
                Reply(Ok, (match resultForEmptySequence with Some f -> f endReply.Result | _ -> Unchecked.defaultof<_>), endReply.Error)
            else
                Reply(endReply.Status, endReply.Error)

// The list-building variants below cons results onto a list and reverse it at the end.
let many      p = Inline.Many((fun x -> [x]), (fun xs x -> x::xs), List.rev, p, resultForEmptySequence = fun () -> [])
let many1     p = Inline.Many((fun x -> [x]), (fun xs x -> x::xs), List.rev, p)

let skipMany  p = Inline.Many((fun _ -> ()), (fun _ _ -> ()), (fun xs -> xs), p, resultForEmptySequence = fun () -> ())
let skipMany1 p = Inline.Many((fun _ -> ()), (fun _ _ -> ()), (fun xs -> xs), p)

let sepBy         p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev,       p, sep, resultForEmptySequence = fun () -> [])
let sepBy1        p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev,       p, sep)

let skipSepBy     p sep = Inline.SepBy((fun _ -> ()),  (fun _ _ _ -> ()),     (fun xs -> xs), p, sep,
resultForEmptySequence = fun () -> ())
let skipSepBy1    p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep)

let sepEndBy      p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep, separatorMayEndSequence = true, resultForEmptySequence = fun () -> [])
let sepEndBy1     p sep = Inline.SepBy((fun x -> [x]), (fun xs _ x -> x::xs), List.rev, p, sep, separatorMayEndSequence = true)

let skipSepEndBy  p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep, separatorMayEndSequence = true, resultForEmptySequence = fun () -> ())
let skipSepEndBy1 p sep = Inline.SepBy((fun _ -> ()), (fun _ _ _ -> ()), (fun xs -> xs), p, sep, separatorMayEndSequence = true)

let manyTill       p endp = Inline.ManyTill((fun x -> [x]), (fun xs x -> x::xs), (fun xs _ -> List.rev xs), p, endp, resultForEmptySequence = fun _ -> [])
let many1Till      p endp = Inline.ManyTill((fun x -> [x]), (fun xs x -> x::xs), (fun xs _ -> List.rev xs), p, endp)

let skipManyTill   p endp = Inline.ManyTill((fun _ -> ()), (fun _ _ -> ()), (fun _ _ -> ()), p, endp, resultForEmptySequence = fun _ -> ())
let skipMany1Till  p endp = Inline.ManyTill((fun _ -> ()), (fun _ _ -> ()), (fun _ _ -> ()), p, endp)

// chainl1 folds eagerly: the state is the value computed so far and every
// parsed operator function is applied immediately to it and the next operand.
let chainl1 p op =
    Inline.SepBy((fun x0 -> x0), (fun x f y -> f x y), (fun x -> x), p, op)

let chainl p op x = chainl1 p op <|>% x

// chainr1 first collects all (operator, operand) pairs in reverse order and
// then applies the operators starting with the last parsed one.
// The first operand is stored with a default (null) operator that is never applied.
let chainr1 p op =
    Inline.SepBy(elementParser = p, separatorParser = op,
                 stateFromFirstElement = (fun x0 -> [(Unchecked.defaultof<_>, x0)]),
                 foldState = (fun acc op x -> (op, x)::acc),
                 resultFromState = function // is called with (op, y) list in reverse order
                                   | ((op, y)::tl) ->
                                       let rec calc op y lst =
                                           match lst with
                                           | (op2, x)::tl -> calc op2 (op x y) tl
                                           | [] -> y // op is null
                                       calc op y tl
                                   | [] -> // shouldn't happen
                                       failwith "chainr1")

let chainr p op x = chainr1 p op <|>% x


// ------------------------------
// Computation expression syntax
// ------------------------------
[<Sealed>]
type ParserCombinator() =
    // Delay postpones the call to `f` until the parser is applied to a stream.
    member t.Delay(f:(unit -> Parser<'a,'u>)) = fun stream -> (f()) stream
    member t.Return(x) = preturn x
    member t.Bind(p, f) = p >>= f
    member t.Zero() : Parser<'a,'u> = pzero
    member t.ReturnFrom(p: Parser<'a,'u>) = p
    // no Combine member on purpose
    member t.TryWith(p:Parser<'a,'u>, cf:(exn -> Parser<'a,'u>)) =
        fun stream ->
            (try p stream with e -> (cf e) stream)
    member t.TryFinally(p:Parser<'a,'u>, ff:(unit -> unit)) =
        fun stream ->
            try p stream finally ff ()

let parse = ParserCombinator()


// ----------------------
// Other helper functions
// ----------------------

// Returns a parser that forwards to the parser stored in the returned reference cell.
// Until the cell is assigned, applying the parser raises an exception.
let createParserForwardedToRef() =
    let dummyParser = fun stream -> failwith "a parser created with createParserForwardedToRef was not initialized"
    let r = ref dummyParser
    (fun stream -> r.Value stream), r : Parser<_,'u> * Parser<_,'u> ref

================================================
FILE: FParsec/Primitives.fsi
================================================
// Copyright (c) Stephan Tolksdorf 2007-2011
// License: Simplified BSD License. See accompanying documentation.

[<AutoOpen>]
module FParsec.Primitives

open FParsec
open FParsec.Error

/// The parser succeeded.
[<Literal>]
val Ok:         ReplyStatus = ReplyStatus.Ok;;

/// The parser failed.
[<Literal>]
val Error:      ReplyStatus = ReplyStatus.Error;;

/// The parser failed and no error recovery (except after backtracking) should be tried.
[<Literal>]
val FatalError: ReplyStatus = ReplyStatus.FatalError;;

/// The type of the parser functions supported by FParsec combinators.
type Parser<'Result, 'UserState> = CharStream<'UserState> -> Reply<'Result>

// =================================
// Parser primitives and combinators
// =================================

// Two basic primitives that are only seldom used directly in user code:

/// The parser `preturn x` always succeeds with the result `x` (without changing the parser state).
/// `preturn x` is defined as `fun stream -> Reply(x)`.
val preturn: 'a -> Parser<'a,'u>

/// The parser `pzero` always fails with an empty error message list, i.e. an unspecified error.
/// `pzero` is defined as `fun stream -> Reply(Error, NoErrorMessages)`.
val pzero: Parser<'a,'u>

// ---------------------------
// Chaining and piping parsers
// ---------------------------

/// The parser `p >>= f` first applies the parser `p` to the input, then applies the function `f`
/// to the result returned by `p` and finally applies the parser returned by `f` to the input.
val (>>=): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

/// The parser `p >>% x` applies the parser `p` and returns the result `x`.
val (>>%): Parser<'a,'u> -> 'b -> Parser<'b,'u>

/// The parser `p1 >>. p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p2`.
val (>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

/// The parser `p1 .>> p2` applies the parsers `p1` and `p2` in sequence and returns the result of `p1`.
val (.>>): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

/// The parser `p1 .>>. p2` applies the parsers `p1` and `p2` in sequence and returns the results in a tuple.
val (.>>.): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

/// The parser `between pOpen pClose p` applies the parsers `pOpen`, `p` and `pClose` in sequence.
/// It returns the result of `p`.
val between: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'c,'u>

/// The parser `p |>> f` applies the parser `p` and
/// returns the result `f x`, where `x` is the result returned by `p`.
val (|>>): Parser<'a,'u> -> ('a -> 'b) -> Parser<'b,'u>

/// The parser `pipe2 p1 p2 f` applies the parsers `p1` and `p2` in sequence.
/// It returns the result `f a b`, where `a` and `b` are the results returned by `p1` and `p2`.
val pipe2: Parser<'a,'u> -> Parser<'b,'u> -> ('a -> 'b -> 'c) -> Parser<'c,'u>

/// The parser `pipe3 p1 p2 p3 f` applies the parsers `p1`, `p2` and `p3` in sequence.
/// It returns the result `f a b c`, where `a`, `b` and `c` are the results returned by `p1`, `p2` and `p3`.
val pipe3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> ('a -> 'b -> 'c -> 'd) -> Parser<'d,'u>

/// The parser `pipe4 p1 p2 p3 p4 f` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence.
/// It returns the result `f a b c d`, where `a`, `b`, `c` and `d` are the results returned by `p1`, `p2`, `p3` and `p4`.
val pipe4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> ('a -> 'b -> 'c -> 'd -> 'e) -> Parser<'e,'u>

/// The parser `pipe5 p1 p2 p3 p4 p5 f` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence.
/// It returns the result of the function application `f a b c d e`, where `a`, `b`, `c`, `d` and `e` are the results returned by `p1`, `p2`, `p3`, `p4` and `p5`.
val pipe5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> ('a -> 'b -> 'c -> 'd -> 'e -> 'f) -> Parser<'f, 'u>


// -----------------------------------------------
// Parsing alternatives and recovering from errors
// -----------------------------------------------

/// The parser `p1 <|> p2` first applies the parser `p1`.
/// If `p1` succeeds, the result of `p1` is returned.
/// If `p1` fails with a non-fatal error and *without changing the parser state*,
/// the parser `p2` is applied.
/// Note: The stream position is part of the parser state, so if `p1` fails after consuming input,
/// `p2` will not be applied.
val (<|>): Parser<'a,'u> -> Parser<'a,'u> -> Parser<'a,'u>

/// The parser `choice ps` is an optimized implementation of `p1 <|> p2 <|> ... <|> pn`,
/// where `p1` ... `pn` are the parsers in the sequence `ps`.
val choice: seq<Parser<'a,'u>> -> Parser<'a,'u>

/// The parser `choiceL ps label` is an optimized implementation of `choice ps <?> label`.
val choiceL: seq<Parser<'a,'u>> -> string -> Parser<'a,'u>

/// The parser `p <|>% x` is an optimized implementation of `p <|> preturn x`.
val (<|>%): Parser<'a,'u> -> 'a -> Parser<'a,'u>

/// The parser `opt p` parses an optional occurrence of `p` as an option value.
/// `opt p` is an optimized implementation of `(p |>> Some) <|>% None`.
val opt: Parser<'a,'u> -> Parser<'a option,'u>

/// The parser `optional p` skips over an optional occurrence of `p`.
/// `optional p` is an optimized implementation of `(p >>% ()) <|>% ()`.
val optional: Parser<'a,'u> -> Parser<unit,'u>

/// The parser `attempt p` applies the parser `p`.
/// If `p` fails after changing the parser state or with a fatal error,
/// `attempt p` will backtrack to the original parser state and report a non-fatal error.
val attempt: Parser<'a,'u> -> Parser<'a,'u>

/// The parser `p >>=? f` behaves like `p >>= f`, except that it will backtrack to the beginning
/// if the parser returned by `f` fails with a non-fatal error and without changing the parser state,
/// even if `p` has changed the parser state.
val (>>=?): Parser<'a,'u> -> ('a -> Parser<'b,'u>) -> Parser<'b,'u>

/// The parser `p1 >>? p2` behaves like `p1 >>. p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'b,'u>

/// The parser `p1 .>>? p2` behaves like `p1 .>> p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (.>>?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a,'u>

/// The parser `p1 .>>.? p2` behaves like `p1 .>>. p2`, except that it will backtrack
/// to the beginning if `p2` fails with a non-fatal error and without changing the parser state,
/// even if `p1` has changed the parser state.
val (.>>.?): Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>


// -------------------------------------
// Conditional parsing and looking ahead
// -------------------------------------

/// The parser `notEmpty p` behaves like `p`,
/// except that it fails when `p` succeeds without consuming input
/// or changing the parser state in any other way.
val notEmpty: Parser<'a,'u> -> Parser<'a,'u>

/// The parser `followedBy p` succeeds if the parser `p` succeeds at the current position.
/// Otherwise it fails with a non-fatal error. This parser never changes the parser state.
/// If the parser `followedBy p` fails, it returns no descriptive error message.
/// Hence it should only be used together with other parsers that take care of a potential error.
/// Alternatively, `followedByL p label` can be used to ensure a more descriptive error message.
val followedBy: Parser<'a,'u> -> Parser<unit,'u>

/// The parser `followedByL p label` behaves like `followedBy p`,
/// except that it returns an `Expected label` error message when the parser `p` fails.
val followedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

/// The parser `notFollowedBy p` succeeds if the parser `p` fails to parse at the current position.
/// Otherwise it fails with a non-fatal error. This parser never changes the parser state.
/// If the parser `notFollowedBy p` fails, it returns no descriptive error message.
/// Hence it should only be used together with other parsers that take care of a potential error.
/// Alternatively, `notFollowedByL p label` can be used to ensure a more descriptive error message.
val notFollowedBy: Parser<'a,'u> -> Parser<unit,'u>

/// The parser `notFollowedByL p label` behaves like `notFollowedBy p`,
/// except that it returns an `Unexpected label` error message when the parser `p` succeeds
/// (i.e. when `notFollowedByL p label` itself fails).
val notFollowedByL: Parser<'a,'u> -> string -> Parser<unit,'u>

/// The parser `lookAhead p` parses `p` and restores the original parse state afterwards.
/// In case `p` fails after changing the parser state, the error messages are wrapped in a `NestedError`.
/// If it succeeds, any error messages are discarded. Fatal errors are turned into normal errors.
val lookAhead: Parser<'a,'u> -> Parser<'a,'u>


// --------------------------
// Customizing error messages
// --------------------------

/// The parser `p <?> label` applies the parser `p`. If `p` does not change the parser state
/// (usually because `p` failed), the error messages are replaced with `expected label`.
val (<?>): Parser<'a,'u> -> string -> Parser<'a,'u>

/// The parser `p <??> label` behaves like `p <?> label`, except that when `p` fails
/// after changing the parser state (for example, because `p` consumes input before it fails),
/// a `CompoundError` message is generated with both the given string `label` and the
/// error messages generated by `p`.
val (<??>): Parser<'a,'u> -> string -> Parser<'a,'u>

/// The parser `fail msg` always fails with a `messageError msg`.
/// The error message will be displayed together with other error messages generated for
/// the same input position.
val fail: string -> Parser<'a,'u>

/// The parser `failFatally msg` always fails with a `messageError msg`. It signals a
/// FatalError, so that no error recovery is attempted (except via backtracking constructs).
val failFatally: string -> Parser<'a,'u>

// -----------------
// Parsing sequences
// -----------------

/// The parser `tuple2 p1 p2` applies the parsers `p1` and `p2` in sequence and
/// returns the results in a tuple.
/// `tuple2 p1 p2` is defined as `p1 .>>. p2`.
val tuple2: Parser<'a,'u> -> Parser<'b,'u> -> Parser<('a * 'b),'u>

/// The parser `tuple3 p1 p2 p3` applies the parsers `p1`, `p2` and `p3` in sequence and
/// returns the results in a tuple.
val tuple3: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<('a * 'b * 'c),'u>

/// The parser `tuple4 p1 p2 p3 p4` applies the parsers `p1`, `p2`, `p3` and `p4` in sequence and
/// returns the results in a tuple.
val tuple4: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<('a * 'b * 'c * 'd),'u>

/// The parser `tuple5 p1 p2 p3 p4 p5` applies the parsers `p1`, `p2`, `p3`, `p4` and `p5` in sequence and
/// returns the results in a tuple.
val tuple5: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'c,'u> -> Parser<'d,'u> -> Parser<'e,'u> -> Parser<('a * 'b * 'c * 'd * 'e),'u>


// p{n}

/// The parser `parray n p` parses `n` occurrences of `p` and
/// returns the results in an array.
/// For example, `parray 3 p` is equivalent to `pipe3 p p p (fun a b c -> [|a;b;c|])`.
val parray: int -> Parser<'a,'u> -> Parser<'a[],'u>

/// The parser `skipArray n p` is an optimized implementation of `parray n p |>> ignore`.
val skipArray: int -> Parser<'a,'u> -> Parser<unit,'u>


// p*

/// The parser `many p` repeatedly applies the parser `p` until `p` fails.
/// It returns a list of the results returned by `p`.
/// At the end of the sequence `p` must fail without changing the parser state and without
/// signalling a `FatalError`, otherwise `many p` will fail with the error reported by `p`.
/// `many p` tries to guard against an infinite loop by throwing an exception
/// if `p` succeeds without changing the parser state.
val many: Parser<'a,'u> -> Parser<'a list,'u>

/// The parser `skipMany p` is an optimized implementation of `many p |>> ignore`.
val skipMany: Parser<'a,'u> -> Parser<unit,'u>


// p+

/// The parser `many1 p` behaves like `many p`, except that it requires `p` to succeed at least one time.
/// `many1 p` is an optimized implementation of `pipe2 p (many p) (fun hd tl -> hd::tl)`.
val many1: Parser<'a,'u> -> Parser<'a list,'u>

/// The parser `skipMany1 p` is an optimized implementation of `many1 p |>> ignore`.
val skipMany1: Parser<'a,'u> -> Parser<unit,'u>


// (p (sep p)*)?

/// The parser `sepBy p sep` parses *zero* or more occurrences of `p` separated by `sep`
/// (in EBNF notation: `(p (sep p)*)?`).
val sepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepBy p sep` is an optimized implementation of `sepBy p sep |>> ignore`.
val skipSepBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// p (sep p)*

/// The parser `sepBy1 p sep` parses *one* or more occurrences of `p` separated by `sep`
/// (in EBNF notation: `p (sep p)*`).
val sepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepBy1 p sep` is an optimized implementation of `sepBy1 p sep |>> ignore`.
val skipSepBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// (p (sep p)* sep?)?

/// The parser `sepEndBy p sep` parses *zero* or more occurrences of `p` separated and
/// optionally ended by `sep` (in EBNF notation: `(p (sep p)* sep?)?`).
/// It returns a list of the results returned by `p`.
val sepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepEndBy p sep` is an optimized implementation of `sepEndBy p sep |>> ignore`.
val skipSepEndBy: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


// p (sep p)* sep?

/// The parser `sepEndBy1 p sep` parses *one* or more occurrences of `p` separated and
/// optionally ended by `sep` (in EBNF notation: `p (sep p)* sep?`).
/// It returns a list of the results returned by `p`.
val sepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipSepEndBy1 p sep` is an optimized implementation of `sepEndBy1 p sep |>> ignore`.
val skipSepEndBy1: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


/// The parser `manyTill p endp` repeatedly applies the parser `p`
/// for as long as `endp` fails (without changing the parser state).
/// It returns a list of the results returned by `p`.
val manyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipManyTill p endp` is an optimized implementation of `manyTill p endp |>> ignore`.
val skipManyTill: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>

/// The parser `many1Till p endp` behaves like `manyTill p endp`, except that it requires `p` to succeed at least one time.
/// `many1Till p endp` is an optimized implementation of `pipe2 p (manyTill p endp) (fun hd tl -> hd::tl)`.
val many1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<'a list,'u>

/// The parser `skipMany1Till p endp` is an optimized implementation of `many1Till p endp |>> ignore`.
val skipMany1Till: Parser<'a,'u> -> Parser<'b,'u> -> Parser<unit,'u>


/// Inlinable building blocks for sequence parsers. Each member takes functions for
/// creating, folding and finishing the accumulated state together with the
/// element (and separator or end) parsers.
[<Sealed>]
type Inline =

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       Many: stateFromFirstElement: ('T -> 'State)
                           * foldState: ('State -> 'T -> 'State)
                           * resultFromState: ('State -> 'Result)
                           * elementParser: Parser<'T,'U>
                           * ?firstElementParser: Parser<'T,'U>
                           * ?resultForEmptySequence: (unit -> 'Result)
                          -> Parser<'Result,'U>

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       SepBy: stateFromFirstElement: ('T -> 'State)
                            * foldState: ('State -> 'Separator -> 'T -> 'State)
                            * resultFromState: ('State -> 'Result)
                            * elementParser: Parser<'T,'U>
                            * separatorParser: Parser<'Separator,'U>
                            * ?firstElementParser: Parser<'T,'U>
                            * ?resultForEmptySequence: (unit -> 'Result)
                            * ?separatorMayEndSequence: bool
                           -> Parser<'Result,'U>

#if NOINLINE
  static member
#else
  [<NoDynamicInvocation>]
  static member inline
#endif
                       ManyTill: stateFromFirstElement: ('T -> 'State)
                               * foldState: ('State -> 'T -> 'State)
                               * resultFromStateAndEnd: ('State -> 'E -> 'Result)
                               * elementParser: Parser<'T,'U>
                               * endParser: Parser<'E,'U>
                               * ?firstElementParser: Parser<'T,'U>
                               * ?resultForEmptySequence: ('E -> 'Result)
                              -> Parser<'Result,'U>


// (((p op p) op p) ... op p)

/// The parser `chainl1 p op` parses one or more occurrences of `p` separated by `op`
/// (in EBNF notation: `p (op p)*`).
/// It returns the value obtained by *left* associative application of all functions
/// returned by `op` to the results returned by `p`,
/// i.e. `f_n (... (f_2 (f_1 x_1 x_2) x_3) ...) x_n+1`,
/// where `f_1` to `f_n` are the functions returned by the parser `op` and
/// `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence
/// of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly.
val chainl1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

/// The parser `chainl p op defVal` is equivalent to `chainl1 p op <|>% defVal`.
val chainl: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>


// (p op ... (p op (p op p)))

/// The parser `chainr1 p op` parses one or more occurrences of `p` separated by `op`
/// (in EBNF notation: `p (op p)*`).
/// It returns the value obtained by *right* associative application of all functions
/// returned by `op` to the results returned by `p`,
/// i.e. `f_1 x_1 (f_2 x_2 (... (f_n x_n x_n+1) ...))`,
/// where `f_1` to `f_n` are the functions returned by the parser `op` and
/// `x_1` to `x_n+1` are the values returned by `p`. If only a single occurrence
/// of `p` and no occurrence of `op` is parsed, the result of `p` is returned directly.
val chainr1: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> Parser<'a,'u>

/// The parser `chainr p op defVal` is equivalent to `chainr1 p op <|>% defVal`.
val chainr: Parser<'a,'u> -> Parser<('a -> 'a -> 'a),'u> -> 'a -> Parser<'a,'u>


// ------------------------------
// Computation expression syntax
// ------------------------------

/// The type of the "builder object" that can be used to build parsers with
/// F#'s "computation expression" syntax a.k.a. "workflow" syntax.
[<Sealed>]
type ParserCombinator =
    new : unit -> ParserCombinator
    member Delay: f:(unit -> Parser<'a,'u>) -> Parser<'a,'u>
    member Return: 'a -> Parser<'a,'u>
    member Bind: Parser<'a,'u>*('a -> Parser<'b,'u>) -> Parser<'b,'u>
    member Zero: unit -> Parser<'a,'u>
    member ReturnFrom: Parser<'a,'u> -> Parser<'a,'u>
    // no Combine member on purpose
    member TryWith: p:Parser<'a,'u> * cf:(exn -> Parser<'a,'u>) -> Parser<'a,'u>
    member TryFinally: p:Parser<'a,'u>* ff:(unit -> unit) -> Parser<'a,'u>

/// The builder object for building parsers using F#'s computation expression syntax.
val parse : ParserCombinator


// ----------------------
// Other helper functions
// ----------------------

// a helper function for defining mutually recursive parser values

/// `let p, pRef = createParserForwardedToRef()` creates a parser `p` that forwards all
/// calls to the parser in the reference cell `pRef`. Initially, `pRef` holds a reference
/// to a dummy parser that raises an exception on any invocation.
val createParserForwardedToRef: unit -> Parser<'a,'u> * Parser<'a,'u> ref

================================================
FILE: FParsec/Range.fs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2011
// License: Simplified BSD License. See accompanying documentation.
namespace FParsec

#if LOW_TRUST
  // we don't need the Range code in LOW_TRUST builds
#else

/// An inclusive range of integers from `Min` to `Max`.
type Range = struct
    val Min: int
    val Max: int

    // The ordering of the bounds is only checked by an assert; the functions in
    // the Range module validate `Min <= Max` explicitly where they depend on it.
    new (min, max) = assert (min <= max)
                     {Min = min; Max = max}
end

[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module internal Range =

open System.Collections.Generic
open FParsec.Internals

let int32Max = System.Int32.MaxValue

let createInvalidRangeException() =
    System.ArgumentException("A range passed as an argument is invalid.")

/// Throws an ArgumentException if any range has `Min > Max` or if the ranges
/// are not sorted, overlap or are immediately adjacent.
let checkRangesAreValidSortedAndUnconnected (ranges: Range[]) =
    if ranges.Length <> 0 then
        let r = ranges[0]
        if r.Min > r.Max then raise (createInvalidRangeException())
        let mutable prevMax = r.Max
        for i = 1 to ranges.Length - 1 do
            let r = ranges[i]
            if r.Min > r.Max then raise (createInvalidRangeException())
            // the int32Max test guards `prevMax + 1` against overflow
            if prevMax = int32Max || prevMax + 1 >= r.Min then
                invalidArg "ranges" "The ranges must be sorted and neither overlapping nor immediately adjacent."
            prevMax <- r.Max

/// Like `checkRangesAreValidSortedAndUnconnected`, except that immediately
/// adjacent ranges are allowed as long as their associated labels differ.
let checkLabelRangesAreValidSortedAndUnconnected (ranges: Range[]) (labels: System.Reflection.Emit.Label[]) =
    if ranges.Length <> labels.Length then
        invalidArg "labels" "The range and label arrays must have the same lengths."
    if ranges.Length <> 0 then
        let r = ranges[0]
        if r.Min > r.Max then raise (createInvalidRangeException())
        let mutable prevMax = r.Max
        for i = 1 to ranges.Length - 1 do
            let r = ranges[i]
            if r.Min > r.Max then raise (createInvalidRangeException())
            if prevMax = int32Max then
                invalidArg "ranges" "The ranges must be sorted and non-overlapping."
            if prevMax + 1 >= r.Min then
                if prevMax + 1 = r.Min then
                    // adjacent ranges are only an error if they share a label
                    if labels[i - 1].Equals(labels[i]) then
                        raise (System.ArgumentException("Ranges with the same associated label must not be immediately adjacent."))
                else
                    invalidArg "ranges" "The ranges must be sorted and non-overlapping."
            prevMax <- r.Max

/// Orders ranges by their `Min` value.
let rangeComparer = {new Comparer<Range>() with
                         member t.Compare(r1, r2) = compare r1.Min r2.Min}

/// Sorts `ranges` in place by `Min` and merges adjacent ranges (and overlapping
/// ranges, if `allowOverlappingRanges` is true). Returns the sorted input array
/// itself if nothing had to be merged, otherwise a new array.
/// Throws an ArgumentException for an invalid range or, if overlapping ranges
/// are not allowed, for overlapping ranges.
let sortAndMergeRanges allowOverlappingRanges (ranges: Range[]) =
    if ranges.Length = 0 then [||]
    else
        System.Array.Sort(ranges, rangeComparer)
        // first pass: validate and count the ranges that will be merged into a predecessor
        let mutable connected = 0
        let r = ranges[0]
        if r.Min > r.Max then raise (createInvalidRangeException())
        let mutable prevMax = r.Max
        for i = 1 to ranges.Length - 1 do
            let r = ranges[i]
            if r.Min > r.Max then raise (createInvalidRangeException())
            if prevMax < r.Min then
                if prevMax + 1 = r.Min then connected <- connected + 1
                prevMax <- r.Max
            elif allowOverlappingRanges then
                connected <- connected + 1
                if prevMax < r.Max then prevMax <- r.Max
            else
                invalidArg "ranges" "The value ranges must be non-overlapping."
        if connected = 0 then ranges
        else
            // second pass: copy the ranges, extending the previous one where connected
            let rs = Array.zeroCreate (ranges.Length - connected)
            let mutable j = 0
            for r in ranges do
                if j = 0 || prevMax <> int32Max && prevMax + 1 < r.Min then
                    prevMax <- r.Max
                    rs[j] <- r
                    j <- j + 1
                elif prevMax < r.Max then
                    prevMax <- r.Max
                    rs[j - 1] <- Range(rs[j - 1].Min, r.Max)
            rs

/// If the comparer is not null, adjacent ranges with the same value are merged.
/// Returns the sorted ranges and the associated values as two arrays.
/// Throws an ArgumentException for an invalid range or for overlapping ranges.
let sortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValueRanges: seq<Range * 'T>) =
    // 'T could potentially be a large value type,
    // so we are trying to avoid copying 'T values where possible.
    let rvs = Array.ofSeq keyValueRanges
    if rvs.Length = 0 then [||], [||]
    else
        System.Array.Sort(rvs, {new Comparer<Range * 'T>() with
                                    member t.Compare((r1, _), (r2, _)) = compare r1.Min r2.Min})
        // first pass: validate and count the ranges that will be merged into a predecessor
        let mutable connected = 0
        let (r, _) as rv = rvs[0]
        if r.Min > r.Max then raise (createInvalidRangeException())
        let mutable prevMax = r.Max
        let mutable prevRV = rv
        for i = 1 to rvs.Length - 1 do
            let (r, _) as rv = rvs[i]
            if r.Min > r.Max then raise (createInvalidRangeException())
            if prevMax >= r.Min then
                invalidArg "keyValueRanges" "The ranges must be non-overlapping."
            if prevMax + 1 = r.Min && isNotNull cmp && cmp.Equals(snd prevRV, snd rv) then
                connected <- connected + 1
            prevMax <- r.Max
            prevRV <- rv
        let n = rvs.Length - connected
        let rs, vs = Array.zeroCreate n, Array.zeroCreate n
        if connected = 0 then
            for i = 0 to rvs.Length - 1 do
                let rv = rvs[i]
                rs[i] <- fst rv
                vs[i] <- snd rv
        else
            // connected > 0 implies that cmp is not null
            let mutable j = 0
            for ((r, _) as rv) in rvs do
                if j = 0 || not (prevMax + 1 = r.Min && cmp.Equals(snd prevRV, snd rv)) then
                    rs[j] <- r
                    vs[j] <- snd rv
                    j <- j + 1
                else
                    rs[j - 1] <- Range(rs[j - 1].Min, r.Max)
                prevMax <- r.Max
                prevRV <- rv
        rs, vs

/// Converts sorted, distinct keys into ranges, merging runs of consecutive keys
/// that have the same label. Returns the ranges and the label of each range.
/// Throws an ArgumentException if the arrays differ in length or the keys are
/// not sorted and distinct.
let mergeSortedKeyLabelRanges (keys: int[]) (labels: System.Reflection.Emit.Label[]) =
    if keys.Length <> labels.Length then
        invalidArg "keys" "The key and label arrays must have the same lengths."
    if keys.Length = 0 then [||], [||]
    else
        let mutable prevKey = keys[0]
        let mutable connected = 0
        for i = 1 to keys.Length - 1 do
            let key = keys[i]
            if key <= prevKey then
                invalidArg "keys" "The keys must be sorted and distinct."
            if key = prevKey + 1 && labels[i] = labels[i - 1] then
                connected <- connected + 1
            prevKey <- key
        if connected = 0 then
            (keys |> Array.map (fun k -> Range(k, k))), labels
        else
            let ranges = Array.zeroCreate (keys.Length - connected)
            let newLabels = Array.zeroCreate (keys.Length - connected)
            let mutable i = 0
            for j = 0 to ranges.Length - 1 do
                let label = labels[i]
                newLabels[j] <- label
                let first = keys[i]
                let mutable last = first
                i <- i + 1
                while i < keys.Length && keys[i] = last + 1 && labels[i] = label do
                    last <- last + 1
                    i <- i + 1
                ranges[j] <- Range(first, last)
            ranges, newLabels

/// Duplicate values are allowed.
let collectSortAndMergeRanges (values: seq) =
    // Collects the values into ranges of consecutive numbers while enumerating the sequence once.
    // If the values turn out not to be strictly increasing, the collected ranges are
    // handed to sortAndMergeRanges (with overlapping ranges allowed) for sorting and merging.
    // NOTE(review): the element type of `seq` appears to be missing in this listing -- confirm.
    use iter = values.GetEnumerator()
    if not (iter.MoveNext()) then [||]
    else
        let ranges = ResizeArray<_>()
        // `sorted` stays true as long as every new range starts above the previous range's max
        let rec loop sorted min max =
            if iter.MoveNext() then
                let k = iter.Current
                // the int32Max check prevents an overflow in max + 1
                if max <> int32Max && max + 1 = k then loop sorted min k
                else
                    ranges.Add(Range(min, max))
                    loop (sorted && max < k) k k
            else
                ranges.Add(Range(min, max))
                sorted
        let value = iter.Current
        let sorted = loop true value value
        let ranges = ranges.ToArray()
        if sorted then ranges
        else sortAndMergeRanges true ranges

/// ranges, values = collectSortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValues: seq)
/// Duplicate keys are not allowed.
/// If the comparer is not null, consecutive keys with the same value are combined.
let collectSortAndMergeKeyValueRanges (cmp: EqualityComparer<'T>) (keyValues: seq) =
    // 'T could potentially be a large value type,
    // so we are trying to avoid copying 'T values where possible.
    let kvs = Array.ofSeq keyValues
    System.Array.Sort(kvs, {new Comparer() with member t.Compare((k1, _), (k2,_)) = compare k1 k2})
    if kvs.Length = 0 then [||], [||]
    else
        // after sorting, duplicate keys are neighbours
        let mutable prevKey, _ = kvs[0]
        for i = 1 to kvs.Length - 1 do
            let k, _ = kvs[i]
            if k = prevKey then invalidArg "keyValues" "The sequence contains a duplicate key."
            prevKey <- k
        if isNull cmp then
            // without a comparer nothing is merged: one single-key range per pair
            let ranges = Array.zeroCreate kvs.Length
            let values = Array.zeroCreate kvs.Length
            for i = 0 to kvs.Length - 1 do
                let k, _ as kv = kvs[i]
                ranges[i] <- Range(k, k)
                values[i] <- snd kv
            ranges, values
        else
            let ranges = ResizeArray<_>()
            let mutable kv = kvs[0]
            let mutable i = 0
            while i < kvs.Length do
                let kv0 = kv // first pair of the current run
                let mutable k = fst kv
                i <- i + 1
                // The loop condition first loads the next pair into kv and then tests whether
                // it continues the run. When the test fails, kv already holds the first pair
                // of the next run.
                while i < kvs.Length && (kv <- kvs[i]
                                         k + 1 = fst kv && cmp.Equals(snd kv0, snd kv)) do
                    k <- k + 1
                    i <- i + 1
                ranges.Add(Range(fst kv0, k))
            let ranges = ranges.ToArray()
            let values = Array.zeroCreate ranges.Length
            // j is the index in kvs of the first key of range i
            let mutable j = 0
            for i = 0 to ranges.Length - 1 do
                let r = ranges[i]
                values[i] <- snd kvs[j]
                j <- j + (r.Max - r.Min + 1)
            ranges, values

/// sumOfLengths (ranges: Range[]) (iBegin: int) (iEnd: int)
/// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping
/// Returns the total number of values covered by ranges[iBegin..iEnd - 1] as a double.
let sumOfLengths (ranges: Range[]) iBegin iEnd =
    assert (iBegin < iEnd)
    // since the ranges are sorted non-overlapping, their sum is <= UInt32.MaxValue + 1
    // n starts at the number of ranges, which accounts for the "+ 1" in each (Max - Min + 1)
    let mutable n = uint32 (iEnd - iBegin)
    for i = iBegin to iEnd - 1 do
        let r = ranges[i]
        n <- n + uint32 (r.Max - r.Min)
    if n <> 0u then double n
    else double System.UInt32.MaxValue + 1. // n has overflown by exactly 1

/// sumOfCappedLengths (lengthCap: int32) (ranges: Range[]) (iBegin: int) (iEnd: int)
/// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping
/// a lengthCap <= 0 is interpreted as a lengthCap of 2^32
/// Like sumOfLengths, but each individual range contributes at most lengthCap.
let sumOfCappedLengths lengthCap (ranges: Range[]) iBegin iEnd =
    assert (iBegin < iEnd)
    // since the ranges are sorted non-overlapping, their sum is <= UInt32.MaxValue + 1
    // the cap is stored minus 1 because n already counts 1 per range
    let lengthCapM1 = if lengthCap > 0 then uint32 (lengthCap - 1) else System.UInt32.MaxValue
    let mutable n = uint32 (iEnd - iBegin)
    for i = iBegin to iEnd - 1 do
        let r = ranges[i]
        n <- n + min (uint32 (r.Max - r.Min)) lengthCapM1
    if n <> 0u then double n
    else double System.UInt32.MaxValue + 1.
// n has overflown by exactly 1
// ^ trailing comment belonging to the last expression on the preceding source line.

/// density lengthCap (ranges: Range[]) iBegin iEnd
/// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping
/// Returns the capped number of covered values divided by the size of the
/// interval from ranges[iBegin].Min to ranges[iEnd - 1].Max.
let density lengthCap (ranges: Range[]) iBegin iEnd =
    assert (iBegin < iEnd)
    let n = sumOfCappedLengths lengthCap ranges iBegin iEnd
    // computed in double precision, so the difference cannot overflow
    let d = double ranges[iEnd - 1].Max - double ranges[iBegin].Min + 1.
    n/d

/// rangeIndex, pivotAroundRangeMax = findPivot (ranges: Range[]) iBegin iEnd
/// precondition: iBegin < iEnd, ranges must be sorted and non-overlapping
/// Evaluates every gap between two neighbouring ranges as a candidate split point and
/// returns the best one: either (index of the range before the gap, true)
/// or (index of the range after the gap, false).
let findPivot (ranges: Range[]) iBegin iEnd =
    assert (iBegin < iEnd)
    // the pivot heuristic is based on Korobeynikov (2007), http://llvm.org/pubs/2007-05-31-Switch-Lowering.pdf
    let mutable first, last = double ranges[iBegin].Min, double ranges[iEnd - 1].Max
    let mutable pivot, pivotAroundPreviousRangeMax = iBegin, false
    // sumLeft/sumRight: number of covered values to the left/right of the candidate gap
    let mutable sumLeft, sumRight = 0., sumOfLengths ranges iBegin iEnd
    let sumHalf = sumRight*0.5
    let mutable maxQuality, maxDistanceToMiddle = -1., sumRight
    let r = ranges[iBegin]
    let mutable nextMin, nextMax = double r.Min, double r.Max
    for i = iBegin + 1 to iEnd - 1 do
        // the candidate gap lies between ranges[i - 1] and ranges[i]
        let prevMax = nextMax
        let prevLength = nextMax - nextMin + 1.
        sumLeft <- sumLeft + prevLength
        sumRight <- sumRight - prevLength
        let r = ranges[i]
        nextMin <- double r.Min
        nextMax <- double r.Max
        let logDistance = System.Math.Log(nextMin - prevMax)
        // add 2 instead of 1 to decrease the quality
        // of the two most extreme possible pivot points
        let leftDensity = sumLeft/(prevMax - first + 2.)
        let rightDensity = sumRight/(last - nextMin + 2.)
        let quality = (leftDensity + rightDensity)*logDistance
        if quality >= maxQuality then
            // ties in quality are broken in favour of the more balanced split
            let distanceToMiddle = System.Math.Abs(sumLeft - sumHalf);
            if quality > maxQuality || distanceToMiddle < maxDistanceToMiddle then
                maxQuality <- quality
                maxDistanceToMiddle <- distanceToMiddle
                pivot <- i
                pivotAroundPreviousRangeMax <- sumLeft >= sumRight
    if pivotAroundPreviousRangeMax then (pivot - 1), true
    else pivot, false

/// Binary search for `value` in the sorted, non-overlapping `ranges`.
/// Returns the index of the range containing `value`, or, if there is none,
/// the bitwise complement (~~~) of the index at which the search ended (a negative number).
let rec findInSortedNonOverlappingRanges (ranges: Range[]) value =
    let rec loop iFirst iLast =
        if iFirst <= iLast then
            // the sum is halved as an unsigned number -- presumably so that a sum
            // exceeding Int32.MaxValue still yields the correct midpoint
            let middle = int ((uint32 (iFirst + iLast))/2u)
            let middleRange = ranges[middle]
            if value < middleRange.Min then loop iFirst (middle - 1)
            elif value > middleRange.Max then loop (middle + 1) iLast
            else middle
        else ~~~iFirst
    loop 0 (ranges.Length - 1)

#endif

================================================
FILE: FParsec/StaticMapping.fs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2012
// License: Simplified BSD License. See accompanying documentation.

module FParsec.StaticMapping

#if !LOW_TRUST

open System.Reflection
open System.Reflection.Emit
open System.Runtime.Serialization
open System.Diagnostics
open System.Collections.Generic
open System.Threading

open FParsec
open FParsec.Internals
open FParsec.Range
open FParsec.Emit

/// Unsafe because it doesn't constrain the type argument to reference types.
let private UnsafeReferenceEqualityComparer<'T> =
    // compares by reference identity and hashes by object identity
    { new EqualityComparer<'T>() with
          override t.Equals(x, y) = obj.ReferenceEquals(x, y)
          override t.GetHashCode(x) = System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(x) }

// NOTE(review): several generic type arguments appear to be missing in this listing
// (e.g. `typeof>`, `typeof ||`, `typeof)`, `seq)`); those tokens are reproduced as found --
// confirm against the repository source.

/// Provides the comparer used to detect "physically equal" values of type 'T:
/// - reference types: reference equality,
/// - enums and value types for which the `IsAssignableFrom` check below succeeds:
///   EqualityComparer<'T>.Default,
/// - all other value types: null (no comparer available).
type PhysicalEqualityComparer<'T> private () =
    static let instanceOrNull =
        let t = typeof<'T>
        if not t.IsValueType then UnsafeReferenceEqualityComparer<'T>
        elif t.IsEnum || typeof>.IsAssignableFrom(t) then EqualityComparer<'T>.Default
        else null
    static member InstanceOrNull = instanceOrNull

// source of the numeric suffix of the generated type names
let mutable private staticMappingCounter = 0

/// Creates a type builder for a new public sealed class named "StaticMapping<N>"
/// (N is taken from an atomically incremented counter) and defines on it a public virtual
/// `Invoke` method taking a 'TIn and returning a 'TOut.
/// Returns the type builder and the IL generator for the body of `Invoke`.
let private createStaticMappingTypeBuilder<'TIn,'TOut>() =
    let name = "StaticMapping" + (string (Interlocked.Increment(&staticMappingCounter)))
    let tb = createTypeBuilder
                name
                (TypeAttributes.Public ||| TypeAttributes.Sealed ||| TypeAttributes.Class)
                typeof> null
    let mb = tb.DefineMethod("Invoke",
                             MethodAttributes.Public ||| MethodAttributes.HideBySig ||| MethodAttributes.Virtual,
                             CallingConventions.HasThis,
                             typeof<'TOut>, [|typeof<'TIn>|])
    tb, mb.GetILGenerator()

/// Creates the exception that the DEBUG_STATIC_MAPPING self-checks raise
/// when a generated function disagrees with the reference implementation.
let createStaticMappingAssertException() =
    System.Exception("An internal assert check in FParsec.StaticMapping failed. Please report this error to fparsec@quanttec.com. (The Data member of the exception object contains the information needed to reproduce the error.)")

// default tuning parameters passed to emitSwitch / emitSetMembershipTest
let internal defaultMappingLengthCap = 32
let internal defaultMappingDensityThreshold = 0.4
let internal defaultIndicatorLengthCap = 32*8
let internal defaultIndicatorDensityThreshold = 0.4/32.

/// Emits and instantiates a function that tests whether its argument lies in one of `ranges`
/// (or, if `invert` is true, in none of them).
/// Fails with an exception if 'TInt is not one of the two supported input types.
let internal createStaticIntIndicatorFunctionImpl<'TInt when 'TInt : struct>
                 lengthCap densityThreshold minValue maxValue invert ranges : ('TInt -> bool) =
    if not (typeof<'TInt> = typeof || typeof<'TInt> = typeof) then
        failwith "Only char and int are supported as input types."

    let tb, ilg = createStaticMappingTypeBuilder<'TInt, bool>()

    // the emitted test stores its outcome in a local, which Invoke then returns
    let resultLocal = ilg.DeclareLocal(typeof)
    emitSetMembershipTest ilg
                          (fun ilg -> ilg.Emit(OpCodes.Ldarg_1)) // loads var
                          (fun ilg -> ilg.Emit(OpCodes.Stloc, resultLocal)) // stores result
                          (TempLocals(ilg))
                          lengthCap densityThreshold
                          minValue maxValue
                          invert ranges
    ilg.Emit(OpCodes.Ldloc, resultLocal)
    ilg.Emit(OpCodes.Ret)

    let t = tb.CreateType()
    // the generated class has no state, so an uninitialized instance is sufficient
    let indicator = FormatterServices.GetUninitializedObject(t) :?> ('TInt -> bool)

#if DEBUG_STATIC_MAPPING
    // saveEmitAssembly "FParsec.Emitted.dll"

    let raiseException key : unit =
        let e = createStaticMappingAssertException()
        e.Data["Argument"] <- key
        e.Data["IsInverted"] <- invert
        e.Data["Ranges"] <- ranges
        raise e

    // reference implementation: binary search in the ranges
    let findKeyinRanges =
        (if typeof<'TInt> = typeof then
            (box (fun (key: char) -> findInSortedNonOverlappingRanges ranges (int key)))
         else
            (box (findInSortedNonOverlappingRanges ranges))
        ) :?> ('TInt -> int)

    // wrap the generated function so that every call is cross-checked
    fun key ->
        let b1 = indicator key
        let b2_ = findKeyinRanges key >= 0
        let b2 = if invert then not b2_ else b2_
        if b1 <> b2 then raiseException key
        b1
#else
    indicator
#endif

/// Creates an indicator function for the given set of chars.
/// Duplicate chars are allowed; an empty sequence is allowed.
let createStaticCharIndicatorFunction invert (charsInSet: seq) =
    let ranges = collectSortAndMergeRanges (charsInSet |> Seq.map (fun c -> int c))
    createStaticIntIndicatorFunctionImpl
        defaultIndicatorLengthCap defaultIndicatorDensityThreshold
        0 0xffff
        invert ranges

/// Creates an indicator function for the chars whose code lies in one of the given ranges.
/// Overlapping ranges are allowed; an empty sequence is allowed.
/// Raises an ArgumentException if a range contains values outside 0 - 0xffff.
let createStaticCharRangeIndicatorFunction invert (rangesInSet: seq) =
    let ranges = sortAndMergeRanges true (Array.ofSeq rangesInSet)
    // The ranges are sorted and merged, so only the first Min and the last Max need checking.
    // The parentheses are essential: `&&` binds tighter than `||`, and without them the
    // second comparison would index ranges[-1] (IndexOutOfRangeException) for an empty input.
    if ranges.Length <> 0 && (ranges[0].Min < 0 || ranges[ranges.Length - 1].Max > 0xffff) then
        invalidArg "charRanges" "A range contains values outside the range of valid UTF-16 char values (0 - 0xffff)."
    // (the final expression of this function follows on the next source line)
// (final expression of createStaticCharRangeIndicatorFunction, which begins on the preceding source line)
    createStaticIntIndicatorFunctionImpl
        defaultIndicatorLengthCap defaultIndicatorDensityThreshold
        0 0xffff
        invert ranges

// NOTE(review): generic type arguments and an attribute appear to be missing in this listing
// (e.g. `seq)`, `typeof then`, `createStaticMappingTypeBuilder()`, `[]`); the code is
// reproduced as found -- confirm against the repository source.

/// Creates an indicator function for the given set of int values. Duplicate values are allowed.
let createStaticIntIndicatorFunction invert (valuesInSet: seq) =
    let ranges = collectSortAndMergeRanges valuesInSet
    createStaticIntIndicatorFunctionImpl
        defaultIndicatorLengthCap defaultIndicatorDensityThreshold
        System.Int32.MinValue System.Int32.MaxValue
        invert ranges

/// Creates an indicator function for the int values in the given ranges. Overlapping ranges are allowed.
let createStaticIntRangeIndicatorFunction invert (rangesInSet: seq) =
    let ranges = sortAndMergeRanges true (Array.ofSeq rangesInSet)
    createStaticIntIndicatorFunctionImpl
        defaultIndicatorLengthCap defaultIndicatorDensityThreshold
        System.Int32.MinValue System.Int32.MaxValue
        invert ranges

/// Emits and instantiates a function that maps an int key lying in ranges[i] to values[i]
/// and every other key to `defaultValue`.
/// `ranges` and `values` must have the same length (asserted).
let internal createStaticIntMappingImpl
                 lengthCap densityThreshold
                 minKey maxKey
                 (defaultValue: 'T) (ranges: Range[]) (values: 'T[]) : (int -> 'T) =
    assert (ranges.Length = values.Length)
    if ranges.Length = 0 then fun _ -> defaultValue
    else
        let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull
        let T = typeof<'T>
        if T = typeof then
            // NOTE(review): `values` is re-bound here but not read again in this branch. The
            // indicator is built from `ranges` alone, with `defaultValue` passed in the `invert`
            // position -- presumably this relies on the caller having removed all ranges that
            // map to the default value; confirm.
            let values = box values :?> bool[]
            let defaultValue = box defaultValue :?> bool
            box (createStaticIntIndicatorFunctionImpl
                     (lengthCap*(defaultIndicatorLengthCap/defaultMappingLengthCap))
                     (densityThreshold*(defaultIndicatorDensityThreshold/defaultMappingDensityThreshold))
                     minKey maxKey
                     defaultValue ranges) :?> (int -> 'T)
        else
            let tb, ilg = createStaticMappingTypeBuilder()

            // Primitive and enum values are emitted as constants in the IL. All other values
            // are stored in an array field of the generated object and looked up by index.
            let isPrimitive = T.IsPrimitive || T.IsEnum
            let loadConstant = if isPrimitive then createLoaderForPrimitiveConstants ilg
                               else Unchecked.defaultof<_>

            let resultOrIndexLocal = ilg.DeclareLocal(if isPrimitive then T else typeof)

            let defaultLabel = ilg.DefineLabel()
            let returnLabel = ilg.DefineLabel()
            let labels = Array.zeroCreate ranges.Length

            // needToEmit[i] is true if a case handler has to be emitted for values[i];
            // needToEmit = null means that a handler is emitted for every value
            let mutable needToEmit = null
            let mutable needToEmitCount = 0

            let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull
            if isNull physicalEqualityComparer then
                for i = 0 to labels.Length - 1 do
                    labels[i] <- ilg.DefineLabel()
            else
                // we don't need to emit multiple case handlers for identical values
                needToEmit <- Array.zeroCreate values.Length
                let valueLabels = Dictionary<'T,Label>(values.Length, physicalEqualityComparer)
                for i = 0 to values.Length - 1 do
                    let value = values[i]
                    let mutable label = Unchecked.defaultof<_>
                    if not (valueLabels.TryGetValue(value, &label)) then
                        needToEmit[i] <- true
                        label <- ilg.DefineLabel()
                        valueLabels.Add(value, label)
                    labels[i] <- label
                needToEmitCount <- valueLabels.Count
                if needToEmitCount = values.Length then
                    needToEmit <- null

            emitSwitch ilg
                       (fun ilg -> ilg.Emit(OpCodes.Ldarg_1)) // loads key
                       (TempLocals(ilg))
                       lengthCap densityThreshold
                       minKey maxKey
                       defaultLabel ranges labels

            // if only some of the values get a handler, the returned values are compacted into
            // a separate array; otherwise the `values` array itself is used (see below)
            let returnedValues = if isPrimitive || isNull needToEmit then null
                                 else Array.zeroCreate needToEmitCount
            let mutable returnedValuesCount = 0

            // emit one case handler per distinct value: store the constant
            // (or the index into the values array) and jump to the return code
            for i = 0 to labels.Length - 1 do
                if isNull needToEmit || needToEmit[i] then
                    ilg.MarkLabel(labels[i])
                    if isPrimitive then
                        loadConstant (values[i])
                    else
                        if isNotNull returnedValues then
                            returnedValues[returnedValuesCount] <- values[i]
                        loadI4 ilg returnedValuesCount
                        returnedValuesCount <- returnedValuesCount + 1
                    ilg.Emit(OpCodes.Stloc, resultOrIndexLocal)
                    ilg.Emit(OpCodes.Br, returnLabel)

            // return default value
            let defaultValueIsNull = not T.IsValueType && isNull (box defaultValue)
            ilg.MarkLabel(defaultLabel)
            if isPrimitive then
                // falls through to the return code below
                loadConstant defaultValue
                ilg.Emit(OpCodes.Stloc, resultOrIndexLocal)
            else
                if defaultValueIsNull then
                    ilg.Emit(OpCodes.Ldnull)
                else
                    ilg.Emit(OpCodes.Ldarg_0)
                    ilg.Emit(OpCodes.Ldfld, tb.DefineField("DefaultValue", T, FieldAttributes.Public))
                ilg.Emit(OpCodes.Ret)

            // return result
            ilg.MarkLabel(returnLabel)
            if isPrimitive then
                ilg.Emit(OpCodes.Ldloc, resultOrIndexLocal)
            else
                // We could store all the values in individual fields to avoid the bounds check
                // and indirect load, but that probably wouldn't be worth the additional
                // code generation (and garbage collection?) costs (except for tiny mappings).
                ilg.Emit(OpCodes.Ldarg_0)
                ilg.Emit(OpCodes.Ldfld, tb.DefineField("Values", values.GetType(), FieldAttributes.Public))
                ilg.Emit(OpCodes.Ldloc, resultOrIndexLocal)
                ilg.Emit(OpCodes.Ldelem, T)
            ilg.Emit(OpCodes.Ret)

            let t = tb.CreateType()
            let mapping = FormatterServices.GetUninitializedObject(t) :?> (int -> 'T)
            if not isPrimitive then
                // we can't use the previously used Fieldbuilders here, because SetValue is not implemented in FieldBuilders
                if not defaultValueIsNull then
                    t.GetField("DefaultValue").SetValue(mapping, defaultValue)
                t.GetField("Values").SetValue(mapping, if isNotNull returnedValues then returnedValues else values)

#if DEBUG_STATIC_MAPPING
            //saveEmitAssembly "FParsec.Emitted.dll"

            if isNull physicalEqualityComparer then mapping
            else
                let raiseException key : unit =
                    let e = createStaticMappingAssertException()
                    e.Data["Argument"] <- key
                    e.Data["Ranges"] <- ranges
                    e.Data["Values"] <- values
                    e.Data["DefaultValue"] <- defaultValue
                    raise e

                // wrap the generated function so that every call is cross-checked
                // against a binary search in the ranges
                fun key ->
                    let value = mapping key
                    let index = findInSortedNonOverlappingRanges ranges key
                    if index >= 0 then
                        if not (physicalEqualityComparer.Equals(value, values[index])) then raiseException key
                    else
                        if not (physicalEqualityComparer.Equals(value, defaultValue)) then raiseException key
                    value
#else
            mapping
#endif

/// Removes all entries whose value equals `defaultValue` (according to `comparer`)
/// from the parallel `ranges` and `values` arrays.
/// Returns the input arrays unchanged if `comparer` is null or no value equals the default.
let internal filterOutDefaultValueRanges (comparer: EqualityComparer<_>) (ranges: Range[]) (values: _[]) defaultValue =
    if isNull comparer then ranges, values
    else
        // count the entries to drop first, so that the result arrays can be sized exactly
        let mutable n = 0
        for v in values do
            if comparer.Equals(v, defaultValue) then n <- n + 1
        if n = 0 then ranges, values
        else
            let N = values.Length - n
            let newRanges, newValues = Array.zeroCreate N, Array.zeroCreate N
            let mutable j = 0
            for i = 0 to values.Length - 1 do
                let v = values[i]
                if not (comparer.Equals(v, defaultValue)) then
                    newValues[j] <- v
                    newRanges[j] <- ranges[i]
                    j <- j + 1
            newRanges, newValues

// we need to use #seq instead of seq here to prevent the F# compiler
// from unnecessarily wrapping the returned function value

/// Creates a function mapping the given int keys to their values and all other keys to
/// `defaultValue`. Duplicate keys are rejected by collectSortAndMergeKeyValueRanges.
let createStaticIntMapping (defaultValue: 'T) (keyValues: #seq) =
    let valueComparer = PhysicalEqualityComparer<'T>.InstanceOrNull
    let ranges, values = collectSortAndMergeKeyValueRanges valueComparer keyValues
    let ranges, values = filterOutDefaultValueRanges valueComparer ranges values defaultValue
    createStaticIntMappingImpl
        defaultMappingLengthCap defaultMappingDensityThreshold
        System.Int32.MinValue System.Int32.MaxValue
        defaultValue ranges values

/// Creates a function mapping the keys in the given ranges to the associated values and all
/// other keys to `defaultValue`. Overlapping ranges are rejected by sortAndMergeKeyValueRanges.
let createStaticIntRangeMapping (defaultValue: 'T) (keyValues: #seq) =
    let valueComparer = PhysicalEqualityComparer<'T>.InstanceOrNull
    let ranges, values = sortAndMergeKeyValueRanges valueComparer keyValues
    let ranges, values = filterOutDefaultValueRanges valueComparer ranges values defaultValue
    createStaticIntMappingImpl
        defaultMappingLengthCap defaultMappingDensityThreshold
        System.Int32.MinValue System.Int32.MaxValue
        defaultValue ranges values

// width of a single memory read emitted by the string mapping code
type private IntType = U2 | U4 | U8

/// Identifies the entries Index .. Index + Count - 1 of a key/value array,
/// considering only the part of each key that starts at char index StringIndex.
[]
type Subtree(stringIndex: int, index: int, count: int) = struct
    member t.StringIndex = stringIndex
    member t.Index = index
    member t.Count = count // must be greater 0
end

/// Treats two subtrees as equal if they have the same number of entries and the entries are
/// pairwise equal in their remaining key suffix and in their value.
type SubtreeEqualityComparer<'T>(stringValues: (string*'T)[], valueComparer: EqualityComparer<'T>) =
    inherit EqualityComparer()

    override t.Equals(subtree1: Subtree, subtree2: Subtree) =
        let aligned = subtree1.StringIndex%2 = subtree2.StringIndex%2 // our string comparison code assumes an identical 4-byte-alignment
        let count = subtree1.Count
        count = subtree2.Count
        && (let mutable i = 0
            // The unsigned comparison ends the loop both when i reaches count and when
            // i has been set to Int32.MinValue to signal a mismatch.
            while uint32 i < uint32 count do
                let string1, value1 = stringValues[subtree1.Index + i]
                let string2, value2 = stringValues[subtree2.Index + i]
                let remaining = string1.Length - subtree1.StringIndex
                if remaining = string2.Length - subtree2.StringIndex
                   && (aligned || remaining <= 1)
                   && valueComparer.Equals(value1, value2)
                   && System.String.CompareOrdinal(string1, subtree1.StringIndex,
                                                   string2, subtree2.StringIndex, remaining) = 0
                then i <- i + 1
                else i <- System.Int32.MinValue // break
            i = count)

    // (the member introduced by the following keyword continues on the next source line)
    override
t.GetHashCode(subtree: Subtree) = subtree.Count ^^^ valueComparer.GetHashCode(snd stringValues[subtree.Index]) let createStaticStringMapping (defaultValue: 'T) (keyValues: #seq) : (string -> 'T) = let T = typeof<'T> let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull let kvs = Array.ofSeq keyValues System.Array.Sort(kvs, {new Comparer() with member t.Compare((k1, _), (k2, _)) = System.String.CompareOrdinal(k1, k2)}) let mutable previousKey = null for (key, _) in kvs do if isNull key then invalidArg "keyValues" "The string keys must not be null." if key = previousKey then invalidArg "keyValues" "The strings keys must be different." previousKey <- key match kvs.Length with | 0 -> fun str -> let throwIfStringIsNull = str.Length defaultValue | 1 -> let key, value = kvs[0] fun str -> let throwIfStringIsNull = str.Length if str = key then value else defaultValue | _ -> let mutable i0 = if fst kvs[0] = "" then 1 else 0 let getMinMaxLength iBegin iEnd = assert (iBegin < iEnd) let firstKey, _ = kvs[iBegin] let mutable minLength = firstKey.Length let mutable maxLength = minLength for i = iBegin + 1 to iEnd - 1 do let key, _ = kvs[i] let length = key.Length minLength <- min length minLength maxLength <- max length maxLength minLength, maxLength let minLength, maxLength = getMinMaxLength i0 kvs.Length let findIndexOfFirstCharAfterCommonPrefix startIndex iBegin iEnd minKeyLength = let rec loop index = if index = minKeyLength then index else let c = (fst kvs[iBegin])[index] let rec keysEqualAtX i = if i = iEnd then true elif (fst kvs[i])[index] <> c then false else keysEqualAtX (i + 1) if not (keysEqualAtX (iBegin + 1)) then index else loop (index + 1) loop startIndex let prefixLength = findIndexOfFirstCharAfterCommonPrefix 0 i0 kvs.Length minLength // sort by first char after common prefix, then by length, then lexicographical System.Array.Sort(kvs, {new Comparer() with member t.Compare((k1, _), (k2, _)) = if k1.Length > prefixLength && k2.Length > 
prefixLength then let d = int k1[prefixLength] - int k2[prefixLength] if d <> 0 then d else let d = k1.Length - k2.Length if d <> 0 then d else System.String.CompareOrdinal(k1, k2) else k1.Length - k2.Length}) let tb, ilg = createStaticMappingTypeBuilder() let isPrimitive = T.IsPrimitive || T.IsEnum let physicalEqualityComparer = PhysicalEqualityComparer<'T>.InstanceOrNull let loadConstant = if isPrimitive then createLoaderForPrimitiveConstants ilg else Unchecked.defaultof<_> let lengthLocal = ilg.DeclareLocal(typeof) let loadLength() = ilg.Emit(OpCodes.Ldloc, lengthLocal) let storeLength() = ilg.Emit(OpCodes.Stloc, lengthLocal) let charPointerLocal = ilg.DeclareLocal(typeof>) let loadPtr() = ilg.Emit(OpCodes.Ldloc, charPointerLocal) let storePtr() = ilg.Emit(OpCodes.Stloc, charPointerLocal) // Declaring the following local as int instead of char improves // code generation on the 64-bit JIT. let chLocal = ilg.DeclareLocal(typeof) let loadCh = fun (_: ILGenerator) -> ilg.Emit(OpCodes.Ldloc, chLocal) let storeCh() = ilg.Emit(OpCodes.Stloc, chLocal) let resultOrIndexLocal = ilg.DeclareLocal(if isPrimitive then T else typeof) let loadResult() = ilg.Emit(OpCodes.Ldloc, resultOrIndexLocal) let storeResult() = ilg.Emit(OpCodes.Stloc, resultOrIndexLocal) let stringLocal = ilg.DeclareLocal(typeof, true) // pinned string let storeString() = ilg.Emit(OpCodes.Stloc, stringLocal) // set up local variables ilg.Emit(OpCodes.Ldarg_1) // load string argument ilg.Emit(OpCodes.Dup) ilg.Emit(OpCodes.Dup) storeString() // pins string // accessing .Length triggers null reference exception if string is null ilg.EmitCall(OpCodes.Call, typeof.GetMethod("get_Length"), null) storeLength() ilg.Emit(OpCodes.Conv_I) ilg.EmitCall(OpCodes.Call, typeof.GetMethod("get_OffsetToStringData"), null) ilg.Emit(OpCodes.Add) storePtr() let defaultLabel = ilg.DefineLabel() let returnLabel = ilg.DefineLabel() // some helper functions let dereferenceAndIncrementPtr intType doIncrement = loadPtr() if 
doIncrement then ilg.Emit(OpCodes.Dup) loadI4 ilg (match intType with | U2 -> 1*sizeof | U4 -> 2*sizeof | U8 -> 4*sizeof) ilg.Emit(OpCodes.Add) storePtr() match intType with | U2 -> ilg.Emit(OpCodes.Ldind_U2) | U4 -> ilg.Emit(OpCodes.Ldind_U4) | U8 -> ilg.Emit(OpCodes.Ldind_I8) let incrementPtrByNumberOfChars i = loadPtr() loadI4 ilg (i*sizeof) ilg.Emit(OpCodes.Add) storePtr() let returnedValueIndices = if isPrimitive then null else ResizeArray<_>(kvs.Length) let returnValue i = if isPrimitive then loadConstant (snd kvs[i]) else loadI4 ilg (returnedValueIndices.Count) returnedValueIndices.Add(i) storeResult() ilg.Emit(OpCodes.Br, returnLabel) let mutable longKeyData = new ResizeArray<_>(), null, null, null /// Emit a call to FParsec.Buffer.Equal helper function to compare /// a long segment of the input string. let emitLongStringComparison dataIndex dataLength isFinal = let data, fieldBuilder, methodInfo, pinnedDataLocal = longKeyData let mutable f, m, pdl = fieldBuilder, methodInfo, pinnedDataLocal if isNull f then f <- tb.DefineField("longKeyData", typeof, FieldAttributes.Public) m <- typeof.GetMethod("Equals", [|typeof>; typeof>; typeof|]) pdl <- ilg.DeclareLocal(typeof, true) longKeyData <- (data, f, m, pdl) ilg.Emit(OpCodes.Ldarg_0) ilg.Emit(OpCodes.Ldfld, f) ilg.Emit(OpCodes.Dup) ilg.Emit(OpCodes.Stloc, pdl) // pin data array loadI4 ilg dataIndex ilg.Emit(OpCodes.Ldelema, typeof) ilg.Emit(OpCodes.Conv_I) loadPtr() if not isFinal then incrementPtrByNumberOfChars (dataLength*2) loadI4 ilg dataLength ilg.Emit(OpCodes.Call, m) ilg.Emit(OpCodes.Ldnull) ilg.Emit(OpCodes.Stloc, pdl) // unpin data array ilg.Emit(OpCodes.Brfalse, defaultLabel) let emitStringComparison (key: string) idx length isFinal = if length > 0 then let mutable idx, length = idx, length if idx%2 = 1 then // align ptr to 4-byte boundary // (this assumes that the first char in a string is aligned) dereferenceAndIncrementPtr U2 (not isFinal || length > 1) loadI4 ilg (int key[idx]) 
ilg.Emit(OpCodes.Bne_Un, defaultLabel) idx <- idx + 1 length <- length - 1 if length > sizeof*4 then // store string data into longStringData let data, _, _, _ = longKeyData let dataIndex = data.Count while length >= 2 do // if necessary we will swap the byte order of the whole data array // when we assign it to the longKeyData field let v = uint32 key[idx] ||| (uint32 key[idx + 1] <<< 16) data.Add(v) idx <- idx + 2 length <- length - 2 if isFinal && length = 1 then data.Add(uint32 key[idx]) length <- 0 // emit call to string comparison function emitLongStringComparison dataIndex (data.Count - dataIndex) isFinal else #if UNALIGNED_READS if sizeof = 8 then while length >= 4 || (isFinal && length = 3) do dereferenceAndIncrementPtr U8 (not isFinal || length > 4) let v = (uint64 key[idx] ) ||| (uint64 key[idx + 1] <<< 16) ||| (uint64 key[idx + 2] <<< 32) ||| (if length > 3 then uint64 key[idx + 3] <<< 48 else 0UL) let v = if System.BitConverter.IsLittleEndian then v else Buffer.SwapByteOrder(v) loadU8 ilg v ilg.Emit(OpCodes.Bne_Un, defaultLabel) idx <- idx + 4 length <- length - 4 #endif while length >= 2 || (isFinal && length = 1) do dereferenceAndIncrementPtr U4 (not isFinal || length > 2) let v = if length = 1 then int key[idx] else int key[idx] ||| (int key[idx + 1] <<< 16) let v = if System.BitConverter.IsLittleEndian then v else int (Buffer.SwapByteOrder(uint32 v)) loadI4 ilg v ilg.Emit(OpCodes.Bne_Un, defaultLabel) idx <- idx + 2 length <- length - 2 if length > 0 then Debug.Assert(not isFinal) dereferenceAndIncrementPtr U2 true loadI4 ilg (int key[idx]) ilg.Emit(OpCodes.Bne_Un, defaultLabel) let subtreeLabels = if isNull physicalEqualityComparer then null else System.Collections.Generic.Dictionary(SubtreeEqualityComparer<'T>(kvs, physicalEqualityComparer)) // Partitions the key pairs iBegin..(iEnd - 1) into branches with identical "branch-key". // Returns [|iBegin, i2, ..., iN, iEnd], [|fst kvs[iBegin], fst kvs[i2], ..., fst kvs[iN]|] // where iBegin .. 
indexN are the indices where the branches start. let getBranchIndicesAndKeys (iBegin: int) iEnd getBranchKey = let mutable n = 0 let indices, keys = new ResizeArray(iEnd - iBegin), new ResizeArray(iEnd - iBegin) indices.Add(iBegin) let mutable prevKey : int = getBranchKey (fst kvs[iBegin]) keys.Add(prevKey) for i = iBegin + 1 to iEnd - 1 do let key = getBranchKey (fst kvs[i]) if key <> prevKey then prevKey <- key indices.Add(i) keys.Add(key) indices.Add(iEnd) // the indices array has one element more indices.ToArray(), keys.ToArray() // Returns labels for the subtrees given by the branchIndices and the subtreeStringIndex, // and an array with bools indicating whether the respective label was newly created. // If the dictionary already contains a label for an equivalent subtree, that label is returned; // otherwise, a new label is created. let getBranchLabels (subtreeLabels: Dictionary) subtreeStringIndex (branchIndices: int[]) = assert (branchIndices.Length >= 2 && branchIndices[0] < branchIndices[1]) let n = branchIndices.Length - 1 let isNewLabel = Array.zeroCreate n let labels = Array.zeroCreate n if isNull subtreeLabels then for i = 0 to n - 1 do isNewLabel[i] <- true labels[i] <- ilg.DefineLabel() else let mutable iBegin = branchIndices[0] for j = 1 to branchIndices.Length - 1 do let iEnd = branchIndices[j] let subtree = Subtree(subtreeStringIndex, iBegin, iEnd - iBegin) iBegin <- iEnd let b = j - 1 let mutable label = Unchecked.defaultof<_> if subtreeLabels.TryGetValue(subtree, &label) then labels[b] <- label else isNewLabel[b] <- true let label = ilg.DefineLabel() labels[b] <- label subtreeLabels.Add(subtree, label) labels, isNewLabel let tempLocals = new TempLocals(ilg) // Assumes keys in iBegin..(iEnd - 1) are sorted by the branch-key returned by getBranchKey. 
// Emits a switch over the branch key of the pairs kvs[iBegin..(iEnd - 1)], followed by
// the code of every branch that did not already have a label.
//   getBranchKey        maps a key string to the int value the switch dispatches on
//   loadVar, minVarValue, maxVarValue
//                       passed through unchanged to emitSwitch
//                       (NOTE(review): presumably the loader and the value range of the
//                       switched-on variable; emitSwitch is defined outside this excerpt)
//   subtreeLabels       label cache handed to getBranchLabels; may be null
//   subtreeStringIndex  string index handed to getBranchLabels for the Subtree cache keys
//   emitBranchIter      called as `emitBranchIter branchBegin branchEnd` to emit the code
//                       for the pairs of one branch
let switch getBranchKey loadVar minVarValue maxVarValue subtreeLabels iBegin iEnd subtreeStringIndex emitBranchIter =
    let branchIndices, branchKeys = getBranchIndicesAndKeys iBegin iEnd getBranchKey
    let branchLabels, isNewLabel = getBranchLabels subtreeLabels subtreeStringIndex branchIndices
    let switchRanges, switchLabels = mergeSortedKeyLabelRanges branchKeys branchLabels
    emitSwitch ilg loadVar tempLocals defaultMappingLengthCap defaultMappingDensityThreshold minVarValue maxVarValue defaultLabel switchRanges switchLabels
    // Branches whose label was taken from subtreeLabels get no code here:
    // the switch jumps to the label that was registered earlier.
    for i = 0 to isNewLabel.Length - 1 do
        if isNewLabel[i] then
            ilg.MarkLabel(branchLabels[i])
            emitBranchIter branchIndices[i] branchIndices[i + 1]

// Comparer and label cache used for sharing the code of equivalent subtrees.
// Both are null/default when no physicalEqualityComparer was supplied; the code
// below tests subtreeLabels for null before using it.
let subtreeEqualityComparer = if isNull physicalEqualityComparer then Unchecked.defaultof<_>
                              else SubtreeEqualityComparer<'T>(kvs, physicalEqualityComparer)
let subtreeLabels = if isNull physicalEqualityComparer then null
                    else Dictionary(subtreeEqualityComparer)

// Emits the code for the keys kvs[iBegin..(iEnd - 1)], which must all have the given
// length (asserted), starting at string index idx.
//  1. The chars shared by all keys from idx on (idx..(idx1 - 1)) are checked with a
//     single emitStringComparison call.
//  2. If that consumed the whole key (idx1 = length), exactly one key is left (asserted)
//     and its value is returned.
//  3. Otherwise a switch over the char at idx1 is emitted, and each of its branches is
//     emitted by a recursive call starting at idx1 + 1.
let rec emitSubtree length idx iBegin iEnd =
    assert ( iBegin < iEnd && kvs[iBegin..(iEnd - 1)] |> Array.map (fun (k,_) -> k.Length) |> Array.forall ((=) length))
    let idx1 = findIndexOfFirstCharAfterCommonPrefix idx iBegin iEnd length
    if idx <> idx1 then
        emitStringComparison (fst kvs[iBegin]) idx (idx1 - idx) (idx1 = length)
    if idx1 = length then
        assert (iBegin + 1 = iEnd)
        returnValue iBegin
    else
        let mutable emit = true
        // The cache is only consulted when a prefix comparison moved the position
        // (idx <> idx1). When idx = idx1 and this call came from a switch branch,
        // getBranchLabels has already handled Subtree(idx, iBegin, iEnd - iBegin).
        if idx <> idx1 && isNotNull subtreeLabels then
            let subtree = Subtree(idx1, iBegin, iEnd - iBegin)
            let mutable label = Unchecked.defaultof<_>
            if subtreeLabels.TryGetValue(subtree, &label) then
                // an equivalent subtree has already been handled elsewhere
                ilg.Emit(OpCodes.Br, label) // jump to that code
                emit <- false
            else
                // mark the current position so that later equivalent subtrees can jump here
                let label = ilg.DefineLabel()
                ilg.MarkLabel(label)
                subtreeLabels.Add(subtree, label)
        if emit then
            dereferenceAndIncrementPtr U2 (idx1 + 1 < length)
            storeCh()
            switch (fun str -> int str[idx1]) loadCh 0 0xffff
                   (if idx1 + 1 < length || isNull subtreeLabels then subtreeLabels // we want to keep the switch branches local
                    else Dictionary(subtreeEqualityComparer)) // when they only contain a return statement
                   iBegin iEnd (idx1 + 1) (emitSubtree length (idx1 + 1))

// Emits a check that branches to defaultLabel unless the value produced by loadLength()
// equals maxLength, followed by the subtree code for the keys kvs[iBegin..(iEnd - 1)]
// starting at stringIndex.
let emitMaxLengthSubtree stringIndex iBegin iEnd =
    loadLength()
    loadI4 ilg maxLength
    ilg.Emit(OpCodes.Bne_Un, defaultLabel)
    emitSubtree maxLength stringIndex iBegin iEnd

Debug.Assert(i0 < kvs.Length)
if i0 <> 0 then // first key is empty
    // return the value with index 0 if the length is zero, otherwise skip to `label`
    let label = ilg.DefineLabel()
    loadLength()
    ilg.Emit(OpCodes.Brtrue, label)
    returnValue 0
    ilg.MarkLabel(label)
if minLength = maxLength then
    emitMaxLengthSubtree 0 i0 kvs.Length
else // at least two non-empty keys with different lengths
    // branches to defaultLabel if the length is less than minLength
    let checkMinLength() =
        loadLength()
        loadI4 ilg minLength
        ilg.Emit(OpCodes.Blt, defaultLabel)
    if prefixLength <> 0 then
        checkMinLength()
        emitStringComparison (fst kvs[i0]) 0 prefixLength false
        if prefixLength = minLength then
            // kvs[i0] is handled right here: its value is returned if the length
            // equals minLength, and the remaining code starts with the next key
            let label = ilg.DefineLabel()
            loadLength()
            loadI4 ilg minLength
            ilg.Emit(OpCodes.Bne_Un, label)
            returnValue i0
            ilg.MarkLabel(label)
            i0 <- i0 + 1
    else // prefixLength = 0
        if i0 = 0 && (fst kvs[0])[0] = '\u0000' then
            // If a key contains a zero as the first char, we can't avoid
            // the following length check (which we otherwise don't need for
            // the switch because of the null termination of strings).
checkMinLength() if prefixLength + 1 = maxLength then // prefixLength <> 0 emitMaxLengthSubtree prefixLength i0 kvs.Length else let topLevelTreeLabels = if isNull subtreeEqualityComparer then null else Dictionary(subtreeEqualityComparer) // switch over char after prefix dereferenceAndIncrementPtr U2 (prefixLength + 1 < maxLength) storeCh() switch (fun str -> int str[prefixLength]) loadCh 0 0xffff topLevelTreeLabels i0 kvs.Length (prefixLength + 1) (fun iBegin iEnd -> // switch over length switch (fun str -> str.Length) (fun ilg -> loadLength()) 0 System.Int32.MaxValue subtreeLabels iBegin iEnd (prefixLength + 1) (fun iBegin iEnd -> emitSubtree (fst kvs[iBegin]).Length (prefixLength + 1) iBegin iEnd)) // return default value let defaultValueIsNull = not T.IsValueType && isNull (box defaultValue) ilg.MarkLabel(defaultLabel) if isPrimitive then loadConstant defaultValue storeResult() else if defaultValueIsNull then ilg.Emit(OpCodes.Ldnull) else ilg.Emit(OpCodes.Ldarg_0) ilg.Emit(OpCodes.Ldfld, tb.DefineField("DefaultValue", T, FieldAttributes.Public)) ilg.Emit(OpCodes.Ret) // return result ilg.MarkLabel(returnLabel) if isPrimitive then loadResult() else // We could store all the values in individual fields to avoid the bounds check // and indirect load, but that probably wouldn't be worth the additional // code generation (and garbage collection?) costs (except for tiny mappings). 
ilg.Emit(OpCodes.Ldarg_0) ilg.Emit(OpCodes.Ldfld, tb.DefineField("Values", typeof<'T[]>, FieldAttributes.Public)) loadResult() ilg.Emit(OpCodes.Ldelem, T) ilg.Emit(OpCodes.Ret) // compile type let t = tb.CreateType() // instantiate type let mapping = FormatterServices.GetUninitializedObject(t) :?> (string -> 'T) if not isPrimitive then // we can't use the previously used Fieldbuilders here, because SetValue is not implemented in FieldBuilders if not defaultValueIsNull then t.GetField("DefaultValue").SetValue(mapping, defaultValue) let values = Array.zeroCreate returnedValueIndices.Count let mutable j = 0 for i in returnedValueIndices do values[j] <- snd kvs[i] j <- j + 1 t.GetField("Values").SetValue(mapping, values) let data, _, _, _ = longKeyData if data.Count <> 0 then let dataArray = data.ToArray() if not (System.BitConverter.IsLittleEndian) then FParsec.Buffer.SwapByteOrder(dataArray) t.GetField("longKeyData").SetValue(mapping, dataArray) #if DEBUG_STATIC_MAPPING // saveEmitAssembly "FParsec.Emitted.dll" if isNull physicalEqualityComparer then mapping else let dict = new System.Collections.Generic.Dictionary(kvs.Length) for k, v in kvs do dict.Add(k, v) let errorHandler (key: string) : unit = let e = new System.Exception("An internal assert check in FParsec.StaticMapping.createStringMapping failed. Please report this error to fparsec@quanttec.com. 
(The Data member of the exception object contains the information needed to reproduce the error.)") e.Data["Argument"] <- key e.Data["KeysValues"] <- dict e.Data["DefaultValue"] <- defaultValue raise e fun key -> let mutable value = Unchecked.defaultof<_> if not (dict.TryGetValue(key, &value)) then value <- defaultValue let value2 = mapping key if not (physicalEqualityComparer.Equals(value, value2)) then errorHandler key value #else mapping #endif #endif ================================================ FILE: FParsec/StaticMapping.fsi ================================================ // Copyright (c) Stephan Tolksdorf 2010-2011 // License: Simplified BSD License. See accompanying documentation. module FParsec.StaticMapping #if !LOW_TRUST /// `createStaticCharIndicatorFunction invert charsInSet` /// creates an optimized indicator function for the chars specified by the `charsInSet` sequence. /// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) /// if and only if it is called with a char contained in `charsInSet`. val createStaticCharIndicatorFunction: invert: bool -> charsInSet: seq -> (char -> bool) /// `createStaticCharRangeIndicatorFunction invert rangesInSet` /// creates an optimized indicator function for the chars in the ranges specified by the `rangesInSet` sequence. /// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is /// called with a char contained in at least one of the ranges of `rangesInSet`. val createStaticCharRangeIndicatorFunction: invert: bool -> rangesInSet: seq -> (char -> bool) /// `createStaticIntIndicatorFunction invert valuesInSet` /// creates an optimized indicator function for the integers specified by the `valuesInSet` sequence. /// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is /// called with an integer contained in `valuesInSet`. 
val createStaticIntIndicatorFunction:
    invert: bool -> valuesInSet: seq<int> -> (int -> bool)

/// `createStaticIntRangeIndicatorFunction invert rangesInSet`
/// creates an optimized indicator function for the integers in the ranges specified by the `rangesInSet` sequence.
/// If `invert` is `false` (`true`), the returned indicator function will return `true` (`false`) if and only if it is
/// called with an `int` contained in at least one of the ranges of `rangesInSet`.
val createStaticIntRangeIndicatorFunction:
    invert: bool -> rangesInSet: seq<Range> -> (int -> bool)

/// `createStaticIntMapping defaultValue keyValues`
/// creates an optimized mapping function that maps integer keys to values.
/// The `keyValues` sequence specifies the key-value pairs for the mapping.
/// All keys not specified in `keyValues` are mapped to `defaultValue`.
val createStaticIntMapping:
    defaultValue: 'T -> keyValues: #seq<int*'T> -> (int -> 'T)

/// `createStaticIntRangeMapping defaultValue keyValues`
/// creates an optimized mapping function that maps integer key ranges to values.
/// The `keyValues` sequence specifies the range-value pairs for the mapping.
/// All keys not contained in one of the ranges in `keyValues` are mapped to `defaultValue`.
val createStaticIntRangeMapping:
    defaultValue: 'T -> keyValues: #seq<Range*'T> -> (int -> 'T)

/// `createStaticStringMapping defaultValue keyValues`
/// creates an optimized mapping function that maps string keys to values.
/// The `keyValues` sequence specifies the key-value pairs for the mapping.
/// All keys not specified in `keyValues` are mapped to `defaultValue`. A `null` key is not supported.
val createStaticStringMapping: defaultValue: 'T -> keyValues: #seq -> (string -> 'T) val internal filterOutDefaultValueRanges: comparer: System.Collections.Generic.EqualityComparer<'T> -> ranges: Range[] -> values: 'T[] -> defaultValue: 'T -> Range[]*'T[] val internal createStaticIntIndicatorFunctionImpl<'TInt when 'TInt : struct> : lengthCap: int -> densityThreshold: double -> minValue: int -> maxValue: int -> invert: bool -> ranges: Range[] -> ('TInt -> bool) val internal createStaticIntMappingImpl: lengthCap: int -> densityThreshold: double -> minKey: int -> maxKey: int -> defaultValue: 'T -> ranges: Range[] -> values: 'T[] -> (int -> 'T) #endif ================================================ FILE: FParsec-LowTrust.sln ================================================ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.28621.142 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FParsecCS", "FParsecCS\FParsecCS-LowTrust.csproj", "{8521556A-F853-4456-8D20-96C42F97E15A}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "FParsec", "FParsec\FParsec-LowTrust.fsproj", "{019F9A66-F105-43C7-841D-E4D312659B61}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Samples", "Samples", "{01D1CDB5-2645-4929-865F-79B755DBC5B8}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Calculator", "Samples\Calculator\Calculator-LowTrust.fsproj", "{A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FSharpParsingSample", "FSharpParsingSample", "{20AE5602-B9B6-434D-A41D-CB988AC49E79}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "InterpFParsec", "Samples\FSharpParsingSample\FParsecVersion\InterpFParsec-LowTrust.fsproj", "{B56A16AD-5BFE-4D99-932C-9073CAFF3D80}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "JsonParser", 
"Samples\JSON\JsonParser-LowTrust.fsproj", "{3889AFB4-60BC-46CB-9747-4BD2F413B351}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "PegParser", "Samples\PEG\PegParser-LowTrust.fsproj", "{2E8F33E4-77F0-4954-9486-239D7124EB86}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Tutorial", "Samples\Tutorial\Tutorial-LowTrust.fsproj", "{CEDA985E-30D3-400E-9869-4A22D5F9ADA5}" EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Test", "Test\Test-LowTrust.fsproj", "{5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Build", "Build", "{1C2DDDBD-BF95-4F55-8651-6EAD5D984BD9}" ProjectSection(SolutionItems) = preProject .travis.yml = .travis.yml appveyor.yml = appveyor.yml Directory.Build.props = Directory.Build.props Build\FParsec.Common.targets = Build\FParsec.Common.targets global.json = global.json pack.ps1 = pack.ps1 readme.md = readme.md EndProjectSection EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "InterpLexYacc", "Samples\FSharpParsingSample\LexYaccVersion\InterpLexYacc.fsproj", "{C0616007-EAC1-4648-9124-727B4539EEB4}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug-LowTrust|AnyCPU = Debug-LowTrust|AnyCPU Debug-LowTrust|x64 = Debug-LowTrust|x64 Debug-LowTrust|x86 = Debug-LowTrust|x86 Release-LowTrust|AnyCPU = Release-LowTrust|AnyCPU Release-LowTrust|x64 = Release-LowTrust|x64 Release-LowTrust|x86 = Release-LowTrust|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x86.ActiveCfg 
= Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|AnyCPU.Build.0 = 
Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU 
{B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU 
{2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU 
{5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D} = 
{01D1CDB5-2645-4929-865F-79B755DBC5B8} {20AE5602-B9B6-434D-A41D-CB988AC49E79} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {B56A16AD-5BFE-4D99-932C-9073CAFF3D80} = {20AE5602-B9B6-434D-A41D-CB988AC49E79} {3889AFB4-60BC-46CB-9747-4BD2F413B351} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {2E8F33E4-77F0-4954-9486-239D7124EB86} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {CEDA985E-30D3-400E-9869-4A22D5F9ADA5} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {C0616007-EAC1-4648-9124-727B4539EEB4} = {20AE5602-B9B6-434D-A41D-CB988AC49E79} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {EA9DAE45-7810-49BB-BDDA-C57E105B79BC} EndGlobalSection EndGlobal ================================================ FILE: FParsec.sln ================================================ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.28621.142 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FParsecCS", "FParsecCS\FParsecCS.csproj", "{8521556A-F853-4456-8D20-96C42F97E15A}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FParsec", "FParsec\FParsec.fsproj", "{019F9A66-F105-43C7-841D-E4D312659B61}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Samples", "Samples", "{01D1CDB5-2645-4929-865F-79B755DBC5B8}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Calculator", "Samples\Calculator\Calculator.fsproj", "{A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FSharpParsingSample", "FSharpParsingSample", "{20AE5602-B9B6-434D-A41D-CB988AC49E79}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "InterpFParsec", "Samples\FSharpParsingSample\FParsecVersion\InterpFParsec.fsproj", "{B56A16AD-5BFE-4D99-932C-9073CAFF3D80}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "JsonParser", "Samples\JSON\JsonParser.fsproj", 
"{3889AFB4-60BC-46CB-9747-4BD2F413B351}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "PegParser", "Samples\PEG\PegParser.fsproj", "{2E8F33E4-77F0-4954-9486-239D7124EB86}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Tutorial", "Samples\Tutorial\Tutorial.fsproj", "{CEDA985E-30D3-400E-9869-4A22D5F9ADA5}" EndProject Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Test", "Test\Test.fsproj", "{5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Build", "Build", "{1C2DDDBD-BF95-4F55-8651-6EAD5D984BD9}" ProjectSection(SolutionItems) = preProject .travis.yml = .travis.yml appveyor.yml = appveyor.yml Directory.Build.props = Directory.Build.props Build\FParsec.Common.targets = Build\FParsec.Common.targets global.json = global.json pack.ps1 = pack.ps1 readme.md = readme.md EndProjectSection EndProject Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "InterpLexYacc", "Samples\FSharpParsingSample\LexYaccVersion\InterpLexYacc.fsproj", "{C0616007-EAC1-4648-9124-727B4539EEB4}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|AnyCPU = Debug|AnyCPU Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 Debug-LowTrust|AnyCPU = Debug-LowTrust|AnyCPU Debug-LowTrust|x64 = Debug-LowTrust|x64 Debug-LowTrust|x86 = Debug-LowTrust|x86 Release|AnyCPU = Release|AnyCPU Release|x64 = Release|x64 Release|x86 = Release|x86 Release-LowTrust|AnyCPU = Release-LowTrust|AnyCPU Release-LowTrust|x64 = Release-LowTrust|x64 Release-LowTrust|x86 = Release-LowTrust|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {8521556A-F853-4456-8D20-96C42F97E15A}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug|x64.ActiveCfg = Debug|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug|x64.Build.0 = Debug|AnyCPU 
{8521556A-F853-4456-8D20-96C42F97E15A}.Debug|x86.ActiveCfg = Debug|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug|x86.Build.0 = Debug|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|AnyCPU.Build.0 = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|x64.ActiveCfg = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|x64.Build.0 = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|x86.ActiveCfg = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release|x86.Build.0 = Release|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {8521556A-F853-4456-8D20-96C42F97E15A}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug|x64.ActiveCfg = Debug|AnyCPU 
{019F9A66-F105-43C7-841D-E4D312659B61}.Debug|x64.Build.0 = Debug|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug|x86.ActiveCfg = Debug|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug|x86.Build.0 = Debug|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|AnyCPU.Build.0 = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|x64.ActiveCfg = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|x64.Build.0 = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|x86.ActiveCfg = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release|x86.Build.0 = Release|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {019F9A66-F105-43C7-841D-E4D312659B61}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|AnyCPU.Build.0 = Debug|AnyCPU 
{A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|x64.ActiveCfg = Debug|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|x64.Build.0 = Debug|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|x86.ActiveCfg = Debug|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug|x86.Build.0 = Debug|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|AnyCPU.Build.0 = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|x64.ActiveCfg = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|x64.Build.0 = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|x86.ActiveCfg = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release|x86.Build.0 = Release|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU 
{B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|x64.ActiveCfg = Debug|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|x64.Build.0 = Debug|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|x86.ActiveCfg = Debug|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug|x86.Build.0 = Debug|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|AnyCPU.Build.0 = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|x64.ActiveCfg = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|x64.Build.0 = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|x86.ActiveCfg = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release|x86.Build.0 = Release|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {B56A16AD-5BFE-4D99-932C-9073CAFF3D80}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU 
{3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|x64.ActiveCfg = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|x64.Build.0 = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|x86.ActiveCfg = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug|x86.Build.0 = Debug|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|AnyCPU.Build.0 = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|x64.ActiveCfg = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|x64.Build.0 = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|x86.ActiveCfg = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release|x86.Build.0 = Release|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU 
{3889AFB4-60BC-46CB-9747-4BD2F413B351}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|x64.ActiveCfg = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|x64.Build.0 = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|x86.ActiveCfg = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug|x86.Build.0 = Debug|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|AnyCPU.Build.0 = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|x64.ActiveCfg = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|x64.Build.0 = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|x86.ActiveCfg = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release|x86.Build.0 = Release|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU 
{2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {2E8F33E4-77F0-4954-9486-239D7124EB86}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|x64.ActiveCfg = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|x64.Build.0 = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|x86.ActiveCfg = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug|x86.Build.0 = Debug|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|AnyCPU.Build.0 = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|x64.ActiveCfg = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|x64.Build.0 = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|x86.ActiveCfg = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release|x86.Build.0 = Release|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU 
{CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {CEDA985E-30D3-400E-9869-4A22D5F9ADA5}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|x64.ActiveCfg = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|x64.Build.0 = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|x86.ActiveCfg = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug|x86.Build.0 = Debug|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|AnyCPU.Build.0 = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|x64.ActiveCfg = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|x64.Build.0 = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|x86.ActiveCfg = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release|x86.Build.0 = Release|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU 
{5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {5AE7C1E6-A511-41A9-9A9F-E8A0944319A2}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|AnyCPU.ActiveCfg = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|AnyCPU.Build.0 = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|x64.ActiveCfg = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|x64.Build.0 = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|x86.ActiveCfg = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug|x86.Build.0 = Debug|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|AnyCPU.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|AnyCPU.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x64.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x64.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x86.ActiveCfg = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Debug-LowTrust|x86.Build.0 = Debug-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|AnyCPU.ActiveCfg = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|AnyCPU.Build.0 = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|x64.ActiveCfg = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|x64.Build.0 = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|x86.ActiveCfg = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release|x86.Build.0 = Release|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|AnyCPU.ActiveCfg = Release-LowTrust|AnyCPU 
{C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|AnyCPU.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x64.ActiveCfg = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x64.Build.0 = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x86.ActiveCfg = Release-LowTrust|AnyCPU {C0616007-EAC1-4648-9124-727B4539EEB4}.Release-LowTrust|x86.Build.0 = Release-LowTrust|AnyCPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {A9B15BCE-C37B-4BA6-BC72-6E8A438F205D} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {20AE5602-B9B6-434D-A41D-CB988AC49E79} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {B56A16AD-5BFE-4D99-932C-9073CAFF3D80} = {20AE5602-B9B6-434D-A41D-CB988AC49E79} {3889AFB4-60BC-46CB-9747-4BD2F413B351} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {2E8F33E4-77F0-4954-9486-239D7124EB86} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {CEDA985E-30D3-400E-9869-4A22D5F9ADA5} = {01D1CDB5-2645-4929-865F-79B755DBC5B8} {C0616007-EAC1-4648-9124-727B4539EEB4} = {20AE5602-B9B6-434D-A41D-CB988AC49E79} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {EA9DAE45-7810-49BB-BDDA-C57E105B79BC} EndGlobalSection EndGlobal ================================================ FILE: FParsecCS/Buffer.cs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2010 // License: Simplified BSD License. See accompanying documentation. using System; using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; namespace FParsec { public static class Buffer { #if !LOW_TRUST /// Calculates: end - begin.
/// Precondition: 2^31 > end - begin >= 0.
internal static unsafe uint PositiveDistance(char* begin, char* end) {
    // Measure the gap in bytes first, then halve it as an unsigned value
    // (a char occupies two bytes) to obtain the distance in chars.
    long byteCount = (byte*)end - (byte*)begin;
    return (uint)byteCount / 2;
}

/// Calculates: end - begin.
/// Precondition: end - begin >= 0.
/// The distance is counted in chars and returned as a 64-bit value.
internal static unsafe long PositiveDistance64(char* begin, char* end) {
    return (long)((ulong)((byte*)end - (byte*)begin)/2);
}

/// Copies size bytes from src to dst. Correctly handles overlapped memory blocks.
/// Throws an ArgumentOutOfRangeException if size is negative.
static internal unsafe void Copy(byte* dst, byte* src, int size) {
    if (size < 0) throw new ArgumentOutOfRangeException(nameof(size), "The size must be non-negative.");
    // MemoryCopy takes (source, destination, destinationSizeInBytes, sourceBytesToCopy).
    System.Buffer.MemoryCopy(src, dst, size, size);
}

#endif

/// Returns value with the order of its 4 bytes reversed.
internal static uint SwapByteOrder(uint value) => BinaryPrimitives.ReverseEndianness(value);

/// Returns value with the order of its 8 bytes reversed.
internal static ulong SwapByteOrder(ulong value) => BinaryPrimitives.ReverseEndianness(value);

/// Reverses the byte order of every element of array in place.
internal static void SwapByteOrder(Span<uint> array) {
    for (int i = 0; i < array.Length; ++i) {
        array[i] = BinaryPrimitives.ReverseEndianness(array[i]);
    }
}

#if LOW_TRUST
/// Returns a new uint array filled from the srcLength bytes of src that start at srcIndex,
/// interpreting those bytes as little-endian uints.
/// srcLength is expected to be a multiple of sizeof(uint) (checked by a debug assertion only).
internal static uint[] CopyUIntsStoredInLittleEndianByteArray(ReadOnlySpan<byte> src, int srcIndex, int srcLength) {
    Debug.Assert(srcLength%sizeof(uint) == 0);
    var subArray = new uint[srcLength/sizeof(uint)];
    // Copy the raw bytes first, then fix up the byte order on big-endian platforms.
    src.Slice(srcIndex, srcLength).CopyTo(MemoryMarshal.AsBytes(new Span<uint>(subArray)));
    if (!BitConverter.IsLittleEndian) SwapByteOrder(subArray);
    return subArray;
}
#endif

#if !LOW_TRUST
// used by StaticMapping.createStaticStringMapping
/// Returns true if the length uints at ptr1 equal the length uints at ptr2 element by element.
public static unsafe bool Equals(uint* ptr1, uint* ptr2, int length) =>
    new ReadOnlySpan<uint>(ptr1, length).SequenceEqual(new ReadOnlySpan<uint>(ptr2, length));

/// Returns a pointer to the uint data that is stored in little-endian byte order in the
/// length bytes starting at data + offset.
/// On little-endian platforms the returned pointer points directly into data.
/// Otherwise the bytes are copied into memory obtained from
/// RuntimeHelpers.AllocateTypeAssociatedMemory for typeof(Buffer) and byte-swapped there;
/// this method never frees that memory.
/// NOTE(review): length is presumably a multiple of sizeof(uint); any trailing bytes are
/// copied but not byte-swapped - confirm against the callers.
internal static unsafe uint* LoadLittleEndianUInt32Data(byte* data, int offset, int length) {
    if (BitConverter.IsLittleEndian) {
        return (uint*)(data + offset);
    }
    void* buffer = (void*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(Buffer), length);
    new ReadOnlySpan<byte>(data + offset, length).CopyTo(new Span<byte>(buffer, length));
    Buffer.SwapByteOrder(new Span<uint>(buffer, length / sizeof(uint)));
    return (uint*)buffer;
}
#endif

}

}

================================================ FILE: FParsecCS/CaseFoldTable.cs ================================================
// Copyright (c) Stephan Tolksdorf 2009-2012
// License: Simplified BSD
// License. See accompanying documentation.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace FParsec {

internal static class CaseFoldTable {
#if LOW_TRUST
/// Table with 0x10000 entries that maps every char to its case-folded char.
/// Chars that do not occur as a key in oneToOneMappings map to themselves.
public static readonly char[] FoldedChars = CreateFoldedCharsArray();

private static char[] CreateFoldedCharsArray() {
    int pairChars = oneToOneMappings.Length;
    Debug.Assert(pairChars%2 == 0);

    var folded = new char[0x10000];
    // Start from the identity mapping.
    for (int c = 0; c < folded.Length; ++c)
        folded[c] = (char)c;

    // oneToOneMappings is a flat sequence of (char, folded char) pairs,
    // which is applied here from the last pair to the first.
    for (int k = pairChars - 2; k >= 0; k -= 2) {
        char source = oneToOneMappings[k];
        char target = oneToOneMappings[k + 1];
        folded[source] = target;
    }
    return folded;
}
#else
/// Pointer to a table with 0x10000 entries that maps every char to its case-folded char.
/// Chars that do not occur as a key in oneToOneMappings map to themselves.
public static readonly unsafe char* FoldedChars = Initialize();

private static unsafe char* Initialize() {
    int pairChars = oneToOneMappings.Length;
    Debug.Assert(pairChars%2 == 0);

    char* folded = (char*)RuntimeHelpers.AllocateTypeAssociatedMemory(typeof(CaseFoldTable), 0x10000 * sizeof(char));

    // Start from the identity mapping. Each uint store writes two neighbouring chars;
    // the start value places the chars 0 and 1 in memory order for the platform's endianness.
    uint* cursor = (uint*)folded;
    uint* fillEnd = cursor + 0x10000/2;
    uint twoChars = BitConverter.IsLittleEndian ? 0x10000u : 0x1u;
    while (cursor < fillEnd) {
        cursor[0] = twoChars;
        cursor[1] = twoChars + 0x20002u;
        cursor[2] = twoChars + 0x40004u;
        cursor[3] = twoChars + 0x60006u;
        // The increment after the final group wraps around, hence unchecked.
        twoChars = unchecked(twoChars + 0x80008u);
        cursor += 4;
    }

    // oneToOneMappings is a flat sequence of (char, folded char) pairs,
    // which is applied here from the last pair to the first.
    fixed (char* pairs = oneToOneMappings) {
        for (int k = pairChars - 2; k >= 0; k -= 2)
            folded[pairs[k]] = pairs[k + 1];
    }
    return folded;
}
#endif

private const string oneToOneMappings =
"\u0041\u0061\u0042\u0062\u0043\u0063\u0044\u0064\u0045\u0065\u0046\u0066\u0047\u0067\u0048\u0068\u0049\u0069\u004A\u006A\u004B\u006B\u004C\u006C\u004D\u006D\u004E\u006E\u004F\u006F\u0050\u0070\u0051\u0071\u0052\u0072\u0053\u0073\u0054\u0074\u0055\u0075\u0056\u0076\u0057\u0077\u0058\u0078\u0059\u0079\u005A\u007A\u00B5\u03BC\u00C0\u00E0\u00C1\u00E1\u00C2\u00E2\u00C3\u00E3\u00C4\u00E4\u00C5\u00E5\u00C6\u00E6\u00C7\u00E7\u00C8\u00E8\u00C9\u00E9\u00CA\u00EA\u00CB\u00EB\u00CC\u00EC\u00CD\u00ED\u00CE\u00EE\u00CF\u00EF\u00D0\u00F0\u00D1\u00F1\u00D2\u00F2\u00D3\u00F3\u00D4\u00F4\u00D5\u00F5\u00D6\u00F6\u00D8\u00F8\u00D9\u00F9\u00DA\u00FA\u00DB\u00FB\u00DC\u00FC\u00DD\u00FD\u00DE\u00FE\u0100\u0101\u0102\u0103\u0104\u0105\u0106\u0107\u0108\u0109\u010A\u010B\u010C\u010D\u010E\u010F\u0110\u0111\u0112\u0113\u0114\u0115\u0116\u0117\u0118\u0119\u011A\u011B\u011C\u011D\u011E\u011F\u0120\u0121\u0122\u0123\u0124\u0125\u0126\u0127\u0128\u0129\u012A\u012B\u012C\u012D\u012E\u012F\u0132\u0133\u0134\u0135\u0136\u0137\u0139\u013A\u013B\u013C\u013D\u013E\u013F\u0140\u0141\u0142\u0143\u0144\u0145\u0146\u0147\u0148\u014A\u014B\u014C\u014D\u014E\u014F\u0150\u0151\u0152\u0153\u0154\u0155\u0156\u0157\u0158\u0159\u015A\u015B\u015C\u015D\u015E\u015F\u0160\u0161\u0162\u0163\u0164\u0165\u0166\u0167\u0168\u0169\u016A\u016B\u016C\u016D\u016E\u016F\u0170\u0171\u0172\u0173\u0174\u0175\u0176\u0177\u0178\u00FF\u0179\u017A\u017B\u017C\u017D\u017E\u017F\u0073\u0181\u0253\u0182\u0183\u0184\u0185\u0186\u0254\u0187\u0188\u0189\u0256\u018A\u0257\u018B\u018C\u018E\u01DD\u018F\u0259\u0190\u025B\u0191\u0192\u0193\u0260\u0194\u0263\u0196\u0269\u0197\u0268\u0198\u0199\u019C\u026F\u019D\u0272\u019F\u0275\u01A0\u01A1\u01A2\u01A3\u01A4\u01A5\u01A6\u0280\u01A7\u01A8\u01A9\u0283\u01AC\u01AD\u01AE\u0288\u01AF\u01B0\u01B1\u028A\u01B2\u028B\u01B3\u01B4\u01B5\u01B6\u01B7\u0292\u01B8\u01B9\u01BC\u01BD\u01C4\u01C6\u01C5\u01C6\u01C7\u01C9\u01C8\u01C9\u01CA\u01CC\u01CB\u01CC\u01CD\u01CE\u01CF\u01D0\u01D1\u01D2\u01D3\u01D4\u01D5\
u01D6\u01D7\u01D8\u01D9\u01DA\u01DB\u01DC\u01DE\u01DF\u01E0\u01E1\u01E2\u01E3\u01E4\u01E5\u01E6\u01E7\u01E8\u01E9\u01EA\u01EB\u01EC\u01ED\u01EE\u01EF\u01F1\u01F3\u01F2\u01F3\u01F4\u01F5\u01F6\u0195\u01F7\u01BF\u01F8\u01F9\u01FA\u01FB\u01FC\u01FD\u01FE\u01FF\u0200\u0201\u0202\u0203\u0204\u0205\u0206\u0207\u0208\u0209\u020A\u020B\u020C\u020D\u020E\u020F\u0210\u0211\u0212\u0213\u0214\u0215\u0216\u0217\u0218\u0219\u021A\u021B\u021C\u021D\u021E\u021F\u0220\u019E\u0222\u0223\u0224\u0225\u0226\u0227\u0228\u0229\u022A\u022B\u022C\u022D\u022E\u022F\u0230\u0231\u0232\u0233\u023A\u2C65\u023B\u023C\u023D\u019A\u023E\u2C66\u0241\u0242\u0243\u0180\u0244\u0289\u0245\u028C\u0246\u0247\u0248\u0249\u024A\u024B\u024C\u024D\u024E\u024F\u0345\u03B9\u0370\u0371\u0372\u0373\u0376\u0377\u037F\u03F3\u0386\u03AC\u0388\u03AD\u0389\u03AE\u038A\u03AF\u038C\u03CC\u038E\u03CD\u038F\u03CE\u0391\u03B1\u0392\u03B2\u0393\u03B3\u0394\u03B4\u0395\u03B5\u0396\u03B6\u0397\u03B7\u0398\u03B8\u0399\u03B9\u039A\u03BA\u039B\u03BB\u039C\u03BC\u039D\u03BD\u039E\u03BE\u039F\u03BF\u03A0\u03C0\u03A1\u03C1\u03A3\u03C3\u03A4\u03C4\u03A5\u03C5\u03A6\u03C6\u03A7\u03C7\u03A8\u03C8\u03A9\u03C9\u03AA\u03CA\u03AB\u03CB\u03C2\u03C3\u03CF\u03D7\u03D0\u03B2\u03D1\u03B8\u03D5\u03C6\u03D6\u03C0\u03D8\u03D9\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03E3\u03E4\u03E5\u03E6\u03E7\u03E8\u03E9\u03EA\u03EB\u03EC\u03ED\u03EE\u03EF\u03F0\u03BA\u03F1\u03C1\u03F4\u03B8\u03F5\u03B5\u03F7\u03F8\u03F9\u03F2\u03FA\u03FB\u03FD\u037B\u03FE\u037C\u03FF\u037D\u0400\u0450\u0401\u0451\u0402\u0452\u0403\u0453\u0404\u0454\u0405\u0455\u0406\u0456\u0407\u0457\u0408\u0458\u0409\u0459\u040A\u045A\u040B\u045B\u040C\u045C\u040D\u045D\u040E\u045E\u040F\u045F\u0410\u0430\u0411\u0431\u0412\u0432\u0413\u0433\u0414\u0434\u0415\u0435\u0416\u0436\u0417\u0437\u0418\u0438\u0419\u0439\u041A\u043A\u041B\u043B\u041C\u043C\u041D\u043D\u041E\u043E\u041F\u043F\u0420\u0440\u0421\u0441\u0422\u0442\u0423\u0443\u0424\u0444\u0425\u0445\u0426\u0446\u0427\u0447\u0
428\u0448\u0429\u0449\u042A\u044A\u042B\u044B\u042C\u044C\u042D\u044D\u042E\u044E\u042F\u044F\u0460\u0461\u0462\u0463\u0464\u0465\u0466\u0467\u0468\u0469\u046A\u046B\u046C\u046D\u046E\u046F\u0470\u0471\u0472\u0473\u0474\u0475\u0476\u0477\u0478\u0479\u047A\u047B\u047C\u047D\u047E\u047F\u0480\u0481\u048A\u048B\u048C\u048D\u048E\u048F\u0490\u0491\u0492\u0493\u0494\u0495\u0496\u0497\u0498\u0499\u049A\u049B\u049C\u049D\u049E\u049F\u04A0\u04A1\u04A2\u04A3\u04A4\u04A5\u04A6\u04A7\u04A8\u04A9\u04AA\u04AB\u04AC\u04AD\u04AE\u04AF\u04B0\u04B1\u04B2\u04B3\u04B4\u04B5\u04B6\u04B7\u04B8\u04B9\u04BA\u04BB\u04BC\u04BD\u04BE\u04BF\u04C0\u04CF\u04C1\u04C2\u04C3\u04C4\u04C5\u04C6\u04C7\u04C8\u04C9\u04CA\u04CB\u04CC\u04CD\u04CE\u04D0\u04D1\u04D2\u04D3\u04D4\u04D5\u04D6\u04D7\u04D8\u04D9\u04DA\u04DB\u04DC\u04DD\u04DE\u04DF\u04E0\u04E1\u04E2\u04E3\u04E4\u04E5\u04E6\u04E7\u04E8\u04E9\u04EA\u04EB\u04EC\u04ED\u04EE\u04EF\u04F0\u04F1\u04F2\u04F3\u04F4\u04F5\u04F6\u04F7\u04F8\u04F9\u04FA\u04FB\u04FC\u04FD\u04FE\u04FF\u0500\u0501\u0502\u0503\u0504\u0505\u0506\u0507\u0508\u0509\u050A\u050B\u050C\u050D\u050E\u050F\u0510\u0511\u0512\u0513\u0514\u0515\u0516\u0517\u0518\u0519\u051A\u051B\u051C\u051D\u051E\u051F\u0520\u0521\u0522\u0523\u0524\u0525\u0526\u0527\u0528\u0529\u052A\u052B\u052C\u052D\u052E\u052F\u0531\u0561\u0532\u0562\u0533\u0563\u0534\u0564\u0535\u0565\u0536\u0566\u0537\u0567\u0538\u0568\u0539\u0569\u053A\u056A\u053B\u056B\u053C\u056C\u053D\u056D\u053E\u056E\u053F\u056F\u0540\u0570\u0541\u0571\u0542\u0572\u0543\u0573\u0544\u0574\u0545\u0575\u0546\u0576\u0547\u0577\u0548\u0578\u0549\u0579\u054A\u057A\u054B\u057B\u054C\u057C\u054D\u057D\u054E\u057E\u054F\u057F\u0550\u0580\u0551\u0581\u0552\u0582\u0553\u0583\u0554\u0584\u0555\u0585\u0556\u0586\u10A0\u2D00\u10A1\u2D01\u10A2\u2D02\u10A3\u2D03\u10A4\u2D04\u10A5\u2D05\u10A6\u2D06\u10A7\u2D07\u10A8\u2D08\u10A9\u2D09\u10AA\u2D0A\u10AB\u2D0B\u10AC\u2D0C\u10AD\u2D0D\u10AE\u2D0E\u10AF\u2D0F\u10B0\u2D10\u10B1\u2D11\u10B2\u2D12\u10B3\u2D13\u10B4\u2D1
4\u10B5\u2D15\u10B6\u2D16\u10B7\u2D17\u10B8\u2D18\u10B9\u2D19\u10BA\u2D1A\u10BB\u2D1B\u10BC\u2D1C\u10BD\u2D1D\u10BE\u2D1E\u10BF\u2D1F\u10C0\u2D20\u10C1\u2D21\u10C2\u2D22\u10C3\u2D23\u10C4\u2D24\u10C5\u2D25\u10C7\u2D27\u10CD\u2D2D\u13F8\u13F0\u13F9\u13F1\u13FA\u13F2\u13FB\u13F3\u13FC\u13F4\u13FD\u13F5\u1E00\u1E01\u1E02\u1E03\u1E04\u1E05\u1E06\u1E07\u1E08\u1E09\u1E0A\u1E0B\u1E0C\u1E0D\u1E0E\u1E0F\u1E10\u1E11\u1E12\u1E13\u1E14\u1E15\u1E16\u1E17\u1E18\u1E19\u1E1A\u1E1B\u1E1C\u1E1D\u1E1E\u1E1F\u1E20\u1E21\u1E22\u1E23\u1E24\u1E25\u1E26\u1E27\u1E28\u1E29\u1E2A\u1E2B\u1E2C\u1E2D\u1E2E\u1E2F\u1E30\u1E31\u1E32\u1E33\u1E34\u1E35\u1E36\u1E37\u1E38\u1E39\u1E3A\u1E3B\u1E3C\u1E3D\u1E3E\u1E3F\u1E40\u1E41\u1E42\u1E43\u1E44\u1E45\u1E46\u1E47\u1E48\u1E49\u1E4A\u1E4B\u1E4C\u1E4D\u1E4E\u1E4F\u1E50\u1E51\u1E52\u1E53\u1E54\u1E55\u1E56\u1E57\u1E58\u1E59\u1E5A\u1E5B\u1E5C\u1E5D\u1E5E\u1E5F\u1E60\u1E61\u1E62\u1E63\u1E64\u1E65\u1E66\u1E67\u1E68\u1E69\u1E6A\u1E6B\u1E6C\u1E6D\u1E6E\u1E6F\u1E70\u1E71\u1E72\u1E73\u1E74\u1E75\u1E76\u1E77\u1E78\u1E79\u1E7A\u1E7B\u1E7C\u1E7D\u1E7E\u1E7F\u1E80\u1E81\u1E82\u1E83\u1E84\u1E85\u1E86\u1E87\u1E88\u1E89\u1E8A\u1E8B\u1E8C\u1E8D\u1E8E\u1E8F\u1E90\u1E91\u1E92\u1E93\u1E94\u1E95\u1E9B\u1E61\u1E9E\u00DF\u1EA0\u1EA1\u1EA2\u1EA3\u1EA4\u1EA5\u1EA6\u1EA7\u1EA8\u1EA9\u1EAA\u1EAB\u1EAC\u1EAD\u1EAE\u1EAF\u1EB0\u1EB1\u1EB2\u1EB3\u1EB4\u1EB5\u1EB6\u1EB7\u1EB8\u1EB9\u1EBA\u1EBB\u1EBC\u1EBD\u1EBE\u1EBF\u1EC0\u1EC1\u1EC2\u1EC3\u1EC4\u1EC5\u1EC6\u1EC7\u1EC8\u1EC9\u1ECA\u1ECB\u1ECC\u1ECD\u1ECE\u1ECF\u1ED0\u1ED1\u1ED2\u1ED3\u1ED4\u1ED5\u1ED6\u1ED7\u1ED8\u1ED9\u1EDA\u1EDB\u1EDC\u1EDD\u1EDE\u1EDF\u1EE0\u1EE1\u1EE2\u1EE3\u1EE4\u1EE5\u1EE6\u1EE7\u1EE8\u1EE9\u1EEA\u1EEB\u1EEC\u1EED\u1EEE\u1EEF\u1EF0\u1EF1\u1EF2\u1EF3\u1EF4\u1EF5\u1EF6\u1EF7\u1EF8\u1EF9\u1EFA\u1EFB\u1EFC\u1EFD\u1EFE\u1EFF\u1F08\u1F00\u1F09\u1F01\u1F0A\u1F02\u1F0B\u1F03\u1F0C\u1F04\u1F0D\u1F05\u1F0E\u1F06\u1F0F\u1F07\u1F18\u1F10\u1F19\u1F11\u1F1A\u1F12\u1F1B\u1F13\u1F1C\u1F14\u1F1D\u1F15\u1F28\u1F20\u1F29\u1F21\u1F2A\
u1F22\u1F2B\u1F23\u1F2C\u1F24\u1F2D\u1F25\u1F2E\u1F26\u1F2F\u1F27\u1F38\u1F30\u1F39\u1F31\u1F3A\u1F32\u1F3B\u1F33\u1F3C\u1F34\u1F3D\u1F35\u1F3E\u1F36\u1F3F\u1F37\u1F48\u1F40\u1F49\u1F41\u1F4A\u1F42\u1F4B\u1F43\u1F4C\u1F44\u1F4D\u1F45\u1F59\u1F51\u1F5B\u1F53\u1F5D\u1F55\u1F5F\u1F57\u1F68\u1F60\u1F69\u1F61\u1F6A\u1F62\u1F6B\u1F63\u1F6C\u1F64\u1F6D\u1F65\u1F6E\u1F66\u1F6F\u1F67\u1F88\u1F80\u1F89\u1F81\u1F8A\u1F82\u1F8B\u1F83\u1F8C\u1F84\u1F8D\u1F85\u1F8E\u1F86\u1F8F\u1F87\u1F98\u1F90\u1F99\u1F91\u1F9A\u1F92\u1F9B\u1F93\u1F9C\u1F94\u1F9D\u1F95\u1F9E\u1F96\u1F9F\u1F97\u1FA8\u1FA0\u1FA9\u1FA1\u1FAA\u1FA2\u1FAB\u1FA3\u1FAC\u1FA4\u1FAD\u1FA5\u1FAE\u1FA6\u1FAF\u1FA7\u1FB8\u1FB0\u1FB9\u1FB1\u1FBA\u1F70\u1FBB\u1F71\u1FBC\u1FB3\u1FBE\u03B9\u1FC8\u1F72\u1FC9\u1F73\u1FCA\u1F74\u1FCB\u1F75\u1FCC\u1FC3\u1FD8\u1FD0\u1FD9\u1FD1\u1FDA\u1F76\u1FDB\u1F77\u1FE8\u1FE0\u1FE9\u1FE1\u1FEA\u1F7A\u1FEB\u1F7B\u1FEC\u1FE5\u1FF8\u1F78\u1FF9\u1F79\u1FFA\u1F7C\u1FFB\u1F7D\u1FFC\u1FF3\u2126\u03C9\u212A\u006B\u212B\u00E5\u2132\u214E\u2160\u2170\u2161\u2171\u2162\u2172\u2163\u2173\u2164\u2174\u2165\u2175\u2166\u2176\u2167\u2177\u2168\u2178\u2169\u2179\u216A\u217A\u216B\u217B\u216C\u217C\u216D\u217D\u216E\u217E\u216F\u217F\u2183\u2184\u24B6\u24D0\u24B7\u24D1\u24B8\u24D2\u24B9\u24D3\u24BA\u24D4\u24BB\u24D5\u24BC\u24D6\u24BD\u24D7\u24BE\u24D8\u24BF\u24D9\u24C0\u24DA\u24C1\u24DB\u24C2\u24DC\u24C3\u24DD\u24C4\u24DE\u24C5\u24DF\u24C6\u24E0\u24C7\u24E1\u24C8\u24E2\u24C9\u24E3\u24CA\u24E4\u24CB\u24E5\u24CC\u24E6\u24CD\u24E7\u24CE\u24E8\u24CF\u24E9\u2C00\u2C30\u2C01\u2C31\u2C02\u2C32\u2C03\u2C33\u2C04\u2C34\u2C05\u2C35\u2C06\u2C36\u2C07\u2C37\u2C08\u2C38\u2C09\u2C39\u2C0A\u2C3A\u2C0B\u2C3B\u2C0C\u2C3C\u2C0D\u2C3D\u2C0E\u2C3E\u2C0F\u2C3F\u2C10\u2C40\u2C11\u2C41\u2C12\u2C42\u2C13\u2C43\u2C14\u2C44\u2C15\u2C45\u2C16\u2C46\u2C17\u2C47\u2C18\u2C48\u2C19\u2C49\u2C1A\u2C4A\u2C1B\u2C4B\u2C1C\u2C4C\u2C1D\u2C4D\u2C1E\u2C4E\u2C1F\u2C4F\u2C20\u2C50\u2C21\u2C51\u2C22\u2C52\u2C23\u2C53\u2C24\u2C54\u2C25\u2C55\u2C26\u2C56\u2
C27\u2C57\u2C28\u2C58\u2C29\u2C59\u2C2A\u2C5A\u2C2B\u2C5B\u2C2C\u2C5C\u2C2D\u2C5D\u2C2E\u2C5E\u2C60\u2C61\u2C62\u026B\u2C63\u1D7D\u2C64\u027D\u2C67\u2C68\u2C69\u2C6A\u2C6B\u2C6C\u2C6D\u0251\u2C6E\u0271\u2C6F\u0250\u2C70\u0252\u2C72\u2C73\u2C75\u2C76\u2C7E\u023F\u2C7F\u0240\u2C80\u2C81\u2C82\u2C83\u2C84\u2C85\u2C86\u2C87\u2C88\u2C89\u2C8A\u2C8B\u2C8C\u2C8D\u2C8E\u2C8F\u2C90\u2C91\u2C92\u2C93\u2C94\u2C95\u2C96\u2C97\u2C98\u2C99\u2C9A\u2C9B\u2C9C\u2C9D\u2C9E\u2C9F\u2CA0\u2CA1\u2CA2\u2CA3\u2CA4\u2CA5\u2CA6\u2CA7\u2CA8\u2CA9\u2CAA\u2CAB\u2CAC\u2CAD\u2CAE\u2CAF\u2CB0\u2CB1\u2CB2\u2CB3\u2CB4\u2CB5\u2CB6\u2CB7\u2CB8\u2CB9\u2CBA\u2CBB\u2CBC\u2CBD\u2CBE\u2CBF\u2CC0\u2CC1\u2CC2\u2CC3\u2CC4\u2CC5\u2CC6\u2CC7\u2CC8\u2CC9\u2CCA\u2CCB\u2CCC\u2CCD\u2CCE\u2CCF\u2CD0\u2CD1\u2CD2\u2CD3\u2CD4\u2CD5\u2CD6\u2CD7\u2CD8\u2CD9\u2CDA\u2CDB\u2CDC\u2CDD\u2CDE\u2CDF\u2CE0\u2CE1\u2CE2\u2CE3\u2CEB\u2CEC\u2CED\u2CEE\u2CF2\u2CF3\uA640\uA641\uA642\uA643\uA644\uA645\uA646\uA647\uA648\uA649\uA64A\uA64B\uA64C\uA64D\uA64E\uA64F\uA650\uA651\uA652\uA653\uA654\uA655\uA656\uA657\uA658\uA659\uA65A\uA65B\uA65C\uA65D\uA65E\uA65F\uA660\uA661\uA662\uA663\uA664\uA665\uA666\uA667\uA668\uA669\uA66A\uA66B\uA66C\uA66D\uA680\uA681\uA682\uA683\uA684\uA685\uA686\uA687\uA688\uA689\uA68A\uA68B\uA68C\uA68D\uA68E\uA68F\uA690\uA691\uA692\uA693\uA694\uA695\uA696\uA697\uA698\uA699\uA69A\uA69B\uA722\uA723\uA724\uA725\uA726\uA727\uA728\uA729\uA72A\uA72B\uA72C\uA72D\uA72E\uA72F\uA732\uA733\uA734\uA735\uA736\uA737\uA738\uA739\uA73A\uA73B\uA73C\uA73D\uA73E\uA73F\uA740\uA741\uA742\uA743\uA744\uA745\uA746\uA747\uA748\uA749\uA74A\uA74B\uA74C\uA74D\uA74E\uA74F\uA750\uA751\uA752\uA753\uA754\uA755\uA756\uA757\uA758\uA759\uA75A\uA75B\uA75C\uA75D\uA75E\uA75F\uA760\uA761\uA762\uA763\uA764\uA765\uA766\uA767\uA768\uA769\uA76A\uA76B\uA76C\uA76D\uA76E\uA76F\uA779\uA77A\uA77B\uA77C\uA77D\u1D79\uA77E\uA77F\uA780\uA781\uA782\uA783\uA784\uA785\uA786\uA787\uA78B\uA78C\uA78D\u0265\uA790\uA791\uA792\uA793\uA796\uA797\uA798\uA799\uA79A\uA79B\uA79C\uA79
D\uA79E\uA79F\uA7A0\uA7A1\uA7A2\uA7A3\uA7A4\uA7A5\uA7A6\uA7A7\uA7A8\uA7A9\uA7AA\u0266\uA7AB\u025C\uA7AC\u0261\uA7AD\u026C\uA7B0\u029E\uA7B1\u0287\uA7B2\u029D\uA7B3\uAB53\uA7B4\uA7B5\uA7B6\uA7B7\uAB70\u13A0\uAB71\u13A1\uAB72\u13A2\uAB73\u13A3\uAB74\u13A4\uAB75\u13A5\uAB76\u13A6\uAB77\u13A7\uAB78\u13A8\uAB79\u13A9\uAB7A\u13AA\uAB7B\u13AB\uAB7C\u13AC\uAB7D\u13AD\uAB7E\u13AE\uAB7F\u13AF\uAB80\u13B0\uAB81\u13B1\uAB82\u13B2\uAB83\u13B3\uAB84\u13B4\uAB85\u13B5\uAB86\u13B6\uAB87\u13B7\uAB88\u13B8\uAB89\u13B9\uAB8A\u13BA\uAB8B\u13BB\uAB8C\u13BC\uAB8D\u13BD\uAB8E\u13BE\uAB8F\u13BF\uAB90\u13C0\uAB91\u13C1\uAB92\u13C2\uAB93\u13C3\uAB94\u13C4\uAB95\u13C5\uAB96\u13C6\uAB97\u13C7\uAB98\u13C8\uAB99\u13C9\uAB9A\u13CA\uAB9B\u13CB\uAB9C\u13CC\uAB9D\u13CD\uAB9E\u13CE\uAB9F\u13CF\uABA0\u13D0\uABA1\u13D1\uABA2\u13D2\uABA3\u13D3\uABA4\u13D4\uABA5\u13D5\uABA6\u13D6\uABA7\u13D7\uABA8\u13D8\uABA9\u13D9\uABAA\u13DA\uABAB\u13DB\uABAC\u13DC\uABAD\u13DD\uABAE\u13DE\uABAF\u13DF\uABB0\u13E0\uABB1\u13E1\uABB2\u13E2\uABB3\u13E3\uABB4\u13E4\uABB5\u13E5\uABB6\u13E6\uABB7\u13E7\uABB8\u13E8\uABB9\u13E9\uABBA\u13EA\uABBB\u13EB\uABBC\u13EC\uABBD\u13ED\uABBE\u13EE\uABBF\u13EF\uFF21\uFF41\uFF22\uFF42\uFF23\uFF43\uFF24\uFF44\uFF25\uFF45\uFF26\uFF46\uFF27\uFF47\uFF28\uFF48\uFF29\uFF49\uFF2A\uFF4A\uFF2B\uFF4B\uFF2C\uFF4C\uFF2D\uFF4D\uFF2E\uFF4E\uFF2F\uFF4F\uFF30\uFF50\uFF31\uFF51\uFF32\uFF52\uFF33\uFF53\uFF34\uFF54\uFF35\uFF55\uFF36\uFF56\uFF37\uFF57\uFF38\uFF58\uFF39\uFF59\uFF3A\uFF5A"; } // class CaseFoldTable } /* // The oneToOneMappings string has been generated with the following F# program, which // extracts the (non-Turkic) 1-to-1 case folding mappings for chars below 0x10000 from // http://www.unicode.org/Public/8.8.0/ucd/CaseFolding.txt open FParsec.Primitives open FParsec.CharParsers (* # CaseFolding-8.0.0.txt # Date: 2015-01-13, 18:16:36 GMT [MD] # # Unicode Character Database # Copyright (c) 1991-2015 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # Case Folding Properties # # This file is a supplement to the UnicodeData file. # It provides a case folding mapping generated from the Unicode Character Database. # If all characters are mapped according to the full mapping below, then # case differences (according to UnicodeData.txt and SpecialCasing.txt) # are eliminated. # # The data supports both implementations that require simple case foldings # (where string lengths don't change), and implementations that allow full case folding # (where string lengths may grow). Note that where they can be supported, the # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. # # All code points not listed in this file map to themselves. # # NOTE: case folding does not preserve normalization formats! # # For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard. # # ================================================================================ # Format # ================================================================================ # The entries in this file are in the following machine-readable format: # # <code>; <status>; <mapping>; # <name> # # The status field is: # C: common case folding, common mappings shared by both simple and full mappings. # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. # S: simple case folding, mappings to single characters where different from F. # T: special case for uppercase I and dotted uppercase I # - For non-Turkic languages, this mapping is normally not used. # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. # Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
# See the discussions of case mapping in the Unicode Standard for more information. # # Usage: # A. To do a simple case folding, use the mappings with status C + S. # B. To do a full case folding, use the mappings with status C + F. # # The mappings with status T can be used or omitted depending on the desired case-folding # behavior. (The default option is to exclude them.) # # ================================================================= # Property: Case_Folding # All code points not explicitly listed for Case_Folding # have the value C for the status field, and the code point itself for the mapping field. # ================================================================= *) // continue txt file as string let datastr = @"0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C 0044; C; 0064; # LATIN CAPITAL LETTER D 0045; C; 0065; # LATIN CAPITAL LETTER E 0046; C; 0066; # LATIN CAPITAL LETTER F 0047; C; 0067; # LATIN CAPITAL LETTER G 0048; C; 0068; # LATIN CAPITAL LETTER H 0049; C; 0069; # LATIN CAPITAL LETTER I 0049; T; 0131; # LATIN CAPITAL LETTER I 004A; C; 006A; # LATIN CAPITAL LETTER J 004B; C; 006B; # LATIN CAPITAL LETTER K 004C; C; 006C; # LATIN CAPITAL LETTER L 004D; C; 006D; # LATIN CAPITAL LETTER M 004E; C; 006E; # LATIN CAPITAL LETTER N 004F; C; 006F; # LATIN CAPITAL LETTER O 0050; C; 0070; # LATIN CAPITAL LETTER P 0051; C; 0071; # LATIN CAPITAL LETTER Q 0052; C; 0072; # LATIN CAPITAL LETTER R 0053; C; 0073; # LATIN CAPITAL LETTER S 0054; C; 0074; # LATIN CAPITAL LETTER T 0055; C; 0075; # LATIN CAPITAL LETTER U 0056; C; 0076; # LATIN CAPITAL LETTER V 0057; C; 0077; # LATIN CAPITAL LETTER W 0058; C; 0078; # LATIN CAPITAL LETTER X 0059; C; 0079; # LATIN CAPITAL LETTER Y 005A; C; 007A; # LATIN CAPITAL LETTER Z 00B5; C; 03BC; # MICRO SIGN 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH 
CIRCUMFLEX 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE 00C6; C; 00E6; # LATIN CAPITAL LETTER AE 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON 0114; C; 0115; # 
LATIN CAPITAL LETTER E WITH BREVE 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE 012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 014A; C; 014B; # LATIN CAPITAL LETTER ENG 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0152; C; 0153; # LATIN CAPITAL LIGATURE OE 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 015E; C; 015F; # 
LATIN CAPITAL LETTER S WITH CEDILLA 0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON 017F; C; 0073; # LATIN SMALL LETTER LONG S 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA 0196; C; 0269; # LATIN CAPITAL LETTER IOTA 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M 019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN 01A2; C; 01A3; # LATIN CAPITAL LETTER 
OI 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK 01A6; C; 0280; # LATIN LETTER YR 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO 01A9; C; 0283; # LATIN CAPITAL LETTER ESH 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE 01B7; C; 0292; # LATIN CAPITAL LETTER EZH 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE 01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON 01F0; F; 006A 030C; # LATIN 
SMALL LETTER J WITH CARON 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW 021C; C; 021D; # LATIN CAPITAL LETTER YOGH 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON 0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG 0222; C; 0223; # LATIN CAPITAL LETTER OU 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON 023A; C; 2C65; # LATIN 
CAPITAL LETTER A WITH STROKE 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE 0244; C; 0289; # LATIN CAPITAL LETTER U BAR 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI 0370; C; 0371; # GREEK CAPITAL LETTER HETA 0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI 0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 037F; C; 03F3; # GREEK CAPITAL LETTER YOT 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS 038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA 0392; C; 03B2; # GREEK CAPITAL LETTER BETA 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA 0397; C; 03B7; # GREEK CAPITAL LETTER ETA 0398; C; 03B8; # GREEK CAPITAL LETTER THETA 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA 039C; C; 03BC; # GREEK CAPITAL LETTER MU 039D; C; 03BD; # GREEK CAPITAL LETTER NU 039E; C; 03BE; # GREEK CAPITAL LETTER XI 039F; C; 03BF; # GREEK CAPITAL 
LETTER OMICRON 03A0; C; 03C0; # GREEK CAPITAL LETTER PI 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA 03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL 03D0; C; 03B2; # GREEK BETA SYMBOL 03D1; C; 03B8; # GREEK THETA SYMBOL 03D5; C; 03C6; # GREEK PHI SYMBOL 03D6; C; 03C0; # GREEK PI SYMBOL 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA 03DA; C; 03DB; # GREEK LETTER STIGMA 03DC; C; 03DD; # GREEK LETTER DIGAMMA 03DE; C; 03DF; # GREEK LETTER KOPPA 03E0; C; 03E1; # GREEK LETTER SAMPI 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI 03F0; C; 03BA; # GREEK KAPPA SYMBOL 03F1; C; 03C1; # GREEK RHO SYMBOL 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE 0404; 
C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE 0410; C; 0430; # CYRILLIC CAPITAL LETTER A 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE 0412; C; 0432; # CYRILLIC CAPITAL LETTER VE 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE 0414; C; 0434; # CYRILLIC CAPITAL LETTER DE 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE 0418; C; 0438; # CYRILLIC CAPITAL LETTER I 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN 041E; C; 043E; # CYRILLIC CAPITAL LETTER O 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE 0423; C; 0443; # CYRILLIC CAPITAL LETTER U 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN 042D; C; 044D; # CYRILLIC CAPITAL LETTER E 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU 042F; C; 044F; # CYRILLIC CAPITAL 
LETTER YA 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA 0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 0478; C; 0479; # CYRILLIC CAPITAL LETTER UK 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U 04B0; C; 04B1; # CYRILLIC 
CAPITAL LETTER STRAIGHT U WITH STROKE 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL 04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH 
DESCENDER 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK 04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK 0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA 0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA 0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE 051A; C; 051B; # CYRILLIC CAPITAL LETTER QA 051C; C; 051D; # CYRILLIC CAPITAL LETTER WE 051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA 0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK 0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK 0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER 0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK 052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE 052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE 052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE 053B; C; 056B; # ARMENIAN CAPITAL LETTER INI 053C; C; 056C; # ARMENIAN 
CAPITAL LETTER LIWN 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH 054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN 10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR 10B0; C; 2D10; # GEORGIAN CAPITAL 
LETTER RAE 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE 10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN 10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 13F8; C; 13F0; # CHEROKEE SMALL LETTER YE 13F9; C; 13F1; # CHEROKEE SMALL LETTER YI 13FA; C; 13F2; # CHEROKEE SMALL LETTER YO 13FB; C; 13F3; # CHEROKEE SMALL LETTER YU 13FC; C; 13F4; # CHEROKEE SMALL LETTER YV 13FD; C; 13F5; # CHEROKEE SMALL LETTER MV 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE 1E18; C; 
1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW 1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE 1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON 
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS 1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE 1E9E; F; 0073 0073; # LATIN CAPITAL 
LETTER SHARP S 1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE 1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN 
AND GRAVE 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW 1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE 1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL 1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V 1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA 1F2A; C; 1F22; 
# GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 1F6B; C; 1F63; # 
GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8E; F; 
1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9E; 
F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA 
AND OXIA AND PROSGEGRAMMENI 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH 
DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2126; C; 03C9; # OHM SIGN 212A; C; 006B; # KELVIN SIGN 212B; C; 00E5; # ANGSTROM SIGN 2132; C; 214E; # TURNED CAPITAL F 2160; C; 2170; # ROMAN NUMERAL ONE 2161; C; 2171; # ROMAN NUMERAL TWO 2162; C; 2172; # ROMAN NUMERAL THREE 2163; C; 2173; # ROMAN 
NUMERAL FOUR 2164; C; 2174; # ROMAN NUMERAL FIVE 2165; C; 2175; # ROMAN NUMERAL SIX 2166; C; 2176; # ROMAN NUMERAL SEVEN 2167; C; 2177; # ROMAN NUMERAL EIGHT 2168; C; 2178; # ROMAN NUMERAL NINE 2169; C; 2179; # ROMAN NUMERAL TEN 216A; C; 217A; # ROMAN NUMERAL ELEVEN 216B; C; 217B; # ROMAN NUMERAL TWELVE 216C; C; 217C; # ROMAN NUMERAL FIFTY 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI 2C04; C; 2C34; # GLAGOLITIC 
CAPITAL LETTER DOBRO 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO 2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS 2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA 2C2C; C; 2C5C; # GLAGOLITIC 
CAPITAL LETTER SHTAPIC 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER 2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA 2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK 2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A 2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA 2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H 2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL 2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA 2C98; C; 2C99; # COPTIC CAPITAL LETTER MI 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER 
OOU 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE 2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA 2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER A652; C; A653; # 
CYRILLIC CAPITAL LETTER IOTIFIED YAT A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O A680; C; A681; # CYRILLIC CAPITAL LETTER DWE A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE A694; C; A695; # CYRILLIC CAPITAL LETTER HWE A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN A726; C; A727; # LATIN CAPITAL LETTER HENG A728; C; A729; # LATIN CAPITAL LETTER TZ A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA A732; C; A733; # LATIN CAPITAL LETTER AA A734; C; A735; # LATIN CAPITAL LETTER AO A736; C; A737; # LATIN CAPITAL LETTER AU A738; C; A739; # LATIN CAPITAL LETTER AV A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR A73C; C; A73D; # LATIN 
CAPITAL LETTER AY A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE A746; C; A747; # LATIN CAPITAL LETTER BROKEN L A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP A74E; C; A74F; # LATIN CAPITAL LETTER OO A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE A760; C; A761; # LATIN CAPITAL LETTER VY A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER A768; C; A769; # LATIN CAPITAL LETTER VEND A76A; C; A76B; # LATIN CAPITAL LETTER ET A76C; C; A76D; # LATIN CAPITAL LETTER IS A76E; C; A76F; # LATIN CAPITAL LETTER CON A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G A780; C; A781; # LATIN CAPITAL LETTER TURNED L A782; C; A783; # LATIN CAPITAL LETTER INSULAR R A784; C; A785; # LATIN CAPITAL LETTER INSULAR S A786; C; A787; # LATIN CAPITAL LETTER INSULAR T A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR A796; C; A797; # LATIN 
CAPITAL LETTER B WITH FLOURISH A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL A7B3; C; AB53; # LATIN CAPITAL LETTER CHI A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E AB72; C; 13A2; # CHEROKEE SMALL LETTER I AB73; C; 13A3; # CHEROKEE SMALL LETTER O AB74; C; 13A4; # CHEROKEE SMALL LETTER U AB75; C; 13A5; # CHEROKEE SMALL LETTER V AB76; C; 13A6; # CHEROKEE SMALL LETTER GA AB77; C; 13A7; # CHEROKEE SMALL LETTER KA AB78; C; 13A8; # CHEROKEE SMALL LETTER GE AB79; C; 13A9; # CHEROKEE SMALL LETTER GI AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI AB80; C; 13B0; # CHEROKEE SMALL LETTER HO AB81; C; 13B1; # CHEROKEE SMALL LETTER HU AB82; C; 13B2; # CHEROKEE SMALL LETTER HV AB83; C; 13B3; # CHEROKEE SMALL LETTER LA AB84; C; 13B4; # CHEROKEE SMALL LETTER LE AB85; C; 13B5; # CHEROKEE SMALL LETTER LI AB86; C; 13B6; # CHEROKEE SMALL LETTER LO AB87; C; 13B7; # CHEROKEE 
SMALL LETTER LU AB88; C; 13B8; # CHEROKEE SMALL LETTER LV AB89; C; 13B9; # CHEROKEE SMALL LETTER MA AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH AB91; C; 13C1; # CHEROKEE SMALL LETTER NE AB92; C; 13C2; # CHEROKEE SMALL LETTER NI AB93; C; 13C3; # CHEROKEE SMALL LETTER NO AB94; C; 13C4; # CHEROKEE SMALL LETTER NU AB95; C; 13C5; # CHEROKEE SMALL LETTER NV AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA AB9D; C; 13CD; # CHEROKEE SMALL LETTER S AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI ABB6; C; 13E6; # CHEROKEE SMALL 
LETTER TSO ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER 
T FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10400; C; 10428; # DESERET CAPITAL LETTER LONG I 10401; C; 10429; # DESERET CAPITAL LETTER LONG E 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO 1040C; C; 10434; # DESERET CAPITAL LETTER AY 1040D; C; 10435; # DESERET CAPITAL LETTER OW 1040E; C; 10436; # DESERET CAPITAL LETTER WU 1040F; C; 10437; # DESERET CAPITAL LETTER YEE 10410; C; 10438; # DESERET CAPITAL LETTER H 10411; C; 10439; # DESERET CAPITAL LETTER PEE 10412; C; 1043A; # DESERET CAPITAL LETTER BEE 10413; C; 1043B; # DESERET CAPITAL LETTER TEE 10414; C; 1043C; # DESERET CAPITAL LETTER DEE 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE 10416; C; 1043E; # DESERET CAPITAL LETTER JEE 10417; C; 1043F; # DESERET CAPITAL LETTER KAY 10418; C; 10440; # DESERET CAPITAL LETTER GAY 10419; C; 10441; # DESERET CAPITAL LETTER EF 1041A; C; 10442; # DESERET CAPITAL LETTER VEE 1041B; C; 10443; # DESERET CAPITAL LETTER ETH 1041C; C; 10444; # DESERET CAPITAL LETTER THEE 1041D; C; 10445; # DESERET CAPITAL LETTER ES 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE 1041F; C; 10447; # DESERET CAPITAL LETTER ESH 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE 10421; C; 10449; # DESERET CAPITAL LETTER ER 10422; C; 1044A; # DESERET CAPITAL LETTER EL 10423; C; 1044B; # DESERET CAPITAL LETTER EM 
10424; C; 1044C; # DESERET CAPITAL LETTER EN 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # DESERET CAPITAL LETTER EW 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB 10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB 10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC 10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC 10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS 10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED 10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND 10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E 10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E 10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE 10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF 10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG 10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY 10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH 10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I 10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II 10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ 10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK 10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK 10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK 10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL 10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY 10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM 10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN 10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY 10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O 10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO 10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE 10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE 10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE 10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP 10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP 10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER 
10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER 10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES 10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ 10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET 10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT 10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY 10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH 10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U 10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU 10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE 10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE 10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV 10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI 118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU 118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA 118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO 118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II 118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU 118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E 118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O 118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG 118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA 118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO 118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY 118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ 118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC 118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN 118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD 118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE 118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG 118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA 118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT 118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM 
118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU 118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU 118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO 118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO 118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR 118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO " type CharMapping = | CommonMapping of int | SimpleMapping of int | TurkishMapping of int | FullMapping1 of int | FullMapping2 of int*int | FullMapping3 of int*int*int let hex2int c = (int c &&& 15) + (int c >>> 6)*9 // hex char to int let pCodePoint = manyMinMaxSatisfyL 4 5 isHex "codepoint with 4-5 hex digits" |>> fun s -> let mutable n = 0 for i = 0 to s.Length - 1 do n <- n*16 + hex2int s.[i] n let semi = skipString "; " let space = skipChar ' ' let pCharMapping = pipe3 pCodePoint (semi >>. anyChar) (semi >>. sepBy pCodePoint space .>> (semi >>. 
skipRestOfLine true)) (fun fromChar c toChars -> match c with | 'C' -> match toChars with | [n0] -> (fromChar, CommonMapping n0) | 'S' -> match toChars with | [n0] -> (fromChar, SimpleMapping n0) | 'T' -> match toChars with | [n0] -> (fromChar, TurkishMapping n0) | 'F' -> match toChars with | [n0] -> (fromChar, FullMapping1(n0)) | [n0; n1] -> (fromChar, FullMapping2(n0, n1)) | [n0; n1; n2] -> (fromChar, FullMapping3(n0, n1, n2))) let pAllMappings = many pCharMapping .>> eof let parseMappings() = match run pAllMappings datastr with | Success(xs, _,_) -> xs | Failure(msg,_,_) -> failwith msg let getOneToOneMappings() = parseMappings() |> List.choose (function (src, CommonMapping(dst)) | (src, SimpleMapping(dst)) when src < 0xffff -> Some (char src, char dst) | _ -> None) let getOneToOneMappingsAsStrings() = let pairs = getOneToOneMappings() let sb = new System.Text.StringBuilder() for c1, c2 in pairs do let c1s, c2s = (int c1).ToString("X4"), (int c2).ToString("X4") sb.Append("\u").Append(c1s).Append("\u").Append(c2s) |> ignore sb.ToString() let writeOneToOneMappingsToFile(path) = use file = new System.IO.StreamWriter(path, false, System.Text.Encoding.UTF8) let one2ones = getOneToOneMappingsAsStrings() file.WriteLine(one2ones) file.Close() writeOneToOneMappingsToFile(@"c:\temp\one2onemappings.txt") */ ================================================ FILE: FParsecCS/CharSet.cs ================================================
// Copyright (c) Stephan Tolksdorf 2008-2010
// License: Simplified BSD License. See accompanying documentation.

using System;
using System.Diagnostics;

namespace FParsec {

/// <summary>
/// A set of UTF-16 chars. Chars inside the range covered by the bit table are
/// tested with a single bit lookup; all other chars are kept in
/// <c>CharsNotInBitTable</c> and found by a linear search.
/// </summary>
internal sealed class CharSet {
    private const int WordSize     = 32;
    private const int Log2WordSize = 5;

    /// <summary>Smallest char in the set (0x10000 for the empty set).</summary>
    private int Min;
    /// <summary>Largest char in the set (-1 for the empty set).</summary>
    private int Max;
    /// <summary>The char value that corresponds to bit 0 of BitTable.</summary>
    private int BitTableMin;
    /// <summary>Bit i is set if the char (BitTableMin + i) is in the set.</summary>
    private int[] BitTable;
    /// <summary>The chars of the set that lie outside the range of BitTable.
    /// Is null for a set constructed from an empty string.</summary>
    private string CharsNotInBitTable; // We use a string here instead of a char[] because the
                                       // .NET JITs tend to produce better code for loops involving strings.

    /// <summary>Constructs a CharSet with a bit table of at most 32 words.</summary>
    public CharSet(string chars) : this(chars, 32) {}
    // because of mandatory bounds checking, we wouldn't get any advantage from a fixed size table

    /// <summary>Constructs a CharSet containing the chars in the given string.</summary>
    /// <param name="chars">The chars to put into the set. Repeated chars are allowed.</param>
    /// <param name="maxTableSize">The maximum number of 32-bit words in the bit table.
    /// Values outside the range 4 to 0x10000/32 are clamped to that range.</param>
    public CharSet(string chars, int maxTableSize) {
        if (chars.Length == 0) {
            BitTableMin = Min = 0x10000;
            Max = -1;
            BitTable = new int[0];
            // CharsNotInBitTable stays null, Contains checks for that
            return;
        }
        if (maxTableSize < 4) maxTableSize = 4;
        else if (maxTableSize > 0x10000/WordSize) maxTableSize = 0x10000/WordSize;
        int maxTableBits = maxTableSize*WordSize;

        // First pass: determine Min, Max, the table range and the number of chars
        // that do not fit into the table.
        char prevChar = chars[0];
        Min = prevChar;
        Max = prevChar;
        BitTableMin = -1;
        int bitTableMax = -1;
        int nCharsNotInTable = 0;
        for (int i = 1; i < chars.Length; ++i) {
            char c = chars[i];
            if (c == prevChar) continue; // filter out repeated chars
            prevChar = c;
            int prevMin = Min;
            if (c < Min) Min = c;
            int prevMax = Max;
            if (c > Max) Max = c;
            if (BitTableMin < 0) {
                // the first time the table range is exceeded the tableMin is set
                if (Max - Min >= maxTableBits) {
                    BitTableMin = prevMin;  // stays fixed
                    bitTableMax = prevMax;  // will be updated later
                    nCharsNotInTable = 1;   // c itself is the first char outside the table
                }
            } else if (c < BitTableMin || c >= BitTableMin + maxTableBits) {
                ++nCharsNotInTable;
            } else {
                bitTableMax = Math.Max(c, bitTableMax);
            }
        }
        if (BitTableMin < 0) {
            // all chars fit into the table
            BitTableMin = Min;
            bitTableMax = Max;
        }
        int nTableBits = bitTableMax - BitTableMin + 1;
        int tableSize = nTableBits < maxTableBits
                        ? nTableBits/WordSize + (nTableBits%WordSize != 0 ? 1 : 0)
                        : maxTableSize;
        BitTable = new int[tableSize];

        // Second pass: set the table bits and collect the chars outside the table range.
        // The chars are gathered in a scratch array from which the string is built
        // afterwards. A string must not be filled in place through a fixed pointer,
        // because modifying the chars of a string has undefined behaviour in C#.
        char[] notInTable = nCharsNotInTable > 0 ? new char[nCharsNotInTable] : null;
        prevChar = chars[0] != 'x' ? 'x' : 'y'; // a char that differs from chars[0]
        int n = 0;
        for (int i = 0; i < chars.Length; ++i) {
            char c = chars[i];
            if (c == prevChar) continue;
            prevChar = c;
            int off = c - BitTableMin;
            int idx = off >> Log2WordSize;
            if (unchecked((uint)idx) < (uint)BitTable.Length) {
                BitTable[idx] |= 1 << off; // we don't need to mask off because C#'s operator<< does that for us
            } else {
                notInTable[n++] = c;
            }
        }
        Debug.Assert(n == nCharsNotInTable);
        CharsNotInBitTable = notInTable != null ? new string(notInTable) : "";
    }

    /// <summary>Returns true if the set contains the given char.</summary>
    public bool Contains(char value) {
        int off = value - BitTableMin;
        int idx = off >> Log2WordSize;
        // a negative idx becomes a large uint, so this single comparison checks both bounds
        if (unchecked((uint)idx) < (uint)BitTable.Length) {
            return ((BitTable[idx] >> off) & 1) != 0; // we don't need to mask off because C#'s operator>> does that for us
        }
        if (CharsNotInBitTable == null) return false;
        if (value >= Min && value <= Max) {
            foreach (char c in CharsNotInBitTable) {
                if (c == value) goto ReturnTrue;
            }
        }
        return false;
    ReturnTrue:
        return true;
    }
}

}
================================================ FILE: FParsecCS/CharStream.cs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2012 // License: Simplified BSD License. See accompanying documentation. #if !LOW_TRUST using System; using System.IO; using System.Collections.Generic; using System.Text; using System.Text.RegularExpressions; using System.Diagnostics; using System.Reflection; using System.Runtime.Serialization; using System.Runtime.InteropServices; using System.Runtime.CompilerServices; using Microsoft.FSharp.Core; using FParsec.Cloning; namespace FParsec { /// An opaque representation of a CharStream index. public unsafe struct CharStreamIndexToken { #if DEBUG internal readonly CharStream CharStream; private long Index { get { return GetIndex(CharStream); } } #endif internal readonly char* Ptr; private readonly int BlockPlus1; /// Returns -1 if the IndexToken was zero-initialized.
internal int Block { get { return unchecked(BlockPlus1 - 1); } } internal CharStreamIndexToken( #if DEBUG CharStream charStream, #endif char* ptr, int block) { #if DEBUG CharStream = charStream; #endif Ptr = ptr; BlockPlus1 = unchecked(block + 1); } private static void ThrowInvalidIndexToken() { throw new InvalidOperationException("The CharStreamIndexToken is invalid."); } public long GetIndex(CharStream charStreamFromWhichIndexTokenWasRetrieved) { int block = Block; if (block < 0) ThrowInvalidIndexToken(); // tests for a zero-initialized IndexToken #if DEBUG Debug.Assert(CharStream == charStreamFromWhichIndexTokenWasRetrieved); #endif return charStreamFromWhichIndexTokenWasRetrieved.GetIndex(Ptr, block); } }

/// <summary>
/// Two UTF-16 chars packed into one 32-bit value:
/// Char0 occupies the low 16 bits, Char1 the high 16 bits.
/// </summary>
public struct TwoChars : IEquatable<TwoChars> {
    private uint Chars;

    /// <summary>Wraps an already packed value (Char0 in the low half, Char1 in the high half).</summary>
    internal TwoChars(uint chars) {
        Chars = chars;
    }

    /// <summary>Packs the two given chars.</summary>
    public TwoChars(char char0, char char1) {
        Chars = ((uint)char1 << 16) | (uint)char0;
    }

    /// <summary>The char stored in the low 16 bits.</summary>
    public char Char0 { get { return unchecked((char)Chars); } } // the unchecked cast discards the high 16 bits
    /// <summary>The char stored in the high 16 bits.</summary>
    public char Char1 { get { return (char)(Chars >> 16); } }

    public override bool Equals(object obj) { return (obj is TwoChars) && Chars == ((TwoChars) obj).Chars; }
    public bool Equals(TwoChars other) { return Chars == other.Chars; }
    public override int GetHashCode() { return unchecked((int)Chars); }
    public static bool operator==(TwoChars left, TwoChars right) { return left.Chars == right.Chars; }
    public static bool operator!=(TwoChars left, TwoChars right) { return left.Chars != right.Chars; }
}

/// Provides read‐access to a sequence of UTF‐16 chars. public unsafe class CharStream : IDisposable { // In order to facilitate efficient backtracking we divide the stream into overlapping // blocks with equal number of chars. The blocks are overlapping, so that // backtracking over short distances at a block boundary doesn't trigger a reread of the // previous block. // // Block 0 // // -----------------|-------- Block 1 // Overlap // --------|--------|-------- Block 2 // Overlap // --------|--------|-------- // (...)
// a '-' symbolizes a char, a '|' a block boundary.
//
//
// In general there's no fixed relationship between the number of input bytes and the
// number of input chars. Worse, the encoding can be stateful, which makes it necessary
// to persist the decoder state over block boundaries. If we later want to
// be able to reread a certain block, we therefore need to keep record of various
// bits of information describing the state of the input stream at the beginning of a block:

/// Per-block bookkeeping record. ReadBlock uses it to reposition the byte stream and
/// the decoder when a block is reread, and to verify that the reread data matches
/// what was originally read.
private class BlockInfo {
    /// the byte stream index of the first char in the block after the OverhangCharsAtBlockBegin
    public long ByteIndex;
    /// the value of the CharStream's ByteBufferIndex before the block is read
    public int ByteBufferIndex;

    /// the number of bytes in the stream from ByteIndex to the first char after the OverhangCharsAfterOverlap
    public int NumberOfBytesInOverlap;
    /// the last char in the overlap with the previous block (used for integrity checking)
    public char LastCharInOverlap;

    /// chars at the block begin that were already read together with chars of the last block before the overlap
    public string OverhangCharsAtBlockBegin;
    /// chars after the overlap with the previous block that were already read together with the overlap chars
    public string OverhangCharsAfterOverlap;

    // Unfortunately the Decoder API has no explicit methods for managing the state,
    // which forces us to use the comparatively inefficient serialization API
    // (via FParsec.Cloning) for this purpose.
    // The absence of explicit state management or at least a cloning method in the
    // Decoder interface is almost as puzzling to me as the absence of such methods
    // in System.Random.

    /// image of the decoder state at the begin of the block (null if no image was captured)
    public CloneImage DecoderImageAtBlockBegin;
    /// image of the decoder state after the overlap with the previous block (null if no image was captured)
    public CloneImage DecoderImageAfterOverlap;

    /// Stores the given values in the corresponding fields.
    /// Note that nBytesInOverlapCount initializes NumberOfBytesInOverlap.
    public BlockInfo(long byteIndex, int byteBufferIndex,
                     int nBytesInOverlapCount, char lastCharInOverlap,
                     string overhangCharsAtBlockBegin, CloneImage decoderImageAtBlockBegin,
                     string overhangCharsAfterOverlap, CloneImage decoderImageAfterOverlap)
    {
        ByteIndex = byteIndex;
        ByteBufferIndex = byteBufferIndex;
        NumberOfBytesInOverlap = nBytesInOverlapCount;
        LastCharInOverlap = lastCharInOverlap;
        OverhangCharsAtBlockBegin = overhangCharsAtBlockBegin;
        OverhangCharsAfterOverlap = overhangCharsAfterOverlap;
        DecoderImageAtBlockBegin = decoderImageAtBlockBegin;
        DecoderImageAfterOverlap = decoderImageAfterOverlap;
    }
}

private const int DefaultBlockSize = 3*(1 << 16); // 3*2^16 = 196608 chars, i.e. roughly 200k
private const int DefaultByteBufferLength = (1 << 12);
// Not a const: StreamConstructorContinue uses it both as the lower bound for the byte
// buffer length and as the minimum number of bytes to read before preamble detection.
private static int MinimumByteBufferLength = 128; // must be larger than longest detectable preamble (we can only guess here)
private const char EOS = '\uFFFF';

public const char EndOfStreamChar = EOS;

/// Points to the current char in Buffer,
/// or is null if the end of the stream has been reached.
internal char* Ptr;
/// Equals Ptr == null ? null : BufferBegin.
internal char* PtrBegin;
/// Equals Ptr == null ? null : BufferEnd.
internal char* PtrEnd;

/// Begin of the used part of the char buffer. Is constant. Is null if the CharStream is empty.
internal char* BufferBegin;
/// End of the used part of the char buffer. Varies for a multi-block stream. Is null if the CharStream is empty.
internal char* BufferEnd;

/// The block currently loaded in the buffer.
internal int Block;

/// Any CharStream method or property setter increments this value when it changes the CharStream state.
/// Backtracking to an old state also restores the old value of the StateTag.
/// The field is an int when SMALL_STATETAG is defined and a long otherwise.
public
#if SMALL_STATETAG
    int
#else
    long
#endif
    StateTag;

/// The stream index of the first char of the block that is currently loaded in the buffer.
internal long IndexOfFirstCharInBlock;

internal long _IndexOfFirstChar;
/// The index of the first char in the stream.
public long IndexOfFirstChar { get { return _IndexOfFirstChar; } }

internal long _Line;
/// The line number for the next char. (The line count starts with 1.)
public long Line { get { return _Line; } }
/// Sets the line number directly. As the name says, the value is not
/// validated and the StateTag is left untouched.
public void SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(long line) {
    _Line = line;
}

internal long _LineBegin;
/// The stream index of the first char of the line that also contains the next char.
public long LineBegin { get { return _LineBegin; } }
/// Sets the line begin index directly. As the name says, the value is not
/// validated and the StateTag is left untouched.
public void SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(long lineBegin) {
    _LineBegin = lineBegin;
}

/// The UTF-16 column number of the next char, i.e. Index - LineBegin + 1.
public long Column { get { return Index - LineBegin + 1; } }

internal string _Name;
/// The name of the stream. Setting the name counts as a state change
/// and hence increments the StateTag.
public string Name {
    get { return _Name; }
    set { _Name = value; ++StateTag; }
}

/// The Encoding that is used for decoding the underlying byte stream, or
/// System.Text.UnicodeEncoding in case the stream was directly constructed
/// from a string or char buffer.
public Encoding Encoding { get; private set; }

// If the CharStream is constructed from a binary stream, we use a managed string as the char
// buffer. This allows us to apply regular expressions directly to the input.
// In the case of multi-block CharStreams we thus have to mutate the buffer string through pointers.
// This is safe as long as we use a newly constructed string and we don't pass a reference
// to the internal buffer string to the "outside world". (The one instance where we have to pass
// a reference to the buffer string is regex matching. See the docs for Match(regex) for more info.)
//
// Apart from Match(regex) we access the internal buffer only through a pinned pointer.
// This way we avoid the overhead of redundant bounds checking and can support strings, char arrays
// and unmanaged char buffers through the same interface.
//
// Pinning a string or char array makes life more difficult for the GC.
// However, as long as
// the buffer is only short-lived or large enough to be allocated on the large object heap,
// there shouldn't be a problem. Furthermore, the buffer strings for CharStreams constructed
// from a binary stream are allocated through the StringBuffer interface and hence always live
// on the large object heap. Thus, the only scenario to really worry about (and which the
// documentation explicitly warns about) is when a large number of small CharStreams
// are constructed directly from strings or char arrays and are used for an extended period of time.

/// The string holding the char buffer, or null if the buffer is not part of a .NET string.
internal string BufferString;
/// A pointer to the beginning of BufferString, or null if BufferString is null.
internal char* BufferStringPointer;

/// Holds the GCHandle for CharStreams directly constructed from strings or char arrays.
private GCHandle BufferHandle;
/// Holds the StringBuffer for CharStreams constructed from a binary stream.
private StringBuffer StringBuffer;

#if DEBUG
    // Debug-only bookkeeping: Dispose refuses to run while SubstreamCount is non-zero
    // and decrements the parent's count when this stream is disposed.
    internal FSharpRef<int> SubstreamCount = new FSharpRef<int>(0);
    internal FSharpRef<int> ParentSubstreamCount = null;
#endif

/// Null for single-block streams (see IsSingleBlockStream).
private MultiBlockData BlockData;

internal bool IsSingleBlockStream { get { return BlockData == null; } }

/// Contains the data and methods needed in case the input byte stream
/// is large enough to span multiple blocks of the CharStream.
private partial class MultiBlockData {
    public CharStream CharStream;

    /// Is Int64.MaxValue as long as the end of the stream has not yet been detected.
    public long IndexOfLastCharPlus1;

    /// The index of the last block of the stream, or Int32.MaxValue if the end of stream has not yet been detected.
    public int LastBlock;

    public Stream Stream;
    // we keep a separate record of the Stream.Position, so that we don't need to require Stream.CanSeek
    public long StreamPosition;
    // we use StreamLength to avoid calling Read() again on a non-seekable stream after it returned 0 once (see ticket #23)
    public long StreamLength;
    public bool LeaveOpen;

    public int MaxCharCountForOneByte;
    public Decoder Decoder;
    public bool DecoderIsSerializable;

    public int BlockSize;
    public int BlockOverlap;
    /// BufferBegin + BlockSize - minRegexSpace
    public char* RegexSpaceThreshold;

    /// The byte stream index of the first unused byte in the ByteBuffer.
    public long ByteIndex { get { return StreamPosition - (ByteBufferCount - ByteBufferIndex); } }

    /// One BlockInfo per block whose begin has been reached so far, indexed by block number.
    public List<BlockInfo> Blocks;

    public byte[] ByteBuffer;
    public int ByteBufferIndex;
    public int ByteBufferCount;
}

/// The stream index of the last char plus 1. For a single-block stream this is
/// computed from the buffer bounds, otherwise it is read from the MultiBlockData.
public long IndexOfLastCharPlus1 {
    get {
        return BlockData != null
               ? BlockData.IndexOfLastCharPlus1
               : IndexOfFirstChar + Buffer.PositiveDistance(BufferBegin, BufferEnd);
    }
}

/// The block overlap of a multi-block stream, or 0 for a single-block stream.
public int BlockOverlap {
    get { return BlockData == null ? 0 : BlockData.BlockOverlap; }
}

/// The distance between the RegexSpaceThreshold and the end of the block buffer,
/// or 0 for a single-block stream. Setting the value on a single-block stream is a no-op.
/// The setter throws an ArgumentOutOfRangeException if the value is negative
/// or greater than the BlockOverlap.
public int MinRegexSpace {
    get {
        return BlockData == null
               ? 0
               : (int)Buffer.PositiveDistance(BlockData.RegexSpaceThreshold,
                                              BufferBegin + BlockData.BlockSize);
    }
    set {
        if (BlockData != null) {
            if (value < 0 || value > BlockData.BlockOverlap)
                throw new ArgumentOutOfRangeException("value", "The MinRegexSpace value must be non-negative and not greater than the BlockOverlap.");
            BlockData.RegexSpaceThreshold = BufferBegin + BlockData.BlockSize - value;
        }
    }
}

/// True if the stream is positioned at the begin of block 0.
public bool IsBeginOfStream { get { return Ptr == BufferBegin && Block == 0; } }
/// True if the end of the stream has been reached (Ptr is null).
public bool IsEndOfStream { get { return Ptr == null; } }

/// The stream index of the current char, or IndexOfLastCharPlus1 at the end of the stream.
public long Index {
#if AGGRESSIVE_INLINING
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
    get {
        if (Ptr != null) {
            Debug.Assert(BufferBegin <= Ptr && Ptr < BufferEnd);
            if (sizeof(System.IntPtr) != 8) // the JIT removes the inactive branch
                return Buffer.PositiveDistance(PtrBegin, Ptr) + IndexOfFirstCharInBlock;
            else
                return Buffer.PositiveDistance64(PtrBegin, Ptr) + IndexOfFirstCharInBlock;
        }
        Debug.Assert(BlockData == null || BlockData.IndexOfLastCharPlus1 != Int64.MaxValue);
        return IndexOfLastCharPlus1;
    }
}

/// Computes the stream index for a (ptr, block) pair as stored in a CharStreamIndexToken.
/// A null ptr denotes the end of the stream. If the block differs from the currently
/// loaded block, the index of the block begin is derived from the block number.
internal long GetIndex(char* ptr, int block) {
    if (ptr != null) {
        if (block == Block) {
            Debug.Assert(BufferBegin <= ptr && ptr < BufferEnd);
            if (sizeof(System.IntPtr) != 8)
                return Buffer.PositiveDistance(BufferBegin, ptr) + IndexOfFirstCharInBlock;
            else
                return Buffer.PositiveDistance64(BufferBegin, ptr) + IndexOfFirstCharInBlock;
        } else {
            Debug.Assert(BlockData != null && BufferBegin <= ptr && ptr < BufferBegin + BlockData.BlockSize);
            // consecutive blocks begin (BlockSize - BlockOverlap) chars apart
            int blockSizeMinusOverlap = BlockData.BlockSize - BlockData.BlockOverlap;
            long indexOfBlockBegin = IndexOfFirstChar + Math.BigMul(block, blockSizeMinusOverlap);
            if (sizeof(System.IntPtr) != 8)
                return Buffer.PositiveDistance(BufferBegin, ptr) + indexOfBlockBegin;
            else
                return Buffer.PositiveDistance64(BufferBegin, ptr) + indexOfBlockBegin;
        }
    }
    Debug.Assert(BlockData == null || BlockData.IndexOfLastCharPlus1 != Int64.MaxValue);
    return IndexOfLastCharPlus1;
}
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
public Position Position { get {
    long index = Index;
    return new Position(_Name, index, Line, index - LineBegin + 1);
} }

// we don't have a public constructor that only takes a string to avoid potential confusion with a filepath constructor
internal CharStream(string chars) {
    Debug.Assert(chars != null);
    BufferString = chars;
    // the string stays pinned until Dispose frees the handle
    BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
    char* bufferBegin = (char*)BufferHandle.AddrOfPinnedObject();
    BufferStringPointer = bufferBegin;
    CharConstructorContinue(bufferBegin, chars.Length);
}

public CharStream(string chars, int index, int length) : this(chars, index, length, 0) {}

/// Constructs a CharStream from the chars in the given string argument between the
/// indices index (inclusive) and index + length (exclusive). The first char gets
/// the stream index streamIndexOffset.
public CharStream(string chars, int index, int length, long streamIndexOffset) {
    if (chars == null) throw new ArgumentNullException("chars");
    if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
    if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "index or length is out of range.");
    if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
    IndexOfFirstCharInBlock = streamIndexOffset;
    _IndexOfFirstChar = streamIndexOffset;
    _LineBegin = streamIndexOffset;

    BufferString = chars;
    BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
    char* pBufferString = (char*)BufferHandle.AddrOfPinnedObject();
    BufferStringPointer = pBufferString;
    CharConstructorContinue(pBufferString + index, length);
}

public CharStream(char[] chars, int index, int length) : this(chars, index, length, 0) { }

/// Constructs a CharStream from the chars in the given char array between the
/// indices index (inclusive) and index + length (exclusive). The first char gets
/// the stream index streamIndexOffset.
public CharStream(char[] chars, int index, int length, long streamIndexOffset) {
    if (chars == null) throw new ArgumentNullException("chars");
    if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
    if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "index or length is out of range.");
    if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
    IndexOfFirstCharInBlock = streamIndexOffset;
    _IndexOfFirstChar = streamIndexOffset;
    _LineBegin = streamIndexOffset;

    BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
    char* bufferBegin = (char*)BufferHandle.AddrOfPinnedObject() + index;
    if (bufferBegin < unchecked(bufferBegin + length + 1)) { // a pedantic check ...
        CharConstructorContinue(bufferBegin, length);
    } else { // ... for a purely theoretic case
        // release the handle again, since the half-constructed object will never be disposed
        BufferHandle.Free();
        throw new ArgumentOutOfRangeException("length", "The char array may not be allocated directly below the end of the address space.");
    }
}

public CharStream(char* chars, int length) : this(chars, length, 0) {}

/// Constructs a CharStream from the length chars at the given pointer address.
/// The first char gets the stream index streamIndexOffset.
public CharStream(char* chars, int length, long streamIndexOffset) {
    if (chars == null) throw new ArgumentNullException("chars");
    if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative.");
    if (chars >= unchecked(chars + length + 1)) // chars + length + 1 must not overflow (the + 1 is needed for some methods below)
        throw new ArgumentOutOfRangeException("length", "length is too large.");
    if (streamIndexOffset < 0 || streamIndexOffset >= (1L << 60)) throw new ArgumentOutOfRangeException("streamIndexOffset", "streamIndexOffset must be non-negative and less than 2^60.");
    IndexOfFirstCharInBlock = streamIndexOffset;
    _IndexOfFirstChar = streamIndexOffset;
    _LineBegin = streamIndexOffset;
    CharConstructorContinue(chars, length);
}

/// Shared tail of the char-buffer constructors: sets up the buffer pointers
/// (which stay null for an empty buffer), the line count and the Encoding.
private void CharConstructorContinue(char* bufferBegin, int length) {
    Debug.Assert((bufferBegin != null || length == 0) && length >= 0
                 && bufferBegin < unchecked(bufferBegin + length + 1)); // the + 1 is needed for some methods below

    if (length != 0) {
        BufferBegin = bufferBegin;
        BufferEnd = bufferBegin + length;
        Ptr = bufferBegin;
        PtrBegin = bufferBegin;
        PtrEnd = BufferEnd;
    }
    _Line = 1;
    Encoding = Encoding.Unicode;
}

/// Constructs a CharStream over chars that are kept alive and pinned by the caller
/// (no GCHandle is allocated here). pChars must point to the begin of the string chars
/// if chars is not null.
internal CharStream(string chars, char* pChars, char* begin, int length) {
    Debug.Assert((chars == null ? pChars == null
                                : pChars <= begin && length >= 0 && (int)Buffer.PositiveDistance(pChars, begin) <= chars.Length - length)
                 && (begin == null ? length == 0
                                   : length >= 0 && begin < unchecked(begin + length + 1)));

    BufferString = chars;
    BufferStringPointer = pChars;
    if (length != 0) {
        BufferBegin = begin;
        BufferEnd = begin + length;
        Ptr = begin;
        PtrBegin = begin;
        PtrEnd = BufferEnd;
    }
    _Line = 1;
    Encoding = Encoding.Unicode;
}

public CharStream(string path, Encoding encoding)
       : this(path, encoding, true,
              DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }

public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
       : this(path, encoding, detectEncodingFromByteOrderMarks,
              DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }

/// Opens the file at the given path for reading and constructs a CharStream from it.
/// The stream Name is set to the path. The file stream is disposed again if the
/// construction fails.
public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks,
                  int blockSize, int blockOverlap, int byteBufferLength)
{
    if (encoding == null) throw new ArgumentNullException("encoding");
    var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
    try {
        StreamConstructorContinue(stream, false, encoding, detectEncodingFromByteOrderMarks,
                                  blockSize, blockOverlap, byteBufferLength);
        _Name = path;
    } catch {
        stream.Dispose();
        throw;
    }
}

public CharStream(Stream stream, Encoding encoding)
       : this(stream,
              false, encoding, true,
              DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }

public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
       : this(stream,
              leaveOpen, encoding, true,
              DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }

public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
       : this(stream,
              leaveOpen, encoding, detectEncodingFromByteOrderMarks,
              DefaultBlockSize, DefaultBlockSize/3, DefaultByteBufferLength) { }

/// Constructs a CharStream from a readable byte stream. If leaveOpen is false,
/// the stream is closed once it is no longer needed (at the latest in Dispose).
public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks,
                  int blockSize, int blockOverlap, int byteBufferLength)
{
    if (stream == null) throw new ArgumentNullException("stream");
    if (!stream.CanRead) throw new ArgumentException("stream is not readable");
    if (encoding == null) throw new ArgumentNullException("encoding");
    StreamConstructorContinue(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks,
                              blockSize, blockOverlap, byteBufferLength);
}

/// we modify this flag via reflection in the unit test
private static bool DoNotRoundUpBlockSizeToSimplifyTesting = false;

/// Shared tail of the byte-stream constructors. Reads the first bytes, detects the
/// preamble, and then either decodes the whole remaining input into a single block
/// or sets up the MultiBlockData and reads block 0.
private void StreamConstructorContinue(Stream stream, bool leaveOpen,
                                       Encoding encoding, bool detectEncodingFromByteOrderMarks,
                                       int blockSize, int blockOverlap, int byteBufferLength)
{
    if (byteBufferLength < MinimumByteBufferLength) byteBufferLength = MinimumByteBufferLength;

    // -1 means "unknown" (non-seekable stream or more than Int32.MaxValue bytes remaining)
    int remainingBytesCount = -1;
    long streamPosition;
    long streamLength;
    if (stream.CanSeek) {
        streamPosition = stream.Position;
        streamLength = stream.Length;
        long remainingBytesCount64 = streamLength - streamPosition;
        if (remainingBytesCount64 <= Int32.MaxValue) {
            remainingBytesCount = (int)remainingBytesCount64;
            if (remainingBytesCount < byteBufferLength) byteBufferLength = remainingBytesCount;
        }
    } else {
        streamPosition = 0;
        streamLength = Int64.MaxValue;
    }

    // the ByteBuffer must be larger than the longest detectable preamble
    byte[] byteBuffer = new byte[byteBufferLength];
    int byteBufferCount = 0;
    do {
        int n = stream.Read(byteBuffer, byteBufferCount, byteBufferLength - byteBufferCount);
        if (n == 0) {
            // end of stream: now we know the exact number of remaining bytes
            remainingBytesCount = byteBufferCount;
            Debug.Assert(!stream.CanSeek || streamPosition + byteBufferCount == streamLength);
            streamLength = streamPosition + byteBufferCount;
            break;
        }
        byteBufferCount += n;
    } while (byteBufferCount < MinimumByteBufferLength);
    streamPosition += byteBufferCount;

    int preambleLength = Text.DetectPreamble(byteBuffer, byteBufferCount, ref encoding, detectEncodingFromByteOrderMarks);
    remainingBytesCount -= preambleLength;

    _Line = 1;
    Encoding = encoding;

    // we allow such small block sizes only to simplify testing
    if (blockSize < 8) blockSize = DefaultBlockSize;

    bool allCharsFitIntoOneBlock = false;
    if (remainingBytesCount >= 0 && remainingBytesCount/4 <= blockSize) {
        if (remainingBytesCount != 0) {
            try {
                int maxCharCount = Encoding.GetMaxCharCount(remainingBytesCount); // may throw ArgumentOutOfRangeException
                if (blockSize >= maxCharCount) {
                    allCharsFitIntoOneBlock = true;
                    blockSize = maxCharCount;
                }
            } catch (ArgumentOutOfRangeException) { }
        } else {
            allCharsFitIntoOneBlock = true;
            blockSize = 0;
        }
    }

    var buffer = StringBuffer.Create(blockSize);
    Debug.Assert(buffer.Length >= blockSize && (blockSize > 0 || buffer.StringPointer == null));
    StringBuffer = buffer;
    BufferString = buffer.String;
    BufferStringPointer = buffer.StringPointer;
    char* bufferBegin = buffer.StringPointer + buffer.Index;
    try {
        Decoder decoder = encoding.GetDecoder();
        if (allCharsFitIntoOneBlock) {
            int bufferCount = preambleLength == byteBufferCount
                              ? 0
                              : Text.ReadAllRemainingCharsFromStream(bufferBegin, buffer.Length, byteBuffer, preambleLength, byteBufferCount, stream, streamPosition, decoder, streamPosition == streamLength);
            if (!leaveOpen) stream.Close();
            if (bufferCount != 0) {
                BufferBegin = bufferBegin;
                Ptr = bufferBegin;
                PtrBegin = bufferBegin;
                BufferEnd = bufferBegin + bufferCount;
                PtrEnd = BufferEnd;
            }
            Block = 0;
        } else {
            if (!DoNotRoundUpBlockSizeToSimplifyTesting) blockSize = buffer.Length;
            BufferBegin = bufferBegin;
            BufferEnd = bufferBegin;
            var d = new MultiBlockData();
            BlockData = d;
            d.CharStream = this;
            d.Stream = stream;
            d.StreamPosition = streamPosition;
            d.StreamLength = streamLength;
            d.LeaveOpen = leaveOpen;
            d.Decoder = decoder;
            d.DecoderIsSerializable = decoder.GetType().IsSerializable;
            d.ByteBuffer = byteBuffer;
            d.ByteBufferIndex = preambleLength;
            d.ByteBufferCount = byteBufferCount;
            d.MaxCharCountForOneByte = Math.Max(1, Encoding.GetMaxCharCount(1));
            if (d.MaxCharCountForOneByte > 1024) // an arbitrary limit low enough that a char array with this size can be allocated on the stack
                throw new ArgumentException("The CharStream class does not support Encodings with GetMaxCharCount(1) > 1024.");
            if (blockSize < 3*d.MaxCharCountForOneByte) blockSize = 3*d.MaxCharCountForOneByte;
            // MaxCharCountForOneByte == the maximum number of overhang chars
            if(    Math.Min(blockOverlap, blockSize - 2*blockOverlap) < d.MaxCharCountForOneByte
                || blockOverlap >= blockSize/2) blockOverlap = blockSize/3;
            d.BlockSize = blockSize;
            d.BlockOverlap = blockOverlap;
            d.RegexSpaceThreshold = bufferBegin + (blockSize - 2*blockOverlap/3);
            d.IndexOfLastCharPlus1 = Int64.MaxValue;
            Block = -2; // special value recognized by ReadBlock
            d.LastBlock = Int32.MaxValue;
            d.Blocks = new List<BlockInfo>();
            // the first block has no overlap with a previous block
            d.Blocks.Add(new BlockInfo(preambleLength, preambleLength, 0, EOS, null, null, null, null));
            d.ReadBlock(0);
            if (d.LastBlock == 0) {
                // everything fit into block 0, so we can treat the stream as a single-block stream
                if (!d.LeaveOpen) d.Stream.Close();
                BlockData = null;
            }
        }
    } catch {
        buffer.Dispose();
        throw;
    }
}

/// Releases the pinned buffer handle and/or the StringBuffer, closes the underlying
/// byte stream of a multi-block stream (unless leaveOpen was specified) and
/// resets the buffer pointers to null.
public void Dispose() {
#if DEBUG
    lock (SubstreamCount) {
        if (SubstreamCount.Value != 0)
            throw new InvalidOperationException("A CharStream must not be disposed before all of its Substreams have been disposed.");
    }
    if (ParentSubstreamCount != null) {
        lock (ParentSubstreamCount) --ParentSubstreamCount.Value;
    }
#endif
    if (BufferHandle.IsAllocated) BufferHandle.Free();
    if (StringBuffer != null) StringBuffer.Dispose();
    if (BlockData != null && !BlockData.LeaveOpen) BlockData.Stream.Close();
    Ptr = null;
    PtrBegin = null;
    PtrEnd = null;
    BufferBegin = null;
    BufferEnd = null;
}

/// Applies the parser to a temporary CharStream over the chars of the given string
/// between the indices index (inclusive) and index + length (exclusive) and returns
/// the parser's result. The string is only pinned for the duration of the call.
/// Throws an ArgumentNullException if chars or parser is null and an
/// ArgumentOutOfRangeException if index or length is out of range.
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification="The CharStream is manually disposed.")]
public static T ParseString<T,TUserState>(string chars, int index, int length,
                                          FSharpFunc<CharStream<TUserState>,T> parser,
                                          TUserState userState,
                                          string streamName)
{
    // validate the arguments like the public constructors do, instead of
    // failing with a NullReferenceException further below
    if (chars == null) throw new ArgumentNullException("chars");
    if (parser == null) throw new ArgumentNullException("parser");
    if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
    if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "length is out of range.");
    fixed (char* pChars = chars) {
        var stream = new CharStream<TUserState>(chars, pChars, pChars + index, length);
        stream.UserState = userState;
        stream._Name = streamName;
        try {
            return parser.Invoke(stream);
        } finally {
        #if DEBUG
            stream.Dispose();
        #else
            // manually dispose stream
            stream.Ptr = null;
            stream.PtrBegin = null;
            stream.PtrEnd = null;
            stream.BufferBegin = null;
            stream.BufferEnd = null;
        #endif
        }
    }
}

private partial class MultiBlockData {
    /// Refills the ByteBuffer if no unused byte is remaining.
    /// Returns the number of unused bytes in the (refilled) ByteBuffer.
    private int FillByteBuffer() {
        int n = ByteBufferCount - ByteBufferIndex;
        if (n > 0) return n;
        return ClearAndRefillByteBuffer(0);
    }

    /// Refills the ByteBuffer starting at the given index. If the underlying byte
    /// stream contains enough bytes, the ByteBuffer is filled up to the ByteBuffer.Length.
/// Returns the number of bytes available for consumption in the refilled ByteBuffer.
private int ClearAndRefillByteBuffer(int byteBufferIndex) {
    Debug.Assert(byteBufferIndex >= 0 && byteBufferIndex <= ByteBuffer.Length);
    // A single Stream.Read call may return fewer bytes than requested, so we keep
    // reading until the buffer is full or the end of the stream has been reached.
    // Being able to rely on a completely filled buffer allows us to calculate the
    // buffer utilization after skipping a certain number of input bytes. For most
    // streams the loop body runs only once (or twice at the end of the stream).
    int bufferLength = ByteBuffer.Length;
    int writeIndex = byteBufferIndex;
    // comparing StreamPosition with StreamLength avoids calling Read again after it
    // returned 0 at the end of the stream (see ticket #23)
    while (writeIndex != bufferLength && StreamPosition != StreamLength) {
        int bytesRead = Stream.Read(ByteBuffer, writeIndex, bufferLength - writeIndex);
        if (bytesRead == 0) {
            // end of stream: remember the actual length so that we never call Read again
            Debug.Assert(!Stream.CanSeek || StreamPosition == StreamLength);
            StreamLength = StreamPosition;
            break;
        }
        writeIndex += bytesRead;
        StreamPosition += bytesRead;
    }
    ByteBufferIndex = byteBufferIndex;
    ByteBufferCount = writeIndex;
    return writeIndex - byteBufferIndex;
}

/// Reads up to the given maximum number of chars into the given buffer.
/// If more than the maximum number of chars have to be read from the stream in order to
/// fill the buffer (due to the way the Decoder API works), the overhang chars are
/// returned through the output parameter.
/// Returns a pointer to one char after the last char read.
private char* ReadCharsFromStream(char* buffer, int maxCount, out string overhangChars) {
    Debug.Assert(maxCount >= 0);
    fixed (byte* byteBuffer = ByteBuffer) {
        overhangChars = null;
        try {
            // Phase 1: decode directly into the output buffer as long as it has room
            // for the chars of at least one more input byte.
            while (maxCount >= MaxCharCountForOneByte) {// if maxCount < MaxCharCountForOneByte, Convert could throw
                int nBytesInByteBuffer = FillByteBuffer();
                // no more input bytes: ask the decoder to flush its internal state
                bool flush = nBytesInByteBuffer == 0;
                int bytesUsed, charsUsed; bool completed = false;
                Decoder.Convert(byteBuffer + ByteBufferIndex, nBytesInByteBuffer,
                                buffer, maxCount, flush,
                                out bytesUsed, out charsUsed, out completed);
                ByteBufferIndex += bytesUsed; // Convert consumed bytesUsed bytes from the byte buffer
                buffer += charsUsed;
                maxCount -= charsUsed;
                if (flush && completed) return buffer;
            }
            if (maxCount == 0) return buffer;

            // Phase 2: fewer than MaxCharCountForOneByte chars are still wanted, so we
            // decode into a small stack buffer and copy the chars over one at a time.
            // Any chars decoded beyond maxCount are returned as overhang chars.
            char* cs = stackalloc char[MaxCharCountForOneByte];
            for (;;) {
                int nBytesInByteBuffer = FillByteBuffer();
                bool flush = nBytesInByteBuffer == 0;
                int bytesUsed, charsUsed; bool completed;
                Decoder.Convert(byteBuffer + ByteBufferIndex, nBytesInByteBuffer,
                                cs, MaxCharCountForOneByte, flush,
                                out bytesUsed, out charsUsed, out completed);
                ByteBufferIndex += bytesUsed;
                if (charsUsed > 0) {
                    int i = 0;
                    do {
                        *buffer = cs[i];
                        ++buffer; ++i;
                        if (--maxCount == 0) {
                            if (i < charsUsed) overhangChars = new string(cs, i, charsUsed - i);
                            return buffer;
                        }
                    } while (i < charsUsed);
                }
                if (flush && completed) return buffer;
            }
        } catch (DecoderFallbackException e) {
            // attach the byte stream position of the offending bytes to the exception
            e.Data.Add("Stream.Position", ByteIndex + e.Index);
            throw;
        }
    }
}

/// Reads a block of chars (which must be different from the current block)
/// into the BufferString. If the current CharStream block is block - 1, this method
/// seeks the CharStream to the first char after the overlap of the two blocks.
/// Otherwise it seeks the CharStream to the first char in the block. It returns the
/// CharStream.Ptr value at the new position (which can be null).
internal char* ReadBlock(int block) {
    int prevBlock = CharStream.Block;
    if (block == prevBlock) throw new InvalidOperationException();
    if (!DecoderIsSerializable && block > 0) {
        // Without decoder images we can only move forward, one block at a time.
        if (prevBlock > block)
            throw new NotSupportedException("The CharStream does not support seeking backwards over ranges longer than the block overlap because the Encoding's Decoder is not serializable. The decoder has the type: " + Decoder.GetType().FullName);
        while (prevBlock + 1 < block) ReadBlock(++prevBlock);
    }

    BlockInfo bi = Blocks[block]; // will throw if block is out of range
    int blockSizeMinusOverlap = BlockSize - BlockOverlap;
    long charIndex = Math.BigMul(block, blockSizeMinusOverlap);
    char* bufferBegin = CharStream.BufferBegin;
    char* begin, buffer;
    int nCharsToRead;

    // fill [0 ... BlockOverlap-1] if block > 0
    if (prevBlock == block - 1) {
        // moving forward by one block: the overlap is already in the buffer and only needs to be moved to the front
        Buffer.Copy((byte*)bufferBegin, (byte*)(bufferBegin + blockSizeMinusOverlap),
                    BlockOverlap*sizeof(char));
        Debug.Assert(bufferBegin[BlockOverlap - 1] == bi.LastCharInOverlap);
        begin = buffer = bufferBegin + BlockOverlap;
    } else if (prevBlock >= 0) {
        Stream.Seek(bi.ByteIndex, SeekOrigin.Begin); // will throw if Stream can't seek
        // now that there was no exception, we can change the state...
        StreamPosition = bi.ByteIndex;
        ClearAndRefillByteBuffer(bi.ByteBufferIndex);
        if (block != 0)
            Decoder = (Decoder)bi.DecoderImageAtBlockBegin.CreateClone();
        else
            Decoder.Reset();
        if (prevBlock == block + 1) {
            // move the overlap into [BlockSize - BlockOverlap, BlockSize - 1] before it gets overwritten
            Buffer.Copy((byte*)(bufferBegin + blockSizeMinusOverlap), (byte*)bufferBegin,
                        BlockOverlap*sizeof(char));
        }
        begin = buffer = bufferBegin;
        if (block > 0) {
            nCharsToRead = BlockOverlap;
            if (bi.OverhangCharsAtBlockBegin != null) {
                nCharsToRead -= bi.OverhangCharsAtBlockBegin.Length;
                for (int i = 0; i < bi.OverhangCharsAtBlockBegin.Length; ++i)
                    *(buffer++) = bi.OverhangCharsAtBlockBegin[i];
            }
            string overhangCharsAfterOverlap;
            buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlap);
            // the reread overlap must match what was recorded when the block was first read
            if (   buffer != bufferBegin + BlockOverlap
                || ByteIndex != bi.ByteIndex + bi.NumberOfBytesInOverlap
                || *(buffer - 1) != bi.LastCharInOverlap
                || overhangCharsAfterOverlap != bi.OverhangCharsAfterOverlap)
                throw new IOException("CharStream: stream integrity error");
        }
    } else { // ReadBlock was called from the constructor
        if (block != 0) throw new InvalidOperationException();
        begin = buffer = bufferBegin;
    }

    // fill [0 ... BlockSize-BlockOverlap-1] if block == 0
    // and [BlockOverlap ... BlockSize-BlockOverlap-1] otherwise
    if (block == 0) {
        nCharsToRead = blockSizeMinusOverlap;
    } else {
        nCharsToRead = blockSizeMinusOverlap - BlockOverlap;
        if (bi.OverhangCharsAfterOverlap != null) {
            nCharsToRead -= bi.OverhangCharsAfterOverlap.Length;
            for (int i = 0; i < bi.OverhangCharsAfterOverlap.Length; ++i)
                *(buffer++) = bi.OverhangCharsAfterOverlap[i];
        }
    }
    string overhangCharsAtNextBlockBegin;
    buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAtNextBlockBegin);
    long byteIndexAtNextBlockBegin = ByteIndex;
    int byteBufferIndexAtNextBlockBegin = ByteBufferIndex;

    // fill [BlockSize-BlockOverlap ... BlockSize-1]
    if (block == Blocks.Count - 1) { // next block hasn't yet been read
        Cloner cloner = null;
        CloneImage decoderImageAtNextBlockBegin = null;
        if (DecoderIsSerializable) {
            cloner = Cloner.Create(Decoder.GetType());
            decoderImageAtNextBlockBegin = cloner.CaptureImage(Decoder);
        }
        nCharsToRead = BlockOverlap;
        if (overhangCharsAtNextBlockBegin != null) {
            nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
            for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
                *(buffer++) = overhangCharsAtNextBlockBegin[i];
        }
        string overhangCharsAfterOverlapWithNextBlock;
        buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
        if (LastBlock == Int32.MaxValue) { // last block hasn't yet been detected
            if (buffer == bufferBegin + BlockSize) {
                // the buffer was completely filled, so we record the data needed for reading the next block
                var decoderImageAfterOverlapWithNextBlock =
                    !DecoderIsSerializable ? null : cloner.CaptureImage(Decoder);
                int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
                Blocks.Add(new BlockInfo(byteIndexAtNextBlockBegin, byteBufferIndexAtNextBlockBegin,
                                         nBytesInOverlapWithNextBlock, *(buffer - 1),
                                         overhangCharsAtNextBlockBegin, decoderImageAtNextBlockBegin,
                                         overhangCharsAfterOverlapWithNextBlock, decoderImageAfterOverlapWithNextBlock));
            } else { // we reached the end of the stream
                LastBlock = block;
                IndexOfLastCharPlus1 = CharStream.IndexOfFirstChar + charIndex + (buffer - bufferBegin);
            }
        } else if (IndexOfLastCharPlus1 != CharStream.IndexOfFirstChar + charIndex + (buffer - bufferBegin)) {
            throw new IOException("CharStream: stream integrity error");
        }
    } else {
        BlockInfo nbi = Blocks[block + 1];
        // the state at the begin of the next block must match the recorded state
        if (   buffer != bufferBegin + blockSizeMinusOverlap
            || byteIndexAtNextBlockBegin != nbi.ByteIndex
            || byteBufferIndexAtNextBlockBegin != nbi.ByteBufferIndex
            || overhangCharsAtNextBlockBegin != nbi.OverhangCharsAtBlockBegin)
            throw new IOException("CharStream: stream integrity error");

        if (prevBlock != block + 1 || (block == 0 && !DecoderIsSerializable)) { // jumping back to block 0 is supported even if the decoder is not serializable
            nCharsToRead = BlockOverlap;
            if (overhangCharsAtNextBlockBegin != null) {
                nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
                for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
                    *(buffer++) = overhangCharsAtNextBlockBegin[i];
            }
            string overhangCharsAfterOverlapWithNextBlock;
            buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
            int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
            if (   buffer != bufferBegin + BlockSize
                || nBytesInOverlapWithNextBlock != nbi.NumberOfBytesInOverlap
                || *(buffer - 1) != nbi.LastCharInOverlap
                || overhangCharsAfterOverlapWithNextBlock != nbi.OverhangCharsAfterOverlap)
                throw new IOException("CharStream: stream integrity error");
        } else {
            Debug.Assert(bufferBegin[BlockSize - 1] == nbi.LastCharInOverlap);
            buffer += BlockOverlap; // we already copied the chars at the beginning of this function
            // skip the bytes of the overlap, which we don't need to decode again
            int off = nbi.NumberOfBytesInOverlap - (ByteBufferCount - ByteBufferIndex);
            if (off > 0) {
                // we wouldn't have gotten here if the Stream didn't support seeking
                Stream.Seek(off, SeekOrigin.Current);
                StreamPosition += off;
                ClearAndRefillByteBuffer(off%ByteBuffer.Length);
            } else {
                ByteBufferIndex += nbi.NumberOfBytesInOverlap;
            }
            Decoder = (Decoder)nbi.DecoderImageAfterOverlap.CreateClone();
        }
    }

    // publish the new block in the CharStream
    CharStream.Block = block;
    //CharStream.CharIndex = charIndex;
    CharStream.IndexOfFirstCharInBlock = CharStream.IndexOfFirstChar + charIndex;
    CharStream.BufferEnd = buffer;
    if (begin != buffer) {
        CharStream.Ptr = begin;
        CharStream.PtrEnd = buffer;
        CharStream.PtrBegin = CharStream.BufferBegin;
        return begin;
    } else {
        // there is no char at the new position, i.e. we are at the end of the stream
        CharStream.Ptr = null;
        CharStream.PtrEnd = null;
        CharStream.PtrBegin = null;
        return null;
    }
}
} // class MultiBlockData

/// Seeks the stream to the given index,
/// or to the end of the stream if the indexed position lies beyond the last char in the stream.
/// Documented failure conditions:
///  - The index is negative or less than the IndexOfFirstChar.
///  - Accessing the char with the given index requires seeking in the underlying byte stream,
///    but the byte stream does not support seeking or the Encoding's Decoder is not serializable.
///  - An I/O error occurred.
///  - The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.
///  - The input stream contains invalid bytes for which the decoder fallback threw this exception.
///  - Can not allocate enough memory for the internal data structure.
///  - Method is called after the stream was disposed.
/// The method has no return value; it repositions this stream by updating Ptr, PtrBegin and PtrEnd.
public void Seek(long index) {
    ++StateTag;
    // Even if the subtraction below overflows, the range test stays valid, because
    // 0 <= IndexOfFirstCharInBlock < 2^60 + 2^31 * 2^31 and BufferEnd - BufferBegin < 2^31,
    // where 2^31 is an upper bound for both the number of blocks and the number of chars in a block.
    long offsetInBlock = unchecked(index - IndexOfFirstCharInBlock);
    if (offsetInBlock >= 0 && offsetInBlock < Buffer.PositiveDistance(BufferBegin, BufferEnd)) {
        // Fast path: the target lies inside the currently buffered block.
        PtrBegin = BufferBegin;
        PtrEnd = BufferEnd;
        Ptr = BufferBegin + (uint)offsetInBlock;
        return;
    }

    if (index < IndexOfFirstChar) {
        // Undo the increment above: a rejected argument leaves the state tag unchanged.
        --StateTag;
        throw (new ArgumentOutOfRangeException("index", "The index is negative or less than the IndexOfFirstChar."));
    }

    if (BlockData == null || index >= BlockData.IndexOfLastCharPlus1) {
        // No multi-block data, or the index is at/after the known end: clear all three pointers.
        Ptr = null;
        PtrBegin = null;
        PtrEnd = null;
        return;
    }

    // we never get here for streams with only one block
    long relativeIndex = index - IndexOfFirstChar;
    int stride = BlockData.BlockSize - BlockData.BlockOverlap;
    long remainder;
    long quotient = Math.DivRem(relativeIndex, stride, out remainder);
    // Clamp the block number into the int range before delegating.
    int targetBlock = quotient <= Int32.MaxValue ? (int)quotient : Int32.MaxValue;
    Seek(targetBlock, (int)remainder);
}

/// <summary>
/// Positions the stream at the char with offset indexInBlock inside the given block,
/// calling BlockData.ReadBlock when the target is not in the current block.
/// All three pointers are set to null when the target cannot be reached.
/// </summary>
private void Seek(int block, int indexInBlock) {
    Debug.Assert(block >= 0 && indexInBlock >= 0 && BlockData != null);

    // Re-express the target relative to a neighbouring block when it falls into the region
    // that two consecutive blocks share (presumably to avoid an unnecessary block read).
    if (block > Block) {
        if (indexInBlock < BlockData.BlockOverlap) {
            --block;
            indexInBlock += BlockData.BlockSize - BlockData.BlockOverlap;
        }
    } else if (block < Block) {
        int stride = BlockData.BlockSize - BlockData.BlockOverlap;
        if (indexInBlock >= stride) {
            ++block;
            indexInBlock -= stride;
        }
    }

    if (block != Block) {
        int lastKnownBlock = BlockData.Blocks.Count - 1;
        if (block < lastKnownBlock) {
            BlockData.ReadBlock(block);
            Debug.Assert(indexInBlock < Buffer.PositiveDistance(PtrBegin, PtrEnd));
        } else {
            // Start at the last block recorded so far and read forward one block at a time
            // until the target block or the last block of the stream is reached.
            BlockData.ReadBlock(lastKnownBlock);
            while (Block < block && Block != BlockData.LastBlock)
                BlockData.ReadBlock(Block + 1);
            bool targetUnreachable = block != Block
                                     || indexInBlock >= Buffer.PositiveDistance(PtrBegin, PtrEnd);
            if (targetUnreachable) {
                Ptr = null;
                PtrBegin = null;
                PtrEnd = null;
                return;
            }
        }
    } else {
        Debug.Assert(indexInBlock < Buffer.PositiveDistance(BufferBegin, BufferEnd));
        PtrBegin = BufferBegin;
        PtrEnd = BufferEnd;
    }

    Ptr = BufferBegin + indexInBlock;
}

/// <summary>
/// Restores a position given as a buffer pointer plus the number of the block it belongs to.
/// A null pointer clears Ptr, PtrBegin and PtrEnd.
/// </summary>
internal void Seek(char* ptr, int block) {
    if (ptr == null) {
        Ptr = null;
        PtrBegin = null;
        PtrEnd = null;
        return;
    }

    if (block == Block) {
        // Same block: the pointer can be used directly.
        Debug.Assert(ptr >= BufferBegin && ptr < BufferEnd);
        Ptr = ptr;
        PtrBegin = BufferBegin;
        PtrEnd = BufferEnd;
        return;
    }

    // Different block: convert the pointer into an offset and seek by (block, offset).
    Debug.Assert(BlockData != null && ptr >= BufferBegin && ptr < BufferBegin + BlockData.BlockSize);
    int indexInBlock = (int)Buffer.PositiveDistance(BufferBegin, ptr);
    Seek(block, indexInBlock);
}

/// <summary>
/// If Ptr is not null, reads the block following the current one when there is one
/// (BlockData present and the current block is not BlockData.LastBlock);
/// otherwise clears Ptr, PtrBegin and PtrEnd. Does nothing when Ptr is already null.
/// </summary>
private void SeekToFirstCharAfterLastCharOfCurrentBlock() {
    if (Ptr == null) return;

    if (BlockData != null && Block != BlockData.LastBlock) {
        BlockData.ReadBlock(Block + 1);
    } else {
        Ptr = null;
        PtrBegin = null;
        PtrEnd = null;
    }
}

/// <summary>
/// A token built from the current Ptr and Block (and, in DEBUG builds, this stream).
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
public CharStreamIndexToken IndexToken {
    get {
        return new CharStreamIndexToken(
#if DEBUG
            this,
#endif
            Ptr,
            Block
        );
    }
}

private static
// Moves the stream to the position captured in indexToken and increments
// StateTag.
// Throws ArgumentException (via ThrowInvalidIndexToken) when the token's
// Block is negative, which is how zero-initialized tokens are detected.
public void Seek(CharStreamIndexToken indexToken) {
    int targetBlock = indexToken.Block;
    if (targetBlock < 0) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens
#if DEBUG
    Debug.Assert(this == indexToken.CharStream);
#endif
    char* targetPtr = indexToken.Ptr;

    // Fast path: both the current and the target position are real chars
    // inside the block that is already buffered.
    bool withinCurrentBlock = Ptr != null && targetPtr != null && targetBlock == Block;
    if (!withinCurrentBlock) {
        Seek(targetPtr, targetBlock);
    } else {
        Ptr = targetPtr;
        Debug.Assert(Ptr >= BufferBegin && Ptr < BufferEnd);
    }
    ++StateTag;
}
// Records that lineOffset newlines were passed and that the current line
// begins newColumnMinus1 chars before the current stream index.
// Updates _Line and _LineBegin and increments StateTag.
// lineOffset must be non-zero and newColumnMinus1 non-negative (checked by
// assertions only).
public void RegisterNewlines(int lineOffset, int newColumnMinus1) {
    Debug.Assert(lineOffset != 0 && newColumnMinus1 >= 0);
    _Line += lineOffset;
    Debug.Assert(_Line > 0);
    // Compute the new line start once and use the same value for both the
    // sanity check and the assignment (previously Index was evaluated twice).
    var newLineBegin = Index - newColumnMinus1;
    Debug.Assert(newLineBegin != _LineBegin);
    _LineBegin = newLineBegin;
    ++StateTag;
}

// 64-bit overload of RegisterNewlines(int, int); same contract.
public void RegisterNewlines(long lineOffset, long newColumnMinus1) {
    Debug.Assert(lineOffset != 0 && newColumnMinus1 >= 0);
    _Line += lineOffset;
    Debug.Assert(_Line > 0);
    // See the int overload: evaluate Index only once.
    var newLineBegin = Index - newColumnMinus1;
    Debug.Assert(newLineBegin != _LineBegin);
    _LineBegin = newLineBegin;
    ++StateTag;
}
// Slow path of Peek(uint): the requested char is not inside the currently
// addressable range. Temporarily seeks forward, reads the char and then
// restores the previous position and StateTag.
// Returns EOS when there is no current char, when the stream is not
// block-based, or when the current block is already the last one.
[MethodImplAttribute(MethodImplOptions.NoInlining)]
private char PeekContinue(uint utf16Offset) {
    if (Ptr == null) return EOS;
    if (BlockData == null) return EOS;
    if (Block == BlockData.LastBlock) return EOS;

    // Remember where we are so the lookahead leaves no visible trace.
    char* savedPtr = Ptr;
    int savedBlock = Block;
    var savedStateTag = StateTag;

    Seek(Index + utf16Offset);
    char result = Peek();

    Seek(savedPtr, savedBlock); // backtrack
    StateTag = savedStateTag;
    return result;
}
// Slow path of Peek(int).
// Non-negative offsets are forwarded to the uint overload. For negative
// offsets the target index is computed; if it lies before the first char
// of the stream EOS is returned, otherwise the stream temporarily seeks
// there, reads the char and restores the previous position and StateTag.
[MethodImplAttribute(MethodImplOptions.NoInlining)]
private char PeekContinue(int utf16Offset) {
    if (utf16Offset >= 0) return PeekContinue((uint)utf16Offset);

    var newIndex = Index + utf16Offset;
    if (newIndex < _IndexOfFirstChar) return EOS;

    char* ptr = Ptr;
    int block = Block;
    var stateTag = StateTag;
    // Reuse the index computed above instead of evaluating
    // Index + utf16Offset a second time.
    Seek(newIndex);
    char c = Peek();
    Seek(ptr, block); // backtrack
    StateTag = stateTag;
    return c;
}
// Slow path of Skip(long): seeks to the current index plus utf16Offset.
// When adding the offset would exceed Int64.MaxValue the target is clamped
// to Int64.MaxValue instead of overflowing.
[MethodImplAttribute(MethodImplOptions.NoInlining)]
private void SkipContinue(long utf16Offset) {
    long current = Index;
    long target;
    if (utf16Offset > Int64.MaxValue - current) {
        target = Int64.MaxValue;
    } else {
        target = current + utf16Offset;
    }
    Seek(target);
}
// Slow path of SkipAndPeek(int).
// Non-negative offsets: skip via the uint overload, then peek.
// Negative offsets: if the target index is still inside the stream, seek
// there and return the char at that position; otherwise seek to the first
// char of the stream and return EOS.
[MethodImplAttribute(MethodImplOptions.NoInlining)]
private char SkipAndPeekContinue(int utf16Offset) {
    if (utf16Offset >= 0) {
        SkipContinue((uint)utf16Offset);
        return Peek();
    }

    var newIndex = Index + utf16Offset;
    if (newIndex >= IndexOfFirstChar) {
        // Reuse the index computed above instead of evaluating
        // Index + utf16Offset a second time.
        Seek(newIndex);
        return Peek();
    }

    Seek(_IndexOfFirstChar);
    return EOS;
}
// Shared implementation of Read(char[], int, int) and
// PeekString(char[], int, int): validates the array segment, pins the
// array and forwards to the pointer-based Read.
// Throws ArgumentOutOfRangeException for a negative bufferIndex, for a
// segment that does not fit into buffer, and for a negative length.
// A null buffer fails on the buffer.Length access.
// Returns 0 for length == 0 without touching the stream.
private int Read(char[] buffer, int bufferIndex, int length, bool backtrack) {
    if (bufferIndex < 0)
        throw new ArgumentOutOfRangeException("bufferIndex", "bufferIndex is negative.");

    int spaceAfterIndex = buffer.Length - bufferIndex; // throws if buffer is null
    if (length > spaceAfterIndex)
        throw new ArgumentOutOfRangeException("length", "bufferIndex or length is out of range.");

    if (length < 0)
        throw new ArgumentOutOfRangeException("length", "length is negative.");

    // We must exit early for length == 0, because pinning an empty array
    // would invoke implementation-defined behaviour.
    if (length == 0) return 0;

    fixed (char* pinned = buffer) {
        return Read(pinned + bufferIndex, length, backtrack);
    }
}
// Skips the next char if it equals ch. Returns true (and increments
// StateTag) when a char was skipped, false otherwise.
public bool Skip(char ch) {
    char* next = Ptr + 1;
    // The inline path only applies when the char after the current one is
    // still addressable; the bounds test must come first because Ptr may be
    // null.
    if (next >= PtrEnd || ch != *Ptr) return SkipContinue(ch);
    Ptr = next;
    ++StateTag;
    return true;
}

// Out-of-line part of Skip(char): handles the positions the inline path
// rejects by combining Match and the general Skip().
[MethodImplAttribute(MethodImplOptions.NoInlining)]
private bool SkipContinue(char ch) {
    if (!Match(ch)) return false;
    Skip();
    return true;
}
// Returns true if the chars at the current position equal chars.
// Does not change the stream position on the inline path.
// A null argument fails on the chars.Length access.
public bool Match(string chars) {
    int count = chars.Length;
    if (count > Buffer.PositiveDistance(Ptr, PtrEnd))
        return SkipContinue(chars, true);

    char* current = Ptr;
    for (int i = 0; i < count; ++i) {
        if (current[i] != chars[i]) return false;
    }
    return true;
}

// Skips chars if the stream content at the current position equals it.
// Returns true (and increments StateTag) on a match, false otherwise.
// The inline path requires at least one addressable char after the match;
// all other cases are delegated to SkipContinue.
public bool Skip(string chars) {
    int count = chars.Length;
    if (count >= Buffer.PositiveDistance(Ptr, PtrEnd))
        return SkipContinue(chars, false);

    char* current = Ptr;
    for (int i = 0; i < count; ++i) {
        if (current[i] != chars[i]) return false;
    }
    Ptr = current + count;
    ++StateTag;
    return true;
}
// Skips over the next chars if, after mapping each stream char through
// CaseFoldTable.FoldedChars, they equal caseFoldedChars.
// Returns true (and increments StateTag) on a match, false otherwise.
// The inline path requires at least one addressable char after the match;
// all other cases are delegated to SkipCaseFoldedContinue.
public bool SkipCaseFolded(string caseFoldedChars) {
    int count = caseFoldedChars.Length;
    if (count >= Buffer.PositiveDistance(Ptr, PtrEnd))
        return SkipCaseFoldedContinue(caseFoldedChars, false);

    char* current = Ptr;
    for (int i = 0; i < count; ++i) {
        char folded = CaseFoldTable.FoldedChars[current[i]];
        if (folded != caseFoldedChars[i]) return false;
    }
    Ptr = current + count;
    ++StateTag;
    return true;
}
// Shared implementation of MatchCaseFolded(char*, int) and
// SkipCaseFolded(char*, int).
// Compares length stream chars, each mapped through
// CaseFoldTable.FoldedChars, with the chars at caseFoldedChars.
// On a match the stream is advanced and StateTag incremented unless
// backtrackEvenIfCharsMatch is set. Cases that do not fit strictly inside
// the addressable range (including negative lengths) are delegated to
// SkipCaseFoldedContinue.
private bool SkipCaseFolded(char* caseFoldedChars, int length, bool backtrackEvenIfCharsMatch) {
    if (unchecked((uint)length >= Buffer.PositiveDistance(Ptr, PtrEnd)))
        return SkipCaseFoldedContinue(caseFoldedChars, length, backtrackEvenIfCharsMatch);

    char* cursor = Ptr;
    char* expected = caseFoldedChars;
    for (int remaining = length; remaining != 0; --remaining) {
        if (CaseFoldTable.FoldedChars[*cursor] != *expected) return false;
        ++cursor;
        ++expected;
    }

    if (!backtrackEvenIfCharsMatch) {
        Ptr = cursor;
        ++StateTag;
    }
    return true;
}
// Slow path of the case-folded Match/Skip methods: compares `length` chars
// starting at the current position with caseFoldedChars, mapping every
// stream char through CaseFoldTable.FoldedChars, and reads following blocks
// as needed.
//
// Returns true if all chars matched. In that case the stream is advanced
// past the match and StateTag is incremented, unless
// backtrackEvenIfCharsMatch is true, in which case the original position is
// restored. Returns false (with the original position restored) on a
// mismatch or when the stream holds fewer than `length` chars.
// Throws ArgumentOutOfRangeException if length is negative.
private bool SkipCaseFoldedContinue(char* caseFoldedChars, int length, bool backtrackEvenIfCharsMatch) {
    if (length <= 0) {
        if (length == 0) return true;
        throw new ArgumentOutOfRangeException("length", "length is negative.");
    }
    if (Ptr == null) return false;
    // Remember the start position so it can be restored after a block change.
    int oldBlock = Block;
    char* oldPtr = Ptr;
    char* ptr = Ptr;
    for (;;) {
        Debug.Assert(length > 0);
        // len = number of chars to compare inside the current block;
        // length = number of chars left for the following blocks.
        int len = (int)Buffer.PositiveDistance(ptr, PtrEnd);
        if (len < length) {
            // The match would have to continue in a later block; fail right
            // away if there is none.
            if (BlockData == null || Block == BlockData.LastBlock) goto ReturnFalse;
            length -= len;
        } else {
            len = length;
            length = 0;
        }
        Debug.Assert(len > 0);
        do {
            if (CaseFoldTable.FoldedChars[*ptr] != *caseFoldedChars) goto ReturnFalse;
            ++ptr;
            ++caseFoldedChars;
            --len;
        } while (len != 0);
        if (length != 0) {
            Debug.Assert(BlockData != null && Block != BlockData.LastBlock);
            // Continue the comparison with the pointer returned for the next block.
            ptr = BlockData.ReadBlock(Block + 1);
        } else {
            if (backtrackEvenIfCharsMatch) {
                // Only a block change needs undoing; Ptr itself was never modified.
                if (Block != oldBlock) Seek(oldPtr, oldBlock);
            } else {
                // ptr == PtrEnd means the match ended exactly at the end of the
                // addressable range, so the new position is in the next block
                // (or there is no further char).
                if (ptr != PtrEnd) Ptr = ptr;
                else SeekToFirstCharAfterLastCharOfCurrentBlock();
                ++StateTag;
            }
            return true;
        }
    }
ReturnFalse:
    if (Block != oldBlock) Seek(oldPtr, oldBlock);
    return false;
}
// Slow path of SkipWhitespace.
// ptr is how far the inline scan got, lineBegin/lineOffset describe the
// newlines it already passed. Continues char by char with SkipAndPeek,
// skipping ' ', '\t', '\r', '\n' and registering each newline ("\r\n"
// counts as one).
// Returns false, leaving the stream untouched, only when the inline scan
// consumed nothing and the current char is not whitespace. Otherwise
// returns true with StateTag set to exactly one more than on entry.
private bool SkipWhitespaceContinue(char* ptr, char* lineBegin, uint lineOffset) {
    var tagOnEntry = StateTag;
    uint alreadyScanned = Buffer.PositiveDistance(Ptr, ptr);
    char c;

    if (alreadyScanned != 0) {
        // Commit what the inline scan already found.
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        c = SkipAndPeek(alreadyScanned);
    } else {
        c = Peek();
        bool isBlank = c == ' ' || c == '\t';
        if (isBlank) c = SkipAndPeek();
        else if (c != '\r' && c != '\n') return false;
    }

    while (true) {
        switch (c) {
            case ' ':
            case '\t':
                c = SkipAndPeek();
                break;

            case '\r':
            case '\n': {
                char first = c;
                c = SkipAndPeek();
                if (first == '\r' && c == '\n') c = SkipAndPeek();
                RegisterNewline();
                break;
            }

            default:
                // Collapse all intermediate StateTag increments into one.
                StateTag = tagOnEntry + 1;
                return true;
        }
    }
}
// Skips one newline ("\n", "\r" or "\r\n") at the current position and
// registers it. Returns false without changing the stream if the current
// char is not a newline char.
public bool SkipNewline() {
    char* p = Ptr;
    if (p + 2 < PtrEnd) {
        // Inline path: at least two chars after the current one are
        // addressable, so the lookahead for "\r\n" needs no bounds check.
        char first = *p;
        if (first != '\r' && first != '\n') return false;
        bool isCrLf = first == '\r' && p[1] == '\n';
        Ptr = isCrLf ? p + 2 : p + 1;
        RegisterNewline();
        return true;
    }

    // Near the end of the addressable range: use the bounds-checked
    // primitives and collapse their StateTag increments into one.
    var tagOnEntry = StateTag;
    char c = Peek();
    if (c == '\n') {
        Skip();
    } else if (c == '\r') {
        if (SkipAndPeek() == '\n') Skip();
    } else {
        return false;
    }
    RegisterNewline();
    StateTag = tagOnEntry + 1;
    return true;
}
// Skips a newline ("\n", "\r" or "\r\n") followed by any further whitespace
// and returns the indentation computed for the last line that was reached.
//
// powerOf2TabStopDistance: tab stop distance; must be a positive power of 2,
//   otherwise ArgumentOutOfRangeException is thrown.
// allowFormFeed: if true, '\f' is skipped as whitespace and resets the
//   indentation count to 0.
//
// On this inline path -1 is returned, without changing the stream, when the
// current char is not a newline char. Everything that cannot be decided
// inside the addressable range is handed to SkipNewlineWhitespaceContinue.
public int SkipNewlineThenWhitespace(int powerOf2TabStopDistance, bool allowFormFeed) {
    int tabStopDistanceMinus1 = unchecked(powerOf2TabStopDistance - 1);
    // For a power of 2, (n & (n - 1)) == 0.
    if (powerOf2TabStopDistance <= 0 || (powerOf2TabStopDistance & tabStopDistanceMinus1) != 0)
        throw new ArgumentOutOfRangeException("powerOf2TabStopDistance", "powerOf2TabStopDistance must be a positive power of 2.");
    char* lineBegin = null;  // start of the most recently begun line
    uint lineOffset = 0;     // number of newlines passed so far
    int ind = -1;            // indentation of the current line; -1 = no newline seen yet
    char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r'
    char* ptr = Ptr;
    if (ptr + 1 < PtrEnd) { // PtrEnd might be null
        char c = *ptr;
        ++ptr;
        if (c == '\r') {
            if (*ptr == '\n') ++ptr;
        } else if (c != '\n') {
            return -1;
        }
    Newline:
        // A newline was just consumed: a new line starts at ptr with indentation 0.
        lineBegin = ptr;
        ++lineOffset;
        ind = 0;
        for (;;) {
            // Leave the loop when the lookahead guarantee ends; the
            // continue method takes over from ptr.
            if (ptr >= end) break;
            c = *ptr;
            ++ptr;
            if (c == ' ') {
                ind = unchecked(ind + 1);
                if (ind >= 0) continue;
                // indentation has overflown, so put back ' ' and return
                ind = unchecked(ind - 1);
            } else if (c <= '\r') {
                if (c == '\r') {
                    if (*ptr == '\n') ++ptr;
                    goto Newline;
                }
                if (c == '\n') goto Newline;
                if (c == '\t') {
                    // ind = ind + tabStopDistance - ind%tabStopDistance
                    int d = tabStopDistanceMinus1 + 1 - (ind & tabStopDistanceMinus1);
                    ind = unchecked(ind + d);
                    if (ind >= 0) continue;
                    // indentation has overflown, so put back '\t' and return
                    ind = unchecked(ind - d);
                } else if (c == '\f' && allowFormFeed) {
                    ind = 0;
                    continue;
                }
            }
            // c is not skipped (non-whitespace or overflow): step back onto it,
            // commit the position and the newlines passed, and return.
            --ptr;
            Ptr = ptr;
            RegisterNewlines(lineBegin, lineOffset);
            return ind;
        }
        // end of block
    }
    return SkipNewlineWhitespaceContinue(ptr, lineBegin, lineOffset, ind, tabStopDistanceMinus1, allowFormFeed);
}
// Slow path of SkipRestOfLine.
// ptr is how far the inline scan got. Continues char by char until a
// newline char or EOS is reached; the newline itself is skipped only when
// skipNewline is true.
// If nothing at all is skipped the stream, including StateTag, is left
// unchanged; otherwise StateTag ends up exactly one above its entry value.
private void SkipRestOfLineContinue(char* ptr, bool skipNewline) {
    var tagOnEntry = StateTag;
    uint alreadyScanned = Buffer.PositiveDistance(Ptr, ptr);
    char c;

    if (alreadyScanned != 0) {
        c = SkipAndPeek(alreadyScanned);
    } else {
        c = Peek();
        if (c == EOS) return;
        bool atNewline = c == '\r' || c == '\n';
        if (atNewline && !skipNewline) return;
    }

    for (; c != EOS; c = SkipAndPeek()) {
        if (c != '\r' && c != '\n') continue;
        if (skipNewline) SkipNewline();
        break;
    }

    StateTag = tagOnEntry + 1;
}
// Reads one char, treating any newline ("\n", "\r" or "\r\n") as a single
// '\n' and registering it.
// Returns EOS, without changing the stream, when Peek() yields EOS.
public char ReadCharOrNewline() {
    char* p = Ptr;
    if (p + 2 < PtrEnd) {
        // Inline path: the lookahead for "\r\n" is guaranteed to be addressable.
        char first = *p;
        if (first != '\r' && first != '\n') {
            Ptr = p + 1;
            ++StateTag;
            return first;
        }
        bool isCrLf = first == '\r' && p[1] == '\n';
        Ptr = isCrLf ? p + 2 : p + 1;
        RegisterNewline();
        return '\n';
    }

    char c0 = Peek();
    if (c0 == EOS) return EOS;

    char c1 = SkipAndPeek();
    // From here on StateTag must not change any further: the single
    // increment done by SkipAndPeek is the one callers get to see.
    var tagAfterFirstSkip = StateTag;
    if (c0 == '\r') {
        if (c1 == '\n') Skip();
    } else if (c0 != '\n') {
        return c0;
    }
    RegisterNewline();
    StateTag = tagAfterFirstSkip;
    return '\n';
}
lookahead for '\r'
            // (continuation of SkipCharsOrNewlines(int maxCount), whose header is on the preceding line)
            char* end2 = unchecked(ptr + maxCount);
            // use ptr + maxCount as the scan limit unless it wrapped around or lies beyond bufferEnd1
            char* end = end2 >= ptr && end2 <= bufferEnd1 ? end2 : bufferEnd1;
            if (ptr < end) {
                for (;;) {
                    char c = *ptr;
                    ++ptr;
                    if (c > '\r') {
                        if (ptr == end) break;
                    } else {
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < bufferEnd1) ++end;
                            }
                        } else if (c != '\n') goto CheckBound;
                        lineBegin = ptr;
                        ++lineOffset;
                    CheckBound:
                        if (ptr >= end) break;
                    }
                }
                if (end < bufferEnd1) {
                    // the limit was not clamped to the buffer end, so the result is final
                    int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                    Ptr = ptr;
                    if (lineOffset == 0) {
                        ++StateTag;
                        return count;
                    } else {
                        RegisterNewlines(lineBegin, lineOffset);
                        return count;
                    }
                }
            }
        }
        return SkipCharsOrNewlinesContinue(ptr, lineBegin, lineOffset, nCRLF, maxCount);
    }

    // Fallback for SkipCharsOrNewlines. `ptr`, `lineBegin`, `lineOffset` and `nCRLF` carry
    // over what the in-buffer scan has already covered (not yet committed to Ptr).
    // Commits that progress, then continues char by char via SkipAndPeek until EOS or until
    // maxCount chars (newlines counted once) have been skipped. Returns the total count.
    private int SkipCharsOrNewlinesContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int maxCount)
    {
        var stateTag = StateTag;
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            // nothing to skip: leave position and StateTag untouched
            if (maxCount == 0 || (c = Peek()) == EOS) return 0;
            count = 0;
        } else {
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS || count == maxCount) break;
            ++count;
            char c0 = c;
            c = SkipAndPeek();
            if (c0 <= '\r') {
                if (c0 == '\r') {
                    if (c == '\n') c = SkipAndPeek(); // consume the '\n' of "\r\n"
                } else if (c0 != '\n') continue;
                RegisterNewline();
            }
        }
        // record the whole skip as one increment of the tag seen on entry
        StateTag = unchecked(stateTag + 1);
        return count;
    }

    // Reads up to maxCount chars, counting each of '\r', "\r\n" and '\n' as one char, and
    // returns them as a string. Lone-'\r' and "\r\n" counts are tracked (nCR, nCRLF) for
    // Text.CopyWithNormalizedNewlines when normalizeNewlines is true.
    // Throws ArgumentOutOfRangeException if maxCount < 0.
    // (The body continues on the following line.)
    public string ReadCharsOrNewlines(int maxCount, bool normalizeNewlines) {
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        char* lineBegin = null;
        uint lineOffset = 0;
        int nCRLF = 0;
        int nCR = 0;
        char* ptr = Ptr;
        if (ptr != null) {
            char* PtrEnd1 = PtrEnd - 1; // - 1 to guarantee the lookahead for '\r'
            char* end2 = unchecked(ptr + maxCount);
            char* end = end2 >= ptr && end2 <= PtrEnd1 ?
end2 : PtrEnd1;
            // (continuation of ReadCharsOrNewlines(int maxCount, bool normalizeNewlines))
            if (ptr < end) {
                for (;;) {
                    char c = *ptr;
                    ++ptr;
                    if (c > '\r') {
                        if (ptr == end) break;
                    } else {
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < PtrEnd1) ++end;
                            } else {
                                ++nCR;
                            }
                        } else if (c != '\n') goto CheckBound;
                        lineBegin = ptr;
                        ++lineOffset;
                    CheckBound:
                        if (ptr >= end) break;
                    }
                }
                if (end < PtrEnd1) {
                    // the limit was not clamped to the buffer end, so the result is final
                    char* ptr0 = Ptr;
                    Ptr = ptr;
                    int length = (int)Buffer.PositiveDistance(ptr0, ptr);
                    if (lineOffset == 0) {
                        ++StateTag;
                        return new string(ptr0, 0, length);
                    }
                    RegisterNewlines(lineBegin, lineOffset);
                    return !normalizeNewlines || (nCR | nCRLF) == 0
                           ? new string(ptr0, 0, length)
                           : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR);
                }
            }
        }
        return ReadCharsOrNewlinesContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, maxCount, normalizeNewlines);
    }

    // Fallback for ReadCharsOrNewlines. The pointer/newline arguments carry over what the
    // in-buffer scan has already covered (not yet committed to Ptr). Commits that progress,
    // continues char by char via SkipAndPeek until EOS or maxCount chars, and returns the
    // chars read since the position on entry (optionally with normalized newlines).
    private string ReadCharsOrNewlinesContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR, int maxCount, bool normalizeNewlines)
    {
        var stateTag = StateTag;
        var indexToken = IndexToken; // marks where the returned string begins
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            // nothing to read: leave position and StateTag untouched
            if (maxCount == 0 || (c = Peek()) == EOS) return "";
            count = 0;
        } else {
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS || count == maxCount) break;
            ++count;
            char c0 = c;
            c = SkipAndPeek();
            if (c0 <= '\r') {
                if (c0 == '\r') {
                    if (c == '\n') {
                        ++nCRLF;
                        c = SkipAndPeek();
                    } else {
                        ++nCR;
                    }
                } else if (c0 != '\n') continue;
                RegisterNewline();
            }
        }
        StateTag = unchecked(stateTag + 1);
        string str = ReadFrom(indexToken);
        if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str;
        fixed (char* pStr = str)
            return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR);
    }

    // Convenience overload: uses `predicate` for the first char as well as for the rest.
    // NOTE(review): FSharpFunc appears without type arguments in this text; the Invoke(char)
    // calls used as conditions suggest FSharpFunc<char,bool> — confirm against the repository file.
    public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc predicate) {
        return SkipCharsOrNewlinesWhile(predicate, predicate);
    }

    // Skips chars as long as the predicates accept them: predicateForFirstChar is applied to
    // the first char, predicate to all following ones. Any newline ('\r', "\r\n", '\n') is
    // presented to the predicates as '\n' and counted as one char. Returns the number skipped.
    // (The body continues on the following line.)
    public int SkipCharsOrNewlinesWhile(FSharpFunc predicateForFirstChar, FSharpFunc predicate) {
        char* lineBegin = null;
uint lineOffset = 0;
        // (continuation of SkipCharsOrNewlinesWhile(predicateForFirstChar, predicate))
        int nCRLF = 0;
        char* ptr = Ptr;
        char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r'
        if (ptr + 1 < PtrEnd) { // PtrEnd might be null
            // first char: tested with predicateForFirstChar
            char c = *ptr;
            ++ptr;
            if (c > '\r') {
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
            } else if (c == '\r') {
                // '\r' and "\r\n" are presented to the predicate as '\n'
                if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                if (*ptr == '\n') {
                    ++ptr;
                    ++nCRLF;
                }
                lineBegin = ptr;
                ++lineOffset;
            } else {
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                if (c == '\n') {
                    lineBegin = ptr;
                    lineOffset = 1;
                }
            }
            // remaining chars: tested with predicate
            for (;;) {
                if (ptr >= end) goto EndOfBlock;
                c = *ptr;
                ++ptr;
                if (c > '\r') {
                    if (!predicate.Invoke(c)) break;
                } else if (c == '\r') {
                    if (!predicate.Invoke('\n')) break;
                    if (*ptr == '\n') {
                        ++ptr;
                        ++nCRLF;
                    }
                    lineBegin = ptr;
                    ++lineOffset;
                } else {
                    if (!predicate.Invoke(c)) break;
                    if (c == '\n') {
                        lineBegin = ptr;
                        ++lineOffset;
                    }
                }
            }
            // the loop was left via break: step back onto the rejected char
            --ptr;
            int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
            Ptr = ptr;
            if (lineOffset == 0) {
                ++StateTag;
                return count;
            }
            RegisterNewlines(lineBegin, lineOffset);
            return count;
        ReturnEmpty:
            // first char rejected: nothing consumed, state unchanged
            return 0;
        }
    EndOfBlock:
        return SkipCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, predicateForFirstChar, predicate);
    }

    // Fallback for SkipCharsOrNewlinesWhile(predicateForFirstChar, predicate). The
    // pointer/newline arguments carry over what the in-buffer scan has already accepted
    // (not yet committed to Ptr). (The body continues on the following line.)
    private int SkipCharsOrNewlinesWhileContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, FSharpFunc predicateForFirstChar, FSharpFunc predicate)
    {
        var stateTag = StateTag;
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            c = Peek();
            char cc = c == '\r' ?
'\n' : c;
            // (continuation of SkipCharsOrNewlinesWhileContinue; cc is the first char as shown
            // to the predicate, i.e. '\r' mapped to '\n')
            if (c == EOS || !predicateForFirstChar.Invoke(cc)) return 0;
            count = 1;
            char c0 = c;
            c = SkipAndPeek();
            if (cc == '\n') {
                if (c0 == '\r' && c == '\n') c = SkipAndPeek(); // consume the '\n' of "\r\n"
                RegisterNewline();
            }
        } else {
            // commit what the in-buffer scan has already accepted
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS) break;
            if (c != '\r' && c != '\n') {
                if (!predicate.Invoke(c)) break;
                count = unchecked(count + 1);
                if (count >= 0) c = SkipAndPeek();
                else { // overflow
                    count = unchecked(count - 1);
                    break;
                }
            } else {
                if (!predicate.Invoke('\n')) break;
                count = unchecked(count + 1);
                if (count >= 0) {
                    char c0 = c;
                    c = SkipAndPeek();
                    if (c0 == '\r' && c == '\n') c = SkipAndPeek();
                    RegisterNewline();
                } else {
                    // count wrapped around: undo the increment and stop before this newline
                    count = unchecked(count - 1);
                    break;
                }
            }
        }
        // record the whole skip as one increment of the tag seen on entry
        StateTag = unchecked(stateTag + 1);
        return count;
    }

    // Convenience overload: uses `predicate` for the first char as well as for the rest.
    public string ReadCharsOrNewlinesWhile(FSharpFunc predicate, bool normalizeNewlines) {
        return ReadCharsOrNewlinesWhile(predicate, predicate, normalizeNewlines);
    }

    // Reads chars as long as the predicates accept them and returns them as a string.
    // predicateForFirstChar is applied to the first char, predicate to the following ones;
    // any newline ('\r', "\r\n", '\n') is presented to the predicates as '\n'.
    // nCR / nCRLF count lone '\r' and "\r\n" for the optional newline normalization.
    // (The body continues on the following line.)
    public string ReadCharsOrNewlinesWhile(
        FSharpFunc predicateForFirstChar, FSharpFunc predicate, bool normalizeNewlines)
    {
        char* lineBegin = null;
        uint lineOffset = 0;
        int nCRLF = 0;
        int nCR = 0;
        char* ptr = Ptr;
        char* end = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r'
        if (ptr + 1 < PtrEnd) { // PtrEnd might be null
            // first char: tested with predicateForFirstChar
            char c = *ptr;
            ++ptr;
            if (c > '\r') {
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
            } else if (c == '\r') {
                if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                if (*ptr == '\n') {
                    ++ptr;
                    ++nCRLF;
                } else {
                    ++nCR;
                }
                lineBegin = ptr;
                ++lineOffset;
            } else {
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                if (c == '\n') {
                    lineBegin = ptr;
                    lineOffset = 1;
                }
            }
            // remaining chars: tested with predicate
            for (;;) {
                if (ptr >= end) goto EndOfBlock;
                c = *ptr;
                ++ptr;
                if (c > '\r') {
                    if (!predicate.Invoke(c)) break;
                } else if (c == '\r') {
                    if (!predicate.Invoke('\n')) break;
                    if (*ptr == '\n') {
                        ++ptr;
                        ++nCRLF;
                    } else {
                        ++nCR;
                    }
                    lineBegin = ptr;
                    ++lineOffset;
                } else {
                    if
(!predicate.Invoke(c)) break;
                    // (continuation of ReadCharsOrNewlinesWhile(predicateForFirstChar, predicate, normalizeNewlines))
                    if (c == '\n') {
                        lineBegin = ptr;
                        ++lineOffset;
                    }
                }
            }
            // the loop was left via break: step back onto the rejected char
            --ptr;
            char* ptr0 = Ptr;
            Ptr = ptr;
            int length = (int)Buffer.PositiveDistance(ptr0, ptr);
            if (lineOffset == 0) {
                ++StateTag;
                return new string(ptr0, 0, length);
            }
            RegisterNewlines(lineBegin, lineOffset);
            return !normalizeNewlines || (nCR | nCRLF) == 0
                   ? new string(ptr0, 0, length)
                   : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR);
        ReturnEmpty:
            // first char rejected: nothing consumed, state unchanged
            return "";
        }
    EndOfBlock:
        return ReadCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, predicateForFirstChar, predicate, normalizeNewlines);
    }

    // Fallback for ReadCharsOrNewlinesWhile(predicateForFirstChar, predicate, normalizeNewlines).
    // The pointer/newline arguments carry over what the in-buffer scan has already accepted
    // (not yet committed to Ptr). Continues char by char via Peek/SkipAndPeek and returns the
    // chars read since the position on entry, optionally with normalized newlines.
    private string ReadCharsOrNewlinesWhileContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR,
        FSharpFunc predicateForFirstChar, FSharpFunc predicate, bool normalizeNewlines)
    {
        var stateTag = StateTag;
        var indexToken = IndexToken; // marks where the returned string begins
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            c = Peek();
            char cc = c == '\r' ? '\n' : c; // the first char as shown to the predicate
            if (c == EOS || !predicateForFirstChar.Invoke(cc)) return "";
            count = 1;
            char c0 = c;
            c = SkipAndPeek();
            if (cc == '\n') {
                if (c0 == '\r') {
                    if (c == '\n') {
                        ++nCRLF;
                        c = SkipAndPeek();
                    } else {
                        ++nCR;
                    }
                }
                RegisterNewline();
            }
        } else {
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS) break;
            if (c != '\r' && c != '\n') {
                if (!predicate.Invoke(c)) break;
                count = unchecked(count + 1);
                if (count < 0) break; // stop once the counter wraps around
                c = SkipAndPeek();
            } else {
                if (!predicate.Invoke('\n')) break;
                count = unchecked(count + 1);
                if (count < 0) break;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 == '\r') {
                    if (c == '\n') {
                        ++nCRLF;
                        c = SkipAndPeek();
                    } else {
                        ++nCR;
                    }
                }
                RegisterNewline();
            }
        }
        StateTag = unchecked(stateTag + 1);
        string str = ReadFrom(indexToken);
        if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str;
        fixed (char* pStr = str)
            return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR);
    }

    // (The declaration started here continues on the following line.)
    public int
SkipCharsOrNewlinesWhile(FSharpFunc predicate, int minCount, int maxCount) {
        // Convenience overload (its `public int` is on the preceding line): uses `predicate`
        // for the first char as well as for the rest.
        return SkipCharsOrNewlinesWhile(predicate, predicate, minCount, maxCount);
    }

    // Skips at most maxCount chars accepted by the predicates (predicateForFirstChar for the
    // first char, predicate for the rest; newlines are presented as '\n' and counted once).
    // If fewer than minCount chars are accepted, nothing is consumed and 0 is returned;
    // otherwise the number of skipped chars is returned.
    // Throws ArgumentOutOfRangeException if maxCount < 0.
    // NOTE(review): FSharpFunc appears without type arguments in this text; the Invoke(char)
    // calls used as conditions suggest FSharpFunc<char,bool> — confirm against the repository file.
    public int SkipCharsOrNewlinesWhile(
        FSharpFunc predicateForFirstChar, FSharpFunc predicate, int minCount, int maxCount)
    {
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        char* lineBegin = null;
        uint lineOffset = 0;
        int nCRLF = 0;
        char* ptr = Ptr;
        if (ptr != null) {
            char* bufferEnd1 = unchecked(PtrEnd - 1); // - 1 to guarantee the lookahead for '\r'
            char* end2 = unchecked(ptr + maxCount);
            // use ptr + maxCount as the scan limit unless it wrapped around or lies beyond bufferEnd1
            char* end = end2 >= ptr && end2 <= bufferEnd1 ? end2 : bufferEnd1;
            if (ptr < end) {
                // first char: tested with predicateForFirstChar
                char c = *ptr;
                ++ptr;
                if (c > '\r') {
                    if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                } else if (c == '\r') {
                    if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                    if (*ptr == '\n') {
                        ++ptr;
                        ++nCRLF;
                        // "\r\n" counts as one char, so the limit may move one further
                        if (end < bufferEnd1) ++end;
                    }
                    lineBegin = ptr;
                    ++lineOffset;
                } else {
                    if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                    if (c == '\n') {
                        lineBegin = ptr;
                        ++lineOffset;
                    }
                }
                for (;;) {
                    if (ptr < end) {
                        c = *ptr;
                        ++ptr;
                        if (c > '\r') {
                            if (!predicate.Invoke(c)) break;
                        } else if (c == '\r') {
                            if (!predicate.Invoke('\n')) break;
                            if (*ptr == '\n') {
                                ++ptr;
                                ++nCRLF;
                                if (end < bufferEnd1) ++end;
                            }
                            lineBegin = ptr;
                            ++lineOffset;
                        } else {
                            if (!predicate.Invoke(c)) break;
                            if (c == '\n') {
                                lineBegin = ptr;
                                ++lineOffset;
                            }
                        }
                    } else {
                        // limit reached: hand over to the fallback if the limit was the buffer
                        // end, otherwise maxCount chars have been accepted
                        if (end >= bufferEnd1) goto EndOfBlock;
                        goto ReturnCount;
                    }
                }
                // the loop was left via break: step back onto the rejected char
                --ptr;
            ReturnCount:
                int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                if (count >= minCount) {
                    Ptr = ptr;
                    if (lineOffset == 0) {
                        ++StateTag;
                        return count;
                    } else {
                        RegisterNewlines(lineBegin, lineOffset);
                        return count;
                    }
                }
                // fewer than minCount chars: fall through without having modified any state
            ReturnEmpty:
                return 0;
            }
        }
    EndOfBlock:
        return SkipCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, predicateForFirstChar, predicate, minCount, maxCount);
    }

    // Fallback for the minCount/maxCount overload above.
    // (The parameter list and body continue on the following line.)
    private int SkipCharsOrNewlinesWhileContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
FSharpFunc predicateForFirstChar, FSharpFunc predicate, int minCount, int maxCount)
    {
        // (Body of SkipCharsOrNewlinesWhileContinue with minCount/maxCount; the first part of
        // its parameter list is on the preceding line.)
        // Snapshot of the state on entry, needed for backtracking when fewer than minCount
        // chars are accepted.
        var ptr0 = Ptr;
        var block0 = Block;
        var tag0 = StateTag;
        var line0 = _Line;
        var lineBegin0 = _LineBegin;
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            c = Peek();
            if (c == EOS || maxCount == 0) goto ReturnEmpty;
            if (c != '\r' && c != '\n') {
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                count = 1;
                c = SkipAndPeek();
            } else {
                // newlines are presented to the predicate as '\n'
                if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                count = 1;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 == '\r' && c == '\n') c = SkipAndPeek();
                RegisterNewline();
            }
        } else {
            // commit what the in-buffer scan has already accepted
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS || count == maxCount) break;
            if (c != '\r' && c != '\n') {
                if (!predicate.Invoke(c)) break;
                ++count;
                c = SkipAndPeek();
            } else {
                if (!predicate.Invoke('\n')) break;
                ++count;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 == '\r' && c == '\n') c = SkipAndPeek();
                RegisterNewline();
            }
        }
        if (count >= minCount) {
            StateTag = unchecked(tag0 + 1);
            return count;
        }
    ReturnEmpty:
        // backtrack
        Seek(ptr0, block0);
        _Line = line0;
        _LineBegin = lineBegin0;
        StateTag = tag0;
        return 0;
    }

    // Convenience overload: uses `predicate` for the first char as well as for the rest.
    public string ReadCharsOrNewlinesWhile(FSharpFunc predicate, int minCount, int maxCount, bool normalizeNewlines) {
        return ReadCharsOrNewlinesWhile(predicate, predicate, minCount, maxCount, normalizeNewlines);
    }

    // Reads at most maxCount chars accepted by the predicates (predicateForFirstChar for the
    // first char, predicate for the rest; newlines are presented as '\n' and counted once).
    // Throws ArgumentOutOfRangeException if maxCount < 0.
    // (The body continues on the following line.)
    public string ReadCharsOrNewlinesWhile(
        FSharpFunc predicateForFirstChar, FSharpFunc predicate, int minCount, int maxCount, bool normalizeNewlines)
    {
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        char* lineBegin = null;
        uint lineOffset = 0;
        int nCRLF = 0;
        int nCR = 0;
        char* ptr = Ptr;
        if (ptr != null) {
            char* bufferEnd1 = PtrEnd - 1; // - 1 to guarantee the lookahead for '\r'
            char* end2 = unchecked(ptr + maxCount);
            char* end = end2 >= ptr && end2 <= bufferEnd1 ?
end2 : bufferEnd1;
            // (Middle part of ReadCharsOrNewlinesWhile(predicateForFirstChar, predicate,
            // minCount, maxCount, normalizeNewlines); header on the preceding line, remainder
            // on the following line.)
            if (ptr < end) {
                // first char: tested with predicateForFirstChar
                char c = *ptr;
                ++ptr;
                if (c > '\r') {
                    if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                } else if (c == '\r') {
                    // newlines are presented to the predicate as '\n'
                    if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                    if (*ptr == '\n') {
                        ++ptr;
                        ++nCRLF;
                        // "\r\n" counts as one char, so the limit may move one further
                        if (end < bufferEnd1) ++end;
                    } else {
                        ++nCR;
                    }
                    lineBegin = ptr;
                    lineOffset = 1;
                } else {
                    if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                    if (c == '\n') {
                        lineBegin = ptr;
                        lineOffset = 1;
                    }
                }
                // remaining chars: tested with predicate
                for (;;) {
                    if (ptr < end) {
                        c = *ptr;
                        ++ptr;
                        if (c > '\r') {
                            if (!predicate.Invoke(c)) break;
                        } else if (c == '\r') {
                            if (!predicate.Invoke('\n')) break;
                            if (*ptr == '\n') {
                                ++ptr;
                                ++nCRLF;
                                if (end < bufferEnd1) ++end;
                            } else {
                                ++nCR;
                            }
                            lineBegin = ptr;
                            ++lineOffset;
                        } else {
                            if (!predicate.Invoke(c)) break;
                            if (c == '\n') {
                                lineBegin = ptr;
                                ++lineOffset;
                            }
                        }
                    } else {
                        // limit reached: hand over to the fallback if the limit was the buffer
                        // end, otherwise maxCount chars have been accepted
                        if (end >= bufferEnd1) goto EndOfBlock;
                        goto ReturnStringInBlock;
                    }
                }
                // the loop was left via break: step back onto the rejected char
                --ptr;
            ReturnStringInBlock:
                {
                    char* ptr0 = Ptr;
                    int length = (int)Buffer.PositiveDistance(ptr0, ptr);
                    // length counts "\r\n" as two chars; subtract the pairs for the minCount test
                    if (length - nCRLF >= minCount) {
                        Ptr = ptr;
                        if (lineOffset == 0) {
                            ++StateTag;
                            return new string(ptr0, 0, length);
                        }
                        RegisterNewlines(lineBegin, lineOffset);
                        return !normalizeNewlines || (nCR | nCRLF) == 0 ?
new string(ptr0, 0, length) : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR);
                        // (end of the in-buffer result branch of ReadCharsOrNewlinesWhile with
                        // minCount/maxCount)
                    }
                }
                // fewer than minCount chars: fall through without having modified any state
            ReturnEmpty:
                return "";
            }
        }
    EndOfBlock:
        return ReadCharsOrNewlinesWhileContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, predicateForFirstChar, predicate, minCount, maxCount, normalizeNewlines);
    }

    // Fallback for ReadCharsOrNewlinesWhile with minCount/maxCount. The pointer/newline
    // arguments carry over what the in-buffer scan has already accepted (not yet committed to
    // Ptr). Continues char by char via Peek/SkipAndPeek until EOS, a rejected char or maxCount
    // chars. Returns the accepted chars (optionally with normalized newlines) if at least
    // minCount were accepted; otherwise restores the state saved on entry and returns "".
    private string ReadCharsOrNewlinesWhileContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR,
        FSharpFunc predicateForFirstChar, FSharpFunc predicate,
        int minCount, int maxCount, bool normalizeNewlines)
    {
        // snapshot of the state on entry, needed for backtracking
        var ptr0 = Ptr;
        var block0 = Block;
        var tag0 = StateTag;
        var line0 = _Line;
        var lineBegin0 = _LineBegin;
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c;
        int count;
        if (index == 0) {
            c = Peek();
            if (c == EOS || maxCount == 0) goto ReturnEmpty;
            if (c != '\r' && c != '\n') {
                count = 1;
                if (!predicateForFirstChar.Invoke(c)) goto ReturnEmpty;
                c = SkipAndPeek();
            } else {
                // newlines are presented to the predicate as '\n'
                if (!predicateForFirstChar.Invoke('\n')) goto ReturnEmpty;
                count = 1;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 == '\r') {
                    if (c == '\n') {
                        ++nCRLF;
                        c = SkipAndPeek();
                    } else {
                        ++nCR;
                    }
                }
                RegisterNewline();
            }
        } else {
            if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
            c = SkipAndPeek(index);
            count = (int)index - nCRLF;
        }
        for (;;) {
            if (c == EOS || count == maxCount) break;
            if (c != '\r' && c != '\n') {
                if (!predicate.Invoke(c)) break;
                ++count;
                c = SkipAndPeek();
            } else {
                if (!predicate.Invoke('\n')) break;
                ++count;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 == '\r') {
                    if (c == '\n') {
                        ++nCRLF;
                        c = SkipAndPeek();
                    } else {
                        ++nCR;
                    }
                }
                RegisterNewline();
            }
        }
        if (count >= minCount) {
            StateTag = unchecked(tag0 + 1);
            string str = ReadFrom(ptr0, block0);
            if ((nCR | nCRLF) == 0 || !normalizeNewlines) return str;
            fixed (char* pStr = str)
                return Text.CopyWithNormalizedNewlines(pStr, str.Length, nCRLF, nCR);
        }
    ReturnEmpty:
        // backtrack
        Seek(ptr0, block0);
        _Line = line0;
        _LineBegin = lineBegin0;
        StateTag = tag0;
        return "";
    }

    // Compares str1[i] with str2[i] for i = 3 .. length - 1; the first three chars are not
    // examined here. (The body continues on the following line.)
    private static bool Rest3OfStringEquals(char* str1, char* str2, int length) {
for (int i = 3; i < length; ++i) {
            // (body of Rest3OfStringEquals, whose header is on the preceding line)
            if (str1[i] != str2[i]) goto ReturnFalse;
        }
        return true;
    ReturnFalse:
        return false;
    }

    // Like Rest3OfStringEquals, but each char of str1 is first mapped through
    // CaseFoldTable.FoldedChars; cfStr2 is compared as-is. Only indices 3 .. length - 1 are
    // examined.
    private static bool Rest3OfStringEqualsCaseFolded(char* str1, char* cfStr2, int length) {
        char* cftable = CaseFoldTable.FoldedChars;
        for (int i = 3; i < length; ++i) {
            if (cftable[str1[i]] != cfStr2[i]) goto ReturnFalse;
        }
        return true;
    ReturnFalse:
        return false;
    }

    // Skips chars (newlines counted once) until `str` is found at the current position, or
    // maxCount chars have been skipped, or the end of the stream is reached. Returns the
    // number of skipped chars; foundString reports whether the stream is positioned at `str`.
    // Throws NullReferenceException for a null str, ArgumentException for an empty str and
    // ArgumentOutOfRangeException for a negative maxCount.
    // (The body continues on the following line.)
    public int SkipCharsOrNewlinesUntilString(string str, int maxCount, out bool foundString) {
        int strLength = str.Length; // throws if str is null
        if (strLength == 0) throw new ArgumentException("The string argument is empty.");
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        char* lineBegin = null;
        fixed (char* pStr = str) {
            uint lineOffset = 0;
            int nCRLF = 0;
            char* ptr = Ptr;
            if (ptr != null) {
                char* bufferEnd = PtrEnd;
                // last position at which a full match of str still fits into the buffer
                char* end1 = unchecked(bufferEnd - strLength);
                if (end1 >= ptr && end1 < bufferEnd) {
                    char* end2 = unchecked(ptr + maxCount);
                    char* end = end2 < ptr || end1 <= end2 ?
end1 : end2;
                    // (continuation of SkipCharsOrNewlinesUntilString(str, maxCount, out foundString))
                    for (;;) {
                        char c = *ptr;
                        if (c != pStr[0]) {
                            if (ptr == end) break;
                            ++ptr;
                            // ordinary char (no newline handling needed)
                            if (c > '\r' || c == '\t') continue;
                        } else {
                            Debug.Assert(ptr + strLength <= PtrEnd);
                            // first char matches: compare chars 1 and 2 inline, the rest via helper
                            if (strLength == 1 || (ptr[1] == pStr[1] &&
                                (strLength == 2 || (ptr[2] == pStr[2] &&
                                 (strLength == 3 || Rest3OfStringEquals(ptr, pStr, strLength))))))
                            {
                                foundString = true;
                                int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                                Ptr = ptr;
                                if (lineOffset == 0) {
                                    // the tag only changes if the position changed
                                    if (count != 0) ++StateTag;
                                    return count;
                                } else {
                                    RegisterNewlines(lineBegin, lineOffset);
                                    return count;
                                }
                            }
                            // no match here: treat the char like any other
                            c = *ptr;
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        }
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                lineBegin = ptr;
                                ++lineOffset;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < end1) ++end;
                                else if (ptr > end) break;
                                continue;
                            }
                        } else if (c != '\n') continue;
                        lineBegin = ptr;
                        ++lineOffset;
                    } // for
                    if (ptr < end1) {
                        // stopped before the buffer-imposed limit: maxCount chars were skipped
                        foundString = false;
                        int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                        Ptr = ptr;
                        if (lineOffset == 0) {
                            if (count != 0) ++StateTag;
                            return count;
                        } else {
                            RegisterNewlines(lineBegin, lineOffset);
                            return count;
                        }
                    }
                }
            }
            return SkipCharsOrNewlinesUntilStringContinue(ptr, lineBegin, lineOffset, nCRLF, pStr, strLength, maxCount, out foundString);
        }
    }

    // Fallback for SkipCharsOrNewlinesUntilString(str, maxCount, out foundString). Commits the
    // progress of the in-buffer scan and then continues char by char, testing for the string
    // with Match(pStr, strLength) whenever the current char equals pStr[0].
    // (The final statements are on the following line.)
    private int SkipCharsOrNewlinesUntilStringContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
        char* pStr, int strLength, int maxCount, out bool foundString)
    {
        var stateTag = StateTag;
        foundString = false;
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char c = SkipAndPeek((uint)index);
        int count = (int)index - nCRLF;
        for (;;) {
            if (c != pStr[0] || !Match(pStr, strLength)) {
                if (c == EOS || count == maxCount) break;
                ++count;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 <= '\r') {
                    if (c0 == '\r') {
                        if (c == '\n') {
                            c = SkipAndPeek();
                        }
                    } else if (c0 != '\n') continue;
                    RegisterNewline();
                }
            } else {
                foundString = true;
                break;
            }
        }
        StateTag = count == 0 ?
stateTag : unchecked(stateTag + 1);
        // (end of SkipCharsOrNewlinesUntilStringContinue: the tag seen on entry is kept when
        // nothing was skipped and incremented once otherwise)
        return count;
    }

    // Variant of SkipCharsOrNewlinesUntilString that also returns the skipped chars.
    // Returns the number of skipped chars (newlines counted once). If `str` is found, the
    // out parameter receives the skipped chars (with normalized newlines if requested);
    // otherwise it is set to null.
    // Throws NullReferenceException for a null str, ArgumentException for an empty str and
    // ArgumentOutOfRangeException for a negative maxCount.
    // (The body continues on the following line.)
    public int SkipCharsOrNewlinesUntilString(
        string str, int maxCount, bool normalizeNewlines, out string skippedCharsIfStringFoundOtherwiseNull)
    {
        int strLength = str.Length; // throws if str is null
        if (strLength == 0) throw new ArgumentException("The string argument is empty.");
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        fixed (char* pStr = str) {
            char* lineBegin = null;
            uint lineOffset = 0;
            int nCRLF = 0;
            int nCR = 0;
            char* ptr = Ptr;
            if (ptr != null) {
                // last position at which a full match of str still fits into the buffer
                char* end1 = unchecked(PtrEnd - strLength);
                if (end1 >= ptr && end1 < PtrEnd) {
                    char* end2 = unchecked(ptr + maxCount);
                    char* end = end2 < ptr || end1 <= end2 ? end1 : end2;
                    for (;;) {
                        char c = *ptr;
                        if (c != pStr[0]) {
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        } else {
                            Debug.Assert(ptr + strLength <= PtrEnd);
                            // first char matches: compare chars 1 and 2 inline, the rest via helper
                            if (strLength == 1 || (ptr[1] == pStr[1] &&
                                (strLength == 2 || (ptr[2] == pStr[2] &&
                                 (strLength == 3 || Rest3OfStringEquals(ptr, pStr, strLength))))))
                            {
                                char* ptr0 = Ptr;
                                if (ptr != ptr0) {
                                    Ptr = ptr;
                                    int length = (int)Buffer.PositiveDistance(ptr0, ptr);
                                    if (lineOffset == 0) {
                                        if (length != 0) ++StateTag;
                                        skippedCharsIfStringFoundOtherwiseNull = new string(ptr0, 0, length);
                                        return length;
                                    } else {
                                        RegisterNewlines(lineBegin, lineOffset);
                                        skippedCharsIfStringFoundOtherwiseNull = !normalizeNewlines || (nCR | nCRLF) == 0 ?
new string(ptr0, 0, length) : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR);
                                        // (continuation of the string-returning
                                        // SkipCharsOrNewlinesUntilString; "\r\n" pairs count once)
                                        return length - nCRLF;
                                    }
                                } else {
                                    // string found right at the starting position
                                    skippedCharsIfStringFoundOtherwiseNull = "";
                                    return 0;
                                }
                            }
                            // no match here: treat the char like any other
                            c = *ptr;
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        }
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                lineBegin = ptr;
                                ++lineOffset;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < end1) ++end;
                                else if (ptr > end) break;
                                continue;
                            } else {
                                ++nCR;
                            }
                        } else if (c != '\n') continue;
                        lineBegin = ptr;
                        ++lineOffset;
                    } // for
                    if (ptr < end1) {
                        // stopped before the buffer-imposed limit: maxCount chars were skipped
                        skippedCharsIfStringFoundOtherwiseNull = null;
                        int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                        Ptr = ptr;
                        if (lineOffset == 0) {
                            if (count != 0) ++StateTag;
                            return count;
                        } else {
                            RegisterNewlines(lineBegin, lineOffset);
                            return count;
                        }
                    }
                }
            }
            return SkipCharsOrNewlinesUntilStringContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, pStr, strLength, maxCount, normalizeNewlines, out skippedCharsIfStringFoundOtherwiseNull);
        }
    }

    // Fallback for the string-returning SkipCharsOrNewlinesUntilString. Commits the progress
    // of the in-buffer scan and then continues char by char, testing for the string with
    // Match(pStr, strLength) whenever the current char equals pStr[0].
    // (The body continues on the following line.)
    private int SkipCharsOrNewlinesUntilStringContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR,
        char* pStr, int strLength, int maxCount, bool normalizeNewlines,
        out string skippedCharsIfStringFoundOtherwiseNull)
    {
        var stateTag = StateTag;
        var indexToken = IndexToken; // marks where the skipped chars begin
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        int count = (int)index - nCRLF;
        char c = SkipAndPeek(index);
        for (;;) {
            if (c != pStr[0] || !Match(pStr, strLength)) {
                if (c == EOS || count == maxCount) break;
                ++count;
                char c0 = c;
                c = SkipAndPeek();
                if (c0 <= '\r') {
                    if (c0 == '\r') {
                        if (c == '\n') {
                            c = SkipAndPeek();
                            ++nCRLF;
                        } else {
                            ++nCR;
                        }
                    } else if (c0 != '\n') continue;
                    RegisterNewline();
                }
            } else { // found string
                if (count != 0) {
                    StateTag = unchecked(stateTag + 1);
                    var s = ReadFrom(indexToken);
                    if (!normalizeNewlines || (nCR | nCRLF) == 0) {
                        skippedCharsIfStringFoundOtherwiseNull = s;
                        return count;
                    } else {
                        fixed (char* ps = s)
                            skippedCharsIfStringFoundOtherwiseNull =
Text.CopyWithNormalizedNewlines(ps, s.Length, nCRLF, nCR);
                        // (end of the string-returning SkipCharsOrNewlinesUntilStringContinue)
                        return count;
                    }
                } else {
                    // string found without skipping anything: keep the tag seen on entry
                    StateTag = stateTag;
                    skippedCharsIfStringFoundOtherwiseNull = "";
                    return 0;
                }
            }
        }
        // string not found
        StateTag = count == 0 ? stateTag : unchecked(stateTag + 1);
        skippedCharsIfStringFoundOtherwiseNull = null;
        return count;
    }

    // Like SkipCharsOrNewlinesUntilString(str, maxCount, out foundString), but the stream
    // chars are mapped through CaseFoldTable.FoldedChars before being compared with
    // caseFoldedString, which is compared as given.
    // Throws NullReferenceException for a null string, ArgumentException for an empty string
    // and ArgumentOutOfRangeException for a negative maxCount.
    // (The body continues on the following line.)
    public int SkipCharsOrNewlinesUntilCaseFoldedString(
        string caseFoldedString, int maxCount, out bool foundString)
    {
        int strLength = caseFoldedString.Length; // throws if str is null
        if (strLength == 0) throw new ArgumentException("The string argument is empty.");
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        char* lineBegin = null;
        fixed (char* pStr = caseFoldedString) {
            uint lineOffset = 0;
            int nCRLF = 0;
            char* ptr = Ptr;
            if (ptr != null) {
                char* bufferEnd = PtrEnd;
                // last position at which a full match still fits into the buffer
                char* end1 = unchecked(bufferEnd - strLength);
                if (end1 >= ptr && end1 < bufferEnd) {
                    char* end2 = unchecked(ptr + maxCount);
                    char* end = end2 < ptr || end1 <= end2 ? end1 : end2;
                    char* cftable = CaseFoldTable.FoldedChars;
                    for (;;) {
                        char c = cftable[*ptr];
                        if (c != pStr[0]) {
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        } else {
                            Debug.Assert(ptr + strLength <= PtrEnd);
                            // first char matches: compare chars 1 and 2 inline, the rest via helper
                            if (strLength == 1 || (cftable[ptr[1]] == pStr[1] &&
                                (strLength == 2 || (cftable[ptr[2]] == pStr[2] &&
                                 (strLength == 3 || Rest3OfStringEqualsCaseFolded(ptr, pStr, strLength))))))
                            {
                                foundString = true;
                                int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                                Ptr = ptr;
                                if (lineOffset == 0) {
                                    if (count != 0) ++StateTag;
                                    return count;
                                } else {
                                    RegisterNewlines(lineBegin, lineOffset);
                                    return count;
                                }
                            }
                            c = *ptr; // we don't need to casefold here
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        }
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                lineBegin = ptr;
                                ++lineOffset;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < end1) ++end;
                                else if (ptr > end) break;
                                continue;
                            }
                        } else if (c != '\n') continue;
                        lineBegin = ptr;
                        ++lineOffset;
                    } // for
                    if (ptr < end1) {
                        foundString = false;
                        int count =
(int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                        // (continuation of SkipCharsOrNewlinesUntilCaseFoldedString(caseFoldedString,
                        // maxCount, out foundString): string not found within maxCount chars)
                        Ptr = ptr;
                        if (lineOffset == 0) {
                            if (count != 0) ++StateTag;
                            return count;
                        } else {
                            RegisterNewlines(lineBegin, lineOffset);
                            return count;
                        }
                    }
                }
            }
            return SkipCharsOrNewlinesUntilCaseFoldedStringContinue(ptr, lineBegin, lineOffset, nCRLF, pStr, strLength, maxCount, out foundString);
        }
    }

    // Fallback for SkipCharsOrNewlinesUntilCaseFoldedString(caseFoldedString, maxCount,
    // out foundString). Commits the progress of the in-buffer scan, then continues char by
    // char with every peeked char mapped through CaseFoldTable.FoldedChars, testing for the
    // string with MatchCaseFolded(pStr, strLength). Returns the number of skipped chars
    // (newlines counted once).
    private int SkipCharsOrNewlinesUntilCaseFoldedStringContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF,
        char* pStr, int strLength, int maxCount, out bool foundString)
    {
        var stateTag = StateTag;
        foundString = false;
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        char* cftable = CaseFoldTable.FoldedChars;
        char c = cftable[SkipAndPeek((uint)index)];
        int count = (int)index - nCRLF;
        for (;;) {
            if (c != pStr[0] || !MatchCaseFolded(pStr, strLength)) {
                if (c == EOS || count == maxCount) break;
                ++count;
                char c0 = c;
                c = cftable[SkipAndPeek()];
                if (c0 <= '\r') {
                    if (c0 == '\r') {
                        if (c == '\n') {
                            c = cftable[SkipAndPeek()];
                            ++nCRLF;
                        }
                    } else if (c0 != '\n') continue;
                    RegisterNewline();
                }
            } else {
                foundString = true;
                break;
            }
        }
        // keep the tag seen on entry when nothing was skipped, otherwise increment it once
        StateTag = count == 0 ? stateTag : unchecked(stateTag + 1);
        return count;
    }

    // Case-folded counterpart of the string-returning SkipCharsOrNewlinesUntilString: the out
    // parameter receives the skipped chars if the string is found and null otherwise.
    // Throws NullReferenceException for a null string, ArgumentException for an empty string
    // and ArgumentOutOfRangeException for a negative maxCount.
    // (The body continues on the following line.)
    public int SkipCharsOrNewlinesUntilCaseFoldedString(
        string caseFoldedString, int maxCount, bool normalizeNewlines,
        out string skippedCharsIfStringFoundOtherwiseNull)
    {
        int strLength = caseFoldedString.Length; // throws if str is null
        if (strLength == 0) throw new ArgumentException("The string argument is empty.");
        if (maxCount < 0) throw new ArgumentOutOfRangeException("maxCount", "maxCount is negative.");
        fixed (char* pStr = caseFoldedString) {
            char* lineBegin = null;
            uint lineOffset = 0;
            int nCRLF = 0;
            int nCR = 0;
            char* ptr = Ptr;
            if (ptr != null) {
                char* bufferEnd = PtrEnd;
                // last position at which a full match still fits into the buffer
                char* end1 = unchecked(bufferEnd - strLength);
                if (end1 >= ptr && end1 < bufferEnd) {
                    char* end2 = unchecked(ptr + maxCount);
                    char* end = end2 < ptr || end1 <= end2 ?
end1 : end2;
                    // (continuation of the string-returning SkipCharsOrNewlinesUntilCaseFoldedString)
                    char* cftable = CaseFoldTable.FoldedChars;
                    for (;;) {
                        char c = cftable[*ptr];
                        if (c != pStr[0]) {
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        } else {
                            Debug.Assert(ptr + strLength <= PtrEnd);
                            // first char matches: compare chars 1 and 2 inline, the rest via helper
                            if (strLength == 1 || (cftable[ptr[1]] == pStr[1] &&
                                (strLength == 2 || (cftable[ptr[2]] == pStr[2] &&
                                 (strLength == 3 || Rest3OfStringEqualsCaseFolded(ptr, pStr, strLength))))))
                            {
                                char* ptr0 = Ptr;
                                if (ptr != ptr0) {
                                    Ptr = ptr;
                                    int length = (int)Buffer.PositiveDistance(ptr0, ptr);
                                    if (lineOffset == 0) {
                                        if (length != 0) ++StateTag;
                                        skippedCharsIfStringFoundOtherwiseNull = new string(ptr0, 0, length);
                                        return length;
                                    } else {
                                        RegisterNewlines(lineBegin, lineOffset);
                                        skippedCharsIfStringFoundOtherwiseNull = !normalizeNewlines || (nCR | nCRLF) == 0
                                            ? new string(ptr0, 0, length)
                                            : Text.CopyWithNormalizedNewlines(ptr0, length, nCRLF, nCR);
                                        // "\r\n" pairs count once
                                        return length - nCRLF;
                                    }
                                } else {
                                    // string found right at the starting position
                                    skippedCharsIfStringFoundOtherwiseNull = "";
                                    return 0;
                                }
                            }
                            c = *ptr; // we don't need to casefold here
                            if (ptr == end) break;
                            ++ptr;
                            if (c > '\r' || c == '\t') continue;
                        }
                        if (c == '\r') {
                            if (*ptr == '\n') {
                                ++ptr;
                                lineBegin = ptr;
                                ++lineOffset;
                                ++nCRLF;
                                // "\r\n" counts as one char, so the limit may move one further
                                if (end < end1) ++end;
                                else if (ptr > end) break;
                                continue;
                            } else {
                                ++nCR;
                            }
                        } else if (c != '\n') continue;
                        lineBegin = ptr;
                        ++lineOffset;
                    } // for
                    if (ptr < end1) {
                        // stopped before the buffer-imposed limit: maxCount chars were skipped
                        skippedCharsIfStringFoundOtherwiseNull = null;
                        int count = (int)Buffer.PositiveDistance(Ptr, ptr) - nCRLF;
                        Ptr = ptr;
                        if (lineOffset == 0) {
                            if (count != 0) ++StateTag;
                            return count;
                        } else {
                            RegisterNewlines(lineBegin, lineOffset);
                            return count;
                        }
                    }
                }
            }
            return SkipCharsOrNewlinesUntilCaseFoldedStringContinue(ptr, lineBegin, lineOffset, nCRLF, nCR, pStr, strLength, maxCount, normalizeNewlines, out skippedCharsIfStringFoundOtherwiseNull);
        }
    }

    // Fallback for the string-returning SkipCharsOrNewlinesUntilCaseFoldedString.
    // (The last parameter name and the body are on the following line.)
    private int SkipCharsOrNewlinesUntilCaseFoldedStringContinue(
        char* ptr, char* lineBegin, uint lineOffset, int nCRLF, int nCR,
        char* pStr, int strLength, int maxCount, bool normalizeNewlines, out string
skippedCharsIfStringFoundOtherwiseNull)
    {
        // (Body of the string-returning SkipCharsOrNewlinesUntilCaseFoldedStringContinue; the
        // rest of its parameter list is on the preceding line.)
        // Commits the progress of the in-buffer scan, then continues char by char with every
        // peeked char mapped through CaseFoldTable.FoldedChars.
        var stateTag = StateTag;
        var indexToken = IndexToken; // marks where the skipped chars begin
        if (lineOffset != 0) RegisterNewlines(lineBegin, lineOffset);
        uint index = Buffer.PositiveDistance(Ptr, ptr);
        int count = (int)index - nCRLF;
        char* cftable = CaseFoldTable.FoldedChars;
        char c = cftable[SkipAndPeek(index)];
        for (;;) {
            if (c != pStr[0] || !MatchCaseFolded(pStr, strLength)) {
                if (c == EOS || count == maxCount) break;
                ++count;
                char c0 = c;
                c = cftable[SkipAndPeek()];
                if (c0 <= '\r') {
                    if (c0 == '\r') {
                        if (c == '\n') {
                            c = cftable[SkipAndPeek()];
                            ++nCRLF;
                        } else {
                            ++nCR;
                        }
                    } else if (c0 != '\n') continue;
                    RegisterNewline();
                }
            } else { // found string
                if (count != 0) {
                    StateTag = unchecked(stateTag + 1);
                    var s = ReadFrom(indexToken);
                    if ((nCR | nCRLF) == 0 || !normalizeNewlines) {
                        skippedCharsIfStringFoundOtherwiseNull = s;
                        return count;
                    } else {
                        fixed (char* ps = s)
                            skippedCharsIfStringFoundOtherwiseNull = Text.CopyWithNormalizedNewlines(ps, s.Length, nCRLF, nCR);
                        return count;
                    }
                } else {
                    // string found without skipping anything: keep the tag seen on entry
                    StateTag = stateTag;
                    skippedCharsIfStringFoundOtherwiseNull = "";
                    return 0;
                }
            }
        }
        // string not found (the assignment is completed on the following line)
        StateTag = count == 0 ?
stateTag : unchecked(stateTag + 1);
        // (end of the string-returning SkipCharsOrNewlinesUntilCaseFoldedStringContinue)
        skippedCharsIfStringFoundOtherwiseNull = null;
        return count;
    }

}

// Immutable snapshot of a stream's state: buffer position (Ptr, Block), state tag, line
// bookkeeping, user state and name, all copied from the stream by the constructor below.
// A zero-initialized value is treated as invalid (see the `Line <= 0` test in IndexToken).
// NOTE(review): TUserState is used below although no type parameter list is visible on this
// declaration; presumably the declaration is generic (CharStreamState<TUserState>, with a
// matching generic CharStream type) and the angle-bracket part was lost in this text —
// confirm against the repository file.
public unsafe struct CharStreamState {
#if DEBUG
    internal readonly CharStream CharStream;
    private long Index { get { return GetIndex(CharStream); } }
#endif
    internal readonly char* Ptr;
    internal readonly int Block;
#if SMALL_STATETAG
    public readonly int Tag;
#else
    public readonly long Tag;
#endif
    public readonly long Line;
    public readonly long LineBegin;
    public readonly TUserState UserState;
    public readonly string Name;

    // Public (though undocumented) as long as the .NET JIT doesn't
    // always inline CharStream.State
    public CharStreamState(CharStream charStream) {
    #if DEBUG
        CharStream = charStream;
    #endif
        Ptr = charStream.Ptr;
        Block = charStream.Block;
        Tag = charStream.StateTag;
        Line = charStream._Line;
        LineBegin = charStream._LineBegin;
        UserState = charStream._UserState;
        Name = charStream._Name;
    }

    // Index token for the captured position.
    // Throws InvalidOperationException for a zero-initialized state.
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    public CharStreamIndexToken IndexToken { get {
        if (Line <= 0) // tests for a zero-initialized state
            throw new InvalidOperationException("The CharStreamState is invalid.");
        return new CharStreamIndexToken(
                   #if DEBUG
                       CharStream,
                   #endif
                       Ptr, Block);
    } }

    // On .NET calling an instance method of a generic struct can be more
    // expensive than calling an instance method of a generic class
    // (when the type parameter value is not statically known at the call
    // site and isn't a value type that makes the .NET JIT specialize
    // the code).
    //
    // Moving the actual implementations of the following methods into
    // the CharStream class allows the .NET JIT to inline them,
    // so that we effectively replace struct method calls with cheaper
    // class method calls.
public long GetIndex(CharStream charStreamFromWhichStateWasRetrieved) {
        // (member of struct CharStreamState) forwards to the stream's internal GetIndex(ref state)
        return charStreamFromWhichStateWasRetrieved.GetIndex(ref this);
    }
    // Forwards to the stream's internal GetPosition(ref state).
    public Position GetPosition(CharStream charStreamFromWhichStateWasRetrieved) {
        return charStreamFromWhichStateWasRetrieved.GetPosition(ref this);
    }
}

/// <summary>Provides read‐access to a sequence of UTF‐16 chars.</summary>
// NOTE(review): as written the class derives from a type of the same name, and TUserState is
// used further down without being declared; presumably this is the generic
// CharStream<TUserState> : CharStream and the type parameter list was lost in this text —
// confirm against the repository file.
// All constructors below simply forward their arguments to the corresponding base constructor.
public unsafe sealed class CharStream : CharStream {
    // we don't have a public constructor that only takes a string to avoid potential confusion with a filepath constructor
    internal CharStream(string chars) : base(chars) { }

    public CharStream(string chars, int index, int length) : base(chars, index, length) {}

    public CharStream(string chars, int index, int length, long streamIndexOffset)
           : base(chars, index, length, streamIndexOffset) { }

    public CharStream(char[] chars, int index, int length) : base(chars, index, length) { }

    public CharStream(char[] chars, int index, int length, long streamIndexOffset)
           : base(chars, index, length, streamIndexOffset) { }

    public CharStream(char* chars, int length) : base(chars, length) { }

    public CharStream(char* chars, int length, long streamIndexOffset)
           : base(chars, length, streamIndexOffset) { }

    internal CharStream(string chars, char* pChars, char* begin, int length)
             : base(chars, pChars, begin, length) { }

    public CharStream(string path, Encoding encoding) : base(path, encoding) { }

    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
           : base(path, encoding, detectEncodingFromByteOrderMarks) { }

    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks,
                      int blockSize, int blockOverlap, int byteBufferLength)
           : base(path, encoding, detectEncodingFromByteOrderMarks, blockSize, blockOverlap, byteBufferLength) { }

    public CharStream(Stream stream, Encoding encoding) : base(stream, encoding) { }

    public CharStream(Stream stream, bool leaveOpen, Encoding encoding) : base(stream, leaveOpen, encoding) { }

    public CharStream(Stream stream,
bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks) : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks) { } public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int blockSize, int blockOverlap, int byteBufferLength) : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, blockSize, blockOverlap, byteBufferLength) {} internal TUserState _UserState; public TUserState UserState { get { return _UserState; } set { _UserState = value; ++StateTag; } } [DebuggerBrowsable(DebuggerBrowsableState.Never)] public CharStreamState State { get { return new CharStreamState(this); } } // GetIndex and GetPosition are helper methods for CharStreamState internal long GetIndex(ref CharStreamState state) { if (state.Line <= 0) // tests for a zero-initialized state throw new InvalidOperationException("The CharStreamState is invalid."); #if DEBUG Debug.Assert(this == state.CharStream); #endif return GetIndex(state.Ptr, state.Block); } internal Position GetPosition(ref CharStreamState state) { if (state.Line <= 0) // tests for a zero-initialized state throw new InvalidOperationException("The CharStreamState is invalid."); #if DEBUG Debug.Assert(this == state.CharStream); #endif long index = GetIndex(state.Ptr, state.Block); return new Position(state.Name, index, state.Line, index - state.LineBegin + 1); } // Passing a large struct by value is suboptimal, so for optimization purposes // we define internal overloads that take ref arguments. Unfortunately, C#/F# // doesn't have const-refs, so we can't make these overloads public (at least, // not without risking heart attacks within certain user demographics of this library). // An alternative would be to move the following methods into the CharStreamState class, // but IMHO the resulting API would feel less intuitive and be somewhat less disoverable. 
/// <summary>Restores the stream to the given state (position, state tag, line count,
/// line begin, user state and name).</summary>
/// <exception cref="ArgumentException">The state is zero-initialized.</exception>
public void BacktrackTo(CharStreamState<TUserState> state) {
    BacktrackTo(ref state);
}
internal void BacktrackTo(ref CharStreamState<TUserState> state) {
    if (state.Line <= 0) // tests for zero-initialized states
        throw new ArgumentException("The CharStreamState is invalid.");
#if DEBUG
    Debug.Assert(this == state.CharStream);
#endif
    Seek(state.Ptr, state.Block);
    // Seek changes the StateTag, so the tag is restored after seeking
    StateTag   = state.Tag;
    _Line      = state.Line;
    _LineBegin = state.LineBegin;
    _UserState = state.UserState;
    _Name      = state.Name;
}

/// <summary>Returns the chars between the position of the given state and the current position.
/// Newlines are only normalized if requested and if the line count differs from the one
/// stored in the state.</summary>
/// <exception cref="ArgumentException">The state is zero-initialized.</exception>
public string ReadFrom(CharStreamState<TUserState> stateWhereStringBegins, bool normalizeNewlines) {
    return ReadFrom(ref stateWhereStringBegins, normalizeNewlines);
}
internal string ReadFrom(ref CharStreamState<TUserState> stateWhereStringBegins, bool normalizeNewlines) {
    if (stateWhereStringBegins.Line <= 0) // tests for zero-initialized states
        throw new ArgumentException("The CharStreamState is invalid.");
#if DEBUG
    Debug.Assert(this == stateWhereStringBegins.CharStream);
#endif
    string str = ReadFrom(stateWhereStringBegins.Ptr, stateWhereStringBegins.Block);
    // an unchanged line count means no newline was registered in between
    if (!normalizeNewlines || _Line == stateWhereStringBegins.Line) return str;
    return Text.NormalizeNewlines(str);
}

/// <summary>Creates a new stream over the chars between the position of the given state
/// and the current position. The new stream takes over the state tag, line count, line begin
/// and name of the given state; the user state is not copied.</summary>
/// <exception cref="ArgumentException">The state is zero-initialized, or the current
/// position lies before the position of the state.</exception>
public CharStream<TUserState> CreateSubstream(CharStreamState<TUserState> stateWhereSubstreamBegins) {
    return CreateSubstream(ref stateWhereSubstreamBegins);
}
internal CharStream<TUserState> CreateSubstream(ref CharStreamState<TUserState> stateWhereSubstreamBegins) {
    if (stateWhereSubstreamBegins.Line <= 0) // tests for zero-initialized states
        throw new ArgumentException("The CharStreamState is invalid.");
#if DEBUG
    Debug.Assert(this == stateWhereSubstreamBegins.CharStream);
#endif
    CharStream<TUserState> subStream;
    if (IsSingleBlockStream) {
        // the CharStream has only one block, so it's safe to
        // construct a new CharStream from a pointer into the original buffer
        char* ptr0 = stateWhereSubstreamBegins.Ptr;
        if (ptr0 == null) ptr0 = BufferEnd; // a null pointer stands for the end of the stream
        char* end = Ptr;
        if (end == null) end = BufferEnd;
        if (end < ptr0) throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState.");
        int length = (int)Buffer.PositiveDistance(ptr0, end);
        subStream = new CharStream<TUserState>(BufferString, BufferStringPointer, ptr0, length);
        var indexOfFirstChar = Buffer.PositiveDistance(BufferBegin, ptr0) + _IndexOfFirstChar;
        subStream.IndexOfFirstCharInBlock = indexOfFirstChar;
        subStream._IndexOfFirstChar = indexOfFirstChar;
    } else if (Block == stateWhereSubstreamBegins.Block && Ptr != null && stateWhereSubstreamBegins.Ptr != null) {
        // both positions lie within the current block, so the chars can be copied directly
        char* ptr0 = stateWhereSubstreamBegins.Ptr;
        char* end = Ptr;
        if (end < ptr0) throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState.");
        int length = (int)Buffer.PositiveDistance(ptr0, end);
        string subString = new String(ptr0, 0, length);
        subStream = new CharStream<TUserState>(subString);
        var indexOfFirstChar = Buffer.PositiveDistance(BufferBegin, ptr0) + _IndexOfFirstChar;
        subStream.IndexOfFirstCharInBlock = indexOfFirstChar;
        subStream._IndexOfFirstChar = indexOfFirstChar;
    } else {
        // general case: read the chars via ReadFrom (without newline normalization)
        var subString = ReadFrom(ref stateWhereSubstreamBegins, false);
        subStream = new CharStream<TUserState>(subString);
        var indexOfFirstChar = GetIndex(stateWhereSubstreamBegins.Ptr, stateWhereSubstreamBegins.Block);
        subStream.IndexOfFirstCharInBlock = indexOfFirstChar;
        subStream._IndexOfFirstChar = indexOfFirstChar;
    }
    subStream.StateTag = stateWhereSubstreamBegins.Tag;
    subStream._Line = stateWhereSubstreamBegins.Line;
    subStream._LineBegin = stateWhereSubstreamBegins.LineBegin;
    subStream._Name = stateWhereSubstreamBegins.Name;
#if DEBUG
    ++SubstreamCount.Value;
    subStream.ParentSubstreamCount = SubstreamCount;
#endif
    return subStream;
}

}

}

#endif // !LOW_TRUST
================================================ FILE: FParsecCS/CharStreamLT.cs ================================================
// Copyright (c) Stephan Tolksdorf 2007-2011
// License: Simplified BSD License. See accompanying documentation.
#if LOW_TRUST

using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Diagnostics;
using System.Runtime.CompilerServices;

using Microsoft.FSharp.Core;

namespace FParsec {

/// <summary>An opaque representation of a CharStream index.</summary>
public struct CharStreamIndexToken {
#if DEBUG
    internal readonly CharStream CharStream;
    private long Index { get { return GetIndex(CharStream); } }
#endif
    // The index is stored with an offset of 1, so that a zero-initialized token
    // (IdxPlus1 == 0) can be told apart from every token created by the constructor
    // for a valid string index.
    private readonly int IdxPlus1;

    /// <summary>Returns -1 if the IndexToken was zero-initialized.</summary>
    internal int Idx { get { return unchecked(IdxPlus1 - 1); } }

    internal CharStreamIndexToken(
                              #if DEBUG
                                  CharStream charStream,
                              #endif
                                  int idx)
    {
    #if DEBUG
        CharStream = charStream;
    #endif
        IdxPlus1 = unchecked(idx + 1);
    }

    private static void ThrowInvalidIndexToken() {
        throw new InvalidOperationException("The CharStreamIndexToken is invalid.");
    }

    /// <summary>Returns the stream index corresponding to this token.</summary>
    /// <exception cref="InvalidOperationException">The token is zero-initialized.</exception>
    public long GetIndex(CharStream charStreamFromWhichIndexTokenWasRetrieved) {
        int idx = Idx;
        if (idx == -1) ThrowInvalidIndexToken(); // tests for a zero-initialized IndexToken
    #if DEBUG
        Debug.Assert(CharStream == charStreamFromWhichIndexTokenWasRetrieved);
    #endif
        return charStreamFromWhichIndexTokenWasRetrieved.GetIndex(idx);
    }
}

/// <summary>
/// A pair of UTF-16 chars packed into a single 32-bit value:
/// Char0 occupies the low 16 bits, Char1 the high 16 bits.
/// </summary>
// The type argument of IEquatable is required: .NET only defines the generic
// IEquatable<T>, which is implemented by Equals(TwoChars) below.
public struct TwoChars : IEquatable<TwoChars> {
    private uint Chars;

    internal TwoChars(uint chars) {
        Chars = chars;
    }

    public TwoChars(char char0, char char1) {
        Chars = ((uint)char1 << 16) | (uint)char0;
    }

    public char Char0 { get { return unchecked((char)Chars); } }
    public char Char1 { get { return (char)(Chars >> 16); } }

    public override bool Equals(object obj) { return (obj is TwoChars) && Chars == ((TwoChars) obj).Chars; }
    public bool Equals(TwoChars other) { return Chars == other.Chars; }
    public override int GetHashCode() { return unchecked((int)Chars); }
    public static bool operator==(TwoChars left, TwoChars right) { return left.Chars == right.Chars; }
    public static bool operator!=(TwoChars left, TwoChars right) { return left.Chars != right.Chars; }
}

/// <summary>Provides read‐access to a sequence of UTF‐16 chars.</summary>
public class CharStream : IDisposable {
    // Byte buffer length (4096) that the constructors pass on when the caller specifies none.
    private const int DefaultByteBufferLength = (1 << 12);
    private static int MinimumByteBufferLength = 128; // must be larger than longest detectable preamble (we can only guess here)

    // Sentinel char returned by the peek/read methods when there is no char at the requested position.
    private const char EOS = '\uFFFF';
    public const char EndOfStreamChar = EOS;

    // The following two properties are constant in this implementation: both getters
    // return 0 and the MinRegexSpace setter ignores the assigned value.
    public int BlockOverlap { get { return 0; } }

    public int MinRegexSpace { get { return 0; } set { } }

    /// <summary>The string that holds the chars of the stream.</summary>
    internal String String;

    /// <summary>The current index in the string, or Int32.MinValue if the end of the stream has been reached.</summary>
    internal int Idx;

    /// <summary>Index of the first char in the string belonging to the stream. Is always non-negative.</summary>
    internal int IndexBegin;
    /// <summary>1 + index of the last char in the string belonging to the stream. Equals IndexBegin if the stream is empty.</summary>
    internal int IndexEnd;

    /// <summary>Any CharStream method or property setter increments this value when it changes the CharStream state.
    /// Backtracking to an old state also restores the old value of the StateTag.</summary>
    public
#if SMALL_STATETAG
        uint
#else
        ulong
#endif
        StateTag;

    /// <summary>IndexOfFirstChar - IndexBegin</summary>
    internal long StringToStreamIndexOffset;

    // The (uint) casts below widen the non-negative int indices without sign extension
    // before StringToStreamIndexOffset is added.
    public long IndexOfFirstChar { get { return (uint)IndexBegin + StringToStreamIndexOffset; } }
    public long IndexOfLastCharPlus1 { get { return (uint)IndexEnd + StringToStreamIndexOffset; } }

    /// <summary>The stream index of the current position. At the end of the stream
    /// (Idx is Int32.MinValue) this is the stream index corresponding to IndexEnd.</summary>
    public long Index { get {
        // return GetIndex(Idx);
        if (Idx >= 0) {
            Debug.Assert(Idx >= IndexBegin && Idx < IndexEnd);
            return (uint)Idx + StringToStreamIndexOffset;
        } else {
            Debug.Assert(Idx == Int32.MinValue);
            return (uint)IndexEnd + StringToStreamIndexOffset;
        }
    } }

    /// <summary>Converts a string index (or the end-of-stream marker Int32.MinValue) into a stream index.
    /// Same computation as in the Index getter.</summary>
    internal long GetIndex(int idx) {
        if (idx >= 0) {
            Debug.Assert(idx >= IndexBegin && idx < IndexEnd);
            return (uint)idx + StringToStreamIndexOffset;
        } else {
            Debug.Assert(idx == Int32.MinValue);
            return (uint)IndexEnd + StringToStreamIndexOffset;
        }
    }

    /// <summary>Indicates whether the Iterator points to the beginning of the CharStream.
    /// If the CharStream is empty, this property is always true.</summary>
public bool IsBeginOfStream { get {
    // the second alternative covers the empty stream, whose Idx is always negative
    return Idx == IndexBegin || (Idx < 0 && IndexBegin == IndexEnd);
} }

/// <summary>Indicates whether the Iterator points to the end of the CharStream,
/// i.e. whether it points to one char beyond the last char in the CharStream.</summary>
public bool IsEndOfStream { get { return Idx < 0; } }

internal long _Line;
/// <summary>The line count stored in the stream. The constructors initialize it to 1.</summary>
public long Line { get { return _Line; } }
public void SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(long line) {
    _Line = line;
}

internal long _LineBegin;
/// <summary>The stream index stored as the begin of the current line.</summary>
public long LineBegin { get { return _LineBegin; } }
public void SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(long lineBegin) {
    _LineBegin = lineBegin;
}

/// <summary>The UTF‐16 column number of the next char, i.e. Index ‐ LineBegin + 1.</summary>
public long Column { get { return Index - LineBegin + 1; } }

internal string _Name;
/// <summary>The name of the stream. Assigning a value increments the StateTag.</summary>
public string Name {
    get { return _Name; }
    set { _Name = value; ++StateTag; }
}

public Encoding Encoding { get; private set; }

/// <summary>The current position as a Position value (name, index, line, column).</summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
public Position Position { get {
    long index = Index;
    return new Position(_Name, index, Line, index - LineBegin + 1);
} }

/// <summary>Constructs a stream over the complete string. Fields that are not assigned
/// here keep their zero default (see the inline comments).</summary>
internal CharStream(string chars) {
    Debug.Assert(chars != null);
    String = chars;
    Encoding = Encoding.Unicode;
    _Line = 1;
    var length = chars.Length;
    if (length != 0) {
        // Idx = 0
        IndexEnd = length;
    } else {
        Idx = Int32.MinValue;
        // IndexEnd = 0
    }
}

public CharStream(string chars, int index, int length) : this(chars, index, length, 0) {}

/// <summary>Constructs a stream over the substring of chars that starts at index and has the
/// given length. streamBeginIndex is the stream index assigned to the first char.</summary>
/// <exception cref="ArgumentNullException">chars is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">index is negative, streamBeginIndex is
/// negative or not less than 2^60, or index and length do not denote a range within chars.</exception>
public CharStream(string chars, int index, int length, long streamBeginIndex) {
    if (chars == null) throw new ArgumentNullException("chars");
    if (index < 0) throw new ArgumentOutOfRangeException("index", "index is negative.");
    if (streamBeginIndex < 0 || streamBeginIndex >= (1L << 60)) throw new ArgumentOutOfRangeException("streamBeginIndex", "streamBeginIndex must be non-negative and less than 2^60.");
    // the sum is computed unchecked; an overflow or a negative length shows up as indexEnd < index
    int indexEnd = unchecked(index + length);
    if (indexEnd < index || indexEnd > chars.Length) throw new
ArgumentOutOfRangeException("length", "index or length is out of range.");
    String = chars;
    Encoding = Encoding.Unicode;
    _Line = 1;
    Idx = length == 0 ? Int32.MinValue : index;
    IndexBegin = index;
    IndexEnd = indexEnd;
    _LineBegin = streamBeginIndex;
    StringToStreamIndexOffset = streamBeginIndex - index;
}

public CharStream(string path, Encoding encoding)
       : this(path, encoding, true, DefaultByteBufferLength) { }

public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
       : this(path, encoding, detectEncodingFromByteOrderMarks, DefaultByteBufferLength) { }

/// <summary>Opens the file at the given path for reading, reads and decodes its complete
/// content and uses the path as the stream name. The file stream is always closed again.</summary>
/// <exception cref="ArgumentNullException">encoding is null.</exception>
public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
    if (encoding == null) throw new ArgumentNullException("encoding");
    Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
    try {
        // leaveOpen is false, so StreamConstructorContinue disposes the stream on success
        StreamConstructorContinue(stream, false, encoding, detectEncodingFromByteOrderMarks, byteBufferLength);
        _Name = path;
    } catch {
        stream.Dispose();
        throw;
    }
}

public CharStream(Stream stream, Encoding encoding)
       : this(stream, false, encoding, true, DefaultByteBufferLength) { }

public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
       : this(stream, leaveOpen, encoding, true, DefaultByteBufferLength) { }

public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
       : this(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, DefaultByteBufferLength) { }

/// <summary>Reads and decodes the remaining content of the given stream.</summary>
/// <exception cref="ArgumentNullException">stream or encoding is null.</exception>
/// <exception cref="ArgumentException">The stream is not readable.</exception>
public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
    if (stream == null) throw new ArgumentNullException("stream");
    if (!stream.CanRead) throw new ArgumentException("stream is not readable");
    if (encoding == null) throw new ArgumentNullException("encoding");
    StreamConstructorContinue(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, byteBufferLength);
}

/// <summary>
/// Shared implementation of the stream and file path constructors: reads the stream to its end,
/// skips a detected preamble, decodes the bytes into <c>String</c> and initializes
/// <c>Encoding</c>, <c>_Line</c>, <c>Idx</c> and <c>IndexEnd</c>.
/// Unless <paramref name="leaveOpen"/> is true, the stream is disposed once it has been read,
/// including the case where it contains no chars.
/// </summary>
private void StreamConstructorContinue(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength) {
    // the ByteBuffer must be larger than the longest detectable preamble
    if (byteBufferLength < MinimumByteBufferLength) byteBufferLength = MinimumByteBufferLength;

    // -1 means that the number of remaining bytes is not (yet) known
    int remainingBytesCount = -1;
    long streamPosition;
    if (stream.CanSeek) {
        streamPosition = stream.Position;
        long remainingBytesCount64 = stream.Length - streamPosition;
        if (remainingBytesCount64 <= Int32.MaxValue) {
            remainingBytesCount = (int)remainingBytesCount64;
            // no need for a buffer that is larger than the remaining content
            if (remainingBytesCount < byteBufferLength) byteBufferLength = remainingBytesCount;
        }
    } else {
        streamPosition = 0;
    }

    // byteBufferLength should be larger than the longest detectable preamble
    byte[] byteBuffer = new byte[byteBufferLength];
    int byteBufferCount = 0;
    bool flush = false;
    // read until enough bytes for the preamble detection are available or the stream has ended
    do {
        int n = stream.Read(byteBuffer, byteBufferCount, byteBuffer.Length - byteBufferCount);
        if (n == 0) {
            remainingBytesCount = byteBufferCount;
            flush = true;
            break;
        }
        byteBufferCount += n;
    } while (byteBufferCount < MinimumByteBufferLength);
    streamPosition += byteBufferCount;

    int preambleLength = Text.DetectPreamble(byteBuffer, byteBufferCount, ref encoding, detectEncodingFromByteOrderMarks);
    remainingBytesCount -= preambleLength;

    Encoding = encoding;
    _Line = 1;
    if (remainingBytesCount != 0) {
        int charBufferLength = encoding.GetMaxCharCount(byteBufferLength); // might throw
        char[] charBuffer = new char[charBufferLength];
        int stringBufferCapacity = 2*charBufferLength;
        if (remainingBytesCount > 0) {
            try {
                stringBufferCapacity = encoding.GetMaxCharCount(remainingBytesCount); // might throw
            } catch (ArgumentOutOfRangeException) { } // keep the default capacity
        }
        var sb = new StringBuilder(stringBufferCapacity);
        var decoder = encoding.GetDecoder();
        Debug.Assert(preambleLength < byteBufferCount);
        int byteBufferIndex = preambleLength;
        for (;;) {
            try {
                int charBufferCount = decoder.GetChars(byteBuffer, byteBufferIndex, byteBufferCount - byteBufferIndex, charBuffer, 0, flush);
                sb.Append(charBuffer, 0, charBufferCount);
            } catch (DecoderFallbackException e) {
                // record the stream position of the invalid bytes in the exception data
                e.Data.Add("Stream.Position", streamPosition - (byteBufferCount - byteBufferIndex) + e.Index);
                throw;
            }
            if (flush) break;
            byteBufferIndex = 0;
            byteBufferCount = stream.Read(byteBuffer, 0, byteBuffer.Length);
            streamPosition += byteBufferCount;
            flush = byteBufferCount == 0;
        }
        String = sb.ToString();
    } else {
        String = "";
    }
    // The stream must also be released when it holds nothing after the preamble.
    // Disposing only inside the branch above left the FileStream opened by the
    // file path constructor open for empty files.
    if (!leaveOpen) stream.Dispose();

    if (String.Length != 0) {
        // Idx = 0
        IndexEnd = String.Length;
    } else {
        Idx = Int32.MinValue;
        // IndexEnd = 0
    }
}

/// <summary>The low trust version of the CharStream class implements the IDisposable
/// interface only for API compatibility. The Dispose method does not need to be called on
/// low trust CharStream instances, because the instances hold no resources that need to be disposed.</summary>
public void Dispose() {}

/// <summary>Constructs a stream over the given substring, sets its user state and name,
/// and returns the result of applying the parser function to the stream.</summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification="The CharStream is manually disposed.")]
public static T ParseString<T,TUserState>(string chars, int index, int length,
                                          FSharpFunc<CharStream<TUserState>,T> parser,
                                          TUserState userState,
                                          string streamName)
{
    var stream = new CharStream<TUserState>(chars, index, length);
    stream.UserState = userState;
    stream._Name = streamName;
    return parser.Invoke(stream);
}

/// <summary>Moves the stream to the given stream index and increments the StateTag.
/// An index at or beyond the end of the stream moves the stream to the end.</summary>
/// <exception cref="ArgumentOutOfRangeException">index is less than IndexOfFirstChar.</exception>
public void Seek(long index) {
    long idx = unchecked(index - StringToStreamIndexOffset);
    if (idx >= IndexBegin && idx < IndexEnd) {
        Idx = (int)idx;
        ++StateTag;
        return;
    }
    if (index < IndexOfFirstChar)
        throw (new ArgumentOutOfRangeException("index", "The index is negative or less than the IndexOfFirstChar."));
    ++StateTag;
    Idx = Int32.MinValue;
}

/// <summary>A token for the current position.</summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
public CharStreamIndexToken IndexToken { get {
    return new CharStreamIndexToken(
               #if DEBUG
                   this,
               #endif
                   Idx
               );
} }

private static void ThrowInvalidIndexToken() {
    throw new ArgumentException("The CharStreamIndexToken is invalid.");
}

/// <summary>Moves the stream to the position of the given token and increments the StateTag.</summary>
/// <exception cref="ArgumentException">The token is zero-initialized.</exception>
public void Seek(CharStreamIndexToken indexToken) {
    int idx =
indexToken.Idx;
    if (idx == -1) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens
#if DEBUG
    Debug.Assert(this == indexToken.CharStream);
#endif
    Idx = idx;
    Debug.Assert((Idx >= IndexBegin && Idx < IndexEnd) || Idx == Int32.MinValue);
    ++StateTag;
}

/// <summary>Returns the chars between the position of the given token and the current position.</summary>
/// <exception cref="ArgumentException">The token is zero-initialized, or the current position
/// lies before the position of the token.</exception>
public string ReadFrom(CharStreamIndexToken indexToken) {
    int idx = indexToken.Idx;
    if (idx == -1) ThrowInvalidIndexToken(); // tests for zero-initialized IndexTokens
#if DEBUG
    Debug.Assert(this == indexToken.CharStream);
#endif
    return ReadFrom(idx);
}

/// <summary>Returns the chars between the string index idx0 (or the end-of-stream marker
/// Int32.MinValue) and the current position. Does not change the stream state.</summary>
/// <exception cref="ArgumentException">The current position lies before idx0.</exception>
internal string ReadFrom(int idx0) {
    if (idx0 >= 0) {
        Debug.Assert(idx0 >= IndexBegin && idx0 < IndexEnd);
        if (idx0 <= Idx)
            return String.Substring(idx0, Idx - idx0);
        // the stream is at its end: return everything from idx0 on
        if (Idx < 0)
            return String.Substring(idx0, IndexEnd - idx0);
    } else {
        Debug.Assert(idx0 == Int32.MinValue);
        if (Idx < 0) return "";
    }
    throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamIndexToken/CharStreamState.");
}

/// <summary>Increments the line count, sets the line begin to the current index
/// and increments the StateTag.</summary>
public void RegisterNewline() {
    ++_Line;
    var index = Index;
    Debug.Assert(index != _LineBegin);
    _LineBegin = index;
    ++StateTag;
}

/// <summary>Adds lineOffset to the line count, sets the line begin to the stream index
/// corresponding to the string index stringLineBegin and increments the StateTag.</summary>
private void RegisterNewLineBegin(int stringLineBegin, int lineOffset) {
    Debug.Assert(lineOffset > 0
                 && ((Idx >= stringLineBegin && Idx < IndexEnd) || Idx == Int32.MinValue)
                 && stringLineBegin >= IndexBegin && stringLineBegin <= IndexEnd);
    _Line += lineOffset;
    long newLineBegin = (uint)stringLineBegin + StringToStreamIndexOffset;
    Debug.Assert(newLineBegin > _LineBegin);
    _LineBegin = newLineBegin;
    ++StateTag;
}

/// <summary>Adds lineOffset to the line count, sets the line begin to
/// Index - newColumnMinus1 and increments the StateTag.</summary>
public void RegisterNewlines(int lineOffset, int newColumnMinus1) {
    _Line += lineOffset;
    Debug.Assert(_Line > 0 && newColumnMinus1 >= 0);
    var newLineBegin = Index - newColumnMinus1;
    Debug.Assert(lineOffset != 0 && newLineBegin != _LineBegin);
    _LineBegin = Index - newColumnMinus1;
    ++StateTag;
}

/// <summary>Adds lineOffset to the line count, sets the line begin to
/// Index - newColumnMinus1 and increments the StateTag.</summary>
public void RegisterNewlines(long lineOffset, long newColumnMinus1) {
    _Line += lineOffset;
    Debug.Assert(_Line > 0 && newColumnMinus1 >= 0);
    var newLineBegin = Index -
newColumnMinus1;
    Debug.Assert(lineOffset != 0 && newLineBegin != _LineBegin);
    _LineBegin = Index - newColumnMinus1;
    ++StateTag;
}

/// <summary>Returns the char at the current position, or EOS at the end of the stream.
/// Does not change the stream state.</summary>
public char Peek() {
    int idx = Idx;
    if (idx >= 0) return String[idx];
    return EOS;
}

/// <summary>Advances the position by one char. Does nothing (and leaves the StateTag
/// unchanged) if the stream is already at its end.</summary>
public void Skip() {
    int idx = Idx + 1;
    // the unsigned comparison is false for the negative values derived from the end-of-stream marker
    if (unchecked((uint)idx) < (uint)IndexEnd) {
        Idx = idx;
        ++StateTag;
    } else if (idx == IndexEnd) {
        // the last char was skipped
        Idx = Int32.MinValue;
        ++StateTag;
    }
}

/// <summary>Returns the char at the current position and advances the position by one char.
/// Returns EOS without changing the state if the stream is at its end.</summary>
public char Read() {
    int idx = Idx;
    if (idx >= 0) {
        char c = String[idx];
        ++idx;
        if (idx == IndexEnd) idx = Int32.MinValue;
        Idx = idx;
        ++StateTag;
        return c;
    }
    return EOS;
}

/// <summary>Advances the position by one char and returns the char at the new position,
/// or EOS if the new position is the end of the stream. Returns EOS without changing the
/// state if the stream is already at its end.</summary>
public char SkipAndPeek() {
    int idx = Idx + 1;
    if (unchecked((uint)idx) < (uint)IndexEnd) {
        Idx = idx;
        ++StateTag;
        return String[idx];
    } else if (idx == IndexEnd) {
        Idx = Int32.MinValue;
        ++StateTag;
    }
    return EOS;
}

/// <summary>Returns the two chars at the current position. Chars beyond the end of the
/// stream are represented by EOS. Does not change the stream state.</summary>
public TwoChars Peek2() {
    int idx = Idx + 1;
    if (unchecked((uint)idx) < (uint)IndexEnd)
        return new TwoChars(String[idx - 1], String[idx]);
    else if (idx == IndexEnd)
        return new TwoChars(String[idx - 1], EOS);
    else
        return new TwoChars(EOS, EOS);
}

/// <summary>Returns the char at the given offset from the current position, or EOS if there
/// is no char at that position. Does not change the stream state.</summary>
public char Peek(uint utf16Offset) {
    int n = unchecked((int)utf16Offset);
    if (n >= 0) { // utf16Offset <= Int32.MaxValue
        int idx = unchecked(Idx + n);
        if (unchecked((uint)idx) < (uint)IndexEnd) return String[idx];
    }
    return EOS;
}

/// <summary>Advances the position by the given offset, or to the end of the stream if the
/// offset reaches beyond the last char. Always increments the StateTag.</summary>
public void Skip(uint utf16Offset) {
    ++StateTag;
    int n = unchecked((int)utf16Offset);
    if (n >= 0) { // utf16Offset <= Int32.MaxValue
        int idx = unchecked(Idx + n);
        if (unchecked((uint)idx) < (uint)IndexEnd) {
            Idx = idx;
            return;
        }
    }
    Idx = Int32.MinValue;
    return;
}

/// <summary>Advances the position by the given offset and returns the char at the new position.
/// If the offset reaches beyond the last char, the stream is moved to its end and EOS is
/// returned. Always increments the StateTag.</summary>
public char SkipAndPeek(uint utf16Offset) {
    ++StateTag;
    int n = unchecked((int)utf16Offset);
    if (n >= 0) { // utf16Offset <= Int32.MaxValue
        int idx = unchecked(Idx + n);
        if (unchecked((uint)idx) < (uint)IndexEnd) {
            Idx = idx;
            return String[idx];
        }
    }
    Idx = Int32.MinValue;
    return EOS;
}

/// <summary>Returns the char at the given (possibly negative) offset from the current position,
/// or EOS if there is no char at that position. A negative offset at the end of the stream
/// is applied relative to IndexEnd. Does not change the stream state.</summary>
public char Peek(int utf16Offset) {
    int idx = unchecked(Idx + utf16Offset);
    if (utf16Offset < 0) goto Negative;
    if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream;
ReturnChar:
    return
String[idx];
Negative:
    if (Idx >= 0) {
        if (idx >= IndexBegin) goto ReturnChar;
    } else {
        // at the end of the stream the offset is applied relative to IndexEnd
        idx = IndexEnd + utf16Offset;
        if (idx >= IndexBegin) goto ReturnChar;
    }
EndOfStream:
    return EOS;
}

/// <summary>Advances the position by the given (possibly negative) offset and increments the
/// StateTag. A positive offset beyond the last char moves the stream to its end.</summary>
/// <exception cref="ArgumentOutOfRangeException">The new position would lie before the first
/// char of the stream. In this case the stream state is left unchanged.</exception>
public void Skip(int utf16Offset) {
    ++StateTag;
    int idx = unchecked(Idx + utf16Offset);
    if (utf16Offset < 0) goto Negative;
    if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream;
Return:
    Idx = idx;
    return;
Negative:
    if (Idx >= 0) {
        if (idx >= IndexBegin) goto Return;
    } else {
        // at the end of the stream the offset is applied relative to IndexEnd
        idx = IndexEnd + utf16Offset;
        if (idx >= IndexBegin) goto Return;
    }
    // undo the increment from the top of the method before throwing
    --StateTag;
    throw new ArgumentOutOfRangeException("utf16Offset", "Index + utf16Offset is negative or less than the index of the first char in the CharStream.");
EndOfStream:
    idx = Int32.MinValue;
    goto Return;
}

/// <summary>Same as Skip(int) for offsets that fit into an int. A larger positive offset
/// moves the stream to its end and increments the StateTag.</summary>
/// <exception cref="ArgumentOutOfRangeException">The new position would lie before the first
/// char of the stream.</exception>
public void Skip(long utf16Offset) {
    if (unchecked((int)utf16Offset) == utf16Offset) {
        Skip((int)utf16Offset);
    } else {
        if (utf16Offset < 0) throw new ArgumentOutOfRangeException("utf16Offset", "Index + utf16Offset is negative or less than the index of the first char in the CharStream.");
        ++StateTag;
        Idx = Int32.MinValue;
    }
}

/// <summary>Advances the position by the given (possibly negative) offset and returns the char
/// at the new position. Always increments the StateTag. If the offset reaches beyond the last
/// char, the stream is moved to its end and EOS is returned. If the new position would lie
/// before the first char, the stream is moved to its beginning and EOS is returned.</summary>
public char SkipAndPeek(int utf16Offset) {
    ++StateTag;
    int idx = unchecked(Idx + utf16Offset);
    if (utf16Offset < 0) goto Negative;
    if (unchecked((uint)idx) >= (uint)IndexEnd) goto EndOfStream;
ReturnChar:
    Idx = idx;
    return String[idx];
Negative:
    if (Idx >= 0) {
        if (idx >= IndexBegin) goto ReturnChar;
    } else {
        idx = IndexEnd + utf16Offset;
        if (idx >= IndexBegin) goto ReturnChar;
        // in an empty stream the beginning is the end of the stream
        if (IndexBegin == IndexEnd) goto EndOfStream;
    }
    Idx = IndexBegin;
    return EOS;
EndOfStream:
    Idx = Int32.MinValue;
    return EOS;
}

/// <summary>Returns a string with the next length chars, or with all remaining chars if fewer
/// than length chars remain. Does not change the stream state.</summary>
/// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
public string PeekString(int length) {
    if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative.");
    int idx = Idx;
    if (unchecked((uint)idx) + (uint)length <= (uint)IndexEnd)
        return String.Substring(idx, length);
    else
        return idx < 0 ? "" : String.Substring(idx, IndexEnd - idx);
}

/// <summary>Returns a string with the next length chars (or with all remaining chars if fewer
/// remain) and advances the position behind the returned chars.
/// Always increments the StateTag.</summary>
/// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
public string Read(int length) {
    if (length < 0) throw new ArgumentOutOfRangeException("length", "length is negative.");
    ++StateTag;
    var idx = Idx;
    int newIdx = unchecked(idx + length);
    if (unchecked((uint)newIdx) < (uint)IndexEnd) {
        Idx = newIdx;
        return String.Substring(idx, length);
    } else {
        Idx = Int32.MinValue;
        return idx < 0 ? "" : String.Substring(idx, IndexEnd - idx);
    }
}

/// <summary>Copies up to length chars into the buffer without changing the stream state.
/// Returns the number of chars copied.</summary>
public int PeekString(char[] buffer, int bufferIndex, int length) {
    return Read(buffer, bufferIndex, length, true);
}

/// <summary>Copies up to length chars into the buffer and advances the position behind
/// the copied chars. Returns the number of chars copied.</summary>
public int Read(char[] buffer, int bufferIndex, int length) {
    return Read(buffer, bufferIndex, length, false);
}

/// <summary>Shared implementation of PeekString(char[], int, int) and Read(char[], int, int).
/// If backtrack is true, the position and the StateTag are left unchanged.</summary>
/// <exception cref="ArgumentOutOfRangeException">bufferIndex is negative, or bufferIndex and
/// length do not denote a range within the buffer.</exception>
private int Read(char[] buffer, int bufferIndex, int length, bool backtrack) {
    if (bufferIndex < 0)
        throw new ArgumentOutOfRangeException("bufferIndex", "bufferIndex is negative.");
    if (length < 0 || bufferIndex > buffer.Length - length)
        throw new ArgumentOutOfRangeException("length", "bufferIndex or length is out of range.");
    if (unchecked((uint)Idx) + (uint)length < (uint)IndexEnd) {
        // the requested chars end before the last char of the stream
        for (int i = 0; i < length; ++i)
            buffer[bufferIndex + i] = String[Idx + i];
        if (!backtrack) {
            Idx += length;
            ++StateTag;
        }
        return length;
    } else if (Idx >= 0) {
        // the request reaches (at least) to the end of the stream: copy the remaining chars
        int n = IndexEnd - Idx;
        for (int i = 0; i < n; ++i)
            buffer[bufferIndex + i] = String[Idx + i];
        if (!backtrack) {
            Idx = Int32.MinValue;
            ++StateTag;
        }
        return n;
    } else {
        return 0;
    }
}

/// <summary>Returns true if the char at the current position equals ch.</summary>
public bool Match(char ch) {
    return Idx >= 0 && String[Idx] == ch;
}

/// <summary>Returns true if the char at the current position equals caseFoldedChar
/// after being mapped through CaseFoldTable.FoldedChars.</summary>
public bool MatchCaseFolded(char caseFoldedChar) {
    return Idx >= 0 && CaseFoldTable.FoldedChars[String[Idx]] == caseFoldedChar;
}

/// <summary>Skips the char at the current position if it equals ch. Returns true and
/// increments the StateTag if a char was skipped, otherwise returns false.</summary>
public bool Skip(char ch) {
    int idx = Idx;
    if (idx >= 0 && String[idx] == ch) {
        ++idx;
        if (idx == IndexEnd) idx = Int32.MinValue;
        Idx = idx;
        ++StateTag;
        return true;
    }
    return false;
}

/// <summary>Skips the char at the current position if it equals caseFoldedChar after being
/// mapped through CaseFoldTable.FoldedChars. Returns true and increments the StateTag
/// if a char was skipped, otherwise returns false.</summary>
public bool SkipCaseFolded(char caseFoldedChar) {
    int idx = Idx;
    if (idx >= 0 && CaseFoldTable.FoldedChars[String[idx]] == caseFoldedChar) {
        ++idx;
        if (idx == IndexEnd) idx = Int32.MinValue;
        Idx = idx;
        ++StateTag;
        return
true;
    }
    return false;
}

/// <summary>Skips the next two chars if they equal the two chars in twoChars. Returns true
/// and increments the StateTag if the chars were skipped, otherwise returns false.</summary>
public bool Skip(TwoChars twoChars) {
    int idx2 = unchecked(Idx + 2);
    if (unchecked((uint)idx2) < (uint)IndexEnd) {
        if (String[Idx] == twoChars.Char0 && String[Idx + 1] == twoChars.Char1) {
            ++StateTag;
            Idx = idx2;
            return true;
        }
    } else if (idx2 == IndexEnd && String[Idx] == twoChars.Char0 && String[Idx + 1] == twoChars.Char1) {
        // the two chars are the last chars of the stream
        ++StateTag;
        Idx = Int32.MinValue;
        return true;
    }
    return false;
}

/// <summary>Returns true if the chars at the current position equal the given string.
/// An empty string always matches. Does not change the stream state.</summary>
public bool Match(string chars) {
    if (unchecked((uint)Idx) + (uint)chars.Length <= (uint)IndexEnd) {
        for (int i = 0; i < chars.Length; ++i)
            if (chars[i] != String[Idx + i]) goto ReturnFalse;
        return true;
    }
    // not enough chars left (or end of stream): only the empty string matches
    return chars.Length == 0;
ReturnFalse:
    return false;
}

/// <summary>Skips the chars at the current position if they equal the given string.
/// Returns true if the string matched, otherwise false. The StateTag is incremented
/// whenever the comparison loop succeeds, also for an empty string.</summary>
public bool Skip(string chars) {
    int newIdx = unchecked(Idx + chars.Length);
    if (unchecked((uint)newIdx) <= (uint)IndexEnd) {
        for (int i = 0; i < chars.Length; ++i)
            if (chars[i] != String[Idx + i]) goto ReturnFalse;
        if (newIdx == IndexEnd) newIdx = Int32.MinValue;
        Idx = newIdx;
        ++StateTag;
        return true;
    }
    return chars.Length == 0;
ReturnFalse:
    return false;
}

/// <summary>Like Match(string), but each stream char is mapped through
/// CaseFoldTable.FoldedChars before it is compared with the char of the argument.</summary>
public bool MatchCaseFolded(string caseFoldedChars) {
    if (unchecked((uint)Idx) + (uint)caseFoldedChars.Length <= (uint)IndexEnd) {
        for (int i = 0; i < caseFoldedChars.Length; ++i)
            if (caseFoldedChars[i] != CaseFoldTable.FoldedChars[String[Idx + i]]) goto ReturnFalse;
        return true;
    }
    return caseFoldedChars.Length == 0;
ReturnFalse:
    return false;
}

/// <summary>Like Skip(string), but each stream char is mapped through
/// CaseFoldTable.FoldedChars before it is compared with the char of the argument.</summary>
public bool SkipCaseFolded(string caseFoldedChars) {
    int newIdx = unchecked(Idx + caseFoldedChars.Length);
    if (unchecked((uint)newIdx) <= (uint)IndexEnd) {
        for (int i = 0; i < caseFoldedChars.Length; ++i)
            if (caseFoldedChars[i] != CaseFoldTable.FoldedChars[String[Idx + i]]) goto ReturnFalse;
        if (newIdx == IndexEnd) newIdx = Int32.MinValue;
        Idx = newIdx;
        ++StateTag;
        return true;
    }
    return caseFoldedChars.Length == 0;
ReturnFalse:
    return false;
}

/// <summary>Returns true if the chars at the current position equal the length chars
/// of the array starting at charsIndex. Does not change the stream state.</summary>
public bool Match(char[] chars, int charsIndex, int length) {
    return Skip(chars, charsIndex, length, true);
}

/// <summary>Skips the chars at the current position if they equal the length chars
/// of the array starting at charsIndex. Returns true if the chars matched.</summary>
public bool Skip(char[] chars, int charsIndex, int
length) {
    return Skip(chars, charsIndex, length, false);
}

/// <summary>Shared implementation of Match(char[], int, int) and Skip(char[], int, int).
/// If backtrackEvenIfCharsMatch is true, the stream state is never changed.</summary>
/// <exception cref="ArgumentOutOfRangeException">charsIndex is negative, or charsIndex and
/// length do not denote a range within the array.</exception>
private bool Skip(char[] chars, int charsIndex, int length, bool backtrackEvenIfCharsMatch) {
    if (charsIndex < 0)
        throw new ArgumentOutOfRangeException("charsIndex", "charsIndex is negative.");
    if (length < 0 || charsIndex > chars.Length - length)
        throw new ArgumentOutOfRangeException("length", "charsIndex or length is out of range.");
    int newIdx = unchecked(Idx + length);
    if (unchecked((uint)newIdx) <= (uint)IndexEnd) {
        for (int i = 0; i < length; ++i)
            if (chars[charsIndex + i] != String[Idx + i]) goto ReturnFalse;
        if (!backtrackEvenIfCharsMatch) {
            if (newIdx == IndexEnd) newIdx = Int32.MinValue;
            Idx = newIdx;
            ++StateTag;
            return true;
        }
        return true;
    }
    // not enough chars left (or end of stream): only an empty range matches
    return length == 0;
ReturnFalse:
    return false;
}

/// <summary>Applies the regex to the chars between the current position and the end of the
/// stream (to the empty string at the end of the stream). Does not change the stream state.</summary>
public Match Match(Regex regex) {
    if (Idx >= 0) return regex.Match(String, Idx, IndexEnd - Idx);
    return regex.Match("");
}

/// <summary>Skips over any sequence of ' ', '\t', '\r' and '\n' chars at the current position
/// and registers the skipped newlines ("\r\n" counts as one newline). Returns true if at
/// least one char was skipped, otherwise returns false without changing the state.</summary>
public bool SkipWhitespace() {
    int lineBegin = 0;  // string index of the begin of the last line found
    int lineOffset = 0; // number of newlines found
    int idx = Idx;
    int end = IndexEnd;
    if (idx >= 0) {
        char c = String[idx];
        ++idx;
        if (c > ' ') goto ReturnFalse;
        if (c == ' ') {
            // fast path: a single space followed by a non-whitespace char
            if (idx != end && String[idx] > ' ') {
                Idx = idx;
                ++StateTag;
                return true;
            }
            goto Loop;
        } else {
            if (c == '\r') {
                if (idx != end && String[idx] == '\n') ++idx;
            } else if (c != '\n') goto CheckTab;
            // fast path: a single newline followed by a non-whitespace char
            if (idx != end && String[idx] > ' ') {
                Idx = idx;
                RegisterNewline();
                return true;
            }
            goto Newline;
        CheckTab:
            if (c != '\t') goto ReturnFalse;
            goto Loop;
        }
    Newline:
        lineBegin = idx;
        ++lineOffset;
    Loop:
        for (;;) {
            if (idx != end) {
                c = String[idx];
                ++idx;
                if (c != ' ') {
                    if (c != '\t') {
                        if (c == '\r') {
                            if (idx != end && String[idx] == '\n') ++idx;
                            goto Newline;
                        }
                        if (c == '\n') goto Newline;
                        // end of whitespace
                        --idx; // put back the non-whitespace char
                        break;
                    }
                }
            } else { // end of stream,
                idx = Int32.MinValue;
                break;
            }
        }
        Idx = idx;
        if (lineOffset == 0) {
            ++StateTag;
            return true;
        } else {
            // also increments the StateTag
            RegisterNewLineBegin(lineBegin, lineOffset);
            return true;
        }
    }
ReturnFalse:
    return false;
}

/// <summary>Skips over any sequence of chars at the current position that are ' ' or for which
/// Text.IsWhitespace returns true, and registers the skipped newlines ('\n', '\r', "\r\n",
/// '\u0085', '\u2028' and '\u2029'). Returns true if at least one char was skipped,
/// otherwise returns false without changing the state.</summary>
public bool SkipUnicodeWhitespace() {
    int lineBegin = 0;
    int
lineOffset = 0;
    int idx = Idx;
    int end = IndexEnd;
    if (idx >= 0) {
        char c = String[idx];
        ++idx;
        if (c == ' ') goto Loop;
        if (!Text.IsWhitespace(c)) goto ReturnFalse;
        // c is a whitespace char other than ' '; every non-newline char jumps to Loop,
        // every newline char falls through to the Newline label
        if (c <= '\r') {
            if (c == '\r') {
                if (idx != end && String[idx] == '\n') ++idx;
            } else if (c != '\n') goto Loop;
        } else if (c < '\u2028' ? c != '\u0085' : c > '\u2029') goto Loop;
    Newline:
        lineBegin = idx;
        ++lineOffset;
    Loop:
        for (;;) {
            if (idx != end) {
                c = String[idx];
                ++idx;
                if (c != ' ') {
                    if (Text.IsWhitespace(c)) {
                        if (c <= '\r') {
                            if (c == '\r') {
                                if (idx != end && String[idx] == '\n') ++idx;
                                goto Newline;
                            }
                            if (c == '\n') goto Newline;
                        } else if (c < '\u2028' ? c == '\u0085' : c <= '\u2029') goto Newline;
                    } else { // end of whitespace
                        --idx; // put back the non-whitespace char
                        break;
                    }
                }
            } else { // end of stream
                idx = Int32.MinValue;
                break;
            }
        }
        Idx = idx;
        if (lineOffset == 0) {
            ++StateTag;
            return true;
        } else {
            // also increments the StateTag
            RegisterNewLineBegin(lineBegin, lineOffset);
            return true;
        }
    }
ReturnFalse:
    return false;
}

/// <summary>Skips a newline ('\n', '\r' or "\r\n") at the current position and registers it.
/// Returns true if a newline was skipped, otherwise returns false without changing the state.</summary>
public bool SkipNewline() {
    int idx = Idx;
    if (idx >= 0) {
        char c = String[idx];
        ++idx;
        if (c == '\r') {
            if (idx != IndexEnd && String[idx] == '\n') ++idx;
        } else if (c != '\n') goto ReturnFalse;
        if (idx == IndexEnd) idx = Int32.MinValue;
        Idx = idx;
        RegisterNewline();
        return true;
    }
ReturnFalse:
    return false;
}

/// <summary>Skips a newline ('\n', '\r', "\r\n", '\u0085', '\u2028' or '\u2029') at the current
/// position and registers it. Returns true if a newline was skipped, otherwise returns false
/// without changing the state.</summary>
public bool SkipUnicodeNewline() {
    int idx = Idx;
    if (idx >= 0) {
        char c = String[idx];
        ++idx;
        if (c < '\u0085') {
            if (c == '\r') {
                if (idx != IndexEnd && String[idx] == '\n') ++idx;
            } else if (c != '\n') goto ReturnFalse;
        } else if (c >= '\u2028' ? c > '\u2029' : c != '\u0085') goto ReturnFalse;
        if (idx == IndexEnd) idx = Int32.MinValue;
        Idx = idx;
        RegisterNewline();
        return true;
    }
ReturnFalse:
    return false;
}

/// <summary>Skips a newline ('\n', '\r' or "\r\n") and any following indentation made up of
/// ' ' and '\t' chars (and '\f' chars if allowFormFeed is true). Further newlines restart the
/// indentation count. Returns the indentation of the last line, where a tab advances to
/// the next multiple of powerOf2TabStopDistance and a form feed resets the count to 0.
/// Returns -1 without changing the state if there is no newline at the current position.</summary>
/// <exception cref="ArgumentOutOfRangeException">powerOf2TabStopDistance is not a positive power of 2.</exception>
public int SkipNewlineThenWhitespace(int powerOf2TabStopDistance, bool allowFormFeed) {
    int tabStopDistanceMinus1 = unchecked(powerOf2TabStopDistance - 1);
    // a power of 2 has no bits in common with its predecessor
    if (powerOf2TabStopDistance <= 0 || (powerOf2TabStopDistance & tabStopDistanceMinus1) != 0)
        throw new ArgumentOutOfRangeException("powerOf2TabStopDistance", "powerOf2TabStopDistance must be a positive power of 2.");

    int lineBegin = 0;
    int lineOffset = 0;
    int idx = Idx;
    int indexEnd = IndexEnd;
    char c = '\u0000';
    if (idx >= 0) c = String[idx];
    ++idx;
    if (c == '\r') {
        if (idx != indexEnd && String[idx] == '\n') ++idx;
    } else if (c != '\n') {
        return -1;
    }
Newline:
    lineBegin = idx;
    ++lineOffset;
    int ind = 0;
    for (;;) {
        if (idx != indexEnd) {
            c = String[idx];
            ++idx;
            if (c == ' ') {
                ind = unchecked(ind + 1);
                if (ind >= 0) continue;
                // indentation has overflown, so put back ' ' and return
                ind = unchecked(ind - 1);
            } else if (c <= '\r') {
                if (c == '\r') {
                    if (idx != indexEnd && String[idx] == '\n') ++idx;
                    goto Newline;
                }
                if (c == '\n') goto Newline;
                if (c == '\t') {
                    // ind = ind + tabStopDistance - ind%tabStopDistance
                    int d = tabStopDistanceMinus1 - (ind & tabStopDistanceMinus1);
                    ind = unchecked(ind + d + 1);
                    if (ind >= 0) continue;
                    // indentation has overflown, so put back '\t' and return
                    ind = unchecked(ind - d - 1);
                } else if (c == '\f' && allowFormFeed) {
                    ind = 0;
                    continue;
                }
            }
            // end of indentation
            --idx;
            break;
        } else {
            // end of stream;
            idx = Int32.MinValue;
            break;
        }
    }
    Idx = idx;
    RegisterNewLineBegin(lineBegin, lineOffset);
    return ind;
}

/// <summary>Skips all chars up to the next newline ('\n', '\r' or "\r\n") or the end of the
/// stream. If skipNewline is true, the newline is skipped and registered too. The state is
/// only changed if the position changes.</summary>
public void SkipRestOfLine(bool skipNewline) {
    int idx = Idx;
    int indexEnd = IndexEnd;
    if (idx >= 0) {
        for (;;) {
            char c = String[idx];
            if (c > '\r') {
                if (++idx == indexEnd) break;
            } else if (c != '\r' && c != '\n') {
                if (++idx == indexEnd) break;
            } else {
                if (!skipNewline) {
                    if (idx != Idx) {
                        Idx = idx;
                        ++StateTag;
} return; } else { ++idx; if (c == '\r' && idx != indexEnd && String[idx] == '\n') ++idx; if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; RegisterNewline(); return; } } } // idx == indexEnd { Idx = Int32.MinValue; ++StateTag; } } } public string ReadRestOfLine(bool skipNewline) { int idx = Idx; int indexEnd = IndexEnd; if (idx >= 0) { for (;;) { char c = String[idx]; if (c > '\r') { if (++idx == indexEnd) break; } else if (c != '\r' && c != '\n') { if (++idx == indexEnd) break; } else { int idx0 = Idx; if (!skipNewline) { if (idx != idx0) { Idx = idx; ++StateTag; return String.Substring(idx0, idx - idx0); } else { return ""; } } else { var skippedString = idx == idx0 ? "" : String.Substring(idx0, idx - idx0); ++idx; if (c == '\r' && idx != indexEnd && String[idx] == '\n') ++idx; if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; RegisterNewline(); return skippedString; } } } // idx == indexEnd { int idx0 = Idx; Idx = Int32.MinValue; ++StateTag; return String.Substring(idx0, indexEnd - idx0); } } return ""; } public char ReadCharOrNewline() { int idx = Idx; if (idx >= 0) { char c = String[idx]; ++idx; if (c != '\r') { if (c != '\n') { if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; ++StateTag; return c; } } else if (idx != IndexEnd && String[idx] == '\n') ++idx; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; RegisterNewline(); return '\n'; } return EOS; } public int SkipCharsOrNewlines(int maxCharsOrNewlines) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int idx = Idx; if (idx >= 0 && maxCharsOrNewlines > 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)IndexEnd ? 
IndexEnd : (int)end2; for (;;) { if (idx >= end) break; char c = String[idx]; ++idx; if (c <= '\r') { if (c == '\r') { if (idx != IndexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != IndexEnd) ++end; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } } int count = idx - Idx - nCRLF; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } return 0; } public string ReadCharsOrNewlines(int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int nCR = 0; int idx = Idx; int indexEnd = IndexEnd; if (idx >= 0 && maxCharsOrNewlines > 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)indexEnd ? indexEnd : (int)end2; for (;;) { if (idx >= end) break; char c = String[idx]; ++idx; if (c <= '\r') { if (c == '\r') { if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } else { ++nCR; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } } int idx0 = Idx; int length = idx - idx0; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) { ++StateTag; return String.Substring(idx0, length); } else { RegisterNewLineBegin(lineBegin, lineOffset); return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 ? 
String.Substring(idx0, length) : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); } } return ""; } public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f) { return SkipCharsOrNewlinesWhile(f, f); } public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f) { int lineOffset = 0; int nCRLF = 0; int lineBegin = 0; int idx = Idx; int end = IndexEnd; if (idx >= 0) { char c = String[idx]; ++idx; if (c > '\r') { if (!f1.Invoke(c)) goto ReturnEmpty; } else if (c == '\r') { if (!f1.Invoke('\n')) goto ReturnEmpty; if (idx != end && String[idx] == '\n') { ++idx; ++nCRLF; } lineBegin = idx; ++lineOffset; } else { if (!f1.Invoke(c)) goto ReturnEmpty; if (c == '\n') { lineBegin = idx; ++lineOffset; } } for (;;) { if (idx == end) goto ReturnCount; c = String[idx]; ++idx; if (c > '\r') { if (!f.Invoke(c)) break; } else if (c == '\r') { if (!f.Invoke('\n')) break; if (idx != end && String[idx] == '\n') { ++idx; ++nCRLF; } lineBegin = idx; ++lineOffset; } else { if (!f.Invoke(c)) break; if (c == '\n') { lineBegin = idx; ++lineOffset; } } } --idx; ReturnCount: int count = idx - Idx - nCRLF; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } ReturnEmpty: return 0; } public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, bool normalizeNewlines) { return ReadCharsOrNewlinesWhile(f, f, normalizeNewlines); } public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, bool normalizeNewlinesInReturnString) { int lineOffset = 0; int nCR = 0; int nCRLF = 0; int lineBegin = 0; int idx = Idx; int indexEnd = IndexEnd; if (idx >= 0) { char c = String[idx]; ++idx; if (c > '\r') { if (!f1.Invoke(c)) goto ReturnEmpty; } else if (c == '\r') { if (!f1.Invoke('\n')) goto ReturnEmpty; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; } 
else { ++nCR; } lineBegin = idx; ++lineOffset; } else { if (!f1.Invoke(c)) goto ReturnEmpty; if (c == '\n') { lineBegin = idx; ++lineOffset; } } for (;;) { if (idx == indexEnd) goto ReturnString; c = String[idx]; ++idx; if (c > '\r') { if (!f.Invoke(c)) break; } else if (c == '\r') { if (!f.Invoke('\n')) break; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; } else { ++nCR; } lineBegin = idx; ++lineOffset; } else { if (!f.Invoke(c)) break; if (c == '\n') { lineBegin = idx; ++lineOffset; } } } --idx; ReturnString: int idx0 = Idx; int length = idx - idx0; if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) { ++StateTag; return String.Substring(idx0, length); } else { RegisterNewLineBegin(lineBegin, lineOffset); return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 ? String.Substring(idx0, length) : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); } } ReturnEmpty: return ""; } public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines) { return SkipCharsOrNewlinesWhile(f, f, minCharsOrNewlines, maxCharsOrNewlines); } public int SkipCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int idx = Idx; int indexEnd = IndexEnd; if (idx >= 0 && maxCharsOrNewlines > 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)indexEnd ? 
indexEnd : (int)end2; char c = String[idx]; ++idx; if (c > '\r') { if (!f1.Invoke(c)) goto ReturnEmpty; } else if (c == '\r') { if (!f1.Invoke('\n')) goto ReturnEmpty; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } lineBegin = idx; ++lineOffset; } else { if (!f1.Invoke(c)) goto ReturnEmpty; if (c == '\n') { lineBegin = idx; ++lineOffset; } } for (;;) { if (idx >= end) goto ReturnCount; c = String[idx]; ++idx; if (c > '\r') { if (!f.Invoke(c)) break; } else if (c == '\r') { if (!f.Invoke('\n')) break; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } lineBegin = idx; ++lineOffset; } else { if (!f.Invoke(c)) break; if (c == '\n') { lineBegin = idx; ++lineOffset; } } } --idx; ReturnCount: int count = idx - Idx - nCRLF; if (count >= minCharsOrNewlines) { if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } } ReturnEmpty: return 0; } public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { return ReadCharsOrNewlinesWhile(f, f, minCharsOrNewlines, maxCharsOrNewlines, normalizeNewlinesInReturnString); } public string ReadCharsOrNewlinesWhile(Microsoft.FSharp.Core.FSharpFunc f1, Microsoft.FSharp.Core.FSharpFunc f, int minCharsOrNewlines, int maxCharsOrNewlines, bool normalizeNewlinesInReturnString) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int nCR = 0; int idx = Idx; int indexEnd = IndexEnd; if (idx >= 0 && maxCharsOrNewlines > 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)indexEnd ? 
indexEnd : (int)end2; char c = String[idx]; ++idx; if (c > '\r') { if (!f1.Invoke(c)) goto ReturnEmpty; } else if (c == '\r') { if (!f1.Invoke('\n')) goto ReturnEmpty; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } else { ++nCR; } lineBegin = idx; ++lineOffset; } else { if (!f1.Invoke(c)) goto ReturnEmpty; if (c == '\n') { lineBegin = idx; ++lineOffset; } } for (;;) { if (idx >= end) goto ReturnString; c = String[idx]; ++idx; if (c > '\r') { if (!f.Invoke(c)) break; } else if (c == '\r') { if (!f.Invoke('\n')) break; if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } else { ++nCR; } lineBegin = idx; ++lineOffset; } else { if (!f.Invoke(c)) break; if (c == '\n') { lineBegin = idx; ++lineOffset; } } } --idx; ReturnString: int idx0 = Idx; int length = idx - idx0; if (length - nCRLF >= minCharsOrNewlines) { if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) { ++StateTag; return String.Substring(idx0, length); } else { RegisterNewLineBegin(lineBegin, lineOffset); return !normalizeNewlinesInReturnString || (nCR | nCRLF) == 0 ? 
String.Substring(idx0, length) : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); } } } ReturnEmpty: return ""; } private static bool RestOfStringEquals(string str1, int str1Index, string str2) { for (int i1 = str1Index + 1, i2 = 1; i2 < str2.Length; ++i1, ++i2) { if (str1[i1] != str2[i2]) goto ReturnFalse; } return true; ReturnFalse: return false; } private static bool RestOfStringEqualsCI(string str1, int str1Index, string cfStr2) { char[] cftable = CaseFoldTable.FoldedChars; for (int i1 = str1Index + 1, i2 = 1; i2 < cfStr2.Length; ++i1, ++i2) { if (cftable[str1[i1]] != cfStr2[i2]) goto ReturnFalse; } return true; ReturnFalse: return false; } public int SkipCharsOrNewlinesUntilString(string str, int maxCharsOrNewlines, out bool foundString) { if (str.Length == 0) throw new ArgumentException("The string argument is empty."); if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. int first = str[0]; int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int idx = Idx; int indexEnd = IndexEnd; int end1 = indexEnd - str.Length; if (idx >= 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)indexEnd ? 
indexEnd : (int)end2; for (;;) { if (idx < end) { char c = String[idx]; if (c != first) { ++idx; if (c > '\r' || c == '\t') continue; } else { if (idx <= end1 && RestOfStringEquals(String, idx, str)) { foundString = true; break; } ++idx; if (c > '\r') continue; } if (c == '\r') { if (idx != indexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != indexEnd) ++end; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } else { foundString = idx <= end1 && String[idx] == first && RestOfStringEquals(String, idx, str); break; } } if (idx != Idx) { int count = idx - Idx - nCRLF; if (idx == indexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } } else { foundString = false; } return 0; } public int SkipCharsOrNewlinesUntilString(string str, int maxCharsOrNewlines, bool normalizeNewlinesInOutString, out string skippedCharsIfStringFoundOtherwiseNull) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); if (str.Length == 0) throw new ArgumentException("The string argument is empty."); // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. int first = str[0]; int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int nCR = 0; int idx = Idx; int end1 = IndexEnd - str.Length; if (idx >= 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)IndexEnd ? 
IndexEnd : (int)end2; for (;;) { if (idx < end) { char c = String[idx]; if (c != first) { ++idx; if (c > '\r' || c == '\t') continue; } else { if (idx <= end1 && RestOfStringEquals(String, idx, str)) break; ++idx; if (c > '\r') continue; } if (c == '\r') { if (idx != IndexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != IndexEnd) ++end; } else { ++nCR; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } else { if (idx <= end1 && String[idx] == first && RestOfStringEquals(String, idx, str)) break; // string not found skippedCharsIfStringFoundOtherwiseNull = null; if (idx != Idx) { int count = idx - Idx - nCRLF; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } return 0; } } // found string int idx0 = Idx; int length = idx - idx0; if (length != 0) { Idx = idx; if (lineOffset == 0) { ++StateTag; skippedCharsIfStringFoundOtherwiseNull = String.Substring(idx0, length); return length; } else { RegisterNewLineBegin(lineBegin, lineOffset); skippedCharsIfStringFoundOtherwiseNull = !normalizeNewlinesInOutString || (nCR | nCRLF) == 0 ? String.Substring(idx0, length) : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); return length - nCRLF; } } else { skippedCharsIfStringFoundOtherwiseNull = ""; } } else { skippedCharsIfStringFoundOtherwiseNull = null; } return 0; } public int SkipCharsOrNewlinesUntilCaseFoldedString(string caseFoldedString, int maxCharsOrNewlines, out bool foundString) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); if (caseFoldedString.Length == 0) throw new ArgumentException("The string argument is empty."); // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. 
int first = caseFoldedString[0]; int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int idx = Idx; int end1 = IndexEnd - caseFoldedString.Length; char[] cftable = CaseFoldTable.FoldedChars; if (idx >= 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)IndexEnd ? IndexEnd : (int)end2; for (;;) { if (idx < end) { char c = cftable[String[idx]]; if (c != first) { ++idx; if (c > '\r' || c == '\t') continue; } else { if (idx <= end1 && RestOfStringEqualsCI(String, idx, caseFoldedString)) { foundString = true; break; } ++idx; if (c > '\r') continue; } if (c == '\r') { if (idx != IndexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != IndexEnd) ++end; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } else { foundString = idx <= end1 && cftable[String[idx]] == first && RestOfStringEqualsCI(String, idx, caseFoldedString); break; } } if (idx != Idx) { int count = idx - Idx - nCRLF; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } } else { foundString = false; } return 0; } public int SkipCharsOrNewlinesUntilCaseFoldedString(string caseFoldedString, int maxCharsOrNewlines, bool normalizeNewlinesInOutString, out string skippedCharsIfStringFoundOtherwiseNull) { if (maxCharsOrNewlines < 0) throw new ArgumentOutOfRangeException("maxCharsOrNewlines", "maxCharsOrNewlines is negative."); if (caseFoldedString.Length == 0) throw new ArgumentException("The string argument is empty."); // The .NET 64-bit JIT emits inefficient code in the loop if we declare first as as char variable. int first = caseFoldedString[0]; int lineBegin = 0; int lineOffset = 0; int nCRLF = 0; int nCR = 0; int idx = Idx; int end1 = IndexEnd - caseFoldedString.Length; char[] cftable = CaseFoldTable.FoldedChars; if (idx >= 0) { uint end2 = (uint)idx + (uint)maxCharsOrNewlines; int end = end2 > (uint)IndexEnd ? 
IndexEnd : (int)end2; for (;;) { if (idx < end) { char c = cftable[String[idx]]; if (c != first) { ++idx; if (c > '\r' || c == '\t') continue; } else { if (idx <= end1 && RestOfStringEqualsCI(String, idx, caseFoldedString)) break; ++idx; if (c > '\r') continue; } if (c == '\r') { if (idx != IndexEnd && String[idx] == '\n') { ++idx; ++nCRLF; if (end != IndexEnd) ++end; } else { ++nCR; } } else if (c != '\n') continue; lineBegin = idx; ++lineOffset; } else { if (idx <= end1 && cftable[String[idx]] == first && RestOfStringEqualsCI(String, idx, caseFoldedString)) break; // string not found skippedCharsIfStringFoundOtherwiseNull = null; if (idx != Idx) { int count = idx - Idx - nCRLF; if (idx == IndexEnd) idx = Int32.MinValue; Idx = idx; if (lineOffset == 0) ++StateTag; else RegisterNewLineBegin(lineBegin, lineOffset); return count; } return 0; } } // found string int idx0 = Idx; int length = idx - idx0; if (length != 0) { Idx = idx; if (lineOffset == 0) { ++StateTag; skippedCharsIfStringFoundOtherwiseNull = String.Substring(idx0, length); return length; } else { RegisterNewLineBegin(lineBegin, lineOffset); skippedCharsIfStringFoundOtherwiseNull = !normalizeNewlinesInOutString || (nCR | nCRLF) == 0 ? 
String.Substring(idx0, length) : Text.CopyWithNormalizedNewlines(String, idx0, length, nCRLF, nCR); return length - nCRLF; } } else { skippedCharsIfStringFoundOtherwiseNull = ""; } } else { skippedCharsIfStringFoundOtherwiseNull = null; } return 0; } } // class CharStream public struct CharStreamState { #if DEBUG internal readonly CharStream CharStream; private long Index { get { return GetIndex(CharStream); } } #endif internal readonly int Idx; #if SMALL_STATETAG public readonly uint Tag; #else public readonly ulong Tag; #endif public readonly long Line; public readonly long LineBegin; public readonly TUserState UserState; public readonly string Name; public CharStreamState(CharStream charStream) { #if DEBUG CharStream = charStream; #endif Idx = charStream.Idx; Tag = charStream.StateTag; Line = charStream._Line; LineBegin = charStream._LineBegin; UserState = charStream._UserState; Name = charStream._Name; } private static void ThrowInvalidState() { throw new InvalidOperationException("The CharStreamState is invalid."); } public CharStreamIndexToken IndexToken { get { if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state return new CharStreamIndexToken( #if DEBUG CharStream, #endif Idx); } } public long GetIndex(CharStream charStreamFromWhichStateWasRetrieved) { if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state #if DEBUG Debug.Assert(CharStream == charStreamFromWhichStateWasRetrieved); #endif return charStreamFromWhichStateWasRetrieved.GetIndex(Idx); } public Position GetPosition(CharStream charStreamFromWhichStateWasRetrieved) { if (Line <= 0) ThrowInvalidState(); // tests for a zero-initialized state #if DEBUG Debug.Assert(CharStream == charStreamFromWhichStateWasRetrieved); #endif long index = charStreamFromWhichStateWasRetrieved.GetIndex(Idx); return new Position(Name, index, Line, index - LineBegin + 1); } } /// Provides read‐access to a sequence of UTF‐16 chars. 
public sealed class CharStream : CharStream {
    // A CharStream that additionally stores a user state value (_UserState).
    // The user state is captured by the State property and restored by BacktrackTo,
    // together with the stream position, state tag, line data and stream name.

    // ---- Constructors: each overload only forwards its arguments to the base class. ----

    internal CharStream(string chars)
        : base(chars) {}

    public CharStream(string chars, int index, int length)
        : base(chars, index, length) {}

    public CharStream(string chars, int index, int length, long streamBeginIndex)
        : base(chars, index, length, streamBeginIndex) {}

    public CharStream(string path, Encoding encoding)
        : base(path, encoding) {}

    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        : base(path, encoding, detectEncodingFromByteOrderMarks) {}

    public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength)
        : base(path, encoding, detectEncodingFromByteOrderMarks, byteBufferLength) {}

    public CharStream(Stream stream, Encoding encoding)
        : base(stream, encoding) {}

    public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
        : base(stream, leaveOpen, encoding) {}

    public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks) {}

    public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks, int byteBufferLength)
        : base(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, byteBufferLength) {}

    // ---- User state ----

    internal TUserState _UserState;

    /// <summary>The user state stored in the stream.
    /// Assigning a value also increments the StateTag.</summary>
    public TUserState UserState {
        get {
            return _UserState;
        }
        set {
            _UserState = value;
            ++StateTag;
        }
    }

    /// <summary>Returns a new CharStreamState constructed from this stream.</summary>
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    public CharStreamState State {
        get {
            return new CharStreamState(this);
        }
    }

    // Shared throw helper for the state validity checks below.
    private static void ThrowInvalidState() {
        throw new ArgumentException("The CharStreamState is invalid.");
    }

    // ---- Backtracking ----

    /// <summary>By-value convenience overload; forwards to the ref overload.</summary>
    public void BacktrackTo(CharStreamState state) {
        BacktrackTo(ref state);
    }

    /// <summary>Restores the position, state tag, line, line begin, user state and name
    /// stored in the given state.</summary>
    /// <exception cref="ArgumentException">The state's Line is not positive
    /// (e.g. the state is zero-initialized).</exception>
    public void BacktrackTo(ref CharStreamState state) {
        // A zero-initialized state has Line == 0 and is rejected here.
        if (state.Line <= 0) {
            ThrowInvalidState();
        }
#if DEBUG
        Debug.Assert(this == state.CharStream);
#endif
        Idx = state.Idx;
        // The restored index is either the end-of-stream marker or lies inside the stream bounds.
        Debug.Assert(Idx == Int32.MinValue || (Idx >= IndexBegin && Idx < IndexEnd));
        StateTag   = state.Tag;
        _Line      = state.Line;
        _LineBegin = state.LineBegin;
        _UserState = state.UserState;
        _Name      = state.Name;
    }

    // ---- Reading from a saved state ----

    /// <summary>By-value convenience overload; forwards to the ref overload.</summary>
    public string ReadFrom(CharStreamState stateWhereStringBegins, bool normalizeNewlines) {
        return ReadFrom(ref stateWhereStringBegins, normalizeNewlines);
    }

    /// <summary>Returns the string obtained from ReadFrom(state.Idx). The string is passed
    /// through Text.NormalizeNewlines when normalizeNewlines is true and the state's line
    /// differs from the stream's current line.</summary>
    /// <exception cref="ArgumentException">The state's Line is not positive
    /// (e.g. the state is zero-initialized).</exception>
    public string ReadFrom(ref CharStreamState state, bool normalizeNewlines) {
        // A zero-initialized state has Line == 0 and is rejected here.
        if (state.Line <= 0) {
            ThrowInvalidState();
        }
#if DEBUG
        Debug.Assert(this == state.CharStream);
#endif
        var text = ReadFrom(state.Idx);
        // Normalization is skipped when the state and the stream are on the same line.
        if (normalizeNewlines && state.Line != _Line) {
            return Text.NormalizeNewlines(text);
        }
        return text;
    }

    // ---- Substreams ----

    /// <summary>By-value convenience overload; forwards to the ref overload.</summary>
    public CharStream CreateSubstream(CharStreamState stateWhereSubstreamBegins) {
        return CreateSubstream(ref stateWhereSubstreamBegins);
    }

    /// <summary>Creates a new stream over the same underlying string that begins at the
    /// position stored in the given state and ends at the current position of this stream.
    /// The substream takes its name, line and line begin from the given state.</summary>
    /// <exception cref="ArgumentException">The state's Line is not positive (e.g. the state
    /// is zero-initialized), or the current position of the stream lies before the position
    /// stored in the state.</exception>
    public CharStream CreateSubstream(ref CharStreamState stateWhereSubstreamBegins) {
        // A zero-initialized state has Line == 0 and is rejected here.
        if (stateWhereSubstreamBegins.Line <= 0) {
            ThrowInvalidState();
        }
#if DEBUG
        Debug.Assert(this == stateWhereSubstreamBegins.CharStream);
#endif
        int beginIdx = stateWhereSubstreamBegins.Idx;
        int currentIdx = Idx;

        // The unsigned comparison makes the negative end-of-stream marker (Int32.MinValue)
        // compare greater than every non-negative index.
        bool beginsAfterCurrent = unchecked((uint)beginIdx > (uint)currentIdx);
        if (beginsAfterCurrent) {
            throw new ArgumentException("The current position of the stream must not lie before the position corresponding to the given CharStreamState.");
        }

        // An empty substream starts out at the end-of-stream marker.
        int substreamIdx;
        if (beginIdx == currentIdx) {
            substreamIdx = Int32.MinValue;
        } else {
            substreamIdx = beginIdx;
        }

        // A negative index is replaced by IndexEnd when computing the bounds.
        int substreamBegin;
        if (beginIdx < 0) {
            substreamBegin = IndexEnd;
        } else {
            substreamBegin = beginIdx;
        }

        int substreamEnd;
        if (currentIdx < 0) {
            substreamEnd = IndexEnd;
        } else {
            substreamEnd = currentIdx;
        }

        var result = new CharStream(String);
        result._Name = stateWhereSubstreamBegins.Name;
        result.Idx = substreamIdx;
        result.IndexBegin = substreamBegin;
        result.IndexEnd = substreamEnd;
        result.StringToStreamIndexOffset = StringToStreamIndexOffset;
        result._Line = stateWhereSubstreamBegins.Line;
        result._LineBegin = stateWhereSubstreamBegins.LineBegin;
        return result;
    }
}

}

#endif

================================================
FILE: FParsecCS/Cloning.cs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2011
// License: Simplified BSD License. See accompanying documentation.
#if !LOW_TRUST using System; using System.Collections.Generic; using System.Reflection; using System.Runtime.Serialization; using System.Diagnostics; using System.Reflection.Emit; namespace FParsec.Cloning { // The classes in this namespace provide a cloning service based on the serialization API. // Capturing the state of an object and/or cloning it with this API is often at least an // order of magnitude faster than doing the same with the BinaryFormatter and a MemoryStream // (ignoring some initial setup costs and the JITing time). // Some implementation details: // // The serialization API of the BCL (as supported by the BinaryFormatter) spans several // interfaces, classes and attributes under the System.Runtime.Serialization namespace. // Unfortunately the publicly available API documentation provided by Microsoft does not // sufficiently cover certain important details of the serialization API behaviour. // // For example, the documentation only vaguely discusses whether serialization events // like OnDeserialized are invoked in a certain order on multiple objects in a graph. // It seems that many API users intuitively expect a certain ordering, at least in // simple cases, but it is also clear that an ordering can not be guaranteed in all // cases (e.g. in the case of a cyclic object graph). // // The .NET BinaryFormatter seems to attempt to invoke the deserialization events // on dependent objects first, but its behaviour is inconsistent and in some situations // arguably buggy. The following bug report discusses these issues in more detail: // https://connect.microsoft.com/VisualStudio/feedback/details/549277 // // For the sake of compatibility with the .NET BinaryFormatter we try to mimic the // basic principles of its behaviour. However, we certainly do not try to copy every // bug or inconsistency. 
// // In order to be on the safe side we sort the serialized object graph topologically // if it contains objects implementing an OnDeserialized handler or the ISerializable // or IObjectReference interfaces. Since the object graph can contain cycles, we // first identify strongly connected components using a variant of Tarjan's algorithm. // Any OnDeserializing handler, deserialization constructor, OnDeserialized handler or // IObjectReference.GetRealObject method (in that order) is then invoked in the // topological order starting with the most dependent objects. Objects in a strongly // connected component (with more than 1 object) are processed in the reverse order // in which the objects in the component were discovered during a depth-first search // of the serialized object graph. In a first pass the OnDeserializing handlers and // deserialization constructors of the objects in the component are invoked. Any // OnDeserialized handlers are then invoked in a second pass. // OnSerializing handlers are invoked immediately before an object is serialized. // OnSerialized handlers are invoked in an undefined order at the end of the // serialization job (not immediately after an object's subgraph has been serialized). // // We only allow an object implementing IObjectReference in a cycle of the serialized // object graph under the following conditions (which are more restrictive than // what the .NET BinaryFormatter enforces): // - There may only be 1 object implementing IObjectReference in a cycle. // - The type implementing IObjectReference must not be a value type. // - All objects containing references to the IObjectReference object must have reference types. // - The type implementing IObjectReference must not have any OnDeserialized handler. // - There must not be any other object in the cycle implementing ISerializable.
// // Similar to the .NET BinaryFormatter we delay all IDeserializationCallbacks until // the end of the deserialization of the complete object graph (not just the relevant // subgraph). As explained in the referenced Connect bug report this behaviour has some // severe consequences for the usefulness of IDeserializationCallbacks and the // composability of the whole serialization API. However, for compatibility we really // have to stick to Microsoft's design, even if in our case it would actually // be simpler to invoke the callbacks in topological order as soon as an object's // subgraph (and its strongly connected component) is completely deserialized. // // If the serialized object graph contains unboxed value type instances, any event // handlers are invoked on boxed copies as follows: // OnSerializing and OnSerialized handlers are not called on the original value type // instance (which can be a field or an array element), but on a boxed copy of the // instance. Thus, if the handler mutates the instance, the changes do not show up in the // object graph that was serialized, though changes made by OnSerializing (but not // OnSerialized) will show up in the deserialized object graph. This behaviour // simplifies the implementation and is in accordance with the behaviour of the // .NET BinaryFormatter. // OnDeserializing and OnDeserialized handlers are invoked on a boxed value type instance // too, but this time any changes show up in the deserialized object graph, because // the boxed instance is copied into the deserialized object graph after the // OnDeserialized event (at least if the instance is not part of an object cycle). // This deviates from the BinaryFormatter behaviour in that // the BinaryFormatter seems to copy the boxed instance into the deserialized object // graph before the OnDeserialized event. 
// However, since mutating the instance
// in an OnDeserialized handler has no effect when using the BinaryFormatter,
// hopefully no one causes an incompatibility with this implementation by actually trying
// to mutate the instance. (Note that mutable structs with serialization event handlers
// are extremely rare anyway).
// An IDeserializationCallback.OnDeserialization handler is invoked on the boxed instance
// after it has been copied into the deserialized object graph (and then is not copied
// again), so any changes won't show up in the deserialized unboxed value type instance
// (this holds for both the .NET BinaryFormatter and this implementation).

/// <summary>Contains the serialized state of an object.</summary>
public abstract class CloneImage {
    /// <summary>Deserializes the object state into a new object.</summary>
    public abstract object CreateClone();

    // The only constructor is internal, so only code with access to
    // internal members can derive from CloneImage.
    internal CloneImage() {}
}

public abstract class Cloner {
    // public interface

    /// <summary>The run-time type this cloner was created for.</summary>
    public readonly Type Type;

    /// <summary>Returns a cloner for the given run-time type.</summary>
    /// <param name="type">The run-time type of the objects to clone. The type must be serializable.</param>
    public static Cloner Create(Type type) {
        Cloner cloner;
        // The whole lookup/creation runs while holding the lock on Cache.
        lock (Cache) {
            cloner = CreateWithoutLock(type);
        }
        return cloner;
    }

    /// <summary>Copies the given object using the serialization API.</summary>
    /// <param name="instance">The object to clone. instance.GetType() must equal the Type the Cloner was created for.</param>
    public object Clone(object instance) {
        // Capture an image of the instance, then immediately deserialize it into a new object.
        var image = CaptureImage(instance, false);
        return image.CreateClone();
    }

    /// <summary>Returns an image of the given object instance.</summary>
    /// <param name="instance">The object to capture an image of.</param>
public CloneImage CaptureImage(object instance) { return CaptureImage(instance, true); } // internal/protected interface private readonly CloneEventHandlers EventHandlers; private Cloner(Type type, CloneEventHandlers eventHandlers) { Type = type; EventHandlers = eventHandlers; } internal abstract State CaptureShallowStateAndEnqueueNestedState(object value, CaptureContext captureContext); internal sealed class CaptureContext { public readonly bool IsReturnedToUser; public CaptureContext(bool stateIsReturnedToUser) { IsReturnedToUser = stateIsReturnedToUser; } // currently uses a static queue, but could easily be rewritten to use an instance queue public int GetObjectIndex(object instance, Cloner cloner) { Debug.Assert(instance.GetType() == cloner.Type); int objectIndex; if (!ObjectIndices.TryGetValue(instance, out objectIndex)) { objectIndex = ObjectIndices.Count; ObjectIndices.Add(instance, objectIndex); var item = new WorkItem{Cloner = cloner, Instance = instance}; WorkQueue.Enqueue(item); } return objectIndex; } } // internal interface internal abstract class State { /// May be null. public readonly CloneEventHandlers EventHandlers; /// Indices of nested objects in the object graph. May be null. public readonly int[] ObjectIndices; /// May be null. 
public int[] StronglyConnectedComponent;

        public abstract Type Type { get; }
        public abstract object CreateUninitializedObject();
        public abstract void WriteToUninitializedObject(object instance, object[] objectGraph);

        public State(CloneEventHandlers eventHandlers, int[] objectIndices) {
            EventHandlers = eventHandlers;
            ObjectIndices = objectIndices;
        }

        private State() {}

        /// <summary>Placeholder that CaptureImage registers under the reserved object index 0.</summary>
        public static readonly State Dummy = new DummyState();

        private sealed class DummyState : State {
            public override Type Type { get { throw new NotImplementedException(); } }
            public override object CreateUninitializedObject() { throw new NotImplementedException(); }
            public override void WriteToUninitializedObject(object instance, object[] objectGraph) { throw new NotImplementedException(); }
        }
    }

    private static readonly StreamingContext StreamingContext = new StreamingContext(StreamingContextStates.Clone);
    private static readonly FormatterConverter FormatterConverter = new FormatterConverter();

    /// <summary>Invokes the non-public Object.MemberwiseClone method on the argument.</summary>
    private static readonly Func<object, object> CloneMemberwise = CreateMemberwiseCloneDelegate();

    /// <summary>
    /// Builds a delegate around a DynamicMethod that calls Object.MemberwiseClone
    /// on its argument and returns the result.
    /// </summary>
    private static Func<object, object> CreateMemberwiseCloneDelegate() {
        var dynamicMethod = new DynamicMethod("InvokeMemberwiseClone", typeof(object), new Type[]{typeof(object)}, true);
        var ilg = dynamicMethod.GetILGenerator();
        ilg.Emit(OpCodes.Ldarg_0);
        var method = typeof(object).GetMethod("MemberwiseClone", BindingFlags.NonPublic | BindingFlags.Instance);
        ilg.EmitCall(OpCodes.Call, method, null); // non-virtual call
        ilg.Emit(OpCodes.Ret);
        return (Func<object, object>)dynamicMethod.CreateDelegate(typeof(Func<object, object>));
    }

    // private data and methods

    // Cache serves as the synchronization root for the Create and CaptureImage methods
    private static readonly Dictionary<Type, Cloner> Cache = new Dictionary<Type, Cloner>();

    /// <summary>
    /// Returns the cached cloner for <paramref name="type"/> or creates and caches a new one.
    /// The callers in this class hold the lock on Cache when calling this method.
    /// </summary>
    /// <exception cref="SerializationException">The type is not marked as serializable.</exception>
    private static Cloner CreateWithoutLock(Type type) {
        Cloner cloner;
        if (Cache.TryGetValue(type, out cloner)) return cloner;

        if (!type.IsSerializable)
            throw new SerializationException("The type '" + type.ToString() + "' is not marked as serializable.");

        if (!type.IsArray) {
            var eventHandlers = CloneEventHandlers.Create(type);
            if (eventHandlers != null && (eventHandlers.Events & CloneEvents.ISerializable) != 0) {
                cloner = new CustomSerializationCloner(type, eventHandlers);
            } else {
                bool typeIsBlittable;
                var fields = GetSerializedFields(type, out typeIsBlittable);
                // A blittable type is only copied memberwise if it has no OnDeserializing handler.
                if (typeIsBlittable && (eventHandlers == null || (eventHandlers.Events & CloneEvents.OnDeserializing) == 0))
                    cloner = new BlittableCloner(type, eventHandlers, fields);
                else
                    cloner = new NativeSerializationCloner(type, eventHandlers, fields);
            }
        } else { // array
            var elementType = type.GetElementType();
            if (elementType.IsPrimitive || elementType == typeof(string)) {
                cloner = new BlittableCloner(type, null, new FieldInfo[0]);
            } else {
                var elementCloner = CreateWithoutLock(elementType);
                if (elementType.IsValueType && elementCloner is BlittableCloner)
                    cloner = new BlittableCloner(type, null, new FieldInfo[0]);
                else if (type.GetArrayRank() == 1)
                    cloner = new Rank1ArrayCloner(type, elementCloner);
                else
                    cloner = new RankNArrayCloner(type, elementCloner);
            }
        }
        Cache.Add(type, cloner);
        return cloner;
    }

    // for optimization purposes CaptureImage uses some static queues

    /// <summary>Compares objects by reference identity.</summary>
    private sealed class PhyiscalEqualityObjectComparer : System.Collections.Generic.EqualityComparer<object> {
        public override bool Equals(object x, object y) { return x == y; }
        public override int GetHashCode(object obj) {
            return System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(obj);
        }
    }

    // The following static collections are filled during CaptureImage (while the lock on
    // Cache is held) and are cleared again in its finally block.
    private static readonly Dictionary<object, int> ObjectIndices = new Dictionary<object, int>(new PhyiscalEqualityObjectComparer());
    private static readonly List<State> States = new List<State>();
    private static readonly Queue<WorkItem> WorkQueue = new Queue<WorkItem>();
    private static readonly List<OnSerializedListItem> OnSerializedList = new List<OnSerializedListItem>();
    private static readonly List<int> ObjectReferenceList = new List<int>();

    /// <summary>An object whose state still has to be captured, together with its cloner.</summary>
    private struct WorkItem {
        public Cloner Cloner;
        public object Instance;

        public WorkItem(Cloner cloner, object instance) {
            Cloner = cloner;
            Instance = instance;
        }
    }

    /// <summary>An object whose OnSerialized handler is invoked after the whole graph has been captured.</summary>
    private struct OnSerializedListItem {
        public CloneEventHandlers EventHandlers;
        public object Instance;

        public OnSerializedListItem(CloneEventHandlers cloneEventHandlers, object instance) {
            EventHandlers = cloneEventHandlers;
            Instance = instance;
        }
    }

    /// <summary>Returns true if the array is not null and contains the element.</summary>
    private static bool Contains(int[] arrayOrNull, int element) {
        if (arrayOrNull != null) {
            foreach (var e in arrayOrNull)
                if (e == element) return true;
        }
        return false;
    }

    /// <summary>
    /// Captures the state of the object graph reachable from <paramref name="instance"/>.
    /// </summary>
    /// <param name="instance">The root object. Its run-time type must equal <see cref="Type"/>.</param>
    /// <param name="imageIsReturnedToUser">Passed on to the cloners through the CaptureContext.</param>
    /// <returns>
    /// A SimpleImage if no captured state requires ordered deserialization,
    /// otherwise an OrderedImage.
    /// </returns>
    /// <exception cref="ArgumentNullException">The instance is null.</exception>
    /// <exception cref="ArgumentException">The instance does not have the type the cloner was created for.</exception>
    /// <exception cref="SerializationException">The object graph contains an unsupported cycle involving an IObjectReference object.</exception>
    private CloneImage CaptureImage(object instance, bool imageIsReturnedToUser) {
        // Previously a null argument surfaced as a NullReferenceException from instance.GetType().
        if (instance == null)
            throw new ArgumentNullException("instance");
        if (instance.GetType() != Type)
            throw new ArgumentException("The object instance does not have the run-time type the Cloner was created for.");

        lock (Cache) {
            try {
                bool needSort = false;
                // reserve 0-index spot
                ObjectIndices.Add(State.Dummy, 0);
                States.Add(null);

                var captureInfo = new CaptureContext(imageIsReturnedToUser);
                ObjectIndices.Add(instance, 1);
                WorkQueue.Enqueue(new WorkItem(this, instance));
                int deserializationCallbackCount = 0;
                do {
                    var item = WorkQueue.Dequeue();
                    var cloner = item.Cloner;
                    if (cloner.EventHandlers == null) {
                        States.Add(item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo));
                    } else if (cloner.EventHandlers.Events == CloneEvents.ISerializable) {
                        // NOTE(review): the state returned here may be a proxy state whose
                        // EventHandlers differ from cloner.EventHandlers. Those handlers are
                        // not inspected in this branch (no ObjectReferenceList entry, no
                        // callback count) -- confirm that this is intended.
                        States.Add(item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo));
                        needSort = true;
                    } else {
                        var eventHandlers = cloner.EventHandlers;
                        if ((eventHandlers.Events & CloneEvents.OnSerializing) != 0)
                            eventHandlers.InvokeOnSerializing(item.Instance, StreamingContext);
                        // OnSerialized handlers are invoked once the whole graph has been captured.
                        if ((eventHandlers.Events & CloneEvents.OnSerialized) != 0)
                            OnSerializedList.Add(new OnSerializedListItem(eventHandlers, item.Instance));

                        var state = item.Cloner.CaptureShallowStateAndEnqueueNestedState(item.Instance, captureInfo);
                        States.Add(state);
                        eventHandlers = state.EventHandlers; // may be different from cloner.EventHandlers (for CustomSerializationState)
                        // A CustomSerializationCloner can return a proxy state that carries the
                        // proxy cloner's handlers, which are null if the proxy type has none.
                        if (eventHandlers != null) {
                            if ((eventHandlers.Events & (  CloneEvents.ISerializable
                                                         | CloneEvents.OnDeserialized
                                                         | CloneEvents.IObjectReference)) != 0)
                            {
                                needSort = true;
                                if ((eventHandlers.Events & CloneEvents.IObjectReference) != 0)
                                    ObjectReferenceList.Add(States.Count - 1);
                            }
                            // unfortunately the BinaryFormatter doesn't guarantee any order for IDeserializationCallbacks
                            if ((eventHandlers.Events & (CloneEvents.IDeserializationCallback)) != 0)
                                ++deserializationCallbackCount;
                        }
                    }
                } while (WorkQueue.Count != 0);

                var states = States.ToArray();

                if (OnSerializedList.Count != 0) {
                    foreach (var item in OnSerializedList)
                        item.EventHandlers.InvokeOnSerialized(item.Instance, StreamingContext);
                }

                if (!needSort) return new SimpleImage(states, deserializationCallbackCount);

                int[] order = ComputeTopologicalOrder(states);

                // Reject the cycles through IObjectReference objects that are not supported.
                if (ObjectReferenceList.Count != 0) {
                    foreach (var index1 in ObjectReferenceList) {
                        var state1 = states[index1];
                        var scc = state1.StronglyConnectedComponent;
                        if (scc == null) continue;
                        var type1 = state1.Type;
                        if (type1.IsValueType)
                            throw new SerializationException("The serialized object graph contains a cycle that includes a value type object (type: "+ type1.FullName +") implementing IObjectReference.");
                        if ((state1.EventHandlers.Events & CloneEvents.OnDeserialized) != 0)
                            throw new SerializationException("The serialized object graph contains a cycle that includes an object (type: "+ type1.FullName +") implementing IObjectReference and also exposing an OnDeserialized handler.");
                        foreach (var index2 in scc) {
                            if (index2 == index1) continue;
                            var state2 = states[index2];
                            var type2 = state2.Type;
                            if (state2.EventHandlers != null && (state2.EventHandlers.Events & (CloneEvents.ISerializable | CloneEvents.IObjectReference)) != 0) {
                                var msg = String.Format("The serialized object graph contains a cycle that includes an object (type: {0}) implementing IObjectReference and another object (type: {1}) implementing ISerializable and/or IObjectReference .", type1.FullName, type2.FullName);
                                throw new SerializationException(msg);
                            }
                            if (type2.IsValueType && Contains(state2.ObjectIndices, index1)) {
                                var msg = String.Format("The serialized object graph contains a cycle that includes a value type object (type: {0}) referencing an IObjectReference object (type: {1}) in the same cycle.", type2.FullName, type1.FullName);
                                throw new SerializationException(msg);
                            }
                        }
                    }
                }
                return new OrderedImage(states, order, deserializationCallbackCount);
            } finally {
                // Reset the shared static collections, also when an exception was thrown.
                States.Clear();
                ObjectIndices.Clear();
                if (WorkQueue.Count != 0) WorkQueue.Clear();
                if (OnSerializedList.Count != 0) OnSerializedList.Clear();
                if (ObjectReferenceList.Count != 0) ObjectReferenceList.Clear();
            }
        }
    }

    /// <summary>Cloner for objects that are copied with MemberwiseClone.</summary>
    private sealed class BlittableCloner : Cloner {
        internal readonly FieldInfo[] SerializedFields;

        public BlittableCloner(Type type, CloneEventHandlers eventHandlers, FieldInfo[] serializedFields)
               : base(type, eventHandlers)
        {
            Debug.Assert(serializedFields != null);
            SerializedFields = serializedFields;
        }

        internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) {
            Debug.Assert(Type == instance.GetType());
            if (captureContext.IsReturnedToUser) {
                // The state stores its own copy of the instance.
                return new BlittableState(EventHandlers, CloneMemberwise(instance));
            } else {
                // The state references the instance itself.
                return new BlittableState(EventHandlers, instance);
            }
        }
    }

    /// <summary>State that holds an object from which clones are created with MemberwiseClone.</summary>
    private sealed class BlittableState : State {
        private object Value;

        public BlittableState(CloneEventHandlers eventHandlers, object value)
               : base(eventHandlers, null)
        {
            Value = value;
        }

        public override Type Type { get { return Value.GetType(); } }

        public override object CreateUninitializedObject() {
            return Cloner.CloneMemberwise(Value);
        }

        // The object returned by CreateUninitializedObject is already a complete copy.
        public override void WriteToUninitializedObject(object instance, object[] objectGraph) { }
    }

    /// <summary>Cloner for one-dimensional arrays whose elements are captured individually.</summary>
    private sealed class Rank1ArrayCloner : Cloner {
        // The cloner used for the last element of the previously captured array.
        Cloner PreviousElementCloner;

        public Rank1ArrayCloner(Type type, Cloner elementCloner) : base(type, null) {
            PreviousElementCloner = elementCloner;
        }

        internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) {
            Debug.Assert(Type == instance.GetType());
            var array = (Array)instance;
            var lowerBound = array.GetLowerBound(0);
            var length = array.Length; // should throw an exception if length > Int32.MaxValue
            if (length == 0) return new BlittableState(null, instance);
            // The result is unused; the checked addition only serves as an overflow test.
            var throwExceptionOnOverflow = checked(lowerBound + length);
            var objectIndices = new int[length];
            var cloner = PreviousElementCloner;
            var previousType = cloner.Type;
            for (int i = 0; i < length; ++i) {
                var value = array.GetValue(lowerBound + i);
                if (value != null) { // null elements keep the object index 0
                    var type = value.GetType();
                    // Only look up a cloner when the element type changes.
                    if (type != previousType) {
                        cloner = CreateWithoutLock(type);
                        previousType = type;
                    }
                    objectIndices[i] = captureContext.GetObjectIndex(value, cloner);
                }
            }
            PreviousElementCloner = cloner;
            return new Rank1ArrayState(Type.GetElementType(), lowerBound, objectIndices);
        }
    }

    /// <summary>State of a one-dimensional array, stored as the object indices of its elements.</summary>
    private sealed class Rank1ArrayState : State {
        private readonly Type ElementType;
        private readonly int LowerBound;

        public Rank1ArrayState(Type elementType, int lowerBound, int[] objectIndices)
               : base(null, objectIndices)
        {
            Debug.Assert(objectIndices != null);
            ElementType = elementType;
            LowerBound = lowerBound;
        }

        public override Type Type { get { return ElementType.MakeArrayType(); } }

        public override object CreateUninitializedObject() {
            if (LowerBound == 0)
                return Array.CreateInstance(ElementType, ObjectIndices.Length);
            else
                return Array.CreateInstance(ElementType, new int[]{ObjectIndices.Length}, new int[]{LowerBound});
        }

        public override void WriteToUninitializedObject(object instance, object[] objectGraph) {
            var array = (Array)instance;
            var objectIndices = ObjectIndices;
            for (int i = 0; i < objectIndices.Length; ++i) {
                var objectIndex = objectIndices[i];
                if (objectIndex == 0) continue; // the element was null
                array.SetValue(objectGraph[objectIndex], LowerBound + i);
            }
        }
    }

    /// <summary>Cloner for multi-dimensional arrays whose elements are captured individually.</summary>
    private sealed class RankNArrayCloner : Cloner {
        // The cloner used for the last element of the previously captured array.
        Cloner PreviousElementCloner;

        public RankNArrayCloner(Type type, Cloner elementCloner) : base(type, null) {
            PreviousElementCloner = elementCloner;
        }

        internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) {
            Debug.Assert(Type == instance.GetType());
            var array = (Array)instance;
            var rank = array.Rank;
            var lowerBounds = new int[rank];
            var lengths = new int[rank];
            var ends = new int[rank]; // exclusive upper index bound for each dimension
            var numberOfElements = 1;
            for (int d = 0; d < rank; ++d) {
                var lowerBound = array.GetLowerBound(d);
                lowerBounds[d] = lowerBound;
                var length = array.GetLength(d);
                lengths[d] = length;
                ends[d] = checked(lowerBound + length);
                numberOfElements = checked(numberOfElements * length);
            }
            var objectIndices = new int[numberOfElements];
            var cloner = PreviousElementCloner;
            var previousType = cloner.Type;
            var indices = (int[])lowerBounds.Clone();
            for (int i = 0; i < numberOfElements; ++i) {
                var value = array.GetValue(indices);
                if (value != null) { // null elements keep the object index 0
                    var type = value.GetType();
                    // Only look up a cloner when the element type changes.
                    if (type != previousType) {
                        cloner = CreateWithoutLock(type);
                        previousType = type;
                    }
                    objectIndices[i] = captureContext.GetObjectIndex(value, cloner);
                }
                // increment multi-dimensional index
                var d = rank - 1;
                do {
                    if (++indices[d] < ends[d]) break;
                    indices[d] = lowerBounds[d];
                } while (--d >= 0);
            }
            PreviousElementCloner = cloner;
            return new RankNArrayState(Type.GetElementType(), lengths, lowerBounds, ends, objectIndices);
        }
    }

    /// <summary>
    /// State of a multi-dimensional array, stored as the object indices of its elements
    /// with the index of the last dimension varying fastest.
    /// </summary>
    private sealed class RankNArrayState : State {
        private readonly Type ElementType;
        private readonly int[] Lengths;
        private readonly int[] LowerBounds;
        private readonly int[] Ends;

        public RankNArrayState(Type elementType, int[] lengths, int[] lowerBounds, int[] ends, int[] objectIndices)
               : base(null, objectIndices)
        {
            Debug.Assert(   lengths != null
                         && lengths.Length == lowerBounds.Length
                         && lengths.Length == ends.Length
                         && objectIndices != null);
            ElementType = elementType;
            Lengths = lengths;
            LowerBounds = lowerBounds;
            Ends = ends;
        }

        public override Type Type { get { return ElementType.MakeArrayType(Lengths.Length); } }

        public override object CreateUninitializedObject() {
            return Array.CreateInstance(ElementType, Lengths, LowerBounds);
        }

        public override void WriteToUninitializedObject(object instance, object[] objectGraph) {
            var array = (Array)instance;
            var indices = (int[])LowerBounds.Clone();
            foreach (var objectIndex in ObjectIndices) {
                if (objectIndex != 0) // index 0 marks a null element
                    array.SetValue(objectGraph[objectIndex], indices);
                // increment multi-dimensional index
                var d = LowerBounds.Length - 1;
                do {
                    if (++indices[d] < Ends[d]) break;
                    indices[d] = LowerBounds[d];
                } while (--d >= 0);
            }
        }
    }

    /// <summary>Cloner that captures the values of the serialized fields of an object.</summary>
    private sealed class NativeSerializationCloner : Cloner {
        internal readonly FieldInfo[] SerializedFields;
        // Per-field cloner that was used for the most recently captured field value.
        private readonly Cloner[] Cloners;
        private Func<object, object[]> FieldValuesGetter; // lazily initialized
        internal Action<object, object[], int[], object[]> FieldValuesSetter; // lazily initialized

        public NativeSerializationCloner(Type type, CloneEventHandlers eventHandlers, FieldInfo[] serializedFields)
               : base(type, eventHandlers)
        {
            SerializedFields = serializedFields;
            Cloners = new Cloner[serializedFields.Length];
        }

        internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) {
            Debug.Assert(Type == instance.GetType());
            if (SerializedFields.Length == 0) return new NativeSerializationState(this);
            var getter = FieldValuesGetter;
            if (getter == null)
                FieldValuesGetter = getter = CreateFieldValuesGetter(Type, SerializedFields);
            var values = getter(instance); // GetFieldValues(instance, SerializedFields);
            int[] objectIndices = new int[values.Length];
            for (int i = 0; i < values.Length; ++i) {
                var value = values[i];
                if (value == null) continue;
                var type = value.GetType();
                // Primitive values and strings stay in the values array.
                if (type.IsPrimitive || type == typeof(string)) continue;
                // All other values are replaced by their index in the object graph.
                values[i] = null;
                var cloner = Cloners[i];
                if (cloner == null || type != cloner.Type) {
                    cloner = CreateWithoutLock(type);
                    Cloners[i] = cloner;
                }
                objectIndices[i] = captureContext.GetObjectIndex(value, cloner);
            }
            return new NativeSerializationState(this, values, objectIndices);
        }
    }

    /// <summary>State captured by a NativeSerializationCloner.</summary>
    private sealed class NativeSerializationState : State {
        private readonly NativeSerializationCloner Cloner;
        private readonly object[] Values; // may be null if object has no fields

        public NativeSerializationState(NativeSerializationCloner cloner)
               : base(cloner.EventHandlers, null)
        {
            Cloner = cloner;
        }

        public NativeSerializationState(NativeSerializationCloner cloner, object[] values, int[] objectIndices)
               : base(cloner.EventHandlers, objectIndices)
        {
            Debug.Assert(cloner != null && values.Length != 0 && values.Length == objectIndices.Length);
            Cloner = cloner;
            Values = values;
        }

        public override Type Type { get { return Cloner.Type; } }

        public override object CreateUninitializedObject() {
            return FormatterServices.GetUninitializedObject(Cloner.Type);
        }

        public override void WriteToUninitializedObject(object instance, object[] objectGraph) {
            if (ObjectIndices == null) return; // the object has no serialized fields
            var setter = Cloner.FieldValuesSetter;
            if (setter == null)
                Cloner.FieldValuesSetter = setter = CreateFieldValuesSetter(Cloner.Type, Cloner.SerializedFields);
            setter(instance, Values, ObjectIndices, objectGraph);
        }
    }

    // NativeSerializationProxyState is used by CustomSerializationCloner to store the state of
    // proxy objects which don't implement ISerializable.
private sealed class NativeSerializationProxyState : State { private readonly Type Type_; private readonly FieldInfo[] Fields; private readonly object[] Values; public NativeSerializationProxyState(Type type, CloneEventHandlers eventHandlers) : base(eventHandlers, null) { Type_ = type; } public NativeSerializationProxyState(Type type, CloneEventHandlers eventHandlers, FieldInfo[] fields, object[] values, int[] objectIndices) : base(eventHandlers, objectIndices) { Debug.Assert(fields.Length == values.Length && values.Length == objectIndices.Length); Type_ = type; Fields = fields; Values = values; } public override Type Type { get { return Type_; } } public override object CreateUninitializedObject() { return FormatterServices.GetUninitializedObject(Type_); } public override void WriteToUninitializedObject(object instance, object[] objectGraph) { if (ObjectIndices == null) return; // We can't use a NativeSerializationCloner.FieldValuesSetter here // because some primitive values might have a type different from the type of the field // they are assigned to. FieldInfo.SetValue does some automatic conversions in those // cases that the FieldValuesSetter doesn't (e.g. integer type widening). 
/// <summary>
/// Cloner for objects that are captured through their <c>ISerializable.GetObjectData</c>
/// implementation. The captured state is either replayed through the type's
/// (SerializationInfo, StreamingContext) constructor or, when the object names a
/// different proxy type, mapped onto that proxy type.
/// </summary>
private sealed class CustomSerializationCloner : Cloner {
    // The (SerializationInfo, StreamingContext) instance constructor of the cloned type,
    // or null if the type does not declare one (checked when a state is captured).
    internal readonly ConstructorInfo Constructor;
    // NOTE(review): this delegate is invoked as (instance, info, context) by
    // CustomSerializationState, so its type presumably carries generic arguments that
    // are not visible in this view — confirm.
    internal Action ConstructorCaller; // lazily initialized
    // One-element cache: the cloner used for the most recently seen proxy type.
    private Cloner PreviousProxyCloner;
    // Per-member cache of the cloners used for the member values in the previous capture.
    private Cloner[] Cloners;

    private static Type[] SerializableConstructorArgumentTypes = new Type[] {typeof(SerializationInfo), typeof(StreamingContext)};

    /// <summary>
    /// Looks up the public or non-public deserialization constructor of <paramref name="type"/>
    /// and seeds the proxy cloner cache with this instance.
    /// </summary>
    public CustomSerializationCloner(Type type, CloneEventHandlers eventHandlers) : base(type, eventHandlers) {
        Constructor = type.GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, SerializableConstructorArgumentTypes, null);
        PreviousProxyCloner = this;
    }

    /// <summary>
    /// Calls <c>GetObjectData</c> on <paramref name="instance"/>, records every member
    /// of the resulting SerializationInfo and registers non-primitive, non-string member
    /// values with <paramref name="captureContext"/>.
    /// </summary>
    /// <returns>
    /// A CustomSerializationState when the (proxy) type is itself handled by a
    /// CustomSerializationCloner, otherwise a NativeSerializationProxyState.
    /// </returns>
    /// <exception cref="SerializationException">
    /// The proxy type can not be loaded, is an array type, or the type that has to be
    /// constructed lacks a deserialization constructor.
    /// </exception>
    internal override State CaptureShallowStateAndEnqueueNestedState(object instance, CaptureContext captureContext) {
        Debug.Assert(Type == instance.GetType());
        var info = new SerializationInfo(Type, FormatterConverter);
        ((ISerializable)instance).GetObjectData(info, StreamingContext);
        var n = info.MemberCount;
        var members = new CustomSerializationMemberInfo[n];
        var objectIndices = new int[n];
        // The cloner cache is only reusable if the member count did not change.
        if (Cloners == null || Cloners.Length != n) Cloners = new Cloner[n];
        var iter = info.GetEnumerator();
        for (int i = 0; iter.MoveNext(); ++i) {
            var entry = iter.Current;
            members[i].Name = entry.Name;
            members[i].Type = entry.ObjectType;
            var value = entry.Value;
            if (value == null) continue;
            Type type = value.GetType();
            // Primitives and strings are stored by value; objectIndices[i] stays 0.
            if (type.IsPrimitive || type == typeof(string)) {
                members[i].Value = value;
                continue;
            }
            var cloner = Cloners[i];
            if (cloner == null || type != cloner.Type) {
                // NOTE(review): presumably the caller already holds the lock that
                // CreateWithoutLock skips — confirm.
                cloner = CreateWithoutLock(type);
                Cloners[i] = cloner;
            }
            objectIndices[i] = captureContext.GetObjectIndex(value, cloner);
        }

        // Determine the type that has to be instantiated for the clone.
        Type proxyType;
        if (!info.IsFullTypeNameSetExplicit && !info.IsAssemblyNameSetExplicit) {
            proxyType = info.ObjectType;
        } else {
            try {
                var assembly = Assembly.Load(info.AssemblyName);
                proxyType = assembly.GetType(info.FullTypeName, true);
            } catch (Exception e) {
                var msg = "Can not load the type '" + info.FullTypeName + "' in the assembly '" + info.AssemblyName + "'.";
                throw new SerializationException(msg, e);
            }
        }

        // No proxy: the clone is created through this type's own deserialization constructor.
        if (proxyType == Type) {
            if (Constructor == null) throw new SerializationException("The ISerializable type '" + Type.ToString() + "' does not define a proper deserialization constructor.");
            return new CustomSerializationState(this, members, objectIndices);
        }

        Cloner proxyCloner;
        if (proxyType == PreviousProxyCloner.Type) {
            proxyCloner = PreviousProxyCloner;
        } else {
            proxyCloner = CreateWithoutLock(proxyType);
            PreviousProxyCloner = proxyCloner;
        }
        if (proxyType.IsArray) {
            // On .NET a NullReferenceException is thrown on deserialization of an array type proxy.
            throw new SerializationException("The type '" + Type.ToString() + "' uses an array type ('" + proxyType.ToString() + "') as its serialization proxy type.");
        }

        // The proxy type is itself ISerializable: replay the members through its constructor.
        CustomSerializationCloner csc = proxyCloner as CustomSerializationCloner;
        if (csc != null) {
            if (csc.Constructor == null) throw new SerializationException("The ISerializable type '" + csc.Type.ToString() + "' does not define a proper deserialization constructor.");
            return new CustomSerializationState(csc, members, objectIndices);
        }

        if (n == 0) return new NativeSerializationProxyState(proxyType, proxyCloner.EventHandlers);

        FieldInfo[] proxyFields;
        {
            var nsc = proxyCloner as NativeSerializationCloner;
            if (nsc != null) {
                proxyFields = nsc.SerializedFields;
            } else {
                var bc = proxyCloner as BlittableCloner;
                Debug.Assert(bc != null);
                proxyFields = bc.SerializedFields;
            }
        }

        // The BinaryFormatter on .NET simply assigns the values in the SerializationInfo
        // to the field with the same name (of the most derived class) in the proxy object.
        // There are no checks whether all fields are assigned values or whether the target has
        // multiple fields with the same name. The types are only checked once the values are
        // assigned to the proxy object fields. Integer types are automatically widened and
        // types are cast to base or interface types if necessary.
        var proxyValues = new object[proxyFields.Length];
        var proxyObjectIndices = new int[proxyFields.Length];
        for (int i = 0; i < n; ++i) {
            var name = members[i].Name;
            // Only the first field with a matching name receives the member value.
            for (int j = 0; j < proxyFields.Length; ++j) {
                if (name == proxyFields[j].Name) {
                    proxyValues[j] = members[i].Value;
                    proxyObjectIndices[j] = objectIndices[i];
                    break;
                }
            }
        }
        return new NativeSerializationProxyState(proxyType, proxyCloner.EventHandlers, proxyFields, proxyValues, proxyObjectIndices);
    }
}
/// <summary>
/// Clone image for object graphs that can be rebuilt in plain reverse index order.
/// The only events it has to deal with are OnDeserializing handlers and
/// IDeserializationCallback implementations (everything else is asserted absent).
/// </summary>
private sealed class SimpleImage : CloneImage {
    private readonly Cloner.State[] States;
    private readonly int DeserializationCallbackCount;

    /// <param name="states">The captured states; element 0 must be null.</param>
    /// <param name="deserializationCallbackCount">
    /// Number of states whose objects implement IDeserializationCallback.
    /// </param>
    internal SimpleImage(Cloner.State[] states, int deserializationCallbackCount) {
        Debug.Assert(states.Length > 1 && states[0] == null);
        States = states;
        DeserializationCallbackCount = deserializationCallbackCount;
    }

    /// <summary>Materializes a fresh copy of the captured graph and returns its root.</summary>
    public override object CreateClone() {
        int stateCount = States.Length;

        // Filled from the back, so that the callbacks later run in increasing slot order
        // of the positions they were recorded at.
        int[] pendingCallbacks = null;
        int pendingTop = DeserializationCallbackCount;
        if (DeserializationCallbackCount != 0)
            pendingCallbacks = new int[DeserializationCallbackCount];

        // Pass 1: allocate all objects (slot 0 is unused because States[0] is null).
        var clones = new object[stateCount];
        for (int slot = 1; slot < stateCount; ++slot) {
            clones[slot] = States[slot].CreateUninitializedObject();
        }

        // Pass 2: initialize the objects, last slot first.
        for (int slot = stateCount - 1; slot != 0; --slot) {
            var state = States[slot];
            var target = clones[slot];
            var handlers = state.EventHandlers;
            if (handlers != null) {
                var flags = handlers.Events;
                Debug.Assert((flags & (  CloneEvents.ISerializable
                                       | CloneEvents.OnDeserialized
                                       | CloneEvents.IObjectReference)) == 0);
                if ((flags & CloneEvents.OnDeserializing) != 0) {
                    handlers.InvokeOnDeserializing(target, Cloner.StreamingContext);
                }
                if ((flags & CloneEvents.IDeserializationCallback) != 0) {
                    pendingCallbacks[--pendingTop] = slot;
                }
            }
            state.WriteToUninitializedObject(target, clones);
        }

        // Pass 3: run the deserialization callbacks once every object is initialized.
        if (pendingCallbacks != null) {
            Debug.Assert(pendingTop == 0);
            for (int k = 0; k < pendingCallbacks.Length; ++k) {
                var callbackTarget = (IDeserializationCallback)clones[pendingCallbacks[k]];
                callbackTarget.OnDeserialization(null);
            }
        }
        return clones[1];
    }
}
/// <summary>
/// Materializes a fresh copy of the captured object graph and returns its root.
/// Objects are initialized in the order given by <c>Order</c> (last element first).
/// Members of a strongly connected component are processed together: an
/// IObjectReference member is taken out of the graph until the rest of the component
/// is written, and OnDeserialized handlers of component members are delayed until the
/// whole component has been processed.
/// </summary>
public override object CreateClone() {
    int callbackIndicesIndex = DeserializationCallbackCount;
    object[] callbackObjects = DeserializationCallbackCount == 0 ? null : new object[DeserializationCallbackCount];

    // Allocate all objects first (slot 0 is unused).
    var objects = new object[States.Length];
    for (int i = 1; i < States.Length; ++i)
        objects[i] = States[i].CreateUninitializedObject();

    // State indices whose OnDeserialized handler still has to run for the current
    // component. The element type is int: the list receives `index` values below and
    // is used to index States in InvokeDelayedOnDeserializedEvents.
    var delayedOnDeserializedEvents = new List<int>();
    int objectReferenceIndex = 0;     // slot of the IObjectReference object of the current component
    object objectReference = null;    // that object, while it is removed from `objects`
    int[] lastScc = null;

    for (int i = Order.Length - 1; i != 0; --i) {
        var index = Order[i];
        var state = States[index];
        var scc = state.StronglyConnectedComponent;
        if (scc != lastScc) {
            // We crossed a component boundary: finish the previous component first.
            lastScc = scc;
            if (objectReference != null) {
                ReplaceObjectReferenceInSCCWithRealObject(objectReference, objectReferenceIndex, objects);
                objectReferenceIndex = 0;
                objectReference = null;
            }
            if (delayedOnDeserializedEvents.Count != 0)
                InvokeDelayedOnDeserializedEvents(delayedOnDeserializedEvents, objects); // also clears delayedOnDeserializedEvents
            if (scc != null) {
                foreach (var idx in scc) {
                    var handlers = States[idx].EventHandlers;
                    if (handlers != null && (handlers.Events & CloneEvents.IObjectReference) != 0) {
                        objectReferenceIndex = idx;
                        objectReference = objects[idx];
                        objects[idx] = null; // set to null until we call ReplaceObjectReferenceInSCCWithRealObject
                    }
                }
            }
        }

        var instance = objects[index];
        var eventHandlers = state.EventHandlers;
        if (eventHandlers == null) {
            state.WriteToUninitializedObject(instance, objects);
        } else {
            var events = eventHandlers.Events;
            if (instance != null) {
                if ((events & CloneEvents.OnDeserializing) != 0)
                    eventHandlers.InvokeOnDeserializing(instance, Cloner.StreamingContext);
                state.WriteToUninitializedObject(instance, objects);
                if ((events & CloneEvents.OnDeserialized) != 0) {
                    if (scc == null) eventHandlers.InvokeOnDeserialized(instance, Cloner.StreamingContext);
                    else delayedOnDeserializedEvents.Add(index);
                }
                if ((events & CloneEvents.IObjectReference) != 0) {
                    Debug.Assert(state.StronglyConnectedComponent == null);
                    objects[index] = GetRealObject(instance);
                }
            } else {
                // The slot was nulled above because it holds the component's IObjectReference
                // object; it is written in ReplaceObjectReferenceInSCCWithRealObject.
                Debug.Assert(index == objectReferenceIndex);
            }
            // It's a pity we have to process the IDeserializationCallback separately
            // from OnDeserialized events to stay compatible with the .NET BinaryFormatter.
            if ((events & CloneEvents.IDeserializationCallback) != 0)
                callbackObjects[--callbackIndicesIndex] = instance ?? objectReference;
        }
    }

    // Finish the last component.
    if (objectReference != null)
        ReplaceObjectReferenceInSCCWithRealObject(objectReference, objectReferenceIndex, objects);
    if (delayedOnDeserializedEvents.Count != 0)
        InvokeDelayedOnDeserializedEvents(delayedOnDeserializedEvents, objects);

    if (callbackObjects != null) {
        Debug.Assert(callbackIndicesIndex == 0);
        // We call the callback in the reverse topological order at the end of
        // deserialization, which is similar to what the BinaryFormatter does, unfortunately.
        foreach (var obj in callbackObjects)
            ((IDeserializationCallback)obj).OnDeserialization(null);
    }
    return objects[1];
}
/// <summary>
/// Returns the public and non-public instance fields of the type (and its base types),
/// except fields with the NonSerialized attribute. In the returned array fields from
/// derived types come before fields from base types.
/// </summary>
/// <param name="type">A serializable, non-interface type.</param>
/// <param name="typeIsBlittable">
/// Set to true only if no field is marked NonSerialized and every field is a primitive,
/// a string, or a value type for which this method itself reports true.
/// </param>
/// <exception cref="SerializationException">
/// A base type of <paramref name="type"/> (other than object) is not serializable.
/// </exception>
internal static FieldInfo[] GetSerializedFields(Type type, out bool typeIsBlittable) {
    Debug.Assert(type.IsSerializable && !type.IsInterface);

    // We need the fields of the most derived type first, but GetFields returns the
    // fields in an undefined order, so we have to climb the type hierarchy ourselves.
    bool isBlittable = true;
    int nonSerialized = 0;
    var fields = ScanDeclaredFieldsForSerialization(type, ref nonSerialized, ref isBlittable);

    // Validate the complete base chain before reflecting over any base type's fields.
    int numberOfBases = 0;
    for (var bt = type.BaseType; bt != null && bt != typeof(object); bt = bt.BaseType) {
        if (!bt.IsSerializable)
            throw new SerializationException(BaseTypeNotSerializableMessage(bt, type));
        ++numberOfBases;
    }

    var baseFieldArrays = new FieldInfo[numberOfBases][];
    int totalFields = fields.Length;
    {
        var baseType = type.BaseType;
        for (int i = 0; i < numberOfBases; ++i, baseType = baseType.BaseType) {
            var baseFields = ScanDeclaredFieldsForSerialization(baseType, ref nonSerialized, ref isBlittable);
            baseFieldArrays[i] = baseFields;
            totalFields += baseFields.Length;
        }
    }

    typeIsBlittable = nonSerialized == 0 && isBlittable;

    // Fast path: nothing to filter out and nothing to append.
    if (nonSerialized == 0 && totalFields == fields.Length) return fields;

    var serializedFields = new FieldInfo[totalFields - nonSerialized];
    int n = 0;
    foreach (var f in fields)
        if (!f.IsNotSerialized) serializedFields[n++] = f;
    foreach (var baseFields in baseFieldArrays)
        foreach (var bf in baseFields)
            if (!bf.IsNotSerialized) serializedFields[n++] = bf;
    return serializedFields;
}

/// <summary>
/// Returns the instance fields declared directly on <paramref name="declaringType"/>,
/// adds the number of NonSerialized fields to <paramref name="nonSerialized"/> and
/// clears <paramref name="isBlittable"/> if a serialized field is not blittable.
/// </summary>
private static FieldInfo[] ScanDeclaredFieldsForSerialization(Type declaringType, ref int nonSerialized, ref bool isBlittable) {
    var declared = declaringType.GetFields(  BindingFlags.Public | BindingFlags.NonPublic
                                           | BindingFlags.Instance | BindingFlags.DeclaredOnly);
    foreach (var f in declared) {
        if (f.IsNotSerialized) {
            // A single NonSerialized field already makes the type non-blittable, so the
            // field's own type does not have to be inspected. Skipping it also avoids
            // recursing into value types that are not required to be serializable.
            ++nonSerialized;
            continue;
        }
        var ft = f.FieldType;
        if (ft.IsPrimitive || ft == typeof(string)) continue;
        if (!ft.IsValueType) {
            isBlittable = false;
        } else {
            bool nestedIsBlittable;
            GetSerializedFields(ft, out nestedIsBlittable);
            isBlittable &= nestedIsBlittable;
        }
    }
    return declared;
}
type.IsValueType; // arg 0: dummy argument (makes delegate invocation faster) // arg 1: (boxed) object instance ilg.DeclareLocal(typeof(object[])); // local 0: the returned values array ilg.DeclareLocal(typeof(object)); // local 1: temporary object value // create the values array ilg.Emit(OpCodes.Ldc_I4, fields.Length); ilg.Emit(OpCodes.Newarr, typeof(object)); ilg.Emit(OpCodes.Stloc_0); // cast/unbox the object instace ilg.Emit(OpCodes.Ldarg_1); if (!isValueType) ilg.Emit(OpCodes.Castclass, type); else ilg.Emit(OpCodes.Unbox, type); // The unbox IL construction doesn't return a normal managed pointer // but a "controlled-mutability" managed pointer. Since there's no way // to declare a controlled-mutability managed pointer local and one // can't convert such a pointer into a normal managed pointer, we can't // store away the pointer for later field accesses. Instead we use // OpCodes.Dup to keep the pointer around. Alternatively we could copy // the value type instance onto the stack, but that can be costly for // large value types. 
/// <summary>
/// Compiles a delegate that assigns all of <paramref name="fields"/> on a (boxed)
/// instance of <paramref name="type"/> in one call.
/// </summary>
/// <remarks>
/// The returned delegate takes (instance, values, objectIndices, objectGraph). For field
/// i it stores <c>objectGraph[objectIndices[i]]</c> if <c>objectIndices[i]</c> is
/// non-zero and <c>values[i]</c> otherwise. The delegate type arguments follow from the
/// DynamicMethod signature below, whose first (dummy) parameter is bound to null by
/// CreateDelegate.
/// </remarks>
/// <param name="type">The type that declares or inherits the fields.</param>
/// <param name="fields">The fields to assign; must be non-empty.</param>
/// <exception cref="ArgumentException"><paramref name="fields"/> is empty.</exception>
internal static Action<object, object[], int[], object[]> CreateFieldValuesSetter(Type type, FieldInfo[] fields) {
    if (fields.Length == 0) throw new ArgumentException("The fields array must be non-empty.");

    // It is important that we use the 8 argument DynamicMethod constructor
    // to associate the method with the type, so that the method is allowed
    // to set readonly (initonly) fields.
    var dynamicMethod = new DynamicMethod("FieldValuesSetter",
                                          MethodAttributes.Public | MethodAttributes.Static,
                                          CallingConventions.Standard,
                                          null,
                                          new Type[]{typeof(object), typeof(object), typeof(object[]), typeof(int[]), typeof(object[])},
                                          type, true);
    var ilg = dynamicMethod.GetILGenerator();
    var isValueType = type.IsValueType;

    // arg0: dummy argument (makes delegate invocation faster)
    // arg1: (boxed) object instance
    // arg2: values array
    // arg3: objectIndices array
    // arg4: objectGraph array

    // local 0: object index
    ilg.DeclareLocal(typeof(int));

    ilg.Emit(OpCodes.Ldarg_1);
    if (!isValueType)
        ilg.Emit(OpCodes.Castclass, type);
    else
        ilg.Emit(OpCodes.Unbox, type); // returns a controlled-mutability pointer
                                       // which we can't store in a local...
    for (int i = 0; i < fields.Length; ++i) {
        if (i + 1 != fields.Length)
            ilg.Emit(OpCodes.Dup); // ... so we use OpCodes.Dup to keep it around

        var field = fields[i];

        // is field value an object in the object graph array?
        ilg.Emit(OpCodes.Ldarg_3);
        ilg.Emit(OpCodes.Ldc_I4, i);
        ilg.Emit(OpCodes.Ldelem, typeof(int));
        ilg.Emit(OpCodes.Stloc_0);
        ilg.Emit(OpCodes.Ldloc_0);
        var label1 = ilg.DefineLabel();
        ilg.Emit(OpCodes.Brtrue, label1);

        // load boxed value
        ilg.Emit(OpCodes.Ldarg_2);
        ilg.Emit(OpCodes.Ldc_I4, i);
        ilg.Emit(OpCodes.Ldelem, typeof(object));
        var label2 = ilg.DefineLabel();
        ilg.Emit(OpCodes.Br, label2);

        // load object graph array
        ilg.MarkLabel(label1);
        // Ldarg_S takes a one-byte argument index. Passing an int literal to
        // Emit(OpCodes.Ldarg, ...) would select the Int32 overload and write a 4-byte
        // operand after an opcode that expects 2 bytes.
        ilg.Emit(OpCodes.Ldarg_S, (byte)4);
        ilg.Emit(OpCodes.Ldloc_0);
        ilg.Emit(OpCodes.Ldelem, typeof(object));

        ilg.MarkLabel(label2);
        // store value into field
        if (field.FieldType != typeof(object))
            ilg.Emit(OpCodes.Unbox_Any, field.FieldType);
        ilg.Emit(OpCodes.Stfld, field);
    }
    ilg.Emit(OpCodes.Ret);

    return (Action<object, object[], int[], object[]>)dynamicMethod.CreateDelegate(typeof(Action<object, object[], int[], object[]>), null);
}
// Shared scratch stacks for FindStronglyConnectedComponents and ComputeTopologicalOrder.
// They are static for optimization purposes, which makes those two methods
// not thread-safe.
private static int[] TopoIndices = new int[8];

/// <summary>Doubles the capacity of TopoIndices, keeping the existing elements in place.</summary>
private static void GrowTopoIndices() {
    Array.Resize(ref TopoIndices, 2*TopoIndices.Length);
}

/// <summary>
/// Doubles the capacity of TopoIndices when it is used as two stacks: the elements
/// below <paramref name="splitIndex"/> stay at the front, the elements from
/// <paramref name="splitIndex"/> onwards are moved to the end of the new array.
/// </summary>
/// <returns>The index in the new array at which the moved tail begins.</returns>
private static int GrowTopoIndices(int splitIndex) {
    Debug.Assert(splitIndex >= 0 && splitIndex <= TopoIndices.Length);
    int oldLength = TopoIndices.Length;
    int tailLength = oldLength - splitIndex;
    int grownLength = 2*oldLength;
    var grown = new int[grownLength];

    // front part (the stack that grows upwards)
    Array.Copy(TopoIndices, grown, splitIndex);

    // back part (the stack that grows downwards) keeps its distance to the array end
    int movedSplitIndex = grownLength;
    if (tailLength != 0) {
        movedSplitIndex = grownLength - tailLength;
        Array.Copy(TopoIndices, splitIndex, grown, movedSplitIndex, tailLength);
    }

    TopoIndices = grown;
    return movedSplitIndex;
}

private static int[] TopoSubIndices = new int[8];

/// <summary>Doubles the capacity of TopoSubIndices, keeping the existing elements in place.</summary>
private static void GrowTopoSubIndices() {
    Array.Resize(ref TopoSubIndices, 2*TopoSubIndices.Length);
}
/// <summary>
/// Fills the StronglyConnectedComponent fields of the states passed in the array.
/// Returns an array mapping each state to an integer component identifier.
/// Uses the static TopoIndices and TopoSubIndices arrays as scratch stacks.
/// </summary>
/// <param name="states">The object states to traverse. The object with array index
/// 0 is ignored. All other objects are assumed to be reachable from the object
/// with array index 1.</param>
internal static int[] FindStronglyConnectedComponents(State[] states) {
    Debug.Assert(states.Length > 1);
    // components[k] == 0 means that the object with index k has not been visited yet.
    int[] components = new int[states.Length];

    // The path stack and the component stack are both stored in TopoIndices.
    // The path stack starts at the beginning of TopoIndices, while
    // the component stack starts at the end and progresses in reverse direction.

    int pathStackCount = 0; // number of elements in the path stack
    int componentStackIndex = TopoIndices.Length; // index of element last inserted into component stack
    int counter = 1; // in the paper this variable is called "index"
    int reverseCounter = states.Length - 1; // in the paper this variable is called "C"
    bool root = true;
    int objectIndex = 1; // states[1] is state for the root object, states[0] is null
    int subIndex = 0;
    var subObjectIndices = states[objectIndex].ObjectIndices;
    components[1] = counter;
    ++counter;
    if (subObjectIndices != null) {
        for (;;) {
            // Visit the not yet processed sub-objects of the current object.
            while (subIndex < subObjectIndices.Length) {
                var subObjectIndex = subObjectIndices[subIndex];
                ++subIndex;
                if (subObjectIndex == 0) continue; // index 0 marks "no object"
                var subObjectComponent = components[subObjectIndex];
                if (subObjectComponent == 0) {
                    var subSubObjectIndices = states[subObjectIndex].ObjectIndices;
                    if (subSubObjectIndices == null) {
                        // An object without sub-objects is a component of its own.
                        components[subObjectIndex] = reverseCounter;
                        --reverseCounter;
                    } else {
                        // Descend into the sub-object: save the current position on the
                        // path stack. The sign of the saved sub index encodes `root`.
                        subObjectIndices = subSubObjectIndices;
                        components[subObjectIndex] = counter;
                        ++counter;
                        TopoIndices[pathStackCount] = objectIndex;
                        TopoSubIndices[pathStackCount] = root ? subIndex : -subIndex;
                        root = true;
                        objectIndex = subObjectIndex;
                        subIndex = 0;
                        ++pathStackCount;
                        if (pathStackCount == componentStackIndex)
                            componentStackIndex = GrowTopoIndices(componentStackIndex);
                        if (pathStackCount == TopoSubIndices.Length)
                            GrowTopoSubIndices();
                        continue;
                    }
                } else if (subObjectComponent < components[objectIndex]) {
                    components[objectIndex] = subObjectComponent;
                    root = false;
                }
            }
            if (root) {
                // The current object is the root of a component. Pop the other members
                // of the component (if any) from the component stack.
                if (componentStackIndex < TopoIndices.Length) {
                    int component = components[objectIndex];
                    if (components[TopoIndices[componentStackIndex]] >= component) {
                        int next = componentStackIndex + 1;
                        while (next < TopoIndices.Length && components[TopoIndices[next]] >= component) ++next;
                        int d = next - componentStackIndex;
                        // scc[0] is the component root, followed by the d popped members.
                        var scc = new int[d + 1];
                        for (int i = 0; i < d; ++i) {
                            int idx = TopoIndices[componentStackIndex + i];
                            scc[1 + i] = idx;
                            states[idx].StronglyConnectedComponent = scc;
                            components[idx] = reverseCounter;
                            --counter;
                        }
                        scc[0] = objectIndex;
                        states[objectIndex].StronglyConnectedComponent = scc;
                        componentStackIndex = next;
                    }
                }
                components[objectIndex] = reverseCounter;
                --counter;
                --reverseCounter;
                if (pathStackCount == 0) break;
            } else {
                TopoIndices[--componentStackIndex] = objectIndex;
                // we never need to grow the TopoIndices array here
                // because we immediately decrement pathStackCount next
            }
            // Return to the parent object saved on the path stack.
            --pathStackCount;
            int subObjectComponent_ = components[objectIndex];
            objectIndex = TopoIndices[pathStackCount];
            subIndex = TopoSubIndices[pathStackCount];
            if (subIndex > 0) {
                root = true;
            } else {
                subIndex = -subIndex;
                root = false;
            }
            subObjectIndices = states[objectIndex].ObjectIndices;
            if (subObjectComponent_ < components[objectIndex]) {
                components[objectIndex] = subObjectComponent_;
                root = false;
            }
        }
    }
    return components;
}
/// <summary>
/// Returns an array with the topologically sorted indices of the states.
/// In the returned array the indices of states belonging to the same strongly
/// connected component are adjacent (but the order within a strongly connected
/// component is undefined).
/// As a side effect the StronglyConnectedComponent fields of the states are filled
/// (through FindStronglyConnectedComponents) and the static scratch stacks are replaced.
/// </summary>
/// <param name="states">The object states to traverse. The object with array index
/// 0 is ignored. All other objects are assumed to be reachable from the object
/// with array index 1.</param>
internal static int[] ComputeTopologicalOrder(State[] states) {
    Debug.Assert(states.Length > 1);

    // Fill the State.StronglyConnectedComponent fields.
    // (We don't need the returned array, so we can recycle it for our purposes.)
    int[] orderedObjectIndices = FindStronglyConnectedComponents(states);
    Array.Clear(orderedObjectIndices, 0, orderedObjectIndices.Length);
    // The result array is filled from the back; element 0 is never written.
    int nextPosition = orderedObjectIndices.Length - 1;

    // NOTE(review): these assignments discard the static stacks (including whatever
    // capacity FindStronglyConnectedComponents just grew them to) on every call.
    // The traversal below only reads stack slots it has written, so it looks like it
    // would also work with the existing arrays — confirm whether the reset is intentional.
    TopoIndices = new int[2];
    TopoSubIndices = new int[2];
    SccIndexStack = new int[2];

    // We traverse the graph non-recursively in depth-first order.

    int topoStackCount = 0;
    int sccIndexStackCount = 0;
    // One bit per state index; a set bit means the object has already been visited.
    int[] visitedBits = new int[(checked(states.Length + 31))/32];
    int objectIndex;
    int[] subObjectIndices;
    {
        var state = states[1];
        if (state.StronglyConnectedComponent == null) {
            objectIndex = 1;
            subObjectIndices = state.ObjectIndices;
            if (subObjectIndices == null) {
                // The root has no sub-objects.
                orderedObjectIndices[1] = 1;
                return orderedObjectIndices;
            }
            visitedBits[0] = 1 << 1;
        } else {
            // All members of a component are marked as visited at once;
            // the traversal of a component starts at its first member.
            foreach (var sccIndex in state.StronglyConnectedComponent)
                visitedBits[sccIndex/32] |= 1 << (sccIndex%32);
            objectIndex = state.StronglyConnectedComponent[0];
            subObjectIndices = states[objectIndex].ObjectIndices;
            // The stack entry is the index of the next component member to traverse.
            SccIndexStack[0] = 1;
            sccIndexStackCount = 1;
        }
    }
    int subIndex = subObjectIndices.Length - 1;
    for (;;) {
        // First we iterate over the sub objects...
        Debug.Assert(subObjectIndices != null);
        // (The states array was constructed in breadth-first order, while we construct
        // the topological order using depth-first search. With a bit of luck we can
        // keep the resulting orderedObjectIndices close to a simple increasing sequence
        // by iterating over the sub-objects in the depth-first search in reverse order.)
        while (subIndex >= 0) {
            var subObjectIndex = subObjectIndices[subIndex];
            --subIndex;
            if (subObjectIndex == 0) continue; // index 0 marks "no object"
            int w = subObjectIndex/32, b = subObjectIndex%32;
            if (((visitedBits[w] >> b) & 1) == 0) {
                var subState = states[subObjectIndex];
                var subSubObjectIndices = subState.ObjectIndices;
                if (subState.StronglyConnectedComponent == null) {
                    visitedBits[w] |= 1 << b;
                    if (subSubObjectIndices == null) {
                        // A leaf object can be emitted immediately.
                        orderedObjectIndices[nextPosition] = subObjectIndex;
                        --nextPosition;
                        continue;
                    }
                    subObjectIndices = subSubObjectIndices;
                } else {
                    foreach (var sccIndex in subState.StronglyConnectedComponent)
                        visitedBits[sccIndex/32] |= 1 << (sccIndex%32);
                    subObjectIndex = subState.StronglyConnectedComponent[0];
                    subObjectIndices = states[subObjectIndex].ObjectIndices;
                    SccIndexStack[sccIndexStackCount] = 1;
                    if (++sccIndexStackCount == SccIndexStack.Length) GrowSccIndexStack();
                }
                // Save the current position and descend into the sub-object.
                TopoIndices[topoStackCount] = objectIndex;
                TopoSubIndices[topoStackCount] = subIndex;
                ++topoStackCount;
                if (topoStackCount == TopoIndices.Length) GrowTopoIndices();
                if (topoStackCount == TopoSubIndices.Length) GrowTopoSubIndices();
                objectIndex = subObjectIndex;
                subIndex = subObjectIndices.Length - 1;
                continue;
            }
        }
        // ... then we iterate over other objects in the same strongly connected component.
        var scc = states[objectIndex].StronglyConnectedComponent;
        if (scc == null) {
            orderedObjectIndices[nextPosition] = objectIndex;
            --nextPosition;
        } else {
            Debug.Assert(sccIndexStackCount > 0);
            int sccIndex = SccIndexStack[sccIndexStackCount - 1];
            if (sccIndex < scc.Length) {
                // Continue with the next member of the component.
                objectIndex = scc[sccIndex];
                subObjectIndices = states[objectIndex].ObjectIndices;
                subIndex = subObjectIndices.Length - 1;
                SccIndexStack[sccIndexStackCount - 1] = ++sccIndex;
                continue;
            }
            // All members have been traversed: emit the whole component as one run.
            --sccIndexStackCount;
            for (int i = scc.Length - 1; i >= 0; --i) {
                sccIndex = scc[i];
                orderedObjectIndices[nextPosition] = sccIndex;
                --nextPosition;
            }
        }
        if (topoStackCount == 0) break;
        // Return to the parent object saved on the stack.
        --topoStackCount;
        objectIndex = TopoIndices[topoStackCount];
        subIndex = TopoSubIndices[topoStackCount];
        subObjectIndices = states[objectIndex].ObjectIndices;
    }
    return orderedObjectIndices;
}
/// <summary>
/// Wraps the serialization event method <paramref name="mi"/> declared on
/// <paramref name="type"/> in a Handler that accepts the instance as a boxed object.
/// </summary>
private static Handler CreateHandler(Type type, MethodInfo mi) {
    // Step 1: bind the method to a typed open-instance delegate (null first argument),
    //         i.e. an Action taking (type, StreamingContext).
    Type typedActionType = typeof(Action<,>).MakeGenericType(type, typeof(StreamingContext));
    Delegate typedAction = Delegate.CreateDelegate(typedActionType, null, mi);

    // Step 2: let the generic WithBoxedArgument helper, instantiated for `type`,
    //         adapt the typed delegate to the untyped Handler signature.
    MethodInfo adapter = WithBoxedArgumentMethodInfo.MakeGenericMethod(type);
    object handler = adapter.Invoke(null, new object[]{typedAction});
    return (Handler)handler;
}
/// <summary>
/// Second phase of Create: collects the handlers for the four serialization event
/// attributes, starting at method <paramref name="i"/> of <paramref name="methods"/>
/// (the declared methods of <paramref name="baseType"/>) and continuing up the base
/// type chain. Handlers found later (i.e. on base types) are placed in front of the
/// ones found earlier, so that base handlers are called first.
/// </summary>
/// <exception cref="SerializationException">
/// A base type (other than object) is not serializable.
/// </exception>
private static CloneEventHandlers CreateContinue(Type type, CloneEvents events, Type baseType, MethodInfo[] methods, int i) {
    const BindingFlags declaredInstanceMethods =   BindingFlags.Public | BindingFlags.NonPublic
                                                 | BindingFlags.Instance | BindingFlags.DeclaredOnly;

    Handler serializing = null, serialized = null, deserializing = null, deserialized = null;

    var current = baseType;
    while (true) {
        while (i < methods.Length) {
            var method = methods[i];
            ++i;
            // Delegate.Combine returns its first argument unchanged when the second one
            // is null, so no separate "first handler" case is needed.
            if (method.IsDefined(typeofOnSerializingAttribute, false))
                serializing = (Handler)Delegate.Combine(CreateHandler(current, method), serializing);
            if (method.IsDefined(typeofOnSerializedAttribute, false))
                serialized = (Handler)Delegate.Combine(CreateHandler(current, method), serialized);
            if (method.IsDefined(typeofOnDeserializingAttribute, false))
                deserializing = (Handler)Delegate.Combine(CreateHandler(current, method), deserializing);
            if (method.IsDefined(typeofOnDeserializedAttribute, false))
                deserialized = (Handler)Delegate.Combine(CreateHandler(current, method), deserialized);
        }

        current = current.BaseType;
        if (current == null || current == typeofObject) break;
        if (!current.IsSerializable)
            throw new SerializationException(Cloner.BaseTypeNotSerializableMessage(current, type));
        methods = current.GetMethods(declaredInstanceMethods);
        i = 0;
    }

    // Record which events actually have handlers.
    if (serializing   != null) events |= CloneEvents.OnSerializing;
    if (serialized    != null) events |= CloneEvents.OnSerialized;
    if (deserializing != null) events |= CloneEvents.OnDeserializing;
    if (deserialized  != null) events |= CloneEvents.OnDeserialized;

    return new CloneEventHandlers(events, serializing, serialized, deserializing, deserialized);
}
using System;
using System.Diagnostics;
using System.Collections.Generic;

using Microsoft.FSharp.Core;

namespace FParsec {

/// <summary>
/// Identifies the kind of an ErrorMessage.
/// </summary>
/// <remarks>
/// The declaration order is significant: every type up to and including Message
/// stores its whole payload in the String field of the ErrorMessage, and the
/// equality and comparison code below tests for this with relational operators.
/// The comparer also sorts messages by this value first.
/// </remarks>
public enum ErrorMessageType {
    Expected,
    ExpectedString,
    ExpectedCaseInsensitiveString,
    Unexpected,
    UnexpectedString,
    UnexpectedCaseInsensitiveString,
    Message,
    NestedError,
    CompoundError,
    Other
}

/// <summary>
/// Base class of all parser error messages. Concrete messages are instances of
/// the nested subclasses; the constructor is internal, so no other subclasses
/// can be defined outside this assembly.
/// </summary>
[DebuggerDisplay("{GetDebuggerDisplay(),nq}")]
public class ErrorMessage : IEquatable<ErrorMessage> {
    /// <summary>The kind of this message; determines which subclass this instance is.</summary>
    public readonly ErrorMessageType Type;

    // Shared storage for the string payload of all string-carrying message types
    // (and for the label of a CompoundError). The subclasses expose it under a
    // type-specific property name.
    [DebuggerBrowsable(DebuggerBrowsableState.Never)]
    internal string String;

    internal ErrorMessage(ErrorMessageType messageType) {
        Type = messageType;
    }

    public class Expected : ErrorMessage {
        public string Label { get { return String; } }

        public Expected(string labelForExpectedInput)
               : base(ErrorMessageType.Expected)
        {
            String = labelForExpectedInput;
        }
    }

    public class ExpectedString : ErrorMessage {
        // Hides the internal base field with a public read-only property of the same name.
        public new string String { get { return base.String; } }

        public ExpectedString(string expectedString)
               : base(ErrorMessageType.ExpectedString)
        {
            base.String = expectedString;
        }
    }

    public class ExpectedCaseInsensitiveString : ErrorMessage {
        public string CaseInsensitiveString { get { return String; } }

        public ExpectedCaseInsensitiveString(string expectedCaseInsensitiveString)
               : base(ErrorMessageType.ExpectedCaseInsensitiveString)
        {
            String = expectedCaseInsensitiveString;
        }
    }

    public class Unexpected : ErrorMessage {
        public string Label { get { return String; } }

        public Unexpected(string labelForUnexpectedInput)
               : base(ErrorMessageType.Unexpected)
        {
            String = labelForUnexpectedInput;
        }
    }

    public class UnexpectedString : ErrorMessage {
        // Hides the internal base field with a public read-only property of the same name.
        public new string String { get { return base.String; } }

        public UnexpectedString(string unexpectedString)
               : base(ErrorMessageType.UnexpectedString)
        {
            base.String = unexpectedString;
        }
    }

    public class UnexpectedCaseInsensitiveString : ErrorMessage {
        public string CaseInsensitiveString { get { return String; } }

        public UnexpectedCaseInsensitiveString(string unexpectedCaseInsensitiveString)
               : base(ErrorMessageType.UnexpectedCaseInsensitiveString)
        {
            String = unexpectedCaseInsensitiveString;
        }
    }

    public class Message : ErrorMessage {
        // Hides the internal base field with a public read-only property of the same name.
        public new string String { get { return base.String; } }

        public Message(string message)
               : base(ErrorMessageType.Message)
        {
            base.String = message;
        }
    }

    public class NestedError : ErrorMessage {
        public Position Position { get; private set; }
        public object UserState { get; private set; }
        public ErrorMessageList Messages { get; private set; }

        public NestedError(Position position, object userState, ErrorMessageList messages)
               : base(ErrorMessageType.NestedError)
        {
            Position = position;
            UserState = userState;
            Messages = messages;
        }
    }

    public class CompoundError : ErrorMessage {
        public string LabelOfCompound { get { return String; } }

        public Position NestedErrorPosition { get; private set; }
        public object NestedErrorUserState { get; private set; }
        public ErrorMessageList NestedErrorMessages { get; private set; }

        public CompoundError(string labelOfCompound,
                             Position nestedErrorPosition,
                             object nestedErrorUserState,
                             ErrorMessageList nestedErrorMessages)
               : base(ErrorMessageType.CompoundError)
        {
            String = labelOfCompound;
            NestedErrorPosition = nestedErrorPosition;
            NestedErrorUserState = nestedErrorUserState;
            NestedErrorMessages = nestedErrorMessages;
        }
    }

    public class Other : ErrorMessage {
        public object Data { get; private set; }

        public Other(object data) : base(ErrorMessageType.Other) {
            Data = data;
        }
    }

    public override bool Equals(object obj) { return Equals(obj as ErrorMessage); }

    /// <summary>
    /// Two messages are equal if they are the same instance, or if they have the
    /// same Type and equal payloads. For the string-carrying types the payload is
    /// the String field; the remaining types are compared by EqualsHelper.
    /// </summary>
    public bool Equals(ErrorMessage other) {
        return    (object)this == (object)other
               || (   (object)other != null
                   && Type == other.Type
                   && (Type > ErrorMessageType.Message
                       ? EqualsHelper(other)
                       : String == other.String));
    }

    // The casts to object select reference comparison and so prevent this
    // operator from calling itself recursively.
    public static bool operator==(ErrorMessage left, ErrorMessage right) {
        return    (object)left == (object)right
               || (   (object)left != null
                   && (object)right != null
                   && left.Type == right.Type
                   && (left.Type > ErrorMessageType.Message
                       ? left.EqualsHelper(right)
                       : left.String == right.String));
    }

    public static bool operator!=(ErrorMessage left, ErrorMessage right) { return !(left == right); }

    // Compares the payloads of two messages that are already known to have the
    // same, non-string-carrying Type.
    private bool EqualsHelper(ErrorMessage other) {
        Debug.Assert(Type == other.Type && Type > ErrorMessageType.Message);
        if (Type == ErrorMessageType.NestedError) {
            var ne1 = (NestedError)this;
            var ne2 = (NestedError)other;
            return    ne1.Position == ne2.Position
                   && ne1.Messages == ne2.Messages
                   && LanguagePrimitives.GenericEqualityERComparer.Equals(ne1.UserState, ne2.UserState);
        } else if (Type == ErrorMessageType.CompoundError) {
            if (String != other.String) return false;
            var ce1 = (CompoundError)this;
            var ce2 = (CompoundError)other;
            return    ce1.NestedErrorPosition == ce2.NestedErrorPosition
                   && ce1.NestedErrorMessages == ce2.NestedErrorMessages
                   && LanguagePrimitives.GenericEqualityERComparer.Equals(ce1.NestedErrorUserState, ce2.NestedErrorUserState);
        } else { // ErrorMessageType == ErrorMessageType.Other
            Debug.Assert(Type == ErrorMessageType.Other);
            // Both operands are statically typed as object, so this is a reference comparison.
            return ((Other)this).Data == ((Other)other).Data;
        }
    }

    // Only Type and String contribute to the hash code. This is consistent with
    // Equals, because equal messages always have the same Type and equal Strings.
    public override int GetHashCode() {
        return (int)Type ^ (String == null ? 0 : String.GetHashCode());
    }

    // Orders messages by Type first and then by payload. Null sorts before any
    // non-null message. Messages of type Other are not ordered among each other.
    private class ErrorMessageComparer : Comparer<ErrorMessage> {
        public override int Compare(ErrorMessage x, ErrorMessage y) {
            if ((object)x == null || (object)y == null) {
                if ((object)x == null && (object)y == null) return 0;
                return (object)x == null ? -1 : 1;
            }
            int d = (int)x.Type - (int)y.Type;
            if (d != 0) return d;
            var type = x.Type;
            if (type <= ErrorMessageType.Message) {
                Debug.Assert(type >= 0);
                return string.CompareOrdinal(x.String, y.String);
            } else if (type == ErrorMessageType.NestedError) {
                var ne1 = (NestedError)x;
                var ne2 = (NestedError)y;
                var c = Position.Compare(ne1.Position, ne2.Position);
                if (c != 0) return c;
                return CompareMessageLists(ne1.Messages, ne2.Messages);
            } else if (type == ErrorMessageType.CompoundError) {
                var c = string.CompareOrdinal(x.String, y.String);
                if (c != 0) return c;
                var ce1 = (CompoundError)x;
                var ce2 = (CompoundError)y;
                c = Position.Compare(ce1.NestedErrorPosition, ce2.NestedErrorPosition);
                if (c != 0) return c;
                return CompareMessageLists(ce1.NestedErrorMessages, ce2.NestedErrorMessages);
            } else {
                Debug.Assert(type == ErrorMessageType.Other);
                return 0;
            }
        }

        // Compares two message lists element by element after sorting them;
        // if one sorted array is a prefix of the other, the shorter one sorts first.
        private int CompareMessageLists(ErrorMessageList list1, ErrorMessageList list2) {
            var msgs1 = ErrorMessageList.ToSortedArray(list1);
            var msgs2 = ErrorMessageList.ToSortedArray(list2);
            int n = Math.Min(msgs1.Length, msgs2.Length);
            for (int i = 0; i < n; ++i) {
                var c = Compare(msgs1[i], msgs2[i]);
                if (c != 0) return c;
            }
            return msgs1.Length - msgs2.Length;
        }
    }

    internal static Comparer<ErrorMessage> Comparer = new ErrorMessageComparer();

    internal static ErrorMessage[] EmptyArray = new ErrorMessage[0];

    // Formats a string-carrying message for the debugger, e.g. Expected("label")
    // or Expected(null).
    private static string FormatWithString(string typeName, string str) {
        return str == null
               ? typeName + "(null)"
               : Text.DoubleQuote(typeName + "(", str, ")");
    }

    // Returns the text shown by the debugger for this message (see the
    // DebuggerDisplay attribute on the class).
    internal string GetDebuggerDisplay() {
        switch (Type) {
            case ErrorMessageType.Expected:
                return FormatWithString("Expected", String);
            case ErrorMessageType.ExpectedString:
                return FormatWithString("ExpectedString", String);
            case ErrorMessageType.ExpectedCaseInsensitiveString:
                return FormatWithString("ExpectedCaseInsensitiveString", String);
            case ErrorMessageType.Unexpected:
                return FormatWithString("Unexpected", String);
            case ErrorMessageType.UnexpectedString:
                return FormatWithString("UnexpectedString", String);
            case ErrorMessageType.UnexpectedCaseInsensitiveString:
                return FormatWithString("UnexpectedCaseInsensitiveString", String);
            case ErrorMessageType.Message:
                return FormatWithString("Message", String);
            case ErrorMessageType.NestedError: {
                var ne = (NestedError)this;
                var pos = ne.Position == null ? "null" : ne.Position.ToString();
                var msgs = ErrorMessageList.GetDebuggerDisplay(ne.Messages);
                return "NestedError(" + pos + ", ..., " + msgs + ")";
            }
            case ErrorMessageType.CompoundError: {
                var ce = (CompoundError)this;
                var label = ce.String == null ? "null" : Text.Escape(ce.String, "", "\"", "\"", "", '"');
                // A missing position is shown as "null", as in the NestedError case.
                var pos = ce.NestedErrorPosition == null ? "null" : ce.NestedErrorPosition.ToString();
                var msgs = ErrorMessageList.GetDebuggerDisplay(ce.NestedErrorMessages);
                return "CompoundError(" + label + ", " + pos + ", ..., " + msgs + ")";
            }
            case ErrorMessageType.Other: {
                var oe = (Other)this;
                // Show the wrapped data, not the type name of the Other wrapper itself.
                return oe.Data == null ? "Other(null)" : "Other(" + oe.Data.ToString() + ")";
            }
            default:
                throw new InvalidOperationException();
        }
    }
}

}

================================================
FILE: FParsecCS/ErrorMessageList.cs
================================================
// Copyright (c) Stephan Tolksdorf 2010
// License: Simplified BSD License. See accompanying documentation.
using System;
using System.Diagnostics;
using System.Collections.Generic;

namespace FParsec {

/// <summary>
/// An immutable singly linked list of ErrorMessage values.
/// </summary>
/// <remarks>
/// The static helpers (Merge, ToHashSet, ToSortedArray, GetDebuggerDisplay)
/// accept a null reference and treat it as a list without messages.
/// Equality is set-based: order and duplicates are ignored, and so are
/// string-carrying messages whose string is null or empty (see ToHashSet).
/// </remarks>
[DebuggerDisplay("{ErrorMessageList.GetDebuggerDisplay(this),nq}"),
 DebuggerTypeProxy(typeof(ErrorMessageList.DebugView))]
public sealed class ErrorMessageList : IEquatable<ErrorMessageList> {
    public readonly ErrorMessage Head;
    public readonly ErrorMessageList Tail;

    /// <summary>Creates a list with the given head message and tail (which may be null).</summary>
    public ErrorMessageList(ErrorMessage head, ErrorMessageList tail) {
        // Dereferencing head is a deliberate, cheap null check.
        var throwNullReferenceExceptionIfHeadIsNull = head.Type;
        Head = head;
        Tail = tail;
    }

    /// <summary>Creates a list containing a single message.</summary>
    public ErrorMessageList(ErrorMessage message) {
        // Dereferencing message is a deliberate, cheap null check.
        var throwNullReferenceExceptionIfMessageIsNull = message.Type;
        Head = message;
    }

    /// <summary>Creates a list containing message1 followed by message2.</summary>
    public ErrorMessageList(ErrorMessage message1, ErrorMessage message2) {
        // message1 is checked here, message2 by the single-message constructor.
        var throwNullReferenceExceptionIfMessage1IsNull = message1.Type;
        Head = message1;
        Tail = new ErrorMessageList(message2);
    }

    /// <summary>
    /// Returns a list containing the messages of both lists. If list1 is null,
    /// list2 is returned unchanged; if list2 is null, list1 is returned unchanged.
    /// </summary>
    public static ErrorMessageList Merge(ErrorMessageList list1, ErrorMessageList list2) {
        if ((object)list1 == null) return list2;
        return MergeContinue(list1, list2);
    }

    // Pushes the messages of list2, one by one, onto the front of list1.
    // The messages of list2 hence end up in reverse order in front of list1.
    private static ErrorMessageList MergeContinue(ErrorMessageList list1, ErrorMessageList list2) {
        while ((object)list2 != null) {
            list1 = new ErrorMessageList(list2.Head, list1);
            list2 = list2.Tail;
        }
        return list1;
    }

    /// <summary>
    /// Returns the distinct messages of the list as a set. Messages of a
    /// string-carrying type (up to and including Message) whose string is
    /// null or empty are left out.
    /// </summary>
    public static HashSet<ErrorMessage> ToHashSet(ErrorMessageList messages) {
        var msgs = messages;
        var set = new HashSet<ErrorMessage>();
        for (; (object)msgs != null; msgs = msgs.Tail) {
            var msg = msgs.Head;
            Debug.Assert(msg.Type >= 0);
            if (msg.Type <= ErrorMessageType.Message && string.IsNullOrEmpty(msg.String)) continue;
            set.Add(msg);
        }
        return set;
    }

    /// <summary>
    /// Returns the messages of ToHashSet(messages) as an array sorted with
    /// ErrorMessage.Comparer.
    /// </summary>
    public static ErrorMessage[] ToSortedArray(ErrorMessageList messages) {
        var set = ToHashSet(messages);
        var array = new ErrorMessage[set.Count];
        set.CopyTo(array);
        Array.Sort(array, ErrorMessage.Comparer);
        return array;
    }

    public override bool Equals(object obj) { return Equals(obj as ErrorMessageList); }

    /// <summary>
    /// Two lists are equal if they are the same instance or if ToHashSet yields
    /// equal sets for both. A non-null list never equals null.
    /// </summary>
    public bool Equals(ErrorMessageList other) {
        return    (object)this == (object)other
               || (   (object)other != null
                   && ToHashSet(this).SetEquals(ToHashSet(other)));
    }

    // The casts to object select reference comparison and so prevent this
    // operator from calling itself recursively.
    public static bool operator==(ErrorMessageList left, ErrorMessageList right) {
        return    (object)left == (object)right
               || (   (object)left != null
                   && (object)right != null
                   && ToHashSet(left).SetEquals(ToHashSet(right)));
    }

    public static bool operator!=(ErrorMessageList left, ErrorMessageList right) { return !(left == right); }

    // XOR is order-independent, which matches the set-based definition of equality.
    public override int GetHashCode() {
        var set = ToHashSet(this);
        var h = 0;
        foreach (var msg in set)
            h ^= msg.GetHashCode();
        return h;
    }

    // Returns the text shown by the debugger: the first three messages of the
    // sorted array, followed by "..." if there are more.
    internal static string GetDebuggerDisplay(ErrorMessageList list) {
        var es = ErrorMessageList.ToSortedArray(list);
        switch (es.Length) {
            case 0:
                return "[]";
            case 1:
                return "[" + es[0].GetDebuggerDisplay() + "]";
            case 2:
                return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "]";
            case 3:
                return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "; " + es[2].GetDebuggerDisplay() + "]";
            default:
                return "[" + es[0].GetDebuggerDisplay() + "; " + es[1].GetDebuggerDisplay() + "; " + es[2].GetDebuggerDisplay() + "; ...]";
        }
    }

    // Debugger type proxy that presents the list as a sorted array of messages.
    internal class DebugView {
        //[DebuggerBrowsable(DebuggerBrowsableState.Never)]
        private readonly ErrorMessageList List;

        public DebugView(ErrorMessageList list) { List = list; }

        [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
        public ErrorMessage[] Items { get { return ErrorMessageList.ToSortedArray(List); } }
    }
}

}

================================================
FILE: FParsecCS/Errors.cs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2011
// License: Simplified BSD License. See accompanying documentation.
using System;

namespace FParsec {

// Factory for the ErrorMessageList values used by the built-in parsers.
// The static readonly fields are message lists created once from the
// corresponding text in Strings; the static methods build a new list per call.
//
// NOTE(review): this text appears to have lost all generic type arguments
// during extraction (for example `Operator` below presumably had type
// arguments, and the methods taking it were presumably generic) — confirm
// against the repository before relying on the signatures as shown here.
internal static class Errors {
    // Wraps str in a single-element list holding an ErrorMessage.Expected.
    static private ErrorMessageList Expected(string str) {
        return new ErrorMessageList(new ErrorMessage.Expected(str));
    }

    // Wraps str in a single-element list holding an ErrorMessage.Unexpected.
    static private ErrorMessageList Unexpected(string str) {
        return new ErrorMessageList(new ErrorMessage.Unexpected(str));
    }

    // Wraps str in a single-element list holding an ErrorMessage.Message.
    static private ErrorMessageList Message(string str) {
        return new ErrorMessageList(new ErrorMessage.Message(str));
    }

    public static readonly ErrorMessageList ExpectedEndOfInput = Expected(Strings.EndOfInput);
    public static readonly ErrorMessageList UnexpectedEndOfInput = Unexpected(Strings.EndOfInput);

    public static readonly ErrorMessageList ExpectedAnyChar = Expected(Strings.AnyChar);
    public static readonly ErrorMessageList ExpectedWhitespace = Expected(Strings.Whitespace);
    public static readonly ErrorMessageList ExpectedAsciiUppercaseLetter = Expected(Strings.AsciiUppercaseLetter);
    public static readonly ErrorMessageList ExpectedAsciiLowercaseLetter = Expected(Strings.AsciiLowercaseLetter);
    public static readonly ErrorMessageList ExpectedAsciiLetter = Expected(Strings.AsciiLetter);
    public static readonly ErrorMessageList ExpectedUppercaseLetter = Expected(Strings.UppercaseLetter);
    public static readonly ErrorMessageList ExpectedLowercaseLetter = Expected(Strings.LowercaseLetter);
    public static readonly ErrorMessageList ExpectedLetter = Expected(Strings.Letter);
    public static readonly ErrorMessageList ExpectedBinaryDigit = Expected(Strings.BinaryDigit);
    public static readonly ErrorMessageList ExpectedOctalDigit = Expected(Strings.OctalDigit);
    public static readonly ErrorMessageList ExpectedDecimalDigit = Expected(Strings.DecimalDigit);
    public static readonly ErrorMessageList ExpectedHexadecimalDigit = Expected(Strings.HexadecimalDigit);

    public static readonly ErrorMessageList ExpectedNewline = Expected(Strings.Newline);
    public static readonly ErrorMessageList UnexpectedNewline = Unexpected(Strings.Newline);

    public static readonly ErrorMessageList ExpectedTab = Expected(Strings.Tab);

    public static readonly ErrorMessageList ExpectedFloatingPointNumber = Expected(Strings.FloatingPointNumber);

    public static readonly ErrorMessageList ExpectedInt64 = Expected(Strings.Int64);
    public static readonly ErrorMessageList ExpectedInt32 = Expected(Strings.Int32);
    public static readonly ErrorMessageList ExpectedInt16 = Expected(Strings.Int16);
    public static readonly ErrorMessageList ExpectedInt8 = Expected(Strings.Int8);
    public static readonly ErrorMessageList ExpectedUInt64 = Expected(Strings.UInt64);
    public static readonly ErrorMessageList ExpectedUInt32 = Expected(Strings.UInt32);
    public static readonly ErrorMessageList ExpectedUInt16 = Expected(Strings.UInt16);
    public static readonly ErrorMessageList ExpectedUInt8 = Expected(Strings.UInt8);

    public static readonly ErrorMessageList ExpectedPrefixOperator = Expected(Strings.PrefixOperator);
    public static readonly ErrorMessageList ExpectedInfixOperator = Expected(Strings.InfixOperator);
    public static readonly ErrorMessageList ExpectedPostfixOperator = Expected(Strings.PostfixOperator);
    // Must be declared after the two fields it merges, because static field
    // initializers run in textual order.
    public static readonly ErrorMessageList ExpectedInfixOrPostfixOperator = ErrorMessageList.Merge(ExpectedInfixOperator, ExpectedPostfixOperator);

    public static readonly ErrorMessageList NumberOutsideOfDoubleRange = Message(Strings.NumberOutsideOfDoubleRange);
    public static readonly ErrorMessageList NumberOutsideOfInt64Range = Message(Strings.NumberOutsideOfInt64Range);
    public static readonly ErrorMessageList NumberOutsideOfInt32Range = Message(Strings.NumberOutsideOfInt32Range);
    public static readonly ErrorMessageList NumberOutsideOfInt16Range = Message(Strings.NumberOutsideOfInt16Range);
    public static readonly ErrorMessageList NumberOutsideOfInt8Range = Message(Strings.NumberOutsideOfInt8Range);
    public static readonly ErrorMessageList NumberOutsideOfUInt64Range = Message(Strings.NumberOutsideOfUInt64Range);
    public static readonly ErrorMessageList NumberOutsideOfUInt32Range = Message(Strings.NumberOutsideOfUInt32Range);
    public static readonly ErrorMessageList NumberOutsideOfUInt16Range = Message(Strings.NumberOutsideOfUInt16Range);
    public static readonly ErrorMessageList NumberOutsideOfUInt8Range = Message(Strings.NumberOutsideOfUInt8Range);

    // The following methods return a new single-element list whose text is
    // produced by the Strings helper of the same or a similar name.

    public static ErrorMessageList ExpectedAnyCharIn(string chars) {
        return Expected(Strings.AnyCharIn(chars));
    }

    public static ErrorMessageList ExpectedAnyCharNotIn(string chars) {
        return Expected(Strings.AnyCharNotIn(chars));
    }

    public static ErrorMessageList ExpectedStringMatchingRegex(string regexPattern) {
        return Expected(Strings.StringMatchingRegex(regexPattern));
    }

    public static ErrorMessageList ExpectedAnySequenceOfNChars(int n) {
        return Expected(Strings.ExpectedAnySequenceOfNChars(n));
    }

    public static ErrorMessageList CouldNotFindString(string str) {
        return Message(Strings.CouldNotFindString(str));
    }

    public static ErrorMessageList CouldNotFindCaseInsensitiveString(string str) {
        return Message(Strings.CouldNotFindCaseInsensitiveString(str));
    }

    public static ErrorMessageList OperatorsConflict(Position position1, Operator operator1, Position position2, Operator operator2) {
        return Message(Strings.OperatorsConflict(position1, operator1, position2, operator2));
    }

    // Returns a two-element list: the shared "expected prefix operator" message
    // followed by an Unexpected message naming the given operator.
    public static ErrorMessageList UnexpectedNonPrefixOperator(Operator op) {
        return new ErrorMessageList(
                       ExpectedPrefixOperator.Head,
                       new ErrorMessage.Unexpected(Strings.OperatorToString(op)));
    }

    // Returns a two-element list: an ExpectedString message for the operator's
    // TernaryRightString followed by an explanatory Message.
    public static ErrorMessageList MissingTernary2ndString(Position position1, Position position2, Operator op) {
        return new ErrorMessageList(
                       new ErrorMessage.ExpectedString(op.TernaryRightString),
                       new ErrorMessage.Message(Strings.OperatorStringIsRightPartOfTernaryOperator(position1, position2, op)));
    }
}

namespace Internal {
    // the internal namespace contains internal types that must be public for inlining reasons
    public static class ParserCombinatorInInfiniteLoopHelper {
        // Creates (but does not throw) the exception reported when a combinator
        // is applied to a parser that succeeds without making progress.
        // The message is prefixed with the current stream position.
        public static Exception CreateException(string combinatorName, CharStream stream) {
            return new InvalidOperationException(stream.Position.ToString() + ": The combinator '" + combinatorName + "' was applied to a parser that succeeds without consuming input and without changing the parser state in any other way. (If no exception had been raised, the combinator likely would have entered an infinite loop.)");
        }
    }
}

}

================================================
FILE: FParsecCS/FParsecCS-LowTrust.csproj
================================================
 netstandard2.0;netstandard2.1

================================================
FILE: FParsecCS/FParsecCS.csproj
================================================
net6.0

================================================
FILE: FParsecCS/FParsecCS.targets
================================================
FParsecCS FParsec true 1591 false

================================================
FILE: FParsecCS/FastGenericEqualityERComparer.cs
================================================
// Copyright (c) Stephan Tolksdorf 2010
// License: Simplified BSD License. See accompanying documentation.

using System;
using System.Collections;
using System.Collections.Generic;

using Microsoft.FSharp.Core;

namespace FParsec {

// Caches, in a static field, the equality comparer returned by Create().
//
// NOTE(review): the generic type arguments appear to have been stripped from
// this text during extraction. This class and the one below have the same name
// as shown, and `T` is used without a visible declaration; presumably this
// class was generic in T and `EqualityComparer`/`Create` carried type
// arguments — confirm against the repository.
internal static class FastGenericEqualityERComparer {
    // if T is a reference type, accessing the field requires a hash table lookup
    public static readonly EqualityComparer Instance = FastGenericEqualityERComparer.Create();

    /// For reference types it's faster to call Instance.Equals directly
    /// (due to limitations of the inliner of the .NET JIT.)
    public static bool Equals(T left, T right) {
        return Instance.Equals(left, right);
    }
}

// Chooses an equality comparer implementation based on the runtime type T.
internal static class FastGenericEqualityERComparer {
    // Selection rules, in order:
    //   1. array types         -> ArrayStructuralEqualityERComparer
    //   2. types assignable to IStructuralEquatable
    //                          -> the Struct or Class variant below, depending
    //                             on whether T is a value type
    //   3. everything else     -> EqualityComparer.Default
    public static EqualityComparer Create() {
        if (typeof(T).IsArray) return new ArrayStructuralEqualityERComparer();

        if (typeof(IStructuralEquatable).IsAssignableFrom(typeof(T))) {
            // The comparer classes constrain T, so the closed generic type
            // has to be built through reflection.
            var gct = typeof(T).IsValueType
                      ? typeof(StructStructuralEqualityERComparer<>)
                      : typeof(ClassStructuralEqualityERComparer<>);
            var ct = gct.MakeGenericType(typeof(T));
        #if LOW_TRUST || NETSTANDARD1_6
            return (EqualityComparer)Activator.CreateInstance(ct);
        #else
            // Creates the comparer instance without running a constructor.
            return (EqualityComparer)System.Runtime.Serialization.FormatterServices.GetUninitializedObject(ct);
        #endif
        }

        return EqualityComparer.Default;
    }

    // Comparer for reference types implementing IStructuralEquatable.
    // Null is handled here: two nulls are equal, null never equals non-null,
    // and hashing null throws ArgumentNullException.
    private class ClassStructuralEqualityERComparer : EqualityComparer where T : class, IStructuralEquatable {
        public override bool Equals(T x, T y) {
            return    (object)x == (object)y
                   || ((object)x != null && x.Equals(y, LanguagePrimitives.GenericEqualityERComparer));
        }

        public override int GetHashCode(T obj) {
            if ((object)obj == null) throw new ArgumentNullException("obj");
            return obj.GetHashCode(LanguagePrimitives.GenericEqualityERComparer);
        }
    }

    // Comparer for value types implementing IStructuralEquatable.
    // No null handling is needed because T is constrained to struct.
    private class StructStructuralEqualityERComparer : EqualityComparer where T : struct, IStructuralEquatable {
        public override bool Equals(T x, T y) {
            return x.Equals(y, LanguagePrimitives.GenericEqualityERComparer);
        }

        public override int GetHashCode(T obj) {
            return obj.GetHashCode(LanguagePrimitives.GenericEqualityERComparer);
        }
    }

    /// Forwards all work to F#'s GenericEqualityERComparer.
    private class ArrayStructuralEqualityERComparer : EqualityComparer {
        public override bool Equals(T x, T y) {
            // The reference check short-circuits identical (and both-null) arguments.
            return (object)x == (object)y || LanguagePrimitives.GenericEqualityERComparer.Equals(x, y);
        }

        public override int GetHashCode(T obj) {
            if ((object)obj == null) throw new ArgumentNullException("obj");
            return LanguagePrimitives.GenericEqualityERComparer.GetHashCode(obj);
        }
    }
}

}

================================================
FILE: FParsecCS/HexFloat.cs
================================================
// Copyright (c) Stephan Tolksdorf 2008-2013
// License: Simplified BSD License. See accompanying documentation.
using System; namespace FParsec { public static class HexFloat { // see http://www.quanttec.com/fparsec/reference/charparsers.html#members.floatToHexString // for more information on the supported hexadecimal floating-point format #pragma warning disable 0429 // unreachable expression code #pragma warning disable 0162 // unreachable code // The non-LOW_TRUST code in this class relies on the endianness of floating-point // numbers in memory being the same as the normal platform endianness, // i.e. on *((uint*)(&s)) and *((ulong*)(&d)) returning the correct IEEE-754 bit // representation of the single and double precision numbers s and d. // I'm not aware of any .NET/Mono platform where this is not the case. // In the unlikely event anyone ever runs this code on a platform where // this is not the case the unit tests will detect the problem. private static ReadOnlySpan AsciiHexValuePlus1s => new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; public static string DoubleToHexString(double x) { const int expBits = 11; // bits for biased exponent const int maxBits = 53; // significant bits (including implicit bit) const int maxChars = 24; // "-0x1.fffffffffffffp-1022" const int maxBiasedExp = (1 << expBits) - 1; const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a double const int bias = maxExp - 1; const int maxFractNibbles = (maxBits - 1 + 3)/4; const ulong mask = (1UL << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits ulong xn = unchecked((ulong)BitConverter.DoubleToInt64Bits(x)); int sign = (int)(xn >> (maxBits - 1 + expBits)); int e = (int)((xn >> (maxBits - 1)) & maxBiasedExp); // the 
biased exponent ulong s = xn & mask; // the significand (without the implicit bit) if (e < maxBiasedExp) { if (e == 0 && s == 0) return sign == 0 ? "0x0.0p0" : "-0x0.0p0"; Span str = stackalloc char[maxChars]; int i = 0; if (sign != 0) str[i++] = '-'; str[i++] = '0'; str[i++] = 'x'; str[i++] = e > 0 ? '1' : '0'; str[i++] = '.'; if ((maxBits - 1)%4 > 0) { // normalize fraction to multiple of 4 bits s <<= 4 - (maxBits - 1)%4; } int lastNonNull = i; for (int j = 0; j < maxFractNibbles; ++j) { int h = unchecked((int) (s >> ((maxFractNibbles - 1 - j) << 2))) & 0xf; if (h != 0) lastNonNull = i; str[i++] = "0123456789abcdef"[h]; } i = lastNonNull + 1; str[i++] = 'p'; if (e >= bias) e -= bias; else { str[i++] = '-'; e = e > 0 ? -(e - bias) : bias - 1; } // e holds absolute unbiased exponent int li = e < 10 ? 1 : (e < 100 ? 2 : (e < 1000 ? 3 : 4)); // number of decimal digits of e, i.e. floor(log10(e)) + 1 for e > 0 (and 1 for e == 0) i += li; do { int r = e%10; e = e/10; str[--i] = (char) (48 + r); } while (e > 0); i += li; return str.Slice(0, i).ToString(); } else { if (s == 0) return sign == 0 ? "Infinity" : "-Infinity"; else return "NaN"; } } public static string SingleToHexString(float x) { const int expBits = 8; // bits for biased exponent const int maxBits = 24; // significant bits (including implicit bit) const int maxChars = 16; // "-0x1.fffffep-126" const int maxBiasedExp = (1 << expBits) - 1; const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a single const int bias = maxExp - 1; const int maxFractNibbles = (maxBits - 1 + 3)/4; const uint mask = (1U << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits #if NETSTANDARD2_0 uint xn = BitConverter.ToUInt32(BitConverter.GetBytes(x), 0); #else uint xn = (uint)BitConverter.SingleToInt32Bits(x); #endif int sign = (int)(xn >> (maxBits - 1 + expBits)); int e = (int)((xn >> (maxBits - 1)) & maxBiasedExp); // the biased exponent uint s = xn & mask; // the significand (without the implicit bit) if (e < maxBiasedExp) { if (e == 0 && s == 0) return sign == 0 ?
"0x0.0p0" : "-0x0.0p0"; Span str = stackalloc char[maxChars]; int i = 0; if (sign != 0) str[i++] = '-'; str[i++] = '0'; str[i++] = 'x'; str[i++] = e > 0 ? '1' : '0'; str[i++] = '.'; int lastNonNull = i; if ((maxBits - 1)%4 > 0) { // normalize fraction to multiple of 4 bits s <<= 4 - (maxBits - 1)%4; } for (int j = 0; j < maxFractNibbles; ++j) { int h = (int)(s >> ((maxFractNibbles - 1 - j) << 2)) & 0xf; if (h != 0) lastNonNull = i; str[i++] = "0123456789abcdef"[h]; } i = lastNonNull + 1; str[i++] = 'p'; if (e >= bias) e -= bias; else { str[i++] = '-'; e = e > 0 ? -(e - bias) : bias - 1; } // e holds absolute unbiased exponent int li = e < 10 ? 1 : (e < 100 ? 2 : 3); // floor(log(10, e))) + 1 i += li; do { int r = e%10; e = e/10; str[--i] = (char)(48 + r); } while (e > 0); i += li; return str.Slice(0, i).ToString(); } else { if (s == 0) return sign == 0 ? "Infinity" : "-Infinity"; else return "NaN"; } } #pragma warning restore 0429 #pragma warning restore 0162 #if !LOW_TRUST unsafe #endif public static double DoubleFromHexString(string str) { const int expBits = 11; // bits for exponent const int maxBits = 53; // significant bits (including implicit bit) const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a double const int minExp = -maxExp + 3; // min n for which 0.5*2^n is a normal double const int minSExp = minExp - (maxBits - 1); // min n for which 0.5*2^n is a subnormal double const int maxBits2 = maxBits + 2; const ulong mask = (1UL << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits if (str == null) throw new ArgumentNullException("str"); int n = str.Length; if (n == 0) goto InvalidFormat; // n*4 <= Int32.MaxValue protects against an nBits overflow, // the additional -minSExp + 10 margin is needed for parsing the exponent if (n > (int.MaxValue + minSExp - 10)/4) throw new System.FormatException("The given hexadecimal string representation of a double precision floating-point number is too long."); int sign = 0; // 0 == positive, 1 == 
negative ulong xn = 0; // integer significand with up to maxBits + 2 bits, where the (maxBits + 2)th bit // (the least significant bit) is the logical OR of the (maxBits + 2)th and all following input bits int nBits = -1; // number of bits in xn, not counting leading zeros int exp = 0; // the base-2 exponent #if LOW_TRUST var s = str; #else fixed (char* s = str) { #endif int i = 0; // sign if (s[0] == '+') i = 1; else if (s[0] == '-') { i = 1; sign = 1; } // "0x" prefix if (i + 1 < n && (s[i + 1] == 'x' || s[i + 1] == 'X')) { if (s[i] != '0') goto InvalidFormat; i += 2; } bool pastDot = false; for (;;) { if (i == n) { if (!pastDot) exp = nBits; if (nBits >= 0) break; else goto InvalidFormat; } char c = s[i++]; int h; if (c < 128 && (h = AsciiHexValuePlus1s[c]) != 0) { --h; if (nBits <= 0 ) { xn |= (uint)h; nBits = 0; while (h > 0) { ++nBits; h >>= 1; } if (pastDot) exp -= 4 - nBits; } else if (nBits <= maxBits2 - 4) { xn <<= 4; xn |= (uint)h; nBits += 4; } else if (nBits < maxBits2) { int nRemBits = maxBits2 - nBits; int nSurplusBits = 4 - nRemBits; int surplusBits = h & (0xf >> nRemBits); // The .NET JIT is not able to emit branch-free code for // surplusBits = surplusBits != 0 ? 1 : 0; // So we use this version instead: surplusBits = (0xfffe >> surplusBits) & 1; // = surplusBits != 0 ? 1 : 0 xn <<= nRemBits; xn |= (uint)((h >> nSurplusBits) | surplusBits); nBits += 4; } else { xn |= (uint)((0xfffe >> h) & 1); // (0xfffe >> h) & 1 == h != 0 ? 1 : 0 nBits += 4; } } else if (c == '.') { if (pastDot) goto InvalidFormat; pastDot = true; exp = nBits >= 0 ? 
nBits : 0; // exponent for integer part of float } else if ((c | ' ') == 'p' && nBits >= 0) { if (!pastDot) exp = nBits; int eSign = 1; if (i < n && (s[i] == '-' || s[i] == '+')) { if (s[i] == '-') eSign = -1; ++i; } if (i == n) goto InvalidFormat; int e = 0; do { c = s[i++]; if (((uint)c - (uint)'0') <= 9) { if (e <= (int.MaxValue - 9)/10) e = e*10 + (c - '0'); else e = int.MaxValue - 8; } else goto InvalidFormat; } while (i < n); e*= eSign; // either e is exact or |e| >= int.MaxValue - 8 // |exp| <= n*4 <= int.MaxValue + minSExp - 10 // // Case 1: e and exp have the same sign // Case 1.a: e is exact && |exp + e| <= int.MaxValue ==> |exp + e| is exact // Case 1.b: |e| >= int.MaxValue - 8 || |exp + e| > int.MaxValue ==> |exp + e| >= int.MaxValue - 8 // Case 2: e and exp have opposite signs // Case 2.a: e is exact ==> |exp + e| is exact // Case 2.b: |e| >= int.MaxValue - 8 // ==> Case e > 0: // exp + e >= -(int.MaxValue + minSExp - 10) + (int.MaxValue - 8) = -minSExp + 2 > maxExp // Case e < 0: // exp + e <= (int.MaxValue + minSExp - 10) - (int.MaxValue - 8) = minSExp - 2 // // hence, |exp + e| is exact || exp + e > maxExp || exp + e < minSExp - 1 try { exp = checked (exp + e); } catch (System.OverflowException) { exp = e < 0 ? int.MinValue : int.MaxValue; } break; } else { --i; if (nBits == -1 && i + 3 <= n) { if ( ((s[i ] | ' ') == 'i') && ((s[i + 1] | ' ') == 'n') && ((s[i + 2] | ' ') == 'f') && (i + 3 == n || (i + 8 == n && ((s[i + 3] | ' ') == 'i') && ((s[i + 4] | ' ') == 'n') && ((s[i + 5] | ' ') == 'i') && ((s[i + 6] | ' ') == 't') && ((s[i + 7] | ' ') == 'y')))) { return sign == 0 ? Double.PositiveInfinity : Double.NegativeInfinity; } else if (i + 3 == n && ((s[i] | ' ') == 'n') && ((s[i + 1] | ' ') == 'a') && ((s[i + 2] | ' ') == 'n')) { return Double.NaN; } } goto InvalidFormat; } } // for #if !LOW_TRUST } // fixed #endif if (nBits == 0) return sign == 0 ? 
0.0 : -0.0; if (exp <= maxExp) { if (exp >= minExp && nBits <= maxBits) { // not subnormal and no rounding is required if (nBits < maxBits) xn <<= maxBits - nBits; // normalize significand to maxBits xn &= mask; // mask out lower (maxBits - 1) bits, the most significant bit is encoded in exp } else { if (nBits < maxBits2) xn <<= maxBits2 - nBits; // normalize significand to (maxBits + 2) bits int isSubnormal = 0; if (exp < minExp) { if (exp < minSExp - 1) return sign == 0 ? 0.0 : -0.0; // underflow (minSExp - 1 could still be rounded to minSExp) isSubnormal = 1; do { xn = (xn >> 1) | (xn & 1); } while (++exp < minExp); if (xn <= 2) return sign == 0 ? 0.0 : -0.0; // underflow } int r = unchecked((int)xn) & 0x7; // (lsb, bit below lsb, logical OR of all bits below the bit below lsb) xn >>= 2; // truncate to maxBits if (r >= 6 || r == 3) { xn++; xn &= mask; if (xn == 0) { // rounded to a power of two exp += 1; if (exp > maxExp) goto Overflow; } } else { xn &= mask; } exp -= isSubnormal; } exp -= minExp - 1; // add bias xn = (((ulong)sign) << ((maxBits - 1) + expBits)) | (((ulong)exp) << (maxBits - 1)) | xn; #if LOW_TRUST return BitConverter.Int64BitsToDouble(unchecked((long)xn)); #else return *((double*)(&xn)); #endif } Overflow: string msg = n < 32 ? "The given string (\"" + str + "\") represents a value either too large or too small for a double precision floating-point number." : "The given string represents a value either too large or too small for a double precision floating-point number."; throw new System.OverflowException(msg); InvalidFormat: string errmsg = n < 32 ? "The given hexadecimal string representation of a double precision floating-point number (\"" + str + "\") is invalid." 
: "The given hexadecimal string representation of a double precision floating-point number is invalid."; throw new System.FormatException(errmsg); } #if !LOW_TRUST unsafe #endif public static float SingleFromHexString(string str) { const int expBits = 8; // bits for exponent const int maxBits = 24; // significant bits (including implicit bit) const int maxExp = 1 << (expBits - 1); // max n for which 0.5*2^n is a double const int minExp = -maxExp + 3; // min n for which 0.5*2^n is a normal double const int minSExp = minExp - (maxBits - 1); // min n for which 0.5*2^n is a subnormal Single const int maxBits2 = maxBits + 2; const int mask = (1 << (maxBits - 1)) - 1; // mask for lower (maxBits - 1) bits if (str == null) throw new ArgumentNullException("str"); int n = str.Length; if (n == 0) goto InvalidFormat; // n*4 <= Int32.MaxValue protects against an nBits overflow, // the additional -minSExp + 10 margin is needed for parsing the exponent if (n > (int.MaxValue + minSExp - 10)/4) throw new System.FormatException("The given hexadecimal string representation of a single precision floating-point number is too long."); int sign = 0; // 0 == positive, 1 == negative int xn = 0; // integer significand with up to maxBits + 2 bits, where the (maxBits + 2)th bit // (the least significant bit) is the logical OR of the (maxBits + 2)th and all following input bits int nBits = -1; // number of bits in xn, not counting leading zeros int exp = 0; // the base-2 exponent #if LOW_TRUST var s = str; #else fixed (char* s = str) { #endif int i = 0; // sign if (s[0] == '+') i = 1; else if (s[0] == '-') { i = 1; sign = 1; } // "0x" prefix if (i + 1 < n && (s[i + 1] == 'x' || s[i + 1] == 'X')) { if (s[i] != '0') goto InvalidFormat; i += 2; } bool pastDot = false; for (;;) { if (i == n) { if (!pastDot) exp = nBits; if (nBits >= 0) break; else goto InvalidFormat; } char c = s[i++]; int h; if (c < 128 && (h = AsciiHexValuePlus1s[c]) != 0) { --h; if (nBits <= 0 ) { xn |= h; nBits = 0; while (h > 
0) { ++nBits; h >>= 1; } if (pastDot) exp -= 4 - nBits; } else if (nBits <= maxBits2 - 4) { xn <<= 4; xn |= h; nBits += 4; } else if (nBits < maxBits2) { int nRemBits = maxBits2 - nBits; int nSurplusBits = 4 - nRemBits; int surplusBits = h & (0xf >> nRemBits); // The .NET JIT is not able to emit branch-free code for // surplusBits = surplusBits != 0 ? 1 : 0; // So we use this version instead: surplusBits = (0xfffe >> surplusBits) & 1; // == surplusBits != 0 ? 1 : 0 xn <<= nRemBits; xn |= (h >> nSurplusBits) | surplusBits; nBits += 4; } else { xn |= (0xfffe >> h) & 1; // (0xfffe >> h) & 1 == h != 0 ? 1 : 0 nBits += 4; } } else if (c == '.') { if (pastDot) goto InvalidFormat; pastDot = true; exp = nBits >= 0 ? nBits : 0; // exponent for integer part of float } else if ((c | ' ') == 'p' && nBits >= 0) { if (!pastDot) exp = nBits; int eSign = 1; if (i < n && (s[i] == '-' || s[i] == '+')) { if (s[i] == '-') eSign = -1; ++i; } if (i == n) goto InvalidFormat; int e = 0; do { c = s[i++]; if (((uint)c - (uint)'0') <= 9) { if (e <= (int.MaxValue - 9)/10) e = e*10 + (c - '0'); else e = int.MaxValue - 8; } else goto InvalidFormat; } while (i < n); e*= eSign; // either e is exact or |e| >= int.MaxValue - 8 // |exp| <= n*4 <= int.MaxValue + minSExp - 10 // // Case 1: e and exp have the same sign // Case 1.a: e is exact && |exp + e| <= int.MaxValue ==> |exp + e| is exact // Case 1.b: |e| >= int.MaxValue - 8 || |exp + e| > int.MaxValue ==> |exp + e| >= int.MaxValue - 8 // Case 2: e and exp have opposite signs // Case 2.a: e is exact ==> |exp + e| is exact // Case 2.b: |e| >= int.MaxValue - 8 // ==> Case e > 0: // exp + e >= -(int.MaxValue + minSExp - 10) + (int.MaxValue - 8) = -minSExp + 2 > maxExp // Case e < 0: // exp + e <= (int.MaxValue + minSExp - 10) - (int.MaxValue - 8) = minSExp - 2 // // hence, |exp + e| is exact || exp + e > maxExp || exp + e < minSExp - 1 try { exp = checked (exp + e); } catch (System.OverflowException) { exp = e < 0 ? 
int.MinValue : int.MaxValue; } break; } else { --i; if (nBits == -1 && i + 3 <= n) { if ( ((s[i ] | ' ') == 'i') && ((s[i + 1] | ' ') == 'n') && ((s[i + 2] | ' ') == 'f') && (i + 3 == n || (i + 8 == n && ((s[i + 3] | ' ') == 'i') && ((s[i + 4] | ' ') == 'n') && ((s[i + 5] | ' ') == 'i') && ((s[i + 6] | ' ') == 't') && ((s[i + 7] | ' ') == 'y')))) { return sign == 0 ? Single.PositiveInfinity : Single.NegativeInfinity; } else if (i + 3 == n && ((s[i] | ' ') == 'n') && ((s[i + 1] | ' ') == 'a') && ((s[i + 2] | ' ') == 'n')) { return Single.NaN; } } goto InvalidFormat; } } // for #if !LOW_TRUST } // fixed #endif if (nBits == 0) return sign == 0 ? 0.0f : -0.0f; if (exp <= maxExp) { if (exp >= minExp && nBits <= maxBits) { // not subnormal and no rounding is required if (nBits < maxBits) xn <<= maxBits - nBits; // normalize significand to maxBits xn &= mask; // mask out lower (maxBits - 1) bits, the most significant bit is encoded in exp } else { if (nBits < maxBits2) xn <<= maxBits2 - nBits; // normalize significand to (maxBits + 2) bits int isSubnormal = 0; if (exp < minExp) { if (exp < minSExp - 1) return sign == 0 ? 0.0f : -0.0f; // underflow (minSExp - 1 could still be rounded to minSExp) isSubnormal = 1; do { xn = (xn >> 1) | (xn & 1); } while (++exp < minExp); if (xn <= 2) return sign == 0 ? 0.0f : -0.0f; // underflow } int r = xn & 0x7; // (lsb, bit below lsb, logical OR of all bits below the bit below lsb) xn >>= 2; // truncate to maxBits if (r >= 6 || r == 3) { xn++; xn &= mask; if (xn == 0) { // rounded to a power of two exp += 1; if (exp > maxExp) goto Overflow; } } else { xn &= mask; } exp -= isSubnormal; } exp -= minExp - 1; // add bias xn = (sign << ((maxBits - 1) + expBits)) | (exp << (maxBits - 1)) | xn; #if LOW_TRUST return BitConverter.ToSingle(BitConverter.GetBytes(xn), 0); #else return *((float*)(&xn)); #endif } Overflow: string msg = n < 32 ? 
"The given string (\"" + str + "\") represents a value either too large or too small for a single precision floating-point number." : "The given string represents a value either too large or too small for a single precision floating-point number."; throw new System.OverflowException(msg); InvalidFormat: string errmsg = n < 32 ? "The given hexadecimal string representation of a single precision floating-point number (\"" + str + "\") is invalid." : "The given hexadecimal string representation of a single precision floating-point number is invalid."; throw new System.FormatException(errmsg); } } // class HexFloat } ================================================ FILE: FParsecCS/IdentifierValidator.cs ================================================ // Copyright (c) Stephan Tolksdorf 2010-2012 // License: Simplified BSD License. See accompanying documentation. using System; using System.Text; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Microsoft.FSharp.Core; namespace FParsec { #if !LOW_TRUST unsafe #endif public sealed class IdentifierValidator { [Flags] internal enum IdentifierCharFlags : byte { None = 0, Continue = 1, NonContinue = 2, //Start = NonContinue | Continue, // the following two values are used by the FParsec identifier parser, not this class PreCheckContinue = 4, PreCheckNonContinue = 8, } public NormalizationForm NormalizationForm { get; set; } public bool NormalizeBeforeValidation { get; set; } public bool AllowJoinControlCharsAsIdContinueChars { get; set; } private readonly IdentifierCharFlags[] AsciiCharOptions; private void CheckAscii(char asciiChar) { if (asciiChar == 0 || asciiChar >= 128) throw new ArgumentOutOfRangeException("asciiChar", "The identifier char settings can only be read or set for non-zero ASCII chars, i.e. 
chars in the range '\u0001'-'\u007f'."); } public void SetIsAsciiNoIdChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = 0; } public void SetIsAsciiIdStartChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = IdentifierCharFlags.NonContinue | IdentifierCharFlags.Continue; } public void SetIsAsciiIdNonStartChar(char asciiChar) { CheckAscii(asciiChar); AsciiCharOptions[asciiChar] = IdentifierCharFlags.Continue; } public IdentifierValidator() { var ascii = new IdentifierCharFlags[128]; var start = IdentifierCharFlags.NonContinue | IdentifierCharFlags.Continue; // defaults as defined by XID_START/XID_CONTINUE for (int c = 'A'; c <= 'Z'; ++c) ascii[c] = start; for (int c = 'a'; c <= 'z'; ++c) ascii[c] = start; for (int c = '0'; c <= '9'; ++c) ascii[c] = IdentifierCharFlags.Continue; ascii['_'] = IdentifierCharFlags.Continue; AsciiCharOptions = ascii; } internal IdentifierValidator(IdentifierCharFlags[] asciiCharOptions) { Debug.Assert(asciiCharOptions.Length == 128); AsciiCharOptions = asciiCharOptions; } /// Returns the normalized string, or null in case an invalid identifier /// character is found. If an invalid character is found, the string index of the /// invalid character is assigned to the out parameter, otherwise -1. public string ValidateAndNormalize(string str, out int errorPosition) { // Pinning str and asciiOptions to avoid redundant bounds checks would actually // slow down the code for small to medium size identifiers because of the // (unnecessarily) high overhead associated with C#'s fixed statement. One // issue is that the .NET C# compiler emits null and 0-length checks even // though the C# standard leaves the respective behaviour undefined and // one hence can't rely on them. Another, more severe issue is that the // C# compiler puts the whole code inside the scope of the fixed statement // into a try-finally block, even if the whole function has no exception // handlers. 
The try-finally block in turn inhibits certain optimizations // by the JIT, in particular it seems to prevent the 32-bit .NET JIT from // compiling gotos into straightforward jumps. var asciiOptions = AsciiCharOptions; bool isSecondRound = false; bool isOnlyAscii = true; int i = 1; int length = str.Length; // throws if str is null if (length == 0) goto ReturnWithError; // check could be avoided for null-terminated buffer // Even if NormalizeBeforeValidation is set we first try to validate the // identifier without normalization. If we don't get an error, we normalize // after validation. If we get an error, we normalize and try // to validate the identifier a second time. This doesn't change results // because XID identifiers are "closed under normalization". IdStart: char c = str[0]; if (c < 128) { if ((asciiOptions[c] & IdentifierCharFlags.NonContinue) == 0) goto Error; } else { isOnlyAscii = false; if (!char.IsSurrogate(c)) { if (!IsXIdStartOrSurrogate(c)) goto Error; } else { if (i == length) goto Error; // check could be avoided for null-terminated buffer char c1 = str[1]; if (c > 0xDBFF || !char.IsLowSurrogate(c1)) goto ReturnWithError; int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 if (!IsXIdStartSmp(cp)) goto Error; ++i; } } if (i < length) { if (!AllowJoinControlCharsAsIdContinueChars) { for (;;) { c = str[i]; ++i; if (c < 128) { if ((asciiOptions[c] & IdentifierCharFlags.Continue) == 0) goto Error; if (i == length) break; } else { isOnlyAscii = false; if (!char.IsSurrogate(c)) { if (!IsXIdContinueOrSurrogate(c)) goto Error; if (i == length) break; } else { if (i == length) goto Error; // check could be avoided for null-terminated buffer char c1 = str[i]; if (c > 0xDBFF || !char.IsLowSurrogate(c1)) goto ReturnWithError; int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 if (!IsXIdContinueSmp(cp)) goto Error; if (++i >= length) break; } } } } else { // duplicates the code from the previous case, the only difference
being the (*) line for (;;) { c = str[i]; ++i; if (c < 128) { if ((asciiOptions[c] & IdentifierCharFlags.Continue) == 0) goto Error; if (i == length) break; } else { isOnlyAscii = false; if (!char.IsSurrogate(c)) { if (!IsXIdContinueOrJoinControlOrSurrogate(c)) goto Error; // (*) if (i == length) break; } else { if (i == length) goto Error; // check could be avoided for null-terminated buffer char c1 = str[i]; if (c > 0xDBFF || !char.IsLowSurrogate(c1)) goto ReturnWithError; int cp = (c - 0xD800)*0x400 + c1 - 0xDC00; // codepoint minus 0x10000 if (!IsXIdContinueSmp(cp)) goto Error; if (++i >= length) break; } } } } } errorPosition = -1; if (NormalizationForm == 0 || (isOnlyAscii | isSecondRound)) return str; return str.Normalize(NormalizationForm); Error: if (NormalizeBeforeValidation && NormalizationForm != 0 && !(isOnlyAscii | isSecondRound)) { string nstr; try { nstr = str.Normalize(NormalizationForm); } // throws for invalid unicode characters catch (ArgumentException) { nstr = str; } if ((object)nstr != (object)str) { str = nstr; length = nstr.Length; isSecondRound = true; i = 1; goto IdStart; } } ReturnWithError: errorPosition = i - 1; return null; } private class IsIdStartCharOrSurrogateFSharpFunc : FSharpFunc { private IdentifierCharFlags[] AsciiCharOptions; public IsIdStartCharOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } public override bool Invoke(char ch) { if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.NonContinue) != 0; return IsXIdStartOrSurrogate(ch); } } private class IsIdContinueCharOrSurrogateFSharpFunc : FSharpFunc { private IdentifierCharFlags[] AsciiCharOptions; public IsIdContinueCharOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } public override bool Invoke(char ch) { if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.Continue) != 0; return IsXIdContinueOrSurrogate(ch); } } private class 
IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc : FSharpFunc { private IdentifierCharFlags[] AsciiCharOptions; public IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc(IdentifierCharFlags[] asciiCharOptions) { AsciiCharOptions = asciiCharOptions; } public override bool Invoke(char ch) { if (ch < 128) return (AsciiCharOptions[ch] & IdentifierCharFlags.Continue) != 0; return IsXIdContinueOrJoinControlOrSurrogate(ch); } } private FSharpFunc isIdStartOrSurrogateFunc; public FSharpFunc IsIdStartOrSurrogateFunc { get { return isIdStartOrSurrogateFunc ?? (isIdStartOrSurrogateFunc = new IsIdStartCharOrSurrogateFSharpFunc(AsciiCharOptions)); } } private FSharpFunc isIdContinueOrSurrogateFunc; public FSharpFunc IsIdContinueOrSurrogateFunc { get { return isIdContinueOrSurrogateFunc ?? (isIdContinueOrSurrogateFunc = new IsIdContinueCharOrSurrogateFSharpFunc(AsciiCharOptions)); } } private FSharpFunc isIdContinueOrJoinControlOrSurrogateFunc; public FSharpFunc IsIdContinueOrJoinControlOrSurrogateFunc { get { return isIdContinueOrJoinControlOrSurrogateFunc ?? (isIdContinueOrJoinControlOrSurrogateFunc = new IsIdContinueCharOrJoinControlOrSurrogateFSharpFunc(AsciiCharOptions)); } } // The XID_START/XID_CONTINUE property data is stored in two multiple-stage lookup tables: // the BMP codepoints (0 - 0xFFFF) are stored in a two-stage table and the SMP codepoints (0x10000 - 0x10FFFF) // are stored in a three-stage table. // // Each two-stage table consists of one integer index array and one bit array. // Each three-stage table consists of two integer index arrays and one bit array. // // The first stage array is divided into multiple parts: one for XID_START, one for XID_CONTINUE // and -- only for the BMP table -- one in which in addition to the XID_CONTINUE chars the two // JOIN_CONTROL chars "zero-width non-joiner" (ZWNJ, '\u200C') and "zero-width joiner" // (ZWJ, '\u200D') are marked. // All codepoints in the BMP reserved for surrogates are marked as XID_START and XID_CONTINUE.
//
    // The bits in the last stage array are stored in 32-bit words, where each 32-bit word
    // is stored in the platform byte order.
    //
    // To determine whether a codepoint has a property in a three-stage table,
    // three indices are computed:
    // idx1 = the (log_2 table1Length) most significant bits of the codepoint
    // idx2 = table1(START|CONTINUE|CONTINUE_OR_JOIN_CONTROL)[idx1]*table2BlockLength
    //        + the following (log_2 table2BlockLength) bits of the codepoint
    // idx3 = table2[idx2]*table3BlockLength + the least significant (log_2 table3BlockLength) bits of the codepoint
    // If the bit in table3 at the bit index idx3 is set, the codepoint has the property, otherwise not.
    //
    // Implementation note for the functions below: each public predicate is a thin wrapper
    // that tests bit 0 of the value returned by the private "_" helper. The helper looks up
    // the 32-bit word that holds the codepoint's bit and shifts it right by the codepoint,
    // which moves the wanted bit into position 0 (the shift count is implicitly reduced
    // modulo 32, see the comment on the return statements).

    // Returns true if the bit for bmpCodePoint is set in the XID_START part of the BMP table.
    public static bool IsXIdStartOrSurrogate(char bmpCodePoint) { // should get inlined
        return (IsXIdStartOrSurrogate_(bmpCodePoint) & 1u) != 0;
    }
    private static uint IsXIdStartOrSurrogate_(char bmpCodePoint) {
        int cp = bmpCodePoint;
        int idx1 = cp >> XIdBmpTable2Log2BitBlockLength;
        // f2 = number of 32-bit words per bit block, m2 = mask for the word index within a block
        const int f2 = 1 << (XIdBmpTable2Log2BitBlockLength - 5);
        const int m2 = f2 - 1;
        int idx2 = XIdStartBmpTable1[idx1]*f2 + ((cp >> 5) & m2);
        return XIdBmpTable2[idx2] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too
    }

    // Returns true if the bit for bmpCodePoint is set in the XID_CONTINUE part of the BMP table.
    public static bool IsXIdContinueOrSurrogate(char bmpCodePoint) { // should get inlined
        return (IsXIdContinueOrSurrogate_(bmpCodePoint) & 1u) != 0u;
    }
    private static uint IsXIdContinueOrSurrogate_(char bmpCodePoint) {
        int cp = bmpCodePoint;
        int idx1 = cp >> XIdBmpTable2Log2BitBlockLength;
        // f2 = number of 32-bit words per bit block, m2 = mask for the word index within a block
        const int f2 = 1 << (XIdBmpTable2Log2BitBlockLength - 5);
        const int m2 = f2 - 1;
        int idx2 = XIdContinueBmpTable1[idx1]*f2 + ((cp >> 5) & m2);
        return XIdBmpTable2[idx2] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too
    }

    // Returns true if the bit for bmpCodePoint is set in the part of the BMP table
    // that is selected through XIdContinueOrJoinerBmpTable1.
    public static bool IsXIdContinueOrJoinControlOrSurrogate(char bmpCodePoint) { // should get inlined
        return (IsXIdContinueOrJoinControlOrSurrogate_(bmpCodePoint) & 1u) != 0u;
    }
    private static uint IsXIdContinueOrJoinControlOrSurrogate_(char bmpCodePoint) {
        int cp = bmpCodePoint;
        int idx1 = cp >> XIdBmpTable2Log2BitBlockLength;
        // f2 = number of 32-bit words per bit block, m2 = mask for the word index within a block
        const int f2 = 1 << (XIdBmpTable2Log2BitBlockLength - 5);
        const int m2 = f2 - 1;
        int idx2 = XIdContinueOrJoinerBmpTable1[idx1]*f2 + ((cp >> 5) & m2);
        return XIdBmpTable2[idx2] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too
    }

    // Returns true if the bit for the given SMP codepoint is set in the XID_START part of
    // the three-stage SMP table. The argument is the codepoint value minus 0x10000.
    // In builds without LOW_TRUST an IndexOutOfRangeException is thrown when the first-stage
    // index does not fit into XIdSmpTable1Log2Length bits.
    public static bool IsXIdStartSmp(int smpCodePointMinus0x10000) { // should get inlined
        return (IsXIdStartSmp_(smpCodePointMinus0x10000) & 1u) != 0;
    }
    private static uint IsXIdStartSmp_(int smpCodePointMinus0x10000) {
        int cp = smpCodePointMinus0x10000;
        int idx1 = cp >> (XIdSmpTable2Log2BlockLength + XIdSmpTable3Log2BlockLength);
        // f2 = entries per second-stage block, f3 = 32-bit words per third-stage bit block;
        // m2/m3 are the matching masks for the position within a block
        const int f2 = 1 << XIdSmpTable2Log2BlockLength,
                  f3 = 1 << (XIdSmpTable3Log2BlockLength - 5);
        const int m2 = f2 - 1, m3 = f3 - 1;
    #if !LOW_TRUST
        // Explicit range check on the first-stage index (this also rejects negative values).
        // NOTE(review): presumably needed because the tables are accessed without array
        // bounds checks in this build configuration -- confirm against the table declarations.
        if ((idx1 & (0xffffffffu << XIdSmpTable1Log2Length)) != 0) throw new IndexOutOfRangeException();
    #endif
        int idx2 = XIdStartSmpTable1[idx1]*f2 + ((cp >> XIdSmpTable3Log2BlockLength) & m2);
        int idx3 = XIdSmpTable2[idx2]*f3 + ((cp >> 5) & m3);
        return XIdSmpTable3[idx3] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too
    }

    // Returns true if the bit for the given SMP codepoint is set in the XID_CONTINUE part of
    // the three-stage SMP table. The argument is the codepoint value minus 0x10000.
    // In builds without LOW_TRUST an IndexOutOfRangeException is thrown when the first-stage
    // index does not fit into XIdSmpTable1Log2Length bits.
    public static bool IsXIdContinueSmp(int smpCodePointMinus0x10000) { // should get inlined
        return (IsXIdContinueSmp_(smpCodePointMinus0x10000) & 1u) != 0;
    }
    private static uint IsXIdContinueSmp_(int smpCodePointMinus0x10000) {
        int cp = smpCodePointMinus0x10000;
        int idx1 = cp >> (XIdSmpTable2Log2BlockLength + XIdSmpTable3Log2BlockLength);
        // f2 = entries per second-stage block, f3 = 32-bit words per third-stage bit block;
        // m2/m3 are the matching masks for the position within a block
        const int f2 = 1 << XIdSmpTable2Log2BlockLength,
                  f3 = 1 << (XIdSmpTable3Log2BlockLength - 5);
        const int m2 = f2 - 1, m3 = f3 - 1;
    #if !LOW_TRUST
        // Explicit range check on the first-stage index (this also rejects negative values).
        // NOTE(review): presumably needed because the tables are accessed without array
        // bounds checks in this build configuration -- confirm against the table declarations.
        if ((idx1 & (0xffffffffu << XIdSmpTable1Log2Length)) != 0) throw new IndexOutOfRangeException();
    #endif
        int idx2 = XIdContinueSmpTable1[idx1]*f2 + ((cp >> XIdSmpTable3Log2BlockLength) & m2);
        int idx3 = XIdSmpTable2[idx2]*f3 + ((cp >> 5) & m3);
        return XIdSmpTable3[idx3] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu,
no matter whether we do too } // tables for Unicode 8.0.0 private const int XIdStartBmpTable1Offset = 0; private const int XIdContinueBmpTable1Offset = 256; private const int XIdContinueOrJoinerBmpTable1Offset = 512; private const int XIdBmpTable1Size = 256; private const int XIdBmpTable1Log2Length = 8; private const int XIdBmpTable2Offset = 768; private const int XIdBmpTable2Size = 2816; private const int XIdBmpTable2Log2BitBlockLength = 8; private const int XIdStartSmpTable1Offset = 3584; private const int XIdContinueSmpTable1Offset = 3840; private const int XIdSmpTable1Size = 256; private const int XIdSmpTable1Log2Length = 8; private const int XIdSmpTable2Offset = 4096; private const int XIdSmpTable2Size = 704; private const int XIdSmpTable2Log2BlockLength = 5; private const int XIdSmpTable3Offset = 4800; private const int XIdSmpTable3Size = 1504; private const int XIdSmpTable3Log2BlockLength = 7; private static ReadOnlySpan DataArray => new byte[] { 0,2,3,4,6,8,10,12,14,16,18,20,22,24,26,28,30,2,32,33, 35,2,36,37,39,41,43,45,47,49,2,51,52,55,56,56,56,56,56,56, 56,56,56,56,57,59,56,56,61,63,56,56,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,64,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,65, 2,2,2,2,66,2,67,69,70,72,74,76,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,78,2,2,2,2, 2,2,2,2,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, 56,56,56,56,56,56,56,56,56,2,79,80,82,83,84,86,1,2,3,5, 7,9,11,13,15,17,19,21,23,25,27,29,31,2,32,34,35,2,36,38, 40,42,44,46,48,50,2,51,53,55,56,56,56,56,56,56,56,56,56,56, 58,60,56,56,62,63,56,56,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,64,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,65,2,2,2,2, 
66,2,68,69,71,73,75,77,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,78,2,2,2,2,2,2,2,2, 56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, 56,56,56,56,56,2,79,81,82,83,85,87,1,2,3,5,7,9,11,13, 15,17,19,21,23,25,27,29,31,2,32,34,35,2,36,38,40,42,44,46, 48,50,2,51,54,55,56,56,56,56,56,56,56,56,56,56,58,60,56,56, 62,63,56,56,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,64,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,65,2,2,2,2,66,2,68,69, 71,73,75,77,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,78,2,2,2,2,2,2,2,2,56,56,56,56, 56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56, 56,2,79,81,82,83,85,87,0,0,0,0,0,0,0,0,254,255,255,7, 254,255,255,7,0,0,0,0,0,4,32,4,255,255,127,255,255,255,127,255, 0,0,0,0,0,0,255,3,254,255,255,135,254,255,255,7,0,0,0,0, 0,4,160,4,255,255,127,255,255,255,127,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,195,255,3,0,31,80,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,223,184,64,215,255,255,251,255,255,255, 255,255,255,255,255,255,191,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,223,184,192,215,255,255,251,255,255,255,255,255,255,255,255,255,191,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,252,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,251,252,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,254,255,255,255,127,2,254,255,255,255, 255,0,0,0,0,0,0,0,0,0,255,255,255,7,7,0,255,255,255,255, 255,255,254,255,255,255,127,2,254,255,255,255,255,0,254,255,255,255,255,191, 
182,0,255,255,255,7,7,0,0,0,0,0,255,255,255,255,255,7,0,0, 0,192,254,255,255,255,255,255,255,255,255,255,255,255,47,0,96,192,0,156, 0,0,255,7,255,255,255,255,255,255,255,255,255,195,255,255,255,255,255,255, 255,255,255,255,255,255,239,159,255,253,255,159,0,0,253,255,255,255,0,0, 0,224,255,255,255,255,255,255,255,255,255,255,63,0,2,0,0,252,255,255, 255,7,48,4,0,0,255,255,255,255,255,255,255,231,255,255,255,255,255,255, 255,255,255,255,255,255,3,0,255,255,255,255,255,255,63,4,255,255,63,4, 16,1,0,0,255,255,255,1,0,0,0,0,0,0,0,0,255,255,31,0, 0,0,0,0,0,0,0,0,255,255,255,255,255,63,0,0,255,255,255,15, 0,0,0,0,0,0,0,0,255,255,31,0,0,0,0,0,248,255,255,255, 240,255,255,255,255,255,255,35,0,0,1,255,3,0,254,255,225,159,249,255, 255,253,197,35,0,64,0,176,3,0,3,0,255,255,255,255,255,255,255,255, 255,255,255,255,207,255,254,255,239,159,249,255,255,253,197,243,159,121,128,176, 207,255,3,0,224,135,249,255,255,253,109,3,0,0,0,94,0,0,28,0, 224,191,251,255,255,253,237,35,0,0,1,0,3,0,0,2,238,135,249,255, 255,253,109,211,135,57,2,94,192,255,63,0,238,191,251,255,255,253,237,243, 191,59,1,0,207,255,0,2,224,159,249,255,255,253,237,35,0,0,0,176, 3,0,2,0,232,199,61,214,24,199,255,3,0,0,1,0,0,0,0,0, 238,159,249,255,255,253,237,243,159,57,192,176,207,255,2,0,236,199,61,214, 24,199,255,195,199,61,129,0,192,255,0,0,224,223,253,255,255,253,255,35, 0,0,0,7,3,0,0,0,224,223,253,255,255,253,239,35,0,0,0,64, 3,0,6,0,239,223,253,255,255,253,255,227,223,61,96,7,207,255,0,0, 238,223,253,255,255,253,239,243,223,61,96,64,207,255,6,0,224,223,253,255, 255,255,255,39,0,64,0,128,3,0,0,252,224,255,127,252,255,255,251,47, 127,0,0,0,0,0,0,0,238,223,253,255,255,255,255,231,223,125,128,128, 207,255,0,252,236,255,127,252,255,255,251,47,127,132,95,255,192,255,12,0, 254,255,255,255,255,255,5,0,127,0,0,0,0,0,0,0,150,37,240,254, 174,236,5,32,95,0,0,240,0,0,0,0,254,255,255,255,255,255,255,7, 255,127,255,3,0,0,0,0,150,37,240,254,174,236,255,59,95,63,255,243, 0,0,0,0,1,0,0,0,0,0,0,0,255,254,255,255,255,31,0,0, 
0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3, 255,3,160,194,255,254,255,255,255,31,254,255,223,255,255,254,255,255,255,31, 64,0,0,0,0,0,0,0,255,255,255,255,255,7,0,128,0,0,63,60, 98,192,225,255,3,64,0,0,255,255,255,255,191,32,255,255,255,255,255,247, 255,255,255,255,255,255,255,255,255,3,255,255,255,255,255,255,255,255,255,63, 255,255,255,255,191,32,255,255,255,255,255,247,255,255,255,255,255,255,255,255, 255,61,127,61,255,255,255,255,255,61,255,255,255,255,61,127,61,255,127,255, 255,255,255,255,255,255,61,255,255,255,255,255,255,255,255,7,0,0,0,0, 255,255,0,0,255,255,255,255,255,255,255,255,255,255,63,63,255,255,61,255, 255,255,255,255,255,255,255,231,0,254,3,0,255,255,0,0,255,255,255,255, 255,255,255,255,255,255,63,63,254,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,159,255,255,254,255,255,7, 255,255,255,255,255,255,255,255,255,199,255,1,255,223,3,0,255,255,3,0, 255,255,3,0,255,223,1,0,255,255,255,255,255,255,15,0,0,0,128,16, 0,0,0,0,255,223,31,0,255,255,31,0,255,255,15,0,255,223,13,0, 255,255,255,255,255,255,255,255,255,255,143,48,255,3,0,0,0,0,0,0, 255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,5,255,255, 255,255,255,255,255,255,63,0,0,56,255,3,255,255,255,255,255,255,255,255, 255,255,255,0,255,255,255,255,255,7,255,255,255,255,255,255,255,255,63,0, 255,255,255,127,0,0,0,0,0,0,255,255,255,63,31,0,255,255,255,255, 255,15,255,255,255,3,0,0,0,0,0,0,255,255,255,127,255,15,255,15, 192,255,255,255,255,63,31,0,255,255,255,255,255,15,255,255,255,3,255,7, 0,0,0,0,255,255,127,0,255,255,255,255,255,255,31,0,0,0,0,0, 0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,255,255,255,15, 255,255,255,255,255,255,255,127,255,255,255,159,255,3,255,3,128,0,255,63, 0,0,0,0,0,0,0,0,224,255,255,255,255,255,15,0,224,15,0,0, 0,0,0,0,248,255,255,255,1,192,0,252,255,255,255,255,63,0,0,0, 255,255,255,255,255,255,255,255,255,15,255,3,0,248,15,0,255,255,255,255, 
255,255,255,255,255,255,255,255,255,255,15,0,255,255,255,255,15,0,0,0, 0,224,0,252,255,255,255,63,0,0,0,0,0,0,0,0,0,0,0,0, 0,222,99,0,255,255,255,255,255,255,255,0,255,227,255,255,255,255,255,63, 0,0,0,0,0,0,0,0,0,0,247,255,255,255,127,3,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,240, 255,255,63,63,255,255,255,255,63,63,255,170,255,255,255,63,255,255,255,255, 255,255,223,95,220,31,207,15,255,31,220,31,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,2,128,0,0,255,31,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,128,1,0,16,0,0,0,2,128, 0,0,255,31,0,0,0,0,0,0,255,31,226,255,1,0,0,48,0,0, 0,0,0,128,1,0,16,0,0,0,2,128,0,0,255,31,0,0,0,0, 0,0,255,31,226,255,1,0,132,252,47,63,80,253,255,243,224,67,0,0, 255,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,127,255,255, 255,255,255,127,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 31,120,12,0,255,255,255,255,255,127,255,255,255,255,255,127,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,31,248,15,0,255,255,255,255, 191,32,255,255,255,255,255,255,255,128,0,0,255,255,127,0,127,127,127,127, 127,127,127,127,0,0,0,0,255,255,255,255,191,32,255,255,255,255,255,255, 255,128,0,128,255,255,127,0,127,127,127,127,127,127,127,127,255,255,255,255, 224,0,0,0,254,3,62,31,254,255,255,255,255,255,255,255,255,255,127,224, 254,255,255,255,255,255,255,255,255,255,255,247,224,0,0,0,254,255,62,31, 254,255,255,255,255,255,255,255,255,255,127,230,254,255,255,255,255,255,255,255, 255,255,255,247,224,255,255,255,255,63,254,255,255,255,255,255,255,255,255,255, 255,127,0,0,255,255,255,7,0,0,0,0,0,0,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0, 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255, 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,0,0, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31,0,0, 0,0,0,0,0,0,255,255,255,255,255,63,255,31,255,255,0,12,0,0, 255,255,255,255,255,127,0,128,255,255,255,63,255,255,255,255,255,255,255,255, 255,255,0,0,255,31,255,255,255,15,0,0,255,255,255,255,255,255,240,191, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,128,255, 252,255,255,255,255,255,255,255,255,255,255,255,255,249,255,255,255,63,255,0, 0,0,0,0,0,0,128,255,187,247,255,255,7,0,0,0,255,255,255,255, 255,255,15,0,252,255,255,255,255,255,15,0,0,0,0,0,0,0,252,40, 255,255,255,255,255,0,0,0,255,255,255,255,255,255,15,0,255,255,255,255, 255,255,255,255,31,0,255,3,255,255,255,40,0,252,255,255,63,0,255,255, 127,0,0,0,255,255,255,31,240,255,255,255,255,255,7,0,0,128,0,0, 223,255,0,124,255,255,255,255,255,63,255,255,255,255,15,0,255,255,255,31, 255,255,255,255,255,255,255,255,1,128,255,3,255,255,255,127,255,255,255,255, 255,1,0,0,247,15,0,0,255,255,127,196,255,255,255,255,255,255,98,62, 5,0,0,56,255,7,28,0,255,255,255,255,255,255,127,0,255,63,255,3, 255,255,127,252,255,255,255,255,255,255,255,255,7,0,0,56,255,255,124,0, 126,126,126,0,127,127,255,255,255,255,255,247,63,0,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,7,0,0,0,126,126,126,0,127,127,255,255, 255,255,255,247,63,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,55,255,3,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,15,0,255,255,127,248,255,255,255,255,255,15,255,255,255,255, 255,255,255,255,255,255,255,255,255,63,255,255,255,255,255,255,255,255,255,255, 255,255,255,3,0,0,0,0,127,0,248,160,255,253,127,95,219,255,255,255, 255,255,255,255,255,255,255,255,255,255,3,0,0,0,248,255,255,255,255,255, 127,0,248,224,255,253,127,95,219,255,255,255,255,255,255,255,255,255,255,255, 255,255,3,0,0,0,248,255,255,255,255,255,255,255,255,255,255,255,255,255, 
255,255,255,63,240,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,63,0,0,255,255,255,255,255,255, 255,255,252,255,255,255,255,255,255,0,0,0,0,0,255,3,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,138,170,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,31,255,255,0,0,255,255,24,0,0,224,0,0, 0,0,138,170,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,31, 0,0,0,0,254,255,255,7,254,255,255,7,192,255,255,255,255,255,255,63, 255,255,255,127,252,252,252,28,0,0,0,0,0,0,255,3,254,255,255,135, 254,255,255,7,192,255,255,255,255,255,255,255,255,255,255,127,252,252,252,28, 0,0,0,0, 0,2,4,5,6,7,8,7,7,7,7,10,7,12,14,7,16,16,16,16, 16,16,16,16,16,16,17,18,19,7,7,20,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,1,3,4,5, 6,7,9,7,7,7,7,11,7,13,15,7,16,16,16,16,16,16,16,16, 16,16,17,18,19,7,7,20,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,21,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,0,1,2,3,3,5,7,9, 10,11,13,3,10,10,14,3,15,16,17,18,19,21,23,24,25,26,3,3, 3,3,3,3,0,1,2,4,3,6,8,9,10,12,13,3,10,10,14,3, 15,16,17,18,20,22,23,24,25,26,3,3,3,3,3,3,27,29,31,33, 35,37,39,3,3,41,3,43,45,47,49,3,3,51,3,3,3,53,3,3, 
3,3,3,3,3,3,3,3,28,30,32,34,36,38,40,3,3,42,3,44, 46,48,50,3,3,52,3,3,3,53,3,3,3,3,3,3,3,3,3,3, 10,10,10,10,10,10,10,49,54,10,55,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10,10,10,10,10, 56,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10,57,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,10,10,10,10,58,60,62,64,3,3,3,3,3,3,65,67, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,10,10,10,10, 59,61,63,64,3,3,3,3,3,3,66,68,69,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,70,71,3,3, 3,3,3,3,69,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,70,72,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,76,77,78,10,10,79,80,81,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,73,74,75,3,3,3,76,77,78,10, 10,79,80,82,3,3,3,3,83,84,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,10,85,3,3, 3,3,3,3,3,3,3,3,87,88,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,10,86,3,3,3,3,3,3,3,3,3,3, 87,88,3,3,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,89,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,90,10,91,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, 10,10,10,10,10,10,10,10,10,92,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,10,10,10,10,11,3,3,3,3,3,3,3, 3,3,3,3,3,3,10,93,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,255,239,255,255, 127,255,255,183,255,63,255,63,0,0,0,0,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,7,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,255,255,255,31, 255,255,255,255,255,255,1,0,0,0,0,0,255,255,255,31,255,255,255,255, 255,255,1,0,1,0,0,0,255,255,255,255,0,0,255,255,255,7,255,255, 255,255,63,0,255,255,255,255,0,0,255,255,255,7,255,255,255,255,255,7, 
255,255,255,63,255,255,255,255,15,255,62,0,0,0,0,0,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,63,0,0,0,0, 0,0,0,0,0,0,0,0,255,255,255,63,255,3,0,0,0,0,0,0, 0,0,0,0,255,255,255,255,255,0,255,255,255,255,255,255,15,0,0,0, 255,255,255,255,255,255,127,0,255,255,63,0,255,0,0,0,63,253,255,255, 255,255,191,145,255,255,63,0,255,255,127,0,255,255,255,127,0,0,0,0, 0,0,0,0,255,255,55,0,255,255,63,0,255,255,255,3,0,0,0,0, 0,0,0,0,255,255,255,255,255,255,255,192,0,0,0,0,0,0,0,0, 1,0,239,254,255,255,15,0,0,0,0,0,255,255,255,31,111,240,239,254, 255,255,15,135,0,0,0,0,255,255,255,31,255,255,255,31,0,0,0,0, 255,254,255,255,31,0,0,0,255,255,255,31,0,0,0,0,255,254,255,255, 127,0,0,0,255,255,255,255,255,255,63,0,255,255,63,0,255,255,7,0, 255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,255,255,255,1,0,0,0,0,0,0,255,255,255,255,255,255,7,0, 255,255,255,255,255,255,7,0,248,255,255,255,255,255,255,0,0,0,0,0, 0,0,0,0,255,255,255,255,255,255,255,255,127,0,0,0,192,255,0,128, 248,255,255,255,255,255,0,0,0,0,255,255,255,1,0,0,255,255,255,255, 255,255,255,7,0,0,255,255,255,1,255,3,248,255,255,255,127,0,0,0, 0,0,255,255,255,255,71,0,255,255,255,255,255,255,223,255,0,0,255,255, 255,255,79,0,248,255,255,255,255,255,7,0,30,0,0,20,0,0,0,0, 255,255,255,255,255,255,255,255,31,28,255,23,0,0,0,0,255,255,251,255, 255,15,0,0,0,0,0,0,0,0,0,0,255,255,251,255,255,255,255,0, 0,0,0,0,0,0,0,0,127,189,255,191,255,1,255,255,255,255,255,127, 0,0,0,0,127,189,255,191,255,1,255,255,255,255,255,255,255,7,255,3, 224,159,249,255,255,253,237,35,0,0,1,224,3,0,0,0,239,159,249,255, 255,253,237,243,159,57,129,224,207,31,31,0,255,255,255,255,255,255,0,0, 176,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,191,0,255,3, 0,0,0,0,255,255,255,255,255,127,0,0,0,0,0,15,0,0,0,0, 255,255,255,255,255,255,63,255,1,0,0,63,0,0,0,0,255,255,255,255, 255,255,0,0,16,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, 17,0,255,3,0,0,0,0,255,255,255,255,255,7,0,0,0,0,0,0, 
0,0,0,0,255,255,255,255,255,255,255,0,255,3,0,0,0,0,0,0, 255,255,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,227, 255,15,255,3,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,255,255,0,0,0,128,0,0,0,0,255,255,255,255,255,255,255,255, 255,3,0,128,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,1, 255,255,255,255,255,255,255,255,255,255,255,255,255,127,0,0,255,255,255,255, 255,255,255,255,15,0,0,0,0,0,0,0,255,255,255,255,255,127,0,0, 0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,127,0,0,0, 0,0,0,0,255,255,255,255,255,255,255,1,255,255,255,127,0,0,0,0, 255,255,255,255,255,255,255,1,255,255,255,127,255,3,0,0,0,0,0,0, 0,0,0,0,0,0,255,255,255,63,0,0,0,0,0,0,0,0,0,0, 0,0,255,255,255,63,31,0,255,255,255,255,255,255,0,0,15,0,0,0, 248,255,255,224,255,255,255,255,255,255,127,0,15,0,255,3,248,255,255,224, 255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,255,255,31,0,1,0,0,0,0,0,255,255,255,255,255,255,255,255, 31,0,255,255,255,255,255,127,0,0,248,255,0,0,0,0,0,0,0,0, 0,0,0,0,0,128,255,255,0,0,0,0,0,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,255,255,255,255,255,255,255,7,255,31,255,1,255,3,0,0,0,0, 0,0,0,0,0,0,0,0,255,1,255,99,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,224,227,7,248, 231,15,0,0,0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,28,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, 255,255,223,255,255,255,255,255,255,255,255,223,100,222,255,235,239,255,255,255, 255,255,255,255,191,231,223,223,255,255,255,123,95,252,253,255,255,255,255,255, 255,255,255,255,63,255,255,255,253,255,255,247,255,255,255,247,255,255,223,255, 255,255,223,255,255,127,255,255,255,127,255,255,255,253,255,255,255,253,255,255, 247,15,0,0,0,0,0,0,255,253,255,255,255,253,255,255,247,207,255,255, 255,255,255,255,255,255,255,255,255,255,127,248,255,255,255,255,255,31,32,0, 16,0,0,248,254,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255, 255,255,255,255,31,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255, 31,0,127,0,0,0,0,0,239,255,255,255,150,254,247,10,132,234,150,170, 
150,247,247,94,255,251,255,15,238,251,255,15,0,0,0,0,0,0,0,0, 255,255,255,255,255,255,255,255,255,255,127,0,0,0,0,0,255,255,255,255, 255,255,31,0,255,255,255,255,255,255,255,255,255,255,255,63,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,3,0,0,0,0,0,0,0, 0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0, }; #if LOW_TRUST private static ReadOnlySpan XIdStartBmpTable1 => DataArray.Slice(XIdStartBmpTable1Offset, XIdBmpTable1Size); private static ReadOnlySpan XIdContinueBmpTable1 => DataArray.Slice(XIdContinueBmpTable1Offset, XIdBmpTable1Size); private static ReadOnlySpan XIdContinueOrJoinerBmpTable1 => DataArray.Slice(XIdContinueOrJoinerBmpTable1Offset, XIdBmpTable1Size); private static readonly uint[] XIdBmpTable2 = Buffer.CopyUIntsStoredInLittleEndianByteArray(DataArray, XIdBmpTable2Offset, XIdBmpTable2Size); private static ReadOnlySpan XIdStartSmpTable1 => DataArray.Slice(XIdStartSmpTable1Offset, XIdSmpTable1Size); private static ReadOnlySpan XIdContinueSmpTable1 => DataArray.Slice(XIdContinueSmpTable1Offset, XIdSmpTable1Size); private static ReadOnlySpan XIdSmpTable2 => DataArray.Slice(XIdSmpTable2Offset, XIdSmpTable2Size); private static readonly uint[] XIdSmpTable3 = Buffer.CopyUIntsStoredInLittleEndianByteArray(DataArray, XIdSmpTable3Offset, XIdSmpTable3Size); #else private static byte* Data => (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(DataArray)); private static byte* XIdStartBmpTable1 => Data + XIdStartBmpTable1Offset; private static byte* XIdContinueBmpTable1 => Data + XIdContinueBmpTable1Offset; private static byte* XIdContinueOrJoinerBmpTable1 => Data + XIdContinueOrJoinerBmpTable1Offset; private static readonly uint* XIdBmpTable2 = Buffer.LoadLittleEndianUInt32Data(Data, XIdBmpTable2Offset, XIdBmpTable2Size); private static byte* XIdStartSmpTable1 => Data + XIdStartSmpTable1Offset; private static byte* XIdContinueSmpTable1 => Data + XIdContinueSmpTable1Offset; private static byte* XIdSmpTable2 => Data + 
XIdSmpTable2Offset;
    private static readonly uint* XIdSmpTable3 = Buffer.LoadLittleEndianUInt32Data(Data, XIdSmpTable3Offset, XIdSmpTable3Size);
#endif

}

}

================================================
FILE: FParsecCS/ManyChars.cs
================================================
// Copyright (c) Stephan Tolksdorf 2008-2010
// License: Simplified BSD License. See accompanying documentation.

using System;
using System.Text;

using Microsoft.FSharp.Core;

namespace FParsec {

#if !LOW_TRUST
// Four UInt64 fields = 32 bytes, i.e. storage for 16 chars. A local of this type is
// reinterpreted as a char* and used as a 16-char scratch buffer by the parsers below.
internal unsafe struct _16CharBuffer {
    public UInt64 UInt64_0;
    public UInt64 UInt64_1;
    public UInt64 UInt64_2;
    public UInt64 UInt64_3;
}
#endif

// Parser object that applies CharParser1 once and then CharParser repeatedly,
// returning the parsed chars as one string. The first char is mandatory:
// any non-Ok reply of CharParser1 is returned as the result of the whole parser.
internal class Many1Chars : FSharpFunc, Reply> {
    // Parser for the first char.
    protected FSharpFunc, Reply> CharParser1;
    // Parser for every char after the first one.
    protected FSharpFunc, Reply> CharParser;

    public Many1Chars(FSharpFunc, Reply> charParser1,
                      FSharpFunc, Reply> charParser)
    {
        CharParser1 = charParser1;
        CharParser = charParser;
    }

    public override Reply Invoke(CharStream stream) {
        var reply = CharParser1.Invoke(stream);
        if (reply.Status == ReplyStatus.Ok)
            return ParseRestOfString(stream, reply.Result, reply.Error);
        else
            return new Reply{Status = reply.Status, Error = reply.Error};
    }

    // Applies CharParser in a loop, starting with firstChar as the already parsed first char
    // and `error` as the error messages accumulated so far.
    //  - CharParser returns Ok: the char is appended. If the stream's StateTag did not change,
    //    the exception created by ParserCombinatorInInfiniteLoopHelper is thrown.
    //  - CharParser returns Error without changing the StateTag: the loop ends and the
    //    collected string is returned with status Ok and the merged error messages.
    //  - Any other reply: its status is returned without a result; the error messages are
    //    merged only if the StateTag is unchanged.
    //
    // In the LOW_TRUST build the chars are collected in a StringBuilder. Otherwise the chars
    // are written into a 16-char stack buffer; each time the buffer is full (n%16 == 0) its
    // content is copied into the heap array `chars`, which starts with 32 elements and is
    // doubled when full. `n` is the total number of chars collected so far.
#if !LOW_TRUST
    unsafe
#endif
    protected Reply ParseRestOfString(CharStream stream, char firstChar, ErrorMessageList error) {
    #if LOW_TRUST
        var sb = new StringBuilder(16);
        sb.Append(firstChar);
    #else
        _16CharBuffer buffer_; // produces more efficient code on .NET than stackalloc char[16]
        char* buffer = (char*)(&buffer_);
        buffer[0] = firstChar;
        char[] chars = null;
        uint n = 1;
    #endif
        for (;;) {
            var tag = stream.StateTag;
            var reply = CharParser.Invoke(stream);
            if (reply.Status == ReplyStatus.Ok) {
                if (tag == stream.StateTag)
                    throw Internal.ParserCombinatorInInfiniteLoopHelper.CreateException("manyChars", stream);
                error = reply.Error;
            #if LOW_TRUST
                sb.Append(reply.Result);
            #else
                var i = n%16;
                if (i != 0) {
                    buffer[i] = reply.Result;
                    ++n;
                } else {
                    // The stack buffer is full: make room in `chars` and flush the 16 buffered
                    // chars to chars[n - 16 .. n - 1] before reusing the buffer.
                    if (chars == null) chars = new char[32];
                    else if (n == chars.Length) {
                        var newChars = new char[2*chars.Length];
                        Array.Copy(chars, newChars, chars.Length);
                        chars = newChars;
                    }
                    for (i = 0; i < 16; ++i)
                        chars[n - 16 + i] = buffer[i];
                    buffer[0] = reply.Result;
                    ++n;
                }
            #endif
            } else if (reply.Status == ReplyStatus.Error && tag == stream.StateTag) {
                string str;
            #if LOW_TRUST
                str = sb.ToString();
            #else
                if (n <= 16) str = new String(buffer, 0, (int)n);
                else {
                    // (n - 1) rounded down to a multiple of 16 is the index of the first char
                    // that has not yet been flushed from the stack buffer.
                    for (uint i = (n - 1) & 0x7ffffff0u; i < n; ++i)
                        chars[i] = buffer[i%16];
                    str = new string(chars, 0, (int)n);
                }
            #endif
                error = ErrorMessageList.Merge(error, reply.Error);
                return new Reply{Status = ReplyStatus.Ok, Result = str, Error = error};
            } else {
                error = tag == stream.StateTag ? ErrorMessageList.Merge(error, reply.Error) : reply.Error;
                return new Reply{Status = reply.Status, Error = error};
            }
        }
    }

    // Returns this object typed as its FSharpFunc base type.
    public FSharpFunc, Reply> AsFSharpFunc { get { return this; } }
}

// Like Many1Chars, except that an empty string is returned with status Ok
// when CharParser1 returns Error without changing the stream's StateTag.
internal class ManyChars : Many1Chars {
    public ManyChars(FSharpFunc, Reply> charParser1,
                     FSharpFunc, Reply> charParser)
           : base(charParser1, charParser) { }

    public override Reply Invoke(CharStream stream) {
        var tag = stream.StateTag;
        var reply = CharParser1.Invoke(stream);
        if (reply.Status == ReplyStatus.Ok)
            return ParseRestOfString(stream, reply.Result, reply.Error);
        else if (reply.Status == ReplyStatus.Error && tag == stream.StateTag)
            return new Reply{Status = ReplyStatus.Ok, Result = "", Error = reply.Error};
        else
            return new Reply{Status = reply.Status, Error = reply.Error};
    }
}

// Parser object that applies CharParser1 once and then alternates between trying EndParser
// and applying CharParser until EndParser succeeds. The result is computed by calling
// Mapping with the collected string and the result of EndParser.
internal class Many1CharsTill : FSharpFunc, Reply> {
    // Parser for the first char.
    protected FSharpFunc, Reply> CharParser1;
    // Parser for every char after the first one.
    protected FSharpFunc, Reply> CharParser;
    // Parser that terminates the char sequence.
    protected FSharpFunc, Reply> EndParser;
    // Combines the collected string and the EndParser result into the final result.
    protected OptimizedClosures.FSharpFunc Mapping;

    public Many1CharsTill(FSharpFunc, Reply> charParser1,
                          FSharpFunc, Reply> charParser,
                          FSharpFunc, Reply> endParser,
                          FSharpFunc> mapping)
    {
        CharParser1 = charParser1;
        CharParser = charParser;
        EndParser = endParser;
        Mapping = (OptimizedClosures.FSharpFunc)(object)OptimizedClosures.FSharpFunc.Adapt(mapping);
    }

    // Note that EndParser is not tried before the first char is parsed with CharParser1.
    public override Reply Invoke(CharStream stream) {
        var reply = CharParser1.Invoke(stream);
        if (reply.Status == ReplyStatus.Ok)
            return ParseRestOfString(stream, reply.Result, reply.Error);
        else
            return new Reply{Status = reply.Status, Error = reply.Error};
    }

    // Loop executed after the first char (firstChar) has been parsed. In each iteration
    // EndParser is tried first:
    //  - EndParser returns Error without changing the StateTag: CharParser is applied.
    //    An Ok reply appends the char (and throws the infinite-loop exception if the StateTag
    //    did not change); any other reply ends the parser with that reply's status.
    //  - EndParser returns Ok: the collected string and the EndParser result are passed to
    //    Mapping and the mapped value is returned with status Ok.
    //  - Any other EndParser reply ends the parser with that reply's status.
    // Error messages are merged with the previously accumulated ones only if the StateTag
    // has not changed since the beginning of the iteration.
    //
    // The chars are buffered as follows: StringBuilder in the LOW_TRUST build, otherwise a
    // 16-char stack buffer that is flushed into the growing heap array `chars` whenever it
    // is full. `n` is the total number of chars collected so far.
#if !LOW_TRUST
    unsafe
#endif
    protected Reply ParseRestOfString(CharStream stream, char firstChar, ErrorMessageList error) {
    #if LOW_TRUST
        var sb = new StringBuilder(16);
        sb.Append(firstChar);
    #else
        _16CharBuffer buffer_; // produces more efficient code than stackalloc char[16]
        char* buffer = (char*)(&buffer_);
        buffer[0] = firstChar;
        char[] chars = null;
        uint n = 1;
    #endif
        for (;;) {
            var tag = stream.StateTag;
            var eReply = EndParser.Invoke(stream);
            if (eReply.Status == ReplyStatus.Error && tag == stream.StateTag) {
                var reply = CharParser.Invoke(stream);
                if (reply.Status == ReplyStatus.Ok) {
                    if (tag == stream.StateTag)
                        throw Internal.ParserCombinatorInInfiniteLoopHelper.CreateException("manyCharsTill", stream);
                    error = reply.Error;
                #if LOW_TRUST
                    sb.Append(reply.Result);
                #else
                    var i = n%16;
                    if (i != 0) {
                        buffer[i] = reply.Result;
                        ++n;
                    } else {
                        // The stack buffer is full: make room in `chars` and flush the 16
                        // buffered chars to chars[n - 16 .. n - 1] before reusing the buffer.
                        if (chars == null) chars = new char[32];
                        else if (n == chars.Length) {
                            var newChars = new char[2*chars.Length];
                            Array.Copy(chars, newChars, chars.Length);
                            chars = newChars;
                        }
                        for (i = 0; i < 16; ++i)
                            chars[n - 16 + i] = buffer[i];
                        buffer[0] = reply.Result;
                        ++n;
                    }
                #endif
                } else {
                    error = tag == stream.StateTag
                            ? ErrorMessageList.Merge(ErrorMessageList.Merge(error, eReply.Error), reply.Error)
                            : reply.Error;
                    return new Reply{Status = reply.Status, Error = error};
                }
            } else if (eReply.Status == ReplyStatus.Ok) {
                string str;
            #if LOW_TRUST
                str = sb.ToString();
            #else
                if (n <= 16) str = new String(buffer, 0, (int)n);
                else {
                    // (n - 1) rounded down to a multiple of 16 is the index of the first char
                    // that has not yet been flushed from the stack buffer.
                    for (uint i = (n - 1) & 0x7ffffff0; i < n; ++i)
                        chars[i] = buffer[i%16];
                    str = new string(chars, 0, (int)n);
                }
            #endif
                var result = Mapping.Invoke(str, eReply.Result);
                error = tag == stream.StateTag
                        ? ErrorMessageList.Merge(error, eReply.Error)
                        : eReply.Error;
                return new Reply{Status = ReplyStatus.Ok, Result = result, Error = error};
            } else {
                error = tag == stream.StateTag
                        ? ErrorMessageList.Merge(error, eReply.Error)
                        : eReply.Error;
                return new Reply{Status = eReply.Status, Error = error};
            }
        }
    }

    // Returns this object typed as its FSharpFunc base type.
    public FSharpFunc, Reply> AsFSharpFunc { get { return this; } }
}

// Like Many1CharsTill, except that EndParser is also tried before the first char.
// If it succeeds right away, Mapping is called with the empty string.
internal class ManyCharsTill : Many1CharsTill {
    public ManyCharsTill(FSharpFunc, Reply> charParser1,
                         FSharpFunc, Reply> charParser,
                         FSharpFunc, Reply> endParser,
                         FSharpFunc> mapping)
           : base(charParser1, charParser, endParser, mapping) { }

    public override Reply Invoke(CharStream stream) {
        var tag = stream.StateTag;
        var eReply = EndParser.Invoke(stream);
        if (eReply.Status == ReplyStatus.Error && tag == stream.StateTag) {
            var reply = CharParser1.Invoke(stream);
            if (reply.Status == ReplyStatus.Ok) {
                return ParseRestOfString(stream, reply.Result, reply.Error);
            } else {
                // Both parsers failed; report both errors if the stream state is unchanged.
                var error = tag == stream.StateTag
                            ? ErrorMessageList.Merge(eReply.Error, reply.Error)
                            : reply.Error;
                return new Reply{Status = reply.Status, Error = error};
            }
        } else if (eReply.Status == ReplyStatus.Ok) {
            var result = Mapping.Invoke("", eReply.Result);
            return new Reply{Status = ReplyStatus.Ok, Result = result, Error = eReply.Error};
        } else {
            return new Reply{Status = eReply.Status, Error = eReply.Error};
        }
    }
}

}

================================================
FILE: FParsecCS/OperatorPrecedenceParser.cs
================================================
// Copyright (c) Stephan Tolksdorf 2008-2011
// License: Simplified BSD License. See accompanying documentation.
using System;
using Microsoft.FSharp.Core;
using System.Diagnostics;
using System.Collections.Generic;

namespace FParsec {

public enum Associativity {
    None = 0,
    Left = 1,
    Right = 2
}

public enum OperatorType {
    Infix = 0,
    Prefix = 1,
    Postfix = 2
}

// Describes a single operator definition: its type, its string(s), the parser(s)
// applied after the string(s), its precedence and associativity, and the mapping
// that is applied to the parsed terms. Instances are created through the sealed
// subclasses below; each internal constructor sets exactly one of
// Mapping1 (prefix/postfix), Mapping2 (infix) or Mapping3 (ternary).
public class Operator {
    public OperatorType Type { get; private set; }

    public string String { get; protected set; }
    internal FSharpFunc, Reply> AfterStringParser { get; private set; }

    // Only set for ternary operators; IsTernary is derived from it.
    public string TernaryRightString { get; protected set; }
    internal FSharpFunc, Reply> AfterTernaryRightStringParser { get; private set; }
    public bool IsTernary { get { return TernaryRightString != null; } }

    public int Precedence { get; protected set; }
    public Associativity Associativity { get; protected set; }
    public bool IsAssociative { get { return Associativity != Associativity.None; } }

    internal OptimizedClosures.FSharpFunc Mapping1 { get; private set; }
    internal OptimizedClosures.FSharpFunc Mapping2 { get; private set; }
    internal OptimizedClosures.FSharpFunc Mapping3 { get; private set; }

    private Operator() {}

    // Sentinel with Precedence 0 (the default), i.e. lower than any operator that
    // passes the "precedence < 1" validation in the constructors below.
    static readonly internal Operator ZeroPrecedenceOperator = new Operator{Type = OperatorType.Prefix};

    // Shared validation for the non-ternary constructors.
    // Throws ArgumentException for a null/empty operator string,
    // ArgumentNullException for a null after-string parser and
    // ArgumentOutOfRangeException for a precedence below 1.
    private Operator(OperatorType type,
                     string operatorString,
                     FSharpFunc, Reply> afterStringParser,
                     int precedence)
    {
        Debug.Assert(type >= OperatorType.Infix && type <= OperatorType.Postfix);
        Type = type;
        // Note: ArgumentException takes (message, paramName), unlike
        // ArgumentNullException/ArgumentOutOfRangeException, which take the parameter name first.
        if (string.IsNullOrEmpty(operatorString)) throw new ArgumentException("The operator string must not be empty.", "operatorString");
        String = operatorString;
        if (afterStringParser == null) throw new ArgumentNullException("afterStringParser");
        AfterStringParser = afterStringParser;
        if (precedence < 1) throw new ArgumentOutOfRangeException("precedence", "The operator precedence must be greater than 0.");
        Precedence = precedence;
    }

    // Infix operator constructor.
    internal Operator(string operatorString,
                      FSharpFunc, Reply> afterStringParser,
                      int precedence,
                      Associativity associativity,
                      FSharpFunc>> mapping)
           : this(OperatorType.Infix, operatorString, afterStringParser, precedence)
    {
        if (associativity < Associativity.None || associativity > Associativity.Right)
            throw new ArgumentOutOfRangeException("associativity", "The associativity argument is invalid.");
        Associativity = associativity;
        if (mapping == null) throw new ArgumentNullException("mapping");
        Mapping2 = OptimizedClosures.FSharpFunc.Adapt(mapping);
    }

    // Prefix/postfix operator constructor. An associative prefix operator is
    // treated as right-associative, an associative postfix operator as left-associative.
    internal Operator(OperatorType type,
                      string operatorString,
                      FSharpFunc, Reply> afterStringParser,
                      int precedence,
                      bool isAssociative,
                      FSharpFunc> mapping)
           : this(type, operatorString, afterStringParser, precedence)
    {
        Debug.Assert(type == OperatorType.Prefix || type == OperatorType.Postfix);
        Associativity = !isAssociative ? Associativity.None :
                        type == OperatorType.Prefix ? Associativity.Right : Associativity.Left;
        if (mapping == null) throw new ArgumentNullException("mapping");
        Mapping1 = OptimizedClosures.FSharpFunc.Adapt(mapping);
    }

    // Ternary operator constructor. A ternary operator has Type == Infix and a
    // non-null TernaryRightString.
    internal Operator(string leftString,
                      FSharpFunc, Reply> afterLeftStringParser,
                      string rightString,
                      FSharpFunc, Reply> afterRightStringParser,
                      int precedence,
                      Associativity associativity,
                      FSharpFunc>>>> mapping)
    {
        Type = OperatorType.Infix;
        // ArgumentException takes (message, paramName).
        if (string.IsNullOrEmpty(leftString)) throw new ArgumentException("The operator strings must not be empty.", "leftString");
        String = leftString;
        if (afterLeftStringParser == null) throw new ArgumentNullException("afterLeftStringParser");
        AfterStringParser = afterLeftStringParser;
        if (string.IsNullOrEmpty(rightString)) throw new ArgumentException("The operator strings must not be empty.", "rightString");
        TernaryRightString = rightString;
        if (afterRightStringParser == null) throw new ArgumentNullException("afterRightStringParser");
        AfterTernaryRightStringParser = afterRightStringParser;
        if (precedence < 1) throw new ArgumentOutOfRangeException("precedence", "The operator precedence must be greater than 0.");
        Precedence = precedence;
        if (associativity < Associativity.None || associativity > Associativity.Right)
            throw new ArgumentOutOfRangeException("associativity", "The associativity argument is invalid.");
        Associativity = associativity;
        if (mapping == null) throw new ArgumentNullException("mapping");
        Mapping3 = OptimizedClosures.FSharpFunc.Adapt(mapping);
    }

    // The following adapters wrap a mapping that does not take the after-string
    // value(s) so that it can be stored in Mapping1/2/3; the after-string
    // arguments are ignored.

    protected class NoAfterStringUnaryMappingAdapter : OptimizedClosures.FSharpFunc {
        private FSharpFunc Mapping;
        public NoAfterStringUnaryMappingAdapter(FSharpFunc mapping) { Mapping = mapping; }
        public override TTerm Invoke(TAfterString afterString, TTerm term) {
            return Mapping.Invoke(term);
        }
    }

    protected class NoAfterStringBinaryMappingAdapter : OptimizedClosures.FSharpFunc {
        private OptimizedClosures.FSharpFunc Mapping;
        public NoAfterStringBinaryMappingAdapter(OptimizedClosures.FSharpFunc mapping) { Mapping = mapping; }
        public override TTerm Invoke(TAfterString afterString, TTerm leftTerm, TTerm rightTerm) {
            return Mapping.Invoke(leftTerm, rightTerm);
        }
    }

    protected class NoAfterStringTernaryMappingAdapter : OptimizedClosures.FSharpFunc {
        private OptimizedClosures.FSharpFunc Mapping;
        public NoAfterStringTernaryMappingAdapter(OptimizedClosures.FSharpFunc mapping) { Mapping = mapping; }
        public override TTerm Invoke(TAfterString afterLeftString, TAfterString afterRightString, TTerm leftTerm, TTerm middleTerm, TTerm rightTerm) {
            return Mapping.Invoke(leftTerm, middleTerm, rightTerm);
        }
    }
}

public sealed class InfixOperator : Operator {
    // A null mapping is passed through as null so that the base constructor
    // reports it with an ArgumentNullException.
    public InfixOperator(string operatorString,
                         FSharpFunc, Reply> afterStringParser,
                         int precedence,
                         Associativity associativity,
                         FSharpFunc> mapping)
           : base(operatorString, afterStringParser, precedence, associativity,
                  mapping == null ? null : new NoAfterStringBinaryMappingAdapter(OptimizedClosures.FSharpFunc.Adapt(mapping))) {}

    public InfixOperator(string operatorString,
                         FSharpFunc, Reply> afterStringParser,
                         int precedence,
                         Associativity associativity,
                         Unit dummy, // disambiguates overloads in F#
                         FSharpFunc>> mapping)
           : base(operatorString, afterStringParser, precedence, associativity, mapping) {}
}

public sealed class PrefixOperator : Operator {
    public PrefixOperator(string operatorString,
                          FSharpFunc, Reply> afterStringParser,
                          int precedence,
                          bool isAssociative,
                          FSharpFunc mapping)
           : base(OperatorType.Prefix, operatorString, afterStringParser, precedence, isAssociative,
                  mapping == null ? null : new NoAfterStringUnaryMappingAdapter(mapping)) {}

    public PrefixOperator(string operatorString,
                          FSharpFunc, Reply> afterStringParser,
                          int precedence,
                          bool isAssociative,
                          Unit dummy, // disambiguates overloads in F#
                          FSharpFunc> mapping)
           : base(OperatorType.Prefix, operatorString, afterStringParser, precedence, isAssociative, mapping) {}
}

public sealed class PostfixOperator : Operator {
    public PostfixOperator(string operatorString,
                           FSharpFunc, Reply> afterStringParser,
                           int precedence,
                           bool isAssociative,
                           FSharpFunc mapping)
           : base(OperatorType.Postfix, operatorString, afterStringParser, precedence, isAssociative,
                  mapping == null ? null : new NoAfterStringUnaryMappingAdapter(mapping)) {}

    public PostfixOperator(string operatorString,
                           FSharpFunc, Reply> afterStringParser,
                           int precedence,
                           bool isAssociative,
                           Unit dummy, // disambiguates overloads in F#
                           FSharpFunc> mapping)
           : base(OperatorType.Postfix, operatorString, afterStringParser, precedence, isAssociative, mapping) {}
}

public sealed class TernaryOperator : Operator {
    public TernaryOperator(string leftString,
                           FSharpFunc, Reply> afterLeftStringParser,
                           string rightString,
                           FSharpFunc, Reply> afterRightStringParser,
                           int precedence,
                           Associativity associativity,
                           FSharpFunc>> mapping)
           : base(leftString, afterLeftStringParser, rightString, afterRightStringParser, precedence, associativity,
                  mapping == null ? null : new NoAfterStringTernaryMappingAdapter(OptimizedClosures.FSharpFunc.Adapt(mapping))) {}

    public TernaryOperator(string leftString,
                           FSharpFunc, Reply> afterLeftStringParser,
                           string rightString,
                           FSharpFunc, Reply> afterRightStringParser,
                           int precedence,
                           Associativity associativity,
                           Unit dummy, // disambiguates overloads in F#
                           FSharpFunc>>>> mapping)
           : base(leftString, afterLeftStringParser, rightString, afterRightStringParser, precedence, associativity, mapping) {}
}

public class OperatorPrecedenceParser : FSharpFunc, Reply> {

    // Per-operator bookkeeping captured when an operator is parsed: the operator,
    // the value returned by its after-string parser and the stream position
    // (index token, line, line begin) at which the operator started.
    internal struct OperatorData { // declared as struct, so we can allocate it on the stack
        internal Operator Operator;
        internal TAfterString AfterStringValue;
        internal CharStreamIndexToken IndexToken;
        internal long Line;
        internal long LineBegin;
    }

    /// The length of LhsOps and RhsOps. Must be a power of 2.
    internal const int OpsArrayLength = 128;

    // LhsOps and RhsOps are arrays of operator arrays. LhsOps contains the prefix
    // operator definitions, RhsOps contains all other operator definitions.
    // Both have a fixed size of OpsArrayLength (which must be a power of 2).
    // All operators beginning with the same char modulo OpsArrayLength are
    // grouped together in the same inner array. The inner arrays are sorted
    // by the Operator.String property in descending lexical order.
    // The index of an inner array in the outer array is given by the
    // inner array's operator strings' first char modulo OpsArrayLength.
    // An empty inner array is represented by null.
    private readonly Operator[][] LhsOps = new Operator[OpsArrayLength][];
    private readonly Operator[][] RhsOps = new Operator[OpsArrayLength][];

    // initialized to 0
    private int PrefixOpCount;
    private int InfixOpCount;
    private int PostfixOpCount;

    private ErrorMessageList ExpectedInfixOrPostfixOperator; // initialized to null

    // Maps the right-hand string of each registered ternary operator to that operator.
    private readonly Dictionary> Reserved = new Dictionary>();

    // The following two members aren't static because accessing static members of generic types is rather expensive.

    /// ParsePrefixOp returns this value to signal that it backtracked and we should try to parse a term.
    private readonly Operator ErrorOp = Operator.ZeroPrecedenceOperator;

    /// Can not be readonly because it is passed as a ref (for performance reasons), but it is never mutated.
private OperatorData ZeroPrecedenceOperatorData = new OperatorData{Operator = Operator.ZeroPrecedenceOperator};

public FSharpFunc, Reply> TermParser { get; set; }

public FSharpFunc< Tuple, TAfterString>, ErrorMessageList> MissingTernary2ndStringErrorFormatter { get; set; }

// C# really needs type abbreviations (or better type inference)
private OptimizedClosures.FSharpFunc< Tuple, TAfterString>, Tuple, TAfterString>, ErrorMessageList> _OperatorConflictErrorFormatter;

// The formatter is stored in its adapted (OptimizedClosures) form; the setter adapts the given function.
public FSharpFunc< Tuple, TAfterString>, FSharpFunc, TAfterString>, ErrorMessageList>> OperatorConflictErrorFormatter {
    get { return _OperatorConflictErrorFormatter; }
    set { _OperatorConflictErrorFormatter = OptimizedClosures.FSharpFunc, TAfterString>,Tuple, TAfterString>, ErrorMessageList> .Adapt(value); }
}

public OperatorPrecedenceParser() {
    MissingTernary2ndStringErrorFormatter = new DefaultMissingTernary2ndStringErrorFormatter();
    OperatorConflictErrorFormatter = new DefaultOperatorConflictErrorFormatter();
}

public FSharpFunc, Reply> ExpressionParser { get { return this; } }

// Locates str in the operator table ops.
// arrayIndex receives the bucket index (first char of str, masked with OpsArrayLength - 1).
// indexInArray receives the index of the matching operator, or, if there is no
// match, the index at which an operator with this string has to be inserted to
// keep the bucket sorted in descending ordinal order.
// Returns true if an operator with exactly this string is registered in ops.
// str must be non-empty (str[0] is read unconditionally).
private bool FindPosition(Operator[][] ops, string str, out int arrayIndex, out int indexInArray) {
    var c0 = str[0];
    int i = c0 & (OpsArrayLength - 1);
    arrayIndex = i;
    var array = ops[i];
    int c = -1;
    int j = 0;
    if (array != null) {
        for (j = 0; j < array.Length; ++j) {
            c = String.CompareOrdinal(str, array[j].String);
            if (c >= 0) break;
        }
    }
    indexInArray = j;
    return c == 0;
}

private void ThrowDefinitionConflictException(Operator op, Operator oldOp) {
    throw new ArgumentException("The definition of the " + op.ToString() + " conflicts with (or duplicates) the previous definition of the " + oldOp.ToString() + ".");
}

// Registers op with the parser.
// Throws an ArgumentException if op's string (or, for a ternary operator, its
// right string) collides with a reserved ternary right string or with an
// already registered operator in the relevant table.
public void AddOperator(Operator op) {
    Operator oldOp;
    if (   Reserved.TryGetValue(op.String, out oldOp)
        || (op.IsTernary && Reserved.TryGetValue(op.TernaryRightString, out oldOp)))
    {
        ThrowDefinitionConflictException(op, oldOp);
    }
    var ops = op.Type == OperatorType.Prefix ? LhsOps : RhsOps;
    int i, j;
    if (FindPosition(ops, op.String, out i, out j))
        ThrowDefinitionConflictException(op, ops[i][j]);
    if (op.IsTernary) {
        int i2, j2;
        // make sure the Ternary2ndString isn't registered as an operator
        if (FindPosition(LhsOps, op.TernaryRightString, out i2, out j2))
            ThrowDefinitionConflictException(op, LhsOps[i2][j2]);
        if (FindPosition(RhsOps, op.TernaryRightString, out i2, out j2))
            ThrowDefinitionConflictException(op, RhsOps[i2][j2]);
        Reserved.Add(op.TernaryRightString, op);
    }
    // Insert op at index j of bucket i; the bucket arrays are replaced, never mutated in place.
    var array = ops[i];
    if (array == null) {
        ops[i] = new Operator[1]{op};
    } else {
        int n = array.Length;
        var newArray = new Operator[n + 1];
        if (j != 0) Array.Copy(array, 0, newArray, 0, j);
        newArray[j] = op;
        if (j != n) Array.Copy(array, j, newArray, j + 1, n - j);
        ops[i] = newArray;
    }
    // Keep the counters and the "expected operator" error message in sync.
    if (op.Type == OperatorType.Infix) {
        ++InfixOpCount;
        if (InfixOpCount == 1) {
            ExpectedInfixOrPostfixOperator = PostfixOpCount == 0
                                             ? Errors.ExpectedInfixOperator
                                             : Errors.ExpectedInfixOrPostfixOperator;
        }
    } else if (op.Type == OperatorType.Postfix) {
        ++PostfixOpCount;
        if (PostfixOpCount == 1) {
            ExpectedInfixOrPostfixOperator = InfixOpCount == 0
                                             ? Errors.ExpectedPostfixOperator
                                             : Errors.ExpectedInfixOrPostfixOperator;
        }
    } else ++PrefixOpCount;
}

public bool RemoveInfixOperator(string opString) { return Remove(OperatorType.Infix, opString); }
public bool RemovePrefixOperator(string opString) { return Remove(OperatorType.Prefix, opString); }
public bool RemovePostfixOperator(string opString) { return Remove(OperatorType.Postfix, opString); }

// Removes the ternary operator with the given left and right strings.
// Returns false if opStringRight is not reserved by a ternary operator whose
// left string is opStringLeft.
public bool RemoveTernaryOperator(string opStringLeft, string opStringRight) {
    Operator reservedOp;
    if (!Reserved.TryGetValue(opStringRight, out reservedOp) || opStringLeft != reservedOp.String) return false;
    // Remove also releases the reservation of the right string, and only does so
    // once the operator has actually been located in the table.
    return Remove(OperatorType.Infix, opStringLeft);
}

// Removes exactly the given operator instance. Returns false if a different
// operator (or none) is registered under op's string.
public bool RemoveOperator(Operator op) {
    var ops = op.Type == OperatorType.Prefix ? LhsOps : RhsOps;
    int i, j;
    if (!FindPosition(ops, op.String, out i, out j)) return false;
    if (op != ops[i][j]) return false;
    return op.IsTernary ? RemoveTernaryOperator(op.String, op.TernaryRightString)
                        : Remove(op.Type, op.String);
}

// Removes the operator of the given type that is registered under opString.
// Returns false if no operator of that type is registered under opString.
private bool Remove(OperatorType operatorType, string opString) {
    // An empty string can never match a registered operator, and
    // FindPosition reads str[0] unconditionally.
    if (string.IsNullOrEmpty(opString)) return false;
    var ops = operatorType == OperatorType.Prefix ? LhsOps : RhsOps;
    int i, j;
    if (!FindPosition(ops, opString, out i, out j)) return false;
    var array = ops[i];
    var found = array[j];
    // RhsOps holds infix and postfix operators alike. Without this check a request
    // to remove an infix operator could remove a postfix operator with the same
    // string (or vice versa) and decrement the wrong counter.
    if (found.Type != operatorType) return false;
    // A ternary operator reserves its right string; release the reservation
    // together with the operator so the string can be registered again.
    if (found.IsTernary) Reserved.Remove(found.TernaryRightString);
    var n = array.Length;
    if (n == 1) ops[i] = null;
    else {
        var newArray = new Operator[n - 1];
        if (j != 0) Array.Copy(array, 0, newArray, 0, j);
        if (j + 1 != n) Array.Copy(array, j + 1, newArray, j, n - j - 1);
        ops[i] = newArray;
    }
    if (operatorType == OperatorType.Infix) {
        --InfixOpCount;
        if (InfixOpCount == 0) {
            ExpectedInfixOrPostfixOperator = PostfixOpCount == 0 ? null : Errors.ExpectedPostfixOperator;
        }
    } else if (operatorType == OperatorType.Postfix) {
        --PostfixOpCount;
        if (PostfixOpCount == 0) {
            ExpectedInfixOrPostfixOperator = InfixOpCount == 0 ? null : Errors.ExpectedInfixOperator;
        }
    } else --PrefixOpCount;
    return true;
}

// Returns a snapshot array of all registered operators (prefix operators first).
public IEnumerable> Operators { get {
    var result = new Operator[PrefixOpCount + InfixOpCount + PostfixOpCount];
    var n = 0;
    if (PrefixOpCount != 0) {
        foreach (var array in LhsOps)
            if (array != null)
                foreach (var op in array)
                    result[n++] = op;
    }
    if ((InfixOpCount | PostfixOpCount) != 0) {
        foreach (var array in RhsOps)
            if (array != null)
                foreach (var op in array)
                    result[n++] = op;
    }
    Debug.Assert(n == result.Length);
    return result;
} }

// Returns the first operator in the bucket for the next char whose string
// matches the upcoming input, or null if there is none. Because buckets are
// sorted in descending order, the scan can stop once an operator string
// starts with a smaller char than the next input char.
private Operator PeekOp(CharStream stream, Operator[][] ops) {
    var cs = stream.Peek2();
    var c1 = cs.Char1;
    var c0 = cs.Char0;
    var array = ops[c0 & (OpsArrayLength - 1)];
    if (array != null) {
        foreach (var op in array) {
            var s = op.String;
            if (s[0] == c0) {
                // Strings of length 1 or 2 are fully decided by the two peeked chars;
                // longer strings need a full match against the stream.
                if (   s.Length <= 1
                    || (s[1] == c1 && (s.Length == 2 || stream.Match(s)))) return op;
            } else if (s[0] < c0) break;
        }
    }
    return null;
}

public override Reply Invoke(CharStream stream) {
    Reply reply = new Reply();
    reply.Status = ReplyStatus.Ok;
    var nextOp = ParseExpression(ref ZeroPrecedenceOperatorData, ref reply, stream);
    Debug.Assert(nextOp == null);
    return reply;
}

// =============================================================================
// NOTE: The main complication in the below code arises from the handling of the
// backtracking related to the after-string-parser. Please see the reference
// documentation for an explanation of the after-string-parser behaviour.
// =============================================================================

// Parses an expression that follows the operator described by prevOpData.
// First parses either a prefix operator application (via ParsePrefixOp) or a
// term (via TermParser), then inspects the next operator to decide whether to
// return to the caller or to continue in ParseExpressionContinue.
// The result is written to reply. The return value is the operator that was
// found but has to be handled by the caller (label Break), null if there is no
// such operator or an error occurred (label ReturnNull), or whatever
// ParseExpressionContinue returns (label Continue).
internal Operator ParseExpression(
                    ref OperatorData prevOpData, // prevOpData is passed as ref for performance reasons, but is not mutated
                    ref Reply reply,
                    CharStream stream)
{
    Operator op;
    if (PrefixOpCount != 0 && ((op = PeekOp(stream, LhsOps)) != null)) {
        op = ParsePrefixOp(ref prevOpData, op, ref reply, stream);
        // ParsePrefixOp returns ErrorOp when it backtracks and we should try to parse a term
        if (op == null) goto Break;
        if (op != ErrorOp) goto CheckNextOp;
    }
    var error = reply.Error;
    var stateTag = stream.StateTag;
    reply = TermParser.Invoke(stream); // <-- this is where we parse the terms
    // Only if the term parser left the state tag unchanged are the previous
    // error messages merged with the term parser's messages.
    if (stateTag == stream.StateTag) {
        error = ErrorMessageList.Merge(error, reply.Error);
        if (PrefixOpCount != 0) error = ErrorMessageList.Merge(error, Errors.ExpectedPrefixOperator);
        reply.Error = error;
    }
    if (reply.Status != ReplyStatus.Ok) goto ReturnNull;
    op = PeekOp(stream, RhsOps);

CheckNextOp:
    if (op != null) {
        var prevOp = prevOpData.Operator;
        // Higher-precedence previous operator: hand op back to the caller.
        if (prevOp.Precedence > op.Precedence) goto Break;
        // Lower-precedence previous operator: op is parsed here.
        if (prevOp.Precedence < op.Precedence) goto Continue;
        // prevOp.Precedence == op.Precedence
        if (op.Type == OperatorType.Infix) {
            // assoc is Left or Right only if both operators have that same associativity.
            var assoc = prevOp.Associativity & op.Associativity;
            if (assoc == Associativity.Left || prevOp.Type == OperatorType.Prefix) goto Break;
            if (assoc == Associativity.Right) goto Continue;
        } else {
            if (prevOp.Type == OperatorType.Infix) goto Continue;
            Debug.Assert(prevOp.Type == OperatorType.Prefix && op.Type == OperatorType.Postfix);
            if ((prevOp.Associativity | op.Associativity) != Associativity.None) goto Break;
        }
        // Equal precedence and none of the associativity rules above applied.
        HandlePossibleConflict(ref prevOpData, op, ref reply, stream);
    } else {
        error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator);
        reply.Error = error;
    }
ReturnNull:
    op = null;
Break:
    return op;
Continue:
    return ParseExpressionContinue(ref prevOpData, op, ref reply, stream);
}

/// Parses the following prefix operators, plus the expression the operators apply to.
// op is the prefix operator that was peeked at the current stream position.
// Returns:
//   - the value returned by the nested ParseExpression call, if the operator's
//     after-string parser succeeded and there is no conflict with prevOpData's operator;
//   - ErrorOp, if the after-string parser failed with ReplyStatus.Error without
//     changing the state tag (the stream is reset to the start of the operator);
//   - null otherwise (conflict reported via ReportConflict, or the after-string
//     parser's error copied into reply).
private Operator ParsePrefixOp(ref OperatorData prevOpData, Operator op, ref Reply reply, CharStream stream) {
    // Remember where the operator starts so that we can backtrack and report positions.
    var opData = new OperatorData();
    opData.Line = stream.Line;
    opData.LineBegin = stream.LineBegin;
    opData.IndexToken = stream.IndexToken;
    opData.Operator = op;
    var userState = stream.UserState;
#if DEBUG
    var ok = stream.Skip(op.String);
    Debug.Assert(ok);
#else
    stream.Skip((uint)op.String.Length);
#endif
    // Captured after skipping the operator string, so that a change made by the
    // after-string parser can be detected.
    var stateTag = stream.StateTag;
    var asReply = op.AfterStringParser.Invoke(stream);
    if (asReply.Status == ReplyStatus.Ok) {
        opData.AfterStringValue = asReply.Result;
        var prevOp = prevOpData.Operator;
        // The only conflicting case: two prefix operators of equal precedence
        // that are both non-associative.
        if (   prevOp.Precedence != op.Precedence
            || prevOp.Type != OperatorType.Prefix
            || (prevOp.Associativity | op.Associativity) != Associativity.None)
        {
            reply.Error = asReply.Error;
            var nextOp = ParseExpression(ref opData, ref reply, stream);
            if (reply.Status == ReplyStatus.Ok)
                reply.Result = op.Mapping1.Invoke(opData.AfterStringValue, reply.Result);
            return nextOp;
        }
        // backtrack to the beginning of the operator
        stream.Seek(opData.IndexToken);
        stream.SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(opData.Line);
        stream.SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(opData.LineBegin);
        stream.UserState = userState;
        // NOTE(review): presumably restores the tag value from before the Skip above
        // (assuming Skip incremented it by one) - confirm against CharStream.
        stream.StateTag = stateTag - 1;
        ReportConflict(ref prevOpData, op, asReply.Result, ref reply, stream);
        return null;
    } else if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) {
        // backtrack to the beginning of the operator
        stream.Seek(opData.IndexToken);
        stream.StateTag = stateTag - 1;
        return ErrorOp;
    } else {
        // The after-string parser failed after changing the state (or failed
        // with a status other than Error): propagate its error.
        reply.Error = asReply.Error;
        reply.Status = asReply.Status;
        return null;
    }
}

/// Parses (higher-precedence) infix and postfix operators after the first term, together with the argument expressions.
private Operator ParseExpressionContinue(ref OperatorData prevOpData, Operator op, ref Reply reply, CharStream stream) { var opData = new OperatorData(); for (;;) { opData.Line = stream.Line; opData.LineBegin = stream.LineBegin; opData.IndexToken = stream.IndexToken; opData.Operator = op; #if DEBUG var ok = stream.Skip(op.String); Debug.Assert(ok); #else stream.Skip((uint)op.String.Length); #endif var stateTag = stream.StateTag; var asReply = op.AfterStringParser.Invoke(stream); if (asReply.Status == ReplyStatus.Ok) { opData.AfterStringValue = asReply.Result; reply.Error = asReply.Error; if (op.Type == OperatorType.Infix) { var result1 = reply.Result; if (!op.IsTernary) { var nextOp = ParseExpression(ref opData, ref reply, stream); if (reply.Status == ReplyStatus.Ok) reply.Result = op.Mapping2.Invoke(opData.AfterStringValue, result1, reply.Result); op = nextOp; if (op == null) break; goto CheckNextOp; } else { ParseExpression(ref ZeroPrecedenceOperatorData, ref reply, stream); if (reply.Status != ReplyStatus.Ok) goto ReturnNull; var result2 = reply.Result; if (stream.Skip(op.TernaryRightString)) { stateTag = stream.StateTag; asReply = op.AfterTernaryRightStringParser.Invoke(stream); if (asReply.Status == ReplyStatus.Ok) { reply.Error = asReply.Error; var nextOp = ParseExpression(ref opData, ref reply, stream); if (reply.Status == ReplyStatus.Ok) reply.Result = op.Mapping3.Invoke(opData.AfterStringValue, asReply.Result, result1, result2, reply.Result); op = nextOp; if (op == null) break; goto CheckNextOp; } else if (asReply.Status != ReplyStatus.Error || stateTag != stream.StateTag) { reply.Error = asReply.Error; reply.Status = asReply.Status; goto ReturnNull; } else { // backtrack stream.Skip(-op.TernaryRightString.Length); stream.StateTag -= 2; } } HandleMissingTernary2ndStringError(ref opData, ref reply, stream); goto ReturnNull; } } else { Debug.Assert(op.Type == OperatorType.Postfix); reply.Result = op.Mapping1.Invoke(opData.AfterStringValue, reply.Result); var 
lastOp = op; op = PeekOp(stream, RhsOps); // we check for adjacent postfix operators here ... if (op != null) { if (op.Type == OperatorType.Postfix && lastOp.Precedence <= op.Precedence) { if ( lastOp.Precedence < op.Precedence || (lastOp.Associativity | op.Associativity) != Associativity.None) continue; // ... so we can report conflicting postfix operators HandlePossibleConflict(ref opData, op, ref reply, stream); goto ReturnNull; } } else { reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); break; } } CheckNextOp: var prevOp = prevOpData.Operator; if (prevOp.Precedence < op.Precedence) continue; if (prevOp.Precedence > op.Precedence) break; // prevOp.Precedence == op.Precedence if (op.Type == OperatorType.Infix) { var assoc = prevOp.Associativity & op.Associativity; if (assoc == Associativity.Left || prevOp.Type == OperatorType.Prefix) break; if (assoc == Associativity.Right) continue; } else { // op.OperatorType == OperatorType.Postfix if (prevOp.Type == OperatorType.Infix) continue; Debug.Assert(prevOp.Type == OperatorType.Prefix); if ((prevOp.Associativity | op.Associativity) != Associativity.None) break; } HandlePossibleConflict(ref prevOpData, op, ref reply, stream); } else { // asReply.Status != ReplyStatus.Ok if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) { // backtrack stream.Seek(opData.IndexToken); stream.StateTag -= 2; reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); } else { reply.Error = asReply.Error; reply.Status = asReply.Status; } } ReturnNull: op = null; break; } return op; } private void HandleMissingTernary2ndStringError(ref OperatorData opData, ref Reply reply, CharStream stream) { var firstStringIndex = opData.IndexToken.GetIndex(stream); var firstStringColumn = firstStringIndex - opData.LineBegin + 1; var firstStringPos = new Position(stream.Name, firstStringIndex, opData.Line, firstStringColumn); var secondStringPos = stream.Position; var error1 = 
ExpectedInfixOrPostfixOperator; var error2 = MissingTernary2ndStringErrorFormatter.Invoke(Tuple.Create(firstStringPos, secondStringPos, (TernaryOperator)opData.Operator, opData.AfterStringValue)); reply.Error = ErrorMessageList.Merge(reply.Error, ErrorMessageList.Merge(error1, error2)); reply.Status = ReplyStatus.Error; } private void HandlePossibleConflict(ref OperatorData prevOpData, Operator op, ref Reply reply, CharStream stream) { // "possible" conflict, because it's not a conflict when the // after-string-parser fails without changing the parser state. var state = stream.State; var ok = stream.Skip(op.String); Debug.Assert(ok); var stateTag = stream.StateTag; var asReply = op.AfterStringParser.Invoke(stream); if (asReply.Status == ReplyStatus.Ok) { stream.BacktrackTo(ref state); ReportConflict(ref prevOpData, op, asReply.Result, ref reply, stream); } else if (asReply.Status == ReplyStatus.Error && stateTag == stream.StateTag) { // backtrack and ignore the operator stream.BacktrackTo(ref state); reply.Error = ErrorMessageList.Merge(reply.Error, ExpectedInfixOrPostfixOperator); } else { // report AfterStringParser error instead of conflict reply.Error = asReply.Error; reply.Status = asReply.Status; } } private void ReportConflict(ref OperatorData prevOpData, Operator op, TAfterString afterStringValue, ref Reply reply, CharStream stream) { var prevOpIndex = prevOpData.IndexToken.GetIndex(stream); var prevOpColumn = prevOpIndex - prevOpData.LineBegin + 1; var prevOpPos = new Position(stream.Name, prevOpIndex, prevOpData.Line, prevOpColumn); var error = _OperatorConflictErrorFormatter.Invoke( Tuple.Create(prevOpPos, prevOpData.Operator, prevOpData.AfterStringValue), Tuple.Create(stream.Position, op, afterStringValue)); reply.Error = ErrorMessageList.Merge(reply.Error, error); reply.Status = ReplyStatus.Error; } private sealed class DefaultMissingTernary2ndStringErrorFormatter : FSharpFunc, TAfterString>, ErrorMessageList> { public override ErrorMessageList 
Invoke(Tuple, TAfterString> value) { var position1 = value.Item1; var position2 = value.Item2; var op = value.Item3; return Errors.MissingTernary2ndString(position1, position2, op); } } private sealed class DefaultOperatorConflictErrorFormatter : OptimizedClosures.FSharpFunc, TAfterString>, Tuple, TAfterString>, ErrorMessageList> { public override ErrorMessageList Invoke(Tuple, TAfterString> arg1, Tuple, TAfterString> arg2) { return Errors.OperatorsConflict(arg1.Item1, arg1.Item2, arg2.Item1, arg2.Item2); } } } } ================================================ FILE: FParsecCS/Position.cs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2009 // License: Simplified BSD License. See accompanying documentation. using System; namespace FParsec { public sealed class Position : IEquatable, IComparable, IComparable { public long Index { get; private set; } public long Line { get; private set; } public long Column { get; private set; } public string StreamName { get; private set; } public Position(string streamName, long index, long line, long column) { StreamName = streamName; Index = index; Line = line; Column = column; } public override string ToString() { var ln = String.IsNullOrEmpty(StreamName) ? "(Ln: " : Text.Escape(StreamName, "", "(\"", "\", Ln: ", "", '"'); return ln + Line.ToString() + ", Col: " + Column.ToString() + ")"; } public override bool Equals(object obj) { return Equals(obj as Position); } public bool Equals(Position other) { return (object)this == (object)other || ( (object)other != null && Index == other.Index && Line == other.Line && Column == other.Column && StreamName == other.StreamName); } public static bool operator==(Position left, Position right) { return (object)left == null ? 
(object)right == null : left.Equals(right); } public static bool operator!=(Position left, Position right) { return !(left == right); } public override int GetHashCode() { return Index.GetHashCode(); } public static int Compare(Position left, Position right) { if ((object)left != null) return left.CompareTo(right); return (object)right == null ? 0 : -1; } public int CompareTo(Position other) { if ((object)this == (object)other) return 0; if ((object)other == null) return 1; int r = String.CompareOrdinal(StreamName, other.StreamName); if (r != 0) return r; r = Line.CompareTo(other.Line); if (r != 0) return r; r = Column.CompareTo(other.Column); if (r != 0) return r; return Index.CompareTo(other.Index); } int IComparable.CompareTo(object value) { Position position = value as Position; if ((object)position != null) return CompareTo(position); if (value == null) return 1; throw new ArgumentException("Object must be of type Position."); } } } ================================================ FILE: FParsecCS/Properties/AssemblyInfo.cs ================================================ using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; [assembly: ComVisible(false)] #if LOW_TRUST [assembly: System.Security.AllowPartiallyTrustedCallers] [assembly: System.Security.SecurityTransparent] #endif [assembly: InternalsVisibleTo ("FParsec" + FParsec.CommonAssemblyInfo.StrongNamePublicKey)] [assembly: InternalsVisibleTo (FParsec.CommonAssemblyInfo.TestAssemblyName + FParsec.CommonAssemblyInfo.StrongNamePublicKey)] namespace FParsec { internal static partial class CommonAssemblyInfo { public const string TestAssemblyName = "Test"; #if STRONG_NAME public const string StrongNamePublicKey = ", PublicKey=002400000480000094000000060200000024000052534131000400000100010077c6be48a40f5b" + "194ec9f992e5b512bbbba33e211354d9ee50c3214decddad8356470a9a19a9ee84637cbd6ff690" + 
"9527d3973741dbe0a69b1461eeae774af9a78de45618ffd6fe7c7d52e0441b92f3bc7e8fb5757f" + "b8b1611a0b6b8c9f9ef64edcf51d44218ae040f3015373fd261d30f8e1f5a1f914fd9ebcde7d7e" + "f42dbaa5"; #else public const string StrongNamePublicKey = ""; #endif }; } ================================================ FILE: FParsecCS/Reply.cs ================================================ // Copyright (c) Stephan Tolksdorf 2008-2010 // License: Simplified BSD License. See accompanying documentation. using System; namespace FParsec { public enum ReplyStatus { Ok = 1, Error = 0, FatalError = -1 } [System.Diagnostics.DebuggerDisplay("{GetDebuggerDisplay(),nq}")] public struct Reply : IEquatable> { public ErrorMessageList Error; public TResult Result; public ReplyStatus Status; public Reply(TResult result) { Result = result; Error = null; Status = ReplyStatus.Ok; } public Reply(ReplyStatus status, ErrorMessageList error) { Status = status; Error = error; Result = default(TResult); } public Reply(ReplyStatus status, TResult result, ErrorMessageList error) { Status = status; Error = error; Result = result; } public override bool Equals(object other) { if (!(other is Reply)) return false; return Equals((Reply) other); } public bool Equals(Reply other) { return Status == other.Status && (Status != ReplyStatus.Ok || FastGenericEqualityERComparer.Instance.Equals(Result, other.Result)) && Error == other.Error; } public override int GetHashCode() { return (int)Status ^ (Status != ReplyStatus.Ok ? 0 : FastGenericEqualityERComparer.Instance.GetHashCode(Result)); } public static bool operator==(Reply r1, Reply r2) { return r1.Equals(r2); } public static bool operator!=(Reply r1, Reply r2) { return !r1.Equals(r2); } private string GetDebuggerDisplay() { if (Status == ReplyStatus.Ok) { string result; if (Result == null) result = typeof(TResult) == typeof(Microsoft.FSharp.Core.Unit) ? 
"()" : "null"; else if (typeof(TResult) == typeof(string)) result = Text.DoubleQuote(Result.ToString()); else result = Result.ToString(); return Error == null ? "Reply(" + result + ")" : "Reply(Ok, " + result + ", " + ErrorMessageList.GetDebuggerDisplay(Error) + ")"; } else { var status = Status == ReplyStatus.Error ? "Error" : Status == ReplyStatus.FatalError ? "FatalError" : "(ReplyStatus)" + ((int)Status).ToString(); return Error == null ? "Reply(" + status + ", NoErrorMessages)" : "Reply(" + status + ", " + ErrorMessageList.GetDebuggerDisplay(Error) + ")"; } } } } ================================================ FILE: FParsecCS/StringBuffer.cs ================================================ // Copyright (c) Stephan Tolksdorf 2009 // License: Simplified BSD License. See accompanying documentation. #if !LOW_TRUST #if DEBUG #define DEBUG_STRINGBUFFER #endif using System; using System.Runtime.InteropServices; using System.Diagnostics; namespace FParsec { /// A substring of a pinned string on the large object heap. /// StringBuffers are cached in a pool and hence need to be properly disposed. internal unsafe sealed class StringBuffer : IDisposable { private PoolSegment Segment; public string String { get { return Segment == null ? "" : Segment.String; } } public char* StringPointer { get { return Segment == null ? 
null : Segment.StringPointer; } }
    /// <summary>Index of the first char of this buffer within the segment string.</summary>
    public int Index { get; private set; }
    /// <summary>Number of chars in this buffer. Set to -1 by Dispose().</summary>
    public int Length { get; private set; }

    private StringBuffer(PoolSegment segment, int index, int length) {
        Segment = segment;
        Index = index;
        Length = length;
    }

    /// <summary>A free range of chars inside a PoolSegment. Each free chunk is a node in
    /// two intrusive doubly-linked lists: the per-segment list ordered by index and the
    /// global list ordered by size.</summary>
    private sealed class FreeChunk {
        public PoolSegment Segment;

        // free chunks in each segment form a doubly-linked list ordered by index
        public FreeChunk PrevInSegment;
        public FreeChunk NextInSegment;

        public static FreeChunk Smallest;
        public static FreeChunk Largest;

        // all free chunks together form a doubly-linked list ordered by size
        public FreeChunk PrevInSize;
        public FreeChunk NextInSize;

        public int Index;
        public int Size;

        /// <summary>Creates the first free chunk of a segment that has no free chunk yet
        /// and inserts it into the global size list.</summary>
        public FreeChunk(PoolSegment segment, int index, int size) {
            Debug.Assert(segment.FirstFreeChunk == null && index >= 0 && size > 0 && index + size <= segment.Size);
            Segment = segment;
            Index = index;
            Size = size;
            segment.FirstFreeChunk = this;
            InsertIntoSizeList();
        }

        /// <summary>Creates a free chunk between the given neighbours in the segment list
        /// (either may be null) and inserts it into the global size list.</summary>
        public FreeChunk(PoolSegment segment, FreeChunk prevInSegment, FreeChunk nextInSegment, int index, int size) {
            Debug.Assert(index >= 0 && size > 0 && index + size <= segment.Size);
            Segment = segment;
            Index = index;
            Size = size;
            PrevInSegment = prevInSegment;
            NextInSegment = nextInSegment;
            if (prevInSegment != null) {
                // strict '<': a chunk directly adjacent to a neighbour is expected to have been merged instead
                Debug.Assert(prevInSegment.Index + prevInSegment.Size < index);
                prevInSegment.NextInSegment = this;
            } else {
                Debug.Assert(segment.FirstFreeChunk == nextInSegment);
                segment.FirstFreeChunk = this;
            }
            if (nextInSegment != null) {
                Debug.Assert(index + size < nextInSegment.Index);
                nextInSegment.PrevInSegment = this;
            }
            InsertIntoSizeList();
        }

        /// <summary>Links this chunk into the global size list: appended at the end if it is
        /// at least as large as the current largest chunk, otherwise inserted directly before
        /// the largest chunk and then moved towards the smaller end as far as necessary.</summary>
        private void InsertIntoSizeList() {
            var largest = FreeChunk.Largest;
            if (largest != null) {
                if (largest.Size <= Size) {
                    largest.NextInSize = this;
                    PrevInSize = largest;
                    FreeChunk.Largest = this;
                } else {
                    NextInSize = largest;
                    var prev = largest.PrevInSize;
                    largest.PrevInSize = this;
                    if (prev != null) {
                        PrevInSize = prev;
                        prev.NextInSize = this;
                        if (Size < prev.Size) MoveAfterSizeHasDecreased();
                    } else
                        FreeChunk.Smallest = this;
                }
            } else {
                // the size list was empty
                FreeChunk.Smallest = this;
                FreeChunk.Largest = this;
            }
        }

        /// <summary>Unlinks this chunk from both the segment list and the size list.</summary>
        public void Remove() {
            var prev = PrevInSegment;
            var next = NextInSegment;
            if (prev != null) prev.NextInSegment = next;
            else Segment.FirstFreeChunk = next;
            if (next != null) next.PrevInSegment = prev;

            prev = PrevInSize;
            next = NextInSize;
            if (prev != null) prev.NextInSize = next;
            else Smallest = next;
            if (next != null) next.PrevInSize = prev;
            else Largest = prev;
        }

        // the following two methods are dual to each other,
        // i.e. one can be transformed into the other by way of simple search & replace

        /// <summary>Restores the size ordering after this chunk has become smaller than its
        /// predecessor in the size list. Requires PrevInSize != null.</summary>
        public void MoveAfterSizeHasDecreased() {
            Debug.Assert(Size < PrevInSize.Size);
            var prev = PrevInSize;
            var next = NextInSize;
            // unlink this chunk from its current position
            if (next != null) next.PrevInSize = prev;
            else Largest = prev;
            prev.NextInSize = next;
            // walk towards the smaller end until the predecessor is no larger than this chunk
            next = prev;
            prev = prev.PrevInSize;
            while (prev != null && prev.Size > Size) {
                next = prev;
                prev = prev.PrevInSize;
            }
            // relink between prev and next
            NextInSize = next;
            next.PrevInSize = this;
            PrevInSize = prev;
            if (prev != null) prev.NextInSize = this;
            else Smallest = this;
        }

        /// <summary>Restores the size ordering after this chunk has become larger than its
        /// successor in the size list. Requires NextInSize != null.</summary>
        public void MoveAfterSizeHasIncreased() {
            Debug.Assert(Size > NextInSize.Size);
            var next = NextInSize;
            var prev = PrevInSize;
            // unlink this chunk from its current position
            if (prev != null) prev.NextInSize = next;
            else Smallest = next;
            next.PrevInSize = prev;
            // walk towards the larger end until the successor is no smaller than this chunk
            prev = next;
            next = next.NextInSize;
            while (next != null && next.Size < Size) {
                prev = next;
                next = next.NextInSize;
            }
            // relink between prev and next
            PrevInSize = prev;
            prev.NextInSize = this;
            NextInSize = next;
            if (next != null) next.PrevInSize = this;
            else Largest = this;
        }
    }

    private const int MinChunkSize = 1536; // 3 * 2^9

    // segment sizes must be a multiple of MinChunkSize and large enough to be allocated on the LargeObjectHeap
    private const int FirstSegmentSmallSize =  42 * MinChunkSize; //  64 512
    private const int FirstSegmentLargeSize = 128 * MinChunkSize; // 3 * 2^16 = 196 608 (default CharStream block size)
    private const int MaxSegmentSize        = 640 * MinChunkSize; // 983 040

    private static int MaxNumberOfUnusedSegments = 3;
    private static int NumberOfUnusedSegments;

    /// <summary>A pinned string from which StringBuffers are carved.</summary>
    private sealed class PoolSegment : IDisposable {
        // segments form a doubly-linked list in the order they were constructed

        /// <summary>the last allocated segment</summary>
        private static PoolSegment Last;

        private PoolSegment Next;
        private PoolSegment Prev;

        public string String { get; private set; }
        /// <summary>String.Length - x, where x > 0</summary>
        public int Size { get; private set; }

        public char* StringPointer { get; private set; }
        private GCHandle StringHandle;

        public FreeChunk FirstFreeChunk;

        /// <summary>Allocates and pins a new segment string and appends the segment to the
        /// segment list. The first firstBufferSize chars are treated as allocated; any
        /// remainder becomes the segment's first free chunk.</summary>
        public PoolSegment(int size, int firstBufferSize) {
            Debug.Assert(firstBufferSize > 0 && firstBufferSize <= size && (size <= MaxSegmentSize || firstBufferSize == size));
            // + 1, so that no chunk can span the full string, which helps avoiding accidentally passing a reference to the internal buffer string to the "outside world"
            String = new String('\u0000', size + 1);
            Size = size;
            StringHandle = GCHandle.Alloc(String, GCHandleType.Pinned);
            StringPointer = (char*)StringHandle.AddrOfPinnedObject();
            if (Last != null) {
                Last.Next = this;
                Prev = Last;
            }
            Last = this;
            if (firstBufferSize < size)
                new FreeChunk(this, firstBufferSize, size - firstBufferSize); // inserts itself into the lists
        }

        /// <summary>Unlinks the segment from the segment list and frees the pinning handle.
        /// Does nothing if the segment has already been disposed (StringPointer == null).
        /// Throws an InvalidOperationException if the segment still has a free chunk linked.</summary>
        public void Dispose() {
            if (StringPointer != null) {
                Debug.Assert(FirstFreeChunk == null);
                if (FirstFreeChunk != null) throw new InvalidOperationException();
                if (Prev != null) Prev.Next = Next;
                if (Next != null) Next.Prev = Prev;
                else Last = Prev;
                StringPointer = null;
                StringHandle.Free();
            }
        }

        /// <summary>Creates a new segment and returns a buffer of the given length at its start.
        /// A length above MaxSegmentSize gets a segment of exactly that length. Otherwise the
        /// first segment is created with FirstSegmentSmallSize or FirstSegmentLargeSize if the
        /// length fits, and every other segment with MaxSegmentSize.</summary>
        public static StringBuffer AllocateStringBufferInNewSegment(int length) {
            int segmentSize = length > MaxSegmentSize
                              ? length
                              : (Last == null && length <= FirstSegmentLargeSize)
                                ? (length <= FirstSegmentSmallSize ? FirstSegmentSmallSize : FirstSegmentLargeSize)
                                : MaxSegmentSize;
            return new StringBuffer(new PoolSegment(segmentSize, length), 0, length);
        }

        /// <summary>Checks the invariants of the segment list, of this segment's free chunk
        /// list and of the global size list. Only compiled into calls when
        /// DEBUG_STRINGBUFFER is defined.</summary>
        [Conditional("DEBUG_STRINGBUFFER")]
        public void AssertIntegrity() {
            Debug.Assert(StringPointer != null);
            int sumOfSegmentSizes = 0;
            { // check list of segments
                var segment = Last;
                Debug.Assert(segment.Next == null);
                var prev = segment.Prev;
                sumOfSegmentSizes += segment.Size;
                bool visitedThis = segment == this;
                while (prev != null) {
                    Debug.Assert(segment == prev.Next);
                    segment = prev;
                    prev = prev.Prev;
                    sumOfSegmentSizes += segment.Size;
                    visitedThis = visitedThis || segment == this;
                }
                Debug.Assert(visitedThis);
            }
            { // check segment list of free chunks ordered by index
                var chunk = FirstFreeChunk;
                if (chunk != null) {
                    Debug.Assert(   chunk.Index >= 0 && chunk.Size > 0
                                 && (chunk.PrevInSize != null ? chunk.Size >= chunk.PrevInSize.Size : chunk == FreeChunk.Smallest)
                                 && (chunk.NextInSize != null ? chunk.Size <= chunk.NextInSize.Size : chunk == FreeChunk.Largest));
                    int chunkEnd = chunk.Index + chunk.Size;
                    var next = chunk.NextInSegment;
                    while (next != null) {
                        Debug.Assert(   (chunk == next.PrevInSegment && chunkEnd < next.Index && next.Size > 0)
                                     && (next.PrevInSize != null ? next.Size >= next.PrevInSize.Size : next == FreeChunk.Smallest)
                                     && (next.NextInSize != null ? next.Size <= next.NextInSize.Size : next == FreeChunk.Largest));
                        chunk = next;
                        chunkEnd = chunk.Index + chunk.Size;
                        next = chunk.NextInSegment;
                    }
                    Debug.Assert(chunkEnd <= Size);
                }
            }
            { // check global list of free chunks ordered by size
                int free = 0;
                var chunk = FreeChunk.Smallest;
                if (chunk == null) Debug.Assert(FreeChunk.Largest == null);
                else {
                    Debug.Assert(chunk.Size > 0 && chunk.PrevInSize == null);
                    free += chunk.Size;
                    var next = chunk.NextInSize;
                    while (next != null) {
                        Debug.Assert(chunk == next.PrevInSize && chunk.Size <= next.Size);
                        chunk = next;
                        free += chunk.Size;
                        next = chunk.NextInSize;
                    }
                    Debug.Assert(chunk == FreeChunk.Largest);
                }
                Debug.Assert(Allocated == sumOfSegmentSizes - free);
            }
        }
    }

    /// <summary>Sum of the lengths of all currently allocated StringBuffers</summary>
    private static int Allocated = 0;

    // guards all static pool state (Allocated, the free chunk lists, the segment list)
    // in Create and Dispose
    private static object SyncRoot = new Object();

    /// <summary>Returns a StringBuffer whose Length is minLength rounded up to a multiple
    /// of MinChunkSize. The buffer is carved from the front of a pooled free chunk if one
    /// is large enough, otherwise from a newly allocated segment. For minLength == 0 a
    /// buffer without a segment and with Length 0 is returned.</summary>
    /// <exception cref="ArgumentOutOfRangeException">minLength is negative, or so large
    /// that minLength + (MinChunkSize - 1) overflows.</exception>
    public static StringBuffer Create(int minLength) {
        // an overflow wraps to a negative value and is rejected in the else branch below
        int size = unchecked(minLength + (MinChunkSize - 1));
        if (size > (MinChunkSize - 1)) { // minLength > 0 && minLength <= System.Int32.MaxValue - (MinChunkSize - 1)
            size -= (int)((uint)size%(uint)MinChunkSize); // round down to multiple of MinChunkSize
            lock (SyncRoot) {
                Allocated += size;
                FreeChunk chunk = FreeChunk.Largest;
                if (chunk != null) { // find smallest free chunk that is large enough to hold the buffer
                    if (size > 10*MinChunkSize) {
                        // larger requests: search downwards from the largest chunk
                        var prev = chunk.PrevInSize;
                        while (prev != null && prev.Size >= size) {
                            chunk = prev;
                            prev = prev.PrevInSize;
                        }
                    } else {
                        // smaller requests: search upwards from the smallest chunk
                        chunk = FreeChunk.Smallest;
                        var next = chunk.NextInSize;
                        while (chunk.Size < size && next != null) {
                            chunk = next;
                            next = next.NextInSize;
                        }
                    }
                    if (size <= chunk.Size) {
                        int index = chunk.Index;
                        // the chunk spans the whole segment, i.e. the segment was unused until now
                        if (index == 0 && chunk.Size == chunk.Segment.Size) --NumberOfUnusedSegments;
                        if (size != chunk.Size) {
                            // take the buffer from the front of the chunk and keep the rest as free chunk
                            chunk.Index += size;
                            chunk.Size -= size;
                            var prev = chunk.PrevInSize;
                            if (prev != null && chunk.Size < prev.Size) chunk.MoveAfterSizeHasDecreased();
                        } else
                            chunk.Remove();
                        chunk.Segment.AssertIntegrity();
                        return new StringBuffer(chunk.Segment, index, size);
                    }
                }
                return PoolSegment.AllocateStringBufferInNewSegment(size);
            }
        } else {
            if (minLength < 0) throw new ArgumentOutOfRangeException("minLength", "minLength is negative.");
            else if (minLength > 0) throw new ArgumentOutOfRangeException("minLength", "minLength is too large. The maximum string buffer length is approximately 2^30.");
            return new StringBuffer(null, 0, 0);
        }
    }

    /// <summary>Returns the buffer's chars to the pool, merging them with directly adjacent
    /// free chunks of the same segment. A segment that becomes completely free is disposed
    /// if the number of unused segments would otherwise exceed MaxNumberOfUnusedSegments.
    /// Buffers larger than MaxSegmentSize dispose their segment directly.
    /// Repeated calls are no-ops because Length is set to -1 on the first call.</summary>
    public void Dispose() {
        int size = Length;
        Length = -1;
        if (size > 0) {
            lock (SyncRoot) {
                Allocated -= size;
                if (size <= MaxSegmentSize) {
                    // find the free chunks directly before (prev) and after (next) this buffer
                    FreeChunk prev = null;
                    FreeChunk next = Segment.FirstFreeChunk;
                    while (next != null && Index > next.Index) {
                        prev = next;
                        next = next.NextInSegment;
                    }
                    if (prev == null || prev.Index + prev.Size != Index) {
                        // not adjacent to prev
                        if (next != null && Index + size == next.Index) {
                            // adjacent to next: extend next to the front
                            next.Index = Index;
                            next.Size += size;
                            var nextNext = next.NextInSize;
                            if (nextNext != null && next.Size > nextNext.Size) next.MoveAfterSizeHasIncreased();
                        } else {
                            new FreeChunk(Segment, prev, next, Index, size); // inserts itself into the lists
                        }
                    } else {
                        // adjacent to prev: extend prev to the back
                        if (next != null && Index + size == next.Index) {
                            // also adjacent to next: merge all three ranges into prev
                            prev.Size += size + next.Size;
                            next.Remove();
                        } else {
                            prev.Size += size;
                        }
                        if (prev.NextInSize != null && prev.Size > prev.NextInSize.Size) prev.MoveAfterSizeHasIncreased();
                    }
                    Segment.AssertIntegrity();
                    var first = Segment.FirstFreeChunk;
                    if (first.Size == Segment.Size && ++NumberOfUnusedSegments > MaxNumberOfUnusedSegments) {
                        // too many completely free segments: release this one
                        --NumberOfUnusedSegments;
                        first.Remove();
                        Segment.Dispose();
                    }
                } else { // size > MaxSegmentSize
                    Debug.Assert(size == Segment.Size);
                    Segment.Dispose();
                }
            }
        }
    }
}

}

#endif
================================================
FILE: FParsecCS/Strings.cs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2011
// License: Simplified BSD License. See accompanying documentation.
using System;

namespace FParsec {

/// <summary>The label and message strings used in FParsec error messages, together with
/// the helper functions that format the parameterized messages.</summary>
internal static class Strings {
    static internal string Quote(string stringToQuote) {
        return Text.SingleQuote(stringToQuote);
    }

    static internal string Quote(string prefix, string stringToQuote, string postfix) {
        return Text.SingleQuote(prefix, stringToQuote, postfix);
    }

    static internal string AsciiQuote(string prefix, string stringToQuote, string postfix) {
        return Text.AsciiEscape(stringToQuote, prefix, "'", "'", postfix, '\'');
    }

    static internal string QuoteCaseInsensitive(string caseInsensitiveStringToQuote) {
        return Quote("", caseInsensitiveStringToQuote, " (case-insensitive)");
    }

    /// <summary>Returns the English ordinal suffix ("st", "nd", "rd" or "th") for a positive number.</summary>
    /// <exception cref="ArgumentOutOfRangeException">value is less than 1.</exception>
    static private string OrdinalEnding(int value) {
        if (value < 1) throw new ArgumentOutOfRangeException("value", "The value must be greater than 0.");
        var n100 = value%100;
        var n10 = value%10;
        // 11, 12 and 13 take "th" (11th, 12th, 13th)
        if (n100 < 11 || n100 > 13) {
            if (n10 == 1) return "st";
            if (n10 == 2) return "nd";
            if (n10 == 3) return "rd";
        }
        return "th";
    }

    public static readonly string EndOfInput = "end of input";
    public static readonly string AnyChar = "any char";
    public static readonly string Whitespace = "whitespace";
    public static readonly string AsciiUppercaseLetter = "Ascii uppercase letter";
    public static readonly string AsciiLowercaseLetter = "Ascii lowercase letter";
    public static readonly string AsciiLetter = "Ascii letter";
    public static readonly string UppercaseLetter = "uppercase letter";
    public static readonly string LowercaseLetter = "lowercase letter";
    public static readonly string Letter = "letter";
    public static readonly string BinaryDigit = "binary digit";
    public static readonly string OctalDigit = "octal digit";
    public static readonly string DecimalDigit = "decimal digit";
    public static readonly string HexadecimalDigit = "hexadecimal digit";
    public static readonly string Newline = "newline";
    public static readonly string Tab = "tab";
    public static readonly string FloatingPointNumber = "floating-point number";
    public static readonly string Int64 = "integer number (64-bit, signed)";
    public static readonly string Int32 = "integer number (32-bit, signed)";
    public static readonly string Int16 = "integer number (16-bit, signed)";
    public static readonly string Int8 = "integer number (8-bit, signed)";
    public static readonly string UInt64 = "integer number (64-bit, unsigned)";
    public static readonly string UInt32 = "integer number (32-bit, unsigned)";
    public static readonly string UInt16 = "integer number (16-bit, unsigned)";
    public static readonly string UInt8 = "integer number (8-bit, unsigned)";
    public static readonly string Identifier = "identifier";

    public static readonly string IdentifierContainsInvalidCharacterAtIndicatedPosition = "The identifier contains an invalid character at the indicated position.";
    // fixed typo in the message text: "floating-pointer" -> "floating-point"
    public static readonly string NumberOutsideOfDoubleRange = "This number is outside the allowable range for double precision floating-point numbers.";
    public static readonly string NumberOutsideOfInt64Range = "This number is outside the allowable range for signed 64-bit integers.";
    public static readonly string NumberOutsideOfInt32Range = "This number is outside the allowable range for signed 32-bit integers.";
    public static readonly string NumberOutsideOfInt16Range = "This number is outside the allowable range for signed 16-bit integers.";
    public static readonly string NumberOutsideOfInt8Range = "This number is outside the allowable range for signed 8-bit integers.";
    public static readonly string NumberOutsideOfUInt64Range = "This number is outside the allowable range for unsigned 64-bit integers.";
    public static readonly string NumberOutsideOfUInt32Range = "This number is outside the allowable range for unsigned 32-bit integers.";
    public static readonly string NumberOutsideOfUInt16Range = "This number is outside the allowable range for unsigned 16-bit integers.";
    public static readonly string NumberOutsideOfUInt8Range = "This number is outside the allowable range for unsigned 8-bit integers.";

    public static readonly string InfixOperator = "infix operator";
    public static readonly string TernaryOperator = "ternary operator";
    public static readonly string PrefixOperator = "prefix operator";
    public static readonly string PostfixOperator = "postfix operator";

    // Strings with a numeric suffix are the fixed parts of a message that are
    // concatenated around the variable part(s).
    private static readonly string AnyCharIn1 = "any char in ";
    private static readonly string AnyCharIn2 = "";
    private static readonly string AnyCharNotIn1 = "any char not in ";
    private static readonly string AnyCharNotIn2 = "";
    private static readonly string AnySequenceOfNChars1 = "any sequence of ";
    private static readonly string AnySequenceOfNChars2 = " chars";
    private static readonly string CouldNotFindString1 = "Could not find the string ";
    private static readonly string CouldNotFindString2 = ".";
    private static readonly string CouldNotFindCaseInsensitiveString1 = "Could not find the case-insensitive string ";
    private static readonly string CouldNotFindCaseInsensitiveString2 = ".";
    private static readonly string StringMatchingRegex1 = "string matching the regex ";
    private static readonly string StringMatchingRegex2 = "";

    private static readonly string ErrorPositionStreamNameFormat = " {0}:";
    private static readonly string ErrorPositionUnaccountedNewlinesFormat = " (+{0})";
    private static readonly string ErrorPositionUtf16ColumnFormat = " (UTF16-Col: {0})";
    private static readonly string ErrorPositionFormat = "Error in{0} Ln: {1}{2} Col: {3}{4}";
    // 0: ErrorPositionStreamName or ""
    // 1: line
    // 2: ErrorPositionUnaccountedNewlines or ""
    // 3: column
    // 4: ErrorPositionUtf16Col

    /// <summary>Formats the error position header from the position's stream name (if any), line and column.</summary>
    public static string ErrorPosition(Position position) {
        var name = string.IsNullOrEmpty(position.StreamName) ? "" : string.Format(ErrorPositionStreamNameFormat, position.StreamName);
        return string.Format(ErrorPositionFormat, name, position.Line, "", position.Column, "");
    }

    /// <summary>Formats the error position header with an explicitly given column. The number
    /// of unaccounted newlines is included if it is non-zero, the UTF-16 column if it differs
    /// from column.</summary>
    public static string ErrorPosition(Position position, int unaccountedNewlines, long column, long utf16Column) {
        var name = string.IsNullOrEmpty(position.StreamName) ? "" : string.Format(ErrorPositionStreamNameFormat, position.StreamName);
        var nlCorrection = unaccountedNewlines == 0 ? "" : string.Format(ErrorPositionUnaccountedNewlinesFormat, unaccountedNewlines);
        var utf16Col = column == utf16Column ? "" : string.Format(ErrorPositionUtf16ColumnFormat, utf16Column);
        return string.Format(ErrorPositionFormat, name, position.Line, nlCorrection, column, utf16Col);
    }

    public static readonly string Note = "Note: ";
    public static readonly string Expecting = "Expecting: ";
    public static readonly string Unexpected = "Unexpected: ";
    public static readonly string Comma = ", ";
    public static readonly string Or = " or ";
    public static readonly string And = " and ";

    private static readonly string CompoundCouldNotBeParsedBecauseFormat = "{0} could not be parsed because: ";

    public static string CompoundCouldNotBeParsedBecause(string compoundLabel) {
        return string.Format(CompoundCouldNotBeParsedBecauseFormat, compoundLabel);
    }

    public static readonly string ParserBacktrackedAfter = "The parser backtracked after: ";
    public static readonly string OtherErrors = "Other error messages: ";
    public static readonly string UnknownErrors = "Unknown Error(s)";
    public static readonly string Utf16ColumnCountOnlyCountsEachTabAs1Char = " The UTF-16 column count only counts each tab as 1 char.";
    public static readonly string ExactPositionBetweenCaretsDependsOnDisplayUnicodeCapabilities = "The exact error position between the two ^ depends on the unicode capabilities of the display.";
    public static readonly string ErrorOccurredAtEndOfInputStream = "The error occurred at the end of the input stream.";
    public static readonly string ErrorOccurredOnAnEmptyLine = "The error occurred on an empty line.";
    public static readonly string ErrorOccurredAtEndOfLine = "The error occurred at the end of the line.";
    // Two fixes: "occured" -> "occurred", and the backslashes are now escaped so that the
    // message shows the text \r\n instead of containing an actual CR LF line break.
    public static readonly string ErrorOccurredAtSecondCharInNewline = "The error occurred at the 2nd char in the newline char sequence '\\r\\n'.";

    private static readonly string NonAssociative = "non-associative";
    private static readonly string LeftAssociative = "left-associative";
    private static readonly string RightAssociative = "right-associative";

    private static readonly string OperatorToStringFormat = "{0} {1} (precedence: {2}{3}{4})";
    // 0: InfixOperator/TernaryOperator/...
    // 1: operator strings
    // 2: precedence
    // 3: Comma if 4 is not empty, otherwise empty
    // 4: LeftAssociative/RightAssociative/... or empty if operator is an associative prefix or postfix operator

    // It would be more precise to write "UTF-16 column" here,
    // but that would probably only confuse users in most situations.
    private static readonly string RelativePositionOnTheSameLine = "on the same line at column {0}";
    private static readonly string RelativePositionOnPreviousLine = "on the previous line column {0}";
    private static readonly string RelativePositionOnLineAbove = "{0} lines above column {1}";
    // removed the stray space before ')' so that the format matches RelativePositionInDifferentFile
    private static readonly string RelativePositionOnDifferentLine = "at (Ln: {0}, Col: {1})";
    private static readonly string RelativePositionInDifferentFile = "at ({0}, Ln: {1}, Col: {2})";

    private static readonly string OperatorsConflictsFormat = "The {1} conflicts with the {0} {2}.";
    // 0: previous operator
    // 1: current operator
    // 2: relative position of previous operator

    private static readonly string OperatorStringIsRightPartOfTernaryOperatorFormat = "{0} is the right part of the ternary operator {1}. The left part is {2}.";

    private static readonly string ColumnCountAssumesTabStopDistanceOfNChars1 = "The column count assumes a tab stop distance of ";
    private static readonly string ColumnCountAssumesTabStopDistanceOfNChars2 = " chars.";

    private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence1 = "The error occurred at the ";
    private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence2 = " char in the combining character sequence ";
    private static readonly string ErrorOccurredAtNthCharInCombiningCharacterSequence3 = ".";

    private static readonly string InputContainsAtLeastNUnaccountedNewlines1 = "The input contains at least ";
    private static readonly string InputContainsAtLeastNUnaccountedNewlines2Singular = " newline in the input that wasn't properly registered in the parser stream state.";
    private static readonly string InputContainsAtLeastNUnaccountedNewlines2Plural = " newlines in the input that weren't properly registered in the parser stream state.";

    private static readonly string ErrorOccurredAtBeginningOfSurrogatePair1 = "The error occurred at the beginning of the surrogate pair ";
    private static readonly string ErrorOccurredAtBeginningOfSurrogatePair2 = ".";

    private static readonly string ErrorOccurredAtSecondCharInSurrogatePair1 = "The error occurred at the second char in the surrogate pair ";
    private static readonly string ErrorOccurredAtSecondCharInSurrogatePair2 = ".";

    private static readonly string CharAtErrorPositionIsIsolatedHighSurrogate1 = "The char at the error position ('";
    private static readonly string CharAtErrorPositionIsIsolatedHighSurrogate2 = "') is an isolated high surrogate.";

    private static readonly string CharAtErrorPositionIsIsolatedLowSurrogate1 = "The char at the error position ('";
    private static readonly string CharAtErrorPositionIsIsolatedLowSurrogate2 = "') is an isolated low surrogate.";

    private static readonly string CharBeforeErrorPositionIsIsolatedHighSurrogate1 = "The char before the error position ('";
    private static readonly string CharBeforeErrorPositionIsIsolatedHighSurrogate2 = "') is an isolated high surrogate.";

    private static readonly string CharBeforeErrorPositionIsIsolatedLowSurrogate1 = "The char before the error position ('";
    private static readonly string CharBeforeErrorPositionIsIsolatedLowSurrogate2 = "') is an isolated low surrogate.";

    public static string AnyCharIn(string chars) {
        //return Quote(Strings.AnyCharIn1, chars, Strings.AnyCharIn2);
        return Strings.AnyCharIn1 + "‘" + chars + "’" + Strings.AnyCharIn2; // Review: Should we use different quotes if the string contains ‘ or ’ chars?
    }

    public static string AnyCharNotIn(string chars) {
        //return Quote(Strings.AnyCharNotIn1, chars, Strings.AnyCharNotIn2);
        return Strings.AnyCharNotIn1 + "‘" + chars + "’" + Strings.AnyCharNotIn2;
    }

    public static string StringMatchingRegex(string regexPattern) {
        return Quote(Strings.StringMatchingRegex1, regexPattern, Strings.StringMatchingRegex2);
    }

    public static string ExpectedAnySequenceOfNChars(int n) {
        return Strings.AnySequenceOfNChars1 + n.ToString() + Strings.AnySequenceOfNChars2;
    }

    public static string CouldNotFindString(string str) {
        return Quote(Strings.CouldNotFindString1, str, Strings.CouldNotFindString2);
    }

    public static string CouldNotFindCaseInsensitiveString(string str) {
        return Quote(Strings.CouldNotFindCaseInsensitiveString1, str, Strings.CouldNotFindCaseInsensitiveString2);
    }

    // NOTE(review): Operator is written without type arguments in the three methods below.
    // If Operator is declared as a generic class in this project, the type parameter lists
    // were lost in this copy of the file and need to be restored - confirm against the
    // Operator declaration.

    /// <summary>Describes an operator by its kind, its operator string(s), its precedence and,
    /// unless it is an associative prefix or postfix operator, its associativity.</summary>
    internal static string OperatorToString(Operator op) {
        var type = op.Type == OperatorType.Infix ? (op.IsTernary ? TernaryOperator : InfixOperator) :
                   op.Type == OperatorType.Prefix ? PrefixOperator : PostfixOperator;
        var opString = op.IsTernary ? Quote(Quote("", op.String, " "), op.TernaryRightString, "") : Quote(op.String);
        var comma = op.Type != OperatorType.Infix && op.IsAssociative ? "" : Comma;
        var assoc = op.Type != OperatorType.Infix
                    ? (op.IsAssociative ? "" : NonAssociative)
                    : (op.Associativity == Associativity.Left ? LeftAssociative :
                       op.Associativity == Associativity.Right ? RightAssociative : NonAssociative);
        return String.Format(OperatorToStringFormat, type, opString, op.Precedence, comma, assoc);
    }

    /// <summary>Describes previousPosition relative to currentPosition. Line differences of
    /// up to 3 within the same stream are described relatively; otherwise the line and column
    /// (and for a different stream also the quoted stream name) are given.</summary>
    private static string RelativePosition(Position previousPosition, Position currentPosition) {
        if (previousPosition.StreamName == currentPosition.StreamName) {
            if (previousPosition.Line == currentPosition.Line)
                return String.Format(RelativePositionOnTheSameLine, previousPosition.Column);
            long diff = currentPosition.Line - previousPosition.Line;
            if (diff == 1)
                return String.Format(RelativePositionOnPreviousLine, previousPosition.Column);
            if (diff <= 3)
                return String.Format(RelativePositionOnLineAbove, diff, previousPosition.Column);
            return String.Format(RelativePositionOnDifferentLine, previousPosition.Line, previousPosition.Column);
        }
        return String.Format(RelativePositionInDifferentFile, Quote(previousPosition.StreamName), previousPosition.Line, previousPosition.Column);
    }

    public static string OperatorsConflict(Position previousPosition, Operator previousOperator,
                                           Position currentPosition, Operator currentOperator)
    {
        var prevOpString = OperatorToString(previousOperator);
        var currentOpString = OperatorToString(currentOperator);
        var relativePosition = RelativePosition(previousPosition, currentPosition);
        return String.Format(OperatorsConflictsFormat, prevOpString, currentOpString, relativePosition);
    }

    public static string OperatorStringIsRightPartOfTernaryOperator(Position position1, Position position2, Operator op) {
        return String.Format(OperatorStringIsRightPartOfTernaryOperatorFormat,
                             Quote(op.TernaryRightString),
                             Quote(Quote("", op.String, " "), op.TernaryRightString, ""),
                             RelativePosition(position1, position2));
    }

    public static string ColumnCountAssumesTabStopDistanceOfNChars(int n) {
        return ColumnCountAssumesTabStopDistanceOfNChars1 + n.ToString() + ColumnCountAssumesTabStopDistanceOfNChars2;
    }

    public static string ErrorOccurredAtNthCharInCombiningCharacterSequence(int n, string textElement) {
        return AsciiQuote(ErrorOccurredAtNthCharInCombiningCharacterSequence1 + n.ToString() + OrdinalEnding(n) + ErrorOccurredAtNthCharInCombiningCharacterSequence2,
                          textElement,
                          ErrorOccurredAtNthCharInCombiningCharacterSequence3);
    }

    public static string InputContainsAtLeastNUnaccountedNewlines(int n) {
        return InputContainsAtLeastNUnaccountedNewlines1 + n.ToString()
               + (n == 1 ? InputContainsAtLeastNUnaccountedNewlines2Singular
                         : InputContainsAtLeastNUnaccountedNewlines2Plural);
    }

    public static string ErrorOccurredAtBeginningOfSurrogatePair(string surrogatePair) {
        return AsciiQuote(ErrorOccurredAtBeginningOfSurrogatePair1, surrogatePair, ErrorOccurredAtBeginningOfSurrogatePair2);
    }

    public static string ErrorOccurredAtSecondCharInSurrogatePair(string surrogatePair) {
        return AsciiQuote(ErrorOccurredAtSecondCharInSurrogatePair1, surrogatePair, ErrorOccurredAtSecondCharInSurrogatePair2);
    }

    public static string CharAtErrorPositionIsIsolatedHighSurrogate(char ch) {
        return CharAtErrorPositionIsIsolatedHighSurrogate1 + Text.HexEscape(ch) + CharAtErrorPositionIsIsolatedHighSurrogate2;
    }

    public static string CharAtErrorPositionIsIsolatedLowSurrogate(char ch) {
        return CharAtErrorPositionIsIsolatedLowSurrogate1 + Text.HexEscape(ch) + CharAtErrorPositionIsIsolatedLowSurrogate2;
    }

    public static string CharBeforeErrorPositionIsIsolatedHighSurrogate(char ch) {
        return CharBeforeErrorPositionIsIsolatedHighSurrogate1 + Text.HexEscape(ch) + CharBeforeErrorPositionIsIsolatedHighSurrogate2;
    }

    public static string CharBeforeErrorPositionIsIsolatedLowSurrogate(char ch) {
        return CharBeforeErrorPositionIsIsolatedLowSurrogate1 + Text.HexEscape(ch) + CharBeforeErrorPositionIsIsolatedLowSurrogate2;
    }
}

}
================================================
FILE: FParsecCS/Text.cs
================================================
// Copyright (c) Stephan Tolksdorf 2009-2010
// License: Simplified BSD License. See accompanying documentation.
using System;
using System.Text;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace FParsec {

public static class Text {

    /// <summary>Detects the presence of an encoding preamble in the first count bytes of the byte buffer.
    /// If detectEncoding is false, this function only searches for the preamble of the given default encoding,
    /// otherwise also for any of the standard unicode byte order marks (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE).
    /// If an encoding different from the given default encoding is detected, the new encoding
    /// is assigned to the encoding reference.
    /// Returns the number of bytes in the detected preamble, or 0 if no preamble is detected.
    /// </summary>
    internal static int DetectPreamble(byte[] buffer, int count, ref Encoding encoding, bool detectEncoding) {
        Debug.Assert(count >= 0);
        if (detectEncoding && count >= 2) {
            // The encoding reference is only replaced if its code page differs from the detected one.
            switch (buffer[0]) {
            case 0xEF:
                if (buffer[1] == 0xBB && count > 2 && buffer[2] == 0xBF) {
                    if (encoding.CodePage != 65001)
                        encoding = Encoding.UTF8;
                    return 3;
                }
            break;
            case 0xFE:
                if (buffer[1] == 0xFF) {
                    if (encoding.CodePage != 1201)
                        encoding = Encoding.BigEndianUnicode;
                    return 2;
                }
            break;
            case 0xFF:
                if (buffer[1] == 0xFE) {
                    // FF FE 00 00 is treated as the UTF-32 LE mark, not as the UTF-16 LE mark followed by a null char
                    if (count >= 4 && buffer[2] == 0x00 && buffer[3] == 0x00) {
                        if (encoding.CodePage != 12000)
                            encoding = Encoding.UTF32; // UTF-32 little endian
                        return 4;
                    } else {
                        if (encoding.CodePage != 1200)
                            encoding = Encoding.Unicode; // UTF-16 little endian
                        return 2;
                    }
                }
            break;
            case 0x00:
                if (buffer[1] == 0x00 && count >= 4 && buffer[2] == 0xFE && buffer[3] == 0xFF) {
                    if (encoding.CodePage != 12001)
                        encoding = new UTF32Encoding(true, true); // UTF-32 big endian
                    return 4;
                }
            break;
            }
        }
        // No byte order mark was detected: check for the preamble of the given encoding.
        // The element type of the span was missing in this copy of the file ("ReadOnlySpan preamble");
        // Encoding.Preamble returns a ReadOnlySpan<byte>.
    #if NETSTANDARD2_0
        byte[] preamble = encoding.GetPreamble();
    #else
        ReadOnlySpan<byte> preamble = encoding.Preamble;
    #endif
        if (preamble.Length > 0 && count >= preamble.Length) {
            int i = 0;
            // the loop ends at the first mismatch or returns once all preamble bytes have matched
            while (buffer[i] == preamble[i]) {
                if (++i == preamble.Length) return preamble.Length;
            }
        }
        return 0;
    }

#if !LOW_TRUST
    ///
/// <summary>Reads all remaining chars into the given buffer. If the remaining stream
    /// content holds more than the given maximum number of chars, an exception will be thrown.</summary>
    internal unsafe static int ReadAllRemainingCharsFromStream(char* buffer, int maxCount, byte[] byteBuffer, int byteBufferIndex, int byteBufferCount, System.IO.Stream stream, long streamPosition, Decoder decoder, bool flush) {
        Debug.Assert(maxCount > 0 && byteBufferIndex >= 0 && byteBufferIndex < byteBufferCount);
        fixed (byte* pBytes = byteBuffer) {
            int charCount = 0;
            while (true) {
                // number of not yet decoded bytes in the byte buffer
                int pendingBytes = byteBufferCount - byteBufferIndex;
                try {
                    charCount += decoder.GetChars(pBytes + byteBufferIndex, pendingBytes,
                                                  buffer + charCount, maxCount - charCount,
                                                  flush);
                } catch (DecoderFallbackException e) {
                    // attach the stream position of the invalid bytes before rethrowing
                    e.Data.Add("Stream.Position", streamPosition - pendingBytes + e.Index);
                    throw;
                }
                // the decoder was flushed in the call above, so all input has been decoded
                if (flush) return charCount;
                byteBufferIndex = 0; // GetChars consumed all bytes in the byte buffer
                byteBufferCount = stream.Read(byteBuffer, 0, byteBuffer.Length);
                streamPosition += byteBufferCount;
                // a read of 0 bytes triggers one last GetChars call that flushes the decoder
                flush = byteBufferCount == 0;
            }
        }
    }
#endif

    /// <summary>Returns a case-folded copy of the string argument. All chars are mapped
    /// using the (non-Turkic) 1-to-1 case folding mappings (v. 6.0) for Unicode code
    /// points in the Basic Multilingual Plane, i.e. code points below 0x10000.
    /// If the argument is null, null is returned.</summary>
#if LOW_TRUST static public string FoldCase(string str) { char[] cftable = CaseFoldTable.FoldedChars; if (str != null) { for (int i = 0; i < str.Length; ++i) { char c = str[i]; char cfc = cftable[c]; if (c != cfc) { StringBuilder sb = new StringBuilder(str); sb[i++] = cfc; for (; i < str.Length; ++i) { c = str[i]; cfc = cftable[c]; if (c != cfc) sb[i] = cfc; } return sb.ToString(); } } } return str; } #else static unsafe public string FoldCase(string str) { if (str != null) { fixed (char* src0 = str) { char* end = src0 + str.Length; char* cftable = CaseFoldTable.FoldedChars; char* src = src0; for (;;) { // src is null-terminated, so we can always read one char char c = *src; if (c == cftable[c]) { if (++src >= end) break; } else { string newString = new String('\u0000', str.Length); fixed (char* dst_ = newString) { src = src0; char* dst = dst_; do { *dst = cftable[*src]; ++src; ++dst; } while (src != end); } return newString; } } } } return str; } #endif #if !LOW_TRUST unsafe #endif static public char FoldCase(char ch) { return CaseFoldTable.FoldedChars[ch]; } internal static int FindNewlineOrEOSChar(string str) { int i; for (i = 0; i < str.Length; ++i) { char c = str[i]; // '\n' = '\u000A', '\r' = '\u000D' if (unchecked(c - 0xEu) < 0xFFFFu - 0xEu) continue; if (c == '\n' || c == '\r' || c == '\uffff') goto Return; } i = -1; Return: return i; } /// Returns the given string with all occurrences of "\r\n" and "\r" replaced /// by "\n". If the argument is null, null is returned. #if LOW_TRUST static public string NormalizeNewlines(string str) { if (str == null || str.Length == 0) return str; int nCR = 0; int nCRLF = 0; for (int i = 0; i < str.Length; ++i) { if (str[i] == '\r') { if (i + 1 < str.Length && str[i + 1] == '\n') ++nCRLF; else ++nCR; } } if (nCRLF == 0) { return nCR == 0 ? 
str : str.Replace('\r', '\n'); } else { return CopyWithNormalizedNewlines(str, 0, str.Length, nCRLF, nCR); } } static internal string CopyWithNormalizedNewlines(string src, int index, int length, int nCRLF, int nCR) { Debug.Assert(length > 0 && nCRLF >= 0 && nCR >= 0 && (nCRLF | nCR) != 0); if (nCRLF != 0) { StringBuilder sb = new StringBuilder(length - nCRLF); int end = index + length; int i0 = index; if (nCR == 0) { int nn = nCRLF; int i = index; for (;;) { char c = src[i++]; if (c == '\r') { sb.Append(src, i0, i - i0 - 1).Append('\n'); ++i; // skip over the '\n' in "\r\n" i0 = i; if (--nn == 0) break; } } } else { int nn = nCRLF + nCR; int i = index; for (;;) { char c = src[i++]; if (c == '\r') { sb.Append(src, i0, i - i0 - 1).Append('\n'); if (i < end && src[i] == '\n') ++i; // skip over the '\n' in "\r\n" i0 = i; if (--nn == 0) break; } } } if (i0 < end) sb.Append(src, i0, end - i0); return sb.ToString(); } else { return new StringBuilder(src, index, length, length).Replace('\r', '\n').ToString(); } } #else static unsafe public string NormalizeNewlines(string str) { int length; if (str == null || (length = str.Length) == 0) return str; fixed (char* src = str) { // the char buffer is guaranteed to be null-terminated (C# language specification on fixed statement) int nCR = 0; int nCRLF = 0; for (int i = 0; i < length; ++i) { if (src[i] == '\r') { if (src[i + 1] == '\n') ++nCRLF; // relies on null-termination else ++nCR; } } if (nCRLF == 0) { return nCR == 0 ? 
str : str.Replace('\r', '\n'); } else { return CopyWithNormalizedNewlines(src, length, nCRLF, nCR); } } } static unsafe internal string CopyWithNormalizedNewlines(char* src, int length, int nCRLF, int nCR) { Debug.Assert(length > 0 && nCRLF >= 0 && nCR >= 0 && (nCRLF | nCR) != 0); string newString = new String('\n', length - nCRLF); fixed (char* dst_ = newString) { char* dst = dst_; char* end = src + length; if (nCRLF != 0) { if (nCR == 0) { int nn = nCRLF; for (;;) { char c = *src; ++src; if (c != '\r') { *dst = c; ++dst; } else { ++src; // skip over the '\n' in "\r\n" *dst = '\n'; ++dst; if (--nn == 0) break; } } } else { int nn = nCRLF + nCR; for (;;) { char c = *src; ++src; if (c != '\r') { *dst = c; ++dst; } else { if (*src == '\n') ++src; // skip over the '\n' in "\r\n" (relies on null-termination) *dst = '\n'; ++dst; if (--nn == 0) break; } } } } else { int nn = nCR; for (;;) { char c = *src; ++src; if (c != '\r') { *dst = c; ++dst; } else { *dst = '\n'; ++dst; if (--nn == 0) break; } } } // copy remaining chars #if UNALIGNED_READS if (src != end) { uint len = Buffer.PositiveDistance(src, end); if ((unchecked((int)dst) & 2) != 0) { // align dest *dst = *src; ++src; ++dst; --len; } while (len >= 8) { ((int*)dst)[0] = ((int*)src)[0]; ((int*)dst)[1] = ((int*)src)[1]; ((int*)dst)[2] = ((int*)src)[2]; ((int*)dst)[3] = ((int*)src)[3]; src += 8; dst += 8; len -= 8; } if ((len & 4) != 0) { ((int*)dst)[0] = ((int*)src)[0]; ((int*)dst)[1] = ((int*)src)[1]; src += 4; dst += 4; } if ((len & 2) != 0) { ((int*)dst)[0] = ((int*)src)[0]; src += 2; dst += 2; } if ((len & 1) != 0) { *dst = *src; } } #else while (src < end) { *dst = *src; ++src; ++dst; } #endif } return newString; } #endif /// Returns System.Globalization.StringInfo(str).LengthInTextElements public static int CountTextElements(string str) { #if NET int count = 0, index = 0; while (index < str.Length) { index += System.Globalization.StringInfo.GetNextTextElementLength(str, index); count++; } return count; #else 
return new System.Globalization.StringInfo(str).LengthInTextElements; #endif } [Obsolete("Use System.Char.IsSurrogate instead.")] public static bool IsSurrogate(char ch) => char.IsSurrogate(ch); [Obsolete("Use System.Char.IsHighSurrogate instead.")] public static bool IsHighSurrogate(char ch) => char.IsHighSurrogate(ch); [Obsolete("Use System.Char.IsLowSurrogate instead.")] public static bool IsLowSurrogate(char ch) => char.IsLowSurrogate(ch); #if LOW_TRUST public static bool IsWhitespace(char ch) { return System.Char.IsWhiteSpace(ch); } #else internal unsafe static class IsWhitespaceHelper { // we use the same data structure and algorithm as for IdentifierValidator private static ReadOnlySpan DataArray => new byte[] { 0,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,3,1,1,1, 1,1,1,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,0,1,2,2,3,1,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,4,5,6,2, 2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,0,62,0,0,1,0,0,0,0,0,0,0, 32,0,0,0,255,7,0,0,0,131,0,0,0,0,0,128, }; private const int Table1Offset = 0; private const int Table1Size = 128; private const int Table1Log2Length = 7; private const int Table2Offset = 128; private const int Table2Size = 80; private const int Table2Log2BlockLength = 4; private const int Table3Offset = Table2Offset + Table2Size; private const int Table3Size = 28; private const int Table3Log2BlockLength = 5; #if LOW_TRUST private static ReadOnlySpan Table1 => DataArray.Slice(0, Table2Offset); private static ReadOnlySpan Table2 => DataArray.Slice(Table2Offset, Table2Size); private static readonly uint[] Table3 = Buffer.CopyUIntsStoredInLittleEndianByteArray(DataArray, Table3Offset, Table3Size); #else private static byte* Data => (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(DataArray)); private 
static byte* Table1 => Data + Table1Offset; private static byte* Table2 => Data + Table2Offset; private static readonly uint* Table3 = Buffer.LoadLittleEndianUInt32Data(Data, Table3Offset, Table3Size); #endif public static uint IsWhitespace_(char ch) { int cp = ch; int idx1 = cp >> (Table2Log2BlockLength + Table3Log2BlockLength); const int f2 = 1 << Table2Log2BlockLength; const int m2 = f2 - 1; int idx2 = Table1[idx1]*f2 + ((cp >> Table3Log2BlockLength) & m2); int idx3 = Table2[idx2]; return Table3[idx3] >> (cp /* & 0x1fu */); // C#'s operator>> masks with 0x1fu, no matter whether we do too } } /// A faster implementation of System.Char.IsWhiteSpace. public static bool IsWhitespace(char ch) { // should get inlined return (IsWhitespaceHelper.IsWhitespace_(ch) & 1u) != 0; } #endif internal static string HexEscape(char c) { Span cs = stackalloc char[6]; cs[0] = '\\'; cs[1] = 'u'; int n = c; for (int j = 5; j > 1; --j) { cs[j] = "0123456789abcdef"[n & 0xf]; n >>= 4; } return cs.ToString(); } internal static string EscapeChar(char c) { switch (c) { case '\\': return "\\\\"; case '\'': return "\\\'"; case '\"': return "\\\""; case '\r': return "\\r"; case '\n': return "\\n"; case '\t': return "\\t"; case '\f': return "\\f"; case '\v': return "\\v"; case '\a': return "\\a"; case '\b': return "\\b"; default: return HexEscape(c); } } internal static string Concat(string str0, string str1, string str2, string str3, string str4) { #if !NET return str0 + str1 + str2 + str3 + str4; #else int length = str0.Length + str1.Length + str2.Length + str3.Length + str4.Length; return string.Create(length, (str0, str1, str2, str3, str4), static (span, x) => { int i = 0; x.str0.CopyTo(span[i..]); i += x.str0.Length; x.str1.CopyTo(span[i..]); i += x.str1.Length; x.str2.CopyTo(span[i..]); i += x.str2.Length; x.str3.CopyTo(span[i..]); i += x.str3.Length; x.str4.CopyTo(span[i..]); }); #endif } internal static string Escape(string str, string prefix1, string prefix2, string postfix1, string 
postfix2, char escapedQuoteChar) { Debug.Assert(str != null && prefix1 != null && prefix2 != null && postfix1 != null && postfix2 != null); StringBuilder sb = null; int i0 = 0; int i = 0; for (;;) { if (i >= str.Length) break; char c = str[i]; ++i; if (c > '\'' && c < '\u007f') { if (c != '\\') continue; } else if (c == ' ' || ( !Char.IsControl(c) && c != escapedQuoteChar && (c < '\u2028' || c > '\u2029'))) continue; if (sb is null) { sb = new StringBuilder(str.Length + prefix1.Length + prefix2.Length + postfix1.Length + postfix2.Length + 8); sb.Append(prefix1).Append(prefix2); } int n = i - i0 - 1; if (n != 0) sb.Append(str, i0, n); i0 = i; sb.Append(EscapeChar(c)); } if (sb is null) return Concat(prefix1, prefix2, str, postfix1, postfix2); if (i0 != i) sb.Append(str, i0, i - i0); return sb.Append(postfix1).Append(postfix2).ToString(); } internal static string AsciiEscape(string str, string prefix1, string prefix2, string postfix1, string postfix2, char escapedQuoteChar) { Debug.Assert(str != null && prefix1 != null && prefix2 != null && postfix1 != null && postfix2 != null); StringBuilder sb = null; int i0 = 0; int i = 0; for (;;) { if (i >= str.Length) break; char c = str[i]; ++i; if (c > '\'' && c < '\u007f') { if (c != '\\') continue; } else if (c == ' ' || (c >= ' ' && c <= '\'' && c != escapedQuoteChar)) continue; if (sb is null) { sb = new StringBuilder(str.Length + prefix1.Length + prefix2.Length + postfix1.Length + postfix2.Length + 8); sb.Append(prefix1).Append(prefix2); } int n = i - i0 - 1; if (n != 0) sb.Append(str, i0, n); i0 = i; sb.Append(EscapeChar(c)); } if (sb is null) return Concat(prefix1, prefix2, str, postfix1, postfix2); if (i0 != i) sb.Append(str, i0, i - i0); return sb.Append(postfix1).Append(postfix2).ToString(); } internal static string SingleQuote(string str) { return Escape(str, "", "'", "'", "", '\''); } internal static string SingleQuote(string prefix, string str, string postfix) { return Escape(str, prefix, "'", "'", postfix, 
'\''); } internal static string DoubleQuote(string str) { return Escape(str, "", "\"", "\"", "", '"'); } internal static string DoubleQuote(string prefix, string str, string postfix) { return Escape(str, prefix, "\"", "\"", postfix, '"'); } } // class Text } ================================================ FILE: NuGet.config ================================================ ================================================ FILE: Samples/Calculator/Calculator-LowTrust.fsproj ================================================ net6 ================================================ FILE: Samples/Calculator/Calculator.fsproj ================================================ Calculator Calculator net6 ================================================ FILE: Samples/Calculator/Calculator.targets ================================================ Calculator Calculator Exe false ================================================ FILE: Samples/Calculator/InterpLexYacc-LowTrust.fsproj ================================================  net6 ================================================ FILE: Samples/Calculator/calculator.fs ================================================  // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. // the parser definition //////////////////////// open FParsec let ws = spaces // skips any whitespace let str_ws s = pstring s >>. 
ws

// we calculate with double precision floats
let number = pfloat .>> ws

// we set up an operator precedence parser for parsing the arithmetic expressions
// (type arguments: term type, result type of the after-operator-string parser, user state type)
let opp = new OperatorPrecedenceParser<float,unit,unit>()
let expr = opp.ExpressionParser
opp.TermParser <- number <|> between (str_ws "(") (str_ws ")") expr

// operator definitions follow the schema
// operator type, string, trailing whitespace parser, precedence, associativity, function to apply
opp.AddOperator(InfixOperator("+", ws, 1, Associativity.Left, (+)))
opp.AddOperator(InfixOperator("-", ws, 1, Associativity.Left, (-)))
opp.AddOperator(InfixOperator("*", ws, 2, Associativity.Left, (*)))
opp.AddOperator(InfixOperator("/", ws, 2, Associativity.Left, (/)))
opp.AddOperator(InfixOperator("^", ws, 3, Associativity.Right, fun x y -> System.Math.Pow(x, y)))
opp.AddOperator(PrefixOperator("-", ws, 4, true, fun x -> -x))

// we also want to accept the operators "exp" and "log", but we don't want to accept
// expressions like "logexp" 2, so we require that non-symbolic operators are not
// followed by letters
let ws1 = nextCharSatisfiesNot isLetter >>. ws
opp.AddOperator(PrefixOperator("log", ws1, 4, true, System.Math.Log))
opp.AddOperator(PrefixOperator("exp", ws1, 4, true, System.Math.Exp))

let completeExpression = ws >>. expr .>> eof // we append the eof parser to make
                                             // sure all input is consumed

// running and testing the parser
/////////////////////////////////

let calculate s = run completeExpression s

// Checks a parser result against the expected value and fails with an exception on any mismatch.
let equals expectedValue r =
    match r with
    | Success (v, _, _) when v = expectedValue -> ()
    | Success (v, _, _) -> failwith "Math is hard, let's go shopping!"
    | Failure (msg, err, _) -> printf "%s" msg; failwith msg

let test() =
    calculate "10.5 + 123.25 + 877" |> equals 1010.75
    calculate "10/2 + 123.125 + 877" |> equals 1005.125
    calculate "(123 + log 1 + 877) * 9/3" |> equals 3000.
    calculate " ( ( exp 0 + (6 / ( 1 +2 ) )- 123456 )/ 2+123 + 877) * 3^2 / 3" |> equals (-182179.5)
    printfn "No errors"

// currently the program only executes some tests
do test()

================================================
FILE: Samples/FSharpParsingSample/FParsecVersion/InterpFParsec-LowTrust.fsproj
================================================
net6

================================================
FILE: Samples/FSharpParsingSample/FParsecVersion/InterpFParsec.fsproj
================================================
net6

================================================
FILE: Samples/FSharpParsingSample/FParsecVersion/InterpFParsec.targets
================================================
InterpFParsec InterpFParsec Exe false $(MSBuildProjectDirectory)/../LexYaccVersion/test.lang

================================================
FILE: Samples/FSharpParsingSample/FParsecVersion/main.fs
================================================
// Copyright (c) Stephan Tolksdorf 2008
// License: Simplified BSD License. See accompanying documentation.

// This is a port of the parsing sample that came with the F# 1.9.4.19
// distribution.

open FParsec
open Ast

// Program entry point: parses the file named by the single command line argument
// and interprets the resulting program. Exits with code 1 on a usage or parse error.
[<EntryPoint>]
let main(argv: string[]) =
    if argv.Length <> 1 then
        // NOTE(review): the usage text looks like it lost its argument placeholder — confirm
        printf "usage: interp.exe \n"
        exit 1

    // Run the parser prog on the file path in argv[0]
    // If the file has no byte order marks, System.Text.Encoding.UTF8
    // (the encoding passed below) is assumed to be the encoding.
    let fileName = argv[0]
    let result = runParserOnFile Parser.prog () fileName System.Text.Encoding.UTF8

    let myProg =
        match result with
        | Success (v, _, _) -> v
        | Failure (msg, _, _) ->
            System.Console.WriteLine(msg)
            exit 1

    // count statements
    printf "#stmts = %d\n" (List.length (match myProg with Prog l -> l));
    printf "running program...\n";
    Interp.prog myProg
    0

================================================
FILE: Samples/FSharpParsingSample/FParsecVersion/parser.fs
================================================
// Copyright (c) Stephan Tolksdorf 2008.
// License: Simplified BSD License. See accompanying documentation.

// Compare this parser implementation with the implementation in ../LexYaccVersion.

module Parser

open System
open System.Collections.Generic

open FParsec
open Ast

// some lexical definitions
///////////////////////////

let ws = spaces // skips any whitespace

let str s = pstring s >>. ws

// identifiers are strings of lower ascii chars that are not keywords
let identifierString = many1Satisfy isLower .>> ws // [a-z]+
let keywords = ["while"; "begin"; "end"; "do"; "if"; "then"; "else"; "print"; "decr"]
let keywordsSet = new HashSet<string>(keywords)
let isKeyword str = keywordsSet.Contains(str)
//open FParsec.StaticMapping
//let isKeyword = createStaticStringMapping false [for kw in keywords -> (kw, true)]

// Parses an identifier. Fails without consuming input if the parsed string is a keyword.
let identifier : Parser<string,unit> =
    let expectedIdentifier = expected "identifier"
    fun stream ->
        let state = stream.State
        let reply = identifierString stream
        if reply.Status <> Ok || not (isKeyword reply.Result) then reply
        else // result is keyword, so backtrack to before the string
            stream.BacktrackTo(state)
            Reply(Error, expectedIdentifier)

let numberFormat =     NumberLiteralOptions.AllowMinusSign
                   ||| NumberLiteralOptions.AllowFraction
                   ||| NumberLiteralOptions.AllowExponent
let numberLit = numberLiteral numberFormat "number" .>> ws

// parsers for the original grammar productions
///////////////////////////////////////////////

let pval = identifier |>> Val

let number =
    numberLit
    |>> fun nl -> // an overflow will throw an exception, as in the original sample
            if nl.IsInteger then Int (int32 nl.String)
            else Float (float nl.String)

// expr and decr are mutually recursive grammar productions.
// In order to break the cyclic dependency, we make expr a parser that
// forwards all calls to a parser in a reference cell.
let expr, exprRef = createParserForwardedToRef() // initially exprRef holds a reference to a dummy parser

let pdecr = str "decr" >>. str "(" >>. expr .>> str ")" |>> Decr

// replace dummy parser reference in exprRef
do exprRef:= choice [pval; pdecr; number] // we need to try pval first, so we don't
                                          // accidentally try to parse an identifier
                                          // starting with "decr..." as a Decr statement
                                          // (this is a disadvantage of not having a tokenizer)

let stmt, stmtRef = createParserForwardedToRef()

let stmtList = sepBy1 stmt (str ";")

let assign =
    pipe2 identifier (str ":=" >>. expr) (fun id e -> Assign(id, e))

let print = str "print" >>. expr |>> Print

let pwhile =
    pipe2 (str "while" >>. expr) (str "do" >>. stmt) (fun e s -> While(e, s))

let seq =
    str "begin" >>. stmtList .>> str "end" |>> Seq

let ifthen =
    pipe3 (str "if" >>. expr) (str "then" >>. stmt) (opt (str "else" >>. stmt))
          (fun e s1 optS2 ->
               match optS2 with
               | None    -> IfThen(e, s1)
               | Some s2 -> IfThenElse(e, s1, s2))

do stmtRef:= choice [assign; ifthen; pwhile; seq; print] // try assign first, so that an
                                                         // identifier starting with a
                                                         // keyword doesn't trigger an error

let prog = ws >>. stmtList .>> eof |>> Prog

================================================
FILE: Samples/FSharpParsingSample/LexYaccVersion/Doc.html
================================================
The F# Parsing Sample

The F# Parsing Sample

Links: Up

This sample shows how to write a simple parser and lexer using F#, and the tools fslex and fsyacc in particular.

The sample is made up of the F# type declarations for the Abstract Syntax Tree types in ast.fs, the definition of the lexer in lex.fsl, the definition of the token types and parser in pars.fsy, an interpreter for the language in interp.fs, and an F# driver program that plugs the parser and the interpreter together in main.fs.

Suggested Exercises

  • Add an extra construct to the abstract syntax in ast.fs, e.g. an addition node for expressions. Define the semantics for the expression in interp.fs. Then add appropriate tokens and rules in the parser and lexer.
================================================
FILE: Samples/FSharpParsingSample/LexYaccVersion/InterpLexYacc.fsproj
================================================
net6 Exe false $(MSBuildProjectDirectory)/test.lang --unicode --module Parser

================================================
FILE: Samples/FSharpParsingSample/LexYaccVersion/ast.fs
================================================
// Copyright (c) Microsoft Corporation 2005-2006.
// This sample code is provided "as is" without warranty of any kind.
// We disclaim all warranties, either express or implied, including the
// warranties of merchantability and fitness for a particular purpose.

// Abstract syntax tree types of the sample language.
module Ast

// Expressions.
type Expr =
    | Val of string   // reference to a named location; the interpreter looks the name up in its state
    | Int of int      // integer literal
    | Float of float  // floating-point literal
    | Decr of Expr    // decrement; the interpreter subtracts 1 from an integer operand

// Statements.
type Stmt =
    | Assign of string * Expr           // store the value of the expression under the given name
    | While of Expr * Stmt              // repeat the statement while the expression is not the integer 0
    | Seq of Stmt list                  // run the statements in order
    | IfThen of Expr * Stmt             // run the statement if the expression is not the integer 0
    | IfThenElse of Expr * Stmt * Stmt  // like IfThen, with an alternative statement
    | Print of Expr                     // print the value of the expression

// A whole program: a list of statements.
type Prog = Prog of Stmt list

================================================
FILE: Samples/FSharpParsingSample/LexYaccVersion/interp.fs
================================================
// Original code:
// Copyright (c) Microsoft Corporation 2005-2006.
// This sample code is provided "as is" without warranty of any kind.
// We disclaim all warranties, either express or implied, including the
// warranties of merchantability and fitness for a particular purpose.
// Modifications:
// Copyright (c) Stephan Tolksdorf 2015.
// License: Simplified BSD License. See accompanying documentation.
module Interp

open Ast
open System.Collections.Generic

// Runtime values of the interpreted language.
type Value =
    | INT of int
    | FLOAT of float

// The interpreter state maps location names to their current values.
type State = Dictionary<string,Value>

// Writes the value v to the text writer os.
let printVal os v =
    match v with
    | INT n   -> Printf.fprintf os "%d" n
    | FLOAT f -> Printf.fprintf os "%g" f

// Runs a whole program, starting with an empty state.
let rec prog (Prog l ) = stmts (new Dictionary<_,_>()) l

// Runs the statements in l in order.
and stmts s l = List.iter (stmt s) l

// Runs a single statement. A condition counts as true if it is not equal to INT 0.
and stmt (s: State) st =
    match st with
    | Assign (a,b) -> s[a] <- expr s b
    | While (a,b) -> while expr s a <> INT 0 do stmt s b
    | Seq l -> stmts s l
    | IfThen (g,t) -> if (expr s g <> INT 0) then stmt s t
    | IfThenElse (g,t,e) -> if (expr s g <> INT 0) then stmt s t else stmt s e
    | Print (e) ->
        Printf.printf "--> %a\n" printVal (expr s e)
        stdout.Flush()

// Evaluates an expression. An undefined location evaluates to INT 0 (with a warning
// on stderr); decrementing a float raises an exception.
and expr (s: State) e =
    match e with
    | Val n ->
        match s.TryGetValue(n) with
        | true, v -> v
        | false, _ -> Printf.eprintf "warning: location %s not defined\n" n; INT 0
    | Expr.Int n -> INT n
    | Expr.Float f -> FLOAT f
    | Decr e2 ->
        match expr s e2 with
        | INT n -> INT (n-1)
        | FLOAT f -> failwith "cannot decrement a float"

================================================
FILE: Samples/FSharpParsingSample/LexYaccVersion/lex.fs
================================================
# 13 "lex.fsl"

module Lex

open FSharp.Text.Lexing
open Parser

let lexeme = LexBuffer.LexemeString

// Fslex generated parsers follow the same pattern as OCamllex
// and Mossmllex generated parsers, and do not update line number
// information automatically, partly because the knowledge of when
// a newline has occured is best placed in the lexer rules.
// Thus the following boiler-plate code is very useful: let newline (lexbuf: LexBuffer<_>) = lexbuf.StartPos <- lexbuf.StartPos.NextLine # 20 "lex.fs" let trans : uint16[] array = [| (* State 0 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 1us; 2us; 65535us; 65535us; 3us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 1us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 11us; 12us; 65535us; 65535us; 65535us; 17us; 65535us; 65535us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 14us; 13us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 15us; 5us; 15us; 7us; 6us; 15us; 15us; 15us; 8us; 15us; 15us; 15us; 15us; 15us; 15us; 10us; 15us; 15us; 15us; 9us; 15us; 15us; 4us; 15us; 15us; 15us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 18us; |]; (* State 1 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 2 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 3 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 2us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 4 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 49us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 5 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 45us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 6 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 41us; 26us; 40us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 7 *) [| 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 37us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 36us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 8 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 35us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 9 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 32us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 10 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 28us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 11 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 12 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 13 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 14 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 27us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 15 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 16 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 21us; 65535us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 17 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 16us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 18 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 19 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 21us; 65535us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 19us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 20 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 24us; 24us; 24us; 24us; 24us; 24us; 24us; 24us; 24us; 24us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 21 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 22us; 22us; 22us; 22us; 22us; 22us; 22us; 22us; 22us; 22us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 22 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 23 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 23us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 20us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 24 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 25us; 25us; 25us; 25us; 25us; 25us; 25us; 25us; 25us; 25us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 25 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 25us; 25us; 25us; 25us; 25us; 
25us; 25us; 25us; 25us; 25us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 26 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 27 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 28 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 29us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 29 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 30us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 30 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 31us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 31 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 32 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 33us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 33 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 34us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; |]; (* State 34 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 35 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 36 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 37 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 38us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 38 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 39us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 39 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 40 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 44us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 41 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 42us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 42 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 43us; 26us; 26us; 26us; 26us; 26us; 26us; 
26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 43 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 44 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 45 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 46us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 46 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 47us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 47 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 48us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 48 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 49 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 50us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 50 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 51us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 51 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 52us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; (* State 52 *) [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 26us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; |] let actions : uint16[] = [|65535us; 0us; 1us; 65535us; 15us; 15us; 15us; 15us; 15us; 15us; 15us; 11us; 12us; 13us; 65535us; 15us; 16us; 65535us; 18us; 16us; 65535us; 65535us; 17us; 17us; 17us; 17us; 15us; 14us; 15us; 15us; 15us; 9us; 15us; 15us; 7us; 6us; 5us; 15us; 15us; 10us; 15us; 15us; 15us; 8us; 4us; 15us; 15us; 15us; 3us; 15us; 15us; 15us; 2us; |] let _fslex_tables = FSharp.Text.Lexing.UnicodeTables.Create(trans,actions) let rec _fslex_dummy () = _fslex_dummy() // Rule token and token lexbuf = match _fslex_tables.Interpret(0,lexbuf) with | 0 -> ( # 54 "lex.fsl" token lexbuf # 139 "lex.fs" ) | 1 -> ( # 55 "lex.fsl" newline lexbuf; token lexbuf # 144 "lex.fs" ) | 2 -> ( # 56 "lex.fsl" WHILE # 149 "lex.fs" ) | 3 -> ( # 57 "lex.fsl" BEGIN # 154 "lex.fs" ) | 4 -> ( # 58 "lex.fsl" END # 159 "lex.fs" ) | 5 -> ( # 59 "lex.fsl" DO # 164 "lex.fs" ) | 6 -> ( # 60 "lex.fsl" IF # 169 "lex.fs" ) | 7 -> ( # 61 "lex.fsl" THEN # 174 "lex.fs" ) | 8 -> ( # 62 "lex.fsl" ELSE # 179 "lex.fs" ) | 9 -> ( # 63 "lex.fsl" PRINT # 184 "lex.fs" ) | 10 -> ( # 64 "lex.fsl" DECR # 189 "lex.fs" ) | 11 -> ( # 65 "lex.fsl" LPAREN # 194 "lex.fs" ) | 12 -> ( # 66 "lex.fsl" RPAREN # 199 "lex.fs" ) | 13 -> ( # 67 "lex.fsl" SEMI # 204 "lex.fs" ) | 14 -> ( # 68 "lex.fsl" ASSIGN # 209 "lex.fs" ) | 15 -> ( # 70 "lex.fsl" ID(lexeme lexbuf) # 214 "lex.fs" ) | 16 -> ( # 72 "lex.fsl" INT (int32 (lexeme lexbuf)) # 219 "lex.fs" ) | 17 -> ( # 74 "lex.fsl" FLOAT (float (lexeme lexbuf)) # 224 "lex.fs" ) | 18 -> ( # 75 "lex.fsl" EOF # 229 "lex.fs" ) | _ -> failwith "token" # 3000000 "lex.fs" ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/lex.fsl ================================================ // Copyright (c) Microsoft Corporation 2005-2006. 
// This sample code is provided "as is" without warranty of any kind. // We disclaim all warranties, either express or implied, including the // warranties of merchantability and fitness for a particular purpose. // // This file is a sample lexer specification for use with F# Lex (fslex.exe). //-------------------------------------------------------------- // Part I. Supporting F# definitions. Everything between braces // is F# code added to the generated file. { module Lex open FSharp.Text.Lexing open Parser let lexeme = LexBuffer.LexemeString // Fslex generated parsers follow the same pattern as OCamllex // and Mossmllex generated parsers, and do not update line number // information automatically, partly because the knowledge of when // a newline has occurred is best placed in the lexer rules. // Thus the following boiler-plate code is very useful: let newline (lexbuf: LexBuffer<_>) = lexbuf.StartPos <- lexbuf.StartPos.NextLine } //-------------------------------------------------------------- // Part II. Define some regular expressions // // These are some regular expression definitions let digit = ['0'-'9'] let whitespace = [' ' '\t' ] let newline = ('\n' | '\r' '\n') //-------------------------------------------------------------- // Part III. Token generators and rules. // // These are the rules specifying the tokens matched by the lexer // // This lexer has only one generator ('token') // // Generator definitions can take arguments, e.g. // // rule token arg1 arg2 = parse ... rule token = parse | whitespace { token lexbuf } // keep lexing! | newline { newline lexbuf; token lexbuf } // record line break and keep lexing! | "while" { WHILE } // return a token! | "begin" { BEGIN } // ... | "end" { END } | "do" { DO } | "if" { IF } | "then" { THEN } | "else" { ELSE } | "print" { PRINT } | "decr" { DECR } | "(" { LPAREN } | ")" { RPAREN } | ";" { SEMI } | ":=" { ASSIGN } // ... | ['a'-'z']+ { ID(lexeme lexbuf) } // return a token carrying data! 
| ['-']?digit+ { INT (int32 (lexeme lexbuf)) } // return a token carrying data! | ['-']?digit+('.'digit+)?(['e''E']digit+)? { FLOAT (float (lexeme lexbuf)) } | eof { EOF } // return a token for the EOF // // Additional generator definitions would go here, e.g. // // and comment arg1 arg2 = parse ... // // and string arg1 arg2 = parse ... ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/main.fs ================================================ ﻿// Original code: // Copyright (c) Microsoft Corporation 2005-2006. // This sample code is provided "as is" without warranty of any kind. // We disclaim all warranties, either express or implied, including the // warranties of merchantability and fitness for a particular purpose. // Modifications: // Copyright (c) Stephan Tolksdorf 2015. // License: Simplified BSD License. See accompanying documentation. // This program uses FsLex and FsYacc: // http://fsprojects.github.io/FsLexYacc/ open Ast open Printf open FSharp.Text.Lexing [] let main(argv: string[]) = if argv.Length <> 1 then printf "usage: interp.exe \n" exit 1 let stream = new System.IO.StreamReader(argv[0], System.Text.Encoding.UTF8) let myProg = // Create the lexer on top of the text reader, which decodes the input file // as UTF-8 (the lexer specification itself only matches // ASCII characters) let lexbuf = LexBuffer.FromTextReader(stream) // Call the parser try Parser.start Lex.token lexbuf with e -> let pos = lexbuf.EndPos printf "error near line %d, character %d\n%s\n" pos.Line pos.Column (e.ToString()); exit 1 // Now look at the resulting AST, e.g. count the number of top-level // statements, and then run the program. 
printf "#stmts = %d\n" (List.length (match myProg with Prog l -> l)); printf "running program...\n"; Interp.prog myProg 0 ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/pars.fs ================================================ // Implementation file for parser generated by fsyacc module Parser #nowarn "64";; // turn off warnings that type variables used in production annotations are instantiated to concrete type open FSharp.Text.Lexing open FSharp.Text.Parsing.ParseHelpers # 10 "pars.fsy" // This prelude is F# code that is available throughout this file. In this // case we just open a module to reveal some datatype definitions. open Ast # 14 "pars.fs" // This type is the type of tokens accepted by the parser type token = | DECR | LPAREN | RPAREN | WHILE | DO | END | BEGIN | IF | THEN | ELSE | PRINT | SEMI | ASSIGN | EOF | FLOAT of (System.Double) | INT of (System.Int32) | ID of (string) // This type is used to give symbolic names to token indexes, useful for error messages type tokenId = | TOKEN_DECR | TOKEN_LPAREN | TOKEN_RPAREN | TOKEN_WHILE | TOKEN_DO | TOKEN_END | TOKEN_BEGIN | TOKEN_IF | TOKEN_THEN | TOKEN_ELSE | TOKEN_PRINT | TOKEN_SEMI | TOKEN_ASSIGN | TOKEN_EOF | TOKEN_FLOAT | TOKEN_INT | TOKEN_ID | TOKEN_end_of_input | TOKEN_error // This type is used to give symbolic names to token indexes, useful for error messages type nonTerminalId = | NONTERM__startstart | NONTERM_start | NONTERM_Prog | NONTERM_Expr | NONTERM_Stmt | NONTERM_StmtList // This function maps tokens to integer indexes let tagOfToken (t:token) = match t with | DECR -> 0 | LPAREN -> 1 | RPAREN -> 2 | WHILE -> 3 | DO -> 4 | END -> 5 | BEGIN -> 6 | IF -> 7 | THEN -> 8 | ELSE -> 9 | PRINT -> 10 | SEMI -> 11 | ASSIGN -> 12 | EOF -> 13 | FLOAT _ -> 14 | INT _ -> 15 | ID _ -> 16 // This function maps integer indexes to symbolic token ids let tokenTagToTokenId (tokenIdx:int) = match tokenIdx with | 0 -> TOKEN_DECR | 1 -> TOKEN_LPAREN | 2 -> 
TOKEN_RPAREN | 3 -> TOKEN_WHILE | 4 -> TOKEN_DO | 5 -> TOKEN_END | 6 -> TOKEN_BEGIN | 7 -> TOKEN_IF | 8 -> TOKEN_THEN | 9 -> TOKEN_ELSE | 10 -> TOKEN_PRINT | 11 -> TOKEN_SEMI | 12 -> TOKEN_ASSIGN | 13 -> TOKEN_EOF | 14 -> TOKEN_FLOAT | 15 -> TOKEN_INT | 16 -> TOKEN_ID | 19 -> TOKEN_end_of_input | 17 -> TOKEN_error | _ -> failwith "tokenTagToTokenId: bad token" /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production let prodIdxToNonTerminal (prodIdx:int) = match prodIdx with | 0 -> NONTERM__startstart | 1 -> NONTERM_start | 2 -> NONTERM_Prog | 3 -> NONTERM_Expr | 4 -> NONTERM_Expr | 5 -> NONTERM_Expr | 6 -> NONTERM_Expr | 7 -> NONTERM_Stmt | 8 -> NONTERM_Stmt | 9 -> NONTERM_Stmt | 10 -> NONTERM_Stmt | 11 -> NONTERM_Stmt | 12 -> NONTERM_Stmt | 13 -> NONTERM_StmtList | 14 -> NONTERM_StmtList | _ -> failwith "prodIdxToNonTerminal: bad production index" let _fsyacc_endOfInputTag = 19 let _fsyacc_tagOfErrorTerminal = 17 // This function gets the name of a token as a string let token_to_string (t:token) = match t with | DECR -> "DECR" | LPAREN -> "LPAREN" | RPAREN -> "RPAREN" | WHILE -> "WHILE" | DO -> "DO" | END -> "END" | BEGIN -> "BEGIN" | IF -> "IF" | THEN -> "THEN" | ELSE -> "ELSE" | PRINT -> "PRINT" | SEMI -> "SEMI" | ASSIGN -> "ASSIGN" | EOF -> "EOF" | FLOAT _ -> "FLOAT" | INT _ -> "INT" | ID _ -> "ID" // This function gets the data carried by a token as an object let _fsyacc_dataOfToken (t:token) = match t with | DECR -> (null : System.Object) | LPAREN -> (null : System.Object) | RPAREN -> (null : System.Object) | WHILE -> (null : System.Object) | DO -> (null : System.Object) | END -> (null : System.Object) | BEGIN -> (null : System.Object) | IF -> (null : System.Object) | THEN -> (null : System.Object) | ELSE -> (null : System.Object) | PRINT -> (null : System.Object) | SEMI -> (null : System.Object) | ASSIGN -> (null : System.Object) | EOF -> (null : System.Object) | 
FLOAT _fsyacc_x -> Microsoft.FSharp.Core.Operators.box _fsyacc_x | INT _fsyacc_x -> Microsoft.FSharp.Core.Operators.box _fsyacc_x | ID _fsyacc_x -> Microsoft.FSharp.Core.Operators.box _fsyacc_x let _fsyacc_gotos = [| 0us; 65535us; 1us; 65535us; 0us; 1us; 1us; 65535us; 0us; 2us; 5us; 65535us; 8us; 9us; 12us; 13us; 14us; 15us; 21us; 22us; 27us; 28us; 6us; 65535us; 0us; 29us; 16us; 17us; 18us; 29us; 23us; 24us; 25us; 26us; 30us; 31us; 2us; 65535us; 0us; 3us; 18us; 19us; |] let _fsyacc_sparseGotoTableRowOffsets = [|0us; 1us; 3us; 5us; 11us; 18us; |] let _fsyacc_stateToProdIdxsTableElements = [| 1us; 0us; 1us; 0us; 1us; 1us; 2us; 2us; 14us; 1us; 3us; 1us; 4us; 1us; 5us; 1us; 6us; 1us; 6us; 1us; 6us; 1us; 6us; 1us; 7us; 1us; 7us; 1us; 7us; 1us; 8us; 1us; 8us; 1us; 8us; 1us; 8us; 1us; 9us; 2us; 9us; 14us; 1us; 9us; 2us; 10us; 11us; 2us; 10us; 11us; 2us; 10us; 11us; 2us; 10us; 11us; 1us; 11us; 1us; 11us; 1us; 12us; 1us; 12us; 1us; 13us; 1us; 14us; 1us; 14us; |] let _fsyacc_stateToProdIdxsTableRowOffsets = [|0us; 2us; 4us; 6us; 9us; 11us; 13us; 15us; 17us; 19us; 21us; 23us; 25us; 27us; 29us; 31us; 33us; 35us; 37us; 39us; 42us; 44us; 47us; 50us; 53us; 56us; 58us; 60us; 62us; 64us; 66us; 68us; |] let _fsyacc_action_rows = 32 let _fsyacc_actionTableElements = [|5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 0us; 49152us; 0us; 16385us; 1us; 16386us; 11us; 30us; 0us; 16387us; 0us; 16388us; 0us; 16389us; 1us; 32768us; 1us; 8us; 4us; 32768us; 0us; 7us; 14us; 6us; 15us; 5us; 16us; 4us; 1us; 32768us; 2us; 10us; 0us; 16390us; 1us; 32768us; 12us; 12us; 4us; 32768us; 0us; 7us; 14us; 6us; 15us; 5us; 16us; 4us; 0us; 16391us; 4us; 32768us; 0us; 7us; 14us; 6us; 15us; 5us; 16us; 4us; 1us; 32768us; 4us; 16us; 5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 0us; 16392us; 5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 2us; 32768us; 5us; 20us; 11us; 30us; 0us; 16393us; 4us; 32768us; 0us; 7us; 14us; 6us; 15us; 5us; 16us; 4us; 
1us; 32768us; 8us; 23us; 5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 1us; 16394us; 9us; 25us; 5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 0us; 16395us; 4us; 32768us; 0us; 7us; 14us; 6us; 15us; 5us; 16us; 4us; 0us; 16396us; 0us; 16397us; 5us; 32768us; 3us; 14us; 6us; 18us; 7us; 21us; 10us; 27us; 16us; 11us; 0us; 16398us; |] let _fsyacc_actionTableRowOffsets = [|0us; 6us; 7us; 8us; 10us; 11us; 12us; 13us; 15us; 20us; 22us; 23us; 25us; 30us; 31us; 36us; 38us; 44us; 45us; 51us; 54us; 55us; 60us; 62us; 68us; 70us; 76us; 77us; 82us; 83us; 84us; 90us; |] let _fsyacc_reductionSymbolCounts = [|1us; 1us; 1us; 1us; 1us; 1us; 4us; 3us; 4us; 3us; 4us; 6us; 2us; 1us; 3us; |] let _fsyacc_productionToNonTerminalTable = [|0us; 1us; 2us; 3us; 3us; 3us; 3us; 4us; 4us; 4us; 4us; 4us; 4us; 5us; 5us; |] let _fsyacc_immediateActions = [|65535us; 49152us; 16385us; 65535us; 16387us; 16388us; 16389us; 65535us; 65535us; 65535us; 16390us; 65535us; 65535us; 16391us; 65535us; 65535us; 65535us; 16392us; 65535us; 65535us; 16393us; 65535us; 65535us; 65535us; 65535us; 65535us; 16395us; 65535us; 16396us; 16397us; 65535us; 16398us; |] let _fsyacc_reductions () = [| # 184 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> Ast.Prog in Microsoft.FSharp.Core.Operators.box ( ( raise (FSharp.Text.Parsing.Accept(Microsoft.FSharp.Core.Operators.box _1)) ) : 'gentype__startstart)); # 193 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> 'gentype_Prog in Microsoft.FSharp.Core.Operators.box ( ( # 37 "pars.fsy" _1 ) # 37 "pars.fsy" : Ast.Prog )); # 204 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> 'gentype_StmtList in Microsoft.FSharp.Core.Operators.box ( ( # 40 "pars.fsy" Prog(List.rev(_1)) ) # 40 "pars.fsy" : 'gentype_Prog)); # 215 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = 
parseState.GetInput(1) :?> string in Microsoft.FSharp.Core.Operators.box ( ( # 42 "pars.fsy" Val(_1); ) # 42 "pars.fsy" : 'gentype_Expr)); # 226 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> System.Int32 in Microsoft.FSharp.Core.Operators.box ( ( # 43 "pars.fsy" Int(_1) ) # 43 "pars.fsy" : 'gentype_Expr)); # 237 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> System.Double in Microsoft.FSharp.Core.Operators.box ( ( # 44 "pars.fsy" Float(_1) ) # 44 "pars.fsy" : 'gentype_Expr)); # 248 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _3 = parseState.GetInput(3) :?> 'gentype_Expr in Microsoft.FSharp.Core.Operators.box ( ( # 45 "pars.fsy" Decr(_3) ) # 45 "pars.fsy" : 'gentype_Expr)); # 259 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> string in let _3 = parseState.GetInput(3) :?> 'gentype_Expr in Microsoft.FSharp.Core.Operators.box ( ( # 47 "pars.fsy" Assign(_1,_3) ) # 47 "pars.fsy" : 'gentype_Stmt)); # 271 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _2 = parseState.GetInput(2) :?> 'gentype_Expr in let _4 = parseState.GetInput(4) :?> 'gentype_Stmt in Microsoft.FSharp.Core.Operators.box ( ( # 48 "pars.fsy" While(_2,_4) ) # 48 "pars.fsy" : 'gentype_Stmt)); # 283 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _2 = parseState.GetInput(2) :?> 'gentype_StmtList in Microsoft.FSharp.Core.Operators.box ( ( # 49 "pars.fsy" Seq(List.rev(_2)) ) # 49 "pars.fsy" : 'gentype_Stmt)); # 294 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _2 = parseState.GetInput(2) :?> 'gentype_Expr in let _4 = parseState.GetInput(4) :?> 'gentype_Stmt in Microsoft.FSharp.Core.Operators.box ( ( # 50 "pars.fsy" IfThen(_2,_4) ) # 50 "pars.fsy" : 'gentype_Stmt)); # 306 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _2 = 
parseState.GetInput(2) :?> 'gentype_Expr in let _4 = parseState.GetInput(4) :?> 'gentype_Stmt in let _6 = parseState.GetInput(6) :?> 'gentype_Stmt in Microsoft.FSharp.Core.Operators.box ( ( # 51 "pars.fsy" IfThenElse(_2,_4,_6) ) # 51 "pars.fsy" : 'gentype_Stmt)); # 319 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _2 = parseState.GetInput(2) :?> 'gentype_Expr in Microsoft.FSharp.Core.Operators.box ( ( # 52 "pars.fsy" Print(_2) ) # 52 "pars.fsy" : 'gentype_Stmt)); # 330 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> 'gentype_Stmt in Microsoft.FSharp.Core.Operators.box ( ( # 55 "pars.fsy" [_1] ) # 55 "pars.fsy" : 'gentype_StmtList)); # 341 "pars.fs" (fun (parseState : FSharp.Text.Parsing.IParseState) -> let _1 = parseState.GetInput(1) :?> 'gentype_StmtList in let _3 = parseState.GetInput(3) :?> 'gentype_Stmt in Microsoft.FSharp.Core.Operators.box ( ( # 56 "pars.fsy" _3 :: _1 ) # 56 "pars.fsy" : 'gentype_StmtList)); |] # 354 "pars.fs" let tables : FSharp.Text.Parsing.Tables<_> = { reductions= _fsyacc_reductions (); endOfInputTag = _fsyacc_endOfInputTag; tagOfToken = tagOfToken; dataOfToken = _fsyacc_dataOfToken; actionTableElements = _fsyacc_actionTableElements; actionTableRowOffsets = _fsyacc_actionTableRowOffsets; stateToProdIdxsTableElements = _fsyacc_stateToProdIdxsTableElements; stateToProdIdxsTableRowOffsets = _fsyacc_stateToProdIdxsTableRowOffsets; reductionSymbolCounts = _fsyacc_reductionSymbolCounts; immediateActions = _fsyacc_immediateActions; gotos = _fsyacc_gotos; sparseGotoTableRowOffsets = _fsyacc_sparseGotoTableRowOffsets; tagOfErrorTerminal = _fsyacc_tagOfErrorTerminal; parseError = (fun (ctxt:FSharp.Text.Parsing.ParseErrorContext<_>) -> match parse_error_rich with | Some f -> f ctxt | None -> parse_error ctxt.Message); numTerminals = 20; productionToNonTerminalTable = _fsyacc_productionToNonTerminalTable } let engine lexer lexbuf startState = tables.Interpret(lexer, lexbuf, 
startState) let start lexer lexbuf : Ast.Prog = engine lexer lexbuf 0 :?> _ ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/pars.fsi ================================================ // Signature file for parser generated by fsyacc module Parser type token = | DECR | LPAREN | RPAREN | WHILE | DO | END | BEGIN | IF | THEN | ELSE | PRINT | SEMI | ASSIGN | EOF | FLOAT of (System.Double) | INT of (System.Int32) | ID of (string) type tokenId = | TOKEN_DECR | TOKEN_LPAREN | TOKEN_RPAREN | TOKEN_WHILE | TOKEN_DO | TOKEN_END | TOKEN_BEGIN | TOKEN_IF | TOKEN_THEN | TOKEN_ELSE | TOKEN_PRINT | TOKEN_SEMI | TOKEN_ASSIGN | TOKEN_EOF | TOKEN_FLOAT | TOKEN_INT | TOKEN_ID | TOKEN_end_of_input | TOKEN_error type nonTerminalId = | NONTERM__startstart | NONTERM_start | NONTERM_Prog | NONTERM_Expr | NONTERM_Stmt | NONTERM_StmtList /// This function maps tokens to integer indexes val tagOfToken: token -> int /// This function maps integer indexes to symbolic token ids val tokenTagToTokenId: int -> tokenId /// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production val prodIdxToNonTerminal: int -> nonTerminalId /// This function gets the name of a token as a string val token_to_string: token -> string val start : (FSharp.Text.Lexing.LexBuffer<'cty> -> token) -> FSharp.Text.Lexing.LexBuffer<'cty> -> ( Ast.Prog ) ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/pars.fsy ================================================ // Copyright (c) Microsoft Corporation 2005-2006. // This sample code is provided "as is" without warranty of any kind. // We disclaim all warranties, either express or implied, including the // warranties of merchantability and fitness for a particular purpose. // // This example shows how to write an F# Yacc parser file which creates // nodes that carry F# values. 
%{ // This prelude is F# code that is available throughout this file. In this // case we just open a module to reveal some datatype definitions. open Ast %} // The start symbol becomes a parser function in the compiled code: %start start // These are the terminal tokens of the grammar along with the types of // the data carried by each token: %token ID %token INT %token FLOAT %token DECR LPAREN RPAREN WHILE DO END BEGIN IF THEN ELSE PRINT SEMI ASSIGN EOF // This is the type of the data produced by a successful reduction of the 'start' // symbol: %type < Ast.Prog > start %% // These are the rules of the grammar along with the F# code of the // actions executed as rules are reduced. In this case the actions // produce data using F# data construction terms. start: Prog { $1 } Prog: StmtList { Prog(List.rev($1)) } Expr: ID { Val($1); } | INT { Int($1) } | FLOAT { Float($1) } | DECR LPAREN Expr RPAREN { Decr($3) } Stmt: ID ASSIGN Expr { Assign($1,$3) } | WHILE Expr DO Stmt { While($2,$4) } | BEGIN StmtList END { Seq(List.rev($2)) } | IF Expr THEN Stmt { IfThen($2,$4) } | IF Expr THEN Stmt ELSE Stmt { IfThenElse($2,$4,$6) } | PRINT Expr { Print($2) } StmtList: | Stmt { [$1] } | StmtList SEMI Stmt { $3 :: $1 } ================================================ FILE: Samples/FSharpParsingSample/LexYaccVersion/test.lang ================================================ a := 1; b := 0; if a then d := 20; if b then d := 40; print d; while d do begin d := decr(d); print d end; print d ================================================ FILE: Samples/FSharpParsingSample/readme.txt ================================================ This sample is derived from the parsing sample that shipped with the F# 1.9.4.19 distribution originally published at http://research.microsoft.com/fsharp/release.aspx The original sample code was provided by Microsoft under the following licence: // Copyright (c) Microsoft Corporation 2005-2006. 
// This sample code is provided "as is" without warranty of any kind. // We disclaim all warranties, either express or implied, including the // warranties of merchantability and fitness for a particular purpose. ================================================ FILE: Samples/JSON/JsonParser-LowTrust.fsproj ================================================ net6 ================================================ FILE: Samples/JSON/JsonParser.fsproj ================================================ net6 ================================================ FILE: Samples/JSON/JsonParser.targets ================================================ JsonParser JsonParser Exe false $(MSBuildProjectDirectory)/test_json.txt ================================================ FILE: Samples/JSON/PegParser-LowTrust.fsproj ================================================ net6 ================================================ FILE: Samples/JSON/ast.fs ================================================ // Copyright (c) Stephan Tolksdorf 2008 // License: Simplified BSD License. See accompanying documentation. module Ast [] type Json = JString of string | JNumber of float | JBool of bool | JNull | JList of Json list | JObject of Map with member private t.StructuredFormatDisplay = match t with | JString s -> box ("\"" + s + "\"") | JNumber f -> box f | JBool b -> box b | JNull -> box "null" | JList l -> box l | JObject m -> Map.toList m :> obj ================================================ FILE: Samples/JSON/main.fs ================================================ // Copyright (c) Stephan Tolksdorf 2008 // License: Simplified BSD License. See accompanying documentation. // See parser.fs for more information. open FParsec.CharParsers open Ast open Parser [] let main(args: string[]) = if args.Length <> 1 then printf "usage: json.exe \n" exit 1 // The parser is run on the file path in args[0]. // If the file has no byte order marks, the encoding passed in the call // below (System.Text.Encoding.UTF8) is assumed to be the encoding. 
// The parser result will be the abstract syntax tree of the input file. let result = parseJsonFile args[0] System.Text.Encoding.UTF8 // for the moment we just print out the AST match result with | Success (v, _, _) -> printf "The AST of the input file is:\n%A\n" v 0 | Failure (msg, err, _) -> printfn "%s" msg 1 ================================================ FILE: Samples/JSON/parser.fs ================================================ // Copyright (c) Stephan Tolksdorf 2008-2011 // License: Simplified BSD License. See accompanying documentation. module Parser open FParsec open Ast // This is a general JSON parser that will parse any JSON file into an AST. // See e.g. http://www.json.org/, for a specification of JSON. // The FParsec tutorial discusses this parser in detail. // Note that in typical applications you often don't need to parse any general // JSON file, but only files describing objects of a certain type. In those cases // it might be more convenient to parse the input with specialized parsers // instead of using the indirect approach via an intermediate AST. The parser // definitions below should be useful in any case. let jnull = stringReturn "null" JNull let jtrue = stringReturn "true" (JBool true) let jfalse = stringReturn "false" (JBool false) let jnumber = pfloat |>> JNumber // pfloat will accept a little more than specified by JSON // as valid numbers (such as NaN or Infinity), but that makes // it only more robust let str s = pstring s let stringLiteral = let escape = anyOf "\"\\/bfnrt" |>> function | 'b' -> "\b" | 'f' -> "\u000C" | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c // every other char is mapped to itself let unicodeEscape = /// converts a hex char ([0-9a-fA-F]) to its integer number (0-15) let hex2int c = (int c &&& 15) + (int c >>> 6)*9 str "u" >>. pipe4 hex hex hex hex (fun h3 h2 h1 h0 -> (hex2int h3)*4096 + (hex2int h2)*256 + (hex2int h1)*16 + hex2int h0 |> char |> string ) let escapedCharSnippet = str "\\" >>. 
(escape <|> unicodeEscape) let normalCharSnippet = manySatisfy (fun c -> c <> '"' && c <> '\\') between (str "\"") (str "\"") (stringsSepBy normalCharSnippet escapedCharSnippet) let jstring = stringLiteral |>> JString // jvalue, jlist and jobject are three mutually recursive grammar productions. // In order to break the cyclic dependency, we make jvalue a parser that // forwards all calls to a parser in a reference cell. let jvalue, jvalueRef = createParserForwardedToRef() // initially jvalueRef holds a reference to a dummy parser let ws = spaces // skips any whitespace let listBetweenStrings sOpen sClose pElement f = between (str sOpen) (str sClose) (ws >>. sepBy (pElement .>> ws) (str "," .>> ws) |>> f) let keyValue = tuple2 stringLiteral (ws >>. str ":" >>. ws >>. jvalue) let jlist = listBetweenStrings "[" "]" jvalue JList let jobject = listBetweenStrings "{" "}" keyValue (Map.ofList >> JObject) do jvalueRef := choice [jobject jlist jstring jnumber jtrue jfalse jnull] let json = ws >>. 
jvalue .>> ws .>> eof let parseJsonString str = run json str // UTF8 is the default, but it will detect UTF16 or UTF32 byte-order marks automatically let parseJsonFile fileName encoding = runParserOnFile json () fileName encoding (* Parses JSON from `stream`. The caller's `encoding` is passed through to runParserOnStream (it was previously ignored in favour of a hard-coded UTF8), mirroring parseJsonFile. *) let parseJsonStream stream encoding = runParserOnStream json () "" stream encoding ================================================ FILE: Samples/JSON/test_json.txt ================================================ { "glossary": { "title": "example glossary", "GlossDiv": { "title": "S", "GlossList": { "GlossEntry": { "ID": "SGML", "SortAs": "SGML", "GlossTerm": "Standard Generalized Markup Language", "Acronym": "SGML", "Abbrev": "ISO 8879:1986", "GlossDef": { "para": "A meta-markup language, used to create markup languages such as DocBook.", "GlossSeeAlso": ["GML", "XML"] }, "GlossSee": "markup" } } } } } ================================================ FILE: Samples/PEG/PegParser-LowTrust.fsproj ================================================ net6 ================================================ FILE: Samples/PEG/PegParser.fsproj ================================================ net6 ================================================ FILE: Samples/PEG/PegParser.targets ================================================ PegParser PegParser Exe false $(MSBuildProjectDirectory)/test_peg.txt ================================================ FILE: Samples/PEG/ast.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2008 // License: Simplified BSD License. See accompanying documentation. module Ast type Grammar = Definition list and Definition = Def of string * Expression and Range = | Char of char | Range of char * char and Expression = /// expression1 / expression2 / ... | Alt of Expression list /// expression1 expression2 ... | Seq of Expression list /// expression? 
| Opt of Expression /// expression* | Star of Expression /// expression+ | Plus of Expression /// &expression | And of Expression /// !expression | Not of Expression | Class of Range list | Literal of string | Identifier of string | Dot ================================================ FILE: Samples/PEG/main.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2008 // License: Simplified BSD License. See accompanying documentation. // This is a simple parser for PEG grammars. // See parser.fs for more information. open FParsec open Ast (* Entry point: parses the PEG grammar file named by the single command-line argument and prints its AST. Returns 0 on success and 1 on a parse failure, like the JSON sample. *) [<EntryPoint>] let main(args: string[]) = if args.Length <> 1 then printf "usage: peg.exe <file>\n" exit 1 // The parser is run on the file path in args[0]. // If the file has no byte order marks, System.Text.Encoding.UTF8 // is assumed to be the encoding. // The parser result will be the abstract syntax tree of the input file. let fileName = args[0] let result = runParserOnFile Parser.pGrammar () fileName System.Text.Encoding.UTF8 // for the moment we just print out the AST match result with | Success (v, _, _) -> printf "The ast for the input file is:\n%A\n" v 0 | Failure (msg, err, _) -> printf "%s\n" msg 1 ================================================ FILE: Samples/PEG/parser.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. module Parser open System open FParsec open Ast // The following is a close translation of the grammar on page 2 of // Parsing Expression Grammars: A Recognition-Based Syntactic Foundation, Bryan Ford. // 31st ACM Symposium on Principles of Programming Languages, January 14-16, 2004, Venice, Italy. 
// http://www.bford.info/pub/lang/peg.pdf // If you're new to FParsec, take a look at // http://www.quanttec.com/fparsec/reference/parser-overview.html // some abbreviations let str s = pstring s // Lexical syntax let pEndOfFile = eof //let pEndOfLine = skipNewline //let pSpace = skipAnyOf " \t\n" let pComment = str "#" >>. skipRestOfLine true let pSpacing = // literal translation: // skipManyChars (pSpace <|> pComment) // more efficient: skipSepBy spaces pComment let LEFTARROW = str "<-" >>. pSpacing let SLASH = str "/" >>. pSpacing let AND = str "&" >>. pSpacing let NOT = str "!" >>. pSpacing //let QUESTION = str "?" .>> pSpacing //let STAR = str "*" .>> pSpacing //let PLUS = str "+" .>> pSpacing let OPEN = str "(" >>. pSpacing let CLOSE = str ")" >>. pSpacing let DOT = str "." >>. pSpacing // Instead of the odd octal escapes in the original grammar, // we accept the usual UTF16 character escapes '\uxxxx' let pChar = let escape = anyOf "nrt'\"[]\\" |>> function | 'n' -> '\n' | 'r' -> '\r' | 't' -> '\t' | c -> c let unicodeEscape = str "u" >>. pipe4 hex hex hex hex (fun h3 h2 h1 h0 -> let hex2int c = (int c &&& 15) + (int c >>> 6)*9 // hex char to int (hex2int h3)*4096 + (hex2int h2)*256 + (hex2int h1)*16 + hex2int h0 |> char ) satisfy ((<>) '\\') <|> (str "\\" >>. (escape <|> unicodeEscape)) let pRange = pipe2 pChar (opt (str "-" >>. pChar)) (fun c1 c2Opt -> match c2Opt with | None -> Char c1 | Some(c2) -> Range(c1, c2)) let pClass = str "[" >>. (manyTill pRange (str "]") .>> pSpacing |>> Class) let pLiteralString = ( (str "\'" >>. (manyCharsTill pChar (str "\'"))) <|> (str "\"" >>. 
(manyCharsTill pChar (str "\"" )))) .>> pSpacing let pLiteral = pLiteralString |>> Literal let isIdentifierStart = fun c -> isAsciiLetter c || c = '_' // "A-Za-z_" let isIdentifierCont = fun c -> isAsciiLetter c || isDigit c || c = '_' // A-Za-z_0-9 let pIdentifierString = many1Satisfy2 isIdentifierStart isIdentifierCont .>> pSpacing let pIdentifier = pIdentifierString |>> Identifier let pDot = DOT >>% Dot // Hierarchical syntax // expression, sequence, prefix, suffix and primary are mutually recursive // grammar productions. In order to break the cyclic dependency, we make // pPrimary a parser that forwards all calls to a parser in a reference cell. let pPrimary, pPrimaryRef = createParserForwardedToRef() // initially pPrimary holds a reference to a dummy parser let pSuffix = // returns 'x' if there is no '?', '*' or '+' pipe2 pPrimary (anyOf "?*+" <|>% 'x') (fun p c -> match c with | '?' -> Opt p | '*' -> Star p | '+' -> Plus p | _ -> p) let pPrefix = choice [AND >>. (pSuffix |>> And) NOT >>. (pSuffix |>> Not) pSuffix] .>> pSpacing let pSequence = many pPrefix |>> function | [exp] -> exp | exps -> Seq exps let pExpression = sepBy1 pSequence SLASH |>> function | [exp] -> exp | exps -> Alt exps // only use an Alt for more than one alternative pPrimaryRef:= choice [pIdentifier .>>? notFollowedByString "<-" // backtracks to the beginning if the id is followed by "<-" between OPEN CLOSE pExpression pLiteral pClass pDot] let pDefinition = pipe2 pIdentifierString (LEFTARROW >>. pExpression) (fun s e -> Def (s, e)) let pGrammar: Parser<_, unit> = // one type annotation is enough for the whole parser pSpacing >>. many1 pDefinition .>> pEndOfFile ================================================ FILE: Samples/PEG/test_peg.txt ================================================ # Source: # Parsing Expression Grammars: A Recognition-Based Syntactic Foundation, Bryan Ford. # 31st ACM Symposium on Principles of Programming Languages, January 14-16, 2004, Venice, Italy. 
# http://www.bford.info/pub/lang/peg.pdf # PEG formally describing its own ASCII syntax # Hierarchical syntax Grammar <- Spacing Definition+ EndOfFile Definition <- Identifier LEFTARROW Expression Expression <- Sequence (SLASH Sequence)* Sequence <- Prefix* Prefix <- (AND / NOT)? Suffix Suffix <- Primary (QUESTION / STAR / PLUS)? Primary <- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT # Lexical syntax Identifier <- IdentStart IdentCont* Spacing IdentStart <- [a-zA-Z_] IdentCont <- IdentStart / [0-9] Literal <- ['] (!['] Char)* ['] Spacing / ["] (!["] Char)* ["] Spacing Class <- '[' (!']' Range)* ']' Spacing Range <- Char '-' Char / Char Char <- '\\' [nrt'"\[\]\\] / '\\' [0-2][0-7][0-7] / '\\' [0-7][0-7]? / !'\\' . LEFTARROW <- '<-' Spacing SLASH <- '/' Spacing AND <- '&' Spacing NOT <- '!' Spacing QUESTION <- '?' Spacing STAR <- '*' Spacing PLUS <- '+' Spacing OPEN <- '(' Spacing CLOSE <- ')' Spacing DOT <- '.' Spacing Spacing <- (Space / Comment)* Comment <- '#' (!EndOfLine .)* EndOfLine Space <- ' ' / '\t' / EndOfLine EndOfLine <- '\r\n' / '\n' / '\r' EndOfFile <- !. ================================================ FILE: Samples/Tutorial/Tutorial-LowTrust.fsproj ================================================ net6; ================================================ FILE: Samples/Tutorial/Tutorial.fsproj ================================================ net6 ================================================ FILE: Samples/Tutorial/Tutorial.targets ================================================ Tutorial Tutorial Exe false ================================================ FILE: Samples/Tutorial/tutorial.fs ================================================ // Copyright (c) Stephan Tolksdorf 2011 // License: Simplified BSD License. See accompanying documentation. 
// Source code for the tutorial in the documentation // 2 Parsing a single float open FParsec let test p str = match run p str with | Success(result, _, _) -> printfn "Success: %A" result | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg test pfloat "1.25" test pfloat "1.25E 2" // 3 Parsing a float between brackets let str s = pstring s let floatBetweenBrackets = str "[" >>. pfloat .>> str "]" test floatBetweenBrackets "[1.0]" test floatBetweenBrackets "[]" test floatBetweenBrackets "[1.0]" // 4 Abstracting parsers let betweenStrings s1 s2 p = str s1 >>. p .>> str s2 let floatBetweenBrackets_ = pfloat |> betweenStrings "[" "]" let floatBetweenDoubleBrackets_ = pfloat |> betweenStrings "[[" "]]" test floatBetweenBrackets_ "[1.0]" test floatBetweenDoubleBrackets_ "[[1.0]]" let between_ pBegin pEnd p = pBegin >>. p .>> pEnd let betweenStrings_ s1 s2 p = p |> between_ (str s1) (str s2) // 5 Parsing a list of floats test (many floatBetweenBrackets) "" test (many floatBetweenBrackets) "[1.0]" test (many floatBetweenBrackets) "[2][3][4]" test (many floatBetweenBrackets) "[1][2.0E]" test (many1 floatBetweenBrackets) "(1)" (* the label operator <?> replaces the parser's "expected" messages with the given label *) test (many1 (floatBetweenBrackets <?> "float between brackets")) "(1)" let floatList = str "[" >>. sepBy pfloat (str ",") .>> str "]" test floatList "[]" test floatList "[1.0]" test floatList "[4,5,6]" test floatList "[1.0," // 6 Handling whitespace test floatBetweenBrackets "[1.0, 2.0]" let ws = spaces let str_ws s = pstring s .>> ws let float_ws = pfloat .>> ws let numberList = str_ws "[" >>. sepBy float_ws (str_ws ",") .>> str_ws "]" test numberList @"[ 1 , 2 ] " test numberList @"[ 1, 2; 3]" let numberListFile = ws >>. numberList .>> eof test numberListFile " [1, 2, 3] [4]" // 7 Parsing string data test (many (str "a" <|> str "b")) "abba" test (skipStringCI "" >>. 
pfloat) "1.0" let identifier = let isIdentifierFirstChar c = isLetter c || c = '_' let isIdentifierChar c = isLetter c || isDigit c || c = '_' many1Satisfy2L isIdentifierFirstChar isIdentifierChar "identifier" .>> ws // skips trailing whitepace test identifier "_" test identifier "_test1=" test identifier "1" let stringLiteral = let normalChar = satisfy (fun c -> c <> '\\' && c <> '"') let unescape c = match c with | 'n' -> '\n' | 'r' -> '\r' | 't' -> '\t' | c -> c let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> unescape) between (pstring "\"") (pstring "\"") (manyChars (normalChar <|> escapedChar)) test stringLiteral "\"abc\"" test stringLiteral "\"abc\\\"def\\\\ghi\"" test stringLiteral "\"abc\\def\"" let stringLiteral2 = let normalCharSnippet = many1Satisfy (fun c -> c <> '\\' && c <> '"') let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> function | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c) between (pstring "\"") (pstring "\"") (manyStrings (normalCharSnippet <|> escapedChar)) test stringLiteral2 "\"abc\"" test stringLiteral2 "\"abc\\\"def\\\\ghi\"" test stringLiteral2 "\"abc\\def\"" let stringLiteral3 = let normalCharSnippet = manySatisfy (fun c -> c <> '\\' && c <> '"') let escapedChar = pstring "\\" >>. (anyOf "\\nrt\"" |>> function | 'n' -> "\n" | 'r' -> "\r" | 't' -> "\t" | c -> string c) between (pstring "\"") (pstring "\"") (stringsSepBy normalCharSnippet escapedChar) test stringLiteral3 "\"abc\"" test stringLiteral3 "\"abc\\\"def\\\\ghi\"" test stringLiteral3 "\"abc\\def\"" // 8 Sequentially applying parsers let product = pipe2 float_ws (str_ws "*" >>. float_ws) (fun x y -> x * y) test product "3 * 5";; type StringConstant = StringConstant of string * string let stringConstant = pipe3 identifier (str_ws "=") stringLiteral (fun id _ str -> StringConstant(id, str)) test stringConstant "myString = \"stringValue\"" test (float_ws .>>. (str_ws "," >>. 
float_ws)) "123, 456" let pipe7 p1 p2 p3 p4 p5 p6 p7 f = pipe4 p1 p2 p3 (tuple4 p4 p5 p6 p7) (fun x1 x2 x3 (x4, x5, x6, x7) -> f x1 x2 x3 x4 x5 x6 x7) // 9 Parsing alternatives let boolean = (stringReturn "true" true) <|> (stringReturn "false" false) test boolean "false" test boolean "true" test boolean "tru" test ((ws >>. str "a") <|> (ws >>. str "b")) " b" test (ws >>. (str "a" <|> str "b")) " b" ================================================ FILE: Test/AllTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. let run() = printfn "Testing FParsec.Buffer ..." FParsec.Test.BufferTests.run() printfn "Testing FParsec.CharSet ..." FParsec.Test.CharSetTests.run() printfn "Testing FParsec.HexFloat ..." FParsec.Test.HexFloatTests.run() printfn "Testing FParsec.Text ..." FParsec.Test.TextTests.run() #if !LOW_TRUST #if !DISABLE_STREAM_BACKTRACKING_TESTS // In .NET Core System.Text.Decoder no longer support serialization, see https://github.com/stephan-tolksdorf/fparsec/issues/95 printfn "Testing FParsec.Cloning ..." FParsec.Test.CloningTests.run() #endif printfn "Testing FParsec.StringBuffer ..." FParsec.Test.StringBufferTests.run() #endif printfn "Testing FParsec.CharStream ..." FParsec.Test.CharStreamTests.run() printfn "Testing FParsec.Primitives ..." FParsec.Test.PrimitivesTests.run() printfn "Testing FParsec.CharParsers ..." FParsec.Test.CharParsersTests.run() printfn "Testing FParsec.OperatorPrecedenceParserTests ..." FParsec.Test.OperatorPrecedenceParserTests.run() printfn "Testing FParsec.IdentifierValidator ..." FParsec.Test.IdentifierValidatorTests.run() #if !LOW_TRUST printfn "Testing FParsec.StaticMapping ... " printfn "(this can take a while)" if System.Diagnostics.Debugger.IsAttached then printfn "Note: When the Visual Studio debugger is attached, this test requires lots of memory." 
FParsec.Test.RangeTests.run() FParsec.Test.StaticMappingTests.run() #endif printfn "No error was found." [] let main _argv = #if NETCOREAPP System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance); #endif try run() 0 with | ex -> printfn $"error: {ex}" 1 ================================================ FILE: Test/BufferTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2010 // License: Simplified BSD License. See accompanying documentation. module FParsec.Test.BufferTests open System open System.Runtime.InteropServices open Microsoft.FSharp.NativeInterop open FParsec.Test.Test #nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code." type Buffer = FParsec.Buffer let testSwapByteOrder() = Buffer.SwapByteOrder(0xffffffffu) |> Equal 0xffffffffu Buffer.SwapByteOrder(0x00000000u) |> Equal 0x00000000u Buffer.SwapByteOrder(0x12345678u) |> Equal 0x78563412u Buffer.SwapByteOrder(0xffffffffffffffffUL) |> Equal 0xffffffffffffffffUL Buffer.SwapByteOrder(0x0000000000000000UL) |> Equal 0x0000000000000000UL Buffer.SwapByteOrder(0x123456789abcdef0UL) |> Equal 0xf0debc9a78563412UL #if LOW_TRUST let array = [|0x12345678u; 0x9abcdef0u; 0x12345678u|] Buffer.SwapByteOrder(array) array |> Equal [|0x78563412u; 0xf0debc9au; 0x78563412u;|] #else let p = NativePtr.stackalloc 3 NativePtr.set p 0 0x12345678u NativePtr.set p 1 0x9abcdef0u NativePtr.set p 2 0x12345678u Buffer.SwapByteOrder(Span<_>(NativePtr.toVoidPtr p, 3)) NativePtr.get p 0 |> Equal 0x78563412u NativePtr.get p 1 |> Equal 0xf0debc9au NativePtr.get p 2 |> Equal 0x78563412u #endif Buffer.SwapByteOrder(0x01020304u) |> Equal 0x04030201u #if !LOW_TRUST let testCopy() = let n = 64 let bytes = Array.init n (fun i -> byte i) let buffer1 = Array.zeroCreate n : byte[] let buffer2 = Array.zeroCreate n : byte[] let handle = GCHandle.Alloc(buffer2, GCHandleType.Pinned) let buffer2Ptr = NativePtr.ofNativeInt 
(handle.AddrOfPinnedObject()) : nativeptr<byte> for iSrc = 0 to n do for iDst = 0 to n do for size = 0 to min (n - iSrc) (n - iDst) do Array.blit bytes 0 buffer1 0 n Array.blit bytes 0 buffer2 0 n System.Buffer.BlockCopy(buffer1, iSrc, buffer1, iDst, size) Buffer.Copy(NativePtr.add buffer2Ptr iDst, NativePtr.add buffer2Ptr iSrc, size) if buffer1 <> buffer2 then Fail() (* buffer2Ptr is not used past this point: unpin buffer2 so the GCHandle is not leaked *) handle.Free() try Buffer.Copy(NativePtr.ofNativeInt 0n, NativePtr.ofNativeInt 0n, -1) Fail() with :? System.ArgumentOutOfRangeException -> () let testEqual() = let n = 16 let buffer1 = NativePtr.stackalloc n let buffer2 = NativePtr.stackalloc n for i = 0 to n - 1 do NativePtr.set buffer1 i (uint32 i) NativePtr.set buffer2 i (uint32 i) for length = 0 to n do for i = 0 to length - 1 do Buffer.Equals(buffer1, buffer2, length) |> True NativePtr.set buffer2 i 0xffffffffu Buffer.Equals(buffer1, buffer2, length) |> False NativePtr.set buffer2 i (uint32 i) #endif let run() = testSwapByteOrder() #if !LOW_TRUST testCopy() testEqual() #endif ================================================ FILE: Test/CharParsersTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2007-2011 // License: Simplified BSD License. See accompanying documentation. 
module FParsec.Test.CharParsersTests open System.Text.RegularExpressions open FParsec open FParsec.Error open FParsec.Primitives open FParsec.CharParsers open FParsec.Test.Test type NLO = NumberLiteralOptions type NLF = NumberLiteralResultFlags let testCharParsers() = pchar ' ' |> ROk " " 1 ' ' pchar '\t' |> ROk "\t\t" 1 '\t' pchar ' ' |> RError "" 0 (expectedString " ") pchar ' ' |> RError "x" 0 (expectedString " ") pchar '\r' |> RError "_\r" 0 Errors.ExpectedNewline newline |> RError "_\n" 0 Errors.ExpectedNewline newline |> RError "" 0 Errors.ExpectedNewline pchar '\n' |> ROkNL "\r" 1 '\n' newline |> ROkNL "\r" 1 '\n' pchar '\r' |> ROkNL "\r" 1 '\r' pchar '\n' |> ROkNL "\r\n" 2 '\n' pchar '\r' |> ROkNL "\r\n" 2 '\r' pchar '\n' |> ROkNL "\n" 1 '\n' pchar '\r' |> ROkNL "\n" 1 '\r' skipChar '\t' |> ROk "\t" 1 () charReturn '\t' 0 |> ROk "\t" 1 0 skipNewline |> ROkNL "\n" 1 () newlineReturn 0 |> ROkNL "\r\n" 2 0 try pchar EOS |> ignore; Fail() with :? System.ArgumentException -> () anyChar |> RError "" 0 Errors.ExpectedAnyChar skipAnyChar |> RError "" 0 Errors.ExpectedAnyChar anyChar |> ROk " " 1 ' ' anyChar |> ROk "\ufffe" 1 '\ufffe' skipAnyChar |> ROk " " 1 () anyChar |> ROk "\t\t" 1 '\t' skipAnyChar |> ROk "\t\t" 1 () anyChar |> ROkNL "\r\n" 2 '\n' skipAnyChar |> ROkNL "\r\n" 2 () anyChar |> ROkNL "\n\n" 1 '\n' skipAnyChar |> ROkNL "\n\n" 1 () satisfy (fun c -> true) |> RError "" 0 NoErrorMessages skipSatisfy (fun c -> true) |> RError "" 0 NoErrorMessages satisfyL (fun c -> true) "test" |> RError "" 0 (expected "test") skipSatisfyL (fun c -> true) "test" |> RError "" 0 (expected "test") satisfy ((=) '1') |> ROk "1" 1 '1' satisfy ((=) '\t') |> ROk "\t" 1 '\t' satisfy ((=) '1') |> ROk "11" 1 '1' satisfy ((=) '1') |> RError "0" 0 NoErrorMessages satisfyL ((=) '1') "test" |> RError "2" 0 (expected "test") satisfyL ((=) '\r') "test" |> RError "\r" 0 (expected "test") satisfy ((=) '\n') |> ROkNL "\r" 1 '\n' satisfy ((=) '\n') |> ROkNL "\r\n" 2 '\n' satisfy ((=) '\n') 
|> ROkNL "\n" 1 '\n' skipSatisfy ((=) '1') |> ROk "1" 1 () skipSatisfy ((=) '\t') |> ROk "\t" 1 () skipSatisfy ((=) '1') |> ROk "11" 1 () skipSatisfy ((=) '1') |> RError "0" 0 NoErrorMessages skipSatisfyL ((=) '1') "test" |> RError "2" 0 (expected "test") skipSatisfyL ((=) '\r') "test" |> RError "\r" 0 (expected "test") skipSatisfy ((=) '\n') |> ROkNL "\r" 1 () skipSatisfy ((=) '\n') |> ROkNL "\r\n" 2 () skipSatisfy ((=) '\n') |> ROkNL "\n" 1 () let testAnyNoneOf() = anyOf "1" |> ROk "1" 1 '1' anyOf "1" |> RError "2" 0 (Errors.ExpectedAnyCharIn("1")) noneOf "1" |> RError "1" 0 (Errors.ExpectedAnyCharNotIn("1")) noneOf "1" |> ROk "2" 1 '2' skipAnyOf "1" |> ROk "1" 1 () skipAnyOf "1" |> RError "2" 0 (Errors.ExpectedAnyCharIn("1")) skipNoneOf "1" |> RError "1" 0 (Errors.ExpectedAnyCharNotIn("1")) skipNoneOf "1" |> ROk "2" 1 () //#nowarn "44" // "This construct is deprecated." let testSpecialCharParsers() = for i = 0 to 1023 do let c = char i isUpper c |> Equal (System.Char.IsUpper(c)) isLower c |> Equal (System.Char.IsLower(c)) isLetter c |> Equal (System.Char.IsLetter(c)) isAsciiUpper c |> Equal (c <= '\u007f' && System.Char.IsUpper(c)) isAsciiLower c |> Equal (c <= '\u007f' && System.Char.IsLower(c)) isAsciiLetter c |> Equal (c <= '\u007f' && System.Char.IsLetter(c)) isDigit c |> Equal (c >= '0' && c <= '9') isHex c |> Equal ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) isOctal c |> Equal (c >= '0' && c <= '7') asciiUpper |> ROk "A" 1 'A' asciiUpper |> RError "a" 0 Errors.ExpectedAsciiUppercaseLetter asciiLower |> ROk "a" 1 'a' asciiLower |> RError "A" 0 Errors.ExpectedAsciiLowercaseLetter asciiLetter |> ROk "A" 1 'A' asciiLetter |> RError "1" 0 Errors.ExpectedAsciiLetter upper |> ROk "Ä" 1 'Ä' upper |> RError "ä" 0 Errors.ExpectedUppercaseLetter lower |> ROk "ä" 1 'ä' lower |> RError "Ä" 0 Errors.ExpectedLowercaseLetter letter |> ROk "Ä" 1 'Ä' letter |> RError "1" 0 Errors.ExpectedLetter digit |> ROk "1" 1 '1' digit |> RError "a" 0 
Errors.ExpectedDecimalDigit hex |> ROk "a" 1 'a' hex |> RError "g" 0 Errors.ExpectedHexadecimalDigit octal |> ROk "7" 1 '7' octal |> RError "8" 0 Errors.ExpectedOctalDigit tab |> ROk "\t" 1 '\t' tab |> RError "\r" 0 Errors.ExpectedTab unicodeNewline |> ROkNL "\r" 1 '\n' unicodeNewline |> ROkNL "\r\n" 2 '\n' unicodeNewline |> ROkNL "\n" 1 '\n' unicodeNewline |> ROkNL "\u0085" 1 '\n' unicodeNewline |> ROkNL "\u2028" 1 '\n' unicodeNewline |> ROkNL "\u2029" 1 '\n' unicodeNewline |> RError "\f" 0 Errors.ExpectedNewline unicodeNewline |> RError "\t" 0 Errors.ExpectedNewline unicodeNewline |> RError "" 0 Errors.ExpectedNewline skipUnicodeNewline |> ROkNL "\u2028" 1 () let count p = many p |>> List.fold (fun c x -> c + 1) 0 match run (count unicodeNewline) "\n\r\r\n\u0085\u2028\u2029\r\n" with | Success(c,_,pos) -> c |> Equal 7; pos.Index |> Equal 9L; pos.Line |> Equal 8L; pos.Column |> Equal 1L | Failure _ -> Fail() spaces |> ROk "" 0 () spaces |> ROk " " 1 () spaces |> ROk " " 2 () spaces1 |> RError "" 0 Errors.ExpectedWhitespace spaces1 |> ROk " " 1 () spaces1 |> ROk " " 2 () unicodeSpaces |> ROk "" 0 () unicodeSpaces |> ROk " " 1 () unicodeSpaces |> ROk " \u200A" 2 () // '\u200A' is a "hair space" (interestingly, the '\u200B' "zero width space" character is not recognized as white space) unicodeSpaces1 |> RError "" 0 Errors.ExpectedWhitespace unicodeSpaces1 |> ROk " " 1 () unicodeSpaces1 |> ROk " \u200A" 2 () match run spaces "\n \r\t\t\r\n\n " with | Success(_, _, pos) -> pos.Index |> Equal 9L; pos.Line |> Equal 5L; pos.Column |> Equal 2L | _ -> Fail() match run spaces1 "\n \r\t\t\r\n\n " with | Success(_, _, pos) -> pos.Index |> Equal 9L; pos.Line |> Equal 5L; pos.Column |> Equal 2L | _ -> Fail() match run unicodeSpaces "\n \r\t\t\r\n\n \u0085\u000C\u2028\u2029 \r\n\t\u200A" with | Success(_, _, pos) -> pos.Index |> Equal 18L; pos.Line |> Equal 9L; pos.Column |> Equal 3L | _ -> Fail() match run unicodeSpaces1 "\n \r\t\t\r\n\n \u0085\u000C\u2028\u2029 \r\n\t\u200A" 
with | Success(_, _, pos) -> pos.Index |> Equal 18L; pos.Line |> Equal 9L; pos.Column |> Equal 3L | _ -> Fail() eof |> ROk "" 0 () (pchar '1' >>. eof) |> ROk "1" 1 () eof |> RError "1" 0 Errors.ExpectedEndOfInput

// Exercises the string-level parsers: pstring/skipString/stringReturn and their
// case-insensitive variants, anyString/skipAnyString, skipped/withSkippedString,
// restOfLine/skipRestOfLine and regex/regexL.
// Each check has the shape `parser |> R* input expectedIndexOffset expectedResultOrError`.
let testStringParsers() =
    // pstring: empty pattern, single char, two chars, longer strings.
    pstring "" |> ROk "1" 0 ""
    pstring "1" |> RError "" 0 (expectedString "1")
    pstring "1" |> RError "2" 0 (expectedString "1")
    pstring "1" |> ROk "1" 1 "1"
    pstring "12" |> RError "" 0 (expectedString "12")
    pstring "12" |> RError "1" 0 (expectedString "12")
    pstring "12" |> RError "22" 0 (expectedString "12")
    pstring "12" |> RError "13" 0 (expectedString "12")
    pstring "12" |> ROk "12" 2 "12"
    pstring "test" |> RError "pest" 0 (expectedString "test")
    pstring "test" |> ROk "test" 4 "test"
    skipString "test" |> ROk "test" 4 ()
    stringReturn "test" -1 |> ROk "test" 4 -1

    // Constructing pstring with a pattern containing '\r', '\n' or '\uffff'
    // is expected to throw an ArgumentException.
    for invalidPattern in ["\r"; "\n"; "\uffff"; "\r1"; "1\n"; "12\n"] do
        try pstring invalidPattern |> ignore; Fail()
        with :? System.ArgumentException -> ()

    // Case-insensitive variants return the matched input text, not the pattern.
    pstringCI "t" |> RError "p" 0 (expectedStringCI "t")
    pstringCI "t" |> ROk "t" 1 "t"
    pstringCI "t" |> ROk "T" 1 "T"
    pstringCI "T" |> ROk "t" 1 "t"
    pstringCI "T" |> ROk "T" 1 "T"

    skipStringCI "t" |> RError "p" 0 (expectedStringCI "t")
    skipStringCI "t" |> ROk "t" 1 ()
    skipStringCI "t" |> ROk "T" 1 ()
    skipStringCI "T" |> ROk "t" 1 ()
    skipStringCI "T" |> ROk "T" 1 ()

    pstringCI "tEsT" |> RError "pest" 0 (expectedStringCI "tEsT")
    pstringCI "tEsT" |> ROk "TeSt" 4 "TeSt"
    skipStringCI "tEsT" |> RError "pest" 0 (expectedStringCI "tEsT")
    skipStringCI "tEsT" |> ROk "TeSt" 4 ()
    stringCIReturn "tEsT" -1 |> ROk "TeSt" 4 -1

    try skipStringCI "\n" |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try skipStringCI "12\n" |> ignore; Fail()
    with :? System.ArgumentException -> ()

    // anyString/skipAnyString: "\r\n" counts as one char in the result
    // but advances the stream index by two.
    anyString 3 |> RError "12" 0 (Errors.ExpectedAnySequenceOfNChars(3))
    skipAnyString 3 |> RError "12" 0 (Errors.ExpectedAnySequenceOfNChars(3))
    anyString 3 |> ROkNL "12\r\n4" 4 "12\n"
    skipAnyString 3 |> ROkNL "12\r\n4" 4 ()

    skipped (skipAnyString 3) |> RError "12" 0 (Errors.ExpectedAnySequenceOfNChars(3))
    skipAnyString 3 |> withSkippedString (fun str () -> str) |> RError "12" 0 (Errors.ExpectedAnySequenceOfNChars(3))
    skipped (skipAnyString 3) |> ROk "123" 3 "123"
    skipAnyString 3 |> withSkippedString (fun str () -> str) |> ROk "123" 3 "123"
    skipped (skipAnyString 3) |> ROkNL "12\r\n4" 4 "12\n"
    skipAnyString 3 |> withSkippedString (fun str () -> str) |> ROkNL "12\r\n4" 4 "12\n"

    // restOfLine/skipRestOfLine. The inputs below start with TWO spaces:
    // two spaces plus "\r\n" give end index 4 when the newline is consumed,
    // and the two spaces alone give end index 2 when it is not.
    restOfLine true |> ROk "" 0 ""
    skipRestOfLine true |> ROk "" 0 ()
    restOfLine true |> ROkNL "\r\n1" 2 ""
    skipRestOfLine true |> ROkNL "\r\n1" 2 ()
    restOfLine true |> ROkNL "  \r\n1" 4 "  "
    skipRestOfLine true |> ROkNL "  \r\n1" 4 ()

    restOfLine false |> ROk "" 0 ""
    skipRestOfLine false |> ROk "" 0 ()
    restOfLine false |> ROk "\r\n1" 0 ""
    skipRestOfLine false |> ROk "\r\n1" 0 ()
    restOfLine false |> ROk "  \r\n1" 2 "  "
    skipRestOfLine false |> ROk "  \r\n1" 2 ()

    // regex/regexL, including newline normalization in the returned string.
    regex "abc" |> ROk "abc" 3 "abc"
    (anyChar >>. regex "abc") |> ROk "_abc" 4 "abc"
    regex ".*\r\r\n.*" |> ROkNL "abc\r\r\nabc" 9 "abc\n\nabc"
    regex "abc" |> RError "ab" 0 (Errors.ExpectedStringMatchingRegex("abc"))
    regexL "abc" "test" |> RError "ab" 0 (expected "test")

let testIdentifier() =
    // We do most of the testing in IdentifierValidatorTests.fs.
    // Here we only test the identifier parser wrapper.

    let U = System.Char.ConvertFromUtf32
    let ud800 = string (char 0xd800)   // a lone surrogate code unit
    let a_ud800 = "a" + ud800
    let mc2 = "MC" + (string '²')
    let s1 = U 0x00010280              // a supplementary-plane char (two UTF-16 code units)

    let expectedIdentifierError = expected Strings.Identifier
    let invalidCharacterError = messageError Strings.IdentifierContainsInvalidCharacterAtIndicatedPosition

    // Default options.
    let defaultOpts = IdentifierOptions()
    identifier defaultOpts |> RError "" 0 expectedIdentifierError
    identifier defaultOpts |> RError "1" 0 expectedIdentifierError
    identifier defaultOpts |> RFatalError ud800 0 invalidCharacterError
    identifier defaultOpts |> RFatalError a_ud800 1 invalidCharacterError
    identifier defaultOpts |> ROk "a" 1 "a"
    identifier defaultOpts |> ROk "abc1" 4 "abc1"
    identifier defaultOpts |> ROk s1 2 s1

    // Join-control characters, custom label and custom invalid-char message.
    identifier defaultOpts |> RFatalError "क्‍" 2 invalidCharacterError
    identifier (IdentifierOptions(allowJoinControlChars=true)) |> ROk "क्‍" 3 "क्‍"

    identifier (IdentifierOptions(label="test")) |> RError "1" 0 (expected "test")
    identifier (IdentifierOptions(invalidCharMessage="test")) |> RFatalError "क्‍" 2 (messageError "test")

    identifier defaultOpts |> ROk "ϒ\u0308" 2 "ϒ\u0308"
    identifier defaultOpts |> ROk mc2 2 "MC"

    // Normalization, with and without normalizing before validation.
    let normOpts = IdentifierOptions(normalization=System.Text.NormalizationForm.FormKC)
    let preNormOpts = IdentifierOptions(normalization=System.Text.NormalizationForm.FormKC,
                                        normalizeBeforeValidation=true,
                                        preCheckContinue= fun c -> FParsec.IdentifierValidator.IsXIdContinueOrSurrogate(c) || c > '\u007f')

    identifier normOpts |> ROk "ϒ\u0308" 2 "\u03AB"
    identifier normOpts |> ROk mc2 2 "MC"
    identifier preNormOpts |> ROk mc2 3 "MC2"

    // Custom ASCII start/continue predicates.
    let abOpts = IdentifierOptions(isAsciiIdStart=((=) 'a'), isAsciiIdContinue=((=) 'b'))

    identifier abOpts |> RError "b" 0 (expected Strings.Identifier)
    identifier abOpts |> ROk "aa" 1 "a"
    identifier abOpts |> ROk "abc" 2 "ab"

    let abNonAsciiOpts = IdentifierOptions(isAsciiIdStart=((=) 'a'), isAsciiIdContinue=((=) 'b'),
                                           allowAllNonAsciiCharsInPreCheck = true)

    identifier abNonAsciiOpts |> RError "b" 0 (expected Strings.Identifier)
    identifier abNonAsciiOpts |> ROk "aa" 1 "a"
    identifier abNonAsciiOpts |> ROk "abc" 2 "ab"
    identifier abNonAsciiOpts |> ROk "abä" 3 "abä"
    identifier abNonAsciiOpts |> RFatalError "ab\uFB1C" 2 invalidCharacterError

    // Pre-check predicates that accept more than the ASCII predicates do:
    // chars passing the pre-check but failing validation yield a fatal error.
    let abPreOpts = IdentifierOptions(isAsciiIdStart=((=) 'a'), isAsciiIdContinue=((=) 'b'),
                                      preCheckStart = (fun c -> c >= 'a' && c <= 'b'),
                                      preCheckContinue = (fun c -> c >= 'b' && c <= 'c'))

    identifier abPreOpts |> RFatalError "b" 0 invalidCharacterError
    identifier abPreOpts |> RFatalError "abc" 2 invalidCharacterError

    let abPreNonAsciiOpts = IdentifierOptions(isAsciiIdStart=((=) 'a'), isAsciiIdContinue=((=) 'b'),
                                              preCheckStart = (fun c -> c >= 'a' && c <= 'b'),
                                              preCheckContinue = (fun c -> c >= 'b' && c <= 'c'),
                                              allowAllNonAsciiCharsInPreCheck = true)

    identifier abPreNonAsciiOpts |> RFatalError "b" 0 invalidCharacterError
    identifier abPreNonAsciiOpts |> RFatalError "abc" 2 invalidCharacterError
    identifier abPreNonAsciiOpts |> ROk "abä" 3 "abä"

let testManySatisfy() = manySatisfy isDigit |> ROk "" 0 "" manySatisfy2 isHex isDigit |> ROk "" 0 "" manySatisfy isDigit |> ROk "123" 3 "123" manySatisfy2 isHex isDigit |> ROk "a23a" 3 "a23" skipManySatisfy isDigit |> ROk "" 0 () skipManySatisfy2 isHex isDigit |> ROk "" 0 () skipManySatisfy isDigit |> ROk "123" 3 () skipManySatisfy2 isHex isDigit |> ROk "a23a" 3 () many1Satisfy isDigit |> RError "a" 0 NoErrorMessages many1Satisfy2 isHex isDigit |> RError "g" 0 NoErrorMessages many1SatisfyL isDigit "test" |> RError "a" 0 (expected "test") many1Satisfy2L isHex isDigit "test" |> RError "g" 0 (expected "test") many1Satisfy isDigit |> ROk "123" 3 "123" many1Satisfy2 isHex isDigit |> ROk "a23a" 3 "a23" skipMany1SatisfyL isDigit "test" |> RError "a" 0 (expected "test") skipMany1Satisfy2L isHex isDigit "test" |> RError "g" 0 (expected "test") skipMany1Satisfy isDigit |> ROk "123" 3 () skipMany1Satisfy2 isHex isDigit |> ROk "a23a" 3 () manyMinMaxSatisfy 0 3 isDigit |> ROk "1234" 3 "123" manyMinMaxSatisfy 3
3 isDigit |> ROk "1234" 3 "123" manyMinMaxSatisfyL 4 4 isDigit "test" |> RError "123a" 0 (expected "test") manyMinMaxSatisfy2 0 3 isHex isDigit |> ROk "a234" 3 "a23" manyMinMaxSatisfy2 3 3 isHex isDigit |> ROk "a234" 3 "a23" manyMinMaxSatisfy2L 4 4 isHex isDigit "test" |> RError "a23a" 0 (expected "test") skipManyMinMaxSatisfy 0 3 isDigit |> ROk "1234" 3 () skipManyMinMaxSatisfy 3 3 isDigit |> ROk "1234" 3 () skipManyMinMaxSatisfyL 4 4 isDigit "test" |> RError "123a" 0 (expected "test") skipManyMinMaxSatisfy2 0 3 isHex isDigit |> ROk "a234" 3 () skipManyMinMaxSatisfy2 3 3 isHex isDigit |> ROk "a234" 3 () skipManyMinMaxSatisfy2L 4 4 isHex isDigit "test" |> RError "a23a" 0 (expected "test") try manyMinMaxSatisfy 0 -1 isDigit |> ROk "1234" 3 "123"; Fail() with :? System.ArgumentException -> () try skipManyMinMaxSatisfy 0 -1 isDigit |> ROk "1234" 3 (); Fail() with :? System.ArgumentException -> ()

// Tests the optimized sequence combinators for chars and strings
// (manyChars*, manyCharsTill*, manyStrings*, stringsSepBy*) by comparing each
// against a reference parser built from the generic Inline.Many/Inline.ManyTill
// helpers or from many/many1/sepBy/sepBy1, over combinations of constant test parsers.
let testMany() =
    let ps1  = (constantTestParsers '1' (expected "1"))[1..] // no parser that returns OK without changing the state
    let ps2  = (constantTestParsers '2' (expected "2"))[1..]
    let ps3  = (constantTestParsers '3' (expected "3"))[1..]

    let content = "the content doesn't matter"
    // The stream must be the generic CharStream<_>: it is handed to the parsers
    // under test, which take a CharStream<'UserState>.
    use stream = new FParsec.CharStream<unit>(content, 0, content.Length)

    // --- manyChars2 / many1Chars2 ---------------------------------------------

    let many1Chars2Ref p1 p =
        Inline.Many((fun c -> (new System.Text.StringBuilder()).Append(c: char)),
                    (fun sb c -> sb.Append(c)),
                    (fun sb -> sb.ToString()),
                    p, p1)
    let manyChars2Ref p1 p = many1Chars2Ref p1 p <|>% ""

    let manySeq2 =
        seq {
            for p2 in ps2 do
                for p3 in ps3 do
                    yield [p2; p3]
        }

    for p1 in ps1 do
        for ps in manySeq2 do
            // p_1 and p_2 replay the same parser sequence; pr() rewinds both.
            let p_1, p_2, pr = seqParserAndReset2 ps
            checkParser (manyChars2 p1 p_1)  (manyChars2Ref p1 p_2) stream; pr()
            checkParser (many1Chars2 p1 p_1) (many1Chars2Ref p1 p_2) stream; pr()

    manyChars  digit |> ROkE "123" 3 "123" Errors.ExpectedDecimalDigit
    many1Chars digit |> ROkE "123" 3 "123" Errors.ExpectedDecimalDigit

    // A char parser that succeeds without consuming input must be rejected
    // instead of looping forever.
    try manyChars (preturn ' ') stream |> ignore; Fail()
    with :? System.InvalidOperationException -> ()

    // Reads any char and attaches the stream index after the read as an error
    // message, so the tests below can check which reply's error is propagated.
    let anyCharWithIndexMessage : Parser<char,_> =
        fun stream ->
            let c = stream.ReadCharOrNewline()
            if c <> EOS then Reply(Ok, c, messageError (string stream.Index))
            else Reply(Error, Errors.ExpectedAnyChar)

    let sb = new System.Text.StringBuilder()
    for i = 1 to 200 do
        let s = sb.Append(char (i%10)).ToString()
        manyChars (anyCharWithIndexMessage)
        |> ROkE s s.Length s (mergeErrors (messageError (string i)) Errors.ExpectedAnyChar)

    sb.Length <- 0 // Clear() is only supported in >= .NET 4
    for i = 1 to 200 do
        let s = sb.Append(char (i%10)).ToString()
        manyCharsTill (anyCharWithIndexMessage) eof
        |> ROkE s s.Length s (messageError (string i))

    // --- manyCharsTill / many1CharsTill ---------------------------------------

    let eps1  = constantTestParsers 1 (expected "11")
    let eps2  = constantTestParsers 2 (expected "22")
    let eps3  = constantTestParsers 3 (expected "33")

    let manyCharsTillRef p endp =
        Inline.ManyTill((fun c -> (new System.Text.StringBuilder()).Append(c: char)),
                        (fun sb c -> sb.Append(c)),
                        (fun sb _ -> sb.ToString()),
                        p, endp,
                        resultForEmptySequence = (fun _ -> ""))
    let many1CharsTillRef p endp = pipe2 p (manyCharsTillRef p endp) (fun c0 s -> string c0 + s)

    let manyTillSeq =
        seq {
            for endp1 in eps1 do
                for p1 in ps1 do
                    for endp2 in eps2 do
                        for p2 in ps2 do
                            for endp3 in eps3 do
                                for p3 in ps3[1..] do
                                    yield [p1; p2; p3;], [endp1; endp2; endp3; eps3[1]]
        }

    for ps, es in manyTillSeq do
        let p_1, p_2, pr = seqParserAndReset2 ps
        let e_1, e_2, er = seqParserAndReset2 es
        checkParser (manyCharsTill  p_1 e_1) (manyCharsTillRef  p_2 e_2) stream; pr(); er()
        checkParser (many1CharsTill p_1 e_1) (many1CharsTillRef p_2 e_2) stream; pr(); er()

    manyCharsTill2 letter digit (pchar '.') |> ROk "a23." 4 "a23"
    many1CharsTill2 letter digit (pchar '.') |> ROk "a23." 4 "a23"
    manyCharsTillApply digit (pchar '.') (fun str c -> str + string c) |> ROk "23." 3 "23."
    many1CharsTillApply digit (pchar '.') (fun str c -> str + string c) |> ROk "23." 3 "23."

    try manyCharsTill (preturn ' ') (fail "t") stream |> ignore; Fail()
    with :? System.InvalidOperationException -> ()
    try many1CharsTill (preturn ' ') (fail "t") stream |> ignore; Fail()
    with :? System.InvalidOperationException -> ()

    // --- manyStrings / many1Strings -------------------------------------------

    let sps1  = constantTestParsers "1" (expected "1")
    let sps2  = constantTestParsers "2" (expected "2")
    let sps3  = constantTestParsers "3" (expected "3")
    let sps4  = constantTestParsers "4" (expected "4")
    let sps5  = constantTestParsers "5" (expected "5")
    let sps6  = constantTestParsers "6" (expected "6")
    let sps7  = constantTestParsers "7" (expected "7")

    let manyStringsRef p  = many p |>> List.fold (fun acc s -> acc + s) ""
    let many1StringsRef p = many1 p |>> List.reduce (+)

    let manySeq7 =
        seq {
            for p1 in sps1[1..] do
                for p2 in sps2[1..] do
                    for p3 in sps3[1..] do
                        for p4 in sps4[1..] do
                            for p5 in sps5[1..] do
                                for p6 in sps6[1..] do
                                    for p7 in sps7[1..] do
                                        yield [p1;p2;p3;p4;p5;p6;p7]
        }

    for ps in manySeq7 do
        let p_1, p_2, pr = seqParserAndReset2 ps
        checkParser (manyStrings p_1)  (manyStringsRef p_2) stream; pr()
        checkParser (many1Strings p_1) (many1StringsRef p_2) stream

    manyStrings2 (pstring "1") (pstring "2") |> ROkE "12223" 4 "1222" (expectedString "2")

    try manyStrings (preturn "1") stream |> ignore; Fail()
    with :? System.InvalidOperationException -> ()

    // --- stringsSepBy / stringsSepBy1 -----------------------------------------

    // Pairs every parser for r1 except the first with every parser for r2.
    let sepByTestParsers r1 e1 r2 e2 =
        let p1s = constantTestParsers r1 e1
        let p2s = constantTestParsers r2 e2
        seq {
            for p1 in p1s[1..] do
                for p2 in p2s do
                    yield p1, p2
        }

    let sepBySeq3 =
        seq {
            for p1 in (constantTestParsers "1" (expected "p1"))[1..] do
                for sep1, p2 in sepByTestParsers "a" (expected "sep1") "2" (expected "p2") do
                    for sep2, p3 in sepByTestParsers "b" (expected "sep2") "3" (expected "p3") do
                        for sep3, p4 in sepByTestParsers "c" (expected "sep3") "4" (expected "p4") do
                            yield [p1; p2; p3; p4], [sep1; sep2; sep3]
            // We exclude the following parameter combinations from regular test runs
            // because executing all of them just takes too much time.
            (*
                            for sep4, p5 in sepByTestParsers "d" (expected "sep4") "5" (expected "p5") do
                                yield [p1; p2; p3; p4; p5], [sep1; sep2; sep3; sep4]

            for p1, sep1 in sepByTestParsers "1" (expected "p1") "a" (expected "sep1") do
                for p2, sep2 in sepByTestParsers "2" (expected "p2") "b" (expected "sep2") do
                    for p3, sep3 in sepByTestParsers "3" (expected "p3") "c" (expected "sep3") do
                        for p4, sep4 in sepByTestParsers "4" (expected "p4") "d" (expected "sep4") do
                            for p5 in (constantTestParsers "5" (expected "p5"))[1..] do
                                yield [p1; p2; p3; p4; p5], [sep1; sep2; sep3; sep4]
            *)
        }

    // Maps the list result of sepBy/sepBy1 to the string that stringsSepBy is
    // expected to produce for the same input (null when the reply is not Ok).
    let expectedStringsSepByResultForSepByReply (reply: Reply<string list>) =
        if reply.Status <> Ok then null
        else
            match reply.Result with
            | [] -> ""
            | [_] -> "1"
            | [_;_] -> "1a2"
            | [_;_;_] -> "1a2b3"
            | [_;_;_;_] -> "1a2b3c4"
            | [_;_;_;_;_] -> "1a2b3c4d5"
            | _ -> failwith "stringsSepByTest"

    for ps, ss in sepBySeq3 do
        let p, pr = seqParserAndReset ps
        let s, sr = seqParserAndReset ss
        // The reference parser rewinds p and s itself, because checkParser runs
        // it after the parser under test has already advanced them.
        checkParser (stringsSepBy p s)
                    (fun stream ->
                        pr(); sr()
                        let r = sepBy p s stream
                        let result = expectedStringsSepByResultForSepByReply r
                        Reply(r.Status, result, r.Error))
                    stream
        pr(); sr()
        checkParser (stringsSepBy1 p s)
                    (fun stream ->
                        pr(); sr()
                        let r = sepBy1 p s stream
                        let result = expectedStringsSepByResultForSepByReply r
                        Reply(r.Status, result, r.Error))
                    stream

    try stringsSepBy (preturn "1") (preturn ";") stream |> ignore; Fail() with :?
System.InvalidOperationException -> ()

// Tests charsTillString, charsTillStringCI, skipCharsTillString and
// skipCharsTillStringCI: finding (and optionally consuming) a search string,
// honoring the maximum number of chars before it, and validating arguments.
let testSkipToString() =
    let unlimited = System.Int32.MaxValue

    // Runs the six standard cases for one parser and one setting of the
    // skip-string flag:
    //   1. needle absent                 -> error at the end of `missInput` (index 5)
    //   2. needle at index 0             -> ok, `emptyResult`
    //   3. same, with maxCount = 0       -> ok, `emptyResult`
    //   4. needle after three chars      -> ok, `prefixResult`
    //   5. same, with maxCount = 3       -> ok, `prefixResult`
    //   6. same, with maxCount = 2       -> error at index 2
    // When `consume` is true the needle's three chars are consumed as well.
    let checkParserFamily makeParser needle notFoundError
                          missInput hitInput hitInputMax0 lateInput
                          consume emptyResult prefixResult =
        let endAtStart  = if consume then 3 else 0
        let endAfterGap = endAtStart + 3
        makeParser needle consume unlimited |> RError missInput 5 notFoundError
        makeParser needle consume unlimited |> ROk hitInput endAtStart emptyResult
        makeParser needle consume 0         |> ROk hitInputMax0 endAtStart emptyResult
        makeParser needle consume unlimited |> ROk lateInput endAfterGap prefixResult
        makeParser needle consume 3         |> ROk lateInput endAfterGap prefixResult
        makeParser needle consume 2         |> RError lateInput 2 notFoundError

    let notFound   = Errors.CouldNotFindString("abc")
    let notFoundCI = Errors.CouldNotFindCaseInsensitiveString("AbC")

    // First all four parsers without consuming the needle, then with consuming it.
    for consume in [false; true] do
        checkParserFamily charsTillString       "abc" notFound   "abbab" "abc" "abc" "abdabc" consume "" "abd"
        checkParserFamily charsTillStringCI     "AbC" notFoundCI "abbab" "aBc" "abc" "aBdaBc" consume "" "aBd"
        checkParserFamily skipCharsTillString   "abc" notFound   "abbab" "abc" "abc" "abdabc" consume () ()
        checkParserFamily skipCharsTillStringCI "AbC" notFoundCI "abbab" "aBc" "abc" "aBdaBc" consume () ()

    // A search string containing '\r' is rejected when the parser is constructed.
    try charsTillString "1\r" false 1 |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try charsTillStringCI "1\r" false 1 |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try skipCharsTillString "1\r" false 1 |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try skipCharsTillStringCI "1\r" false 1 |> ignore; Fail()
    with :? System.ArgumentException -> ()

    // A negative maxCount is rejected when the parser is constructed.
    try charsTillString "1" false -1 |> ignore; Fail()
    with :? System.ArgumentOutOfRangeException -> ()
    try charsTillStringCI "1" false -1 |> ignore; Fail()
    with :? System.ArgumentOutOfRangeException -> ()
    try skipCharsTillString "1" false -1 |> ignore; Fail()
    with :? System.ArgumentOutOfRangeException -> ()
    try skipCharsTillStringCI "1" false -1 |> ignore; Fail()
    with :? System.ArgumentOutOfRangeException -> ()

let testNumberParsers() = let ROkI content i result parser = ROk content i result parser let ROk content result parser = ROk content (content.Length - 1) result parser let testNumberLiteral() = let all = NLO.AllowSuffix ||| NLO.AllowMinusSign ||| NLO.AllowPlusSign ||| NLO.AllowFraction ||| NLO.AllowFractionWOIntegerPart ||| NLO.AllowExponent ||| NLO.AllowHexadecimal ||| NLO.AllowBinary ||| NLO.AllowOctal ||| NLO.AllowInfinity ||| NLO.AllowNaN numberLiteral all "nl" |> RError "|" 0 (expected "nl") numberLiteral all "nl" |> RError "+|" 0 (expected "nl") numberLiteral all "nl" |> RError "-|" 0 (expected "nl") numberLiteral all "nl" |> RError "+n" 0 (expected "nl") numberLiteral all "nl" |> RError "-n" 0 (expected "nl") numberLiteral all "nl" |> ROk "0|" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0|" (NumberLiteral("+0", NLF.HasPlusSign ||| NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "-0|" (NumberLiteral("-0", NLF.HasMinusSign ||| NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0u|" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 1), 'u', EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0az|" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 2), 'a', 'z',
EOS, EOS)) numberLiteral all "nl" |> ROk "0uAZ|" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 3), 'u', 'A', 'Z', EOS)) numberLiteral all "nl" |> ROk "0ulLF|" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 4), 'u', 'l', 'L', 'F')) let all2 = all ||| NLO.IncludeSuffixCharsInString numberLiteral all2 "nl" |> ROk "0u|" (NumberLiteral("0u", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 1), 'u', EOS, EOS, EOS)) numberLiteral all2 "nl" |> ROk "0az|" (NumberLiteral("0az", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 2), 'a', 'z', EOS, EOS)) numberLiteral all2 "nl" |> ROk "0uAZ|" (NumberLiteral("0uAZ", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 3), 'u', 'A', 'Z', EOS)) numberLiteral all2 "nl" |> ROk "0ulLF|" (NumberLiteral("0ulLF", NLF.IsDecimal ||| NLF.HasIntegerPart ||| ((enum) 4), 'u', 'l', 'L', 'F')) numberLiteral all "nl" |> ROk ".0|" (NumberLiteral(".0", NLF.IsDecimal ||| NLF.HasFraction, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "1.|" (NumberLiteral("1.", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk ".0E0|" (NumberLiteral(".0E0", NLF.IsDecimal ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0e-123f|" (NumberLiteral("+0e-123", NLF.IsDecimal ||| NLF.HasPlusSign ||| NLF.HasIntegerPart ||| NLF.HasExponent ||| ((enum) 1), 'f', EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0.1E+123|" (NumberLiteral("0.1E+123", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0.0E0|" (NumberLiteral("0.0E0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "9.9E9|" (NumberLiteral("9.9E9", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "00.00E00|" (NumberLiteral("00.00E00", 
NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "99.99E99|" (NumberLiteral("99.99E99", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "-909.090e-09909z|" (NumberLiteral("-909.090e-09909", NLF.HasMinusSign ||| NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent ||| (enum) 1, 'z', EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0x.0|" (NumberLiteral("0x.0", NLF.IsHexadecimal ||| NLF.HasFraction, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0x0.|" (NumberLiteral("0x0.", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0X.fP0|" (NumberLiteral("0X.fP0", NLF.IsHexadecimal ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0xFp-0f|" (NumberLiteral("+0xFp-0", NLF.HasPlusSign ||| NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasExponent ||| ((enum) 1), 'f', EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xf.0AP+123|" (NumberLiteral("0xf.0AP+123", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0x0.0P0|" (NumberLiteral("0x0.0P0", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xff.fp9|" (NumberLiteral("0xff.fp9", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xa.aP9|" (NumberLiteral("0xa.aP9", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xF.Fp0|" (NumberLiteral("0xF.Fp0", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xA.AP9|" 
(NumberLiteral("0xA.AP9", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0x00.00P00|" (NumberLiteral("0x00.00P00", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xff.ffp99|" (NumberLiteral("0xff.ffp99", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xaa.aaP99|" (NumberLiteral("0xaa.aaP99", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xFF.FFp00|" (NumberLiteral("0xFF.FFp00", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0xAA.AAP99|" (NumberLiteral("0xAA.AAP99", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0x0afFA0.afFA0P+9099A|" (NumberLiteral("+0x0afFA0.afFA0P+9099", NLF.HasPlusSign ||| NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| NLF.HasExponent ||| (enum) 1, 'A', EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0b02" (NumberLiteral("0b0", NLF.IsBinary ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "-0B0102" (NumberLiteral("-0B010", NLF.HasMinusSign ||| NLF.IsBinary ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0B010ul|" (NumberLiteral("+0B010", NLF.HasPlusSign ||| NLF.IsBinary ||| NLF.HasIntegerPart ||| (enum) 2, 'u', 'l', EOS, EOS)) numberLiteral all "nl" |> ROk "-0o08" (NumberLiteral("-0o0", NLF.HasMinusSign ||| NLF.IsOctal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "0O0778" (NumberLiteral("0O077", NLF.IsOctal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+0o1770ul|" (NumberLiteral("+0o1770", 
NLF.HasPlusSign ||| NLF.IsOctal ||| NLF.HasIntegerPart ||| (enum) 2, 'u', 'l', EOS, EOS)) numberLiteral all "nl" |> ROk "Infinityy" (NumberLiteral("Infinity", NLF.IsInfinity, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "-InFINitYy" (NumberLiteral("-InFINitY", NLF.HasMinusSign ||| NLF.IsInfinity, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "+iNfi" (NumberLiteral("+iNf", NLF.HasPlusSign ||| NLF.IsInfinity, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "NaNn" (NumberLiteral("NaN", NLF.IsNaN, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> ROk "-nAna" (NumberLiteral("-nAn", NLF.HasMinusSign ||| NLF.IsNaN, EOS, EOS, EOS, EOS)) numberLiteral all "nl" |> RError ".a" 1 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError ".ea" 1 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError ".E-1" 1 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError ".1ea" 3 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "-1ea" 3 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "1.e-a" 4 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "1e+a" 3 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "0x.g" 3 Errors.ExpectedHexadecimalDigit numberLiteral all "nl" |> RError "0x.pa" 3 Errors.ExpectedHexadecimalDigit numberLiteral all "nl" |> RError "0x.p-1" 3 Errors.ExpectedHexadecimalDigit numberLiteral all "nl" |> RError "+0x.1pa" 6 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "0x1pa" 4 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "0x1.p-a" 6 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "0x1p+a" 5 Errors.ExpectedDecimalDigit numberLiteral all "nl" |> RError "0b3" 2 Errors.ExpectedBinaryDigit numberLiteral all "nl" |> RError "-0b.0" 3 Errors.ExpectedBinaryDigit numberLiteral all "nl" |> RError "+0ou" 3 Errors.ExpectedOctalDigit numberLiteral all "nl" |> RError "0o.0" 2 Errors.ExpectedOctalDigit numberLiteral (all ^^^ NLO.AllowPlusSign) "nl" |> RError "+1|" 0 (expected 
"nl") numberLiteral (all ^^^ NLO.AllowPlusSign) "nl" |> ROk "-1|" (NumberLiteral("-1", NLF.HasMinusSign ||| NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowMinusSign) "nl" |> RError "-1|" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowMinusSign) "nl" |> ROk "+1|" (NumberLiteral("+1", NLF.HasPlusSign ||| NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ (NLO.AllowPlusSign ||| NLO.AllowMinusSign)) "nl" |> ROk "1|" (NumberLiteral("1", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowFractionWOIntegerPart) "nl" |> RError ".0|" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowFractionWOIntegerPart) "nl" |> RError "0x.0|" 2 Errors.ExpectedHexadecimalDigit numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> ROk "1." (NumberLiteral("1", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> ROkI "10.10E2" 2 (NumberLiteral("10", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> RError ".1" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> RError ".1" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> ROkI "0x0.1p2" 3 (NumberLiteral("0x0", NLF.IsHexadecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowFraction) "nl" |> ROkI "10.10E2" 2 (NumberLiteral("10", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowExponent) "nl" |> ROkI "1e1" 2 (NumberLiteral("1", NLF.IsDecimal ||| NLF.HasIntegerPart ||| (enum) 1, 'e', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowExponent) "nl" |> ROkI "1.0e1" 4 (NumberLiteral("1.0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| (enum) 1, 'e', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowExponent) "nl" |> ROkI "0x1p1" 4 (NumberLiteral("0x1", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| (enum) 1, 
'p', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowExponent) "nl" |> ROkI "0x1.0p1" 6 (NumberLiteral("0x1.0", NLF.IsHexadecimal ||| NLF.HasIntegerPart ||| NLF.HasFraction ||| (enum) 1, 'p', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowSuffix) "nl" |> ROk "0u" (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowSuffix) "nl" |> ROk "0x1u" (NumberLiteral("0x1", NLF.IsHexadecimal ||| NLF.HasIntegerPart, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowBinary) "nl" |> ROkI "0b1|" 2 (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| (enum) 1, 'b', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowOctal) "nl" |> ROkI "0o1|" 2 (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| (enum) 1, 'o', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowHexadecimal) "nl" |> ROkI "0x1|" 2 (NumberLiteral("0", NLF.IsDecimal ||| NLF.HasIntegerPart ||| (enum) 1, 'x', EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowInfinity) "nl" |> RError "Infinity|" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowInfinity) "nl" |> ROk "NaN|" (NumberLiteral("NaN", NLF.IsNaN, EOS, EOS, EOS, EOS)) numberLiteral (all ^^^ NLO.AllowNaN) "nl" |> RError "NaN|" 0 (expected "nl") numberLiteral (all ^^^ NLO.AllowNaN) "nl" |> ROk "Infinity|" (NumberLiteral("Infinity", NLF.IsInfinity, EOS, EOS, EOS, EOS)) testNumberLiteral() let testPfloat() = pfloat |> RError "" 0 Errors.ExpectedFloatingPointNumber pfloat |> RError "-0x" 3 Errors.ExpectedHexadecimalDigit pfloat |> ROk "0|" 0. pfloat |> ROk "+0|" 0. pfloat |> ROk "-0|" -0. pfloat |> ROk "0x0|" 0. pfloat |> ROk "+0x0|" 0. pfloat |> ROk "-0X0|" -0. pfloat |> ROk "+123|" 123. 
pfloat |> ROk "+0x123|" (floatOfHexString "0x123") pfloat |> ROk "+123e2|" 123e2 pfloat |> ROk "+0x123p2|" (floatOfHexString "0x123p2") pfloat |> ROk "-123.456e123|" -123.456e123 pfloat |> ROk "1e99999|" System.Double.PositiveInfinity pfloat |> ROk "0x1p99999|" System.Double.PositiveInfinity pfloat |> ROk "-1e99999|" System.Double.NegativeInfinity pfloat |> ROk "-0x1p99999|" System.Double.NegativeInfinity pfloat |> ROk "-0x123cde.123afAcEp123|" (floatOfHexString "-0x123cde.123afAcEp123") pfloat |> ROk "-0x1.fffffffffffffp1023|" -System.Double.MaxValue pfloat |> ROk "-0x1.fffffffffffffp1024|" System.Double.NegativeInfinity pfloat |> ROk "Inf|" System.Double.PositiveInfinity pfloat |> ROk "-Infinity|" System.Double.NegativeInfinity pfloat >>% 1 |> ROk "NaN|" 1 testPfloat() let testPuint64() = let expectedE = Errors.ExpectedUInt64 let overflowE = Errors.NumberOutsideOfUInt64Range puint64 |> RError "" 0 expectedE puint64 |> RError "+1" 0 expectedE puint64 |> RError "-1" 0 expectedE puint64 |> RFatalError "18446744073709551620" 0 overflowE puint64 |> RFatalError "18446744073709551619" 0 overflowE puint64 |> RFatalError "18446744073709551618" 0 overflowE puint64 |> RFatalError "18446744073709551617" 0 overflowE puint64 |> RFatalError "18446744073709551616" 0 overflowE puint64 |> RFatalError "0000018446744073709551616" 0 overflowE puint64 |> RFatalError "111111111111111111111" 0 overflowE puint64 |> ROk "0|" 0UL puint64 |> ROk "000|" 0UL puint64 |> ROk "12345678901234567890|" 12345678901234567890UL puint64 |> ROk "18446744073709551615|" System.UInt64.MaxValue puint64 |> ROk "018446744073709551614|" (System.UInt64.MaxValue - 1UL) puint64 |> ROk "018446744073709551613|" (System.UInt64.MaxValue - 2UL) puint64 |> ROk "018446744073709551612|" (System.UInt64.MaxValue - 3UL) puint64 |> ROk "018446744073709551611|" (System.UInt64.MaxValue - 4UL) puint64 |> ROk "018446744073709551610|" (System.UInt64.MaxValue - 5UL) puint64 |> ROk "018446744073709551609|" (System.UInt64.MaxValue - 
6UL) puint64 |> ROk "0000018446744073709551615|" System.UInt64.MaxValue puint64 |> RError "0x" 2 Errors.ExpectedHexadecimalDigit puint64 |> RError "+0x1" 0 expectedE puint64 |> RFatalError "0x10000000000000000" 0 overflowE puint64 |> RFatalError "0x11111111111111111" 0 overflowE puint64 |> RFatalError "0Xfffffffffffffffff" 0 overflowE puint64 |> ROk "0x0|" 0UL puint64 |> ROk "0x000|" 0UL puint64 |> ROk "0x1234567890abcdef|" 0x1234567890abcdefUL puint64 |> ROk "0X1234567890ABCDEF|" 0x1234567890abcdefUL puint64 |> ROk "0xffffffffffffffff|" System.UInt64.MaxValue puint64 |> ROk "0xfffffffffffffffe|" (System.UInt64.MaxValue - 1UL) puint64 |> ROk "0xfffffffffffffff1|" (System.UInt64.MaxValue - 14UL) puint64 |> ROk "0xfffffffffffffff0|" (System.UInt64.MaxValue - 15UL) puint64 |> ROk "0xffffffffffffffef|" (System.UInt64.MaxValue - 16UL) puint64 |> ROk "0x00000ffffffffffffffff|" System.UInt64.MaxValue puint64 |> RError "0o" 2 Errors.ExpectedOctalDigit puint64 |> RError "+0o1" 0 expectedE puint64 |> RFatalError "0o2000000000000000000001" 0 overflowE puint64 |> RFatalError "0o2000000000000000000000" 0 overflowE puint64 |> RFatalError "0o7777777777777777777777" 0 overflowE puint64 |> RFatalError "0O77777777777777777777777" 0 overflowE puint64 |> ROk "0o0|" 0UL puint64 |> ROk "0o000|" 0UL puint64 |> ROk "0o1234567123456701234567|" 0o1234567123456701234567UL puint64 |> ROk "0o1777777777777777777777|" System.UInt64.MaxValue puint64 |> ROk "0o1777777777777777777776|" (System.UInt64.MaxValue - 1UL) puint64 |> ROk "0o1777777777777777777771|" (System.UInt64.MaxValue - 6UL) puint64 |> ROk "0o1777777777777777777770|" (System.UInt64.MaxValue - 7UL) puint64 |> ROk "0o1777777777777777777767|" (System.UInt64.MaxValue - 8UL) puint64 |> ROk "0O000001777777777777777777777|" System.UInt64.MaxValue puint64 |> RError "0b" 2 Errors.ExpectedBinaryDigit puint64 |> RError "+0b1" 0 expectedE puint64 |> RFatalError "0b10000000000000000000000000000000000000000000000000000000000000001" 0 overflowE 
puint64 |> RFatalError "0b10000000000000000000000000000000000000000000000000000000000000000" 0 overflowE puint64 |> RFatalError "0b11111111111111111111111111111111111111111111111111111111111111111" 0 overflowE puint64 |> ROk "0b0|" 0UL puint64 |> ROk "0b000|" 0UL puint64 |> ROk "0b1111111111111111111111111111111111111111111111111111111111111111|" System.UInt64.MaxValue puint64 |> ROk "0b1111111111111111111111111111111111111111111111111111111111111110|" (System.UInt64.MaxValue - 1UL) puint64 |> ROk "0b1111111111111111111111111111111111111111111111111111111111111101|" (System.UInt64.MaxValue - 2UL) puint64 |> ROk "0b1111111111111111111111111111111111111111111111111111111111111100|" (System.UInt64.MaxValue - 3UL) puint64 |> ROk "0B000001111111111111111111111111111111111111111111111111111111111111111|" System.UInt64.MaxValue testPuint64() let testPint64() = let expectedE = Errors.ExpectedInt64 let overflowE = Errors.NumberOutsideOfInt64Range pint64 |> RFatalError "18446744073709551615" 0 overflowE pint64 |> RFatalError "+00018446744073709551615" 0 overflowE pint64 |> RFatalError "-18446744073709551615" 0 overflowE pint64 |> RFatalError "-0018446744073709551615" 0 overflowE pint64 |> RFatalError "9223372036854775808" 0 overflowE pint64 |> RFatalError "+0009223372036854775808" 0 overflowE pint64 |> RFatalError "-9223372036854775809" 0 overflowE pint64 |> RFatalError "-09223372036854775809" 0 overflowE pint64 |> ROk "9223372036854775807|" System.Int64.MaxValue pint64 |> ROk "+000009223372036854775807|" System.Int64.MaxValue pint64 |> ROk "-9223372036854775808|" System.Int64.MinValue pint64 |> ROk "-009223372036854775808|" System.Int64.MinValue pint64 |> RFatalError "0xffffffffffffffff" 0 overflowE pint64 |> RFatalError "+0x000ffffffffffffffff" 0 overflowE pint64 |> RFatalError "-0xffffffffffffffff" 0 overflowE pint64 |> RFatalError "-0X0ffffffffffffffff" 0 overflowE pint64 |> RFatalError "0x8000000000000000" 0 overflowE pint64 |> RFatalError "+0x0008000000000000000" 0 
overflowE pint64 |> RFatalError "-0x8000000000000001" 0 overflowE pint64 |> RFatalError "-0x0008000000000000001" 0 overflowE pint64 |> ROk "0x7fffffffffffffff|" System.Int64.MaxValue pint64 |> ROk "+0x000007fffffffffffffff|" System.Int64.MaxValue pint64 |> ROk "-0x8000000000000000|" System.Int64.MinValue pint64 |> ROk "-0x008000000000000000|" System.Int64.MinValue pint64 |> RFatalError "0o2000000000000000000000" 0 overflowE pint64 |> RFatalError "+0o002000000000000000000000" 0 overflowE pint64 |> RFatalError "-0o0002000000000000000000000" 0 overflowE pint64 |> RFatalError "0o1000000000000000000000" 0 overflowE pint64 |> RFatalError "0o1000000000000000000000" 0 overflowE pint64 |> RFatalError "+0o001000000000000000000000" 0 overflowE pint64 |> RFatalError "-0o1000000000000000000001" 0 overflowE pint64 |> RFatalError "-0O001000000000000000000001" 0 overflowE pint64 |> ROk "0o777777777777777777777|" System.Int64.MaxValue pint64 |> ROk "+0o00777777777777777777777|" System.Int64.MaxValue pint64 |> ROk "-0o1000000000000000000000|" System.Int64.MinValue pint64 |> ROk "-0O001000000000000000000000|" System.Int64.MinValue pint64 |> RFatalError "+0b00011111111111111111111111111111111111111111111111111111111111111111" 0 overflowE pint64 |> RFatalError "-0b011111111111111111111111111111111111111111111111111111111111111111" 0 overflowE pint64 |> RFatalError "+0B1000000000000000000000000000000000000000000000000000000000000000" 0 overflowE pint64 |> RFatalError "0b0001000000000000000000000000000000000000000000000000000000000000000" 0 overflowE pint64 |> RFatalError "-0b0001000000000000000000000000000000000000000000000000000000000000001" 0 overflowE pint64 |> RFatalError "-0b1000000000000000000000000000000000000000000000000000000000000001" 0 overflowE pint64 |> ROk "0b111111111111111111111111111111111111111111111111111111111111111|" System.Int64.MaxValue pint64 |> ROk "+0b00111111111111111111111111111111111111111111111111111111111111111|" System.Int64.MaxValue pint64 |> ROk 
"-0b1000000000000000000000000000000000000000000000000000000000000000|"System.Int64.MinValue pint64 |> ROk "-0B0001000000000000000000000000000000000000000000000000000000000000000|" System.Int64.MinValue testPint64() let testPuint32() = let expectedE = Errors.ExpectedUInt32 let overflowE = Errors.NumberOutsideOfUInt32Range puint32 |> RError "" 0 expectedE puint32 |> RError "+1" 0 expectedE puint32 |> RError "-1" 0 expectedE puint32 |> RFatalError "4294967300" 0 overflowE puint32 |> RFatalError "4294967299" 0 overflowE puint32 |> RFatalError "4294967298" 0 overflowE puint32 |> RFatalError "4294967297" 0 overflowE puint32 |> RFatalError "4294967296" 0 overflowE puint32 |> RFatalError "000004294967296" 0 overflowE puint32 |> RFatalError "11111111111" 0 overflowE puint32 |> ROk "0|" 0u puint32 |> ROk "000|" 0u puint32 |> ROk "1234567890|" 1234567890u puint32 |> ROk "4294967295|" System.UInt32.MaxValue puint32 |> ROk "4294967294|" (System.UInt32.MaxValue - 1u) puint32 |> ROk "4294967293|" (System.UInt32.MaxValue - 2u) puint32 |> ROk "4294967292|" (System.UInt32.MaxValue - 3u) puint32 |> ROk "4294967291|" (System.UInt32.MaxValue - 4u) puint32 |> ROk "4294967290|" (System.UInt32.MaxValue - 5u) puint32 |> ROk "4294967289|" (System.UInt32.MaxValue - 6u) puint32 |> ROk "000004294967295|" System.UInt32.MaxValue puint32 |> RError "0x" 2 Errors.ExpectedHexadecimalDigit puint32 |> RError "+0x1" 0 expectedE puint32 |> RFatalError "0x100000001" 0 overflowE puint32 |> RFatalError "0x100000000" 0 overflowE puint32 |> RFatalError "0x111111111" 0 overflowE puint32 |> RFatalError "0Xfffffffff" 0 overflowE puint32 |> ROk "0x0|" 0u puint32 |> ROk "0x000|" 0u puint32 |> ROk "0x1234abcd|" 0x1234abcdu puint32 |> ROk "0X1234ABCD|" 0x1234abcdu puint32 |> ROk "0xffffffff|" System.UInt32.MaxValue puint32 |> ROk "0xfffffffe|" (System.UInt32.MaxValue - 1u) puint32 |> ROk "0xfffffff1|" (System.UInt32.MaxValue - 14u) puint32 |> ROk "0xfffffff0|" (System.UInt32.MaxValue - 15u) puint32 |> ROk 
"0xffffffef|" (System.UInt32.MaxValue - 16u) puint32 |> ROk "0x00000ffffffff|" System.UInt32.MaxValue puint32 |> RError "0o" 2 Errors.ExpectedOctalDigit puint32 |> RError "+0o1" 0 expectedE puint32 |> RFatalError "0o40000000001" 0 overflowE puint32 |> RFatalError "0o40000000000" 0 overflowE puint32 |> RFatalError "0o777777777777" 0 overflowE puint32 |> RFatalError "0O7777777777777" 0 overflowE puint32 |> ROk "0o0|" 0u puint32 |> ROk "0o000|" 0u puint32 |> ROk "0o12345670123|" 0o12345670123u puint32 |> ROk "0o37777777777|" System.UInt32.MaxValue puint32 |> ROk "0o37777777776|" (System.UInt32.MaxValue - 1u) puint32 |> ROk "0o37777777771|" (System.UInt32.MaxValue - 6u) puint32 |> ROk "0o37777777770|" (System.UInt32.MaxValue - 7u) puint32 |> ROk "0o37777777767|" (System.UInt32.MaxValue - 8u) puint32 |> ROk "0O0000037777777777|" System.UInt32.MaxValue puint32 |> RError "0b" 2 Errors.ExpectedBinaryDigit puint32 |> RError "+0b1" 0 expectedE puint32 |> RFatalError "0b100000000000000000000000000000001" 0 overflowE puint32 |> RFatalError "0b100000000000000000000000000000000" 0 overflowE puint32 |> RFatalError "0B111111111111111111111111111111111" 0 overflowE puint32 |> ROk "0b0|" 0u puint32 |> ROk "0b000|" 0u puint32 |> ROk "0b11111111111111111111111111111111|" System.UInt32.MaxValue puint32 |> ROk "0b11111111111111111111111111111110|" (System.UInt32.MaxValue - 1u) puint32 |> ROk "0b11111111111111111111111111111101|" (System.UInt32.MaxValue - 2u) puint32 |> ROk "0b11111111111111111111111111111100|" (System.UInt32.MaxValue - 3u) puint32 |> ROk "0B0000011111111111111111111111111111111|" System.UInt32.MaxValue testPuint32() let testPint32() = let expectedE = Errors.ExpectedInt32 let overflowE = Errors.NumberOutsideOfInt32Range pint32 |> RFatalError "4294967295" 0 overflowE pint32 |> RFatalError "+4294967295" 0 overflowE pint32 |> RFatalError "-4294967295" 0 overflowE pint32 |> RFatalError "-004294967295" 0 overflowE pint32 |> RFatalError "2147483648" 0 overflowE pint32 |> 
RFatalError "+0002147483648" 0 overflowE pint32 |> RFatalError "-2147483649" 0 overflowE pint32 |> RFatalError "-02147483649" 0 overflowE pint32 |> ROk "2147483647|" System.Int32.MaxValue pint32 |> ROk "+000002147483647|" System.Int32.MaxValue pint32 |> ROk "-2147483648|" System.Int32.MinValue pint32 |> ROk "-002147483648|" System.Int32.MinValue pint32 |> RFatalError "0xffffffffffffffff" 0 overflowE pint32 |> RFatalError "+0x000ffffffffffffffff" 0 overflowE pint32 |> RFatalError "-0xffffffffffffffff" 0 overflowE pint32 |> RFatalError "-0X0ffffffffffffffff" 0 overflowE pint32 |> RFatalError "0x80000000" 0 overflowE pint32 |> RFatalError "+0x00080000000" 0 overflowE pint32 |> RFatalError "-0x80000001" 0 overflowE pint32 |> RFatalError "-0x00080000001" 0 overflowE pint32 |> ROk "0x7fffffff|" System.Int32.MaxValue pint32 |> ROk "+0x000007fffffff|" System.Int32.MaxValue pint32 |> ROk "-0x80000000|" System.Int32.MinValue pint32 |> ROk "-0x0080000000|" System.Int32.MinValue pint32 |> RFatalError "0o40000000000" 0 overflowE pint32 |> RFatalError "+0o0040000000000" 0 overflowE pint32 |> RFatalError "-0o00040000000000" 0 overflowE pint32 |> RFatalError "0o20000000000" 0 overflowE pint32 |> RFatalError "+0o0020000000000" 0 overflowE pint32 |> RFatalError "-0o20000000001" 0 overflowE pint32 |> RFatalError "-0O0020000000001" 0 overflowE pint32 |> ROk "0o17777777777|" System.Int32.MaxValue pint32 |> ROk "+0o0017777777777|" System.Int32.MaxValue pint32 |> ROk "-0o20000000000|" System.Int32.MinValue pint32 |> ROk "-0O0020000000000|" System.Int32.MinValue pint32 |> RFatalError "+0b000111111111111111111111111111111111" 0 overflowE pint32 |> RFatalError "-0b0111111111111111111111111111111111" 0 overflowE pint32 |> RFatalError "+0B1000000000000000000000000000000000000000000000000000000000000000" 0 overflowE pint32 |> RFatalError "0b0001000000000000000000000000000000000000000000000000000000000000000" 0 overflowE pint32 |> RFatalError 
"-0b0001000000000000000000000000000000000000000000000000000000000000001" 0 overflowE pint32 |> RFatalError "-0b1000000000000000000000000000000000000000000000000000000000000001" 0 overflowE pint32 |> ROk "0b1111111111111111111111111111111|" System.Int32.MaxValue pint32 |> ROk "+0b001111111111111111111111111111111|" System.Int32.MaxValue pint32 |> ROk "-0b10000000000000000000000000000000|"System.Int32.MinValue pint32 |> ROk "-0B00010000000000000000000000000000000|" System.Int32.MinValue testPint32() let testPintOther() = let overflowInt32 = Errors.NumberOutsideOfInt32Range let overflowInt16 = Errors.NumberOutsideOfInt16Range let overflowInt8 = Errors.NumberOutsideOfInt8Range let overflowUInt32 = Errors.NumberOutsideOfUInt32Range let overflowUInt16 = Errors.NumberOutsideOfUInt16Range let overflowUInt8 = Errors.NumberOutsideOfUInt8Range puint32 |> RError "+0|" 0 Errors.ExpectedUInt32 puint32 |> RFatalError "4294967296|" 0 overflowUInt32 puint32 |> RFatalError "00004294967296|" 0 overflowUInt32 puint32 |> RFatalError "11111111111|" 0 overflowUInt32 puint32 |> ROk "0|" 0u puint32 |> ROk "000|" 0u puint32 |> ROk "1234567890|" 1234567890u puint32 |> ROk "0001234567890|" 1234567890u puint32 |> ROk "4294967295|" System.UInt32.MaxValue puint32 |> ROk "0004294967295|" System.UInt32.MaxValue pint32 |> RError "+|" 0 Errors.ExpectedInt32 pint32 |> RFatalError "2147483648|" 0 overflowInt32 pint32 |> RFatalError "-00002147483649|" 0 overflowInt32 pint32 |> RFatalError "11111111111|" 0 overflowInt32 pint32 |> ROk "0|" 0 pint32 |> ROk "+000|" 0 pint32 |> ROk "1234567890|" 1234567890 pint32 |> ROk "0001234567890|" 1234567890 pint32 |> ROk "2147483647|" System.Int32.MaxValue pint32 |> ROk "+0002147483647|" System.Int32.MaxValue pint32 |> ROk "-2147483648|" System.Int32.MinValue pint32 |> ROk "-0002147483648|" System.Int32.MinValue pint32 |> RFatalError "0x80000000|" 0 overflowInt32 pint32 |> ROk "0x7fffffff|" 0x7fffffff pint32 |> ROk "-0x80000000|" -0x80000000 pint32 |> RFatalError 
"-0x80000001|" 0 overflowInt32 puint32 |> RFatalError "0x100000000|" 0 overflowUInt32 puint32 |> ROk "0xffffffff|" 0xffffffffu pint16 |> RFatalError "0x8000|" 0 overflowInt16 pint16 |> ROk "0x7fff|" 0x7fffs pint16 |> ROk "-0x8000|" -0x8000s pint16 |> RFatalError "-0x8001|" 0 overflowInt16 puint16 |> RFatalError "0x10000|" 0 overflowUInt16 puint16 |> ROk "0xffff|" 0xffffus pint8 |> RFatalError "0x80|" 0 overflowInt8 pint8 |> ROk "0x7f|" 0x7fy pint8 |> ROk "-0x80|" -0x80y pint8 |> RFatalError "-0x81|" 0 overflowInt8 puint8 |> RFatalError "0x100|" 0 overflowUInt8 puint8 |> ROk "0xff|" 0xffuy testPintOther() let testFollowedBy() = notFollowedByEof |> ROk " " 0 () notFollowedByEof |> RError "" 0 Errors.UnexpectedEndOfInput followedByNewline |> RError "1" 0 Errors.ExpectedNewline followedByNewline |> RError " " 0 Errors.ExpectedNewline followedByNewline |> ROk "\r" 0 () followedByNewline |> ROk "\n" 0 () notFollowedByNewline |> ROk "1" 0 () notFollowedByNewline |> ROk " " 0 () notFollowedByNewline |> RError "\r" 0 Errors.UnexpectedNewline notFollowedByNewline |> RError "\n" 0 Errors.UnexpectedNewline followedByString "a" |> ROk "a" 0 () followedByString "a" |> RError "A" 0 (expectedString "a") followedByString "123" |> ROk "123" 0 () followedByString "123" |> RError "124" 0 (expectedString "123") notFollowedByString "a" |> ROk "A" 0 () notFollowedByString "a" |> RError "a" 0 (unexpectedString "a") notFollowedByString "123" |> ROk "124" 0 () notFollowedByString "123" |> RError "123" 0 (unexpectedString "123") try followedByString "13\r" |> ignore; Fail() with :? System.ArgumentException -> () try notFollowedByString "13\r" |> ignore; Fail() with :? 
System.ArgumentException -> () followedByStringCI "A" |> ROk "a" 0 () followedByStringCI "A" |> ROk "A" 0 () followedByStringCI "A" |> RError "B" 0 (expectedStringCI "A") followedByStringCI "aBc" |> ROk "AbC" 0 () followedByStringCI "aBc" |> RError "Abd" 0 (expectedStringCI "aBc") notFollowedByStringCI "A" |> ROk "B" 0 () notFollowedByStringCI "A" |> RError "a" 0 (unexpectedStringCI "A") notFollowedByStringCI "A" |> RError "A" 0 (unexpectedStringCI "A") notFollowedByStringCI "aBc" |> ROk "Abd" 0 () notFollowedByStringCI "aBc" |> RError "AbC" 0 (unexpectedStringCI "aBc") try followedByStringCI "13\r" |> ignore; Fail() with :? System.ArgumentException -> () try notFollowedByStringCI "13\r" |> ignore; Fail() with :? System.ArgumentException -> () let one chr = fun c -> if c = chr then true else Fail() let oneN chr = fun c -> if c = chr then false else Fail() let eos1 = fun c -> Fail() nextCharSatisfies (one '2') |> ROk "2" 0 () nextCharSatisfies (one '\n') |> ROk "\n" 0 () nextCharSatisfies (one '\n') |> ROk "\r\n" 0 () nextCharSatisfies (one '\n') |> ROk "\r" 0 () nextCharSatisfies eos1 |> RError "" 0 NoErrorMessages nextCharSatisfies (oneN '1') |> RError "1" 0 NoErrorMessages nextCharSatisfies (oneN '\n') |> RError "\r" 0 NoErrorMessages nextCharSatisfiesNot (oneN '2') |> ROk "2" 0 () nextCharSatisfiesNot (oneN '\n') |> ROk "\n" 0 () nextCharSatisfiesNot (oneN '\n') |> ROk "\r\n" 0 () nextCharSatisfiesNot (oneN '\n') |> ROk "\r" 0 () nextCharSatisfiesNot eos1 |> ROk "" 0 () nextCharSatisfiesNot (one '1') |> RError "1" 0 NoErrorMessages nextCharSatisfiesNot (one '\n') |> RError "\r" 0 NoErrorMessages let two (str: string) = fun c0 c1 -> if c0 = str[0] && c1 = str[1] then true else Fail() let twoN (str: string) = fun c0 c1 -> if c0 = str[0] && c1 = str[1] then false else Fail() let eos2 = fun c0 c1 -> Fail() next2CharsSatisfy (two "12") |> ROk "12" 0 () next2CharsSatisfy (two "\n2") |> ROk "\r2" 0 () next2CharsSatisfy (two "\n2") |> ROk "\r\n2" 0 () next2CharsSatisfy 
(two "\n2") |> ROk "\n2" 0 () next2CharsSatisfy (two "\n\n") |> ROk "\n\r" 0 () next2CharsSatisfy (two "\n\n") |> ROk "\r\r" 0 () next2CharsSatisfy (two "\n\n") |> ROk "\r\n\r" 0 () next2CharsSatisfy eos2 |> RError "" 0 NoErrorMessages next2CharsSatisfy eos2 |> RError "1" 0 NoErrorMessages next2CharsSatisfy eos2 |> RError "\r" 0 NoErrorMessages next2CharsSatisfy eos2 |> RError "\r\n" 0 NoErrorMessages next2CharsSatisfy eos2 |> RError "\n" 0 NoErrorMessages next2CharsSatisfy (twoN "13") |> RError "13" 0 NoErrorMessages next2CharsSatisfy (twoN "\n\t") |> RError "\n\t" 0 NoErrorMessages next2CharsSatisfy (twoN "\n\t") |> RError "\r\n\t" 0 NoErrorMessages next2CharsSatisfy (twoN "\n\t") |> RError "\r\t" 0 NoErrorMessages next2CharsSatisfyNot (twoN "12") |> ROk "12" 0 () next2CharsSatisfyNot (twoN "\n2") |> ROk "\r2" 0 () next2CharsSatisfyNot (twoN "\n2") |> ROk "\r\n2" 0 () next2CharsSatisfyNot (twoN "\n2") |> ROk "\n2" 0 () next2CharsSatisfyNot (twoN "\n\n") |> ROk "\n\r" 0 () next2CharsSatisfyNot (twoN "\n\n") |> ROk "\r\r" 0 () next2CharsSatisfyNot (twoN "\n\n") |> ROk "\r\n\r" 0 () next2CharsSatisfyNot eos2 |> ROk "" 0 () next2CharsSatisfyNot eos2 |> ROk "1" 0 () next2CharsSatisfyNot eos2 |> ROk "\r" 0 () next2CharsSatisfyNot eos2 |> ROk "\r\n" 0 () next2CharsSatisfyNot eos2 |> ROk "\n" 0 () next2CharsSatisfyNot (two "13") |> RError "13" 0 NoErrorMessages next2CharsSatisfyNot (two "\n\t") |> RError "\n\t" 0 NoErrorMessages next2CharsSatisfyNot (two "\n\t") |> RError "\r\n\t" 0 NoErrorMessages next2CharsSatisfyNot (two "\n\t") |> RError "\r\t" 0 NoErrorMessages anyChar >>. previousCharSatisfies (one '1') |> ROk "12" 1 () anyChar >>. previousCharSatisfies (one '\n') |> ROkNL "\n1" 1 () anyChar >>. previousCharSatisfies (one '\n') |> ROkNL "\r\n1" 2 () anyChar >>. previousCharSatisfies (one '\n') |> ROkNL "\r1" 1 () anyChar >>. 
previousCharSatisfies (oneN '0') |> RError "01" 1 NoErrorMessages
    previousCharSatisfies eos1 |> RError "1" 0 NoErrorMessages
    previousCharSatisfies eos1 |> RError "" 0 NoErrorMessages

    anyChar >>. previousCharSatisfiesNot (oneN '1') |> ROk "12" 1 ()
    anyChar >>. previousCharSatisfiesNot (oneN '\n') |> ROkNL "\n1" 1 ()
    anyChar >>. previousCharSatisfiesNot (oneN '\n') |> ROkNL "\r\n1" 2 ()
    anyChar >>. previousCharSatisfiesNot (oneN '\n') |> ROkNL "\r1" 1 ()
    anyChar >>. previousCharSatisfiesNot (one '0') |> RError "01" 1 NoErrorMessages
    previousCharSatisfiesNot eos1 |> ROk "1" 0 ()
    previousCharSatisfiesNot eos1 |> ROk "" 0 ()

/// Exercises getUserState, setUserState, updateUserState and userStateSatisfies
/// directly on a CharStream whose user state is an int.
let testUserStateParsers() =
    // Asserts the reply status first and then that the reply carries no error messages.
    let expectReply status (reply: Reply<_>) =
        reply.Status |> Equal status
        reply.Error |> Equal NoErrorMessages

    use stream = new CharStream<_>("test")
    stream.UserState <- 1

    // Reading the state returns it as the parser result.
    let fetched = getUserState stream
    expectReply Ok fetched
    fetched.Result |> Equal 1

    // Overwriting the state.
    setUserState 2 stream |> expectReply Ok
    stream.UserState |> Equal 2

    // Transforming the state in place.
    updateUserState (fun i -> i + 1) stream |> expectReply Ok
    stream.UserState |> Equal 3

    // A predicate that holds yields Ok; one that does not yields Error.
    // Both replies carry no error messages.
    userStateSatisfies ((=) 3) stream |> expectReply Ok
    userStateSatisfies ((<>) 3) stream |> expectReply Error

/// Runs every test group of this module, in declaration order.
let run() =
    [ testCharParsers
      testAnyNoneOf
      testSpecialCharParsers
      testStringParsers
      testIdentifier
      testManySatisfy
      testMany
      testSkipToString
      testNumberParsers
      testFollowedBy
      testUserStateParsers ]
    |> List.iter (fun runGroup -> runGroup())

================================================
FILE: Test/CharSetTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2008-2010
// License: Simplified BSD License. See accompanying documentation.
module FParsec.Test.CharSetTests

open FParsec.Test.Test

/// Checks FParsec.CharSet membership for a handful of hand-written inputs,
/// including the boundary chars '\u0000' and '\uffff' and constructor strings
/// that contain duplicate chars.
let basicTests() =
    // Builds a CharSet from `s`, then asserts that every char of `sin` is
    // reported as contained and every char of `sout` is reported as absent.
    let test s (sin: string) (sout: string) =
        let cs = FParsec.CharSet(s)
        for c in sin do
            cs.Contains(c) |> True
        for c in sout do
            cs.Contains(c) |> False

    test "" "" "a\u0000\uffff"
    test "a" "a" "\u0000\uffff"
    test "\u0000\uffffa" "a\u0000\uffffa" "b\u0001\ufffe"
    test "\u0002\u0001\u0399\u0400\u0401\u0399\u0400\u0401\uffffabc123"
         "\u0002\u0001\u0399\u0400\u0401\uffffabc123"
         "\u0000\u0398\u0402\ufffed0"

/// Randomised membership test with a fixed seed, so every run sees the same
/// inputs. Each round builds a CharSet from 1 to 99 pseudo-random chars and
/// then verifies that
///   * every generated char is contained in the set, and
///   * the direct lower/upper neighbour of each generated char is NOT
///     contained, unless that neighbour was generated too.
let moreTests() =
    let rand = new System.Random(12345)
    for j = 0 to 20000 do
        let n = rand.Next(1, 100)
        let cs = Array.zeroCreate n
        // Each random int supplies two chars: its low 16 bits and the bits above.
        for i = 1 to n/2 do
            let r = rand.Next()
            cs[i*2 - 2] <- char r
            cs[i*2 - 1] <- char (r >>> 16)
        // For odd n the last slot is filled from its own random int.
        if n%2 = 1 then
            cs[cs.Length - 1] <- char (rand.Next())
        let set = FParsec.CharSet(new string(cs))
        // Sorting puts equal and adjacent chars next to each other, so the
        // neighbour checks below only need the previous and the next element.
        Array.sortInPlace cs
        let mutable c_1 = '\uffff' // previous char; sentinel value for the first element
        let mutable c = cs[0]
        for i = 0 to n - 1 do
            set.Contains(c) |> True
            // Lower neighbour. The `c > '\u0000'` guard mirrors the
            // `c < '\uffff'` guard below: without it `char (int c - 1)` wraps
            // around to '\uffff' when c is '\u0000', and the test would wrongly
            // demand that '\uffff' is absent even if it was generated.
            if c > '\u0000' && c <> c_1 && int c - 1 <> int c_1 then
                set.Contains(char (int c - 1)) |> False
            if i + 1 < n then
                let c1 = cs[i + 1]
                // Upper neighbour; skipped for '\uffff', which has no successor.
                if c < '\uffff' && c <> c1 && int c + 1 <> int c1 then
                    set.Contains(char (int c + 1)) |> False
                c_1 <- c
                c <- c1

/// Runs all CharSet tests.
let run() =
    basicTests()
    moreTests()

================================================
FILE: Test/CharStreamTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2007-2011
// License: Simplified BSD License. See accompanying documentation.

module FParsec.Test.CharStreamTests

#nowarn "9" // "Uses of this construct may result in the generation of unverifiable .NET IL code."
#nowarn "51" // "The address-of operator may result in non-verifiable code."
open System.Text open System.Text.RegularExpressions open Microsoft.FSharp.NativeInterop open FParsec open FParsec.Test.Test let EOS = CharStream.EndOfStreamChar let testNonStreamConstructors() = let s = "1234567890" let cs = s.ToCharArray() let regex = new System.Text.RegularExpressions.Regex(".*") let testStream (stream: CharStream) (index: int) (length: int) (indexOffset: int64) (supportsRegex: bool) = stream.IndexOfFirstChar |> Equal indexOffset stream.Index |> Equal indexOffset stream.LineBegin |> Equal indexOffset stream.Line |> Equal 1L stream.Encoding |> Equal System.Text.Encoding.Unicode if length > 0 then stream.Peek() |> Equal s[index] stream.Skip(s.Substring(index, length)) |> True stream.Index |> Equal (indexOffset + int64 length) stream.IsEndOfStream |> Equal true stream.Seek(indexOffset) stream.Index |> Equal indexOffset if supportsRegex then stream.Match(regex).Value |> Equal (s.Substring(index, length)) else try stream.Match(regex).Value |> ignore; Fail() with :? System.NotSupportedException -> () else stream.Peek() |> Equal EOS stream.IsEndOfStream |> True let testStringStream() = use stream = new CharStream(s) testStream stream 0 s.Length 0L true use stream = new CharStream(s, 0, s.Length) testStream stream 0 s.Length 0L true use stream = new CharStream(s, 0, s.Length, 1000L) testStream stream 0 s.Length 1000L true use stream = new CharStream(s, 1, s.Length - 1) testStream stream 1 (s.Length - 1) 0L true use stream = new CharStream(s, 1, 1, 1000L) testStream stream 1 1 1000L true use stream = new CharStream(s, 1, 0, 1000L) testStream stream 1 0 1000L true try new CharStream((null: string), 1, 10) |> ignore; Fail() with :? System.ArgumentNullException -> () try new CharStream(s, -1, 1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(s, 11, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(s, 1, 10) |> ignore; Fail() with :? 
System.ArgumentOutOfRangeException -> () try new CharStream(s, 0, 10, -1L) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(s, 0, 10, (1L <<< 60)) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () testStringStream() #if LOW_TRUST #else let testCharArrayStream() = use stream = new CharStream(cs, 0, s.Length) testStream stream 0 s.Length 0L false use stream = new CharStream(cs, 0, s.Length, 1000L) testStream stream 0 s.Length 1000L false use stream = new CharStream(cs, 1, s.Length - 1) testStream stream 1 (s.Length - 1) 0L false use stream = new CharStream(cs, 1, 1, 1000L) testStream stream 1 1 1000L false use stream = new CharStream(cs, 1, 0, 1000L) testStream stream 1 0 1000L false try new CharStream((null: char[]), 1, 10) |> ignore; Fail() with :? System.ArgumentNullException -> () try new CharStream(cs, -1, 1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cs, 11, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cs, 1, 10) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cs, 0, 10, -1L) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cs, 0, 10, (1L <<< 60)) |> ignore; Fail() with :? 
System.ArgumentOutOfRangeException -> () testCharArrayStream() let testCharPointerStream() = let handle = System.Runtime.InteropServices.GCHandle.Alloc(cs, System.Runtime.InteropServices.GCHandleType.Pinned) let cp = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) use stream = new CharStream(NativePtr.add cp 0, s.Length) testStream stream 0 s.Length 0L false use stream = new CharStream(NativePtr.add cp 0, s.Length, 1000L) testStream stream 0 s.Length 1000L false use stream = new CharStream(NativePtr.add cp 1, s.Length - 1) testStream stream 1 (s.Length - 1) 0L false use stream = new CharStream(NativePtr.add cp 1, 1, 1000L) testStream stream 1 1 1000L false use stream = new CharStream(NativePtr.add cp 1, 0, 1000L) testStream stream 1 0 1000L false try new CharStream(NativePtr.ofNativeInt 0n, 10) |> ignore; Fail() with :? System.ArgumentNullException -> () try new CharStream(cp, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () if sizeof = 4 then try new CharStream(cp, System.Int32.MaxValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cp, 10, -1L) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try new CharStream(cp, 10, (1L <<< 60)) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () handle.Free() () testCharPointerStream() #endif let testStreamConstructorArgumentChecking() = let encoding = System.Text.Encoding.UTF8 let str = "1234567890" let streamBytes = Array.append (encoding.GetPreamble()) (encoding.GetBytes(str)) use stream = new System.IO.MemoryStream(streamBytes) try new CharStream((null: System.IO.Stream), false, encoding) |> ignore; Fail() with :? System.ArgumentNullException -> () try new CharStream(stream, null) |> ignore; Fail() with :? 
System.ArgumentNullException -> () let tempFilePath = System.IO.Path.GetTempFileName() use nonReadableStream = new System.IO.FileStream(tempFilePath, System.IO.FileMode.Open, System.IO.FileAccess.Write) try new CharStream(nonReadableStream, encoding) |> ignore; Fail() with :? System.ArgumentException -> () nonReadableStream.Write(streamBytes, 0, streamBytes.Length) nonReadableStream.Dispose() try new CharStream((null: string), encoding) |> ignore; Fail() with :? System.ArgumentNullException -> () try new CharStream("", (null: System.Text.Encoding)) |> ignore; Fail() with :? System.ArgumentNullException -> () use charStream = new CharStream(tempFilePath, System.Text.Encoding.ASCII, true) charStream.Name |> Equal tempFilePath charStream.Read(str.Length + 1) |> Equal str charStream.Dispose() System.IO.File.Delete(tempFilePath) type CustomPreambleUTF8Encoding(preamble: byte[]) = inherit System.Text.UTF8Encoding() override t.GetPreamble() = preamble let testEncodingDetection() = let s = "1234567890" let iso8859_1 = System.Text.Encoding.GetEncoding(28591) // an encoding we can't detect let test (e: System.Text.Encoding) = let bs0 = e.GetPreamble() use cs0 = new CharStream(new System.IO.MemoryStream(bs0, false), iso8859_1); cs0.Encoding.CodePage |> Equal (e.CodePage) bs0[1] <- 33uy use cs0 = new CharStream(new System.IO.MemoryStream(bs0, false), iso8859_1); cs0.Encoding|> ReferenceEqual iso8859_1 let bs = Array.append (e.GetPreamble()) (e.GetBytes(s)) use cs = new CharStream(new System.IO.MemoryStream(bs, false), iso8859_1); cs.Encoding.CodePage |> Equal (e.CodePage) cs.Read(s.Length) |> Equal s use cs2 = new CharStream(new System.IO.MemoryStream(bs, false), e); cs2.Encoding |> ReferenceEqual e cs2.Read(s.Length) |> Equal s use cs3 = new CharStream(new System.IO.MemoryStream(bs, false), false, iso8859_1, false); cs3.Encoding |> ReferenceEqual iso8859_1 test (System.Text.UTF32Encoding(false, true)) test (System.Text.UTF32Encoding(true, true)) test 
(System.Text.UnicodeEncoding(false, true)) test (System.Text.UnicodeEncoding(true, true)) test (System.Text.UTF8Encoding(true)) let e = CustomPreambleUTF8Encoding([|0uy;1uy;2uy;3uy;4uy|]) let bs = Array.append (e.GetPreamble()) (e.GetBytes(s)) use cs = new CharStream(new System.IO.MemoryStream(bs, false), e); cs.Encoding.CodePage |> Equal (e.CodePage) cs.Read(s.Length) |> Equal s /// creates a CharStream with block size 8 and block overlap 3 let createMultiBlockTestStream byteStream encoding = new CharStream(byteStream, false, encoding, true, #if LOW_TRUST #else #if DISABLE_STREAM_BACKTRACKING_TESTS 128, 64, #else 8, 3, #endif #endif 16); let createMultiBlockUtf8TestStream (chars: char[]) = let e = System.Text.Encoding.UTF8 let bs = e.GetBytes(chars) createMultiBlockTestStream (new System.IO.MemoryStream(bs, false)) e type NonSeekableMemoryStream(bytes: byte[]) = inherit System.IO.MemoryStream(bytes) override t.Seek(offset, origin) = raise (System.NotSupportedException()) override t.CanSeek = false #if !LOW_TRUST [] type NonSerializableUTF8Decoder() = inherit System.Text.Decoder() let decoder = System.Text.Encoding.UTF8.GetDecoder() override t.GetCharCount(bytes: byte[], index: int, count: int) : int = raise (System.NotImplementedException()) override t.GetChars(bytes: byte[], byteIndex: int, byteCount: int, chars: char[], charIndex: int): int = raise (System.NotImplementedException()) override t.Reset() = decoder.Reset() override t.Convert(bytes, byteCount, chars, charCount, flush, bytesUsed: byref, charsUsed: byref, completed: byref) = decoder.Convert(bytes, byteCount, chars, charCount, flush, &bytesUsed, &charsUsed, &completed) interface System.Runtime.Serialization.ISerializable with member t.GetObjectData(info, context) = raise (System.NotSupportedException()) type UTF8EncodingWithNonSerializableDecoder() = inherit System.Text.UTF8Encoding() override t.GetDecoder() = new NonSerializableUTF8Decoder() :> System.Text.Decoder let 
testNonSeekableCharStreamHandling() = let str = "1234567890ABCDEFGHIJKLMNOPQ" let streamBytes = Array.append (System.Text.Encoding.UTF8.GetPreamble()) (System.Text.Encoding.UTF8.GetBytes(str)) let testNonSeekableByteStream() = let encoding = System.Text.Encoding.UTF8 use byteStream = new NonSeekableMemoryStream(streamBytes) use stream = createMultiBlockTestStream byteStream System.Text.Encoding.Unicode stream.Skip(9) try stream.Skip(-9) |> ignore Fail() with :? System.NotSupportedException as e -> () use byteStream2 = new NonSeekableMemoryStream(streamBytes[..(6 + 3)]) use stream2 = createMultiBlockTestStream byteStream2 System.Text.Encoding.Unicode stream2.Read(7) |> Equal str[..6] stream2.IsEndOfStream |> True testNonSeekableByteStream() let testNonSerializableEncoding() = let nsEncoding = UTF8EncodingWithNonSerializableDecoder() use byteStream = new System.IO.MemoryStream(streamBytes) use stream = createMultiBlockTestStream byteStream nsEncoding // seeking forward should work stream.Read(str.Length) |> Equal str stream.IsEndOfStream |> True // ... and backtracking to the first block should work too stream.SkipAndPeek(-str.Length) |> Equal str[0] stream.Seek(int64 str.Length - 1L) stream.Read() |> Equal str[str.Length - 1] stream.IsEndOfStream |> True // ... but backtracking to a block other than the first should fail try stream.Seek(8L) Fail() with :? System.NotSupportedException as e -> () testNonSerializableEncoding() #endif let testDecoderFallbackExceptionHandling() = let encoding = System.Text.Encoding.GetEncoding("utf-32", System.Text.EncoderFallback.ExceptionFallback, System.Text.DecoderFallback.ExceptionFallback) let getStreamBytes bytes = Array.concat [|[|0x00uy|]; encoding.GetPreamble(); bytes|] let test (byteStream: System.IO.Stream) multiBlock (position: int64) = try use stream = if not multiBlock then new CharStream<_>(byteStream, encoding) else createMultiBlockTestStream byteStream encoding stream.Read(int position + 4) |> ignore Fail() with :? 
System.Text.DecoderFallbackException as e -> unbox (e.Data["Stream.Position"]) |> Equal position let shortStreamBytes = getStreamBytes (encoding.GetBytes("123\u00005")) shortStreamBytes[1 + 4 + 3*4 + 1] <- 0xd8uy use shortByteStream = new System.IO.MemoryStream(shortStreamBytes) shortByteStream.ReadByte() |> ignore test shortByteStream false (int64 (1 + 4 + 3*4)) use nsShortByteStream = new NonSeekableMemoryStream(shortStreamBytes) nsShortByteStream.ReadByte() |> ignore test nsShortByteStream false (int64 ( 4 + 3*4)) let longStreamBytes = getStreamBytes (encoding.GetBytes("12345678901\u00003")) longStreamBytes[1 + 4 + 11*4 + 1] <- 0xd8uy use longByteStream = new System.IO.MemoryStream(longStreamBytes) longByteStream.ReadByte() |> ignore test longByteStream true (int64 (1 + 4 + 11*4)) use nsLongByteStream = new NonSeekableMemoryStream(longStreamBytes) nsLongByteStream.ReadByte() |> ignore test nsLongByteStream true (int64 ( 4 + 11*4)) let testEmptyStream (stream: CharStream<_>) = let index0 = stream.Index stream.IsBeginOfStream |> True stream.IsEndOfStream |> True stream.IndexOfFirstChar |> Equal index0 stream.IndexOfLastCharPlus1 |> Equal index0 stream.Seek(index0 + 1L); stream.Index |> Equal index0 stream.Peek() |> Equal EOS stream.Peek2() |> Equal (TwoChars(EOS, EOS)) stream.Peek(0) |> Equal EOS stream.Peek(1) |> Equal EOS stream.Peek(-1) |> Equal EOS stream.Peek(System.Int32.MaxValue) |> Equal EOS stream.Peek(System.Int32.MinValue) |> Equal EOS stream.Peek(0u) |> Equal EOS stream.Peek(1u) |> Equal EOS stream.Peek(System.UInt32.MaxValue) |> Equal EOS stream.PeekString(0) |> Equal "" stream.PeekString(1) |> Equal "" stream.PeekString(System.Int32.MaxValue) |> Equal "" let array = [|'x'|] stream.PeekString(array, 0, 1) |> Equal 0 array[0] |> Equal 'x' #if LOW_TRUST #else let handle = System.Runtime.InteropServices.GCHandle.Alloc(array, System.Runtime.InteropServices.GCHandleType.Pinned) let arrayPtr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) 
stream.PeekString(arrayPtr, 1) |> Equal 0 array[0] |> Equal 'x' #endif stream.Read() |> Equal EOS; stream.Index |> Equal index0 stream.Read(0) |> Equal ""; stream.Index |> Equal index0 stream.Read(1) |> Equal ""; stream.Index |> Equal index0 stream.Read(System.Int32.MaxValue) |> Equal ""; stream.Index |> Equal index0 stream.Read(array, 0, 1) |> Equal 0; stream.Index |> Equal index0 #if LOW_TRUST #else stream.Read(arrayPtr, 1) |> Equal 0 array[0] |> Equal 'x'; stream.Index |> Equal index0 #endif stream.Match(EOS) |> False stream.Match("") |> True stream.Match("x") |> False stream.MatchCaseFolded(EOS) |> False stream.MatchCaseFolded("") |> True stream.MatchCaseFolded("x") |> False stream.Match([||],0,0) |> True stream.Match([|'x'|],0,1) |> False #if LOW_TRUST #else stream.Match(arrayPtr, 0) |> True stream.Match(arrayPtr, 1) |> False stream.MatchCaseFolded(arrayPtr, 0) |> True stream.MatchCaseFolded(arrayPtr, 1) |> False #endif stream.Match(Regex("x")).Success |> False stream.Skip(EOS) |> False; stream.Index |> Equal index0 stream.Skip("") |> True; stream.Index |> Equal index0 stream.Skip("x") |> False; stream.Index |> Equal index0 stream.Skip(EOS) |> False; stream.Index |> Equal index0 stream.Skip("") |> True; stream.Index |> Equal index0 stream.Skip("x") |> False; stream.Index |> Equal index0 stream.Skip([||], 0, 0) |> True; stream.Index |> Equal index0 stream.Skip([|'x'|], 0, 1) |> False; stream.Index |> Equal index0 #if LOW_TRUST #else stream.Skip(arrayPtr, 0) |> True; stream.Index |> Equal index0 stream.Skip(arrayPtr, 1) |> False; stream.Index |> Equal index0 stream.Skip(arrayPtr, 0) |> True; stream.Index |> Equal index0 stream.Skip(arrayPtr, 1) |> False; stream.Index |> Equal index0 #endif let tag = stream.StateTag stream.Skip(); stream.Index |> Equal index0 stream.StateTag |> Equal tag stream.Skip(0); stream.Index |> Equal index0 stream.Skip(1); stream.Index |> Equal index0 stream.Skip(System.Int32.MaxValue); stream.Index |> Equal index0 try stream.Skip(-1) |> 
ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Skip(System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () stream.Skip(0L); stream.Index |> Equal index0 stream.Skip(1L); stream.Index |> Equal index0 stream.Skip(System.Int64.MaxValue); stream.Index |> Equal index0 try stream.Skip(-1L) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Skip(System.Int64.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () stream.Skip(0u); stream.Index |> Equal index0 stream.Skip(1u); stream.Index |> Equal index0 stream.Skip(System.UInt32.MaxValue); stream.Index |> Equal index0 stream.SkipAndPeek(0) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(1) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(System.Int32.MaxValue) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(-1) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(System.Int32.MinValue) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(0u) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(1u) |> Equal EOS; stream.Index |> Equal index0 stream.SkipAndPeek(System.UInt32.MaxValue) |> Equal EOS; stream.Index |> Equal index0 let state = stream.State stream.ReadFrom(state, false) |> Equal "" let tag = stream.StateTag stream.SkipWhitespace() |> False; stream.Index |> Equal index0 stream.SkipUnicodeWhitespace() |> False; stream.Index |> Equal index0 stream.SkipNewline() |> False; stream.Index |> Equal index0 stream.SkipUnicodeNewline() |> False; stream.Index |> Equal index0 stream.SkipNewlineThenWhitespace(8, true) |> Equal -1; stream.Index |> Equal index0 stream.SkipRestOfLine(false); stream.Index |> Equal index0 stream.SkipRestOfLine(true); stream.Index |> Equal index0 stream.ReadRestOfLine(false) |> Equal ""; stream.Index |> Equal index0 stream.ReadRestOfLine(true) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharOrNewline() |> Equal EOS 
stream.SkipCharsOrNewlines(1) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlines(System.Int32.MaxValue) |> Equal 0; stream.Index |> Equal index0 stream.ReadCharsOrNewlines(1, false) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlines(1, true) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlines(System.Int32.MaxValue, false) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlines(System.Int32.MaxValue, true) |> Equal ""; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesWhile((fun c -> true)) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesWhile((fun c -> true), 0, 1) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesWhile((fun c -> true), 0, System.Int32.MaxValue) |> Equal 0; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), false) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), true) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), 0, 1, false) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), 0, 1, true) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), 0, System.Int32.MaxValue, false) |> Equal ""; stream.Index |> Equal index0 stream.ReadCharsOrNewlinesWhile((fun c -> true), 0, System.Int32.MaxValue, true) |> Equal ""; stream.Index |> Equal index0 let mutable b = false stream.SkipCharsOrNewlinesUntilString("1", 1, &b) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilString("1", System.Int32.MaxValue, &b) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilCaseFoldedString("1", 1, &b) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilCaseFoldedString("1", System.Int32.MaxValue, &b) |> Equal 0; stream.Index |> Equal index0 let mutable s = "" stream.SkipCharsOrNewlinesUntilString("1", 1, false, &s) |> Equal 
0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilString("1", System.Int32.MaxValue, false, &s) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilCaseFoldedString("1", 1, false, &s) |> Equal 0; stream.Index |> Equal index0 stream.SkipCharsOrNewlinesUntilCaseFoldedString("1", System.Int32.MaxValue, false, &s) |> Equal 0; stream.Index |> Equal index0 stream.StateTag |> Equal tag #if LOW_TRUST #else handle.Free() #endif let testBasicCharStreamMethods (stream: CharStream) (refString: string) blockSize blockOverlap minRegexSpace = let index0 = stream.IndexOfFirstChar let dollarString = new string('$', refString.Length) let N = refString.Length let state0 = stream.State let seekStreamTo (i: int) = stream.BacktrackTo(state0) stream.Skip(uint32 i) let testProperties() = stream.BacktrackTo(state0) let tag = stream.StateTag stream.Name <- "Name2" stream.Name |> Equal "Name2" stream.StateTag |> Equal (tag + _1) stream.UserState <- -333 stream.UserState |> Equal -333 stream.StateTag |> Equal (tag + _1 + _1) stream.Skip() stream.Column |> Equal (stream.Index - stream.LineBegin + 1L) stream.StateTag <- tag stream.StateTag |> Equal tag let l = stream.Line stream.SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(l + 2L) stream.Line |> Equal 3L stream.StateTag |> Equal tag let lb = stream.LineBegin stream.SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(lb + 1L) stream.LineBegin |> Equal (lb + 1L) stream.StateTag |> Equal tag (stream.BlockOverlap >= 0) |> True let minRegexSpace = stream.MinRegexSpace if minRegexSpace <> 0 then stream.MinRegexSpace <- minRegexSpace - 1 stream.MinRegexSpace |> Equal (minRegexSpace - 1) try stream.MinRegexSpace <- -1; Fail() with :? System.ArgumentException -> () try stream.MinRegexSpace <- stream.BlockOverlap + 1; Fail() with :? 
System.ArgumentException -> () stream.MinRegexSpace <- minRegexSpace testProperties() let testRegisterNewlines() = stream.BacktrackTo(state0) let line0 = stream.Line let lineBegin0 = stream.LineBegin let tag0 = stream.StateTag stream.Skip() stream.RegisterNewline() stream.StateTag |> Equal (tag0 + _1 + _1) stream.Line |> Equal (line0 + 1L) stream.LineBegin |> Equal stream.Index stream.BacktrackTo(state0) stream.Skip(3) stream.RegisterNewlines(2, 1) stream.StateTag |> Equal (tag0 + _1 + _1) stream.Line |> Equal (line0 + 2L) stream.LineBegin |> Equal (stream.Index - 1L) stream.BacktrackTo(state0) stream.Skip(3) stream.RegisterNewlines(2L, 1L) stream.StateTag |> Equal (tag0 + _1 + _1) stream.Line |> Equal (line0 + 2L) stream.LineBegin |> Equal (stream.Index - 1L) testRegisterNewlines() let testMove i1 i2 = let tag1 = state0.Tag + _1 let tag2 = tag1 + _1 let index1 = index0 + int64 (min i1 N) let index2 = index0 + int64 (min i2 N) let c1 = if i1 < N then refString[i1] else EOS let c2 = if i2 < N then refString[i2] else EOS let d = i2 - min i1 N stream.BacktrackTo(state0) stream.Seek(index0 + int64 i1) stream.Index |> Equal index1 let indexToken1 = stream.IndexToken stream.StateTag |> Equal tag1 stream.Peek() |> Equal c1 stream.IsBeginOfStream |> Equal (i1 = 0) stream.IsEndOfStream |> Equal (i1 >= N) stream.Seek(index0 + int64 i2) stream.Index |> Equal index2 stream.StateTag |> Equal tag2 stream.Peek() |> Equal c2 stream.IsBeginOfStream |> Equal (i2 = 0) stream.IsEndOfStream |> Equal (i2 >= N) indexToken1.GetIndex(stream) |> Equal index1 stream.Seek(indexToken1) stream.Index |> Equal index1 stream.Peek() |> Equal c1 seekStreamTo i1 stream.Peek(d) |> Equal c2 stream.Index |> Equal index1 stream.StateTag |> Equal tag1 if d >= 0 then seekStreamTo i1 stream.Peek(uint32 d) |> Equal c2 stream.Index |> Equal index1 stream.StateTag |> Equal tag1 let checkStream() = stream.Index |> Equal index2 if index1 <> index2 then stream.StateTag |> Equal tag2 else (stream.StateTag = tag2 - 
_1 || stream.StateTag = tag2) |> True seekStreamTo i1 stream.Skip(d) checkStream() seekStreamTo i1 stream.Skip(int64 d) checkStream() seekStreamTo i1 stream.SkipAndPeek(d) |> Equal c2 checkStream() if d >= 0 then if d = 1 then seekStreamTo i1 stream.Skip() stream.Index |> Equal index2 if index2 <> index1 then stream.StateTag |> Equal tag2 else stream.StateTag |> Equal tag1 seekStreamTo i1 stream.SkipAndPeek() |> Equal c2 stream.Index |> Equal index2 if index2 <> index1 then stream.StateTag |> Equal tag2 else stream.StateTag |> Equal tag1 seekStreamTo i1 stream.Skip(uint32 d) checkStream() seekStreamTo i1 stream.SkipAndPeek(uint32 d) |> Equal c2 checkStream() elif i2 = 0 then // d <= 0 seekStreamTo i1 stream.Peek(d - 1) |> Equal EOS stream.Index |> Equal index1 stream.StateTag |> Equal tag1 stream.Peek(System.Int32.MinValue) |> Equal EOS stream.Index |> Equal index1 stream.StateTag |> Equal tag1 seekStreamTo i1 stream.SkipAndPeek(d - 1) |> Equal EOS checkStream() seekStreamTo i1 stream.SkipAndPeek(System.Int32.MinValue) |> Equal EOS checkStream() for i1 = 0 to N + 2 do for i2 = 0 to N + 2 do testMove i1 i2 let testMoveException() = let endIndex = index0 + int64 N stream.Seek(System.Int64.MaxValue) stream.Index |> Equal endIndex stream.IsEndOfStream |> True try stream.Seek(-1L) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try stream.Seek(index0 - 1L) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try stream.Seek(System.Int64.MinValue) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () stream.Seek(index0) let indexToken = CharStreamIndexToken() try stream.Seek(indexToken) |> ignore; Fail() with :? System.ArgumentException -> () try indexToken.GetIndex(stream) |> ignore; Fail() with :? System.InvalidOperationException -> () let state = CharStreamState() try stream.BacktrackTo(state); Fail() with :? System.ArgumentException -> () try state.GetIndex(stream) |> ignore; Fail() with :? 
System.InvalidOperationException -> () try state.GetPosition(stream) |> ignore; Fail() with :? System.InvalidOperationException -> () try stream.ReadFrom(state, true) |> ignore; Fail() with :? System.ArgumentException -> () try stream.CreateSubstream(state) |> ignore; Fail() with :? System.ArgumentException -> () try state.IndexToken |> ignore; Fail() with :? System.InvalidOperationException -> () for i = 0 to N do seekStreamTo i; stream.Skip(System.Int32.MaxValue) stream.Index |> Equal endIndex seekStreamTo i; stream.Skip(System.UInt32.MaxValue) stream.Index |> Equal endIndex seekStreamTo i; stream.Skip(System.Int64.MaxValue) stream.Index |> Equal endIndex seekStreamTo i; stream.SkipAndPeek(System.Int32.MaxValue) |> Equal EOS stream.Index |> Equal endIndex seekStreamTo i; stream.SkipAndPeek(System.UInt32.MaxValue) |> Equal EOS stream.Index |> Equal endIndex seekStreamTo i; stream.Peek(System.Int32.MaxValue) |> Equal EOS seekStreamTo i; stream.Peek(System.UInt32.MaxValue) |> Equal EOS // MinValue behaviour is checked in testMove try seekStreamTo i; stream.Seek(index0 - 1L) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Seek(-1L) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Seek(System.Int64.MinValue) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Skip(-i - 1) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Skip(int64 (-i - 1)) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Skip(System.Int32.MinValue) |> ignore; Fail () with :? System.ArgumentOutOfRangeException -> () try seekStreamTo i; stream.Skip(System.Int64.MinValue) |> ignore; Fail () with :? 
System.ArgumentOutOfRangeException -> () testMoveException() let regex = new Regex(".*", RegexOptions.Singleline) let testMatch i n = let test (str: string) (result: bool) = assert (str.Length = n) let strA = str.ToCharArray() let cfStr = FParsec.Text.FoldCase(str) let cfStrA = cfStr.ToCharArray() seekStreamTo i let tag1 = stream.StateTag let index1 = stream.Index let tag2 = tag1 + _1 let index2 = min (stream.Index + int64 n) stream.IndexOfLastCharPlus1 let checkStream charsAreSkipped = if not charsAreSkipped then stream.Index |> Equal index1 stream.StateTag |> Equal tag1 else stream.Index |> Equal index2 if n <> 0 then stream.StateTag |> Equal tag2 else (stream.StateTag = tag1 || stream.StateTag = tag2) |> True stream.Match(str) |> Equal result checkStream false seekStreamTo i stream.Skip(str) |> Equal result checkStream result seekStreamTo i stream.MatchCaseFolded(cfStr) |> Equal result checkStream false seekStreamTo i stream.SkipCaseFolded(cfStr) |> Equal result checkStream result seekStreamTo i stream.Match(strA, 0, n) |> Equal result checkStream false seekStreamTo i stream.Skip(strA, 0, n) |> Equal result checkStream result if n = 1 then seekStreamTo i stream.Match(str[0]) |> Equal result checkStream false seekStreamTo i stream.Skip(str[0]) |> Equal result checkStream result seekStreamTo i stream.MatchCaseFolded(cfStr[0]) |> Equal result checkStream false seekStreamTo i stream.SkipCaseFolded(cfStr[0]) |> Equal result checkStream result elif n = 2 then seekStreamTo i if stream.Skip(FParsec.TwoChars(str[0], str[1])) <> result then stream.Skip(FParsec.TwoChars(str[0], str[1])) |> Equal result //stream.Skip(FParsec.TwoChars(str[0], str[1])) |> Equal result checkStream result elif n > 1 then seekStreamTo i let restIsEqual = stream.Peek(n - 1) = str[n - 1] // str only differs in first or last char seekStreamTo (i + 1) let index11 = stream.Index stream.Match(strA, 1, n - 1) |> Equal restIsEqual stream.Index |> Equal index11 stream.StateTag |> Equal tag1 seekStreamTo 
(i + 1) stream.Skip(strA, 1, n - 1) |> Equal restIsEqual if restIsEqual then stream.Index |> Equal index2 stream.StateTag |> Equal tag2 else stream.Index |> Equal index11 stream.StateTag |> Equal tag1 #if LOW_TRUST #else if n > 0 then let handle = System.Runtime.InteropServices.GCHandle.Alloc(str, System.Runtime.InteropServices.GCHandleType.Pinned) let strPtr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) let cfHandle = System.Runtime.InteropServices.GCHandle.Alloc(cfStr, System.Runtime.InteropServices.GCHandleType.Pinned) let cfStrPtr = NativePtr.ofNativeInt (cfHandle.AddrOfPinnedObject()) seekStreamTo i stream.Match(strPtr, n) |> Equal result checkStream false seekStreamTo i stream.Skip(strPtr, n) |> Equal result checkStream result seekStreamTo i stream.MatchCaseFolded(cfStrPtr, n) |> Equal result checkStream false seekStreamTo i stream.SkipCaseFolded(cfStrPtr, n) |> Equal result checkStream result handle.Free() cfHandle.Free() else let handle = System.Runtime.InteropServices.GCHandle.Alloc("$", System.Runtime.InteropServices.GCHandleType.Pinned) let ptr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) let mutable c = '$' seekStreamTo i stream.Match(ptr, n) |> Equal true checkStream false seekStreamTo i stream.Skip(ptr, n) |> Equal true checkStream true seekStreamTo i stream.MatchCaseFolded(ptr, n) |> Equal true checkStream false seekStreamTo i stream.SkipCaseFolded(ptr, n) |> Equal true checkStream true handle.Free() #endif if n = 0 then test "" true elif i < N then let ci1 = char (int refString[i] + 1) if n > 0 && i + n <= N then test (refString.Substring(i, n)) true if n = 1 then test (ci1.ToString()) false else test (ci1.ToString() + refString.Substring(i + 1, n - 1)) false test (refString.Substring(i, n - 1) + ((char (int (refString[i + n - 1]) + 1)).ToString())) false else test (refString.Substring(i, N - i) + (new string(refString[N - 1], n - (N - i)))) false seekStreamTo i let index = stream.Index let tag = stream.StateTag let mstr = 
stream.Match(regex).Value stream.Index |> Equal index stream.StateTag |> Equal tag let minLength = if blockOverlap = 0 then N - i else min minRegexSpace (N - i) (mstr.Length >= minLength) |> True mstr |> Equal (refString.Substring(i, mstr.Length)) else let str = new string(refString[N - 1], n) test str false seekStreamTo i let index = stream.Index let tag = stream.StateTag stream.Match(regex).Value |> Equal "" stream.Index |> Equal index stream.StateTag |> Equal tag (* Run testMatch for every start index i in 0..N and every length n in 0..N + 15 - i. *) for i = 0 to N do for n = 0 to N + 15 - i do testMatch i n (* testMatchException: at stream positions 0, 1, N - 1 and N, expects NullReferenceException for null arguments and ArgumentOutOfRangeException for invalid char array index/count combinations or negative pointer lengths. *) let testMatchException() = let str = "$$$" let a = str.ToCharArray() for i in [0; 1; N - 1; N] do seekStreamTo i try stream.Match(null: string) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.Skip(null: string) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.MatchCaseFolded(null) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.SkipCaseFolded(null) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.Match((null: char[]), 0, 0) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.Match(a, 0, 4) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, 2, 2) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, 3, 1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, -1, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, 0, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, 0, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, System.Int32.MinValue, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(a, System.Int32.MinValue, System.Int32.MinValue) |> ignore; Fail() with :? 
System.ArgumentOutOfRangeException -> () try stream.Match(a, System.Int32.MinValue, System.Int32.MaxValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () #if LOW_TRUST #else (* NOTE(review): the null-pointer checks are skipped at i = N, presumably because nothing is read from the pointer at the end of the stream; confirm against the CharStream implementation. *) if i <> N then try stream.Match(NativePtr.ofNativeInt 0n, 1) |> ignore; Fail() with :? System.NullReferenceException -> () let handle = System.Runtime.InteropServices.GCHandle.Alloc([|'$'|], System.Runtime.InteropServices.GCHandleType.Pinned) let ptr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) try stream.Match(ptr, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Match(ptr, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () if i <> N then try stream.MatchCaseFolded(NativePtr.ofNativeInt 0n, 1) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.MatchCaseFolded(ptr, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.MatchCaseFolded(ptr, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () handle.Free() #endif try stream.Match(null: Regex) |> ignore; Fail() with :? 
System.NullReferenceException -> () testMatchException() (* testRead i n: positions the stream at index i and checks Read, PeekString, Peek2, ReadFrom and the state and index-token accessors for a length of n chars against the expected substring of refString; Read variants must advance the stream, Peek variants must leave Index and StateTag unchanged. *) let testRead i n = seekStreamTo i let tag1 = stream.StateTag let index1 = stream.Index let tag2 = tag1 + _1 let index2 = min (index1 + int64 n) stream.IndexOfLastCharPlus1 let str = if i < N then refString.Substring(i, min n (N - i)) else "" let checkStream charsAreSkipped = if not charsAreSkipped then stream.Index |> Equal index1 stream.StateTag |> Equal tag1 else stream.Index |> Equal index2 if str.Length <> 0 then stream.StateTag |> Equal tag2 else (stream.StateTag = tag1 || stream.StateTag = tag2) |> True stream.Read(n) |> Equal str checkStream true seekStreamTo i stream.PeekString(n) |> Equal str checkStream false seekStreamTo i stream.PeekString(n) |> Equal str checkStream false (* cs is filled with a sentinel char so that writes outside the requested range can be detected. *) let cs = Array.create (N + 3) '$' seekStreamTo i stream.Read(cs, i%3, min n N) |> Equal str.Length checkStream true new string(cs, i%3, str.Length) |> Equal str for j = 0 to i%3 - 1 do cs[j] |> Equal '$' for j = i%3 + str.Length to N - 2 do cs[j] |> Equal '$' Array.fill cs 0 (N + 3) '$' seekStreamTo i stream.PeekString(cs, i%3, min n N) |> Equal str.Length checkStream false new string(cs, i%3, str.Length) |> Equal str for j = 0 to i%3 - 1 do cs[j] |> Equal '$' for j = i%3 + str.Length to N - 2 do cs[j] |> Equal '$' #if LOW_TRUST #else let handle = System.Runtime.InteropServices.GCHandle.Alloc(cs, System.Runtime.InteropServices.GCHandleType.Pinned) let ptr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) Array.fill cs 0 (N + 3) '$' seekStreamTo i stream.Read(NativePtr.add ptr (i%3), min n N) |> Equal str.Length checkStream true new string(cs, i%3, str.Length) |> Equal str for j = 0 to i%3 - 1 do cs[j] |> Equal '$' for j = i%3 + str.Length to N - 2 do cs[j] |> Equal '$' Array.fill cs 0 (N + 3) '$' seekStreamTo i stream.PeekString(NativePtr.add ptr (i%3), min n N) |> Equal str.Length (* PeekString must not move the stream; same check as for the char array overload. *) checkStream false new string(cs, i%3, str.Length) |> Equal str for j = 0 to i%3 - 1 do cs[j] |> Equal '$' for j = i%3 + str.Length to N - 2 do cs[j] |> Equal '$' 
handle.Free() #endif if n = 1 then seekStreamTo i stream.Read() |> Equal (if str.Length > 0 then str[0] else EOS) checkStream true elif n = 2 then let c2 = new FParsec.TwoChars((if str.Length > 0 then str[0] else EOS), (if str.Length = 2 then str[1] else EOS)) seekStreamTo i stream.Peek2() |> Equal c2 checkStream false seekStreamTo i let indexToken = stream.IndexToken stream.Skip(n) stream.ReadFrom(indexToken) |> Equal str checkStream true seekStreamTo i let pos1 = stream.Position let state = stream.State stream.Skip(n) state.GetIndex(stream) |> Equal index1 state.IndexToken.GetIndex(stream) |> Equal index1 state.GetPosition(stream) |> Equal pos1 stream.ReadFrom(state, false) |> Equal str checkStream true (* Run testRead for every start index i in 0..N and every length n in 0..N + 15 - i. *) for i = 0 to N do for n = 0 to N + 15 - i do testRead i n (* testReadException: at stream positions 0, 1, N - 1 and N, expects ArgumentOutOfRangeException for negative lengths and invalid char array ranges, NullReferenceException for null buffers, and ArgumentException when ReadFrom is given a default-constructed token or state or one that lies after the current position. Read and PeekString with Int32.MaxValue must either return the rest of the stream or throw OutOfMemoryException. *) let testReadException() = for i in [0; 1; N - 1; N] do seekStreamTo i let str2 = if i < N then refString[i..] else "" try let str = stream.Read(System.Int32.MaxValue) str |> Equal str2 seekStreamTo i with :? System.OutOfMemoryException -> () try stream.Read(-1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try let str = stream.PeekString(System.Int32.MaxValue) str |> Equal str2 stream.Index |> Equal (index0 + int64 i) with :? System.OutOfMemoryException -> () try stream.PeekString(-1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.PeekString(System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () let a = Array.create 3 '$' try stream.Read(null, 0, 1) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.Read(a, 0, 4) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, 2, 2) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, 3, 1) |> ignore; Fail() with :? 
System.ArgumentOutOfRangeException -> () try stream.Read(a, -1, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, 0, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, 0, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, System.Int32.MinValue, 0) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, System.Int32.MinValue, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(a, System.Int32.MinValue, System.Int32.MaxValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () #if LOW_TRUST #else if i <> N then try stream.Read(NativePtr.ofNativeInt 0n, 1) |> ignore; Fail() with :? System.NullReferenceException -> () let handle = System.Runtime.InteropServices.GCHandle.Alloc([|'_'|], System.Runtime.InteropServices.GCHandleType.Pinned) let ptr = NativePtr.ofNativeInt (handle.AddrOfPinnedObject()) try stream.Read(ptr, -1) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.Read(ptr, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () handle.Free() #endif (* Default-constructed tokens and states must be rejected. *) let mutable indexToken = CharStreamIndexToken() try stream.ReadFrom(indexToken) |> ignore; Fail() with :? System.ArgumentException -> () let mutable state = CharStreamState<_>() try stream.ReadFrom(&state, false) |> ignore; Fail() with :? System.ArgumentException -> () (* A token or state captured one char ahead of the current position must be rejected as well. *) if not stream.IsEndOfStream then stream.Skip() state <- stream.State indexToken <- stream.IndexToken stream.Skip(-1) try stream.ReadFrom(indexToken) |> ignore; Fail() with :? System.ArgumentException -> () try stream.ReadFrom(&state, false) |> ignore; Fail() with :? System.ArgumentException -> () testReadException() /// Cross verify the CharStream string wrapper version against the normal stream /// version. 
This is done by generating a random string and then checking /// randomly generated access sequences on CharStream instances with random parameters. let xTest() = let rand = System.Random(43563456) (* generateRandomUnicodeChars size: returns a char array of the given length filled with random BMP chars (surrogate code units excluded) and well-formed surrogate pairs; slots that are never assigned keep the zero char. *) let generateRandomUnicodeChars size = let cs = Array.zeroCreate size let mutable i = 0 while i < cs.Length do let r = rand.Next() // see http://www.unicode.org/Public/UNIDATA/Blocks.txt if r &&& 0xffff < 0xfffe then if r < (System.Int32.MaxValue/3)*2 then // generate a char from the BMP with about a prob. of 2/3 let c = r % 0xffff if (c < 0xd800 || c > 0xdfff) then cs[i] <- char c i <- i + 1 else let c_ = 0x10000 + (r % 0x25000) let c = if c_ < 0x30000 then c_ else 0xe0000 ||| (c_ &&& 0xfff) (* UTF-16 encodes a supplementary code point c through v = c - 0x10000: the high surrogate carries the upper 10 bits of v, the low surrogate the lower 10 bits. *) let v = c - 0x10000 let h = char (0xd800 ||| (v >>> 10)) let l = char (0xdc00 ||| (v &&& 0x3ff)) if i + 1 < cs.Length then cs[i] <- h cs[i + 1] <- l i <- i + 2 cs let maxBlockSize = 100 // extremely small size for testing purposes only let maxReadSize = 120 let maxJumpSize = 200 let readBuffer = Array.create maxReadSize '_' let utf8 = System.Text.Encoding.GetEncoding(65001, System.Text.EncoderFallback.ReplacementFallback, new System.Text.DecoderReplacementFallback("XYZ")) // The handling of invalid input is buggy in the .NET decoder routines for UTF16 and GB18030, // so we can only use them for valid input (and we use the ExceptionFallback to verify that the input is valid). 
let utf16 = System.Text.Encoding.GetEncoding(1200, System.Text.EncoderFallback.ExceptionFallback, System.Text.DecoderFallback.ExceptionFallback) // utf16 little endian let gb18030 = System.Text.Encoding.GetEncoding(54936, System.Text.EncoderFallback.ExceptionFallback, System.Text.DecoderFallback.ExceptionFallback) (* Each round builds one string-backed and one byte-stream-backed CharStream over the same chars, with random block size, block overlap and byte buffer length. *) for j = 1 to 50 do let encoding, bytes, chars = match rand.Next()%4 with // 0 = utf8, 1-3 = utf16 (the gb18030 choice is currently commented out) | 0 -> let bytes = Array.zeroCreate (1 <<< 16) rand.NextBytes(bytes) let chars = utf8.GetChars(bytes) utf8, bytes, chars | r -> let encoding = utf16//if r = 1 then utf16 else gb18030 let chars = generateRandomUnicodeChars (1 <<< 17) let bytes = encoding.GetBytes(chars) encoding, bytes, chars #if LOW_TRUST let chars = new string(chars) #endif use stringStream = new CharStream(chars, 0, chars.Length) let blockSize = 16 + rand.Next(maxBlockSize - 16) let maxCharsForOneByte = encoding.GetMaxCharCount(1) let blockOverlap = maxCharsForOneByte + rand.Next(blockSize/2 - maxCharsForOneByte) let byteBufferLength = 8 + rand.Next(maxBlockSize) let blockSizeMinusOverlap = blockSize - blockOverlap use charStream = new CharStream(new System.IO.MemoryStream(bytes), false, encoding, true, #if LOW_TRUST #else #if DISABLE_STREAM_BACKTRACKING_TESTS chars.Length, blockOverlap, #else blockSize, blockOverlap, #endif #endif byteBufferLength) (* Every tenth round additionally reads both streams sequentially from the start and compares them char by char. *) if j%10 = 1 then charStream.Seek(0L) stringStream.Seek(0L) for i = 1 to chars.Length do Equal (stringStream.Read()) (charStream.Read()) if i%blockSizeMinusOverlap = blockOverlap && i >= blockSize then Equal (stringStream.Peek(-blockSize)) (charStream.Peek(-blockSize)) let mutable index = int32 (rand.Next(chars.Length)) charStream.Seek(int64 index) stringStream.Seek(int64 index) // a random walk with jumps... 
let mutable i = 0; let mutable resetCounter = 0 while i < 10000 do // biased towards jumping backwards let jumpSize = rand.Next(maxJumpSize) * if rand.Next(4) = 0 then 1 else -1 index <- index + jumpSize if 0 < index && index < chars.Length then charStream.Skip(jumpSize) stringStream.Skip(jumpSize) else resetCounter <- resetCounter + 1 index <- rand.Next(chars.Length) charStream.Seek(int64 index) stringStream.Seek(int64 index) let mutable doContinue = true while doContinue do if i % 500 = 0 then index <- rand.Next(chars.Length) charStream.Seek(int64 index) stringStream.Seek(int64 index) charStream.PeekString(3) |> Equal (stringStream.PeekString(3)) stringStream.Index |> Equal (int64 index) charStream.Index |> Equal (int64 index) (* Read from one stream and require the other stream to skip exactly what was read. *) match rand.Next(4) with | 0 -> let c = stringStream.Read() if c <> EOS || not stringStream.IsEndOfStream then charStream.Skip(c) |> True index <- index + 1 elif index >= 7 then charStream.Peek(-7) |> Equal (stringStream.Peek(-7)) | 1 -> let n = 1 + rand.Next(maxReadSize) let str = stringStream.Read(n) charStream.Skip(str) |> True index <- index + str.Length | 2 -> let n = 1 + rand.Next(maxReadSize) let str = stringStream.Read(n) let cfStr = FParsec.Text.FoldCase(str) charStream.SkipCaseFolded(cfStr) |> True index <- index + str.Length | _ -> let n = 1 + rand.Next(maxReadSize) let str = charStream.Read(n) stringStream.Skip(str) |> True index <- index + str.Length doContinue <- rand.Next(4) <> 0 // jump every 4th iteration on average i <- i + 1 (* testSkipWhitespace: checks SkipWhitespace, SkipUnicodeWhitespace and SkipNewlineThenWhitespace against a reference scan over combinations of the chars in testChars. *) let testSkipWhitespace() = // check fast path let testChars = [|'\t'; '\n'; '\r'; ' '; '\u0008'; '\f'; '\u0021'; |] // returns with the stream state unchanged (* checkSkipWhitespace cs iBegin stream: scans cs from iBegin to compute the expected results and compares them with the stream, which must be positioned at the char corresponding to cs[iBegin]. The f-prefixed values record the scan state at the first form feed; they are the expected results for SkipWhitespace and for SkipNewlineThenWhitespace with false as second argument. *) let checkSkipWhitespace (cs: char[]) iBegin (stream: CharStream<_>) = let mutable line = 1 let mutable lineBegin = 0 let mutable i = iBegin let mutable indentation = System.Int32.MinValue let mutable containsFormFeed = false let mutable fLine = line let mutable fLineBegin = lineBegin let mutable fI = i let mutable fIndentation = indentation let 
tabStopDistance = 8 while i < cs.Length && (match cs[i] with | ' ' -> indentation <- indentation + 1 true | '\t' -> indentation <- indentation + (tabStopDistance - indentation%tabStopDistance) true | '\r' -> line <- line + 1; lineBegin <- i + 1; indentation <- 0 true | '\n' -> if i = iBegin || cs[i - 1] <> '\r' then line <- line + 1 lineBegin <- i + 1 indentation <- 0 true | '\f' -> if not containsFormFeed then containsFormFeed <- true fLine <- line fLineBegin <- lineBegin fI <- i fIndentation <- indentation indentation <- 0 true | _ -> false) do i <- i + 1 if not containsFormFeed then fLine <- line fLineBegin <- lineBegin fI <- i fIndentation <- indentation let state0 = stream.State let index0 = stream.Index let indexOffset = int32 index0 - iBegin let tag = if i <> iBegin then state0.Tag + _1 else state0.Tag let fTag = if fI <> iBegin then state0.Tag + _1 else state0.Tag let index = int64 ( i + indexOffset) let fIndex = int64 (fI + indexOffset) let lineBegin = if line <> 1 then int64 ( lineBegin + indexOffset) else state0.LineBegin let fLineBegin = if fLine <> 1 then int64 (fLineBegin + indexOffset) else state0.LineBegin let line = int64 line let fLine = int64 fLine try stream.SkipWhitespace() |> Equal (fI <> iBegin) stream.StateTag |> Equal fTag stream.Index |> Equal fIndex stream.Line |> Equal fLine stream.LineBegin |> Equal fLineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) with :? 
TestFailed -> (* NOTE(review): on failure the state is restored and the call is repeated, presumably so that the failing call can be stepped through in a debugger; confirm. *) stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) stream.SkipWhitespace() |> Equal (fI <> iBegin) stream.SkipUnicodeWhitespace() |> Equal (i <> iBegin) stream.StateTag |> Equal tag stream.Index |> Equal index stream.Line |> Equal line stream.LineBegin |> Equal lineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) if cs[iBegin] = '\r' || cs[iBegin] = '\n' then stream.SkipNewlineThenWhitespace(tabStopDistance, false) |> Equal fIndentation stream.StateTag |> Equal fTag stream.Index |> Equal fIndex stream.Line |> Equal fLine stream.LineBegin |> Equal fLineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) stream.SkipNewlineThenWhitespace(tabStopDistance, true) |> Equal indentation stream.StateTag |> Equal tag stream.Index |> Equal index stream.Line |> Equal line stream.LineBegin |> Equal lineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) else (* Not at a newline: the call must return -1 and leave the stream untouched. *) stream.SkipNewlineThenWhitespace(tabStopDistance, true) |> Equal -1 stream.StateTag |> Equal state0.Tag stream.Index |> Equal index0 stream.Line |> Equal state0.Line stream.LineBegin |> Equal state0.LineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) let testFastPath() = let cs = Array.create 11 '_' #if LOW_TRUST #else use stream = new CharStream(cs, 1, 10, 100L) #endif for c1 in testChars do cs[1] <- c1 for c2 in testChars do cs[2] <- c2 for c3 in testChars do cs[3] <- c3 for c4 in testChars do cs[4] <- c4 for c5 in testChars do cs[5] <- c5 for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 #if LOW_TRUST let stream = new CharStream(new string(cs), 1, 10, 100L) #endif checkSkipWhitespace cs 1 stream // check end of block/stream handling for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 for c9 in testChars do cs[9] <- c9 for c10 in testChars do cs[10] <- c10 #if LOW_TRUST let stream = new CharStream(new string(cs), 1, 10, 100L) #else 
stream.Seek(stream.IndexOfFirstChar) #endif stream.Skip(6) checkSkipWhitespace cs 7 stream stream.Skip(1) checkSkipWhitespace cs 8 stream stream.Skip(1) checkSkipWhitespace cs 9 stream stream.Skip(1) checkSkipWhitespace cs 10 stream #if LOW_TRUST let stream = new CharStream(new string(cs), 1, 10, 100L) #else stream.Seek(stream.IndexOfFirstChar) #endif (* At the end of the stream SkipWhitespace must return false and leave the state untouched. *) stream.Skip(10) let tag = stream.StateTag stream.SkipWhitespace() |> False stream.StateTag |> Equal tag stream.Index |> Equal (stream.IndexOfFirstChar + 10L) stream.Line |> Equal 1L stream.LineBegin |> Equal stream.IndexOfFirstChar let testSlowPath() = let cs = Array.create 17 '_' // check end of block handling with multi-block CharStream (blockSize = 8, blockOverlap = 3) for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 for c9 in testChars do cs[9] <- c9 for c10 in testChars do cs[10] <- c10 use stream = createMultiBlockUtf8TestStream cs stream.Skip(6) checkSkipWhitespace cs 6 stream // will start in the fast path stream.Skip(1) checkSkipWhitespace cs 7 stream // will start in the slow path testFastPath() testSlowPath() let testSkipUnicodeWhitespace() = // We've already tested the basic skipping logic in testSkipWhitespace. // Here we only test that the additional newline chars are correctly recognized. 
let testChars = [|'\u0008'; '\t'; '\n'; '\u000B'; '\f'; '\r'; ' '; '\u0021'; '\u0085'; '\u200a'; '\u2028'; '\u2029'; '\u205f'|] (* checkSkipUnicodeWhitespace cs iBegin stream: scans cs from iBegin, treating \r, \n, \u0085, \u2028 and \u2029 as newlines (\r\n counts once) and every other char accepted by System.Char.IsWhiteSpace as whitespace, then compares the expected index, tag, line and line begin with SkipUnicodeWhitespace and SkipUnicodeNewline. *) let checkSkipUnicodeWhitespace (cs: char[]) iBegin (stream: CharStream<_>) = let mutable line = 1 let mutable lineBegin = 0 let mutable i = iBegin while i < cs.Length && (match cs[i] with | '\r' | '\u0085' | '\u2028' | '\u2029' -> line <- line + 1; lineBegin <- i + 1; true | '\n' -> if i = iBegin || cs[i - 1] <> '\r' then line <- line + 1 lineBegin <- i + 1 true | c -> System.Char.IsWhiteSpace(c)) do i <- i + 1 let state0 = stream.State let index0 = stream.Index let indexOffset = int32 index0 - iBegin let tag = if i <> iBegin then state0.Tag + _1 else state0.Tag let index = int64 (i + indexOffset) let lineBegin = if line <> 1 then int64 (lineBegin + indexOffset) else state0.LineBegin let line = int64 line + (state0.Line - 1L) stream.SkipUnicodeWhitespace() |> Equal (i <> iBegin) stream.StateTag |> Equal tag stream.Index |> Equal index stream.Line |> Equal line stream.LineBegin |> Equal lineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) let isNewline c = match c with |'\n' | '\r' | '\u0085' | '\u2028' | '\u2029' -> true | _ -> false (* SkipUnicodeNewline is only checked when the whitespace run is at most two chars long. *) if iBegin + 2 >= i then if iBegin < cs.Length && isNewline cs[iBegin] then let index = if cs[iBegin] = '\r' && iBegin + 1 < cs.Length && cs[iBegin + 1] = '\n' then index0 + 2L else index0 + 1L stream.SkipUnicodeNewline() |> True stream.Index |> Equal index stream.StateTag |> Equal tag stream.Line |> Equal (state0.Line + 1L) stream.LineBegin |> Equal index stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) else stream.SkipUnicodeNewline() |> False stream.Index |> Equal index0 stream.StateTag |> Equal state0.Tag stream.Line |> Equal state0.Line stream.LineBegin |> Equal state0.LineBegin let testFastPath() = let cs = Array.create 11 '_' #if LOW_TRUST #else use stream = new CharStream(cs, 1, 10, 100L) #endif for c1 in testChars do cs[1] <- c1 for c2 in testChars do cs[2] <- c2 
for c3 in testChars do cs[3] <- c3 for c4 in testChars do cs[4] <- c4 for c5 in testChars do cs[5] <- c5 #if LOW_TRUST let stream = new CharStream(new string(cs), 1, 10, 100L) #endif checkSkipUnicodeWhitespace cs 1 stream (* Same checks on a multi-block stream, starting at index 7. *) let testSlowPath() = let cs = Array.create 17 '_' for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 for c9 in testChars do cs[9] <- c9 for c10 in testChars do cs[10] <- c10 use stream = createMultiBlockUtf8TestStream cs stream.Skip(7) checkSkipUnicodeWhitespace cs 7 stream testFastPath() testSlowPath() let testSkipNewlineWhitespace() = // Most of the testing for SkipNewlineWhitespace is done in testSkipWhitespace, // here we mostly test the error handling. let testArgumentChecking() = use stream = new CharStream("\r\n ") let state0 = stream.State let mutable indentation = 0 try stream.SkipNewlineThenWhitespace(0, true) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.SkipNewlineThenWhitespace(-1, true) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.SkipNewlineThenWhitespace(-8, true) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.SkipNewlineThenWhitespace(System.Int32.MinValue, true) |> ignore; Fail() with :? System.ArgumentOutOfRangeException -> () try stream.SkipNewlineThenWhitespace(7, true) |> ignore; Fail() with :? 
System.ArgumentOutOfRangeException -> () let testIndentationComputation() = // some additional checks complementing those in testSkipWhitespace use stream = new CharStream("\r\n\t \t \t \t ") let state0 = stream.State stream.SkipNewlineThenWhitespace(1, true) |> Equal 11 stream.BacktrackTo(state0) stream.SkipNewlineThenWhitespace(2, true) |> Equal 13 stream.BacktrackTo(state0) stream.SkipNewlineThenWhitespace(4, true) |> Equal 17 let testIndentationOverflowHandling() = let mutable indentation = 0 let test (str: string) (tabStopDistance: int) (index: int) (indentation: int) = use stream = new CharStream(str) stream.SkipNewlineThenWhitespace(tabStopDistance, true) |> Equal indentation stream.Index |> Equal (int64 index) test "\r\n\t" (1 <<< 30) 3 (1 <<< 30) test "\r\n\t " (1 <<< 30) 4 ((1 <<< 30) + 1) test "\r\n\t\t" (1 <<< 30) 3 (1 <<< 30) test "\r\n\t \t" (1 <<< 30) 4 ((1 <<< 30) + 1) test "\r\n\t\t " (1 <<< 30) 3 (1 <<< 30) test "\r\n\t \t " (1 <<< 30) 4 ((1 <<< 30) + 1) let cs = Array.zeroCreate (1 + (1 <<< 15) + (1 <<< 16)) // 1 + 2^15 + 2^16 cs[0] <- '\n' // (2^15 - 1)*2^16 + 2^16 = 2^31 for i = 1 to (1 <<< 15) - 1 do cs[i] <- '\t' for i = (1 <<< 15) to ((1 <<< 15)) + (1 <<< 16) do cs[i] <- ' ' use stream = new CharStream(new string(cs)) stream.SkipNewlineThenWhitespace((1 <<< 16), true) |> Equal System.Int32.MaxValue stream.Index |> Equal (int64 (cs.Length - 2)) stream.Read(3) |> Equal " " // test slow path let e = System.Text.Encoding.Unicode let bs = e.GetBytes(cs) use stream = new CharStream(new System.IO.MemoryStream(bs, false), false, e, false, #if LOW_TRUST #else 6144, 2048, #endif 2048) stream.SkipNewlineThenWhitespace((1 <<< 16), true) |> Equal System.Int32.MaxValue stream.Index |> Equal (int64 (cs.Length - 2)) stream.Read(3) |> Equal " " testArgumentChecking() testIndentationComputation() testIndentationOverflowHandling() let testSkipRestOfLine() = let testChars = [|'\n'; '\r'; '\t'; '\u000C'; '\u000E'|] let checkSkipRestOfLine (cs: char[]) iBegin 
(stream: CharStream<_>) = let state0 = stream.State let indexOffset = stream.Index - int64 iBegin let mutable i = iBegin while i < cs.Length && (cs[i] <> '\r' && cs[i] <> '\n') do i <- i + 1 let index = indexOffset + int64 i let tag = if i <> iBegin then state0.Tag + _1 else state0.Tag let str = new string(cs, iBegin, i - iBegin) stream.SkipRestOfLine(false) stream.Index |> Equal index stream.StateTag |> Equal tag stream.Line |> Equal state0.Line stream.LineBegin |> Equal state0.LineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) stream.ReadRestOfLine(false) |> Equal str stream.Index |> Equal index stream.StateTag |> Equal tag stream.Line |> Equal state0.Line stream.LineBegin |> Equal state0.LineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) let mutable line2 = state0.Line if i < cs.Length then let c = cs[i] if c = '\r' || c = '\n' then i <- i + if c = '\r' && i + 1 < cs.Length && cs[i + 1] = '\n' then 2 else 1 line2 <- line2 + 1L let index2 = indexOffset + int64 i let tag2 = if i <> iBegin then state0.Tag + _1 else state0.Tag let lineBegin2 = if line2 <> state0.Line then index2 else state0.LineBegin try stream.SkipRestOfLine(true) stream.Index |> Equal index2 stream.StateTag |> Equal tag2 stream.Line |> Equal line2 stream.LineBegin |> Equal lineBegin2 stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) with TestFailed str -> stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) stream.SkipRestOfLine(true) stream.Index |> Equal index2 stream.ReadRestOfLine(true) |> Equal str stream.Index |> Equal index2 stream.StateTag |> Equal tag2 stream.Line |> Equal line2 stream.LineBegin |> Equal lineBegin2 stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) let testFastPath() = let cs = Array.create 8 '_' #if LOW_TRUST #else use stream = new CharStream(cs, 1, 7, 100L) #endif for c1 in testChars do cs[1] <- c1 for c2 in testChars do cs[2] <- c2 for c3 in testChars do cs[3] <- c3 for c4 in 
testChars do cs[4] <- c4 for c5 in testChars do cs[5] <- c5 #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 7, 100L) #endif checkSkipRestOfLine cs 1 stream // check end of block/stream handling for c5 in testChars do cs[5] <- c5 for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 7, 100L) #else stream.Seek(stream.IndexOfFirstChar) #endif stream.Skip(4) checkSkipRestOfLine cs 5 stream stream.Skip(1) checkSkipRestOfLine cs 6 stream stream.Skip(1) checkSkipRestOfLine cs 7 stream #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 7, 100L) #else stream.Seek(stream.IndexOfFirstChar) #endif stream.Skip(7) checkSkipRestOfLine cs 8 stream let testSlowPath() = let cs = Array.create 17 '_' // check end of block handling with multi-block CharStream (blockSize = 8, blockOverlap = 3) for c5 in testChars do cs[5] <- c5 for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 use stream = createMultiBlockUtf8TestStream cs let s5 = stream.Skip(5) checkSkipRestOfLine cs 5 stream // will start in the fast path stream.Skip() checkSkipRestOfLine cs 6 stream // will start in the slow path testFastPath() testSlowPath() let testSkipCharsOrNewlines() = let mutable counter = 0 let check (stream: CharStream<_>) (cs: char[]) iBegin nMax = let state0 = stream.State let tag0 = state0.Tag let line0 = state0.Line let lineBegin0 = state0.LineBegin let index0 = stream.Index let indexOffset = int32 index0 - iBegin let lineOffset = int32 state0.Line - 1 let alwaysTrue = fun (c: char) -> true let alwaysFalse = fun (c: char) -> false let mutable nTrueN = 0 let nTrue = fun (c: char) -> if nTrueN > 0 then nTrueN <- nTrueN - 1; true else false for n = 0 to nMax do counter <- counter + 1 let mutable line = 1 let mutable lineBegin = 0 let mutable i = iBegin let mutable c = 0 while c < n && i < cs.Length do match cs[i] with | '\r' | '\n' -> i <- i + if cs[i] = 
'\r' && i + 1 < cs.Length && cs[i + 1] = '\n' then 2 else 1 line <- line + 1 lineBegin <- i | _ -> i <- i + 1 c <- c + 1 let consumed = c <> 0 let tag = if consumed then tag0 + _1 else tag0 let index = int64 (i + indexOffset) let containsNewline = line <> 1 let lineBegin = if containsNewline then int64 (lineBegin + indexOffset) else lineBegin0 let line = int64 (line + lineOffset) let checkStreamAndReset() = // this function needs to be fast stream.Index |> Equal index stream.StateTag |> Equal tag stream.Line |> Equal line stream.LineBegin |> Equal lineBegin stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) let str = new string(cs, iBegin, i - iBegin) let normalizedStr = FParsec.Text.NormalizeNewlines(str) if n = 1 then stream.ReadCharOrNewline() |> Equal (if c = 0 then EOS else normalizedStr[0]) checkStreamAndReset() stream.SkipNewline() |> Equal containsNewline if containsNewline then checkStreamAndReset() else stream.Index |> Equal index0 stream.StateTag |> Equal tag0 stream.Line |> Equal line0 stream.LineBegin |> Equal lineBegin0 stream.SkipUnicodeNewline() |> Equal containsNewline if containsNewline then checkStreamAndReset() else stream.Index |> Equal index0 stream.StateTag |> Equal tag0 stream.Line |> Equal line0 stream.LineBegin |> Equal lineBegin0 stream.SkipCharsOrNewlinesWhile(alwaysTrue, alwaysFalse) |> Equal c checkStreamAndReset() stream.ReadCharsOrNewlinesWhile(alwaysTrue, alwaysFalse, false) |> Equal str checkStreamAndReset() stream.ReadCharsOrNewlinesWhile(alwaysTrue, alwaysFalse, true) |> Equal normalizedStr checkStreamAndReset() stream.SkipCharsOrNewlinesWhile(alwaysTrue, alwaysFalse, 0, System.Int32.MaxValue) |> Equal c checkStreamAndReset() stream.ReadCharsOrNewlinesWhile(alwaysTrue, alwaysFalse, 0, System.Int32.MaxValue, false) |> Equal str checkStreamAndReset() stream.ReadCharsOrNewlinesWhile(alwaysTrue, alwaysFalse, 0, System.Int32.MaxValue, true) |> Equal normalizedStr checkStreamAndReset() stream.SkipCharsOrNewlines(n) |> 
Equal c checkStreamAndReset() stream.ReadCharsOrNewlines(n, false) |> Equal str stream.ReadFrom(state0, false) |> Equal str checkStreamAndReset() stream.ReadCharsOrNewlines(n, true) |> Equal normalizedStr stream.ReadFrom(state0, true) |> Equal normalizedStr checkStreamAndReset() try nTrueN <- n stream.SkipCharsOrNewlinesWhile(nTrue) |> Equal c checkStreamAndReset() with TestFailed _ -> stream.Seek(stream.IndexOfFirstChar); stream.BacktrackTo(state0) nTrueN <- n stream.SkipCharsOrNewlinesWhile(nTrue) |> Equal c nTrueN <- n stream.ReadCharsOrNewlinesWhile(nTrue, false) |> Equal str checkStreamAndReset() nTrueN <- n stream.ReadCharsOrNewlinesWhile(nTrue, true) |> Equal normalizedStr checkStreamAndReset() nTrueN <- n stream.SkipCharsOrNewlinesWhile(nTrue, 0, System.Int32.MaxValue) |> Equal c checkStreamAndReset() nTrueN <- n stream.ReadCharsOrNewlinesWhile(nTrue, 0, System.Int32.MaxValue, false) |> Equal str checkStreamAndReset() nTrueN <- n stream.ReadCharsOrNewlinesWhile(nTrue, 0, System.Int32.MaxValue, true) |> Equal normalizedStr checkStreamAndReset() nTrueN <- n stream.SkipCharsOrNewlinesWhile(alwaysTrue, 0, n) |> Equal c checkStreamAndReset() nTrueN <- n stream.ReadCharsOrNewlinesWhile(alwaysTrue, 0, n, false) |> Equal str checkStreamAndReset() nTrueN <- n stream.ReadCharsOrNewlinesWhile(alwaysTrue, 0, n, true) |> Equal normalizedStr checkStreamAndReset() let mutable foundString = false; stream.SkipCharsOrNewlinesUntilString("\u0000", n, &foundString) |> Equal c // the stream contains no '\u0000' foundString |> Equal false checkStreamAndReset() stream.SkipCharsOrNewlinesUntilCaseFoldedString("\u0000", n, &foundString) |> Equal c foundString |> Equal false checkStreamAndReset() let mutable str2 = null : string stream.SkipCharsOrNewlinesUntilString("\u0000", n, false, &str2) |> Equal c str2 |> IsNull checkStreamAndReset() stream.SkipCharsOrNewlinesUntilString("\u0000", n, true, &str2) |> Equal c str2 |> IsNull checkStreamAndReset() 
stream.SkipCharsOrNewlinesUntilCaseFoldedString("\u0000", n, false, &str2) |> Equal c str2 |> IsNull checkStreamAndReset() stream.SkipCharsOrNewlinesUntilCaseFoldedString("\u0000", n, true, &str2) |> Equal c str2 |> IsNull checkStreamAndReset() if i < cs.Length && str.IndexOf(cs[i]) = -1 then let cis = string cs[i] stream.SkipCharsOrNewlinesUntilString(cis, n, &foundString) |> Equal c // the stream contains no '\u0000' foundString |> Equal true checkStreamAndReset() stream.SkipCharsOrNewlinesUntilCaseFoldedString(cis, n, &foundString) |> Equal c foundString |> Equal true checkStreamAndReset() let mutable str2 = null : string stream.SkipCharsOrNewlinesUntilString(cis, n, false, &str2) |> Equal c str2 |> Equal str checkStreamAndReset() stream.SkipCharsOrNewlinesUntilString(cis, n, true, &str2) |> Equal c str2 |> Equal normalizedStr checkStreamAndReset() stream.SkipCharsOrNewlinesUntilCaseFoldedString(cis, n, false, &str2) |> Equal c str2 |> Equal str checkStreamAndReset() stream.SkipCharsOrNewlinesUntilCaseFoldedString(cis, n, true, &str2) |> Equal c str2 |> Equal normalizedStr checkStreamAndReset() let testChars = [|'\n'; '\r'; '\t'; '\u000C'; '\u000E'|] let testFastPath() = let cs = Array.create 11 '_' #if LOW_TRUST #else use stream = new CharStream(cs, 1, 10, 100L) #endif for c1 in testChars do cs[1] <- c1 for c2 in testChars do cs[2] <- c2 for c3 in testChars do cs[3] <- c3 for c4 in testChars do cs[4] <- c4 for c5 in testChars do cs[5] <- c5 for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 10, 100L) #endif check stream cs 1 7 // check end of block/stream handling for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 for c9 in testChars do cs[9] <- c9 for c10 in testChars do cs[10] <- c10 #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 10, 100L) #else stream.Seek(stream.IndexOfFirstChar) #endif stream.Skip(6) check stream cs 7 5 stream.Skip() check 
stream cs 8 4 stream.Skip() check stream cs 9 3 stream.Skip() check stream cs 10 2 #if LOW_TRUST use stream = new CharStream(new string(cs), 1, 10, 100L) #else stream.Seek(stream.IndexOfFirstChar) #endif stream.Skip(10) check stream cs 11 1 let testSlowPath() = let cs = Array.create 17 '_' // check end of block handling with multi-block CharStream (blockSize = 8, blockOverlap = 3) for c6 in testChars do cs[6] <- c6 for c7 in testChars do cs[7] <- c7 for c8 in testChars do cs[8] <- c8 for c9 in testChars do cs[9] <- c9 for c10 in testChars do cs[10] <- c10 use stream = createMultiBlockUtf8TestStream cs stream.Skip(6) check stream cs 6 4 stream.Skip() check stream cs 7 4 stream.Skip() check stream cs 8 4 let testArgumentChecking() = let N = 10 let cs = Array.create N '_' let css = new string(cs) use stream = new CharStream(css, 0, cs.Length) let alwaysTrue = fun c -> true for i in [0; 1; N - 1; N] do let str = if i < N then css[i..] else "" let n = N - i stream.Seek(int64 i) stream.SkipCharsOrNewlines(System.Int32.MaxValue) |> Equal n stream.Index |> Equal (int64 N) stream.Seek(int64 i) stream.ReadCharsOrNewlines(System.Int32.MaxValue, true) |> Equal str stream.Index |> Equal (int64 N) stream.Seek(int64 i) try stream.SkipCharsOrNewlines(-1) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlines(System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentException -> () try stream.ReadCharsOrNewlines(-1, true) |> ignore; Fail() with :? System.ArgumentException -> () try stream.ReadCharsOrNewlines(System.Int32.MinValue, true) |> ignore; Fail() with :? 
System.ArgumentException -> () stream.Seek(int64 i) stream.SkipCharsOrNewlinesWhile(alwaysTrue, -1, System.Int32.MaxValue) |> Equal n stream.Index |> Equal (int64 N) stream.Seek(int64 i) stream.ReadCharsOrNewlinesWhile(alwaysTrue, -1, System.Int32.MaxValue, true) |> Equal str stream.Index |> Equal (int64 N) stream.Seek(int64 i) try stream.SkipCharsOrNewlinesWhile(alwaysTrue, -1, -1) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesWhile(alwaysTrue, -1, System.Int32.MinValue) |> ignore; Fail() with :? System.ArgumentException -> () try stream.ReadCharsOrNewlinesWhile(alwaysTrue, -1, -1, true) |> ignore; Fail() with :? System.ArgumentException -> () try stream.ReadCharsOrNewlinesWhile(alwaysTrue, -1, System.Int32.MinValue, true) |> ignore; Fail() with :? System.ArgumentException -> () let mutable found = false let mutable str = null try stream.SkipCharsOrNewlinesUntilString(null, 10, &found) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.SkipCharsOrNewlinesUntilString(null, 10, false, &str) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.SkipCharsOrNewlinesUntilString("", 10, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilString("", 10, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilString("_", -1, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilString("_", -1, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilString("_", System.Int32.MinValue, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilString("_", System.Int32.MinValue, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString(null, 10, &found) |> ignore; Fail() with :? 
System.NullReferenceException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString(null, 10, false, &str) |> ignore; Fail() with :? System.NullReferenceException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("", 10, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("", 10, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("_", -1, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("_", -1, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("_", System.Int32.MinValue, &found) |> ignore; Fail() with :? System.ArgumentException -> () try stream.SkipCharsOrNewlinesUntilCaseFoldedString("_", System.Int32.MinValue, false, &str) |> ignore; Fail() with :? System.ArgumentException -> () let SkipCharsOrNewlinesWhileMinChars() = let cs = "0123456789" use stream = new CharStream(cs, 0, cs.Length) let smaller n = fun c -> int c < int '0' + n for n = 0 to 10 do stream.SkipCharsOrNewlinesWhile(smaller n, n, System.Int32.MaxValue) |> Equal n stream.Index |> Equal (int64 n) stream.Seek(0L) stream.ReadCharsOrNewlinesWhile(smaller n, n, System.Int32.MaxValue, true) |> ignore stream.Index |> Equal (int64 n) stream.Seek(0L) let tag = stream.StateTag stream.SkipCharsOrNewlinesWhile(smaller n, n + 1, System.Int32.MaxValue) |> Equal 0 stream.Index |> Equal 0L stream.StateTag |> Equal tag stream.ReadCharsOrNewlinesWhile(smaller n, n + 1, System.Int32.MaxValue, true) |> ignore stream.Index |> Equal 0L stream.StateTag |> Equal tag stream.SkipCharsOrNewlinesWhile((fun c -> true), n + 1, n) |> Equal 0 stream.Index |> Equal 0L stream.StateTag |> Equal tag stream.ReadCharsOrNewlinesWhile((fun c -> true), n + 1, n, true) |> ignore stream.Index |> Equal 0L stream.StateTag |> Equal tag 
stream.SkipCharsOrNewlinesWhile((fun c -> true), System.Int32.MaxValue, n) |> Equal 0 stream.Index |> Equal 0L stream.StateTag |> Equal tag stream.ReadCharsOrNewlinesWhile((fun c -> true), System.Int32.MaxValue, n, true) |> ignore stream.Index |> Equal 0L stream.StateTag |> Equal tag testFastPath() testSlowPath() testArgumentChecking() SkipCharsOrNewlinesWhileMinChars() let SkipCharsOrNewlinesUntilString() = // most of the testing has already been done in testSkipCharsOrNewlines let cs = "ABCDEFGHI\tJKLMNOPQRST".ToCharArray() use stream = createMultiBlockUtf8TestStream cs // blockSize = 8, blockOverlap = 3 for i0 = 0 to cs.Length - 1 do stream.Seek(0L); stream.Seek(int64 i0) for i1 = i0 to cs.Length - 1 do for n = 1 to cs.Length - i1 do let check strToFind maxChars isPresent = let iEnd = if isPresent then i1 elif maxChars < cs.Length - i0 then i0 + maxChars else cs.Length let skippedString = if not isPresent then null else new string(cs[i0..i1 - 1]) let state0 = stream.State let mutable found = false stream.SkipCharsOrNewlinesUntilString(strToFind, maxChars, &found) |> Equal (iEnd - i0) stream.Index |> Equal (int64 iEnd) found |> Equal isPresent stream.Skip(0L); stream.BacktrackTo(state0) let mutable str = null stream.SkipCharsOrNewlinesUntilString(strToFind, maxChars, false, &str) |> Equal (iEnd - i0) stream.Index |> Equal (int64 iEnd) str |> Equal skippedString stream.Skip(0L); stream.BacktrackTo(state0) let strToFindCI = FParsec.Text.FoldCase(strToFind) stream.SkipCharsOrNewlinesUntilCaseFoldedString(strToFindCI, maxChars, &found) |> Equal (iEnd - i0) stream.Index |> Equal (int64 iEnd) found |> Equal isPresent stream.Skip(0L); stream.BacktrackTo(state0) stream.SkipCharsOrNewlinesUntilCaseFoldedString(strToFindCI, maxChars, false, &str) |> Equal (iEnd - i0) stream.Index |> Equal (int64 iEnd) str |> Equal skippedString stream.Skip(0L); stream.BacktrackTo(state0) let strToFind = new string(cs, i1, n) check strToFind System.Int32.MaxValue true check strToFind (i1 - 
i0) true if i1 - i0 > 0 then check strToFind (i1 - i0 - 1) false if n > 1 then let strToNotFind = string (char (int strToFind[0] + 1)) + (if n > 1 then strToFind.Substring(1) else "") check strToNotFind System.Int32.MaxValue false let strToNotFind2 = strToFind.Substring(0, n - 1) + string (char (int strToFind[n - 1] + 1)) check strToNotFind2 System.Int32.MaxValue false let testCreateSubstream() = let test (stream: CharStream<_>) = let off = stream.IndexOfFirstChar let state0 = stream.State stream.ReadCharsOrNewlines(3, false) |> Equal " \r\n0" stream.Index |> Equal (off + 4L) stream.Line |> Equal 2L stream.LineBegin |> Equal (off + 3L) let state1 = stream.State stream.ReadCharsOrNewlines(5, false) |> Equal "1\n345" use substream = stream.CreateSubstream(state1) stream.BacktrackTo(state0) let substreamState0 = substream.State substream.IndexOfFirstChar |> Equal (off + 4L) substream.IndexOfLastCharPlus1 |> Equal (off + 9L) substream.Line |> Equal 2L substream.LineBegin |> Equal(off + 3L) substream.ReadCharsOrNewlines(10, false) |> Equal "1\n345" substream.Index |> Equal (off + 9L) substream.IsEndOfStream |> True substream.Line |> Equal 3L substream.LineBegin |> Equal (off + 6L) stream.ReadCharsOrNewlines(10, false) |> Equal " \r\n01\n3456" stream.Index |> Equal (off + 10L) stream.Line |> Equal 3L substream.BacktrackTo(substreamState0) substream.Read() |> Equal '1' let substreamState1 = substream.State substream.ReadCharsOrNewlines(3, false) |> Equal "\n34" use subSubstream = substream.CreateSubstream(substreamState1) subSubstream.IndexOfFirstChar |> Equal (off + 5L) subSubstream.IndexOfLastCharPlus1 |> Equal (off + 8L) subSubstream.Line |> Equal 2L subSubstream.LineBegin |> Equal (off + 3L) subSubstream.ReadCharsOrNewlines(10, false) |> Equal "\n34" subSubstream.Index |> Equal (off + 8L) subSubstream.IsEndOfStream |> True subSubstream.Line |> Equal 3L subSubstream.LineBegin |> Equal (off + 6L) substream.BacktrackTo(substreamState1) use subSubstream2 = 
substream.CreateSubstream(substreamState1) subSubstream2.IndexOfFirstChar |> Equal (off + 5L) subSubstream2.IndexOfLastCharPlus1 |> Equal (off + 5L) subSubstream2.Line |> Equal 2L subSubstream2.LineBegin |> Equal (off + 3L) subSubstream2.IsEndOfStream |> True substream.ReadCharsOrNewlines(10, false) |> Equal "\n345" substream.IsEndOfStream |> True let substreamStateEnd = substream.State use subSubstream3 = substream.CreateSubstream(substreamStateEnd) subSubstream3.IndexOfFirstChar |> Equal (off + 9L) subSubstream3.IndexOfLastCharPlus1 |> Equal (off + 9L) subSubstream3.Line |> Equal 3L subSubstream3.LineBegin |> Equal (off + 6L) subSubstream3.IsEndOfStream |> True substream.BacktrackTo(substreamState0) try substream.CreateSubstream(substreamState1) |> ignore; Fail() with :? System.ArgumentException -> () try substream.CreateSubstream(substreamStateEnd) |> ignore; Fail() with :? System.ArgumentException -> () stream.Skip(-1) let state2 = stream.State stream.Read() |> Equal '6' stream.IsEndOfStream |> True use substream2 = stream.CreateSubstream(state2) substream2.IndexOfFirstChar |> Equal (off + 9L) substream2.IndexOfLastCharPlus1 |> Equal (off + 10L) substream2.Index |> Equal(off + 9L) substream2.Line |> Equal 3L substream2.LineBegin |> Equal (off + 6L) substream2.Read() |> Equal '6' substream2.IsEndOfStream |> True let stateEnd = stream.State use substream3 = stream.CreateSubstream(stateEnd) substream3.IndexOfFirstChar |> Equal (off + 10L) substream3.IndexOfLastCharPlus1 |> Equal (off + 10L) substream3.Index |> Equal(off + 10L) substream3.Line |> Equal 3L substream3.LineBegin |> Equal (off + 6L) substream3.IsEndOfStream |> True stream.BacktrackTo(state1) use substream4 = stream.CreateSubstream(state1) substream4.IndexOfFirstChar |> Equal (off + 4L) substream4.IndexOfLastCharPlus1 |> Equal (off + 4L) substream4.Index |> Equal (off + 4L) substream4.Line |> Equal 2L substream4.LineBegin |> Equal (off + 3L) substream4.IsEndOfStream |> True stream.BacktrackTo(state0) 
try stream.CreateSubstream(state1) |> ignore; Fail() with :? System.ArgumentException -> () try stream.CreateSubstream(stateEnd) |> ignore; Fail() with :? System.ArgumentException -> () let str = " \r\n01\n3456" use stream1 = new CharStream("!" + str + "!", 1, str.Length, 100L) test stream1 #if !LOW_TRUST #if DEBUG let state = stream1.State use substream = stream1.CreateSubstream(state) try stream1.Dispose(); Fail() with :? System.InvalidOperationException -> () substream.Read() |> ignore #endif #endif #if !LOW_TRUST use stream2 = new CharStream(("!" + str + "!").ToCharArray(), 1, str.Length, 100L) test stream2 #endif use stream3 = createMultiBlockUtf8TestStream (str.ToCharArray()) // blockSize = 8, blockOverlap = 3 test stream3 let testTwoChars() = let cs = new TwoChars('\u0001', '\u0002') cs.Char0 |> Equal '\u0001' cs.Char1 |> Equal '\u0002' cs.Equals(TwoChars('\u0001', '\u0002')) |> True (box cs).Equals(TwoChars('\u0001', '\u0002')) |> True TwoChars.op_Equality(cs, TwoChars((2u <<< 16) ||| 1u)) |> True TwoChars.op_Inequality(cs, TwoChars('\u0001', '\u0002')) |> False cs.GetHashCode() |> Equal ((2 <<< 16) ||| 1) let cs2 = new TwoChars('\uffff', '\uffff') cs2.Char0 |> Equal '\uffff' cs2.Char1 |> Equal '\uffff' let run() = #if !LOW_TRUST setStaticField typeof "MinimumByteBufferLength" 10 setStaticField typeof "DoNotRoundUpBlockSizeToSimplifyTesting" true #endif testNonStreamConstructors() testStreamConstructorArgumentChecking() testEncodingDetection() #if !LOW_TRUST #if !DISABLE_STREAM_BACKTRACKING_TESTS testNonSeekableCharStreamHandling() #endif #endif testDecoderFallbackExceptionHandling() let testStreams() = let refString = "1234567890ABCDEF" use stringStream = new CharStream(" " + refString, 1, refString.Length, 100L) testBasicCharStreamMethods stringStream refString refString.Length 0 0 let be = new System.Text.UTF32Encoding(true, true) let bs = Array.append (be.GetPreamble()) (be.GetBytes(refString)) use fileStream = createMultiBlockTestStream (new 
System.IO.MemoryStream(bs, false)) System.Text.Encoding.Unicode fileStream.MinRegexSpace <- 3 testBasicCharStreamMethods fileStream refString 8 3 3 let refString2 = "1234567890ABCDEFGH" // exactly three blocks + 1 overlap let bs2 = System.Text.Encoding.Unicode.GetBytes(refString2) use fileStream = createMultiBlockTestStream (new System.IO.MemoryStream(bs2, false)) System.Text.Encoding.Unicode fileStream.MinRegexSpace <- 3 testBasicCharStreamMethods fileStream refString2 8 3 3 use emptyStringStream = new CharStream("x", 1, 0, 1000L) testEmptyStream emptyStringStream use emptyStringStream2 = new CharStream("") testEmptyStream emptyStringStream2 use emptyFileStream = createMultiBlockTestStream (new System.IO.MemoryStream(be.GetPreamble(), false)) System.Text.Encoding.Unicode testEmptyStream emptyFileStream use emptyFileStream2 = createMultiBlockTestStream (new System.IO.MemoryStream([||], false)) System.Text.Encoding.Unicode testEmptyStream emptyFileStream2 #if NETCORE Encoding.RegisterProvider CodePagesEncodingProvider.Instance #endif testStreams() xTest() testSkipWhitespace() testSkipUnicodeWhitespace() testSkipNewlineWhitespace() testSkipRestOfLine() testSkipCharsOrNewlines() SkipCharsOrNewlinesUntilString() testCreateSubstream() testTwoChars() ================================================ FILE: Test/CloningTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2010-2011 // License: Simplified BSD License. See accompanying documentation. module FParsec.Test.CloningTests #if !LOW_TRUST open FParsec.Test.Test open FParsec.Cloning // The organization of this test module is a bit messy currently, // so "Go to definition" is your friend here. // The important point is that the code coverage is close to 100%. 
type SerializationInfo = System.Runtime.Serialization.SerializationInfo type StreamingContext = System.Runtime.Serialization.StreamingContext type OnSerializingAttribute = System.Runtime.Serialization.OnSerializingAttribute type OnSerializedAttribute = System.Runtime.Serialization.OnSerializedAttribute type OnDeserializingAttribute = System.Runtime.Serialization.OnDeserializingAttribute type OnDeserializedAttribute = System.Runtime.Serialization.OnDeserializedAttribute type ISerializable = System.Runtime.Serialization.ISerializable type IObjectReference = System.Runtime.Serialization.IObjectReference type IDeserializationCallback = System.Runtime.Serialization.IDeserializationCallback type SerializationException = System.Runtime.Serialization.SerializationException type CloneEvents = FParsec.Cloning.CloneEvents type CloneEventHandlers = FParsec.Cloning.CloneEventHandlers type BindingFlags = System.Reflection.BindingFlags type KeyValuePair<'k,'v> = System.Collections.Generic.KeyValuePair<'k,'v> [] type TestState(objectIndices: int[]) = inherit Cloner.State(null, objectIndices) override t.Type = raise (System.NotImplementedException()) override t.CreateUninitializedObject() = raise (System.NotImplementedException()) override t.WriteToUninitializedObject(instance, objectGraph) = raise (System.NotImplementedException()) // a reference implementation let findStronglyConnectedComponents (states: Cloner.State[]) = let stack = new System.Collections.Generic.Stack() let mutable index = 1 let indices = Array.zeroCreate states.Length let lowlinks = Array.zeroCreate states.Length let rec tarjan (v: int) = indices[v] <- index lowlinks[v] <- index index <- index + 1 stack.Push(v) let objectIndices = states[v].ObjectIndices if objectIndices <> null then for w in objectIndices do if w <> 0 then if indices[w] = 0 then tarjan w lowlinks[v] <- min lowlinks[v] lowlinks[w] else if stack.Contains(w) then lowlinks[v] <- min lowlinks[v] indices[w] if lowlinks[v] = indices[v] then let 
mutable last = stack.Pop() if last <> v then let scc = new System.Collections.Generic.List() scc.Add(last) while last <> v do last <- stack.Pop() scc.Add(last) let scc = scc.ToArray() for i in scc do states[i].StronglyConnectedComponent <- scc tarjan 1 let copyTestStates (states: TestState[]) = states |> Array.map (fun s -> if s <> null then new TestState(s.ObjectIndices) else null) let upcastTestStates (states: TestState[]) = box states :?> Cloner.State[] let createRandomTestStateGraph (rand: System.Random) n = let states = Array.zeroCreate (n + 1) let p = rand.NextDouble() let p = p*p for i = 2 to n do let indices = ResizeArray() for j = 1 to n do if rand.NextDouble() < p then indices.Add(j) if indices.Count = 0 && rand.NextDouble() < 0.5 then states[i] <- new TestState(null) else if rand.NextDouble() < 0.5 then indices.Add(0) // 0 values should be ignored let indices = indices.ToArray() shuffleArray rand indices states[i] <- new TestState(indices) // find all roots let visited = Array.zeroCreate (n + 1) let unvisitedIndices = [|2..n|] let mutable unvisitedCount = n - 1 let roots = ResizeArray() while unvisitedCount <> 0 do let oldUnvisitedCount = unvisitedCount let rec mark index = visited[index] <- 1uy unvisitedCount <- unvisitedCount - 1 let indices = states[index].ObjectIndices if indices <> null then for idx in indices do if idx > 1 && visited[idx] = 0uy then mark idx let index = unvisitedIndices[rand.Next(oldUnvisitedCount)] roots.Add(index) mark index // remove newly marked indices from unvisitedIndices let mutable lag = 0 for i = 0 to oldUnvisitedCount - 1 do if visited[unvisitedIndices[i]] <> 0uy then lag <- lag + 1 elif lag <> 0 then unvisitedIndices[i - lag] <- unvisitedIndices[i] if rand.NextDouble() < p then roots.Add(1) states[1] <- TestState(roots.ToArray()) states let testStronglyConnectedComponents() = let test (states: TestState[]) = states[0] |> Equal null let states2 = upcastTestStates (copyTestStates states) let states = upcastTestStates 
states let components = Cloner.FindStronglyConnectedComponents(states) components[0] |> Equal 0 components[0] <- System.Int32.MaxValue (components |> Array.tryFind (fun c -> c <= 0)).IsNone |> True // basic consistency checks for i = 1 to states.Length - 1 do let c = components[i] if c <> 0 then let state = states[i] let scc = state.StronglyConnectedComponent if scc = null then components[i] <- 0 else for j in scc do states[j].StronglyConnectedComponent |> ReferenceEqual scc components[j] |> Equal c components[j] <- 0 System.Array.IndexOf(components, c) |> Equal -1 for i = 1 to states.Length - 1 do components[i] |> Equal 0 // compare result with reference implementation findStronglyConnectedComponents states2 for i = 1 to states.Length - 1 do let state1 = states[i] let state2 = states2[i] let scc1 = state1.StronglyConnectedComponent let scc2 = state2.StronglyConnectedComponent if scc2 = null then scc1 |> IsNull elif not (obj.ReferenceEquals(scc1, scc2)) then System.Array.Sort(scc1) System.Array.Sort(scc2) scc1 |> Equal scc2 // speed up future comparisons for j in state2.StronglyConnectedComponent do states2[j].StronglyConnectedComponent <- scc1 test [|null; TestState(null)|] test [|null; TestState([|1|])|] let rand = System.Random(1234) for i = 0 to 1000 do let graph = createRandomTestStateGraph rand 7 test graph for i = 8 to 300 do let graph = createRandomTestStateGraph rand i test graph let testComputeTopologicalOrder() = let test (states: Cloner.State[]) = let order = Cloner.ComputeTopologicalOrder(states) let marked = Array.zeroCreate states.Length // check that each index only occurs once for index in order do marked[index] |> Equal 0uy marked[index] <- 1uy System.Array.IndexOf(marked, 0uy) |> Equal -1 System.Array.Clear(marked, 0, marked.Length) // check dependency order marked[0] <- 1uy // states[0] is ignored for i = order.Length - 1 downto 1 do let index = order[i] if marked[index] = 0uy then let state = states[index] if state.StronglyConnectedComponent = 
null then marked[index] <- 1uy // all dependencies must be marked if state.ObjectIndices <> null then for j in state.ObjectIndices do marked[j] |> Equal 1uy else // objects within a strongly connected component have no defined order for j in state.StronglyConnectedComponent do marked[j] <- 1uy for j in state.StronglyConnectedComponent do for k in states[j].ObjectIndices do marked[k] |> Equal 1uy test [|null; TestState(null)|] test [|null; TestState([|1|])|] let rand = System.Random(1234) for i = 0 to 1000 do let graph = createRandomTestStateGraph rand 7 test (upcastTestStates graph) for i = 8 to 300 do let graph = createRandomTestStateGraph rand i test (upcastTestStates graph) let callStreamingContextCallback (context: StreamingContext) = (context.Context :?> (unit -> unit))() let addToContextList (context: StreamingContext) s = let r = context.Context :?> string list ref r.Value <- s::r.Value [] type NonSerializableBase() = class end type ClassWithNonSerializableBase() = inherit NonSerializableBase() type ClassWithSingleOnSerializingHandler() = member t.AnotherMethod() = () [] member private t.OnSerializing(context) = addToContextList context "OnSerializing" type ClassWithSingleOnSerializingHandlerAndNonSerializableBase() = inherit NonSerializableBase() member t.AnotherMethod() = () [] member private t.OnSerializing(context) = addToContextList context "OnSerializing" type ClassWithSingleOnSerializingHandlerInBase() = inherit ClassWithSingleOnSerializingHandler() type ClassWithSingleOnSerializingHandlerInBaseBase() = inherit ClassWithSingleOnSerializingHandlerInBase() type ClassWithSingleOnSerializedHandler() = member t.AnotherMethod() = () [] member private t.OnSerialized(context) = addToContextList context "OnSerialized" type ClassWithSingleOnDeserializingHandler() = member t.AnotherMethod() = () [] member private t.OnDeserializing(context) = addToContextList context "OnDeserializing" type ClassWithSingleOnDeserializedHandler() = member t.AnotherMethod() = () []
member private t.OnDeserialized(context) = addToContextList context "OnDeserialized" type ClassThatHasItAllBaseBase() = [] member private t.OnSerializing(context) = addToContextList context "OnSerializingBaseBase" [] member private t.OnDeserializing(context) = addToContextList context "OnDeserializingBaseBase" type ClassThatHasItAllBase() = inherit ClassThatHasItAllBaseBase() [] member private t.OnSerializing(context) = addToContextList context "OnSerializingBase" [] member private t.OnDeserializing(context) = addToContextList context "OnDeserializingBase" type ClassThatHasItAll() = inherit ClassThatHasItAllBase() member private t.OnSerializin(context) = raise (System.NotImplementedException()) [] member private t.OnSerializing(context) = addToContextList context "OnSerializing" [] member private t.OnSerialized(context) = addToContextList context "OnSerialized" [] member private t.OnDeserializing(context) = addToContextList context "OnDeserializing" [] member private t.OnDeserialized(context) = addToContextList context "OnDeserialized" interface ISerializable with member t.GetObjectData(info, context) = raise (System.NotImplementedException()) interface IDeserializationCallback with member t.OnDeserialization(sender) = raise (System.NotImplementedException()) interface IObjectReference with member t.GetRealObject(context) = raise (System.NotImplementedException()) let testCloningEventHandlers() = let contextList = ref [] let context = StreamingContext(System.Runtime.Serialization.StreamingContextStates.Clone, contextList) let () = CloneEventHandlers.Create(typeof) |> IsNull CloneEventHandlers.Create(typeof) |> IsNull let () = contextList.Value <- [] let instance = ClassWithSingleOnSerializingHandler() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnSerializing handlers.InvokeOnSerializing(instance, context) contextList.Value |> Equal ["OnSerializing"] let () = contextList.Value <- [] let instance = 
ClassWithSingleOnSerializingHandlerInBase() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnSerializing handlers.InvokeOnSerializing(instance, context) contextList.Value |> Equal ["OnSerializing"] let () = contextList.Value <- [] let instance = ClassWithSingleOnSerializingHandlerInBaseBase() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnSerializing handlers.InvokeOnSerializing(instance, context) contextList.Value |> Equal ["OnSerializing"] let () = contextList.Value <- [] let instance = ClassWithSingleOnSerializedHandler() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnSerialized handlers.InvokeOnSerialized(instance, context) contextList.Value |> Equal ["OnSerialized"] let () = contextList.Value <- [] let instance = ClassWithSingleOnDeserializingHandler() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnDeserializing handlers.InvokeOnDeserializing(instance, context) contextList.Value |> Equal ["OnDeserializing"] let () = contextList.Value <- [] let instance = ClassWithSingleOnDeserializedHandler() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.OnDeserialized handlers.InvokeOnDeserialized(instance, context) contextList.Value |> Equal ["OnDeserialized"] let () = let instance = {new ISerializable with member t.GetObjectData(info, context) = raise (System.NotImplementedException())} let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.ISerializable let () = let instance = {new IDeserializationCallback with member t.OnDeserialization(sender) = raise (System.NotImplementedException())} let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.IDeserializationCallback let () = let instance = {new ISerializable with member 
t.GetObjectData(info, context) = raise (System.NotImplementedException()) interface IObjectReference with member t.GetRealObject(context) = raise (System.NotImplementedException())} let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal (CloneEvents.ISerializable ||| CloneEvents.IObjectReference) let () = let instance = {new IObjectReference with member t.GetRealObject(context) = raise (System.NotImplementedException())} let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal CloneEvents.IObjectReference let () = let instance = new ClassThatHasItAll() let handlers = CloneEventHandlers.Create(instance.GetType()) handlers.Events |> Equal ( CloneEvents.OnSerializing ||| CloneEvents.OnSerialized ||| CloneEvents.OnDeserializing ||| CloneEvents.OnDeserialized ||| CloneEvents.ISerializable ||| CloneEvents.IDeserializationCallback ||| CloneEvents.IObjectReference) contextList.Value <- [] handlers.InvokeOnSerializing(instance, context) contextList.Value |> Equal ["OnSerializing"; "OnSerializingBase"; "OnSerializingBaseBase"] contextList.Value <- [] handlers.InvokeOnSerialized(instance, context) contextList.Value |> Equal ["OnSerialized"] contextList.Value <- [] handlers.InvokeOnDeserializing(instance, context) contextList.Value |> Equal ["OnDeserializing"; "OnDeserializingBase"; "OnDeserializingBaseBase"] contextList.Value <- [] handlers.InvokeOnDeserialized(instance, context) contextList.Value |> Equal ["OnDeserialized"] try CloneEventHandlers.Create(typeof) |> ignore Fail() with :? SerializationException -> () try CloneEventHandlers.Create(typeof) |> ignore Fail() with :? 
SerializationException -> () type TypeWithNoSerializedField() = [] val mutable Value: int [] val mutable OnSerializedWasCalled: bool [] val mutable OnDeserializedWasCalled: bool [] member private t.OnSerialized(context: StreamingContext) = t.OnSerializedWasCalled |> False t.OnSerializedWasCalled <- true [] member private t.OnDeserialized(context: StreamingContext) = t.OnDeserializedWasCalled |> False t.OnDeserializedWasCalled <- true type BlittableType(val1: int, val2: string, val3: KeyValuePair) = member t.Value1 = val1 member t.Value2 = val2 member t.Value3 = val3 [] type BlittableStructType(val1: int, val2: string, val3: KeyValuePair) = member t.Value1 = val1 member t.Value2 = val2 member t.Value3 = val3 type NonBlittableType1 = val Value1: int val Value2: int[] val Value3: KeyValuePair type NonBlittableType2 = val Value1: int [] val Value2: string val Value3: KeyValuePair type NonBlittableType3 = val Value1: int val Value2: string val Value3: KeyValuePair type BlittableTypeWithBase = inherit BlittableType val Value1: int val Value2: string val Value3: KeyValuePair type BlittableTypeWithNonBlittableBase1 = inherit NonBlittableType1 val Value1: int type BlittableTypeWithNonBlittableBase2 = inherit NonBlittableType2 val Value1: int type BlittableTypeWithNonBlittableBase3 = inherit NonBlittableType3 val Value1: int [] val Value2: int type BlittableTypeWithNonBlittableBase4 = inherit BlittableTypeWithNonBlittableBase3 val Value1: int let getFieldValues (fields: System.Reflection.FieldInfo[]) (instance: obj) = let values = Array.zeroCreate fields.Length for i = 0 to fields.Length - 1 do let f = fields[i] values[i] <- f.GetValue(instance) values let testGetSerializedFields() = let mutable blittable = false Cloner.GetSerializedFields(typeof, &blittable).Length |> Equal 0 blittable |> True Cloner.GetSerializedFields(typeof, &blittable).Length |> Equal 0 blittable |> True let test (ty: System.Type) = let rec getFields (ty: System.Type) = let fields = 
ty.GetFields(BindingFlags.Public ||| BindingFlags.NonPublic ||| BindingFlags.Instance ||| BindingFlags.DeclaredOnly) |> Array.filter (fun field -> not field.IsNotSerialized) let baseType = ty.BaseType if baseType = null then fields else Array.append fields (getFields baseType) let rec isBlittableField (field: System.Reflection.FieldInfo) = let ty = field.FieldType ty.IsPrimitive || ty = typeof || (ty.IsValueType && getFields ty |> Array.forall isBlittableField) let mutable isBlittable = false let fields = Cloner.GetSerializedFields(ty, &isBlittable) let fields2 = getFields ty let isBlittable2 = fields2 |> Array.forall isBlittableField fields.Length |> Equal fields2.Length for i = 0 to fields2.Length - 1 do let f1, f2 = fields[i], fields2[i] f1.Name |> Equal f2.Name f1.FieldType |> Equal f2.FieldType f1.DeclaringType |> Equal f2.DeclaringType (* check the blittability flag reported by Cloner.GetSerializedFields against the reflection-based reference value isBlittable2; NOTE(review): isBlittableField only inspects serialized fields (getFields filters on IsNotSerialized) - confirm this matches Cloner's rule for types with non-serialized fields *) isBlittable |> Equal isBlittable2 test typeof test typeof test typeof test typeof test typeof test typeof test typeof test typeof test typeof test typeof test typeof try test typeof Fail() with :?
SerializationException -> () let testCreateFieldValuesGetter() = let test (instance: obj) = let mutable isBlittable = false let fields = Cloner.GetSerializedFields(instance.GetType(), &isBlittable) let fieldValuesGetter = Cloner.CreateFieldValuesGetter(instance.GetType(), fields) let values = getFieldValues fields instance fieldValuesGetter.Invoke(instance) |> Equal values test (BlittableType(1, "2", KeyValuePair(3, "4"))) test (BlittableStructType(1, "2", KeyValuePair(3, "4"))) let invokeSetter (setter: System.Action) (instance: obj) (values: obj[]) (objectIndices: int[]) (objectGraph: obj[]) = setter.Invoke(instance, values, objectIndices, objectGraph) let testCreateFieldValuesSetter() = let test (setter: System.Action) (instance: obj) (values: obj[]) (objectIndices: int[]) (objectGraph: obj[]) result = invokeSetter setter instance values objectIndices objectGraph instance |> Equal result let r1 = (1, "2", KeyValuePair(3, "4")) let ty1 = r1.GetType() let fields1 = ty1.GetFields(BindingFlags.NonPublic ||| BindingFlags.Instance) let setter1 = Cloner.CreateFieldValuesSetter(ty1, fields1) test setter1 (-1, "-1", KeyValuePair(-1, "-1")) [|1; "2"; null|] [|0; 0; 1|] [|null; KeyValuePair(3, "4")|] r1 test setter1 (-1, "-1", KeyValuePair(-1, "-1")) [|1; "2"; KeyValuePair(3, "4")|] [|0; 0; 0|] [||] r1 test setter1 (-1, "-1", KeyValuePair(-1, "-1")) [|null; null; null|] [|3; 2; 1|] [|null; KeyValuePair(3, "4"); "2"; 1;|] r1 let r2 = (BlittableStructType(1, "2", KeyValuePair(3, "4"))) let ty2 = r2.GetType() let fields2 = ty2.GetFields(BindingFlags.NonPublic ||| BindingFlags.Instance) let setter2 = Cloner.CreateFieldValuesSetter(ty2, fields2) test setter2 (BlittableStructType(-1, "-1", KeyValuePair(-1, "-1"))) [|1; "2"; null|] [|0; 0; 1|] [|null; KeyValuePair(3, "4")|] r2 test setter2 (BlittableStructType(-1, "-1", KeyValuePair(-1, "-1"))) [|1; "2"; KeyValuePair(3, "4")|] [|0; 0; 0|] [||] r2 test setter2 (BlittableStructType(-1, "-1", KeyValuePair(-1, "-1"))) [|null; null; 
null|] [|3; 2; 1|] [|null; KeyValuePair(3, "4"); "2"; 1;|] r2 type SerializableConstructorTestClass(calledFromISerializableConstructor: bool) = member t.CalledFromISerializableConstructor = calledFromISerializableConstructor private new (info: SerializationInfo, context: StreamingContext) = info.GetBoolean("ok") |> True new SerializableConstructorTestClass(true) type SerializableConstructorTestStruct = struct val mutable CalledFromISerializableConstructor: bool private new (info: SerializationInfo, context: StreamingContext) = info.GetBoolean("ok") |> True {CalledFromISerializableConstructor = true} end let testCreateISerializableConstructorCaller() = let context = new StreamingContext(System.Runtime.Serialization.StreamingContextStates.Clone) let info = new SerializationInfo(typeof, new System.Runtime.Serialization.FormatterConverter()) info.AddValue("ok", true) let instance1 = SerializableConstructorTestClass(false) instance1.CalledFromISerializableConstructor |> False let constructor1 = instance1.GetType().GetConstructor(BindingFlags.NonPublic ||| BindingFlags.Instance, null, [|typeof; typeof|], null) let constructorCaller1 = Cloner.CreateISerializableConstructorCaller(constructor1) constructorCaller1.Invoke(instance1, info, context) instance1.CalledFromISerializableConstructor |> True let mutable instance2 = SerializableConstructorTestStruct() instance2.CalledFromISerializableConstructor |> False let constructor2 = instance2.GetType().GetConstructor(BindingFlags.NonPublic ||| BindingFlags.Instance, null, [|typeof; typeof|], null) let constructorCaller2 = Cloner.CreateISerializableConstructorCaller(constructor2) let boxedInstance2 = box instance2 constructorCaller2.Invoke(boxedInstance2, info, context) instance2 <- unbox boxedInstance2 instance2.CalledFromISerializableConstructor |> True type NativeSerializationTestType(val1: int, val2: string, val3: obj, val4: obj[]) = member t.Value1 = val1 member t.Value2 = val2 member t.Value3 = val3 member t.Value4 = val4 
let equalityCacheComparer = {new System.Collections.Generic.EqualityComparer() with override t.Equals((x1, x2), (y1, y2)) = obj.ReferenceEquals(x1, y1) && obj.ReferenceEquals(x2, y2) override t.GetHashCode((x1, x2)) = System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(x1)} let equalityCache = System.Collections.Generic.Dictionary(equalityCacheComparer) /// can deal with recursive values, i.e. cyclic object graphs let recEquals (value1: obj) (value2: obj) = let vv = (value1, value2) let mutable b = false if equalityCache.TryGetValue(vv, &b) then b else equalityCache.Add(vv, true) b <- value1 = value2 equalityCache[vv] <- b b let mutable onDeserializedList = [] : int list [] type NativeSerializationTestClassWithUnorderedEvents<'t>(id: int, value: 't) = let mutable value = value member t.Id = id member t.Value with get() = value and set v = value <- v [] val mutable OnSerializingWasCalled: bool [] val mutable OnSerializedWasCalled: bool [] val mutable OnDeserializingWasCalled: bool [] val mutable DeserializationCallbackWasCalled: bool [] member private t.OnSerializing(context: StreamingContext) = t.OnSerializingWasCalled |> False t.OnSerializingWasCalled <- true [] member private t.OnSerialized(context: StreamingContext) = t.OnSerializingWasCalled |> True t.OnSerializedWasCalled |> False t.OnSerializedWasCalled <- true [] member private t.OnDeserializing(context: StreamingContext) = t.OnSerializedWasCalled |> False t.OnDeserializingWasCalled |> False t.OnDeserializingWasCalled <- true interface IDeserializationCallback with member t.OnDeserialization(sender) = t.OnSerializingWasCalled |> False t.OnDeserializingWasCalled |> True t.DeserializationCallbackWasCalled |> False t.DeserializationCallbackWasCalled <- true override t.Equals(obj) = match obj with | :? 
NativeSerializationTestClassWithUnorderedEvents<'t> as o -> id = o.Id && recEquals value o.Value | _ -> false override t.GetHashCode() = raise (System.NotImplementedException()) [] type NativeSerializationTestClass<'t>(id_, value_) = inherit NativeSerializationTestClassWithUnorderedEvents<'t>(id_, value_) [] val mutable OnDeserializedWasCalled: bool [] val mutable DeserializationCallbackWasCalled: bool [] member private t.OnDeserialized(context: StreamingContext) = t.OnSerializingWasCalled |> False t.OnDeserializingWasCalled |> True t.OnDeserializedWasCalled |> False t.OnDeserializedWasCalled <- true onDeserializedList <- t.Id::onDeserializedList interface IDeserializationCallback with member t.OnDeserialization(sender) = t.OnSerializingWasCalled |> False t.OnDeserializingWasCalled |> True t.OnDeserializedWasCalled |> True t.DeserializationCallbackWasCalled |> False t.DeserializationCallbackWasCalled <- true override t.Equals(o) = match o with | :? NativeSerializationTestClass<'t> as o -> t.Id = o.Id && recEquals t.Value o.Value | _ -> false override t.GetHashCode() = raise (System.NotImplementedException()) [] type NativeSerializationTestClass2<'t, 't2>(id_, value_, value2: 't2) = inherit NativeSerializationTestClass<'t>(id_, value_) member t.Value2 = value2 override t.Equals(o) = match o with | :? 
NativeSerializationTestClass2<'t,'t2> as o -> t.Id = o.Id && recEquals t.Value o.Value && recEquals t.Value2 o.Value2 | _ -> false override t.GetHashCode() = raise (System.NotImplementedException()) [] type CustomSerializationTestClass<'t> private (id: int, value: 't, isConstructedFromSerializationInfo: bool) = let mutable value = value member t.Id = id member t.Value with get() = value and set v = value <- v [] static val mutable private GetObjectDataCounter: int interface ISerializable with member t.GetObjectData(info, context) = let c = CustomSerializationTestClass<'t>.GetObjectDataCounter CustomSerializationTestClass<'t>.GetObjectDataCounter <- c + 1 if c%3 <> 0 then info.AddValue("id", id) info.AddValue("value", value) else info.AddValue("value", value) info.AddValue("id", id) public new (id, value) = CustomSerializationTestClass(id, value, false) private new (info: SerializationInfo, context: StreamingContext) = CustomSerializationTestClass(info.GetValue("id", typeof) :?> int, info.GetValue("value", typeof<'t>) :?> 't, true) member t.IsConstructedFromSerializationInfo = isConstructedFromSerializationInfo [] val mutable OnSerializingWasCalled: bool [] val mutable OnSerializedWasCalled: bool [] val mutable OnDeserializingWasCalled: bool [] val mutable OnDeserializedWasCalled: bool [] val mutable DeserializationCallbackWasCalled: bool [] member private t.OnSerializing(context: StreamingContext) = t.OnSerializingWasCalled |> False t.OnSerializingWasCalled <- true [] member private t.OnSerialized(context: StreamingContext) = t.OnSerializingWasCalled |> True t.OnSerializedWasCalled |> False t.OnSerializedWasCalled <- true [] member private t.OnDeserializing(context: StreamingContext) = t.OnSerializingWasCalled |> False t.OnDeserializingWasCalled |> False t.OnDeserializingWasCalled <- true [] member private t.OnDeserialized(context: StreamingContext) = t.OnSerializingWasCalled |> False t.OnDeserializingWasCalled |> True t.OnDeserializedWasCalled |> False 
t.OnDeserializedWasCalled <- true onDeserializedList <- id::onDeserializedList interface IDeserializationCallback with member t.OnDeserialization(sender) = t.OnSerializingWasCalled |> False t.OnDeserializedWasCalled |> True t.DeserializationCallbackWasCalled |> False t.DeserializationCallbackWasCalled <- true override t.Equals(o) = match o with | :? CustomSerializationTestClass<'t> as o -> id = o.Id && recEquals value o.Value | _ -> false override t.GetHashCode() = raise (System.NotImplementedException()) type CustomSerializationTestClassProxyProxy<'t>(id: int, value: 't) = interface IObjectReference with member t.GetRealObject(context) = box (CustomSerializationTestClassWithProxy(id, value)) and CustomSerializationTestClassProxy<'t>(id: int, value: 't) = private new (info: SerializationInfo, context: StreamingContext) = CustomSerializationTestClassProxy(info.GetValue("id", typeof) :?> int, info.GetValue("value", typeof<'t>) :?> 't) [] val mutable OnDeserializingWasCalled: bool [] val mutable DeserializationCallbackWasCalled: bool [] member private t.OnDeserializing(context: StreamingContext) = t.OnDeserializingWasCalled |> False t.OnDeserializingWasCalled <- true interface IDeserializationCallback with member t.OnDeserialization(sender) = t.OnDeserializingWasCalled |> True t.DeserializationCallbackWasCalled |> False t.DeserializationCallbackWasCalled <- true interface ISerializable with member t.GetObjectData(info, context) = raise (System.NotImplementedException()) interface IObjectReference with member t.GetRealObject(context) = t.OnDeserializingWasCalled |> True t.DeserializationCallbackWasCalled |> False box (CustomSerializationTestClassProxyProxy(id, value)) and CustomSerializationTestClassProxyWithOnDeserialized<'t>(id: int, value: 't) = [] val mutable OnDeserializingWasCalled: bool [] val mutable OnDeserializedWasCalled: bool [] val mutable DeserializationCallbackWasCalled: bool [] member private t.OnDeserializing(context: StreamingContext) = 
t.OnDeserializingWasCalled |> False t.OnDeserializingWasCalled <- true [] member private t.OnDeserialized(context: StreamingContext) = t.OnDeserializingWasCalled |> True t.OnDeserializedWasCalled |> False t.OnDeserializedWasCalled <- true interface IDeserializationCallback with member t.OnDeserialization(sender) = t.OnDeserializedWasCalled |> True t.DeserializationCallbackWasCalled |> False t.DeserializationCallbackWasCalled <- true interface IObjectReference with member t.GetRealObject(context) = t.OnDeserializedWasCalled |> True t.DeserializationCallbackWasCalled |> False box (CustomSerializationTestClassProxyProxy(id, value)) and CustomSerializationTestClassProxy2<'t> = struct val mutable id: int val mutable value: 't member t.Id with get() = t.id and set v = t.id <- v member t.Value with get() = t.value and set v = t.value <- v interface IObjectReference with member t.GetRealObject(context) = box (CustomSerializationTestClassProxyProxy(t.id, t.value)) end and CustomSerializationTestClassWithProxy12<'t>(id_, value_) = inherit CustomSerializationTestClass<'t>(id_, value_) [] static val mutable private ProxyCounter: int interface ISerializable with override t.GetObjectData(info, context) = info.AddValue("id", t.Id) info.AddValue("value", t.Value) info.AddValue("unused", "data") let c = CustomSerializationTestClassWithProxy12<'t>.ProxyCounter CustomSerializationTestClassWithProxy12<'t>.ProxyCounter <- c + 1 if c%3 <> 0 then info.FullTypeName <- typeof>.FullName info.AssemblyName <- typeof>.Assembly.FullName else info.FullTypeName <- typeof>.FullName info.AssemblyName <- typeof>.Assembly.FullName and CustomSerializationTestClassWithProxy<'t>(id_, value_) = inherit CustomSerializationTestClass<'t>(id_, value_) [] static val mutable private ProxyCounter: int interface ISerializable with override t.GetObjectData(info, context) = info.AddValue("id", t.Id) info.AddValue("value", t.Value) info.AddValue("unused", "data") let c =
CustomSerializationTestClassWithProxy<'t>.ProxyCounter CustomSerializationTestClassWithProxy<'t>.ProxyCounter <- c + 1 info.FullTypeName <- typeof>.FullName info.AssemblyName <- typeof>.Assembly.FullName type CustomSerializationTestClassWithSimpleProxyBase() = class end type CustomSerializationTestClassWithSimpleProxy() = interface ISerializable with override t.GetObjectData(info, context) = info.FullTypeName <- typeof.FullName info.AssemblyName <- typeof.Assembly.FullName type CustomSerializationTestClassWithInvalidProxy<'t>(id_: int, value_: 't) = inherit CustomSerializationTestClass<'t>(id_, value_) // misses a deserialization constructor interface ISerializable with override t.GetObjectData(info, context) = info.FullTypeName <- typeof>.FullName info.AssemblyName <- typeof>.Assembly.FullName type CustomSerializationTestClassInvalidProxy() = interface ISerializable with member t.GetObjectData(info, context) = raise (System.NotImplementedException()) type CustomSerializationTestClassWithInvalidProxy2<'t>(id: int, value: 't) = interface ISerializable with override t.GetObjectData(info, context) = info.FullTypeName <- typeof.FullName info.AssemblyName <- typeof.Assembly.FullName type CustomSerializationTestClassWithInvalidProxy3<'t>(id: int, value: 't) = interface ISerializable with override t.GetObjectData(info, context) = info.FullTypeName <- typeof.FullName info.AssemblyName <- typeof.Assembly.FullName type ClassWithNonExistentSerializationProxyClass() = interface ISerializable with override t.GetObjectData(info, context) = info.FullTypeName <- "_NonExistentType" info.AssemblyName <- "_NonExistentAssembly" type ClassWithBuggyObjectReferenceImplementation1() = interface IObjectReference with member t.GetRealObject(context) = null type ClassWithBuggyObjectReferenceImplementation2() = interface IObjectReference with member t.GetRealObject(context) = box (ClassWithBuggyObjectReferenceImplementation2()) type ClassWithObjectReferenceImplementationThatReturnsThis() =
interface IObjectReference with member t.GetRealObject(context) = box t type ClassWithObjectReferenceImplementationThatReturnsThis2() = interface IObjectReference with member t.GetRealObject(context) = box (ClassWithObjectReferenceImplementationThatReturnsThis()) (* Runs the nested cloner test groups in order: blittable types, arrays, native serialization, custom serialization and IObjectReference handling. *) let testCloners() = let testBlittableCloner() = let v1 = (BlittableType(1, "2", KeyValuePair(3, "4"))) let v2 = Cloner.Create(v1.GetType()).Clone(v1) :?> BlittableType obj.ReferenceEquals(v1, v2) |> False v1.Value1 |> Equal v2.Value1 v1.Value2 |> Equal v2.Value2 v1.Value3 |> Equal v2.Value3 let v3 = Cloner.Create(v1.GetType()).CaptureImage(v1).CreateClone() :?> BlittableType obj.ReferenceEquals(v1, v3) |> False v1.Value1 |> Equal v3.Value1 v1.Value2 |> Equal v3.Value2 v1.Value3 |> Equal v3.Value3 let v1 = (BlittableStructType(1, "2", KeyValuePair(3, "4"))) let v2 = Cloner.Create(v1.GetType()).Clone(v1) :?> BlittableStructType v1.Value1 |> Equal v2.Value1 v1.Value2 |> Equal v2.Value2 v1.Value3 |> Equal v2.Value3 let v3 = Cloner.Create(v1.GetType()).CaptureImage(v1).CreateClone() :?> BlittableStructType v1.Value1 |> Equal v3.Value1 v1.Value2 |> Equal v3.Value2 v1.Value3 |> Equal v3.Value3 testBlittableCloner() (* Clones empty, rank-1, non-zero-lower-bound and 3-dimensional arrays and compares every clone with its source array. 
*) let testArrayCloners() = let EqualArray (a: System.Array) (b: System.Array) = let r = b.Rank r |> Equal a.Rank if r = 1 then // the F# equality comparison is bugged for rank-1 arrays with non-zero lower bound let off = a.GetLowerBound(0) (b.GetLowerBound(0)) |> Equal off for i = 0 to a.GetLength(0) - 1 do b.GetValue(off + i) |> Equal (a.GetValue(off + i)) else a |> Equal b let v1 = box ([||] : int[]) Cloner.Create(v1.GetType()).Clone(v1) |> Equal v1 let v2 = box [|0; 1; 2|] Cloner.Create(v1.GetType()).Clone(v2) |> Equal v2 let v2b = System.Array.CreateInstance(typeof, [|3|], [|1|]) v2b.SetValue(1, 1) v2b.SetValue(2, 2) v2b.SetValue(3, 3) Cloner.Create(v2b.GetType()).Clone(v2b) :?> System.Array |> EqualArray v2b let v3 = box [|null; "1"; "2"|] Cloner.Create(v3.GetType()).Clone(v3) |> Equal v3 let v4 =
box [|KeyValuePair(1,2); KeyValuePair(3,4)|] Cloner.Create(v4.GetType()).Clone(v4) |> Equal v4 let v5 = box ([||] : option[]) Cloner.Create(v5.GetType()).Clone(v5) |> Equal v5 let v6 = box [|None; Some 1; Some 2|] Cloner.Create(v6.GetType()).Clone(v6) |> Equal v6 let v6b = System.Array.CreateInstance(typeof>, [|3|], [|1|]) v6b.SetValue(None, 1) v6b.SetValue(Some 2, 2) v6b.SetValue(Some 3, 3) Cloner.Create(v6b.GetType()).Clone(v6b) :?> System.Array |> EqualArray v6b let v7 = [|box (Some 1); null; box [|Some 2; Some 3|]|] let cloner = Cloner.Create(v7.GetType()) cloner.Clone(v7) |> Equal (box v7) v7[0] <- box [|Some 4; Some 5|] v7[1] <- box (Some 6) v7[2] <- null cloner.Clone(v7) |> Equal (box v7) let v8 = Array3D.zeroCreate 0 0 0 Cloner.Create(v8.GetType()).Clone(v8) |> Equal (box v8) let v9 = Array3D.zeroCreate 3 4 5 for i = 0 to 2 do for j = 0 to 3 do for k = 0 to 4 do v9[i,j,k] <- i*3*4 + j*5 + k Cloner.Create(v9.GetType()).Clone(v9) |> Equal (box v9) let v10 = System.Array.CreateInstance(typeof, [|3; 5; 7|], [|1; 2; 3|]) for i = 0 to 2 do for j = 0 to 4 do for k = 0 to 6 do let c = i*5*7 + j*7 + k v10.SetValue((if c%3 = 0 then box [|Some c|] elif c%5 = 0 then null else box (Some c)), [|1 + i; 2 + j; 3 + k|]) Cloner.Create(v10.GetType()).Clone(v10) |> Equal (box v10) testArrayCloners() (* Checks that clones compare equal to their source and which On(De)Serializing/On(De)Serialized/DeserializationCallback flags are set on the source and on the clone. 
*) let testNativeSerializationCloner() = let () = let v = TypeWithNoSerializedField() let v2 = Cloner.Create(v.GetType()).Clone(v) :?> TypeWithNoSerializedField v2.GetType() |> Equal (v.GetType()) obj.ReferenceEquals(v, v2) |> False v.OnSerializedWasCalled |> True v.OnDeserializedWasCalled |> False v2.OnSerializedWasCalled |> False v2.OnDeserializedWasCalled |> True let () = let v = NativeSerializationTestClassWithUnorderedEvents(1, (2, "3")) let v2 = Cloner.Create(v.GetType()).Clone(v) :?> _ v2 |> Equal v v.OnSerializedWasCalled |> True v.OnSerializingWasCalled |> True v.OnDeserializingWasCalled |> False v2.OnSerializingWasCalled |> False v2.OnSerializedWasCalled |> False
v2.DeserializationCallbackWasCalled |> True let () = let v = NativeSerializationTestClass2(1, box "2", (3, "4")) let cloner = Cloner.Create(v.GetType()) let v2 = cloner.Clone(v) :?> _ v2 |> Equal v v.OnSerializedWasCalled |> True v.OnSerializingWasCalled |> True v.OnDeserializingWasCalled |> False v.OnDeserializedWasCalled |> False v.DeserializationCallbackWasCalled |> False v2.OnSerializingWasCalled |> False v2.OnDeserializedWasCalled |> True v2.DeserializationCallbackWasCalled |> True v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v3 = cloner.Clone(v) :?> _ v3 |> Equal v v.Value <- Some 2 v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v4 = cloner.Clone(v) :?> _ v4 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v5 = cloner.Clone(v) :?> _ v5 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- ref 2 let v6 = cloner.Clone(v) :?> _ v6 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- null let v7 = cloner.Clone(v) :?> _ v7 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- ref 2 let v8 = cloner.Clone(v) :?> _ v8 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- "2" let v9 = cloner.Clone(v) :?> _ v9 |> Equal v () testNativeSerializationCloner() (* Repeatedly clones ISerializable test objects (with and without proxy types) through one cloner and expects SerializationException for object-graph cycles through IObjectReference proxies and for invalid or non-existent proxy types. 
*) let testCustomSerializationCloner() = let () = let v = CustomSerializationTestClass(1, box "2") let cloner = Cloner.Create(v.GetType()) let v2 = cloner.Clone(v) :?> _ v2 |> Equal v v.OnSerializedWasCalled |> True v.OnSerializingWasCalled |> True v.OnDeserializingWasCalled |> False v.OnDeserializedWasCalled |> False v.DeserializationCallbackWasCalled |> False v2.OnSerializingWasCalled |> False v2.OnDeserializedWasCalled |> True v2.DeserializationCallbackWasCalled |> True v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v3 = cloner.Clone(v) :?> _ v3 |> Equal v v.Value <-
Some 2 v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v4 = cloner.Clone(v) :?> _ v4 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v5 = cloner.Clone(v) :?> _ v5 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- ref 2 let v6 = cloner.Clone(v) :?> _ v6 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- null let v7 = cloner.Clone(v) :?> _ v7 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- ref 2 let v8 = cloner.Clone(v) :?> _ v8 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false v.Value <- "2" let v9 = cloner.Clone(v) :?> _ v9 |> Equal v let () = let v = box (CustomSerializationTestClassWithProxy12(1, KeyValuePair("2", 3))) :?> CustomSerializationTestClass> let cloner = Cloner.Create(v.GetType()) let v2 = cloner.Clone(v) :?> _ v2 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v3 = cloner.Clone(v) :?> _ v3 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v4 = cloner.Clone(v) :?> _ v4 |> Equal v let () = let v = box (CustomSerializationTestClassWithProxy12(1, Some 2)) :?> CustomSerializationTestClass let cloner = Cloner.Create(v.GetType()) let v2 = cloner.Clone(v) :?> _ v2 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v3 = cloner.Clone(v) :?> _ v3 |> Equal v v.OnSerializingWasCalled <- false; v.OnSerializedWasCalled <- false let v4 = cloner.Clone(v) :?> _ v4 |> Equal v let () = let v = CustomSerializationTestClassWithSimpleProxy() let cloner = Cloner.Create(v.GetType()) let v2 = cloner.Clone(v) obj.ReferenceEquals(v, v2) |> False v2.GetType() |> Equal (typeof) let () = let v1 = CustomSerializationTestClassWithProxy12(1, null) let v2 = CustomSerializationTestClassWithProxy12(2, null) v1.Value <- v2 Cloner.Create(v1.GetType()).Clone(v1) |> Equal (box v1)
v1.OnSerializingWasCalled <- false; v1.OnSerializedWasCalled <- false v2.OnSerializingWasCalled <- false; v2.OnSerializedWasCalled <- false v1.Value <- null v2.Value <- v1 Cloner.Create(v2.GetType()).Clone(v2) |> Equal (box v2) v1.OnSerializingWasCalled <- false; v1.OnSerializedWasCalled <- false v2.OnSerializingWasCalled <- false; v2.OnSerializedWasCalled <- false v1.Value <- v2 // creates object graph cycle involving IObjectReferences try Cloner.Create(v2.GetType()).Clone(v2) |> ignore Fail() with :? SerializationException -> () let () = let v1 = NativeSerializationTestClass>(1, KeyValuePair()) let v2 = CustomSerializationTestClassWithProxy(2, v1) v1.Value <- KeyValuePair(null, v2) try Cloner.Create(v1.GetType()).Clone(v1) |> ignore Fail() with :? SerializationException -> () let () = let v1 = NativeSerializationTestClass(1, null) let v2 = CustomSerializationTestClassProxyWithOnDeserialized(2, v1) v1.Value <- v2 try Cloner.Create(v1.GetType()).Clone(v1) |> ignore Fail() with :? SerializationException -> () let () = let v1 = NativeSerializationTestClass(1, null) let mutable v2 = CustomSerializationTestClassProxy2() v2.Id <- 2 v2.Value <- v1 v1.Value <- v2 try Cloner.Create(v1.GetType()).Clone(v1) |> ignore Fail() with :? SerializationException -> () try let instance = CustomSerializationTestClassWithInvalidProxy(1, "2") Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :? SerializationException -> () try let instance = CustomSerializationTestClassWithInvalidProxy2(1, "2") Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :? SerializationException -> () try let instance = CustomSerializationTestClassWithInvalidProxy3(1, "2") Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :? SerializationException -> () try let instance = ClassWithNonExistentSerializationProxyClass() Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :?
SerializationException -> () testCustomSerializationCloner() (* Cloning must succeed for the two ...ThatReturnsThis classes and must raise SerializationException when GetRealObject returns null or a fresh instance of the same IObjectReference class. *) let testObjectReferenceHandling() = let () = let instance = ClassWithObjectReferenceImplementationThatReturnsThis() Cloner.Create(instance.GetType()).Clone(instance) |> ignore let instance = ClassWithObjectReferenceImplementationThatReturnsThis2() Cloner.Create(instance.GetType()).Clone(instance) |> ignore try let instance = ClassWithBuggyObjectReferenceImplementation1() Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :? SerializationException -> () try let instance = ClassWithBuggyObjectReferenceImplementation2() Cloner.Create(instance.GetType()).Clone(instance) |> ignore Fail() with :? SerializationException -> () testObjectReferenceHandling() (* Clones object graphs, including graphs containing cycles, and checks clone equality together with the contents of onDeserializedList after each clone. 
*) let testCloning() = let () = let o2 = NativeSerializationTestClassWithUnorderedEvents(2, null) let o1 = NativeSerializationTestClassWithUnorderedEvents(1, o2) let os = [|o2; o1|] let cloner = Cloner.Create(os.GetType()) let os2 = unbox (cloner.Clone(os)) os2 |> Equal os o1.OnSerializedWasCalled |> True o1.OnDeserializingWasCalled |> False o2.OnSerializedWasCalled |> True o2.OnDeserializingWasCalled |> False let () = let o2 = NativeSerializationTestClass(1, null) let o1 = NativeSerializationTestClass(2, o2) let os = [|o2; o1|] let cloner = Cloner.Create(os.GetType()) onDeserializedList <- [] let os2 = unbox (cloner.Clone(os)) os2 |> Equal os onDeserializedList |> Equal [2; 1] o1.OnSerializedWasCalled |> True o1.OnDeserializingWasCalled |> False o2.OnSerializedWasCalled |> True o2.OnDeserializingWasCalled |> False let () = // cycle 1 let o6_2 = NativeSerializationTestClass(6, null) let o5_2 = NativeSerializationTestClass(5, o6_2) let o4_2 = CustomSerializationTestClassWithProxy(4, o5_2) o6_2.Value <- o4_2 // cycle 2 let o3_1 = NativeSerializationTestClass2(3, null, o4_2) let o2_1 = NativeSerializationTestClass(2, o3_1) let o1_1 = CustomSerializationTestClassWithProxy(1, o2_1) o3_1.Value <- o1_1 onDeserializedList <- [] let cloner =
Cloner.Create(o1_1.GetType()) let o = cloner.Clone(o1_1) :?> CustomSerializationTestClassWithProxy o |> Equal o1_1 onDeserializedList |> Equal [2;3;5;6] // the order within the strongly connected components // is implementation defined let () = let o8 = CustomSerializationTestClassWithProxy(8, Some(2)) let o7 = CustomSerializationTestClassWithProxy(7, Some(1)) // cycle 1 let o6_2 = CustomSerializationTestClass(6, null) let o5_2 = CustomSerializationTestClass(5, (o6_2, box o7, box o8)) let o4_2 = CustomSerializationTestClass(4, o5_2) o6_2.Value <- o4_2 // cycle 2 let o3_1 = NativeSerializationTestClass2(3, null, o4_2) let o2_1 = NativeSerializationTestClass(2, o3_1) let o1_1 = NativeSerializationTestClass(1, o2_1) o3_1.Value <- o1_1 let o0 = CustomSerializationTestClass(0, o1_1) onDeserializedList <- [] let os = [|o8; o7; o5_2; o1_1; o0|] : obj[] let cloner = Cloner.Create(os.GetType()) let os2 = cloner.Clone(os) :?> obj[] os2[4] |> Equal os[4] onDeserializedList |> Equal [0;1;2;3;5;6;4] // the order within the strongly connected components // is implementation defined let reset() = onDeserializedList <- [] o0.OnSerializingWasCalled <- false; o0.OnSerializedWasCalled <- false o1_1.OnSerializingWasCalled <- false; o1_1.OnSerializedWasCalled <- false o2_1.OnSerializingWasCalled <- false; o2_1.OnSerializedWasCalled <- false o3_1.OnSerializingWasCalled <- false; o3_1.OnSerializedWasCalled <- false o4_2.OnSerializingWasCalled <- false; o4_2.OnSerializedWasCalled <- false o5_2.OnSerializingWasCalled <- false; o5_2.OnSerializedWasCalled <- false o6_2.OnSerializingWasCalled <- false; o6_2.OnSerializedWasCalled <- false o7.OnSerializingWasCalled <- false; o7.OnSerializedWasCalled <- false o8.OnSerializingWasCalled <- false; o8.OnSerializedWasCalled <- false reset() let os3 = cloner.Clone(os) :?> obj[] os3[4] |> Equal os[4] onDeserializedList |> Equal [0;1;2;3;5;6;4] reset() let o = Cloner.Create(o1_1.GetType()).Clone(o1_1) onDeserializedList |> Equal [1;2;3;4;5;6] o |> Equal
(box o1_1) try Cloner.Create(typeof) |> ignore with :? System.Runtime.Serialization.SerializationException -> () try Cloner.Create(typeof).Clone(Some "") |> ignore; Fail() with :? System.ArgumentException -> () (* For every encoding returned by Encoding.GetEncodings(): decodes the bytes of "test test", then clones the used decoder via CaptureImage/CreateClone and checks that the clone decodes the same bytes to the same text. *) let encodingTests() = for e in System.Text.Encoding.GetEncodings() do let encoding = e.GetEncoding() let bs = encoding.GetBytes("test test") let decoder = encoding.GetDecoder() let cs = Array.zeroCreate 20 new string(cs, 0, decoder.GetChars(bs, 0, bs.Length, cs, 0)) |> Equal "test test" let cloner = FParsec.Cloning.Cloner.Create(decoder.GetType()) let image = cloner.CaptureImage(decoder) let decoder2 = image.CreateClone() :?> System.Text.Decoder new string(cs, 0, decoder2.GetChars(bs, 0, bs.Length, cs, 0)) |> Equal "test test" (* Runs the test functions listed below, in this order. *) let run() = testStronglyConnectedComponents() testComputeTopologicalOrder() testCloningEventHandlers() testGetSerializedFields() testCreateFieldValuesGetter() testCreateFieldValuesSetter() testCreateISerializableConstructorCaller() testCloners() testCloning() encodingTests() #endif ================================================ FILE: Test/HexFloatTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2008-2010 // License: Simplified BSD License. See accompanying documentation.
module FParsec.Test.HexFloatTests open FParsec.Test.Test let floatToHexString = FParsec.CharParsers.floatToHexString let floatOfHexString = FParsec.CharParsers.floatOfHexString let float32ToHexString = FParsec.CharParsers.float32ToHexString let float32OfHexString = FParsec.CharParsers.float32OfHexString let testDoubleHexFloat() = /// bitwise equal let BEqual a b = Equal (System.BitConverter.DoubleToInt64Bits(a)) (System.BitConverter.DoubleToInt64Bits(b)) // float32ToHexString /////////////////// let max = System.Double.MaxValue let eps = System.Math.Pow(2.0, -53.0) let min = System.Math.Pow(2.0, -1022.0) // smallest normal number let minmin = System.Math.Pow(2.0, -1074.0) // smallest subnormal number floatToHexString 0.0 |> Equal "0x0.0p0" floatToHexString -0.0 |> Equal "-0x0.0p0" floatToHexString 1.0 |> Equal "0x1.0p0" floatToHexString -1.0 |> Equal "-0x1.0p0" floatToHexString (1.0 + 4.*eps) |> Equal "0x1.0000000000002p0" floatToHexString (1.0 + 2.*eps) |> Equal "0x1.0000000000001p0" floatToHexString (1.0 - eps) |> Equal "0x1.fffffffffffffp-1" floatToHexString (1.0 - 2.*eps) |> Equal "0x1.ffffffffffffep-1" floatToHexString min |> Equal "0x1.0p-1022" floatToHexString (min + minmin) |> Equal "0x1.0000000000001p-1022" floatToHexString (min - minmin) |> Equal "0x0.fffffffffffffp-1022" floatToHexString (min - 2.*minmin) |> Equal "0x0.ffffffffffffep-1022" floatToHexString (minmin) |> Equal "0x0.0000000000001p-1022" floatToHexString max |> Equal "0x1.fffffffffffffp1023" floatToHexString System.Double.PositiveInfinity |> Equal "Infinity" floatToHexString System.Double.NegativeInfinity |> Equal "-Infinity" floatToHexString System.Double.NaN |> Equal "NaN" // floatOfHexString /////////////////// try floatOfHexString null |> ignore; Fail() with :? System.ArgumentNullException -> () let checkFormatError s = try floatOfHexString s |> ignore; Fail () with :? System.FormatException -> () checkFormatError "" checkFormatError "." 
checkFormatError "p1" checkFormatError ".p1" checkFormatError "1x1" checkFormatError "x1" checkFormatError "0xx1" checkFormatError "0x/" checkFormatError "0x:" checkFormatError "0x@" checkFormatError "0xG" checkFormatError "0x`" checkFormatError "0xg" checkFormatError "0.1pp1" checkFormatError "0.1p+" checkFormatError "0.1p-" checkFormatError "0.fg" checkFormatError "1.0 " checkFormatError "1.." checkFormatError "1.0." floatOfHexString "Inf" |> Equal System.Double.PositiveInfinity floatOfHexString "iNf" |> Equal System.Double.PositiveInfinity floatOfHexString "Infinity" |> Equal System.Double.PositiveInfinity floatOfHexString "+InFinITy" |> Equal System.Double.PositiveInfinity floatOfHexString "-Inf" |> Equal (-System.Double.PositiveInfinity) floatOfHexString "-InFinITy" |> Equal (-System.Double.PositiveInfinity) floatOfHexString "NaN" |> BEqual System.Double.NaN floatOfHexString "-nAn" |> BEqual System.Double.NaN floatOfHexString "+Nan" |> BEqual System.Double.NaN floatOfHexString "001" |> Equal 1.0 floatOfHexString "1." |> Equal 1.0 floatOfHexString "1.0" |> Equal 1.0 floatOfHexString "0x1" |> Equal 1.0 floatOfHexString "0X1" |> Equal 1.0 floatOfHexString "0x0001" |> Equal 1.0 floatOfHexString "0x1." |> Equal 1.0 floatOfHexString "0x1.0" |> Equal 1.0 floatOfHexString "0x001.0" |> Equal 1.0 floatOfHexString "1.0p0" |> Equal 1.0 floatOfHexString "1.0P0" |> Equal 1.0 floatOfHexString "001.00p+000" |> Equal 1.0 floatOfHexString ".100p+004" |> Equal 1.0 floatOfHexString ".0100p+008" |> Equal 1.0 floatOfHexString "00.100p+004" |> Equal 1.0 floatOfHexString "00.0100p+008" |> Equal 1.0 floatOfHexString "0010.0p-004" |> Equal 1.0 floatOfHexString "0x1.0p0" |> Equal 1.0 floatOfHexString "0X1.0P0" |> Equal 1.0 floatOfHexString "0x001.00p+000" |> Equal 1.0 floatOfHexString "0x00.100p+004" |> Equal 1.0 floatOfHexString "0x.100p+004" |> Equal 1.0 floatOfHexString "0x0010.0p-004" |> Equal 1.0 floatOfHexString "-001" |> Equal -1.0 floatOfHexString "-1." 
|> Equal -1.0 floatOfHexString "-1.0" |> Equal -1.0 floatOfHexString "-0x1" |> Equal -1.0 floatOfHexString "-0X1" |> Equal -1.0 floatOfHexString "-0x0001" |> Equal -1.0 floatOfHexString "-0x1." |> Equal -1.0 floatOfHexString "-0x1.0" |> Equal -1.0 floatOfHexString "-0x001.0" |> Equal -1.0 floatOfHexString "-1.0p0" |> Equal -1.0 floatOfHexString "-1.0P0" |> Equal -1.0 floatOfHexString "-001.00p+000" |> Equal -1.0 floatOfHexString "-.100p+004" |> Equal -1.0 floatOfHexString "-.0100p+008" |> Equal -1.0 floatOfHexString "-00.100p+004" |> Equal -1.0 floatOfHexString "-00.0100p+008" |> Equal -1.0 floatOfHexString "-0010.0p-004" |> Equal -1.0 floatOfHexString "-0x1.0p0" |> Equal -1.0 floatOfHexString "-0X1.0P0" |> Equal -1.0 floatOfHexString "-0x001.00p+000" |> Equal -1.0 floatOfHexString "-0x00.100p+004" |> Equal -1.0 floatOfHexString "-0x.100p+004" |> Equal -1.0 floatOfHexString "-0x0010.0p-004" |> Equal -1.0 floatOfHexString "+001" |> Equal 1.0 floatOfHexString "+1." |> Equal 1.0 floatOfHexString "+1.0" |> Equal 1.0 floatOfHexString "+.100p+004" |> Equal 1.0 floatOfHexString "+0x0010.0p-004" |> Equal 1.0 floatOfHexString "0" |> BEqual 0. floatOfHexString "0." |> BEqual 0. floatOfHexString "0.0" |> BEqual 0. floatOfHexString "00.0" |> BEqual 0. floatOfHexString "00.000" |> BEqual 0. floatOfHexString "00.000p0" |> BEqual 0. floatOfHexString "00.000p99999999" |> BEqual 0. floatOfHexString "0x0" |> BEqual 0. floatOfHexString "0x0." |> BEqual 0. floatOfHexString "0x0.0" |> BEqual 0. floatOfHexString "0x00.0" |> BEqual 0. floatOfHexString "0x00.000" |> BEqual 0. floatOfHexString "0x00.000p0" |> BEqual 0. floatOfHexString "0x00.000p99999999" |> BEqual 0. floatOfHexString "100P-2147483639" |> BEqual 0. floatOfHexString "100P-2147483640" |> BEqual 0. floatOfHexString "100P-2147483647" |> BEqual 0. floatOfHexString "100P-9999999999999999999999999" |> BEqual 0. floatOfHexString "0.001P-2147483639" |> BEqual 0. floatOfHexString "0.001P-2147483640" |> BEqual 0. 
floatOfHexString "0.001P-2147483647" |> BEqual 0. floatOfHexString "0.001P-9999999999999999999999999" |> BEqual 0. floatOfHexString "-0" |> BEqual -0.0 floatOfHexString "-0." |> BEqual -0.0 floatOfHexString "-0.0" |> BEqual -0.0 floatOfHexString "-00.0" |> BEqual -0.0 floatOfHexString "-00.000" |> BEqual -0.0 floatOfHexString "-00.000p0" |> BEqual -0. floatOfHexString "-00.000p99999999" |> BEqual -0. floatOfHexString "-0x0" |> BEqual -0.0 floatOfHexString "-0x0." |> BEqual -0.0 floatOfHexString "-0x0.0" |> BEqual -0.0 floatOfHexString "-0x00.0" |> BEqual -0.0 floatOfHexString "-0x00.000" |> BEqual -0.0 floatOfHexString "-0x00.000p0" |> BEqual -0.0 floatOfHexString "-0x00.000p0" |> BEqual -0. floatOfHexString "-0x00.000p99999999" |> BEqual -0. floatOfHexString "-100P-2147483639" |> BEqual -0. floatOfHexString "-100P-2147483640" |> BEqual -0. floatOfHexString "-100P-2147483647" |> BEqual -0. floatOfHexString "-100P-9999999999999999999999999" |> BEqual -0. floatOfHexString "-0.001P-2147483639" |> BEqual -0. floatOfHexString "-0.001P-2147483640" |> BEqual -0. floatOfHexString "-0.001P-2147483647" |> BEqual -0. floatOfHexString "-0.001P-9999999999999999999999999" |> BEqual -0. 
floatOfHexString "0x0123" |> Equal (double 0x0123) floatOfHexString "0x4567" |> Equal (double 0x4567) floatOfHexString "0x89ab" |> Equal (double 0x89ab) floatOfHexString "0x89AB" |> Equal (double 0x89ab) floatOfHexString "0xcdef" |> Equal (double 0xcdef) floatOfHexString "0xCDEF" |> Equal (double 0xcdef) let v = floatOfHexString "0x1.23456789abcde" floatOfHexString "0x123.456789abcde00p-8" |> Equal v floatOfHexString "0x91.a2b3c4d5e6f00p-7" |> Equal v floatOfHexString "0x48.d159e26af3780p-6" |> Equal v floatOfHexString "0x24.68acf13579bc0p-5" |> Equal v floatOfHexString "0x12.3456789abcde0p-4" |> Equal v floatOfHexString "0x9.1a2b3c4d5e6fp-3" |> Equal v floatOfHexString "0x4.8d159e26af378p-2" |> Equal v floatOfHexString "0x2.468acf13579bcp-1" |> Equal v floatOfHexString "0x.91a2b3c4d5e6f0p+1" |> Equal v floatOfHexString "0x.48d159e26af378p+2" |> Equal v floatOfHexString "0x.2468acf13579bcp+3" |> Equal v floatOfHexString "0x.123456789abcdep+4" |> Equal v floatOfHexString "0x.091a2b3c4d5e6f0p+5" |> Equal v floatOfHexString "0x.048d159e26af378p+6" |> Equal v floatOfHexString "0x.02468acf13579bcp+7" |> Equal v floatOfHexString "0x.0123456789abcdep+8" |> Equal v // near max floatOfHexString "0x1.fffffffffffffp1023" |> Equal max floatOfHexString "0x.1fffffffffffffp1027" |> Equal max floatOfHexString "0x.01fffffffffffffp1031" |> Equal max floatOfHexString "0x1f.ffffffffffffp1019" |> Equal max floatOfHexString "0x1fffffffffffff.p971" |> Equal max floatOfHexString "-0x1fffffffffffff000.p959" |> Equal (-max) floatOfHexString "0x1.fffffffffffff5p1023" |> Equal max floatOfHexString "0x1.fffffffffffff6p1023" |> Equal max floatOfHexString "0x1.fffffffffffff7p1023" |> Equal max floatOfHexString "0x1.fffffffffffff7ffffffffp1023" |> Equal max let checkOverflow s = try floatOfHexString s |> ignore; Fail () with :? 
System.OverflowException -> () checkOverflow "0x1.fffffffffffff8p1023" checkOverflow "0x1.fffffffffffff800000p1023" checkOverflow "0x1.ffffffffffffffp1023" checkOverflow "0x1p1024" checkOverflow "100P2147483639" checkOverflow "100P2147483640" checkOverflow "100P2147483647" checkOverflow "100P9999999999999999999999999" checkOverflow "0.001P2147483639" checkOverflow "0.001P2147483640" checkOverflow "0.001P2147483647" checkOverflow "0.001P9999999999999999999999999" // near 1 floatOfHexString "0x1.0000000000000f" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.0000000000000e" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.0000000000000d" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.0000000000000c" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.00000000000018p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.0000000000003p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.0000000000006p-3" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.0000000000000b" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.0000000000000a" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.00000000000014p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.00000000000028p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.0000000000005p-3" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.00000000000009" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.00000000000012p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.00000000000024p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.00000000000048p-3" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.000000000000088" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.00000000000011p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.00000000000022p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.00000000000044p-3" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.000000000000084" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.000000000000108p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.00000000000021p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.00000000000042p-3" |> Equal (1.0 
+ 2.*eps) floatOfHexString "0x1.000000000000082" |> Equal (1.0 + 2.*eps) floatOfHexString "0x2.000000000000104p-1" |> Equal (1.0 + 2.*eps) floatOfHexString "0x4.000000000000208p-2" |> Equal (1.0 + 2.*eps) floatOfHexString "0x8.00000000000041p-3" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.00000000000008" |> Equal (1.0) // round towards even floatOfHexString "0x2.0000000000001p-1" |> Equal (1.0) floatOfHexString "0x4.0000000000002p-2" |> Equal (1.0) floatOfHexString "0x8.0000000000004p-3" |> Equal (1.0) floatOfHexString "0x1.0000000000000800000010000" |> Equal (1.0 + 2.*eps) floatOfHexString "0x1.00000000000007ffffffffff" |> Equal (1.0) floatOfHexString "0x1.00000000000007" |> Equal (1.0) floatOfHexString "0x1.00000000000006" |> Equal (1.0) floatOfHexString "0x1.00000000000005" |> Equal (1.0) floatOfHexString "0x1.00000000000004" |> Equal (1.0) floatOfHexString "0x1.00000000000003" |> Equal (1.0) floatOfHexString "0x1.00000000000002" |> Equal (1.0) floatOfHexString "0x1.00000000000001" |> Equal (1.0) floatOfHexString "0x1.00000000000000" |> Equal (1.0) floatOfHexString "0x1.ffffffffffffffffP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffffeP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffffdP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffffcP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffffbP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffffaP-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffff9P-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffff8P-1" |> Equal (1.0) // round towards even floatOfHexString "0x1.fffffffffffff800P-1" |> Equal (1.0) floatOfHexString "0x1.fffffffffffff7ffffffP-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff7000001P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff7P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff6P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff5P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff4P-1" |> Equal (1.0 - eps) 
floatOfHexString "0x1.fffffffffffff3P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff2P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff1P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.fffffffffffff0P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.ffffffffffffefP-1" |> Equal (1.0 - eps) floatOfHexString "0x1.ffffffffffffe8001P-1" |> Equal (1.0 - eps) floatOfHexString "0x1.ffffffffffffe8P-1" |> Equal (1.0 - 2.*eps) // round towards even floatOfHexString "0x1.ffffffffffffe80P-1" |> Equal (1.0 - 2.*eps) floatOfHexString "0x1.ffffffffffffe7ffffP-1" |> Equal (1.0 - 2.*eps) floatOfHexString "0x1.ffffffffffffe70P-1" |> Equal (1.0 - 2.*eps) floatOfHexString "0x1.ffffffffffffe1P-1" |> Equal (1.0 - 2.*eps) floatOfHexString "0x1.0000000000000fP-1022" |> Equal (min + minmin) floatOfHexString "0x1.0000000000000eP-1022" |> Equal (min + minmin) floatOfHexString "0x1.0000000000000dP-1022" |> Equal (min + minmin) floatOfHexString "0x1.0000000000000cP-1022" |> Equal (min + minmin) floatOfHexString "0x1.0000000000000bP-1022" |> Equal (min + minmin) floatOfHexString "0x1.0000000000000aP-1022" |> Equal (min + minmin) floatOfHexString "0x1.00000000000009P-1022" |> Equal (min + minmin) floatOfHexString "0x1.00000000000008001P-1022" |> Equal (min + minmin) floatOfHexString "0x1.00000000000008P-1022" |> Equal (min) // round towards even floatOfHexString "0x1.000000000000080P-1022" |> Equal (min) floatOfHexString "0x1.00000000000007ffffP-1022" |> Equal (min) floatOfHexString "0x1.00000000000007P-1022" |> Equal (min) floatOfHexString "0x1.00000000000006P-1022" |> Equal (min) floatOfHexString "0x1.00000000000005P-1022" |> Equal (min) floatOfHexString "0x1.00000000000004P-1022" |> Equal (min) floatOfHexString "0x1.00000000000003P-1022" |> Equal (min) floatOfHexString "0x1.00000000000002P-1022" |> Equal (min) floatOfHexString "0x1.00000000000001P-1022" |> Equal (min) floatOfHexString "0x1.00000000000000P-1022" |> Equal (min) floatOfHexString "0x0.ffffffffffffffP-1022" |> 
Equal (min) floatOfHexString "0x0.fffffffffffffeP-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffffdP-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffffcP-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffffbP-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffffaP-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffff9P-1022" |> Equal (min) floatOfHexString "0x0.fffffffffffff8P-1022" |> Equal (min) // round towards even floatOfHexString "0x0.fffffffffffff7fffP-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff7P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff6P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff5P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff4P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff3P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff2P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff1P-1022" |> Equal (min - minmin) floatOfHexString "0x0.fffffffffffff0P-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffefP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffeeP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffedP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffecP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffebP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffeaP-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffe9P-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffe8001P-1022" |> Equal (min - minmin) floatOfHexString "0x0.ffffffffffffe8P-1022" |> Equal (min - 2.*minmin) // round towards even floatOfHexString "0x0.ffffffffffffe80P-1022" |> Equal (min - 2.*minmin) // round towards even floatOfHexString "0x0.ffffffffffffe7ffP-1022" |> Equal (min - 2.*minmin) floatOfHexString "0x0.00000000000019P-1022" |> Equal (2.*minmin) floatOfHexString "0x0.00000000000018P-1022" |> Equal (2.*minmin) 
// ^ round towards even (this comment belongs to the preceding "0x0.00000000000018P-1022" assertion)
    floatOfHexString "0x0.00000000000017ffP-1022" |> Equal (minmin)
    floatOfHexString "0x0.0000000000001P-1022" |> Equal (minmin)
    floatOfHexString "0x0.00000000000010P-1022" |> Equal (minmin)
    floatOfHexString "0x0.00000000000008001P-1022" |> Equal (minmin)
    floatOfHexString "0x0.00000000000008P-1022" |> Equal (0.) // round towards even
    floatOfHexString "0x0.00000000000007ffffP-1022" |> Equal (0.)
    floatOfHexString "0x1.P-1075" |> Equal (0.)

    // round trip checking
    //////////////////////
    let rand = System.Random(123)
    let buffer = Array.zeroCreate 8
    let randomFloat() =
        rand.NextBytes(buffer)
        System.BitConverter.ToDouble(buffer, 0)

    for i = 0 to 100000 do
        let f = randomFloat()
        let s = floatToHexString f
        let f2 = floatOfHexString s
        True (f = f2 || f <> f)

/// Tests for the single-precision hex float functions `float32ToHexString` and
/// `float32OfHexString`.
///
/// Covers, in order: formatting of representative values; rejection of null and
/// malformed input; accepted spellings of +/-1, +/-0, infinity and NaN; hex digit
/// handling; digit/exponent shifting; overflow detection; rounding of values that
/// are not exactly representable (near `max`, near 1, near the smallest normal
/// number and among the subnormals); and a randomized round trip.
let testSingleHexFloat() =
    /// bitwise equal
    /// (compares the byte representations, so that 0.f and -0.f are told apart
    /// and a NaN result can be compared against System.Single.NaN)
    let BEqual (a: float32) (b: float32) =
        Equal (System.BitConverter.GetBytes(a)) (System.BitConverter.GetBytes(b))

    // float32ToHexString
    ///////////////////

    let max = System.Single.MaxValue
    // 2^-24; the assertions below use 2.f*eps as the distance from 1.0f to the
    // next larger float32 and eps as the distance to the next smaller one
    let eps = (float32) (System.Math.Pow(2.0, -24.0))
    let min = (float32) (System.Math.Pow(2.0, -126.0)) // smallest normal number
    let minmin = (float32) (System.Math.Pow(2.0, -149.0)) // smallest subnormal number

    float32ToHexString 0.0f |> Equal "0x0.0p0"
    float32ToHexString -0.0f |> Equal "-0x0.0p0"
    float32ToHexString 1.0f |> Equal "0x1.0p0"
    float32ToHexString -1.0f |> Equal "-0x1.0p0"
    float32ToHexString (1.0f + 4.f*eps) |> Equal "0x1.000004p0"
    float32ToHexString (1.0f + 2.f*eps) |> Equal "0x1.000002p0"
    float32ToHexString (1.0f - eps) |> Equal "0x1.fffffep-1"
    float32ToHexString (1.0f - 2.f*eps) |> Equal "0x1.fffffcp-1"
    float32ToHexString min |> Equal "0x1.0p-126"
    float32ToHexString (min + minmin) |> Equal "0x1.000002p-126"
    float32ToHexString (min - minmin) |> Equal "0x0.fffffep-126"
    float32ToHexString (min - 2.f*minmin) |> Equal "0x0.fffffcp-126"
    float32ToHexString (minmin) |> Equal "0x0.000002p-126"
    float32ToHexString max |> Equal "0x1.fffffep127"
    float32ToHexString System.Single.PositiveInfinity |> Equal "Infinity"
    float32ToHexString System.Single.NegativeInfinity |> Equal "-Infinity"
    float32ToHexString System.Single.NaN |> Equal "NaN"

    // float32OfHexString
    ///////////////////

    // a null argument must be reported as ArgumentNullException
    try float32OfHexString null |> ignore; Fail()
    with :? System.ArgumentNullException -> ()

    /// Asserts that parsing `s` throws a System.FormatException.
    let checkFormatError s =
        try float32OfHexString s |> ignore; Fail ()
        with :? System.FormatException -> ()

    checkFormatError ""
    checkFormatError "."
    checkFormatError "p1"
    checkFormatError ".p1"
    checkFormatError "1x1"
    checkFormatError "x1"
    checkFormatError "0xx1"
    // the characters directly before and after the ranges '0'-'9', 'A'-'F' and 'a'-'f'
    checkFormatError "0x/"
    checkFormatError "0x:"
    checkFormatError "0x@"
    checkFormatError "0xG"
    checkFormatError "0x`"
    checkFormatError "0xg"
    checkFormatError "0.1pp1"
    checkFormatError "0.1p+"
    checkFormatError "0.1p-"
    checkFormatError "0.fg"
    checkFormatError "1.0 "
    checkFormatError "1.."
    checkFormatError "1.0."

    float32OfHexString "Inf" |> Equal System.Single.PositiveInfinity
    float32OfHexString "iNf" |> Equal System.Single.PositiveInfinity
    float32OfHexString "Infinity" |> Equal System.Single.PositiveInfinity
    float32OfHexString "+InFinITy" |> Equal System.Single.PositiveInfinity
    float32OfHexString "-Inf" |> Equal (-System.Single.PositiveInfinity)
    float32OfHexString "-InFinITy" |> Equal (-System.Single.PositiveInfinity)
    float32OfHexString "NaN" |> BEqual System.Single.NaN
    float32OfHexString "-nAn" |> BEqual System.Single.NaN
    float32OfHexString "+Nan" |> BEqual System.Single.NaN

    float32OfHexString "001" |> Equal 1.0f
    float32OfHexString "1." |> Equal 1.0f
    float32OfHexString "1.0" |> Equal 1.0f
    float32OfHexString "0x1" |> Equal 1.0f
    float32OfHexString "0X1" |> Equal 1.0f
    float32OfHexString "0x0001" |> Equal 1.0f
    float32OfHexString "0x1." |> Equal 1.0f
    float32OfHexString "0x1.0" |> Equal 1.0f
    float32OfHexString "0x001.0" |> Equal 1.0f
    float32OfHexString "1.0p0" |> Equal 1.0f
    float32OfHexString "1.0P0" |> Equal 1.0f
    float32OfHexString "001.00p+000" |> Equal 1.0f
    float32OfHexString ".100p+004" |> Equal 1.0f
    float32OfHexString ".0100p+008" |> Equal 1.0f
    float32OfHexString "00.100p+004" |> Equal 1.0f
    float32OfHexString "00.0100p+008" |> Equal 1.0f
    float32OfHexString "0010.0p-004" |> Equal 1.0f
    float32OfHexString "0x1.0p0" |> Equal 1.0f
    float32OfHexString "0X1.0P0" |> Equal 1.0f
    float32OfHexString "0x001.00p+000" |> Equal 1.0f
    float32OfHexString "0x00.100p+004" |> Equal 1.0f
    float32OfHexString "0x.100p+004" |> Equal 1.0f
    float32OfHexString "0x0010.0p-004" |> Equal 1.0f

    float32OfHexString "-001" |> Equal -1.0f
    float32OfHexString "-1." |> Equal -1.0f
    float32OfHexString "-1.0" |> Equal -1.0f
    float32OfHexString "-0x1" |> Equal -1.0f
    float32OfHexString "-0X1" |> Equal -1.0f
    float32OfHexString "-0x0001" |> Equal -1.0f
    float32OfHexString "-0x1." |> Equal -1.0f
    float32OfHexString "-0x1.0" |> Equal -1.0f
    float32OfHexString "-0x001.0" |> Equal -1.0f
    float32OfHexString "-1.0p0" |> Equal -1.0f
    float32OfHexString "-1.0P0" |> Equal -1.0f
    float32OfHexString "-001.00p+000" |> Equal -1.0f
    float32OfHexString "-.100p+004" |> Equal -1.0f
    float32OfHexString "-.0100p+008" |> Equal -1.0f
    float32OfHexString "-00.100p+004" |> Equal -1.0f
    float32OfHexString "-00.0100p+008" |> Equal -1.0f
    float32OfHexString "-0010.0p-004" |> Equal -1.0f
    float32OfHexString "-0x1.0p0" |> Equal -1.0f
    float32OfHexString "-0X1.0P0" |> Equal -1.0f
    float32OfHexString "-0x001.00p+000" |> Equal -1.0f
    float32OfHexString "-0x00.100p+004" |> Equal -1.0f
    float32OfHexString "-0x.100p+004" |> Equal -1.0f
    float32OfHexString "-0x0010.0p-004" |> Equal -1.0f

    float32OfHexString "+001" |> Equal 1.0f
    float32OfHexString "+1." |> Equal 1.0f
    float32OfHexString "+1.0" |> Equal 1.0f
    float32OfHexString "+.100p+004" |> Equal 1.0f
    float32OfHexString "+0x0010.0p-004" |> Equal 1.0f

    // zeros are compared bitwise so that the sign of the result is checked too;
    // huge negative exponents are expected to underflow to (signed) zero
    float32OfHexString "0" |> BEqual 0.f
    float32OfHexString "0." |> BEqual 0.f
    float32OfHexString "0.0" |> BEqual 0.f
    float32OfHexString "00.0" |> BEqual 0.f
    float32OfHexString "00.000" |> BEqual 0.f
    float32OfHexString "00.000p0" |> BEqual 0.f
    float32OfHexString "00.000p99999999" |> BEqual 0.f
    float32OfHexString "0x0" |> BEqual 0.f
    float32OfHexString "0x0." |> BEqual 0.f
    float32OfHexString "0x0.0" |> BEqual 0.f
    float32OfHexString "0x00.0" |> BEqual 0.f
    float32OfHexString "0x00.000" |> BEqual 0.f
    float32OfHexString "0x00.000p0" |> BEqual 0.f
    float32OfHexString "0x00.000p99999999" |> BEqual 0.f
    float32OfHexString "100P-2147483639" |> BEqual 0.f
    float32OfHexString "100P-2147483640" |> BEqual 0.f
    float32OfHexString "100P-2147483647" |> BEqual 0.f
    float32OfHexString "100P-9999999999999999999999999" |> BEqual 0.f
    float32OfHexString "0.001P-2147483639" |> BEqual 0.f
    float32OfHexString "0.001P-2147483640" |> BEqual 0.f
    float32OfHexString "0.001P-2147483647" |> BEqual 0.f
    float32OfHexString "0.001P-9999999999999999999999999" |> BEqual 0.f

    float32OfHexString "-0" |> BEqual -0.0f
    float32OfHexString "-0." |> BEqual -0.0f
    float32OfHexString "-0.0" |> BEqual -0.0f
    float32OfHexString "-00.0" |> BEqual -0.0f
    float32OfHexString "-00.000" |> BEqual -0.0f
    float32OfHexString "-00.000p0" |> BEqual -0.f
    float32OfHexString "-00.000p99999999" |> BEqual -0.f
    float32OfHexString "-0x0" |> BEqual -0.0f
    float32OfHexString "-0x0." |> BEqual -0.0f
    float32OfHexString "-0x0.0" |> BEqual -0.0f
    float32OfHexString "-0x00.0" |> BEqual -0.0f
    float32OfHexString "-0x00.000" |> BEqual -0.0f
    float32OfHexString "-0x00.000p0" |> BEqual -0.f
    float32OfHexString "-0x00.000p99999999" |> BEqual -0.f
    float32OfHexString "-100P-2147483639" |> BEqual -0.f
    float32OfHexString "-100P-2147483640" |> BEqual -0.f
    float32OfHexString "-100P-2147483647" |> BEqual -0.f
    float32OfHexString "-100P-9999999999999999999999999" |> BEqual -0.f
    float32OfHexString "-0.001P-2147483639" |> BEqual -0.f
    float32OfHexString "-0.001P-2147483640" |> BEqual -0.f
    float32OfHexString "-0.001P-2147483647" |> BEqual -0.f
    float32OfHexString "-0.001P-9999999999999999999999999" |> BEqual -0.f

    // every hex digit, in lower and upper case
    float32OfHexString "0x0123" |> Equal (single 0x0123)
    float32OfHexString "0x4567" |> Equal (single 0x4567)
    float32OfHexString "0x89ab" |> Equal (single 0x89ab)
    float32OfHexString "0x89AB" |> Equal (single 0x89ab)
    float32OfHexString "0xcdef" |> Equal (single 0xcdef)
    float32OfHexString "0xCDEF" |> Equal (single 0xcdef)

    // the same value written with the digits shifted by 1 to 8 bits in either
    // direction and a compensating exponent
    let v = float32OfHexString "0x1.23456e"
    float32OfHexString "0x123.456e00p-8" |> Equal v
    float32OfHexString "0x91.a2b700p-7" |> Equal v
    float32OfHexString "0x48.d15b80p-6" |> Equal v
    float32OfHexString "0x24.68adc0p-5" |> Equal v
    float32OfHexString "0x12.3456e0p-4" |> Equal v
    float32OfHexString "0x9.1a2b70p-3" |> Equal v
    float32OfHexString "0x4.8d15b8p-2" |> Equal v
    float32OfHexString "0x2.468adcp-1" |> Equal v
    float32OfHexString "0x.91a2b70p+1" |> Equal v
    float32OfHexString "0x.48d15b8p+2" |> Equal v
    float32OfHexString "0x.2468adcp+3" |> Equal v
    float32OfHexString "0x.123456ep+4" |> Equal v
    float32OfHexString "0x.091a2b70p+5" |> Equal v
    float32OfHexString "0x.048d15b8p+6" |> Equal v
    float32OfHexString "0x.02468adcp+7" |> Equal v
    float32OfHexString "0x.0123456ep+8" |> Equal v

    // near max
    float32OfHexString "0x1.fffffep127" |> Equal max
    float32OfHexString "0x.1fffffep131" |> Equal max
    float32OfHexString "0x.01fffffep135" |> Equal max
    float32OfHexString "0x1f.ffffep123" |> Equal max
    float32OfHexString "0x1fffffe.p103" |> Equal max
    float32OfHexString "-0x1fffffe000.p91" |> Equal (-max)

    // values above max that still round down to max
    float32OfHexString "0x0.ffffff5p128" |> Equal max
    float32OfHexString "0x0.ffffff6p128" |> Equal max
    float32OfHexString "0x0.ffffff7p128" |> Equal max
    float32OfHexString "0x0.ffffff7ffffffffp128" |> Equal max

    /// Asserts that parsing `s` throws a System.OverflowException.
    let checkOverflow s =
        try float32OfHexString s |> ignore; Fail ()
        with :? System.OverflowException -> ()

    checkOverflow "0x0.ffffff8p128"
    checkOverflow "0x0.ffffff800000p128"
    checkOverflow "0x0.fffffffp128"
    checkOverflow "0x1p128"
    checkOverflow "100P2147483639"
    checkOverflow "100P2147483640"
    checkOverflow "100P2147483647"
    checkOverflow "100P9999999999999999999999999"
    checkOverflow "0.001P2147483639"
    checkOverflow "0.001P2147483640"
    checkOverflow "0.001P2147483647"
    checkOverflow "0.001P9999999999999999999999999"

    // near 1
    float32OfHexString "0x1.000001e" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.000001c" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.000001a" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.0000018" |> Equal (1.f + 2.f*eps)
    // 0x1.0000018 scaled by 2, 4 and 8
    float32OfHexString "0x2.0000030p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.0000060p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.00000c0p-3" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.0000016" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.0000014" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x2.0000028p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.0000050p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.00000a0p-3" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.0000012" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x2.0000024p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.0000048p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.0000090p-3" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.00000110" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x2.00000220p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.00000440p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.00000880p-3" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.00000108" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x2.00000210p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.00000420p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.00000840p-3" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.00000104" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x2.00000208p-1" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x4.0000041p-2" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x8.0000082p-3" |> Equal (1.f + 2.f*eps)
    // exactly halfway between 1 and 1 + 2*eps, in four different scalings
    float32OfHexString "0x1.000001" |> Equal (1.f) // round towards even
    float32OfHexString "0x2.000002p-1" |> Equal (1.f)
    float32OfHexString "0x4.000004p-2" |> Equal (1.f)
    float32OfHexString "0x8.000008p-3" |> Equal (1.f)
    float32OfHexString "0x1.00000100000010000" |> Equal (1.f + 2.f*eps)
    float32OfHexString "0x1.000000ffffffffff" |> Equal (1.f)
    float32OfHexString "0x1.000000e" |> Equal (1.f)
    float32OfHexString "0x1.000000c" |> Equal (1.f)
    float32OfHexString "0x1.000000a" |> Equal (1.f)
    float32OfHexString "0x1.0000008" |> Equal (1.f)
    float32OfHexString "0x1.0000006" |> Equal (1.f)
    float32OfHexString "0x1.0000004" |> Equal (1.f)
    float32OfHexString "0x1.0000002" |> Equal (1.f)
    float32OfHexString "0x1.0000000" |> Equal (1.f)
    float32OfHexString "0x1.fffffffffP-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffffeP-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffffcP-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffffaP-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffff8P-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffff6P-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffff4P-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffff2P-1" |> Equal (1.f)
    float32OfHexString "0x1.ffffffP-1" |> Equal (1.f) // round towards even
    float32OfHexString "0x1.ffffff0000P-1" |> Equal (1.f)
    float32OfHexString "0x1.fffffefffffP-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffee0001P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffecP-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffeaP-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffe8P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffe6P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffe4P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffe2P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffe0P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffd001P-1" |> Equal (1.f - eps)
    float32OfHexString "0x1.fffffdP-1" |> Equal (1.f - 2.f*eps) // round towards even (here: towards zero)
    float32OfHexString "0x1.fffffd0P-1" |> Equal (1.f - 2.f*eps)
    float32OfHexString "0x1.fffffcfffP-1" |> Equal (1.f - 2.f*eps)
    float32OfHexString "0x1.fffffce0P-1" |> Equal (1.f - 2.f*eps)
    float32OfHexString "0x1.fffffc20P-1" |> Equal (1.f - 2.f*eps)

    // near min (the smallest normal number) and just below it
    float32OfHexString "0x0.800000fP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.800000eP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.800000dP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.800000cP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.800000bP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.800000aP-125" |> Equal (min + minmin)
    float32OfHexString "0x0.8000009P-125" |> Equal (min + minmin)
    float32OfHexString "0x0.8000008001P-125" |> Equal (min + minmin)
    float32OfHexString "0x0.8000008P-125" |> Equal (min) // round towards even
    float32OfHexString "0x0.80000080P-125" |> Equal (min)
    float32OfHexString "0x0.8000007ffffP-125" |> Equal (min)
    float32OfHexString "0x0.8000007P-125" |> Equal (min)
    float32OfHexString "0x0.8000006P-125" |> Equal (min)
    float32OfHexString "0x0.8000005P-125" |> Equal (min)
    float32OfHexString "0x0.8000004P-125" |> Equal (min)
    float32OfHexString "0x0.8000003P-125" |> Equal (min)
    float32OfHexString "0x0.8000002P-125" |> Equal (min)
    float32OfHexString "0x0.8000001P-125" |> Equal (min)
    float32OfHexString "0x0.8000000P-125" |> Equal (min)
    float32OfHexString "0x0.7ffffffP-125" |> Equal (min)
    float32OfHexString "0x0.7fffffeP-125" |> Equal (min)
    float32OfHexString "0x0.7fffffdP-125" |> Equal (min)
    float32OfHexString "0x0.7fffffcP-125" |> Equal (min)
    float32OfHexString "0x0.7fffffbP-125" |> Equal (min)
    float32OfHexString "0x0.7fffffaP-125" |> Equal (min)
    float32OfHexString "0x0.7fffff9P-125" |> Equal (min)
    float32OfHexString "0x0.7fffff8P-125" |> Equal (min) // round towards even
    float32OfHexString "0x0.7fffff7fffP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff7P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff6P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff5P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff4P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff3P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff2P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff1P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7fffff0P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffefP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffeeP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffedP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffecP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffebP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffeaP-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffe9P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffe8001P-125" |> Equal (min - minmin)
    float32OfHexString "0x0.7ffffe8P-125" |> Equal (min - 2.f*minmin) // round towards even
    float32OfHexString "0x0.7ffffe80P-125" |> Equal (min - 2.f*minmin) // round towards even
    float32OfHexString "0x0.7ffffe7ffP-125" |> Equal (min - 2.f*minmin)

    // smallest subnormals and underflow to zero
    float32OfHexString "0x0.0000019P-125" |> Equal (2.f*minmin)
    float32OfHexString "0x0.0000018P-125" |> Equal (2.f*minmin) // round towards even
    float32OfHexString "0x0.0000017ffP-125" |> Equal (minmin)
    float32OfHexString "0x0.000001P-125" |> Equal (minmin)
    float32OfHexString "0x0.0000010P-125" |> Equal (minmin)
    float32OfHexString "0x0.0000008001P-125" |> Equal (minmin)
    float32OfHexString "0x0.0000008P-125" |> Equal (0.f) // round towards even
    float32OfHexString "0x0.0000007ffffP-125" |> Equal (0.f)
    float32OfHexString "0x1P-150" |> Equal (0.f)

    // round trip checking
    //////////////////////
    // fixed seed, so every run checks the same sequence of bit patterns
    let rand = System.Random(123)
    let buffer = Array.zeroCreate 4
    let randomFloat32() =
        rand.NextBytes(buffer)
        System.BitConverter.ToSingle(buffer, 0)

    for i = 0 to 100000 do
        let f = randomFloat32()
        let s = float32ToHexString f
        let f2 = float32OfHexString s
        // `f <> f` holds only for NaN, which never compares equal to itself
        True (f = f2 || f <> f)

/// Runs the double-precision tests followed by the single-precision tests.
let run() =
    testDoubleHexFloat()
    testSingleHexFloat()

================================================
FILE: Test/IdentifierValidatorTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2012
// License: Simplified BSD License. See accompanying documentation.

module FParsec.Test.IdentifierValidatorTests

#if NETCORE
open System
#endif

open FParsec.Test.Test

// the following string contains parts of http://www.unicode.org/Public/8.0.0/ucd/DerivedCoreProperties.txt
let xidProperties = @" # DerivedCoreProperties-8.0.0.txt # Date: 2015-03-11, 22:29:21 GMT [MD] # # Unicode Character Database # Copyright (c) 1991-2015 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ # Derived Property: XID_Start # ID_Start modified for closure under NFKx # Modified as described in UAX #15 # NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) # NOTE: See UAX #31 for more information 0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z 00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Start # L& MICRO SIGN 00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 01BB ; XID_Start # Lo LATIN LETTER TWO WITH STROKE 01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL 0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP 0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 02EC ; XID_Start # Lm MODIFIER LETTER VOICING 02EE ; XID_Start # Lm MODIFIER LETTER DOUBLE APOSTROPHE 0370..0373 ; XID_Start # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI 0374 ; XID_Start # Lm GREEK NUMERAL SIGN 0376..0377 ; XID_Start # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA 037B..037D ; XID_Start # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL 037F ; 
XID_Start # L& GREEK CAPITAL LETTER YOT 0386 ; XID_Start # L& GREEK CAPITAL LETTER ALPHA WITH TONOS 0388..038A ; XID_Start # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS 038C ; XID_Start # L& GREEK CAPITAL LETTER OMICRON WITH TONOS 038E..03A1 ; XID_Start # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO 03A3..03F5 ; XID_Start # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL 03F7..0481 ; XID_Start # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA 048A..052F ; XID_Start # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER 0531..0556 ; XID_Start # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0559 ; XID_Start # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING 0561..0587 ; XID_Start # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 05D0..05EA ; XID_Start # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; XID_Start # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 0620..063F ; XID_Start # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0640 ; XID_Start # Lm ARABIC TATWEEL 0641..064A ; XID_Start # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH 066E..066F ; XID_Start # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF 0671..06D3 ; XID_Start # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE 06D5 ; XID_Start # Lo ARABIC LETTER AE 06E5..06E6 ; XID_Start # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH 06EE..06EF ; XID_Start # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V 06FA..06FC ; XID_Start # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW 06FF ; XID_Start # Lo ARABIC LETTER HEH WITH INVERTED V 0710 ; XID_Start # Lo SYRIAC LETTER ALAPH 0712..072F ; XID_Start # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH 074D..07A5 ; XID_Start # 
Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU 07B1 ; XID_Start # Lo THAANA LETTER NAA 07CA..07EA ; XID_Start # Lo [33] NKO LETTER A..NKO LETTER JONA RA 07F4..07F5 ; XID_Start # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 07FA ; XID_Start # Lm NKO LAJANYALAN 0800..0815 ; XID_Start # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF 081A ; XID_Start # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT 0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 08A0..08B4 ; XID_Start # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; XID_Start # Lo DEVANAGARI OM 0958..0961 ; XID_Start # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0971 ; XID_Start # Lm DEVANAGARI SIGN HIGH SPACING DOT 0972..0980 ; XID_Start # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI 0985..098C ; XID_Start # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L 098F..0990 ; XID_Start # Lo [2] BENGALI LETTER E..BENGALI LETTER AI 0993..09A8 ; XID_Start # Lo [22] BENGALI LETTER O..BENGALI LETTER NA 09AA..09B0 ; XID_Start # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA 09B2 ; XID_Start # Lo BENGALI LETTER LA 09B6..09B9 ; XID_Start # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA 09BD ; XID_Start # Lo BENGALI SIGN AVAGRAHA 09CE ; XID_Start # Lo BENGALI LETTER KHANDA TA 09DC..09DD ; XID_Start # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; XID_Start # Lo [22] 
GURMUKHI LETTER OO..GURMUKHI LETTER NA 0A2A..0A30 ; XID_Start # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 0A32..0A33 ; XID_Start # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA 0A35..0A36 ; XID_Start # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA 0A38..0A39 ; XID_Start # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA 0A59..0A5C ; XID_Start # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA 0A5E ; XID_Start # Lo GURMUKHI LETTER FA 0A72..0A74 ; XID_Start # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR 0A85..0A8D ; XID_Start # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E 0A8F..0A91 ; XID_Start # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O 0A93..0AA8 ; XID_Start # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA 0AAA..0AB0 ; XID_Start # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA 0AB2..0AB3 ; XID_Start # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA 0AB5..0AB9 ; XID_Start # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA 0ABD ; XID_Start # Lo GUJARATI SIGN AVAGRAHA 0AD0 ; XID_Start # Lo GUJARATI OM 0AE0..0AE1 ; XID_Start # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AF9 ; XID_Start # Lo GUJARATI LETTER ZHA 0B05..0B0C ; XID_Start # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; XID_Start # Lo [2] ORIYA LETTER E..ORIYA LETTER AI 0B13..0B28 ; XID_Start # Lo [22] ORIYA LETTER O..ORIYA LETTER NA 0B2A..0B30 ; XID_Start # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA 0B32..0B33 ; XID_Start # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA 0B35..0B39 ; XID_Start # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA 0B3D ; XID_Start # Lo ORIYA SIGN AVAGRAHA 0B5C..0B5D ; XID_Start # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; XID_Start # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL 0B71 ; XID_Start # Lo ORIYA LETTER WA 0B83 ; XID_Start # Lo TAMIL SIGN VISARGA 0B85..0B8A ; XID_Start # Lo [6] TAMIL LETTER A..TAMIL LETTER UU 0B8E..0B90 ; XID_Start # Lo [3] TAMIL LETTER E..TAMIL LETTER AI 0B92..0B95 ; XID_Start # Lo [4] TAMIL LETTER O..TAMIL LETTER KA 
0B99..0B9A ; XID_Start # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA 0B9C ; XID_Start # Lo TAMIL LETTER JA 0B9E..0B9F ; XID_Start # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA 0BA3..0BA4 ; XID_Start # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA 0BA8..0BAA ; XID_Start # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA 0BAE..0BB9 ; XID_Start # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA 0BD0 ; XID_Start # Lo TAMIL OM 0C05..0C0C ; XID_Start # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L 0C0E..0C10 ; XID_Start # Lo [3] TELUGU LETTER E..TELUGU LETTER AI 0C12..0C28 ; XID_Start # Lo [23] TELUGU LETTER O..TELUGU LETTER NA 0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 0C8E..0C90 ; XID_Start # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 0C92..0CA8 ; XID_Start # Lo [23] KANNADA LETTER O..KANNADA LETTER NA 0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA 0CDE ; XID_Start # Lo KANNADA LETTER FA 0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D05..0D0C ; XID_Start # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 0D0E..0D10 ; XID_Start # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 0D12..0D3A ; XID_Start # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA 0D3D ; XID_Start # Lo MALAYALAM SIGN AVAGRAHA 0D4E ; XID_Start # Lo MALAYALAM LETTER DOT REPH 0D5F..0D61 ; XID_Start # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D7A..0D7F ; XID_Start # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D85..0D96 ; XID_Start # Lo 
[18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA 0D9A..0DB1 ; XID_Start # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA 0DB3..0DBB ; XID_Start # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA 0DBD ; XID_Start # Lo SINHALA LETTER DANTAJA LAYANNA 0DC0..0DC6 ; XID_Start # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA 0E01..0E30 ; XID_Start # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A 0E32 ; XID_Start # Lo THAI CHARACTER SARA AA 0E40..0E45 ; XID_Start # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO 0E46 ; XID_Start # Lm THAI CHARACTER MAIYAMOK 0E81..0E82 ; XID_Start # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG 0E84 ; XID_Start # Lo LAO LETTER KHO TAM 0E87..0E88 ; XID_Start # Lo [2] LAO LETTER NGO..LAO LETTER CO 0E8A ; XID_Start # Lo LAO LETTER SO TAM 0E8D ; XID_Start # Lo LAO LETTER NYO 0E94..0E97 ; XID_Start # Lo [4] LAO LETTER DO..LAO LETTER THO TAM 0E99..0E9F ; XID_Start # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG 0EA1..0EA3 ; XID_Start # Lo [3] LAO LETTER MO..LAO LETTER LO LING 0EA5 ; XID_Start # Lo LAO LETTER LO LOOT 0EA7 ; XID_Start # Lo LAO LETTER WO 0EAA..0EAB ; XID_Start # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG 0EAD..0EB0 ; XID_Start # Lo [4] LAO LETTER O..LAO VOWEL SIGN A 0EB2 ; XID_Start # Lo LAO VOWEL SIGN AA 0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Start # Lm LAO KO LA 0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA 0F88..0F8C ; XID_Start # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN 1000..102A ; XID_Start # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU 103F ; XID_Start # Lo MYANMAR LETTER GREAT SA 1050..1055 ; XID_Start # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL 
105A..105D ; XID_Start # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE 1061 ; XID_Start # Lo MYANMAR LETTER SGAW KAREN SHA 1065..1066 ; XID_Start # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA 106E..1070 ; XID_Start # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA 1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE 10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN 10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR 10FD..1248 ; XID_Start # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA 125A..125D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE 1260..1288 ; XID_Start # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA 128A..128D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE 1290..12B0 ; XID_Start # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA 12B2..12B5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE 12B8..12BE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO 12C0 ; XID_Start # Lo ETHIOPIC SYLLABLE KXWA 12C2..12C5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE 12C8..12D6 ; XID_Start # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O 12D8..1310 ; XID_Start # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; XID_Start # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 1380..138F ; XID_Start # 
Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 13A0..13F5 ; XID_Start # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; XID_Start # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1401..166C ; XID_Start # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA 166F..167F ; XID_Start # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W 1681..169A ; XID_Start # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH 16A0..16EA ; XID_Start # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X 16EE..16F0 ; XID_Start # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL 16F1..16F8 ; XID_Start # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC 1700..170C ; XID_Start # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA 170E..1711 ; XID_Start # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA 1720..1731 ; XID_Start # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA 1740..1751 ; XID_Start # Lo [18] BUHID LETTER A..BUHID LETTER HA 1760..176C ; XID_Start # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 176E..1770 ; XID_Start # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1780..17B3 ; XID_Start # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU 17D7 ; XID_Start # Lm KHMER SIGN LEK TOO 17DC ; XID_Start # Lo KHMER SIGN AVAKRAHASANYA 1820..1842 ; XID_Start # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; XID_Start # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877 ; XID_Start # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA 1880..18A8 ; XID_Start # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA 18AA ; XID_Start # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 ; XID_Start # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S 1900..191E ; XID_Start # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA 1950..196D ; XID_Start # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI 1970..1974 ; XID_Start # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 1980..19AB ; 
XID_Start # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA 19B0..19C9 ; XID_Start # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 1A00..1A16 ; XID_Start # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA 1A20..1A54 ; XID_Start # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA 1AA7 ; XID_Start # Lm TAI THAM SIGN MAI YAMOK 1B05..1B33 ; XID_Start # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA 1B45..1B4B ; XID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; XID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E00..1F15 ; XID_Start # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; XID_Start # L& [6] 
GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F20..1F45 ; XID_Start # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 1F48..1F4D ; XID_Start # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F50..1F57 ; XID_Start # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F59 ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA 1F5B ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5D ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5F..1F7D ; XID_Start # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA 1F80..1FB4 ; XID_Start # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB6..1FBC ; XID_Start # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBE ; XID_Start # L& GREEK PROSGEGRAMMENI 1FC2..1FC4 ; XID_Start # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC6..1FCC ; XID_Start # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD0..1FD3 ; XID_Start # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6..1FDB ; XID_Start # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA 1FE0..1FEC ; XID_Start # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; XID_Start # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; XID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F 
; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N 2090..209C ; XID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2102 ; XID_Start # L& DOUBLE-STRUCK CAPITAL C 2107 ; XID_Start # L& EULER CONSTANT 210A..2113 ; XID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L 2115 ; XID_Start # L& DOUBLE-STRUCK CAPITAL N 2118 ; XID_Start # Sm SCRIPT CAPITAL P 2119..211D ; XID_Start # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R 2124 ; XID_Start # L& DOUBLE-STRUCK CAPITAL Z 2126 ; XID_Start # L& OHM SIGN 2128 ; XID_Start # L& BLACK-LETTER CAPITAL Z 212A..212D ; XID_Start # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C 212E ; XID_Start # So ESTIMATED SYMBOL 212F..2134 ; XID_Start # L& [6] SCRIPT SMALL E..SCRIPT SMALL O 2135..2138 ; XID_Start # Lo [4] ALEF SYMBOL..DALET SYMBOL 2139 ; XID_Start # L& INFORMATION SOURCE 213C..213F ; XID_Start # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI 2145..2149 ; XID_Start # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J 214E ; XID_Start # L& TURNED SMALL F 2160..2182 ; XID_Start # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND 2183..2184 ; XID_Start # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C 2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE 2C60..2C7B ; XID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL 
LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE 2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN 2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN 2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 2DA8..2DAE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO 2DB0..2DB6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO 2DB8..2DBE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO 2DC0..2DC6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO 2DC8..2DCE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO 2DD0..2DD6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 2DD8..2DDE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO 3005 ; XID_Start # Lm IDEOGRAPHIC ITERATION MARK 3006 ; XID_Start # Lo IDEOGRAPHIC CLOSING MARK 3007 ; XID_Start # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; XID_Start # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 3031..3035 ; XID_Start # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; XID_Start # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; XID_Start # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 303C ; XID_Start # Lo MASU MARK 3041..3096 ; XID_Start # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE 309D..309E ; XID_Start # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 309F ; XID_Start # Lo HIRAGANA DIGRAPH YORI 30A1..30FA ; XID_Start # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO 30FC..30FE ; XID_Start # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 30FF ; XID_Start # Lo KATAKANA DIGRAPH KOTO 
3105..312D ; XID_Start # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH 3131..318E ; XID_Start # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 31A0..31BA ; XID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4E00..9FD5 ; XID_Start # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 A000..A014 ; XID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Start # Lm YI SYLLABLE WU A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A4D0..A4F7 ; XID_Start # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; XID_Start # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU A500..A60B ; XID_Start # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG A60C ; XID_Start # Lm VAI SYLLABLE LENGTHENER A610..A61F ; XID_Start # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG A62A..A62B ; XID_Start # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO A640..A66D ; XID_Start # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; XID_Start # Lo CYRILLIC LETTER MULTIOCULAR O A67F ; XID_Start # Lm CYRILLIC PAYEROK A680..A69B ; XID_Start # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O A69C..A69D ; XID_Start # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A6A0..A6E5 ; XID_Start # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; XID_Start # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A717..A71F ; XID_Start # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A722..A76F ; XID_Start # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; XID_Start # Lm MODIFIER LETTER US A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; 
XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT A790..A7AD ; XID_Start # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT A7B0..A7B7 ; XID_Start # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O A807..A80A ; XID_Start # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO A80C..A822 ; XID_Start # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO A840..A873 ; XID_Start # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU A882..A8B3 ; XID_Start # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA A8F2..A8F7 ; XID_Start # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA A8FB ; XID_Start # Lo DEVANAGARI HEADSTROKE A8FD ; XID_Start # Lo DEVANAGARI JAIN OM A90A..A925 ; XID_Start # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO A930..A946 ; XID_Start # Lo [23] REJANG LETTER KA..REJANG LETTER A A960..A97C ; XID_Start # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH A984..A9B2 ; XID_Start # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA A9CF ; XID_Start # Lm JAVANESE PANGRANGKEP A9E0..A9E4 ; XID_Start # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA A9E6 ; XID_Start # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION A9E7..A9EF ; XID_Start # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA A9FA..A9FE ; XID_Start # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA AA00..AA28 ; XID_Start # Lo [41] CHAM LETTER A..CHAM LETTER HA AA40..AA42 ; 
XID_Start # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA44..AA4B ; XID_Start # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS AA60..AA6F ; XID_Start # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA AA70 ; XID_Start # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AA71..AA76 ; XID_Start # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM AA7A ; XID_Start # Lo MYANMAR LETTER AITON RA AA7E..AAAF ; XID_Start # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O AAB1 ; XID_Start # Lo TAI VIET VOWEL AA AAB5..AAB6 ; XID_Start # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O AAB9..AABD ; XID_Start # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AAC0 ; XID_Start # Lo TAI VIET TONE MAI NUENG AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Start # Lm TAI VIET SYMBOL SAM AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO AB20..AB26 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO AB28..AB2E ; XID_Start # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO AB30..AB5A ; XID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG AB5C..AB5F ; XID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB65 ; XID_Start # L& [6] LATIN SMALL LETTER SAKHA YAT..GREEK LETTER SMALL CAPITAL OMEGA AB70..ABBF ; XID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; 
XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FB1D ; XID_Start # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; XID_Start # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV FB2A..FB36 ; XID_Start # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH FB38..FB3C ; XID_Start # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH FB3E ; XID_Start # Lo HEBREW LETTER MEM WITH DAGESH FB40..FB41 ; XID_Start # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH FB43..FB44 ; XID_Start # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; XID_Start # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBD3..FC5D ; XID_Start # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM FC64..FD3D ; XID_Start # Lo [218] ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD50..FD8F ; XID_Start # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM FD92..FDC7 ; XID_Start # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDF0..FDF9 ; XID_Start # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA 
ISOLATED FORM FE71 ; XID_Start # Lo ARABIC TATWEEL WITH FATHATAN ABOVE FE73 ; XID_Start # Lo ARABIC TAIL FRAGMENT FE77 ; XID_Start # Lo ARABIC FATHA MEDIAL FORM FE79 ; XID_Start # Lo ARABIC DAMMA MEDIAL FORM FE7B ; XID_Start # Lo ARABIC KASRA MEDIAL FORM FE7D ; XID_Start # Lo ARABIC SHADDA MEDIAL FORM FE7F..FEFC ; XID_Start # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FF21..FF3A ; XID_Start # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; XID_Start # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z FF66..FF6F ; XID_Start # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF70 ; XID_Start # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71..FF9D ; XID_Start # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N FFA0..FFBE ; XID_Start # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH FFC2..FFC7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E FFCA..FFCF ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE FFD2..FFD7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10000..1000B ; XID_Start # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE 1000D..10026 ; XID_Start # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO 10028..1003A ; XID_Start # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO 1003C..1003D ; XID_Start # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE 1003F..1004D ; XID_Start # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; XID_Start # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; XID_Start # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 10140..10174 ; XID_Start # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC 
STRATIAN FIFTY MNAS 10280..1029C ; XID_Start # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X 102A0..102D0 ; XID_Start # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 10300..1031F ; XID_Start # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS 10330..10340 ; XID_Start # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA 10341 ; XID_Start # Nl GOTHIC LETTER NINETY 10342..10349 ; XID_Start # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 1034A ; XID_Start # Nl GOTHIC LETTER NINE HUNDRED 10350..10375 ; XID_Start # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA 10380..1039D ; XID_Start # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU 103A0..103C3 ; XID_Start # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA 103C8..103CF ; XID_Start # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH 103D1..103D5 ; XID_Start # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 10400..1044F ; XID_Start # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW 10450..1049D ; XID_Start # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 10500..10527 ; XID_Start # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; XID_Start # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW 10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10800..10805 ; XID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Start # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO 10837..10838 ; XID_Start # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE 1083C ; XID_Start # Lo CYPRIOT SYLLABLE ZA 1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10860..10876 ; XID_Start # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW 10880..1089E ; XID_Start # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN 
LETTER TAW 108E0..108F2 ; XID_Start # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH 108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA 10A19..10A33 ; XID_Start # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA 10A60..10A7C ; XID_Start # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH 10A80..10A9C ; XID_Start # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH 10AC0..10AC7 ; XID_Start # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW 10AC9..10AE4 ; XID_Start # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW 10B00..10B35 ; XID_Start # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE 10B40..10B55 ; XID_Start # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW 10B60..10B72 ; XID_Start # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW 10B80..10B91 ; XID_Start # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10C80..10CB2 ; XID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; XID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER 
SAH..SORA SOMPENG LETTER MAE 11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA 11150..11172 ; XID_Start # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA 11176 ; XID_Start # Lo MAHAJANI LIGATURE SHRI 11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA 111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM 111DA ; XID_Start # Lo SHARADA EKAM 111DC ; XID_Start # Lo SHARADA HEADSTROKE 11200..11211 ; XID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; XID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA 11280..11286 ; XID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; XID_Start # Lo MULTANI LETTER GHA 1128A..1128D ; XID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA 1128F..1129D ; XID_Start # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA 1129F..112A8 ; XID_Start # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA 112B0..112DE ; XID_Start # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA 11305..1130C ; XID_Start # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L 1130F..11310 ; XID_Start # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI 11313..11328 ; XID_Start # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA 1132A..11330 ; XID_Start # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA 11332..11333 ; XID_Start # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; XID_Start # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA 1133D ; XID_Start # Lo GRANTHA SIGN AVAGRAHA 11350 ; XID_Start # Lo GRANTHA OM 1135D..11361 ; XID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11480..114AF ; XID_Start # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114C4..114C5 ; XID_Start # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG 114C7 ; XID_Start # Lo TIRHUTA OM 11580..115AE ; XID_Start # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA 115D8..115DB ; XID_Start # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U 11600..1162F ; XID_Start # Lo [48] MODI LETTER A..MODI LETTER LLA 11644 ; 
XID_Start # Lo MODI SIGN HUVA 11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 11700..11719 ; XID_Start # Lo [26] AHOM LETTER KA..AHOM LETTER JHA 118A0..118DF ; XID_Start # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 118FF ; XID_Start # Lo WARANG CITI OM 11AC0..11AF8 ; XID_Start # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 12000..12399 ; XID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; XID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 13000..1342E ; XID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK 16AD0..16AED ; XID_Start # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I 16B00..16B2F ; XID_Start # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU 16B40..16B43 ; XID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16B63..16B77 ; XID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; XID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16F00..16F44 ; XID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA 16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1BC00..1BC6A ; XID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; XID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; XID_Start # Lo [9] DUPLOYAN AFFIX HIGH 
ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; XID_Start # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D 1D4A2 ; XID_Start # L& MATHEMATICAL SCRIPT CAPITAL G 1D4A5..1D4A6 ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K 1D4A9..1D4AC ; XID_Start # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q 1D4AE..1D4B9 ; XID_Start # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D 1D4BB ; XID_Start # L& MATHEMATICAL SCRIPT SMALL F 1D4BD..1D4C3 ; XID_Start # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N 1D4C5..1D505 ; XID_Start # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B 1D507..1D50A ; XID_Start # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G 1D50D..1D514 ; XID_Start # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q 1D516..1D51C ; XID_Start # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y 1D51E..1D539 ; XID_Start # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B 1D53B..1D53E ; XID_Start # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G 1D540..1D544 ; XID_Start # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M 1D546 ; XID_Start # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O 1D54A..1D550 ; XID_Start # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 1D552..1D6A5 ; XID_Start # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J 1D6A8..1D6C0 ; XID_Start # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA 1D6C2..1D6DA ; XID_Start # L& [25] 
MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA 1D6DC..1D6FA ; XID_Start # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA 1D6FC..1D714 ; XID_Start # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA 1D716..1D734 ; XID_Start # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA 1D736..1D74E ; XID_Start # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA 1D750..1D76E ; XID_Start # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D770..1D788 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA 1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1E800..1E8C4 ; XID_Start # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON 1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM 1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH 1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH 1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF 1EE34..1EE37 ; XID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH 1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD 1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN 1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM 1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL 
TAILED HAH 1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH 1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM 1EE4D..1EE4F ; XID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN 1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF 1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN 1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH 1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD 1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN 1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON 1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF 1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM 1EE64 ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH 1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF 1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF 1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH 1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH 1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH 1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH 1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN 1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL 1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Start # Lo [4149] CJK UNIFIED 
IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D # Total code points: 109807 # ================================================ # Derived Property: XID_Continue # Mod_ID_Continue modified for closure under NFKx # Modified as described in UAX #15 # NOTE: Does NOT remove the non-NFKx characters. # Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) # NOTE: See UAX #31 for more information 0030..0039 ; XID_Continue # Nd [10] DIGIT ZERO..DIGIT NINE 0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; XID_Continue # Pc LOW LINE 0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z 00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Continue # L& MICRO SIGN 00B7 ; XID_Continue # Po MIDDLE DOT 00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 01BB ; XID_Continue # Lo LATIN LETTER TWO WITH STROKE 01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL 0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP 0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Continue # 
Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 02EC ; XID_Continue # Lm MODIFIER LETTER VOICING 02EE ; XID_Continue # Lm MODIFIER LETTER DOUBLE APOSTROPHE 0300..036F ; XID_Continue # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X 0370..0373 ; XID_Continue # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI 0374 ; XID_Continue # Lm GREEK NUMERAL SIGN 0376..0377 ; XID_Continue # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA 037B..037D ; XID_Continue # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL 037F ; XID_Continue # L& GREEK CAPITAL LETTER YOT 0386 ; XID_Continue # L& GREEK CAPITAL LETTER ALPHA WITH TONOS 0387 ; XID_Continue # Po GREEK ANO TELEIA 0388..038A ; XID_Continue # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS 038C ; XID_Continue # L& GREEK CAPITAL LETTER OMICRON WITH TONOS 038E..03A1 ; XID_Continue # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO 03A3..03F5 ; XID_Continue # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL 03F7..0481 ; XID_Continue # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA 0483..0487 ; XID_Continue # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE 048A..052F ; XID_Continue # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER 0531..0556 ; XID_Continue # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0559 ; XID_Continue # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING 0561..0587 ; XID_Continue # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0591..05BD ; XID_Continue # Mn [45] HEBREW ACCENT 
ETNAHTA..HEBREW POINT METEG 05BF ; XID_Continue # Mn HEBREW POINT RAFE 05C1..05C2 ; XID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 05C4..05C5 ; XID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C7 ; XID_Continue # Mn HEBREW POINT QAMATS QATAN 05D0..05EA ; XID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; XID_Continue # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 0610..061A ; XID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0620..063F ; XID_Continue # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0640 ; XID_Continue # Lm ARABIC TATWEEL 0641..064A ; XID_Continue # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH 064B..065F ; XID_Continue # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW 0660..0669 ; XID_Continue # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 066E..066F ; XID_Continue # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF 0670 ; XID_Continue # Mn ARABIC LETTER SUPERSCRIPT ALEF 0671..06D3 ; XID_Continue # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE 06D5 ; XID_Continue # Lo ARABIC LETTER AE 06D6..06DC ; XID_Continue # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN 06DF..06E4 ; XID_Continue # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA 06E5..06E6 ; XID_Continue # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH 06E7..06E8 ; XID_Continue # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON 06EA..06ED ; XID_Continue # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM 06EE..06EF ; XID_Continue # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V 06F0..06F9 ; XID_Continue # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE 06FA..06FC ; XID_Continue # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW 06FF ; 
XID_Continue # Lo ARABIC LETTER HEH WITH INVERTED V 0710 ; XID_Continue # Lo SYRIAC LETTER ALAPH 0711 ; XID_Continue # Mn SYRIAC LETTER SUPERSCRIPT ALAPH 0712..072F ; XID_Continue # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH 0730..074A ; XID_Continue # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH 074D..07A5 ; XID_Continue # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU 07A6..07B0 ; XID_Continue # Mn [11] THAANA ABAFILI..THAANA SUKUN 07B1 ; XID_Continue # Lo THAANA LETTER NAA 07C0..07C9 ; XID_Continue # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE 07CA..07EA ; XID_Continue # Lo [33] NKO LETTER A..NKO LETTER JONA RA 07EB..07F3 ; XID_Continue # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE 07F4..07F5 ; XID_Continue # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 07FA ; XID_Continue # Lm NKO LAJANYALAN 0800..0815 ; XID_Continue # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF 0816..0819 ; XID_Continue # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH 081A ; XID_Continue # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT 081B..0823 ; XID_Continue # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0824 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER SHORT A 0825..0827 ; XID_Continue # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0828 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER I 0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 08A0..08B4 ; XID_Continue # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW 08E3..0902 ; XID_Continue # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; XID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093A ; XID_Continue # Mn DEVANAGARI VOWEL SIGN 
OE 093B ; XID_Continue # Mc DEVANAGARI VOWEL SIGN OOE 093C ; XID_Continue # Mn DEVANAGARI SIGN NUKTA 093D ; XID_Continue # Lo DEVANAGARI SIGN AVAGRAHA 093E..0940 ; XID_Continue # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 0941..0948 ; XID_Continue # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI 0949..094C ; XID_Continue # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU 094D ; XID_Continue # Mn DEVANAGARI SIGN VIRAMA 094E..094F ; XID_Continue # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW 0950 ; XID_Continue # Lo DEVANAGARI OM 0951..0957 ; XID_Continue # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE 0958..0961 ; XID_Continue # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0962..0963 ; XID_Continue # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0966..096F ; XID_Continue # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 0971 ; XID_Continue # Lm DEVANAGARI SIGN HIGH SPACING DOT 0972..0980 ; XID_Continue # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI 0981 ; XID_Continue # Mn BENGALI SIGN CANDRABINDU 0982..0983 ; XID_Continue # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA 0985..098C ; XID_Continue # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L 098F..0990 ; XID_Continue # Lo [2] BENGALI LETTER E..BENGALI LETTER AI 0993..09A8 ; XID_Continue # Lo [22] BENGALI LETTER O..BENGALI LETTER NA 09AA..09B0 ; XID_Continue # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA 09B2 ; XID_Continue # Lo BENGALI LETTER LA 09B6..09B9 ; XID_Continue # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA 09BC ; XID_Continue # Mn BENGALI SIGN NUKTA 09BD ; XID_Continue # Lo BENGALI SIGN AVAGRAHA 09BE..09C0 ; XID_Continue # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II 09C1..09C4 ; XID_Continue # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR 09C7..09C8 ; XID_Continue # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI 09CB..09CC ; XID_Continue # 
Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU 09CD ; XID_Continue # Mn BENGALI SIGN VIRAMA 09CE ; XID_Continue # Lo BENGALI LETTER KHANDA TA 09D7 ; XID_Continue # Mc BENGALI AU LENGTH MARK 09DC..09DD ; XID_Continue # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF..09E1 ; XID_Continue # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09E2..09E3 ; XID_Continue # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL 09E6..09EF ; XID_Continue # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE 09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; XID_Continue # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; XID_Continue # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA 0A2A..0A30 ; XID_Continue # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 0A32..0A33 ; XID_Continue # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA 0A35..0A36 ; XID_Continue # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA 0A38..0A39 ; XID_Continue # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA 0A3C ; XID_Continue # Mn GURMUKHI SIGN NUKTA 0A3E..0A40 ; XID_Continue # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II 0A41..0A42 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU 0A47..0A48 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI 0A4B..0A4D ; XID_Continue # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA 0A51 ; XID_Continue # Mn GURMUKHI SIGN UDAAT 0A59..0A5C ; XID_Continue # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA 0A5E ; XID_Continue # Lo GURMUKHI LETTER FA 0A66..0A6F ; XID_Continue # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE 0A70..0A71 ; XID_Continue # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK 0A72..0A74 ; XID_Continue # Lo [3] 
GURMUKHI IRI..GURMUKHI EK ONKAR 0A75 ; XID_Continue # Mn GURMUKHI SIGN YAKASH 0A81..0A82 ; XID_Continue # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA 0A83 ; XID_Continue # Mc GUJARATI SIGN VISARGA 0A85..0A8D ; XID_Continue # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E 0A8F..0A91 ; XID_Continue # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O 0A93..0AA8 ; XID_Continue # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA 0AAA..0AB0 ; XID_Continue # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA 0AB2..0AB3 ; XID_Continue # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA 0AB5..0AB9 ; XID_Continue # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA 0ABC ; XID_Continue # Mn GUJARATI SIGN NUKTA 0ABD ; XID_Continue # Lo GUJARATI SIGN AVAGRAHA 0ABE..0AC0 ; XID_Continue # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II 0AC1..0AC5 ; XID_Continue # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E 0AC7..0AC8 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI 0AC9 ; XID_Continue # Mc GUJARATI VOWEL SIGN CANDRA O 0ACB..0ACC ; XID_Continue # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 0ACD ; XID_Continue # Mn GUJARATI SIGN VIRAMA 0AD0 ; XID_Continue # Lo GUJARATI OM 0AE0..0AE1 ; XID_Continue # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; XID_Continue # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE 0AF9 ; XID_Continue # Lo GUJARATI LETTER ZHA 0B01 ; XID_Continue # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; XID_Continue # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; XID_Continue # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; XID_Continue # Lo [2] ORIYA LETTER E..ORIYA LETTER AI 0B13..0B28 ; XID_Continue # Lo [22] ORIYA LETTER O..ORIYA LETTER NA 0B2A..0B30 ; XID_Continue # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA 0B32..0B33 ; XID_Continue # Lo [2] ORIYA LETTER LA..ORIYA LETTER 
LLA 0B35..0B39 ; XID_Continue # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA 0B3C ; XID_Continue # Mn ORIYA SIGN NUKTA 0B3D ; XID_Continue # Lo ORIYA SIGN AVAGRAHA 0B3E ; XID_Continue # Mc ORIYA VOWEL SIGN AA 0B3F ; XID_Continue # Mn ORIYA VOWEL SIGN I 0B40 ; XID_Continue # Mc ORIYA VOWEL SIGN II 0B41..0B44 ; XID_Continue # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA 0B56 ; XID_Continue # Mn ORIYA AI LENGTH MARK 0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL 0B62..0B63 ; XID_Continue # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B66..0B6F ; XID_Continue # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE 0B71 ; XID_Continue # Lo ORIYA LETTER WA 0B82 ; XID_Continue # Mn TAMIL SIGN ANUSVARA 0B83 ; XID_Continue # Lo TAMIL SIGN VISARGA 0B85..0B8A ; XID_Continue # Lo [6] TAMIL LETTER A..TAMIL LETTER UU 0B8E..0B90 ; XID_Continue # Lo [3] TAMIL LETTER E..TAMIL LETTER AI 0B92..0B95 ; XID_Continue # Lo [4] TAMIL LETTER O..TAMIL LETTER KA 0B99..0B9A ; XID_Continue # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA 0B9C ; XID_Continue # Lo TAMIL LETTER JA 0B9E..0B9F ; XID_Continue # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA 0BA3..0BA4 ; XID_Continue # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA 0BA8..0BAA ; XID_Continue # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA 0BAE..0BB9 ; XID_Continue # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA 0BBE..0BBF ; XID_Continue # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I 0BC0 ; XID_Continue # Mn TAMIL VOWEL SIGN II 0BC1..0BC2 ; XID_Continue # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU 0BC6..0BC8 ; XID_Continue # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI 0BCA..0BCC ; XID_Continue # Mc [3] TAMIL 
VOWEL SIGN O..TAMIL VOWEL SIGN AU 0BCD ; XID_Continue # Mn TAMIL SIGN VIRAMA 0BD0 ; XID_Continue # Lo TAMIL OM 0BD7 ; XID_Continue # Mc TAMIL AU LENGTH MARK 0BE6..0BEF ; XID_Continue # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE 0C00 ; XID_Continue # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE 0C01..0C03 ; XID_Continue # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA 0C05..0C0C ; XID_Continue # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L 0C0E..0C10 ; XID_Continue # Lo [3] TELUGU LETTER E..TELUGU LETTER AI 0C12..0C28 ; XID_Continue # Lo [23] TELUGU LETTER O..TELUGU LETTER NA 0C2A..0C39 ; XID_Continue # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Continue # Lo TELUGU SIGN AVAGRAHA 0C3E..0C40 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II 0C41..0C44 ; XID_Continue # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C46..0C48 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C81 ; XID_Continue # Mn KANNADA SIGN CANDRABINDU 0C82..0C83 ; XID_Continue # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA 0C85..0C8C ; XID_Continue # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 0C8E..0C90 ; XID_Continue # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 0C92..0CA8 ; XID_Continue # Lo [23] KANNADA LETTER O..KANNADA LETTER NA 0CAA..0CB3 ; XID_Continue # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Continue # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBC ; XID_Continue # Mn KANNADA SIGN NUKTA 0CBD ; XID_Continue # Lo KANNADA SIGN 
AVAGRAHA 0CBE ; XID_Continue # Mc KANNADA VOWEL SIGN AA 0CBF ; XID_Continue # Mn KANNADA VOWEL SIGN I 0CC0..0CC4 ; XID_Continue # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR 0CC6 ; XID_Continue # Mn KANNADA VOWEL SIGN E 0CC7..0CC8 ; XID_Continue # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CDE ; XID_Continue # Lo KANNADA LETTER FA 0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; XID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D01 ; XID_Continue # Mn MALAYALAM SIGN CANDRABINDU 0D02..0D03 ; XID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D05..0D0C ; XID_Continue # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 0D0E..0D10 ; XID_Continue # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 0D12..0D3A ; XID_Continue # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA 0D3D ; XID_Continue # Lo MALAYALAM SIGN AVAGRAHA 0D3E..0D40 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D41..0D44 ; XID_Continue # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D46..0D48 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI 0D4A..0D4C ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU 0D4D ; XID_Continue # Mn MALAYALAM SIGN VIRAMA 0D4E ; XID_Continue # Lo MALAYALAM LETTER DOT REPH 0D57 ; XID_Continue # Mc MALAYALAM AU LENGTH MARK 0D5F..0D61 ; XID_Continue # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL 0D62..0D63 ; XID_Continue # Mn [2] MALAYALAM VOWEL SIGN 
VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0D66..0D6F ; XID_Continue # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE 0D7A..0D7F ; XID_Continue # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D82..0D83 ; XID_Continue # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA 0D85..0D96 ; XID_Continue # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA 0D9A..0DB1 ; XID_Continue # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA 0DB3..0DBB ; XID_Continue # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA 0DBD ; XID_Continue # Lo SINHALA LETTER DANTAJA LAYANNA 0DC0..0DC6 ; XID_Continue # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA 0DCA ; XID_Continue # Mn SINHALA SIGN AL-LAKUNA 0DCF..0DD1 ; XID_Continue # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA 0DD2..0DD4 ; XID_Continue # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA 0DD6 ; XID_Continue # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA 0DD8..0DDF ; XID_Continue # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA 0DE6..0DEF ; XID_Continue # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE 0DF2..0DF3 ; XID_Continue # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA 0E01..0E30 ; XID_Continue # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A 0E31 ; XID_Continue # Mn THAI CHARACTER MAI HAN-AKAT 0E32..0E33 ; XID_Continue # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM 0E34..0E3A ; XID_Continue # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU 0E40..0E45 ; XID_Continue # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO 0E46 ; XID_Continue # Lm THAI CHARACTER MAIYAMOK 0E47..0E4E ; XID_Continue # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN 0E50..0E59 ; XID_Continue # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE 0E81..0E82 ; XID_Continue # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG 0E84 ; XID_Continue # Lo LAO 
LETTER KHO TAM 0E87..0E88 ; XID_Continue # Lo [2] LAO LETTER NGO..LAO LETTER CO 0E8A ; XID_Continue # Lo LAO LETTER SO TAM 0E8D ; XID_Continue # Lo LAO LETTER NYO 0E94..0E97 ; XID_Continue # Lo [4] LAO LETTER DO..LAO LETTER THO TAM 0E99..0E9F ; XID_Continue # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG 0EA1..0EA3 ; XID_Continue # Lo [3] LAO LETTER MO..LAO LETTER LO LING 0EA5 ; XID_Continue # Lo LAO LETTER LO LOOT 0EA7 ; XID_Continue # Lo LAO LETTER WO 0EAA..0EAB ; XID_Continue # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG 0EAD..0EB0 ; XID_Continue # Lo [4] LAO LETTER O..LAO VOWEL SIGN A 0EB1 ; XID_Continue # Mn LAO VOWEL SIGN MAI KAN 0EB2..0EB3 ; XID_Continue # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM 0EB4..0EB9 ; XID_Continue # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU 0EBB..0EBC ; XID_Continue # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO 0EBD ; XID_Continue # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Continue # Lm LAO KO LA 0EC8..0ECD ; XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F35 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG NYI ZLA 0F37 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS 0F39 ; XID_Continue # Mn TIBETAN MARK TSA -PHRU 0F3E..0F3F ; XID_Continue # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES 0F40..0F47 ; XID_Continue # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; XID_Continue # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA 0F71..0F7E ; XID_Continue # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO 0F7F ; XID_Continue # Mc TIBETAN SIGN RNAM BCAD 0F80..0F84 ; 
XID_Continue # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA 0F86..0F87 ; XID_Continue # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS 0F88..0F8C ; XID_Continue # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN 0F8D..0F97 ; XID_Continue # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA 0F99..0FBC ; XID_Continue # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA 0FC6 ; XID_Continue # Mn TIBETAN SYMBOL PADMA GDAN 1000..102A ; XID_Continue # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU 102B..102C ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA 102D..1030 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU 1031 ; XID_Continue # Mc MYANMAR VOWEL SIGN E 1032..1037 ; XID_Continue # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW 1038 ; XID_Continue # Mc MYANMAR SIGN VISARGA 1039..103A ; XID_Continue # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT 103B..103C ; XID_Continue # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA 103D..103E ; XID_Continue # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA 103F ; XID_Continue # Lo MYANMAR LETTER GREAT SA 1040..1049 ; XID_Continue # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE 1050..1055 ; XID_Continue # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL 1056..1057 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR 1058..1059 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL 105A..105D ; XID_Continue # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE 105E..1060 ; XID_Continue # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA 1061 ; XID_Continue # Lo MYANMAR LETTER SGAW KAREN SHA 1062..1064 ; XID_Continue # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO 1065..1066 ; XID_Continue # Lo [2] MYANMAR LETTER WESTERN 
PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA 1067..106D ; XID_Continue # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 106E..1070 ; XID_Continue # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA 1071..1074 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE 1075..1081 ; XID_Continue # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 1082 ; XID_Continue # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA 1083..1084 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E 1085..1086 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y 1087..108C ; XID_Continue # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 108D ; XID_Continue # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE 108E ; XID_Continue # Lo MYANMAR LETTER RUMAI PALAUNG FA 108F ; XID_Continue # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 1090..1099 ; XID_Continue # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE 109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; XID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE 10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN 10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR 10FD..1248 ; XID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA 125A..125D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE 1260..1288 ; XID_Continue # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA 128A..128D 
; XID_Continue # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE 1290..12B0 ; XID_Continue # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA 12B2..12B5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE 12B8..12BE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO 12C0 ; XID_Continue # Lo ETHIOPIC SYLLABLE KXWA 12C2..12C5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE 12C8..12D6 ; XID_Continue # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O 12D8..1310 ; XID_Continue # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; XID_Continue # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 135D..135F ; XID_Continue # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1369..1371 ; XID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE 1380..138F ; XID_Continue # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 13A0..13F5 ; XID_Continue # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; XID_Continue # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1401..166C ; XID_Continue # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA 166F..167F ; XID_Continue # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W 1681..169A ; XID_Continue # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH 16A0..16EA ; XID_Continue # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X 16EE..16F0 ; XID_Continue # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL 16F1..16F8 ; XID_Continue # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC 1700..170C ; XID_Continue # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA 170E..1711 ; XID_Continue # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA 1712..1714 ; XID_Continue # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA 1720..1731 ; XID_Continue # Lo [18] HANUNOO LETTER A..HANUNOO 
LETTER HA 1732..1734 ; XID_Continue # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1740..1751 ; XID_Continue # Lo [18] BUHID LETTER A..BUHID LETTER HA 1752..1753 ; XID_Continue # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1760..176C ; XID_Continue # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU 17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; XID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C6 ; XID_Continue # Mn KHMER SIGN NIKAHIT 17C7..17C8 ; XID_Continue # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU 17C9..17D3 ; XID_Continue # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17D7 ; XID_Continue # Lm KHMER SIGN LEK TOO 17DC ; XID_Continue # Lo KHMER SIGN AVAKRAHASANYA 17DD ; XID_Continue # Mn KHMER SIGN ATTHACAN 17E0..17E9 ; XID_Continue # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE 180B..180D ; XID_Continue # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 1810..1819 ; XID_Continue # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE 1820..1842 ; XID_Continue # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; XID_Continue # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1844..1877 ; XID_Continue # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA 1880..18A8 ; XID_Continue # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA 18A9 ; XID_Continue # Mn MONGOLIAN LETTER ALI GALI DAGALGA 18AA ; XID_Continue # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA 18B0..18F5 ; XID_Continue # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S 1900..191E ; 
XID_Continue # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA 1920..1922 ; XID_Continue # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U 1923..1926 ; XID_Continue # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU 1927..1928 ; XID_Continue # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O 1929..192B ; XID_Continue # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 1930..1931 ; XID_Continue # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA 1932 ; XID_Continue # Mn LIMBU SMALL LETTER ANUSVARA 1933..1938 ; XID_Continue # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA 1939..193B ; XID_Continue # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I 1946..194F ; XID_Continue # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE 1950..196D ; XID_Continue # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI 1970..1974 ; XID_Continue # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 1980..19AB ; XID_Continue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA 19B0..19C9 ; XID_Continue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 19D0..19D9 ; XID_Continue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE 19DA ; XID_Continue # No NEW TAI LUE THAM DIGIT ONE 1A00..1A16 ; XID_Continue # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA 1A17..1A18 ; XID_Continue # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U 1A19..1A1A ; XID_Continue # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O 1A1B ; XID_Continue # Mn BUGINESE VOWEL SIGN AE 1A20..1A54 ; XID_Continue # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA 1A55 ; XID_Continue # Mc TAI THAM CONSONANT SIGN MEDIAL RA 1A56 ; XID_Continue # Mn TAI THAM CONSONANT SIGN MEDIAL LA 1A57 ; XID_Continue # Mc TAI THAM CONSONANT SIGN LA TANG LAI 1A58..1A5E ; XID_Continue # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA 1A60 ; XID_Continue # Mn TAI THAM SIGN SAKOT 1A61 ; XID_Continue # Mc TAI THAM VOWEL SIGN A 1A62 ; XID_Continue # Mn TAI THAM VOWEL SIGN MAI SAT 
1A63..1A64 ; XID_Continue # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA 1A65..1A6C ; XID_Continue # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW 1A6D..1A72 ; XID_Continue # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI 1A73..1A7C ; XID_Continue # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; XID_Continue # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1A80..1A89 ; XID_Continue # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA 1B34 ; XID_Continue # Mn BALINESE SIGN REREKAN 1B35 ; XID_Continue # Mc BALINESE VOWEL SIGN TEDUNG 1B36..1B3A ; XID_Continue # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA 1B3B ; XID_Continue # Mc BALINESE VOWEL SIGN RA REPA TEDUNG 1B3C ; XID_Continue # Mn BALINESE VOWEL SIGN LA LENGA 1B3D..1B41 ; XID_Continue # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG 1B42 ; XID_Continue # Mn BALINESE VOWEL SIGN PEPET 1B43..1B44 ; XID_Continue # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG 1B45..1B4B ; XID_Continue # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B50..1B59 ; XID_Continue # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE 1B6B..1B73 ; XID_Continue # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1B80..1B81 ; XID_Continue # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1B82 ; XID_Continue # Mc SUNDANESE SIGN PANGWISAD 1B83..1BA0 ; XID_Continue # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BA1 ; 
XID_Continue # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA2..1BA5 ; XID_Continue # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH 1BAB..1BAD ; XID_Continue # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; XID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE 1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; XID_Continue # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BED ; XID_Continue # Mn BATAK VOWEL SIGN KARO O 1BEE ; XID_Continue # Mc BATAK VOWEL SIGN U 1BEF..1BF1 ; XID_Continue # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H 1BF2..1BF3 ; XID_Continue # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 1C00..1C23 ; XID_Continue # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C24..1C2B ; XID_Continue # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C2C..1C33 ; XID_Continue # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; XID_Continue # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1C36..1C37 ; XID_Continue # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1C40..1C49 ; XID_Continue # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE 1C4D..1C4F ; XID_Continue # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE 1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CD0..1CD2 ; 
XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD4..1CE0 ; XID_Continue # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE1 ; XID_Continue # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; XID_Continue # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; XID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF2..1CF3 ; XID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE 1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1CF8..1CF9 ; XID_Continue # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1DC0..1DF5 ; XID_Continue # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE 1DFC..1DFF ; XID_Continue # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1E00..1F15 ; XID_Continue # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; XID_Continue # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F20..1F45 ; XID_Continue # L& [38] GREEK SMALL LETTER ETA WITH 
PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 1F48..1F4D ; XID_Continue # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F50..1F57 ; XID_Continue # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F59 ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA 1F5B ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5D ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5F..1F7D ; XID_Continue # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA 1F80..1FB4 ; XID_Continue # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB6..1FBC ; XID_Continue # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBE ; XID_Continue # L& GREEK PROSGEGRAMMENI 1FC2..1FC4 ; XID_Continue # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC6..1FCC ; XID_Continue # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD0..1FD3 ; XID_Continue # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6..1FDB ; XID_Continue # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA 1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE 2054 ; XID_Continue # Pc INVERTED UNDERTIE 2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I 
207F ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N 2090..209C ; XID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20D0..20DC ; XID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 ; XID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E5..20F0 ; XID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE 2102 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL C 2107 ; XID_Continue # L& EULER CONSTANT 210A..2113 ; XID_Continue # L& [10] SCRIPT SMALL G..SCRIPT SMALL L 2115 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL N 2118 ; XID_Continue # Sm SCRIPT CAPITAL P 2119..211D ; XID_Continue # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R 2124 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL Z 2126 ; XID_Continue # L& OHM SIGN 2128 ; XID_Continue # L& BLACK-LETTER CAPITAL Z 212A..212D ; XID_Continue # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C 212E ; XID_Continue # So ESTIMATED SYMBOL 212F..2134 ; XID_Continue # L& [6] SCRIPT SMALL E..SCRIPT SMALL O 2135..2138 ; XID_Continue # Lo [4] ALEF SYMBOL..DALET SYMBOL 2139 ; XID_Continue # L& INFORMATION SOURCE 213C..213F ; XID_Continue # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI 2145..2149 ; XID_Continue # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J 214E ; XID_Continue # L& TURNED SMALL F 2160..2182 ; XID_Continue # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND 2183..2184 ; XID_Continue # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C 2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE 2C60..2C7B ; XID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; 
XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE 2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN 2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN 2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; XID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 2DA8..2DAE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO 2DB0..2DB6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO 2DB8..2DBE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO 2DC0..2DC6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO 2DC8..2DCE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO 2DD0..2DD6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 2DD8..2DDE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO 2DE0..2DFF ; XID_Continue # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 3005 ; XID_Continue # Lm IDEOGRAPHIC ITERATION MARK 3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE 
MARK..IDEOGRAPHIC ENTERING TONE MARK 302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 303C ; XID_Continue # Lo MASU MARK 3041..3096 ; XID_Continue # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE 3099..309A ; XID_Continue # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI 30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO 30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO 3105..312D ; XID_Continue # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH 3131..318E ; XID_Continue # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 31A0..31BA ; XID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4E00..9FD5 ; XID_Continue # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 A000..A014 ; XID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Continue # Lm YI SYLLABLE WU A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A4D0..A4F7 ; XID_Continue # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; XID_Continue # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU A500..A60B ; XID_Continue # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG A60C ; XID_Continue # Lm VAI SYLLABLE LENGTHENER A610..A61F ; XID_Continue # Lo [16] VAI SYLLABLE NDOLE 
FA..VAI SYMBOL JONG A620..A629 ; XID_Continue # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE A62A..A62B ; XID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; XID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; XID_Continue # Lm CYRILLIC PAYEROK A680..A69B ; XID_Continue # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O A69C..A69D ; XID_Continue # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A69E..A69F ; XID_Continue # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A717..A71F ; XID_Continue # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A722..A76F ; XID_Continue # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; XID_Continue # Lm MODIFIER LETTER US A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT A790..A7AD ; XID_Continue # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT A7B0..A7B7 ; XID_Continue # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER 
SMALL LIGATURE OE A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA A803..A805 ; XID_Continue # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O A806 ; XID_Continue # Mn SYLOTI NAGRI SIGN HASANTA A807..A80A ; XID_Continue # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO A80B ; XID_Continue # Mn SYLOTI NAGRI SIGN ANUSVARA A80C..A822 ; XID_Continue # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO A823..A824 ; XID_Continue # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A825..A826 ; XID_Continue # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E A827 ; XID_Continue # Mc SYLOTI NAGRI VOWEL SIGN OO A840..A873 ; XID_Continue # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU A880..A881 ; XID_Continue # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A882..A8B3 ; XID_Continue # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA A8B4..A8C3 ; XID_Continue # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU A8C4 ; XID_Continue # Mn SAURASHTRA SIGN VIRAMA A8D0..A8D9 ; XID_Continue # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE A8E0..A8F1 ; XID_Continue # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A8F2..A8F7 ; XID_Continue # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA A8FB ; XID_Continue # Lo DEVANAGARI HEADSTROKE A8FD ; XID_Continue # Lo DEVANAGARI JAIN OM A900..A909 ; XID_Continue # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE A90A..A925 ; XID_Continue # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO A926..A92D ; XID_Continue # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A930..A946 ; XID_Continue # Lo [23] REJANG LETTER KA..REJANG LETTER A A947..A951 ; XID_Continue # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R A952..A953 ; XID_Continue # Mc [2] 
REJANG CONSONANT SIGN H..REJANG VIRAMA A960..A97C ; XID_Continue # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH A980..A982 ; XID_Continue # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR A983 ; XID_Continue # Mc JAVANESE SIGN WIGNYAN A984..A9B2 ; XID_Continue # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA A9B3 ; XID_Continue # Mn JAVANESE SIGN CECAK TELU A9B4..A9B5 ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG A9B6..A9B9 ; XID_Continue # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT A9BA..A9BB ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE A9BC ; XID_Continue # Mn JAVANESE VOWEL SIGN PEPET A9BD..A9C0 ; XID_Continue # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON A9CF ; XID_Continue # Lm JAVANESE PANGRANGKEP A9D0..A9D9 ; XID_Continue # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE A9E0..A9E4 ; XID_Continue # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA A9E5 ; XID_Continue # Mn MYANMAR SIGN SHAN SAW A9E6 ; XID_Continue # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION A9E7..A9EF ; XID_Continue # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA A9F0..A9F9 ; XID_Continue # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE A9FA..A9FE ; XID_Continue # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA AA00..AA28 ; XID_Continue # Lo [41] CHAM LETTER A..CHAM LETTER HA AA29..AA2E ; XID_Continue # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE AA2F..AA30 ; XID_Continue # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA31..AA32 ; XID_Continue # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE AA33..AA34 ; XID_Continue # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA35..AA36 ; XID_Continue # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA AA40..AA42 ; XID_Continue # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA43 ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL NG AA44..AA4B ; 
XID_Continue # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS AA4C ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL M AA4D ; XID_Continue # Mc CHAM CONSONANT SIGN FINAL H AA50..AA59 ; XID_Continue # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE AA60..AA6F ; XID_Continue # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA AA70 ; XID_Continue # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AA71..AA76 ; XID_Continue # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM AA7A ; XID_Continue # Lo MYANMAR LETTER AITON RA AA7B ; XID_Continue # Mc MYANMAR SIGN PAO KAREN TONE AA7C ; XID_Continue # Mn MYANMAR SIGN TAI LAING TONE-2 AA7D ; XID_Continue # Mc MYANMAR SIGN TAI LAING TONE-5 AA7E..AAAF ; XID_Continue # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O AAB0 ; XID_Continue # Mn TAI VIET MAI KANG AAB1 ; XID_Continue # Lo TAI VIET VOWEL AA AAB2..AAB4 ; XID_Continue # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB5..AAB6 ; XID_Continue # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O AAB7..AAB8 ; XID_Continue # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AAB9..AABD ; XID_Continue # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AABE..AABF ; XID_Continue # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC0 ; XID_Continue # Lo TAI VIET TONE MAI NUENG AAC1 ; XID_Continue # Mn TAI VIET TONE MAI THO AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AAF5 ; XID_Continue # Mc 
MEETEI MAYEK VOWEL SIGN VISARGA AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO AB20..AB26 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO AB28..AB2E ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO AB30..AB5A ; XID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG AB5C..AB5F ; XID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB60..AB65 ; XID_Continue # L& [6] LATIN SMALL LETTER SAKHA YAT..GREEK LETTER SMALL CAPITAL OMEGA AB70..ABBF ; XID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA ABC0..ABE2 ; XID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE8 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN UNAP ABE9..ABEA ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG ABEC ; XID_Continue # Mc MEETEI MAYEK LUM IYEK ABED ; XID_Continue # Mn MEETEI MAYEK APUN IYEK ABF0..ABF9 ; XID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK 
COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FB1D ; XID_Continue # Lo HEBREW LETTER YOD WITH HIRIQ FB1E ; XID_Continue # Mn HEBREW POINT JUDEO-SPANISH VARIKA FB1F..FB28 ; XID_Continue # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV FB2A..FB36 ; XID_Continue # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH FB38..FB3C ; XID_Continue # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH FB3E ; XID_Continue # Lo HEBREW LETTER MEM WITH DAGESH FB40..FB41 ; XID_Continue # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH FB43..FB44 ; XID_Continue # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; XID_Continue # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBD3..FC5D ; XID_Continue # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM FC64..FD3D ; XID_Continue # Lo [218] ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD50..FD8F ; XID_Continue # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM FD92..FDC7 ; XID_Continue # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FDF0..FDF9 ; XID_Continue # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA ISOLATED FORM FE00..FE0F ; XID_Continue # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 FE20..FE2F ; XID_Continue # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF FE33..FE34 ; XID_Continue # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW 
LINE FE4D..FE4F ; XID_Continue # Pc [3] DASHED LOW LINE..WAVY LOW LINE FE71 ; XID_Continue # Lo ARABIC TATWEEL WITH FATHATAN ABOVE FE73 ; XID_Continue # Lo ARABIC TAIL FRAGMENT FE77 ; XID_Continue # Lo ARABIC FATHA MEDIAL FORM FE79 ; XID_Continue # Lo ARABIC DAMMA MEDIAL FORM FE7B ; XID_Continue # Lo ARABIC KASRA MEDIAL FORM FE7D ; XID_Continue # Lo ARABIC SHADDA MEDIAL FORM FE7F..FEFC ; XID_Continue # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N FF9E..FF9F ; XID_Continue # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK FFA0..FFBE ; XID_Continue # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH FFC2..FFC7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E FFCA..FFCF ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE FFD2..FFD7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10000..1000B ; XID_Continue # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE 1000D..10026 ; XID_Continue # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO 10028..1003A ; XID_Continue # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO 1003C..1003D ; XID_Continue # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 
ZE 1003F..1004D ; XID_Continue # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; XID_Continue # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; XID_Continue # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 10140..10174 ; XID_Continue # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS 101FD ; XID_Continue # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE 10280..1029C ; XID_Continue # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X 102A0..102D0 ; XID_Continue # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 102E0 ; XID_Continue # Mn COPTIC EPACT THOUSANDS MARK 10300..1031F ; XID_Continue # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS 10330..10340 ; XID_Continue # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA 10341 ; XID_Continue # Nl GOTHIC LETTER NINETY 10342..10349 ; XID_Continue # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 1034A ; XID_Continue # Nl GOTHIC LETTER NINE HUNDRED 10350..10375 ; XID_Continue # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA 10376..1037A ; XID_Continue # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII 10380..1039D ; XID_Continue # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU 103A0..103C3 ; XID_Continue # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA 103C8..103CF ; XID_Continue # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH 103D1..103D5 ; XID_Continue # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 10400..1044F ; XID_Continue # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW 10450..1049D ; XID_Continue # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 104A0..104A9 ; XID_Continue # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10500..10527 ; XID_Continue # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; XID_Continue # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW 10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR 
A SIGN A664 10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 10800..10805 ; XID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 10808 ; XID_Continue # Lo CYPRIOT SYLLABLE JO 1080A..10835 ; XID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO 10837..10838 ; XID_Continue # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE 1083C ; XID_Continue # Lo CYPRIOT SYLLABLE ZA 1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10860..10876 ; XID_Continue # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW 10880..1089E ; XID_Continue # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW 108E0..108F2 ; XID_Continue # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH 108F4..108F5 ; XID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; XID_Continue # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10A10..10A13 ; XID_Continue # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; XID_Continue # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA 10A19..10A33 ; XID_Continue # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA 10A38..10A3A ; XID_Continue # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW 10A3F ; XID_Continue # Mn KHAROSHTHI VIRAMA 10A60..10A7C ; XID_Continue # Lo [29] OLD 
SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH 10A80..10A9C ; XID_Continue # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH 10AC0..10AC7 ; XID_Continue # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW 10AC9..10AE4 ; XID_Continue # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW 10AE5..10AE6 ; XID_Continue # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10B00..10B35 ; XID_Continue # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE 10B40..10B55 ; XID_Continue # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW 10B60..10B72 ; XID_Continue # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW 10B80..10B91 ; XID_Continue # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 10C00..10C48 ; XID_Continue # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10C80..10CB2 ; XID_Continue # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; XID_Continue # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 11000 ; XID_Continue # Mc BRAHMI SIGN CANDRABINDU 11001 ; XID_Continue # Mn BRAHMI SIGN ANUSVARA 11002 ; XID_Continue # Mc BRAHMI SIGN VISARGA 11003..11037 ; XID_Continue # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11038..11046 ; XID_Continue # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA 11066..1106F ; XID_Continue # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 1107F..11081 ; XID_Continue # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA 11082 ; XID_Continue # Mc KAITHI SIGN VISARGA 11083..110AF ; XID_Continue # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110B0..110B2 ; XID_Continue # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN 
NUKTA 110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE 110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA 11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU 1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E 1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA 11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE 11150..11172 ; XID_Continue # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA 11173 ; XID_Continue # Mn MAHAJANI SIGN NUKTA 11176 ; XID_Continue # Lo MAHAJANI LIGATURE SHRI 11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA 11182 ; XID_Continue # Mc SHARADA SIGN VISARGA 11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA 111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II 111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O 111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA 111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM 111CA..111CC ; XID_Continue # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK 111D0..111D9 ; XID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE 111DA ; XID_Continue # Lo SHARADA EKAM 111DC ; XID_Continue # Lo SHARADA HEADSTROKE 11200..11211 ; XID_Continue # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA 11213..1122B ; XID_Continue # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA 1122C..1122E ; XID_Continue # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II 1122F..11231 ; XID_Continue # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI 11232..11233 ; XID_Continue # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU 11234 ; XID_Continue # Mn KHOJKI SIGN ANUSVARA 11235 ; 
XID_Continue # Mc KHOJKI SIGN VIRAMA 11236..11237 ; XID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 11280..11286 ; XID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA 11288 ; XID_Continue # Lo MULTANI LETTER GHA 1128A..1128D ; XID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA 1128F..1129D ; XID_Continue # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA 1129F..112A8 ; XID_Continue # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA 112B0..112DE ; XID_Continue # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA 112DF ; XID_Continue # Mn KHUDAWADI SIGN ANUSVARA 112E0..112E2 ; XID_Continue # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 112E3..112EA ; XID_Continue # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA 112F0..112F9 ; XID_Continue # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE 11300..11301 ; XID_Continue # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU 11302..11303 ; XID_Continue # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA 11305..1130C ; XID_Continue # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L 1130F..11310 ; XID_Continue # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI 11313..11328 ; XID_Continue # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA 1132A..11330 ; XID_Continue # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA 11332..11333 ; XID_Continue # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; XID_Continue # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA 1133C ; XID_Continue # Mn GRANTHA SIGN NUKTA 1133D ; XID_Continue # Lo GRANTHA SIGN AVAGRAHA 1133E..1133F ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I 11340 ; XID_Continue # Mn GRANTHA VOWEL SIGN II 11341..11344 ; XID_Continue # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR 11347..11348 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI 1134B..1134D ; XID_Continue # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA 11350 ; XID_Continue # Lo GRANTHA OM 11357 ; 
XID_Continue # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; XID_Continue # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; XID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; XID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA 11480..114AF ; XID_Continue # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA 114B0..114B2 ; XID_Continue # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II 114B3..114B8 ; XID_Continue # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL 114B9 ; XID_Continue # Mc TIRHUTA VOWEL SIGN E 114BA ; XID_Continue # Mn TIRHUTA VOWEL SIGN SHORT E 114BB..114BE ; XID_Continue # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU 114BF..114C0 ; XID_Continue # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA 114C1 ; XID_Continue # Mc TIRHUTA SIGN VISARGA 114C2..114C3 ; XID_Continue # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA 114C4..114C5 ; XID_Continue # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG 114C7 ; XID_Continue # Lo TIRHUTA OM 114D0..114D9 ; XID_Continue # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11580..115AE ; XID_Continue # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA 115AF..115B1 ; XID_Continue # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II 115B2..115B5 ; XID_Continue # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR 115B8..115BB ; XID_Continue # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU 115BC..115BD ; XID_Continue # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA 115BE ; XID_Continue # Mc SIDDHAM SIGN VISARGA 115BF..115C0 ; XID_Continue # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA 115D8..115DB ; XID_Continue # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U 115DC..115DD ; XID_Continue # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU 11600..1162F ; XID_Continue # 
Lo [48] MODI LETTER A..MODI LETTER LLA 11630..11632 ; XID_Continue # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II 11633..1163A ; XID_Continue # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI 1163B..1163C ; XID_Continue # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU 1163D ; XID_Continue # Mn MODI SIGN ANUSVARA 1163E ; XID_Continue # Mc MODI SIGN VISARGA 1163F..11640 ; XID_Continue # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA 11644 ; XID_Continue # Lo MODI SIGN HUVA 11650..11659 ; XID_Continue # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA 116AC ; XID_Continue # Mc TAKRI SIGN VISARGA 116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA 116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B6 ; XID_Continue # Mc TAKRI SIGN VIRAMA 116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA 116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 11700..11719 ; XID_Continue # Lo [26] AHOM LETTER KA..AHOM LETTER JHA 1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; XID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; XID_Continue # Mc AHOM VOWEL SIGN E 11727..1172B ; XID_Continue # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 11730..11739 ; XID_Continue # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118A0..118DF ; XID_Continue # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 118E0..118E9 ; XID_Continue # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 118FF ; XID_Continue # Lo WARANG CITI OM 11AC0..11AF8 ; XID_Continue # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; 
XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 12480..12543 ; XID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; XID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AD0..16AED ; XID_Continue # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I 16AF0..16AF4 ; XID_Continue # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B00..16B2F ; XID_Continue # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU 16B30..16B36 ; XID_Continue # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16B40..16B43 ; XID_Continue # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16B50..16B59 ; XID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16B63..16B77 ; XID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; XID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16F00..16F44 ; XID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA 16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION 16F51..16F7E ; XID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1BC00..1BC6A ; XID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; XID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED 
TANGENT HOOK 1BC80..1BC88 ; XID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; XID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9D..1BC9E ; XID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; XID_Continue # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 ; XID_Continue # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; XID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; XID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; XID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D400..1D454 ; XID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D 1D4A2 ; XID_Continue # L& MATHEMATICAL SCRIPT CAPITAL G 1D4A5..1D4A6 ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K 1D4A9..1D4AC ; XID_Continue # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q 1D4AE..1D4B9 ; XID_Continue # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D 1D4BB ; XID_Continue # L& MATHEMATICAL SCRIPT SMALL F 1D4BD..1D4C3 ; XID_Continue # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N 1D4C5..1D505 ; XID_Continue # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B 1D507..1D50A ; XID_Continue # L& [4] MATHEMATICAL FRAKTUR CAPITAL 
D..MATHEMATICAL FRAKTUR CAPITAL G 1D50D..1D514 ; XID_Continue # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q 1D516..1D51C ; XID_Continue # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y 1D51E..1D539 ; XID_Continue # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B 1D53B..1D53E ; XID_Continue # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G 1D540..1D544 ; XID_Continue # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M 1D546 ; XID_Continue # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O 1D54A..1D550 ; XID_Continue # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 1D552..1D6A5 ; XID_Continue # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J 1D6A8..1D6C0 ; XID_Continue # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA 1D6C2..1D6DA ; XID_Continue # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA 1D6DC..1D6FA ; XID_Continue # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA 1D6FC..1D714 ; XID_Continue # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA 1D716..1D734 ; XID_Continue # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA 1D736..1D74E ; XID_Continue # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA 1D750..1D76E ; XID_Continue # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 1D770..1D788 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA 1D78A..1D7A8 ; XID_Continue # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 
1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1DA00..1DA36 ; XID_Continue # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; XID_Continue # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; XID_Continue # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS 1DA84 ; XID_Continue # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; XID_Continue # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; XID_Continue # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 1E800..1E8C4 ; XID_Continue # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON 1E8D0..1E8D6 ; XID_Continue # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM 1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH 1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH 1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF 1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH 1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD 1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN 1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM 1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH 1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH 1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM 1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL 
TAILED AIN 1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF 1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN 1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH 1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD 1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN 1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON 1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF 1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM 1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH 1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF 1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF 1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH 1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH 1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH 1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH 1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN 1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL 1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 
; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # Total code points: 112333 "

/// FParsec parsers that read the code point ranges of the "XID_Start" and
/// "XID_Continue" sections out of the `xidProperties` text.
module Parser =
    open FParsec.Primitives
    open FParsec.CharParsers

    /// Maps a hex digit char to its numeric value: the low nibble of the
    /// char code, plus 9 when bit 6 is set (i.e. for 'A'-'F' and 'a'-'f').
    let hex2int c = (int c &&& 15) + (int c >>> 6)*9

    /// Parses 4 to 6 hex digits and returns them as an integer code point.
    let pCodePoint =
        manyMinMaxSatisfyL 4 6 isHex "codepoint with 4-6 hex digits"
        |>> fun (digits: string) ->
                digits |> Seq.fold (fun value digit -> value*16 + hex2int digit) 0

    /// Skips to just past the "Derived Property: <name>" header line and then
    /// over every following line that does not start with a hex digit.
    let skipToBeginOfSection name =
        let sectionHeader =
            skipCharsTillString ("Derived Property: " + name) true System.Int32.MaxValue
        let nonDataLine = nextCharSatisfiesNot isHex >>. skipRestOfLine true
        sectionHeader >>. skipRestOfLine true
        >>. skipMany nonDataLine

    let str s = pstring s

    /// Parses one data line, either "XXXX" or "XXXX..YYYY" followed by ';' and
    /// the rest of the line, and returns the inclusive (first, last) pair.
    /// A single code point is returned as (first, first).
    let range =
        let toPair first last =
            // -1 is the marker for "no upper bound was given on this line"
            if last = -1 then (first, first) else (first, last)
        pipe2 pCodePoint (str ".." >>. pCodePoint <|>% -1) toPair
        .>> (spaces >>. str ";" >>. skipRestOfLine true)

    /// The line that terminates a section.
    let sectionEnd = newline >>. str "# Total code points:"

    /// Parses all ranges of the section with the given property name.
    let section name = skipToBeginOfSection name >>. many1 range .>> sectionEnd

    /// Parses the "XID_Start" section followed by the "XID_Continue" section.
    let xidRanges = section "XID_Start" .>>. section "XID_Continue"

    /// Runs `xidRanges` on the `xidProperties` text.
    let parseXIdRanges() = run xidRanges xidProperties

open FParsec

/// Checks the static and per-instance character predicates of
/// IdentifierValidator against the ranges parsed from `xidProperties`.
let testCharPredicates() =
    let xidStartRanges, xidContinueRanges =
        match Parser.parseXIdRanges() with
        | CharParsers.Failure(msg, _, _) -> failwith msg
        | CharParsers.Success(ranges, _, _) -> ranges

    // Verifies that the predicate pair (fBmp for code points below 0x10000,
    // fSmp for the remaining ones, which it receives offset by 0x10000) is
    // true inside every range and, outside the ranges, true exactly for the
    // surrogate code points 0xd800-0xdfff.
    let checkPredicate fBmp fSmp ranges =
        let test codePoint =
            if codePoint < 0x10000 then fBmp (char codePoint)
            else fSmp (codePoint - 0x10000)
        let checkGap lo hi =
            for cp = lo to hi do
                let actual = test cp
                let isSurrogate = (cp >= 0xd800 && cp <= 0xdfff)
                actual |> Equal isSurrogate
        let mutable next = 0 // first code point not yet checked
        for (first, last) in ranges do
            checkGap next (first - 1)
            for cp = first to last do
                test cp |> True
            next <- last + 1
        checkGap next 0x10ffff

    checkPredicate IdentifierValidator.IsXIdStartOrSurrogate
                   FParsec.IdentifierValidator.IsXIdStartSmp
                   xidStartRanges
    checkPredicate IdentifierValidator.IsXIdContinueOrSurrogate
                   FParsec.IdentifierValidator.IsXIdContinueSmp
                   xidContinueRanges

    let iv = new IdentifierValidator()
    let startF = iv.IsIdStartOrSurrogateFunc
    let continueF = iv.IsIdContinueOrSurrogateFunc
    let continueOrJoinF = iv.IsIdContinueOrJoinControlOrSurrogateFunc

    // For every UTF-16 code unit the instance functions must agree with the
    // static predicates, and the join-control variant must additionally
    // accept U+200C and U+200D.
    for code = 0 to 0xffff do
        let c = char code
        IdentifierValidator.IsXIdContinueOrJoinControlOrSurrogate(c)
        |> Equal (IdentifierValidator.IsXIdContinueOrSurrogate(c) || (c >= '\u200C' && c <= '\u200D'))
        startF(c) |> Equal (IdentifierValidator.IsXIdStartOrSurrogate(c))
        continueF(c) |> Equal (IdentifierValidator.IsXIdContinueOrSurrogate(c))
        continueOrJoinF(c) |> Equal (IdentifierValidator.IsXIdContinueOrJoinControlOrSurrogate(c))

    // The function values obtained above must reflect later changes to the
    // ASCII options of the validator.
    startF '!' |> False
    continueF '!' |> False
    continueOrJoinF '!' |> False

    iv.SetIsAsciiIdNonStartChar('!')
    startF '!' |> False
    continueF '!' |> True
    continueOrJoinF '!' |> True

    iv.SetIsAsciiIdStartChar('!')
    startF '!' |> True
    continueF '!' |> True
    continueOrJoinF '!' |> True

    // The SMP predicates must throw an IndexOutOfRangeException for
    // arguments outside the range they accept.
    let expectIndexOutOfRange (smpPredicate: int -> bool) argument =
        try
            smpPredicate argument |> ignore
            Fail()
        with :? System.IndexOutOfRangeException -> ()

    let invalidSmpArguments = [-1; 0x100000; System.Int32.MinValue; System.Int32.MaxValue]
    for argument in invalidSmpArguments do
        expectIndexOutOfRange IdentifierValidator.IsXIdStartSmp argument
    for argument in invalidSmpArguments do
        expectIndexOutOfRange IdentifierValidator.IsXIdContinueSmp argument

type IdFlags = IdentifierValidator.IdentifierCharFlags let testIdentifierValidator() = let iv = IdentifierValidator() try iv.SetIsAsciiIdNonStartChar('\u0000') with :? System.ArgumentOutOfRangeException -> () try iv.SetIsAsciiIdNonStartChar('\u0080') with :? System.ArgumentOutOfRangeException -> () try iv.SetIsAsciiIdStartChar('\u0000') with :? System.ArgumentOutOfRangeException -> () try iv.SetIsAsciiIdStartChar('\u0080') with :? System.ArgumentOutOfRangeException -> () try iv.SetIsAsciiNoIdChar('\u0000') with :? System.ArgumentOutOfRangeException -> () try iv.SetIsAsciiNoIdChar('\u0080') with :?
System.ArgumentOutOfRangeException -> () iv.IsIdStartOrSurrogateFunc('$') |> False iv.IsIdContinueOrSurrogateFunc('$') |> False iv.IsIdContinueOrJoinControlOrSurrogateFunc('$') |> False iv.SetIsAsciiIdStartChar('$') iv.IsIdStartOrSurrogateFunc('$') |> True iv.IsIdContinueOrSurrogateFunc('$') |> True iv.IsIdContinueOrJoinControlOrSurrogateFunc('$') |> True iv.SetIsAsciiIdNonStartChar('$') iv.IsIdStartOrSurrogateFunc('$') |> False iv.IsIdContinueOrSurrogateFunc('$') |> True iv.IsIdContinueOrJoinControlOrSurrogateFunc('$') |> True iv.SetIsAsciiNoIdChar('$') iv.IsIdStartOrSurrogateFunc('$') |> False iv.IsIdContinueOrSurrogateFunc('$') |> False iv.IsIdContinueOrJoinControlOrSurrogateFunc('$') |> False iv.SetIsAsciiIdStartChar('$') iv.IsIdStartOrSurrogateFunc('$') |> True iv.IsIdContinueOrSurrogateFunc('$') |> True iv.IsIdContinueOrJoinControlOrSurrogateFunc('$') |> True iv.SetIsAsciiIdNonStartChar('?') iv.IsIdStartOrSurrogateFunc('?') |> False iv.IsIdContinueOrSurrogateFunc('?') |> True iv.IsIdContinueOrJoinControlOrSurrogateFunc('?') |> True let start = IdFlags.NonContinue ||| IdFlags.Continue let opts = Array.zeroCreate 128 opts[int '$'] <- start opts[int '?'] <- IdFlags.Continue for i = 0 to 127 do if IdentifierValidator.IsXIdStartOrSurrogate(char i) then opts[i] <- start elif IdentifierValidator.IsXIdContinueOrSurrogate(char i) then opts[i] <- IdFlags.Continue let isSurrogate (str: string) (index: int) = System.Char.IsHighSurrogate(str[index]) && index + 1 < str.Length && System.Char.IsLowSurrogate(str[index + 1]) let isIdStart (opts: IdFlags[]) (str: string) (index: int) = let c = (str[index]) if int c < opts.Length then int (opts[int c] &&& IdFlags.NonContinue) <> 0 elif not (System.Char.IsSurrogate(c)) then IdentifierValidator.IsXIdStartOrSurrogate(c) else isSurrogate str index && IdentifierValidator.IsXIdStartSmp(System.Char.ConvertToUtf32(str, index) - 0x10000) let isIdContinue (opts: IdFlags[]) allowJoiner (str: string) (index: int) = let c = (str[index]) if 
int c < opts.Length then int (opts[int c] &&& IdFlags.Continue) <> 0 elif not (System.Char.IsSurrogate(c)) then IdentifierValidator.IsXIdContinueOrSurrogate(c) || (allowJoiner && c >= '\u200c' && c <= '\u200d') else isSurrogate str index && IdentifierValidator.IsXIdContinueSmp(System.Char.ConvertToUtf32(str, index) - 0x10000) let isIdentifier (opts: IdFlags[]) allowJoiner (str: string) = if str.Length > 0 && isIdStart opts str 0 then let rec loop iLast = let i = iLast + if isSurrogate str iLast then 2 else 1 if i < str.Length then if isIdContinue opts allowJoiner str i then loop i else (false, i) else (true, -1) loop 0 else (false, 0) isIdStart opts "\uFB1C" 0 |> Equal false isIdContinue opts false "\uFB1C" 0 |> Equal false isIdStart opts "\uFB1D" 0 |> Equal true isIdContinue opts false "\uFB1D" 0 |> Equal true isIdStart opts "\uFB1E" 0 |> Equal false isIdContinue opts false "\uFB1E" 0 |> Equal true let U = System.Char.ConvertFromUtf32 // the \Uxxxxxxxx char escapes currently don't work in the F# compiler isIdStart opts (U 0x000101FD) 0 |> Equal false isIdContinue opts false (U 0x000101FD) 0 |> Equal true isIdStart opts (U 0x000101FF) 0 |> Equal false isIdContinue opts false (U 0x000101FF) 0 |> Equal false isIdStart opts (U 0x00010280) 0 |> Equal true isIdContinue opts false (U 0x00010280) 0 |> Equal true let chars = [|"a"; " "; "1"; "$"; "?"; "\u200C"; "\u200D"; "\uFB1C"; "\uFB1D"; "\uFB1E"; // We can't use simple string literals for latin-1 chars due to // normalization issue that is caused by the F# compiler not flagging // strings with chars in the 0x80-0xff range by a trailing 1 in the #US // metadata section of assemblies. // (The .NET CLR requires a trailing 1 for chars in 0x80-0xff, though // ECMA-335 does not, which is arguably an error in the ECMA spec.) 
U 0xa0; U 0xaa; U 0xb2; U 0x20a8; U 0x000101FD; U 0x000101FF; U 0x00010280; U 0x0001d400; U 0x0001d6c1; U 0x0001d7ce;|] let checkValidate (id: string) = let check allowJoiner = let isId, errorPos = isIdentifier opts allowJoiner id let idN = try id.Normalize(System.Text.NormalizationForm.FormKC) with :? System.ArgumentException -> null let isIdN, errorPosN = if idN <> null then isIdentifier opts allowJoiner idN else false, errorPos let mutable errorPos1 = 0 let str1 = iv.ValidateAndNormalize(id, &errorPos1) if isId then str1 |> Equal id errorPos1 |> Equal -1 else str1 |> Equal null errorPos1 |> Equal errorPos iv.NormalizationForm <- System.Text.NormalizationForm.FormKC let mutable errorPos2 = 0 let str2 = iv.ValidateAndNormalize(id, &errorPos2) iv.NormalizeBeforeValidation <- true let mutable errorPos3 = 0 let str3 = iv.ValidateAndNormalize(id, &errorPos3) iv.NormalizationForm <- enum 0 iv.NormalizeBeforeValidation <- false if isId then str2 |> Equal idN errorPos2 |> Equal -1 str3 |> Equal idN errorPos3 |> Equal -1 else str2 |> Equal null errorPos2 |> Equal errorPos if isIdN then str3 |> Equal idN errorPos3 |> Equal -1 else str3 |> Equal null errorPos3 |> Equal errorPosN check false iv.AllowJoinControlCharsAsIdContinueChars <- true check true iv.AllowJoinControlCharsAsIdContinueChars <- false let mutable errorPos = 0 iv.NormalizationForm <- System.Text.NormalizationForm.FormC iv.ValidateAndNormalize("ϒ\u0308", &errorPos) |> Equal "\u03D4" iv.NormalizationForm <- System.Text.NormalizationForm.FormKC iv.ValidateAndNormalize("ϒ\u0308", &errorPos) |> Equal "\u03AB" iv.NormalizationForm <- enum 0 let mutable i = 0 checkValidate "" for c1 in chars do checkValidate c1 for c2 in chars do checkValidate (System.String.Concat(c1, c2)) for c3 in chars do i <- i + 1 checkValidate (System.String.Concat(c1, c2, c3)) // check illegal characters let d800 = string (char '\uD800') // "\ud800" doesn't work here let dc00 = string (char '\uDC00') checkValidate d800 checkValidate dc00 
checkValidate (d800 + "a") checkValidate (dc00 + "a") checkValidate "\uFFFF" checkValidate ("a" + d800 + "a") checkValidate ("a" + dc00 + "a") checkValidate "a\uFFFF" checkValidate ("ab" + d800) checkValidate ("ab" + dc00) checkValidate (d800 + d800) checkValidate ("a" + d800 + d800) checkValidate "ab\uFFFF" let run() = testCharPredicates() testIdentifierValidator() ================================================ FILE: Test/OperatorPrecedenceParserTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2008-2011 // License: Simplified BSD License. See accompanying documentation. module FParsec.Test.OperatorPrecedenceParserTests open FParsec open FParsec.Error open FParsec.Primitives open FParsec.CharParsers open FParsec.Test.Test // the tests for this module are somewhat ad hoc and ugly... type Expr = O1 of Expr*Expr | O2 of Expr*Expr | O3 of Expr*Expr | Pre1 of Expr | Pre2 of Expr | Po1 of Expr | Po2 of Expr | Val of int | T1 of Expr*Expr*Expr | T2 of Expr*Expr*Expr type Expr2 = Op of (string*Position)*Expr2*Expr2 | Pre of (string*Position)*Expr2 | Post of (string*Position)*Expr2 | Tern of (string*Position)*(string*Position)*Expr2*Expr2*Expr2 | Value of Position*int let ws = spaces let ws1 = spaces1 let testRemove (opp: OperatorPrecedenceParser<_,_,_>) (op: Operator<_,_,_>) = try opp.AddOperator(op); Fail() with :? 
System.ArgumentException -> () opp.RemoveOperator(op) |> True opp.RemoveOperator(op) |> False opp.AddOperator(op) match op.Type with | OperatorType.Prefix -> opp.RemovePrefixOperator(op.String) |> True opp.RemovePrefixOperator(op.String) |> False | OperatorType.Postfix -> opp.RemovePostfixOperator(op.String) |> True opp.RemovePostfixOperator(op.String) |> False | OperatorType.Infix when not op.IsTernary -> opp.RemoveInfixOperator(op.String) |> True opp.RemoveInfixOperator(op.String) |> False | OperatorType.Infix when op.IsTernary -> opp.RemoveTernaryOperator(op.String, op.TernaryRightString) |> True opp.RemoveTernaryOperator(op.String, op.TernaryRightString) |> False | _ -> Fail()

// Runs testRemove for every operator in `ops`, in an order shuffled with `rand`.
let testRemoveSeq rand opp ops =
    let shuffled = Seq.toArray ops
    shuffleArray rand shuffled
    for op in shuffled do testRemove opp op

// Tests operator lookup (longest match among many similar prefix operator strings)
// and compares the OPP against hand-written reference parsers with respect to
// after-string-parser handling for prefix, infix and ternary operators.
let testOpParser() =
    let opp = new OperatorPrecedenceParser<_,_,_>()
    let expr = opp.ExpressionParser
    opp.TermParser <- preturn System.Int32.MinValue

    // check "greedy" op parsing, correct sorting and finding in internal op data structure
    // (operator string, value returned by the operator's mapping); registered in this order.
    // NOTE(review): "\u402" in the entry for 32 has only three hex digits; presumably
    // "\u0402" was intended. Left unchanged here; the matching ROk check below
    // uses the same literal.
    let prefixOps =
        [ ("\u0302", 0); ("\u0303", 1); ("\u0203", 2); ("\u0403", 3); ("\u0503", 4); ("\u0103", 5)
          ("\u0304", -1)
          ("\u0303\u0303", 6); ("\u0303\u0302", 7); ("\u0303\u0304", 8)
          ("\u0203\u0202", 9); ("\u0203\u0203", 10); ("\u0203\u0204", 11)
          ("\u0403\u0404", 12); ("\u0403\u0403", 13); ("\u0403\u0402", 14)
          ("\u0503\u0403", 15); ("\u0503\u0402", 16); ("\u0503\u0404", 17)
          ("\u0103\u0103\u0103\u0103", 18); ("\u0103\u0103\u0103", 19)
          ("\u0103\u0102\u0102", 20); ("\u0103\u0102", 21); ("\u0103\u0103", 22); ("\u0103\u0101", 23)
          ("\u0303\u0303\u0303", 24); ("\u0303\u0303\u0303\u0303", 25); ("\u0303\u0302\u0302", 26)
          ("\u0203\u0202\u0202\u0202", 27); ("\u0203\u0202\u0202", 28); ("\u0203\u0203\u0203", 29)
          ("\u0403\u0403\u0403", 30); ("\u0403\u0402\u0402", 31); ("\u0403\u0402\u0402\u402", 32)
          ("\u0603\u0602", 33); ("\u0603\u0603", 34) ]
    for (opString, value) in prefixOps do
        opp.AddOperator(PrefixOperator(opString, ws, 1, true, fun _ -> value))

    let expectedPrefix = Errors.ExpectedPrefixOperator

    // ROk expects the whole content to be consumed, ROkI only the first i chars.
    let ROk content result parser = ROkE content content.Length result expectedPrefix parser
    let ROkI content i result parser = ROkE content i result expectedPrefix parser

    expr |> ROkI "\u0301" 0 System.Int32.MinValue
    expr |> ROk "\u0302" 0
    expr |> ROk "\u0303" 1
    expr |> ROk "\u0203" 2
    expr |> ROk "\u0403" 3
    expr |> ROk "\u0503" 4
    expr |> ROk "\u0103" 5
    expr |> ROkI "\u0003" 0 System.Int32.MinValue
    expr |> ROkI "\u0703" 0 System.Int32.MinValue
    expr |> ROk "\u0304" -1

    expr |> ROk "\u0303\u0303" 6
    expr |> ROk "\u0303\u0302" 7
    expr |> ROk "\u0303\u0304" 8
    expr |> ROkI "\u0003\u0303" 0 System.Int32.MinValue
    expr |> ROkI "\u0703\u0302" 0 System.Int32.MinValue
    expr |> ROkI "\u0603\u0601" 0 System.Int32.MinValue
    expr |> ROk "\u0603\u0602" 33
    expr |> ROk "\u0603\u0603" 34
    expr |> ROkI "\u0603\u0604" 0 System.Int32.MinValue
    expr |> ROk "\u0203\u0202" 9
    expr |> ROk "\u0203\u0203" 10
    expr |> ROk "\u0203\u0204" 11
    expr |> ROk "\u0403\u0404" 12
    expr |> ROk "\u0403\u0403" 13
    expr |> ROk "\u0403\u0402" 14
    expr |> ROk "\u0503\u0403" 15
    expr |> ROk "\u0503\u0402" 16
    expr |> ROk "\u0503\u0404" 17

    expr |> ROk "\u0103\u0103\u0103\u0103" 18
    expr |> ROk "\u0103\u0103\u0103" 19
    expr |> ROkI "\u0103\u0103\u0103\u0102" 3 19
    expr |> ROk "\u0103\u0102\u0102" 20
    expr |> ROk "\u0103\u0102" 21
    expr |> ROk "\u0103\u0103" 22
    expr |> ROk "\u0103\u0101" 23
    expr |> ROkI "\u0103\u0101\u0102" 2 23

    expr |> ROk "\u0303\u0303\u0303" 24
    expr |> ROk "\u0303\u0303\u0303\u0302" 24
    expr |> ROk "\u0303\u0303\u0303\u0303" 25
    expr |> ROk "\u0303\u0302\u0302" 26
    expr |> ROk "\u0203\u0202\u0202\u0202" 27
    expr |> ROk "\u0203\u0202\u0202" 28
    expr |> ROkI "\u0203\u0202\u0202\u0201" 3 28
    expr |> ROk "\u0203\u0203\u0203" 29
    expr |> ROk "\u0403\u0403\u0403" 30
    expr |> ROk "\u0403\u0402\u0402" 31
    expr |> ROk "\u0403\u0402\u0402\u402" 32
    expr |> ROkI "\u0403\u0402\u0402\u0401" 3 31

    // check whitespace parsing and parser state propagation

    // Wraps p so that a successful reply additionally carries the error message m.
    let withMsg m p = p .>> (fail m <|>% ())

    // Checks the OPP with a single prefix operator "+" against the reference parser expr2.
    let testPrefixOpParser wsParser termParser =
        opp.AddOperator(PrefixOperator("+", wsParser, 1, true, fun x -> x + 1))
        opp.TermParser <- termParser
        let expr = opp.ExpressionParser
        // `<?>` labels the operator parser; without it `wsParser` would be applied to the label string.
        let expr2 = pipe2 (many (pchar '+' >>? wsParser <?> Strings.PrefixOperator)) termParser
                          (fun ps i -> i + List.length ps)

        checkParserStr expr expr2 ""
        checkParserStr expr expr2 "+"
        checkParserStr expr expr2 "1"
        checkParserStr expr expr2 "+"
        checkParserStr expr expr2 "+ "
        checkParserStr expr expr2 "+1"
        checkParserStr expr expr2 "+ 1"
        checkParserStr expr expr2 "++"
        checkParserStr expr expr2 "+ +"
        checkParserStr expr expr2 "+ + "
        checkParserStr expr expr2 "++1"
        checkParserStr expr expr2 "++ 1"
        checkParserStr expr expr2 "+ + 1"

        opp.RemovePrefixOperator("+") |> True

    testPrefixOpParser (ws |> withMsg "e1") (preturn 0 |> withMsg "e2")
    testPrefixOpParser (ws |> withMsg "e1") (fail "e2")
    testPrefixOpParser (ws |> withMsg "e1") (failFatally "e2")
    testPrefixOpParser (ws |> withMsg "e1") (preturn 0 |> withMsg "e2")
    testPrefixOpParser (fail "e1") (preturn 0 |> withMsg "e2" )
    testPrefixOpParser (failFatally "e1") (preturn 0 |> withMsg "e2" )
    testPrefixOpParser (ws |> withMsg "e1") (pint32 |> withMsg "e2")
    testPrefixOpParser (ws1 |> withMsg "e1") (preturn 0 |> withMsg "e2")
    testPrefixOpParser (ws1 >>. fail "e1") (preturn 0 |> withMsg "e2")
    testPrefixOpParser (ws1 |> withMsg "e1") (pint32 |> withMsg "e2")

    // Checks the OPP with a single left-associative infix operator "+" against expr2.
    let testInfixOpParser wsParser termParser =
        opp.AddOperator(InfixOperator("+", wsParser, 1, Associativity.Left, fun x y -> x + y))
        opp.TermParser <- termParser
        let expr = opp.ExpressionParser
        // succeeds without consuming input, contributing only an "expected <label>" message
        let expect label = preturn () <?> label
        let term = expect Strings.PrefixOperator >>. termParser
        let infixOp = pstring "+" >>? wsParser <?> Strings.InfixOperator
        let expr2 = pipe2 term ((infixOp >>. (term .>> (opt infixOp))) <|>% 0) (fun x y -> x + y)

        checkParserStr expr expr2 "+"
        checkParserStr expr expr2 "+ "
        checkParserStr expr expr2 "1+"
        checkParserStr expr expr2 "1 +"
        checkParserStr expr expr2 "1 + "
        checkParserStr expr expr2 "1+2"
        checkParserStr expr expr2 "1 +2"
        checkParserStr expr expr2 "1 + 2"
        checkParserStr expr expr2 "1 + 2"
        checkParserStr expr expr2 "1+2 "
        checkParserStr expr expr2 "1 +2 "
        checkParserStr expr expr2 "1 + 2 "
        checkParserStr expr expr2 "1 + 2 "

        // a freshly constructed, equal-looking operator instance is expected not to be removable
        opp.RemoveOperator(InfixOperator("+", wsParser, 1, Associativity.Left, fun x y -> x + y)) |> False
        opp.RemoveInfixOperator("+") |> True

    testInfixOpParser (ws |> withMsg "e1") (preturn 0 |> withMsg "e2")
    testInfixOpParser (fail "e1") (pint32 .>> ws |> withMsg "e2")
    testInfixOpParser (failFatally "e1") (pint32 .>> ws |> withMsg "e2")
    testInfixOpParser (ws |> withMsg "e1") (pint32 .>> ws |> withMsg "e2")
    testInfixOpParser (ws1 |> withMsg "e1") (pint32 .>> ws |> withMsg "e2")
    testInfixOpParser (ws1 >>. fail "e1") (pint32 .>> ws |> withMsg "e2")
    testInfixOpParser (ws1 |> withMsg "e1") (pint32 .>> ws |> withMsg "e2")

    // Checks the handling of the parser following the second string of a ternary
    // operator "?" ":" against the reference parser expr2.
    let testTernary2ndOpParser opWsParser =
        let wsm = (ws |> withMsg "e1")
        let term = (pint32 |> withMsg "e2") .>> wsm
        opp.TermParser <- term
        opp.AddOperator(TernaryOperator("?", wsm, ":", opWsParser, 1, Associativity.Left, fun x y z -> x + y + z))
        opp.MissingTernary2ndStringErrorFormatter <- fun (_, _, op, _) -> expected op.TernaryRightString

        let expr2 =
            let op str wsParser = skipString str >>? wsParser
            let expect label = preturn () <?> label
            let term = expect "prefix operator" >>. term
            let op1 = op "?" wsm <?> "infix operator"
            let op2 = expect Strings.InfixOperator >>. (op ":" opWsParser <?> ":")
            pipe2 term (tuple2 (op1 >>. term) (op2 >>. term .>> expect Strings.InfixOperator) <|>% (0,0))
                  (fun x (y,z) -> x + y + z)

        // `expr` is the ExpressionParser of the enclosing testOpParser
        checkParserStr expr expr2 "1 ?"
        checkParserStr expr expr2 "1 ?"
        checkParserStr expr expr2 "1 ?: 3"
        checkParserStr expr expr2 "1 ? : 3"
        checkParserStr expr expr2 "1 ? 2"
        checkParserStr expr expr2 "1 ? 2 "
        checkParserStr expr expr2 "1 ? 2: "
        checkParserStr expr expr2 "1 ? 2 :"
        checkParserStr expr expr2 "1 ? 2:3"
        checkParserStr expr expr2 "1 ? 2: 3"
        checkParserStr expr expr2 "1 ? 2 :3"
        checkParserStr expr expr2 "1 ? 2 : 3"

        opp.RemoveTernaryOperator("?", ":") |> True

    testTernary2ndOpParser (ws |> withMsg "e")
    testTernary2ndOpParser (ws1 |> withMsg "e")
    testTernary2ndOpParser (fail "e")
    testTernary2ndOpParser (failFatally "e")
    testTernary2ndOpParser (ws1 >>. fail "e")

    let rand = new System.Random(1234)
    testRemoveSeq rand opp opp.Operators

// Tests how the outcome of an operator's after-string parser interacts with
// operator-conflict detection (this function continues on the following line).
let testConflictAfterStringParserHandling() =
    let opp = new OperatorPrecedenceParser<_,_,_>()
    let expr = opp.ExpressionParser
    opp.TermParser <- pint32

    let conflictError = messageError "conflict"
    opp.OperatorConflictErrorFormatter <- fun _ _ -> conflictError

    opp.AddOperator(PrefixOperator("+", spaces, 1, false, fun x -> x + 1))
    opp.AddOperator(PrefixOperator("++", spaces, 1, true, fun x -> x + 2))
    expr |> ROk "+ ++1" 5 4
    opp.RemovePrefixOperator("++") |> True
    opp.AddOperator(PrefixOperator("++", spaces, 1, false, fun x -> x + 2))
    expr |> RError "+ ++1" 2 conflictError
    opp.RemovePrefixOperator("++") |> True
    opp.AddOperator(PrefixOperator("++", spaces1, 1, false, fun x -> x + 2))
    expr |> RError "+ ++1" 2 (mergeErrors Errors.ExpectedPrefixOperator Errors.ExpectedInt32)
    opp.RemovePrefixOperator("++") |> True
    opp.AddOperator(PrefixOperator("++", failFatally "e", 1, false, fun x -> x + 2))
    expr |> RFatalError "+ ++1" 4 (messageError "e")
    opp.RemovePrefixOperator("++") |> True

    opp.AddOperator(InfixOperator("+", spaces, 1, Associativity.Left, fun x y -> x + y))
    opp.AddOperator(InfixOperator("-", spaces, 1, Associativity.Right, fun x y -> x - y))
    expr |> RError "1+2- 3" 3 conflictError
    opp.RemoveInfixOperator("-") |> True
    opp.AddOperator(InfixOperator("-", spaces1, 1, Associativity.Right, fun x y -> x - y))
    expr |> ROkE "1+2-3" 3 3 Errors.ExpectedInfixOperator
    opp.RemoveInfixOperator("-") |> True
opp.AddOperator(InfixOperator("-", failFatally "e", 1, Associativity.Right, fun x y -> x - y)) expr |> RFatalError "1+2- 3" 4 (messageError "e")

// Tests the operator constructor overloads that take an extra `()` argument and a
// mapping which also receives the result(s) of the after-string parser(s)
// (here: the position returned by getPosition).
let testAlternativeOpConstructors() =
    let opp = new OperatorPrecedenceParser<_,_,_>()
    let expr = opp.ExpressionParser
    let str = skipString
    let posWS = getPosition .>> ws

    let term = pipe2 getPosition (pint32 .>> ws) (fun pos x -> Value(pos, x))
    opp.TermParser <- term

    opp.AddOperator(PrefixOperator("-", posWS, 1, true, (), fun pos x -> Pre(("-", pos), x)))
    opp.AddOperator(PostfixOperator("++", posWS, 1, true, (), fun pos x -> Post(("++", pos), x)))
    opp.AddOperator(InfixOperator("*", posWS, 1, Associativity.Left, (), fun pos x y -> Op(("*", pos), x, y)))
    opp.AddOperator(TernaryOperator("?", posWS, ":", posWS, 1, Associativity.Left, (), fun pos1 pos2 x y z -> Tern(("?", pos1), (":", pos2), x, y, z)))

    // reference parser for the single test input below
    let op = many1SatisfyL (fun c -> match c with '*' | '+' | '-' | '?' | ':' -> true | _ -> false) "operator" .>>. posWS
    let expectInfixOrPostfix = fun stream -> Reply(Ok, Errors.ExpectedInfixOrPostfixOperator)
    let expr2 =
        pipe3 (tuple4 term op term op) (tuple2 op term) (tuple2 op (tuple2 op term))
              (fun (v12, multOp, v3, plusPlusOp) (qMarkOp, v4) (colonOp, (minusOp, v5)) ->
                  Tern(qMarkOp, colonOp, Op(multOp, v12, Post(plusPlusOp, v3)), v4, Pre(minusOp, v5)))
        .>> expectInfixOrPostfix

    checkParserStr expr expr2 "12 * 3++ ? 4 : -5"

    let rand = new System.Random(1234)
    testRemoveSeq rand opp opp.Operators

// Tests precedence and associativity handling, including operator-conflict errors.
// Naming scheme visible in the registrations: a trailing '*' / '**' marks precedence
// 2 / 3 (otherwise 1); 'l' / 'r' / 'n' in infix and ternary names select
// Associativity.Left / Right / None; an 'n' in prefix and postfix names marks the
// operator as non-associative. (This function continues on the following line.)
let testPrecAndAssoc() =
    let opp = new OperatorPrecedenceParser<_,_,_>()
    let expr = opp.ExpressionParser

    opp.TermParser <- pint32 .>> ws |>> Val

    opp.AddOperator(InfixOperator("o1l", ws, 1, Associativity.Left, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o1r", ws, 1, Associativity.Right, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o1n", ws, 1, Associativity.None, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o2l", ws, 1, Associativity.Left, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o2r", ws, 1, Associativity.Right, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o2n", ws, 1, Associativity.None, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o3l", ws, 1, Associativity.Left, fun x y -> O3(x,y)))
    opp.AddOperator(InfixOperator("o3r", ws, 1, Associativity.Right, fun x y -> O3(x,y)))

    opp.AddOperator(InfixOperator("o1l*", ws, 2, Associativity.Left, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o1r*", ws, 2, Associativity.Right, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o1n*", ws, 2, Associativity.None, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o2l*", ws, 2, Associativity.Left, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o2r*", ws, 2, Associativity.Right, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o2n*", ws, 2, Associativity.None, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o3l*", ws, 2, Associativity.Left, fun x y -> O3(x,y)))
    opp.AddOperator(InfixOperator("o3r*", ws, 2, Associativity.Right, fun x y -> O3(x,y)))

    opp.AddOperator(InfixOperator("o1l**", ws, 3, Associativity.Left, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o2l**", ws, 3, Associativity.Left, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o3l**", ws, 3, Associativity.Left, fun x y -> O3(x,y)))
    opp.AddOperator(InfixOperator("o1r**", ws, 3, Associativity.Right, fun x y -> O1(x,y)))
    opp.AddOperator(InfixOperator("o2r**", ws, 3, Associativity.Right, fun x y -> O2(x,y)))
    opp.AddOperator(InfixOperator("o3r**", ws, 3, Associativity.Right, fun x y -> O3(x,y)))

    opp.AddOperator(TernaryOperator("t1l", ws, "tt1l", ws, 1, Associativity.Left, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t1r", ws, "tt1r", ws, 1, Associativity.Right, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t1n", ws, "tt1n", ws, 1, Associativity.None, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t2l", ws, "tt2l", ws, 1, Associativity.Left, fun x y z -> T2(x,y,z)))
    opp.AddOperator(TernaryOperator("t2r", ws, "tt2r", ws, 1, Associativity.Right, fun x y z -> T2(x,y,z)))
    opp.AddOperator(TernaryOperator("t2n", ws, "tt2n", ws, 1, Associativity.None, fun x y z -> T2(x,y,z)))

    opp.AddOperator(TernaryOperator("t1l*", ws, "tt1l*", ws, 2, Associativity.Left, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t1l**", ws, "tt1l**", ws, 3, Associativity.Left, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t1r*", ws, "tt1r*", ws, 2, Associativity.Right, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t1n*", ws, "tt1n*", ws, 2, Associativity.None, fun x y z -> T1(x,y,z)))
    opp.AddOperator(TernaryOperator("t2l*", ws, "tt2l*", ws, 2, Associativity.Left, fun x y z -> T2(x,y,z)))
    opp.AddOperator(TernaryOperator("t2r*", ws, "tt2r*", ws, 2, Associativity.Right, fun x y z -> T2(x,y,z)))
    opp.AddOperator(TernaryOperator("t2n*", ws, "tt2n*", ws, 2, Associativity.None, fun x y z -> T2(x,y,z)))

    let poOp1 = PostfixOperator("po1", ws, 1, true, fun x -> Po1(x))
    opp.AddOperator(poOp1)
    opp.AddOperator(PostfixOperator("po1n", ws, 1, false, fun x -> Po1(x)))
    opp.AddOperator(PostfixOperator("po2", ws, 1, true, fun x -> Po2(x)))
    opp.AddOperator(PostfixOperator("po2n", ws, 1, false, fun x -> Po2(x)))

    opp.AddOperator(PostfixOperator("po1*", ws, 2, true, fun x -> Po1(x)))
    opp.AddOperator(PostfixOperator("po1n*", ws, 2, false, fun x -> Po1(x)))
    opp.AddOperator(PostfixOperator("po2*", ws, 2, true, fun x -> Po2(x)))
    opp.AddOperator(PostfixOperator("po2n*", ws, 2, false, fun x -> Po2(x)))

    // do some tests without prefix operators defined (there's a separate code branch in OPP.ParseExpression)
    let expectedInfixOrPostfix = Errors.ExpectedInfixOrPostfixOperator
    let ROk content result parser = ROkE content content.Length result expectedInfixOrPostfix parser
    expr |> RError "" 0 Errors.ExpectedInt32
    expr |> ROk "1 o1l 2 o2l 3" (O2(O1(Val(1),Val(2)),Val(3)))
    expr |> ROk "1 o1r* 2 o2r 3" (O2(O1(Val(1),Val(2)),Val(3)))
    expr |> ROk "1 o1r 2 o2r 3" (O1(Val(1),O2(Val(2),Val(3))))
    expr |> ROk "1 o1l 2 o2l* 3" (O1(Val(1),O2(Val(2),Val(3))))
    expr |> ROk "1 o1n 2 po1n" (O1(Val(1), Po1(Val(2))))

    // add prefix operators
    opp.AddOperator(PrefixOperator("pre1", ws, 1, true, fun x -> Pre1(x)))
    expr |> RError "po1" 0 (ErrorMessageList.Merge(Errors.ExpectedPrefixOperator, Errors.ExpectedInt32))
    opp.AddOperator(PrefixOperator("pre1n", ws, 1, false, fun x -> Pre1(x)))
    opp.AddOperator(PrefixOperator("pre2", ws, 1, true, fun x -> Pre2(x)))
    opp.AddOperator(PrefixOperator("pre2n", ws, 1, false, fun x -> Pre2(x)))
    opp.AddOperator(PrefixOperator("pre1*", ws, 2, true, fun x -> Pre1(x)))
    opp.AddOperator(PrefixOperator("pre1n*", ws, 2, false, fun x -> Pre1(x)))
    opp.AddOperator(PrefixOperator("pre2*", ws, 2, true, fun x -> Pre2(x)))
    opp.AddOperator(PrefixOperator("pre2n*", ws, 2, false, fun x -> Pre2(x)))

    // add operators a second time with opposite fixity
    // None of the expectations below contains a node produced by these registrations,
    // so every mapping simply fails with the string of the operator it belongs to
    // (this also covers "pre2n*", whose mapping previously failed with "pre2n").
    let addFailingPrefix (opString: string) (precedence: int) =
        opp.AddOperator(PrefixOperator(opString, ws, precedence, true, fun _ -> failwith opString))
    let addFailingInfix (opString: string) (precedence: int) =
        opp.AddOperator(InfixOperator(opString, ws, precedence, Associativity.Left, fun _ _ -> failwith opString))
    let addFailingPostfix (opString: string) (precedence: int) =
        opp.AddOperator(PostfixOperator(opString, ws, precedence, true, fun _ -> failwith opString))

    for (opString, precedence) in
        [ ("o1l", 1); ("o1r", 1); ("o1n", 1); ("o2l", 1); ("o2r", 1); ("o2n", 1)
          ("o1l*", 2); ("o1l**", 3); ("o1r*", 2); ("o1n*", 2); ("o2l*", 2); ("o2r*", 2); ("o2n*", 2)
          ("t1l", 1); ("t1r", 1); ("t1n", 1); ("t2l", 1); ("t2r", 1); ("t2n", 1)
          ("t1l*", 2); ("t1l**", 3); ("t1r*", 2); ("t1n*", 2); ("t2l*", 2); ("t2r*", 2); ("t2n*", 2)
          ("po1", 1); ("po1n", 1); ("po2", 1); ("po2n", 1)
          ("po1*", 2); ("po1n*", 2); ("po2*", 2); ("po2n*", 2) ] do
        addFailingPrefix opString precedence

    addFailingInfix "pre1" 1
    addFailingInfix "pre1n" 1
    addFailingPostfix "pre2" 1
    addFailingPostfix "pre2n" 1
    addFailingInfix "pre1*" 2
    addFailingInfix "pre1n*" 2
    addFailingPostfix "pre2*" 2
    addFailingPostfix "pre2n*" 2

    expr |> ROk "1 o1l 2 o2l 3" (O2(O1(Val(1),Val(2)),Val(3)))
    expr |> ROk "1 o1r* 2 o2r 3" (O2(O1(Val(1),Val(2)),Val(3)))
    expr |> ROk "1 o1r 2 o2r 3" (O1(Val(1),O2(Val(2),Val(3))))
    expr |> ROk "1 o1l 2 o2l* 3" (O1(Val(1),O2(Val(2),Val(3))))

    expr |> ROk "1 o1l 2 o2l* 3 o3l** 4" (O1(Val(1),O2(Val(2),O3(Val(3),Val(4)))))
    expr |> ROk "1 o1l 2 o2l* 3 o3l 4" (O3((O1(Val(1),O2(Val(2),Val(3)))),Val(4)))
    expr |> ROk "1 o1l 2 o2l** 3 o3l* 4" (O1(Val(1),O3(O2(Val(2),Val(3)),Val(4))))
    expr |> ROk "1 o1r 2 o2r* 3 o3r** 4" (O1(Val(1),O2(Val(2),O3(Val(3),Val(4)))))
    expr |> ROk "1 o1r 2 o2r* 3 o3r 4" (O1(Val(1),O3(O2(Val(2),Val(3)),Val(4))))

    expr |> ROk "1 t1l 2 tt1l 3 t2l 4 tt2l 5" (T2(T1(Val(1),Val(2),Val(3)),Val(4),Val(5)))
    expr |> ROk "1 t1r* 2 tt1r* 3 t2r 4 tt2r 5" (T2(T1(Val(1),Val(2),Val(3)),Val(4),Val(5)))
    expr |> ROk "1 t1r 2 tt1r 3 t2r 4 tt2r 5" (T1(Val(1),Val(2),T2(Val(3),Val(4),Val(5))))
    expr |> ROk "1 t1l 2 tt1l 3 t2l* 4 tt2l* 5" (T1(Val(1),Val(2),T2(Val(3),Val(4),Val(5))))
    expr |> ROk "1 t1l* 2 po1 tt1l* 3 t2l* 4 o1l 5 tt2l* 6" (T2(T1(Val(1),Po1(Val(2)),Val(3)),O1(Val(4),Val(5)),Val(6)))
    expr |> ROk "1 t1n 2 o1n 3 tt1n 4" (T1(Val(1),O1(Val(2),Val(3)),Val(4)))

    expr |> ROk "pre1 1 o1l 2" (O1(Pre1(Val(1)), Val(2)))
    expr |> ROk "pre1* 1 o1l 2" (O1(Pre1(Val(1)), Val(2)))
    expr |> ROk "pre1 1 o1l* 2" (Pre1(O1(Val(1), Val(2))))

    expr |> ROk "1 o1l pre1 2" (O1(Val(1), Pre1(Val(2))))
    expr |> ROk "1 o1l pre1* 2" (O1(Val(1), Pre1(Val(2))))
    expr |> ROk "1 o1l* pre1 2" (O1(Val(1), Pre1(Val(2))))

    expr |> ROk "1 o1l 2 po1" (O1(Val(1), Po1(Val(2))))
    expr |> ROk "1 o1l 2 po1*" (O1(Val(1), Po1(Val(2))))
    expr |> ROk "1 o1l* 2 po1" (Po1(O1(Val(1), Val(2))))

    expr |> ROk "1 o1l pre1 2 po1" (O1(Val(1), Po1(Pre1(Val(2)))))
    expr |> ROk "1 o1l pre1* 2 po1" (O1(Val(1), Po1(Pre1(Val(2)))))
    expr |> ROk "1 o1l pre1 2 po1*" (O1(Val(1), Pre1(Po1(Val(2)))))
    expr |> ROk "1 o1l* pre1 2 po1" (Po1(O1(Val(1), Pre1(Val(2)))))
    expr |> ROk "1 o1r* pre1 2 po1" (Po1(O1(Val(1), Pre1(Val(2)))))
    expr |> ROk "1 o1l* pre1 2 po1*" (O1(Val(1), Pre1(Po1(Val(2)))))

    expr |> ROk "1 o1l 2 po1*" (O1(Val(1), Po1(Val(2))))
    expr |> ROk "1 o1l* 2 po1" (Po1(O1(Val(1), Val(2))))
    expr |> ROk "1 o1l 2 o2l* 3 po1" (O1(Val(1),Po1(O2(Val(2),Val(3)))))

    expr |> ROk "1 o1l pre1 pre2 2" (O1(Val(1), Pre1(Pre2(Val(2)))))
    expr |> ROk "1 o1l pre1* pre2 2" (O1(Val(1), Pre1(Pre2(Val(2)))))
    expr |> ROk "1 o1l pre1 pre2* 2" (O1(Val(1), Pre1(Pre2(Val(2)))))

    expr |> ROk "1 o1l 2 po1 po2" (O1(Val(1), Po2(Po1(Val(2)))))
    expr |> ROk "1 o1l* 2 po1 po2" (Po2(Po1(O1(Val(1), Val(2)))))
    expr |> ROk "1 o1l* 2 po1 po2*" (Po2(Po1(O1(Val(1), Val(2)))))
    expr |> ROk "1 o1l* 2 po1* po2" (Po2(O1(Val(1), Po1(Val(2)))))

    expr |> ROk "1 o1l pre1 2 po1 po2" (O1(Val(1), Po2(Po1(Pre1(Val(2))))))
    expr |> ROk "1 o1l pre1 2 po1* po2" (O1(Val(1), Po2(Pre1(Po1(Val(2))))))
    expr |> ROk "1 o1l pre1 2 po1* po2*" (O1(Val(1), Pre1(Po2(Po1(Val(2))))))
    expr |> ROk "1 o1l pre1 2 po1 po2*" (O1(Val(1), Po2(Po1(Pre1(Val(2))))))
    expr |> ROk "1 o1l* pre1 2 po1 po2" (Po2(Po1(O1(Val(1), Pre1(Val(2))))))
    expr |> ROk "1 o1l* pre1 2 po1* po2" (Po2(O1(Val(1), Pre1(Po1(Val(2))))))
    expr |> ROk "1 o1l* pre1 2 po1* po2" (Po2(O1(Val(1), Pre1(Po1(Val(2))))))

    expr |> ROk "pre1 1 o1l 2 po1" (O1(Pre1(Val(1)), Po1(Val(2))))
    expr |> ROk "pre1 1 o1l* 2 po1" (Po1(Pre1(O1(Val(1), Val(2)))))
    expr |> ROk "pre1* 1 o1l* 2 po1" (Po1(O1(Pre1(Val(1)), Val(2))))
    expr |> ROk "pre1 1 o1l* 2 po1*" (Pre1(O1(Val(1), Po1(Val(2)))))
    expr |> ROk "pre1 1 o1l** 2 po1*" (Pre1(Po1(O1(Val(1), Val(2)))))

    // conflicts between operators of equal precedence
    opp.OperatorConflictErrorFormatter <- fun _ _ -> messageError "conflict"
    let conflictE = messageError "conflict"

    expr |> ROk "pre1n 1 o1n 2" (O1(Pre1(Val(1)), Val(2)))
    expr |> ROk "1 po1n o1n 2" (O1(Po1(Val(1)), Val(2)))
    expr |> ROk "1 o1n pre1n 2" (O1(Val(1), Pre1(Val(2))))
    expr |> ROk "1 o1n 2 po1n" (O1(Val(1), Po1(Val(2))))

    expr |> ROk "1 o1l* 2 o2r 3" (O2(O1(Val(1),Val(2)),Val(3)))
    expr |> ROk "1 o1l 2 o2r* 3" (O1(Val(1),O2(Val(2),Val(3))))
    expr |> RError "1 o1l 2 o2r 3" 9 conflictE
    expr |> RError "1 o1l 2 o2n 3" 9 conflictE
    expr |> RError "1 o1n 2 o2n 3" 9 conflictE

    expr |> RError "1 o1l 2 o2l* 3 o2r 4" 16 conflictE
    expr |> RError "1 o1l 2 o2l* 3 o2n 4" 16 conflictE
    expr |> RError "1 o1n 2 o2l* 3 o2n 4" 16 conflictE

    expr |> ROk "1 t1l* 2 tt1l* 3 t2r 4 tt2r 5" (T2(T1(Val(1),Val(2),Val(3)),Val(4),Val(5)))
    expr |> ROk "1 t1l 2 tt1l 3 t2r* 4 tt2r* 5" (T1(Val(1),Val(2),T2(Val(3),Val(4),Val(5))))
    expr |> RError "1 t1l 2 tt1l 3 t2r 4 tt2r 5" 17 conflictE
    expr |> RError "1 t1l 2 tt1l 3 o1r 4" 17 conflictE
    expr |> RError "1 o1r 2 t1l 3 tt1l 4" 9 conflictE

    expr |> ROk "pre1n 1 po1" (Po1(Pre1(Val(1))))
    expr |> ROk "pre1 1 po1n" (Po1(Pre1(Val(1))))
    expr |> ROk "pre1n* 1 po1n" (Po1(Pre1(Val(1))))
    expr |> ROk "pre1n 1 po1n*" (Pre1(Po1(Val(1))))
    expr |> RError "pre1n 1 po1n" 9 conflictE

    expr |> ROk "pre1 pre2n 1" (Pre1(Pre2(Val(1))))
    expr |> ROk "pre1n pre2 1" (Pre1(Pre2(Val(1))))
    expr |> ROk "pre1n* pre2n 1" (Pre1(Pre2(Val(1))))
    expr |> ROk "pre1n pre2n* 1" (Pre1(Pre2(Val(1))))
    expr |> RError "pre1n pre2n 1" 7 conflictE

    expr |> ROk "1 po1 po2n" (Po2(Po1(Val(1))))
    expr |> ROk "1 po1n po2"
(Po2(Po1(Val(1)))) expr |> ROk "1 po1n* po2n" (Po2(Po1(Val(1)))) expr |> ROk "1 po1n po2n*" (Po2(Po1(Val(1)))) expr |> RError "1 po1n po2n" 9 conflictE let rand = new System.Random(1234) testRemoveSeq rand opp opp.Operators let testExceptions() = let opp = new OperatorPrecedenceParser() opp.AddOperator(TernaryOperator("?", spaces, ":", spaces, 1, Associativity.Left, fun _ _ _ -> 0)) try opp.AddOperator(TernaryOperator("??", spaces, ":", spaces, 1, Associativity.Left, fun _ _ _ -> 0)) with :? System.ArgumentException -> () try opp.AddOperator(PrefixOperator(":", spaces, 2, false, fun x -> 0)) with :? System.ArgumentException -> () opp.AddOperator(PrefixOperator("+", spaces, 1, true, fun x -> 0)) opp.AddOperator(InfixOperator("-", spaces, 1, Associativity.Left, fun x y -> 0)) try opp.AddOperator(PrefixOperator(":", spaces, 2, false, fun x -> 0)) with :? System.ArgumentException -> () try opp.AddOperator(TernaryOperator("x", spaces, "+", spaces, 1, Associativity.Left, fun _ _ _ -> 0)) with :? System.ArgumentException -> () try opp.AddOperator(TernaryOperator("x", spaces, "-", spaces, 1, Associativity.Left, fun _ _ _ -> 0)) with :? System.ArgumentException -> () try PrefixOperator(null, spaces, 1, true, fun x -> x) |> ignore; Fail() with :? System.ArgumentException -> () try PrefixOperator("", spaces, 1, true, fun x -> x) |> ignore; Fail() with :? System.ArgumentException -> () try PrefixOperator("+", Unchecked.defaultof<_>, 1, true, fun x -> x) |> ignore; Fail() with :? System.ArgumentNullException -> () try PrefixOperator("+", spaces, 0, true, fun x -> x) |> ignore; Fail() with :? System.ArgumentException -> () (PrefixOperator("+", spaces, 1, true, fun x -> 0)).IsAssociative |> True try InfixOperator("+", spaces, 1, enum -1 , fun x y -> x + y) |> ignore; Fail() with :? System.ArgumentException -> () try InfixOperator("+", spaces, 1, enum 3 , fun x y -> x + y) |> ignore; Fail() with :? 
System.ArgumentException -> () try PrefixOperator("+", spaces, 1, true, (), Unchecked.defaultof<_>) |> ignore; Fail() with :? System.ArgumentNullException -> () try InfixOperator("+", spaces, 1, Associativity.Left, (), Unchecked.defaultof<_>) |> ignore; Fail() with :? System.ArgumentNullException -> () try TernaryOperator(null, spaces, "2", spaces, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("", spaces, "2", spaces, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, null, spaces, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "", spaces, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", Unchecked.defaultof<_>, "2", spaces, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "2", Unchecked.defaultof<_>, 1, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "2", spaces, 0, Associativity.Left, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "2", spaces, 1, enum -1, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "2", spaces, 1, enum 3, fun x y z -> x) |> ignore; Fail() with :? System.ArgumentException -> () try TernaryOperator("1", spaces, "2", spaces, 1, Associativity.Left, (), Unchecked.defaultof<_>) |> ignore; Fail() with :? 
System.ArgumentException -> ()

let run() =
    testOpParser()
    testConflictAfterStringParserHandling()
    testAlternativeOpConstructors()
    testPrecAndAssoc()
    testExceptions()

================================================
FILE: Test/PrimitivesTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2007-2011
// License: Simplified BSD License. See accompanying documentation.

module FParsec.Test.PrimitivesTests

open FParsec
open FParsec.Error

/// Plain reference versions of the parser primitives. The tests further down
/// in this file compare the library's combinators against these definitions.
module Reference =
    [<Literal>]
    let Ok = FParsec.Primitives.Ok
    [<Literal>]
    let Error = FParsec.Primitives.Error
    [<Literal>]
    let FatalError = FParsec.Primitives.FatalError

    type Parser<'a,'u> = FParsec.Primitives.Parser<'a,'u>

    type FParsec.Reply<'a> with
        /// Builds a reply with this reply's status and result and the given error list.
        member t.WithError(error: ErrorMessageList) =
            Reply(t.Status, t.Result, error)

    /// Builds a new reply from only the status and error of `reply`
    /// (the result is dropped, so the new reply may have another result type).
    let reconstructErrorReply (reply: Reply<_>) =
        Reply(reply.Status, reply.Error)

    /// Always succeeds with `x`.
    let preturn x = fun stream -> Reply(x)

    /// Always fails with status `Error` and no error messages.
    let pzero : Parser<'a,'u> = fun stream -> Reply(Error, NoErrorMessages)

    /// Monadic bind: runs `p`, then the parser returned by `f` for p's result.
    /// The errors of both replies are merged when the second parser did not
    /// change the stream state tag.
    let (>>=) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) =
        fun stream ->
            let reply1 = p stream
            if reply1.Status = Ok then
                let p2 = f reply1.Result
                let stateTag1 = stream.StateTag
                let reply2 = p2 stream
                if stateTag1 <> stream.StateTag then reply2
                else reply2.WithError(mergeErrors reply1.Error reply2.Error)
            else reconstructErrorReply reply1

    let (>>%) p x = p >>= fun _ -> preturn x

    let (>>.) p1 p2 = p1 >>= fun _ -> p2

    let (.>>) p1 p2 = p1 >>= fun x -> p2 >>% x

    let (.>>.) p1 p2 = p1 >>= fun x1 -> p2 >>= fun x2 -> preturn (x1, x2)

    let between popen pclose p = popen >>. (p .>> pclose)

    let (|>>) p f = p >>= fun a -> preturn (f a)

    let pipe2 p1 p2 f = p1 >>= fun x1 -> p2 >>= fun x2 -> preturn (f x1 x2)

    let pipe3 p1 p2 p3 f = p1 >>= fun x1 -> p2 >>= fun x2 -> p3 >>= fun x3 -> preturn (f x1 x2 x3)

    let pipe4 p1 p2 p3 p4 f = p1 >>= fun x1 -> p2 >>= fun x2 -> p3 >>= fun x3 -> p4 >>= fun x4 -> preturn (f x1 x2 x3 x4)

    let pipe5 p1 p2 p3 p4 p5 f = p1 >>= fun x1 -> p2 >>= fun x2 -> p3 >>= fun x3 -> p4 >>= fun x4 -> p5 >>= fun x5 -> preturn (f x1 x2 x3 x4 x5)

    /// Labels `p`: when `p` returns without having changed the stream state tag,
    /// its error list is replaced by `expected label`.
    let (<?>) (p: Parser<'a,'u>) label : Parser<'a,'u> =
        fun stream ->
            let stateTag = stream.StateTag
            let reply = p stream
            if stateTag <> stream.StateTag then reply
            else reply.WithError(expected label)

    /// Like `<?>`, but a failure after a state change is backtracked and
    /// reported as a `FatalError` carrying a compound error, and a single
    /// nested error is rewrapped as a compound error for `label`.
    let (<??>) (p: Parser<'a,'u>) label : Parser<'a,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply = p stream
            if reply.Status = Ok then
                if state.Tag <> stream.StateTag then reply
                else reply.WithError(expected label)
            else
                if state.Tag <> stream.StateTag then
                    // build the error before backtracking, then restore the saved state
                    let error = compoundError label stream reply.Error
                    stream.BacktrackTo(&state)
                    Reply(FatalError, error)
                else
                    let error =
                        match reply.Error with
                        | ErrorMessageList(NestedError(pos, ustate, msgs), NoErrorMessages) ->
                            ErrorMessageList(CompoundError(label, pos, ustate, msgs), NoErrorMessages)
                        | _ -> expected label
                    reply.WithError(error)

    let fail msg : Parser<'a,'u> =
        fun stream -> Reply(Error, messageError msg)

    let failFatally msg : Parser<'a,'u> =
        fun stream -> Reply(FatalError, messageError msg)

    /// Tries `p2` only if `p1` failed with `Error` without changing the state tag.
    let (<|>) (p1: Parser<'a,'u>) (p2: Parser<'a,'u>) : Parser<'a,'u> =
        fun stream ->
            let stateTag = stream.StateTag
            let reply1 = p1 stream
            if reply1.Status = Error && stateTag = stream.StateTag then
                let reply2 = p2 stream
                if stateTag <> stream.StateTag then reply2
                else reply2.WithError(mergeErrors reply1.Error reply2.Error)
            else reply1

    let choice (ps: seq<Parser<'a,'u>>) =
        List.fold (fun p pc -> p <|> pc) pzero (List.ofSeq ps)

    let (<|>%) p x = p <|> preturn x

    let opt p = (p |>> Some) <|>% None

    let optional p = (p >>% ()) <|>% ()

    /// Turns an `Ok` reply that did not change the state tag into an `Error` reply.
    let notEmpty (p: Parser<'a,'u>) : Parser<'a,'u> =
        fun stream ->
            let stateTag = stream.StateTag
            let reply = p stream
            if reply.Status <> Ok || stateTag <> stream.StateTag then reply
            else Reply(Error, reply.Error)

    /// Runs `p`; any non-Ok reply becomes `Error`, and if the state tag changed
    /// the stream is backtracked and the error is wrapped via `nestedError`.
    let attempt (p: Parser<'a,'u>) : Parser<'a,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply = p stream
            if reply.Status = Ok then reply
            elif state.Tag = stream.StateTag then
                Reply(Error, reply.Error)
            else
                let error = nestedError stream reply.Error
                stream.BacktrackTo(&state)
                Reply(Error, error)

    /// Variant of `>>=` that backtracks to the start when the second parser
    /// fails with `Error` without changing the state tag.
    let (>>=?) (p: Parser<'a,'u>) (f: 'a -> Parser<'b,'u>) : Parser<'b,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply1 = p stream
            if reply1.Status = Ok then
                let p2 = f reply1.Result
                let stateTag1 = stream.StateTag
                let reply2 = p2 stream
                if stateTag1 <> stream.StateTag then reply2
                else
                    let error = mergeErrors reply1.Error reply2.Error
                    if reply2.Status <> Error then reply2.WithError(error)
                    elif state.Tag = stateTag1 then Reply(Error, error)
                    else
                        let error = nestedError stream error
                        stream.BacktrackTo(&state)
                        Reply(Error, error)
            else reconstructErrorReply reply1

    let (>>?) p1 p2 = p1 >>=? fun _ -> p2

    let (.>>?) p1 p2 = p1 >>=? fun x -> p2 >>% x

    let (.>>.?) p1 p2 = p1 >>=? fun x1 -> p2 >>= fun x2 -> preturn (x1, x2)

    /// Runs `p` and restores the stream state afterwards; non-Ok replies become `Error`.
    let lookAhead (p: Parser<'a,'u>) : Parser<'a,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply = p stream
            if reply.Status = Ok then
                if state.Tag <> stream.StateTag then
                    stream.BacktrackTo(&state)
                Reply(reply.Result)
            else
                if state.Tag = stream.StateTag then
                    Reply(Error, reply.Error)
                else
                    let error = nestedError stream reply.Error
                    stream.BacktrackTo(&state)
                    Reply(Error, error)

    /// Succeeds with unit if `p` succeeds, otherwise fails with `error`;
    /// the stream state is restored in both cases.
    let followedByE (p: Parser<'a,'u>) error : Parser<unit,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply = p stream
            if state.Tag <> stream.StateTag then
                stream.BacktrackTo(&state)
            if reply.Status = Ok then Reply(())
            else Reply(Error, error)

    let followedBy p = followedByE p NoErrorMessages

    let followedByL p label = followedByE p (expected label)

    /// Succeeds with unit if `p` does not succeed, otherwise fails with `error`;
    /// the stream state is restored in both cases.
    let notFollowedByE (p: Parser<'a,'u>) error : Parser<unit,'u> =
        fun stream ->
            let mutable state = stream.State
            let reply = p stream
            if state.Tag <> stream.StateTag then
                stream.BacktrackTo(&state)
            if reply.Status <> Ok then Reply(())
            else Reply(Error, error)

    let notFollowedBy p = notFollowedByE p NoErrorMessages

    let notFollowedByL p label = notFollowedByE p (unexpected label)

    let tuple2 p1 p2 = pipe2 p1 p2 (fun a b -> (a, b))

    let tuple3 p1 p2 p3 = pipe3 p1 p2 p3 (fun a b c -> (a, b, c))

    let tuple4 p1 p2 p3 p4 = pipe4 p1 p2 p3 p4 (fun a b c d -> (a, b, c, d))

    let tuple5 p1 p2 p3 p4 p5 = pipe5 p1 p2 p3 p4 p5 (fun a b c d e -> (a, b, c, d, e))

    /// Applies `p` exactly `n` times and returns the results as an array.
    let parray n (p : Parser<_,_>) =
        let rec loop i =
            if i = n then preturn []
            else
                p >>= fun hd -> (loop (i + 1) |>> fun tl -> hd::tl)
        loop 0 |>> Array.ofList

    let skipArray n p = parray n p |>> ignore

    // Note that the actual implementation of `many` tries to guard against
    // an infinite loop/recursion by throwing an exception if the given parser
    // argument succeeds without changing the stream.
    let rec many p = many1 p <|>% []
    and many1 p = p >>= fun hd -> many p |>> (fun tl -> hd::tl)

    // a version of many (p1 .>>. p2) that does not succeed if `p1` succeeds
    // without changing the parser state and `p2` fails without changing the state
    let rec manyPair p1 p2 =
        p1 |>> (fun x1 -> p2 >>= fun x2 -> manyPair p1 p2 |>> fun tl -> (x1, x2)::tl) <|>% (preturn []) >>= fun p -> p

    let rec sepBy p sep = sepBy1 p sep <|>% []
    and sepBy1 p sep = p >>= fun hd -> manyPair sep p |>> fun sepPs -> hd::(List.map snd sepPs)

    let rec sepEndBy p sep = sepEndBy1 p sep <|>% []
    and sepEndBy1 p sep = p >>= fun hd -> sep >>. sepEndBy p sep <|>% [] |>> fun tl -> hd::tl

    /// Applies `p` repeatedly until `endp` succeeds and returns p's results in order.
    /// Throws (via `failwith`) when `p` succeeds without changing the state tag.
    let manyTill (p: Parser<'a,'u>) (endp: Parser<'b,'u>) =
        let rec parse (stream: CharStream<'u>) acc error =
            let stateTag = stream.StateTag
            let replyE = endp stream
            if replyE.Status = Error && stateTag = stream.StateTag then
                let replyP = p stream
                if replyP.Status = Ok then
                    if stateTag = stream.StateTag then failwith "infinite loop"
                    parse stream (replyP.Result::acc) replyP.Error
                else
                    let error =
                        if stateTag <> stream.StateTag then replyP.Error
                        else mergeErrors (mergeErrors error replyE.Error) replyP.Error
                    Reply(replyP.Status, error)
            else
                let error =
                    if stateTag <> stream.StateTag then replyE.Error
                    else mergeErrors error replyE.Error
                if replyE.Status = Ok then
                    // results were accumulated in reverse order
                    Reply(Ok, List.rev acc, error)
                else
                    Reply(replyE.Status, error)
        fun stream -> parse stream [] NoErrorMessages

    let many1Till p endp = pipe2 p (manyTill p endp) (fun hd tl -> hd::tl)

    /// Left-associative chain: folds the (operator, operand) pairs from the left.
    let chainl1 p op =
        p >>= fun x -> manyPair op p |>> fun opPs -> List.fold (fun x (f, y) -> f x y) x opPs

    let chainl p op x = chainl1 p op <|>% x

    /// Right-associative chain: combines the (operator, operand) pairs from the right.
    let chainr1 p op =
        let rec calc x rhs =
            match rhs with
            | (f, y)::tl -> f x (calc y tl)
            | [] -> x
        pipe2 p (manyPair op p) calc

    let chainr p op x = chainr1 p op <|>% x

open FParsec.Primitives
open FParsec.Test.Test

let testPrimitives() =
    let content = "the content doesn't matter"
    use stream = new FParsec.CharStream(content, 0, content.Length)
    let ps1 = Array.append (constantTestParsers 1 (expected "1")) [|(fun s -> s.UserState <- s.UserState + 1; Reply(1))|]
    let ps1b =
constantTestParsers 11 (expected "1b") let ps1c = constantTestParsers 111 (expected "1c") let ps1d = constantTestParsers 1111 (expected "1d") let parserSeq4 = seq {for p1 in ps1 do for p2 in ps1b do for p3 in ps1c do for p4 in ps1d do yield [p1;p2;p3;p4]} let ps2 = constantTestParsers 2u (expected "2") let ps2b = constantTestParsers 22u (expected "2b") let ps2c = constantTestParsers 222u (expected "2c") let ps3 = constantTestParsers 3s (expected "3") let ps4 = constantTestParsers 4L (expected "4") let ps5 = constantTestParsers 5y (expected "5") let checkParser p1 p2 = checkParser p1 p2 stream let checkComb comb1 comb2 = for p in ps1 do checkParser (comb1 p) (comb2 p) let checkCombA comb1 comb2 arg = let adapt comb p = comb p arg checkComb (adapt comb1) (adapt comb2) let checkComb2 comb1 comb2 = for p1 in ps1 do for p2 in ps2 do checkParser (comb1 p1 p2) (comb2 p1 p2) let checkComb2A comb1 comb2 arg = let adapt comb p1 p2 = comb p1 p2 arg checkComb2 (adapt comb1) (adapt comb2) let checkBind bind1 bind2 = for p1 in ps1 do for p2 in ps2 do let f1 = fun r -> r |> Equal 1 p2 let f2 = fun r state -> r |> Equal 1 p2 state checkParser (bind1 p1 f1) (bind2 p1 f1) checkParser (bind1 p1 f2) (bind2 p1 f2) let checkComb3 comb1 comb2 = for p1 in ps1 do for p2 in ps2 do for p3 in ps3 do checkParser (comb1 p1 p2 p3) (comb2 p1 p2 p3) let checkComb3A comb1 comb2 arg = let adapt comb p1 p2 p3 = comb p1 p2 p3 arg checkComb3 (adapt comb1) (adapt comb2) let checkComb4 comb1 comb2 = for p1 in ps1 do for p2 in ps2 do for p3 in ps3 do for p4 in ps4 do checkParser (comb1 p1 p2 p3 p4) (comb2 p1 p2 p3 p4) let checkComb4A comb1 comb2 arg = let adapt comb p1 p2 p3 p4 = comb p1 p2 p3 p4 arg checkComb4 (adapt comb1) (adapt comb2) let checkComb5 comb1 comb2 = for p1 in ps1 do for p2 in ps2 do for p3 in ps3 do for p4 in ps4 do for p5 in ps5 do checkParser (comb1 p1 p2 p3 p4 p5) (comb2 p1 p2 p3 p4 p5) let checkComb5A comb1 comb2 arg = let adapt comb p1 p2 p3 p4 p5 = comb p1 p2 p3 p4 p5 arg 
checkComb5 (adapt comb1) (adapt comb2) let testBasicPrimitives() = checkParser (preturn 42) (Reference.preturn 42) checkParser pzero Reference.pzero checkCombA () Reference.() "test" checkBind (>>=) Reference.(>>=) checkCombA (>>%) Reference.(>>%) "test" checkComb2 (>>.) Reference.(>>.) checkComb2 (.>>) Reference.(.>>) checkComb2 (.>>.) Reference.(.>>.) checkComb3 between Reference.between checkCombA (|>>) Reference.(|>>) (fun x -> x + 3) checkComb2A pipe2 Reference.pipe2 (fun a b -> (a, b)) checkComb3A pipe3 Reference.pipe3 (fun a b c -> (a, b, c)) checkComb4A pipe4 Reference.pipe4 (fun a b c d -> (a, b, c, d)) checkComb5A pipe5 Reference.pipe5 (fun a b c d e -> (a, b, c, d, e)) checkComb2 tuple2 Reference.tuple2 checkComb3 tuple3 Reference.tuple3 checkComb4 tuple4 Reference.tuple4 checkComb5 tuple5 Reference.tuple5 checkCombA () Reference.() "test" checkCombA () Reference.() "test" let btestp : Parser<_,_> = fun stream -> let mutable state0 = stream.State stream.Skip() stream.UserState <- stream.UserState + 1 let error = nestedError stream (expected "test") stream.BacktrackTo(&state0) Reply(Error, error) checkParser (() btestp "btest") (Reference.() btestp "btest") checkParser (fail "test") (Reference.fail "test") checkParser (failFatally "test") (Reference.failFatally "test") for p1 in ps1 do for p2 in ps1b do checkParser ((<|>) p1 p2) (Reference.(<|>) p1 p2) for ps in Seq.append (Seq.singleton []) parserSeq4 do let refChoice = Reference.choice ps let refChoiceL = refChoice "test" // choice and choiceL use different implementations depending on whether // the type of the supplied sequence is a list, an array or a seq, // so we must test all 3 input types. 
let psa = Array.ofSeq ps let pss = match ps with | [] -> Seq.empty | [p1;p2;p3;p4] -> seq {yield p1 yield p2 yield p3 yield p4} | _ -> failwith "shouldn't happen" checkParser (choice ps) refChoice checkParser (choiceL ps "test") refChoiceL checkParser (choice psa) refChoice checkParser (choiceL psa "test") refChoiceL checkParser (choice pss) refChoice checkParser (choiceL pss "test") refChoiceL checkCombA (<|>%) Reference.(<|>%) 99 checkComb opt Reference.opt checkComb optional Reference.optional checkComb notEmpty Reference.notEmpty checkComb attempt Reference.attempt checkBind (>>=?) Reference.(>>=?) checkComb2 (>>?) Reference.(>>?) checkComb2 (.>>?) Reference.(.>>?) checkComb2 (.>>.?) Reference.(.>>.?) checkComb followedBy Reference.followedBy checkCombA followedByL Reference.followedByL "test" checkComb notFollowedBy Reference.notFollowedBy checkCombA notFollowedByL Reference.notFollowedByL "test" checkComb lookAhead Reference.lookAhead testBasicPrimitives() let testPArray() = // parray for ps in parserSeq4 do let p1, p2, pr = seqParserAndReset2 (List.ofSeq ps) checkParser (parray 0 p1) (Reference.parray 0 p2); pr() checkParser (parray 1 p1) (Reference.parray 1 p2); pr() checkParser (parray 2 p1) (Reference.parray 2 p2); pr() checkParser (parray 3 p1) (Reference.parray 3 p2); pr() checkParser (parray 4 p1) (Reference.parray 4 p2); pr() checkParser (skipArray 0 p1) (Reference.skipArray 0 p2); pr() checkParser (skipArray 1 p1) (Reference.skipArray 1 p2); pr() checkParser (skipArray 2 p1) (Reference.skipArray 2 p2); pr() checkParser (skipArray 3 p1) (Reference.skipArray 3 p2); pr() checkParser (skipArray 4 p1) (Reference.skipArray 4 p2); pr() testPArray() let foldTestF acc x = (acc + 1)*(acc + 1) + x let reduceOrDefault f d lst = match lst with | [] -> d | _ -> List.reduce f lst let testMany() = let manySeq3 = // parserSeq4 without parsers that return Ok without changing the state seq {for p1 in ps1[1..] do for p2 in ps1b[1..] do for p3 in ps1c[1..] 
do yield [p1;p2;p3]} let f = foldTestF for ps in manySeq3 do let p1, p2, pr = seqParserAndReset2 ps let rMany = Reference.many p2 checkParser (many p1) rMany; pr() checkParser (skipMany p1) (rMany |>> ignore); pr() let rMany1 = Reference.many1 p2 checkParser (many1 p1) rMany1; pr() checkParser (skipMany1 p1) (rMany1 |>> ignore); pr() try many (preturn 0) stream |> ignore; Fail () with :? System.InvalidOperationException -> () try many1 (preturn 0) stream |> ignore; Fail () with :? System.InvalidOperationException -> () testMany() let sepByTestParsers r1 e1 r2 e2 = let ps1 = constantTestParsers r1 e1 let ps2 = constantTestParsers r2 e2 // all parser combinations except "ok without state change", "ok without state change" seq { for p2 in ps2[1..] do yield ps1[0], p2 for p1 in ps1[1..] do for p2 in ps2 do yield p1, p2 } let testSeqEndBy() = let sepEndSeq3 = seq {for p1 in (constantTestParsers 1 (expected "p1"))[1..] do for sep1, p2 in sepByTestParsers 'a' (expected "sep1") 2 (expected "p2") do for sep2, p3 in sepByTestParsers 'b' (expected "sep2") 3 (expected "p3") do yield [p1; p2; p3;], [sep1; sep2;]} let f = foldTestF let mutable i = 0 for ps, ss in sepEndSeq3 do i <- i + 1 let p1, p2, pr = seqParserAndReset2 ps let s1, s2, sr = seqParserAndReset2 ss let rSepBy = Reference.sepBy p2 s2 checkParser (sepBy p1 s1) rSepBy; pr(); sr() checkParser (skipSepBy p1 s1) (rSepBy |>> ignore); pr(); sr() let rSepBy1 = Reference.sepBy1 p2 s2 checkParser (sepBy1 p1 s1) rSepBy1; pr(); sr() checkParser (skipSepBy1 p1 s1) (rSepBy1 |>> ignore); pr(); sr() let rSepEndBy = Reference.sepEndBy p2 s2 checkParser (sepEndBy p1 s1) rSepEndBy; pr(); sr() checkParser (skipSepEndBy p1 s1) (rSepEndBy |>> ignore); pr(); sr() let rSepEndBy1 = Reference.sepEndBy1 p2 s2 checkParser (sepEndBy1 p1 s1) rSepEndBy1; pr(); sr() checkParser (skipSepEndBy1 p1 s1) (rSepEndBy1 |>> ignore); pr(); sr() try sepBy (preturn 0) (preturn 0) stream |> ignore; Fail () with :? 
System.InvalidOperationException -> () try sepBy1 (preturn 0) (preturn 0) stream |> ignore; Fail () with :? System.InvalidOperationException -> () try sepEndBy (preturn 0) (preturn 0) stream |> ignore; Fail () with :? System.InvalidOperationException -> () try sepEndBy1 (preturn 0) (preturn 0) stream |> ignore; Fail () with :? System.InvalidOperationException -> () testSeqEndBy() let testManyTill() = let manyTillSeq3 = seq {for endp1 in ps2 do for p1 in ps1[1..] do for endp2 in ps2b do for p2 in ps1b[1..] do for endp3 in ps2c do for p3 in ps1c[1..] do yield [p1; p2; p3], [endp1; endp2; endp3; ps2c[0]]} let f = foldTestF let mutable i = 0 for ps, es in manyTillSeq3 do i <- i + 1 let p1, p2, pr = seqParserAndReset2 ps let e1, e2, er = seqParserAndReset2 es let rManyTill = Reference.manyTill p2 e2 checkParser (manyTill p1 e1) rManyTill; pr(); er() checkParser (skipManyTill p1 e1) (rManyTill |>> ignore); pr(); er() let rMany1Till = Reference.many1Till p2 e2 checkParser (many1Till p1 e1) rMany1Till; pr(); er() checkParser (skipMany1Till p1 e1) (rMany1Till |>> ignore); pr(); er() try manyTill (preturn 0) (fail "test") stream |> ignore; Fail () with :? System.InvalidOperationException -> () try many1Till (preturn 0) (fail "test") stream |> ignore; Fail () with :? 
System.InvalidOperationException -> () testManyTill() let testChain() = let chainSeq3 = seq {for p1 in ps1 do for op1, p2 in sepByTestParsers (+) (expected "op1") 2 (expected "p2") do for op2, p3 in sepByTestParsers (*) (expected "op2") 3 (expected "p3") do yield [p1; p2; p3;], [op1; op2;]} for ps, ops in chainSeq3 do let p1, p2, pr = seqParserAndReset2 ps let op1, op2, opr = seqParserAndReset2 ops checkParser (chainl p1 op1 -1) (Reference.chainl p2 op2 -1); pr(); opr() checkParser (chainl1 p1 op1) (Reference.chainl1 p2 op2); pr(); opr() checkParser (chainr p1 op1 -1) (Reference.chainr p2 op2 -1); pr(); opr() checkParser (chainr1 p1 op1) (Reference.chainr1 p2 op2); pr(); opr() testChain() let run() = testPrimitives() ================================================ FILE: Test/RangeTests.fs ================================================ // Copyright (c) Stephan Tolksdorf 2010 // License: Simplified BSD License. See accompanying documentation. module FParsec.Test.RangeTests #if !LOW_TRUST open FParsec open FParsec.Range open FParsec.Test.Test let int32Min = System.Int32.MinValue let int32Max = System.Int32.MaxValue let testLabel1, testLabel2 = let dm = new System.Reflection.Emit.DynamicMethod("__DummMethodForObtainingLabels", null, null) let ilg = dm.GetILGenerator() let label1, label2 = ilg.DefineLabel(), ilg.DefineLabel() ilg.Emit(System.Reflection.Emit.OpCodes.Ret); dm.CreateDelegate(typeof) |> ignore label1, label2 let testCheckRanges() = let l, l2 = testLabel1, testLabel2 #if DEBUG // in DEBUG builds one can't construct invalid ranges (due to an assert check) #else try checkRangesAreValidSortedAndUnconnected [|Range(1, -1)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(1, -1)|] [|l|] |> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(2, 1)|] |> ignore Fail() with :? 
System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(2, 1)|] [|l; l2|] |> ignore Fail() with :? System.ArgumentException -> () #endif try checkRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(0, 0)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(0, 0)|] [|l; l2|]|> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(1, 1)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected[|Range(0, 0); Range(1, 1)|] [|l; l|]|> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(2, 2); Range(3, 3);|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(0, 0); Range(2, 2); Range(3, 3);|] [|l; l; l|]|> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(1, 1); Range(0, 0)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(1, 1); Range(0, 0)|] [|l; l2|]|> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(int32Min, int32Max); Range(int32Max, int32Max)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(int32Min, int32Max); Range(int32Max, int32Max)|] [|l; l2|] |> ignore Fail() with :? System.ArgumentException -> () try checkRangesAreValidSortedAndUnconnected [|Range(int32Min, int32Max); Range(int32Min, int32Min)|] |> ignore Fail() with :? System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(int32Min, int32Max); Range(int32Min, int32Min)|] [|l; l2|] |> ignore Fail() with :? 
System.ArgumentException -> () try checkLabelRangesAreValidSortedAndUnconnected [|Range(int32Min, int32Max)|] [|l; l2|] |> ignore Fail() with :? System.ArgumentException -> () let testSortAndMergeRanges() = sortAndMergeRanges false [||] |> Equal [||] sortAndMergeRanges false [|Range(1, 1); Range(-1,-1)|] |> Equal [|Range(-1, -1); Range(1, 1)|] sortAndMergeRanges false [|Range(int32Min, int32Max)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges false [|Range(int32Min, 1); Range(2, int32Max)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges false [|Range(2, int32Max); Range(int32Min, 1)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges false [|Range(1, 2); Range(3, 4); Range(5, 6)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges false [|Range(1, 2); Range(4, 4); Range(5, 6)|] |> Equal [|Range(1, 2); Range(4, 6)|] sortAndMergeRanges false [|Range(1, 2); Range(4, 4); Range(6, 6)|] |> Equal [|Range(1, 2); Range(4, 4); Range(6, 6)|] sortAndMergeRanges true [|Range(int32Min, int32Max); Range(int32Min + 1, int32Max)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges true [|Range(int32Min, int32Max); Range(int32Min, int32Max - 1)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges true [|Range(0,0); Range(int32Min, int32Max); Range(int32Min + 1, int32Max)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges true [|Range(0,0); Range(int32Min, int32Max); Range(int32Min, int32Max - 1)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges true [|Range(int32Min + 1, int32Max); Range(int32Min, int32Max - 1)|] |> Equal [|Range(int32Min, int32Max)|] sortAndMergeRanges true [|Range(1, 3); Range(3, 4); Range(5, 6)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges true [|Range(1, 2); Range(3, 5); Range(5, 6)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges true [|Range(1, 2); Range(2, 5); Range(5, 6)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges true [|Range(1, 5); Range(3, 4); Range(5, 6)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges 
true [|Range(1, 5); Range(5, 6); Range(3, 5)|] |> Equal [|Range(1, 6)|] sortAndMergeRanges true [|Range(1, 5); Range(3, 5); Range(5, 6); Range(1, 7)|] |> Equal [|Range(1, 7)|] #if DEBUG // in DEBUG builds one can't construct invalid ranges (due to an assert check) #else try sortAndMergeRanges false [|Range(1, -1);|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeRanges false [|Range(0, 0); Range(2, 1)|] |> ignore Fail() with :? System.ArgumentException -> () #endif try sortAndMergeRanges false [|Range(0, 0); Range(0, 0)|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeRanges false [|Range(0, 0); Range(1, 2); Range(2, 2)|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeRanges false [|Range(int32Min, int32Max); Range(0, 0)|] |> ignore Fail() with :? System.ArgumentException -> () let testSortAndMergeKeyValueRanges() = let cmp = System.Collections.Generic.EqualityComparer.Default sortAndMergeKeyValueRanges null [||] |> Equal ([||], [||]) sortAndMergeKeyValueRanges null [|Range(1, 1), 0; Range(-1,-1), 1|] |> Equal ([|Range(-1, -1); Range(1, 1)|], [|1; 0|]) sortAndMergeKeyValueRanges cmp [|Range(1, 1), 0; Range(-1,-1), 1|] |> Equal ([|Range(-1, -1); Range(1, 1)|], [|1; 0|]) sortAndMergeKeyValueRanges cmp [|Range(1, 1), 0; Range(-1, 0), 0|] |> Equal ([|Range(-1, 1)|], [|0|]) sortAndMergeKeyValueRanges null [|Range(2, int32Max), 0; Range(int32Min, 1), 0|] |> Equal ([|Range(int32Min, 1); Range(2, int32Max)|], [|0; 0|]) sortAndMergeKeyValueRanges cmp [|Range(2, int32Max), 0; Range(int32Min, 1), 0|] |> Equal ([|Range(int32Min, int32Max)|], [|0|]) sortAndMergeKeyValueRanges cmp [|Range(1, 2), 0; Range(3, 4), 0; Range(6, 6), 0|] |> Equal ([|Range(1, 4); Range(6, 6)|], [|0; 0|]) sortAndMergeKeyValueRanges cmp [|Range(1, 1), 0; Range(3, 4), 0; Range(5, 6), 0|] |> Equal ([|Range(1, 1); Range(3, 6)|], [|0; 0|]) #if DEBUG // in DEBUG builds one can't construct invalid ranges (due to an assert check) #else 
try sortAndMergeKeyValueRanges null [|Range(1, -1), 0;|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeKeyValueRanges null [|Range(0, 0), 0; Range(2, 1), 0|] |> ignore Fail() with :? System.ArgumentException -> () #endif try sortAndMergeKeyValueRanges null [|Range(0, 0), 0; Range(0, 0), 1|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeKeyValueRanges null [|Range(0, 0), 0; Range(1, 2), 1; Range(2, 2), 2|] |> ignore Fail() with :? System.ArgumentException -> () try sortAndMergeKeyValueRanges null [|Range(int32Min, int32Max), 0; Range(0, 0), 1|] |> ignore Fail() with :? System.ArgumentException -> () let testMergeSortedKeyLabelRanges() = let l, l2 = testLabel1, testLabel2 mergeSortedKeyLabelRanges [||] [||] |> Equal ([||], [||]) mergeSortedKeyLabelRanges [|1|] [|l|] |> Equal ([|Range(1,1)|], [|l|]) mergeSortedKeyLabelRanges [|1;2;3|] [|l;l;l|] |> Equal ([|Range(1,3)|], [|l|]) mergeSortedKeyLabelRanges [|1;2;3|] [|l;l2;l|] |> Equal ([|Range(1,1);Range(2,2);Range(3,3)|], [|l;l2;l|]) mergeSortedKeyLabelRanges [|1;2;3;5|] [|l;l;l;l|] |> Equal ([|Range(1,3); Range(5,5)|], [|l;l|]) mergeSortedKeyLabelRanges [|1;2;3;5;6|] [|l;l;l;l;l|] |> Equal ([|Range(1,3); Range(5,6)|], [|l;l|]) mergeSortedKeyLabelRanges [|1;2;3;5;6|] [|l;l2;l2;l;l|] |> Equal ([|Range(1,1); Range(2,3); Range(5,6)|], [|l;l2;l|]) mergeSortedKeyLabelRanges [|1;3;5|] [|l;l;l|] |> Equal ([|Range(1, 1); Range(3, 3); Range(5,5)|], [|l;l;l|]) mergeSortedKeyLabelRanges [|int32Max - 1; int32Max|] [|l;l|] |> Equal ([|Range(int32Max - 1, int32Max)|], [|l|]) try mergeSortedKeyLabelRanges [||] [|l|] |> ignore Fail() with :? System.ArgumentException -> () try mergeSortedKeyLabelRanges [|1; 1|] [|l; l|] |> ignore Fail() with :? System.ArgumentException -> () try mergeSortedKeyLabelRanges [|1; 0|] [|l; l|] |> ignore Fail() with :? 
System.ArgumentException -> ()

/// Cross-checks collectSortAndMergeRanges and collectSortAndMergeKeyValueRanges
/// against sortAndMergeRanges for every subset of {1,...,N}, then checks a few
/// boundary inputs and the exception thrown for a duplicate key with different values.
let testCollectSortAndMergeRanges() =
    let cmp = System.Collections.Generic.EqualityComparer<int>.Default
    let rand = new System.Random(1234)

    // check all possible subsets of {1,2,...,N}
    let N = 8
    let set = new ResizeArray<_>(N)
    for n = 0 to (1 <<< N) - 1 do
        set.Clear()
        // bit (i - 1) of n decides whether key i is part of the subset
        let mutable b = n
        let mutable i = 1
        while b <> 0 do
            if (b &&& 1) <> 0 then set.Add(i)
            b <- b >>> 1
            i <- i + 1
        let keys = set.ToArray()
        let ranges = keys |> Array.map (fun k -> Range(k, k))
        let mergedRanges1 = collectSortAndMergeRanges keys
        let mergedRanges2 = sortAndMergeRanges false ranges
        mergedRanges1 |> Equal mergedRanges2
        let mergedRanges1b, rangeValues = collectSortAndMergeKeyValueRanges cmp (keys |> Array.map (fun k -> k,0))
        mergedRanges1b |> Equal mergedRanges2
        rangeValues.Length |> Equal mergedRanges2.Length
        rangeValues |> Array.forall ((=) 0) |> True
        if n <> 0 then
            // add two randomly chosen duplicates and shuffle: the result must not change
            set.Add(keys[rand.Next(keys.Length)])
            set.Add(keys[rand.Next(keys.Length)])
            let keys2 = set.ToArray()
            shuffleArray rand keys2
            let mergedRanges3 = collectSortAndMergeRanges keys2
            mergedRanges3 |> Equal mergedRanges2

    collectSortAndMergeRanges [|int32Max; int32Min|] |> Equal [|Range(int32Min, int32Min); Range(int32Max, int32Max)|]
    collectSortAndMergeRanges [|int32Max - 1; int32Max; int32Min; int32Min + 1|] |> Equal [|Range(int32Min, int32Min + 1); Range(int32Max - 1, int32Max)|]
    collectSortAndMergeKeyValueRanges null [|2,0; 1,0|] |> Equal ([|Range(1, 1); Range(2, 2)|], [|0; 0|])

    try
        collectSortAndMergeKeyValueRanges null [|1,1;2,2;1,3|] |> ignore
        Fail()
    with :? System.ArgumentException -> ()

/// Checks sumOfLengths and sumOfCappedLengths on small inputs and on ranges
/// that touch Int32.MinValue / Int32.MaxValue.
let testSumsOfLengths() =
    sumOfLengths [|Range(1,1)|] 0 1 |> Equal 1.
    sumOfLengths [|Range(-1,1); Range(2,2)|] 0 2 |> Equal 4.
    sumOfLengths [|Range(int32Min, int32Max)|] 0 1 |> Equal (double int32Max - double int32Min + 1.)
    sumOfLengths [|Range(int32Min, int32Max); Range(int32Min, 0); Range(1, int32Max); Range(int32Min, int32Max);|] 1 3 |> Equal (double int32Max - double int32Min + 1.)

    sumOfCappedLengths 1 [|Range(1,1)|] 0 1 |> Equal 1.
    sumOfCappedLengths 3 [|Range(-1,1); Range(2,2)|] 0 2 |> Equal 4.
    sumOfCappedLengths 1 [|Range(-1,1); Range(2,2)|] 0 2 |> Equal 2.
    sumOfCappedLengths int32Max [|Range(int32Min, int32Max)|] 0 1 |> Equal (double int32Max)
    sumOfCappedLengths 1 [|Range(int32Min, int32Max)|] 0 1 |> Equal 1.
    sumOfCappedLengths int32Max [|Range(int32Min, int32Max); Range(int32Min, 0); Range(1, int32Max); Range(int32Min, int32Max);|] 1 3 |> Equal (double int32Max + double int32Max)
    sumOfCappedLengths int32Max [|Range(int32Min, -2); Range(-1,0); Range(1, int32Max)|] 0 3 |> Equal (double int32Max - double int32Min + 1.)

/// Checks the (index, flag) pair returned by findPivot for several range arrays.
let testFindPivot() =
    findPivot [|Range(1,1)|] 0 1 |> Equal (0, false)
    findPivot [|Range(1,1); Range(3,3)|] 0 2 |> Equal (0, true)
    findPivot [|Range(1,1); Range(3,3)|] 1 2 |> Equal (1, false)
    findPivot [|Range(1,1); Range(3,4)|] 0 2|> Equal (1, false)
    findPivot [|Range(0,1); Range(2,3); Range(5,5)|] 0 3 |> Equal (1, true)
    findPivot [|Range(0,1); Range(2,3); Range(5,5); Range(8, 10)|] 0 4 |> Equal (2, true)
    findPivot [|Range(1, 1); Range(3,3); Range(5,5); Range(7,7)|] 0 4 |> Equal (1, true)
    findPivot [|Range(1, 1); Range(2,3); Range(5,6); Range(7,7)|] 0 4 |> Equal (1, true)
    findPivot [|Range(1, 1); Range(2,5); Range(6,8)|] 0 3 |> Equal (1, true)

/// Runs all Range tests of this module.
let run() =
    testCheckRanges()
    testSortAndMergeRanges()
    testSortAndMergeKeyValueRanges()
    testMergeSortedKeyLabelRanges()
    testCollectSortAndMergeRanges()
    testSumsOfLengths()
    testFindPivot()

#endif

================================================
FILE: Test/StaticMappingTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2010-2011
// License: Simplified BSD License. See accompanying documentation.
module FParsec.Test.StaticMappingTests

#if !LOW_TRUST

open FParsec
open FParsec.Range
open FParsec.StaticMapping

open FParsec.Test.Test

type EqualityComparer<'a> = System.Collections.Generic.EqualityComparer<'a>

/// Checks the generated int and char indicator functions against the range
/// sets they were built from.
let testCreateIndicatorFunction() =
    // Walks minValue..maxValue and checks that `indicator` returns `value`
    // inside the (sorted) ranges and `not value` everywhere else.
    let test (ranges: Range[]) value (minValue: int) (maxValue: int) indicator =
        let mutable i = minValue
        for r in ranges do
            while i < r.Min do
                indicator i |> Equal (not value)
                i <- i + 1
            while i <= r.Max do
                indicator i |> Equal value
                i <- i + 1
        while i <= maxValue do
            indicator i |> Equal (not value)
            i <- i + 1

    FParsec.Emit.noBitVectorTests <- true

    // check all possible subsets of {1,2,...,N}
    let N = 11
    let set = new ResizeArray<_>(N)
    for n = 0 to (1 <<< N) - 1 do
        // bit k of n set  <=>  value k + 1 is a member of the subset
        let mutable b = n
        let mutable i = 1
        while b <> 0 do
            if (b &&& 1) <> 0 then set.Add(i)
            b <- b >>> 1
            i <- i + 1
        let ranges = collectSortAndMergeRanges set
        let indicator = createStaticIntIndicatorFunctionImpl 0 0. 0 (N + 1) false ranges
        test ranges true 0 (N + 1) indicator
        let indicator2 = createStaticIntIndicatorFunctionImpl 0 0. 0 (N + 1) true ranges
        test ranges false 0 (N + 1) indicator2
        set.Clear()

    FParsec.Emit.noBitVectorTests <- false

    let rand = new System.Random(1234)
    // check some random subsets of {1,2,...,N}
    let N = 16000
    for n = 1 to 1000 do
        let p = rand.NextDouble()
        for i = 0 to N do
            if rand.NextDouble() <= p then set.Add(i)
        let invert = rand.NextDouble() <= 0.5
        let ranges = collectSortAndMergeRanges set
        let indicator = createStaticIntIndicatorFunctionImpl 32 0.4 0 N invert ranges
        test ranges (not invert) 0 N indicator
        set.Clear()

    // ranges ending at 31, 32, 63 and 64
    let () =
        let ranges = [|Range(1,1); Range(30,31)|]
        let indicator = createStaticIntIndicatorFunctionImpl 0 0. 0 31 false ranges
        test ranges true 0 31 indicator

    let () =
        let ranges = [|Range(1,1); Range(30,32)|]
        let indicator = createStaticIntIndicatorFunctionImpl 0 0. 0 32 false ranges
        test ranges true 0 32 indicator

    let () =
        let ranges = [|Range(1,1); Range(60,63)|]
        let indicator = createStaticIntIndicatorFunctionImpl 0 0. 0 63 false ranges
        test ranges true 0 63 indicator

    let () =
        let ranges = [|Range(1,1); Range(60,64)|]
        let indicator = createStaticIntIndicatorFunctionImpl 0 0. 0 64 false ranges
        test ranges true 0 64 indicator

    let () =
        let ranges = [|Range(1,1); Range(3,5); Range(6,7)|]
        let indicator = createStaticIntRangeIndicatorFunction false ranges
        test ranges true 0 9 indicator
        let indicator2 = createStaticIntIndicatorFunction false [1;3;4;5;6;7]
        test ranges true 0 9 indicator2

    let () =
        let indicator = createStaticCharIndicatorFunction false ['\u0000';'\ufffe';'\uffff']
        indicator '\u0000' |> Equal true
        indicator '\u0001' |> Equal false
        indicator '\ufffd' |> Equal false
        indicator '\ufffe' |> Equal true
        indicator '\uffff' |> Equal true

        let indicator2 = createStaticCharRangeIndicatorFunction false [Range(0,0); Range(0xfffe, 0xffff)]
        indicator2 '\u0000' |> Equal true
        indicator2 '\u0001' |> Equal false
        indicator2 '\ufffd' |> Equal false
        indicator2 '\ufffe' |> Equal true
        indicator2 '\uffff' |> Equal true

    // ranges reaching outside 0..0xffff are expected to be rejected
    let () =
        try createStaticCharRangeIndicatorFunction false [Range(0xfffe, 0xffff); Range(-1,0);] |> ignore; Fail()
        with :? System.ArgumentException -> ()
        try createStaticCharRangeIndicatorFunction false [Range(0,0); Range(0xfffe, 0x10000)] |> ignore; Fail()
        with :? System.ArgumentException -> ()

    ()

// A struct without fields, used below as a mapping value type.
type TestStruct(value: int) = struct end

// A two-field struct with custom equality, used below as a mapping value type.
// NOTE(review): the attribute was reduced to `[]` in this copy of the file;
// a struct that overrides Equals needs CustomEquality - confirm against the
// repository.
[<CustomEquality; NoComparison>]
type TestStruct2 = struct
    val Field1: int64
    val Field2: int64
    new (value: int) = {Field1 = int64 value; Field2 = int64 value}

    override t.Equals(other: obj) =
        match other with
        | :? TestStruct2 as o -> t.Field1 = o.Field1 && t.Field2 = o.Field2
        | _ -> false

    override t.GetHashCode() = 0
end

/// Checks the generated int -> value mappings for random range/value sets
/// and for a list of value types.
let testCreateStaticIntMapping() =
    // Walks minKey..maxKey and checks that `mapping` returns the value of the
    // range containing the key, or defaultValue outside all ranges.
    let test (ranges: Range[]) (values: 't[]) defaultValue (minKey: int) (maxKey: int) mapping =
        let mutable i = minKey
        for r, value in Seq.zip ranges values do
            while i < r.Min do
                mapping i |> Equal defaultValue
                i <- i + 1
            while i <= r.Max do
                mapping i |> Equal value
                i <- i + 1
        while i <= maxKey do
            mapping i |> Equal defaultValue
            i <- i + 1

    let ranges = new ResizeArray<_>()
    let rand = new System.Random(1234)
    let N = 16000
    for n = 0 to 2000 do
        ranges.Clear()
        // cover 0..N-1 with consecutive ranges of random length
        let maxRangeLength = 1 + rand.Next(128)
        let mutable i = 0
        while i < N do
            let length = 1 + rand.Next(maxRangeLength)
            let i2 = min (i + length) N
            ranges.Add(Range(i, i2 - 1))
            i <- i2
        let ranges = ranges.ToArray()
        let values = Array.zeroCreate ranges.Length
        let mutable lastValue = 7
        for i = 0 to values.Length - 1 do
            // value in 0-7, but different from the last value
            lastValue <- (lastValue + 1 + rand.Next(7)) % 8
            values[i] <- byte lastValue
        let ranges, values = filterOutDefaultValueRanges EqualityComparer<_>.Default ranges values 0uy
        let mapping =
            createStaticIntMappingImpl
                //defaultMappingLengthCap defaultMappingDensityThreshold
                16 0.90
                0 (N - 1)
                0uy ranges values
        test ranges values 0uy 0 (N - 1) mapping

    // Checks every listed key, the keys just below the minimum and just above
    // the maximum, and Int32.MinValue.
    let test2 keyValues defaultValue =
        let mapping = createStaticIntMapping defaultValue keyValues
        for k,v in keyValues do
            mapping k |> Equal v
        mapping ((keyValues |> List.minBy (fun (k,v) -> k) |> fst) - 1) |> Equal defaultValue
        mapping ((keyValues |> List.maxBy (fun (k,v) -> k) |> fst) + 1) |> Equal defaultValue
        mapping System.Int32.MinValue |> Equal defaultValue

    test2 [0, 1; 1, 1; 2, 1;] 0
    test2 [1, true; 2, true; 3, true; 4, true] false
    test2 [1, '1'; 2, '2'; 3, '1'; 4, '0'] '0'
    test2 [1, 1y; 2, 2y; 3, 1y; 4, 0y] 0y
    test2 [1, 1uy; 2, 2uy; 3, 1uy; 4, 0uy] 0uy
    test2 [1, 1s; 2, 2s; 3, 1s; 4, 0s] 0s
    test2 [1, 1us; 2, 2us; 3, 1us; 4, 0us] 0us
    test2 [1, 1u; 2, 2u; 3, 1u; 4, 0u] 0u
    test2 [1, 1L; 2, 2L; 3, 1L; 4, 0L; 5, System.Int64.MaxValue] 0L
    test2 [1, 1UL; 2, 2UL; 3, 1UL; 4, 0UL; 5, System.UInt64.MaxValue] 0UL
    test2 [1, 1n; 2, 2n; 3, 1n; 4, 0n] 0n
    test2 [1, 1un; 2, 2un; 3, 1un; 4, 0un] 0un
    test2 [1, 1.f; 2, 2.f; 3, 1.f; 4, 0.f] 0.f
    test2 [1, 1.; 2, 2.; 3, 1.; 4, 0.] 0.
    test2 [1, "1"; 2, "2"; 3, "1"; 4, ""] ""
    test2 [1, "1"; 2, "2"; 3, "1"; 4, ""] null
    test2 [1, TestStruct(1); 2, TestStruct(2); 3, TestStruct(1); 4, TestStruct(0)] (TestStruct(0))
    test2 [1, TestStruct2(1); 2, TestStruct2(2); 3, TestStruct2(1); 4, TestStruct2(0)] (TestStruct2(0))
    test2 [1, FParsec.Associativity.Left; 2, FParsec.Associativity.Right; 3, FParsec.Associativity.Left; 4, FParsec.Associativity.None] FParsec.Associativity.None

    let () =
        let mapping = createStaticIntRangeMapping 0 [Range(1,1), 1; Range(3,3), 2; Range(4,5), 2; Range(6,6), 0]
        mapping 0 |> Equal 0
        mapping 1 |> Equal 1
        mapping 2 |> Equal 0
        mapping 3 |> Equal 2
        mapping 4 |> Equal 2
        mapping 5 |> Equal 2
        mapping 6 |> Equal 0
        mapping 7 |> Equal 0

    // duplicate keys and overlapping ranges are expected to be rejected
    let () =
        try createStaticIntMapping 0 [1, 0; 1, 0] |> ignore; Fail()
        with :? System.ArgumentException -> ()
        try createStaticIntRangeMapping 0 [Range(0, 1), 0; Range(1, 2), 0] |> ignore; Fail()
        with :? System.ArgumentException -> ()

    ()

/// Checks the generated string -> value mappings.
let testCreateStaticStringMapping() =
    // For strings of length 64..71 and each of their proper prefixes, checks
    // that changing any single char makes the mapping return the default.
    let testStringComparison() =
        for nn = 64 to 71 do
            let chars = [|for i = 1 to nn do yield char (32 + i)|]
            let str = new string(chars)
            for n = 0 to str.Length - 1 do
                let subStr = str.Substring(0, n)
                let mapping = createStaticStringMapping 0 [str, 1; subStr, 2]
                mapping str |> Equal 1
                mapping subStr |> Equal 2
                for i = 0 to chars.Length - 1 do
                    let c = chars[i]
                    chars[i] <- char (int c + 1)
                    mapping (new string(chars)) |> Equal 0
                    chars[i] <- char 0
                    mapping (new string(chars)) |> Equal 0
                    chars[i] <- c // restore the original char

    testStringComparison()

    // Every key must map to its value, every string in defaultTestStrings
    // to defaultValue.
    let test defaultValue stringValues defaultTestStrings =
        let mapping = createStaticStringMapping defaultValue stringValues
        for str, value in stringValues do
            let v = mapping str
            v |> Equal value
        for str in defaultTestStrings do
            let v = mapping str
            v |> Equal defaultValue

    test 0 [] ["\u0000"]
    test 0 ["", 1] ["\u0000"]
    test 0 ["\u0000", 1] [""; "a"]
    test 0 ["", 1; "\u0000", 2] ["a"]
    test 0 ["\u0000", 1; "\u0001", 2] [""; "\u0002"]
    test 0 ["\u0000", 1; "\u0001", 2; "\u0002\u0003", 3] [""; "\u0002"]
    test 0 ["", 1; "\u0000", 2; "\u0001", 3] ["\u0002"]
    test 0 ["\u0001", 3; "\u0000", 2; "", 1] ["\u0002"]
    test 0 ["\u0001", 2; "\u0002\u0003", 3] [""; "\u0000"; "\u0002"]
    test 0 ["\u0001", 1; "\u0002", 2] [""; "\u0000"]
    test 0 ["", 1; "\u0001", 2; "\u0002", 3] ["\u0000"; "a"]
    test 0 ["\u0001", 1; "\u0002", 1] [""; "\u0000"; "a"]
    test 0 ["", 1; "\u0001", 2; "\u0002", 2] ["\u0000"; "a"]
    test 0 ["", 1; "\u0000", 2; "\u0001", 2] ["\u0002"; "a"]
    test 0 ["prefix1", 1; "prefix2", 2; "prefix3", 3] [""; "prefix"; "prefix\u0000"]
    test 0 ["prefix1", 2; "prefix2", 2; "prefix3", 2] [""; "prefix"; "prefix\u0000"]
    test 0 ["prefix1postfix", 2; "prefix2postfix", 2; "prefix3postfix.", 2] [""; "prefix"; "prefix\u0000"]
    test 0 ["", -1; "prefix", 1; "prefix1", 1; "prefix2", 2; "prefix3", 3] ["prefix\u0000"]

    test 0 ["", -1; "postfix1", 1; "postfix2", 2;
            "test/postfix1", 1; "test/postfix2", 2;
            "test/test/postfix1", 1; "test/test/postfix2", 2;
            "test/test/test/postfix1", 1; "test/test/test/postfix2", 2;
            "test/test/test|postfix1", 1; "test/test/test|postfix2", 2]
           []

    // The values form a consecutive series; "land" previously read
    // " land" with the duplicate value 61.
    test 0 [|"abstract", 1; "and", 2; "as", 3; "assert", 4; "base", 5; "begin", 6; "class", 7; "default", 8;
             "delegate", 9; "done", 10; "downcast", 11; "downto", 12; "elif", 13; "else", 14; "end", 15;
             "exception", 16; "extern", 17; "finally", 18; "for", 19; "fun", 20; "function", 21; "if", 22;
             "in", 23; "inherit", 24; "inline", 25; "interface", 26; "internal", 27; "lazy", 28; "match", 29;
             "member", 30; "module", 31; "mutable", 32; "namespace", 33; "new", 34; "of", 35; "open", 36;
             "or", 37; "override", 38; "private", 39; "public", 40; "rec", 41; "static", 42; "struct", 43;
             "then", 44; "to", 45; "try", 46; "type", 47; "upcast", 48; "use", 49; "val", 50; "void", 51;
             "when", 52; "with", 53; "false", 54; "true", 55; "let", 56; "do", 57; "while", 58; "yield", 59;
             "return", 60; "asr", 61; "land", 62; "lor", 63; "lsl", 64; "lsr", 65; "lxor", 66; "mod", 67;
             "sig", 68; "atomic", 69; "break", 70; "checked", 71; "component", 72; "const", 73;
             "constraint", 74; "constructor", 75; "continue", 76; "eager", 77; "event", 78; "external", 79;
             "fixed", 80; "functor", 81; "global", 82; "include", 83; "method", 84; "mixin", 85; "object", 86;
             "parallel", 87; "process", 88; "protected", 89; "pure", 90; "sealed", 91; "tailcall", 92;
             "trait", 93; "virtual", 94; "volatile", 95|]
           []

    test "" ["1", "1"; "2", "2"; "221", "221"; "222", "222"; "3", "1"; "4", "4"] [""; "0"]
    test null ["1", "1"; "2", "2"; "221", "221"; "222", "222"; "3", "1"; "4", ""] [""; "0"]
    test (TestStruct(0)) ["1", TestStruct(1); "2", TestStruct(2); "221", TestStruct(221); "222", TestStruct(222); "3", TestStruct(1); "4", TestStruct(0)] [""; "0"]
    test (TestStruct2(0)) ["1", TestStruct2(1); "2", TestStruct2(2); "221", TestStruct2(221); "222", TestStruct2(222); "3", TestStruct2(1); "4", TestStruct2(0)] [""; "0"]
    test FParsec.Associativity.Left ["1", FParsec.Associativity.None; "2", FParsec.Associativity.Right; "3", FParsec.Associativity.None] [""; "4"]

    // null keys and duplicate keys are expected to be rejected
    try createStaticStringMapping 0 [null, 1] |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try createStaticStringMapping 0 ["1", 1; null, 2] |> ignore; Fail()
    with :? System.ArgumentException -> ()

    try createStaticStringMapping 0 ["", 1; "", 2] |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try createStaticStringMapping 0 ["1", 1; "", 2; "3", 3; "", 4] |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try createStaticStringMapping 0 ["1", 1; "1", 2] |> ignore; Fail()
    with :? System.ArgumentException -> ()
    try createStaticStringMapping 0 ["0", 1; "1", 2; "3", 3; "1", 4] |> ignore; Fail()
    with :? System.ArgumentException -> ()

    // applying a mapping to a null string is expected to throw
    try createStaticStringMapping 0 [] null |> ignore; Fail()
    with :? System.NullReferenceException | :? System.ArgumentNullException -> ()
    try createStaticStringMapping 0 ["1", 1] null |> ignore; Fail()
    with :? System.NullReferenceException | :? System.ArgumentNullException -> ()
    try createStaticStringMapping 0 ["1", 1; "2", 2] null |> ignore; Fail()
    with :? System.NullReferenceException | :? System.ArgumentNullException -> ()

/// Runs all tests of this module.
let run() =
    testCreateIndicatorFunction()
    testCreateStaticIntMapping()
    testCreateStaticStringMapping()

#endif

================================================
FILE: Test/StringBufferTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2009-2010
// License: Simplified BSD License. See accompanying documentation.

module FParsec.Test.StringBufferTests

#if LOW_TRUST

let run() = ()

#else

open FParsec.Test.Test

type StringBuffer = FParsec.StringBuffer

// This test relies on the internal assert checks in StringBuffer,
// hence this is only really a proper test in Debug builds or
// if you compile FParsecCS with the DEBUG_STRINGBUFFER define.
/// Creates and disposes StringBuffer instances of random sizes in random
/// order, keeping the total allocated length below maxTotalSize.
let test() =
    // NOTE(review): the type argument of typeof was missing in this copy of
    // the file; the fields read below are looked up on StringBuffer.
    let ty = typeof<StringBuffer>
    let getStaticField name = getStaticField ty name

    let minChunkSize          = getStaticField "MinChunkSize" : int
    let firstSegmentSmallSize = getStaticField "FirstSegmentSmallSize" : int
    let firstSegmentLargeSize = getStaticField "FirstSegmentLargeSize" : int
    let maxSegmentSize        = getStaticField "MaxSegmentSize" : int

    let testConstructor() =
        let buffer1 = StringBuffer.Create(0)
        buffer1.Dispose()
        let buffer1 = StringBuffer.Create(firstSegmentSmallSize)
        buffer1.Dispose()
        let buffer2 = StringBuffer.Create(maxSegmentSize + 1)
        buffer2.Dispose()
        try StringBuffer.Create(System.Int32.MaxValue) |> ignore; Fail()
        with :? System.ArgumentOutOfRangeException -> ()
        try StringBuffer.Create(-1) |> ignore; Fail()
        with :? System.ArgumentOutOfRangeException -> ()

    testConstructor()

    let rand = System.Random(1054754)

    let maxBufferSize = 196608
    let maxTotalSize = 1 <<< 22
    let buffers = new ResizeArray<_>()
    let mutable allocated = 0
    let mutable maxReached = false
    for i = 1 to 10000 do
        if (not maxReached && rand.Next(2) = 0) || buffers.Count = 0 then
            // allocate up to 10 buffers, stopping once the cap would be exceeded
            let maxSize = rand.Next(maxBufferSize + 1)
            let n = rand.Next(1, 11)
            for i = 1 to n do
                let size = rand.Next(maxSize + 1)
                if allocated + size < maxTotalSize then
                    let buffer = StringBuffer.Create(size)
                    allocated <- allocated + buffer.Length
                    buffers.Add(buffer)
                else
                    maxReached <- true
        else
            // dispose a random number of randomly chosen buffers
            maxReached <- false
            let n = rand.Next(1, buffers.Count + 1)
            for i = 1 to n do
                let idx = rand.Next(buffers.Count)
                let buffer = buffers[idx]
                allocated <- allocated - buffer.Length
                buffer.Dispose()
                buffers.RemoveAt(idx)

    // dispose whatever is left, in reverse allocation order
    buffers.Reverse()
    for b in buffers do b.Dispose()

/// Runs all tests of this module.
let run() =
    test()

#endif

================================================
FILE: Test/Test-LowTrust.fsproj
================================================
 net6
================================================
FILE: Test/Test.fs
================================================
// Copyright (c) Stephan Tolksdorf 2007-2009
// License: Simplified BSD License.
// See accompanying documentation.

module FParsec.Test.Test

open System.Runtime.CompilerServices

#if NETCORE
open System.Reflection
#endif

open FParsec
open FParsec.Error
open FParsec.Primitives

exception TestFailed of string

/// Raises TestFailed.
let Fail() = raise (TestFailed("Test failed."))

/// Fails unless cond is true.
let True cond = if not cond then Fail ()

/// Fails unless cond is false.
let False cond = if cond then Fail ()

/// Fails unless the boxed argument is null.
let IsNull x =
    match box x with
    | null -> ()
    | _    -> Fail()

// Kept out of line so that the inlined Equal below stays small.
// NOTE(review): the attribute was reduced to `[]` in this copy of the file;
// NoInlining is what the otherwise unused CompilerServices import suggests -
// confirm against the repository.
[<MethodImpl(MethodImplOptions.NoInlining)>]
let EqualFail_ a b = Fail()

// inline to allow the F# compiler to optimize the equality comparison
let inline Equal a b =
    if not (a = b) then EqualFail_ a b

/// Fails if a = b.
let NotEqual a b = if a = b then Fail ()

/// Fails unless a and b are the same object.
let ReferenceEqual (a: 't) (b: 't) =
    if not (System.Object.ReferenceEquals(a, b)) then Fail ()

// Runs `parser` on `content` and checks that it succeeds with the given
// result and error messages after advancing nSkippedChars chars.
// withNewline states whether LineBegin is expected to have moved off 0.
// NOTE(review): the CharStream type argument was missing in this copy of the
// file; it is left to inference here - confirm the intended user state type.
let private ROkE_ withNewline (content: string) nSkippedChars result error (parser: Parser<_,_>) =
    use stream = new CharStream<_>(content, 0, content.Length)
    let mutable reply = parser stream
    if reply.Status <> Ok
       || reply.Result <> result
       || reply.Error <> error
       || stream.Index <> (int64 nSkippedChars)
       || (stream.LineBegin = 0L) <> (not withNewline)
    then
        // break into the debugger and re-run the parser from the start,
        // so that the failing run can be stepped through
        System.Diagnostics.Debugger.Break()
        stream.Seek(0L)
        stream.SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(1L)
        stream.SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(0L)
        reply <- parser stream
    reply.Status |> Equal Ok
    reply.Result |> Equal result
    reply.Error  |> Equal error
    stream.Index |> Equal (int64 nSkippedChars)
    (stream.LineBegin = 0L) |> Equal (not withNewline)

/// Expects success without error messages and without a consumed newline.
let ROk content nSkippedChars result parser =
    ROkE_ false content nSkippedChars result NoErrorMessages parser

/// Expects success with the given error messages and without a consumed newline.
let ROkE content nSkippedChars result error parser =
    ROkE_ false content nSkippedChars result error parser

/// Expects success without error messages after a consumed newline.
let ROkNL content nSkippedChars result parser =
    ROkE_ true content nSkippedChars result NoErrorMessages parser

// Runs `parser` on `content` and checks that it fails with the given status
// and error messages after advancing nSkippedChars chars.
let private RError_ status (content: string) nSkippedChars error (parser: Parser<_,_>) =
    use stream = new CharStream<_>(content, 0, content.Length)
    let mutable reply = parser stream
    if reply.Status <> status
       || reply.Error <> error
       || stream.Index <> (int64 nSkippedChars)
    then
        System.Diagnostics.Debugger.Break()
        stream.Seek(0L)
        stream.SetLine_WithoutCheckAndWithoutIncrementingTheStateTag(1L)
        stream.SetLineBegin_WithoutCheckAndWithoutIncrementingTheStateTag(0L)
        reply <- parser stream
    // compare against the expected status (this used to be hard-coded to
    // Error, which made RFatalError impossible to satisfy)
    reply.Status |> Equal status
    reply.Error  |> Equal error
    stream.Index |> Equal (int64 nSkippedChars)

/// Expects the parser to fail with status Error.
let RError content nSkippedChars error parser =
    RError_ Error content nSkippedChars error parser

/// Expects the parser to fail with status FatalError.
let RFatalError content nSkippedChars error parser =
    RError_ FatalError content nSkippedChars error parser

//let EqualParser (parser1: Parser<'a,'u>) state (parser2: Parser<'a,'u>) =
//    let reply1 = parser1 state
//    let reply2 = parser2 state
//    Equal reply1 reply2

// we use the following flag to allow test parsers with mutable state
// to repeat the last action in order to simplify debugging
let mutable checkParserRepeat = false

/// Runs parser1 and parser2 from the same stream state and checks that they
/// produce the same reply and leave the stream in an equivalent state.
let checkParser (parser1: Parser<'a,'u>) (parser2: Parser<'a,'u>) (stream: CharStream<'u>) =
    let state0 = stream.State

    let mutable reply1 = parser1 stream
    let mutable state1 = stream.State
    let mutable index1 = stream.Index

    stream.BacktrackTo(state0)
    let mutable reply2 = parser2 stream
    let mutable state2 = stream.State
    let mutable index2 = stream.Index

    if reply1 <> reply2
       || index1 <> index2
       || state1.Line <> state2.Line
       || state1.LineBegin <> state2.LineBegin
       || state1.Name <> state2.Name
       || state1.UserState <> state2.UserState
       || (state1.Tag <> state0.Tag) <> (state2.Tag <> state0.Tag)
    then
        if System.Diagnostics.Debugger.IsAttached then
            System.Diagnostics.Debugger.Break()
        checkParserRepeat <- true
        // step into the following parser calls to see what goes wrong
        stream.BacktrackTo(state0)
        reply1 <- parser1 stream
        state1 <- stream.State
        index1 <- stream.Index
        stream.BacktrackTo(state0)
        reply2 <- parser2 stream
        state2 <- stream.State
        index2 <- stream.Index
        stream.BacktrackTo(state0)

    // Compare the values captured right after each parser ran. Reading the
    // live stream here would, after the rewind above, compare parser1
    // against the initial state and could hide a real mismatch.
    Equal reply1.Status reply2.Status
    Equal reply1.Error reply2.Error
    if reply1.Status = Ok then
        Equal reply1.Result reply2.Result
    Equal index1 index2
    Equal state1.Line state2.Line
    Equal state1.LineBegin state2.LineBegin
    Equal state1.Name state2.Name
    Equal state1.UserState state2.UserState
    Equal (state1.Tag <> state0.Tag) (state2.Tag <> state0.Tag)

/// Runs checkParser on a fresh stream over str.
let checkParserStr parser1 parser2 (str: string) =
    use stream = new CharStream<_>(str, 0, str.Length)
    checkParser parser1 parser2 stream

/// Returns six parsers that ignore the input and reply with Ok, Error and
/// FatalError, each once without and once with a user state increment.
let constantTestParsers r e : Parser<'a, int>[] =
    [| // we rely on the order of these parsers
       (fun s -> Reply(Ok, r, e));
       (fun s -> s.UserState <- s.UserState + 1; Reply(Ok, r, e));
       (fun s -> Reply(Error, e));
       (fun s -> s.UserState <- s.UserState + 1; Reply(Error, e));
       (fun s -> Reply(FatalError, e));
       (fun s -> s.UserState <- s.UserState + 1; Reply(FatalError, e));
    |]

/// Builds a parser from a list of constant test parsers. The first parser
/// will be used for the first invocation, the second for the next
/// invocation, and so on. The reset function can be used to reset the aggregate parser.
let seqParserAndReset ps =
    let mutable psr = ps
    let mutable inRepeat = false
    (fun stream ->
         // on the first call after checkParser requested a repeat,
         // start again from the first parser
         if checkParserRepeat && not inRepeat then
             inRepeat <- true
             psr <- ps
         match psr with
         | hd::tl -> psr <- tl; hd stream
         | [] -> Reply(Error, NoErrorMessages)),
    (fun () -> psr <- ps)

/// Returns two independent sequence parsers over ps and one function that
/// resets both.
let seqParserAndReset2 ps =
    let p1, p1r = seqParserAndReset ps
    let p2, p2r = seqParserAndReset ps
    p1, p2, (fun () -> p1r(); p2r())

// BindingFlags is fully qualified below because System.Reflection is only
// opened when NETCORE is defined.

/// Sets the non-public static field `name` of type t to v.
let setStaticField (t: System.Type) name v =
    let flags = System.Reflection.BindingFlags.NonPublic ||| System.Reflection.BindingFlags.Static
    t.GetField(name, flags).SetValue(null, v)

/// Returns the unboxed value of the non-public static field `name` of type t.
let getStaticField (t: System.Type) name =
    let flags = System.Reflection.BindingFlags.NonPublic ||| System.Reflection.BindingFlags.Static
    // static fields are read with a null instance argument
    unbox (t.GetField(name, flags).GetValue(null))

/// Shuffles xs in place (Fisher-Yates).
let shuffleArray (rand: System.Random) (xs: 'a[]) =
    let n = xs.Length
    for i = 0 to n - 2 do
        // Next's upper bound is exclusive, so r is in 0..n-i-1 and i + r can
        // reach the last element (it previously could not).
        let r = rand.Next(n - i)
        let t = xs[i]
        xs[i] <- xs[i + r]
        xs[i + r] <- t

let inline _1< ^t when ^t : (static member One : ^t) > = LanguagePrimitives.GenericOne< ^t >

================================================
FILE: Test/Test.fsproj
================================================
 net6
================================================
FILE: Test/Test.targets
================================================
 Test FParsec.Test Exe false $(DefineConstants);USE_STATIC_MAPPING_FOR_IS_ANY_OF;DISABLE_STREAM_BACKTRACKING_TESTS $(DefineConstants);NETCORE
================================================
FILE: Test/TextTests.fs
================================================
// Copyright (c) Stephan Tolksdorf 2009-2010
// License: Simplified BSD License. See accompanying documentation.

module FParsec.Test.TextTests

#if NETCORE
open System.Reflection
#endif

open FParsec.Test.Test

type Text = FParsec.Text

/// Checks Text.FoldCase on a few literals and against the internal
/// one-to-one case folding table.
let testFoldCase() =
    Text.FoldCase(null) |> Equal null
    // strings that need no folding are expected to be returned as the same instance
    for s in [""; "a"; "aa"; "aaa"] do
        Text.FoldCase(s) |> ReferenceEqual s

    Text.FoldCase("A")   |> Equal "a"
    Text.FoldCase("aA")  |> Equal "aa"
    Text.FoldCase("aaA") |> Equal "aaa"
    Text.FoldCase("abcAOUÄÖÜdef") |> Equal "abcaouäöüdef"

    let oneToOneMappings =
        // NOTE(review): the type argument of typeof was missing in this copy
        // of the file; any type from the FParsecCS assembly should do here.
        let a = typeof<Text>.Assembly
        getStaticField (a.GetType("FParsec.CaseFoldTable")) "oneToOneMappings" : string

    // the table is read as (char, folded char) pairs; chars between two
    // table entries must fold to themselves
    let mutable j = 0
    for i in 0..2..(oneToOneMappings.Length - 2) do
        let c = int oneToOneMappings[i]
        for k = j to c - 1 do
            Text.FoldCase((char k).ToString())[0] |> Equal (char k)
        Text.FoldCase((char c).ToString())[0] |> Equal oneToOneMappings[i + 1]
        j <- c + 1
    j |> Equal 0xff3b

/// Checks Text.NormalizeNewlines against String.Replace for all short
/// sequences of '\r', '\n' and '_'.
let testNormalizeNewlines() =
    Text.NormalizeNewlines(null) |> Equal null
    Text.NormalizeNewlines("")   |> ReferenceEqual ""
    Text.NormalizeNewlines("ab") |> ReferenceEqual "ab"

    let check (cs: char[]) n =
        let str = new string(cs, 0, n)
        let nstr = str.Replace("\r\n", "\n").Replace("\r", "\n")
        let nstr2 = Text.NormalizeNewlines(str)
        Equal nstr nstr2

    // fills cs[i..n-1] with every combination of the three chars
    let rec test (cs: char[]) n i =
        if i < n then
            cs[i] <- '\r'
            test cs n (i + 1)
            cs[i] <- '\n'
            test cs n (i + 1)
            cs[i] <- '_'
            test cs n (i + 1)
        else
            check cs n

    let N = 10
    let cs = Array.zeroCreate N
    for n = 1 to 8 do // test all possible character sequences of length n consisting of '\r','\n' or '_' chars
        test cs n 0

    // make sure there is no size-specific copying problem
    for n = 1 to 24 do
        let s = new string('_', n)
        Text.NormalizeNewlines("\r" + s)   |> Equal ("\n" + s)
        Text.NormalizeNewlines("\r\n" + s) |> Equal ("\n" + s)

    Text.NormalizeNewlines("_\n_\r\n_\r\r_\r\n_\r\n\n\r_") |> Equal "_\n_\n_\n\n_\n_\n\n\n_"

/// Checks Text.CountTextElements against StringInfo's text element
/// enumerator for combinations of chars from several categories.
let testCountTextElements() =
    let countTextElementsRef s =
        let te = System.Globalization.StringInfo.GetTextElementEnumerator(s)
        let mutable count = 0
        while te.MoveNext() do count <- count + 1
        count

    let chars = [|"\u0020"; "\u007e"; "\U0001D41A";
                  "\u001F"; "\u007F"; // control
                  "\u00AD"; "\U0001D173"; // format
                  string '\ud800'; // surrogate (uses string '...' to work around an fsc parser issue)
                  "\u0333"; "\U000101FD" // nonspacing mark
                  "\u0BBE"; "\U0001D166" // spacing combining mark
                  "\u20DD" // enclosing mark
                |]

    for c in chars do
        Text.CountTextElements(c) |> Equal (countTextElementsRef c)

    for c1 in chars do
        for c2 in chars do
            let s = c1 + c2
            Text.CountTextElements(s) |> Equal (countTextElementsRef s)

    // random concatenations of five entries
    let rand = System.Random(1234)
    let strings = Array.zeroCreate 5
    for i = 0 to 100000 do
        for j = 0 to strings.Length - 1 do
            strings[j] <- chars[rand.Next()%chars.Length]
        let s = System.String.Concat(strings)
        Text.CountTextElements(s) |> Equal (countTextElementsRef s)

/// Checks the Text surrogate predicates against System.Char for all chars.
let testIsSurrogate() =
    for c = 0 to 0xffff do
        let c = char c
        Text.IsSurrogate(c)     |> Equal (System.Char.IsSurrogate(c))
        Text.IsLowSurrogate(c)  |> Equal (System.Char.IsLowSurrogate(c))
        Text.IsHighSurrogate(c) |> Equal (System.Char.IsHighSurrogate(c))

/// Checks Text.IsWhitespace against System.Char.IsWhiteSpace for all chars.
let testIsWhitespace() =
    for c = 0 to 0xffff do
        Text.IsWhitespace(char c) |> Equal (System.Char.IsWhiteSpace(char c))

/// Runs all tests of this module.
let run() =
    testNormalizeNewlines()
    testFoldCase()
    testCountTextElements()
    testIsWhitespace()
    testIsSurrogate()

================================================
FILE: global.json
================================================
{
  "sdk": {
    "version": "7.0.100",
    "rollForward": "latestMajor"
  }
}
================================================
FILE: pack.ps1
================================================
# This PowerShell script builds the FParsec NuGet packages.
#
# Run this script from the VS2019 Command Prompt, e.g. with
#     powershell -ExecutionPolicy ByPass -File pack.ps1 -versionSuffix "" > pack.out.txt
# or on macOS e.g. with
#     pwsh -File pack.ps1 -versionSuffix "" > pack.out.txt

Param(
  [string]$versionSuffix = "dev"
)

$ErrorActionPreference = 'Stop'

$configSuffices = @('-LowTrust') # The non-LowTrust version currently doesn't pass the tests.
# Target frameworks the tests are run on, per configuration suffix.
$testTargetFrameworks = @{''          = @('net6')
                          '-LowTrust' = @('net6')}

# Echoes and runs a command line; throws if the command exits with a non-zero code.
function invoke([string] $cmd) {
    echo ''
    echo $cmd
    Invoke-Expression $cmd
    if ($LastExitCode -ne 0) {
        throw "Non-zero exit code: $LastExitCode"
    }
}

# Remove the output of previous builds. This is best effort: a folder that
# cannot be removed is reported but does not abort the script.
# Forward slashes are used because they work on both Windows and macOS.
foreach ($folder in @("nupkgs", "FParsecCS/obj", "FParsecCS/bin", "FParsec/obj", "FParsec/bin")) {
    if (Test-Path $folder) {
        try {
            Remove-Item $folder -Recurse
        } catch {
            Write-Warning "Could not remove '$folder': $_"
        }
    }
}

# Join-Path uses the platform's separator; a literal backslash would become
# part of the directory name on macOS.
$nupkgDir = Join-Path $pwd 'nupkgs'

foreach ($configSuffix in $configSuffices) {
    $config = "Release$configSuffix"
    $props = "-c $config -p:VersionSuffix=$versionSuffix -p:FParsecNuGet=true -p:Platform=AnyCPU"
    invoke "dotnet build FParsec/FParsec$configSuffix.fsproj $props -v n"
    invoke "dotnet pack FParsec/FParsec$configSuffix.fsproj $props -o ""$nupkgDir"""
    invoke "dotnet build Test/Test$configSuffix.fsproj $props -v n"
    foreach ($tf in $testTargetFrameworks[$configSuffix]) {
        # run the tests once per configured target framework
        invoke "dotnet run --no-build --project Test/Test$configSuffix.fsproj $props -f $tf"
    }
}

================================================
FILE: readme.md
================================================
# FParsec

FParsec is a [parser combinator](https://en.wikipedia.org/wiki/Parser_combinator) library for [F#](http://fsharp.org/).

With FParsec you can implement [recursive‐descent](https://en.wikipedia.org/wiki/Recursive_descent_parser) text parsers for [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar).
FParsec’s features include:

- support for context‐sensitive, infinite look‐ahead grammars,
- automatically generated, highly readable error messages,
- Unicode support,
- efficient support for very large files,
- an embeddable, runtime‐configurable [operator‐precedence parser](https://en.wikipedia.org/wiki/Operator-precedence_parser) component,
- a simple, efficient and easily extensible API,
- an implementation thoroughly optimized for performance,
- comprehensive documentation,
- a permissive open source license.

## Documentation

- [FParsec vs alternatives](http://www.quanttec.com/fparsec/about/fparsec-vs-alternatives.html)
- [NuGet packages and building FParsec from source](http://www.quanttec.com/fparsec/download-and-installation.html)
- [Tutorial](http://www.quanttec.com/fparsec/tutorial.html)
- [User's guide](http://www.quanttec.com/fparsec/users-guide/)
- [Parser quick reference](http://www.quanttec.com/fparsec/reference/parser-overview.html)
- [Reference](http://www.quanttec.com/fparsec/reference/)

## License

- *Code*: 2-clause BSD license ("Simplified BSD License")
- *Data*: FParsec includes some data derived from the Unicode Character Database, which is distributed under the [Unicode, Inc. License Agreement](http://www.unicode.org/copyright.html#Exhibit1).
- *Documentation*: Creative Commons Attribution‐NonCommercial 3.0 Unported License

See [www.quanttec.com/fparsec/license.html](http://www.quanttec.com/fparsec/license.html) for more details.